Merge tag 'upstream/2016.06.25'
author    Rogério Brito <rbrito@ime.usp.br>
          Sat, 25 Jun 2016 01:51:51 +0000 (22:51 -0300)
committer Rogério Brito <rbrito@ime.usp.br>
          Sat, 25 Jun 2016 01:51:51 +0000 (22:51 -0300)
Upstream version 2016.06.25

499 files changed:
.github/ISSUE_TEMPLATE.md [new file with mode: 0644]
.github/ISSUE_TEMPLATE_tmpl.md [new file with mode: 0644]
.gitignore [new file with mode: 0644]
.travis.yml [new file with mode: 0644]
AUTHORS [new file with mode: 0644]
CONTRIBUTING.md [new file with mode: 0644]
Makefile
README.md
README.txt [deleted file]
devscripts/buildserver.py
devscripts/create-github-release.py [new file with mode: 0644]
devscripts/install_srelay.sh [new file with mode: 0755]
devscripts/lazy_load_template.py [new file with mode: 0644]
devscripts/make_issue_template.py [new file with mode: 0644]
devscripts/make_lazy_extractors.py [new file with mode: 0644]
devscripts/prepare_manpage.py
devscripts/release.sh
docs/supportedsites.md
setup.cfg [new file with mode: 0644]
setup.py
test/helper.py
test/swftests/ArrayAccess.swf [deleted file]
test/swftests/ClassCall.swf [deleted file]
test/swftests/ClassConstruction.swf [deleted file]
test/swftests/ConstArrayAccess.swf [deleted file]
test/swftests/ConstantInt.swf [deleted file]
test/swftests/DictCall.swf [deleted file]
test/swftests/EqualsOperator.swf [deleted file]
test/swftests/LocalVars.swf [deleted file]
test/swftests/MemberAssignment.swf [deleted file]
test/swftests/NeOperator.swf [deleted file]
test/swftests/PrivateCall.swf [deleted file]
test/swftests/PrivateVoidCall.swf [deleted file]
test/swftests/StaticAssignment.swf [deleted file]
test/swftests/StaticConstArrayAccess.swf [deleted file]
test/swftests/StaticRetrieval.swf [deleted file]
test/swftests/StringBasics.swf [deleted file]
test/swftests/StringCharCodeAt.swf [deleted file]
test/swftests/StringConversion.swf [deleted file]
test/swftests/StringFunctions.swf [deleted file]
test/test_InfoExtractor.py
test/test_YoutubeDL.py
test/test_compat.py
test/test_http.py
test/test_socks.py [new file with mode: 0644]
test/test_utils.py
test/test_youtube_lists.py
test/video-vid.mp4 [deleted file]
tox.ini [new file with mode: 0644]
youtube-dl [deleted file]
youtube-dl.1 [deleted file]
youtube-dl.bash-completion [deleted file]
youtube-dl.fish [deleted file]
youtube-dl.plugin.zsh [new file with mode: 0644]
youtube-dl.zsh [deleted file]
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/compat.py
youtube_dl/downloader/__init__.py
youtube_dl/downloader/common.py
youtube_dl/downloader/dash.py
youtube_dl/downloader/external.py
youtube_dl/downloader/f4m.py
youtube_dl/downloader/fragment.py
youtube_dl/downloader/hls.py
youtube_dl/downloader/rtsp.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/abc.py
youtube_dl/extractor/abc7news.py
youtube_dl/extractor/abcnews.py [new file with mode: 0644]
youtube_dl/extractor/acast.py
youtube_dl/extractor/addanime.py
youtube_dl/extractor/adobetv.py
youtube_dl/extractor/aenetworks.py
youtube_dl/extractor/afreecatv.py [new file with mode: 0644]
youtube_dl/extractor/aftonbladet.py
youtube_dl/extractor/aljazeera.py
youtube_dl/extractor/amp.py
youtube_dl/extractor/animeondemand.py
youtube_dl/extractor/anvato.py [new file with mode: 0644]
youtube_dl/extractor/aol.py
youtube_dl/extractor/appletrailers.py
youtube_dl/extractor/ard.py
youtube_dl/extractor/arte.py
youtube_dl/extractor/atresplayer.py
youtube_dl/extractor/audimedia.py
youtube_dl/extractor/audioboom.py [new file with mode: 0644]
youtube_dl/extractor/audiomack.py
youtube_dl/extractor/azubu.py
youtube_dl/extractor/baidu.py
youtube_dl/extractor/bambuser.py
youtube_dl/extractor/bandcamp.py
youtube_dl/extractor/bbc.py
youtube_dl/extractor/beeg.py
youtube_dl/extractor/behindkink.py
youtube_dl/extractor/bet.py
youtube_dl/extractor/bilibili.py
youtube_dl/extractor/biobiochiletv.py [new file with mode: 0644]
youtube_dl/extractor/biqle.py [new file with mode: 0644]
youtube_dl/extractor/bleacherreport.py
youtube_dl/extractor/bloomberg.py
youtube_dl/extractor/bokecc.py [new file with mode: 0644]
youtube_dl/extractor/bpb.py
youtube_dl/extractor/br.py
youtube_dl/extractor/bravotv.py [new file with mode: 0644]
youtube_dl/extractor/breakcom.py
youtube_dl/extractor/brightcove.py
youtube_dl/extractor/byutv.py
youtube_dl/extractor/c56.py
youtube_dl/extractor/camdemy.py
youtube_dl/extractor/camwithher.py [new file with mode: 0644]
youtube_dl/extractor/canalplus.py
youtube_dl/extractor/carambatv.py [new file with mode: 0644]
youtube_dl/extractor/cbc.py
youtube_dl/extractor/cbs.py
youtube_dl/extractor/cbsinteractive.py [moved from youtube_dl/extractor/cnet.py with 59% similarity]
youtube_dl/extractor/cbslocal.py [new file with mode: 0644]
youtube_dl/extractor/cbsnews.py
youtube_dl/extractor/cbssports.py
youtube_dl/extractor/ccc.py
youtube_dl/extractor/cda.py [new file with mode: 0755]
youtube_dl/extractor/ceskatelevize.py
youtube_dl/extractor/channel9.py
youtube_dl/extractor/chaturbate.py
youtube_dl/extractor/cinemassacre.py [deleted file]
youtube_dl/extractor/cliphunter.py
youtube_dl/extractor/cliprs.py [new file with mode: 0644]
youtube_dl/extractor/clipsyndicate.py
youtube_dl/extractor/closertotruth.py [new file with mode: 0644]
youtube_dl/extractor/cloudy.py
youtube_dl/extractor/clubic.py
youtube_dl/extractor/cnbc.py [new file with mode: 0644]
youtube_dl/extractor/collegehumor.py [deleted file]
youtube_dl/extractor/comcarcoff.py
youtube_dl/extractor/comedycentral.py
youtube_dl/extractor/common.py
youtube_dl/extractor/commonprotocols.py [new file with mode: 0644]
youtube_dl/extractor/condenast.py
youtube_dl/extractor/coub.py [new file with mode: 0644]
youtube_dl/extractor/crunchyroll.py
youtube_dl/extractor/cspan.py
youtube_dl/extractor/ctsnews.py
youtube_dl/extractor/cwtv.py
youtube_dl/extractor/dailymail.py [new file with mode: 0644]
youtube_dl/extractor/daum.py
youtube_dl/extractor/dcn.py
youtube_dl/extractor/dctp.py
youtube_dl/extractor/deezer.py
youtube_dl/extractor/defense.py
youtube_dl/extractor/democracynow.py
youtube_dl/extractor/dfb.py
youtube_dl/extractor/discovery.py
youtube_dl/extractor/dispeak.py [new file with mode: 0644]
youtube_dl/extractor/douyutv.py
youtube_dl/extractor/dplay.py
youtube_dl/extractor/dramafever.py
youtube_dl/extractor/dreisat.py
youtube_dl/extractor/dump.py [deleted file]
youtube_dl/extractor/dvtv.py
youtube_dl/extractor/dw.py [new file with mode: 0644]
youtube_dl/extractor/eagleplatform.py
youtube_dl/extractor/ebaumsworld.py
youtube_dl/extractor/echomsk.py
youtube_dl/extractor/elpais.py
youtube_dl/extractor/engadget.py
youtube_dl/extractor/eporner.py
youtube_dl/extractor/eroprofile.py
youtube_dl/extractor/espn.py
youtube_dl/extractor/exfm.py
youtube_dl/extractor/extractors.py [new file with mode: 0644]
youtube_dl/extractor/eyedotv.py [new file with mode: 0644]
youtube_dl/extractor/facebook.py
youtube_dl/extractor/fc2.py
youtube_dl/extractor/fczenit.py
youtube_dl/extractor/firstpost.py
youtube_dl/extractor/firsttv.py
youtube_dl/extractor/fivemin.py
youtube_dl/extractor/fktv.py
youtube_dl/extractor/flickr.py
youtube_dl/extractor/footyroom.py
youtube_dl/extractor/formula1.py [new file with mode: 0644]
youtube_dl/extractor/fox.py
youtube_dl/extractor/foxgay.py
youtube_dl/extractor/foxnews.py
youtube_dl/extractor/foxsports.py
youtube_dl/extractor/franceinter.py
youtube_dl/extractor/francetv.py
youtube_dl/extractor/freespeech.py
youtube_dl/extractor/freevideo.py
youtube_dl/extractor/funimation.py
youtube_dl/extractor/funnyordie.py
youtube_dl/extractor/gameinformer.py
youtube_dl/extractor/gamekings.py
youtube_dl/extractor/gamespot.py
youtube_dl/extractor/gamestar.py
youtube_dl/extractor/gametrailers.py [deleted file]
youtube_dl/extractor/gazeta.py
youtube_dl/extractor/gdcvault.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/glide.py
youtube_dl/extractor/godtv.py [new file with mode: 0644]
youtube_dl/extractor/googledrive.py
youtube_dl/extractor/goshgay.py
youtube_dl/extractor/gputechconf.py
youtube_dl/extractor/groupon.py
youtube_dl/extractor/hbo.py [new file with mode: 0644]
youtube_dl/extractor/hearthisat.py
youtube_dl/extractor/hotnewhiphop.py
youtube_dl/extractor/howcast.py
youtube_dl/extractor/howstuffworks.py
youtube_dl/extractor/huffpost.py
youtube_dl/extractor/hypem.py
youtube_dl/extractor/imdb.py
youtube_dl/extractor/indavideo.py
youtube_dl/extractor/infoq.py
youtube_dl/extractor/instagram.py
youtube_dl/extractor/internetvideoarchive.py
youtube_dl/extractor/iprima.py
youtube_dl/extractor/iqiyi.py
youtube_dl/extractor/ivideon.py
youtube_dl/extractor/izlesene.py
youtube_dl/extractor/jadorecettepub.py [deleted file]
youtube_dl/extractor/jeuxvideo.py
youtube_dl/extractor/jwplatform.py
youtube_dl/extractor/kaltura.py
youtube_dl/extractor/karaoketv.py
youtube_dl/extractor/karrierevideos.py
youtube_dl/extractor/khanacademy.py
youtube_dl/extractor/kontrtube.py
youtube_dl/extractor/ku6.py
youtube_dl/extractor/kusi.py [new file with mode: 0644]
youtube_dl/extractor/kuwo.py
youtube_dl/extractor/laola1tv.py
youtube_dl/extractor/learnr.py [new file with mode: 0644]
youtube_dl/extractor/lecture2go.py
youtube_dl/extractor/leeco.py [moved from youtube_dl/extractor/letv.py with 81% similarity]
youtube_dl/extractor/libraryofcongress.py [new file with mode: 0644]
youtube_dl/extractor/lifenews.py
youtube_dl/extractor/limelight.py
youtube_dl/extractor/litv.py [new file with mode: 0644]
youtube_dl/extractor/liveleak.py
youtube_dl/extractor/livestream.py
youtube_dl/extractor/localnews8.py [new file with mode: 0644]
youtube_dl/extractor/lrt.py
youtube_dl/extractor/lynda.py
youtube_dl/extractor/m6.py
youtube_dl/extractor/mailru.py
youtube_dl/extractor/makerschannel.py [new file with mode: 0644]
youtube_dl/extractor/malemotion.py [deleted file]
youtube_dl/extractor/matchtv.py
youtube_dl/extractor/mdr.py
youtube_dl/extractor/metacafe.py
youtube_dl/extractor/metacritic.py
youtube_dl/extractor/mgtv.py [new file with mode: 0644]
youtube_dl/extractor/microsoftvirtualacademy.py [new file with mode: 0644]
youtube_dl/extractor/minhateca.py
youtube_dl/extractor/ministrygrid.py
youtube_dl/extractor/minoto.py [new file with mode: 0644]
youtube_dl/extractor/mit.py
youtube_dl/extractor/mitele.py
youtube_dl/extractor/mixcloud.py
youtube_dl/extractor/mnet.py [new file with mode: 0644]
youtube_dl/extractor/moevideo.py
youtube_dl/extractor/moniker.py
youtube_dl/extractor/mooshare.py [deleted file]
youtube_dl/extractor/motherless.py
youtube_dl/extractor/motorsport.py
youtube_dl/extractor/movieclips.py
youtube_dl/extractor/mtv.py
youtube_dl/extractor/musicplayon.py
youtube_dl/extractor/muzu.py [deleted file]
youtube_dl/extractor/mwave.py
youtube_dl/extractor/myspace.py
youtube_dl/extractor/myspass.py
youtube_dl/extractor/myvideo.py
youtube_dl/extractor/myvidster.py
youtube_dl/extractor/nationalgeographic.py
youtube_dl/extractor/naver.py
youtube_dl/extractor/nba.py
youtube_dl/extractor/nbc.py
youtube_dl/extractor/ndtv.py
youtube_dl/extractor/nerdist.py [deleted file]
youtube_dl/extractor/neteasemusic.py
youtube_dl/extractor/newgrounds.py
youtube_dl/extractor/newstube.py
youtube_dl/extractor/nextmedia.py
youtube_dl/extractor/nextmovie.py
youtube_dl/extractor/nfb.py
youtube_dl/extractor/nhl.py
youtube_dl/extractor/nick.py
youtube_dl/extractor/niconico.py
youtube_dl/extractor/noco.py
youtube_dl/extractor/normalboots.py
youtube_dl/extractor/nova.py
youtube_dl/extractor/novamov.py
youtube_dl/extractor/nowness.py
youtube_dl/extractor/noz.py
youtube_dl/extractor/npr.py
youtube_dl/extractor/nrk.py
youtube_dl/extractor/ntvru.py
youtube_dl/extractor/nuvid.py
youtube_dl/extractor/nytimes.py
youtube_dl/extractor/odnoklassniki.py
youtube_dl/extractor/once.py [new file with mode: 0644]
youtube_dl/extractor/onionstudios.py
youtube_dl/extractor/ooyala.py
youtube_dl/extractor/openload.py [new file with mode: 0644]
youtube_dl/extractor/ora.py
youtube_dl/extractor/orf.py
youtube_dl/extractor/patreon.py
youtube_dl/extractor/pbs.py
youtube_dl/extractor/people.py [new file with mode: 0644]
youtube_dl/extractor/periscope.py
youtube_dl/extractor/philharmoniedeparis.py
youtube_dl/extractor/photobucket.py
youtube_dl/extractor/planetaplay.py [deleted file]
youtube_dl/extractor/played.py
youtube_dl/extractor/playtvak.py
youtube_dl/extractor/playwire.py
youtube_dl/extractor/pluralsight.py
youtube_dl/extractor/porn91.py
youtube_dl/extractor/pornhd.py
youtube_dl/extractor/pornhub.py
youtube_dl/extractor/pornovoisines.py
youtube_dl/extractor/presstv.py [new file with mode: 0644]
youtube_dl/extractor/primesharetv.py
youtube_dl/extractor/promptfile.py
youtube_dl/extractor/prosiebensat1.py
youtube_dl/extractor/puls4.py
youtube_dl/extractor/pyvideo.py
youtube_dl/extractor/qqmusic.py
youtube_dl/extractor/quickvid.py [deleted file]
youtube_dl/extractor/r7.py
youtube_dl/extractor/radiocanada.py [new file with mode: 0644]
youtube_dl/extractor/radiojavan.py
youtube_dl/extractor/rai.py
youtube_dl/extractor/redtube.py
youtube_dl/extractor/restudy.py
youtube_dl/extractor/reuters.py [new file with mode: 0644]
youtube_dl/extractor/revision3.py
youtube_dl/extractor/rice.py [new file with mode: 0644]
youtube_dl/extractor/ringtv.py
youtube_dl/extractor/rockstargames.py [new file with mode: 0644]
youtube_dl/extractor/rottentomatoes.py
youtube_dl/extractor/rtbf.py
youtube_dl/extractor/rte.py
youtube_dl/extractor/rtlnl.py
youtube_dl/extractor/rtve.py
youtube_dl/extractor/rtvnh.py
youtube_dl/extractor/ruhd.py
youtube_dl/extractor/rutube.py
youtube_dl/extractor/rutv.py
youtube_dl/extractor/safari.py
youtube_dl/extractor/sbs.py
youtube_dl/extractor/scivee.py
youtube_dl/extractor/screencast.py
youtube_dl/extractor/screencastomatic.py
youtube_dl/extractor/screenjunkies.py
youtube_dl/extractor/screenwavemedia.py
youtube_dl/extractor/seeker.py [new file with mode: 0644]
youtube_dl/extractor/senateisvp.py
youtube_dl/extractor/sendtonews.py [new file with mode: 0644]
youtube_dl/extractor/sexu.py
youtube_dl/extractor/shahid.py
youtube_dl/extractor/shared.py
youtube_dl/extractor/sharesix.py
youtube_dl/extractor/sina.py
youtube_dl/extractor/smotri.py
youtube_dl/extractor/sohu.py
youtube_dl/extractor/soundcloud.py
youtube_dl/extractor/space.py [deleted file]
youtube_dl/extractor/spankwire.py
youtube_dl/extractor/sport5.py
youtube_dl/extractor/sportbox.py
youtube_dl/extractor/sportschau.py [new file with mode: 0644]
youtube_dl/extractor/ssa.py
youtube_dl/extractor/streamcloud.py
youtube_dl/extractor/streetvoice.py
youtube_dl/extractor/svt.py
youtube_dl/extractor/sztvhu.py
youtube_dl/extractor/tagesschau.py
youtube_dl/extractor/tdslifeway.py [new file with mode: 0644]
youtube_dl/extractor/teachingchannel.py
youtube_dl/extractor/teamcoco.py
youtube_dl/extractor/ted.py
youtube_dl/extractor/tele13.py
youtube_dl/extractor/telebruxelles.py
youtube_dl/extractor/telecinco.py
youtube_dl/extractor/telegraaf.py
youtube_dl/extractor/telewebion.py [new file with mode: 0644]
youtube_dl/extractor/tenplay.py [deleted file]
youtube_dl/extractor/tf1.py
youtube_dl/extractor/theonion.py [deleted file]
youtube_dl/extractor/theplatform.py
youtube_dl/extractor/thescene.py [new file with mode: 0644]
youtube_dl/extractor/thesixtyone.py
youtube_dl/extractor/thestar.py [new file with mode: 0644]
youtube_dl/extractor/threeqsdn.py [new file with mode: 0644]
youtube_dl/extractor/thvideo.py
youtube_dl/extractor/tinypic.py
youtube_dl/extractor/tlc.py
youtube_dl/extractor/tnaflix.py
youtube_dl/extractor/toypics.py
youtube_dl/extractor/traileraddict.py
youtube_dl/extractor/trollvids.py
youtube_dl/extractor/tubitv.py
youtube_dl/extractor/tudou.py
youtube_dl/extractor/tumblr.py
youtube_dl/extractor/tunein.py
youtube_dl/extractor/tv2.py
youtube_dl/extractor/tv3.py [new file with mode: 0644]
youtube_dl/extractor/tvc.py
youtube_dl/extractor/tvigle.py
youtube_dl/extractor/tvp.py
youtube_dl/extractor/tvplay.py
youtube_dl/extractor/twentyfourvideo.py
youtube_dl/extractor/twentymin.py
youtube_dl/extractor/twitch.py
youtube_dl/extractor/twitter.py
youtube_dl/extractor/ubu.py [deleted file]
youtube_dl/extractor/udemy.py
youtube_dl/extractor/udn.py
youtube_dl/extractor/unistra.py
youtube_dl/extractor/usatoday.py [new file with mode: 0644]
youtube_dl/extractor/ustream.py
youtube_dl/extractor/ustudio.py [new file with mode: 0644]
youtube_dl/extractor/varzesh3.py
youtube_dl/extractor/vbox7.py
youtube_dl/extractor/veoh.py
youtube_dl/extractor/vessel.py
youtube_dl/extractor/vesti.py
youtube_dl/extractor/vevo.py
youtube_dl/extractor/vgtv.py
youtube_dl/extractor/vice.py
youtube_dl/extractor/viddler.py
youtube_dl/extractor/videodetective.py
youtube_dl/extractor/videomega.py
youtube_dl/extractor/videomore.py
youtube_dl/extractor/videott.py
youtube_dl/extractor/vidio.py [new file with mode: 0644]
youtube_dl/extractor/vidzi.py
youtube_dl/extractor/vier.py
youtube_dl/extractor/viewlift.py [moved from youtube_dl/extractor/snagfilms.py with 80% similarity]
youtube_dl/extractor/viewster.py
youtube_dl/extractor/viidea.py
youtube_dl/extractor/viki.py
youtube_dl/extractor/vimeo.py
youtube_dl/extractor/vine.py
youtube_dl/extractor/vk.py
youtube_dl/extractor/vlive.py
youtube_dl/extractor/vodlocker.py
youtube_dl/extractor/voicerepublic.py
youtube_dl/extractor/voxmedia.py [new file with mode: 0644]
youtube_dl/extractor/vporn.py
youtube_dl/extractor/vrt.py
youtube_dl/extractor/vube.py
youtube_dl/extractor/vuclip.py
youtube_dl/extractor/vulture.py [deleted file]
youtube_dl/extractor/walla.py
youtube_dl/extractor/washingtonpost.py
youtube_dl/extractor/wat.py
youtube_dl/extractor/watchindianporn.py [moved from youtube_dl/extractor/sexykarma.py with 54% similarity]
youtube_dl/extractor/wayofthemaster.py [deleted file]
youtube_dl/extractor/wdr.py
youtube_dl/extractor/webofstories.py
youtube_dl/extractor/weibo.py [deleted file]
youtube_dl/extractor/weiqitv.py
youtube_dl/extractor/wimp.py
youtube_dl/extractor/wistia.py
youtube_dl/extractor/wrzuta.py
youtube_dl/extractor/wsj.py
youtube_dl/extractor/xbef.py
youtube_dl/extractor/xboxclips.py
youtube_dl/extractor/xfileshare.py
youtube_dl/extractor/xhamster.py
youtube_dl/extractor/xiami.py [new file with mode: 0644]
youtube_dl/extractor/xminus.py
youtube_dl/extractor/xnxx.py
youtube_dl/extractor/xuite.py
youtube_dl/extractor/xvideos.py
youtube_dl/extractor/yahoo.py
youtube_dl/extractor/yam.py
youtube_dl/extractor/yandexmusic.py
youtube_dl/extractor/ynet.py
youtube_dl/extractor/youku.py
youtube_dl/extractor/youporn.py
youtube_dl/extractor/youtube.py
youtube_dl/extractor/zdf.py
youtube_dl/jsinterp.py
youtube_dl/options.py
youtube_dl/postprocessor/__init__.py
youtube_dl/postprocessor/execafterdownload.py
youtube_dl/postprocessor/ffmpeg.py
youtube_dl/postprocessor/xattrpp.py
youtube_dl/socks.py [new file with mode: 0644]
youtube_dl/swfinterp.py
youtube_dl/update.py
youtube_dl/utils.py
youtube_dl/version.py

diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
new file mode 100644 (file)
index 0000000..c73f9a9
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE.md
@@ -0,0 +1,58 @@
+## Please follow the guide below
+
+- You will be asked some questions and requested to provide some information; please read them **carefully** and answer honestly
+- Put an `x` into all the boxes [ ] relevant to your *issue* (like this: [x])
+- Use the *Preview* tab to see how your issue will actually look
+
+---
+
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.25*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues reported with an outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.25**
+
+### Before submitting an *issue* make sure you have:
+- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
+- [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones
+
+### What is the purpose of your *issue*?
+- [ ] Bug report (encountered problems with youtube-dl)
+- [ ] Site support request (request for adding support for a new site)
+- [ ] Feature request (request for a new functionality)
+- [ ] Question
+- [ ] Other
+
+---
+
+### The following sections request details for particular issue types; you can erase any section (the contents between triple ---) that is not applicable to your *issue*
+
+---
+
+### If the purpose of this *issue* is a *bug report*, a *site support request*, or you are not completely sure, provide the full verbose output as follows:
+
+Add the `-v` flag to the **command line** you run youtube-dl with, copy the **whole** output and insert it here. It should look similar to the one below (replace it with **your** log, inserted between triple ```):
+```
+$ youtube-dl -v <your command line>
+[debug] System config: []
+[debug] User config: []
+[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
+[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
+[debug] youtube-dl version 2016.06.25
+[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
+[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
+[debug] Proxy map: {}
+...
+<end of log>
+```
+
+---
+
+### If the purpose of this *issue* is a *site support request*, please provide all kinds of example URLs for which support should be included (replace the following example URLs with **yours**):
+- Single video: https://www.youtube.com/watch?v=BaW_jenozKc
+- Single video: https://youtu.be/BaW_jenozKc
+- Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc
+
+---
+
+### Description of your *issue*, suggested solution and other information
+
+Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible.
+If work on your *issue* requires account credentials, please provide them or explain how one can obtain them.
diff --git a/.github/ISSUE_TEMPLATE_tmpl.md b/.github/ISSUE_TEMPLATE_tmpl.md
new file mode 100644 (file)
index 0000000..a5e6a42
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE_tmpl.md
@@ -0,0 +1,58 @@
+## Please follow the guide below
+
+- You will be asked some questions and requested to provide some information; please read them **carefully** and answer honestly
+- Put an `x` into all the boxes [ ] relevant to your *issue* (like this: [x])
+- Use the *Preview* tab to see how your issue will actually look
+
+---
+
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *%(version)s*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues reported with an outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **%(version)s**
+
+### Before submitting an *issue* make sure you have:
+- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
+- [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones
+
+### What is the purpose of your *issue*?
+- [ ] Bug report (encountered problems with youtube-dl)
+- [ ] Site support request (request for adding support for a new site)
+- [ ] Feature request (request for a new functionality)
+- [ ] Question
+- [ ] Other
+
+---
+
+### The following sections request details for particular issue types; you can erase any section (the contents between triple ---) that is not applicable to your *issue*
+
+---
+
+### If the purpose of this *issue* is a *bug report*, a *site support request*, or you are not completely sure, provide the full verbose output as follows:
+
+Add the `-v` flag to the **command line** you run youtube-dl with, copy the **whole** output and insert it here. It should look similar to the one below (replace it with **your** log, inserted between triple ```):
+```
+$ youtube-dl -v <your command line>
+[debug] System config: []
+[debug] User config: []
+[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
+[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
+[debug] youtube-dl version %(version)s
+[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
+[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
+[debug] Proxy map: {}
+...
+<end of log>
+```
+
+---
+
+### If the purpose of this *issue* is a *site support request*, please provide all kinds of example URLs for which support should be included (replace the following example URLs with **yours**):
+- Single video: https://www.youtube.com/watch?v=BaW_jenozKc
+- Single video: https://youtu.be/BaW_jenozKc
+- Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc
+
+---
+
+### Description of your *issue*, suggested solution and other information
+
+Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible.
+If work on your *issue* requires account credentials, please provide them or explain how one can obtain them.
diff --git a/.gitignore b/.gitignore
new file mode 100644 (file)
index 0000000..a802c75
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,43 @@
+*.pyc
+*.pyo
+*.class
+*~
+*.DS_Store
+wine-py2exe/
+py2exe.log
+*.kate-swp
+build/
+dist/
+MANIFEST
+README.txt
+youtube-dl.1
+youtube-dl.bash-completion
+youtube-dl.fish
+youtube_dl/extractor/lazy_extractors.py
+youtube-dl
+youtube-dl.exe
+youtube-dl.tar.gz
+.coverage
+cover/
+updates_key.pem
+*.egg-info
+*.srt
+*.sbv
+*.vtt
+*.flv
+*.mp4
+*.m4a
+*.m4v
+*.mp3
+*.part
+*.swp
+test/testdata
+test/local_parameters.json
+.tox
+youtube-dl.zsh
+
+# IntelliJ related files
+.idea
+*.iml
+
+tmp/
diff --git a/.travis.yml b/.travis.yml
new file mode 100644 (file)
index 0000000..136c339
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,21 @@
+language: python
+python:
+  - "2.6"
+  - "2.7"
+  - "3.2"
+  - "3.3"
+  - "3.4"
+  - "3.5"
+sudo: false
+install:
+  - bash ./devscripts/install_srelay.sh
+  - export PATH=$PATH:$(pwd)/tmp/srelay-0.4.8b6
+script: nosetests test --verbose
+notifications:
+  email:
+    - filippo.valsorda@gmail.com
+    - yasoob.khld@gmail.com
+#  irc:
+#    channels:
+#      - "irc.freenode.org#youtube-dl"
+#    skip_join: true
diff --git a/AUTHORS b/AUTHORS
new file mode 100644 (file)
index 0000000..cdf655c
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1,177 @@
+Ricardo Garcia Gonzalez
+Danny Colligan
+Benjamin Johnson
+Vasyl' Vavrychuk
+Witold Baryluk
+Paweł Paprota
+Gergely Imreh
+Rogério Brito
+Philipp Hagemeister
+Sören Schulze
+Kevin Ngo
+Ori Avtalion
+shizeeg
+Filippo Valsorda
+Christian Albrecht
+Dave Vasilevsky
+Jaime Marquínez Ferrándiz
+Jeff Crouse
+Osama Khalid
+Michael Walter
+M. Yasoob Ullah Khalid
+Julien Fraichard
+Johny Mo Swag
+Axel Noack
+Albert Kim
+Pierre Rudloff
+Huarong Huo
+Ismael Mejía
+Steffan 'Ruirize' James
+Andras Elso
+Jelle van der Waa
+Marcin Cieślak
+Anton Larionov
+Takuya Tsuchida
+Sergey M.
+Michael Orlitzky
+Chris Gahan
+Saimadhav Heblikar
+Mike Col
+Oleg Prutz
+pulpe
+Andreas Schmitz
+Michael Kaiser
+Niklas Laxström
+David Triendl
+Anthony Weems
+David Wagner
+Juan C. Olivares
+Mattias Harrysson
+phaer
+Sainyam Kapoor
+Nicolas Évrard
+Jason Normore
+Hoje Lee
+Adam Thalhammer
+Georg Jähnig
+Ralf Haring
+Koki Takahashi
+Ariset Llerena
+Adam Malcontenti-Wilson
+Tobias Bell
+Naglis Jonaitis
+Charles Chen
+Hassaan Ali
+Dobrosław Żybort
+David Fabijan
+Sebastian Haas
+Alexander Kirk
+Erik Johnson
+Keith Beckman
+Ole Ernst
+Aaron McDaniel (mcd1992)
+Magnus Kolstad
+Hari Padmanaban
+Carlos Ramos
+5moufl
+lenaten
+Dennis Scheiba
+Damon Timm
+winwon
+Xavier Beynon
+Gabriel Schubiner
+xantares
+Jan Matějka
+Mauroy Sébastien
+William Sewell
+Dao Hoang Son
+Oskar Jauch
+Matthew Rayfield
+t0mm0
+Tithen-Firion
+Zack Fernandes
+cryptonaut
+Adrian Kretz
+Mathias Rav
+Petr Kutalek
+Will Glynn
+Max Reimann
+Cédric Luthi
+Thijs Vermeir
+Joel Leclerc
+Christopher Krooss
+Ondřej Caletka
+Dinesh S
+Johan K. Jensen
+Yen Chi Hsuan
+Enam Mijbah Noor
+David Luhmer
+Shaya Goldberg
+Paul Hartmann
+Frans de Jonge
+Robin de Rooij
+Ryan Schmidt
+Leslie P. Polzer
+Duncan Keall
+Alexander Mamay
+Devin J. Pohly
+Eduardo Ferro Aldama
+Jeff Buchbinder
+Amish Bhadeshia
+Joram Schrijver
+Will W.
+Mohammad Teimori Pabandi
+Roman Le Négrate
+Matthias Küch
+Julian Richen
+Ping O.
+Mister Hat
+Peter Ding
+jackyzy823
+George Brighton
+Remita Amine
+Aurélio A. Heckert
+Bernhard Minks
+sceext
+Zach Bruggeman
+Tjark Saul
+slangangular
+Behrouz Abbasi
+ngld
+nyuszika7h
+Shaun Walbridge
+Lee Jenkins
+Anssi Hannula
+Lukáš Lalinský
+Qijiang Fan
+Rémy Léone
+Marco Ferragina
+reiv
+Muratcan Simsek
+Evan Lu
+flatgreen
+Brian Foley
+Vignesh Venkat
+Tom Gijselinck
+Founder Fang
+Andrew Alexeyew
+Saso Bezlaj
+Erwin de Haan
+Jens Wille
+Robin Houtevelts
+Patrick Griffis
+Aidan Rowe
+mutantmonkey
+Ben Congdon
+Kacper Michajłow
+José Joaquín Atria
+Viťas Strádal
+Kagami Hiiragi
+Philip Huppert
+blahgeek
+Kevin Deldycke
+inondle
+Tomáš Čech
+Déstin Reed
+Roman Tsiupa
+Artur Krysiak
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644 (file)
index 0000000..a59fac9
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,158 @@
+**Please include the full output of youtube-dl when run with `-v`**, i.e. **add** the `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
+```
+$ youtube-dl -v <your command line>
+[debug] System config: []
+[debug] User config: []
+[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
+[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
+[debug] youtube-dl version 2015.12.06
+[debug] Git HEAD: 135392e
+[debug] Python version 2.6.6 - Windows-2003Server-5.2.3790-SP2
+[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
+[debug] Proxy map: {}
+...
+```
+**Do not post screenshots of the verbose log; only plain text is acceptable.**
+
+The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
+
+Please re-read your issue to avoid a couple of common mistakes (you can and should use this as a checklist):
+
+### Is the description of the issue itself sufficient?
+
+We often get issue reports that we cannot really decipher. While in most cases we eventually get the required information after asking back multiple times, this poses an unnecessary drain on our resources. Many contributors, including myself, are also not native speakers, so we may misread some parts.
+
+So please elaborate on what feature you are requesting, or what bug you want to be fixed. Make sure that it's obvious
+
+- What the problem is
+- How it could be fixed
+- How your proposed solution would look
+
+If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a committer myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over.
+
+For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the `-v` flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
+
+If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
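+
+For example, a typical diagnostic run combining these flags might look like this (the URL and log file name are placeholders):
+```
+youtube-dl -v --dump-pages 'http://www.youtube.com/watch?v=BaW_jenozKc' >log.txt 2>&1
+```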
+
+**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `http://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `http://www.youtube.com/`) is *not* an example URL.
+
+###  Are you using the latest version?
+
+Before reporting any issue, type `youtube-dl -U`. This should report that you're up-to-date. About 20% of the reports we receive concern issues that are already fixed, but people are using outdated versions. This goes for feature requests as well.
+
+###  Is the issue already documented?
+
+Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/rg3/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.
+
+###  Why are existing options not enough?
+
+Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#synopsis). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
+
+###  Is there enough context in your bug report?
+
+People want to solve problems, and often think they do us a favor by breaking down their larger problems (e.g. wanting to skip already downloaded files) into a specific request (e.g. requesting us to check whether the file exists before downloading the info page). However, what often happens is that they break the problem down into two steps: one simple, and one impossible (or extremely complicated).
+
+We are then presented with a very complicated request when the original problem could be solved far easier, e.g. by recording the downloaded video IDs in a separate file. To avoid this, you must include the greater context where it is non-obvious. In particular, every feature request that does not consist of adding support for a new site should contain a use case scenario that explains in what situation the missing feature would be useful.
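+
+As a sketch of that simpler solution: the "skip already downloaded files" example above is already covered by the existing `--download-archive` option, which records each downloaded video ID in a file and skips those IDs on later runs:
+```
+youtube-dl --download-archive archive.txt 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc'
+```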
+
+###  Does the issue involve one problem, and one problem only?
+
+Some of our users seem to think there is a limit of issues they can or should open. There is no limit of issues they can or should open. While it may seem appealing to be able to dump all your issues into one ticket, that means that someone who solves one of your issues cannot mark the issue as closed. Typically, reporting a bunch of issues leads to the ticket lingering since nobody wants to attack that behemoth, until someone mercifully splits the issue into multiple ones.
+
+In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for vimeo user videos, Whitehouse podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of youtube-dl that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service.
+
+###  Is anyone going to need the feature?
+
+Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
+
+###  Is your question about youtube-dl?
+
+It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different or even the reporter's own application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug.
+
+# DEVELOPER INSTRUCTIONS
+
+Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
+
+To run youtube-dl as a developer, you don't need to build anything either. Simply execute
+
+    python -m youtube_dl
+
+To run the tests, simply invoke your favorite test runner, or execute a test file directly; any of the following work:
+
+    python -m unittest discover
+    python test/test_download.py
+    nosetests
+
+If you want to create a build of youtube-dl yourself, you'll need
+
+* python
+* make (both GNU make and BSD make are supported)
+* pandoc
+* zip
+* nosetests
+
+### Adding support for a new site
+
+If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites; thus pull requests adding support for them **will be rejected**.
+
+After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):
+
+1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
+2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git`
+3. Start a new git branch with `cd youtube-dl; git checkout -b yourextractor`
+4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`:
+    ```python
+    # coding: utf-8
+    from __future__ import unicode_literals
+
+    from .common import InfoExtractor
+
+
+    class YourExtractorIE(InfoExtractor):
+        _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
+        _TEST = {
+            'url': 'http://yourextractor.com/watch/42',
+            'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
+            'info_dict': {
+                'id': '42',
+                'ext': 'mp4',
+                'title': 'Video title goes here',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                # TODO more properties, either as:
+                # * A value
+                # * MD5 checksum; start the string with md5:
+                # * A regular expression; start the string with re:
+                # * Any Python type (for example int or float)
+            }
+        }
+
+        def _real_extract(self, url):
+            video_id = self._match_id(url)
+            webpage = self._download_webpage(url, video_id)
+
+            # TODO more code goes here, for example ...
+            title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
+
+            return {
+                'id': video_id,
+                'title': title,
+                'description': self._og_search_description(webpage),
+                'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
+                # TODO more properties (see youtube_dl/extractor/common.py)
+            }
+    ```
+5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
+6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
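+
+    For example, a sketch with two tests (the second, embed-style URL is hypothetical; `only_matching` marks a URL that should only be checked against `_VALID_URL`, not downloaded):
+
+    ```python
+    _TESTS = [{
+        'url': 'http://yourextractor.com/watch/42',
+        'info_dict': {
+            'id': '42',
+            'ext': 'mp4',
+            'title': 'Video title goes here',
+        },
+    }, {
+        # Hypothetical embed URL: verify the URL pattern only, skip the download
+        'url': 'http://yourextractor.com/embed/42',
+        'only_matching': True,
+    }]
+    ```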
+7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
+8. Keep in mind that the only mandatory fields in the info dict for a successful extraction process are `id`, `title` and either `url` or `formats`, i.e. the critical data without which extraction does not make any sense. This means that [any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L148-L252) apart from the aforementioned mandatory ones should be treated **as optional**, and extraction should be **tolerant** of situations where the sources for these fields are unavailable (even if they are always available at the moment) and **future-proof**, so as not to break the extraction of the general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into the resulting info dict as `description`, you should be ready for this key to be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex`/`_html_search_regex`.
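+
+    A minimal sketch of this tolerant style, reusing the hypothetical `meta` dict from above and the `uploader` regex from the template in step 4:
+
+    ```python
+    # Optional field: .get() yields None instead of a KeyError when 'summary' is missing
+    description = meta.get('summary')
+    # Optional field: fatal=False makes _search_regex return None instead of raising
+    uploader = self._search_regex(
+        r'<div[^>]+id="uploader"[^>]*>([^<]+)<',
+        webpage, 'uploader', fatal=False)
+    ```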
+9. Check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed as supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
+10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
+
+        $ git add youtube_dl/extractor/extractors.py
+        $ git add youtube_dl/extractor/yourextractor.py
+        $ git commit -m '[yourextractor] Add new extractor'
+        $ git push origin yourextractor
+
+11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
+
+In any case, thank you very much for your contributions!
+
diff --git a/Makefile b/Makefile
index cb449b7e61c4c27d44177cc775c26fa79ca97a3a..6ee4ba4ebc6804ad78061d6b346fd67cd3fd01e5 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,9 @@
 all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
 
 clean:
-       rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
+       rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
        find . -name "*.pyc" -delete
+       find . -name "*.class" -delete
 
 PREFIX ?= /usr/local
 BINDIR ?= $(PREFIX)/bin
@@ -11,15 +12,7 @@ SHAREDIR ?= $(PREFIX)/share
 PYTHON ?= /usr/bin/env python
 
 # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
-ifeq ($(PREFIX),/usr)
-       SYSCONFDIR=/etc
-else
-       ifeq ($(PREFIX),/usr/local)
-               SYSCONFDIR=/etc
-       else
-               SYSCONFDIR=$(PREFIX)/etc
-       endif
-endif
+SYSCONFDIR != if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi
 
 install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
        install -d $(DESTDIR)$(BINDIR)
@@ -44,7 +37,7 @@ test:
 ot: offlinetest
 
 offlinetest: codetest
-       nosetests --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py
+       $(PYTHON) -m nose --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py --exclude test_socks.py
 
 tar: youtube-dl.tar.gz
 
@@ -66,6 +59,9 @@ README.md: youtube_dl/*.py youtube_dl/*/*.py
 CONTRIBUTING.md: README.md
        $(PYTHON) devscripts/make_contributing.py README.md CONTRIBUTING.md
 
+.github/ISSUE_TEMPLATE.md: devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl.md  youtube_dl/version.py
+       $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl.md .github/ISSUE_TEMPLATE.md
+
 supportedsites:
        $(PYTHON) devscripts/make_supportedsites.py docs/supportedsites.md
 
@@ -73,7 +69,7 @@ README.txt: README.md
        pandoc -f markdown -t plain README.md -o README.txt
 
 youtube-dl.1: README.md
-       $(PYTHON) devscripts/prepare_manpage.py >youtube-dl.1.temp.md
+       $(PYTHON) devscripts/prepare_manpage.py youtube-dl.1.temp.md
        pandoc -s -f markdown -t man youtube-dl.1.temp.md -o youtube-dl.1
        rm -f youtube-dl.1.temp.md
 
@@ -92,6 +88,12 @@ youtube-dl.fish: youtube_dl/*.py youtube_dl/*/*.py devscripts/fish-completion.in
 
 fish-completion: youtube-dl.fish
 
+lazy-extractors: youtube_dl/extractor/lazy_extractors.py
+
+_EXTRACTOR_FILES != find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py'
+youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
+       $(PYTHON) devscripts/make_lazy_extractors.py $@
+
 youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
        @tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \
                --exclude '*.DS_Store' \
diff --git a/README.md b/README.md
index db49ab6d523a8f16fc03806d1ad27f439f5e7b14..c6feef1162e1a614e4db4d7afa02a543c15943b8 100644 (file)
--- a/README.md
+++ b/README.md
@@ -17,7 +17,7 @@ youtube-dl - download videos from youtube.com or other video platforms
 
 To install it right away for all UNIX users (Linux, OS X, etc.), type:
 
-    sudo curl https://yt-dl.org/latest/youtube-dl -o /usr/local/bin/youtube-dl
+    sudo curl -L https://yt-dl.org/latest/youtube-dl -o /usr/local/bin/youtube-dl
     sudo chmod a+rx /usr/local/bin/youtube-dl
 
 If you do not have curl, you can alternatively use a recent wget:
@@ -25,20 +25,26 @@ If you do not have curl, you can alternatively use a recent wget:
     sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
     sudo chmod a+rx /usr/local/bin/youtube-dl
 
-Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29).
+Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put it in `C:\Windows\System32`).
 
-OS X users can install **youtube-dl** with [Homebrew](http://brew.sh/).
+You can also use pip:
+
+    sudo pip install --upgrade youtube-dl
+    
+This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information.
+
+OS X users can install youtube-dl with [Homebrew](http://brew.sh/):
 
     brew install youtube-dl
 
-You can also use pip:
+Or with [MacPorts](https://www.macports.org/):
 
-    sudo pip install youtube-dl
+    sudo port install youtube-dl
 
 Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://rg3.github.io/youtube-dl/download.html).
 
 # DESCRIPTION
-**youtube-dl** is a small command-line program to download videos from
+**youtube-dl** is a command-line program to download videos from
 YouTube.com and a few more sites. It requires the Python interpreter, version
 2.6, 2.7, or 3.2+, and it is not platform specific. It should work on
 your Unix box, on Windows or on Mac OS X. It is released to the public domain,
@@ -73,19 +79,23 @@ which means you can modify it, redistribute it or use it however you like.
                                      repairs broken URLs, but emits an error if
                                      this is not possible instead of searching.
     --ignore-config                  Do not read configuration files. When given
-                                     in the global configuration file /etc
-                                     /youtube-dl.conf: Do not read the user
+                                     in the global configuration file
+                                     /etc/youtube-dl.conf: Do not read the user
                                      configuration in ~/.config/youtube-
                                      dl/config (%APPDATA%/youtube-dl/config.txt
                                      on Windows)
     --flat-playlist                  Do not extract the videos of a playlist,
                                      only list them.
+    --mark-watched                   Mark videos watched (YouTube only)
+    --no-mark-watched                Do not mark videos watched (YouTube only)
     --no-color                       Do not emit color codes in output
 
 ## Network Options:
-    --proxy URL                      Use the specified HTTP/HTTPS proxy. Pass in
-                                     an empty string (--proxy "") for direct
-                                     connection
+    --proxy URL                      Use the specified HTTP/HTTPS/SOCKS proxy.
+                                     To enable experimental SOCKS proxy, specify
+                                     a proper scheme. For example
+                                     socks5://127.0.0.1:1080/. Pass in an empty
+                                     string (--proxy "") for direct connection
     --socket-timeout SECONDS         Time to wait before giving up, in seconds
     --source-address IP              Client-side IP address to bind to
                                      (experimental)
@@ -158,10 +168,12 @@ which means you can modify it, redistribute it or use it however you like.
                                      (experimental)
 
 ## Download Options:
-    -r, --rate-limit LIMIT           Maximum download rate in bytes per second
+    -r, --limit-rate RATE            Maximum download rate in bytes per second
                                      (e.g. 50K or 4.2M)
     -R, --retries RETRIES            Number of retries (default is 10), or
                                      "infinite".
+    --fragment-retries RETRIES       Number of retries for a fragment (default
+                                     is 10), or "infinite" (DASH only)
     --buffer-size SIZE               Size of download buffer (e.g. 1024 or 16K)
                                      (default is 1024)
     --no-resize-buffer               Do not automatically adjust the buffer
@@ -172,14 +184,16 @@ which means you can modify it, redistribute it or use it however you like.
     --xattr-set-filesize             Set file xattribute ytdl.filesize with
                                      expected filesize (experimental)
     --hls-prefer-native              Use the native HLS downloader instead of
-                                     ffmpeg (experimental)
+                                     ffmpeg
+    --hls-prefer-ffmpeg              Use ffmpeg instead of the native HLS
+                                     downloader
     --hls-use-mpegts                 Use the mpegts container for HLS videos,
                                      allowing to play the video while
                                      downloading (some players may not be able
                                      to play it)
     --external-downloader COMMAND    Use the specified external downloader.
                                      Currently supports
-                                     aria2c,axel,curl,httpie,wget
+                                     aria2c,avconv,axel,curl,ffmpeg,httpie,wget
     --external-downloader-args ARGS  Give these arguments to the external
                                      downloader
 
@@ -241,18 +255,19 @@ which means you can modify it, redistribute it or use it however you like.
     --write-info-json                Write video metadata to a .info.json file
     --write-annotations              Write video annotations to a
                                      .annotations.xml file
-    --load-info FILE                 JSON file containing the video information
+    --load-info-json FILE            JSON file containing the video information
                                      (created with the "--write-info-json"
                                      option)
     --cookies FILE                   File to read cookies from and dump cookie
                                      jar in
     --cache-dir DIR                  Location in the filesystem where youtube-dl
                                      can store some downloaded information
-                                     permanently. By default $XDG_CACHE_HOME
-                                     /youtube-dl or ~/.cache/youtube-dl . At the
-                                     moment, only YouTube player files (for
-                                     videos with obfuscated signatures) are
-                                     cached, but that may change.
+                                     permanently. By default
+                                     $XDG_CACHE_HOME/youtube-dl or
+                                     ~/.cache/youtube-dl . At the moment, only
+                                     YouTube player files (for videos with
+                                     obfuscated signatures) are cached, but that
+                                     may change.
     --no-cache-dir                   Disable filesystem caching
     --rm-cache-dir                   Delete all filesystem cache files
 
@@ -374,8 +389,8 @@ which means you can modify it, redistribute it or use it however you like.
     --no-post-overwrites             Do not overwrite post-processed files; the
                                      post-processed files are overwritten by
                                      default
-    --embed-subs                     Embed subtitles in the video (only for mkv
-                                     and mp4 videos)
+    --embed-subs                     Embed subtitles in the video (only for mp4,
+                                     webm and mkv videos)
     --embed-thumbnail                Embed thumbnail in the audio as cover art
     --add-metadata                   Write metadata to the video file
     --metadata-from-title FORMAT     Parse additional metadata like song title /
@@ -409,18 +424,23 @@ which means you can modify it, redistribute it or use it however you like.
 
 # CONFIGURATION
 
-You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime and use a proxy:
+You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux and OS X, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`.
+
+For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under the `Movies` directory in your home directory:
 ```
---extract-audio
+-x
 --no-mtime
 --proxy 127.0.0.1:3128
+-o ~/Movies/%(title)s.%(ext)s
 ```
 
+Note that the options in a configuration file are just the same options (a.k.a. switches) used in regular command line calls; thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`.
+
 You can use `--ignore-config` if you want to disable the configuration file for a particular youtube-dl run.
 
 ### Authentication with `.netrc` file
 
-You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on per extractor basis. For that you will need to create a`.netrc` file in your `$HOME` and restrict permissions to read/write by you only:
+You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`), so that you do not have to pass credentials as command line arguments on every youtube-dl execution and do not leave plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on a per-extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict its permissions to read/write by you only:
 ```
 touch $HOME/.netrc
 chmod a-rwx,u+rw $HOME/.netrc
@@ -440,7 +460,11 @@ On Windows you may also need to setup the `%HOME%` environment variable manually
 
 # OUTPUT TEMPLATE
 
-The `-o` option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:
+The `-o` option allows users to indicate a template for the output file names.
+
+**tl;dr:** [navigate me to examples](#output-template-examples).
+
+The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, the template may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:
 
  - `id`: Video identifier
  - `title`: Video title
@@ -449,7 +473,8 @@ The `-o` option allows users to indicate a template for the output file names. T
  - `alt_title`: A secondary title of the video
  - `display_id`: An alternative identifier for the video
  - `uploader`: Full name of the video uploader
- - `creator`: The main artist who created the video
+ - `license`: License name the video is licensed under
+ - `creator`: The creator of the video
  - `release_date`: The date (YYYYMMDD) when the video was released
  - `timestamp`: UNIX timestamp of the moment the video became available
  - `upload_date`: Video upload date (YYYYMMDD)
@@ -486,6 +511,9 @@ The `-o` option allows users to indicate a template for the output file names. T
  - `autonumber`: Five-digit number that will be increased with each download, starting at zero
  - `playlist`: Name or id of the playlist that contains the video
  - `playlist_index`: Index of the video in the playlist padded with leading zeros according to the total length of the playlist
+ - `playlist_id`: Playlist identifier
+ - `playlist_title`: Playlist title
 
 Available for the video that belongs to some logical chapter or section:
  - `chapter`: Name or title of the chapter the video belongs to
@@ -501,6 +529,18 @@ Available for the video that is an episode of some series or programme:
  - `episode_number`: Number of the video episode within a season
  - `episode_id`: Id of the video episode
 
+Available for the media that is a track or a part of a music album:
+ - `track`: Title of the track
+ - `track_number`: Number of the track within an album or a disc
+ - `track_id`: Id of the track
+ - `artist`: Artist(s) of the track
+ - `genre`: Genre(s) of the track
+ - `album`: Title of the album the track belongs to
+ - `album_type`: Type of the album
+ - `album_artist`: List of all artists appearing on the album
+ - `disc_number`: Number of the disc or other physical medium the track belongs to
+ - `release_year`: Year (YYYY) when the album was released
+
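+For example, with these fields an audio download might use an output template like the following (just a sketch; these fields are only present when the extractor provides them, otherwise they are replaced with `NA`):
+
+```bash
+$ youtube-dl -x -o '%(artist)s/%(album)s/%(track_number)s - %(track)s.%(ext)s' 'http://some/track'
+```
+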
 Each aforementioned sequence, when referenced in an output template, will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor; such sequences will be replaced with `NA`.
 
 For example, for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj`, this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory.
@@ -513,7 +553,13 @@ The current default template is `%(title)s-%(id)s.%(ext)s`.
 
 In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title.
 
-Examples (note on Windows you may need to use double quotes instead of single):
+#### Output template and Windows batch files
+
+If you are using an output template inside a Windows batch file then you must escape plain percent characters (`%`) by doubling them, so that `-o "%(title)s-%(id)s.%(ext)s"` becomes `-o "%%(title)s-%%(id)s.%%(ext)s"`. However, you should not touch `%`s that are not plain characters, e.g. environment variables for expansion should stay intact: `-o "C:\%HOMEPATH%\Desktop\%%(title)s.%%(ext)s"`.
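+
+A minimal sketch of such a batch file (the file name and URL are just placeholders):
+
+```
+@echo off
+rem plain percents are doubled; %HOMEPATH% stays intact for expansion
+youtube-dl -o "C:\%HOMEPATH%\Desktop\%%(title)s.%%(ext)s" "http://some/video"
+```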
+
+#### Output template examples
+
+Note that on Windows you may need to use double quotes instead of single ones.
 
 ```bash
 $ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc
@@ -525,6 +571,9 @@ youtube-dl_test_video_.mp4          # A simple file name
 # Download YouTube playlist videos in separate directory indexed by video order in a playlist
 $ youtube-dl -o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s' https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re
 
+# Download all playlists of YouTube channel/user keeping each playlist in separate directory:
+$ youtube-dl -o '%(uploader)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s' https://www.youtube.com/user/TheLinuxFoundation/playlists
+
 # Download Udemy course keeping each chapter in separate directory under MyVideos directory in your home
 $ youtube-dl -u user -p password -o '~/MyVideos/%(playlist)s/%(chapter_number)s - %(chapter)s/%(title)s.%(ext)s' https://www.udemy.com/java-tutorial/
 
@@ -543,6 +592,8 @@ But sometimes you may want to download in a different format, for example when y
 
 The general syntax for format selection is `--format FORMAT` or shorter `-f FORMAT` where `FORMAT` is a *selector expression*, i.e. an expression that describes format or formats you would like to download.
 
+**tl;dr:** [navigate me to examples](#format-selection-examples).
+
 The simplest case is requesting a specific format: for example, with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for a particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific.
 
 You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file.
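+
+For instance, a typical session might look like this (a sketch; the video is the README's test video, and the set of format codes varies per video):
+
+```bash
+$ youtube-dl -F 'https://www.youtube.com/watch?v=BaW_jenozKc'      # list available format codes
+$ youtube-dl -f 22 'https://www.youtube.com/watch?v=BaW_jenozKc'   # download format code 22, if offered
+$ youtube-dl -f webm 'https://www.youtube.com/watch?v=BaW_jenozKc' # best single-file webm
+```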
@@ -579,6 +630,7 @@ Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begin
  - `vcodec`: Name of the video codec in use
  - `container`: Name of the container format
  - `protocol`: The protocol that will be used for the actual download, lower-case. `http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `m3u8`, or `m3u8_native`
+ - `format_id`: A short description of the format
 
 Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by a particular extractor, i.e. the metadata offered by the video hoster.
 
@@ -588,11 +640,14 @@ You can merge the video and audio of two formats into a single file using `-f <v
 
 Format selectors can also be grouped using parentheses, for example if you want to download the best mp4 and webm formats with a height lower than 480 you can use `-f '(mp4,webm)[height<480]'`.
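+
+For instance, combining merging and filtering (a sketch mirroring the inline examples above):
+
+```bash
+# Best mp4 video below 480p height merged with best m4a audio
+$ youtube-dl -f 'bestvideo[ext=mp4][height<480]+bestaudio[ext=m4a]' 'http://some/video'
+```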
 
-Since the end of April 2015 and version 2015.04.26 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see #5447, #5456). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading the best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some DASH formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded and muxed.
+Since the end of April 2015 and version 2015.04.26, youtube-dl uses `-f bestvideo+bestaudio/best` as the default format selection (see [#5447](https://github.com/rg3/youtube-dl/issues/5447), [#5456](https://github.com/rg3/youtube-dl/issues/5456)). If ffmpeg or avconv is installed, this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file, giving the best overall quality available. Otherwise it falls back to `best` and results in downloading the best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to download only some DASH formats (for example, if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (most likely to pipe it to your media player), i.e. you explicitly specify the output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery to your player immediately instead of waiting until `bestvideo` and `bestaudio` are downloaded and muxed.
 
 If you want to preserve the old format selection behavior (prior to youtube-dl 2015.04.26), i.e. you want to download the best available quality media served as a single file, you should explicitly specify your choice with `-f best`. You may want to add it to the [configuration file](#configuration) in order not to type it every time you run youtube-dl.
 
-Examples (note on Windows you may need to use double quotes instead of single):
+#### Format selection examples
+
+Note that on Windows you may need to use double quotes instead of single ones.
+
 ```bash
 # Download best mp4 format available or any other best if no mp4 available
 $ youtube-dl -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best'
@@ -652,12 +707,20 @@ hash -r
 
 Again, from then on you'll be able to update with `sudo youtube-dl -U`.
 
+### youtube-dl is extremely slow to start on Windows
+
+Add a file exclusion for `youtube-dl.exe` in Windows Defender settings.
+
 ### I'm getting an error `Unable to extract OpenGraph title` on YouTube playlists
 
 YouTube changed their playlist format in March 2014 and later on, so you'll need at least youtube-dl 2014.07.25 to download all YouTube videos.
 
 If you have installed youtube-dl with a package manager, pip, setup.py or a tarball, please use that to update. Note that Ubuntu packages do not seem to get updated anymore. Since we are not affiliated with Ubuntu, there is little we can do. Feel free to [report bugs](https://bugs.launchpad.net/ubuntu/+source/youtube-dl/+filebug) to the [Ubuntu packaging guys](mailto:ubuntu-motu@lists.ubuntu.com?subject=outdated%20version%20of%20youtube-dl) - all they have to do is update the package to a somewhat recent version. See above for a way to update.
 
+### I'm getting an error when trying to use output template: `error: using output template conflicts with using title, video ID or auto number`
+
+Make sure you are not using `-o` together with any of the options `-t`, `--title`, `--id`, `-A` or `--auto-number`, whether on the command line or in a configuration file. Remove whichever of the latter options are set.
+
 ### Do I always have to pass `-citw`?
 
 By default, youtube-dl intends to have the best options (incidentally, if you have a convincing case that these should be different, [please file an issue where you explain that](https://yt-dl.org/bug)). Therefore, it is unnecessary and sometimes harmful to copy long option strings from webpages. In particular, the only option out of `-citw` that is regularly useful is `-i`.
@@ -678,7 +741,7 @@ Videos or video formats streamed via RTMP protocol can only be downloaded when [
 
 ### I have downloaded a video but how can I play it?
 
-Once the video is fully downloaded, use any video player, such as [vlc](http://www.videolan.org) or [mplayer](http://www.mplayerhq.hu/).
+Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org) or [mplayer](http://www.mplayerhq.hu/).
 
 ### I extracted a video URL with `-g`, but it does not play on another machine / in my webbrowser.
 
@@ -733,11 +796,11 @@ means you're using an outdated version of Python. Please update to Python 2.6 or
 
 ### What is this binary file? Where has the code gone?
 
-Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`.
+Since June 2012 ([#342](https://github.com/rg3/youtube-dl/issues/342)) youtube-dl is packed as an executable zipfile; simply unzip it (you might need to rename it to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`.
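+
+For illustration, inspecting it might look like this (file and directory names are just examples):
+
+```bash
+$ cp youtube-dl youtube-dl.zip      # some unzip tools insist on the .zip extension
+$ unzip -o youtube-dl.zip -d youtube-dl-src
+$ python youtube-dl-src/__main__.py --version
+```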
 
-### The exe throws a *Runtime error from Visual C++*
+### The exe throws an error due to missing `MSVCR100.dll`
 
-To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29).
+To run the exe you first need to install the [Microsoft Visual C++ 2010 Redistributable Package (x86)](https://www.microsoft.com/en-US/download/details.aspx?id=5555).
 
 ### On Windows, how should I set up ffmpeg and youtube-dl? Where should I put the exe files?
 
@@ -792,6 +855,12 @@ It is *not* possible to detect whether a URL is supported or not. That's because
 
 If you want to find out whether a given URL is supported, simply call youtube-dl with it. If you get no videos back, chances are the URL is either not referring to a video or unsupported. You can find out which by examining the output (if you run youtube-dl on the console) or catching an `UnsupportedError` exception if you run it from a Python program.
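+
+A rough sketch of such a programmatic check (the exact exception wrapping may vary between versions; `UnsupportedError` and `DownloadError` live in `youtube_dl.utils`):
+
+```python
+import youtube_dl
+from youtube_dl.utils import DownloadError, UnsupportedError
+
+def is_supported(url):
+    with youtube_dl.YoutubeDL({'quiet': True}) as ydl:
+        try:
+            ydl.extract_info(url, download=False)
+            return True
+        except UnsupportedError:
+            return False
+        except DownloadError:
+            # YoutubeDL may wrap extractor errors (including
+            # UnsupportedError) in a DownloadError; treat both
+            # as "could not extract".
+            return False
+```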
 
+# Why do I need to go through that much red tape when filing bugs?
+
+Before we had the issue template, despite our extensive [bug reporting instructions](#bugs), about 80% of the issue reports we got were useless, for instance because people used ancient versions hundreds of releases old, because of simple syntactic errors (not in youtube-dl but in general shell usage), because the problem was already reported multiple times before, because people did not actually read an error message, even if it said "please install ffmpeg", because people did not mention the URL they were trying to download, and many more simple, easy-to-avoid problems, many of which were totally unrelated to youtube-dl.
+
+youtube-dl is an open-source project run by too few volunteers, so we'd rather spend time fixing bugs where we are certain none of those simple problems apply, and where we can be reasonably confident that we will be able to reproduce the issue without asking the reporter repeatedly. As such, the output of `youtube-dl -v YOUR_URL_HERE` is really all that's required to file an issue. The issue template also guides you through some basic steps you can do, such as checking that your version of youtube-dl is current.
+
 # DEVELOPER INSTRUCTIONS
 
 Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
@@ -809,14 +878,16 @@ To run the test, simply invoke your favorite test runner, or execute a test file
 If you want to create a build of youtube-dl yourself, you'll need
 
 * python
-* make
+* make (both GNU make and BSD make are supported)
 * pandoc
 * zip
 * nosetests
 
 ### Adding support for a new site
 
-If you want to add support for a new site, you can follow this quick list (assuming your service is called `yourextractor`):
+If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites; thus, pull requests adding support for them **will be rejected**.
+
+After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):
 
 1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
 2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git`
@@ -862,18 +933,19 @@ If you want to add support for a new site, you can follow this quick list (assum
                 # TODO more properties (see youtube_dl/extractor/common.py)
             }
     ```
-5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
+5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
-7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62-L200). Add tests and code for as many as you want.
-8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8).
-9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
+7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
+8. Keep in mind that the only mandatory fields in the info dict for a successful extraction are `id`, `title` and either `url` or `formats`, i.e. these are the critical data without which the extraction does not make any sense. This means that [any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L148-L252) apart from the aforementioned mandatory ones should be treated **as optional**, and extraction should be **tolerant** of situations where the sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of the general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into the resulting info dict as `description`, you should be ready for this key to be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex` (see the sketch after this list).
+9. Check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed to be supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
+10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
 
-        $ git add youtube_dl/extractor/__init__.py
+        $ git add youtube_dl/extractor/extractors.py
         $ git add youtube_dl/extractor/yourextractor.py
         $ git commit -m '[yourextractor] Add new extractor'
         $ git push origin yourextractor
 
-10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
+11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
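+
+Here is the sketch referenced in step 8 above (the `meta` dict and its keys are hypothetical):
+
+```python
+# Imagine `meta` was parsed out of the webpage by your extractor.
+meta = {'id': '42', 'title': 'some title'}
+
+info = {
+    'id': meta['id'],          # mandatory: fail loudly if missing
+    'title': meta['title'],    # mandatory
+    'url': 'http://some/video.mp4',
+    # Optional field: use .get() so a missing key yields None
+    # instead of a KeyError that kills the whole extraction.
+    'description': meta.get('summary'),
+}
+```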
 
 In any case, thank you very much for your contributions!
 
@@ -892,7 +964,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
     ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
 ```
 
-Most likely, you'll want to use various options. For a list of what can be done, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L121-L269). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
+Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L128-L278). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
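+
+For example, a minimal `logger` sketch:
+
+```python
+from __future__ import unicode_literals
+import youtube_dl
+
+class MyLogger(object):
+    def debug(self, msg):
+        pass
+    def warning(self, msg):
+        pass
+    def error(self, msg):
+        print(msg)
+
+ydl_opts = {'logger': MyLogger()}
+with youtube_dl.YoutubeDL(ydl_opts) as ydl:
+    ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
+```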
 
 Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
 
diff --git a/README.txt b/README.txt
deleted file mode 100644 (file)
index 1a674b5..0000000
+++ /dev/null
@@ -1,1410 +0,0 @@
-youtube-dl - download videos from youtube.com or other video platforms
-
--   INSTALLATION
--   DESCRIPTION
--   OPTIONS
--   CONFIGURATION
--   OUTPUT TEMPLATE
--   FORMAT SELECTION
--   VIDEO SELECTION
--   FAQ
--   DEVELOPER INSTRUCTIONS
--   EMBEDDING YOUTUBE-DL
--   BUGS
--   COPYRIGHT
-
-
-
-INSTALLATION
-
-
-To install it right away for all UNIX users (Linux, OS X, etc.), type:
-
-    sudo curl https://yt-dl.org/latest/youtube-dl -o /usr/local/bin/youtube-dl
-    sudo chmod a+rx /usr/local/bin/youtube-dl
-
-If you do not have curl, you can alternatively use a recent wget:
-
-    sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
-    sudo chmod a+rx /usr/local/bin/youtube-dl
-
-Windows users can download a .exe file and place it in their home
-directory or any other location on their PATH.
-
-OS X users can install YOUTUBE-DL with Homebrew.
-
-    brew install youtube-dl
-
-You can also use pip:
-
-    sudo pip install youtube-dl
-
-Alternatively, refer to the developer instructions for how to check out
-and work with the git repository. For further options, including PGP
-signatures, see the youtube-dl Download Page.
-
-
-
-DESCRIPTION
-
-
-YOUTUBE-DL is a small command-line program to download videos from
-YouTube.com and a few more sites. It requires the Python interpreter,
-version 2.6, 2.7, or 3.2+, and it is not platform specific. It should
-work on your Unix box, on Windows or on Mac OS X. It is released to the
-public domain, which means you can modify it, redistribute it or use it
-however you like.
-
-    youtube-dl [OPTIONS] URL [URL...]
-
-
-
-OPTIONS
-
-
-    -h, --help                       Print this help text and exit
-    --version                        Print program version and exit
-    -U, --update                     Update this program to latest version. Make
-                                     sure that you have sufficient permissions
-                                     (run with sudo if needed)
-    -i, --ignore-errors              Continue on download errors, for example to
-                                     skip unavailable videos in a playlist
-    --abort-on-error                 Abort downloading of further videos (in the
-                                     playlist or the command line) if an error
-                                     occurs
-    --dump-user-agent                Display the current browser identification
-    --list-extractors                List all supported extractors
-    --extractor-descriptions         Output descriptions of all supported
-                                     extractors
-    --force-generic-extractor        Force extraction to use the generic
-                                     extractor
-    --default-search PREFIX          Use this prefix for unqualified URLs. For
-                                     example "gvsearch2:" downloads two videos
-                                     from google videos for youtube-dl "large
-                                     apple". Use the value "auto" to let
-                                     youtube-dl guess ("auto_warning" to emit a
-                                     warning when guessing). "error" just throws
-                                     an error. The default value "fixup_error"
-                                     repairs broken URLs, but emits an error if
-                                     this is not possible instead of searching.
-    --ignore-config                  Do not read configuration files. When given
-                                     in the global configuration file /etc
-                                     /youtube-dl.conf: Do not read the user
-                                     configuration in ~/.config/youtube-
-                                     dl/config (%APPDATA%/youtube-dl/config.txt
-                                     on Windows)
-    --flat-playlist                  Do not extract the videos of a playlist,
-                                     only list them.
-    --no-color                       Do not emit color codes in output
-
-
-Network Options:
-
-    --proxy URL                      Use the specified HTTP/HTTPS proxy. Pass in
-                                     an empty string (--proxy "") for direct
-                                     connection
-    --socket-timeout SECONDS         Time to wait before giving up, in seconds
-    --source-address IP              Client-side IP address to bind to
-                                     (experimental)
-    -4, --force-ipv4                 Make all connections via IPv4
-                                     (experimental)
-    -6, --force-ipv6                 Make all connections via IPv6
-                                     (experimental)
-    --cn-verification-proxy URL      Use this proxy to verify the IP address for
-                                     some Chinese sites. The default proxy
-                                     specified by --proxy (or none, if the
-                                     options is not present) is used for the
-                                     actual downloading. (experimental)
-
-
-Video Selection:
-
-    --playlist-start NUMBER          Playlist video to start at (default is 1)
-    --playlist-end NUMBER            Playlist video to end at (default is last)
-    --playlist-items ITEM_SPEC       Playlist video items to download. Specify
-                                     indices of the videos in the playlist
-                                     separated by commas like: "--playlist-items
-                                     1,2,5,8" if you want to download videos
-                                     indexed 1, 2, 5, 8 in the playlist. You can
-                                     specify range: "--playlist-items
-                                     1-3,7,10-13", it will download the videos
-                                     at index 1, 2, 3, 7, 10, 11, 12 and 13.
-    --match-title REGEX              Download only matching titles (regex or
-                                     caseless sub-string)
-    --reject-title REGEX             Skip download for matching titles (regex or
-                                     caseless sub-string)
-    --max-downloads NUMBER           Abort after downloading NUMBER files
-    --min-filesize SIZE              Do not download any videos smaller than
-                                     SIZE (e.g. 50k or 44.6m)
-    --max-filesize SIZE              Do not download any videos larger than SIZE
-                                     (e.g. 50k or 44.6m)
-    --date DATE                      Download only videos uploaded in this date
-    --datebefore DATE                Download only videos uploaded on or before
-                                     this date (i.e. inclusive)
-    --dateafter DATE                 Download only videos uploaded on or after
-                                     this date (i.e. inclusive)
-    --min-views COUNT                Do not download any videos with less than
-                                     COUNT views
-    --max-views COUNT                Do not download any videos with more than
-                                     COUNT views
-    --match-filter FILTER            Generic video filter (experimental).
-                                     Specify any key (see help for -o for a list
-                                     of available keys) to match if the key is
-                                     present, !key to check if the key is not
-                                     present,key > NUMBER (like "comment_count >
-                                     12", also works with >=, <, <=, !=, =) to
-                                     compare against a number, and & to require
-                                     multiple matches. Values which are not
-                                     known are excluded unless you put a
-                                     question mark (?) after the operator.For
-                                     example, to only match videos that have
-                                     been liked more than 100 times and disliked
-                                     less than 50 times (or the dislike
-                                     functionality is not available at the given
-                                     service), but who also have a description,
-                                     use --match-filter "like_count > 100 &
-                                     dislike_count <? 50 & description" .
-    --no-playlist                    Download only the video, if the URL refers
-                                     to a video and a playlist.
-    --yes-playlist                   Download the playlist, if the URL refers to
-                                     a video and a playlist.
-    --age-limit YEARS                Download only videos suitable for the given
-                                     age
-    --download-archive FILE          Download only videos not listed in the
-                                     archive file. Record the IDs of all
-                                     downloaded videos in it.
-    --include-ads                    Download advertisements as well
-                                     (experimental)
-
-
-Download Options:
-
-    -r, --rate-limit LIMIT           Maximum download rate in bytes per second
-                                     (e.g. 50K or 4.2M)
-    -R, --retries RETRIES            Number of retries (default is 10), or
-                                     "infinite".
-    --buffer-size SIZE               Size of download buffer (e.g. 1024 or 16K)
-                                     (default is 1024)
-    --no-resize-buffer               Do not automatically adjust the buffer
-                                     size. By default, the buffer size is
-                                     automatically resized from an initial value
-                                     of SIZE.
-    --playlist-reverse               Download playlist videos in reverse order
-    --xattr-set-filesize             Set file xattribute ytdl.filesize with
-                                     expected filesize (experimental)
-    --hls-prefer-native              Use the native HLS downloader instead of
-                                     ffmpeg (experimental)
-    --hls-use-mpegts                 Use the mpegts container for HLS videos,
-                                     allowing to play the video while
-                                     downloading (some players may not be able
-                                     to play it)
-    --external-downloader COMMAND    Use the specified external downloader.
-                                     Currently supports
-                                     aria2c,axel,curl,httpie,wget
-    --external-downloader-args ARGS  Give these arguments to the external
-                                     downloader
-
-
-Filesystem Options:
-
-    -a, --batch-file FILE            File containing URLs to download ('-' for
-                                     stdin)
-    --id                             Use only video ID in file name
-    -o, --output TEMPLATE            Output filename template. Use %(title)s to
-                                     get the title, %(uploader)s for the
-                                     uploader name, %(uploader_id)s for the
-                                     uploader nickname if different,
-                                     %(autonumber)s to get an automatically
-                                     incremented number, %(ext)s for the
-                                     filename extension, %(format)s for the
-                                     format description (like "22 - 1280x720" or
-                                     "HD"), %(format_id)s for the unique id of
-                                     the format (like YouTube's itags: "137"),
-                                     %(upload_date)s for the upload date
-                                     (YYYYMMDD), %(extractor)s for the provider
-                                     (youtube, metacafe, etc), %(id)s for the
-                                     video id, %(playlist_title)s,
-                                     %(playlist_id)s, or %(playlist)s (=title if
-                                     present, ID otherwise) for the playlist the
-                                     video is in, %(playlist_index)s for the
-                                     position in the playlist. %(height)s and
-                                     %(width)s for the width and height of the
-                                     video format. %(resolution)s for a textual
-                                     description of the resolution of the video
-                                     format. %% for a literal percent. Use - to
-                                     output to stdout. Can also be used to
-                                     download to a different directory, for
-                                     example with -o '/my/downloads/%(uploader)s
-                                     /%(title)s-%(id)s.%(ext)s' .
-    --autonumber-size NUMBER         Specify the number of digits in
-                                     %(autonumber)s when it is present in output
-                                     filename template or --auto-number option
-                                     is given
-    --restrict-filenames             Restrict filenames to only ASCII
-                                     characters, and avoid "&" and spaces in
-                                     filenames
-    -A, --auto-number                [deprecated; use -o
-                                     "%(autonumber)s-%(title)s.%(ext)s" ] Number
-                                     downloaded files starting from 00000
-    -t, --title                      [deprecated] Use title in file name
-                                     (default)
-    -l, --literal                    [deprecated] Alias of --title
-    -w, --no-overwrites              Do not overwrite files
-    -c, --continue                   Force resume of partially downloaded files.
-                                     By default, youtube-dl will resume
-                                     downloads if possible.
-    --no-continue                    Do not resume partially downloaded files
-                                     (restart from beginning)
-    --no-part                        Do not use .part files - write directly
-                                     into output file
-    --no-mtime                       Do not use the Last-modified header to set
-                                     the file modification time
-    --write-description              Write video description to a .description
-                                     file
-    --write-info-json                Write video metadata to a .info.json file
-    --write-annotations              Write video annotations to a
-                                     .annotations.xml file
-    --load-info FILE                 JSON file containing the video information
-                                     (created with the "--write-info-json"
-                                     option)
-    --cookies FILE                   File to read cookies from and dump cookie
-                                     jar in
-    --cache-dir DIR                  Location in the filesystem where youtube-dl
-                                     can store some downloaded information
-                                     permanently. By default $XDG_CACHE_HOME
-                                     /youtube-dl or ~/.cache/youtube-dl . At the
-                                     moment, only YouTube player files (for
-                                     videos with obfuscated signatures) are
-                                     cached, but that may change.
-    --no-cache-dir                   Disable filesystem caching
-    --rm-cache-dir                   Delete all filesystem cache files
-
-
-Thumbnail images:
-
-    --write-thumbnail                Write thumbnail image to disk
-    --write-all-thumbnails           Write all thumbnail image formats to disk
-    --list-thumbnails                Simulate and list all available thumbnail
-                                     formats
-
-
-Verbosity / Simulation Options:
-
-    -q, --quiet                      Activate quiet mode
-    --no-warnings                    Ignore warnings
-    -s, --simulate                   Do not download the video and do not write
-                                     anything to disk
-    --skip-download                  Do not download the video
-    -g, --get-url                    Simulate, quiet but print URL
-    -e, --get-title                  Simulate, quiet but print title
-    --get-id                         Simulate, quiet but print id
-    --get-thumbnail                  Simulate, quiet but print thumbnail URL
-    --get-description                Simulate, quiet but print video description
-    --get-duration                   Simulate, quiet but print video length
-    --get-filename                   Simulate, quiet but print output filename
-    --get-format                     Simulate, quiet but print output format
-    -j, --dump-json                  Simulate, quiet but print JSON information.
-                                     See --output for a description of available
-                                     keys.
-    -J, --dump-single-json           Simulate, quiet but print JSON information
-                                     for each command-line argument. If the URL
-                                     refers to a playlist, dump the whole
-                                     playlist information in a single line.
-    --print-json                     Be quiet and print the video information as
-                                     JSON (video is still being downloaded).
-    --newline                        Output progress bar as new lines
-    --no-progress                    Do not print progress bar
-    --console-title                  Display progress in console titlebar
-    -v, --verbose                    Print various debugging information
-    --dump-pages                     Print downloaded pages encoded using base64
-                                     to debug problems (very verbose)
-    --write-pages                    Write downloaded intermediary pages to
-                                     files in the current directory to debug
-                                     problems
-    --print-traffic                  Display sent and read HTTP traffic
-    -C, --call-home                  Contact the youtube-dl server for debugging
-    --no-call-home                   Do NOT contact the youtube-dl server for
-                                     debugging
-
-
-Workarounds:
-
-    --encoding ENCODING              Force the specified encoding (experimental)
-    --no-check-certificate           Suppress HTTPS certificate validation
-    --prefer-insecure                Use an unencrypted connection to retrieve
-                                     information about the video. (Currently
-                                     supported only for YouTube)
-    --user-agent UA                  Specify a custom user agent
-    --referer URL                    Specify a custom referer, use if the video
-                                     access is restricted to one domain
-    --add-header FIELD:VALUE         Specify a custom HTTP header and its value,
-                                     separated by a colon ':'. You can use this
-                                     option multiple times
-    --bidi-workaround                Work around terminals that lack
-                                     bidirectional text support. Requires bidiv
-                                     or fribidi executable in PATH
-    --sleep-interval SECONDS         Number of seconds to sleep before each
-                                     download.
-
-
-Video Format Options:
-
-    -f, --format FORMAT              Video format code, see the "FORMAT
-                                     SELECTION" for all the info
-    --all-formats                    Download all available video formats
-    --prefer-free-formats            Prefer free video formats unless a specific
-                                     one is requested
-    -F, --list-formats               List all available formats of requested
-                                     videos
-    --youtube-skip-dash-manifest     Do not download the DASH manifests and
-                                     related data on YouTube videos
-    --merge-output-format FORMAT     If a merge is required (e.g.
-                                     bestvideo+bestaudio), output to given
-                                     container format. One of mkv, mp4, ogg,
-                                     webm, flv. Ignored if no merge is required
-
-
-Subtitle Options:
-
-    --write-sub                      Write subtitle file
-    --write-auto-sub                 Write automatically generated subtitle file
-                                     (YouTube only)
-    --all-subs                       Download all the available subtitles of the
-                                     video
-    --list-subs                      List all available subtitles for the video
-    --sub-format FORMAT              Subtitle format, accepts formats
-                                     preference, for example: "srt" or
-                                     "ass/srt/best"
-    --sub-lang LANGS                 Languages of the subtitles to download
-                                     (optional) separated by commas, use --list-
-                                     subs for available language tags
-
-
-Authentication Options:
-
-    -u, --username USERNAME          Login with this account ID
-    -p, --password PASSWORD          Account password. If this option is left
-                                     out, youtube-dl will ask interactively.
-    -2, --twofactor TWOFACTOR        Two-factor auth code
-    -n, --netrc                      Use .netrc authentication data
-    --video-password PASSWORD        Video password (vimeo, smotri, youku)
-
-
-Post-processing Options:
-
-    -x, --extract-audio              Convert video files to audio-only files
-                                     (requires ffmpeg or avconv and ffprobe or
-                                     avprobe)
-    --audio-format FORMAT            Specify audio format: "best", "aac",
-                                     "vorbis", "mp3", "m4a", "opus", or "wav";
-                                     "best" by default
-    --audio-quality QUALITY          Specify ffmpeg/avconv audio quality, insert
-                                     a value between 0 (better) and 9 (worse)
-                                     for VBR or a specific bitrate like 128K
-                                     (default 5)
-    --recode-video FORMAT            Encode the video to another format if
-                                     necessary (currently supported:
-                                     mp4|flv|ogg|webm|mkv|avi)
-    --postprocessor-args ARGS        Give these arguments to the postprocessor
-    -k, --keep-video                 Keep the video file on disk after the post-
-                                     processing; the video is erased by default
-    --no-post-overwrites             Do not overwrite post-processed files; the
-                                     post-processed files are overwritten by
-                                     default
-    --embed-subs                     Embed subtitles in the video (only for mkv
-                                     and mp4 videos)
-    --embed-thumbnail                Embed thumbnail in the audio as cover art
-    --add-metadata                   Write metadata to the video file
-    --metadata-from-title FORMAT     Parse additional metadata like song title /
-                                     artist from the video title. The format
-                                     syntax is the same as --output, the parsed
-                                     parameters replace existing values.
-                                     Additional templates: %(album)s,
-                                     %(artist)s. Example: --metadata-from-title
-                                     "%(artist)s - %(title)s" matches a title
-                                     like "Coldplay - Paradise"
-    --xattrs                         Write metadata to the video file's xattrs
-                                     (using dublin core and xdg standards)
-    --fixup POLICY                   Automatically correct known faults of the
-                                     file. One of never (do nothing), warn (only
-                                     emit a warning), detect_or_warn (the
-                                     default; fix file if we can, warn
-                                     otherwise)
-    --prefer-avconv                  Prefer avconv over ffmpeg for running the
-                                     postprocessors (default)
-    --prefer-ffmpeg                  Prefer ffmpeg over avconv for running the
-                                     postprocessors
-    --ffmpeg-location PATH           Location of the ffmpeg/avconv binary;
-                                     either the path to the binary or its
-                                     containing directory.
-    --exec CMD                       Execute a command on the file after
-                                     downloading, similar to find's -exec
-                                     syntax. Example: --exec 'adb push {}
-                                     /sdcard/Music/ && rm {}'
-    --convert-subs FORMAT            Convert the subtitles to other format
-                                     (currently supported: srt|ass|vtt)
-
-
-
-CONFIGURATION
-
-
-You can configure youtube-dl by placing any supported command line
-option to a configuration file. On Linux, the system wide configuration
-file is located at /etc/youtube-dl.conf and the user wide configuration
-file at ~/.config/youtube-dl/config. On Windows, the user wide
-configuration file locations are %APPDATA%\youtube-dl\config.txt or
-C:\Users\<user name>\youtube-dl.conf. For example, with the following
-configuration file youtube-dl will always extract the audio, not copy
-the mtime and use a proxy:
-
-    --extract-audio
-    --no-mtime
-    --proxy 127.0.0.1:3128
-
-You can use --ignore-config if you want to disable the configuration
-file for a particular youtube-dl run.
-
-Authentication with .netrc file
-
-You may also want to configure automatic credentials storage for
-extractors that support authentication (by providing login and password
-with --username and --password) in order not to pass credentials as
-command line arguments on every youtube-dl execution and prevent
-tracking plain text passwords in the shell command history. You can
-achieve this using a .netrc file on per extractor basis. For that you
-will need to create a.netrc file in your $HOME and restrict permissions
-to read/write by you only:
-
-    touch $HOME/.netrc
-    chmod a-rwx,u+rw $HOME/.netrc
-
-After that you can add credentials for extractor in the following
-format, where _extractor_ is the name of extractor in lowercase:
-
-    machine <extractor> login <login> password <password>
-
-For example:
-
-    machine youtube login myaccount@gmail.com password my_youtube_password
-    machine twitch login my_twitch_account_name password my_twitch_password
-
-To activate authentication with the .netrc file you should pass --netrc
-to youtube-dl or place it in the configuration file.
-
-On Windows you may also need to setup the %HOME% environment variable
-manually.
-
-
-
-OUTPUT TEMPLATE
-
-
-The -o option allows users to indicate a template for the output file
-names. The basic usage is not to set any template arguments when
-downloading a single file, like in
-youtube-dl -o funny_video.flv "http://some/video". However, it may
-contain special sequences that will be replaced when downloading each
-video. The special sequences have the format %(NAME)s. To clarify, that
-is a percent symbol followed by a name in parentheses, followed by a
-lowercase S. Allowed names are:
-
--   id: Video identifier
--   title: Video title
--   url: Video URL
--   ext: Video filename extension
--   alt_title: A secondary title of the video
--   display_id: An alternative identifier for the video
--   uploader: Full name of the video uploader
--   creator: The main artist who created the video
--   release_date: The date (YYYYMMDD) when the video was released
--   timestamp: UNIX timestamp of the moment the video became available
--   upload_date: Video upload date (YYYYMMDD)
--   uploader_id: Nickname or id of the video uploader
--   location: Physical location where the video was filmed
--   duration: Length of the video in seconds
--   view_count: How many users have watched the video on the platform
--   like_count: Number of positive ratings of the video
--   dislike_count: Number of negative ratings of the video
--   repost_count: Number of reposts of the video
--   average_rating: Average rating give by users, the scale used depends
-    on the webpage
--   comment_count: Number of comments on the video
--   age_limit: Age restriction for the video (years)
--   format: A human-readable description of the format
--   format_id: Format code specified by --format
--   format_note: Additional info about the format
--   width: Width of the video
--   height: Height of the video
--   resolution: Textual description of width and height
--   tbr: Average bitrate of audio and video in KBit/s
--   abr: Average audio bitrate in KBit/s
--   acodec: Name of the audio codec in use
--   asr: Audio sampling rate in Hertz
--   vbr: Average video bitrate in KBit/s
--   fps: Frame rate
--   vcodec: Name of the video codec in use
--   container: Name of the container format
--   filesize: The number of bytes, if known in advance
--   filesize_approx: An estimate for the number of bytes
--   protocol: The protocol that will be used for the actual download
--   extractor: Name of the extractor
--   extractor_key: Key name of the extractor
--   epoch: Unix epoch when creating the file
--   autonumber: Five-digit number that will be increased with each
-    download, starting at zero
--   playlist: Name or id of the playlist that contains the video
--   playlist_index: Index of the video in the playlist padded with
-    leading zeros according to the total length of the playlist
-
-Available for the video that belongs to some logical chapter or section:
-- chapter: Name or title of the chapter the video belongs to -
-chapter_number: Number of the chapter the video belongs to - chapter_id:
-Id of the chapter the video belongs to
-
-Available for the video that is an episode of some series or programme:
-- series: Title of the series or programme the video episode belongs to
-- season: Title of the season the video episode belongs to -
-season_number: Number of the season the video episode belongs to -
-season_id: Id of the season the video episode belongs to - episode:
-Title of the video episode - episode_number: Number of the video episode
-within a season - episode_id: Id of the video episode
-
-Each aforementioned sequence when referenced in output template will be
-replaced by the actual value corresponding to the sequence name. Note
-that some of the sequences are not guaranteed to be present since they
-depend on the metadata obtained by particular extractor, such sequences
-will be replaced with NA.
-
-For example for -o %(title)s-%(id)s.%(ext)s and mp4 video with title
-youtube-dl test video and id BaW_jenozKcj this will result in a
-youtube-dl test video-BaW_jenozKcj.mp4 file created in the current
-directory.
-
-Output template can also contain arbitrary hierarchical path, e.g.
--o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s' that will
-result in downloading each video in a directory corresponding to this
-path template. Any missing directory will be automatically created for
-you.
-
-To specify percent literal in output template use %%. To output to
-stdout use -o -.
-
-The current default template is %(title)s-%(id)s.%(ext)s.
-
-In some cases, you don't want special characters such as 中, spaces, or
-&, for example when transferring the downloaded filename to a Windows
-system or passing the filename through an 8-bit-unsafe channel. In these
-cases, add the --restrict-filenames flag to get a shorter title:
-
-Examples (note that on Windows you may need to use double quotes
-instead of single ones):
-
-    $ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc
-    youtube-dl test video ''_ä↭𝕐.mp4    # All kinds of weird characters
-
-    $ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc --restrict-filenames
-    youtube-dl_test_video_.mp4          # A simple file name
-
-    # Download YouTube playlist videos into a separate directory, indexed by video order in the playlist
-    $ youtube-dl -o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s' https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re
-
-    # Download a Udemy course, keeping each chapter in a separate directory under the MyVideos directory in your home
-    $ youtube-dl -u user -p password -o '~/MyVideos/%(playlist)s/%(chapter_number)s - %(chapter)s/%(title)s.%(ext)s' https://www.udemy.com/java-tutorial/
-
-    # Download an entire series season, keeping each series and each season in a separate directory under C:/MyVideos
-    $ youtube-dl -o "C:/MyVideos/%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" http://videomore.ru/kino_v_detalayah/5_sezon/367617
-
-    # Stream the video being downloaded to stdout
-    $ youtube-dl -o - BaW_jenozKc
-
-
-
-FORMAT SELECTION
-
-
-By default youtube-dl tries to download the best available quality, i.e.
-if you want the best quality you DON'T NEED to pass any special options,
-youtube-dl will guess it for you by DEFAULT.
-
-But sometimes you may want to download in a different format, for
-example when you are on a slow or intermittent connection. The key
-mechanism for achieving this is the so-called _format selection_, with
-which you can explicitly specify the desired format, select formats
-based on some criterion or criteria, set up precedence and much more.
-
-The general syntax for format selection is --format FORMAT or, shorter,
--f FORMAT, where FORMAT is a _selector expression_, i.e. an expression
-that describes the format or formats you would like to download.
-
-The simplest case is requesting a specific format; for example, with
--f 22 you can download the format with the format code 22. You can get
-the list of available format codes for a particular video using
---list-formats or -F. Note that these format codes are extractor
-specific.
-
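-If you drive youtube-dl from Python rather than the command line, you
-can read the same format list off the info dictionary; a minimal
-sketch, using the test video referenced elsewhere in this README:
-
-    import youtube_dl
-
-    with youtube_dl.YoutubeDL({'quiet': True}) as ydl:
-        info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc', download=False)
-    for f in info['formats']:
-        # Not every format carries every field, hence .get()
-        print(f['format_id'], f.get('ext'), f.get('height'))
-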
-You can also use a file extension (currently 3gp, aac, flv, m4a, mp3,
-mp4, ogg, wav, webm are supported) to download the best quality format
-of a particular file extension served as a single file, e.g. -f webm
-will download the best quality format with the webm extension served
-as a single file.
-
-You can also use special names to select particular edge-case formats:
-
--   best: Select the best quality format represented by a single file
-    with video and audio
--   worst: Select the worst quality format represented by a single file
-    with video and audio
--   bestvideo: Select the best quality video-only format (e.g. DASH
-    video), may not be available
--   worstvideo: Select the worst quality video-only format, may not be
-    available
--   bestaudio: Select the best quality audio-only format, may not be
-    available
--   worstaudio: Select the worst quality audio-only format, may not be
-    available
-
-For example, to download the worst quality video-only format you can
-use -f worstvideo.
-
-If you want to download multiple videos and they don't have the same
-formats available, you can specify the order of preference using
-slashes. Note that slash is left-associative, i.e. formats on the left
-hand side are preferred, for example -f 22/17/18 will download format 22
-if it's available, otherwise it will download format 17 if it's
-available, otherwise it will download format 18 if it's available,
-otherwise it will complain that no suitable formats are available for
-download.
-
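-The left-associative fallback can be pictured as a first-match loop
-over the slash-separated alternatives; an illustrative sketch (not
-youtube-dl's actual selector code, which is considerably richer):
-
-    def pick_format(preference, available):
-        # Try each alternative left to right; the first available one wins.
-        for fmt in preference.split('/'):
-            if fmt in available:
-                return fmt
-        raise ValueError('no suitable format available')
-
-    pick_format('22/17/18', available={'17', '18'})  # returns '17'
-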
-If you want to download several formats of the same video, use a comma
-as a separator, e.g. -f 22,17,18 will download all three of these
-formats provided, of course, that they are available. A more
-sophisticated example combines this with the precedence feature:
--f 136/137/mp4/bestvideo,140/m4a/bestaudio.
-
-You can also filter the video formats by putting a condition in
-brackets, as in -f "best[height=720]" (or -f "[filesize>10M]").
-
-The following numeric meta fields can be used with the comparisons <,
-<=, >, >=, = (equals) and != (not equals):
-
--   filesize: The number of bytes, if known in advance
--   width: Width of the video, if known
--   height: Height of the video, if known
--   tbr: Average bitrate of audio and video in KBit/s
--   abr: Average audio bitrate in KBit/s
--   vbr: Average video bitrate in KBit/s
--   asr: Audio sampling rate in Hertz
--   fps: Frame rate
-
-Filtering also works for the comparisons = (equals), != (not equals),
-^= (begins with), $= (ends with) and *= (contains), with the following
-string meta fields:
-
--   ext: File extension
--   acodec: Name of the audio codec in use
--   vcodec: Name of the video codec in use
--   container: Name of the container format
--   protocol: The protocol that will be used for the actual download,
-    lower-case: http, https, rtsp, rtmp, rtmpe, m3u8, or m3u8_native
-
-Note that none of the aforementioned meta fields are guaranteed to be
-present, since this solely depends on the metadata obtained by the
-particular extractor, i.e. the metadata offered by the video hoster.
-
-Formats for which the value is not known are excluded unless you put a
-question mark (?) after the operator. You can combine format filters, so
--f "[height <=? 720][tbr>500]" selects up to 720p videos (or videos
-where the height is not known) with a bitrate of at least 500 KBit/s.
-
-You can merge the video and audio of two formats into a single file
-using -f <video-format>+<audio-format> (requires ffmpeg or avconv to be
-installed), for example -f bestvideo+bestaudio will download the best
-video-only format and the best audio-only format and mux them together
-with ffmpeg/avconv.
-
-Format selectors can also be grouped using parentheses, for example if
-you want to download the best mp4 and webm formats with a height lower
-than 480 you can use -f '(mp4,webm)[height<480]'.
-
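-When embedding youtube-dl in Python (see EMBEDDING YOUTUBE-DL below),
-the very same selector expressions are passed through the format
-option; a minimal sketch:
-
-    import youtube_dl
-
-    # Any selector from this section works here, filters included.
-    ydl_opts = {'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best'}
-    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
-        ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
-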
-Since the end of April 2015 and version 2015.04.26 youtube-dl uses
--f bestvideo+bestaudio/best as default format selection (see #5447,
-#5456). If ffmpeg or avconv are installed this results in downloading
-bestvideo and bestaudio separately and muxing them together into a
-single file giving the best overall quality available. Otherwise it
-falls back to best and results in downloading the best available quality
-served as a single file. best is also needed for videos that don't come
-from YouTube because they don't provide the audio and video in two
-different files. If you want to only download some DASH formats (for
-example if you are not interested in getting videos with a resolution
-higher than 1080p), you can add
--f bestvideo[height<=?1080]+bestaudio/best to your configuration file.
-Note that if you use youtube-dl to stream to stdout (most likely to
-then pipe it to your media player), i.e. you explicitly specify the
-output template as -o -, youtube-dl still uses -f best format selection
-in order to start content delivery to your player immediately instead
-of waiting until bestvideo and bestaudio are downloaded and muxed.
-
-If you want to preserve the old format selection behavior (prior to
-youtube-dl 2015.04.26), i.e. you want to download the best available
-quality media served as a single file, you should explicitly specify
-your choice with -f best. You may want to add it to the configuration
-file in order not to type it every time you run youtube-dl.
-
-Examples (note that on Windows you may need to use double quotes
-instead of single ones):
-
-    # Download the best mp4 format available, or the best of any other format if no mp4 is available
-    $ youtube-dl -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best'
-
-    # Download the best format available but not better than 480p
-    $ youtube-dl -f 'bestvideo[height<=480]+bestaudio/best[height<=480]'
-
-    # Download the best format available but no bigger than 50 MB
-    $ youtube-dl -f 'best[filesize<50M]'
-
-    # Download the best format available via a direct link over the HTTP/HTTPS protocol
-    $ youtube-dl -f '(bestvideo+bestaudio/best)[protocol^=http]'
-
-
-
-VIDEO SELECTION
-
-
-Videos can be filtered by their upload date using the options --date,
---datebefore or --dateafter. They accept dates in two formats:
-
--   Absolute dates: Dates in the format YYYYMMDD.
--   Relative dates: Dates in the format
-    (now|today)[+-][0-9](day|week|month|year)(s)?
-
-Examples:
-
-    # Download only the videos uploaded in the last 6 months
-    $ youtube-dl --dateafter now-6months
-
-    # Download only the videos uploaded on January 1, 1970
-    $ youtube-dl --date 19700101
-
-    # Download only the videos uploaded in the 200x decade
-    $ youtube-dl --dateafter 20000101 --datebefore 20091231
-
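-The same date filtering is available when embedding youtube-dl in
-Python: the --date/--datebefore/--dateafter options map to the
-daterange option, built from youtube_dl.utils.DateRange. A minimal
-sketch:
-
-    import youtube_dl
-    from youtube_dl.utils import DateRange
-
-    # Equivalent of --dateafter 20000101 --datebefore 20091231
-    ydl_opts = {'daterange': DateRange('20000101', '20091231')}
-    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
-        ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])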
-
-
-FAQ
-
-
-How do I update youtube-dl?
-
-If you've followed our manual installation instructions, you can simply
-run youtube-dl -U (or, on Linux, sudo youtube-dl -U).
-
-If you have used pip, a simple sudo pip install -U youtube-dl is
-sufficient to update.
-
-If you have installed youtube-dl using a package manager like _apt-get_
-or _yum_, use the standard system update mechanism to update. Note that
-distribution packages are often outdated. As a rule of thumb, youtube-dl
-releases at least once a month, and often weekly or even daily. Simply
-go to http://yt-dl.org/ to find out the current version. Unfortunately,
-there is nothing we youtube-dl developers can do if your distribution
-serves a really outdated version. You can (and should) complain to your
-distribution in their bugtracker or support forum.
-
-As a last resort, you can also uninstall the version installed by your
-package manager and follow our manual installation instructions. For
-that, remove the distribution's package, with a line like
-
-    sudo apt-get remove -y youtube-dl
-
-Afterwards, simply follow our manual installation instructions:
-
-    sudo wget https://yt-dl.org/latest/youtube-dl -O /usr/local/bin/youtube-dl
-    sudo chmod a+x /usr/local/bin/youtube-dl
-    hash -r
-
-Again, from then on you'll be able to update with sudo youtube-dl -U.
-
-I'm getting an error Unable to extract OpenGraph title on YouTube playlists
-
-YouTube changed their playlist format in March 2014 and again later on,
-so you'll need at least youtube-dl 2014.07.25 to download all YouTube
-videos.
-
-If you have installed youtube-dl with a package manager, pip, setup.py
-or a tarball, please use that to update. Note that Ubuntu packages do
-not seem to get updated anymore. Since we are not affiliated with
-Ubuntu, there is little we can do. Feel free to report bugs to the
-Ubuntu packaging guys - all they have to do is update the package to a
-somewhat recent version. See above for a way to update.
-
-Do I always have to pass -citw?
-
-By default, youtube-dl intends to have the best options (incidentally,
-if you have a convincing case that these should be different, please
-file an issue where you explain that). Therefore, it is unnecessary and
-sometimes harmful to copy long option strings from webpages. In
-particular, the only option out of -citw that is regularly useful is -i.
-
-Can you please put the -b option back?
-
-Most people asking this question are not aware that youtube-dl now
-defaults to downloading the highest available quality as reported by
-YouTube, which will be 1080p or 720p in some cases, so you no longer
-need the -b option. For some specific videos, YouTube may not report
-the specific high quality format you're interested in as being
-available. In that case, simply request it with the -f option and
-youtube-dl will try to download it.
-
-I get HTTP error 402 when trying to download a video. What's this?
-
-Apparently YouTube requires you to pass a CAPTCHA test if you download
-too much. We're considering providing a way to let you solve the
-CAPTCHA, but at the moment, your best course of action is pointing a
-web browser to the YouTube URL, solving the CAPTCHA, and restarting
-youtube-dl.
-
-Do I need any other programs?
-
-youtube-dl works fine on its own on most sites. However, if you want to
-convert video/audio, you'll need avconv or ffmpeg. On some sites - most
-notably YouTube - videos can be retrieved in a higher quality format
-without sound. youtube-dl will detect whether avconv/ffmpeg is present
-and automatically pick the best option.
-
-Videos or video formats streamed via the RTMP protocol can only be
-downloaded when rtmpdump is installed. Downloading MMS and RTSP videos
-requires either mplayer or mpv to be installed.
-
-I have downloaded a video but how can I play it?
-
-Once the video is fully downloaded, use any video player, such as vlc or
-mplayer.
-
-I extracted a video URL with -g, but it does not play on another machine / in my webbrowser.
-
-It depends a lot on the service. In many cases, requests for the video
-(to download or play it) must come from the same IP address and with
-the same cookies. Use the --cookies option to write the required
-cookies into a file, and advise your downloader to read cookies from
-that file. Some sites also require a common user agent to be used; use
---dump-user-agent to see the one in use by youtube-dl.
-
-It may be beneficial to use IPv6; in some cases, the restrictions are
-only applied to IPv4. Some services (sometimes only for a subset of
-videos) do not restrict the video URL by IP address, cookie, or
-user-agent, but these are the exception rather than the rule.
-
-Please bear in mind that some URL protocols are NOT supported by
-browsers out of the box, including RTMP. If you are using -g, your own
-downloader must support these as well.
-
-If you want to play the video on a machine that is not running
-youtube-dl, you can relay the video content from the machine that runs
-youtube-dl. You can use -o - to let youtube-dl stream a video to stdout,
-or simply allow the player to download the files written by youtube-dl
-in turn.
-
-ERROR: no fmt_url_map or conn information found in video info
-
-YouTube switched to a new video info format in July 2011, which is not
-supported by old versions of youtube-dl. See above for how to update
-youtube-dl.
-
-ERROR: unable to download video
-
-YouTube has required an additional signature since September 2012,
-which is not supported by old versions of youtube-dl. See above for how
-to update youtube-dl.
-
-Video URL contains an ampersand and I'm getting some strange output [1] 2839 or 'v' is not recognized as an internal or external command
-
-That's actually the output from your shell. Since the ampersand is one
-of the special shell characters, it's interpreted by the shell,
-preventing you from passing the whole URL to youtube-dl. To keep your
-shell from interpreting the ampersands (or any other special
-characters), either put the whole URL in quotes or escape them with a
-backslash (which approach works depends on your shell).
-
-For example, if your URL is
-https://www.youtube.com/watch?t=4&v=BaW_jenozKc, you should end up with
-the following command:
-
-    youtube-dl 'https://www.youtube.com/watch?t=4&v=BaW_jenozKc'
-
-or
-
-    youtube-dl https://www.youtube.com/watch?t=4\&v=BaW_jenozKc
-
-On Windows you have to use double quotes:
-
-youtube-dl "https://www.youtube.com/watch?t=4&v=BaW_jenozKc"
-
-ExtractorError: Could not find JS function u'OF'
-
-In February 2015, the new YouTube player contained a character sequence
-in a string that was misinterpreted by old versions of youtube-dl. See
-above for how to update youtube-dl.
-
-HTTP Error 429: Too Many Requests or 402: Payment Required
-
-These two error codes indicate that the service is blocking your IP
-address because of overuse. Contact the service and ask them to unblock
-your IP address, or - if you have acquired a whitelisted IP address
-already - use the --proxy or --source-address options to select another
-IP address.
-
-SyntaxError: Non-ASCII character
-
-The error
-
-    File "youtube-dl", line 2
-    SyntaxError: Non-ASCII character '\x93' ...
-
-means you're using an outdated version of Python. Please update to
-Python 2.6 or 2.7.
-
-What is this binary file? Where has the code gone?
-
-Since June 2012 (#342) youtube-dl is packed as an executable zipfile;
-simply unzip it (it might need renaming to youtube-dl.zip first on some
-systems) or clone the git repository, as laid out above. If you modify
-the code, you can run it by executing the __main__.py file. To recompile
-the executable, run make youtube-dl.
-
-The exe throws a _Runtime error from Visual C++_
-
-To run the exe you first need to install the Microsoft Visual C++ 2008
-Redistributable Package.
-
-On Windows, how should I set up ffmpeg and youtube-dl? Where should I put the exe files?
-
-If you put youtube-dl and ffmpeg in the same directory that you're
-running the command from, it will work, but that's rather cumbersome.
-
-To make a different directory work - either for ffmpeg, or for
-youtube-dl, or for both - simply create the directory (say, C:\bin, or
-C:\Users\<User name>\bin), put all the executables directly in there,
-and then set your PATH environment variable to include that directory.
-
-From then on, after restarting your shell, you will be able to access
-both youtube-dl and ffmpeg (and youtube-dl will be able to find ffmpeg)
-by simply typing youtube-dl or ffmpeg, no matter what directory you're
-in.
-
-How do I put downloads into a specific folder?
-
-Use the -o option to specify an output template, for example
--o "/home/user/videos/%(title)s-%(id)s.%(ext)s". If you want this for
-all of your downloads, put the option into your configuration file.
-
-How do I download a video starting with a -?
-
-Either prepend http://www.youtube.com/watch?v= or separate the ID from
-the options with --:
-
-    youtube-dl -- -wNyEUrxzFU
-    youtube-dl "http://www.youtube.com/watch?v=-wNyEUrxzFU"
-
-How do I pass cookies to youtube-dl?
-
-Use the --cookies option, for example
---cookies /path/to/cookies/file.txt. Note that the cookies file must be
-in Mozilla/Netscape format and the first line of the cookies file must
-be either # HTTP Cookie File or # Netscape HTTP Cookie File. Make sure
-the cookies file uses the correct newline format for your OS,
-converting newlines if necessary: CRLF (\r\n) for Windows, LF (\n) for
-Linux and CR (\r) for Mac OS. An HTTP Error 400: Bad Request when using
---cookies is a good sign of an invalid newline format.
-
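-If you need to convert the newlines, a throwaway Python sketch does the
-job (the file names here are hypothetical):
-
-    # Normalize a cookies file to Unix (LF) newlines; adjust the
-    # replacements for CRLF or CR targets as needed.
-    with open('cookies.txt', 'rb') as f:
-        data = f.read()
-    data = data.replace(b'\r\n', b'\n').replace(b'\r', b'\n')
-    with open('cookies-unix.txt', 'wb') as f:
-        f.write(data)
-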
-Passing cookies to youtube-dl is a good way to work around login when a
-particular extractor does not implement it explicitly. Another use case
-is working around the CAPTCHA some websites require you to solve in
-particular cases in order to get access (e.g. YouTube, CloudFlare).
-
-Can you add support for this anime video site, or site which shows current movies for free?
-
-As a matter of policy (as well as legality), youtube-dl does not
-include support for services that specialize in infringing copyright.
-As a rule of thumb, if you cannot easily find a video that the service
-is quite obviously allowed to distribute (i.e. one that has been
-uploaded by the creator or the creator's distributor, or is published
-under a free license), the service is probably unfit for inclusion in
-youtube-dl.
-
-A note on the service stating that they don't host the infringing
-content but just link to those who do is evidence that the service
-should NOT be included in youtube-dl. The same goes for any DMCA note
-when the whole front page of the service is filled with videos they are
-not allowed to distribute. A "fair use" note is equally unconvincing if
-the service shows copyright-protected videos in full without
-authorization.
-
-Support requests for services that DO purchase the rights to distribute
-their content are perfectly fine though. If in doubt, you can simply
-include a source that mentions the legitimate purchase of content.
-
-How can I speed up work on my issue?
-
-(Also known as: Help, my important issue is not being solved!) The
-youtube-dl core developer team is quite small. While we do our best to
-solve as many issues as possible, sometimes that can take quite a while.
-To speed up your issue, here's what you can do:
-
-First of all, please do report the issue at our issue tracker. That
-allows us to coordinate all efforts by users and developers, and serves
-as a unified point. Unfortunately, the youtube-dl project has grown too
-large to use personal email as an effective communication channel.
-
-Please read the bug reporting instructions below. A lot of bugs lack all
-the necessary information. If you can, offer proxy, VPN, or shell access
-to the youtube-dl developers. If you are able to, test the issue from
-multiple computers in multiple countries to exclude local censorship or
-misconfiguration issues.
-
-If nobody is interested in solving your issue, you are welcome to take
-matters into your own hands and submit a pull request (or coerce/pay
-somebody else to do so).
-
-Feel free to bump the issue from time to time by writing a small comment
-("Issue is still present in youtube-dl version ...from France, but fixed
-from Belgium"), but please not more than once a month. Please do not
-declare your issue as important or urgent.
-
-How can I detect whether a given URL is supported by youtube-dl?
-
-For one, have a look at the list of supported sites. Note that it can
-sometimes happen that a site changes its URL scheme (say, from
-http://example.com/video/1234567 to http://example.com/v/1234567) and
-youtube-dl then reports a URL of a service in that list as unsupported.
-In that case, simply report a bug.
-
-It is _not_ possible to detect whether a URL is supported or not.
-That's because youtube-dl contains a generic extractor which matches
-ALL URLs. You may be tempted to disable, exclude, or remove the generic
-extractor, but the generic extractor not only allows users to extract
-videos from lots of websites that embed a video from another service,
-but may also be used to extract videos from a service that hosts them
-itself. Therefore, we neither recommend nor support disabling,
-excluding, or removing the generic extractor.
-
-If you want to find out whether a given URL is supported, simply call
-youtube-dl with it. If you get no videos back, chances are the URL is
-either not referring to a video or unsupported. You can find out which
-by examining the output (if you run youtube-dl on the console) or
-catching an UnsupportedError exception if you run it from a Python
-program.
-
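-From Python, such a check might look like the following sketch. Note
-that, depending on the options, youtube-dl may wrap extractor errors in
-a DownloadError, so both exceptions are caught here; examine the error
-message if you need to tell an unsupported URL apart from other
-failures:
-
-    import youtube_dl
-
-    def is_supported(url):
-        ydl = youtube_dl.YoutubeDL({'quiet': True})
-        try:
-            ydl.extract_info(url, download=False)
-            return True
-        except youtube_dl.utils.UnsupportedError:
-            return False
-        except youtube_dl.utils.DownloadError:
-            # Unsupported URLs commonly surface wrapped in DownloadError
-            return False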
-
-
-DEVELOPER INSTRUCTIONS
-
-
-Most users do not need to build youtube-dl and can download the builds
-or get them from their distribution.
-
-To run youtube-dl as a developer, you don't need to build anything
-either. Simply execute
-
-    python -m youtube_dl
-
-To run the tests, simply invoke your favorite test runner, or execute a
-test file directly; any of the following work:
-
-    python -m unittest discover
-    python test/test_download.py
-    nosetests
-
-If you want to create a build of youtube-dl yourself, you'll need
-
--   python
--   make
--   pandoc
--   zip
--   nosetests
-
-Adding support for a new site
-
-If you want to add support for a new site, you can follow this quick
-list (assuming your service is called yourextractor):
-
-1.  Fork this repository
-2.  Check out the source code with
-    git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git
-3.  Start a new git branch with
-    cd youtube-dl; git checkout -b yourextractor
-4.  Start with this simple template and save it to
-    youtube_dl/extractor/yourextractor.py:
-
-        # coding: utf-8
-        from __future__ import unicode_literals
-
-        from .common import InfoExtractor
-
-
-        class YourExtractorIE(InfoExtractor):
-            _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
-            _TEST = {
-                'url': 'http://yourextractor.com/watch/42',
-                'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
-                'info_dict': {
-                    'id': '42',
-                    'ext': 'mp4',
-                    'title': 'Video title goes here',
-                    'thumbnail': 're:^https?://.*\.jpg$',
-                    # TODO more properties, either as:
-                    # * A value
-                    # * MD5 checksum; start the string with md5:
-                    # * A regular expression; start the string with re:
-                    # * Any Python type (for example int or float)
-                }
-            }
-
-            def _real_extract(self, url):
-                video_id = self._match_id(url)
-                webpage = self._download_webpage(url, video_id)
-
-                # TODO more code goes here, for example ...
-                title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
-
-                return {
-                    'id': video_id,
-                    'title': title,
-                    'description': self._og_search_description(webpage),
-                    'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
-                    # TODO more properties (see youtube_dl/extractor/common.py)
-                }
-
-5.  Add an import in youtube_dl/extractor/__init__.py.
-6.  Run python test/test_download.py TestDownload.test_YourExtractor.
-    This _should fail_ at first, but you can continually re-run it until
-    you're done. If you decide to add more than one test, then rename
-    _TEST to _TESTS and make it into a list of dictionaries (see the
-    sketch after this list). The tests will then be named
-    TestDownload.test_YourExtractor,
-    TestDownload.test_YourExtractor_1,
-    TestDownload.test_YourExtractor_2, etc.
-7.  Have a look at youtube_dl/extractor/common.py for possible helper
-    methods and a detailed description of what your extractor should and
-    may return. Add tests and code for as many as you want.
-8.  If you can, check the code with flake8.
-9.  When the tests pass, add the new files and commit them and push the
-    result, like this:
-
-        $ git add youtube_dl/extractor/__init__.py
-        $ git add youtube_dl/extractor/yourextractor.py
-        $ git commit -m '[yourextractor] Add new extractor'
-        $ git push origin yourextractor
-
-10. Finally, create a pull request. We'll then review and merge it.
-
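-As a hypothetical illustration of step 6, a _TESTS list for the
-extractor above might look like this (only_matching marks an entry that
-only checks URL matching and skips downloading):
-
-        _TESTS = [{
-            'url': 'http://yourextractor.com/watch/42',
-            'info_dict': {
-                'id': '42',
-                'ext': 'mp4',
-                'title': 'Video title goes here',
-            },
-        }, {
-            'url': 'http://yourextractor.com/watch/43',
-            'only_matching': True,
-        }]
-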
-In any case, thank you very much for your contributions!
-
-
-
-EMBEDDING YOUTUBE-DL
-
-
-youtube-dl makes the best effort to be a good command-line program, and
-thus should be callable from any programming language. If you encounter
-any problems parsing its output, feel free to create a report.
-
-From a Python program, you can embed youtube-dl in a more powerful
-fashion, like this:
-
-    from __future__ import unicode_literals
-    import youtube_dl
-
-    ydl_opts = {}
-    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
-        ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
-
-Most likely, you'll want to use various options. For a list of what can
-be done, have a look at youtube_dl/YoutubeDL.py. For a start, if you
-want to intercept youtube-dl's output, set a logger object.
-
-Here's a more complete example of a program that outputs only errors
-(and a short message after the download is finished), and
-downloads/converts the video to an mp3 file:
-
-    from __future__ import unicode_literals
-    import youtube_dl
-
-
-    class MyLogger(object):
-        def debug(self, msg):
-            pass
-
-        def warning(self, msg):
-            pass
-
-        def error(self, msg):
-            print(msg)
-
-
-    def my_hook(d):
-        if d['status'] == 'finished':
-            print('Done downloading, now converting ...')
-
-
-    ydl_opts = {
-        'format': 'bestaudio/best',
-        'postprocessors': [{
-            'key': 'FFmpegExtractAudio',
-            'preferredcodec': 'mp3',
-            'preferredquality': '192',
-        }],
-        'logger': MyLogger(),
-        'progress_hooks': [my_hook],
-    }
-    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
-        ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
-
-
-
-BUGS
-
-
-Bugs and suggestions should be reported at:
-https://github.com/rg3/youtube-dl/issues. Unless you were prompted to
-or there is another pertinent reason (e.g. GitHub fails to accept the
-bug report), please do not send bug reports via personal email. For
-discussions, join us in the IRC channel #youtube-dl on freenode
-(webchat).
-
-PLEASE INCLUDE THE FULL OUTPUT OF YOUTUBE-DL WHEN RUN WITH -v, i.e. ADD
-THE -v FLAG TO YOUR COMMAND LINE, copy the WHOLE output and post it in
-the issue body wrapped in ``` for better formatting. It should look
-similar to this:
-
-    $ youtube-dl -v <your command line>
-    [debug] System config: []
-    [debug] User config: []
-    [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
-    [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-    [debug] youtube-dl version 2015.12.06
-    [debug] Git HEAD: 135392e
-    [debug] Python version 2.6.6 - Windows-2003Server-5.2.3790-SP2
-    [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
-    [debug] Proxy map: {}
-    ...
-
-DO NOT POST SCREENSHOTS OF THE VERBOSE LOG; ONLY PLAIN TEXT IS
-ACCEPTABLE.
-
-The output (including the first lines) contains important debugging
-information. Issues without the full output are often not reproducible
-and therefore do not get solved in short order, if ever.
-
-Please re-read your issue once again to avoid a couple of common
-mistakes (you can and should use this as a checklist):
-
-Is the description of the issue itself sufficient?
-
-We often get issue reports that we cannot really decipher. While in most
-cases we eventually get the required information after asking back
-multiple times, this poses an unnecessary drain on our resources. Many
-contributors, including myself, are also not native speakers, so we may
-misread some parts.
-
-So please elaborate on what feature you are requesting, or what bug you
-want to be fixed. Make sure that it's obvious
-
--   What the problem is
--   How it could be fixed
--   What your proposed solution would look like
-
-If your report is shorter than two lines, it is almost certainly missing
-some of these, which makes it hard for us to respond to it. We're often
-too polite to close the issue outright, but the missing info makes
-misinterpretation likely. As a committer myself, I often get frustrated
-by these issues, since the only possible way for me to move forward on
-them is to ask for clarification over and over.
-
-For bug reports, this means that your report should contain the
-_complete_ output of youtube-dl when called with the -v flag. The error
-message you get for (most) bugs even says so, but you would not believe
-how many of our bug reports do not contain this information.
-
-If your server has multiple IPs or you suspect censorship, adding
---call-home may be a good idea to get more diagnostics. If the error is
-ERROR: Unable to extract ... and you cannot reproduce it from multiple
-countries, add --dump-pages (warning: this will yield a rather large
-output, redirect it to the file log.txt by adding >log.txt 2>&1 to your
-command-line) or upload the .dump files you get when you add
---write-pages somewhere.
-
-SITE SUPPORT REQUESTS MUST CONTAIN AN EXAMPLE URL. An example URL is a
-URL you might want to download, like
-http://www.youtube.com/watch?v=BaW_jenozKc. There should be an obvious
-video present. Except under very special circumstances, the main page of
-a video service (e.g. http://www.youtube.com/) is _not_ an example URL.
-
-Are you using the latest version?
-
-Before reporting any issue, type youtube-dl -U. This should report that
-you're up-to-date. About 20% of the reports we receive concern issues
-that are already fixed, but reported by people using outdated versions.
-This goes for feature requests as well.
-
-Is the issue already documented?
-
-Make sure that someone has not already opened the issue you're trying to
-open. Search at the top of the window or browse the GitHub Issues of
-this repository. If there is an issue, feel free to write something
-along the lines of "This affects me as well, with version 2015.01.01.
-Here is some more information on the issue: ...". While some issues may
-be old, a new post into them often spurs rapid activity.
-
-Why are existing options not enough?
-
-Before requesting a new feature, please have a quick peek at the list of
-supported options. Many feature requests are for features that actually
-exist already! Please, absolutely do show off your work in the issue
-report and detail how the existing similar options do _not_ solve your
-problem.
-
-Is there enough context in your bug report?
-
-People want to solve problems, and often think they do us a favor by
-breaking down their larger problems (e.g. wanting to skip already
-downloaded files) into a specific request (e.g. requesting us to look
-whether the file exists before downloading the info page). However, what
-often happens is that they break down the problem into two steps: one
-simple, and one impossible (or extremely complicated).
-
-We are then presented with a very complicated request when the original
-problem could be solved far more easily, e.g. by recording the downloaded
-video IDs in a separate file. To avoid this, you must include the
-greater context where it is non-obvious. In particular, every feature
-request that does not consist of adding support for a new site should
-contain a use case scenario that explains in what situation the missing
-feature would be useful.
-
-Does the issue involve one problem, and one problem only?
-
-Some of our users seem to think there is a limit on the number of
-issues they can or should open. There is no such limit. While
-it may seem appealing to be able to dump all your issues into one
-ticket, that means that someone who solves one of your issues cannot
-mark the issue as closed. Typically, reporting a bunch of issues leads
-to the ticket lingering since nobody wants to attack that behemoth,
-until someone mercifully splits the issue into multiple ones.
-
-In particular, every site support request issue should only pertain to
-services at one site (generally under a common domain, but always using
-the same backend technology). Do not request support for vimeo user
-videos, Whitehouse podcasts, and Google Plus pages in the same issue.
-Also, make sure that you don't post bug reports alongside feature
-requests. As a rule of thumb, a feature request does not include outputs
-of youtube-dl that are not immediately related to the feature at hand.
-Do not post reports of a network error alongside the request for a new
-video service.
-
-Is anyone going to need the feature?
-
-Only post features that you (or an incapacitated friend you can
-personally talk to) require. Do not post features because they seem like
-a good idea. If they are really useful, they will be requested by
-someone who requires them.
-
-Is your question about youtube-dl?
-
-It may sound strange, but some bug reports we receive are completely
-unrelated to youtube-dl and relate to a different or even the reporter's
-own application. Please make sure that you are actually using
-youtube-dl. If you are using a UI for youtube-dl, report the bug to the
-maintainer of the actual application providing the UI. On the other
-hand, if your UI for youtube-dl fails in some way you believe is related
-to youtube-dl, by all means, go ahead and report the bug.
-
-
-
-COPYRIGHT
-
-
-youtube-dl is released into the public domain by the copyright holders.
-
-This README file was originally written by Daniel Bolton and is likewise
-released into the public domain.
index 7c2f49f8bb63bbe2b47efca151129a7e6b49674d..fc99c3213dddf985cfcf4fe74584cc09eeaf3175 100644 (file)
@@ -1,17 +1,38 @@
 #!/usr/bin/python3
 
-from http.server import HTTPServer, BaseHTTPRequestHandler
-from socketserver import ThreadingMixIn
 import argparse
 import ctypes
 import functools
+import shutil
+import subprocess
 import sys
+import tempfile
 import threading
 import traceback
 import os.path
 
+sys.path.insert(0, os.path.dirname(os.path.dirname((os.path.abspath(__file__)))))
+from youtube_dl.compat import (
+    compat_input,
+    compat_http_server,
+    compat_str,
+    compat_urlparse,
+)
+
+# These are not used outside of buildserver.py thus not in compat.py
+
+try:
+    import winreg as compat_winreg
+except ImportError:  # Python 2
+    import _winreg as compat_winreg
 
-class BuildHTTPServer(ThreadingMixIn, HTTPServer):
+try:
+    import socketserver as compat_socketserver
+except ImportError:  # Python 2
+    import SocketServer as compat_socketserver
+
+
+class BuildHTTPServer(compat_socketserver.ThreadingMixIn, compat_http_server.HTTPServer):
     allow_reuse_address = True
 
 
@@ -191,7 +212,7 @@ def main(args=None):
                         action='store_const', dest='action', const='service',
                         help='Run as a Windows service')
     parser.add_argument('-b', '--bind', metavar='<host:port>',
-                        action='store', default='localhost:8142',
+                        action='store', default='0.0.0.0:8142',
                         help='Bind to host:port (default %default)')
     options = parser.parse_args(args=args)
 
@@ -216,7 +237,7 @@ def main(args=None):
     srv = BuildHTTPServer((host, port), BuildHTTPRequestHandler)
     thr = threading.Thread(target=srv.serve_forever)
     thr.start()
-    input('Press ENTER to shut down')
+    compat_input('Press ENTER to shut down')
     srv.shutdown()
     thr.join()
 
@@ -231,8 +252,6 @@ def rmtree(path):
             os.remove(fname)
     os.rmdir(path)
 
-#==============================================================================
-
 
 class BuildError(Exception):
     def __init__(self, output, code=500):
@@ -249,15 +268,25 @@ class HTTPError(BuildError):
 
 class PythonBuilder(object):
     def __init__(self, **kwargs):
-        pythonVersion = kwargs.pop('python', '2.7')
-        try:
-            key = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, r'SOFTWARE\Python\PythonCore\%s\InstallPath' % pythonVersion)
+        python_version = kwargs.pop('python', '3.4')
+        python_path = None
+        for node in ('Wow6432Node\\', ''):
             try:
-                self.pythonPath, _ = _winreg.QueryValueEx(key, '')
-            finally:
-                _winreg.CloseKey(key)
-        except Exception:
-            raise BuildError('No such Python version: %s' % pythonVersion)
+                key = compat_winreg.OpenKey(
+                    compat_winreg.HKEY_LOCAL_MACHINE,
+                    r'SOFTWARE\%sPython\PythonCore\%s\InstallPath' % (node, python_version))
+                try:
+                    python_path, _ = compat_winreg.QueryValueEx(key, '')
+                finally:
+                    compat_winreg.CloseKey(key)
+                break
+            except Exception:
+                pass
+
+        if not python_path:
+            raise BuildError('No such Python version: %s' % python_version)
+
+        self.pythonPath = python_path
 
         super(PythonBuilder, self).__init__(**kwargs)
 
@@ -305,8 +334,10 @@ class YoutubeDLBuilder(object):
 
     def build(self):
         try:
-            subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'],
-                                    cwd=self.buildPath)
+            proc = subprocess.Popen([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'], stdin=subprocess.PIPE, cwd=self.buildPath)
+            proc.wait()
+            #subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'],
+            #                        cwd=self.buildPath)
         except subprocess.CalledProcessError as e:
             raise BuildError(e.output)
 
@@ -369,12 +400,12 @@ class Builder(PythonBuilder, GITBuilder, YoutubeDLBuilder, DownloadBuilder, Clea
     pass
 
 
-class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
+class BuildHTTPRequestHandler(compat_http_server.BaseHTTPRequestHandler):
     actionDict = {'build': Builder, 'download': Builder}  # They're the same, no more caching.
 
     def do_GET(self):
-        path = urlparse.urlparse(self.path)
-        paramDict = dict([(key, value[0]) for key, value in urlparse.parse_qs(path.query).items()])
+        path = compat_urlparse.urlparse(self.path)
+        paramDict = dict([(key, value[0]) for key, value in compat_urlparse.parse_qs(path.query).items()])
         action, _, path = path.path.strip('/').partition('/')
         if path:
             path = path.split('/')
@@ -388,7 +419,7 @@ class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
                         builder.close()
                 except BuildError as e:
                     self.send_response(e.code)
-                    msg = unicode(e).encode('UTF-8')
+                    msg = compat_str(e).encode('UTF-8')
                     self.send_header('Content-Type', 'text/plain; charset=UTF-8')
                     self.send_header('Content-Length', len(msg))
                     self.end_headers()
@@ -400,7 +431,5 @@ class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
         else:
             self.send_response(500, 'Malformed URL')
 
-#==============================================================================
-
 if __name__ == '__main__':
     main()
diff --git a/devscripts/create-github-release.py b/devscripts/create-github-release.py
new file mode 100644 (file)
index 0000000..3b8021e
--- /dev/null
@@ -0,0 +1,111 @@
+#!/usr/bin/env python
+from __future__ import unicode_literals
+
+import base64
+import json
+import mimetypes
+import netrc
+import optparse
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.compat import (
+    compat_basestring,
+    compat_input,
+    compat_getpass,
+    compat_print,
+    compat_urllib_request,
+)
+from youtube_dl.utils import (
+    make_HTTPS_handler,
+    sanitized_Request,
+)
+
+
+class GitHubReleaser(object):
+    _API_URL = 'https://api.github.com/repos/rg3/youtube-dl/releases'
+    _UPLOADS_URL = 'https://uploads.github.com/repos/rg3/youtube-dl/releases/%s/assets?name=%s'
+    _NETRC_MACHINE = 'github.com'
+
+    def __init__(self, debuglevel=0):
+        self._init_github_account()
+        https_handler = make_HTTPS_handler({}, debuglevel=debuglevel)
+        self._opener = compat_urllib_request.build_opener(https_handler)
+
+    def _init_github_account(self):
+        try:
+            info = netrc.netrc().authenticators(self._NETRC_MACHINE)
+            if info is not None:
+                self._username = info[0]
+                self._password = info[2]
+                compat_print('Using GitHub credentials found in .netrc...')
+                return
+            else:
+                compat_print('No GitHub credentials found in .netrc')
+        except (IOError, netrc.NetrcParseError):
+            compat_print('Unable to parse .netrc')
+        self._username = compat_input(
+            'Type your GitHub username or email address and press [Return]: ')
+        self._password = compat_getpass(
+            'Type your GitHub password and press [Return]: ')
+
+    def _call(self, req):
+        if isinstance(req, compat_basestring):
+            req = sanitized_Request(req)
+        # Authorizing manually since GitHub does not respond with 401 with
+        # WWW-Authenticate header set (see
+        # https://developer.github.com/v3/#basic-authentication)
+        b64 = base64.b64encode(
+            ('%s:%s' % (self._username, self._password)).encode('utf-8')).decode('ascii')
+        req.add_header('Authorization', 'Basic %s' % b64)
+        response = self._opener.open(req).read().decode('utf-8')
+        return json.loads(response)
+
+    def list_releases(self):
+        return self._call(self._API_URL)
+
+    def create_release(self, tag_name, name=None, body='', draft=False, prerelease=False):
+        data = {
+            'tag_name': tag_name,
+            'target_commitish': 'master',
+            'name': name,
+            'body': body,
+            'draft': draft,
+            'prerelease': prerelease,
+        }
+        req = sanitized_Request(self._API_URL, json.dumps(data).encode('utf-8'))
+        return self._call(req)
+
+    def create_asset(self, release_id, asset):
+        asset_name = os.path.basename(asset)
+        url = self._UPLOADS_URL % (release_id, asset_name)
+        # Our files are small enough to be loaded directly into memory.
+        data = open(asset, 'rb').read()
+        req = sanitized_Request(url, data)
+        mime_type, _ = mimetypes.guess_type(asset_name)
+        req.add_header('Content-Type', mime_type or 'application/octet-stream')
+        return self._call(req)
+
+
+def main():
+    parser = optparse.OptionParser(usage='%prog VERSION BUILDPATH')
+    options, args = parser.parse_args()
+    if len(args) != 2:
+        parser.error('Expected a version and a build directory')
+
+    version, build_path = args
+
+    releaser = GitHubReleaser()
+
+    new_release = releaser.create_release(version, name='youtube-dl %s' % version)
+    release_id = new_release['id']
+
+    for asset in os.listdir(build_path):
+        compat_print('Uploading %s...' % asset)
+        releaser.create_asset(release_id, os.path.join(build_path, asset))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/devscripts/install_srelay.sh b/devscripts/install_srelay.sh
new file mode 100755 (executable)
index 0000000..33ce8a3
--- /dev/null
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+mkdir -p tmp && cd tmp
+wget -N http://downloads.sourceforge.net/project/socks-relay/socks-relay/srelay-0.4.8/srelay-0.4.8b6.tar.gz
+tar zxvf srelay-0.4.8b6.tar.gz
+cd srelay-0.4.8b6
+./configure
+make
diff --git a/devscripts/lazy_load_template.py b/devscripts/lazy_load_template.py
new file mode 100644 (file)
index 0000000..2e6e664
--- /dev/null
@@ -0,0 +1,19 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+
+class LazyLoadExtractor(object):
+    _module = None
+
+    @classmethod
+    def ie_key(cls):
+        return cls.__name__[:-2]
+
+    def __new__(cls, *args, **kwargs):
+        mod = __import__(cls._module, fromlist=(cls.__name__,))
+        real_cls = getattr(mod, cls.__name__)
+        instance = real_cls.__new__(real_cls)
+        instance.__init__(*args, **kwargs)
+        return instance
diff --git a/devscripts/make_issue_template.py b/devscripts/make_issue_template.py
new file mode 100644 (file)
index 0000000..b7ad23d
--- /dev/null
@@ -0,0 +1,29 @@
+#!/usr/bin/env python
+from __future__ import unicode_literals
+
+import io
+import optparse
+
+
+def main():
+    parser = optparse.OptionParser(usage='%prog INFILE OUTFILE')
+    options, args = parser.parse_args()
+    if len(args) != 2:
+        parser.error('Expected an input and an output filename')
+
+    infile, outfile = args
+
+    with io.open(infile, encoding='utf-8') as inf:
+        issue_template_tmpl = inf.read()
+
+    # Get the version from youtube_dl/version.py without importing the package
+    exec(compile(open('youtube_dl/version.py').read(),
+                 'youtube_dl/version.py', 'exec'))
+
+    out = issue_template_tmpl % {'version': locals()['__version__']}
+
+    with io.open(outfile, 'w', encoding='utf-8') as outf:
+        outf.write(out)
+
+if __name__ == '__main__':
+    main()
diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
new file mode 100644 (file)
index 0000000..9a79c2b
--- /dev/null
@@ -0,0 +1,98 @@
+from __future__ import unicode_literals, print_function
+
+from inspect import getsource
+import os
+from os.path import dirname as dirn
+import sys
+
+print('WARNING: Lazy loading extractors is an experimental feature that may not always work', file=sys.stderr)
+
+sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
+
+lazy_extractors_filename = sys.argv[1]
+if os.path.exists(lazy_extractors_filename):
+    os.remove(lazy_extractors_filename)
+
+from youtube_dl.extractor import _ALL_CLASSES
+from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor
+
+with open('devscripts/lazy_load_template.py', 'rt') as f:
+    module_template = f.read()
+
+module_contents = [
+    module_template + '\n' + getsource(InfoExtractor.suitable) + '\n',
+    'class LazyLoadSearchExtractor(LazyLoadExtractor):\n    pass\n']
+
+ie_template = '''
+class {name}({bases}):
+    _VALID_URL = {valid_url!r}
+    _module = '{module}'
+'''
+
+make_valid_template = '''
+    @classmethod
+    def _make_valid_url(cls):
+        return {valid_url!r}
+'''
+
+
+def get_base_name(base):
+    if base is InfoExtractor:
+        return 'LazyLoadExtractor'
+    elif base is SearchInfoExtractor:
+        return 'LazyLoadSearchExtractor'
+    else:
+        return base.__name__
+
+
+def build_lazy_ie(ie, name):
+    valid_url = getattr(ie, '_VALID_URL', None)
+    s = ie_template.format(
+        name=name,
+        bases=', '.join(map(get_base_name, ie.__bases__)),
+        valid_url=valid_url,
+        module=ie.__module__)
+    if ie.suitable.__func__ is not InfoExtractor.suitable.__func__:
+        s += '\n' + getsource(ie.suitable)
+    if hasattr(ie, '_make_valid_url'):
+        # search extractors
+        s += make_valid_template.format(valid_url=ie._make_valid_url())
+    return s
+
+# find the correct sorting and add the required base classes so that subclasses
+# can be correctly created
+classes = _ALL_CLASSES[:-1]
+ordered_cls = []
+while classes:
+    for c in classes[:]:
+        bases = set(c.__bases__) - set((object, InfoExtractor, SearchInfoExtractor))
+        stop = False
+        for b in bases:
+            if b not in classes and b not in ordered_cls:
+                if b.__name__ == 'GenericIE':
+                    exit()
+                classes.insert(0, b)
+                stop = True
+        if stop:
+            break
+        if all(b in ordered_cls for b in bases):
+            ordered_cls.append(c)
+            classes.remove(c)
+            break
+ordered_cls.append(_ALL_CLASSES[-1])
+
+names = []
+for ie in ordered_cls:
+    name = ie.__name__
+    src = build_lazy_ie(ie, name)
+    module_contents.append(src)
+    if ie in _ALL_CLASSES:
+        names.append(name)
+
+module_contents.append(
+    '_ALL_CLASSES = [{0}]'.format(', '.join(names)))
+
+module_src = '\n'.join(module_contents) + '\n'
+
+with open(lazy_extractors_filename, 'wt') as f:
+    f.write(module_src)
index 776e6556e5b2bd683acbcf79d7bc07431be6548a..e3f6339b5a60fc0a19106e7447842d08e680dce2 100644 (file)
@@ -1,13 +1,46 @@
 from __future__ import unicode_literals
 
 import io
+import optparse
 import os.path
-import sys
 import re
 
 ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 README_FILE = os.path.join(ROOT_DIR, 'README.md')
 
+PREFIX = '''%YOUTUBE-DL(1)
+
+# NAME
+
+youtube\-dl \- download videos from youtube.com or other video platforms
+
+# SYNOPSIS
+
+**youtube-dl** \[OPTIONS\] URL [URL...]
+
+'''
+
+
+def main():
+    parser = optparse.OptionParser(usage='%prog OUTFILE.md')
+    options, args = parser.parse_args()
+    if len(args) != 1:
+        parser.error('Expected an output filename')
+
+    outfile, = args
+
+    with io.open(README_FILE, encoding='utf-8') as f:
+        readme = f.read()
+
+    readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
+    readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
+    readme = PREFIX + readme
+
+    readme = filter_options(readme)
+
+    with io.open(outfile, 'w', encoding='utf-8') as outf:
+        outf.write(readme)
+
 
 def filter_options(readme):
     ret = ''
@@ -37,27 +70,5 @@ def filter_options(readme):
 
     return ret
 
-with io.open(README_FILE, encoding='utf-8') as f:
-    readme = f.read()
-
-PREFIX = '''%YOUTUBE-DL(1)
-
-# NAME
-
-youtube\-dl \- download videos from youtube.com or other video platforms
-
-# SYNOPSIS
-
-**youtube-dl** \[OPTIONS\] URL [URL...]
-
-'''
-readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
-readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
-readme = PREFIX + readme
-
-readme = filter_options(readme)
-
-if sys.version_info < (3, 0):
-    print(readme.encode('utf-8'))
-else:
-    print(readme)
+if __name__ == '__main__':
+    main()
index 61806961c63798dfab081ceae5d76f9afc0ff773..f8d466ba879ff7832a652ea41cfb2527b49ce954 100755 (executable)
@@ -6,7 +6,7 @@
 # * the git config user.signingkey is properly set
 
 # You will need
-# pip install coverage nose rsa
+# pip install coverage nose rsa wheel
 
 # TODO
 # release notes
 set -e
 
 skip_tests=true
-if [ "$1" = '--run-tests' ]; then
-    skip_tests=false
-    shift
-fi
+gpg_sign_commits=""
+buildserver='localhost:8142'
+
+while true
+do
+case "$1" in
+    --run-tests)
+        skip_tests=false
+        shift
+    ;;
+    --gpg-sign-commits|-S)
+        gpg_sign_commits="-S"
+        shift
+    ;;
+    --buildserver)
+        buildserver="$2"
+        shift 2
+    ;;
+    --*)
+        echo "ERROR: unknown option $1"
+        exit 1
+    ;;
+    *)
+        break
+    ;;
+esac
+done
 
 if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi
 version="$1"
@@ -33,6 +56,9 @@ if [ ! -z "`git status --porcelain | grep -v CHANGELOG`" ]; then echo 'ERROR: th
 useless_files=$(find youtube_dl -type f -not -name '*.py')
 if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in youtube_dl: $useless_files"; exit 1; fi
 if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi
+if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; exit 1; fi
+if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi
+if ! python3 -c 'import wheel' 2>/dev/null; then echo 'ERROR: wheel is missing'; exit 1; fi
 
 /bin/echo -e "\n### First of all, testing..."
 make clean
@@ -45,10 +71,10 @@ fi
 /bin/echo -e "\n### Changing version in version.py..."
 sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
 
-/bin/echo -e "\n### Committing documentation and youtube_dl/version.py..."
-make README.md CONTRIBUTING.md supportedsites
-git add README.md CONTRIBUTING.md docs/supportedsites.md youtube_dl/version.py
-git commit -m "release $version"
+/bin/echo -e "\n### Committing documentation, templates and youtube_dl/version.py..."
+make README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md supportedsites
+git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md docs/supportedsites.md youtube_dl/version.py
+git commit $gpg_sign_commits -m "release $version"
 
 /bin/echo -e "\n### Now tagging, signing and pushing..."
 git tag -s -m "Release $version" "$version"
@@ -64,7 +90,7 @@ git push origin "$version"
 REV=$(git rev-parse HEAD)
 make youtube-dl youtube-dl.tar.gz
 read -p "VM running? (y/n) " -n 1
-wget "http://localhost:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
+wget "http://$buildserver/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
 mkdir -p "build/$version"
 mv youtube-dl youtube-dl.exe "build/$version"
 mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz"
@@ -74,15 +100,16 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
 (cd build/$version/ && sha256sum $RELEASE_FILES > SHA2-256SUMS)
 (cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS)
 
-/bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..."
+/bin/echo -e "\n### Signing and uploading the new binaries to GitHub..."
 for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done
-scp -r "build/$version" ytdl@yt-dl.org:html/tmp/
-ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/"
+
+ROOT=$(pwd)
+python devscripts/create-github-release.py $version "$ROOT/build/$version"
+
 ssh ytdl@yt-dl.org "sh html/update_latest.sh $version"
 
 /bin/echo -e "\n### Now switching to gh-pages..."
 git clone --branch gh-pages --single-branch . build/gh-pages
-ROOT=$(pwd)
 (
     set -e
     ORIGIN_URL=$(git config --get remote.origin.url)
@@ -94,7 +121,7 @@ ROOT=$(pwd)
     "$ROOT/devscripts/gh-pages/update-copyright.py"
     "$ROOT/devscripts/gh-pages/update-sites.py"
     git add *.html *.html.in update
-    git commit -m "release $version"
+    git commit $gpg_sign_commits -m "release $version"
     git push "$ROOT" gh-pages
     git push "$ORIGIN_URL" gh-pages
 )
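
The release script now takes --run-tests, -S/--gpg-sign-commits and --buildserver HOST:PORT options, checks up front for pandoc, python3-rsa and wheel, and publishes the signed binaries to GitHub via devscripts/create-github-release.py instead of scp-ing them to yt-dl.org. The SHA2-256SUMS/SHA2-512SUMS files it writes use the standard sha256sum/sha512sum format, so a download can be checked with a few lines of Python (a minimal sketch; the file names are illustrative):

    import hashlib

    def verify_sha256(filename, sums_file='SHA2-256SUMS'):
        # each line of the sums file reads: "<hex digest>  <filename>"
        with open(sums_file) as f:
            sums = dict(line.split()[::-1] for line in f if line.strip())
        with open(filename, 'rb') as f:
            digest = hashlib.sha256(f.read()).hexdigest()
        return sums.get(filename) == digest

    print(verify_sha256('youtube-dl.exe'))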
index 74596155ce61895c72922cf1a9b4248882feac79..891499f59f71583cc4e29a3c7c61b003f373227d 100644 (file)
@@ -6,6 +6,7 @@
  - **22tracks:genre**
  - **22tracks:track**
  - **24video**
+ - **3qsdn**: 3Q SDN
  - **3sat**
  - **4tube**
  - **56.com**
@@ -15,6 +16,8 @@
  - **9gag**
  - **abc.net.au**
  - **Abc7News**
+ - **abcnews**
+ - **abcnews:video**
  - **AcademicEarth:Course**
  - **acast**
  - **acast:channel**
@@ -25,6 +28,7 @@
  - **AdobeTVVideo**
  - **AdultSwim**
  - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network
+ - **AfreecaTV**: afreecatv.com
  - **Aftonbladet**
  - **AirMozilla**
  - **AlJazeera**
@@ -40,8 +44,8 @@
  - **appletrailers:section**
  - **archive.org**: archive.org videos
  - **ARD**
- - **ARD:mediathek**: Saarländischer Rundfunk
  - **ARD:mediathek**
+ - **ARD:mediathek**: Saarländischer Rundfunk
  - **arte.tv**
  - **arte.tv:+7**
  - **arte.tv:cinema**
  - **arte.tv:ddc**
  - **arte.tv:embed**
  - **arte.tv:future**
+ - **arte.tv:info**
  - **arte.tv:magazine**
+ - **arte.tv:playlist**
  - **AtresPlayer**
  - **ATTTechChannel**
  - **AudiMedia**
+ - **AudioBoom**
  - **audiomack**
  - **audiomack:album**
+ - **auroravid**: AuroraVid
  - **Azubu**
  - **AzubuLive**
  - **BaiduVideo**: 百度视频
@@ -66,6 +74,8 @@
  - **bbc**: BBC
  - **bbc.co.uk**: BBC iPlayer
  - **bbc.co.uk:article**: BBC articles
+ - **bbc.co.uk:iplayer:playlist**
+ - **bbc.co.uk:playlist**
  - **BeatportPro**
  - **Beeg**
  - **BehindKink**
  - **Bigflix**
  - **Bild**: Bild.de
  - **BiliBili**
+ - **BioBioChileTV**
+ - **BIQLE**
  - **BleacherReport**
  - **BleacherReportCMS**
  - **blinkx**
  - **Bloomberg**
+ - **BokeCC**
  - **Bpb**: Bundeszentrale für politische Bildung
  - **BR**: Bayerischer Rundfunk Mediathek
+ - **BravoTV**
  - **Break**
  - **brightcove:legacy**
  - **brightcove:new**
  - **BYUtv**
  - **Camdemy**
  - **CamdemyFolder**
+ - **CamWithHer**
  - **canalc2.tv**
  - **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
  - **Canvas**
+ - **CarambaTV**
+ - **CarambaTVPage**
  - **CBC**
  - **CBCPlayer**
  - **CBS**
+ - **CBSInteractive**
+ - **CBSLocal**
  - **CBSNews**: CBS News
  - **CBSNewsLiveVideo**: CBS News Live Videos
  - **CBSSports**
+ - **CDA**
  - **CeskaTelevize**
  - **channel9**: Channel 9
  - **Chaturbate**
  - **chirbit**
  - **chirbit:profile**
  - **Cinchcast**
- - **Cinemassacre**
  - **Clipfish**
  - **cliphunter**
+ - **ClipRs**
  - **Clipsyndicate**
+ - **CloserToTruth**
  - **cloudtime**: CloudTime
  - **Cloudy**
  - **Clubic**
  - **Clyp**
  - **cmt.com**
- - **CNET**
+ - **CNBC**
  - **CNN**
  - **CNNArticle**
  - **CNNBlogs**
- - **CollegeHumor**
  - **CollegeRama**
  - **ComCarCoff**
  - **ComedyCentral**
  - **ComedyCentralShows**: The Daily Show / The Colbert Report
  - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
+ - **Coub**
  - **Cracked**
  - **Crackle**
  - **Criterion**
  - **CrooksAndLiars**
  - **Crunchyroll**
  - **crunchyroll:playlist**
+ - **CSNNE**
  - **CSpan**: C-SPAN
  - **CtsNews**: 華視新聞
  - **culturebox.francetvinfo.fr**
  - **CultureUnplugged**
  - **CWTV**
+ - **DailyMail**
  - **dailymotion**
  - **dailymotion:playlist**
  - **dailymotion:user**
  - **defense.gouv.fr**
  - **democracynow**
  - **DHM**: Filmarchiv - Deutsches Historisches Museum
+ - **DigitallySpeaking**
  - **Digiteka**
  - **Discovery**
  - **Dotsub**
  - **Dropbox**
  - **DrTuber**
  - **DRTV**
- - **Dump**
  - **Dumpert**
  - **dvtv**: http://video.aktualne.cz/
+ - **dw**
+ - **dw:article**
  - **EaglePlatform**
  - **EbaumsWorld**
  - **EchoMsk**
  - **exfm**: ex.fm
  - **ExpoTV**
  - **ExtremeTube**
+ - **EyedoTV**
  - **facebook**
- - **facebook:post**
  - **faz.net**
  - **fc2**
  - **Fczenit**
+ - **features.aol.com**
  - **fernsehkritik.tv**
  - **Firstpost**
  - **FiveTV**
  - **Flickr**
  - **Folketinget**: Folketinget (ft.dk; Danish parliament)
  - **FootyRoom**
+ - **Formula1**
  - **FOX**
  - **Foxgay**
  - **FoxNews**: Fox News and Fox Business Video
  - **Gamersyde**
  - **GameSpot**
  - **GameStar**
- - **Gametrailers**
  - **Gazeta**
  - **GDCVault**
  - **generic**: Generic downloader that works on some sites
  - **Globo**
  - **GloboArticle**
  - **GodTube**
+ - **GodTV**
  - **GoldenMoustache**
  - **Golem**
  - **GoogleDrive**
  - **GPUTechConf**
  - **Groupon**
  - **Hark**
+ - **HBO**
  - **HearThisAt**
  - **Heise**
  - **HellPorno**
  - **ivi:compilation**: ivi.ru compilations
  - **ivideon**: Ivideon TV
  - **Izlesene**
- - **JadoreCettePub**
  - **JeuxVideo**
  - **Jove**
  - **jpopsuki.tv**
  - **kontrtube**: KontrTube.ru - Труба зовёт
  - **KrasView**: Красвью
  - **Ku6**
+ - **KUSI**
  - **kuwo:album**: 酷我音乐 - 专辑
  - **kuwo:category**: 酷我音乐 - 分类
  - **kuwo:chart**: 酷我音乐 - 排行榜
  - **kuwo:song**: 酷我音乐
  - **la7.tv**
  - **Laola1Tv**
+ - **Le**: 乐视网
+ - **Learnr**
  - **Lecture2Go**
  - **Lemonde**
- - **Letv**: 乐视网
+ - **LePlaylist**
  - **LetvCloud**: 乐视云
- - **LetvPlaylist**
- - **LetvTv**
  - **Libsyn**
+ - **life**: Life.ru
  - **life:embed**
- - **lifenews**: LIFE | NEWS
  - **limelight**
  - **limelight:channel**
  - **limelight:channel_list**
+ - **LiTV**
  - **LiveLeak**
  - **livestream**
  - **livestream:original**
  - **LnkGo**
+ - **loc**: Library of Congress
+ - **LocalNews8**
  - **LoveHomePorn**
  - **lrt.lt**
  - **lynda**: lynda.com videos
  - **m6**
  - **macgamestore**: MacGameStore trailers
  - **mailru**: Видео@Mail.Ru
+ - **MakersChannel**
  - **MakerTV**
- - **Malemotion**
  - **MatchTV**
  - **MDR**: MDR.DE and KiKA
  - **media.ccc.de**
  - **metacafe**
  - **Metacritic**
  - **Mgoon**
+ - **MGTV**: 芒果TV
  - **Minhateca**
  - **MinistryGrid**
+ - **Minoto**
  - **miomio.tv**
  - **MiTele**: mitele.es
  - **mixcloud**
+ - **mixcloud:playlist**
+ - **mixcloud:stream**
+ - **mixcloud:user**
  - **MLB**
+ - **Mnet**
  - **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
  - **Mofosex**
  - **Mojvideo**
  - **Moniker**: allmyvideos.net and vidspot.net
- - **mooshare**: Mooshare.biz
  - **Morningstar**: morningstar.com
  - **Motherless**
  - **Motorsport**: motorsport.com
  - **MovieFap**
  - **Moviezine**
  - **MPORA**
- - **MSNBC**
  - **MTV**
  - **mtv.de**
  - **mtviggy.com**
  - **mtvservices:embedded**
  - **MuenchenTV**: münchen.tv
  - **MusicPlayOn**
- - **muzu.tv**
+ - **mva**: Microsoft Virtual Academy videos
+ - **mva:course**: Microsoft Virtual Academy courses
  - **Mwave**
+ - **MwaveMeetGreet**
  - **MySpace**
  - **MySpace:album**
  - **MySpass**
  - **myvideo** (Currently broken)
  - **MyVidster**
  - **n-tv.de**
- - **NationalGeographic**
+ - **natgeo**
+ - **natgeo:channel**
  - **Naver**
  - **NBA**
  - **NBC**
  - **ndr:embed:base**
  - **NDTV**
  - **NerdCubedFeed**
- - **Nerdist**
  - **netease:album**: 网易云音乐 - 专辑
  - **netease:djradio**: 网易云音乐 - 电台
  - **netease:mv**: 网易云音乐 - MV
  - **nfl.com**
  - **nhl.com**
  - **nhl.com:news**: NHL news
- - **nhl.com:videocenter**: NHL videocenter category
+ - **nhl.com:videocenter**
+ - **nhl.com:videocenter:category**: NHL videocenter category
  - **nick.com**
+ - **nick.de**
  - **niconico**: ニコニコ動画
  - **NiconicoPlaylist**
  - **njoy**: N-JOY
  - **Normalboots**
  - **NosVideo**
  - **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz
- - **novamov**: NovaMov
  - **nowness**
  - **nowness:playlist**
  - **nowness:series**
  - **Npr**
  - **NRK**
  - **NRKPlaylist**
+ - **NRKSkole**: NRK Skole
  - **NRKTV**: NRK TV and NRK Radio
  - **ntv.ru**
  - **Nuvid**
  - **OnionStudios**
  - **Ooyala**
  - **OoyalaExternal**
+ - **Openload**
  - **OraTV**
  - **orf:fm4**: radio FM4
  - **orf:iptv**: iptv.ORF.at
  - **Patreon**
  - **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET  (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
  - **pcmag**
- - **Periscope**: Periscope
+ - **People**
+ - **periscope**: Periscope
+ - **periscope:user**: Periscope user videos
  - **PhilharmonieDeParis**: Philharmonie de Paris
  - **phoenix.de**
  - **Photobucket**
  - **Pinkbike**
  - **Pladform**
- - **PlanetaPlay**
  - **play.fm**
  - **played.to**
  - **PlaysTV**
  - **Pornotube**
  - **PornoVoisines**
  - **PornoXO**
+ - **PressTV**
  - **PrimeShareTV**
  - **PromptFile**
  - **prosiebensat1**: ProSiebenSat.1 Digital
  - **qqmusic:playlist**: QQ音乐 - 歌单
  - **qqmusic:singer**: QQ音乐 - 歌手
  - **qqmusic:toplist**: QQ音乐 - 排行榜
- - **QuickVid**
  - **R7**
+ - **R7Article**
  - **radio.de**
  - **radiobremen**
+ - **radiocanada**
+ - **RadioCanadaAudioVideo**
  - **radiofrance**
  - **RadioJavan**
  - **Rai**
  - **RedTube**
  - **RegioTV**
  - **Restudy**
+ - **Reuters**
  - **ReverbNation**
- - **Revision3**
+ - **revision**
+ - **revision3:embed**
+ - **RICE**
  - **RingTV**
+ - **RockstarGames**
  - **RottenTomatoes**
  - **Roxwel**
  - **RTBF**
  - **RUTV**: RUTV.RU
  - **Ruutu**
  - **safari**: safaribooksonline.com online video
+ - **safari:api**
  - **safari:course**: safaribooksonline.com online courses
  - **Sandia**: Sandia National Laboratories
  - **Sapo**: SAPO Vídeos
  - **ScreencastOMatic**
  - **ScreenJunkies**
  - **ScreenwaveMedia**
+ - **Seeker**
  - **SenateISVP**
+ - **SendtoNews**
  - **ServingSys**
  - **Sexu**
- - **SexyKarma**: Sexy Karma and Watch Indian Porn
  - **Shahid**
  - **Shared**: shared.sx and vivo.sx
  - **ShareSix**
  - **smotri:broadcast**: Smotri.com broadcasts
  - **smotri:community**: Smotri.com community videos
  - **smotri:user**: Smotri.com user videos
- - **SnagFilms**
- - **SnagFilmsEmbed**
  - **Snotr**
  - **Sohu**
  - **soundcloud**
  - **southpark.de**
  - **southpark.nl**
  - **southparkstudios.dk**
- - **Space**
  - **SpankBang**
  - **Spankwire**
  - **Spiegel**
  - **Syfy**
  - **SztvHu**
  - **Tagesschau**
+ - **tagesschau:player**
  - **Tapely**
  - **Tass**
+ - **TDSLifeway**
  - **teachertube**: teachertube.com videos
  - **teachertube:user:collection**: teachertube.com user and collection videos
  - **TeachingChannel**
  - **Telegraaf**
  - **TeleMB**
  - **TeleTask**
- - **TenPlay**
+ - **Telewebion**
  - **TF1**
  - **TheIntercept**
- - **TheOnion**
  - **ThePlatform**
  - **ThePlatformFeed**
+ - **TheScene**
  - **TheSixtyOne**
+ - **TheStar**
  - **ThisAmericanLife**
  - **ThisAV**
  - **THVideo**
  - **TMZ**
  - **TMZArticle**
  - **TNAFlix**
+ - **TNAFlixNetworkEmbed**
  - **toggle**
  - **tou.tv**
  - **Toypics**: Toypics user profile
  - **tv.dfb.de**
  - **TV2**
  - **TV2Article**
+ - **TV3**
  - **TV4**: tv4.se and tv4play.se
  - **TVC**
  - **TVCArticle**
  - **tvigle**: Интернет-телевидение Tvigle.ru
  - **tvland.com**
- - **tvp.pl**
- - **tvp.pl:Series**
+ - **tvp**: Telewizja Polska
+ - **tvp:series**
  - **TVPlay**: TV3Play and related services
  - **Tweakers**
- - **twitch:bookmarks**
  - **twitch:chapter**
+ - **twitch:clips**
  - **twitch:past_broadcasts**
  - **twitch:profile**
  - **twitch:stream**
  - **twitter**
  - **twitter:amplify**
  - **twitter:card**
- - **Ubu**
  - **udemy**
  - **udemy:course**
  - **UDNEmbed**: 聯合影音
  - **Unistra**
  - **Urort**: NRK P3 Urørt
+ - **USAToday**
  - **ustream**
  - **ustream:channel**
+ - **ustudio**
+ - **ustudio:embed**
  - **Varzesh3**
  - **Vbox7**
  - **VeeHD**
  - **Vessel**
  - **Vesti**: Вести.Ru
  - **Vevo**
+ - **VevoPlaylist**
  - **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
  - **vh1.com**
  - **Vice**
+ - **ViceShow**
  - **Viddler**
  - **video.google:search**: Google Video search
  - **video.mit.edu**
  - **VideoDetective**
  - **videofy.me**
- - **VideoMega** (Currently broken)
+ - **VideoMega**
  - **videomore**
  - **videomore:season**
  - **videomore:video**
  - **VideoPremium**
  - **VideoTt**: video.tt - Your True Tube (Currently broken)
  - **videoweed**: VideoWeed
+ - **Vidio**
  - **vidme**
  - **vidme:user**
  - **vidme:user:likes**
  - **Vidzi**
  - **vier**
  - **vier:videos**
+ - **ViewLift**
+ - **ViewLiftEmbed**
  - **Viewster**
  - **Viidea**
  - **viki**
  - **vimeo:channel**
  - **vimeo:group**
  - **vimeo:likes**: Vimeo user likes
+ - **vimeo:ondemand**
  - **vimeo:review**: Review pages on vimeo
  - **vimeo:user**
  - **vimeo:watchlater**: Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)
  - **vlive**
  - **Vodlocker**
  - **VoiceRepublic**
+ - **VoxMedia**
  - **Vporn**
  - **vpro**: npo.nl and ntr.nl
  - **VRT**
  - **vube**: Vube.com
  - **VuClip**
- - **vulture.com**
  - **Walla**
- - **WashingtonPost**
+ - **washingtonpost**
+ - **washingtonpost:article**
  - **wat.tv**
- - **WayOfTheMaster**
+ - **WatchIndianPorn**: Watch Indian Porn
  - **WDR**
  - **wdr:mobile**
- - **WDRMaus**: Sendung mit der Maus
  - **WebOfStories**
  - **WebOfStoriesPlaylist**
- - **Weibo**
  - **WeiqiTV**: WQTV
  - **wholecloud**: WholeCloud
  - **Wimp**
  - **WNL**
  - **WorldStarHipHop**
  - **wrzuta.pl**
+ - **wrzuta.pl:playlist**
  - **WSJ**: Wall Street Journal
  - **XBef**
  - **XboxClips**
- - **XFileShare**: XFileShare based sites: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net, filehoot.com and vidto.me
+ - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE
  - **XHamster**
  - **XHamsterEmbed**
+ - **xiami:album**: 虾米音乐 - 专辑
+ - **xiami:artist**: 虾米音乐 - 歌手
+ - **xiami:collection**: 虾米音乐 - 精选集
+ - **xiami:song**: 虾米音乐
  - **XMinus**
  - **XNXX**
  - **Xstream**
  - **Ynet**
  - **YouJizz**
  - **youku**: 优酷
+ - **youku:show**
  - **YouPorn**
  - **YourUpload**
  - **youtube**: YouTube.com
  - **youtube:channel**: YouTube.com channels
  - **youtube:favorites**: YouTube.com favourite videos, ":ytfav" for short (requires authentication)
  - **youtube:history**: Youtube watch history, ":ythistory" for short (requires authentication)
+ - **youtube:live**: YouTube.com live streams
  - **youtube:playlist**: YouTube.com playlists
  - **youtube:playlists**: YouTube.com user/channel playlists
  - **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication)
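
The list above is generated from the registered extractor classes (the supportedsites make target invoked by the release script). The same information can be pulled programmatically; a sketch using youtube_dl's gen_extractors helper, with illustrative output formatting:

    from youtube_dl.extractor import gen_extractors

    for ie in gen_extractors():
        desc = getattr(ie, 'IE_DESC', None)
        if desc is False:
            continue  # extractors with IE_DESC = False are hidden from the list
        print(' - **%s**%s' % (ie.IE_NAME, ': %s' % desc if desc else ''))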
diff --git a/setup.cfg b/setup.cfg
new file mode 100644 (file)
index 0000000..2dc06ff
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,6 @@
+[wheel]
+universal = True
+
+[flake8]
+exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git
+ignore = E402,E501,E731
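
setup.cfg is new: [wheel] universal = True makes bdist_wheel produce a single py2.py3 wheel, and the [flake8] section keeps the lint excludes out of every developer's command line. Both sections are plain INI, readable with the standard library (illustrative):

    try:
        import configparser                       # Python 3
    except ImportError:
        import ConfigParser as configparser       # Python 2

    cfg = configparser.ConfigParser()
    cfg.read('setup.cfg')
    print(cfg.get('wheel', 'universal'))          # 'True'
    print(cfg.get('flake8', 'ignore'))            # 'E402,E501,E731'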
index bfe931f5b42a506ca3cceb1a5ec4acdb6a6a4813..508b27f3707898d07d303cd1ce44b7e4d54b152f 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -8,11 +8,12 @@ import warnings
 import sys
 
 try:
-    from setuptools import setup
+    from setuptools import setup, Command
     setuptools_available = True
 except ImportError:
-    from distutils.core import setup
+    from distutils.core import setup, Command
     setuptools_available = False
+from distutils.spawn import spawn
 
 try:
     # This will create an exe that needs Microsoft Visual C++ 2008
@@ -20,25 +21,37 @@ try:
     import py2exe
 except ImportError:
     if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
-        print("Cannot import py2exe", file=sys.stderr)
+        print('Cannot import py2exe', file=sys.stderr)
         exit(1)
 
 py2exe_options = {
-    "bundle_files": 1,
-    "compressed": 1,
-    "optimize": 2,
-    "dist_dir": '.',
-    "dll_excludes": ['w9xpopen.exe', 'crypt32.dll'],
+    'bundle_files': 1,
+    'compressed': 1,
+    'optimize': 2,
+    'dist_dir': '.',
+    'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
 }
 
+# Get the version from youtube_dl/version.py without importing the package
+exec(compile(open('youtube_dl/version.py').read(),
+             'youtube_dl/version.py', 'exec'))
+
+DESCRIPTION = 'YouTube video downloader'
+LONG_DESCRIPTION = 'Command-line program to download videos from YouTube.com and other video sites'
+
 py2exe_console = [{
-    "script": "./youtube_dl/__main__.py",
-    "dest_base": "youtube-dl",
+    'script': './youtube_dl/__main__.py',
+    'dest_base': 'youtube-dl',
+    'version': __version__,
+    'description': DESCRIPTION,
+    'comments': LONG_DESCRIPTION,
+    'product_name': 'youtube-dl',
+    'product_version': __version__,
 }]
 
 py2exe_params = {
     'console': py2exe_console,
-    'options': {"py2exe": py2exe_options},
+    'options': {'py2exe': py2exe_options},
     'zipfile': None
 }
 
@@ -70,16 +83,27 @@ else:
     else:
         params['scripts'] = ['bin/youtube-dl']
 
-# Get the version from youtube_dl/version.py without importing the package
-exec(compile(open('youtube_dl/version.py').read(),
-             'youtube_dl/version.py', 'exec'))
+class build_lazy_extractors(Command):
+    description = 'Build the extractor lazy loading module'
+    user_options = []
+
+    def initialize_options(self):
+        pass
+
+    def finalize_options(self):
+        pass
+
+    def run(self):
+        spawn(
+            [sys.executable, 'devscripts/make_lazy_extractors.py', 'youtube_dl/extractor/lazy_extractors.py'],
+            dry_run=self.dry_run,
+        )
 
 setup(
     name='youtube_dl',
     version=__version__,
-    description='YouTube video downloader',
-    long_description='Small command-line program to download videos from'
-    ' YouTube.com and other video sites.',
+    description=DESCRIPTION,
+    long_description=LONG_DESCRIPTION,
     url='https://github.com/rg3/youtube-dl',
     author='Ricardo Garcia',
     author_email='ytdl@yt-dl.org',
@@ -95,17 +119,19 @@ setup(
     # test_requires = ['nosetest'],
 
     classifiers=[
-        "Topic :: Multimedia :: Video",
-        "Development Status :: 5 - Production/Stable",
-        "Environment :: Console",
-        "License :: Public Domain",
-        "Programming Language :: Python :: 2.6",
-        "Programming Language :: Python :: 2.7",
-        "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.2",
-        "Programming Language :: Python :: 3.3",
-        "Programming Language :: Python :: 3.4",
+        'Topic :: Multimedia :: Video',
+        'Development Status :: 5 - Production/Stable',
+        'Environment :: Console',
+        'License :: Public Domain',
+        'Programming Language :: Python :: 2.6',
+        'Programming Language :: Python :: 2.7',
+        'Programming Language :: Python :: 3',
+        'Programming Language :: Python :: 3.2',
+        'Programming Language :: Python :: 3.3',
+        'Programming Language :: Python :: 3.4',
+        'Programming Language :: Python :: 3.5',
     ],
 
+    cmdclass={'build_lazy_extractors': build_lazy_extractors},
     **params
 )
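
setup.py gains a build_lazy_extractors command that shells out to devscripts/make_lazy_extractors.py to generate youtube_dl/extractor/lazy_extractors.py; it is normally run as `python setup.py build_lazy_extractors`. The same command can also be driven from Python via the standard run_setup helper (sketch):

    from distutils.core import run_setup

    # equivalent to: python setup.py build_lazy_extractors
    run_setup('setup.py', script_args=['build_lazy_extractors'])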
index bdd7acca4d91c490f29c21aeac7cc9ba01c86952..dfee217a9b8acb64e426c3ce8fc5c11a9c5a0121 100644 (file)
@@ -11,8 +11,11 @@ import sys
 
 import youtube_dl.extractor
 from youtube_dl import YoutubeDL
-from youtube_dl.utils import (
+from youtube_dl.compat import (
+    compat_os_name,
     compat_str,
+)
+from youtube_dl.utils import (
     preferredencoding,
     write_string,
 )
@@ -21,8 +24,13 @@ from youtube_dl.utils import (
 def get_params(override=None):
     PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                    "parameters.json")
+    LOCAL_PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                                         "local_parameters.json")
     with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
         parameters = json.load(pf)
+    if os.path.exists(LOCAL_PARAMETERS_FILE):
+        with io.open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf:
+            parameters.update(json.load(pf))
     if override:
         parameters.update(override)
     return parameters
@@ -42,7 +50,7 @@ def report_warning(message):
     Print the message to stderr, it will be prefixed with 'WARNING:'
     If stderr is a tty file the 'WARNING:' will be colored
     '''
-    if sys.stderr.isatty() and os.name != 'nt':
+    if sys.stderr.isatty() and compat_os_name != 'nt':
         _msg_header = '\033[0;33mWARNING:\033[0m'
     else:
         _msg_header = 'WARNING:'
@@ -140,6 +148,9 @@ def expect_value(self, got, expected, field):
             expect_value(self, item_got, item_expected, field)
     else:
         if isinstance(expected, compat_str) and expected.startswith('md5:'):
+            self.assertTrue(
+                isinstance(got, compat_str),
+                'Expected field %s to be a unicode object, but got value %r of type %r' % (field, got, type(got)))
             got = 'md5:' + md5(got)
         elif isinstance(expected, compat_str) and expected.startswith('mincount:'):
             self.assertTrue(
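
get_params() now merges an optional, untracked test/local_parameters.json over the tracked parameters.json before applying per-test overrides, so per-machine tweaks no longer require editing a versioned file. A sketch of creating one (the keys shown are ordinary YoutubeDL options, chosen for illustration):

    import json
    import os

    path = os.path.join('test', 'local_parameters.json')
    with open(path, 'w') as f:
        json.dump({'verbose': True, 'socket_timeout': 60}, f)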
diff --git a/test/swftests/ArrayAccess.swf b/test/swftests/ArrayAccess.swf
deleted file mode 100644 (file)
index 6cb49ab..0000000
Binary files a/test/swftests/ArrayAccess.swf and /dev/null differ
diff --git a/test/swftests/ClassCall.swf b/test/swftests/ClassCall.swf
deleted file mode 100644 (file)
index 849cca4..0000000
Binary files a/test/swftests/ClassCall.swf and /dev/null differ
diff --git a/test/swftests/ClassConstruction.swf b/test/swftests/ClassConstruction.swf
deleted file mode 100644 (file)
index 5e27a13..0000000
Binary files a/test/swftests/ClassConstruction.swf and /dev/null differ
diff --git a/test/swftests/ConstArrayAccess.swf b/test/swftests/ConstArrayAccess.swf
deleted file mode 100644 (file)
index 00bde84..0000000
Binary files a/test/swftests/ConstArrayAccess.swf and /dev/null differ
diff --git a/test/swftests/ConstantInt.swf b/test/swftests/ConstantInt.swf
deleted file mode 100644 (file)
index fe9c5c6..0000000
Binary files a/test/swftests/ConstantInt.swf and /dev/null differ
diff --git a/test/swftests/DictCall.swf b/test/swftests/DictCall.swf
deleted file mode 100644 (file)
index b0296d0..0000000
Binary files a/test/swftests/DictCall.swf and /dev/null differ
diff --git a/test/swftests/EqualsOperator.swf b/test/swftests/EqualsOperator.swf
deleted file mode 100644 (file)
index d5628df..0000000
Binary files a/test/swftests/EqualsOperator.swf and /dev/null differ
diff --git a/test/swftests/LocalVars.swf b/test/swftests/LocalVars.swf
deleted file mode 100644 (file)
index 02a267f..0000000
Binary files a/test/swftests/LocalVars.swf and /dev/null differ
diff --git a/test/swftests/MemberAssignment.swf b/test/swftests/MemberAssignment.swf
deleted file mode 100644 (file)
index 970cfcb..0000000
Binary files a/test/swftests/MemberAssignment.swf and /dev/null differ
diff --git a/test/swftests/NeOperator.swf b/test/swftests/NeOperator.swf
deleted file mode 100644 (file)
index f882b5f..0000000
Binary files a/test/swftests/NeOperator.swf and /dev/null differ
diff --git a/test/swftests/PrivateCall.swf b/test/swftests/PrivateCall.swf
deleted file mode 100644 (file)
index abc66d6..0000000
Binary files a/test/swftests/PrivateCall.swf and /dev/null differ
diff --git a/test/swftests/PrivateVoidCall.swf b/test/swftests/PrivateVoidCall.swf
deleted file mode 100644 (file)
index 3d60c75..0000000
Binary files a/test/swftests/PrivateVoidCall.swf and /dev/null differ
diff --git a/test/swftests/StaticAssignment.swf b/test/swftests/StaticAssignment.swf
deleted file mode 100644 (file)
index 2b8bf88..0000000
Binary files a/test/swftests/StaticAssignment.swf and /dev/null differ
diff --git a/test/swftests/StaticConstArrayAccess.swf b/test/swftests/StaticConstArrayAccess.swf
deleted file mode 100644 (file)
index 4eec380..0000000
Binary files a/test/swftests/StaticConstArrayAccess.swf and /dev/null differ
diff --git a/test/swftests/StaticRetrieval.swf b/test/swftests/StaticRetrieval.swf
deleted file mode 100644 (file)
index 0823df4..0000000
Binary files a/test/swftests/StaticRetrieval.swf and /dev/null differ
diff --git a/test/swftests/StringBasics.swf b/test/swftests/StringBasics.swf
deleted file mode 100644 (file)
index 7ba6ba6..0000000
Binary files a/test/swftests/StringBasics.swf and /dev/null differ
diff --git a/test/swftests/StringCharCodeAt.swf b/test/swftests/StringCharCodeAt.swf
deleted file mode 100644 (file)
index d037757..0000000
Binary files a/test/swftests/StringCharCodeAt.swf and /dev/null differ
diff --git a/test/swftests/StringConversion.swf b/test/swftests/StringConversion.swf
deleted file mode 100644 (file)
index 8c88556..0000000
Binary files a/test/swftests/StringConversion.swf and /dev/null differ
diff --git a/test/swftests/StringFunctions.swf b/test/swftests/StringFunctions.swf
deleted file mode 100644 (file)
index 8baf0dd..0000000
Binary files a/test/swftests/StringFunctions.swf and /dev/null differ
index 938466a800122211ab0414d9aa9de831951e2903..6404ac89f55df282e9525f6ae1a8e62f7344dd40 100644 (file)
@@ -11,6 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from test.helper import FakeYDL
 from youtube_dl.extractor.common import InfoExtractor
 from youtube_dl.extractor import YoutubeIE, get_info_extractor
+from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError
 
 
 class TestIE(InfoExtractor):
@@ -66,5 +67,14 @@ class TestInfoExtractor(unittest.TestCase):
         self.assertEqual(ie._html_search_meta('e', html), '5')
         self.assertEqual(ie._html_search_meta('f', html), '6')
 
+    def test_download_json(self):
+        uri = encode_data_uri(b'{"foo": "blah"}', 'application/json')
+        self.assertEqual(self.ie._download_json(uri, None), {'foo': 'blah'})
+        uri = encode_data_uri(b'callback({"foo": "blah"})', 'application/javascript')
+        self.assertEqual(self.ie._download_json(uri, None, transform_source=strip_jsonp), {'foo': 'blah'})
+        uri = encode_data_uri(b'{"foo": invalid}', 'application/json')
+        self.assertRaises(ExtractorError, self.ie._download_json, uri, None)
+        self.assertEqual(self.ie._download_json(uri, None, fatal=False), None)
+
 if __name__ == '__main__':
     unittest.main()
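
test_download_json exercises _download_json against data: URIs, which avoids network access entirely. encode_data_uri builds an RFC 2397 base64 URI; a plausible sketch of its behaviour (the real helper lives in youtube_dl.utils):

    import base64

    def encode_data_uri(data, mime_type):
        return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))

    print(encode_data_uri(b'{"foo": "blah"}', 'application/json'))
    # data:application/json;base64,eyJmb28iOiAiYmxhaCJ9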
index 59f7ab49dbe4458b5b821d9fae7d629ffab5db1a..ca25025e23a1eb1fd82eed39a534072bffe293ac 100644 (file)
@@ -222,6 +222,11 @@ class TestFormatSelection(unittest.TestCase):
         downloaded = ydl.downloaded_info_dicts[0]
         self.assertEqual(downloaded['format_id'], 'dash-video-low')
 
+        ydl = YDL({'format': 'bestvideo[format_id^=dash][format_id$=low]'})
+        ydl.process_ie_result(info_dict.copy())
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'dash-video-low')
+
         formats = [
             {'format_id': 'vid-vcodec-dot', 'ext': 'mp4', 'preference': 1, 'vcodec': 'avc1.123456', 'acodec': 'none', 'url': TEST_URL},
         ]
@@ -502,6 +507,9 @@ class TestYoutubeDL(unittest.TestCase):
         assertRegexpMatches(self, ydl._format_note({
             'vbr': 10,
         }), '^\s*10k$')
+        assertRegexpMatches(self, ydl._format_note({
+            'fps': 30,
+        }), '^30fps$')
 
     def test_postprocessors(self):
         filename = 'post-processor-testfile.mp4'
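
The first new assertion covers the string operators in format filters: ^= matches a prefix and $= a suffix of string fields such as format_id. Usage sketch:

    from youtube_dl import YoutubeDL

    # select the best video whose format_id starts with 'dash' and ends with 'low'
    ydl = YoutubeDL({'format': 'bestvideo[format_id^=dash][format_id$=low]'})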
index b6bfad05e3c85c07854cc00c337a12caf493e849..f5317ac3e24290d5aa73e12c7e490bfed72d6c21 100644 (file)
@@ -10,34 +10,39 @@ import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 
-from youtube_dl.utils import get_filesystem_encoding
 from youtube_dl.compat import (
     compat_getenv,
+    compat_setenv,
     compat_etree_fromstring,
     compat_expanduser,
     compat_shlex_split,
     compat_str,
+    compat_struct_unpack,
     compat_urllib_parse_unquote,
     compat_urllib_parse_unquote_plus,
+    compat_urllib_parse_urlencode,
 )
 
 
 class TestCompat(unittest.TestCase):
     def test_compat_getenv(self):
         test_str = 'тест'
-        os.environ['YOUTUBE-DL-TEST'] = (
-            test_str if sys.version_info >= (3, 0)
-            else test_str.encode(get_filesystem_encoding()))
+        compat_setenv('YOUTUBE-DL-TEST', test_str)
         self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str)
 
+    def test_compat_setenv(self):
+        test_var = 'YOUTUBE-DL-TEST'
+        test_str = 'тест'
+        compat_setenv(test_var, test_str)
+        compat_getenv(test_var)
+        self.assertEqual(compat_getenv(test_var), test_str)
+
     def test_compat_expanduser(self):
         old_home = os.environ.get('HOME')
         test_str = 'C:\Documents and Settings\тест\Application Data'
-        os.environ['HOME'] = (
-            test_str if sys.version_info >= (3, 0)
-            else test_str.encode(get_filesystem_encoding()))
+        compat_setenv('HOME', test_str)
         self.assertEqual(compat_expanduser('~'), test_str)
-        os.environ['HOME'] = old_home
+        compat_setenv('HOME', old_home or '')
 
     def test_all_present(self):
         import youtube_dl.compat
@@ -70,6 +75,16 @@ class TestCompat(unittest.TestCase):
         self.assertEqual(compat_urllib_parse_unquote_plus('abc%20def'), 'abc def')
         self.assertEqual(compat_urllib_parse_unquote_plus('%7e/abc+def'), '~/abc def')
 
+    def test_compat_urllib_parse_urlencode(self):
+        self.assertEqual(compat_urllib_parse_urlencode({'abc': 'def'}), 'abc=def')
+        self.assertEqual(compat_urllib_parse_urlencode({'abc': b'def'}), 'abc=def')
+        self.assertEqual(compat_urllib_parse_urlencode({b'abc': 'def'}), 'abc=def')
+        self.assertEqual(compat_urllib_parse_urlencode({b'abc': b'def'}), 'abc=def')
+        self.assertEqual(compat_urllib_parse_urlencode([('abc', 'def')]), 'abc=def')
+        self.assertEqual(compat_urllib_parse_urlencode([('abc', b'def')]), 'abc=def')
+        self.assertEqual(compat_urllib_parse_urlencode([(b'abc', 'def')]), 'abc=def')
+        self.assertEqual(compat_urllib_parse_urlencode([(b'abc', b'def')]), 'abc=def')
+
     def test_compat_shlex_split(self):
         self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two'])
 
@@ -88,5 +103,15 @@ class TestCompat(unittest.TestCase):
         self.assertTrue(isinstance(doc.find('chinese').text, compat_str))
         self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str))
 
+    def test_compat_etree_fromstring_doctype(self):
+        xml = '''<?xml version="1.0"?>
+<!DOCTYPE smil PUBLIC "-//W3C//DTD SMIL 2.0//EN" "http://www.w3.org/2001/SMIL20/SMIL20.dtd">
+<smil xmlns="http://www.w3.org/2001/SMIL20/Language"></smil>'''
+        compat_etree_fromstring(xml)
+
+    def test_struct_unpack(self):
+        self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,))
+
+
 if __name__ == '__main__':
     unittest.main()
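
The environment-variable juggling moves behind compat_setenv: os.environ values must be byte strings on Python 2, so the compat layer encodes there and assigns directly on Python 3. A minimal sketch of the idea (the real implementation is in youtube_dl.compat):

    import os
    import sys

    def compat_setenv(key, value, env=os.environ):
        if sys.version_info >= (3, 0):
            env[key] = value
        else:
            from youtube_dl.utils import get_filesystem_encoding
            env[key] = value.encode(get_filesystem_encoding())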
index f2e305b6fed3ce2f0574a7c20e89ffb977934f28..5076ced510847b83b2ac7a5374b92ef248ba8ec8 100644 (file)
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+# coding: utf-8
 from __future__ import unicode_literals
 
 # Allow direct execution
@@ -15,6 +16,15 @@ import threading
 TEST_DIR = os.path.dirname(os.path.abspath(__file__))
 
 
+def http_server_port(httpd):
+    if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
+        # In Jython SSLSocket is not a subclass of socket.socket
+        sock = httpd.socket.sock
+    else:
+        sock = httpd.socket
+    return sock.getsockname()[1]
+
+
 class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
     def log_message(self, format, *args):
         pass
@@ -30,6 +40,22 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
             self.send_header('Content-Type', 'video/mp4')
             self.end_headers()
             self.wfile.write(b'\x00\x00\x00\x00\x20\x66\x74[video]')
+        elif self.path == '/302':
+            if sys.version_info[0] == 3:
+                # XXX: Python 3 http server does not allow non-ASCII header values
+                self.send_response(404)
+                self.end_headers()
+                return
+
+            new_url = 'http://localhost:%d/中文.html' % http_server_port(self.server)
+            self.send_response(302)
+            self.send_header(b'Location', new_url.encode('utf-8'))
+            self.end_headers()
+        elif self.path == '/%E4%B8%AD%E6%96%87.html':
+            self.send_response(200)
+            self.send_header('Content-Type', 'text/html; charset=utf-8')
+            self.end_headers()
+            self.wfile.write(b'<html><video src="/vid.mp4" /></html>')
         else:
             assert False
 
@@ -46,13 +72,32 @@ class FakeLogger(object):
 
 
 class TestHTTP(unittest.TestCase):
+    def setUp(self):
+        self.httpd = compat_http_server.HTTPServer(
+            ('localhost', 0), HTTPTestRequestHandler)
+        self.port = http_server_port(self.httpd)
+        self.server_thread = threading.Thread(target=self.httpd.serve_forever)
+        self.server_thread.daemon = True
+        self.server_thread.start()
+
+    def test_unicode_path_redirection(self):
+        # XXX: Python 3 http server does not allow non-ASCII header values
+        if sys.version_info[0] == 3:
+            return
+
+        ydl = YoutubeDL({'logger': FakeLogger()})
+        r = ydl.extract_info('http://localhost:%d/302' % self.port)
+        self.assertEqual(r['url'], 'http://localhost:%d/vid.mp4' % self.port)
+
+
+class TestHTTPS(unittest.TestCase):
     def setUp(self):
         certfn = os.path.join(TEST_DIR, 'testcert.pem')
         self.httpd = compat_http_server.HTTPServer(
             ('localhost', 0), HTTPTestRequestHandler)
         self.httpd.socket = ssl.wrap_socket(
             self.httpd.socket, certfile=certfn, server_side=True)
-        self.port = self.httpd.socket.getsockname()[1]
+        self.port = http_server_port(self.httpd)
         self.server_thread = threading.Thread(target=self.httpd.serve_forever)
         self.server_thread.daemon = True
         self.server_thread.start()
@@ -88,14 +133,14 @@ class TestProxy(unittest.TestCase):
     def setUp(self):
         self.proxy = compat_http_server.HTTPServer(
             ('localhost', 0), _build_proxy_handler('normal'))
-        self.port = self.proxy.socket.getsockname()[1]
+        self.port = http_server_port(self.proxy)
         self.proxy_thread = threading.Thread(target=self.proxy.serve_forever)
         self.proxy_thread.daemon = True
         self.proxy_thread.start()
 
         self.cn_proxy = compat_http_server.HTTPServer(
             ('localhost', 0), _build_proxy_handler('cn'))
-        self.cn_port = self.cn_proxy.socket.getsockname()[1]
+        self.cn_port = http_server_port(self.cn_proxy)
         self.cn_proxy_thread = threading.Thread(target=self.cn_proxy.serve_forever)
         self.cn_proxy_thread.daemon = True
         self.cn_proxy_thread.start()
@@ -115,5 +160,14 @@ class TestProxy(unittest.TestCase):
         response = ydl.urlopen(req).read().decode('utf-8')
         self.assertEqual(response, 'cn: {0}'.format(url))
 
+    def test_proxy_with_idn(self):
+        ydl = YoutubeDL({
+            'proxy': 'localhost:{0}'.format(self.port),
+        })
+        url = 'http://中文.tw/'
+        response = ydl.urlopen(url).read().decode('utf-8')
+        # b'xn--fiq228c' is '中文'.encode('idna')
+        self.assertEqual(response, 'normal: http://xn--fiq228c.tw/')
+
 if __name__ == '__main__':
     unittest.main()
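
test_proxy_with_idn leans on Python's built-in IDNA codec, which is what turns the international domain into the punycode form the proxy sees:

    # Python 3 demo of the conversions the tests expect
    print('中文'.encode('idna'))      # b'xn--fiq228c'
    print('тест.рф'.encode('idna'))  # b'xn--e1aybc.xn--p1ai', cf. the escape_url tests later in test_utils.py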
diff --git a/test/test_socks.py b/test/test_socks.py
new file mode 100644 (file)
index 0000000..1e68eb0
--- /dev/null
+++ b/test/test_socks.py
@@ -0,0 +1,118 @@
+#!/usr/bin/env python
+# coding: utf-8
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import random
+import subprocess
+
+from test.helper import (
+    FakeYDL,
+    get_params,
+)
+from youtube_dl.compat import (
+    compat_str,
+    compat_urllib_request,
+)
+
+
+class TestMultipleSocks(unittest.TestCase):
+    @staticmethod
+    def _check_params(attrs):
+        params = get_params()
+        for attr in attrs:
+            if attr not in params:
+                print('Missing %s. Skipping.' % attr)
+                return
+        return params
+
+    def test_proxy_http(self):
+        params = self._check_params(['primary_proxy', 'primary_server_ip'])
+        if params is None:
+            return
+        ydl = FakeYDL({
+            'proxy': params['primary_proxy']
+        })
+        self.assertEqual(
+            ydl.urlopen('http://yt-dl.org/ip').read().decode('utf-8'),
+            params['primary_server_ip'])
+
+    def test_proxy_https(self):
+        params = self._check_params(['primary_proxy', 'primary_server_ip'])
+        if params is None:
+            return
+        ydl = FakeYDL({
+            'proxy': params['primary_proxy']
+        })
+        self.assertEqual(
+            ydl.urlopen('https://yt-dl.org/ip').read().decode('utf-8'),
+            params['primary_server_ip'])
+
+    def test_secondary_proxy_http(self):
+        params = self._check_params(['secondary_proxy', 'secondary_server_ip'])
+        if params is None:
+            return
+        ydl = FakeYDL()
+        req = compat_urllib_request.Request('http://yt-dl.org/ip')
+        req.add_header('Ytdl-request-proxy', params['secondary_proxy'])
+        self.assertEqual(
+            ydl.urlopen(req).read().decode('utf-8'),
+            params['secondary_server_ip'])
+
+    def test_secondary_proxy_https(self):
+        params = self._check_params(['secondary_proxy', 'secondary_server_ip'])
+        if params is None:
+            return
+        ydl = FakeYDL()
+        req = compat_urllib_request.Request('https://yt-dl.org/ip')
+        req.add_header('Ytdl-request-proxy', params['secondary_proxy'])
+        self.assertEqual(
+            ydl.urlopen(req).read().decode('utf-8'),
+            params['secondary_server_ip'])
+
+
+class TestSocks(unittest.TestCase):
+    _SKIP_SOCKS_TEST = True
+
+    def setUp(self):
+        if self._SKIP_SOCKS_TEST:
+            return
+
+        self.port = random.randint(20000, 30000)
+        self.server_process = subprocess.Popen([
+            'srelay', '-f', '-i', '127.0.0.1:%d' % self.port],
+            stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+    def tearDown(self):
+        if self._SKIP_SOCKS_TEST:
+            return
+
+        self.server_process.terminate()
+        self.server_process.communicate()
+
+    def _get_ip(self, protocol):
+        if self._SKIP_SOCKS_TEST:
+            return '127.0.0.1'
+
+        ydl = FakeYDL({
+            'proxy': '%s://127.0.0.1:%d' % (protocol, self.port),
+        })
+        return ydl.urlopen('http://yt-dl.org/ip').read().decode('utf-8')
+
+    def test_socks4(self):
+        self.assertTrue(isinstance(self._get_ip('socks4'), compat_str))
+
+    def test_socks4a(self):
+        self.assertTrue(isinstance(self._get_ip('socks4a'), compat_str))
+
+    def test_socks5(self):
+        self.assertTrue(isinstance(self._get_ip('socks5'), compat_str))
+
+
+if __name__ == '__main__':
+    unittest.main()
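
TestSocks is disabled by default (_SKIP_SOCKS_TEST = True) because it needs a local srelay binary, which devscripts/install_srelay.sh can provide; TestMultipleSocks only runs when the proxy keys are present in the test parameters. Opting in locally could look like this (sketch, assuming srelay is on PATH):

    import unittest
    from test.test_socks import TestSocks

    TestSocks._SKIP_SOCKS_TEST = False
    unittest.main(module='test.test_socks', argv=['test_socks'], exit=False)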
index e6887be9f29847984be257a73af67e5750f4225c..b7ef51f8defa05da4a329315a069f10133d26bea 100644 (file)
@@ -18,7 +18,9 @@ import xml.etree.ElementTree
 from youtube_dl.utils import (
     age_restricted,
     args_to_str,
+    encode_base_n,
     clean_html,
+    date_from_str,
     DateRange,
     detect_exe_version,
     determine_ext,
@@ -27,6 +29,7 @@ from youtube_dl.utils import (
     encodeFilename,
     escape_rfc3986,
     escape_url,
+    extract_attributes,
     ExtractorError,
     find_xpath_attr,
     fix_xml_ampersands,
@@ -40,18 +43,20 @@ from youtube_dl.utils import (
     orderedSet,
     parse_duration,
     parse_filesize,
+    parse_count,
     parse_iso8601,
     read_batch_urls,
     sanitize_filename,
     sanitize_path,
     prepend_extension,
     replace_extension,
+    remove_start,
+    remove_end,
     remove_quotes,
     shell_quote,
     smuggle_url,
     str_to_int,
     strip_jsonp,
-    struct_unpack,
     timeconvert,
     unescapeHTML,
     unified_strdate,
@@ -60,6 +65,7 @@ from youtube_dl.utils import (
     lowercase_escape,
     url_basename,
     urlencode_postdata,
+    update_url_query,
     version_tuple,
     xpath_with_ns,
     xpath_element,
@@ -74,7 +80,10 @@ from youtube_dl.utils import (
     cli_bool_option,
 )
 from youtube_dl.compat import (
+    compat_chr,
     compat_etree_fromstring,
+    compat_urlparse,
+    compat_parse_qs,
 )
 
 
@@ -131,8 +140,8 @@ class TestUtil(unittest.TestCase):
         self.assertEqual('yes_no', sanitize_filename('yes? no', restricted=True))
         self.assertEqual('this_-_that', sanitize_filename('this: that', restricted=True))
 
-        tests = 'a\xe4b\u4e2d\u56fd\u7684c'
-        self.assertEqual(sanitize_filename(tests, restricted=True), 'a_b_c')
+        tests = 'aäb\u4e2d\u56fd\u7684c'
+        self.assertEqual(sanitize_filename(tests, restricted=True), 'aab_c')
         self.assertTrue(sanitize_filename('\xf6', restricted=True) != '')  # No empty filename
 
         forbidden = '"\0\\/&!: \'\t\n()[]{}$;`^,#'
@@ -147,6 +156,10 @@ class TestUtil(unittest.TestCase):
         self.assertTrue(sanitize_filename('-', restricted=True) != '')
         self.assertTrue(sanitize_filename(':', restricted=True) != '')
 
+        self.assertEqual(sanitize_filename(
+            'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', restricted=True),
+            'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYPssaaaaaaaeceeeeiiiionooooooooeuuuuuypy')
+
     def test_sanitize_ids(self):
         self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw')
         self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw')
@@ -204,6 +217,16 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
         self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
 
+    def test_remove_start(self):
+        self.assertEqual(remove_start(None, 'A - '), None)
+        self.assertEqual(remove_start('A - B', 'A - '), 'B')
+        self.assertEqual(remove_start('B - A', 'A - '), 'B - A')
+
+    def test_remove_end(self):
+        self.assertEqual(remove_end(None, ' - B'), None)
+        self.assertEqual(remove_end('A - B', ' - B'), 'A')
+        self.assertEqual(remove_end('B - A', ' - B'), 'B - A')
+
     def test_remove_quotes(self):
         self.assertEqual(remove_quotes(None), None)
         self.assertEqual(remove_quotes('"'), '"')
@@ -226,6 +249,15 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(unescapeHTML('&#47;'), '/')
         self.assertEqual(unescapeHTML('&eacute;'), 'é')
         self.assertEqual(unescapeHTML('&#2013266066;'), '&#2013266066;')
+        # HTML5 entities
+        self.assertEqual(unescapeHTML('&period;&apos;'), '.\'')
+
+    def test_date_from_str(self):
+        self.assertEqual(date_from_str('yesterday'), date_from_str('now-1day'))
+        self.assertEqual(date_from_str('now+7day'), date_from_str('now+1week'))
+        self.assertEqual(date_from_str('now+14day'), date_from_str('now+2week'))
+        self.assertEqual(date_from_str('now+365day'), date_from_str('now+1year'))
+        self.assertEqual(date_from_str('now+30day'), date_from_str('now+1month'))
 
     def test_daterange(self):
         _20century = DateRange("19000101", "20000101")
@@ -249,6 +281,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(
             unified_strdate('2/2/2015 6:47:40 PM', day_first=False),
             '20150202')
+        self.assertEqual(unified_strdate('Feb 14th 2016 5:45PM'), '20160214')
         self.assertEqual(unified_strdate('25-09-2014'), '20140925')
         self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)
 
@@ -397,6 +430,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(parse_duration('01:02:03:04'), 93784)
         self.assertEqual(parse_duration('1 hour 3 minutes'), 3780)
         self.assertEqual(parse_duration('87 Min.'), 5220)
+        self.assertEqual(parse_duration('PT1H0.040S'), 3600.04)
 
     def test_fix_xml_ampersands(self):
         self.assertEqual(
@@ -436,9 +470,6 @@ class TestUtil(unittest.TestCase):
         testPL(5, 2, (2, 99), [2, 3, 4])
         testPL(5, 2, (20, 99), [])
 
-    def test_struct_unpack(self):
-        self.assertEqual(struct_unpack('!B', b'\x00'), (0,))
-
     def test_read_batch_urls(self):
         f = io.StringIO('''\xef\xbb\xbf foo
             bar\r
@@ -452,6 +483,40 @@ class TestUtil(unittest.TestCase):
         data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
         self.assertTrue(isinstance(data, bytes))
 
+    def test_update_url_query(self):
+        def query_dict(url):
+            return compat_parse_qs(compat_urlparse.urlparse(url).query)
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path', {'quality': ['HD'], 'format': ['mp4']})),
+            query_dict('http://example.com/path?quality=HD&format=mp4'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path', {'system': ['LINUX', 'WINDOWS']})),
+            query_dict('http://example.com/path?system=LINUX&system=WINDOWS'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path', {'fields': 'id,formats,subtitles'})),
+            query_dict('http://example.com/path?fields=id,formats,subtitles'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path', {'fields': ('id,formats,subtitles', 'thumbnails')})),
+            query_dict('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path?manifest=f4m', {'manifest': []})),
+            query_dict('http://example.com/path'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path?system=LINUX&system=WINDOWS', {'system': 'LINUX'})),
+            query_dict('http://example.com/path?system=LINUX'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path', {'fields': b'id,formats,subtitles'})),
+            query_dict('http://example.com/path?fields=id,formats,subtitles'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path', {'width': 1080, 'height': 720})),
+            query_dict('http://example.com/path?width=1080&height=720'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path', {'bitrate': 5020.43})),
+            query_dict('http://example.com/path?bitrate=5020.43'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path', {'test': '第二行тест'})),
+            query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
+
     def test_dict_get(self):
         FALSE_VALUES = {
             'none': None,
@@ -535,11 +600,11 @@ class TestUtil(unittest.TestCase):
         )
         self.assertEqual(
             escape_url('http://тест.рф/фрагмент'),
-            'http://тест.рф/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82'
+            'http://xn--e1aybc.xn--p1ai/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82'
         )
         self.assertEqual(
             escape_url('http://тест.рф/абв?абв=абв#абв'),
-            'http://тест.рф/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2'
+            'http://xn--e1aybc.xn--p1ai/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2'
         )
         self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
 
@@ -566,6 +631,18 @@ class TestUtil(unittest.TestCase):
         json_code = js_to_json(inp)
         self.assertEqual(json.loads(json_code), json.loads(inp))
 
+        inp = '''{
+            0:{src:'skipped', type: 'application/dash+xml'},
+            1:{src:'skipped', type: 'application/vnd.apple.mpegURL'},
+        }'''
+        self.assertEqual(js_to_json(inp), '''{
+            "0":{"src":"skipped", "type": "application/dash+xml"},
+            "1":{"src":"skipped", "type": "application/vnd.apple.mpegURL"}
+        }''')
+
+        inp = '''{"foo":101}'''
+        self.assertEqual(js_to_json(inp), '''{"foo":101}''')
+
     def test_js_to_json_edgecases(self):
         on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
         self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
@@ -589,6 +666,65 @@ class TestUtil(unittest.TestCase):
         on = js_to_json('{"abc": "def",}')
         self.assertEqual(json.loads(on), {'abc': 'def'})
 
+        on = js_to_json('{ 0: /* " \n */ ",]" , }')
+        self.assertEqual(json.loads(on), {'0': ',]'})
+
+        on = js_to_json(r'["<p>x<\/p>"]')
+        self.assertEqual(json.loads(on), ['<p>x</p>'])
+
+        on = js_to_json(r'["\xaa"]')
+        self.assertEqual(json.loads(on), ['\u00aa'])
+
+        on = js_to_json("['a\\\nb']")
+        self.assertEqual(json.loads(on), ['ab'])
+
+        on = js_to_json('{0xff:0xff}')
+        self.assertEqual(json.loads(on), {'255': 255})
+
+        on = js_to_json('{077:077}')
+        self.assertEqual(json.loads(on), {'63': 63})
+
+        on = js_to_json('{42:42}')
+        self.assertEqual(json.loads(on), {'42': 42})
+
+    def test_extract_attributes(self):
+        self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
+        self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
+        self.assertEqual(extract_attributes('<e x=y>'), {'x': 'y'})
+        self.assertEqual(extract_attributes('<e x="a \'b\' c">'), {'x': "a 'b' c"})
+        self.assertEqual(extract_attributes('<e x=\'a "b" c\'>'), {'x': 'a "b" c'})
+        self.assertEqual(extract_attributes('<e x="&#121;">'), {'x': 'y'})
+        self.assertEqual(extract_attributes('<e x="&#x79;">'), {'x': 'y'})
+        self.assertEqual(extract_attributes('<e x="&amp;">'), {'x': '&'})  # XML
+        self.assertEqual(extract_attributes('<e x="&quot;">'), {'x': '"'})
+        self.assertEqual(extract_attributes('<e x="&pound;">'), {'x': '£'})  # HTML 3.2
+        self.assertEqual(extract_attributes('<e x="&lambda;">'), {'x': 'λ'})  # HTML 4.0
+        self.assertEqual(extract_attributes('<e x="&foo">'), {'x': '&foo'})
+        self.assertEqual(extract_attributes('<e x="\'">'), {'x': "'"})
+        self.assertEqual(extract_attributes('<e x=\'"\'>'), {'x': '"'})
+        self.assertEqual(extract_attributes('<e x >'), {'x': None})
+        self.assertEqual(extract_attributes('<e x=y a>'), {'x': 'y', 'a': None})
+        self.assertEqual(extract_attributes('<e x= y>'), {'x': 'y'})
+        self.assertEqual(extract_attributes('<e x=1 y=2 x=3>'), {'y': '2', 'x': '3'})
+        self.assertEqual(extract_attributes('<e \nx=\ny\n>'), {'x': 'y'})
+        self.assertEqual(extract_attributes('<e \nx=\n"y"\n>'), {'x': 'y'})
+        self.assertEqual(extract_attributes("<e \nx=\n'y'\n>"), {'x': 'y'})
+        self.assertEqual(extract_attributes('<e \nx="\ny\n">'), {'x': '\ny\n'})
+        self.assertEqual(extract_attributes('<e CAPS=x>'), {'caps': 'x'})  # Names lowercased
+        self.assertEqual(extract_attributes('<e x=1 X=2>'), {'x': '2'})
+        self.assertEqual(extract_attributes('<e X=1 x=2>'), {'x': '2'})
+        self.assertEqual(extract_attributes('<e _:funny-name1=1>'), {'_:funny-name1': '1'})
+        self.assertEqual(extract_attributes('<e x="Fáilte 世界 \U0001f600">'), {'x': 'Fáilte 世界 \U0001f600'})
+        self.assertEqual(extract_attributes('<e x="décompose&#769;">'), {'x': 'décompose\u0301'})
+        # "Narrow" Python builds don't support unicode code points outside BMP.
+        try:
+            compat_chr(0x10000)
+            supports_outside_bmp = True
+        except ValueError:
+            supports_outside_bmp = False
+        if supports_outside_bmp:
+            self.assertEqual(extract_attributes('<e x="Smile &#128512;!">'), {'x': 'Smile \U0001f600!'})
+
     def test_clean_html(self):
         self.assertEqual(clean_html('a:\nb'), 'a: b')
         self.assertEqual(clean_html('a:\n   "b"'), 'a:    "b"')
@@ -614,6 +750,17 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(parse_filesize('1.2Tb'), 1200000000000)
         self.assertEqual(parse_filesize('1,24 KB'), 1240)
 
+    def test_parse_count(self):
+        self.assertEqual(parse_count(None), None)
+        self.assertEqual(parse_count(''), None)
+        self.assertEqual(parse_count('0'), 0)
+        self.assertEqual(parse_count('1000'), 1000)
+        self.assertEqual(parse_count('1.000'), 1000)
+        self.assertEqual(parse_count('1.1k'), 1100)
+        self.assertEqual(parse_count('1.1kk'), 1100000)
+        self.assertEqual(parse_count('1.1kk '), 1100000)
+        self.assertEqual(parse_count('1.1kk views'), 1100000)
+
     def test_version_tuple(self):
         self.assertEqual(version_tuple('1'), (1,))
         self.assertEqual(version_tuple('10.23.344'), (10, 23, 344))
@@ -801,5 +948,16 @@ The first line
             ohdave_rsa_encrypt(b'aa111222', e, N),
             '726664bd9a23fd0c70f9f1b84aab5e3905ce1e45a584e9cbcf9bcc7510338fc1986d6c599ff990d923aa43c51c0d9013cd572e13bc58f4ae48f2ed8c0b0ba881')
 
+    def test_encode_base_n(self):
+        self.assertEqual(encode_base_n(0, 30), '0')
+        self.assertEqual(encode_base_n(80, 30), '2k')
+
+        custom_table = '9876543210ZYXWVUTSRQPONMLKJIHGFEDCBA'
+        self.assertEqual(encode_base_n(0, 30, custom_table), '9')
+        self.assertEqual(encode_base_n(80, 30, custom_table), '7P')
+
+        self.assertRaises(ValueError, encode_base_n, 0, 70)
+        self.assertRaises(ValueError, encode_base_n, 0, 60, custom_table)
+
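The expected behaviour here is standard base conversion against a digit table, raising ValueError when the requested base exceeds the table length. A minimal sketch consistent with the assertions above (the default 62-character table is standard base-62 ordering; the exact error message is an assumption, not copied from the implementation):

def encode_base_n_sketch(num, n, table=None):
    """Illustrative only: encode a non-negative int in base n using table."""
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if table is None:
        table = FULL_TABLE[:n]
    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))
    if num == 0:
        return table[0]
    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return ''.join(reversed(digits))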
 if __name__ == '__main__':
     unittest.main()
index 47df0f348d862e4ab455058d00321636123db807..af1c454217d0bec66a27a1bdc89c02195bb6274f 100644 (file)
@@ -44,7 +44,7 @@ class TestYoutubeLists(unittest.TestCase):
         ie = YoutubePlaylistIE(dl)
         result = ie.extract('https://www.youtube.com/watch?v=W01L70IGBgE&index=2&list=RDOQpdSVF_k_w')
         entries = result['entries']
-        self.assertTrue(len(entries) >= 20)
+        self.assertTrue(len(entries) >= 50)
         original_video = entries[0]
         self.assertEqual(original_video['id'], 'OQpdSVF_k_w')
 
diff --git a/test/video-vid.mp4 b/test/video-vid.mp4
deleted file mode 100644 (file)
index 825eaa4..0000000
Binary files a/test/video-vid.mp4 and /dev/null differ
diff --git a/tox.ini b/tox.ini
new file mode 100644 (file)
index 0000000..9c4e4a3
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,14 @@
+[tox]
+envlist = py26,py27,py33,py34,py35
+[testenv]
+deps =
+   nose
+   coverage
+# We need a valid $HOME for test_compat_expanduser
+passenv = HOME
+defaultargs = test --exclude test_download.py --exclude test_age_restriction.py
+    --exclude test_subtitles.py --exclude test_write_annotations.py
+    --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py
+    --exclude test_socks.py
+commands = nosetests --verbose {posargs:{[testenv]defaultargs}}  # --with-coverage --cover-package=youtube_dl --cover-html
+                                               # test.test_download:TestDownload.test_NowVideo
diff --git a/youtube-dl b/youtube-dl
deleted file mode 100755 (executable)
index c70dbae..0000000
Binary files a/youtube-dl and /dev/null differ
diff --git a/youtube-dl.1 b/youtube-dl.1
deleted file mode 100644 (file)
index 7c038b8..0000000
+++ /dev/null
@@ -1,2001 +0,0 @@
-.\" Automatically generated by Pandoc 1.16.0.2
-.\"
-.TH "YOUTUBE\-DL" "1" "" "" ""
-.hy
-.SH NAME
-.PP
-youtube\-dl \- download videos from youtube.com or other video platforms
-.SH SYNOPSIS
-.PP
-\f[B]youtube\-dl\f[] [OPTIONS] URL [URL...]
-.SH DESCRIPTION
-.PP
-\f[B]youtube\-dl\f[] is a small command\-line program to download videos
-from YouTube.com and a few more sites.
-It requires the Python interpreter, version 2.6, 2.7, or 3.2+, and it is
-not platform specific.
-It should work on your Unix box, on Windows or on Mac OS X.
-It is released to the public domain, which means you can modify it,
-redistribute it or use it however you like.
-.SH OPTIONS
-.TP
-.B \-h, \-\-help
-Print this help text and exit
-.RS
-.RE
-.TP
-.B \-\-version
-Print program version and exit
-.RS
-.RE
-.TP
-.B \-U, \-\-update
-Update this program to latest version.
-Make sure that you have sufficient permissions (run with sudo if needed)
-.RS
-.RE
-.TP
-.B \-i, \-\-ignore\-errors
-Continue on download errors, for example to skip unavailable videos in a
-playlist
-.RS
-.RE
-.TP
-.B \-\-abort\-on\-error
-Abort downloading of further videos (in the playlist or the command
-line) if an error occurs
-.RS
-.RE
-.TP
-.B \-\-dump\-user\-agent
-Display the current browser identification
-.RS
-.RE
-.TP
-.B \-\-list\-extractors
-List all supported extractors
-.RS
-.RE
-.TP
-.B \-\-extractor\-descriptions
-Output descriptions of all supported extractors
-.RS
-.RE
-.TP
-.B \-\-force\-generic\-extractor
-Force extraction to use the generic extractor
-.RS
-.RE
-.TP
-.B \-\-default\-search \f[I]PREFIX\f[]
-Use this prefix for unqualified URLs.
-For example "gvsearch2:" downloads two videos from google videos for
-youtube\-dl "large apple".
-Use the value "auto" to let youtube\-dl guess ("auto_warning" to emit a
-warning when guessing).
-"error" just throws an error.
-The default value "fixup_error" repairs broken URLs, but emits an error
-if this is not possible instead of searching.
-.RS
-.RE
-.TP
-.B \-\-ignore\-config
-Do not read configuration files.
-When given in the global configuration file /etc/youtube\-dl.conf: Do
-not read the user configuration in ~/.config/youtube\-dl/config
-(%APPDATA%/youtube\-dl/config.txt on Windows)
-.RS
-.RE
-.TP
-.B \-\-flat\-playlist
-Do not extract the videos of a playlist, only list them.
-.RS
-.RE
-.TP
-.B \-\-no\-color
-Do not emit color codes in output
-.RS
-.RE
-.SS Network Options:
-.TP
-.B \-\-proxy \f[I]URL\f[]
-Use the specified HTTP/HTTPS proxy.
-Pass in an empty string (\-\-proxy "") for direct connection
-.RS
-.RE
-.TP
-.B \-\-socket\-timeout \f[I]SECONDS\f[]
-Time to wait before giving up, in seconds
-.RS
-.RE
-.TP
-.B \-\-source\-address \f[I]IP\f[]
-Client\-side IP address to bind to (experimental)
-.RS
-.RE
-.TP
-.B \-4, \-\-force\-ipv4
-Make all connections via IPv4 (experimental)
-.RS
-.RE
-.TP
-.B \-6, \-\-force\-ipv6
-Make all connections via IPv6 (experimental)
-.RS
-.RE
-.TP
-.B \-\-cn\-verification\-proxy \f[I]URL\f[]
-Use this proxy to verify the IP address for some Chinese sites.
-The default proxy specified by \-\-proxy (or none, if the option is not
-present) is used for the actual downloading.
-(experimental)
-.RS
-.RE
-.SS Video Selection:
-.TP
-.B \-\-playlist\-start \f[I]NUMBER\f[]
-Playlist video to start at (default is 1)
-.RS
-.RE
-.TP
-.B \-\-playlist\-end \f[I]NUMBER\f[]
-Playlist video to end at (default is last)
-.RS
-.RE
-.TP
-.B \-\-playlist\-items \f[I]ITEM_SPEC\f[]
-Playlist video items to download.
-Specify indices of the videos in the playlist separated by commas like:
-"\-\-playlist\-items 1,2,5,8" if you want to download videos indexed 1,
-2, 5, 8 in the playlist.
-You can specify a range: "\-\-playlist\-items 1\-3,7,10\-13" will
-download the videos at indices 1, 2, 3, 7, 10, 11, 12 and 13.
-.RS
-.RE
-.TP
-.B \-\-match\-title \f[I]REGEX\f[]
-Download only matching titles (regex or caseless sub\-string)
-.RS
-.RE
-.TP
-.B \-\-reject\-title \f[I]REGEX\f[]
-Skip download for matching titles (regex or caseless sub\-string)
-.RS
-.RE
-.TP
-.B \-\-max\-downloads \f[I]NUMBER\f[]
-Abort after downloading NUMBER files
-.RS
-.RE
-.TP
-.B \-\-min\-filesize \f[I]SIZE\f[]
-Do not download any videos smaller than SIZE (e.g.
-50k or 44.6m)
-.RS
-.RE
-.TP
-.B \-\-max\-filesize \f[I]SIZE\f[]
-Do not download any videos larger than SIZE (e.g.
-50k or 44.6m)
-.RS
-.RE
-.TP
-.B \-\-date \f[I]DATE\f[]
-Download only videos uploaded on this date
-.RS
-.RE
-.TP
-.B \-\-datebefore \f[I]DATE\f[]
-Download only videos uploaded on or before this date (i.e.
-inclusive)
-.RS
-.RE
-.TP
-.B \-\-dateafter \f[I]DATE\f[]
-Download only videos uploaded on or after this date (i.e.
-inclusive)
-.RS
-.RE
-.TP
-.B \-\-min\-views \f[I]COUNT\f[]
-Do not download any videos with less than COUNT views
-.RS
-.RE
-.TP
-.B \-\-max\-views \f[I]COUNT\f[]
-Do not download any videos with more than COUNT views
-.RS
-.RE
-.TP
-.B \-\-match\-filter \f[I]FILTER\f[]
-Generic video filter (experimental).
-Specify any key (see help for \-o for a list of available keys) to match
-if the key is present, !key to check if the key is not present, key >
-NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to
-compare against a number, and & to require multiple matches.
-Values which are not known are excluded unless you put a question mark
-(?) after the operator.
-For example, to only match videos that have been liked more than 100
-times and disliked less than 50 times (or where the dislike
-functionality is not available at the given service), but which also
-have a description, use \-\-match\-filter "like_count > 100 &
-dislike_count <?
-50 & description" .
-.RS
-.RE
-.TP
-.B \-\-no\-playlist
-Download only the video, if the URL refers to a video and a playlist.
-.RS
-.RE
-.TP
-.B \-\-yes\-playlist
-Download the playlist, if the URL refers to a video and a playlist.
-.RS
-.RE
-.TP
-.B \-\-age\-limit \f[I]YEARS\f[]
-Download only videos suitable for the given age
-.RS
-.RE
-.TP
-.B \-\-download\-archive \f[I]FILE\f[]
-Download only videos not listed in the archive file.
-Record the IDs of all downloaded videos in it.
-.RS
-.RE
-.TP
-.B \-\-include\-ads
-Download advertisements as well (experimental)
-.RS
-.RE
-.SS Download Options:
-.TP
-.B \-r, \-\-rate\-limit \f[I]LIMIT\f[]
-Maximum download rate in bytes per second (e.g.
-50K or 4.2M)
-.RS
-.RE
-.TP
-.B \-R, \-\-retries \f[I]RETRIES\f[]
-Number of retries (default is 10), or "infinite".
-.RS
-.RE
-.TP
-.B \-\-buffer\-size \f[I]SIZE\f[]
-Size of download buffer (e.g.
-1024 or 16K) (default is 1024)
-.RS
-.RE
-.TP
-.B \-\-no\-resize\-buffer
-Do not automatically adjust the buffer size.
-By default, the buffer size is automatically resized from an initial
-value of SIZE.
-.RS
-.RE
-.TP
-.B \-\-playlist\-reverse
-Download playlist videos in reverse order
-.RS
-.RE
-.TP
-.B \-\-xattr\-set\-filesize
-Set file xattribute ytdl.filesize with expected filesize (experimental)
-.RS
-.RE
-.TP
-.B \-\-hls\-prefer\-native
-Use the native HLS downloader instead of ffmpeg (experimental)
-.RS
-.RE
-.TP
-.B \-\-hls\-use\-mpegts
-Use the mpegts container for HLS videos, allowing you to play the video
-while it is downloading (some players may not be able to play it)
-.RS
-.RE
-.TP
-.B \-\-external\-downloader \f[I]COMMAND\f[]
-Use the specified external downloader.
-Currently supports aria2c,axel,curl,httpie,wget
-.RS
-.RE
-.TP
-.B \-\-external\-downloader\-args \f[I]ARGS\f[]
-Give these arguments to the external downloader
-.RS
-.RE
-.SS Filesystem Options:
-.TP
-.B \-a, \-\-batch\-file \f[I]FILE\f[]
-File containing URLs to download (\[aq]\-\[aq] for stdin)
-.RS
-.RE
-.TP
-.B \-\-id
-Use only video ID in file name
-.RS
-.RE
-.TP
-.B \-o, \-\-output \f[I]TEMPLATE\f[]
-Output filename template.
-Use %(title)s to get the title, %(uploader)s for the uploader name,
-%(uploader_id)s for the uploader nickname if different, %(autonumber)s
-to get an automatically incremented number, %(ext)s for the filename
-extension, %(format)s for the format description (like "22 \- 1280x720"
-or "HD"), %(format_id)s for the unique id of the format (like
-YouTube\[aq]s itags: "137"), %(upload_date)s for the upload date
-(YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc),
-%(id)s for the video id, %(playlist_title)s, %(playlist_id)s, or
-%(playlist)s (=title if present, ID otherwise) for the playlist the
-video is in, %(playlist_index)s for the position in the playlist.
-%(height)s and %(width)s for the width and height of the video format.
-%(resolution)s for a textual description of the resolution of the video
-format.
-%% for a literal percent.
-Use \- to output to stdout.
-Can also be used to download to a different directory, for example with
-\-o \[aq]/my/downloads/%(uploader)s/%(title)s\-%(id)s.%(ext)s\[aq].
-.RS
-.RE
-.TP
-.B \-\-autonumber\-size \f[I]NUMBER\f[]
-Specify the number of digits in %(autonumber)s when it is present in
-output filename template or \-\-auto\-number option is given
-.RS
-.RE
-.TP
-.B \-\-restrict\-filenames
-Restrict filenames to only ASCII characters, and avoid "&" and spaces in
-filenames
-.RS
-.RE
-.TP
-.B \-A, \-\-auto\-number
-[deprecated; use \-o "%(autonumber)s\-%(title)s.%(ext)s" ] Number
-downloaded files starting from 00000
-.RS
-.RE
-.TP
-.B \-t, \-\-title
-[deprecated] Use title in file name (default)
-.RS
-.RE
-.TP
-.B \-l, \-\-literal
-[deprecated] Alias of \-\-title
-.RS
-.RE
-.TP
-.B \-w, \-\-no\-overwrites
-Do not overwrite files
-.RS
-.RE
-.TP
-.B \-c, \-\-continue
-Force resume of partially downloaded files.
-By default, youtube\-dl will resume downloads if possible.
-.RS
-.RE
-.TP
-.B \-\-no\-continue
-Do not resume partially downloaded files (restart from beginning)
-.RS
-.RE
-.TP
-.B \-\-no\-part
-Do not use .part files \- write directly into output file
-.RS
-.RE
-.TP
-.B \-\-no\-mtime
-Do not use the Last\-modified header to set the file modification time
-.RS
-.RE
-.TP
-.B \-\-write\-description
-Write video description to a .description file
-.RS
-.RE
-.TP
-.B \-\-write\-info\-json
-Write video metadata to a .info.json file
-.RS
-.RE
-.TP
-.B \-\-write\-annotations
-Write video annotations to a .annotations.xml file
-.RS
-.RE
-.TP
-.B \-\-load\-info \f[I]FILE\f[]
-JSON file containing the video information (created with the
-"\-\-write\-info\-json" option)
-.RS
-.RE
-.TP
-.B \-\-cookies \f[I]FILE\f[]
-File to read cookies from and dump cookie jar in
-.RS
-.RE
-.TP
-.B \-\-cache\-dir \f[I]DIR\f[]
-Location in the filesystem where youtube\-dl can store some downloaded
-information permanently.
-By default $XDG_CACHE_HOME/youtube\-dl or ~/.cache/youtube\-dl.
-At the moment, only YouTube player files (for videos with obfuscated
-signatures) are cached, but that may change.
-.RS
-.RE
-.TP
-.B \-\-no\-cache\-dir
-Disable filesystem caching
-.RS
-.RE
-.TP
-.B \-\-rm\-cache\-dir
-Delete all filesystem cache files
-.RS
-.RE
-.SS Thumbnail images:
-.TP
-.B \-\-write\-thumbnail
-Write thumbnail image to disk
-.RS
-.RE
-.TP
-.B \-\-write\-all\-thumbnails
-Write all thumbnail image formats to disk
-.RS
-.RE
-.TP
-.B \-\-list\-thumbnails
-Simulate and list all available thumbnail formats
-.RS
-.RE
-.SS Verbosity / Simulation Options:
-.TP
-.B \-q, \-\-quiet
-Activate quiet mode
-.RS
-.RE
-.TP
-.B \-\-no\-warnings
-Ignore warnings
-.RS
-.RE
-.TP
-.B \-s, \-\-simulate
-Do not download the video and do not write anything to disk
-.RS
-.RE
-.TP
-.B \-\-skip\-download
-Do not download the video
-.RS
-.RE
-.TP
-.B \-g, \-\-get\-url
-Simulate, quiet but print URL
-.RS
-.RE
-.TP
-.B \-e, \-\-get\-title
-Simulate, quiet but print title
-.RS
-.RE
-.TP
-.B \-\-get\-id
-Simulate, quiet but print id
-.RS
-.RE
-.TP
-.B \-\-get\-thumbnail
-Simulate, quiet but print thumbnail URL
-.RS
-.RE
-.TP
-.B \-\-get\-description
-Simulate, quiet but print video description
-.RS
-.RE
-.TP
-.B \-\-get\-duration
-Simulate, quiet but print video length
-.RS
-.RE
-.TP
-.B \-\-get\-filename
-Simulate, quiet but print output filename
-.RS
-.RE
-.TP
-.B \-\-get\-format
-Simulate, quiet but print output format
-.RS
-.RE
-.TP
-.B \-j, \-\-dump\-json
-Simulate, quiet but print JSON information.
-See \-\-output for a description of available keys.
-.RS
-.RE
-.TP
-.B \-J, \-\-dump\-single\-json
-Simulate, quiet but print JSON information for each command\-line
-argument.
-If the URL refers to a playlist, dump the whole playlist information in
-a single line.
-.RS
-.RE
-.TP
-.B \-\-print\-json
-Be quiet and print the video information as JSON (video is still being
-downloaded).
-.RS
-.RE
-.TP
-.B \-\-newline
-Output progress bar as new lines
-.RS
-.RE
-.TP
-.B \-\-no\-progress
-Do not print progress bar
-.RS
-.RE
-.TP
-.B \-\-console\-title
-Display progress in console titlebar
-.RS
-.RE
-.TP
-.B \-v, \-\-verbose
-Print various debugging information
-.RS
-.RE
-.TP
-.B \-\-dump\-pages
-Print downloaded pages encoded using base64 to debug problems (very
-verbose)
-.RS
-.RE
-.TP
-.B \-\-write\-pages
-Write downloaded intermediary pages to files in the current directory to
-debug problems
-.RS
-.RE
-.TP
-.B \-\-print\-traffic
-Display sent and read HTTP traffic
-.RS
-.RE
-.TP
-.B \-C, \-\-call\-home
-Contact the youtube\-dl server for debugging
-.RS
-.RE
-.TP
-.B \-\-no\-call\-home
-Do NOT contact the youtube\-dl server for debugging
-.RS
-.RE
-.SS Workarounds:
-.TP
-.B \-\-encoding \f[I]ENCODING\f[]
-Force the specified encoding (experimental)
-.RS
-.RE
-.TP
-.B \-\-no\-check\-certificate
-Suppress HTTPS certificate validation
-.RS
-.RE
-.TP
-.B \-\-prefer\-insecure
-Use an unencrypted connection to retrieve information about the video.
-(Currently supported only for YouTube)
-.RS
-.RE
-.TP
-.B \-\-user\-agent \f[I]UA\f[]
-Specify a custom user agent
-.RS
-.RE
-.TP
-.B \-\-referer \f[I]URL\f[]
-Specify a custom referer, use if the video access is restricted to one
-domain
-.RS
-.RE
-.TP
-.B \-\-add\-header \f[I]FIELD:VALUE\f[]
-Specify a custom HTTP header and its value, separated by a colon
-\[aq]:\[aq].
-You can use this option multiple times
-.RS
-.RE
-.TP
-.B \-\-bidi\-workaround
-Work around terminals that lack bidirectional text support.
-Requires bidiv or fribidi executable in PATH
-.RS
-.RE
-.TP
-.B \-\-sleep\-interval \f[I]SECONDS\f[]
-Number of seconds to sleep before each download.
-.RS
-.RE
-.SS Video Format Options:
-.TP
-.B \-f, \-\-format \f[I]FORMAT\f[]
-Video format code, see the "FORMAT SELECTION" for all the info
-.RS
-.RE
-.TP
-.B \-\-all\-formats
-Download all available video formats
-.RS
-.RE
-.TP
-.B \-\-prefer\-free\-formats
-Prefer free video formats unless a specific one is requested
-.RS
-.RE
-.TP
-.B \-F, \-\-list\-formats
-List all available formats of requested videos
-.RS
-.RE
-.TP
-.B \-\-youtube\-skip\-dash\-manifest
-Do not download the DASH manifests and related data on YouTube videos
-.RS
-.RE
-.TP
-.B \-\-merge\-output\-format \f[I]FORMAT\f[]
-If a merge is required (e.g.
-bestvideo+bestaudio), output to given container format.
-One of mkv, mp4, ogg, webm, flv.
-Ignored if no merge is required
-.RS
-.RE
-.SS Subtitle Options:
-.TP
-.B \-\-write\-sub
-Write subtitle file
-.RS
-.RE
-.TP
-.B \-\-write\-auto\-sub
-Write automatically generated subtitle file (YouTube only)
-.RS
-.RE
-.TP
-.B \-\-all\-subs
-Download all the available subtitles of the video
-.RS
-.RE
-.TP
-.B \-\-list\-subs
-List all available subtitles for the video
-.RS
-.RE
-.TP
-.B \-\-sub\-format \f[I]FORMAT\f[]
-Subtitle format, accepts formats preference, for example: "srt" or
-"ass/srt/best"
-.RS
-.RE
-.TP
-.B \-\-sub\-lang \f[I]LANGS\f[]
-Languages of the subtitles to download (optional) separated by commas,
-use \-\-list\-subs for available language tags
-.RS
-.RE
-.SS Authentication Options:
-.TP
-.B \-u, \-\-username \f[I]USERNAME\f[]
-Login with this account ID
-.RS
-.RE
-.TP
-.B \-p, \-\-password \f[I]PASSWORD\f[]
-Account password.
-If this option is left out, youtube\-dl will ask interactively.
-.RS
-.RE
-.TP
-.B \-2, \-\-twofactor \f[I]TWOFACTOR\f[]
-Two\-factor auth code
-.RS
-.RE
-.TP
-.B \-n, \-\-netrc
-Use .netrc authentication data
-.RS
-.RE
-.TP
-.B \-\-video\-password \f[I]PASSWORD\f[]
-Video password (vimeo, smotri, youku)
-.RS
-.RE
-.SS Post\-processing Options:
-.TP
-.B \-x, \-\-extract\-audio
-Convert video files to audio\-only files (requires ffmpeg or avconv and
-ffprobe or avprobe)
-.RS
-.RE
-.TP
-.B \-\-audio\-format \f[I]FORMAT\f[]
-Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or
-"wav"; "best" by default
-.RS
-.RE
-.TP
-.B \-\-audio\-quality \f[I]QUALITY\f[]
-Specify ffmpeg/avconv audio quality, insert a value between 0 (better)
-and 9 (worse) for VBR or a specific bitrate like 128K (default 5)
-.RS
-.RE
-.TP
-.B \-\-recode\-video \f[I]FORMAT\f[]
-Encode the video to another format if necessary (currently supported:
-mp4|flv|ogg|webm|mkv|avi)
-.RS
-.RE
-.TP
-.B \-\-postprocessor\-args \f[I]ARGS\f[]
-Give these arguments to the postprocessor
-.RS
-.RE
-.TP
-.B \-k, \-\-keep\-video
-Keep the video file on disk after the post\-processing; the video is
-erased by default
-.RS
-.RE
-.TP
-.B \-\-no\-post\-overwrites
-Do not overwrite post\-processed files; the post\-processed files are
-overwritten by default
-.RS
-.RE
-.TP
-.B \-\-embed\-subs
-Embed subtitles in the video (only for mkv and mp4 videos)
-.RS
-.RE
-.TP
-.B \-\-embed\-thumbnail
-Embed thumbnail in the audio as cover art
-.RS
-.RE
-.TP
-.B \-\-add\-metadata
-Write metadata to the video file
-.RS
-.RE
-.TP
-.B \-\-metadata\-from\-title \f[I]FORMAT\f[]
-Parse additional metadata like song title / artist from the video title.
-The format syntax is the same as \-\-output, the parsed parameters
-replace existing values.
-Additional templates: %(album)s, %(artist)s.
-Example: \-\-metadata\-from\-title "%(artist)s \- %(title)s" matches a
-title like "Coldplay \- Paradise"
-.RS
-.RE
-.TP
-.B \-\-xattrs
-Write metadata to the video file\[aq]s xattrs (using Dublin Core and XDG
-standards)
-.RS
-.RE
-.TP
-.B \-\-fixup \f[I]POLICY\f[]
-Automatically correct known faults of the file.
-One of never (do nothing), warn (only emit a warning), detect_or_warn
-(the default; fix file if we can, warn otherwise)
-.RS
-.RE
-.TP
-.B \-\-prefer\-avconv
-Prefer avconv over ffmpeg for running the postprocessors (default)
-.RS
-.RE
-.TP
-.B \-\-prefer\-ffmpeg
-Prefer ffmpeg over avconv for running the postprocessors
-.RS
-.RE
-.TP
-.B \-\-ffmpeg\-location \f[I]PATH\f[]
-Location of the ffmpeg/avconv binary; either the path to the binary or
-its containing directory.
-.RS
-.RE
-.TP
-.B \-\-exec \f[I]CMD\f[]
-Execute a command on the file after downloading, similar to find\[aq]s
-\-exec syntax.
-Example: \-\-exec \[aq]adb push {} /sdcard/Music/ && rm {}\[aq]
-.RS
-.RE
-.TP
-.B \-\-convert\-subs \f[I]FORMAT\f[]
-Convert the subtitles to other format (currently supported: srt|ass|vtt)
-.RS
-.RE
-.SH CONFIGURATION
-.PP
-You can configure youtube\-dl by placing any supported command line
-option in a configuration file.
-On Linux, the system wide configuration file is located at
-\f[C]/etc/youtube\-dl.conf\f[] and the user wide configuration file at
-\f[C]~/.config/youtube\-dl/config\f[].
-On Windows, the user wide configuration file locations are
-\f[C]%APPDATA%\\youtube\-dl\\config.txt\f[] or
-\f[C]C:\\Users\\<user\ name>\\youtube\-dl.conf\f[].
-For example, with the following configuration file youtube\-dl will
-always extract the audio, not copy the mtime and use a proxy:
-.IP
-.nf
-\f[C]
-\-\-extract\-audio
-\-\-no\-mtime
-\-\-proxy\ 127.0.0.1:3128
-\f[]
-.fi
-.PP
-You can use \f[C]\-\-ignore\-config\f[] if you want to disable the
-configuration file for a particular youtube\-dl run.
-.SS Authentication with \f[C]\&.netrc\f[] file
-.PP
-You may also want to configure automatic credentials storage for
-extractors that support authentication (by providing login and password
-with \f[C]\-\-username\f[] and \f[C]\-\-password\f[]) so that you do not
-have to pass credentials as command line arguments on every youtube\-dl
-run and do not leave plain text passwords in the shell command history.
-You can achieve this using a \f[C]\&.netrc\f[]
-file (http://stackoverflow.com/tags/.netrc/info) on a per\-extractor basis.
-For that you will need to create a \f[C]\&.netrc\f[] file in your
-\f[C]$HOME\f[] and restrict permissions to read/write by you only:
-.IP
-.nf
-\f[C]
-touch\ $HOME/.netrc
-chmod\ a\-rwx,u+rw\ $HOME/.netrc
-\f[]
-.fi
-.PP
-After that you can add credentials for an extractor in the following
-format, where \f[I]extractor\f[] is the name of the extractor in lowercase:
-.IP
-.nf
-\f[C]
-machine\ <extractor>\ login\ <login>\ password\ <password>
-\f[]
-.fi
-.PP
-For example:
-.IP
-.nf
-\f[C]
-machine\ youtube\ login\ myaccount\@gmail.com\ password\ my_youtube_password
-machine\ twitch\ login\ my_twitch_account_name\ password\ my_twitch_password
-\f[]
-.fi
-.PP
-To activate authentication with the \f[C]\&.netrc\f[] file you should
-pass \f[C]\-\-netrc\f[] to youtube\-dl or place it in the configuration
-file (#configuration).
-.PP
-On Windows you may also need to set up the \f[C]%HOME%\f[] environment
-variable manually.
-.SH OUTPUT TEMPLATE
-.PP
-The \f[C]\-o\f[] option allows users to indicate a template for the
-output file names.
-The basic usage is not to set any template arguments when downloading a
-single file, like in
-\f[C]youtube\-dl\ \-o\ funny_video.flv\ "http://some/video"\f[].
-However, it may contain special sequences that will be replaced when
-downloading each video.
-The special sequences have the format \f[C]%(NAME)s\f[].
-To clarify, that is a percent symbol followed by a name in parentheses,
-followed by a lowercase S.
-Allowed names are:
-.IP \[bu] 2
-\f[C]id\f[]: Video identifier
-.IP \[bu] 2
-\f[C]title\f[]: Video title
-.IP \[bu] 2
-\f[C]url\f[]: Video URL
-.IP \[bu] 2
-\f[C]ext\f[]: Video filename extension
-.IP \[bu] 2
-\f[C]alt_title\f[]: A secondary title of the video
-.IP \[bu] 2
-\f[C]display_id\f[]: An alternative identifier for the video
-.IP \[bu] 2
-\f[C]uploader\f[]: Full name of the video uploader
-.IP \[bu] 2
-\f[C]creator\f[]: The main artist who created the video
-.IP \[bu] 2
-\f[C]release_date\f[]: The date (YYYYMMDD) when the video was released
-.IP \[bu] 2
-\f[C]timestamp\f[]: UNIX timestamp of the moment the video became
-available
-.IP \[bu] 2
-\f[C]upload_date\f[]: Video upload date (YYYYMMDD)
-.IP \[bu] 2
-\f[C]uploader_id\f[]: Nickname or id of the video uploader
-.IP \[bu] 2
-\f[C]location\f[]: Physical location where the video was filmed
-.IP \[bu] 2
-\f[C]duration\f[]: Length of the video in seconds
-.IP \[bu] 2
-\f[C]view_count\f[]: How many users have watched the video on the
-platform
-.IP \[bu] 2
-\f[C]like_count\f[]: Number of positive ratings of the video
-.IP \[bu] 2
-\f[C]dislike_count\f[]: Number of negative ratings of the video
-.IP \[bu] 2
-\f[C]repost_count\f[]: Number of reposts of the video
-.IP \[bu] 2
-\f[C]average_rating\f[]: Average rating given by users, the scale used
-depends on the webpage
-.IP \[bu] 2
-\f[C]comment_count\f[]: Number of comments on the video
-.IP \[bu] 2
-\f[C]age_limit\f[]: Age restriction for the video (years)
-.IP \[bu] 2
-\f[C]format\f[]: A human\-readable description of the format
-.IP \[bu] 2
-\f[C]format_id\f[]: Format code specified by \f[C]\-\-format\f[]
-.IP \[bu] 2
-\f[C]format_note\f[]: Additional info about the format
-.IP \[bu] 2
-\f[C]width\f[]: Width of the video
-.IP \[bu] 2
-\f[C]height\f[]: Height of the video
-.IP \[bu] 2
-\f[C]resolution\f[]: Textual description of width and height
-.IP \[bu] 2
-\f[C]tbr\f[]: Average bitrate of audio and video in KBit/s
-.IP \[bu] 2
-\f[C]abr\f[]: Average audio bitrate in KBit/s
-.IP \[bu] 2
-\f[C]acodec\f[]: Name of the audio codec in use
-.IP \[bu] 2
-\f[C]asr\f[]: Audio sampling rate in Hertz
-.IP \[bu] 2
-\f[C]vbr\f[]: Average video bitrate in KBit/s
-.IP \[bu] 2
-\f[C]fps\f[]: Frame rate
-.IP \[bu] 2
-\f[C]vcodec\f[]: Name of the video codec in use
-.IP \[bu] 2
-\f[C]container\f[]: Name of the container format
-.IP \[bu] 2
-\f[C]filesize\f[]: The number of bytes, if known in advance
-.IP \[bu] 2
-\f[C]filesize_approx\f[]: An estimate for the number of bytes
-.IP \[bu] 2
-\f[C]protocol\f[]: The protocol that will be used for the actual
-download
-.IP \[bu] 2
-\f[C]extractor\f[]: Name of the extractor
-.IP \[bu] 2
-\f[C]extractor_key\f[]: Key name of the extractor
-.IP \[bu] 2
-\f[C]epoch\f[]: Unix epoch when creating the file
-.IP \[bu] 2
-\f[C]autonumber\f[]: Five\-digit number that will be increased with each
-download, starting at zero
-.IP \[bu] 2
-\f[C]playlist\f[]: Name or id of the playlist that contains the video
-.IP \[bu] 2
-\f[C]playlist_index\f[]: Index of the video in the playlist padded with
-leading zeros according to the total length of the playlist
-.PP
-Available for the video that belongs to some logical chapter or section:
-.IP \[bu] 2
-\f[C]chapter\f[]: Name or title of the chapter the video belongs to
-.IP \[bu] 2
-\f[C]chapter_number\f[]: Number of the chapter the video belongs to
-.IP \[bu] 2
-\f[C]chapter_id\f[]: Id of the chapter the video belongs to
-.PP
-Available for the video that is an episode of some series or programme:
-.IP \[bu] 2
-\f[C]series\f[]: Title of the series or programme the video episode belongs to
-.IP \[bu] 2
-\f[C]season\f[]: Title of the season the video episode belongs to
-.IP \[bu] 2
-\f[C]season_number\f[]: Number of the season the video episode belongs to
-.IP \[bu] 2
-\f[C]season_id\f[]: Id of the season the video episode belongs to
-.IP \[bu] 2
-\f[C]episode\f[]: Title of the video episode
-.IP \[bu] 2
-\f[C]episode_number\f[]: Number of the video episode within a season
-.IP \[bu] 2
-\f[C]episode_id\f[]: Id of the video episode
-.PP
-Each aforementioned sequence when referenced in output template will be
-replaced by the actual value corresponding to the sequence name.
-Note that some of the sequences are not guaranteed to be present, since
-they depend on the metadata obtained by a particular extractor; such
-sequences will be replaced with \f[C]NA\f[].
-.PP
-For example for \f[C]\-o\ %(title)s\-%(id)s.%(ext)s\f[] and mp4 video
-with title \f[C]youtube\-dl\ test\ video\f[] and id
-\f[C]BaW_jenozKcj\f[] this will result in a
-\f[C]youtube\-dl\ test\ video\-BaW_jenozKcj.mp4\f[] file created in the
-current directory.
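(An editorial aside, not part of the patch: under the hood the %(NAME)s sequences are essentially Python % string interpolation over the video's info dictionary, modulo filename sanitization, so the example above can be reproduced directly. The dict values below are taken from the example itself.)

# The template mechanism is ordinary '%' formatting over the info dict.
info = {'title': 'youtube-dl test video', 'id': 'BaW_jenozKcj', 'ext': 'mp4'}
print('%(title)s-%(id)s.%(ext)s' % info)
# -> youtube-dl test video-BaW_jenozKcj.mp4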
-.PP
-The output template can also contain an arbitrary hierarchical path, e.g.
-\f[C]\-o\ \[aq]%(playlist)s/%(playlist_index)s\ \-\ %(title)s.%(ext)s\[aq]\f[]
-that will result in downloading each video in a directory corresponding
-to this path template.
-Any missing directory will be automatically created for you.
-.PP
-To specify a literal percent in the output template use \f[C]%%\f[].
-To output to stdout use \f[C]\-o\ \-\f[].
-.PP
-The current default template is \f[C]%(title)s\-%(id)s.%(ext)s\f[].
-.PP
-In some cases, you don\[aq]t want special characters such as 中, spaces,
-or &, for example when transferring the downloaded filename to a Windows
-system or passing the filename through an 8bit\-unsafe channel.
-In these cases, add the \f[C]\-\-restrict\-filenames\f[] flag to get a
-shorter title:
-.PP
-Examples (note on Windows you may need to use double quotes instead of
-single):
-.IP
-.nf
-\f[C]
-$\ youtube\-dl\ \-\-get\-filename\ \-o\ \[aq]%(title)s.%(ext)s\[aq]\ BaW_jenozKc
-youtube\-dl\ test\ video\ \[aq]\[aq]_ä↭𝕐.mp4\ \ \ \ #\ All\ kinds\ of\ weird\ characters
-
-$\ youtube\-dl\ \-\-get\-filename\ \-o\ \[aq]%(title)s.%(ext)s\[aq]\ BaW_jenozKc\ \-\-restrict\-filenames
-youtube\-dl_test_video_.mp4\ \ \ \ \ \ \ \ \ \ #\ A\ simple\ file\ name
-
-#\ Download\ YouTube\ playlist\ videos\ in\ separate\ directory\ indexed\ by\ video\ order\ in\ a\ playlist
-$\ youtube\-dl\ \-o\ \[aq]%(playlist)s/%(playlist_index)s\ \-\ %(title)s.%(ext)s\[aq]\ https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re
-
-#\ Download\ Udemy\ course\ keeping\ each\ chapter\ in\ separate\ directory\ under\ MyVideos\ directory\ in\ your\ home
-$\ youtube\-dl\ \-u\ user\ \-p\ password\ \-o\ \[aq]~/MyVideos/%(playlist)s/%(chapter_number)s\ \-\ %(chapter)s/%(title)s.%(ext)s\[aq]\ https://www.udemy.com/java\-tutorial/
-
-#\ Download\ entire\ series\ season\ keeping\ each\ series\ and\ each\ season\ in\ separate\ directory\ under\ C:/MyVideos
-$\ youtube\-dl\ \-o\ "C:/MyVideos/%(series)s/%(season_number)s\ \-\ %(season)s/%(episode_number)s\ \-\ %(episode)s.%(ext)s"\ http://videomore.ru/kino_v_detalayah/5_sezon/367617
-
-#\ Stream\ the\ video\ being\ downloaded\ to\ stdout
-$\ youtube\-dl\ \-o\ \-\ BaW_jenozKc
-\f[]
-.fi
-.SH FORMAT SELECTION
-.PP
-By default youtube\-dl tries to download the best available quality,
-i.e.
-if you want the best quality you \f[B]don\[aq]t need\f[] to pass any
-special options, youtube\-dl will guess it for you by \f[B]default\f[].
-.PP
-But sometimes you may want to download in a different format, for
-example when you are on a slow or intermittent connection.
-The key mechanism for achieving this is the so\-called \f[I]format
-selection\f[], with which you can explicitly specify the desired format,
-select formats based on some criterion or criteria, set up precedence and
-much more.
-.PP
-The general syntax for format selection is \f[C]\-\-format\ FORMAT\f[]
-or shorter \f[C]\-f\ FORMAT\f[] where \f[C]FORMAT\f[] is a \f[I]selector
-expression\f[], i.e.
-an expression that describes format or formats you would like to
-download.
-.PP
-The simplest case is requesting a specific format, for example with
-\f[C]\-f\ 22\f[] you can download the format with format code equal to
-22.
-You can get the list of available format codes for particular video
-using \f[C]\-\-list\-formats\f[] or \f[C]\-F\f[].
-Note that these format codes are extractor specific.
-.PP
-You can also use a file extension (currently \f[C]3gp\f[], \f[C]aac\f[],
-\f[C]flv\f[], \f[C]m4a\f[], \f[C]mp3\f[], \f[C]mp4\f[], \f[C]ogg\f[],
-\f[C]wav\f[], \f[C]webm\f[] are supported) to download the best quality
-format of a particular file extension served as a single file, e.g.
-\f[C]\-f\ webm\f[] will download the best quality format with the
-\f[C]webm\f[] extension served as a single file.
-.PP
-You can also use special names to select particular edge case formats:
-.IP \[bu] 2
-\f[C]best\f[]: Select the best quality format represented by a single
-file with video and audio
-.IP \[bu] 2
-\f[C]worst\f[]: Select the worst quality format represented by a single
-file with video and audio
-.IP \[bu] 2
-\f[C]bestvideo\f[]: Select the best quality video\-only format (e.g.
-DASH video), may not be available
-.IP \[bu] 2
-\f[C]worstvideo\f[]: Select the worst quality video\-only format, may
-not be available
-.IP \[bu] 2
-\f[C]bestaudio\f[]: Select the best quality audio\-only format, may not
-be available
-.IP \[bu] 2
-\f[C]worstaudio\f[]: Select the worst quality audio\-only format, may
-not be available
-.PP
-For example, to download worst quality video only format you can use
-\f[C]\-f\ worstvideo\f[].
-.PP
-If you want to download multiple videos and they don\[aq]t have the same
-formats available, you can specify the order of preference using
-slashes.
-Note that slash is left\-associative, i.e.
-formats on the left hand side are preferred, for example
-\f[C]\-f\ 22/17/18\f[] will download format 22 if it\[aq]s available,
-otherwise it will download format 17 if it\[aq]s available, otherwise it
-will download format 18 if it\[aq]s available, otherwise it will
-complain that no suitable formats are available for download.
-.PP
-If you want to download several formats of the same video use a comma
-as a separator, e.g.
-\f[C]\-f\ 22,17,18\f[] will download all three of these formats,
-provided they are available.
-Or more sophisticated example combined with precedence feature
-\f[C]\-f\ 136/137/mp4/bestvideo,140/m4a/bestaudio\f[].
-.PP
-You can also filter the video formats by putting a condition in
-brackets, as in \f[C]\-f\ "best[height=720]"\f[] (or
-\f[C]\-f\ "[filesize>10M]"\f[]).
-.PP
-The following numeric meta fields can be used with comparisons
-\f[C]<\f[], \f[C]<=\f[], \f[C]>\f[], \f[C]>=\f[], \f[C]=\f[] (equals),
-\f[C]!=\f[] (not equals):
-.IP \[bu] 2
-\f[C]filesize\f[]: The number of bytes, if known in advance
-.IP \[bu] 2
-\f[C]width\f[]: Width of the video, if known
-.IP \[bu] 2
-\f[C]height\f[]: Height of the video, if known
-.IP \[bu] 2
-\f[C]tbr\f[]: Average bitrate of audio and video in KBit/s
-.IP \[bu] 2
-\f[C]abr\f[]: Average audio bitrate in KBit/s
-.IP \[bu] 2
-\f[C]vbr\f[]: Average video bitrate in KBit/s
-.IP \[bu] 2
-\f[C]asr\f[]: Audio sampling rate in Hertz
-.IP \[bu] 2
-\f[C]fps\f[]: Frame rate
-.PP
-Filtering also works for the comparisons \f[C]=\f[] (equals),
-\f[C]!=\f[] (not equals), \f[C]^=\f[] (begins with), \f[C]$=\f[] (ends
-with), \f[C]*=\f[] (contains) and the following string meta fields:
-.IP \[bu] 2
-\f[C]ext\f[]: File extension
-.IP \[bu] 2
-\f[C]acodec\f[]: Name of the audio codec in use
-.IP \[bu] 2
-\f[C]vcodec\f[]: Name of the video codec in use
-.IP \[bu] 2
-\f[C]container\f[]: Name of the container format
-.IP \[bu] 2
-\f[C]protocol\f[]: The protocol that will be used for the actual
-download, lower\-case: \f[C]http\f[], \f[C]https\f[], \f[C]rtsp\f[],
-\f[C]rtmp\f[], \f[C]rtmpe\f[], \f[C]m3u8\f[], or \f[C]m3u8_native\f[]
-.PP
-Note that none of the aforementioned meta fields are guaranteed to be
-present, since this solely depends on the metadata obtained by a
-particular extractor, i.e.
-the metadata offered by the video hoster.
-.PP
-Formats for which the value is not known are excluded unless you put a
-question mark (\f[C]?\f[]) after the operator.
-You can combine format filters, so
-\f[C]\-f\ "[height\ <=?\ 720][tbr>500]"\f[] selects up to 720p videos
-(or videos where the height is not known) with a bitrate of at least 500
-KBit/s.
-.PP
-You can merge the video and audio of two formats into a single file
-using \f[C]\-f\ <video\-format>+<audio\-format>\f[] (requires ffmpeg or
-avconv installed), for example \f[C]\-f\ bestvideo+bestaudio\f[] will
-download best video only format, best audio only format and mux them
-together with ffmpeg/avconv.
-.PP
-Format selectors can also be grouped using parentheses, for example if
-you want to download the best mp4 and webm formats with a height lower
-than 480 you can use \f[C]\-f\ \[aq](mp4,webm)[height<480]\[aq]\f[].
-.PP
-Since the end of April 2015 and version 2015.04.26 youtube\-dl uses
-\f[C]\-f\ bestvideo+bestaudio/best\f[] as default format selection (see
-#5447, #5456).
-If ffmpeg or avconv are installed this results in downloading
-\f[C]bestvideo\f[] and \f[C]bestaudio\f[] separately and muxing them
-together into a single file giving the best overall quality available.
-Otherwise it falls back to \f[C]best\f[] and results in downloading the
-best available quality served as a single file.
-\f[C]best\f[] is also needed for videos that don\[aq]t come from YouTube
-because they don\[aq]t provide the audio and video in two different
-files.
-If you want to only download some DASH formats (for example if you are
-not interested in getting videos with a resolution higher than 1080p),
-you can add \f[C]\-f\ bestvideo[height<=?1080]+bestaudio/best\f[] to
-your configuration file.
-Note that if you use youtube\-dl to stream to \f[C]stdout\f[] (and most
-likely to pipe it to your media player then), i.e.
-you explicitly specify output template as \f[C]\-o\ \-\f[], youtube\-dl
-still uses \f[C]\-f\ best\f[] format selection in order to start content
-delivery immediately to your player and not to wait until
-\f[C]bestvideo\f[] and \f[C]bestaudio\f[] are downloaded and muxed.
-.PP
-If you want to preserve the old format selection behavior (prior to
-youtube\-dl 2015.04.26), i.e.
-you want to download the best available quality media served as a single
-file, you should explicitly specify your choice with \f[C]\-f\ best\f[].
-You may want to add it to the configuration file (#configuration) in
-order not to type it every time you run youtube\-dl.
-.PP
-Examples (note on Windows you may need to use double quotes instead of
-single):
-.IP
-.nf
-\f[C]
-#\ Download\ best\ mp4\ format\ available\ or\ any\ other\ best\ if\ no\ mp4\ available
-$\ youtube\-dl\ \-f\ \[aq]bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best\[aq]
-
-#\ Download\ best\ format\ available\ but\ not\ better\ than\ 480p
-$\ youtube\-dl\ \-f\ \[aq]bestvideo[height<=480]+bestaudio/best[height<=480]\[aq]
-
-#\ Download\ best\ video\ only\ format\ but\ no\ bigger\ than\ 50\ MB
-$\ youtube\-dl\ \-f\ \[aq]best[filesize<50M]\[aq]
-
-#\ Download\ best\ format\ available\ via\ direct\ link\ over\ HTTP/HTTPS\ protocol
-$\ youtube\-dl\ \-f\ \[aq](bestvideo+bestaudio/best)[protocol^=http]\[aq]
-\f[]
-.fi
-.SH VIDEO SELECTION
-.PP
-Videos can be filtered by their upload date using the options
-\f[C]\-\-date\f[], \f[C]\-\-datebefore\f[] or \f[C]\-\-dateafter\f[].
-They accept dates in two formats:
-.IP \[bu] 2
-Absolute dates: Dates in the format \f[C]YYYYMMDD\f[].
-.IP \[bu] 2
-Relative dates: Dates in the format
-\f[C](now|today)[+\-][0\-9](day|week|month|year)(s)?\f[]
-.PP
-Examples:
-.IP
-.nf
-\f[C]
-#\ Download\ only\ the\ videos\ uploaded\ in\ the\ last\ 6\ months
-$\ youtube\-dl\ \-\-dateafter\ now\-6months
-
-#\ Download\ only\ the\ videos\ uploaded\ on\ January\ 1,\ 1970
-$\ youtube\-dl\ \-\-date\ 19700101
-
-$\ #\ Download\ only\ the\ videos\ uploaded\ in\ the\ 200x\ decade
-$\ youtube\-dl\ \-\-dateafter\ 20000101\ \-\-datebefore\ 20091231
-\f[]
-.fi
-.SH FAQ
-.SS How do I update youtube\-dl?
-.PP
-If you\[aq]ve followed our manual installation
-instructions (http://rg3.github.io/youtube-dl/download.html), you can
-simply run \f[C]youtube\-dl\ \-U\f[] (or, on Linux,
-\f[C]sudo\ youtube\-dl\ \-U\f[]).
-.PP
-If you have used pip, a simple
-\f[C]sudo\ pip\ install\ \-U\ youtube\-dl\f[] is sufficient to update.
-.PP
-If you have installed youtube\-dl using a package manager like
-\f[I]apt\-get\f[] or \f[I]yum\f[], use the standard system update
-mechanism to update.
-Note that distribution packages are often outdated.
-As a rule of thumb, youtube\-dl releases at least once a month, and
-often weekly or even daily.
-Simply go to http://yt\-dl.org/ to find out the current version.
-Unfortunately, there is nothing we youtube\-dl developers can do if your
-distribution serves a really outdated version.
-You can (and should) complain to your distribution in their bugtracker
-or support forum.
-.PP
-As a last resort, you can also uninstall the version installed by your
-package manager and follow our manual installation instructions.
-For that, remove the distribution\[aq]s package, with a line like
-.IP
-.nf
-\f[C]
-sudo\ apt\-get\ remove\ \-y\ youtube\-dl
-\f[]
-.fi
-.PP
-Afterwards, simply follow our manual installation
-instructions (http://rg3.github.io/youtube-dl/download.html):
-.IP
-.nf
-\f[C]
-sudo\ wget\ https://yt\-dl.org/latest/youtube\-dl\ \-O\ /usr/local/bin/youtube\-dl
-sudo\ chmod\ a+x\ /usr/local/bin/youtube\-dl
-hash\ \-r
-\f[]
-.fi
-.PP
-Again, from then on you\[aq]ll be able to update with
-\f[C]sudo\ youtube\-dl\ \-U\f[].
-.SS I\[aq]m getting an error
-\f[C]Unable\ to\ extract\ OpenGraph\ title\f[] on YouTube playlists
-.PP
-YouTube changed their playlist format in March 2014 and later on, so
-you\[aq]ll need at least youtube\-dl 2014.07.25 to download all YouTube
-videos.
-.PP
-If you have installed youtube\-dl with a package manager, pip, setup.py
-or a tarball, please use that to update.
-Note that Ubuntu packages do not seem to get updated anymore.
-Since we are not affiliated with Ubuntu, there is little we can do.
-Feel free to report
-bugs (https://bugs.launchpad.net/ubuntu/+source/youtube-dl/+filebug) to
-the Ubuntu packaging
-guys (mailto:ubuntu-motu@lists.ubuntu.com?subject=outdated%20version%20of%20youtube-dl)
-\- all they have to do is update the package to a somewhat recent
-version.
-See above for a way to update.
-.SS Do I always have to pass \f[C]\-citw\f[]?
-.PP
-By default, youtube\-dl intends to have the best options (incidentally,
-if you have a convincing case that these should be different, please
-file an issue where you explain that (https://yt-dl.org/bug)).
-Therefore, it is unnecessary and sometimes harmful to copy long option
-strings from webpages.
-In particular, the only option out of \f[C]\-citw\f[] that is regularly
-useful is \f[C]\-i\f[].
-.SS Can you please put the \f[C]\-b\f[] option back?
-.PP
-Most people asking this question are not aware that youtube\-dl now
-defaults to downloading the highest available quality as reported by
-YouTube, which will be 1080p or 720p in some cases, so you no longer
-need the \f[C]\-b\f[] option.
-For some specific videos, maybe YouTube does not report them to be
-available in a specific high quality format you\[aq]re interested in.
-In that case, simply request it with the \f[C]\-f\f[] option and
-youtube\-dl will try to download it.
-.SS I get HTTP error 402 when trying to download a video. What\[aq]s
-this?
-.PP
-Apparently YouTube requires you to pass a CAPTCHA test if you download
-too much.
-We\[aq]re considering providing a way to let you solve the
-CAPTCHA (https://github.com/rg3/youtube-dl/issues/154), but at the
-moment, your best course of action is pointing a web browser to the
-YouTube URL, solving the CAPTCHA, and restarting youtube\-dl.
-.SS Do I need any other programs?
-.PP
-youtube\-dl works fine on its own on most sites.
-However, if you want to convert video/audio, you\[aq]ll need
-avconv (https://libav.org/) or ffmpeg (https://www.ffmpeg.org/).
-On some sites \- most notably YouTube \- videos can be retrieved in a
-higher quality format without sound.
-youtube\-dl will detect whether avconv/ffmpeg is present and
-automatically pick the best option.
-.PP
-Videos or video formats streamed via RTMP protocol can only be
-downloaded when rtmpdump (https://rtmpdump.mplayerhq.hu/) is installed.
-Downloading MMS and RTSP videos requires either
-mplayer (http://mplayerhq.hu/) or mpv (https://mpv.io/) to be installed.
-.SS I have downloaded a video but how can I play it?
-.PP
-Once the video is fully downloaded, use any video player, such as
-vlc (http://www.videolan.org) or mplayer (http://www.mplayerhq.hu/).
-.SS I extracted a video URL with \f[C]\-g\f[], but it does not play on
-another machine / in my webbrowser.
-.PP
-It depends a lot on the service.
-In many cases, requests for the video (to download/play it) must come
-from the same IP address and with the same cookies.
-Use the \f[C]\-\-cookies\f[] option to write the required cookies into a
-file, and advise your downloader to read cookies from that file.
-Some sites also require a common user agent to be used; use
-\f[C]\-\-dump\-user\-agent\f[] to see the one in use by youtube\-dl.
-.PP
-It may be beneficial to use IPv6; in some cases, the restrictions are
-only applied to IPv4.
-Some services (sometimes only for a subset of videos) do not restrict
-the video URL by IP address, cookie, or user\-agent, but these are the
-exception rather than the rule.
-.PP
-Please bear in mind that some URL protocols are \f[B]not\f[] supported
-by browsers out of the box, including RTMP.
-If you are using \f[C]\-g\f[], your own downloader must support these as
-well.
-.PP
-If you want to play the video on a machine that is not running
-youtube\-dl, you can relay the video content from the machine that runs
-youtube\-dl.
-You can use \f[C]\-o\ \-\f[] to let youtube\-dl stream a video to
-stdout, or simply allow the player to download the files written by
-youtube\-dl in turn.
-.SS ERROR: no fmt_url_map or conn information found in video info
-.PP
-YouTube has switched to a new video info format in July 2011 which is
-not supported by old versions of youtube\-dl.
-See above (#how-do-i-update-youtube-dl) for how to update youtube\-dl.
-.SS ERROR: unable to download video
-.PP
-YouTube requires an additional signature since September 2012 which is
-not supported by old versions of youtube\-dl.
-See above (#how-do-i-update-youtube-dl) for how to update youtube\-dl.
-.SS Video URL contains an ampersand and I\[aq]m getting some strange
-output \f[C][1]\ 2839\f[] or
-\f[C]\[aq]v\[aq]\ is\ not\ recognized\ as\ an\ internal\ or\ external\ command\f[]
-.PP
-That\[aq]s actually the output from your shell.
-Since ampersand is one of the special shell characters it\[aq]s
-interpreted by the shell preventing you from passing the whole URL to
-youtube\-dl.
-To prevent your shell from interpreting the ampersands (or any other
-special characters) you have to either put the whole URL in quotes or
-escape them with a backslash (which approach works depends on your
-shell).
-.PP
-For example, if your URL is
-https://www.youtube.com/watch?t=4&v=BaW_jenozKc you should end up with
-the following command:
-.PP
-\f[C]youtube\-dl\ \[aq]https://www.youtube.com/watch?t=4&v=BaW_jenozKc\[aq]\f[]
-.PP
-or
-.PP
-\f[C]youtube\-dl\ https://www.youtube.com/watch?t=4\\&v=BaW_jenozKc\f[]
-.PP
-For Windows you have to use the double quotes:
-.PP
-\f[C]youtube\-dl\ "https://www.youtube.com/watch?t=4&v=BaW_jenozKc"\f[]
-.SS ExtractorError: Could not find JS function u\[aq]OF\[aq]
-.PP
-In February 2015, the new YouTube player contained a character sequence
-in a string that was misinterpreted by old versions of youtube\-dl.
-See above (#how-do-i-update-youtube-dl) for how to update youtube\-dl.
-.SS HTTP Error 429: Too Many Requests or 402: Payment Required
-.PP
-These two error codes indicate that the service is blocking your IP
-address because of overuse.
-Contact the service and ask them to unblock your IP address, or \- if
-you have acquired a whitelisted IP address already \- use the
-\f[C]\-\-proxy\f[] or \f[C]\-\-source\-address\f[]
-options (#network-options) to select another IP address.
-.SS SyntaxError: Non\-ASCII character
-.PP
-The error
-.IP
-.nf
-\f[C]
-File\ "youtube\-dl",\ line\ 2
-SyntaxError:\ Non\-ASCII\ character\ \[aq]\\x93\[aq]\ ...
-\f[]
-.fi
-.PP
-means you\[aq]re using an outdated version of Python.
-Please update to Python 2.6 or 2.7.
-.SS What is this binary file? Where has the code gone?
-.PP
-Since June 2012 (#342) youtube\-dl has been packed as an executable
-zipfile; simply unzip it (you might need to rename it to
-\f[C]youtube\-dl.zip\f[] first on some systems) or clone the git
-repository, as laid out above.
-If you modify the code, you can run it by executing the
-\f[C]__main__.py\f[] file.
-To recompile the executable, run \f[C]make\ youtube\-dl\f[].
-.SS The exe throws a \f[I]Runtime error from Visual C++\f[]
-.PP
-To run the exe you first need to install the Microsoft Visual C++ 2008
-Redistributable
-Package (http://www.microsoft.com/en-us/download/details.aspx?id=29).
-.SS On Windows, how should I set up ffmpeg and youtube\-dl? Where should
-I put the exe files?
-.PP
-If you put youtube\-dl and ffmpeg in the same directory that you\[aq]re
-running the command from, it will work, but that\[aq]s rather
-cumbersome.
-.PP
-To make a different directory work \- either for ffmpeg, or for
-youtube\-dl, or for both \- simply create the directory (say,
-\f[C]C:\\bin\f[], or \f[C]C:\\Users\\<User\ name>\\bin\f[]), put all the
-executables directly in there, and then set your PATH environment
-variable (https://www.java.com/en/download/help/path.xml) to include
-that directory.
-.PP
-From then on, after restarting your shell, you will be able to access
-both youtube\-dl and ffmpeg (and youtube\-dl will be able to find
-ffmpeg) by simply typing \f[C]youtube\-dl\f[] or \f[C]ffmpeg\f[], no
-matter what directory you\[aq]re in.
-.SS How do I put downloads into a specific folder?
-.PP
-Use the \f[C]\-o\f[] to specify an output template (#output-template),
-for example \f[C]\-o\ "/home/user/videos/%(title)s\-%(id)s.%(ext)s"\f[].
-If you want this for all of your downloads, put the option into your
-configuration file (#configuration).
-.SS How do I download a video starting with a \f[C]\-\f[]?
-.PP
-Either prepend \f[C]http://www.youtube.com/watch?v=\f[] or separate the
-ID from the options with \f[C]\-\-\f[]:
-.IP
-.nf
-\f[C]
-youtube\-dl\ \-\-\ \-wNyEUrxzFU
-youtube\-dl\ "http://www.youtube.com/watch?v=\-wNyEUrxzFU"
-\f[]
-.fi
-.SS How do I pass cookies to youtube\-dl?
-.PP
-Use the \f[C]\-\-cookies\f[] option, for example
-\f[C]\-\-cookies\ /path/to/cookies/file.txt\f[].
-Note that the cookies file must be in Mozilla/Netscape format and the
-first line of the cookies file must be either
-\f[C]#\ HTTP\ Cookie\ File\f[] or
-\f[C]#\ Netscape\ HTTP\ Cookie\ File\f[].
-Make sure you have correct newline
-format (https://en.wikipedia.org/wiki/Newline) in the cookies file and
-convert newlines if necessary to correspond with your OS, namely
-\f[C]CRLF\f[] (\f[C]\\r\\n\f[]) for Windows, \f[C]LF\f[] (\f[C]\\n\f[])
-for Linux and \f[C]CR\f[] (\f[C]\\r\f[]) for Mac OS.
-\f[C]HTTP\ Error\ 400:\ Bad\ Request\f[] when using \f[C]\-\-cookies\f[]
-is a good sign of invalid newline format.
-.PP
-Passing cookies to youtube\-dl is a good way to work around login when a
-particular extractor does not implement it explicitly.
-Another use case is working around
-CAPTCHA (https://en.wikipedia.org/wiki/CAPTCHA) some websites require
-you to solve in particular cases in order to get access (e.g.
-YouTube, CloudFlare).
-.SS Can you add support for this anime video site, or site which shows
-current movies for free?
-.PP
-As a matter of policy (as well as legality), youtube\-dl does not
-include support for services that specialize in infringing copyright.
-As a rule of thumb, if you cannot easily find a video that the service
-is quite obviously allowed to distribute (i.e.
-that has been uploaded by the creator, the creator\[aq]s distributor, or
-is published under a free license), the service is probably unfit for
-inclusion in youtube\-dl.
-.PP
-A note on the service stating that it does not host the infringing
-content but merely links to those who do is evidence that the service
-should \f[B]not\f[] be included in youtube\-dl.
-The same goes for any DMCA note when the whole front page of the service
-is filled with videos they are not allowed to distribute.
-A "fair use" note is equally unconvincing if the service shows
-copyright\-protected videos in full without authorization.
-.PP
-Support requests for services that \f[B]do\f[] purchase the rights to
-distribute their content are perfectly fine though.
-If in doubt, you can simply include a source that mentions the
-legitimate purchase of content.
-.SS How can I speed up work on my issue?
-.PP
-(Also known as: Help, my important issue is not being solved!) The
-youtube\-dl core developer team is quite small.
-While we do our best to solve as many issues as possible, sometimes that
-can take quite a while.
-To speed up your issue, here\[aq]s what you can do:
-.PP
-First of all, please do report the issue at our issue
-tracker (https://yt-dl.org/bugs).
-That allows us to coordinate all efforts by users and developers, and
-serves as a unified point.
-Unfortunately, the youtube\-dl project has grown too large to use
-personal email as an effective communication channel.
-.PP
-Please read the bug reporting instructions (#bugs) below.
-A lot of bugs lack all the necessary information.
-If you can, offer proxy, VPN, or shell access to the youtube\-dl
-developers.
-If you are able to, test the issue from multiple computers in multiple
-countries to exclude local censorship or misconfiguration issues.
-.PP
-If nobody is interested in solving your issue, you are welcome to take
-matters into your own hands and submit a pull request (or coerce/pay
-somebody else to do so).
-.PP
-Feel free to bump the issue from time to time by writing a small comment
-("Issue is still present in youtube\-dl version ...from France, but
-fixed from Belgium"), but please not more than once a month.
-Please do not declare your issue as \f[C]important\f[] or
-\f[C]urgent\f[].
-.SS How can I detect whether a given URL is supported by youtube\-dl?
-.PP
-For one, have a look at the list of supported
-sites (docs/supportedsites.md).
-Note that it can sometimes happen that the site changes its URL scheme
-(say, from http://example.com/video/1234567 to
-http://example.com/v/1234567) and youtube\-dl reports a URL of a
-service in that list as unsupported.
-In that case, simply report a bug.
-.PP
-It is \f[I]not\f[] possible to detect whether a URL is supported or not.
-That\[aq]s because youtube\-dl contains a generic extractor which
-matches \f[B]all\f[] URLs.
-You may be tempted to disable, exclude, or remove the generic extractor,
-but it not only allows users to extract videos from lots of websites
-that embed a video from another service; it may also be used to extract
-videos from a service that hosts them itself.
-Therefore, we neither recommend nor support disabling, excluding, or
-removing the generic extractor.
-.PP
-If you want to find out whether a given URL is supported, simply call
-youtube\-dl with it.
-If you get no videos back, chances are the URL is either not referring
-to a video or unsupported.
-You can find out which by examining the output (if you run youtube\-dl
-on the console) or catching an \f[C]UnsupportedError\f[] exception if
-you run it from a Python program.
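(An editorial aside, not part of the patch: a rough sketch of the programmatic route just mentioned. The is_supported helper and the option dict are illustrative assumptions; it relies on YoutubeDL wrapping extractor failures in a DownloadError whose exc_info carries the original UnsupportedError.)

from __future__ import unicode_literals

import youtube_dl
from youtube_dl.utils import DownloadError, UnsupportedError


def is_supported(url):
    """Illustrative only: probe a URL with a metadata-only extraction."""
    ydl = youtube_dl.YoutubeDL({'quiet': True, 'simulate': True})
    try:
        ydl.extract_info(url, download=False)
    except DownloadError as err:
        # YoutubeDL wraps extractor errors; an UnsupportedError underneath
        # means no extractor (not even the generic one) produced a video.
        if err.exc_info and isinstance(err.exc_info[1], UnsupportedError):
            return False
        raise  # some other failure (network, geo-block, ...)
    return True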
-.SH DEVELOPER INSTRUCTIONS
-.PP
-Most users do not need to build youtube\-dl and can download the
-builds (http://rg3.github.io/youtube-dl/download.html) or get them from
-their distribution.
-.PP
-To run youtube\-dl as a developer, you don\[aq]t need to build anything
-either.
-Simply execute
-.IP
-.nf
-\f[C]
-python\ \-m\ youtube_dl
-\f[]
-.fi
-.PP
-To run the tests, simply invoke your favorite test runner, or execute a
-test file directly; any of the following work:
-.IP
-.nf
-\f[C]
-python\ \-m\ unittest\ discover
-python\ test/test_download.py
-nosetests
-\f[]
-.fi
-.PP
-If you want to create a build of youtube\-dl yourself, you\[aq]ll need
-.IP \[bu] 2
-python
-.IP \[bu] 2
-make
-.IP \[bu] 2
-pandoc
-.IP \[bu] 2
-zip
-.IP \[bu] 2
-nosetests
-.SS Adding support for a new site
-.PP
-If you want to add support for a new site, you can follow this quick
-list (assuming your service is called \f[C]yourextractor\f[]):
-.IP " 1." 4
-Fork this repository (https://github.com/rg3/youtube-dl/fork)
-.IP " 2." 4
-Check out the source code with
-\f[C]git\ clone\ git\@github.com:YOUR_GITHUB_USERNAME/youtube\-dl.git\f[]
-.IP " 3." 4
-Start a new git branch with
-\f[C]cd\ youtube\-dl;\ git\ checkout\ \-b\ yourextractor\f[]
-.IP " 4." 4
-Start with this simple template and save it to
-\f[C]youtube_dl/extractor/yourextractor.py\f[]:
-.RS 4
-.IP
-.nf
-\f[C]
-#\ coding:\ utf\-8
-from\ __future__\ import\ unicode_literals
-
-from\ .common\ import\ InfoExtractor
-
-
-class\ YourExtractorIE(InfoExtractor):
-\ \ \ \ _VALID_URL\ =\ r\[aq]https?://(?:www\\.)?yourextractor\\.com/watch/(?P<id>[0\-9]+)\[aq]
-\ \ \ \ _TEST\ =\ {
-\ \ \ \ \ \ \ \ \[aq]url\[aq]:\ \[aq]http://yourextractor.com/watch/42\[aq],
-\ \ \ \ \ \ \ \ \[aq]md5\[aq]:\ \[aq]TODO:\ md5\ sum\ of\ the\ first\ 10241\ bytes\ of\ the\ video\ file\ (use\ \-\-test)\[aq],
-\ \ \ \ \ \ \ \ \[aq]info_dict\[aq]:\ {
-\ \ \ \ \ \ \ \ \ \ \ \ \[aq]id\[aq]:\ \[aq]42\[aq],
-\ \ \ \ \ \ \ \ \ \ \ \ \[aq]ext\[aq]:\ \[aq]mp4\[aq],
-\ \ \ \ \ \ \ \ \ \ \ \ \[aq]title\[aq]:\ \[aq]Video\ title\ goes\ here\[aq],
-\ \ \ \ \ \ \ \ \ \ \ \ \[aq]thumbnail\[aq]:\ \[aq]re:^https?://.*\\.jpg$\[aq],
-\ \ \ \ \ \ \ \ \ \ \ \ #\ TODO\ more\ properties,\ either\ as:
-\ \ \ \ \ \ \ \ \ \ \ \ #\ *\ A\ value
-\ \ \ \ \ \ \ \ \ \ \ \ #\ *\ MD5\ checksum;\ start\ the\ string\ with\ md5:
-\ \ \ \ \ \ \ \ \ \ \ \ #\ *\ A\ regular\ expression;\ start\ the\ string\ with\ re:
-\ \ \ \ \ \ \ \ \ \ \ \ #\ *\ Any\ Python\ type\ (for\ example\ int\ or\ float)
-\ \ \ \ \ \ \ \ }
-\ \ \ \ }
-
-\ \ \ \ def\ _real_extract(self,\ url):
-\ \ \ \ \ \ \ \ video_id\ =\ self._match_id(url)
-\ \ \ \ \ \ \ \ webpage\ =\ self._download_webpage(url,\ video_id)
-
-\ \ \ \ \ \ \ \ #\ TODO\ more\ code\ goes\ here,\ for\ example\ ...
-\ \ \ \ \ \ \ \ title\ =\ self._html_search_regex(r\[aq]<h1>(.+?)</h1>\[aq],\ webpage,\ \[aq]title\[aq])
-
-\ \ \ \ \ \ \ \ return\ {
-\ \ \ \ \ \ \ \ \ \ \ \ \[aq]id\[aq]:\ video_id,
-\ \ \ \ \ \ \ \ \ \ \ \ \[aq]title\[aq]:\ title,
-\ \ \ \ \ \ \ \ \ \ \ \ \[aq]description\[aq]:\ self._og_search_description(webpage),
-\ \ \ \ \ \ \ \ \ \ \ \ \[aq]uploader\[aq]:\ self._search_regex(r\[aq]<div[^>]+id="uploader"[^>]*>([^<]+)<\[aq],\ webpage,\ \[aq]uploader\[aq],\ fatal=False),
-\ \ \ \ \ \ \ \ \ \ \ \ #\ TODO\ more\ properties\ (see\ youtube_dl/extractor/common.py)
-\ \ \ \ \ \ \ \ }
-\f[]
-.fi
-.RE
-.IP " 5." 4
-Add an import in
-\f[C]youtube_dl/extractor/__init__.py\f[] (https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
-.IP " 6." 4
-Run
-\f[C]python\ test/test_download.py\ TestDownload.test_YourExtractor\f[].
-This \f[I]should fail\f[] at first, but you can continually re\-run it
-until you\[aq]re done.
-If you decide to add more than one test, then rename \f[C]_TEST\f[] to
-\f[C]_TESTS\f[] and make it into a list of dictionaries (see the sketch
-after this list).
-The tests will then be named \f[C]TestDownload.test_YourExtractor\f[],
-\f[C]TestDownload.test_YourExtractor_1\f[],
-\f[C]TestDownload.test_YourExtractor_2\f[], etc.
-.IP " 7." 4
-Have a look at
-\f[C]youtube_dl/extractor/common.py\f[] (https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py)
-for possible helper methods and a detailed description of what your
-extractor should and may
-return (https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62-L200).
-Add tests and code for as many as you want.
-.IP " 8." 4
-If you can, check the code with
-flake8 (https://pypi.python.org/pypi/flake8).
-.IP " 9." 4
-When the tests pass, add (http://git-scm.com/docs/git-add) the new files
-and commit (http://git-scm.com/docs/git-commit) them and
-push (http://git-scm.com/docs/git-push) the result, like this:
-.RS 4
-.IP
-.nf
-\f[C]
-$\ git\ add\ youtube_dl/extractor/__init__.py
-$\ git\ add\ youtube_dl/extractor/yourextractor.py
-$\ git\ commit\ \-m\ \[aq][yourextractor]\ Add\ new\ extractor\[aq]
-$\ git\ push\ origin\ yourextractor
-\f[]
-.fi
-.RE
-.IP "10." 4
-Finally, create a pull
-request (https://help.github.com/articles/creating-a-pull-request).
-We\[aq]ll then review and merge it.
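-.PP
-As mentioned in step 6, here is a hedged sketch of what the renamed
-\f[C]_TESTS\f[] list might look like (the URLs and values are purely
-illustrative; \f[C]only_matching\f[] marks a URL that should match
-\f[C]_VALID_URL\f[] without being downloaded):
-.IP
-.nf
-\f[C]
-_TESTS\ =\ [{
-\ \ \ \ \[aq]url\[aq]:\ \[aq]http://yourextractor.com/watch/42\[aq],
-\ \ \ \ \[aq]info_dict\[aq]:\ {
-\ \ \ \ \ \ \ \ \[aq]id\[aq]:\ \[aq]42\[aq],
-\ \ \ \ \ \ \ \ \[aq]ext\[aq]:\ \[aq]mp4\[aq],
-\ \ \ \ \ \ \ \ \[aq]title\[aq]:\ \[aq]Video\ title\ goes\ here\[aq],
-\ \ \ \ },
-},\ {
-\ \ \ \ #\ Matched\ against\ _VALID_URL\ only;\ no\ download\ is\ attempted
-\ \ \ \ \[aq]url\[aq]:\ \[aq]http://yourextractor.com/embed/42\[aq],
-\ \ \ \ \[aq]only_matching\[aq]:\ True,
-}]
-\f[]
-.fi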
-.PP
-In any case, thank you very much for your contributions!
-.SH EMBEDDING YOUTUBE\-DL
-.PP
-youtube\-dl makes the best effort to be a good command\-line program,
-and thus should be callable from any programming language.
-If you encounter any problems parsing its output, feel free to create a
-report (https://github.com/rg3/youtube-dl/issues/new).
-.PP
-From a Python program, you can embed youtube\-dl in a more powerful
-fashion, like this:
-.IP
-.nf
-\f[C]
-from\ __future__\ import\ unicode_literals
-import\ youtube_dl
-
-ydl_opts\ =\ {}
-with\ youtube_dl.YoutubeDL(ydl_opts)\ as\ ydl:
-\ \ \ \ ydl.download([\[aq]http://www.youtube.com/watch?v=BaW_jenozKc\[aq]])
-\f[]
-.fi
-.PP
-Most likely, you\[aq]ll want to use various options.
-For a list of what can be done, have a look at
-\f[C]youtube_dl/YoutubeDL.py\f[] (https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L121-L269).
-For a start, if you want to intercept youtube\-dl\[aq]s output, set a
-\f[C]logger\f[] object.
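-.PP
-For instance, here is a minimal sketch of an options dictionary (the
-option names come from \f[C]YoutubeDL.py\f[]; the values are merely
-illustrative):
-.IP
-.nf
-\f[C]
-ydl_opts\ =\ {
-\ \ \ \ \[aq]format\[aq]:\ \[aq]bestaudio/best\[aq],
-\ \ \ \ \[aq]outtmpl\[aq]:\ \[aq]%(title)s\-%(id)s.%(ext)s\[aq],
-\ \ \ \ #\ Continue\ with\ the\ next\ URL\ if\ one\ download\ fails
-\ \ \ \ \[aq]ignoreerrors\[aq]:\ True,
-}
-\f[]
-.fi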
-.PP
-Here\[aq]s a more complete example of a program that outputs only errors
-(and a short message after the download is finished), and
-downloads/converts the video to an mp3 file:
-.IP
-.nf
-\f[C]
-from\ __future__\ import\ unicode_literals
-import\ youtube_dl
-
-
-class\ MyLogger(object):
-\ \ \ \ def\ debug(self,\ msg):
-\ \ \ \ \ \ \ \ pass
-
-\ \ \ \ def\ warning(self,\ msg):
-\ \ \ \ \ \ \ \ pass
-
-\ \ \ \ def\ error(self,\ msg):
-\ \ \ \ \ \ \ \ print(msg)
-
-
-def\ my_hook(d):
-\ \ \ \ if\ d[\[aq]status\[aq]]\ ==\ \[aq]finished\[aq]:
-\ \ \ \ \ \ \ \ print(\[aq]Done\ downloading,\ now\ converting\ ...\[aq])
-
-
-ydl_opts\ =\ {
-\ \ \ \ \[aq]format\[aq]:\ \[aq]bestaudio/best\[aq],
-\ \ \ \ \[aq]postprocessors\[aq]:\ [{
-\ \ \ \ \ \ \ \ \[aq]key\[aq]:\ \[aq]FFmpegExtractAudio\[aq],
-\ \ \ \ \ \ \ \ \[aq]preferredcodec\[aq]:\ \[aq]mp3\[aq],
-\ \ \ \ \ \ \ \ \[aq]preferredquality\[aq]:\ \[aq]192\[aq],
-\ \ \ \ }],
-\ \ \ \ \[aq]logger\[aq]:\ MyLogger(),
-\ \ \ \ \[aq]progress_hooks\[aq]:\ [my_hook],
-}
-with\ youtube_dl.YoutubeDL(ydl_opts)\ as\ ydl:
-\ \ \ \ ydl.download([\[aq]http://www.youtube.com/watch?v=BaW_jenozKc\[aq]])
-\f[]
-.fi
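-.PP
-The dictionary passed to progress hooks such as \f[C]my_hook\f[]
-always carries a \f[C]status\f[] key (one of \f[C]downloading\f[],
-\f[C]error\f[] or \f[C]finished\f[]); depending on the phase it may
-also include keys such as \f[C]filename\f[], \f[C]downloaded_bytes\f[]
-and \f[C]total_bytes\f[] (see the \f[C]progress_hooks\f[] entry in
-\f[C]youtube_dl/YoutubeDL.py\f[] for details).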
-.SH BUGS
-.PP
-Bugs and suggestions should be reported at:
-<https://github.com/rg3/youtube-dl/issues>.
-Unless you were prompted to do so or there is another pertinent reason
-(e.g.
-GitHub fails to accept the bug report), please do not send bug reports
-via personal email.
-For discussions, join us in the IRC channel
-#youtube\-dl (irc://chat.freenode.net/#youtube-dl) on freenode
-(webchat (http://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
-.PP
-\f[B]Please include the full output of youtube\-dl when run with
-\f[C]\-v\f[]\f[], i.e.
-\f[B]add\f[] the \f[C]\-v\f[] flag to \f[B]your command line\f[], copy the
-\f[B]whole\f[] output and post it in the issue body wrapped in ``` for
-better formatting.
-It should look similar to this:
-.IP
-.nf
-\f[C]
-$\ youtube\-dl\ \-v\ <your\ command\ line>
-[debug]\ System\ config:\ []
-[debug]\ User\ config:\ []
-[debug]\ Command\-line\ args:\ [u\[aq]\-v\[aq],\ u\[aq]http://www.youtube.com/watch?v=BaW_jenozKcj\[aq]]
-[debug]\ Encodings:\ locale\ cp1251,\ fs\ mbcs,\ out\ cp866,\ pref\ cp1251
-[debug]\ youtube\-dl\ version\ 2015.12.06
-[debug]\ Git\ HEAD:\ 135392e
-[debug]\ Python\ version\ 2.6.6\ \-\ Windows\-2003Server\-5.2.3790\-SP2
-[debug]\ exe\ versions:\ ffmpeg\ N\-75573\-g1d0487f,\ ffprobe\ N\-75573\-g1d0487f,\ rtmpdump\ 2.4
-[debug]\ Proxy\ map:\ {}
-\&...
-\f[]
-.fi
-.PP
-\f[B]Do not post screenshots of the verbose log; only plain text is
-acceptable.\f[]
-.PP
-The output (including the first lines) contains important debugging
-information.
-Issues without the full output are often not reproducible and therefore
-do not get solved in short order, if ever.
-.PP
-Please re\-read your issue once again to avoid a couple of common
-mistakes (you can and should use this as a checklist):
-.SS Is the description of the issue itself sufficient?
-.PP
-We often get issue reports that we cannot really decipher.
-While in most cases we eventually get the required information after
-asking back multiple times, this poses an unnecessary drain on our
-resources.
-Many contributors, including myself, are also not native speakers, so we
-may misread some parts.
-.PP
-So please elaborate on what feature you are requesting, or what bug you
-want to be fixed.
-Make sure that it\[aq]s obvious
-.IP \[bu] 2
-What the problem is
-.IP \[bu] 2
-How it could be fixed
-.IP \[bu] 2
-What your proposed solution would look like
-.PP
-If your report is shorter than two lines, it is almost certainly missing
-some of these, which makes it hard for us to respond to it.
-We\[aq]re often too polite to close the issue outright, but the missing
-info makes misinterpretation likely.
-As a committer myself, I often get frustrated by these issues, since the
-only possible way for me to move forward on them is to ask for
-clarification over and over.
-.PP
-For bug reports, this means that your report should contain the
-\f[I]complete\f[] output of youtube\-dl when called with the
-\f[C]\-v\f[] flag.
-The error message you get for (most) bugs even says so, but you would
-not believe how many of our bug reports do not contain this information.
-.PP
-If your server has multiple IPs or you suspect censorship, adding
-\f[C]\-\-call\-home\f[] may be a good idea to get more diagnostics.
-If the error is \f[C]ERROR:\ Unable\ to\ extract\ ...\f[] and you cannot
-reproduce it from multiple countries, add \f[C]\-\-dump\-pages\f[]
-(warning: this will yield a rather large output, redirect it to the file
-\f[C]log.txt\f[] by adding \f[C]>log.txt\ 2>&1\f[] to your
-command\-line) or upload the \f[C]\&.dump\f[] files you get when you add
-\f[C]\-\-write\-pages\f[] somewhere (https://gist.github.com/).
-.PP
-\f[B]Site support requests must contain an example URL\f[].
-An example URL is a URL you might want to download, like
-\f[C]http://www.youtube.com/watch?v=BaW_jenozKc\f[].
-There should be an obvious video present.
-Except under very special circumstances, the main page of a video
-service (e.g.
-\f[C]http://www.youtube.com/\f[]) is \f[I]not\f[] an example URL.
-.SS Are you using the latest version?
-.PP
-Before reporting any issue, type \f[C]youtube\-dl\ \-U\f[].
-This should report that you\[aq]re up\-to\-date.
-About 20% of the reports we receive concern issues that have already
-been fixed, but people are using outdated versions.
-This goes for feature requests as well.
-.SS Is the issue already documented?
-.PP
-Make sure that someone has not already opened the issue you\[aq]re
-trying to open.
-Search at the top of the window or browse the GitHub
-Issues (https://github.com/rg3/youtube-dl/search?type=Issues) of this
-repository.
-If there is an issue, feel free to write something along the lines of
-"This affects me as well, with version 2015.01.01.
-Here is some more information on the issue: ...".
-While some issues may be old, a new post in them often spurs rapid
-activity.
-.SS Why are existing options not enough?
-.PP
-Before requesting a new feature, please have a quick peek at the list of
-supported
-options (https://github.com/rg3/youtube-dl/blob/master/README.md#synopsis).
-Many feature requests are for features that actually exist already!
-Please, absolutely do show off your work in the issue report and detail
-how the existing similar options do \f[I]not\f[] solve your problem.
-.SS Is there enough context in your bug report?
-.PP
-People want to solve problems, and often think they do us a favor by
-breaking down their larger problems (e.g.
-wanting to skip already downloaded files) into a specific request (e.g.
-requesting us to look whether the file exists before downloading the
-info page).
-However, what often happens is that they break down the problem into
-two steps: one simple, and one impossible (or extremely complicated).
-.PP
-We are then presented with a very complicated request when the original
-problem could be solved far more easily, e.g.
-by recording the downloaded video IDs in a separate file.
-To avoid this, you must include the greater context where it is
-non\-obvious.
-In particular, every feature request that does not consist of adding
-support for a new site should contain a use case scenario that explains
-in what situation the missing feature would be useful.
-.SS Does the issue involve one problem, and one problem only?
-.PP
-Some of our users seem to think there is a limit on the number of
-issues they can or should open.
-There is no such limit.
-While it may seem appealing to be able to dump all your issues into one
-ticket, that means that someone who solves one of your issues cannot
-mark the issue as closed.
-Typically, reporting a bunch of issues leads to the ticket lingering
-since nobody wants to attack that behemoth, until someone mercifully
-splits the issue into multiple ones.
-.PP
-In particular, every site support request issue should only pertain to
-services at one site (generally under a common domain, but always using
-the same backend technology).
-Do not request support for vimeo user videos, Whitehouse podcasts, and
-Google Plus pages in the same issue.
-Also, make sure that you don\[aq]t post bug reports alongside feature
-requests.
-As a rule of thumb, a feature request does not include outputs of
-youtube\-dl that are not immediately related to the feature at hand.
-Do not post reports of a network error alongside the request for a new
-video service.
-.SS Is anyone going to need the feature?
-.PP
-Only post features that you (or an incapacitated friend you can
-personally talk to) require.
-Do not post features because they seem like a good idea.
-If they are really useful, they will be requested by someone who
-requires them.
-.SS Is your question about youtube\-dl?
-.PP
-It may sound strange, but some bug reports we receive are completely
-unrelated to youtube\-dl and relate to a different or even the
-reporter\[aq]s own application.
-Please make sure that you are actually using youtube\-dl.
-If you are using a UI for youtube\-dl, report the bug to the maintainer
-of the actual application providing the UI.
-On the other hand, if your UI for youtube\-dl fails in some way you
-believe is related to youtube\-dl, by all means, go ahead and report the
-bug.
-.SH COPYRIGHT
-.PP
-youtube\-dl is released into the public domain by the copyright holders.
-.PP
-This README file was originally written by Daniel
-Bolton (https://github.com/dbbolton) and is likewise released into the
-public domain.
diff --git a/youtube-dl.bash-completion b/youtube-dl.bash-completion
deleted file mode 100644 (file)
index d8c2ef1..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-__youtube_dl()
-{
-    local cur prev opts fileopts diropts keywords
-    COMPREPLY=()
-    cur="${COMP_WORDS[COMP_CWORD]}"
-    prev="${COMP_WORDS[COMP_CWORD-1]}"
-    opts="--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --force-generic-extractor --default-search --ignore-config --flat-playlist --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --cn-verification-proxy --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --rate-limit --retries --buffer-size --no-resize-buffer --test --playlist-reverse --xattr-set-filesize --hls-prefer-native --hls-use-mpegts --external-downloader --external-downloader-args --batch-file --id --output --autonumber-size --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --format --all-formats --prefer-free-formats --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --postprocessor-args --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --metadata-from-title --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subs"
-    keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
-    fileopts="-a|--batch-file|--download-archive|--cookies|--load-info"
-    diropts="--cache-dir"
-
-    if [[ ${prev} =~ ${fileopts} ]]; then
-        COMPREPLY=( $(compgen -f -- ${cur}) )
-        return 0
-    elif [[ ${prev} =~ ${diropts} ]]; then
-        COMPREPLY=( $(compgen -d -- ${cur}) )
-        return 0
-    fi
-
-    if [[ ${cur} =~ : ]]; then
-        COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )
-        return 0
-    elif [[ ${cur} == * ]] ; then
-        COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
-        return 0
-    fi
-}
-
-complete -F __youtube_dl youtube-dl
diff --git a/youtube-dl.fish b/youtube-dl.fish
deleted file mode 100644 (file)
index 5f30e09..0000000
+++ /dev/null
@@ -1,146 +0,0 @@
-
-complete --command youtube-dl --long-option help --short-option h --description 'Print this help text and exit'
-complete --command youtube-dl --long-option version --description 'Print program version and exit'
-complete --command youtube-dl --long-option update --short-option U --description 'Update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)'
-complete --command youtube-dl --long-option ignore-errors --short-option i --description 'Continue on download errors, for example to skip unavailable videos in a playlist'
-complete --command youtube-dl --long-option abort-on-error --description 'Abort downloading of further videos (in the playlist or the command line) if an error occurs'
-complete --command youtube-dl --long-option dump-user-agent --description 'Display the current browser identification'
-complete --command youtube-dl --long-option list-extractors --description 'List all supported extractors'
-complete --command youtube-dl --long-option extractor-descriptions --description 'Output descriptions of all supported extractors'
-complete --command youtube-dl --long-option force-generic-extractor --description 'Force extraction to use the generic extractor'
-complete --command youtube-dl --long-option default-search --description 'Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.'
-complete --command youtube-dl --long-option ignore-config --description 'Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: Do not read the user configuration in ~/.config/youtube-dl/config (%APPDATA%/youtube-dl/config.txt on Windows)'
-complete --command youtube-dl --long-option flat-playlist --description 'Do not extract the videos of a playlist, only list them.'
-complete --command youtube-dl --long-option no-color --description 'Do not emit color codes in output'
-complete --command youtube-dl --long-option proxy --description 'Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection'
-complete --command youtube-dl --long-option socket-timeout --description 'Time to wait before giving up, in seconds'
-complete --command youtube-dl --long-option source-address --description 'Client-side IP address to bind to (experimental)'
-complete --command youtube-dl --long-option force-ipv4 --short-option 4 --description 'Make all connections via IPv4 (experimental)'
-complete --command youtube-dl --long-option force-ipv6 --short-option 6 --description 'Make all connections via IPv6 (experimental)'
-complete --command youtube-dl --long-option cn-verification-proxy --description 'Use this proxy to verify the IP address for some Chinese sites. The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading. (experimental)'
-complete --command youtube-dl --long-option playlist-start --description 'Playlist video to start at (default is %default)'
-complete --command youtube-dl --long-option playlist-end --description 'Playlist video to end at (default is last)'
-complete --command youtube-dl --long-option playlist-items --description 'Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.'
-complete --command youtube-dl --long-option match-title --description 'Download only matching titles (regex or caseless sub-string)'
-complete --command youtube-dl --long-option reject-title --description 'Skip download for matching titles (regex or caseless sub-string)'
-complete --command youtube-dl --long-option max-downloads --description 'Abort after downloading NUMBER files'
-complete --command youtube-dl --long-option min-filesize --description 'Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)'
-complete --command youtube-dl --long-option max-filesize --description 'Do not download any videos larger than SIZE (e.g. 50k or 44.6m)'
-complete --command youtube-dl --long-option date --description 'Download only videos uploaded in this date'
-complete --command youtube-dl --long-option datebefore --description 'Download only videos uploaded on or before this date (i.e. inclusive)'
-complete --command youtube-dl --long-option dateafter --description 'Download only videos uploaded on or after this date (i.e. inclusive)'
-complete --command youtube-dl --long-option min-views --description 'Do not download any videos with less than COUNT views'
-complete --command youtube-dl --long-option max-views --description 'Do not download any videos with more than COUNT views'
-complete --command youtube-dl --long-option match-filter --description 'Generic video filter (experimental). Specify any key (see help for -o for a list of available keys) to match if the key is present, !key to check if the key is not present,key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against a number, and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the operator.For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike functionality is not available at the given service), but who also have a description, use --match-filter "like_count > 100 & dislike_count <? 50 & description" .'
-complete --command youtube-dl --long-option no-playlist --description 'Download only the video, if the URL refers to a video and a playlist.'
-complete --command youtube-dl --long-option yes-playlist --description 'Download the playlist, if the URL refers to a video and a playlist.'
-complete --command youtube-dl --long-option age-limit --description 'Download only videos suitable for the given age'
-complete --command youtube-dl --long-option download-archive --description 'Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.' --require-parameter
-complete --command youtube-dl --long-option include-ads --description 'Download advertisements as well (experimental)'
-complete --command youtube-dl --long-option rate-limit --short-option r --description 'Maximum download rate in bytes per second (e.g. 50K or 4.2M)'
-complete --command youtube-dl --long-option retries --short-option R --description 'Number of retries (default is %default), or "infinite".'
-complete --command youtube-dl --long-option buffer-size --description 'Size of download buffer (e.g. 1024 or 16K) (default is %default)'
-complete --command youtube-dl --long-option no-resize-buffer --description 'Do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.'
-complete --command youtube-dl --long-option test
-complete --command youtube-dl --long-option playlist-reverse --description 'Download playlist videos in reverse order'
-complete --command youtube-dl --long-option xattr-set-filesize --description 'Set file xattribute ytdl.filesize with expected filesize (experimental)'
-complete --command youtube-dl --long-option hls-prefer-native --description 'Use the native HLS downloader instead of ffmpeg (experimental)'
-complete --command youtube-dl --long-option hls-use-mpegts --description 'Use the mpegts container for HLS videos, allowing to play the video while downloading (some players may not be able to play it)'
-complete --command youtube-dl --long-option external-downloader --description 'Use the specified external downloader. Currently supports aria2c,axel,curl,httpie,wget'
-complete --command youtube-dl --long-option external-downloader-args --description 'Give these arguments to the external downloader'
-complete --command youtube-dl --long-option batch-file --short-option a --description 'File containing URLs to download ('"'"'-'"'"' for stdin)' --require-parameter
-complete --command youtube-dl --long-option id --description 'Use only video ID in file name'
-complete --command youtube-dl --long-option output --short-option o --description 'Output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(format)s for the format description (like "22 - 1280x720" or "HD"), %(format_id)s for the unique id of the format (like YouTube'"'"'s itags: "137"), %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id, %(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in, %(playlist_index)s for the position in the playlist. %(height)s and %(width)s for the width and height of the video format. %(resolution)s for a textual description of the resolution of the video format. %% for a literal percent. Use - to output to stdout. Can also be used to download to a different directory, for example with -o '"'"'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s'"'"' .'
-complete --command youtube-dl --long-option autonumber-size --description 'Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given'
-complete --command youtube-dl --long-option restrict-filenames --description 'Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames'
-complete --command youtube-dl --long-option auto-number --short-option A --description '[deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] Number downloaded files starting from 00000'
-complete --command youtube-dl --long-option title --short-option t --description '[deprecated] Use title in file name (default)'
-complete --command youtube-dl --long-option literal --short-option l --description '[deprecated] Alias of --title'
-complete --command youtube-dl --long-option no-overwrites --short-option w --description 'Do not overwrite files'
-complete --command youtube-dl --long-option continue --short-option c --description 'Force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.'
-complete --command youtube-dl --long-option no-continue --description 'Do not resume partially downloaded files (restart from beginning)'
-complete --command youtube-dl --long-option no-part --description 'Do not use .part files - write directly into output file'
-complete --command youtube-dl --long-option no-mtime --description 'Do not use the Last-modified header to set the file modification time'
-complete --command youtube-dl --long-option write-description --description 'Write video description to a .description file'
-complete --command youtube-dl --long-option write-info-json --description 'Write video metadata to a .info.json file'
-complete --command youtube-dl --long-option write-annotations --description 'Write video annotations to a .annotations.xml file'
-complete --command youtube-dl --long-option load-info --description 'JSON file containing the video information (created with the "--write-info-json" option)' --require-parameter
-complete --command youtube-dl --long-option cookies --description 'File to read cookies from and dump cookie jar in' --require-parameter
-complete --command youtube-dl --long-option cache-dir --description 'Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.'
-complete --command youtube-dl --long-option no-cache-dir --description 'Disable filesystem caching'
-complete --command youtube-dl --long-option rm-cache-dir --description 'Delete all filesystem cache files'
-complete --command youtube-dl --long-option write-thumbnail --description 'Write thumbnail image to disk'
-complete --command youtube-dl --long-option write-all-thumbnails --description 'Write all thumbnail image formats to disk'
-complete --command youtube-dl --long-option list-thumbnails --description 'Simulate and list all available thumbnail formats'
-complete --command youtube-dl --long-option quiet --short-option q --description 'Activate quiet mode'
-complete --command youtube-dl --long-option no-warnings --description 'Ignore warnings'
-complete --command youtube-dl --long-option simulate --short-option s --description 'Do not download the video and do not write anything to disk'
-complete --command youtube-dl --long-option skip-download --description 'Do not download the video'
-complete --command youtube-dl --long-option get-url --short-option g --description 'Simulate, quiet but print URL'
-complete --command youtube-dl --long-option get-title --short-option e --description 'Simulate, quiet but print title'
-complete --command youtube-dl --long-option get-id --description 'Simulate, quiet but print id'
-complete --command youtube-dl --long-option get-thumbnail --description 'Simulate, quiet but print thumbnail URL'
-complete --command youtube-dl --long-option get-description --description 'Simulate, quiet but print video description'
-complete --command youtube-dl --long-option get-duration --description 'Simulate, quiet but print video length'
-complete --command youtube-dl --long-option get-filename --description 'Simulate, quiet but print output filename'
-complete --command youtube-dl --long-option get-format --description 'Simulate, quiet but print output format'
-complete --command youtube-dl --long-option dump-json --short-option j --description 'Simulate, quiet but print JSON information. See --output for a description of available keys.'
-complete --command youtube-dl --long-option dump-single-json --short-option J --description 'Simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.'
-complete --command youtube-dl --long-option print-json --description 'Be quiet and print the video information as JSON (video is still being downloaded).'
-complete --command youtube-dl --long-option newline --description 'Output progress bar as new lines'
-complete --command youtube-dl --long-option no-progress --description 'Do not print progress bar'
-complete --command youtube-dl --long-option console-title --description 'Display progress in console titlebar'
-complete --command youtube-dl --long-option verbose --short-option v --description 'Print various debugging information'
-complete --command youtube-dl --long-option dump-pages --description 'Print downloaded pages encoded using base64 to debug problems (very verbose)'
-complete --command youtube-dl --long-option write-pages --description 'Write downloaded intermediary pages to files in the current directory to debug problems'
-complete --command youtube-dl --long-option youtube-print-sig-code
-complete --command youtube-dl --long-option print-traffic --description 'Display sent and read HTTP traffic'
-complete --command youtube-dl --long-option call-home --short-option C --description 'Contact the youtube-dl server for debugging'
-complete --command youtube-dl --long-option no-call-home --description 'Do NOT contact the youtube-dl server for debugging'
-complete --command youtube-dl --long-option encoding --description 'Force the specified encoding (experimental)'
-complete --command youtube-dl --long-option no-check-certificate --description 'Suppress HTTPS certificate validation'
-complete --command youtube-dl --long-option prefer-insecure --description 'Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)'
-complete --command youtube-dl --long-option user-agent --description 'Specify a custom user agent'
-complete --command youtube-dl --long-option referer --description 'Specify a custom referer, use if the video access is restricted to one domain'
-complete --command youtube-dl --long-option add-header --description 'Specify a custom HTTP header and its value, separated by a colon '"'"':'"'"'. You can use this option multiple times'
-complete --command youtube-dl --long-option bidi-workaround --description 'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH'
-complete --command youtube-dl --long-option sleep-interval --description 'Number of seconds to sleep before each download.'
-complete --command youtube-dl --long-option format --short-option f --description 'Video format code, see the "FORMAT SELECTION" for all the info'
-complete --command youtube-dl --long-option all-formats --description 'Download all available video formats'
-complete --command youtube-dl --long-option prefer-free-formats --description 'Prefer free video formats unless a specific one is requested'
-complete --command youtube-dl --long-option list-formats --short-option F --description 'List all available formats of requested videos'
-complete --command youtube-dl --long-option youtube-include-dash-manifest
-complete --command youtube-dl --long-option youtube-skip-dash-manifest --description 'Do not download the DASH manifests and related data on YouTube videos'
-complete --command youtube-dl --long-option merge-output-format --description 'If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv. Ignored if no merge is required'
-complete --command youtube-dl --long-option write-sub --description 'Write subtitle file'
-complete --command youtube-dl --long-option write-auto-sub --description 'Write automatically generated subtitle file (YouTube only)'
-complete --command youtube-dl --long-option all-subs --description 'Download all the available subtitles of the video'
-complete --command youtube-dl --long-option list-subs --description 'List all available subtitles for the video'
-complete --command youtube-dl --long-option sub-format --description 'Subtitle format, accepts formats preference, for example: "srt" or "ass/srt/best"'
-complete --command youtube-dl --long-option sub-lang --description 'Languages of the subtitles to download (optional) separated by commas, use --list-subs for available language tags'
-complete --command youtube-dl --long-option username --short-option u --description 'Login with this account ID'
-complete --command youtube-dl --long-option password --short-option p --description 'Account password. If this option is left out, youtube-dl will ask interactively.'
-complete --command youtube-dl --long-option twofactor --short-option 2 --description 'Two-factor auth code'
-complete --command youtube-dl --long-option netrc --short-option n --description 'Use .netrc authentication data'
-complete --command youtube-dl --long-option video-password --description 'Video password (vimeo, smotri, youku)'
-complete --command youtube-dl --long-option extract-audio --short-option x --description 'Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)'
-complete --command youtube-dl --long-option audio-format --description 'Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default'
-complete --command youtube-dl --long-option audio-quality --description 'Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default %default)'
-complete --command youtube-dl --long-option recode-video --description 'Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv|avi)' --arguments 'mp4 flv ogg webm mkv' --exclusive
-complete --command youtube-dl --long-option postprocessor-args --description 'Give these arguments to the postprocessor'
-complete --command youtube-dl --long-option keep-video --short-option k --description 'Keep the video file on disk after the post-processing; the video is erased by default'
-complete --command youtube-dl --long-option no-post-overwrites --description 'Do not overwrite post-processed files; the post-processed files are overwritten by default'
-complete --command youtube-dl --long-option embed-subs --description 'Embed subtitles in the video (only for mkv and mp4 videos)'
-complete --command youtube-dl --long-option embed-thumbnail --description 'Embed thumbnail in the audio as cover art'
-complete --command youtube-dl --long-option add-metadata --description 'Write metadata to the video file'
-complete --command youtube-dl --long-option metadata-from-title --description 'Parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed parameters replace existing values. Additional templates: %(album)s, %(artist)s. Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like "Coldplay - Paradise"'
-complete --command youtube-dl --long-option xattrs --description 'Write metadata to the video file'"'"'s xattrs (using dublin core and xdg standards)'
-complete --command youtube-dl --long-option fixup --description 'Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn (the default; fix file if we can, warn otherwise)'
-complete --command youtube-dl --long-option prefer-avconv --description 'Prefer avconv over ffmpeg for running the postprocessors (default)'
-complete --command youtube-dl --long-option prefer-ffmpeg --description 'Prefer ffmpeg over avconv for running the postprocessors'
-complete --command youtube-dl --long-option ffmpeg-location --description 'Location of the ffmpeg/avconv binary; either the path to the binary or its containing directory.'
-complete --command youtube-dl --long-option exec --description 'Execute a command on the file after downloading, similar to find'"'"'s -exec syntax. Example: --exec '"'"'adb push {} /sdcard/Music/ && rm {}'"'"''
-complete --command youtube-dl --long-option convert-subs --description 'Convert the subtitles to other format (currently supported: srt|ass|vtt)'
-
-
-complete --command youtube-dl --arguments ":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
diff --git a/youtube-dl.plugin.zsh b/youtube-dl.plugin.zsh
new file mode 100644 (file)
index 0000000..4edab52
--- /dev/null
@@ -0,0 +1,24 @@
+# This allows the youtube-dl command to be installed in ZSH using antigen.
+# Antigen is a bundle manager. It allows you to enhance the functionality of
+# your zsh session by installing bundles and themes easily.
+
+# Antigen documentation:
+# http://antigen.sharats.me/
+# https://github.com/zsh-users/antigen
+
+# Install youtube-dl:
+# antigen bundle rg3/youtube-dl
+# Bundles installed by antigen are available for use immediately.
+
+# Update youtube-dl (and all other antigen bundles):
+# antigen update
+
+# The antigen command will download the git repository to a folder and then
+# execute an enabling script (this file). The complete process for loading the
+# code is documented here:
+# https://github.com/zsh-users/antigen#notes-on-writing-plugins
+
+# This specific script just aliases youtube-dl to the python script that this
+# library provides. This requires updating the PYTHONPATH to ensure that the
+# full set of code can be located.
+alias youtube-dl="PYTHONPATH=$(dirname $0) $(dirname $0)/bin/youtube-dl"
diff --git a/youtube-dl.zsh b/youtube-dl.zsh
deleted file mode 100644 (file)
index 66ac24a..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-#compdef youtube-dl
-
-__youtube_dl() {
-    local curcontext="$curcontext" fileopts diropts cur prev
-    typeset -A opt_args
-    fileopts="--download-archive|-a|--batch-file|--load-info|--cookies"
-    diropts="--cache-dir"
-    cur=$words[CURRENT]
-    case $cur in
-        :)
-            _arguments '*: :(::ytfavorites ::ytrecommended ::ytsubscriptions ::ytwatchlater ::ythistory)'
-        ;;
-        *)
-            prev=$words[CURRENT-1]
-            if [[ ${prev} =~ ${fileopts} ]]; then
-                _path_files
-            elif [[ ${prev} =~ ${diropts} ]]; then
-                _path_files -/
-            elif [[ ${prev} == "--recode-video" ]]; then
-                _arguments '*: :(mp4 flv ogg webm mkv)'
-            else
-                _arguments '*: :(--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --force-generic-extractor --default-search --ignore-config --flat-playlist --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --cn-verification-proxy --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --rate-limit --retries --buffer-size --no-resize-buffer --test --playlist-reverse --xattr-set-filesize --hls-prefer-native --hls-use-mpegts --external-downloader --external-downloader-args --batch-file --id --output --autonumber-size --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --format --all-formats --prefer-free-formats --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --postprocessor-args --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --metadata-from-title --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subs)'
-            fi
-        ;;
-    esac
-}
-
-__youtube_dl
\ No newline at end of file
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index f4324039c72ec656f67bf536ee7856aae464cc01..5036289b062f9ee05ab5c872c76ab38babcb4a54 100755 (executable)
@@ -24,9 +24,6 @@ import time
 import tokenize
 import traceback
 
-if os.name == 'nt':
-    import ctypes
-
 from .compat import (
     compat_basestring,
     compat_cookiejar,
@@ -34,6 +31,7 @@ from .compat import (
     compat_get_terminal_size,
     compat_http_client,
     compat_kwargs,
+    compat_os_name,
     compat_str,
     compat_tokenize_tokenize,
     compat_urllib_error,
@@ -41,6 +39,8 @@ from .compat import (
     compat_urllib_request_DataHandler,
 )
 from .utils import (
+    age_restricted,
+    args_to_str,
     ContentTooShortError,
     date_from_str,
     DateRange,
@@ -60,13 +60,17 @@ from .utils import (
     PagedList,
     parse_filesize,
     PerRequestProxyHandler,
-    PostProcessingError,
     platform_name,
+    PostProcessingError,
     preferredencoding,
+    prepend_extension,
+    register_socks_protocols,
     render_table,
+    replace_extension,
     SameFileError,
     sanitize_filename,
     sanitize_path,
+    sanitize_url,
     sanitized_Request,
     std_headers,
     subtitles_filename,
@@ -77,16 +81,13 @@ from .utils import (
     write_string,
     YoutubeDLCookieProcessor,
     YoutubeDLHandler,
-    prepend_extension,
-    replace_extension,
-    args_to_str,
-    age_restricted,
 )
 from .cache import Cache
-from .extractor import get_info_extractor, gen_extractors
+from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
 from .downloader import get_suitable_downloader
 from .downloader.rtmp import rtmpdump_version
 from .postprocessor import (
+    FFmpegFixupM3u8PP,
     FFmpegFixupM4aPP,
     FFmpegFixupStretchedPP,
     FFmpegMergerPP,
@@ -95,6 +96,9 @@ from .postprocessor import (
 )
 from .version import __version__
 
+if compat_os_name == 'nt':
+    import ctypes
+
 
 class YoutubeDL(object):
     """YoutubeDL class.
@@ -257,7 +261,9 @@ class YoutubeDL(object):
     The following options determine which downloader is picked:
     external_downloader: Executable of the external downloader to call.
                        None or unset for standard (built-in) downloader.
-    hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
+    hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
+                       if True; use ffmpeg/avconv if False; use the downloader
+                       suggested by the extractor if None.
 
     The following parameters are not used by YoutubeDL itself, they are used by
     the downloader (see youtube_dl/downloader/common.py):
@@ -320,7 +326,7 @@ class YoutubeDL(object):
                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                 self._output_channel = os.fdopen(master, 'rb')
             except OSError as ose:
-                if ose.errno == 2:
+                if ose.errno == errno.ENOENT:
                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
                 else:
                     raise
@@ -356,6 +362,8 @@ class YoutubeDL(object):
         for ph in self.params.get('progress_hooks', []):
             self.add_progress_hook(ph)
 
+        register_socks_protocols()
+
     def warn_if_short_id(self, argv):
         # short YouTube ID starting with dash?
         idxs = [
@@ -375,8 +383,9 @@ class YoutubeDL(object):
     def add_info_extractor(self, ie):
         """Add an InfoExtractor object to the end of the list."""
         self._ies.append(ie)
-        self._ies_instances[ie.ie_key()] = ie
-        ie.set_downloader(self)
+        if not isinstance(ie, type):
+            self._ies_instances[ie.ie_key()] = ie
+            ie.set_downloader(self)
 
     def get_info_extractor(self, ie_key):
         """
@@ -394,7 +403,7 @@ class YoutubeDL(object):
         """
         Add the InfoExtractors returned by gen_extractors to the end of the list
         """
-        for ie in gen_extractors():
+        for ie in gen_extractor_classes():
             self.add_info_extractor(ie)
 
     def add_post_processor(self, pp):
@@ -450,7 +459,7 @@ class YoutubeDL(object):
     def to_console_title(self, message):
         if not self.params.get('consoletitle', False):
             return
-        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
+        if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
             # c_wchar_p() might not be necessary if `message` is
             # already of type unicode()
             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
@@ -521,7 +530,7 @@ class YoutubeDL(object):
         else:
             if self.params.get('no_warnings'):
                 return
-            if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
+            if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
                 _msg_header = '\033[0;33mWARNING:\033[0m'
             else:
                 _msg_header = 'WARNING:'
@@ -533,7 +542,7 @@ class YoutubeDL(object):
         Do the same as trouble, but prefixes the message with 'ERROR:', colored
         in red if stderr is a tty file.
         '''
-        if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
+        if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
             _msg_header = '\033[0;31mERROR:\033[0m'
         else:
             _msg_header = 'ERROR:'
@@ -566,7 +575,7 @@ class YoutubeDL(object):
                 elif template_dict.get('height'):
                     template_dict['resolution'] = '%sp' % template_dict['height']
                 elif template_dict.get('width'):
-                    template_dict['resolution'] = '?x%d' % template_dict['width']
+                    template_dict['resolution'] = '%dx?' % template_dict['width']
 
             sanitize = lambda k, v: sanitize_filename(
                 compat_str(v),
@@ -574,7 +583,7 @@ class YoutubeDL(object):
                 is_id=(k == 'id'))
             template_dict = dict((k, sanitize(k, v))
                                  for k, v in template_dict.items()
-                                 if v is not None)
+                                 if v is not None and not isinstance(v, (list, tuple, dict)))
             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 
             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
@@ -658,6 +667,7 @@ class YoutubeDL(object):
             if not ie.suitable(url):
                 continue
 
+            ie = self.get_info_extractor(ie.ie_key())
             if not ie.working():
                 self.report_warning('The program functionality for this site has been marked as broken, '
                                     'and will probably not work.')
@@ -710,6 +720,7 @@ class YoutubeDL(object):
         result_type = ie_result.get('_type', 'video')
 
         if result_type in ('url', 'url_transparent'):
+            ie_result['url'] = sanitize_url(ie_result['url'])
             extract_flat = self.params.get('extract_flat', False)
             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                     extract_flat is True):
@@ -903,7 +914,7 @@ class YoutubeDL(object):
                 '*=': lambda attr, value: value in attr,
             }
             str_operator_rex = re.compile(r'''(?x)
-                \s*(?P<key>ext|acodec|vcodec|container|protocol)
+                \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
                 \s*(?P<value>[a-zA-Z0-9._-]+)
                 \s*$
@@ -1212,6 +1223,10 @@ class YoutubeDL(object):
         if 'title' not in info_dict:
             raise ExtractorError('Missing "title" field in extractor result')
 
+        if not isinstance(info_dict['id'], compat_str):
+            self.report_warning('"id" field is not a string - forcing string conversion')
+            info_dict['id'] = compat_str(info_dict['id'])
+
         if 'playlist' not in info_dict:
             # It isn't part of a playlist
             info_dict['playlist'] = None
@@ -1227,12 +1242,20 @@ class YoutubeDL(object):
                 t.get('preference'), t.get('width'), t.get('height'),
                 t.get('id'), t.get('url')))
             for i, t in enumerate(thumbnails):
+                t['url'] = sanitize_url(t['url'])
                 if t.get('width') and t.get('height'):
                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
                 if t.get('id') is None:
                     t['id'] = '%d' % i
 
-        if thumbnails and 'thumbnail' not in info_dict:
+        if self.params.get('list_thumbnails'):
+            self.list_thumbnails(info_dict)
+            return
+
+        thumbnail = info_dict.get('thumbnail')
+        if thumbnail:
+            info_dict['thumbnail'] = sanitize_url(thumbnail)
+        elif thumbnails:
             info_dict['thumbnail'] = thumbnails[-1]['url']
 
         if 'display_id' not in info_dict and 'id' in info_dict:
@@ -1257,6 +1280,8 @@ class YoutubeDL(object):
         if subtitles:
             for _, subtitle in subtitles.items():
                 for subtitle_format in subtitle:
+                    if subtitle_format.get('url'):
+                        subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                     if 'ext' not in subtitle_format:
                         subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
 
@@ -1286,6 +1311,8 @@ class YoutubeDL(object):
             if 'url' not in format:
                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
 
+            format['url'] = sanitize_url(format['url'])
+
             if format.get('format_id') is None:
                 format['format_id'] = compat_str(i)
             else:
@@ -1333,9 +1360,6 @@ class YoutubeDL(object):
         if self.params.get('listformats'):
             self.list_formats(info_dict)
             return
-        if self.params.get('list_thumbnails'):
-            self.list_thumbnails(info_dict)
-            return
 
         req_format = self.params.get('format')
         if req_format is None:
@@ -1623,7 +1647,7 @@ class YoutubeDL(object):
                     # Just a single file
                     success = dl(filename, info_dict)
             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-                self.report_error('unable to download video data: %s' % str(err))
+                self.report_error('unable to download video data: %s' % error_to_compat_str(err))
                 return
             except (OSError, IOError) as err:
                 raise UnavailableVideoError(err)
@@ -1631,12 +1655,14 @@ class YoutubeDL(object):
                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                 return
 
-            if success:
+            if success and filename != '-':
                 # Fixup content
                 fixup_policy = self.params.get('fixup')
                 if fixup_policy is None:
                     fixup_policy = 'detect_or_warn'
 
+                INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
+
                 stretched_ratio = info_dict.get('stretched_ratio')
                 if stretched_ratio is not None and stretched_ratio != 1:
                     if fixup_policy == 'warn':
@@ -1649,15 +1675,18 @@ class YoutubeDL(object):
                             info_dict['__postprocessors'].append(stretched_pp)
                         else:
                             self.report_warning(
-                                '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
-                                    info_dict['id'], stretched_ratio))
+                                '%s: Non-uniform pixel ratio (%s). %s'
+                                % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
                     else:
                         assert fixup_policy in ('ignore', 'never')
 
-                if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
+                if (info_dict.get('requested_formats') is None and
+                        info_dict.get('container') == 'm4a_dash'):
                     if fixup_policy == 'warn':
-                        self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
-                            info_dict['id']))
+                        self.report_warning(
+                            '%s: writing DASH m4a. '
+                            'Only some players support this container.'
+                            % info_dict['id'])
                     elif fixup_policy == 'detect_or_warn':
                         fixup_pp = FFmpegFixupM4aPP(self)
                         if fixup_pp.available:
@@ -1665,8 +1694,27 @@ class YoutubeDL(object):
                             info_dict['__postprocessors'].append(fixup_pp)
                         else:
                             self.report_warning(
-                                '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
-                                    info_dict['id']))
+                                '%s: writing DASH m4a. '
+                                'Only some players support this container. %s'
+                                % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
+                    else:
+                        assert fixup_policy in ('ignore', 'never')
+
+                if (info_dict.get('protocol') == 'm3u8_native' or
+                        info_dict.get('protocol') == 'm3u8' and
+                        self.params.get('hls_prefer_native')):
+                    if fixup_policy == 'warn':
+                        self.report_warning('%s: malformed aac bitstream.' % (
+                            info_dict['id']))
+                    elif fixup_policy == 'detect_or_warn':
+                        fixup_pp = FFmpegFixupM3u8PP(self)
+                        if fixup_pp.available:
+                            info_dict.setdefault('__postprocessors', [])
+                            info_dict['__postprocessors'].append(fixup_pp)
+                        else:
+                            self.report_warning(
+                                '%s: malformed aac bitstream. %s'
+                                % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                     else:
                         assert fixup_policy in ('ignore', 'never')
 
@@ -1809,7 +1857,7 @@ class YoutubeDL(object):
         if fdict.get('language'):
             if res:
                 res += ' '
-            res += '[%s]' % fdict['language']
+            res += '[%s] ' % fdict['language']
         if fdict.get('format_note') is not None:
             res += fdict['format_note'] + ' '
         if fdict.get('tbr') is not None:
@@ -1830,7 +1878,9 @@ class YoutubeDL(object):
         if fdict.get('vbr') is not None:
             res += '%4dk' % fdict['vbr']
         if fdict.get('fps') is not None:
-            res += ', %sfps' % fdict['fps']
+            if res:
+                res += ', '
+            res += '%sfps' % fdict['fps']
         if fdict.get('acodec') is not None:
             if res:
                 res += ', '
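
The fps change brings that field in line with the acodec branch just below it: the ', ' separator is only added when something already precedes it, so a description that happens to start with the frame rate no longer begins with a dangling comma. For example:

```python
res = ''          # nothing printed before the frame rate
fps = 30
if fps is not None:
    if res:
        res += ', '
    res += '%sfps' % fps
# res == '30fps' (previously ', 30fps')
```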
@@ -1873,13 +1923,8 @@ class YoutubeDL(object):
     def list_thumbnails(self, info_dict):
         thumbnails = info_dict.get('thumbnails')
         if not thumbnails:
-            tn_url = info_dict.get('thumbnail')
-            if tn_url:
-                thumbnails = [{'id': '0', 'url': tn_url}]
-            else:
-                self.to_screen(
-                    '[info] No thumbnails present for %s' % info_dict['id'])
-                return
+            self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
+            return
 
         self.to_screen(
             '[info] Thumbnails for %s:' % info_dict['id'])
@@ -1924,6 +1969,8 @@ class YoutubeDL(object):
         write_string(encoding_str, encoding=None)
 
         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
+        if _LAZY_LOADER:
+            self._write_string('[debug] Lazy loading extractors enabled' + '\n')
         try:
             sp = subprocess.Popen(
                 ['git', 'rev-parse', '--short', 'HEAD'],
@@ -1979,6 +2026,7 @@ class YoutubeDL(object):
         if opts_cookiefile is None:
             self.cookiejar = compat_cookiejar.CookieJar()
         else:
+            opts_cookiefile = compat_expanduser(opts_cookiefile)
             self.cookiejar = compat_cookiejar.MozillaCookieJar(
                 opts_cookiefile)
             if os.access(opts_cookiefile, os.R_OK):
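
Expanding the cookie-file path means --cookies ~/cookies.txt now works even when the shell never saw the tilde (typical for paths read from a configuration file). compat_expanduser is youtube-dl's portable wrapper around os.path.expanduser; a usage sketch with a hypothetical path:

```python
from youtube_dl.compat import compat_expanduser

# '~' is resolved against HOME (or the Windows equivalents) before the
# MozillaCookieJar is constructed, so home-relative paths load correctly.
cookie_path = compat_expanduser('~/cookies.txt')
```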
index f5f06424146f88fa045a26db569cbe7fa42da586..4905674ad26f29323264cdcb188f1c9aed621a0a 100644 (file)
@@ -18,7 +18,6 @@ from .options import (
 from .compat import (
     compat_expanduser,
     compat_getpass,
-    compat_print,
     compat_shlex_split,
     workaround_optparse_bug9161,
 )
@@ -67,16 +66,16 @@ def _real_main(argv=None):
     # Custom HTTP headers
     if opts.headers is not None:
         for h in opts.headers:
-            if h.find(':', 1) < 0:
+            if ':' not in h:
                 parser.error('wrong header formatting, it should be key:value, not "%s"' % h)
-            key, value = h.split(':', 2)
+            key, value = h.split(':', 1)
             if opts.verbose:
                 write_string('[debug] Adding header from command line option %s:%s\n' % (key, value))
             std_headers[key] = value
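
The membership test replaces the opaque h.find(':', 1) < 0, and h.split(':', 1) fixes a real bug: split(':', 2) splits on up to two colons, so any value containing one (a Referer URL, say) made the two-name unpacking raise ValueError. Splitting once keeps everything after the first colon intact:

```python
h = 'Referer:http://example.com/page'
h.split(':', 2)  # ['Referer', 'http', '//example.com/page'] -> unpack fails
key, value = h.split(':', 1)
# key == 'Referer', value == 'http://example.com/page'
```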
 
     # Dump user agent
     if opts.dump_user_agent:
-        compat_print(std_headers['User-Agent'])
+        write_string(std_headers['User-Agent'] + '\n', out=sys.stdout)
         sys.exit(0)
 
     # Batch file verification
@@ -86,7 +85,9 @@ def _real_main(argv=None):
             if opts.batchfile == '-':
                 batchfd = sys.stdin
             else:
-                batchfd = io.open(opts.batchfile, 'r', encoding='utf-8', errors='ignore')
+                batchfd = io.open(
+                    compat_expanduser(opts.batchfile),
+                    'r', encoding='utf-8', errors='ignore')
             batch_urls = read_batch_urls(batchfd)
             if opts.verbose:
                 write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
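
As with the cookie file, the batch-file path is expanded so --batch-file ~/urls.txt works from config files too. A hedged usage sketch of the same call path (path hypothetical; read_batch_urls lives in youtube_dl.utils and skips comments and blank lines):

```python
import io
from youtube_dl.compat import compat_expanduser
from youtube_dl.utils import read_batch_urls

with io.open(compat_expanduser('~/urls.txt'), 'r',
             encoding='utf-8', errors='ignore') as batchfd:
    batch_urls = read_batch_urls(batchfd)
```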
@@ -99,10 +100,10 @@ def _real_main(argv=None):
 
     if opts.list_extractors:
         for ie in list_extractors(opts.age_limit):
-            compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
+            write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '') + '\n', out=sys.stdout)
             matchedUrls = [url for url in all_urls if ie.suitable(url)]
             for mu in matchedUrls:
-                compat_print('  ' + mu)
+                write_string('  ' + mu + '\n', out=sys.stdout)
         sys.exit(0)
     if opts.list_extractor_descriptions:
         for ie in list_extractors(opts.age_limit):
@@ -115,7 +116,7 @@ def _real_main(argv=None):
                 _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow')
                 _COUNTS = ('', '5', '10', 'all')
                 desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
-            compat_print(desc)
+            write_string(desc + '\n', out=sys.stdout)
         sys.exit(0)
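
compat_print is being phased out in favour of write_string(..., out=sys.stdout): one helper that handles the Python 2/3 byte/unicode output differences and takes an explicit stream (its default is stderr, hence the out= argument everywhere above). A minimal usage sketch:

```python
import sys
from youtube_dl.utils import write_string

# Listing output is user-facing data, so it goes to stdout explicitly;
# diagnostics keep write_string's default stream (stderr).
write_string('youtube:search' + '\n', out=sys.stdout)
```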
 
     # Conflicting, missing and erroneous options
@@ -144,14 +145,20 @@ def _real_main(argv=None):
         if numeric_limit is None:
             parser.error('invalid max_filesize specified')
         opts.max_filesize = numeric_limit
-    if opts.retries is not None:
-        if opts.retries in ('inf', 'infinite'):
-            opts_retries = float('inf')
+
+    def parse_retries(retries):
+        if retries in ('inf', 'infinite'):
+            parsed_retries = float('inf')
         else:
             try:
-                opts_retries = int(opts.retries)
+                parsed_retries = int(retries)
             except (TypeError, ValueError):
                 parser.error('invalid retry count specified')
+        return parsed_retries
+    if opts.retries is not None:
+        opts.retries = parse_retries(opts.retries)
+    if opts.fragment_retries is not None:
+        opts.fragment_retries = parse_retries(opts.fragment_retries)
     if opts.buffersize is not None:
         numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
         if numeric_buffersize is None:
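
Factoring the parsing into parse_retries lets the new --fragment-retries option share the 'inf'/'infinite'/integer handling with --retries; both are normalized in place on the opts object. A standalone version of the closure (parser.error swapped for ValueError so the sketch runs outside optparse):

```python
def parse_retries(retries):
    if retries in ('inf', 'infinite'):
        return float('inf')
    try:
        return int(retries)
    except (TypeError, ValueError):
        raise ValueError('invalid retry count specified')

parse_retries('inf')  # float('inf') -> retry forever
parse_retries('10')   # 10
```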
@@ -299,7 +306,8 @@ def _real_main(argv=None):
         'force_generic_extractor': opts.force_generic_extractor,
         'ratelimit': opts.ratelimit,
         'nooverwrites': opts.nooverwrites,
-        'retries': opts_retries,
+        'retries': opts.retries,
+        'fragment_retries': opts.fragment_retries,
         'buffersize': opts.buffersize,
         'noresizebuffer': opts.noresizebuffer,
         'continuedl': opts.continue_dl,
@@ -355,6 +363,7 @@ def _real_main(argv=None):
         'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
         'encoding': opts.encoding,
         'extract_flat': opts.extract_flat,
+        'mark_watched': opts.mark_watched,
         'merge_output_format': opts.merge_output_format,
         'postprocessors': postprocessors,
         'fixup': opts.fixup,
@@ -396,7 +405,7 @@ def _real_main(argv=None):
 
         try:
             if opts.load_info_filename is not None:
-                retcode = ydl.download_with_info_file(opts.load_info_filename)
+                retcode = ydl.download_with_info_file(compat_expanduser(opts.load_info_filename))
             else:
                 retcode = ydl.download(all_urls)
         except MaxDownloadsReached:
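
With the expansion in place, --load-info '~/video.info.json' resolves the home directory before the info file is read. A hedged sketch of the same call path (file name hypothetical; the JSON comes from an earlier --write-info-json run):

```python
from youtube_dl import YoutubeDL
from youtube_dl.compat import compat_expanduser

ydl = YoutubeDL({'outtmpl': '%(id)s.%(ext)s'})
retcode = ydl.download_with_info_file(
    compat_expanduser('~/video.info.json'))
```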
index b497da6964944b5d47e0805f9e872357b6c11c74..67db1c7c6595b0f219d393da7df1363c48103f26 100644 (file)
@@ -11,6 +11,7 @@ import re
 import shlex
 import shutil
 import socket
+import struct
 import subprocess
 import sys
 import itertools
@@ -62,6 +63,2244 @@ try:
 except ImportError:  # Python 2
     import htmlentitydefs as compat_html_entities
 
+try:  # Python >= 3.3
+    compat_html_entities_html5 = compat_html_entities.html5
+except AttributeError:
+    # Copied from CPython 3.5.1 html/entities.py
+    compat_html_entities_html5 = {
+        'Aacute': '\xc1',
+        'aacute': '\xe1',
+        'Aacute;': '\xc1',
+        'aacute;': '\xe1',
+        'Abreve;': '\u0102',
+        'abreve;': '\u0103',
+        'ac;': '\u223e',
+        'acd;': '\u223f',
+        'acE;': '\u223e\u0333',
+        'Acirc': '\xc2',
+        'acirc': '\xe2',
+        'Acirc;': '\xc2',
+        'acirc;': '\xe2',
+        'acute': '\xb4',
+        'acute;': '\xb4',
+        'Acy;': '\u0410',
+        'acy;': '\u0430',
+        'AElig': '\xc6',
+        'aelig': '\xe6',
+        'AElig;': '\xc6',
+        'aelig;': '\xe6',
+        'af;': '\u2061',
+        'Afr;': '\U0001d504',
+        'afr;': '\U0001d51e',
+        'Agrave': '\xc0',
+        'agrave': '\xe0',
+        'Agrave;': '\xc0',
+        'agrave;': '\xe0',
+        'alefsym;': '\u2135',
+        'aleph;': '\u2135',
+        'Alpha;': '\u0391',
+        'alpha;': '\u03b1',
+        'Amacr;': '\u0100',
+        'amacr;': '\u0101',
+        'amalg;': '\u2a3f',
+        'AMP': '&',
+        'amp': '&',
+        'AMP;': '&',
+        'amp;': '&',
+        'And;': '\u2a53',
+        'and;': '\u2227',
+        'andand;': '\u2a55',
+        'andd;': '\u2a5c',
+        'andslope;': '\u2a58',
+        'andv;': '\u2a5a',
+        'ang;': '\u2220',
+        'ange;': '\u29a4',
+        'angle;': '\u2220',
+        'angmsd;': '\u2221',
+        'angmsdaa;': '\u29a8',
+        'angmsdab;': '\u29a9',
+        'angmsdac;': '\u29aa',
+        'angmsdad;': '\u29ab',
+        'angmsdae;': '\u29ac',
+        'angmsdaf;': '\u29ad',
+        'angmsdag;': '\u29ae',
+        'angmsdah;': '\u29af',
+        'angrt;': '\u221f',
+        'angrtvb;': '\u22be',
+        'angrtvbd;': '\u299d',
+        'angsph;': '\u2222',
+        'angst;': '\xc5',
+        'angzarr;': '\u237c',
+        'Aogon;': '\u0104',
+        'aogon;': '\u0105',
+        'Aopf;': '\U0001d538',
+        'aopf;': '\U0001d552',
+        'ap;': '\u2248',
+        'apacir;': '\u2a6f',
+        'apE;': '\u2a70',
+        'ape;': '\u224a',
+        'apid;': '\u224b',
+        'apos;': "'",
+        'ApplyFunction;': '\u2061',
+        'approx;': '\u2248',
+        'approxeq;': '\u224a',
+        'Aring': '\xc5',
+        'aring': '\xe5',
+        'Aring;': '\xc5',
+        'aring;': '\xe5',
+        'Ascr;': '\U0001d49c',
+        'ascr;': '\U0001d4b6',
+        'Assign;': '\u2254',
+        'ast;': '*',
+        'asymp;': '\u2248',
+        'asympeq;': '\u224d',
+        'Atilde': '\xc3',
+        'atilde': '\xe3',
+        'Atilde;': '\xc3',
+        'atilde;': '\xe3',
+        'Auml': '\xc4',
+        'auml': '\xe4',
+        'Auml;': '\xc4',
+        'auml;': '\xe4',
+        'awconint;': '\u2233',
+        'awint;': '\u2a11',
+        'backcong;': '\u224c',
+        'backepsilon;': '\u03f6',
+        'backprime;': '\u2035',
+        'backsim;': '\u223d',
+        'backsimeq;': '\u22cd',
+        'Backslash;': '\u2216',
+        'Barv;': '\u2ae7',
+        'barvee;': '\u22bd',
+        'Barwed;': '\u2306',
+        'barwed;': '\u2305',
+        'barwedge;': '\u2305',
+        'bbrk;': '\u23b5',
+        'bbrktbrk;': '\u23b6',
+        'bcong;': '\u224c',
+        'Bcy;': '\u0411',
+        'bcy;': '\u0431',
+        'bdquo;': '\u201e',
+        'becaus;': '\u2235',
+        'Because;': '\u2235',
+        'because;': '\u2235',
+        'bemptyv;': '\u29b0',
+        'bepsi;': '\u03f6',
+        'bernou;': '\u212c',
+        'Bernoullis;': '\u212c',
+        'Beta;': '\u0392',
+        'beta;': '\u03b2',
+        'beth;': '\u2136',
+        'between;': '\u226c',
+        'Bfr;': '\U0001d505',
+        'bfr;': '\U0001d51f',
+        'bigcap;': '\u22c2',
+        'bigcirc;': '\u25ef',
+        'bigcup;': '\u22c3',
+        'bigodot;': '\u2a00',
+        'bigoplus;': '\u2a01',
+        'bigotimes;': '\u2a02',
+        'bigsqcup;': '\u2a06',
+        'bigstar;': '\u2605',
+        'bigtriangledown;': '\u25bd',
+        'bigtriangleup;': '\u25b3',
+        'biguplus;': '\u2a04',
+        'bigvee;': '\u22c1',
+        'bigwedge;': '\u22c0',
+        'bkarow;': '\u290d',
+        'blacklozenge;': '\u29eb',
+        'blacksquare;': '\u25aa',
+        'blacktriangle;': '\u25b4',
+        'blacktriangledown;': '\u25be',
+        'blacktriangleleft;': '\u25c2',
+        'blacktriangleright;': '\u25b8',
+        'blank;': '\u2423',
+        'blk12;': '\u2592',
+        'blk14;': '\u2591',
+        'blk34;': '\u2593',
+        'block;': '\u2588',
+        'bne;': '=\u20e5',
+        'bnequiv;': '\u2261\u20e5',
+        'bNot;': '\u2aed',
+        'bnot;': '\u2310',
+        'Bopf;': '\U0001d539',
+        'bopf;': '\U0001d553',
+        'bot;': '\u22a5',
+        'bottom;': '\u22a5',
+        'bowtie;': '\u22c8',
+        'boxbox;': '\u29c9',
+        'boxDL;': '\u2557',
+        'boxDl;': '\u2556',
+        'boxdL;': '\u2555',
+        'boxdl;': '\u2510',
+        'boxDR;': '\u2554',
+        'boxDr;': '\u2553',
+        'boxdR;': '\u2552',
+        'boxdr;': '\u250c',
+        'boxH;': '\u2550',
+        'boxh;': '\u2500',
+        'boxHD;': '\u2566',
+        'boxHd;': '\u2564',
+        'boxhD;': '\u2565',
+        'boxhd;': '\u252c',
+        'boxHU;': '\u2569',
+        'boxHu;': '\u2567',
+        'boxhU;': '\u2568',
+        'boxhu;': '\u2534',
+        'boxminus;': '\u229f',
+        'boxplus;': '\u229e',
+        'boxtimes;': '\u22a0',
+        'boxUL;': '\u255d',
+        'boxUl;': '\u255c',
+        'boxuL;': '\u255b',
+        'boxul;': '\u2518',
+        'boxUR;': '\u255a',
+        'boxUr;': '\u2559',
+        'boxuR;': '\u2558',
+        'boxur;': '\u2514',
+        'boxV;': '\u2551',
+        'boxv;': '\u2502',
+        'boxVH;': '\u256c',
+        'boxVh;': '\u256b',
+        'boxvH;': '\u256a',
+        'boxvh;': '\u253c',
+        'boxVL;': '\u2563',
+        'boxVl;': '\u2562',
+        'boxvL;': '\u2561',
+        'boxvl;': '\u2524',
+        'boxVR;': '\u2560',
+        'boxVr;': '\u255f',
+        'boxvR;': '\u255e',
+        'boxvr;': '\u251c',
+        'bprime;': '\u2035',
+        'Breve;': '\u02d8',
+        'breve;': '\u02d8',
+        'brvbar': '\xa6',
+        'brvbar;': '\xa6',
+        'Bscr;': '\u212c',
+        'bscr;': '\U0001d4b7',
+        'bsemi;': '\u204f',
+        'bsim;': '\u223d',
+        'bsime;': '\u22cd',
+        'bsol;': '\\',
+        'bsolb;': '\u29c5',
+        'bsolhsub;': '\u27c8',
+        'bull;': '\u2022',
+        'bullet;': '\u2022',
+        'bump;': '\u224e',
+        'bumpE;': '\u2aae',
+        'bumpe;': '\u224f',
+        'Bumpeq;': '\u224e',
+        'bumpeq;': '\u224f',
+        'Cacute;': '\u0106',
+        'cacute;': '\u0107',
+        'Cap;': '\u22d2',
+        'cap;': '\u2229',
+        'capand;': '\u2a44',
+        'capbrcup;': '\u2a49',
+        'capcap;': '\u2a4b',
+        'capcup;': '\u2a47',
+        'capdot;': '\u2a40',
+        'CapitalDifferentialD;': '\u2145',
+        'caps;': '\u2229\ufe00',
+        'caret;': '\u2041',
+        'caron;': '\u02c7',
+        'Cayleys;': '\u212d',
+        'ccaps;': '\u2a4d',
+        'Ccaron;': '\u010c',
+        'ccaron;': '\u010d',
+        'Ccedil': '\xc7',
+        'ccedil': '\xe7',
+        'Ccedil;': '\xc7',
+        'ccedil;': '\xe7',
+        'Ccirc;': '\u0108',
+        'ccirc;': '\u0109',
+        'Cconint;': '\u2230',
+        'ccups;': '\u2a4c',
+        'ccupssm;': '\u2a50',
+        'Cdot;': '\u010a',
+        'cdot;': '\u010b',
+        'cedil': '\xb8',
+        'cedil;': '\xb8',
+        'Cedilla;': '\xb8',
+        'cemptyv;': '\u29b2',
+        'cent': '\xa2',
+        'cent;': '\xa2',
+        'CenterDot;': '\xb7',
+        'centerdot;': '\xb7',
+        'Cfr;': '\u212d',
+        'cfr;': '\U0001d520',
+        'CHcy;': '\u0427',
+        'chcy;': '\u0447',
+        'check;': '\u2713',
+        'checkmark;': '\u2713',
+        'Chi;': '\u03a7',
+        'chi;': '\u03c7',
+        'cir;': '\u25cb',
+        'circ;': '\u02c6',
+        'circeq;': '\u2257',
+        'circlearrowleft;': '\u21ba',
+        'circlearrowright;': '\u21bb',
+        'circledast;': '\u229b',
+        'circledcirc;': '\u229a',
+        'circleddash;': '\u229d',
+        'CircleDot;': '\u2299',
+        'circledR;': '\xae',
+        'circledS;': '\u24c8',
+        'CircleMinus;': '\u2296',
+        'CirclePlus;': '\u2295',
+        'CircleTimes;': '\u2297',
+        'cirE;': '\u29c3',
+        'cire;': '\u2257',
+        'cirfnint;': '\u2a10',
+        'cirmid;': '\u2aef',
+        'cirscir;': '\u29c2',
+        'ClockwiseContourIntegral;': '\u2232',
+        'CloseCurlyDoubleQuote;': '\u201d',
+        'CloseCurlyQuote;': '\u2019',
+        'clubs;': '\u2663',
+        'clubsuit;': '\u2663',
+        'Colon;': '\u2237',
+        'colon;': ':',
+        'Colone;': '\u2a74',
+        'colone;': '\u2254',
+        'coloneq;': '\u2254',
+        'comma;': ',',
+        'commat;': '@',
+        'comp;': '\u2201',
+        'compfn;': '\u2218',
+        'complement;': '\u2201',
+        'complexes;': '\u2102',
+        'cong;': '\u2245',
+        'congdot;': '\u2a6d',
+        'Congruent;': '\u2261',
+        'Conint;': '\u222f',
+        'conint;': '\u222e',
+        'ContourIntegral;': '\u222e',
+        'Copf;': '\u2102',
+        'copf;': '\U0001d554',
+        'coprod;': '\u2210',
+        'Coproduct;': '\u2210',
+        'COPY': '\xa9',
+        'copy': '\xa9',
+        'COPY;': '\xa9',
+        'copy;': '\xa9',
+        'copysr;': '\u2117',
+        'CounterClockwiseContourIntegral;': '\u2233',
+        'crarr;': '\u21b5',
+        'Cross;': '\u2a2f',
+        'cross;': '\u2717',
+        'Cscr;': '\U0001d49e',
+        'cscr;': '\U0001d4b8',
+        'csub;': '\u2acf',
+        'csube;': '\u2ad1',
+        'csup;': '\u2ad0',
+        'csupe;': '\u2ad2',
+        'ctdot;': '\u22ef',
+        'cudarrl;': '\u2938',
+        'cudarrr;': '\u2935',
+        'cuepr;': '\u22de',
+        'cuesc;': '\u22df',
+        'cularr;': '\u21b6',
+        'cularrp;': '\u293d',
+        'Cup;': '\u22d3',
+        'cup;': '\u222a',
+        'cupbrcap;': '\u2a48',
+        'CupCap;': '\u224d',
+        'cupcap;': '\u2a46',
+        'cupcup;': '\u2a4a',
+        'cupdot;': '\u228d',
+        'cupor;': '\u2a45',
+        'cups;': '\u222a\ufe00',
+        'curarr;': '\u21b7',
+        'curarrm;': '\u293c',
+        'curlyeqprec;': '\u22de',
+        'curlyeqsucc;': '\u22df',
+        'curlyvee;': '\u22ce',
+        'curlywedge;': '\u22cf',
+        'curren': '\xa4',
+        'curren;': '\xa4',
+        'curvearrowleft;': '\u21b6',
+        'curvearrowright;': '\u21b7',
+        'cuvee;': '\u22ce',
+        'cuwed;': '\u22cf',
+        'cwconint;': '\u2232',
+        'cwint;': '\u2231',
+        'cylcty;': '\u232d',
+        'Dagger;': '\u2021',
+        'dagger;': '\u2020',
+        'daleth;': '\u2138',
+        'Darr;': '\u21a1',
+        'dArr;': '\u21d3',
+        'darr;': '\u2193',
+        'dash;': '\u2010',
+        'Dashv;': '\u2ae4',
+        'dashv;': '\u22a3',
+        'dbkarow;': '\u290f',
+        'dblac;': '\u02dd',
+        'Dcaron;': '\u010e',
+        'dcaron;': '\u010f',
+        'Dcy;': '\u0414',
+        'dcy;': '\u0434',
+        'DD;': '\u2145',
+        'dd;': '\u2146',
+        'ddagger;': '\u2021',
+        'ddarr;': '\u21ca',
+        'DDotrahd;': '\u2911',
+        'ddotseq;': '\u2a77',
+        'deg': '\xb0',
+        'deg;': '\xb0',
+        'Del;': '\u2207',
+        'Delta;': '\u0394',
+        'delta;': '\u03b4',
+        'demptyv;': '\u29b1',
+        'dfisht;': '\u297f',
+        'Dfr;': '\U0001d507',
+        'dfr;': '\U0001d521',
+        'dHar;': '\u2965',
+        'dharl;': '\u21c3',
+        'dharr;': '\u21c2',
+        'DiacriticalAcute;': '\xb4',
+        'DiacriticalDot;': '\u02d9',
+        'DiacriticalDoubleAcute;': '\u02dd',
+        'DiacriticalGrave;': '`',
+        'DiacriticalTilde;': '\u02dc',
+        'diam;': '\u22c4',
+        'Diamond;': '\u22c4',
+        'diamond;': '\u22c4',
+        'diamondsuit;': '\u2666',
+        'diams;': '\u2666',
+        'die;': '\xa8',
+        'DifferentialD;': '\u2146',
+        'digamma;': '\u03dd',
+        'disin;': '\u22f2',
+        'div;': '\xf7',
+        'divide': '\xf7',
+        'divide;': '\xf7',
+        'divideontimes;': '\u22c7',
+        'divonx;': '\u22c7',
+        'DJcy;': '\u0402',
+        'djcy;': '\u0452',
+        'dlcorn;': '\u231e',
+        'dlcrop;': '\u230d',
+        'dollar;': '$',
+        'Dopf;': '\U0001d53b',
+        'dopf;': '\U0001d555',
+        'Dot;': '\xa8',
+        'dot;': '\u02d9',
+        'DotDot;': '\u20dc',
+        'doteq;': '\u2250',
+        'doteqdot;': '\u2251',
+        'DotEqual;': '\u2250',
+        'dotminus;': '\u2238',
+        'dotplus;': '\u2214',
+        'dotsquare;': '\u22a1',
+        'doublebarwedge;': '\u2306',
+        'DoubleContourIntegral;': '\u222f',
+        'DoubleDot;': '\xa8',
+        'DoubleDownArrow;': '\u21d3',
+        'DoubleLeftArrow;': '\u21d0',
+        'DoubleLeftRightArrow;': '\u21d4',
+        'DoubleLeftTee;': '\u2ae4',
+        'DoubleLongLeftArrow;': '\u27f8',
+        'DoubleLongLeftRightArrow;': '\u27fa',
+        'DoubleLongRightArrow;': '\u27f9',
+        'DoubleRightArrow;': '\u21d2',
+        'DoubleRightTee;': '\u22a8',
+        'DoubleUpArrow;': '\u21d1',
+        'DoubleUpDownArrow;': '\u21d5',
+        'DoubleVerticalBar;': '\u2225',
+        'DownArrow;': '\u2193',
+        'Downarrow;': '\u21d3',
+        'downarrow;': '\u2193',
+        'DownArrowBar;': '\u2913',
+        'DownArrowUpArrow;': '\u21f5',
+        'DownBreve;': '\u0311',
+        'downdownarrows;': '\u21ca',
+        'downharpoonleft;': '\u21c3',
+        'downharpoonright;': '\u21c2',
+        'DownLeftRightVector;': '\u2950',
+        'DownLeftTeeVector;': '\u295e',
+        'DownLeftVector;': '\u21bd',
+        'DownLeftVectorBar;': '\u2956',
+        'DownRightTeeVector;': '\u295f',
+        'DownRightVector;': '\u21c1',
+        'DownRightVectorBar;': '\u2957',
+        'DownTee;': '\u22a4',
+        'DownTeeArrow;': '\u21a7',
+        'drbkarow;': '\u2910',
+        'drcorn;': '\u231f',
+        'drcrop;': '\u230c',
+        'Dscr;': '\U0001d49f',
+        'dscr;': '\U0001d4b9',
+        'DScy;': '\u0405',
+        'dscy;': '\u0455',
+        'dsol;': '\u29f6',
+        'Dstrok;': '\u0110',
+        'dstrok;': '\u0111',
+        'dtdot;': '\u22f1',
+        'dtri;': '\u25bf',
+        'dtrif;': '\u25be',
+        'duarr;': '\u21f5',
+        'duhar;': '\u296f',
+        'dwangle;': '\u29a6',
+        'DZcy;': '\u040f',
+        'dzcy;': '\u045f',
+        'dzigrarr;': '\u27ff',
+        'Eacute': '\xc9',
+        'eacute': '\xe9',
+        'Eacute;': '\xc9',
+        'eacute;': '\xe9',
+        'easter;': '\u2a6e',
+        'Ecaron;': '\u011a',
+        'ecaron;': '\u011b',
+        'ecir;': '\u2256',
+        'Ecirc': '\xca',
+        'ecirc': '\xea',
+        'Ecirc;': '\xca',
+        'ecirc;': '\xea',
+        'ecolon;': '\u2255',
+        'Ecy;': '\u042d',
+        'ecy;': '\u044d',
+        'eDDot;': '\u2a77',
+        'Edot;': '\u0116',
+        'eDot;': '\u2251',
+        'edot;': '\u0117',
+        'ee;': '\u2147',
+        'efDot;': '\u2252',
+        'Efr;': '\U0001d508',
+        'efr;': '\U0001d522',
+        'eg;': '\u2a9a',
+        'Egrave': '\xc8',
+        'egrave': '\xe8',
+        'Egrave;': '\xc8',
+        'egrave;': '\xe8',
+        'egs;': '\u2a96',
+        'egsdot;': '\u2a98',
+        'el;': '\u2a99',
+        'Element;': '\u2208',
+        'elinters;': '\u23e7',
+        'ell;': '\u2113',
+        'els;': '\u2a95',
+        'elsdot;': '\u2a97',
+        'Emacr;': '\u0112',
+        'emacr;': '\u0113',
+        'empty;': '\u2205',
+        'emptyset;': '\u2205',
+        'EmptySmallSquare;': '\u25fb',
+        'emptyv;': '\u2205',
+        'EmptyVerySmallSquare;': '\u25ab',
+        'emsp13;': '\u2004',
+        'emsp14;': '\u2005',
+        'emsp;': '\u2003',
+        'ENG;': '\u014a',
+        'eng;': '\u014b',
+        'ensp;': '\u2002',
+        'Eogon;': '\u0118',
+        'eogon;': '\u0119',
+        'Eopf;': '\U0001d53c',
+        'eopf;': '\U0001d556',
+        'epar;': '\u22d5',
+        'eparsl;': '\u29e3',
+        'eplus;': '\u2a71',
+        'epsi;': '\u03b5',
+        'Epsilon;': '\u0395',
+        'epsilon;': '\u03b5',
+        'epsiv;': '\u03f5',
+        'eqcirc;': '\u2256',
+        'eqcolon;': '\u2255',
+        'eqsim;': '\u2242',
+        'eqslantgtr;': '\u2a96',
+        'eqslantless;': '\u2a95',
+        'Equal;': '\u2a75',
+        'equals;': '=',
+        'EqualTilde;': '\u2242',
+        'equest;': '\u225f',
+        'Equilibrium;': '\u21cc',
+        'equiv;': '\u2261',
+        'equivDD;': '\u2a78',
+        'eqvparsl;': '\u29e5',
+        'erarr;': '\u2971',
+        'erDot;': '\u2253',
+        'Escr;': '\u2130',
+        'escr;': '\u212f',
+        'esdot;': '\u2250',
+        'Esim;': '\u2a73',
+        'esim;': '\u2242',
+        'Eta;': '\u0397',
+        'eta;': '\u03b7',
+        'ETH': '\xd0',
+        'eth': '\xf0',
+        'ETH;': '\xd0',
+        'eth;': '\xf0',
+        'Euml': '\xcb',
+        'euml': '\xeb',
+        'Euml;': '\xcb',
+        'euml;': '\xeb',
+        'euro;': '\u20ac',
+        'excl;': '!',
+        'exist;': '\u2203',
+        'Exists;': '\u2203',
+        'expectation;': '\u2130',
+        'ExponentialE;': '\u2147',
+        'exponentiale;': '\u2147',
+        'fallingdotseq;': '\u2252',
+        'Fcy;': '\u0424',
+        'fcy;': '\u0444',
+        'female;': '\u2640',
+        'ffilig;': '\ufb03',
+        'fflig;': '\ufb00',
+        'ffllig;': '\ufb04',
+        'Ffr;': '\U0001d509',
+        'ffr;': '\U0001d523',
+        'filig;': '\ufb01',
+        'FilledSmallSquare;': '\u25fc',
+        'FilledVerySmallSquare;': '\u25aa',
+        'fjlig;': 'fj',
+        'flat;': '\u266d',
+        'fllig;': '\ufb02',
+        'fltns;': '\u25b1',
+        'fnof;': '\u0192',
+        'Fopf;': '\U0001d53d',
+        'fopf;': '\U0001d557',
+        'ForAll;': '\u2200',
+        'forall;': '\u2200',
+        'fork;': '\u22d4',
+        'forkv;': '\u2ad9',
+        'Fouriertrf;': '\u2131',
+        'fpartint;': '\u2a0d',
+        'frac12': '\xbd',
+        'frac12;': '\xbd',
+        'frac13;': '\u2153',
+        'frac14': '\xbc',
+        'frac14;': '\xbc',
+        'frac15;': '\u2155',
+        'frac16;': '\u2159',
+        'frac18;': '\u215b',
+        'frac23;': '\u2154',
+        'frac25;': '\u2156',
+        'frac34': '\xbe',
+        'frac34;': '\xbe',
+        'frac35;': '\u2157',
+        'frac38;': '\u215c',
+        'frac45;': '\u2158',
+        'frac56;': '\u215a',
+        'frac58;': '\u215d',
+        'frac78;': '\u215e',
+        'frasl;': '\u2044',
+        'frown;': '\u2322',
+        'Fscr;': '\u2131',
+        'fscr;': '\U0001d4bb',
+        'gacute;': '\u01f5',
+        'Gamma;': '\u0393',
+        'gamma;': '\u03b3',
+        'Gammad;': '\u03dc',
+        'gammad;': '\u03dd',
+        'gap;': '\u2a86',
+        'Gbreve;': '\u011e',
+        'gbreve;': '\u011f',
+        'Gcedil;': '\u0122',
+        'Gcirc;': '\u011c',
+        'gcirc;': '\u011d',
+        'Gcy;': '\u0413',
+        'gcy;': '\u0433',
+        'Gdot;': '\u0120',
+        'gdot;': '\u0121',
+        'gE;': '\u2267',
+        'ge;': '\u2265',
+        'gEl;': '\u2a8c',
+        'gel;': '\u22db',
+        'geq;': '\u2265',
+        'geqq;': '\u2267',
+        'geqslant;': '\u2a7e',
+        'ges;': '\u2a7e',
+        'gescc;': '\u2aa9',
+        'gesdot;': '\u2a80',
+        'gesdoto;': '\u2a82',
+        'gesdotol;': '\u2a84',
+        'gesl;': '\u22db\ufe00',
+        'gesles;': '\u2a94',
+        'Gfr;': '\U0001d50a',
+        'gfr;': '\U0001d524',
+        'Gg;': '\u22d9',
+        'gg;': '\u226b',
+        'ggg;': '\u22d9',
+        'gimel;': '\u2137',
+        'GJcy;': '\u0403',
+        'gjcy;': '\u0453',
+        'gl;': '\u2277',
+        'gla;': '\u2aa5',
+        'glE;': '\u2a92',
+        'glj;': '\u2aa4',
+        'gnap;': '\u2a8a',
+        'gnapprox;': '\u2a8a',
+        'gnE;': '\u2269',
+        'gne;': '\u2a88',
+        'gneq;': '\u2a88',
+        'gneqq;': '\u2269',
+        'gnsim;': '\u22e7',
+        'Gopf;': '\U0001d53e',
+        'gopf;': '\U0001d558',
+        'grave;': '`',
+        'GreaterEqual;': '\u2265',
+        'GreaterEqualLess;': '\u22db',
+        'GreaterFullEqual;': '\u2267',
+        'GreaterGreater;': '\u2aa2',
+        'GreaterLess;': '\u2277',
+        'GreaterSlantEqual;': '\u2a7e',
+        'GreaterTilde;': '\u2273',
+        'Gscr;': '\U0001d4a2',
+        'gscr;': '\u210a',
+        'gsim;': '\u2273',
+        'gsime;': '\u2a8e',
+        'gsiml;': '\u2a90',
+        'GT': '>',
+        'gt': '>',
+        'GT;': '>',
+        'Gt;': '\u226b',
+        'gt;': '>',
+        'gtcc;': '\u2aa7',
+        'gtcir;': '\u2a7a',
+        'gtdot;': '\u22d7',
+        'gtlPar;': '\u2995',
+        'gtquest;': '\u2a7c',
+        'gtrapprox;': '\u2a86',
+        'gtrarr;': '\u2978',
+        'gtrdot;': '\u22d7',
+        'gtreqless;': '\u22db',
+        'gtreqqless;': '\u2a8c',
+        'gtrless;': '\u2277',
+        'gtrsim;': '\u2273',
+        'gvertneqq;': '\u2269\ufe00',
+        'gvnE;': '\u2269\ufe00',
+        'Hacek;': '\u02c7',
+        'hairsp;': '\u200a',
+        'half;': '\xbd',
+        'hamilt;': '\u210b',
+        'HARDcy;': '\u042a',
+        'hardcy;': '\u044a',
+        'hArr;': '\u21d4',
+        'harr;': '\u2194',
+        'harrcir;': '\u2948',
+        'harrw;': '\u21ad',
+        'Hat;': '^',
+        'hbar;': '\u210f',
+        'Hcirc;': '\u0124',
+        'hcirc;': '\u0125',
+        'hearts;': '\u2665',
+        'heartsuit;': '\u2665',
+        'hellip;': '\u2026',
+        'hercon;': '\u22b9',
+        'Hfr;': '\u210c',
+        'hfr;': '\U0001d525',
+        'HilbertSpace;': '\u210b',
+        'hksearow;': '\u2925',
+        'hkswarow;': '\u2926',
+        'hoarr;': '\u21ff',
+        'homtht;': '\u223b',
+        'hookleftarrow;': '\u21a9',
+        'hookrightarrow;': '\u21aa',
+        'Hopf;': '\u210d',
+        'hopf;': '\U0001d559',
+        'horbar;': '\u2015',
+        'HorizontalLine;': '\u2500',
+        'Hscr;': '\u210b',
+        'hscr;': '\U0001d4bd',
+        'hslash;': '\u210f',
+        'Hstrok;': '\u0126',
+        'hstrok;': '\u0127',
+        'HumpDownHump;': '\u224e',
+        'HumpEqual;': '\u224f',
+        'hybull;': '\u2043',
+        'hyphen;': '\u2010',
+        'Iacute': '\xcd',
+        'iacute': '\xed',
+        'Iacute;': '\xcd',
+        'iacute;': '\xed',
+        'ic;': '\u2063',
+        'Icirc': '\xce',
+        'icirc': '\xee',
+        'Icirc;': '\xce',
+        'icirc;': '\xee',
+        'Icy;': '\u0418',
+        'icy;': '\u0438',
+        'Idot;': '\u0130',
+        'IEcy;': '\u0415',
+        'iecy;': '\u0435',
+        'iexcl': '\xa1',
+        'iexcl;': '\xa1',
+        'iff;': '\u21d4',
+        'Ifr;': '\u2111',
+        'ifr;': '\U0001d526',
+        'Igrave': '\xcc',
+        'igrave': '\xec',
+        'Igrave;': '\xcc',
+        'igrave;': '\xec',
+        'ii;': '\u2148',
+        'iiiint;': '\u2a0c',
+        'iiint;': '\u222d',
+        'iinfin;': '\u29dc',
+        'iiota;': '\u2129',
+        'IJlig;': '\u0132',
+        'ijlig;': '\u0133',
+        'Im;': '\u2111',
+        'Imacr;': '\u012a',
+        'imacr;': '\u012b',
+        'image;': '\u2111',
+        'ImaginaryI;': '\u2148',
+        'imagline;': '\u2110',
+        'imagpart;': '\u2111',
+        'imath;': '\u0131',
+        'imof;': '\u22b7',
+        'imped;': '\u01b5',
+        'Implies;': '\u21d2',
+        'in;': '\u2208',
+        'incare;': '\u2105',
+        'infin;': '\u221e',
+        'infintie;': '\u29dd',
+        'inodot;': '\u0131',
+        'Int;': '\u222c',
+        'int;': '\u222b',
+        'intcal;': '\u22ba',
+        'integers;': '\u2124',
+        'Integral;': '\u222b',
+        'intercal;': '\u22ba',
+        'Intersection;': '\u22c2',
+        'intlarhk;': '\u2a17',
+        'intprod;': '\u2a3c',
+        'InvisibleComma;': '\u2063',
+        'InvisibleTimes;': '\u2062',
+        'IOcy;': '\u0401',
+        'iocy;': '\u0451',
+        'Iogon;': '\u012e',
+        'iogon;': '\u012f',
+        'Iopf;': '\U0001d540',
+        'iopf;': '\U0001d55a',
+        'Iota;': '\u0399',
+        'iota;': '\u03b9',
+        'iprod;': '\u2a3c',
+        'iquest': '\xbf',
+        'iquest;': '\xbf',
+        'Iscr;': '\u2110',
+        'iscr;': '\U0001d4be',
+        'isin;': '\u2208',
+        'isindot;': '\u22f5',
+        'isinE;': '\u22f9',
+        'isins;': '\u22f4',
+        'isinsv;': '\u22f3',
+        'isinv;': '\u2208',
+        'it;': '\u2062',
+        'Itilde;': '\u0128',
+        'itilde;': '\u0129',
+        'Iukcy;': '\u0406',
+        'iukcy;': '\u0456',
+        'Iuml': '\xcf',
+        'iuml': '\xef',
+        'Iuml;': '\xcf',
+        'iuml;': '\xef',
+        'Jcirc;': '\u0134',
+        'jcirc;': '\u0135',
+        'Jcy;': '\u0419',
+        'jcy;': '\u0439',
+        'Jfr;': '\U0001d50d',
+        'jfr;': '\U0001d527',
+        'jmath;': '\u0237',
+        'Jopf;': '\U0001d541',
+        'jopf;': '\U0001d55b',
+        'Jscr;': '\U0001d4a5',
+        'jscr;': '\U0001d4bf',
+        'Jsercy;': '\u0408',
+        'jsercy;': '\u0458',
+        'Jukcy;': '\u0404',
+        'jukcy;': '\u0454',
+        'Kappa;': '\u039a',
+        'kappa;': '\u03ba',
+        'kappav;': '\u03f0',
+        'Kcedil;': '\u0136',
+        'kcedil;': '\u0137',
+        'Kcy;': '\u041a',
+        'kcy;': '\u043a',
+        'Kfr;': '\U0001d50e',
+        'kfr;': '\U0001d528',
+        'kgreen;': '\u0138',
+        'KHcy;': '\u0425',
+        'khcy;': '\u0445',
+        'KJcy;': '\u040c',
+        'kjcy;': '\u045c',
+        'Kopf;': '\U0001d542',
+        'kopf;': '\U0001d55c',
+        'Kscr;': '\U0001d4a6',
+        'kscr;': '\U0001d4c0',
+        'lAarr;': '\u21da',
+        'Lacute;': '\u0139',
+        'lacute;': '\u013a',
+        'laemptyv;': '\u29b4',
+        'lagran;': '\u2112',
+        'Lambda;': '\u039b',
+        'lambda;': '\u03bb',
+        'Lang;': '\u27ea',
+        'lang;': '\u27e8',
+        'langd;': '\u2991',
+        'langle;': '\u27e8',
+        'lap;': '\u2a85',
+        'Laplacetrf;': '\u2112',
+        'laquo': '\xab',
+        'laquo;': '\xab',
+        'Larr;': '\u219e',
+        'lArr;': '\u21d0',
+        'larr;': '\u2190',
+        'larrb;': '\u21e4',
+        'larrbfs;': '\u291f',
+        'larrfs;': '\u291d',
+        'larrhk;': '\u21a9',
+        'larrlp;': '\u21ab',
+        'larrpl;': '\u2939',
+        'larrsim;': '\u2973',
+        'larrtl;': '\u21a2',
+        'lat;': '\u2aab',
+        'lAtail;': '\u291b',
+        'latail;': '\u2919',
+        'late;': '\u2aad',
+        'lates;': '\u2aad\ufe00',
+        'lBarr;': '\u290e',
+        'lbarr;': '\u290c',
+        'lbbrk;': '\u2772',
+        'lbrace;': '{',
+        'lbrack;': '[',
+        'lbrke;': '\u298b',
+        'lbrksld;': '\u298f',
+        'lbrkslu;': '\u298d',
+        'Lcaron;': '\u013d',
+        'lcaron;': '\u013e',
+        'Lcedil;': '\u013b',
+        'lcedil;': '\u013c',
+        'lceil;': '\u2308',
+        'lcub;': '{',
+        'Lcy;': '\u041b',
+        'lcy;': '\u043b',
+        'ldca;': '\u2936',
+        'ldquo;': '\u201c',
+        'ldquor;': '\u201e',
+        'ldrdhar;': '\u2967',
+        'ldrushar;': '\u294b',
+        'ldsh;': '\u21b2',
+        'lE;': '\u2266',
+        'le;': '\u2264',
+        'LeftAngleBracket;': '\u27e8',
+        'LeftArrow;': '\u2190',
+        'Leftarrow;': '\u21d0',
+        'leftarrow;': '\u2190',
+        'LeftArrowBar;': '\u21e4',
+        'LeftArrowRightArrow;': '\u21c6',
+        'leftarrowtail;': '\u21a2',
+        'LeftCeiling;': '\u2308',
+        'LeftDoubleBracket;': '\u27e6',
+        'LeftDownTeeVector;': '\u2961',
+        'LeftDownVector;': '\u21c3',
+        'LeftDownVectorBar;': '\u2959',
+        'LeftFloor;': '\u230a',
+        'leftharpoondown;': '\u21bd',
+        'leftharpoonup;': '\u21bc',
+        'leftleftarrows;': '\u21c7',
+        'LeftRightArrow;': '\u2194',
+        'Leftrightarrow;': '\u21d4',
+        'leftrightarrow;': '\u2194',
+        'leftrightarrows;': '\u21c6',
+        'leftrightharpoons;': '\u21cb',
+        'leftrightsquigarrow;': '\u21ad',
+        'LeftRightVector;': '\u294e',
+        'LeftTee;': '\u22a3',
+        'LeftTeeArrow;': '\u21a4',
+        'LeftTeeVector;': '\u295a',
+        'leftthreetimes;': '\u22cb',
+        'LeftTriangle;': '\u22b2',
+        'LeftTriangleBar;': '\u29cf',
+        'LeftTriangleEqual;': '\u22b4',
+        'LeftUpDownVector;': '\u2951',
+        'LeftUpTeeVector;': '\u2960',
+        'LeftUpVector;': '\u21bf',
+        'LeftUpVectorBar;': '\u2958',
+        'LeftVector;': '\u21bc',
+        'LeftVectorBar;': '\u2952',
+        'lEg;': '\u2a8b',
+        'leg;': '\u22da',
+        'leq;': '\u2264',
+        'leqq;': '\u2266',
+        'leqslant;': '\u2a7d',
+        'les;': '\u2a7d',
+        'lescc;': '\u2aa8',
+        'lesdot;': '\u2a7f',
+        'lesdoto;': '\u2a81',
+        'lesdotor;': '\u2a83',
+        'lesg;': '\u22da\ufe00',
+        'lesges;': '\u2a93',
+        'lessapprox;': '\u2a85',
+        'lessdot;': '\u22d6',
+        'lesseqgtr;': '\u22da',
+        'lesseqqgtr;': '\u2a8b',
+        'LessEqualGreater;': '\u22da',
+        'LessFullEqual;': '\u2266',
+        'LessGreater;': '\u2276',
+        'lessgtr;': '\u2276',
+        'LessLess;': '\u2aa1',
+        'lesssim;': '\u2272',
+        'LessSlantEqual;': '\u2a7d',
+        'LessTilde;': '\u2272',
+        'lfisht;': '\u297c',
+        'lfloor;': '\u230a',
+        'Lfr;': '\U0001d50f',
+        'lfr;': '\U0001d529',
+        'lg;': '\u2276',
+        'lgE;': '\u2a91',
+        'lHar;': '\u2962',
+        'lhard;': '\u21bd',
+        'lharu;': '\u21bc',
+        'lharul;': '\u296a',
+        'lhblk;': '\u2584',
+        'LJcy;': '\u0409',
+        'ljcy;': '\u0459',
+        'Ll;': '\u22d8',
+        'll;': '\u226a',
+        'llarr;': '\u21c7',
+        'llcorner;': '\u231e',
+        'Lleftarrow;': '\u21da',
+        'llhard;': '\u296b',
+        'lltri;': '\u25fa',
+        'Lmidot;': '\u013f',
+        'lmidot;': '\u0140',
+        'lmoust;': '\u23b0',
+        'lmoustache;': '\u23b0',
+        'lnap;': '\u2a89',
+        'lnapprox;': '\u2a89',
+        'lnE;': '\u2268',
+        'lne;': '\u2a87',
+        'lneq;': '\u2a87',
+        'lneqq;': '\u2268',
+        'lnsim;': '\u22e6',
+        'loang;': '\u27ec',
+        'loarr;': '\u21fd',
+        'lobrk;': '\u27e6',
+        'LongLeftArrow;': '\u27f5',
+        'Longleftarrow;': '\u27f8',
+        'longleftarrow;': '\u27f5',
+        'LongLeftRightArrow;': '\u27f7',
+        'Longleftrightarrow;': '\u27fa',
+        'longleftrightarrow;': '\u27f7',
+        'longmapsto;': '\u27fc',
+        'LongRightArrow;': '\u27f6',
+        'Longrightarrow;': '\u27f9',
+        'longrightarrow;': '\u27f6',
+        'looparrowleft;': '\u21ab',
+        'looparrowright;': '\u21ac',
+        'lopar;': '\u2985',
+        'Lopf;': '\U0001d543',
+        'lopf;': '\U0001d55d',
+        'loplus;': '\u2a2d',
+        'lotimes;': '\u2a34',
+        'lowast;': '\u2217',
+        'lowbar;': '_',
+        'LowerLeftArrow;': '\u2199',
+        'LowerRightArrow;': '\u2198',
+        'loz;': '\u25ca',
+        'lozenge;': '\u25ca',
+        'lozf;': '\u29eb',
+        'lpar;': '(',
+        'lparlt;': '\u2993',
+        'lrarr;': '\u21c6',
+        'lrcorner;': '\u231f',
+        'lrhar;': '\u21cb',
+        'lrhard;': '\u296d',
+        'lrm;': '\u200e',
+        'lrtri;': '\u22bf',
+        'lsaquo;': '\u2039',
+        'Lscr;': '\u2112',
+        'lscr;': '\U0001d4c1',
+        'Lsh;': '\u21b0',
+        'lsh;': '\u21b0',
+        'lsim;': '\u2272',
+        'lsime;': '\u2a8d',
+        'lsimg;': '\u2a8f',
+        'lsqb;': '[',
+        'lsquo;': '\u2018',
+        'lsquor;': '\u201a',
+        'Lstrok;': '\u0141',
+        'lstrok;': '\u0142',
+        'LT': '<',
+        'lt': '<',
+        'LT;': '<',
+        'Lt;': '\u226a',
+        'lt;': '<',
+        'ltcc;': '\u2aa6',
+        'ltcir;': '\u2a79',
+        'ltdot;': '\u22d6',
+        'lthree;': '\u22cb',
+        'ltimes;': '\u22c9',
+        'ltlarr;': '\u2976',
+        'ltquest;': '\u2a7b',
+        'ltri;': '\u25c3',
+        'ltrie;': '\u22b4',
+        'ltrif;': '\u25c2',
+        'ltrPar;': '\u2996',
+        'lurdshar;': '\u294a',
+        'luruhar;': '\u2966',
+        'lvertneqq;': '\u2268\ufe00',
+        'lvnE;': '\u2268\ufe00',
+        'macr': '\xaf',
+        'macr;': '\xaf',
+        'male;': '\u2642',
+        'malt;': '\u2720',
+        'maltese;': '\u2720',
+        'Map;': '\u2905',
+        'map;': '\u21a6',
+        'mapsto;': '\u21a6',
+        'mapstodown;': '\u21a7',
+        'mapstoleft;': '\u21a4',
+        'mapstoup;': '\u21a5',
+        'marker;': '\u25ae',
+        'mcomma;': '\u2a29',
+        'Mcy;': '\u041c',
+        'mcy;': '\u043c',
+        'mdash;': '\u2014',
+        'mDDot;': '\u223a',
+        'measuredangle;': '\u2221',
+        'MediumSpace;': '\u205f',
+        'Mellintrf;': '\u2133',
+        'Mfr;': '\U0001d510',
+        'mfr;': '\U0001d52a',
+        'mho;': '\u2127',
+        'micro': '\xb5',
+        'micro;': '\xb5',
+        'mid;': '\u2223',
+        'midast;': '*',
+        'midcir;': '\u2af0',
+        'middot': '\xb7',
+        'middot;': '\xb7',
+        'minus;': '\u2212',
+        'minusb;': '\u229f',
+        'minusd;': '\u2238',
+        'minusdu;': '\u2a2a',
+        'MinusPlus;': '\u2213',
+        'mlcp;': '\u2adb',
+        'mldr;': '\u2026',
+        'mnplus;': '\u2213',
+        'models;': '\u22a7',
+        'Mopf;': '\U0001d544',
+        'mopf;': '\U0001d55e',
+        'mp;': '\u2213',
+        'Mscr;': '\u2133',
+        'mscr;': '\U0001d4c2',
+        'mstpos;': '\u223e',
+        'Mu;': '\u039c',
+        'mu;': '\u03bc',
+        'multimap;': '\u22b8',
+        'mumap;': '\u22b8',
+        'nabla;': '\u2207',
+        'Nacute;': '\u0143',
+        'nacute;': '\u0144',
+        'nang;': '\u2220\u20d2',
+        'nap;': '\u2249',
+        'napE;': '\u2a70\u0338',
+        'napid;': '\u224b\u0338',
+        'napos;': '\u0149',
+        'napprox;': '\u2249',
+        'natur;': '\u266e',
+        'natural;': '\u266e',
+        'naturals;': '\u2115',
+        'nbsp': '\xa0',
+        'nbsp;': '\xa0',
+        'nbump;': '\u224e\u0338',
+        'nbumpe;': '\u224f\u0338',
+        'ncap;': '\u2a43',
+        'Ncaron;': '\u0147',
+        'ncaron;': '\u0148',
+        'Ncedil;': '\u0145',
+        'ncedil;': '\u0146',
+        'ncong;': '\u2247',
+        'ncongdot;': '\u2a6d\u0338',
+        'ncup;': '\u2a42',
+        'Ncy;': '\u041d',
+        'ncy;': '\u043d',
+        'ndash;': '\u2013',
+        'ne;': '\u2260',
+        'nearhk;': '\u2924',
+        'neArr;': '\u21d7',
+        'nearr;': '\u2197',
+        'nearrow;': '\u2197',
+        'nedot;': '\u2250\u0338',
+        'NegativeMediumSpace;': '\u200b',
+        'NegativeThickSpace;': '\u200b',
+        'NegativeThinSpace;': '\u200b',
+        'NegativeVeryThinSpace;': '\u200b',
+        'nequiv;': '\u2262',
+        'nesear;': '\u2928',
+        'nesim;': '\u2242\u0338',
+        'NestedGreaterGreater;': '\u226b',
+        'NestedLessLess;': '\u226a',
+        'NewLine;': '\n',
+        'nexist;': '\u2204',
+        'nexists;': '\u2204',
+        'Nfr;': '\U0001d511',
+        'nfr;': '\U0001d52b',
+        'ngE;': '\u2267\u0338',
+        'nge;': '\u2271',
+        'ngeq;': '\u2271',
+        'ngeqq;': '\u2267\u0338',
+        'ngeqslant;': '\u2a7e\u0338',
+        'nges;': '\u2a7e\u0338',
+        'nGg;': '\u22d9\u0338',
+        'ngsim;': '\u2275',
+        'nGt;': '\u226b\u20d2',
+        'ngt;': '\u226f',
+        'ngtr;': '\u226f',
+        'nGtv;': '\u226b\u0338',
+        'nhArr;': '\u21ce',
+        'nharr;': '\u21ae',
+        'nhpar;': '\u2af2',
+        'ni;': '\u220b',
+        'nis;': '\u22fc',
+        'nisd;': '\u22fa',
+        'niv;': '\u220b',
+        'NJcy;': '\u040a',
+        'njcy;': '\u045a',
+        'nlArr;': '\u21cd',
+        'nlarr;': '\u219a',
+        'nldr;': '\u2025',
+        'nlE;': '\u2266\u0338',
+        'nle;': '\u2270',
+        'nLeftarrow;': '\u21cd',
+        'nleftarrow;': '\u219a',
+        'nLeftrightarrow;': '\u21ce',
+        'nleftrightarrow;': '\u21ae',
+        'nleq;': '\u2270',
+        'nleqq;': '\u2266\u0338',
+        'nleqslant;': '\u2a7d\u0338',
+        'nles;': '\u2a7d\u0338',
+        'nless;': '\u226e',
+        'nLl;': '\u22d8\u0338',
+        'nlsim;': '\u2274',
+        'nLt;': '\u226a\u20d2',
+        'nlt;': '\u226e',
+        'nltri;': '\u22ea',
+        'nltrie;': '\u22ec',
+        'nLtv;': '\u226a\u0338',
+        'nmid;': '\u2224',
+        'NoBreak;': '\u2060',
+        'NonBreakingSpace;': '\xa0',
+        'Nopf;': '\u2115',
+        'nopf;': '\U0001d55f',
+        'not': '\xac',
+        'Not;': '\u2aec',
+        'not;': '\xac',
+        'NotCongruent;': '\u2262',
+        'NotCupCap;': '\u226d',
+        'NotDoubleVerticalBar;': '\u2226',
+        'NotElement;': '\u2209',
+        'NotEqual;': '\u2260',
+        'NotEqualTilde;': '\u2242\u0338',
+        'NotExists;': '\u2204',
+        'NotGreater;': '\u226f',
+        'NotGreaterEqual;': '\u2271',
+        'NotGreaterFullEqual;': '\u2267\u0338',
+        'NotGreaterGreater;': '\u226b\u0338',
+        'NotGreaterLess;': '\u2279',
+        'NotGreaterSlantEqual;': '\u2a7e\u0338',
+        'NotGreaterTilde;': '\u2275',
+        'NotHumpDownHump;': '\u224e\u0338',
+        'NotHumpEqual;': '\u224f\u0338',
+        'notin;': '\u2209',
+        'notindot;': '\u22f5\u0338',
+        'notinE;': '\u22f9\u0338',
+        'notinva;': '\u2209',
+        'notinvb;': '\u22f7',
+        'notinvc;': '\u22f6',
+        'NotLeftTriangle;': '\u22ea',
+        'NotLeftTriangleBar;': '\u29cf\u0338',
+        'NotLeftTriangleEqual;': '\u22ec',
+        'NotLess;': '\u226e',
+        'NotLessEqual;': '\u2270',
+        'NotLessGreater;': '\u2278',
+        'NotLessLess;': '\u226a\u0338',
+        'NotLessSlantEqual;': '\u2a7d\u0338',
+        'NotLessTilde;': '\u2274',
+        'NotNestedGreaterGreater;': '\u2aa2\u0338',
+        'NotNestedLessLess;': '\u2aa1\u0338',
+        'notni;': '\u220c',
+        'notniva;': '\u220c',
+        'notnivb;': '\u22fe',
+        'notnivc;': '\u22fd',
+        'NotPrecedes;': '\u2280',
+        'NotPrecedesEqual;': '\u2aaf\u0338',
+        'NotPrecedesSlantEqual;': '\u22e0',
+        'NotReverseElement;': '\u220c',
+        'NotRightTriangle;': '\u22eb',
+        'NotRightTriangleBar;': '\u29d0\u0338',
+        'NotRightTriangleEqual;': '\u22ed',
+        'NotSquareSubset;': '\u228f\u0338',
+        'NotSquareSubsetEqual;': '\u22e2',
+        'NotSquareSuperset;': '\u2290\u0338',
+        'NotSquareSupersetEqual;': '\u22e3',
+        'NotSubset;': '\u2282\u20d2',
+        'NotSubsetEqual;': '\u2288',
+        'NotSucceeds;': '\u2281',
+        'NotSucceedsEqual;': '\u2ab0\u0338',
+        'NotSucceedsSlantEqual;': '\u22e1',
+        'NotSucceedsTilde;': '\u227f\u0338',
+        'NotSuperset;': '\u2283\u20d2',
+        'NotSupersetEqual;': '\u2289',
+        'NotTilde;': '\u2241',
+        'NotTildeEqual;': '\u2244',
+        'NotTildeFullEqual;': '\u2247',
+        'NotTildeTilde;': '\u2249',
+        'NotVerticalBar;': '\u2224',
+        'npar;': '\u2226',
+        'nparallel;': '\u2226',
+        'nparsl;': '\u2afd\u20e5',
+        'npart;': '\u2202\u0338',
+        'npolint;': '\u2a14',
+        'npr;': '\u2280',
+        'nprcue;': '\u22e0',
+        'npre;': '\u2aaf\u0338',
+        'nprec;': '\u2280',
+        'npreceq;': '\u2aaf\u0338',
+        'nrArr;': '\u21cf',
+        'nrarr;': '\u219b',
+        'nrarrc;': '\u2933\u0338',
+        'nrarrw;': '\u219d\u0338',
+        'nRightarrow;': '\u21cf',
+        'nrightarrow;': '\u219b',
+        'nrtri;': '\u22eb',
+        'nrtrie;': '\u22ed',
+        'nsc;': '\u2281',
+        'nsccue;': '\u22e1',
+        'nsce;': '\u2ab0\u0338',
+        'Nscr;': '\U0001d4a9',
+        'nscr;': '\U0001d4c3',
+        'nshortmid;': '\u2224',
+        'nshortparallel;': '\u2226',
+        'nsim;': '\u2241',
+        'nsime;': '\u2244',
+        'nsimeq;': '\u2244',
+        'nsmid;': '\u2224',
+        'nspar;': '\u2226',
+        'nsqsube;': '\u22e2',
+        'nsqsupe;': '\u22e3',
+        'nsub;': '\u2284',
+        'nsubE;': '\u2ac5\u0338',
+        'nsube;': '\u2288',
+        'nsubset;': '\u2282\u20d2',
+        'nsubseteq;': '\u2288',
+        'nsubseteqq;': '\u2ac5\u0338',
+        'nsucc;': '\u2281',
+        'nsucceq;': '\u2ab0\u0338',
+        'nsup;': '\u2285',
+        'nsupE;': '\u2ac6\u0338',
+        'nsupe;': '\u2289',
+        'nsupset;': '\u2283\u20d2',
+        'nsupseteq;': '\u2289',
+        'nsupseteqq;': '\u2ac6\u0338',
+        'ntgl;': '\u2279',
+        'Ntilde': '\xd1',
+        'ntilde': '\xf1',
+        'Ntilde;': '\xd1',
+        'ntilde;': '\xf1',
+        'ntlg;': '\u2278',
+        'ntriangleleft;': '\u22ea',
+        'ntrianglelefteq;': '\u22ec',
+        'ntriangleright;': '\u22eb',
+        'ntrianglerighteq;': '\u22ed',
+        'Nu;': '\u039d',
+        'nu;': '\u03bd',
+        'num;': '#',
+        'numero;': '\u2116',
+        'numsp;': '\u2007',
+        'nvap;': '\u224d\u20d2',
+        'nVDash;': '\u22af',
+        'nVdash;': '\u22ae',
+        'nvDash;': '\u22ad',
+        'nvdash;': '\u22ac',
+        'nvge;': '\u2265\u20d2',
+        'nvgt;': '>\u20d2',
+        'nvHarr;': '\u2904',
+        'nvinfin;': '\u29de',
+        'nvlArr;': '\u2902',
+        'nvle;': '\u2264\u20d2',
+        'nvlt;': '<\u20d2',
+        'nvltrie;': '\u22b4\u20d2',
+        'nvrArr;': '\u2903',
+        'nvrtrie;': '\u22b5\u20d2',
+        'nvsim;': '\u223c\u20d2',
+        'nwarhk;': '\u2923',
+        'nwArr;': '\u21d6',
+        'nwarr;': '\u2196',
+        'nwarrow;': '\u2196',
+        'nwnear;': '\u2927',
+        'Oacute': '\xd3',
+        'oacute': '\xf3',
+        'Oacute;': '\xd3',
+        'oacute;': '\xf3',
+        'oast;': '\u229b',
+        'ocir;': '\u229a',
+        'Ocirc': '\xd4',
+        'ocirc': '\xf4',
+        'Ocirc;': '\xd4',
+        'ocirc;': '\xf4',
+        'Ocy;': '\u041e',
+        'ocy;': '\u043e',
+        'odash;': '\u229d',
+        'Odblac;': '\u0150',
+        'odblac;': '\u0151',
+        'odiv;': '\u2a38',
+        'odot;': '\u2299',
+        'odsold;': '\u29bc',
+        'OElig;': '\u0152',
+        'oelig;': '\u0153',
+        'ofcir;': '\u29bf',
+        'Ofr;': '\U0001d512',
+        'ofr;': '\U0001d52c',
+        'ogon;': '\u02db',
+        'Ograve': '\xd2',
+        'ograve': '\xf2',
+        'Ograve;': '\xd2',
+        'ograve;': '\xf2',
+        'ogt;': '\u29c1',
+        'ohbar;': '\u29b5',
+        'ohm;': '\u03a9',
+        'oint;': '\u222e',
+        'olarr;': '\u21ba',
+        'olcir;': '\u29be',
+        'olcross;': '\u29bb',
+        'oline;': '\u203e',
+        'olt;': '\u29c0',
+        'Omacr;': '\u014c',
+        'omacr;': '\u014d',
+        'Omega;': '\u03a9',
+        'omega;': '\u03c9',
+        'Omicron;': '\u039f',
+        'omicron;': '\u03bf',
+        'omid;': '\u29b6',
+        'ominus;': '\u2296',
+        'Oopf;': '\U0001d546',
+        'oopf;': '\U0001d560',
+        'opar;': '\u29b7',
+        'OpenCurlyDoubleQuote;': '\u201c',
+        'OpenCurlyQuote;': '\u2018',
+        'operp;': '\u29b9',
+        'oplus;': '\u2295',
+        'Or;': '\u2a54',
+        'or;': '\u2228',
+        'orarr;': '\u21bb',
+        'ord;': '\u2a5d',
+        'order;': '\u2134',
+        'orderof;': '\u2134',
+        'ordf': '\xaa',
+        'ordf;': '\xaa',
+        'ordm': '\xba',
+        'ordm;': '\xba',
+        'origof;': '\u22b6',
+        'oror;': '\u2a56',
+        'orslope;': '\u2a57',
+        'orv;': '\u2a5b',
+        'oS;': '\u24c8',
+        'Oscr;': '\U0001d4aa',
+        'oscr;': '\u2134',
+        'Oslash': '\xd8',
+        'oslash': '\xf8',
+        'Oslash;': '\xd8',
+        'oslash;': '\xf8',
+        'osol;': '\u2298',
+        'Otilde': '\xd5',
+        'otilde': '\xf5',
+        'Otilde;': '\xd5',
+        'otilde;': '\xf5',
+        'Otimes;': '\u2a37',
+        'otimes;': '\u2297',
+        'otimesas;': '\u2a36',
+        'Ouml': '\xd6',
+        'ouml': '\xf6',
+        'Ouml;': '\xd6',
+        'ouml;': '\xf6',
+        'ovbar;': '\u233d',
+        'OverBar;': '\u203e',
+        'OverBrace;': '\u23de',
+        'OverBracket;': '\u23b4',
+        'OverParenthesis;': '\u23dc',
+        'par;': '\u2225',
+        'para': '\xb6',
+        'para;': '\xb6',
+        'parallel;': '\u2225',
+        'parsim;': '\u2af3',
+        'parsl;': '\u2afd',
+        'part;': '\u2202',
+        'PartialD;': '\u2202',
+        'Pcy;': '\u041f',
+        'pcy;': '\u043f',
+        'percnt;': '%',
+        'period;': '.',
+        'permil;': '\u2030',
+        'perp;': '\u22a5',
+        'pertenk;': '\u2031',
+        'Pfr;': '\U0001d513',
+        'pfr;': '\U0001d52d',
+        'Phi;': '\u03a6',
+        'phi;': '\u03c6',
+        'phiv;': '\u03d5',
+        'phmmat;': '\u2133',
+        'phone;': '\u260e',
+        'Pi;': '\u03a0',
+        'pi;': '\u03c0',
+        'pitchfork;': '\u22d4',
+        'piv;': '\u03d6',
+        'planck;': '\u210f',
+        'planckh;': '\u210e',
+        'plankv;': '\u210f',
+        'plus;': '+',
+        'plusacir;': '\u2a23',
+        'plusb;': '\u229e',
+        'pluscir;': '\u2a22',
+        'plusdo;': '\u2214',
+        'plusdu;': '\u2a25',
+        'pluse;': '\u2a72',
+        'PlusMinus;': '\xb1',
+        'plusmn': '\xb1',
+        'plusmn;': '\xb1',
+        'plussim;': '\u2a26',
+        'plustwo;': '\u2a27',
+        'pm;': '\xb1',
+        'Poincareplane;': '\u210c',
+        'pointint;': '\u2a15',
+        'Popf;': '\u2119',
+        'popf;': '\U0001d561',
+        'pound': '\xa3',
+        'pound;': '\xa3',
+        'Pr;': '\u2abb',
+        'pr;': '\u227a',
+        'prap;': '\u2ab7',
+        'prcue;': '\u227c',
+        'prE;': '\u2ab3',
+        'pre;': '\u2aaf',
+        'prec;': '\u227a',
+        'precapprox;': '\u2ab7',
+        'preccurlyeq;': '\u227c',
+        'Precedes;': '\u227a',
+        'PrecedesEqual;': '\u2aaf',
+        'PrecedesSlantEqual;': '\u227c',
+        'PrecedesTilde;': '\u227e',
+        'preceq;': '\u2aaf',
+        'precnapprox;': '\u2ab9',
+        'precneqq;': '\u2ab5',
+        'precnsim;': '\u22e8',
+        'precsim;': '\u227e',
+        'Prime;': '\u2033',
+        'prime;': '\u2032',
+        'primes;': '\u2119',
+        'prnap;': '\u2ab9',
+        'prnE;': '\u2ab5',
+        'prnsim;': '\u22e8',
+        'prod;': '\u220f',
+        'Product;': '\u220f',
+        'profalar;': '\u232e',
+        'profline;': '\u2312',
+        'profsurf;': '\u2313',
+        'prop;': '\u221d',
+        'Proportion;': '\u2237',
+        'Proportional;': '\u221d',
+        'propto;': '\u221d',
+        'prsim;': '\u227e',
+        'prurel;': '\u22b0',
+        'Pscr;': '\U0001d4ab',
+        'pscr;': '\U0001d4c5',
+        'Psi;': '\u03a8',
+        'psi;': '\u03c8',
+        'puncsp;': '\u2008',
+        'Qfr;': '\U0001d514',
+        'qfr;': '\U0001d52e',
+        'qint;': '\u2a0c',
+        'Qopf;': '\u211a',
+        'qopf;': '\U0001d562',
+        'qprime;': '\u2057',
+        'Qscr;': '\U0001d4ac',
+        'qscr;': '\U0001d4c6',
+        'quaternions;': '\u210d',
+        'quatint;': '\u2a16',
+        'quest;': '?',
+        'questeq;': '\u225f',
+        'QUOT': '"',
+        'quot': '"',
+        'QUOT;': '"',
+        'quot;': '"',
+        'rAarr;': '\u21db',
+        'race;': '\u223d\u0331',
+        'Racute;': '\u0154',
+        'racute;': '\u0155',
+        'radic;': '\u221a',
+        'raemptyv;': '\u29b3',
+        'Rang;': '\u27eb',
+        'rang;': '\u27e9',
+        'rangd;': '\u2992',
+        'range;': '\u29a5',
+        'rangle;': '\u27e9',
+        'raquo': '\xbb',
+        'raquo;': '\xbb',
+        'Rarr;': '\u21a0',
+        'rArr;': '\u21d2',
+        'rarr;': '\u2192',
+        'rarrap;': '\u2975',
+        'rarrb;': '\u21e5',
+        'rarrbfs;': '\u2920',
+        'rarrc;': '\u2933',
+        'rarrfs;': '\u291e',
+        'rarrhk;': '\u21aa',
+        'rarrlp;': '\u21ac',
+        'rarrpl;': '\u2945',
+        'rarrsim;': '\u2974',
+        'Rarrtl;': '\u2916',
+        'rarrtl;': '\u21a3',
+        'rarrw;': '\u219d',
+        'rAtail;': '\u291c',
+        'ratail;': '\u291a',
+        'ratio;': '\u2236',
+        'rationals;': '\u211a',
+        'RBarr;': '\u2910',
+        'rBarr;': '\u290f',
+        'rbarr;': '\u290d',
+        'rbbrk;': '\u2773',
+        'rbrace;': '}',
+        'rbrack;': ']',
+        'rbrke;': '\u298c',
+        'rbrksld;': '\u298e',
+        'rbrkslu;': '\u2990',
+        'Rcaron;': '\u0158',
+        'rcaron;': '\u0159',
+        'Rcedil;': '\u0156',
+        'rcedil;': '\u0157',
+        'rceil;': '\u2309',
+        'rcub;': '}',
+        'Rcy;': '\u0420',
+        'rcy;': '\u0440',
+        'rdca;': '\u2937',
+        'rdldhar;': '\u2969',
+        'rdquo;': '\u201d',
+        'rdquor;': '\u201d',
+        'rdsh;': '\u21b3',
+        'Re;': '\u211c',
+        'real;': '\u211c',
+        'realine;': '\u211b',
+        'realpart;': '\u211c',
+        'reals;': '\u211d',
+        'rect;': '\u25ad',
+        'REG': '\xae',
+        'reg': '\xae',
+        'REG;': '\xae',
+        'reg;': '\xae',
+        'ReverseElement;': '\u220b',
+        'ReverseEquilibrium;': '\u21cb',
+        'ReverseUpEquilibrium;': '\u296f',
+        'rfisht;': '\u297d',
+        'rfloor;': '\u230b',
+        'Rfr;': '\u211c',
+        'rfr;': '\U0001d52f',
+        'rHar;': '\u2964',
+        'rhard;': '\u21c1',
+        'rharu;': '\u21c0',
+        'rharul;': '\u296c',
+        'Rho;': '\u03a1',
+        'rho;': '\u03c1',
+        'rhov;': '\u03f1',
+        'RightAngleBracket;': '\u27e9',
+        'RightArrow;': '\u2192',
+        'Rightarrow;': '\u21d2',
+        'rightarrow;': '\u2192',
+        'RightArrowBar;': '\u21e5',
+        'RightArrowLeftArrow;': '\u21c4',
+        'rightarrowtail;': '\u21a3',
+        'RightCeiling;': '\u2309',
+        'RightDoubleBracket;': '\u27e7',
+        'RightDownTeeVector;': '\u295d',
+        'RightDownVector;': '\u21c2',
+        'RightDownVectorBar;': '\u2955',
+        'RightFloor;': '\u230b',
+        'rightharpoondown;': '\u21c1',
+        'rightharpoonup;': '\u21c0',
+        'rightleftarrows;': '\u21c4',
+        'rightleftharpoons;': '\u21cc',
+        'rightrightarrows;': '\u21c9',
+        'rightsquigarrow;': '\u219d',
+        'RightTee;': '\u22a2',
+        'RightTeeArrow;': '\u21a6',
+        'RightTeeVector;': '\u295b',
+        'rightthreetimes;': '\u22cc',
+        'RightTriangle;': '\u22b3',
+        'RightTriangleBar;': '\u29d0',
+        'RightTriangleEqual;': '\u22b5',
+        'RightUpDownVector;': '\u294f',
+        'RightUpTeeVector;': '\u295c',
+        'RightUpVector;': '\u21be',
+        'RightUpVectorBar;': '\u2954',
+        'RightVector;': '\u21c0',
+        'RightVectorBar;': '\u2953',
+        'ring;': '\u02da',
+        'risingdotseq;': '\u2253',
+        'rlarr;': '\u21c4',
+        'rlhar;': '\u21cc',
+        'rlm;': '\u200f',
+        'rmoust;': '\u23b1',
+        'rmoustache;': '\u23b1',
+        'rnmid;': '\u2aee',
+        'roang;': '\u27ed',
+        'roarr;': '\u21fe',
+        'robrk;': '\u27e7',
+        'ropar;': '\u2986',
+        'Ropf;': '\u211d',
+        'ropf;': '\U0001d563',
+        'roplus;': '\u2a2e',
+        'rotimes;': '\u2a35',
+        'RoundImplies;': '\u2970',
+        'rpar;': ')',
+        'rpargt;': '\u2994',
+        'rppolint;': '\u2a12',
+        'rrarr;': '\u21c9',
+        'Rrightarrow;': '\u21db',
+        'rsaquo;': '\u203a',
+        'Rscr;': '\u211b',
+        'rscr;': '\U0001d4c7',
+        'Rsh;': '\u21b1',
+        'rsh;': '\u21b1',
+        'rsqb;': ']',
+        'rsquo;': '\u2019',
+        'rsquor;': '\u2019',
+        'rthree;': '\u22cc',
+        'rtimes;': '\u22ca',
+        'rtri;': '\u25b9',
+        'rtrie;': '\u22b5',
+        'rtrif;': '\u25b8',
+        'rtriltri;': '\u29ce',
+        'RuleDelayed;': '\u29f4',
+        'ruluhar;': '\u2968',
+        'rx;': '\u211e',
+        'Sacute;': '\u015a',
+        'sacute;': '\u015b',
+        'sbquo;': '\u201a',
+        'Sc;': '\u2abc',
+        'sc;': '\u227b',
+        'scap;': '\u2ab8',
+        'Scaron;': '\u0160',
+        'scaron;': '\u0161',
+        'sccue;': '\u227d',
+        'scE;': '\u2ab4',
+        'sce;': '\u2ab0',
+        'Scedil;': '\u015e',
+        'scedil;': '\u015f',
+        'Scirc;': '\u015c',
+        'scirc;': '\u015d',
+        'scnap;': '\u2aba',
+        'scnE;': '\u2ab6',
+        'scnsim;': '\u22e9',
+        'scpolint;': '\u2a13',
+        'scsim;': '\u227f',
+        'Scy;': '\u0421',
+        'scy;': '\u0441',
+        'sdot;': '\u22c5',
+        'sdotb;': '\u22a1',
+        'sdote;': '\u2a66',
+        'searhk;': '\u2925',
+        'seArr;': '\u21d8',
+        'searr;': '\u2198',
+        'searrow;': '\u2198',
+        'sect': '\xa7',
+        'sect;': '\xa7',
+        'semi;': ';',
+        'seswar;': '\u2929',
+        'setminus;': '\u2216',
+        'setmn;': '\u2216',
+        'sext;': '\u2736',
+        'Sfr;': '\U0001d516',
+        'sfr;': '\U0001d530',
+        'sfrown;': '\u2322',
+        'sharp;': '\u266f',
+        'SHCHcy;': '\u0429',
+        'shchcy;': '\u0449',
+        'SHcy;': '\u0428',
+        'shcy;': '\u0448',
+        'ShortDownArrow;': '\u2193',
+        'ShortLeftArrow;': '\u2190',
+        'shortmid;': '\u2223',
+        'shortparallel;': '\u2225',
+        'ShortRightArrow;': '\u2192',
+        'ShortUpArrow;': '\u2191',
+        'shy': '\xad',
+        'shy;': '\xad',
+        'Sigma;': '\u03a3',
+        'sigma;': '\u03c3',
+        'sigmaf;': '\u03c2',
+        'sigmav;': '\u03c2',
+        'sim;': '\u223c',
+        'simdot;': '\u2a6a',
+        'sime;': '\u2243',
+        'simeq;': '\u2243',
+        'simg;': '\u2a9e',
+        'simgE;': '\u2aa0',
+        'siml;': '\u2a9d',
+        'simlE;': '\u2a9f',
+        'simne;': '\u2246',
+        'simplus;': '\u2a24',
+        'simrarr;': '\u2972',
+        'slarr;': '\u2190',
+        'SmallCircle;': '\u2218',
+        'smallsetminus;': '\u2216',
+        'smashp;': '\u2a33',
+        'smeparsl;': '\u29e4',
+        'smid;': '\u2223',
+        'smile;': '\u2323',
+        'smt;': '\u2aaa',
+        'smte;': '\u2aac',
+        'smtes;': '\u2aac\ufe00',
+        'SOFTcy;': '\u042c',
+        'softcy;': '\u044c',
+        'sol;': '/',
+        'solb;': '\u29c4',
+        'solbar;': '\u233f',
+        'Sopf;': '\U0001d54a',
+        'sopf;': '\U0001d564',
+        'spades;': '\u2660',
+        'spadesuit;': '\u2660',
+        'spar;': '\u2225',
+        'sqcap;': '\u2293',
+        'sqcaps;': '\u2293\ufe00',
+        'sqcup;': '\u2294',
+        'sqcups;': '\u2294\ufe00',
+        'Sqrt;': '\u221a',
+        'sqsub;': '\u228f',
+        'sqsube;': '\u2291',
+        'sqsubset;': '\u228f',
+        'sqsubseteq;': '\u2291',
+        'sqsup;': '\u2290',
+        'sqsupe;': '\u2292',
+        'sqsupset;': '\u2290',
+        'sqsupseteq;': '\u2292',
+        'squ;': '\u25a1',
+        'Square;': '\u25a1',
+        'square;': '\u25a1',
+        'SquareIntersection;': '\u2293',
+        'SquareSubset;': '\u228f',
+        'SquareSubsetEqual;': '\u2291',
+        'SquareSuperset;': '\u2290',
+        'SquareSupersetEqual;': '\u2292',
+        'SquareUnion;': '\u2294',
+        'squarf;': '\u25aa',
+        'squf;': '\u25aa',
+        'srarr;': '\u2192',
+        'Sscr;': '\U0001d4ae',
+        'sscr;': '\U0001d4c8',
+        'ssetmn;': '\u2216',
+        'ssmile;': '\u2323',
+        'sstarf;': '\u22c6',
+        'Star;': '\u22c6',
+        'star;': '\u2606',
+        'starf;': '\u2605',
+        'straightepsilon;': '\u03f5',
+        'straightphi;': '\u03d5',
+        'strns;': '\xaf',
+        'Sub;': '\u22d0',
+        'sub;': '\u2282',
+        'subdot;': '\u2abd',
+        'subE;': '\u2ac5',
+        'sube;': '\u2286',
+        'subedot;': '\u2ac3',
+        'submult;': '\u2ac1',
+        'subnE;': '\u2acb',
+        'subne;': '\u228a',
+        'subplus;': '\u2abf',
+        'subrarr;': '\u2979',
+        'Subset;': '\u22d0',
+        'subset;': '\u2282',
+        'subseteq;': '\u2286',
+        'subseteqq;': '\u2ac5',
+        'SubsetEqual;': '\u2286',
+        'subsetneq;': '\u228a',
+        'subsetneqq;': '\u2acb',
+        'subsim;': '\u2ac7',
+        'subsub;': '\u2ad5',
+        'subsup;': '\u2ad3',
+        'succ;': '\u227b',
+        'succapprox;': '\u2ab8',
+        'succcurlyeq;': '\u227d',
+        'Succeeds;': '\u227b',
+        'SucceedsEqual;': '\u2ab0',
+        'SucceedsSlantEqual;': '\u227d',
+        'SucceedsTilde;': '\u227f',
+        'succeq;': '\u2ab0',
+        'succnapprox;': '\u2aba',
+        'succneqq;': '\u2ab6',
+        'succnsim;': '\u22e9',
+        'succsim;': '\u227f',
+        'SuchThat;': '\u220b',
+        'Sum;': '\u2211',
+        'sum;': '\u2211',
+        'sung;': '\u266a',
+        'sup1': '\xb9',
+        'sup1;': '\xb9',
+        'sup2': '\xb2',
+        'sup2;': '\xb2',
+        'sup3': '\xb3',
+        'sup3;': '\xb3',
+        'Sup;': '\u22d1',
+        'sup;': '\u2283',
+        'supdot;': '\u2abe',
+        'supdsub;': '\u2ad8',
+        'supE;': '\u2ac6',
+        'supe;': '\u2287',
+        'supedot;': '\u2ac4',
+        'Superset;': '\u2283',
+        'SupersetEqual;': '\u2287',
+        'suphsol;': '\u27c9',
+        'suphsub;': '\u2ad7',
+        'suplarr;': '\u297b',
+        'supmult;': '\u2ac2',
+        'supnE;': '\u2acc',
+        'supne;': '\u228b',
+        'supplus;': '\u2ac0',
+        'Supset;': '\u22d1',
+        'supset;': '\u2283',
+        'supseteq;': '\u2287',
+        'supseteqq;': '\u2ac6',
+        'supsetneq;': '\u228b',
+        'supsetneqq;': '\u2acc',
+        'supsim;': '\u2ac8',
+        'supsub;': '\u2ad4',
+        'supsup;': '\u2ad6',
+        'swarhk;': '\u2926',
+        'swArr;': '\u21d9',
+        'swarr;': '\u2199',
+        'swarrow;': '\u2199',
+        'swnwar;': '\u292a',
+        'szlig': '\xdf',
+        'szlig;': '\xdf',
+        'Tab;': '\t',
+        'target;': '\u2316',
+        'Tau;': '\u03a4',
+        'tau;': '\u03c4',
+        'tbrk;': '\u23b4',
+        'Tcaron;': '\u0164',
+        'tcaron;': '\u0165',
+        'Tcedil;': '\u0162',
+        'tcedil;': '\u0163',
+        'Tcy;': '\u0422',
+        'tcy;': '\u0442',
+        'tdot;': '\u20db',
+        'telrec;': '\u2315',
+        'Tfr;': '\U0001d517',
+        'tfr;': '\U0001d531',
+        'there4;': '\u2234',
+        'Therefore;': '\u2234',
+        'therefore;': '\u2234',
+        'Theta;': '\u0398',
+        'theta;': '\u03b8',
+        'thetasym;': '\u03d1',
+        'thetav;': '\u03d1',
+        'thickapprox;': '\u2248',
+        'thicksim;': '\u223c',
+        'ThickSpace;': '\u205f\u200a',
+        'thinsp;': '\u2009',
+        'ThinSpace;': '\u2009',
+        'thkap;': '\u2248',
+        'thksim;': '\u223c',
+        'THORN': '\xde',
+        'thorn': '\xfe',
+        'THORN;': '\xde',
+        'thorn;': '\xfe',
+        'Tilde;': '\u223c',
+        'tilde;': '\u02dc',
+        'TildeEqual;': '\u2243',
+        'TildeFullEqual;': '\u2245',
+        'TildeTilde;': '\u2248',
+        'times': '\xd7',
+        'times;': '\xd7',
+        'timesb;': '\u22a0',
+        'timesbar;': '\u2a31',
+        'timesd;': '\u2a30',
+        'tint;': '\u222d',
+        'toea;': '\u2928',
+        'top;': '\u22a4',
+        'topbot;': '\u2336',
+        'topcir;': '\u2af1',
+        'Topf;': '\U0001d54b',
+        'topf;': '\U0001d565',
+        'topfork;': '\u2ada',
+        'tosa;': '\u2929',
+        'tprime;': '\u2034',
+        'TRADE;': '\u2122',
+        'trade;': '\u2122',
+        'triangle;': '\u25b5',
+        'triangledown;': '\u25bf',
+        'triangleleft;': '\u25c3',
+        'trianglelefteq;': '\u22b4',
+        'triangleq;': '\u225c',
+        'triangleright;': '\u25b9',
+        'trianglerighteq;': '\u22b5',
+        'tridot;': '\u25ec',
+        'trie;': '\u225c',
+        'triminus;': '\u2a3a',
+        'TripleDot;': '\u20db',
+        'triplus;': '\u2a39',
+        'trisb;': '\u29cd',
+        'tritime;': '\u2a3b',
+        'trpezium;': '\u23e2',
+        'Tscr;': '\U0001d4af',
+        'tscr;': '\U0001d4c9',
+        'TScy;': '\u0426',
+        'tscy;': '\u0446',
+        'TSHcy;': '\u040b',
+        'tshcy;': '\u045b',
+        'Tstrok;': '\u0166',
+        'tstrok;': '\u0167',
+        'twixt;': '\u226c',
+        'twoheadleftarrow;': '\u219e',
+        'twoheadrightarrow;': '\u21a0',
+        'Uacute': '\xda',
+        'uacute': '\xfa',
+        'Uacute;': '\xda',
+        'uacute;': '\xfa',
+        'Uarr;': '\u219f',
+        'uArr;': '\u21d1',
+        'uarr;': '\u2191',
+        'Uarrocir;': '\u2949',
+        'Ubrcy;': '\u040e',
+        'ubrcy;': '\u045e',
+        'Ubreve;': '\u016c',
+        'ubreve;': '\u016d',
+        'Ucirc': '\xdb',
+        'ucirc': '\xfb',
+        'Ucirc;': '\xdb',
+        'ucirc;': '\xfb',
+        'Ucy;': '\u0423',
+        'ucy;': '\u0443',
+        'udarr;': '\u21c5',
+        'Udblac;': '\u0170',
+        'udblac;': '\u0171',
+        'udhar;': '\u296e',
+        'ufisht;': '\u297e',
+        'Ufr;': '\U0001d518',
+        'ufr;': '\U0001d532',
+        'Ugrave': '\xd9',
+        'ugrave': '\xf9',
+        'Ugrave;': '\xd9',
+        'ugrave;': '\xf9',
+        'uHar;': '\u2963',
+        'uharl;': '\u21bf',
+        'uharr;': '\u21be',
+        'uhblk;': '\u2580',
+        'ulcorn;': '\u231c',
+        'ulcorner;': '\u231c',
+        'ulcrop;': '\u230f',
+        'ultri;': '\u25f8',
+        'Umacr;': '\u016a',
+        'umacr;': '\u016b',
+        'uml': '\xa8',
+        'uml;': '\xa8',
+        'UnderBar;': '_',
+        'UnderBrace;': '\u23df',
+        'UnderBracket;': '\u23b5',
+        'UnderParenthesis;': '\u23dd',
+        'Union;': '\u22c3',
+        'UnionPlus;': '\u228e',
+        'Uogon;': '\u0172',
+        'uogon;': '\u0173',
+        'Uopf;': '\U0001d54c',
+        'uopf;': '\U0001d566',
+        'UpArrow;': '\u2191',
+        'Uparrow;': '\u21d1',
+        'uparrow;': '\u2191',
+        'UpArrowBar;': '\u2912',
+        'UpArrowDownArrow;': '\u21c5',
+        'UpDownArrow;': '\u2195',
+        'Updownarrow;': '\u21d5',
+        'updownarrow;': '\u2195',
+        'UpEquilibrium;': '\u296e',
+        'upharpoonleft;': '\u21bf',
+        'upharpoonright;': '\u21be',
+        'uplus;': '\u228e',
+        'UpperLeftArrow;': '\u2196',
+        'UpperRightArrow;': '\u2197',
+        'Upsi;': '\u03d2',
+        'upsi;': '\u03c5',
+        'upsih;': '\u03d2',
+        'Upsilon;': '\u03a5',
+        'upsilon;': '\u03c5',
+        'UpTee;': '\u22a5',
+        'UpTeeArrow;': '\u21a5',
+        'upuparrows;': '\u21c8',
+        'urcorn;': '\u231d',
+        'urcorner;': '\u231d',
+        'urcrop;': '\u230e',
+        'Uring;': '\u016e',
+        'uring;': '\u016f',
+        'urtri;': '\u25f9',
+        'Uscr;': '\U0001d4b0',
+        'uscr;': '\U0001d4ca',
+        'utdot;': '\u22f0',
+        'Utilde;': '\u0168',
+        'utilde;': '\u0169',
+        'utri;': '\u25b5',
+        'utrif;': '\u25b4',
+        'uuarr;': '\u21c8',
+        'Uuml': '\xdc',
+        'uuml': '\xfc',
+        'Uuml;': '\xdc',
+        'uuml;': '\xfc',
+        'uwangle;': '\u29a7',
+        'vangrt;': '\u299c',
+        'varepsilon;': '\u03f5',
+        'varkappa;': '\u03f0',
+        'varnothing;': '\u2205',
+        'varphi;': '\u03d5',
+        'varpi;': '\u03d6',
+        'varpropto;': '\u221d',
+        'vArr;': '\u21d5',
+        'varr;': '\u2195',
+        'varrho;': '\u03f1',
+        'varsigma;': '\u03c2',
+        'varsubsetneq;': '\u228a\ufe00',
+        'varsubsetneqq;': '\u2acb\ufe00',
+        'varsupsetneq;': '\u228b\ufe00',
+        'varsupsetneqq;': '\u2acc\ufe00',
+        'vartheta;': '\u03d1',
+        'vartriangleleft;': '\u22b2',
+        'vartriangleright;': '\u22b3',
+        'Vbar;': '\u2aeb',
+        'vBar;': '\u2ae8',
+        'vBarv;': '\u2ae9',
+        'Vcy;': '\u0412',
+        'vcy;': '\u0432',
+        'VDash;': '\u22ab',
+        'Vdash;': '\u22a9',
+        'vDash;': '\u22a8',
+        'vdash;': '\u22a2',
+        'Vdashl;': '\u2ae6',
+        'Vee;': '\u22c1',
+        'vee;': '\u2228',
+        'veebar;': '\u22bb',
+        'veeeq;': '\u225a',
+        'vellip;': '\u22ee',
+        'Verbar;': '\u2016',
+        'verbar;': '|',
+        'Vert;': '\u2016',
+        'vert;': '|',
+        'VerticalBar;': '\u2223',
+        'VerticalLine;': '|',
+        'VerticalSeparator;': '\u2758',
+        'VerticalTilde;': '\u2240',
+        'VeryThinSpace;': '\u200a',
+        'Vfr;': '\U0001d519',
+        'vfr;': '\U0001d533',
+        'vltri;': '\u22b2',
+        'vnsub;': '\u2282\u20d2',
+        'vnsup;': '\u2283\u20d2',
+        'Vopf;': '\U0001d54d',
+        'vopf;': '\U0001d567',
+        'vprop;': '\u221d',
+        'vrtri;': '\u22b3',
+        'Vscr;': '\U0001d4b1',
+        'vscr;': '\U0001d4cb',
+        'vsubnE;': '\u2acb\ufe00',
+        'vsubne;': '\u228a\ufe00',
+        'vsupnE;': '\u2acc\ufe00',
+        'vsupne;': '\u228b\ufe00',
+        'Vvdash;': '\u22aa',
+        'vzigzag;': '\u299a',
+        'Wcirc;': '\u0174',
+        'wcirc;': '\u0175',
+        'wedbar;': '\u2a5f',
+        'Wedge;': '\u22c0',
+        'wedge;': '\u2227',
+        'wedgeq;': '\u2259',
+        'weierp;': '\u2118',
+        'Wfr;': '\U0001d51a',
+        'wfr;': '\U0001d534',
+        'Wopf;': '\U0001d54e',
+        'wopf;': '\U0001d568',
+        'wp;': '\u2118',
+        'wr;': '\u2240',
+        'wreath;': '\u2240',
+        'Wscr;': '\U0001d4b2',
+        'wscr;': '\U0001d4cc',
+        'xcap;': '\u22c2',
+        'xcirc;': '\u25ef',
+        'xcup;': '\u22c3',
+        'xdtri;': '\u25bd',
+        'Xfr;': '\U0001d51b',
+        'xfr;': '\U0001d535',
+        'xhArr;': '\u27fa',
+        'xharr;': '\u27f7',
+        'Xi;': '\u039e',
+        'xi;': '\u03be',
+        'xlArr;': '\u27f8',
+        'xlarr;': '\u27f5',
+        'xmap;': '\u27fc',
+        'xnis;': '\u22fb',
+        'xodot;': '\u2a00',
+        'Xopf;': '\U0001d54f',
+        'xopf;': '\U0001d569',
+        'xoplus;': '\u2a01',
+        'xotime;': '\u2a02',
+        'xrArr;': '\u27f9',
+        'xrarr;': '\u27f6',
+        'Xscr;': '\U0001d4b3',
+        'xscr;': '\U0001d4cd',
+        'xsqcup;': '\u2a06',
+        'xuplus;': '\u2a04',
+        'xutri;': '\u25b3',
+        'xvee;': '\u22c1',
+        'xwedge;': '\u22c0',
+        'Yacute': '\xdd',
+        'yacute': '\xfd',
+        'Yacute;': '\xdd',
+        'yacute;': '\xfd',
+        'YAcy;': '\u042f',
+        'yacy;': '\u044f',
+        'Ycirc;': '\u0176',
+        'ycirc;': '\u0177',
+        'Ycy;': '\u042b',
+        'ycy;': '\u044b',
+        'yen': '\xa5',
+        'yen;': '\xa5',
+        'Yfr;': '\U0001d51c',
+        'yfr;': '\U0001d536',
+        'YIcy;': '\u0407',
+        'yicy;': '\u0457',
+        'Yopf;': '\U0001d550',
+        'yopf;': '\U0001d56a',
+        'Yscr;': '\U0001d4b4',
+        'yscr;': '\U0001d4ce',
+        'YUcy;': '\u042e',
+        'yucy;': '\u044e',
+        'yuml': '\xff',
+        'Yuml;': '\u0178',
+        'yuml;': '\xff',
+        'Zacute;': '\u0179',
+        'zacute;': '\u017a',
+        'Zcaron;': '\u017d',
+        'zcaron;': '\u017e',
+        'Zcy;': '\u0417',
+        'zcy;': '\u0437',
+        'Zdot;': '\u017b',
+        'zdot;': '\u017c',
+        'zeetrf;': '\u2128',
+        'ZeroWidthSpace;': '\u200b',
+        'Zeta;': '\u0396',
+        'zeta;': '\u03b6',
+        'Zfr;': '\u2128',
+        'zfr;': '\U0001d537',
+        'ZHcy;': '\u0416',
+        'zhcy;': '\u0436',
+        'zigrarr;': '\u21dd',
+        'Zopf;': '\u2124',
+        'zopf;': '\U0001d56b',
+        'Zscr;': '\U0001d4b5',
+        'zscr;': '\U0001d4cf',
+        'zwj;': '\u200d',
+        'zwnj;': '\u200c',
+    }
+
 try:
     import http.client as compat_http_client
 except ImportError:  # Python 2
@@ -77,6 +2316,10 @@ try:
 except ImportError:  # Python 2
     from urllib import urlretrieve as compat_urlretrieve
 
+try:
+    from html.parser import HTMLParser as compat_HTMLParser
+except ImportError:  # Python 2
+    from HTMLParser import HTMLParser as compat_HTMLParser
 
 try:
     from subprocess import DEVNULL
@@ -164,6 +2407,32 @@ except ImportError:  # Python 2
         string = string.replace('+', ' ')
         return compat_urllib_parse_unquote(string, encoding, errors)
 
+try:
+    from urllib.parse import urlencode as compat_urllib_parse_urlencode
+except ImportError:  # Python 2
+    # Python 2 will choke in urlencode on a mixture of byte and unicode strings.
+    # Possible solutions are to either port it from Python 3 with all of its
+    # helpers or to manually ensure the input query contains only byte strings.
+    # We stick with the latter, recursively encoding the whole query.
+    def compat_urllib_parse_urlencode(query, doseq=0, encoding='utf-8'):
+        def encode_elem(e):
+            if isinstance(e, dict):
+                e = encode_dict(e)
+            elif isinstance(e, (list, tuple,)):
+                list_e = encode_list(e)
+                e = tuple(list_e) if isinstance(e, tuple) else list_e
+            elif isinstance(e, compat_str):
+                e = e.encode(encoding)
+            return e
+
+        def encode_dict(d):
+            return dict((encode_elem(k), encode_elem(v)) for k, v in d.items())
+
+        def encode_list(l):
+            return [encode_elem(e) for e in l]
+
+        return compat_urllib_parse.urlencode(encode_elem(query), doseq=doseq)
+
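As a hedged usage sketch (the query values are invented for illustration, and this assumes the youtube_dl package is importable): on Python 2 the stdlib urlencode chokes on a dict mixing byte and unicode strings, while the wrapper above encodes everything recursively first.

    # Sketch only: the compat wrapper encodes unicode keys/values before
    # delegating to urlencode, so mixed input works on Python 2 as well.
    from youtube_dl.compat import compat_urllib_parse_urlencode

    query = {'title': u'caf\u00e9', 'id': b'42'}  # invented mixed-type query
    print(compat_urllib_parse_urlencode(query))   # e.g. 'title=caf%C3%A9&id=42' (order may vary)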
 try:
     from urllib.request import DataHandler as compat_urllib_request_DataHandler
 except ImportError:  # Python < 3.4
@@ -213,13 +2482,20 @@ try:
 except ImportError:  # Python 2.6
     from xml.parsers.expat import ExpatError as compat_xml_parse_error
 
+
+etree = xml.etree.ElementTree
+
+
+class _TreeBuilder(etree.TreeBuilder):
+    def doctype(self, name, pubid, system):
+        pass
+
 if sys.version_info[0] >= 3:
-    compat_etree_fromstring = xml.etree.ElementTree.fromstring
+    def compat_etree_fromstring(text):
+        return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
 else:
     # python 2.x tries to encode unicode strings with ascii (see the
     # XMLParser._fixtext method)
-    etree = xml.etree.ElementTree
-
     try:
         _etree_iter = etree.Element.iter
     except AttributeError:  # Python <=2.6
@@ -233,7 +2509,7 @@ else:
     # 2.7 source
     def _XML(text, parser=None):
         if not parser:
-            parser = etree.XMLParser(target=etree.TreeBuilder())
+            parser = etree.XMLParser(target=_TreeBuilder())
         parser.feed(text)
         return parser.close()
 
@@ -245,12 +2521,22 @@ else:
         return el
 
     def compat_etree_fromstring(text):
-        doc = _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory)))
+        doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))
         for el in _etree_iter(doc):
             if el.text is not None and isinstance(el.text, bytes):
                 el.text = el.text.decode('utf-8')
         return doc
 
+if sys.version_info < (2, 7):
+    # Here comes the crazy part: in 2.6, if the xpath is a unicode string,
+    # .//node does not match nodes that are direct children of . !
+    def compat_xpath(xpath):
+        if isinstance(xpath, compat_str):
+            xpath = xpath.encode('ascii')
+        return xpath
+else:
+    compat_xpath = lambda xpath: xpath
+
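A minimal sketch of how compat_xpath is meant to be used at call sites (the XML snippet is invented): on Python 2.6 the xpath must be a byte string for './/node' to match direct children, so every xpath is passed through the wrapper; on newer Pythons it is a no-op.

    # Sketch: wrapping an xpath so ElementTree behaves on Python 2.6 too.
    import xml.etree.ElementTree as etree
    from youtube_dl.compat import compat_xpath

    doc = etree.XML(b'<root><node>x</node></root>')        # invented document
    print(doc.findall(compat_xpath(u'.//node'))[0].text)   # -> 'x'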
 try:
     from urllib.parse import parse_qs as compat_parse_qs
 except ImportError:  # Python 2
@@ -299,9 +2585,9 @@ except ImportError:  # Python 2
         return parsed_result
 
 try:
-    from shlex import quote as shlex_quote
+    from shlex import quote as compat_shlex_quote
 except ImportError:  # Python < 3.3
-    def shlex_quote(s):
+    def compat_shlex_quote(s):
         if re.match(r'^[-_\w./]+$', s):
             return s
         else:
@@ -326,9 +2612,15 @@ def compat_ord(c):
         return ord(c)
 
 
+compat_os_name = os._name if os.name == 'java' else os.name
+
+
 if sys.version_info >= (3, 0):
     compat_getenv = os.getenv
     compat_expanduser = os.path.expanduser
+
+    def compat_setenv(key, value, env=os.environ):
+        env[key] = value
 else:
     # Environment variables should be decoded with filesystem encoding.
     # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)
@@ -340,13 +2632,19 @@ else:
             env = env.decode(get_filesystem_encoding())
         return env
 
+    def compat_setenv(key, value, env=os.environ):
+        def encode(v):
+            from .utils import get_filesystem_encoding
+            return v.encode(get_filesystem_encoding()) if isinstance(v, compat_str) else v
+        env[encode(key)] = encode(value)
+
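For illustration (the proxy URL is hypothetical), compat_setenv gives callers one spelling for setting environment variables that also encodes unicode values with the filesystem encoding on Python 2; the FFmpegFD proxy handling later in this diff uses exactly this pattern.

    # Sketch: set a proxy on a copy of the environment, as FFmpegFD does below.
    import os
    from youtube_dl.compat import compat_setenv

    env = os.environ.copy()
    compat_setenv('HTTP_PROXY', 'http://127.0.0.1:8118', env=env)  # hypothetical proxy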
     # HACK: The default implementations of os.path.expanduser from cpython do not decode
     # environment variables with filesystem encoding. We will work around this by
     # providing adjusted implementations.
     # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
     # for different platforms with correct environment variables decoding.
 
-    if os.name == 'posix':
+    if compat_os_name == 'posix':
         def compat_expanduser(path):
             """Expand ~ and ~user constructions.  If user or $HOME is unknown,
             do nothing."""
@@ -370,7 +2668,7 @@ else:
                 userhome = pwent.pw_dir
             userhome = userhome.rstrip('/')
             return (userhome + path[i:]) or '/'
-    elif os.name == 'nt' or os.name == 'ce':
+    elif compat_os_name == 'nt' or compat_os_name == 'ce':
         def compat_expanduser(path):
             """Expand ~ and ~user constructs.
 
@@ -412,18 +2710,6 @@ else:
         print(s)
 
 
-try:
-    subprocess_check_output = subprocess.check_output
-except AttributeError:
-    def subprocess_check_output(*args, **kwargs):
-        assert 'input' not in kwargs
-        p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs)
-        output, _ = p.communicate()
-        ret = p.poll()
-        if ret:
-            raise subprocess.CalledProcessError(ret, p.args, output=output)
-        return output
-
 if sys.version_info < (3, 0) and sys.platform == 'win32':
     def compat_getpass(prompt, *args, **kwargs):
         if isinstance(prompt, compat_str):
@@ -433,6 +2719,11 @@ if sys.version_info < (3, 0) and sys.platform == 'win32':
 else:
     compat_getpass = getpass.getpass
 
+try:
+    compat_input = raw_input
+except NameError:  # Python 3
+    compat_input = input
+
 # Python < 2.6.5 require kwargs to be bytes
 try:
     def _testfunc(x):
@@ -539,7 +2830,28 @@ if sys.version_info >= (3, 0):
 else:
     from tokenize import generate_tokens as compat_tokenize_tokenize
 
+
+try:
+    struct.pack('!I', 0)
+except TypeError:
+    # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument
+    # See https://bugs.python.org/issue19099
+    def compat_struct_pack(spec, *args):
+        if isinstance(spec, compat_str):
+            spec = spec.encode('ascii')
+        return struct.pack(spec, *args)
+
+    def compat_struct_unpack(spec, *args):
+        if isinstance(spec, compat_str):
+            spec = spec.encode('ascii')
+        return struct.unpack(spec, *args)
+else:
+    compat_struct_pack = struct.pack
+    compat_struct_unpack = struct.unpack
+
+
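A short sketch of the bug this guards against (see https://bugs.python.org/issue19099): on Python 2.6 and 2.7.x < 2.7.7, struct.pack() raises TypeError when given a unicode format string, so call sites use the wrappers unconditionally.

    # Sketch: the compat wrappers accept unicode format strings everywhere.
    from youtube_dl.compat import compat_struct_pack, compat_struct_unpack

    data = compat_struct_pack(u'!I', 42)   # b'\x00\x00\x00*' on all versions
    assert compat_struct_unpack(u'!I', data)[0] == 42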
 __all__ = [
+    'compat_HTMLParser',
     'compat_HTTPError',
     'compat_basestring',
     'compat_chr',
@@ -551,16 +2863,23 @@ __all__ = [
     'compat_getenv',
     'compat_getpass',
     'compat_html_entities',
+    'compat_html_entities_html5',
     'compat_http_client',
     'compat_http_server',
+    'compat_input',
     'compat_itertools_count',
     'compat_kwargs',
     'compat_ord',
+    'compat_os_name',
     'compat_parse_qs',
     'compat_print',
+    'compat_setenv',
+    'compat_shlex_quote',
     'compat_shlex_split',
     'compat_socket_create_connection',
     'compat_str',
+    'compat_struct_pack',
+    'compat_struct_unpack',
     'compat_subprocess_get_DEVNULL',
     'compat_tokenize_tokenize',
     'compat_urllib_error',
@@ -568,6 +2887,7 @@ __all__ = [
     'compat_urllib_parse_unquote',
     'compat_urllib_parse_unquote_plus',
     'compat_urllib_parse_unquote_to_bytes',
+    'compat_urllib_parse_urlencode',
     'compat_urllib_parse_urlparse',
     'compat_urllib_request',
     'compat_urllib_request_DataHandler',
@@ -575,7 +2895,6 @@ __all__ = [
     'compat_urlparse',
     'compat_urlretrieve',
     'compat_xml_parse_error',
-    'shlex_quote',
-    'subprocess_check_output',
+    'compat_xpath',
     'workaround_optparse_bug9161',
 ]
index dccc59212d3028bb9a96f0eb9ffff4acb0be681e..817591d97e88606b966b7055026f691faab840dc 100644 (file)
@@ -1,14 +1,16 @@
 from __future__ import unicode_literals
 
 from .common import FileDownloader
-from .external import get_external_downloader
 from .f4m import F4mFD
 from .hls import HlsFD
-from .hls import NativeHlsFD
 from .http import HttpFD
-from .rtsp import RtspFD
 from .rtmp import RtmpFD
 from .dash import DashSegmentsFD
+from .rtsp import RtspFD
+from .external import (
+    get_external_downloader,
+    FFmpegFD,
+)
 
 from ..utils import (
     determine_protocol,
@@ -16,8 +18,8 @@ from ..utils import (
 
 PROTOCOL_MAP = {
     'rtmp': RtmpFD,
-    'm3u8_native': NativeHlsFD,
-    'm3u8': HlsFD,
+    'm3u8_native': HlsFD,
+    'm3u8': FFmpegFD,
     'mms': RtspFD,
     'rtsp': RtspFD,
     'f4m': F4mFD,
@@ -30,14 +32,20 @@ def get_suitable_downloader(info_dict, params={}):
     protocol = determine_protocol(info_dict)
     info_dict['protocol'] = protocol
 
+    # if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
+    #     return FFmpegFD
+
     external_downloader = params.get('external_downloader')
     if external_downloader is not None:
         ed = get_external_downloader(external_downloader)
-        if ed.supports(info_dict):
+        if ed.can_download(info_dict):
             return ed
 
-    if protocol == 'm3u8' and params.get('hls_prefer_native'):
-        return NativeHlsFD
+    if protocol == 'm3u8' and params.get('hls_prefer_native') is True:
+        return HlsFD
+
+    if protocol == 'm3u8_native' and params.get('hls_prefer_native') is False:
+        return FFmpegFD
 
     return PROTOCOL_MAP.get(protocol, HttpFD)
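To make the remapping above concrete, a hedged sketch (the info dict is invented): m3u8_native now resolves to the rewritten native HlsFD, plain m3u8 defaults to FFmpegFD, and hls_prefer_native flips between them only when it is explicitly True or False.

    # Sketch: resolving a downloader through the remapped PROTOCOL_MAP.
    from youtube_dl.downloader import get_suitable_downloader

    info = {'url': 'https://example.com/video.m3u8', 'protocol': 'm3u8'}  # invented
    fd = get_suitable_downloader(info, {'hls_prefer_native': True})
    print(fd.__name__)  # 'HlsFD' instead of the default 'FFmpegFD'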
 
index 2d51540518f7ee40c7d002d632007d5aa9542697..1dba9f49a8b9b586b8428c6c7d65ca641de02c58 100644 (file)
@@ -5,6 +5,7 @@ import re
 import sys
 import time
 
+from ..compat import compat_os_name
 from ..utils import (
     encodeFilename,
     error_to_compat_str,
@@ -114,6 +115,10 @@ class FileDownloader(object):
             return '%10s' % '---b/s'
         return '%10s' % ('%s/s' % format_bytes(speed))
 
+    @staticmethod
+    def format_retries(retries):
+        return 'inf' if retries == float('inf') else '%.0f' % retries
+
     @staticmethod
     def best_block_size(elapsed_time, bytes):
         new_min = max(bytes / 2.0, 1.0)
@@ -219,7 +224,7 @@ class FileDownloader(object):
         if self.params.get('progress_with_newline', False):
             self.to_screen(fullmsg)
         else:
-            if os.name == 'nt':
+            if compat_os_name == 'nt':
                 prev_len = getattr(self, '_report_progress_prev_line_length',
                                    0)
                 if prev_len > len(fullmsg):
@@ -296,7 +301,9 @@ class FileDownloader(object):
 
     def report_retry(self, count, retries):
         """Report retry in case of HTTP error 5xx"""
-        self.to_screen('[download] Got server HTTP error. Retrying (attempt %d of %.0f)...' % (count, retries))
+        self.to_screen(
+            '[download] Got server HTTP error. Retrying (attempt %d of %s)...'
+            % (count, self.format_retries(retries)))
 
     def report_file_already_downloaded(self, file_name):
         """Report file has already been fully downloaded."""
index 8b1b17c6ec5c0b05a93a63ef446d549c76951e0e..8bbab9dbc596c659db622fe9910d0ae90018a598 100644 (file)
@@ -4,6 +4,7 @@ import os
 import re
 
 from .fragment import FragmentFD
+from ..compat import compat_urllib_error
 from ..utils import (
     sanitize_open,
     encodeFilename,
@@ -36,20 +37,41 @@ class DashSegmentsFD(FragmentFD):
 
         segments_filenames = []
 
-        def append_url_to_file(target_url, target_filename):
-            success = ctx['dl'].download(target_filename, {'url': combine_url(base_url, target_url)})
-            if not success:
+        fragment_retries = self.params.get('fragment_retries', 0)
+
+        def append_url_to_file(target_url, tmp_filename, segment_name):
+            target_filename = '%s-%s' % (tmp_filename, segment_name)
+            count = 0
+            while count <= fragment_retries:
+                try:
+                    success = ctx['dl'].download(target_filename, {'url': combine_url(base_url, target_url)})
+                    if not success:
+                        return False
+                    down, target_sanitized = sanitize_open(target_filename, 'rb')
+                    ctx['dest_stream'].write(down.read())
+                    down.close()
+                    segments_filenames.append(target_sanitized)
+                    break
+                except (compat_urllib_error.HTTPError, ) as err:
+                    # YouTube may often return a 404 HTTP error for a fragment, causing the
+                    # whole download to fail. However, if the same fragment is immediately
+                    # retried with the same request data, this usually succeeds (1-2 attempts
+                    # are usually enough), thus allowing the whole file to be downloaded
+                    # successfully. So, for now, we retry all fragments that fail with a 404.
+                    if err.code != 404:
+                        raise
+                    # Retry fragment
+                    count += 1
+                    if count <= fragment_retries:
+                        self.report_retry_fragment(segment_name, count, fragment_retries)
+            if count > fragment_retries:
+                self.report_error('giving up after %s fragment retries' % fragment_retries)
                 return False
-            down, target_sanitized = sanitize_open(target_filename, 'rb')
-            ctx['dest_stream'].write(down.read())
-            down.close()
-            segments_filenames.append(target_sanitized)
 
         if initialization_url:
-            append_url_to_file(initialization_url, ctx['tmpfilename'] + '-Init')
+            append_url_to_file(initialization_url, ctx['tmpfilename'], 'Init')
         for i, segment_url in enumerate(segment_urls):
-            segment_filename = '%s-Seg%d' % (ctx['tmpfilename'], i)
-            append_url_to_file(segment_url, segment_filename)
+            append_url_to_file(segment_url, ctx['tmpfilename'], 'Seg%d' % i)
 
         self._finish_frag_download(ctx)
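The loop above follows a common retry shape; as a standalone sketch with invented names (Python 3 spelling of HTTPError used for brevity): attempt the download, retry only on HTTP 404, re-raise everything else, and give up after fragment_retries attempts.

    # Sketch of the retry shape used by append_url_to_file (invented names).
    from urllib.error import HTTPError

    def retry_fragment(download, fragment_retries):
        count = 0
        while count <= fragment_retries:
            try:
                return download()
            except HTTPError as err:
                if err.code != 404:
                    raise          # only 404s are considered transient here
                count += 1         # retry the same fragment
        return None                # caller reports 'giving up' on None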
 
index 2bc01126693fa4b520a34afe0ad0a67a61370829..fae2450248494a70f237a65d07a4bedcfddaadeb 100644 (file)
@@ -2,8 +2,12 @@ from __future__ import unicode_literals
 
 import os.path
 import subprocess
+import sys
+import re
 
 from .common import FileDownloader
+from ..compat import compat_setenv
+from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
 from ..utils import (
     cli_option,
     cli_valueless_option,
@@ -11,6 +15,8 @@ from ..utils import (
     cli_configuration_args,
     encodeFilename,
     encodeArgument,
+    handle_youtubedl_headers,
+    check_executable,
 )
 
 
@@ -45,10 +51,18 @@ class ExternalFD(FileDownloader):
     def exe(self):
         return self.params.get('external_downloader')
 
+    @classmethod
+    def available(cls):
+        return check_executable(cls.get_basename(), [cls.AVAILABLE_OPT])
+
     @classmethod
     def supports(cls, info_dict):
         return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')
 
+    @classmethod
+    def can_download(cls, info_dict):
+        return cls.available() and cls.supports(info_dict)
+
     def _option(self, command_option, param):
         return cli_option(self.params, command_option, param)
 
@@ -71,11 +85,13 @@ class ExternalFD(FileDownloader):
             cmd, stderr=subprocess.PIPE)
         _, stderr = p.communicate()
         if p.returncode != 0:
-            self.to_stderr(stderr)
+            self.to_stderr(stderr.decode('utf-8', 'replace'))
         return p.returncode
 
 
 class CurlFD(ExternalFD):
+    AVAILABLE_OPT = '-V'
+
     def _make_cmd(self, tmpfilename, info_dict):
         cmd = [self.exe, '--location', '-o', tmpfilename]
         for key, val in info_dict['http_headers'].items():
@@ -89,6 +105,8 @@ class CurlFD(ExternalFD):
 
 
 class AxelFD(ExternalFD):
+    AVAILABLE_OPT = '-V'
+
     def _make_cmd(self, tmpfilename, info_dict):
         cmd = [self.exe, '-o', tmpfilename]
         for key, val in info_dict['http_headers'].items():
@@ -99,6 +117,8 @@ class AxelFD(ExternalFD):
 
 
 class WgetFD(ExternalFD):
+    AVAILABLE_OPT = '--version'
+
     def _make_cmd(self, tmpfilename, info_dict):
         cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
         for key, val in info_dict['http_headers'].items():
@@ -112,6 +132,8 @@ class WgetFD(ExternalFD):
 
 
 class Aria2cFD(ExternalFD):
+    AVAILABLE_OPT = '-v'
+
     def _make_cmd(self, tmpfilename, info_dict):
         cmd = [self.exe, '-c']
         cmd += self._configuration_args([
@@ -130,12 +152,125 @@ class Aria2cFD(ExternalFD):
 
 
 class HttpieFD(ExternalFD):
+    @classmethod
+    def available(cls):
+        return check_executable('http', ['--version'])
+
     def _make_cmd(self, tmpfilename, info_dict):
         cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
         for key, val in info_dict['http_headers'].items():
             cmd += ['%s:%s' % (key, val)]
         return cmd
 
+
+class FFmpegFD(ExternalFD):
+    @classmethod
+    def supports(cls, info_dict):
+        return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms')
+
+    @classmethod
+    def available(cls):
+        return FFmpegPostProcessor().available
+
+    def _call_downloader(self, tmpfilename, info_dict):
+        url = info_dict['url']
+        ffpp = FFmpegPostProcessor(downloader=self)
+        if not ffpp.available:
+            self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
+            return False
+        ffpp.check_version()
+
+        args = [ffpp.executable, '-y']
+
+        args += self._configuration_args()
+
+        # start_time = info_dict.get('start_time') or 0
+        # if start_time:
+        #     args += ['-ss', compat_str(start_time)]
+        # end_time = info_dict.get('end_time')
+        # if end_time:
+        #     args += ['-t', compat_str(end_time - start_time)]
+
+        if info_dict['http_headers'] and re.match(r'^https?://', url):
+            # Trailing \r\n after each HTTP header is important to prevent a warning from ffmpeg/avconv:
+            # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
+            headers = handle_youtubedl_headers(info_dict['http_headers'])
+            args += [
+                '-headers',
+                ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
+
+        env = None
+        proxy = self.params.get('proxy')
+        if proxy:
+            if not re.match(r'^[\da-zA-Z]+://', proxy):
+                proxy = 'http://%s' % proxy
+            # Since December 2015 ffmpeg supports -http_proxy option (see
+            # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
+            # We could switch to the following code if we are able to detect version properly
+            # args += ['-http_proxy', proxy]
+            env = os.environ.copy()
+            compat_setenv('HTTP_PROXY', proxy, env=env)
+            compat_setenv('http_proxy', proxy, env=env)
+
+        protocol = info_dict.get('protocol')
+
+        if protocol == 'rtmp':
+            player_url = info_dict.get('player_url')
+            page_url = info_dict.get('page_url')
+            app = info_dict.get('app')
+            play_path = info_dict.get('play_path')
+            tc_url = info_dict.get('tc_url')
+            flash_version = info_dict.get('flash_version')
+            live = info_dict.get('rtmp_live', False)
+            if player_url is not None:
+                args += ['-rtmp_swfverify', player_url]
+            if page_url is not None:
+                args += ['-rtmp_pageurl', page_url]
+            if app is not None:
+                args += ['-rtmp_app', app]
+            if play_path is not None:
+                args += ['-rtmp_playpath', play_path]
+            if tc_url is not None:
+                args += ['-rtmp_tcurl', tc_url]
+            if flash_version is not None:
+                args += ['-rtmp_flashver', flash_version]
+            if live:
+                args += ['-rtmp_live', 'live']
+
+        args += ['-i', url, '-c', 'copy']
+        if protocol in ('m3u8', 'm3u8_native'):
+            if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
+                args += ['-f', 'mpegts']
+            else:
+                args += ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
+        elif protocol == 'rtmp':
+            args += ['-f', 'flv']
+        else:
+            args += ['-f', EXT_TO_OUT_FORMATS.get(info_dict['ext'], info_dict['ext'])]
+
+        args = [encodeArgument(opt) for opt in args]
+        args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
+
+        self._debug_cmd(args)
+
+        proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env)
+        try:
+            retval = proc.wait()
+        except KeyboardInterrupt:
+            # subprocess.run would send the SIGKILL signal to ffmpeg and the
+            # mp4 file couldn't be played, but if we ask ffmpeg to quit it
+            # produces a file that is playable (this is mostly useful for live
+            # streams). Note that Windows is not affected and produces playable
+            # files (see https://github.com/rg3/youtube-dl/issues/8300).
+            if sys.platform != 'win32':
+                proc.communicate(b'q')
+            raise
+        return retval
+
+
+class AVconvFD(FFmpegFD):
+    pass
+
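A condensed sketch of the -headers formatting performed in _call_downloader above (header values invented): each HTTP header is serialized as 'Key: Value\r\n' and joined into a single argument, since ffmpeg/avconv warn when the trailing CRLF is missing.

    # Sketch: building ffmpeg's -headers argument the way FFmpegFD does.
    headers = {'User-Agent': 'Mozilla/5.0', 'Cookie': 'k=v'}  # invented values
    args = ['-headers',
            ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]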
 _BY_NAME = dict(
     (klass.get_basename(), klass)
     for name, klass in globals().items()
index fc9642905f3fffb5f1ec11f959c36fd486b757f2..8f88b02414a28a8da650e03418c8ecd7e52a59a6 100644 (file)
@@ -12,37 +12,49 @@ from ..compat import (
     compat_urlparse,
     compat_urllib_error,
     compat_urllib_parse_urlparse,
+    compat_struct_pack,
+    compat_struct_unpack,
 )
 from ..utils import (
     encodeFilename,
     fix_xml_ampersands,
     sanitize_open,
-    struct_pack,
-    struct_unpack,
     xpath_text,
 )
 
 
+class DataTruncatedError(Exception):
+    pass
+
+
 class FlvReader(io.BytesIO):
     """
     Reader for Flv files
     The file format is documented in https://www.adobe.com/devnet/f4v.html
     """
 
+    def read_bytes(self, n):
+        data = self.read(n)
+        if len(data) < n:
+            raise DataTruncatedError(
+                'FlvReader error: need %d bytes but only got %d' % (
+                    n, len(data)))
+        return data
+
     # Utility functions for reading numbers and strings
     def read_unsigned_long_long(self):
-        return struct_unpack('!Q', self.read(8))[0]
+        return compat_struct_unpack('!Q', self.read_bytes(8))[0]
 
     def read_unsigned_int(self):
-        return struct_unpack('!I', self.read(4))[0]
+        return compat_struct_unpack('!I', self.read_bytes(4))[0]
 
     def read_unsigned_char(self):
-        return struct_unpack('!B', self.read(1))[0]
+        return compat_struct_unpack('!B', self.read_bytes(1))[0]
 
     def read_string(self):
         res = b''
         while True:
-            char = self.read(1)
+            char = self.read_bytes(1)
             if char == b'\x00':
                 break
             res += char
@@ -53,18 +65,18 @@ class FlvReader(io.BytesIO):
         Read a box and return the info as a tuple: (box_size, box_type, box_data)
         """
         real_size = size = self.read_unsigned_int()
-        box_type = self.read(4)
+        box_type = self.read_bytes(4)
         header_end = 8
         if size == 1:
             real_size = self.read_unsigned_long_long()
             header_end = 16
-        return real_size, box_type, self.read(real_size - header_end)
+        return real_size, box_type, self.read_bytes(real_size - header_end)
 
     def read_asrt(self):
         # version
         self.read_unsigned_char()
         # flags
-        self.read(3)
+        self.read_bytes(3)
         quality_entry_count = self.read_unsigned_char()
         # QualityEntryCount
         for i in range(quality_entry_count):
@@ -85,7 +97,7 @@ class FlvReader(io.BytesIO):
         # version
         self.read_unsigned_char()
         # flags
-        self.read(3)
+        self.read_bytes(3)
         # time scale
         self.read_unsigned_int()
 
@@ -119,7 +131,7 @@ class FlvReader(io.BytesIO):
         # version
         self.read_unsigned_char()
         # flags
-        self.read(3)
+        self.read_bytes(3)
 
         self.read_unsigned_int()  # BootstrapinfoVersion
         # Profile,Live,Update,Reserved
@@ -194,11 +206,11 @@ def build_fragments_list(boot_info):
 
 
 def write_unsigned_int(stream, val):
-    stream.write(struct_pack('!I', val))
+    stream.write(compat_struct_pack('!I', val))
 
 
 def write_unsigned_int_24(stream, val):
-    stream.write(struct_pack('!I', val)[1:])
+    stream.write(compat_struct_pack('!I', val)[1:])
 
 
 def write_flv_header(stream):
@@ -223,6 +235,12 @@ def write_metadata_tag(stream, metadata):
         write_unsigned_int(stream, FLV_TAG_HEADER_LEN + len(metadata))
 
 
+def remove_encrypted_media(media):
+    return list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and
+                                 'drmAdditionalHeaderSetId' not in e.attrib,
+                       media))
+
+
 def _add_ns(prop):
     return '{http://ns.adobe.com/f4m/1.0}%s' % prop
 
@@ -244,9 +262,7 @@ class F4mFD(FragmentFD):
             # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute
             if 'id' not in e.attrib:
                 self.report_error('Missing ID in f4m DRM')
-        media = list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and
-                                      'drmAdditionalHeaderSetId' not in e.attrib,
-                            media))
+        media = remove_encrypted_media(media)
         if not media:
             self.report_error('Unsupported DRM')
         return media
@@ -303,7 +319,7 @@ class F4mFD(FragmentFD):
         doc = compat_etree_fromstring(manifest)
         formats = [(int(f.attrib.get('bitrate', -1)), f)
                    for f in self._get_unencrypted_media(doc)]
-        if requested_bitrate is None:
+        if requested_bitrate is None or len(formats) == 1:
             # get the best format
             formats = sorted(formats, key=lambda f: f[0])
             rate, media = formats[-1]
@@ -370,7 +386,17 @@ class F4mFD(FragmentFD):
                 down.close()
                 reader = FlvReader(down_data)
                 while True:
-                    _, box_type, box_data = reader.read_box_info()
+                    try:
+                        _, box_type, box_data = reader.read_box_info()
+                    except DataTruncatedError:
+                        if test:
+                            # In tests, segments may be truncated, and thus
+                            # FlvReader may not be able to parse the whole
+                            # chunk. If so, write the segment as is
+                            # See https://github.com/rg3/youtube-dl/issues/9214
+                            dest_stream.write(down_data)
+                            break
+                        raise
                     if box_type == b'mdat':
                         dest_stream.write(box_data)
                         break
index 5bc99492bc7b90abdc5c133b3f6c573d54fe9ed3..ba903ae103a7bc6817378389a34713d9a5550e19 100644 (file)
@@ -19,8 +19,17 @@ class HttpQuietDownloader(HttpFD):
 class FragmentFD(FileDownloader):
     """
     A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests).
+
+    Available options:
+
+    fragment_retries:   Number of times to retry a fragment on HTTP error (DASH only)
     """
 
+    def report_retry_fragment(self, fragment_name, count, retries):
+        self.to_screen(
+            '[download] Got server HTTP error. Retrying fragment %s (attempt %d of %s)...'
+            % (fragment_name, count, self.format_retries(retries)))
+
     def _prepare_and_start_frag_download(self, ctx):
         self._prepare_frag_download(ctx)
         self._start_frag_download(ctx)
@@ -99,7 +108,8 @@ class FragmentFD(FileDownloader):
                     state['eta'] = self.calc_eta(
                         start, time_now, estimated_size,
                         state['downloaded_bytes'])
-                state['speed'] = s.get('speed')
+                state['speed'] = s.get('speed') or ctx.get('speed')
+                ctx['speed'] = state['speed']
                 ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
             self._hook_progress(state)
 
index 2a775bf0023f7ddc09507d66ab660a8dd97d19b2..3b7bb35087568b084e84fcd0610f951c0eaf8472 100644 (file)
 from __future__ import unicode_literals
 
-import os
+import os.path
 import re
-import subprocess
-import sys
+import binascii
+try:
+    from Crypto.Cipher import AES
+    can_decrypt_frag = True
+except ImportError:
+    can_decrypt_frag = False
 
-from .common import FileDownloader
 from .fragment import FragmentFD
+from .external import FFmpegFD
 
-from ..compat import compat_urlparse
-from ..postprocessor.ffmpeg import FFmpegPostProcessor
+from ..compat import (
+    compat_urlparse,
+    compat_struct_pack,
+)
 from ..utils import (
-    encodeArgument,
     encodeFilename,
     sanitize_open,
-    handle_youtubedl_headers,
+    parse_m3u8_attributes,
 )
 
 
-class HlsFD(FileDownloader):
-    def real_download(self, filename, info_dict):
-        url = info_dict['url']
-        self.report_destination(filename)
-        tmpfilename = self.temp_name(filename)
-
-        ffpp = FFmpegPostProcessor(downloader=self)
-        if not ffpp.available:
-            self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
-            return False
-        ffpp.check_version()
-
-        args = [ffpp.executable, '-y']
-
-        if info_dict['http_headers'] and re.match(r'^https?://', url):
-            # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
-            # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
-            headers = handle_youtubedl_headers(info_dict['http_headers'])
-            args += [
-                '-headers',
-                ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
-
-        args += ['-i', url, '-c', 'copy']
-        if self.params.get('hls_use_mpegts', False):
-            args += ['-f', 'mpegts']
-        else:
-            args += ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
-
-        args = [encodeArgument(opt) for opt in args]
-        args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
-
-        self._debug_cmd(args)
-
-        proc = subprocess.Popen(args, stdin=subprocess.PIPE)
-        try:
-            retval = proc.wait()
-        except KeyboardInterrupt:
-            # subprocces.run would send the SIGKILL signal to ffmpeg and the
-            # mp4 file couldn't be played, but if we ask ffmpeg to quit it
-            # produces a file that is playable (this is mostly useful for live
-            # streams). Note that Windows is not affected and produces playable
-            # files (see https://github.com/rg3/youtube-dl/issues/8300).
-            if sys.platform != 'win32':
-                proc.communicate(b'q')
-            raise
-        if retval == 0:
-            fsize = os.path.getsize(encodeFilename(tmpfilename))
-            self.to_screen('\r[%s] %s bytes' % (args[0], fsize))
-            self.try_rename(tmpfilename, filename)
-            self._hook_progress({
-                'downloaded_bytes': fsize,
-                'total_bytes': fsize,
-                'filename': filename,
-                'status': 'finished',
-            })
-            return True
-        else:
-            self.to_stderr('\n')
-            self.report_error('%s exited with code %d' % (ffpp.basename, retval))
-            return False
-
-
-class NativeHlsFD(FragmentFD):
-    """ A more limited implementation that does not require ffmpeg """
+class HlsFD(FragmentFD):
+    """ A limited implementation that does not require ffmpeg """
 
     FD_NAME = 'hlsnative'
 
+    @staticmethod
+    def can_download(manifest):
+        UNSUPPORTED_FEATURES = (
+            r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)',  # encrypted streams [1]
+            r'#EXT-X-BYTERANGE',  # playlists composed of byte ranges of media files [2]
+
+            # The live-streams heuristic does not always work (e.g. this stream is
+            # geo-restricted to Germany:
+            # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
+            # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)',  # live streams [3]
+
+            # This heuristic is also not correct, since segments may not be appended either.
+            # Twitch VODs of finished streams have EXT-X-PLAYLIST-TYPE:EVENT even though
+            # no segments will ever be appended to the end of the playlist.
+            # r'#EXT-X-PLAYLIST-TYPE:EVENT',  # media segments may be appended to the end of
+            #                                 # event media playlists [4]
+
+            # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
+            # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
+            # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
+            # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
+        )
+        check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
+        check_results.append(can_decrypt_frag or '#EXT-X-KEY:METHOD=AES-128' not in manifest)
+        return all(check_results)
+
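A usage sketch for the gate above (the manifest literal is invented): any unsupported feature makes can_download return False, in which case real_download below delegates the job to FFmpegFD.

    # Sketch: probing a manifest before committing to the native downloader.
    from youtube_dl.downloader.hls import HlsFD

    manifest = '#EXTM3U\n#EXT-X-BYTERANGE:75232@0\nseg0.ts\n'  # invented manifest
    print(HlsFD.can_download(manifest))  # False: byte ranges are unsupported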
     def real_download(self, filename, info_dict):
         man_url = info_dict['url']
         self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
         manifest = self.ydl.urlopen(man_url).read()
 
         s = manifest.decode('utf-8', 'ignore')
-        fragment_urls = []
+
+        if not self.can_download(s):
+            self.report_warning(
+                'hlsnative has detected features it does not support, '
+                'the download will be delegated to ffmpeg')
+            fd = FFmpegFD(self.ydl, self.params)
+            for ph in self._progress_hooks:
+                fd.add_progress_hook(ph)
+            return fd.real_download(filename, info_dict)
+
+        total_frags = 0
         for line in s.splitlines():
             line = line.strip()
             if line and not line.startswith('#'):
-                segment_url = (
-                    line
-                    if re.match(r'^https?://', line)
-                    else compat_urlparse.urljoin(man_url, line))
-                fragment_urls.append(segment_url)
-                # We only download the first fragment during the test
-                if self.params.get('test', False):
-                    break
+                total_frags += 1
 
         ctx = {
             'filename': filename,
-            'total_frags': len(fragment_urls),
+            'total_frags': total_frags,
         }
 
         self._prepare_and_start_frag_download(ctx)
 
+        i = 0
+        media_sequence = 0
+        decrypt_info = {'METHOD': 'NONE'}
         frags_filenames = []
-        for i, frag_url in enumerate(fragment_urls):
-            frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i)
-            success = ctx['dl'].download(frag_filename, {'url': frag_url})
-            if not success:
-                return False
-            down, frag_sanitized = sanitize_open(frag_filename, 'rb')
-            ctx['dest_stream'].write(down.read())
-            down.close()
-            frags_filenames.append(frag_sanitized)
+        for line in s.splitlines():
+            line = line.strip()
+            if line:
+                if not line.startswith('#'):
+                    frag_url = (
+                        line
+                        if re.match(r'^https?://', line)
+                        else compat_urlparse.urljoin(man_url, line))
+                    frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i)
+                    success = ctx['dl'].download(frag_filename, {'url': frag_url})
+                    if not success:
+                        return False
+                    down, frag_sanitized = sanitize_open(frag_filename, 'rb')
+                    frag_content = down.read()
+                    down.close()
+                    if decrypt_info['METHOD'] == 'AES-128':
+                        iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
+                        frag_content = AES.new(
+                            decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
+                    ctx['dest_stream'].write(frag_content)
+                    frags_filenames.append(frag_sanitized)
+                    # We only download the first fragment during the test
+                    if self.params.get('test', False):
+                        break
+                    i += 1
+                    media_sequence += 1
+                elif line.startswith('#EXT-X-KEY'):
+                    decrypt_info = parse_m3u8_attributes(line[11:])
+                    if decrypt_info['METHOD'] == 'AES-128':
+                        if 'IV' in decrypt_info:
+                            decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:])
+                        if not re.match(r'^https?://', decrypt_info['URI']):
+                            decrypt_info['URI'] = compat_urlparse.urljoin(
+                                man_url, decrypt_info['URI'])
+                        decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read()
+                elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
+                    media_sequence = int(line[22:])
 
         self._finish_frag_download(ctx)
 
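The AES-128 branch above derives a per-fragment IV whenever the EXT-X-KEY tag carries no explicit IV attribute: per the HLS draft, the default IV is the 16-byte big-endian encoding of the fragment's media sequence number, which is exactly what compat_struct_pack('>8xq', media_sequence) builds. A minimal standalone sketch of that rule (the sequence number is an assumed sample value):

    import struct

    media_sequence = 7  # assumed sample value
    # '>8xq' = 8 zero pad bytes followed by a big-endian signed 64-bit
    # integer, i.e. the 16-byte big-endian encoding of the sequence number
    iv = struct.pack('>8xq', media_sequence)
    assert len(iv) == 16
    assert iv == b'\x00' * 15 + b'\x07'
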
index 3eb29526cbc90cb3351c75876698a1b238c07ef8..939358b2a2f00edaca5283d311d89ab220d26966 100644 (file)
@@ -27,6 +27,8 @@ class RtspFD(FileDownloader):
             self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install one of them.')
             return False
 
+        self._debug_cmd(args)
+
         retval = subprocess.call(args)
         if retval == 0:
             fsize = os.path.getsize(encodeFilename(tmpfilename))
index 1ae606f1eaaa669f71ad3d3c24b7b222545a0520..18d8dbcd6672f82776a9bd9f6f4cc63cac91129d 100644 (file)
 from __future__ import unicode_literals
 
-from .abc import ABCIE
-from .abc7news import Abc7NewsIE
-from .academicearth import AcademicEarthCourseIE
-from .acast import (
-    ACastIE,
-    ACastChannelIE,
-)
-from .addanime import AddAnimeIE
-from .adobetv import (
-    AdobeTVIE,
-    AdobeTVShowIE,
-    AdobeTVChannelIE,
-    AdobeTVVideoIE,
-)
-from .adultswim import AdultSwimIE
-from .aenetworks import AENetworksIE
-from .aftonbladet import AftonbladetIE
-from .airmozilla import AirMozillaIE
-from .aljazeera import AlJazeeraIE
-from .alphaporno import AlphaPornoIE
-from .animeondemand import AnimeOnDemandIE
-from .anitube import AnitubeIE
-from .anysex import AnySexIE
-from .aol import AolIE
-from .allocine import AllocineIE
-from .aparat import AparatIE
-from .appleconnect import AppleConnectIE
-from .appletrailers import (
-    AppleTrailersIE,
-    AppleTrailersSectionIE,
-)
-from .archiveorg import ArchiveOrgIE
-from .ard import (
-    ARDIE,
-    ARDMediathekIE,
-    SportschauIE,
-)
-from .arte import (
-    ArteTvIE,
-    ArteTVPlus7IE,
-    ArteTVCreativeIE,
-    ArteTVConcertIE,
-    ArteTVFutureIE,
-    ArteTVCinemaIE,
-    ArteTVDDCIE,
-    ArteTVMagazineIE,
-    ArteTVEmbedIE,
-)
-from .atresplayer import AtresPlayerIE
-from .atttechchannel import ATTTechChannelIE
-from .audimedia import AudiMediaIE
-from .audiomack import AudiomackIE, AudiomackAlbumIE
-from .azubu import AzubuIE, AzubuLiveIE
-from .baidu import BaiduVideoIE
-from .bambuser import BambuserIE, BambuserChannelIE
-from .bandcamp import BandcampIE, BandcampAlbumIE
-from .bbc import (
-    BBCCoUkIE,
-    BBCCoUkArticleIE,
-    BBCIE,
-)
-from .beeg import BeegIE
-from .behindkink import BehindKinkIE
-from .beatportpro import BeatportProIE
-from .bet import BetIE
-from .bigflix import BigflixIE
-from .bild import BildIE
-from .bilibili import BiliBiliIE
-from .bleacherreport import (
-    BleacherReportIE,
-    BleacherReportCMSIE,
-)
-from .blinkx import BlinkxIE
-from .bloomberg import BloombergIE
-from .bpb import BpbIE
-from .br import BRIE
-from .breakcom import BreakIE
-from .brightcove import (
-    BrightcoveLegacyIE,
-    BrightcoveNewIE,
-)
-from .buzzfeed import BuzzFeedIE
-from .byutv import BYUtvIE
-from .c56 import C56IE
-from .camdemy import (
-    CamdemyIE,
-    CamdemyFolderIE
-)
-from .canalplus import CanalplusIE
-from .canalc2 import Canalc2IE
-from .canvas import CanvasIE
-from .cbc import (
-    CBCIE,
-    CBCPlayerIE,
-)
-from .cbs import CBSIE
-from .cbsnews import (
-    CBSNewsIE,
-    CBSNewsLiveVideoIE,
-)
-from .cbssports import CBSSportsIE
-from .ccc import CCCIE
-from .ceskatelevize import CeskaTelevizeIE
-from .channel9 import Channel9IE
-from .chaturbate import ChaturbateIE
-from .chilloutzone import ChilloutzoneIE
-from .chirbit import (
-    ChirbitIE,
-    ChirbitProfileIE,
-)
-from .cinchcast import CinchcastIE
-from .cinemassacre import CinemassacreIE
-from .clipfish import ClipfishIE
-from .cliphunter import CliphunterIE
-from .clipsyndicate import ClipsyndicateIE
-from .cloudy import CloudyIE
-from .clubic import ClubicIE
-from .clyp import ClypIE
-from .cmt import CMTIE
-from .cnet import CNETIE
-from .cnn import (
-    CNNIE,
-    CNNBlogsIE,
-    CNNArticleIE,
-)
-from .collegehumor import CollegeHumorIE
-from .collegerama import CollegeRamaIE
-from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
-from .comcarcoff import ComCarCoffIE
-from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
-from .condenast import CondeNastIE
-from .cracked import CrackedIE
-from .crackle import CrackleIE
-from .criterion import CriterionIE
-from .crooksandliars import CrooksAndLiarsIE
-from .crunchyroll import (
-    CrunchyrollIE,
-    CrunchyrollShowPlaylistIE
-)
-from .cspan import CSpanIE
-from .ctsnews import CtsNewsIE
-from .cultureunplugged import CultureUnpluggedIE
-from .cwtv import CWTVIE
-from .dailymotion import (
-    DailymotionIE,
-    DailymotionPlaylistIE,
-    DailymotionUserIE,
-    DailymotionCloudIE,
-)
-from .daum import (
-    DaumIE,
-    DaumClipIE,
-    DaumPlaylistIE,
-    DaumUserIE,
-)
-from .dbtv import DBTVIE
-from .dcn import (
-    DCNIE,
-    DCNVideoIE,
-    DCNLiveIE,
-    DCNSeasonIE,
-)
-from .dctp import DctpTvIE
-from .deezer import DeezerPlaylistIE
-from .democracynow import DemocracynowIE
-from .dfb import DFBIE
-from .dhm import DHMIE
-from .dotsub import DotsubIE
-from .douyutv import DouyuTVIE
-from .dplay import DPlayIE
-from .dramafever import (
-    DramaFeverIE,
-    DramaFeverSeriesIE,
-)
-from .dreisat import DreiSatIE
-from .drbonanza import DRBonanzaIE
-from .drtuber import DrTuberIE
-from .drtv import DRTVIE
-from .dvtv import DVTVIE
-from .dump import DumpIE
-from .dumpert import DumpertIE
-from .defense import DefenseGouvFrIE
-from .discovery import DiscoveryIE
-from .dropbox import DropboxIE
-from .eagleplatform import EaglePlatformIE
-from .ebaumsworld import EbaumsWorldIE
-from .echomsk import EchoMskIE
-from .ehow import EHowIE
-from .eighttracks import EightTracksIE
-from .einthusan import EinthusanIE
-from .eitb import EitbIE
-from .ellentv import (
-    EllenTVIE,
-    EllenTVClipsIE,
-)
-from .elpais import ElPaisIE
-from .embedly import EmbedlyIE
-from .engadget import EngadgetIE
-from .eporner import EpornerIE
-from .eroprofile import EroProfileIE
-from .escapist import EscapistIE
-from .espn import ESPNIE
-from .esri import EsriVideoIE
-from .europa import EuropaIE
-from .everyonesmixtape import EveryonesMixtapeIE
-from .exfm import ExfmIE
-from .expotv import ExpoTVIE
-from .extremetube import ExtremeTubeIE
-from .facebook import (
-    FacebookIE,
-    FacebookPostIE,
-)
-from .faz import FazIE
-from .fc2 import FC2IE
-from .fczenit import FczenitIE
-from .firstpost import FirstpostIE
-from .firsttv import FirstTVIE
-from .fivemin import FiveMinIE
-from .fivetv import FiveTVIE
-from .fktv import FKTVIE
-from .flickr import FlickrIE
-from .folketinget import FolketingetIE
-from .footyroom import FootyRoomIE
-from .fourtube import FourTubeIE
-from .fox import FOXIE
-from .foxgay import FoxgayIE
-from .foxnews import FoxNewsIE
-from .foxsports import FoxSportsIE
-from .franceculture import (
-    FranceCultureIE,
-    FranceCultureEmissionIE,
-)
-from .franceinter import FranceInterIE
-from .francetv import (
-    PluzzIE,
-    FranceTvInfoIE,
-    FranceTVIE,
-    GenerationQuoiIE,
-    CultureboxIE,
-)
-from .freesound import FreesoundIE
-from .freespeech import FreespeechIE
-from .freevideo import FreeVideoIE
-from .funimation import FunimationIE
-from .funnyordie import FunnyOrDieIE
-from .gameinformer import GameInformerIE
-from .gamekings import GamekingsIE
-from .gameone import (
-    GameOneIE,
-    GameOnePlaylistIE,
-)
-from .gamersyde import GamersydeIE
-from .gamespot import GameSpotIE
-from .gamestar import GameStarIE
-from .gametrailers import GametrailersIE
-from .gazeta import GazetaIE
-from .gdcvault import GDCVaultIE
-from .generic import GenericIE
-from .gfycat import GfycatIE
-from .giantbomb import GiantBombIE
-from .giga import GigaIE
-from .glide import GlideIE
-from .globo import (
-    GloboIE,
-    GloboArticleIE,
-)
-from .godtube import GodTubeIE
-from .goldenmoustache import GoldenMoustacheIE
-from .golem import GolemIE
-from .googledrive import GoogleDriveIE
-from .googleplus import GooglePlusIE
-from .googlesearch import GoogleSearchIE
-from .goshgay import GoshgayIE
-from .gputechconf import GPUTechConfIE
-from .groupon import GrouponIE
-from .hark import HarkIE
-from .hearthisat import HearThisAtIE
-from .heise import HeiseIE
-from .hellporno import HellPornoIE
-from .helsinki import HelsinkiIE
-from .hentaistigma import HentaiStigmaIE
-from .historicfilms import HistoricFilmsIE
-from .hitbox import HitboxIE, HitboxLiveIE
-from .hornbunny import HornBunnyIE
-from .hotnewhiphop import HotNewHipHopIE
-from .hotstar import HotStarIE
-from .howcast import HowcastIE
-from .howstuffworks import HowStuffWorksIE
-from .huffpost import HuffPostIE
-from .hypem import HypemIE
-from .iconosquare import IconosquareIE
-from .ign import (
-    IGNIE,
-    OneUPIE,
-    PCMagIE,
-)
-from .imdb import (
-    ImdbIE,
-    ImdbListIE
-)
-from .imgur import (
-    ImgurIE,
-    ImgurAlbumIE,
-)
-from .ina import InaIE
-from .indavideo import (
-    IndavideoIE,
-    IndavideoEmbedIE,
-)
-from .infoq import InfoQIE
-from .instagram import InstagramIE, InstagramUserIE
-from .internetvideoarchive import InternetVideoArchiveIE
-from .iprima import IPrimaIE
-from .iqiyi import IqiyiIE
-from .ir90tv import Ir90TvIE
-from .ivi import (
-    IviIE,
-    IviCompilationIE
-)
-from .ivideon import IvideonIE
-from .izlesene import IzleseneIE
-from .jadorecettepub import JadoreCettePubIE
-from .jeuxvideo import JeuxVideoIE
-from .jove import JoveIE
-from .jwplatform import JWPlatformIE
-from .jpopsukitv import JpopsukiIE
-from .kaltura import KalturaIE
-from .kanalplay import KanalPlayIE
-from .kankan import KankanIE
-from .karaoketv import KaraoketvIE
-from .karrierevideos import KarriereVideosIE
-from .keezmovies import KeezMoviesIE
-from .khanacademy import KhanAcademyIE
-from .kickstarter import KickStarterIE
-from .keek import KeekIE
-from .konserthusetplay import KonserthusetPlayIE
-from .kontrtube import KontrTubeIE
-from .krasview import KrasViewIE
-from .ku6 import Ku6IE
-from .kuwo import (
-    KuwoIE,
-    KuwoAlbumIE,
-    KuwoChartIE,
-    KuwoSingerIE,
-    KuwoCategoryIE,
-    KuwoMvIE,
-)
-from .la7 import LA7IE
-from .laola1tv import Laola1TvIE
-from .lecture2go import Lecture2GoIE
-from .lemonde import LemondeIE
-from .letv import (
-    LetvIE,
-    LetvTvIE,
-    LetvPlaylistIE,
-    LetvCloudIE,
-)
-from .libsyn import LibsynIE
-from .lifenews import (
-    LifeNewsIE,
-    LifeEmbedIE,
-)
-from .limelight import (
-    LimelightMediaIE,
-    LimelightChannelIE,
-    LimelightChannelListIE,
-)
-from .liveleak import LiveLeakIE
-from .livestream import (
-    LivestreamIE,
-    LivestreamOriginalIE,
-    LivestreamShortenerIE,
-)
-from .lnkgo import LnkGoIE
-from .lovehomeporn import LoveHomePornIE
-from .lrt import LRTIE
-from .lynda import (
-    LyndaIE,
-    LyndaCourseIE
-)
-from .m6 import M6IE
-from .macgamestore import MacGameStoreIE
-from .mailru import MailRuIE
-from .makertv import MakerTVIE
-from .malemotion import MalemotionIE
-from .matchtv import MatchTVIE
-from .mdr import MDRIE
-from .metacafe import MetacafeIE
-from .metacritic import MetacriticIE
-from .mgoon import MgoonIE
-from .minhateca import MinhatecaIE
-from .ministrygrid import MinistryGridIE
-from .miomio import MioMioIE
-from .mit import TechTVMITIE, MITIE, OCWMITIE
-from .mitele import MiTeleIE
-from .mixcloud import MixcloudIE
-from .mlb import MLBIE
-from .mpora import MporaIE
-from .moevideo import MoeVideoIE
-from .mofosex import MofosexIE
-from .mojvideo import MojvideoIE
-from .moniker import MonikerIE
-from .mooshare import MooshareIE
-from .morningstar import MorningstarIE
-from .motherless import MotherlessIE
-from .motorsport import MotorsportIE
-from .movieclips import MovieClipsIE
-from .moviezine import MoviezineIE
-from .mtv import (
-    MTVIE,
-    MTVServicesEmbeddedIE,
-    MTVIggyIE,
-    MTVDEIE,
-)
-from .muenchentv import MuenchenTVIE
-from .musicplayon import MusicPlayOnIE
-from .muzu import MuzuTVIE
-from .mwave import MwaveIE
-from .myspace import MySpaceIE, MySpaceAlbumIE
-from .myspass import MySpassIE
-from .myvi import MyviIE
-from .myvideo import MyVideoIE
-from .myvidster import MyVidsterIE
-from .nationalgeographic import NationalGeographicIE
-from .naver import NaverIE
-from .nba import NBAIE
-from .nbc import (
-    NBCIE,
-    NBCNewsIE,
-    NBCSportsIE,
-    NBCSportsVPlayerIE,
-    MSNBCIE,
-)
-from .ndr import (
-    NDRIE,
-    NJoyIE,
-    NDREmbedBaseIE,
-    NDREmbedIE,
-    NJoyEmbedIE,
-)
-from .ndtv import NDTVIE
-from .netzkino import NetzkinoIE
-from .nerdcubed import NerdCubedFeedIE
-from .nerdist import NerdistIE
-from .neteasemusic import (
-    NetEaseMusicIE,
-    NetEaseMusicAlbumIE,
-    NetEaseMusicSingerIE,
-    NetEaseMusicListIE,
-    NetEaseMusicMvIE,
-    NetEaseMusicProgramIE,
-    NetEaseMusicDjRadioIE,
-)
-from .newgrounds import NewgroundsIE
-from .newstube import NewstubeIE
-from .nextmedia import (
-    NextMediaIE,
-    NextMediaActionNewsIE,
-    AppleDailyIE,
-)
-from .nextmovie import NextMovieIE
-from .nfb import NFBIE
-from .nfl import NFLIE
-from .nhl import (
-    NHLIE,
-    NHLNewsIE,
-    NHLVideocenterIE,
-)
-from .nick import NickIE
-from .niconico import NiconicoIE, NiconicoPlaylistIE
-from .ninegag import NineGagIE
-from .noco import NocoIE
-from .normalboots import NormalbootsIE
-from .nosvideo import NosVideoIE
-from .nova import NovaIE
-from .novamov import (
-    NovaMovIE,
-    WholeCloudIE,
-    NowVideoIE,
-    VideoWeedIE,
-    CloudTimeIE,
-)
-from .nowness import (
-    NownessIE,
-    NownessPlaylistIE,
-    NownessSeriesIE,
-)
-from .nowtv import (
-    NowTVIE,
-    NowTVListIE,
-)
-from .noz import NozIE
-from .npo import (
-    NPOIE,
-    NPOLiveIE,
-    NPORadioIE,
-    NPORadioFragmentIE,
-    SchoolTVIE,
-    VPROIE,
-    WNLIE
-)
-from .npr import NprIE
-from .nrk import (
-    NRKIE,
-    NRKPlaylistIE,
-    NRKTVIE,
-)
-from .ntvde import NTVDeIE
-from .ntvru import NTVRuIE
-from .nytimes import (
-    NYTimesIE,
-    NYTimesArticleIE,
-)
-from .nuvid import NuvidIE
-from .odnoklassniki import OdnoklassnikiIE
-from .oktoberfesttv import OktoberfestTVIE
-from .onionstudios import OnionStudiosIE
-from .ooyala import (
-    OoyalaIE,
-    OoyalaExternalIE,
-)
-from .ora import OraTVIE
-from .orf import (
-    ORFTVthekIE,
-    ORFOE1IE,
-    ORFFM4IE,
-    ORFIPTVIE,
-)
-from .pandoratv import PandoraTVIE
-from .parliamentliveuk import ParliamentLiveUKIE
-from .patreon import PatreonIE
-from .pbs import PBSIE
-from .periscope import PeriscopeIE
-from .philharmoniedeparis import PhilharmonieDeParisIE
-from .phoenix import PhoenixIE
-from .photobucket import PhotobucketIE
-from .pinkbike import PinkbikeIE
-from .planetaplay import PlanetaPlayIE
-from .pladform import PladformIE
-from .played import PlayedIE
-from .playfm import PlayFMIE
-from .plays import PlaysTVIE
-from .playtvak import PlaytvakIE
-from .playvid import PlayvidIE
-from .playwire import PlaywireIE
-from .pluralsight import (
-    PluralsightIE,
-    PluralsightCourseIE,
-)
-from .podomatic import PodomaticIE
-from .porn91 import Porn91IE
-from .pornhd import PornHdIE
-from .pornhub import (
-    PornHubIE,
-    PornHubPlaylistIE,
-    PornHubUserVideosIE,
-)
-from .pornotube import PornotubeIE
-from .pornovoisines import PornoVoisinesIE
-from .pornoxo import PornoXOIE
-from .primesharetv import PrimeShareTVIE
-from .promptfile import PromptFileIE
-from .prosiebensat1 import ProSiebenSat1IE
-from .puls4 import Puls4IE
-from .pyvideo import PyvideoIE
-from .qqmusic import (
-    QQMusicIE,
-    QQMusicSingerIE,
-    QQMusicAlbumIE,
-    QQMusicToplistIE,
-    QQMusicPlaylistIE,
-)
-from .quickvid import QuickVidIE
-from .r7 import R7IE
-from .radiode import RadioDeIE
-from .radiojavan import RadioJavanIE
-from .radiobremen import RadioBremenIE
-from .radiofrance import RadioFranceIE
-from .rai import (
-    RaiTVIE,
-    RaiIE,
-)
-from .rbmaradio import RBMARadioIE
-from .rds import RDSIE
-from .redtube import RedTubeIE
-from .regiotv import RegioTVIE
-from .restudy import RestudyIE
-from .reverbnation import ReverbNationIE
-from .revision3 import Revision3IE
-from .ringtv import RingTVIE
-from .ro220 import Ro220IE
-from .rottentomatoes import RottenTomatoesIE
-from .roxwel import RoxwelIE
-from .rtbf import RTBFIE
-from .rte import RteIE, RteRadioIE
-from .rtlnl import RtlNlIE
-from .rtl2 import RTL2IE
-from .rtp import RTPIE
-from .rts import RTSIE
-from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
-from .rtvnh import RTVNHIE
-from .ruhd import RUHDIE
-from .ruleporn import RulePornIE
-from .rutube import (
-    RutubeIE,
-    RutubeChannelIE,
-    RutubeEmbedIE,
-    RutubeMovieIE,
-    RutubePersonIE,
-)
-from .rutv import RUTVIE
-from .ruutu import RuutuIE
-from .sandia import SandiaIE
-from .safari import (
-    SafariIE,
-    SafariCourseIE,
-)
-from .sapo import SapoIE
-from .savefrom import SaveFromIE
-from .sbs import SBSIE
-from .scivee import SciVeeIE
-from .screencast import ScreencastIE
-from .screencastomatic import ScreencastOMaticIE
-from .screenjunkies import ScreenJunkiesIE
-from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE
-from .senateisvp import SenateISVPIE
-from .servingsys import ServingSysIE
-from .sexu import SexuIE
-from .sexykarma import SexyKarmaIE
-from .shahid import ShahidIE
-from .shared import SharedIE
-from .sharesix import ShareSixIE
-from .sina import SinaIE
-from .skynewsarabia import (
-    SkyNewsArabiaIE,
-    SkyNewsArabiaArticleIE,
-)
-from .slideshare import SlideshareIE
-from .slutload import SlutloadIE
-from .smotri import (
-    SmotriIE,
-    SmotriCommunityIE,
-    SmotriUserIE,
-    SmotriBroadcastIE,
-)
-from .snagfilms import (
-    SnagFilmsIE,
-    SnagFilmsEmbedIE,
-)
-from .snotr import SnotrIE
-from .sohu import SohuIE
-from .soundcloud import (
-    SoundcloudIE,
-    SoundcloudSetIE,
-    SoundcloudUserIE,
-    SoundcloudPlaylistIE,
-    SoundcloudSearchIE
-)
-from .soundgasm import (
-    SoundgasmIE,
-    SoundgasmProfileIE
-)
-from .southpark import (
-    SouthParkIE,
-    SouthParkDeIE,
-    SouthParkDkIE,
-    SouthParkEsIE,
-    SouthParkNlIE
-)
-from .space import SpaceIE
-from .spankbang import SpankBangIE
-from .spankwire import SpankwireIE
-from .spiegel import SpiegelIE, SpiegelArticleIE
-from .spiegeltv import SpiegeltvIE
-from .spike import SpikeIE
-from .stitcher import StitcherIE
-from .sport5 import Sport5IE
-from .sportbox import (
-    SportBoxIE,
-    SportBoxEmbedIE,
-)
-from .sportdeutschland import SportDeutschlandIE
-from .srgssr import (
-    SRGSSRIE,
-    SRGSSRPlayIE,
-)
-from .srmediathek import SRMediathekIE
-from .ssa import SSAIE
-from .stanfordoc import StanfordOpenClassroomIE
-from .steam import SteamIE
-from .streamcloud import StreamcloudIE
-from .streamcz import StreamCZIE
-from .streetvoice import StreetVoiceIE
-from .sunporno import SunPornoIE
-from .svt import (
-    SVTIE,
-    SVTPlayIE,
-)
-from .swrmediathek import SWRMediathekIE
-from .syfy import SyfyIE
-from .sztvhu import SztvHuIE
-from .tagesschau import TagesschauIE
-from .tapely import TapelyIE
-from .tass import TassIE
-from .teachertube import (
-    TeacherTubeIE,
-    TeacherTubeUserIE,
-)
-from .teachingchannel import TeachingChannelIE
-from .teamcoco import TeamcocoIE
-from .techtalks import TechTalksIE
-from .ted import TEDIE
-from .tele13 import Tele13IE
-from .telebruxelles import TeleBruxellesIE
-from .telecinco import TelecincoIE
-from .telegraaf import TelegraafIE
-from .telemb import TeleMBIE
-from .teletask import TeleTaskIE
-from .tenplay import TenPlayIE
-from .testurl import TestURLIE
-from .tf1 import TF1IE
-from .theintercept import TheInterceptIE
-from .theonion import TheOnionIE
-from .theplatform import (
-    ThePlatformIE,
-    ThePlatformFeedIE,
-)
-from .thesixtyone import TheSixtyOneIE
-from .thisamericanlife import ThisAmericanLifeIE
-from .thisav import ThisAVIE
-from .tinypic import TinyPicIE
-from .tlc import TlcDeIE
-from .tmz import (
-    TMZIE,
-    TMZArticleIE,
-)
-from .tnaflix import (
-    TNAFlixIE,
-    EMPFlixIE,
-    MovieFapIE,
-)
-from .toggle import ToggleIE
-from .thvideo import (
-    THVideoIE,
-    THVideoPlaylistIE
-)
-from .toutv import TouTvIE
-from .toypics import ToypicsUserIE, ToypicsIE
-from .traileraddict import TrailerAddictIE
-from .trilulilu import TriluliluIE
-from .trollvids import TrollvidsIE
-from .trutube import TruTubeIE
-from .tube8 import Tube8IE
-from .tubitv import TubiTvIE
-from .tudou import (
-    TudouIE,
-    TudouPlaylistIE,
-    TudouAlbumIE,
-)
-from .tumblr import TumblrIE
-from .tunein import (
-    TuneInClipIE,
-    TuneInStationIE,
-    TuneInProgramIE,
-    TuneInTopicIE,
-    TuneInShortenerIE,
-)
-from .turbo import TurboIE
-from .tutv import TutvIE
-from .tv2 import (
-    TV2IE,
-    TV2ArticleIE,
-)
-from .tv4 import TV4IE
-from .tvc import (
-    TVCIE,
-    TVCArticleIE,
-)
-from .tvigle import TvigleIE
-from .tvland import TVLandIE
-from .tvp import TvpIE, TvpSeriesIE
-from .tvplay import TVPlayIE
-from .tweakers import TweakersIE
-from .twentyfourvideo import TwentyFourVideoIE
-from .twentymin import TwentyMinutenIE
-from .twentytwotracks import (
-    TwentyTwoTracksIE,
-    TwentyTwoTracksGenreIE
-)
-from .twitch import (
-    TwitchVideoIE,
-    TwitchChapterIE,
-    TwitchVodIE,
-    TwitchProfileIE,
-    TwitchPastBroadcastsIE,
-    TwitchBookmarksIE,
-    TwitchStreamIE,
-)
-from .twitter import (
-    TwitterCardIE,
-    TwitterIE,
-    TwitterAmplifyIE,
-)
-from .ubu import UbuIE
-from .udemy import (
-    UdemyIE,
-    UdemyCourseIE
-)
-from .udn import UDNEmbedIE
-from .digiteka import DigitekaIE
-from .unistra import UnistraIE
-from .urort import UrortIE
-from .ustream import UstreamIE, UstreamChannelIE
-from .varzesh3 import Varzesh3IE
-from .vbox7 import Vbox7IE
-from .veehd import VeeHDIE
-from .veoh import VeohIE
-from .vessel import VesselIE
-from .vesti import VestiIE
-from .vevo import VevoIE
-from .vgtv import (
-    BTArticleIE,
-    BTVestlendingenIE,
-    VGTVIE,
-)
-from .vh1 import VH1IE
-from .vice import ViceIE
-from .viddler import ViddlerIE
-from .videodetective import VideoDetectiveIE
-from .videofyme import VideofyMeIE
-from .videomega import VideoMegaIE
-from .videomore import (
-    VideomoreIE,
-    VideomoreVideoIE,
-    VideomoreSeasonIE,
-)
-from .videopremium import VideoPremiumIE
-from .videott import VideoTtIE
-from .vidme import (
-    VidmeIE,
-    VidmeUserIE,
-    VidmeUserLikesIE,
-)
-from .vidzi import VidziIE
-from .vier import VierIE, VierVideosIE
-from .viewster import ViewsterIE
-from .viidea import ViideaIE
-from .vimeo import (
-    VimeoIE,
-    VimeoAlbumIE,
-    VimeoChannelIE,
-    VimeoGroupsIE,
-    VimeoLikesIE,
-    VimeoReviewIE,
-    VimeoUserIE,
-    VimeoWatchLaterIE,
-)
-from .vimple import VimpleIE
-from .vine import (
-    VineIE,
-    VineUserIE,
-)
-from .viki import (
-    VikiIE,
-    VikiChannelIE,
-)
-from .vk import (
-    VKIE,
-    VKUserVideosIE,
-)
-from .vlive import VLiveIE
-from .vodlocker import VodlockerIE
-from .voicerepublic import VoiceRepublicIE
-from .vporn import VpornIE
-from .vrt import VRTIE
-from .vube import VubeIE
-from .vuclip import VuClipIE
-from .vulture import VultureIE
-from .walla import WallaIE
-from .washingtonpost import WashingtonPostIE
-from .wat import WatIE
-from .wayofthemaster import WayOfTheMasterIE
-from .wdr import (
-    WDRIE,
-    WDRMobileIE,
-    WDRMausIE,
-)
-from .webofstories import (
-    WebOfStoriesIE,
-    WebOfStoriesPlaylistIE,
-)
-from .weibo import WeiboIE
-from .weiqitv import WeiqiTVIE
-from .wimp import WimpIE
-from .wistia import WistiaIE
-from .worldstarhiphop import WorldStarHipHopIE
-from .wrzuta import WrzutaIE
-from .wsj import WSJIE
-from .xbef import XBefIE
-from .xboxclips import XboxClipsIE
-from .xfileshare import XFileShareIE
-from .xhamster import (
-    XHamsterIE,
-    XHamsterEmbedIE,
-)
-from .xminus import XMinusIE
-from .xnxx import XNXXIE
-from .xstream import XstreamIE
-from .xtube import XTubeUserIE, XTubeIE
-from .xuite import XuiteIE
-from .xvideos import XVideosIE
-from .xxxymovies import XXXYMoviesIE
-from .yahoo import (
-    YahooIE,
-    YahooSearchIE,
-)
-from .yam import YamIE
-from .yandexmusic import (
-    YandexMusicTrackIE,
-    YandexMusicAlbumIE,
-    YandexMusicPlaylistIE,
-)
-from .yesjapan import YesJapanIE
-from .yinyuetai import YinYueTaiIE
-from .ynet import YnetIE
-from .youjizz import YouJizzIE
-from .youku import YoukuIE
-from .youporn import YouPornIE
-from .yourupload import YourUploadIE
-from .youtube import (
-    YoutubeIE,
-    YoutubeChannelIE,
-    YoutubeFavouritesIE,
-    YoutubeHistoryIE,
-    YoutubePlaylistIE,
-    YoutubeRecommendedIE,
-    YoutubeSearchDateIE,
-    YoutubeSearchIE,
-    YoutubeSearchURLIE,
-    YoutubeShowIE,
-    YoutubeSubscriptionsIE,
-    YoutubeTruncatedIDIE,
-    YoutubeTruncatedURLIE,
-    YoutubeUserIE,
-    YoutubePlaylistsIE,
-    YoutubeWatchLaterIE,
-)
-from .zapiks import ZapiksIE
-from .zdf import ZDFIE, ZDFChannelIE
-from .zingmp3 import (
-    ZingMp3SongIE,
-    ZingMp3AlbumIE,
-)
-from .zippcast import ZippCastIE
-
-_ALL_CLASSES = [
-    klass
-    for name, klass in globals().items()
-    if name.endswith('IE') and name != 'GenericIE'
-]
-_ALL_CLASSES.append(GenericIE)
+try:
+    from .lazy_extractors import *
+    from .lazy_extractors import _ALL_CLASSES
+    _LAZY_LOADER = True
+except ImportError:
+    _LAZY_LOADER = False
+    from .extractors import *
+
+    _ALL_CLASSES = [
+        klass
+        for name, klass in globals().items()
+        if name.endswith('IE') and name != 'GenericIE'
+    ]
+    _ALL_CLASSES.append(GenericIE)
+
+
+def gen_extractor_classes():
+    """ Return a list of supported extractors.
+    The order does matter; the first extractor matched is the one handling the URL.
+    """
+    return _ALL_CLASSES
 
 
 def gen_extractors():
     """ Return a list of an instance of every supported extractor.
     The order does matter; the first extractor matched is the one handling the URL.
     """
-    return [klass() for klass in _ALL_CLASSES]
+    return [klass() for klass in gen_extractor_classes()]
 
 
 def list_extractors(age_limit):
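
The rewritten __init__.py above is a classic optional-fast-path import: try the build-time generated lazy_extractors module first and, failing that, eagerly import every extractor from extractors.py. The same pattern as a runnable sketch against real stdlib modules (cPickle only exists on Python 2, so the fallback fires on Python 3):

    try:
        import cPickle as pickle  # fast C implementation (Python 2 only)
        _FAST_LOADER = True
    except ImportError:
        _FAST_LOADER = False
        import pickle  # plain import, always available

    print(_FAST_LOADER, pickle.__name__)
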
index 6a29e587f007b28cfaf8bd2e714e276ab0b681a1..b584277be92b5a86fb9e0ac5d95870444d441174 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 class ABCIE(InfoExtractor):
     IE_NAME = 'abc.net.au'
-    _VALID_URL = r'http://www\.abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)'
+    _VALID_URL = r'https?://www\.abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334',
index 122dc909985c721bd236d3d7d4f1c64993f58924..c04949c215d03229909d3024dbe32e376d6c35c3 100644 (file)
@@ -44,6 +44,7 @@ class Abc7NewsIE(InfoExtractor):
             'contentURL', webpage, 'm3u8 url', fatal=True)
 
         formats = self._extract_m3u8_formats(m3u8, display_id, 'mp4')
+        self._sort_formats(formats)
 
         title = self._og_search_title(webpage).strip()
         description = self._og_search_description(webpage).strip()
diff --git a/youtube_dl/extractor/abcnews.py b/youtube_dl/extractor/abcnews.py
new file mode 100644 (file)
index 0000000..b61a632
--- /dev/null
@@ -0,0 +1,135 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import calendar
+import re
+import time
+
+from .amp import AMPIE
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+
+
+class AbcNewsVideoIE(AMPIE):
+    IE_NAME = 'abcnews:video'
+    _VALID_URL = r'https?://abcnews\.go\.com/[^/]+/video/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
+
+    _TESTS = [{
+        'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932',
+        'info_dict': {
+            'id': '20411932',
+            'ext': 'mp4',
+            'display_id': 'week-exclusive-irans-foreign-minister-zarif',
+            'title': '\'This Week\' Exclusive: Iran\'s Foreign Minister Zarif',
+            'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.',
+            'duration': 180,
+            'thumbnail': 're:^https?://.*\.jpg$',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('display_id')
+        video_id = mobj.group('id')
+        info_dict = self._extract_feed_info(
+            'http://abcnews.go.com/video/itemfeed?id=%s' % video_id)
+        info_dict.update({
+            'id': video_id,
+            'display_id': display_id,
+        })
+        return info_dict
+
+
+class AbcNewsIE(InfoExtractor):
+    IE_NAME = 'abcnews'
+    _VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
+
+    _TESTS = [{
+        'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
+        'info_dict': {
+            'id': '10498713',
+            'ext': 'flv',
+            'display_id': 'dramatic-video-rare-death-job-america',
+            'title': 'Occupational Hazards',
+            'description': 'Nightline investigates the dangers that lurk at various jobs.',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'upload_date': '20100428',
+            'timestamp': 1272412800,
+        },
+        'add_ie': ['AbcNewsVideo'],
+    }, {
+        'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
+        'info_dict': {
+            'id': '39125818',
+            'ext': 'mp4',
+            'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016',
+            'title': 'Justin Timberlake Drops Hints For Secret Single',
+            'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.',
+            'upload_date': '20160515',
+            'timestamp': 1463329500,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+            # The embedded YouTube video is blocked due to copyright issues
+            'playlist_items': '1',
+        },
+        'add_ie': ['AbcNewsVideo'],
+    }, {
+        'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('display_id')
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        video_url = self._search_regex(
+            r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL')
+        full_video_url = compat_urlparse.urljoin(url, video_url)
+
+        youtube_url = self._html_search_regex(
+            r'<iframe[^>]+src="(https://www\.youtube\.com/embed/[^"]+)"',
+            webpage, 'YouTube URL', default=None)
+
+        timestamp = None
+        date_str = self._html_search_regex(
+            r'<span[^>]+class="timestamp">([^<]+)</span>',
+            webpage, 'timestamp', fatal=False)
+        if date_str:
+            tz_offset = 0
+            if date_str.endswith(' ET'):  # Eastern Time
+                tz_offset = -5
+                date_str = date_str[:-3]
+            date_formats = ['%b. %d, %Y', '%b %d, %Y, %I:%M %p']
+            for date_format in date_formats:
+                try:
+                    timestamp = calendar.timegm(time.strptime(date_str.strip(), date_format))
+                except ValueError:
+                    continue
+            if timestamp is not None:
+                timestamp -= tz_offset * 3600
+
+        entry = {
+            '_type': 'url_transparent',
+            'ie_key': AbcNewsVideoIE.ie_key(),
+            'url': full_video_url,
+            'id': video_id,
+            'display_id': display_id,
+            'timestamp': timestamp,
+        }
+
+        if youtube_url:
+            entries = [entry, self.url_result(youtube_url, 'Youtube')]
+            return self.playlist_result(entries)
+
+        return entry
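
A standalone sketch of the timestamp handling introduced above (the sample date string is an assumption; like the extractor, this applies a fixed -5 hour offset for 'ET' and ignores daylight saving time):

    import calendar
    import time

    date_str = 'May 15, 2016, 2:25 PM ET'  # assumed sample value
    tz_offset = 0
    if date_str.endswith(' ET'):  # Eastern Time
        tz_offset = -5
        date_str = date_str[:-3]

    timestamp = None
    for date_format in ('%b. %d, %Y', '%b %d, %Y, %I:%M %p'):
        try:
            timestamp = calendar.timegm(time.strptime(date_str.strip(), date_format))
            break  # stop at the first format that matches
        except ValueError:
            continue

    if timestamp is not None:
        timestamp -= tz_offset * 3600  # shift naive ET wall-clock time to UTC
    print(timestamp)
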
index 92eee8119da420a78782d5eea05a2d3564468d3a..94ce88c834f5ce1575b36f839f02fdf43f96e046 100644 (file)
@@ -2,10 +2,14 @@
 from __future__ import unicode_literals
 
 import re
+import functools
 
 from .common import InfoExtractor
 from ..compat import compat_str
-from ..utils import int_or_none
+from ..utils import (
+    int_or_none,
+    OnDemandPagedList,
+)
 
 
 class ACastIE(InfoExtractor):
@@ -26,13 +30,8 @@ class ACastIE(InfoExtractor):
 
     def _real_extract(self, url):
         channel, display_id = re.match(self._VALID_URL, url).groups()
-
-        embed_page = self._download_webpage(
-            re.sub('(?:www\.)?acast\.com', 'embedcdn.acast.com', url), display_id)
-        cast_data = self._parse_json(self._search_regex(
-            r'window\[\'acast/queries\'\]\s*=\s*([^;]+);', embed_page, 'acast data'),
-            display_id)['GetAcast/%s/%s' % (channel, display_id)]
-
+        cast_data = self._download_json(
+            'https://embed.acast.com/api/acasts/%s/%s' % (channel, display_id), display_id)
         return {
             'id': compat_str(cast_data['id']),
             'display_id': display_id,
@@ -58,15 +57,26 @@ class ACastChannelIE(InfoExtractor):
         'playlist_mincount': 20,
     }
     _API_BASE_URL = 'https://www.acast.com/api/'
+    _PAGE_SIZE = 10
 
     @classmethod
     def suitable(cls, url):
         return False if ACastIE.suitable(url) else super(ACastChannelIE, cls).suitable(url)
 
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        channel_data = self._download_json(self._API_BASE_URL + 'channels/%s' % display_id, display_id)
-        casts = self._download_json(self._API_BASE_URL + 'channels/%s/acasts' % display_id, display_id)
-        entries = [self.url_result('https://www.acast.com/%s/%s' % (display_id, cast['url']), 'ACast') for cast in casts]
+    def _fetch_page(self, channel_slug, page):
+        casts = self._download_json(
+            self._API_BASE_URL + 'channels/%s/acasts?page=%s' % (channel_slug, page),
+            channel_slug, note='Downloading page %d of channel data' % page)
+        for cast in casts:
+            yield self.url_result(
+                'https://www.acast.com/%s/%s' % (channel_slug, cast['url']),
+                'ACast', cast['id'])
 
-        return self.playlist_result(entries, compat_str(channel_data['id']), channel_data['name'], channel_data.get('description'))
+    def _real_extract(self, url):
+        channel_slug = self._match_id(url)
+        channel_data = self._download_json(
+            self._API_BASE_URL + 'channels/%s' % channel_slug, channel_slug)
+        entries = OnDemandPagedList(functools.partial(
+            self._fetch_page, channel_slug), self._PAGE_SIZE)
+        return self.playlist_result(entries, compat_str(
+            channel_data['id']), channel_data['name'], channel_data.get('description'))
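
OnDemandPagedList (from youtube_dl/utils.py) wraps a page-fetching callable so a page of entries is only requested once it is actually consumed, and functools.partial pre-binds everything except the page number. A generic, self-contained sketch of the same lazy-pagination idea (these helper names are illustrative, not the real utils API):

    import functools

    def fetch_page(items, page_size, page):
        # stand-in for the paged JSON API call; 'items' simulates remote data
        start = page * page_size
        for item in items[start:start + page_size]:
            yield item

    def iter_pages(page_fetcher):
        page = 0
        while True:
            entries = list(page_fetcher(page))
            if not entries:  # an empty page marks the end of the listing
                return
            for entry in entries:
                yield entry
            page += 1

    data = list(range(25))
    pager = functools.partial(fetch_page, data, 10)
    assert list(iter_pages(pager)) == data
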
index e3e6d21137994593d593fbc51313bf38032ce7f8..55a9322a753829e90715a76bc91e06828c460531 100644 (file)
@@ -6,7 +6,7 @@ from .common import InfoExtractor
 from ..compat import (
     compat_HTTPError,
     compat_str,
-    compat_urllib_parse,
+    compat_urllib_parse_urlencode,
     compat_urllib_parse_urlparse,
 )
 from ..utils import (
@@ -16,7 +16,7 @@ from ..utils import (
 
 
 class AddAnimeIE(InfoExtractor):
-    _VALID_URL = r'http://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P<id>[\w_]+)'
+    _VALID_URL = r'https?://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P<id>[\w_]+)'
     _TESTS = [{
         'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
         'md5': '72954ea10bc979ab5e2eb288b21425a0',
@@ -60,7 +60,7 @@ class AddAnimeIE(InfoExtractor):
             confirm_url = (
                 parsed_url.scheme + '://' + parsed_url.netloc +
                 action + '?' +
-                compat_urllib_parse.urlencode({
+                compat_urllib_parse_urlencode({
                     'jschl_vc': vc, 'jschl_answer': compat_str(av_val)}))
             self._download_webpage(
                 confirm_url, video_id,
index 8753ee2cf2b5fdaa5810fc8d564f388734a84324..5ae16fa16809b557e74e133a4a7811d396b1c2c2 100644 (file)
@@ -156,7 +156,10 @@ class AdobeTVVideoIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        video_data = self._download_json(url + '?format=json', video_id)
+        webpage = self._download_webpage(url, video_id)
+
+        video_data = self._parse_json(self._search_regex(
+            r'var\s+bridge\s*=\s*([^;]+);', webpage, 'bridged data'), video_id)
 
         formats = [{
             'format_id': '%s-%s' % (determine_ext(source['src']), source.get('height')),
index 6018ae79a2a114451c79edc34af524898462004d..1bbfe264177dab80c6e40009e8543e52600c3d9d 100644 (file)
@@ -1,13 +1,19 @@
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
-from ..utils import smuggle_url
+from ..utils import (
+    smuggle_url,
+    update_url_query,
+    unescapeHTML,
+)
 
 
 class AENetworksIE(InfoExtractor):
     IE_NAME = 'aenetworks'
     IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
-    _VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])'
+    _VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?P<type>[^/]+)/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])'
 
     _TESTS = [{
         'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',
@@ -16,6 +22,9 @@ class AENetworksIE(InfoExtractor):
             'ext': 'mp4',
             'title': "Bet You Didn't Know: Valentine's Day",
             'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
+            'timestamp': 1375819729,
+            'upload_date': '20130806',
+            'uploader': 'AENE-NEW',
         },
         'params': {
             # m3u8 download
@@ -25,15 +34,15 @@ class AENetworksIE(InfoExtractor):
         'expected_warnings': ['JSON-LD'],
     }, {
         'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
+        'md5': '8ff93eb073449f151d6b90c0ae1ef0c7',
         'info_dict': {
             'id': 'eg47EERs_JsZ',
             'ext': 'mp4',
             'title': 'Winter Is Coming',
             'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
-        },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
+            'timestamp': 1338306241,
+            'upload_date': '20120529',
+            'uploader': 'AENE-NEW',
         },
         'add_ie': ['ThePlatform'],
     }, {
@@ -48,7 +57,7 @@ class AENetworksIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        page_type, video_id = re.match(self._VALID_URL, url).groups()
 
         webpage = self._download_webpage(url, video_id)
 
@@ -56,11 +65,23 @@ class AENetworksIE(InfoExtractor):
             r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id,
             r"media_url\s*=\s*'([^']+)'"
         ]
-        video_url = self._search_regex(video_url_re, webpage, 'video url')
+        video_url = unescapeHTML(self._search_regex(video_url_re, webpage, 'video url'))
+        query = {'mbr': 'true'}
+        if page_type == 'shows':
+            query['assetTypes'] = 'medium_video_s3'
+        if 'switch=hds' in video_url:
+            query['switch'] = 'hls'
 
         info = self._search_json_ld(webpage, video_id, fatal=False)
         info.update({
             '_type': 'url_transparent',
-            'url': smuggle_url(video_url, {'sig': {'key': 'crazyjava', 'secret': 's3cr3t'}}),
+            'url': smuggle_url(
+                update_url_query(video_url, query),
+                {
+                    'sig': {
+                        'key': 'crazyjava',
+                        'secret': 's3cr3t'},
+                    'force_smil_url': True
+                }),
         })
         return info
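
The hunk above routes the ThePlatform URL through update_url_query before smuggling it; roughly speaking, that helper merges extra parameters into a URL's existing query string. A rough standalone approximation (the real helper lives in youtube_dl/utils.py and may differ in details):

    try:  # Python 3
        from urllib.parse import urlparse, urlunparse, parse_qs, urlencode
    except ImportError:  # Python 2
        from urlparse import urlparse, urlunparse, parse_qs
        from urllib import urlencode

    def update_query(url, extra):
        parts = urlparse(url)
        query = parse_qs(parts.query)
        query.update(extra)  # later values override, as with 'switch=hls'
        return urlunparse(parts._replace(query=urlencode(query, doseq=True)))

    print(update_query('http://example.com/v?switch=hds', {'switch': 'hls', 'mbr': 'true'}))
    # -> http://example.com/v?switch=hls&mbr=true (parameter order may vary)
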
diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py
new file mode 100644 (file)
index 0000000..518c61f
--- /dev/null
@@ -0,0 +1,133 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_parse_urlparse,
+    compat_urlparse,
+)
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    xpath_element,
+    xpath_text,
+)
+
+
+class AfreecaTVIE(InfoExtractor):
+    IE_DESC = 'afreecatv.com'
+    _VALID_URL = r'''(?x)^
+        https?://(?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
+        (?:
+            /app/(?:index|read_ucc_bbs)\.cgi|
+            /player/[Pp]layer\.(?:swf|html))
+        \?.*?\bnTitleNo=(?P<id>\d+)'''
+    _TESTS = [{
+        'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
+        'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
+        'info_dict': {
+            'id': '36164052',
+            'ext': 'mp4',
+            'title': '데일리 에이프릴 요정들의 시상식!',
+            'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
+            'uploader': 'dailyapril',
+            'uploader_id': 'dailyapril',
+            'upload_date': '20160503',
+        }
+    }, {
+        'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867',
+        'info_dict': {
+            'id': '36153164',
+            'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
+            'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
+            'uploader': 'dailyapril',
+            'uploader_id': 'dailyapril',
+        },
+        'playlist_count': 2,
+        'playlist': [{
+            'md5': 'd8b7c174568da61d774ef0203159bf97',
+            'info_dict': {
+                'id': '36153164_1',
+                'ext': 'mp4',
+                'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
+                'upload_date': '20160502',
+            },
+        }, {
+            'md5': '58f2ce7f6044e34439ab2d50612ab02b',
+            'info_dict': {
+                'id': '36153164_2',
+                'ext': 'mp4',
+                'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
+                'upload_date': '20160502',
+            },
+        }],
+    }, {
+        'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
+        'only_matching': True,
+    }]
+
+    @staticmethod
+    def parse_video_key(key):
+        video_key = {}
+        m = re.match(r'^(?P<upload_date>\d{8})_\w+_(?P<part>\d+)$', key)
+        if m:
+            video_key['upload_date'] = m.group('upload_date')
+            video_key['part'] = m.group('part')
+        return video_key
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        parsed_url = compat_urllib_parse_urlparse(url)
+        info_url = compat_urlparse.urlunparse(parsed_url._replace(
+            netloc='afbbs.afreecatv.com:8080',
+            path='/api/video/get_video_info.php'))
+        video_xml = self._download_xml(info_url, video_id)
+
+        if xpath_element(video_xml, './track/video/file') is None:
+            raise ExtractorError('Specified AfreecaTV video does not exist',
+                                 expected=True)
+
+        title = xpath_text(video_xml, './track/title', 'title')
+        uploader = xpath_text(video_xml, './track/nickname', 'uploader')
+        uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id')
+        duration = int_or_none(xpath_text(video_xml, './track/duration',
+                                          'duration'))
+        thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail')
+
+        entries = []
+        for i, video_file in enumerate(video_xml.findall('./track/video/file')):
+            video_key = self.parse_video_key(video_file.get('key', ''))
+            if not video_key:
+                continue
+            entries.append({
+                'id': '%s_%s' % (video_id, video_key.get('part', i + 1)),
+                'title': title,
+                'upload_date': video_key.get('upload_date'),
+                'duration': int_or_none(video_file.get('duration')),
+                'url': video_file.text,
+            })
+
+        info = {
+            'id': video_id,
+            'title': title,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'duration': duration,
+            'thumbnail': thumbnail,
+        }
+
+        if len(entries) > 1:
+            info['_type'] = 'multi_video'
+            info['entries'] = entries
+        elif len(entries) == 1:
+            info['url'] = entries[0]['url']
+            info['upload_date'] = entries[0].get('upload_date')
+        else:
+            raise ExtractorError(
+                'No files found for the specified AfreecaTV video; either'
+                ' the URL is incorrect or the video has been made private.',
+                expected=True)
+
+        return info
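
The parse_video_key helper above splits AfreecaTV's per-file key into its date and part components; the expected layout is '<YYYYMMDD>_<token>_<part>'. A quick check of the same regex against an assumed sample key:

    import re

    key = '20160503_abc123_2'  # assumed sample value
    m = re.match(r'^(?P<upload_date>\d{8})_\w+_(?P<part>\d+)$', key)
    assert m is not None
    assert m.group('upload_date') == '20160503'
    assert m.group('part') == '2'
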
index e0518cf261fbffc4dd23bc4a3800d04eae324139..5766b4fe80ab3e827ce9cccfe8e5460113c61440 100644 (file)
@@ -6,7 +6,7 @@ from ..utils import int_or_none
 
 
 class AftonbladetIE(InfoExtractor):
-    _VALID_URL = r'http://tv\.aftonbladet\.se/abtv/articles/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://tv\.aftonbladet\.se/abtv/articles/(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://tv.aftonbladet.se/abtv/articles/36015',
         'info_dict': {
@@ -24,10 +24,10 @@ class AftonbladetIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         # find internal video meta data
-        meta_url = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json'
+        meta_url = 'http://aftonbladet-play-metadata.cdn.drvideo.aptoma.no/video/%s.json'
         player_config = self._parse_json(self._html_search_regex(
             r'data-player-config="([^"]+)"', webpage, 'player config'), video_id)
-        internal_meta_id = player_config['videoId']
+        internal_meta_id = player_config['aptomaVideoId']
         internal_meta_url = meta_url % internal_meta_id
         internal_meta_json = self._download_json(
             internal_meta_url, video_id, 'Downloading video meta data')
index 5b2c0dc9ac10aa826d5757f2fc75738c376219a3..b081695d8400c0e24d36e84bd8445efa084ed8b3 100644 (file)
@@ -4,7 +4,7 @@ from .common import InfoExtractor
 
 
 class AlJazeeraIE(InfoExtractor):
-    _VALID_URL = r'http://www\.aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html'
+    _VALID_URL = r'https?://www\.aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html'
 
     _TEST = {
         'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
@@ -13,24 +13,18 @@ class AlJazeeraIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'The Slum - Episode 1: Deliverance',
             'description': 'As a birth attendant advocating for family planning, Remy is on the frontline of Tondo\'s battle with overcrowding.',
-            'uploader': 'Al Jazeera English',
+            'uploader_id': '665003303001',
+            'timestamp': 1411116829,
+            'upload_date': '20140919',
         },
-        'add_ie': ['BrightcoveLegacy'],
+        'add_ie': ['BrightcoveNew'],
         'skip': 'Not accessible from Travis CI server',
     }
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/665003303001/default_default/index.html?videoId=%s'
 
     def _real_extract(self, url):
         program_name = self._match_id(url)
         webpage = self._download_webpage(url, program_name)
         brightcove_id = self._search_regex(
             r'RenderPagesVideo\(\'(.+?)\'', webpage, 'brightcove id')
-
-        return {
-            '_type': 'url',
-            'url': (
-                'brightcove:'
-                'playerKey=AQ~~%2CAAAAmtVJIFk~%2CTVGOQ5ZTwJbeMWnq5d_H4MOM57xfzApc'
-                '&%40videoPlayer={0}'.format(brightcove_id)
-            ),
-            'ie_key': 'BrightcoveLegacy',
-        }
+        return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
index 69e6baff7fd4cb6c948ad1826df3a5bc9342f569..8545681bead617b2ffedeee04212e032bdc3406e 100644 (file)
@@ -52,7 +52,7 @@ class AMPIE(InfoExtractor):
         for media_data in media_content:
             media = media_data['@attributes']
             media_type = media['type']
-            if media_type == 'video/f4m':
+            if media_type in ('video/f4m', 'application/f4m+xml'):
                 formats.extend(self._extract_f4m_formats(
                     media['url'] + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
                     video_id, f4m_id='hds', fatal=False))
@@ -61,7 +61,7 @@ class AMPIE(InfoExtractor):
                     media['url'], video_id, 'mp4', m3u8_id='hls', fatal=False))
             else:
                 formats.append({
-                    'format_id': media_data['media-category']['@attributes']['label'],
+                    'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),
                     'url': media['url'],
                     'tbr': int_or_none(media.get('bitrate')),
                     'filesize': int_or_none(media.get('fileSize')),
@@ -69,12 +69,14 @@ class AMPIE(InfoExtractor):
 
         self._sort_formats(formats)
 
+        timestamp = parse_iso8601(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))
+
         return {
             'id': video_id,
             'title': get_media_node('title'),
             'description': get_media_node('description'),
             'thumbnails': thumbnails,
-            'timestamp': parse_iso8601(item.get('pubDate'), ' '),
+            'timestamp': timestamp,
             'duration': int_or_none(media_content[0].get('@attributes', {}).get('duration')),
             'subtitles': subtitles,
             'formats': formats,
index a7d8daf7b4788bbaff020ccfca1ed7ba46e5d6f8..9b01e38f5fe8b5a80b2635061433cc214fb1b315 100644 (file)
@@ -3,10 +3,13 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_urlparse
+from ..compat import (
+    compat_urlparse,
+    compat_str,
+)
 from ..utils import (
     determine_ext,
-    encode_dict,
+    extract_attributes,
     ExtractorError,
     sanitized_Request,
     urlencode_postdata,
@@ -18,7 +21,7 @@ class AnimeOnDemandIE(InfoExtractor):
     _LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in'
     _APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
     _NETRC_MACHINE = 'animeondemand'
-    _TEST = {
+    _TESTS = [{
         'url': 'https://www.anime-on-demand.de/anime/161',
         'info_dict': {
             'id': '161',
@@ -26,7 +29,19 @@ class AnimeOnDemandIE(InfoExtractor):
             'description': 'md5:6681ce3c07c7189d255ac6ab23812d31',
         },
         'playlist_mincount': 4,
-    }
+    }, {
+        # Film wording is used instead of Episode
+        'url': 'https://www.anime-on-demand.de/anime/39',
+        'only_matching': True,
+    }, {
+        # Episodes without titles
+        'url': 'https://www.anime-on-demand.de/anime/162',
+        'only_matching': True,
+    }, {
+        # ger/jap, Dub/OmU, account required
+        'url': 'https://www.anime-on-demand.de/anime/169',
+        'only_matching': True,
+    }]
 
     def _login(self):
         (username, password) = self._get_login_info()
@@ -36,6 +51,10 @@ class AnimeOnDemandIE(InfoExtractor):
         login_page = self._download_webpage(
             self._LOGIN_URL, None, 'Downloading login page')
 
+        if '>Our licensing terms allow the distribution of animes only to German-speaking countries of Europe' in login_page:
+            self.raise_geo_restricted(
+                '%s is only available in German-speaking countries of Europe' % self.IE_NAME)
+
         login_form = self._form_hidden_inputs('new_user', login_page)
 
         login_form.update({
@@ -51,7 +70,7 @@ class AnimeOnDemandIE(InfoExtractor):
             post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
 
         request = sanitized_Request(
-            post_url, urlencode_postdata(encode_dict(login_form)))
+            post_url, urlencode_postdata(login_form))
         request.add_header('Referer', self._LOGIN_URL)
 
         response = self._download_webpage(
@@ -91,14 +110,22 @@ class AnimeOnDemandIE(InfoExtractor):
 
         entries = []
 
-        for episode_html in re.findall(r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', webpage):
-            m = re.search(
-                r'class="episodebox-title"[^>]+title="Episode (?P<number>\d+) - (?P<title>.+?)"', episode_html)
-            if not m:
+        for num, episode_html in enumerate(re.findall(
+                r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', webpage), 1):
+            episodebox_title = self._search_regex(
+                (r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1',
+                 r'class="episodebox-title"[^>]+>(?P<title>.+?)<'),
+                episode_html, 'episodebox title', default=None, group='title')
+            if not episodebox_title:
                 continue
 
-            episode_number = int(m.group('number'))
-            episode_title = m.group('title')
+            episode_number = int(self._search_regex(
+                r'(?:Episode|Film)\s*(\d+)',
+                episodebox_title, 'episode number', default=num))
+            episode_title = self._search_regex(
+                r'(?:Episode|Film)\s*\d+\s*-\s*(.+)',
+                episodebox_title, 'episode title', default=None)
+
             video_id = 'episode-%d' % episode_number
 
             common_info = {
@@ -110,33 +137,86 @@ class AnimeOnDemandIE(InfoExtractor):
 
             formats = []
 
-            playlist_url = self._search_regex(
-                r'data-playlist=(["\'])(?P<url>.+?)\1',
-                episode_html, 'data playlist', default=None, group='url')
-            if playlist_url:
-                request = sanitized_Request(
-                    compat_urlparse.urljoin(url, playlist_url),
-                    headers={
-                        'X-Requested-With': 'XMLHttpRequest',
-                        'X-CSRF-Token': csrf_token,
-                        'Referer': url,
-                        'Accept': 'application/json, text/javascript, */*; q=0.01',
-                    })
-
-                playlist = self._download_json(
-                    request, video_id, 'Downloading playlist JSON', fatal=False)
-                if playlist:
-                    playlist = playlist['playlist'][0]
-                    title = playlist['title']
+            for input_ in re.findall(
+                    r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', episode_html):
+                attributes = extract_attributes(input_)
+                playlist_urls = []
+                for playlist_key in ('data-playlist', 'data-otherplaylist'):
+                    playlist_url = attributes.get(playlist_key)
+                    if isinstance(playlist_url, compat_str) and re.match(
+                            r'/?[\da-zA-Z]+', playlist_url):
+                        playlist_urls.append(attributes[playlist_key])
+                if not playlist_urls:
+                    continue
+
+                lang = attributes.get('data-lang')
+                lang_note = attributes.get('value')
+
+                for playlist_url in playlist_urls:
+                    kind = self._search_regex(
+                        r'videomaterialurl/\d+/([^/]+)/',
+                        playlist_url, 'media kind', default=None)
+                    format_id_list = []
+                    if lang:
+                        format_id_list.append(lang)
+                    if kind:
+                        format_id_list.append(kind)
+                    if not format_id_list:
+                        format_id_list.append(compat_str(num))
+                    format_id = '-'.join(format_id_list)
+                    format_note = ', '.join(filter(None, (kind, lang_note)))
+                    request = sanitized_Request(
+                        compat_urlparse.urljoin(url, playlist_url),
+                        headers={
+                            'X-Requested-With': 'XMLHttpRequest',
+                            'X-CSRF-Token': csrf_token,
+                            'Referer': url,
+                            'Accept': 'application/json, text/javascript, */*; q=0.01',
+                        })
+                    playlist = self._download_json(
+                        request, video_id, 'Downloading %s playlist JSON' % format_id,
+                        fatal=False)
+                    if not playlist:
+                        continue
+                    start_video = playlist.get('startvideo', 0)
+                    playlist = playlist.get('playlist')
+                    if not playlist or not isinstance(playlist, list):
+                        continue
+                    playlist = playlist[start_video]
+                    title = playlist.get('title')
+                    if not title:
+                        continue
                     description = playlist.get('description')
                     for source in playlist.get('sources', []):
                         file_ = source.get('file')
-                        if file_ and determine_ext(file_) == 'm3u8':
-                            formats = self._extract_m3u8_formats(
+                        if not file_:
+                            continue
+                        ext = determine_ext(file_)
+                        format_id_list = [lang, kind]
+                        if ext == 'm3u8':
+                            format_id_list.append('hls')
+                        elif source.get('type') == 'video/dash' or ext == 'mpd':
+                            format_id_list.append('dash')
+                        format_id = '-'.join(filter(None, format_id_list))
+                        if ext == 'm3u8':
+                            file_formats = self._extract_m3u8_formats(
                                 file_, video_id, 'mp4',
-                                entry_protocol='m3u8_native', m3u8_id='hls')
+                                entry_protocol='m3u8_native', m3u8_id=format_id, fatal=False)
+                        elif source.get('type') == 'video/dash' or ext == 'mpd':
+                            # DASH extraction is skipped for now; when enabled it would call
+                            # self._extract_mpd_formats(file_, video_id, mpd_id=format_id, fatal=False)
+                            continue
+                        else:
+                            continue
+                        for f in file_formats:
+                            f.update({
+                                'language': lang,
+                                'format_note': format_note,
+                            })
+                        formats.extend(file_formats)
 
             if formats:
+                self._sort_formats(formats)
                 f = common_info.copy()
                 f.update({
                     'title': title,
@@ -145,16 +225,18 @@ class AnimeOnDemandIE(InfoExtractor):
                 })
                 entries.append(f)
 
-            m = re.search(
-                r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>Teaser<',
-                episode_html)
-            if m:
-                f = common_info.copy()
-                f.update({
-                    'id': '%s-teaser' % f['id'],
-                    'title': m.group('title'),
-                    'url': compat_urlparse.urljoin(url, m.group('href')),
-                })
-                entries.append(f)
+            # Extract teaser only when full episode is not available
+            if not formats:
+                m = re.search(
+                    r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>Teaser<',
+                    episode_html)
+                if m:
+                    f = common_info.copy()
+                    f.update({
+                        'id': '%s-teaser' % f['id'],
+                        'title': m.group('title'),
+                        'url': compat_urlparse.urljoin(url, m.group('href')),
+                    })
+                    entries.append(f)
 
         return self.playlist_result(entries, anime_id, anime_title, anime_description)
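
The rewrite above assembles format IDs from whichever of language, media kind and
protocol are known. A small sketch of that composition; the helper name and the
sample values are illustrative only:

    def make_format_id(lang, kind, protocol=None):
        # Join the available components, silently dropping missing ones.
        return '-'.join(filter(None, [lang, kind, protocol]))

    assert make_format_id('ger', 'dub', 'hls') == 'ger-dub-hls'
    assert make_format_id(None, 'omu') == 'omu'
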
diff --git a/youtube_dl/extractor/anvato.py b/youtube_dl/extractor/anvato.py
new file mode 100644 (file)
index 0000000..cb29cf1
--- /dev/null
@@ -0,0 +1,224 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+import hashlib
+import json
+import random
+import time
+
+from .common import InfoExtractor
+from ..aes import aes_encrypt
+from ..compat import compat_str
+from ..utils import (
+    bytes_to_intlist,
+    determine_ext,
+    intlist_to_bytes,
+    int_or_none,
+    strip_jsonp,
+)
+
+
+def md5_text(s):
+    if not isinstance(s, compat_str):
+        s = compat_str(s)
+    return hashlib.md5(s.encode('utf-8')).hexdigest()
+
+
+class AnvatoIE(InfoExtractor):
+    # Copied from anvplayer.min.js
+    _ANVACK_TABLE = {
+        'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ',
+        'nbcu_nbcd_desktop_web_qa_1a6f01bdd0dc45a439043b694c8a031d': 'eSxJUbA2UUKBTXryyQ2d6NuM8oEqaPySvaPzfKNA',
+        'nbcu_nbcd_desktop_web_acc_eb2ff240a5d4ae9a63d4c297c32716b6c523a129': '89JR3RtUGbvKuuJIiKOMK0SoarLb5MUx8v89RcbP',
+        'nbcu_nbcd_watchvod_web_prod_e61107507180976724ec8e8319fe24ba5b4b60e1': 'Uc7dFt7MJ9GsBWB5T7iPvLaMSOt8BBxv4hAXk5vv',
+        'nbcu_nbcd_watchvod_web_qa_42afedba88a36203db5a4c09a5ba29d045302232': 'T12oDYVFP2IaFvxkmYMy5dKxswpLHtGZa4ZAXEi7',
+        'nbcu_nbcd_watchvod_web_acc_9193214448e2e636b0ffb78abacfd9c4f937c6ca': 'MmobcxUxMedUpohNWwXaOnMjlbiyTOBLL6d46ZpR',
+        'nbcu_local_monitor_web_acc_f998ad54eaf26acd8ee033eb36f39a7b791c6335': 'QvfIoPYrwsjUCcASiw3AIkVtQob2LtJHfidp9iWg',
+        'nbcu_cable_monitor_web_acc_a413759603e8bedfcd3c61b14767796e17834077': 'uwVPJLShvJWSs6sWEIuVem7MTF8A4IknMMzIlFto',
+        'nbcu_nbcd_mcpstage_web_qa_4c43a8f6e95a88dbb40276c0630ba9f693a63a4e': 'PxVYZVwjhgd5TeoPRxL3whssb5OUPnM3zyAzq8GY',
+        'nbcu_comcast_comcast_web_prod_074080762ad4ce956b26b43fb22abf153443a8c4': 'afnaRZfDyg1Z3WZHdupKfy6xrbAG2MHqe3VfuSwh',
+        'nbcu_comcast_comcast_web_qa_706103bb93ead3ef70b1de12a0e95e3c4481ade0': 'DcjsVbX9b3uoPlhdriIiovgFQZVxpISZwz0cx1ZK',
+        'nbcu_comcast_comcastcable_web_prod_669f04817536743563d7331c9293e59fbdbe3d07': '0RwMN2cWy10qhAhOscq3eK7aEe0wqnKt3vJ0WS4D',
+        'nbcu_comcast_comcastcable_web_qa_3d9d2d66219094127f0f6b09cc3c7bb076e3e1ca': '2r8G9DEya7PCqBceKZgrn2XkXgASjwLMuaFE1Aad',
+        'hearst_hearst_demo_web_stage_960726dfef3337059a01a78816e43b29ec04dfc7': 'cuZBPXTR6kSdoTCVXwk5KGA8rk3NrgGn4H6e9Dsp',
+        'anvato_mcpqa_demo_web_stage_18b55e00db5a13faa8d03ae6e41f6f5bcb15b922': 'IOaaLQ8ymqVyem14QuAvE5SndQynTcH5CrLkU2Ih',
+        'anvato_nextmedia_demo_web_stage_9787d56a02ff6b9f43e9a2b0920d8ca88beb5818': 'Pqu9zVzI1ApiIzbVA3VkGBEQHvdKSUuKpD6s2uaR',
+        'anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a': 'du1ccmn7RxzgizwbWU7hyUaGodNlJn7HtXI0WgXW',
+        'anvato_scripps_app_web_stage_360797e00fe2826be142155c4618cc52fce6c26c': '2PMrQ0BRoqCWl7nzphj0GouIMEh2mZYivAT0S1Su',
+        'fs2go_fs2go_go_all_prod_21934911ccfafc03a075894ead2260d11e2ddd24': 'RcuHlKikW2IJw6HvVoEkqq2UsuEJlbEl11pWXs4Q',
+        'fs2go_fs2go_go_web_prod_ead4b0eec7460c1a07783808db21b49cf1f2f9a7': '4K0HTT2u1zkQA2MaGaZmkLa1BthGSBdr7jllrhk5',
+        'fs2go_fs2go_go_web_stage_407585454a4400355d4391691c67f361': 'ftnc37VKRJBmHfoGGi3kT05bHyeJzilEzhKJCyl3',
+        'fs2go_fs2go_go_android_stage_44b714db6f8477f29afcba15a41e1d30': 'CtxpPvVpo6AbZGomYUhkKs7juHZwNml9b9J0J2gI',
+        'anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67': 'Pw0XX5KBDsyRnPS0R2JrSrXftsy8Jnz5pAjaYC8s',
+        'anvato_cbslocal_app_web_stage_547a5f096594cd3e00620c6f825cad1096d28c80': '37OBUhX2uwNyKhhrNzSSNHSRPZpApC3trdqDBpuz',
+        'fs2go_att_att_web_prod_1042dddd089a05438b6a08f972941176f699ffd8': 'JLcF20JwYvpv6uAGcLWIaV12jKwaL1R8us4b6Zkg',
+        'fs2go_att_att_web_stage_807c5001955fc114a3331fe027ddc76e': 'gbu1oO1y0JiOFh4SUipt86P288JHpyjSqolrrT1x',
+        'fs2go_fs2go_tudor_web_prod_a7dd8e5a7cdc830cae55eae6f3e9fee5ee49eb9b': 'ipcp87VCEZXPPe868j3orLqzc03oTy7DXsGkAXXH',
+        'anvato_mhz_app_web_prod_b808218b30de7fdf60340cbd9831512bc1bf6d37': 'Stlm5Gs6BEhJLRTZHcNquyzxGqr23EuFmE5DCgjX',
+        'fs2go_charter_charter_web_stage_c2c6e5a68375a1bf00fff213d3ff8f61a835a54c': 'Lz4hbJp1fwL6jlcz4M2PMzghM4jp4aAmybtT5dPc',
+        'fs2go_charter_charter_web_prod_ebfe3b10f1af215a7321cd3d629e0b81dfa6fa8c': 'vUJsK345A1bVmyYDRhZX0lqFIgVXuqhmuyp1EtPK',
+        'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b': 'GDKq1ixvX3MoBNdU5IOYmYa2DTUXYOozPjrCJnW7',
+        'anvato_epfox_app_web_stage_a3c2ce60f8f83ef374a88b68ee73a950f8ab87ce': '2jz2NH4BsXMaDsoJ5qkHMbcczAfIReo2eFYuVC1C',
+        'fs2go_verizon_verizon_web_stage_08e6df0354a4803f1b1f2428b5a9a382e8dbcd62': 'rKTVapNaAcmnUbGL4ZcuOoY4SE7VmZSQsblPFr7e',
+        'fs2go_verizon_verizon_web_prod_f909564cb606eff1f731b5e22e0928676732c445': 'qLSUuHerM3u9eNPzaHyUK52obai5MvE4XDJfqYe1',
+        'fs2go_foxcom_synd_web_stage_f7b9091f00ea25a4fdaaae77fca5b54cdc7e7043': '96VKF2vLd24fFiDfwPFpzM5llFN4TiIGAlodE0Re',
+        'fs2go_foxcom_synd_web_prod_0f2cdd64d87e4ab6a1d54aada0ff7a7c8387a064': 'agiPjbXEyEZUkbuhcnmVPhe9NNVbDjCFq2xkcx51',
+        'anvato_own_app_web_stage_1214ade5d28422c4dae9d03c1243aba0563c4dba': 'mzhamNac3swG4WsJAiUTacnGIODi6SWeVWk5D7ho',
+        'anvato_own_app_web_prod_944e162ed927ec3e9ed13eb68ed2f1008ee7565e': '9TSxh6G2TXOLBoYm9ro3LdNjjvnXpKb8UR8KoIP9',
+        'anvato_scripps_app_ftv_prod_a10a10468edd5afb16fb48171c03b956176afad1': 'COJ2i2UIPK7xZqIWswxe7FaVBOVgRkP1F6O6qGoH',
+        'anvato_scripps_app_ftv_stage_77d3ad2bdb021ec37ca2e35eb09acd396a974c9a': 'Q7nnopNLe2PPfGLOTYBqxSaRpl209IhqaEuDZi1F',
+        'anvato_univision_app_web_stage_551236ef07a0e17718c3995c35586b5ed8cb5031': 'D92PoLS6UitwxDRA191HUGT9OYcOjV6mPMa5wNyo',
+        'anvato_univision_app_web_prod_039a5c0a6009e637ae8ac906718a79911e0e65e1': '5mVS5u4SQjtw6NGw2uhMbKEIONIiLqRKck5RwQLR',
+        'nbcu_cnbc_springfield_ios_prod_670207fae43d6e9a94c351688851a2ce': 'M7fqCCIP9lW53oJbHs19OlJlpDrVyc2OL8gNeuTa',
+        'nbcu_cnbc_springfieldvod_ios_prod_7a5f04b1ceceb0e9c9e2264a44aa236e08e034c2': 'Yia6QbJahW0S7K1I0drksimhZb4UFq92xLBmmMvk',
+        'anvato_cox_app_web_prod_ce45cda237969f93e7130f50ee8bb6280c1484ab': 'cc0miZexpFtdoqZGvdhfXsLy7FXjRAOgb9V0f5fZ',
+        'anvato_cox_app_web_stage_c23dbe016a8e9d8c7101d10172b92434f6088bf9': 'yivU3MYHd2eDZcOfmLbINVtqxyecKTOp8OjOuoGJ',
+        'anvato_chnzero_app_web_stage_b1164d1352b579e792e542fddf13ee34c0eeb46b': 'A76QkXMmVH8lTCfU15xva1mZnSVcqeY4Xb22Kp7m',
+        'anvato_chnzero_app_web_prod_253d358928dc08ec161eda2389d53707288a730c': 'OA5QI3ZWZZkdtUEDqh28AH8GedsF6FqzJI32596b',
+        'anvato_discovery_vodpoc_web_stage_9fa7077b5e8af1f8355f65d4fb8d2e0e9d54e2b7': 'q3oT191tTQ5g3JCP67PkjLASI9s16DuWZ6fYmry3',
+        'anvato_discovery_vodpoc_web_prod_688614983167a1af6cdf6d76343fda10a65223c1': 'qRvRQCTVHd0VVOHsMvvfidyWmlYVrTbjby7WqIuK',
+        'nbcu_cnbc_springfieldvod_ftv_stage_826040aad1925a46ac5dfb4b3c5143e648c6a30d': 'JQaSb5a8Tz0PT4ti329DNmzDO30TnngTHmvX8Vua',
+        'nbcu_cnbc_springfield_ftv_stage_826040aad1925a46ac5dfb4b3c5143e648c6a30d': 'JQaSb5a8Tz0PT4ti329DNmzDO30TnngTHmvX8Vua',
+        'nbcu_nbcd_capture_web_stage_4dd9d585bfb984ebf856dee35db027b2465cc4ae': '0j1Ov4Vopyi2HpBZJYdL2m8ERJVGYh3nNpzPiO8F',
+        'nbcu_nbcd_watch3_android_prod_7712ca5fcf1c22f19ec1870a9650f9c37db22dcf': '3LN2UB3rPUAMu7ZriWkHky9vpLMXYha8JbSnxBlx',
+        'nbcu_nbcd_watchvod3_android_prod_0910a3a4692d57c0b5ff4316075bc5d096be45b9': 'mJagcQ2II30vUOAauOXne7ERwbf5S9nlB3IP17lQ',
+        'anvato_scripps_app_atv_prod_790deda22e16e71e83df58f880cd389908a45d52': 'CB6trI1mpoDIM5o54DNTsji90NDBQPZ4z4RqBNSH',
+        'nbcu_nbcd_watchv4_android_prod_ff67cef9cb409158c6f8c3533edddadd0b750507': 'j8CHQCUWjlYERj4NFRmUYOND85QNbHViH09UwuKm',
+        'nbcu_nbcd_watchvodv4_android_prod_a814d781609989dea6a629d50ae4c7ad8cc8e907': 'rkVnUXxdA9rawVLUlDQtMue9Y4Q7lFEaIotcUhjt',
+        'rvVKpA50qlOPLFxMjrCGf5pdkdQDm7qn': '1J7ZkY5Qz5lMLi93QOH9IveE7EYB3rLl',
+        'nbcu_dtv_local_web_prod_b266cf49defe255fd4426a97e27c09e513e9f82f': 'HuLnJDqzLa4saCzYMJ79zDRSQpEduw1TzjMNQu2b',
+        'nbcu_att_local_web_prod_4cef038b2d969a6b7d700a56a599040b6a619f67': 'Q0Em5VDc2KpydUrVwzWRXAwoNBulWUxCq2faK0AV',
+        'nbcu_dish_local_web_prod_c56dcaf2da2e9157a4266c82a78195f1dd570f6b': 'bC1LWmRz9ayj2AlzizeJ1HuhTfIaJGsDBnZNgoRg',
+        'nbcu_verizon_local_web_prod_88bebd2ce006d4ed980de8133496f9a74cb9b3e1': 'wzhDKJZpgvUSS1EQvpCQP8Q59qVzcPixqDGJefSk',
+        'nbcu_charter_local_web_prod_9ad90f7fc4023643bb718f0fe0fd5beea2382a50': 'PyNbxNhEWLzy1ZvWEQelRuIQY88Eub7xbSVRMdfT',
+        'nbcu_suddenlink_local_web_prod_20fb711725cac224baa1c1cb0b1c324d25e97178': '0Rph41lPXZbb3fqeXtHjjbxfSrNbtZp1Ygq7Jypa',
+        'nbcu_wow_local_web_prod_652d9ce4f552d9c2e7b5b1ed37b8cb48155174ad': 'qayIBZ70w1dItm2zS42AptXnxW15mkjRrwnBjMPv',
+        'nbcu_centurylink_local_web_prod_2034402b029bf3e837ad46814d9e4b1d1345ccd5': 'StePcPMkjsX51PcizLdLRMzxMEl5k2FlsMLUNV4k',
+        'nbcu_atlanticbrd_local_web_prod_8d5f5ecbf7f7b2f5e6d908dd75d90ae3565f682e': 'NtYLb4TFUS0pRs3XTkyO5sbVGYjVf17bVbjaGscI',
+        'nbcu_nbcd_watchvod_web_dev_08bc05699be47c4f31d5080263a8cfadc16d0f7c': 'hwxi2dgDoSWgfmVVXOYZm14uuvku4QfopstXckhr',
+        'anvato_nextmedia_app_web_prod_a4fa8c7204aa65e71044b57aaf63711980cfe5a0': 'tQN1oGPYY1nM85rJYePWGcIb92TG0gSqoVpQTWOw',
+        'anvato_mcp_lin_web_prod_4c36fbfd4d8d8ecae6488656e21ac6d1ac972749': 'GUXNf5ZDX2jFUpu4WT2Go4DJ5nhUCzpnwDRRUx1K',
+        'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa': 'bLDYF8JqfG42b7bwKEgQiU9E2LTIAtnKzSgYpFUH',
+        'anvato_mcp_fs2go_web_prod_c7b90a93e171469cdca00a931211a2f556370d0a': 'icgGoYGipQMMSEvhplZX1pwbN69srwKYWksz3xWK',
+        'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336': 'fA2iQdI7RDpynqzQYIpXALVS83NTPr8LLFK4LFsu',
+        'anvato_mcp_anv_web_prod_791407490f4c1ef2a4bcb21103e0cb1bcb3352b3': 'rMOUZqe9lwcGq2mNgG3EDusm6lKgsUnczoOX3mbg',
+        'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900': 'rMOUZqe9lwcGq2mNgG3EDusm6lKgsUnczoOX3mbg',
+        'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99': 'P3uXJ0fXXditBPCGkfvlnVScpPEfKmc64Zv7ZgbK',
+        'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe': 'mGPvo5ZA5SgjOFAPEPXv7AnOpFUICX8hvFQVz69n',
+        'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582': 'qyT6PXXLjVNCrHaRVj0ugAhalNRS7Ee9BP7LUokD',
+        'nbcu_nbcd_watchvodv4_web_stage_4108362fba2d4ede21f262fea3c4162cbafd66c7': 'DhaU5lj0W2gEdcSSsnxURq8t7KIWtJfD966crVDk',
+        'anvato_scripps_app_ios_prod_409c41960c60b308db43c3cc1da79cab9f1c3d93': 'WPxj5GraLTkYCyj3M7RozLqIycjrXOEcDGFMIJPn',
+        'EZqvRyKBJLrgpClDPDF8I7Xpdp40Vx73': '4OxGd2dEakylntVKjKF0UK9PDPYB6A9W',
+        'M2v78QkpleXm9hPp9jUXI63x5vA6BogR': 'ka6K32k7ZALmpINkjJUGUo0OE42Md1BQ',
+        'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ'
+    }
+
+    _AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce'
+
+    def __init__(self, *args, **kwargs):
+        super(AnvatoIE, self).__init__(*args, **kwargs)
+        self.__server_time = None
+
+    def _server_time(self, access_key, video_id):
+        if self.__server_time is not None:
+            return self.__server_time
+
+        self.__server_time = int(self._download_json(
+            self._api_prefix(access_key) + 'server_time?anvack=' + access_key, video_id,
+            note='Fetching server time')['server_time'])
+
+        return self.__server_time
+
+    def _api_prefix(self, access_key):
+        return 'https://tkx2-%s.anvato.net/rest/v2/' % ('prod' if 'prod' in access_key else 'stage')
+
+    def _get_video_json(self, access_key, video_id):
+        # See et() in anvplayer.min.js, which is an alias of getVideoJSON()
+        video_data_url = self._api_prefix(access_key) + 'mcp/video/%s?anvack=%s' % (video_id, access_key)
+        server_time = self._server_time(access_key, video_id)
+        input_data = '%d~%s~%s' % (server_time, md5_text(video_data_url), md5_text(server_time))
+
+        auth_secret = intlist_to_bytes(aes_encrypt(
+            bytes_to_intlist(input_data[:64]), bytes_to_intlist(self._AUTH_KEY)))
+
+        video_data_url += '&X-Anvato-Adst-Auth=' + base64.b64encode(auth_secret).decode('ascii')
+        anvrid = md5_text(time.time() * 1000 * random.random())[:30]
+        payload = {
+            'api': {
+                'anvrid': anvrid,
+                'anvstk': md5_text('%s|%s|%d|%s' % (
+                    access_key, anvrid, server_time, self._ANVACK_TABLE[access_key])),
+                'anvts': server_time,
+            },
+        }
+
+        return self._download_json(
+            video_data_url, video_id, transform_source=strip_jsonp,
+            data=json.dumps(payload).encode('utf-8'))
+
+    def _extract_anvato_videos(self, webpage, video_id):
+        anvplayer_data = self._parse_json(self._html_search_regex(
+            r'<script[^>]+data-anvp=\'([^\']+)\'', webpage,
+            'Anvato player data'), video_id)
+
+        video_id = anvplayer_data['video']
+        access_key = anvplayer_data['accessKey']
+
+        video_data = self._get_video_json(access_key, video_id)
+
+        formats = []
+        for published_url in video_data['published_urls']:
+            video_url = published_url['embed_url']
+            ext = determine_ext(video_url)
+
+            if ext == 'smil':
+                formats.extend(self._extract_smil_formats(video_url, video_id))
+                continue
+
+            tbr = int_or_none(published_url.get('kbps'))
+            a_format = {
+                'url': video_url,
+                'format_id': ('-'.join(filter(None, ['http', published_url.get('cdn_name')]))).lower(),
+                'tbr': tbr if tbr != 0 else None,
+            }
+
+            if ext == 'm3u8':
+                # Not using _extract_m3u8_formats here as individual media
+                # playlists are also included in published_urls.
+                if tbr is None:
+                    formats.append(self._m3u8_meta_format(video_url, ext='mp4', m3u8_id='hls'))
+                    continue
+                a_format.update({
+                    'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
+                    'ext': 'mp4',
+                })
+            elif ext == 'mp3':
+                a_format['vcodec'] = 'none'
+            else:
+                a_format.update({
+                    'width': int_or_none(published_url.get('width')),
+                    'height': int_or_none(published_url.get('height')),
+                })
+            formats.append(a_format)
+
+        self._sort_formats(formats)
+
+        subtitles = {}
+        for caption in video_data.get('captions', []):
+            a_caption = {
+                'url': caption['url'],
+                'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None
+            }
+            subtitles.setdefault(caption['language'], []).append(a_caption)
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'title': video_data.get('def_title'),
+            'description': video_data.get('def_description'),
+            'categories': video_data.get('categories'),
+            'thumbnail': video_data.get('thumbnail'),
+            'subtitles': subtitles,
+        }
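
The access scheme in _get_video_json above concatenates the server time with two
MD5 digests before AES-encrypting the result. A sketch of just the string
assembly, with a hypothetical access key, secret and URL (the real secret comes
from _ANVACK_TABLE):

    import hashlib
    import time

    def md5_text(s):
        return hashlib.md5(str(s).encode('utf-8')).hexdigest()

    server_time = int(time.time())
    video_data_url = 'https://tkx2-prod.anvato.net/rest/v2/mcp/video/123'
    # time ~ md5(url) ~ md5(time); only the first 64 bytes get encrypted.
    input_data = '%d~%s~%s' % (server_time, md5_text(video_data_url), md5_text(server_time))

    # The request token mixes the key, a random request ID and the per-key secret:
    anvrid = md5_text(time.time() * 1000)[:30]
    anvstk = md5_text('%s|%s|%d|%s' % ('access-key', anvrid, server_time, 'secret'))
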
index b51eafc45928f8e6ff4ce571763593f71b715583..42c21bf41d975bcb49fd6c398b19ed2897cd3bd2 100644 (file)
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
 
 from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+)
 
 
 class AolIE(InfoExtractor):
     IE_NAME = 'on.aol.com'
-    _VALID_URL = r'''(?x)
-        (?:
-            aol-video:|
-            http://on\.aol\.com/
-            (?:
-                video/.*-|
-                playlist/(?P<playlist_display_id>[^/?#]+?)-(?P<playlist_id>[0-9]+)[?#].*_videoid=
-            )
-        )
-        (?P<id>[0-9]+)
-        (?:$|\?)
-    '''
+    _VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/(?:[^/]+/)*(?:[^/?#&]+-)?)(?P<id>[^/?#&]+)'
 
     _TESTS = [{
+        # video with 5min ID
         'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
         'md5': '18ef68f48740e86ae94b98da815eec42',
         'info_dict': {
             'id': '518167793',
             'ext': 'mp4',
             'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam',
+            'description': 'A major phone scam has cost thousands of taxpayers more than $1 million, with less than a month until income tax returns are due to the IRS.',
+            'timestamp': 1395405060,
+            'upload_date': '20140321',
+            'uploader': 'Newsy Studio',
         },
-        'add_ie': ['FiveMin'],
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        }
     }, {
-        'url': 'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316',
+        # video with vidible ID
+        'url': 'http://on.aol.com/video/netflix-is-raising-rates-5707d6b8e4b090497b04f706?context=PC:homepage:PL1944:1460189336183',
         'info_dict': {
-            'id': '152147',
-            'title': 'Brace Yourself - Today\'s Weirdest News',
+            'id': '5707d6b8e4b090497b04f706',
+            'ext': 'mp4',
+            'title': 'Netflix is Raising Rates',
+            'description': 'Netflix is rewarding millions of it’s long-standing members with an increase in cost. Veuer’s Carly Figueroa has more.',
+            'upload_date': '20160408',
+            'timestamp': 1460123280,
+            'uploader': 'Veuer',
         },
-        'playlist_mincount': 10,
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        }
+    }, {
+        'url': 'http://on.aol.com/partners/abc-551438d309eab105804dbfe8/sneak-peek-was-haley-really-framed-570eaebee4b0448640a5c944',
+        'only_matching': True,
+    }, {
+        'url': 'http://on.aol.com/shows/park-bench-shw518173474-559a1b9be4b0c3bfad3357a7?context=SH:SHW518173474:PL4327:1460619712763',
+        'only_matching': True,
+    }, {
+        'url': 'http://on.aol.com/video/519442220',
+        'only_matching': True,
+    }, {
+        'url': 'aol-video:5707d6b8e4b090497b04f706',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        playlist_id = mobj.group('playlist_id')
-        if not playlist_id or self._downloader.params.get('noplaylist'):
-            return self.url_result('5min:%s' % video_id)
+        video_id = self._match_id(url)
 
-        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
+        response = self._download_json(
+            'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id,
+            video_id)['response']
+        if response['statusText'] != 'Ok':
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusText']), expected=True)
 
-        webpage = self._download_webpage(url, playlist_id)
-        title = self._html_search_regex(
-            r'<h1 class="video-title[^"]*">(.+?)</h1>', webpage, 'title')
-        playlist_html = self._search_regex(
-            r"(?s)<ul\s+class='video-related[^']*'>(.*?)</ul>", webpage,
-            'playlist HTML')
-        entries = [{
-            '_type': 'url',
-            'url': 'aol-video:%s' % m.group('id'),
-            'ie_key': 'Aol',
-        } for m in re.finditer(
-            r"<a\s+href='.*videoid=(?P<id>[0-9]+)'\s+class='video-thumb'>",
-            playlist_html)]
+        video_data = response['data']
+        formats = []
+        m3u8_url = video_data.get('videoMasterPlaylist')
+        if m3u8_url:
+            formats.extend(self._extract_m3u8_formats(
+                m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+        for rendition in video_data.get('renditions', []):
+            video_url = rendition.get('url')
+            if not video_url:
+                continue
+            ext = rendition.get('format')
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    video_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+            else:
+                f = {
+                    'url': video_url,
+                    'format_id': rendition.get('quality'),
+                }
+                mobj = re.search(r'(\d+)x(\d+)', video_url)
+                if mobj:
+                    f.update({
+                        'width': int(mobj.group(1)),
+                        'height': int(mobj.group(2)),
+                    })
+                formats.append(f)
+        self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
 
         return {
-            '_type': 'playlist',
-            'id': playlist_id,
-            'display_id': mobj.group('playlist_display_id'),
-            'title': title,
-            'entries': entries,
+            'id': video_id,
+            'title': video_data['title'],
+            'duration': int_or_none(video_data.get('duration')),
+            'timestamp': int_or_none(video_data.get('publishDate')),
+            'view_count': int_or_none(video_data.get('views')),
+            'description': video_data.get('description'),
+            'uploader': video_data.get('videoOwner'),
+            'formats': formats,
         }
+
+
+class AolFeaturesIE(InfoExtractor):
+    IE_NAME = 'features.aol.com'
+    _VALID_URL = r'https?://features\.aol\.com/video/(?P<id>[^/?#]+)'
+
+    _TESTS = [{
+        'url': 'http://features.aol.com/video/behind-secret-second-careers-late-night-talk-show-hosts',
+        'md5': '7db483bb0c09c85e241f84a34238cc75',
+        'info_dict': {
+            'id': '519507715',
+            'ext': 'mp4',
+            'title': 'What To Watch - February 17, 2016',
+        },
+        'add_ie': ['FiveMin'],
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        return self.url_result(self._search_regex(
+            r'<script type="text/javascript" src="(https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js[^"]+)"',
+            webpage, '5min embed url'), 'FiveMin')
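
The AOL rewrite above falls back to parsing a WxH token out of each rendition URL
when explicit dimensions are absent. A sketch of that parse; the helper name and
sample URLs are hypothetical:

    import re

    def dims_from_url(video_url):
        mobj = re.search(r'(\d+)x(\d+)', video_url)
        return (int(mobj.group(1)), int(mobj.group(2))) if mobj else None

    assert dims_from_url('http://example.com/clip_1280x720_2500.mp4') == (1280, 720)
    assert dims_from_url('http://example.com/clip.mp4') is None
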
index be40f85b487057b4cb319dba102cec76519880a5..a6801f3d4860414c286277c92bd994e16212cffd 100644 (file)
@@ -7,6 +7,8 @@ from .common import InfoExtractor
 from ..compat import compat_urlparse
 from ..utils import (
     int_or_none,
+    parse_duration,
+    unified_strdate,
 )
 
 
@@ -16,7 +18,8 @@ class AppleTrailersIE(InfoExtractor):
     _TESTS = [{
         'url': 'http://trailers.apple.com/trailers/wb/manofsteel/',
         'info_dict': {
-            'id': 'manofsteel',
+            'id': '5111',
+            'title': 'Man of Steel',
         },
         'playlist': [
             {
@@ -70,6 +73,15 @@ class AppleTrailersIE(InfoExtractor):
             'id': 'blackthorn',
         },
         'playlist_mincount': 2,
+        'expected_warnings': ['Unable to download JSON metadata'],
+    }, {
+    # JSON data is only available from http://trailers.apple.com/trailers/feeds/data/15881.json
+        'url': 'http://trailers.apple.com/trailers/fox/kungfupanda3/',
+        'info_dict': {
+            'id': '15881',
+            'title': 'Kung Fu Panda 3',
+        },
+        'playlist_mincount': 4,
     }, {
         'url': 'http://trailers.apple.com/ca/metropole/autrui/',
         'only_matching': True,
@@ -85,6 +97,45 @@ class AppleTrailersIE(InfoExtractor):
         movie = mobj.group('movie')
         uploader_id = mobj.group('company')
 
+        webpage = self._download_webpage(url, movie)
+        film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id')
+        film_data = self._download_json(
+            'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id,
+            film_id, fatal=False)
+
+        if film_data:
+            entries = []
+            for clip in film_data.get('clips', []):
+                clip_title = clip['title']
+
+                formats = []
+                for version, version_data in clip.get('versions', {}).items():
+                    for size, size_data in version_data.get('sizes', {}).items():
+                        src = size_data.get('src')
+                        if not src:
+                            continue
+                        formats.append({
+                            'format_id': '%s-%s' % (version, size),
+                            'url': re.sub(r'_(\d+p.mov)', r'_h\1', src),
+                            'width': int_or_none(size_data.get('width')),
+                            'height': int_or_none(size_data.get('height')),
+                            'language': version[:2],
+                        })
+                self._sort_formats(formats)
+
+                entries.append({
+                    'id': movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', clip_title).lower(),
+                    'formats': formats,
+                    'title': clip_title,
+                    'thumbnail': clip.get('screen') or clip.get('thumb'),
+                    'duration': parse_duration(clip.get('runtime') or clip.get('faded')),
+                    'upload_date': unified_strdate(clip.get('posted')),
+                    'uploader_id': uploader_id,
+                })
+
+            page_data = film_data.get('page', {})
+            return self.playlist_result(entries, film_id, page_data.get('movie_title'))
+
         playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
 
         def fix_html(s):
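
One detail in the new AppleTrailers path: each size URL is rewritten to the
higher-bitrate '_h'-prefixed file Apple also hosts. A one-line sketch with a
hypothetical source URL:

    import re

    src = 'http://trailers.apple.com/movies/wb/manofsteel/manofsteel-tlr4_720p.mov'
    print(re.sub(r'_(\d+p.mov)', r'_h\1', src))
    # http://trailers.apple.com/movies/wb/manofsteel/manofsteel-tlr4_h720p.mov
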
index 9fb84911a0b81fd42de2c9bd410cdaf2dd4813a6..fd45b3e42b374ec6a7077454d238932c66d16d46 100644 (file)
@@ -8,7 +8,6 @@ from .generic import GenericIE
 from ..utils import (
     determine_ext,
     ExtractorError,
-    get_element_by_attribute,
     qualities,
     int_or_none,
     parse_duration,
@@ -83,7 +82,7 @@ class ARDMediathekIE(InfoExtractor):
         subtitle_url = media_info.get('_subtitleUrl')
         if subtitle_url:
             subtitles['de'] = [{
-                'ext': 'srt',
+                'ext': 'ttml',
                 'url': subtitle_url,
             }]
 
@@ -274,41 +273,3 @@ class ARDIE(InfoExtractor):
             'upload_date': upload_date,
             'thumbnail': thumbnail,
         }
-
-
-class SportschauIE(ARDMediathekIE):
-    IE_NAME = 'Sportschau'
-    _VALID_URL = r'(?P<baseurl>https?://(?:www\.)?sportschau\.de/(?:[^/]+/)+video(?P<id>[^/#?]+))\.html'
-    _TESTS = [{
-        'url': 'http://www.sportschau.de/tourdefrance/videoseppeltkokainhatnichtsmitklassischemdopingzutun100.html',
-        'info_dict': {
-            'id': 'seppeltkokainhatnichtsmitklassischemdopingzutun100',
-            'ext': 'mp4',
-            'title': 'Seppelt: "Kokain hat nichts mit klassischem Doping zu tun"',
-            'thumbnail': 're:^https?://.*\.jpg$',
-            'description': 'Der ARD-Doping Experte Hajo Seppelt gibt seine Einschätzung zum ersten Dopingfall der diesjährigen Tour de France um den Italiener Luca Paolini ab.',
-        },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
-    }]
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        base_url = mobj.group('baseurl')
-
-        webpage = self._download_webpage(url, video_id)
-        title = get_element_by_attribute('class', 'headline', webpage)
-        description = self._html_search_meta('description', webpage, 'description')
-
-        info = self._extract_media_info(
-            base_url + '-mc_defaultQuality-h.json', webpage, video_id)
-
-        info.update({
-            'title': title,
-            'description': description,
-        })
-
-        return info
index efde7e207bc8d166e80f2a26429797684535d114..049f1fa9ed35404acba9e6ad7f1c12f516b436f2 100644 (file)
@@ -23,7 +23,7 @@ from ..utils import (
 
 
 class ArteTvIE(InfoExtractor):
-    _VALID_URL = r'http://videos\.arte\.tv/(?P<lang>fr|de|en|es)/.*-(?P<id>.*?)\.html'
+    _VALID_URL = r'https?://videos\.arte\.tv/(?P<lang>fr|de|en|es)/.*-(?P<id>.*?)\.html'
     IE_NAME = 'arte.tv'
 
     def _real_extract(self, url):
@@ -61,10 +61,7 @@ class ArteTvIE(InfoExtractor):
         }
 
 
-class ArteTVPlus7IE(InfoExtractor):
-    IE_NAME = 'arte.tv:+7'
-    _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/(?:(?:sendungen|emissions|embed)/)?(?P<id>[^/]+)/(?P<name>[^/?#&+])'
-
+class ArteTVBaseIE(InfoExtractor):
     @classmethod
     def _extract_url_info(cls, url):
         mobj = re.match(cls._VALID_URL, url)
@@ -78,58 +75,7 @@ class ArteTVPlus7IE(InfoExtractor):
             video_id = mobj.group('id')
         return video_id, lang
 
-    def _real_extract(self, url):
-        video_id, lang = self._extract_url_info(url)
-        webpage = self._download_webpage(url, video_id)
-        return self._extract_from_webpage(webpage, video_id, lang)
-
-    def _extract_from_webpage(self, webpage, video_id, lang):
-        patterns_templates = (r'arte_vp_url=["\'](.*?%s.*?)["\']', r'data-url=["\']([^"]+%s[^"]+)["\']')
-        ids = (video_id, '')
-        # some pages contain multiple videos (like
-        # http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D),
-        # so we first try to look for json URLs that contain the video id from
-        # the 'vid' parameter.
-        patterns = [t % re.escape(_id) for _id in ids for t in patterns_templates]
-        json_url = self._html_search_regex(
-            patterns, webpage, 'json vp url', default=None)
-        if not json_url:
-            def find_iframe_url(webpage, default=NO_DEFAULT):
-                return self._html_search_regex(
-                    r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
-                    webpage, 'iframe url', group='url', default=default)
-
-            iframe_url = find_iframe_url(webpage, None)
-            if not iframe_url:
-                embed_url = self._html_search_regex(
-                    r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url', default=None)
-                if embed_url:
-                    player = self._download_json(
-                        embed_url, video_id, 'Downloading player page')
-                    iframe_url = find_iframe_url(player['html'])
-            # en and es URLs produce react-based pages with different layout (e.g.
-            # http://www.arte.tv/guide/en/053330-002-A/carnival-italy?zone=world)
-            if not iframe_url:
-                program = self._search_regex(
-                    r'program\s*:\s*({.+?["\']embed_html["\'].+?}),?\s*\n',
-                    webpage, 'program', default=None)
-                if program:
-                    embed_html = self._parse_json(program, video_id)
-                    if embed_html:
-                        iframe_url = find_iframe_url(embed_html['embed_html'])
-            if iframe_url:
-                json_url = compat_parse_qs(
-                    compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
-        if json_url:
-            return self._extract_from_json_url(json_url, video_id, lang)
-        # Differend kind of embed URL (e.g.
-        # http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium)
-        embed_url = self._search_regex(
-            r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1',
-            webpage, 'embed url', group='url')
-        return self.url_result(embed_url)
-
-    def _extract_from_json_url(self, json_url, video_id, lang):
+    def _extract_from_json_url(self, json_url, video_id, lang, title=None):
         info = self._download_json(json_url, video_id)
         player_info = info['videoJsonPlayer']
 
@@ -137,7 +83,7 @@ class ArteTVPlus7IE(InfoExtractor):
         if not upload_date_str:
             upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
 
-        title = player_info['VTI'].strip()
+        title = (player_info.get('VTI') or title or player_info['VID']).strip()
         subtitle = player_info.get('VSU', '').strip()
         if subtitle:
             title += ' - %s' % subtitle
@@ -158,24 +104,53 @@ class ArteTVPlus7IE(InfoExtractor):
             'es': 'E[ESP]',
         }
 
+        langcode = LANGS.get(lang, lang)
+
         formats = []
         for format_id, format_dict in player_info['VSR'].items():
             f = dict(format_dict)
             versionCode = f.get('versionCode')
-            langcode = LANGS.get(lang, lang)
-            lang_rexs = [r'VO?%s-' % re.escape(langcode), r'VO?.-ST%s$' % re.escape(langcode)]
-            lang_pref = None
-            if versionCode:
-                matched_lang_rexs = [r for r in lang_rexs if re.match(r, versionCode)]
-                lang_pref = -10 if not matched_lang_rexs else 10 * len(matched_lang_rexs)
-            source_pref = 0
-            if versionCode is not None:
-                # The original version with subtitles has lower relevance
-                if re.match(r'VO-ST(F|A|E)', versionCode):
-                    source_pref -= 10
-                # The version with sourds/mal subtitles has also lower relevance
-                elif re.match(r'VO?(F|A|E)-STM\1', versionCode):
-                    source_pref -= 9
+            l = re.escape(langcode)
+
+            # Language preference from most to least priority
+            # Reference: section 5.6.3 of
+            # http://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-05.pdf
+            PREFERENCES = (
+                # original version in requested language, without subtitles
+                r'VO{0}$'.format(l),
+                # original version in requested language, with partial subtitles in requested language
+                r'VO{0}-ST{0}$'.format(l),
+                # original version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
+                r'VO{0}-STM{0}$'.format(l),
+                # non-original (dubbed) version in requested language, without subtitles
+                r'V{0}$'.format(l),
+                # non-original (dubbed) version in requested language, with partial subtitles in requested language
+                r'V{0}-ST{0}$'.format(l),
+                # non-original (dubbed) version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
+                r'V{0}-STM{0}$'.format(l),
+                # original version in requested language, with partial subtitles in different language
+                r'VO{0}-ST(?!{0}).+?$'.format(l),
+                # original version in requested language, with subtitles for the deaf and hard-of-hearing in different language
+                r'VO{0}-STM(?!{0}).+?$'.format(l),
+                # original version in different language, with partial subtitles in requested language
+                r'VO(?:(?!{0}).+?)?-ST{0}$'.format(l),
+                # original version in different language, with subtitles for the deaf and hard-of-hearing in requested language
+                r'VO(?:(?!{0}).+?)?-STM{0}$'.format(l),
+                # original version in different language, without subtitles
+                r'VO(?:(?!{0}))?$'.format(l),
+                # original version in different language, with partial subtitles in different language
+                r'VO(?:(?!{0}).+?)?-ST(?!{0}).+?$'.format(l),
+                # original version in different language, with subtitles for the deaf and hard-of-hearing in different language
+                r'VO(?:(?!{0}).+?)?-STM(?!{0}).+?$'.format(l),
+            )
+
+            for pref, p in enumerate(PREFERENCES):
+                if versionCode and re.match(p, versionCode):
+                    lang_pref = len(PREFERENCES) - pref
+                    break
+            else:
+                lang_pref = -1
+
             format = {
                 'format_id': format_id,
                 'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
@@ -185,7 +160,6 @@ class ArteTVPlus7IE(InfoExtractor):
                 'height': int_or_none(f.get('height')),
                 'tbr': int_or_none(f.get('bitrate')),
                 'quality': qfunc(f.get('quality')),
-                'source_preference': source_pref,
             }
 
             if f.get('mediaType') == 'rtmp':
@@ -204,28 +178,112 @@ class ArteTVPlus7IE(InfoExtractor):
         return info_dict
 
 
+class ArteTVPlus7IE(ArteTVBaseIE):
+    IE_NAME = 'arte.tv:+7'
+    _VALID_URL = r'https?://(?:(?:www|sites)\.)?arte\.tv/[^/]+/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+
+    _TESTS = [{
+        'url': 'http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D',
+        'only_matching': True,
+    }, {
+        'url': 'http://sites.arte.tv/karambolage/de/video/karambolage-22',
+        'only_matching': True,
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        return False if ArteTVPlaylistIE.suitable(url) else super(ArteTVPlus7IE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        video_id, lang = self._extract_url_info(url)
+        webpage = self._download_webpage(url, video_id)
+        return self._extract_from_webpage(webpage, video_id, lang)
+
+    def _extract_from_webpage(self, webpage, video_id, lang):
+        patterns_templates = (r'arte_vp_url=["\'](.*?%s.*?)["\']', r'data-url=["\']([^"]+%s[^"]+)["\']')
+        ids = (video_id, '')
+        # some pages contain multiple videos (like
+        # http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D),
+        # so we first try to look for json URLs that contain the video id from
+        # the 'vid' parameter.
+        patterns = [t % re.escape(_id) for _id in ids for t in patterns_templates]
+        json_url = self._html_search_regex(
+            patterns, webpage, 'json vp url', default=None)
+        if not json_url:
+            def find_iframe_url(webpage, default=NO_DEFAULT):
+                return self._html_search_regex(
+                    r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
+                    webpage, 'iframe url', group='url', default=default)
+
+            iframe_url = find_iframe_url(webpage, None)
+            if not iframe_url:
+                embed_url = self._html_search_regex(
+                    r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url', default=None)
+                if embed_url:
+                    player = self._download_json(
+                        embed_url, video_id, 'Downloading player page')
+                    iframe_url = find_iframe_url(player['html'])
+            # en and es URLs produce react-based pages with different layout (e.g.
+            # http://www.arte.tv/guide/en/053330-002-A/carnival-italy?zone=world)
+            if not iframe_url:
+                program = self._search_regex(
+                    r'program\s*:\s*({.+?["\']embed_html["\'].+?}),?\s*\n',
+                    webpage, 'program', default=None)
+                if program:
+                    embed_html = self._parse_json(program, video_id)
+                    if embed_html:
+                        iframe_url = find_iframe_url(embed_html['embed_html'])
+            if iframe_url:
+                json_url = compat_parse_qs(
+                    compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
+        if json_url:
+            title = self._search_regex(
+                r'<h3[^>]+title=(["\'])(?P<title>.+?)\1',
+                webpage, 'title', default=None, group='title')
+            return self._extract_from_json_url(json_url, video_id, lang, title=title)
+        # Different kind of embed URL (e.g.
+        # http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium)
+        entries = [
+            self.url_result(url)
+            for _, url in re.findall(r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1', webpage)]
+        return self.playlist_result(entries)
+
+
 # It also uses the arte_vp_url url from the webpage to extract the information
 class ArteTVCreativeIE(ArteTVPlus7IE):
     IE_NAME = 'arte.tv:creative'
-    _VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de|en|es)/(?:magazine?/)?(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
 
     _TESTS = [{
-        'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
+        'url': 'http://creative.arte.tv/fr/episode/osmosis-episode-1',
         'info_dict': {
-            'id': '72176',
+            'id': '057405-001-A',
             'ext': 'mp4',
-            'title': 'Folge 2 - Corporate Design',
-            'upload_date': '20131004',
+            'title': 'OSMOSIS - N\'AYEZ PLUS PEUR D\'AIMER (1)',
+            'upload_date': '20150716',
         },
     }, {
         'url': 'http://creative.arte.tv/fr/Monty-Python-Reunion',
+        'playlist_count': 11,
+        'add_ie': ['Youtube'],
+    }, {
+        'url': 'http://creative.arte.tv/de/episode/agentur-amateur-4-der-erste-kunde',
+        'only_matching': True,
+    }]
+
+
+class ArteTVInfoIE(ArteTVPlus7IE):
+    IE_NAME = 'arte.tv:info'
+    _VALID_URL = r'https?://info\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+
+    _TESTS = [{
+        'url': 'http://info.arte.tv/fr/service-civique-un-cache-misere',
         'info_dict': {
-            'id': '160676',
+            'id': '067528-000-A',
             'ext': 'mp4',
-            'title': 'Monty Python live (mostly)',
-            'description': 'Événement ! Quarante-cinq ans après leurs premiers succès, les légendaires Monty Python remontent sur scène.\n',
-            'upload_date': '20140805',
-        }
+            'title': 'Service civique, un cache misère ?',
+            'upload_date': '20160403',
+        },
     }]
 
 
@@ -251,6 +309,8 @@ class ArteTVDDCIE(ArteTVPlus7IE):
     IE_NAME = 'arte.tv:ddc'
     _VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>[^/?#&]+)'
 
+    _TESTS = []
+
     def _real_extract(self, url):
         video_id, lang = self._extract_url_info(url)
         if lang == 'folge':
@@ -269,7 +329,7 @@ class ArteTVConcertIE(ArteTVPlus7IE):
     IE_NAME = 'arte.tv:concert'
     _VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://concert.arte.tv/de/notwist-im-pariser-konzertclub-divan-du-monde',
         'md5': '9ea035b7bd69696b67aa2ccaaa218161',
         'info_dict': {
@@ -279,24 +339,23 @@ class ArteTVConcertIE(ArteTVPlus7IE):
             'upload_date': '20140128',
             'description': 'md5:486eb08f991552ade77439fe6d82c305',
         },
-    }
+    }]
 
 
 class ArteTVCinemaIE(ArteTVPlus7IE):
     IE_NAME = 'arte.tv:cinema'
     _VALID_URL = r'https?://cinema\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>.+)'
 
-    _TEST = {
-        'url': 'http://cinema.arte.tv/de/node/38291',
-        'md5': '6b275511a5107c60bacbeeda368c3aa1',
+    _TESTS = [{
+        'url': 'http://cinema.arte.tv/fr/article/les-ailes-du-desir-de-julia-reck',
+        'md5': 'a5b9dd5575a11d93daf0e3f404f45438',
         'info_dict': {
-            'id': '055876-000_PWA12025-D',
+            'id': '062494-000-A',
             'ext': 'mp4',
-            'title': 'Tod auf dem Nil',
-            'upload_date': '20160122',
-            'description': 'md5:7f749bbb77d800ef2be11d54529b96bc',
+            'title': 'Film lauréat du concours web - "Les ailes du désir" de Julia Reck',
+            'upload_date': '20150807',
         },
-    }
+    }]
 
 
 class ArteTVMagazineIE(ArteTVPlus7IE):
@@ -334,16 +393,48 @@ class ArteTVEmbedIE(ArteTVPlus7IE):
     IE_NAME = 'arte.tv:embed'
     _VALID_URL = r'''(?x)
         http://www\.arte\.tv
-        /playerv2/embed\.php\?json_url=
+        /(?:playerv2/embed|arte_vp/index)\.php\?json_url=
         (?P<json_url>
             http://arte\.tv/papi/tvguide/videos/stream/player/
             (?P<lang>[^/]+)/(?P<id>[^/]+)[^&]*
         )
     '''
 
+    _TESTS = []
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
         lang = mobj.group('lang')
         json_url = mobj.group('json_url')
         return self._extract_from_json_url(json_url, video_id, lang)
+
+
+class ArteTVPlaylistIE(ArteTVBaseIE):
+    IE_NAME = 'arte.tv:playlist'
+    _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/[^#]*#collection/(?P<id>PL-\d+)'
+
+    _TESTS = [{
+        'url': 'http://www.arte.tv/guide/de/plus7/?country=DE#collection/PL-013263/ARTETV',
+        'info_dict': {
+            'id': 'PL-013263',
+            'title': 'Areva & Uramin',
+        },
+        'playlist_mincount': 6,
+    }, {
+        'url': 'http://www.arte.tv/guide/de/playlists?country=DE#collection/PL-013190/ARTETV',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        playlist_id, lang = self._extract_url_info(url)
+        collection = self._download_json(
+            'https://api.arte.tv/api/player/v1/collectionData/%s/%s?source=videos'
+            % (lang, playlist_id), playlist_id)
+        title = collection.get('title')
+        description = collection.get('shortDescription') or collection.get('teaserText')
+        entries = [
+            self._extract_from_json_url(
+                video['jsonUrl'], video.get('programId') or playlist_id, lang)
+            for video in collection['videos'] if video.get('jsonUrl')]
+        return self.playlist_result(entries, playlist_id, title, description)
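
The ranking loop above turns the position of the first matching pattern into a
language preference, with unmatched version codes sorting last. A reduced sketch
with three patterns instead of thirteen, taking 'F' as the requested language:

    import re

    l = re.escape('F')
    PREFERENCES = (r'VO%s$' % l, r'VO%s-ST%s$' % (l, l), r'V%s$' % l)

    def lang_pref(version_code):
        # Earlier patterns win; anything unmatched gets the lowest rank.
        for pref, p in enumerate(PREFERENCES):
            if version_code and re.match(p, version_code):
                return len(PREFERENCES) - pref
        return -1

    assert lang_pref('VOF') > lang_pref('VF') > lang_pref('VA-STE')
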
index b8f9ae005fbb0ab41589258886b8342eba0ac288..d2f3889645f9b9324deb0eda00d4f6b67ab32dc1 100644 (file)
@@ -6,16 +6,14 @@ import hashlib
 import re
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_str,
-    compat_urllib_parse,
-)
+from ..compat import compat_str
 from ..utils import (
-    int_or_none,
+    ExtractorError,
     float_or_none,
+    int_or_none,
     sanitized_Request,
+    urlencode_postdata,
     xpath_text,
-    ExtractorError,
 )
 
 
@@ -86,7 +84,7 @@ class AtresPlayerIE(InfoExtractor):
         }
 
         request = sanitized_Request(
-            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
+            self._LOGIN_URL, urlencode_postdata(login_form))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')
         response = self._download_webpage(
             request, None, 'Logging in as %s' % username)
index 3b2effa15fe15a5527644349d785b452540c7568..aa6925623140f08090515fda2f42a7debd5545ac 100644 (file)
@@ -10,9 +10,9 @@ from ..utils import (
 
 
 class AudiMediaIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?audimedia\.tv/(?:en|de)/vid/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?audi-mediacenter\.com/(?:en|de)/audimediatv/(?P<id>[^/?#]+)'
     _TEST = {
-        'url': 'https://audimedia.tv/en/vid/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test',
+        'url': 'https://www.audi-mediacenter.com/en/audimediatv/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-1467',
         'md5': '79a8b71c46d49042609795ab59779b66',
         'info_dict': {
             'id': '1565',
@@ -32,7 +32,10 @@ class AudiMediaIE(InfoExtractor):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
 
-        raw_payload = self._search_regex(r'<script[^>]+class="amtv-embed"[^>]+id="([^"]+)"', webpage, 'raw payload')
+        raw_payload = self._search_regex([
+            r'class="amtv-embed"[^>]+id="([^"]+)"',
+            r'class=\\"amtv-embed\\"[^>]+id=\\"([^"]+)\\"',
+        ], webpage, 'raw payload')
         _, stage_mode, video_id, lang = raw_payload.split('-')
 
         # TODO: handle s and e stage_mode (live streams and ended live streams)
@@ -59,13 +62,19 @@ class AudiMediaIE(InfoExtractor):
                 video_version_url = video_version.get('download_url') or video_version.get('stream_url')
                 if not video_version_url:
                     continue
-                formats.append({
+                f = {
                     'url': video_version_url,
                     'width': int_or_none(video_version.get('width')),
                     'height': int_or_none(video_version.get('height')),
                     'abr': int_or_none(video_version.get('audio_bitrate')),
                     'vbr': int_or_none(video_version.get('video_bitrate')),
-                })
+                }
+                bitrate = self._search_regex(r'(\d+)k', video_version_url, 'bitrate', default=None)
+                if bitrate:
+                    f.update({
+                        'format_id': 'http-%s' % bitrate,
+                    })
+                formats.append(f)
             self._sort_formats(formats)
 
             return {
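
The new format_id logic above derives a label such as 'http-1080' from a '<digits>k' token in the download URL and simply omits the id when no token is present. The same idea in isolation (the sample URL is invented to match the regex, not taken from the site):

    import re

    def http_format(video_version_url):
        f = {'url': video_version_url}
        bitrate = re.search(r'(\d+)k', video_version_url)
        if bitrate:
            # e.g. '..._1080k.mp4' yields format_id 'http-1080'
            f['format_id'] = 'http-%s' % bitrate.group(1)
        return f

    print(http_format('http://example.com/video_1080k.mp4'))
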
diff --git a/youtube_dl/extractor/audioboom.py b/youtube_dl/extractor/audioboom.py
new file mode 100644 (file)
index 0000000..2ec2d70
--- /dev/null
@@ -0,0 +1,66 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import float_or_none
+
+
+class AudioBoomIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?audioboom\.com/boos/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'https://audioboom.com/boos/4279833-3-09-2016-czaban-hour-3?t=0',
+        'md5': '63a8d73a055c6ed0f1e51921a10a5a76',
+        'info_dict': {
+            'id': '4279833',
+            'ext': 'mp3',
+            'title': '3/09/2016 Czaban Hour 3',
+            'description': 'Guest:   Nate Davis - NFL free agency,   Guest:   Stan Gans',
+            'duration': 2245.72,
+            'uploader': 'Steve Czaban',
+            'uploader_url': 're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        clip = None
+
+        clip_store = self._parse_json(
+            self._search_regex(
+                r'data-new-clip-store=(["\'])(?P<json>{.*?"clipId"\s*:\s*%s.*?})\1' % video_id,
+                webpage, 'clip store', default='{}', group='json'),
+            video_id, fatal=False)
+        if clip_store:
+            clips = clip_store.get('clips')
+            if clips and isinstance(clips, list) and isinstance(clips[0], dict):
+                clip = clips[0]
+
+        def from_clip(field):
+            if clip:
+                return clip.get(field)
+
+        audio_url = from_clip('clipURLPriorToLoading') or self._og_search_property(
+            'audio', webpage, 'audio url')
+        title = from_clip('title') or self._og_search_title(webpage)
+        description = from_clip('description') or self._og_search_description(webpage)
+
+        duration = float_or_none(from_clip('duration') or self._html_search_meta(
+            'weibo:audio:duration', webpage))
+
+        uploader = from_clip('author') or self._og_search_property(
+            'audio:artist', webpage, 'uploader', fatal=False)
+        uploader_url = from_clip('author_url') or self._html_search_meta(
+            'audioboo:channel', webpage, 'uploader url')
+
+        return {
+            'id': video_id,
+            'url': audio_url,
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'uploader': uploader,
+            'uploader_url': uploader_url,
+        }
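
The extractor's primary metadata source is the page's data-new-clip-store attribute, with Open Graph tags as fallback for every field. A self-contained illustration of the clip-store lookup against a synthetic snippet (the attribute layout is modelled on the regex above, not captured from the live site):

    import json
    import re

    PAGE = '<div data-new-clip-store=\'{"clips": [{"clipId": 4279833, "title": "demo"}]}\'></div>'

    def parse_clip_store(webpage, video_id):
        # Grab the JSON blob whose "clipId" matches this video, tolerating
        # either quote style around the attribute value.
        m = re.search(
            r'data-new-clip-store=(["\'])(?P<json>{.*?"clipId"\s*:\s*%s.*?})\1' % video_id,
            webpage)
        return json.loads(m.group('json')) if m else {}

    print(parse_clip_store(PAGE, '4279833'))
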
diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py
index 3eed91279fd7b6ead45bf3ee01486eab53b91d6a..f3bd4d4447f559a8bd924f7d796a1a9faf24b9d3 100644 (file)
@@ -6,6 +6,7 @@ import time
 
 from .common import InfoExtractor
 from .soundcloud import SoundcloudIE
+from ..compat import compat_str
 from ..utils import (
     ExtractorError,
     url_basename,
@@ -30,14 +31,14 @@ class AudiomackIE(InfoExtractor):
         # audiomack wrapper around soundcloud song
         {
             'add_ie': ['Soundcloud'],
-            'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare',
+            'url': 'http://www.audiomack.com/song/hip-hop-daily/black-mamba-freestyle',
             'info_dict': {
-                'id': '172419696',
+                'id': '258901379',
                 'ext': 'mp3',
-                'description': 'md5:1fc3272ed7a635cce5be1568c2822997',
-                'title': 'Young Thug ft Lil Wayne - Take Kare',
-                'uploader': 'Young Thug World',
-                'upload_date': '20141016',
+                'description': 'mamba day freestyle for the legend Kobe Bryant ',
+                'title': 'Black Mamba Freestyle [Prod. By Danny Wolf]',
+                'uploader': 'ILOVEMAKONNEN',
+                'upload_date': '20160414',
             }
         },
     ]
@@ -136,7 +137,7 @@ class AudiomackAlbumIE(InfoExtractor):
                         result[resultkey] = api_response[apikey]
                 song_id = url_basename(api_response['url']).rpartition('.')[0]
                 result['entries'].append({
-                    'id': api_response.get('id', song_id),
+                    'id': compat_str(api_response.get('id', song_id)),
                     'uploader': api_response.get('artist'),
                     'title': api_response.get('title', song_id),
                     'url': api_response['url'],
diff --git a/youtube_dl/extractor/azubu.py b/youtube_dl/extractor/azubu.py
index 011edf128c2a688bc4ef56e487872ff1b15cee66..a813eb429fe8168c2e4223342fd6540647fe127a 100644 (file)
@@ -46,6 +46,7 @@ class AzubuIE(InfoExtractor):
                 'uploader_id': 272749,
                 'view_count': int,
             },
+            'skip': 'Channel offline',
         },
     ]
 
@@ -56,22 +57,26 @@ class AzubuIE(InfoExtractor):
             'http://www.azubu.tv/api/video/%s' % video_id, video_id)['data']
 
         title = data['title'].strip()
-        description = data['description']
-        thumbnail = data['thumbnail']
-        view_count = data['view_count']
-        uploader = data['user']['username']
-        uploader_id = data['user']['id']
+        description = data.get('description')
+        thumbnail = data.get('thumbnail')
+        view_count = data.get('view_count')
+        user = data.get('user', {})
+        uploader = user.get('username')
+        uploader_id = user.get('id')
 
         stream_params = json.loads(data['stream_params'])
 
-        timestamp = float_or_none(stream_params['creationDate'], 1000)
-        duration = float_or_none(stream_params['length'], 1000)
+        timestamp = float_or_none(stream_params.get('creationDate'), 1000)
+        duration = float_or_none(stream_params.get('length'), 1000)
 
         renditions = stream_params.get('renditions') or []
         video = stream_params.get('FLVFullLength') or stream_params.get('videoFullLength')
         if video:
             renditions.append(video)
 
+        if not renditions and not user.get('channel', {}).get('is_live', True):
+            raise ExtractorError('%s said: channel is offline.' % self.IE_NAME, expected=True)
+
         formats = [{
             'url': fmt['url'],
             'width': fmt['frameWidth'],
@@ -98,7 +103,7 @@ class AzubuIE(InfoExtractor):
 
 
 class AzubuLiveIE(InfoExtractor):
-    _VALID_URL = r'http://www.azubu.tv/(?P<id>[^/]+)$'
+    _VALID_URL = r'https?://www\.azubu\.tv/(?P<id>[^/]+)$'
 
     _TEST = {
         'url': 'http://www.azubu.tv/MarsTVMDLen',
@@ -120,6 +125,7 @@ class AzubuLiveIE(InfoExtractor):
         bc_info = self._download_json(req, user)
         m3u8_url = next(source['src'] for source in bc_info['sources'] if source['container'] == 'M2TS')
         formats = self._extract_m3u8_formats(m3u8_url, user, ext='mp4')
+        self._sort_formats(formats)
 
         return {
             'id': info['id'],
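
Several of the rewritten lines convert Azubu's millisecond fields with float_or_none(value, 1000). A minimal equivalent of that utils helper (the defensive try/except is added here for the sketch):

    def float_or_none(v, scale=1, invscale=1, default=None):
        # None-safe float conversion; scale=1000 turns milliseconds into seconds.
        if v is None:
            return default
        try:
            return float(v) * invscale / scale
        except (TypeError, ValueError):
            return default

    print(float_or_none('308067', 1000))  # 308.067
    print(float_or_none(None, 1000))      # None
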
diff --git a/youtube_dl/extractor/baiduvideo.py b/youtube_dl/extractor/baiduvideo.py
index 76b21e5962eae26e6bceef265c56e1f1a16ca922..234a661d34623b0b2da3028b20bcc23fc11e2991 100644 (file)
@@ -9,7 +9,7 @@ from ..utils import unescapeHTML
 
 class BaiduVideoIE(InfoExtractor):
     IE_DESC = '百度视频'
-    _VALID_URL = r'http://v\.baidu\.com/(?P<type>[a-z]+)/(?P<id>\d+)\.htm'
+    _VALID_URL = r'https?://v\.baidu\.com/(?P<type>[a-z]+)/(?P<id>\d+)\.htm'
     _TESTS = [{
         'url': 'http://v.baidu.com/comic/1069.htm?frp=bdbrand&q=%E4%B8%AD%E5%8D%8E%E5%B0%8F%E5%BD%93%E5%AE%B6',
         'info_dict': {
diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py
index da986e06350d047f9a70dc4f42bfeb27b04afd0e..0eb1930c2d24bb01acd57292d4fe7e1b9a330bff 100644 (file)
@@ -4,15 +4,13 @@ import re
 import itertools
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-    compat_str,
-)
+from ..compat import compat_str
 from ..utils import (
     ExtractorError,
-    int_or_none,
     float_or_none,
+    int_or_none,
     sanitized_Request,
+    urlencode_postdata,
 )
 
 
@@ -58,7 +56,7 @@ class BambuserIE(InfoExtractor):
         }
 
         request = sanitized_Request(
-            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
+            self._LOGIN_URL, urlencode_postdata(login_form))
         request.add_header('Referer', self._LOGIN_URL)
         response = self._download_webpage(
             request, None, 'Logging in as %s' % username)
diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py
index c1ef8051d3074a6551941bf140f88eee4ed8a124..991ab0676e6b93a1c64d04f48b3728551bc4ccf0 100644 (file)
@@ -29,7 +29,7 @@ class BandcampIE(InfoExtractor):
         '_skip': 'There is a limit of 200 free downloads / month for the test song'
     }, {
         'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
-        'md5': '2b68e5851514c20efdff2afc5603b8b4',
+        'md5': '73d0b3171568232574e45652f8720b5c',
         'info_dict': {
             'id': '2650410135',
             'ext': 'mp3',
@@ -48,6 +48,10 @@ class BandcampIE(InfoExtractor):
             if m_trackinfo:
                 json_code = m_trackinfo.group(1)
                 data = json.loads(json_code)[0]
+                track_id = compat_str(data['id'])
+
+                if not data.get('file'):
+                    raise ExtractorError('Not streamable', video_id=track_id, expected=True)
 
                 formats = []
                 for format_id, format_url in data['file'].items():
@@ -64,7 +68,7 @@ class BandcampIE(InfoExtractor):
                 self._sort_formats(formats)
 
                 return {
-                    'id': compat_str(data['id']),
+                    'id': track_id,
                     'title': data['title'],
                     'formats': formats,
                     'duration': float_or_none(data.get('duration')),
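
The new guard raises with expected=True as soon as the track JSON lacks a 'file' map. In youtube-dl that flag marks a site-side condition rather than an extractor bug, so the "please report this issue" footer is suppressed. The shape of the guard, reduced to plain Python with a stand-in exception:

    def extract_formats(data):
        track_id = str(data['id'])
        if not data.get('file'):
            # Site-side condition, not a bug: the track is simply not streamable.
            raise ValueError('Not streamable (track %s)' % track_id)
        return [{'format_id': format_id, 'url': format_url}
                for format_id, format_url in data['file'].items()]
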
diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py
index 9d0dfb9611687b15075d0e6fc7d57dfa0244c60a..4b3cd8c65a65967c5ad017d53126e9ccc76a71ef 100644 (file)
@@ -10,7 +10,6 @@ from ..utils import (
     int_or_none,
     parse_duration,
     parse_iso8601,
-    remove_end,
     unescapeHTML,
 )
 from ..compat import (
@@ -32,7 +31,7 @@ class BBCCoUkIE(InfoExtractor):
                             music/clips[/#]|
                             radio/player/
                         )
-                        (?P<id>%s)
+                        (?P<id>%s)(?!/(?:episodes|broadcasts|clips))
                     ''' % _ID_REGEX
 
     _MEDIASELECTOR_URLS = [
@@ -193,6 +192,7 @@ class BBCCoUkIE(InfoExtractor):
                 # rtmp download
                 'skip_download': True,
             },
+            'skip': 'Now it\'s really geo-restricted',
         }, {
             # compact player (https://github.com/rg3/youtube-dl/issues/8147)
             'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
@@ -329,6 +329,7 @@ class BBCCoUkIE(InfoExtractor):
                     'format_id': '%s_%s' % (service, format['format_id']),
                     'abr': abr,
                     'acodec': acodec,
+                    'vcodec': 'none',
                 })
             formats.extend(conn_formats)
         return formats
@@ -561,7 +562,7 @@ class BBCIE(BBCCoUkIE):
         'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460',
         'info_dict': {
             'id': '3662a707-0af9-3149-963f-47bea720b460',
-            'title': 'BBC Blogs - Adam Curtis - BUGGER',
+            'title': 'BUGGER',
         },
         'playlist_count': 18,
     }, {
@@ -670,9 +671,18 @@ class BBCIE(BBCCoUkIE):
         'url': 'http://www.bbc.com/sport/0/football/34475836',
         'info_dict': {
             'id': '34475836',
-            'title': 'What Liverpool can expect from Klopp',
+            'title': 'Jurgen Klopp: Furious football from a witty and winning coach',
+            'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.',
         },
         'playlist_count': 3,
+    }, {
+        # school report article with single video
+        'url': 'http://www.bbc.co.uk/schoolreport/35744779',
+        'info_dict': {
+            'id': '35744779',
+            'title': 'School which breaks down barriers in Jerusalem',
+        },
+        'playlist_count': 1,
     }, {
         # single video with playlist URL from weather section
         'url': 'http://www.bbc.com/weather/features/33601775',
@@ -681,11 +691,17 @@ class BBCIE(BBCCoUkIE):
         # custom redirection to www.bbc.com
         'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
         'only_matching': True,
+    }, {
+        # single video article embedded with data-media-vpid
+        'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
+        'only_matching': True,
     }]
 
     @classmethod
     def suitable(cls, url):
-        return False if BBCCoUkIE.suitable(url) or BBCCoUkArticleIE.suitable(url) else super(BBCIE, cls).suitable(url)
+        EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerPlaylistIE, BBCCoUkPlaylistIE)
+        return (False if any(ie.suitable(url) for ie in EXCLUDE_IE)
+                else super(BBCIE, cls).suitable(url))
 
     def _extract_from_media_meta(self, media_meta, video_id):
         # Direct links to media in media metadata (e.g.
@@ -735,8 +751,17 @@ class BBCIE(BBCCoUkIE):
 
         json_ld_info = self._search_json_ld(webpage, playlist_id, default=None)
         timestamp = json_ld_info.get('timestamp')
+
         playlist_title = json_ld_info.get('title')
-        playlist_description = json_ld_info.get('description')
+        if not playlist_title:
+            playlist_title = self._og_search_title(
+                webpage, default=None) or self._html_search_regex(
+                r'<title>(.+?)</title>', webpage, 'playlist title', default=None)
+            if playlist_title:
+                playlist_title = re.sub(r'(.+)\s*-\s*BBC.*?$', r'\1', playlist_title).strip()
+
+        playlist_description = json_ld_info.get(
+            'description') or self._og_search_description(webpage, default=None)
 
         if not timestamp:
             timestamp = parse_iso8601(self._search_regex(
@@ -797,13 +822,11 @@ class BBCIE(BBCCoUkIE):
                                 playlist.get('progressiveDownloadUrl'), playlist_id, timestamp))
 
         if entries:
-            playlist_title = playlist_title or remove_end(self._og_search_title(webpage), ' - BBC News')
-            playlist_description = playlist_description or self._og_search_description(webpage, default=None)
             return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
 
         # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
         programme_id = self._search_regex(
-            [r'data-video-player-vpid="(%s)"' % self._ID_REGEX,
+            [r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX,
              r'<param[^>]+name="externalIdentifier"[^>]+value="(%s)"' % self._ID_REGEX,
              r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX],
             webpage, 'vpid', default=None)
@@ -829,10 +852,6 @@ class BBCIE(BBCCoUkIE):
                 'subtitles': subtitles,
             }
 
-        playlist_title = self._html_search_regex(
-            r'<title>(.*?)(?:\s*-\s*BBC [^ ]+)?</title>', webpage, 'playlist title')
-        playlist_description = self._og_search_description(webpage, default=None)
-
         def extract_all(pattern):
             return list(filter(None, map(
                 lambda s: self._parse_json(s, playlist_id, fatal=False),
@@ -932,7 +951,7 @@ class BBCIE(BBCCoUkIE):
 
 
 class BBCCoUkArticleIE(InfoExtractor):
-    _VALID_URL = 'http://www.bbc.co.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
+    _VALID_URL = r'https?://www\.bbc\.co\.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
     IE_NAME = 'bbc.co.uk:article'
     IE_DESC = 'BBC articles'
 
@@ -959,3 +978,72 @@ class BBCCoUkArticleIE(InfoExtractor):
             r'<div[^>]+typeof="Clip"[^>]+resource="([^"]+)"', webpage)]
 
         return self.playlist_result(entries, playlist_id, title, description)
+
+
+class BBCCoUkPlaylistBaseIE(InfoExtractor):
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        entries = [
+            self.url_result(self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())
+            for video_id in re.findall(
+                self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage)]
+
+        title, description = self._extract_title_and_description(webpage)
+
+        return self.playlist_result(entries, playlist_id, title, description)
+
+
+class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
+    IE_NAME = 'bbc.co.uk:iplayer:playlist'
+    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/episodes/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
+    _URL_TEMPLATE = 'http://www.bbc.co.uk/iplayer/episode/%s'
+    _VIDEO_ID_TEMPLATE = r'data-ip-id=["\'](%s)'
+    _TEST = {
+        'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
+        'info_dict': {
+            'id': 'b05rcz9v',
+            'title': 'The Disappearance',
+            'description': 'French thriller serial about a missing teenager.',
+        },
+        'playlist_mincount': 6,
+    }
+
+    def _extract_title_and_description(self, webpage):
+        title = self._search_regex(r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
+        description = self._search_regex(
+            r'<p[^>]+class=(["\'])subtitle\1[^>]*>(?P<value>[^<]+)</p>',
+            webpage, 'description', fatal=False, group='value')
+        return title, description
+
+
+class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
+    IE_NAME = 'bbc.co.uk:playlist'
+    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>%s)/(?:episodes|broadcasts|clips)' % BBCCoUkIE._ID_REGEX
+    _URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s'
+    _VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)'
+    _TESTS = [{
+        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
+        'info_dict': {
+            'id': 'b05rcz9v',
+            'title': 'The Disappearance - Clips - BBC Four',
+            'description': 'French thriller serial about a missing teenager.',
+        },
+        'playlist_mincount': 7,
+    }, {
+        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.bbc.co.uk/programmes/b055jkys/episodes/player',
+        'only_matching': True,
+    }]
+
+    def _extract_title_and_description(self, webpage):
+        title = self._og_search_title(webpage, fatal=False)
+        description = self._og_search_description(webpage)
+        return title, description
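
The suitable() override above is the standard way to keep a catch-all extractor from shadowing more specific ones: the generic class declines any URL that a listed sibling claims. The pattern with the InfoExtractor machinery stripped away (class names here are illustrative, not the real hierarchy):

    import re

    class Specific:
        _VALID_URL = r'https?://example\.com/playlist/\d+'

        @classmethod
        def suitable(cls, url):
            return re.match(cls._VALID_URL, url) is not None

    class CatchAll:
        _VALID_URL = r'https?://example\.com/.+'
        EXCLUDE = (Specific,)

        @classmethod
        def suitable(cls, url):
            # Defer to any more specific extractor that matches first.
            if any(ie.suitable(url) for ie in cls.EXCLUDE):
                return False
            return re.match(cls._VALID_URL, url) is not None

    print(CatchAll.suitable('https://example.com/playlist/42'))  # False
    print(CatchAll.suitable('https://example.com/watch/42'))     # True
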
diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py
index 34c2a756fba11f81516e87e095ef1e02d5e65417..956c7680e2ecc46a1df493947ed0be7b973d81b8 100644 (file)
@@ -33,8 +33,33 @@ class BeegIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
+        webpage = self._download_webpage(url, video_id)
+
+        cpl_url = self._search_regex(
+            r'<script[^>]+src=(["\'])(?P<url>(?:https?:)?//static\.beeg\.com/cpl/\d+\.js.*?)\1',
+            webpage, 'cpl', default=None, group='url')
+
+        beeg_version, beeg_salt = [None] * 2
+
+        if cpl_url:
+            cpl = self._download_webpage(
+                self._proto_relative_url(cpl_url), video_id,
+                'Downloading cpl JS', fatal=False)
+            if cpl:
+                beeg_version = self._search_regex(
+                    r'beeg_version\s*=\s*(\d+)', cpl,
+                    'beeg version', default=None) or self._search_regex(
+                    r'/(\d+)\.js', cpl_url, 'beeg version', default=None)
+                beeg_salt = self._search_regex(
+                    r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg salt',
+                    default=None, group='beeg_salt')
+
+        beeg_version = beeg_version or '1750'
+        beeg_salt = beeg_salt or 'MIDtGaw96f0N1kMMAM1DE46EC9pmFr'
+
         video = self._download_json(
-            'https://api.beeg.com/api/v5/video/%s' % video_id, video_id)
+            'http://api.beeg.com/api/v6/%s/video/%s' % (beeg_version, video_id),
+            video_id)
 
         def split(o, e):
             def cut(s, x):
@@ -50,8 +75,8 @@ class BeegIE(InfoExtractor):
             return n
 
         def decrypt_key(key):
-            # Reverse engineered from http://static.beeg.com/cpl/1105.js
-            a = '5ShMcIQlssOd7zChAIOlmeTZDaUxULbJRnywYaiB'
+            # Reverse engineered from http://static.beeg.com/cpl/1738.js
+            a = beeg_salt
             e = compat_urllib_parse_unquote(key)
             o = ''.join([
                 compat_chr(compat_ord(e[n]) - compat_ord(a[n % len(a)]) % 21)
@@ -101,5 +126,5 @@ class BeegIE(InfoExtractor):
             'duration': duration,
             'tags': tags,
             'formats': formats,
-            'age_limit': 18,
+            'age_limit': self._rta_search(webpage),
         }
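
One subtlety in decrypt_key(): since % binds tighter than -, each code point is reduced by (salt_char % 21), not by (cipher_char - salt_char) % 21. The visible part of the routine, restated with Python 3 names:

    from urllib.parse import unquote

    def shift_key(key, salt):
        e = unquote(key)
        # Subtract (salt code point mod 21) from each ciphertext code point.
        return ''.join(
            chr(ord(e[n]) - ord(salt[n % len(salt)]) % 21)
            for n in range(len(e)))
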
diff --git a/youtube_dl/extractor/behindkink.py b/youtube_dl/extractor/behindkink.py
index 1bdc25812b6afb4cf133007f2d12b89fd56b353f..9bca853b32979a4e2700f5d121c24a08fd875224 100644 (file)
@@ -8,7 +8,7 @@ from ..utils import url_basename
 
 
 class BehindKinkIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'
+    _VALID_URL = r'https?://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'
     _TEST = {
         'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/',
         'md5': '507b57d8fdcd75a41a9a7bdb7989c762',
diff --git a/youtube_dl/extractor/bet.py b/youtube_dl/extractor/bet.py
index 03dad4636afdf0443735fde8f1d643aea553ba10..bd3ee2e2eb3822253bb04fceb253d4028448f5f5 100644 (file)
@@ -1,31 +1,27 @@
 from __future__ import unicode_literals
 
-from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote
-from ..utils import (
-    xpath_text,
-    xpath_with_ns,
-    int_or_none,
-    parse_iso8601,
-)
+from .mtv import MTVServicesInfoExtractor
+from ..utils import unified_strdate
+from ..compat import compat_urllib_parse_urlencode
 
 
-class BetIE(InfoExtractor):
+class BetIE(MTVServicesInfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html'
     _TESTS = [
         {
             'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html',
             'info_dict': {
-                'id': 'news/national/2014/a-conversation-with-president-obama',
+                'id': '07e96bd3-8850-3051-b856-271b457f0ab8',
                 'display_id': 'in-bet-exclusive-obama-talks-race-and-racism',
                 'ext': 'flv',
                 'title': 'A Conversation With President Obama',
-                'description': 'md5:699d0652a350cf3e491cd15cc745b5da',
+                'description': 'President Obama urges persistence in confronting racism and bias.',
                 'duration': 1534,
-                'timestamp': 1418075340,
                 'upload_date': '20141208',
-                'uploader': 'admin',
                 'thumbnail': 're:(?i)^https?://.*\.jpg$',
+                'subtitles': {
+                    'en': 'mincount:2',
+                }
             },
             'params': {
                 # rtmp download
@@ -35,16 +31,17 @@ class BetIE(InfoExtractor):
         {
             'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html',
             'info_dict': {
-                'id': 'news/national/2014/justice-for-ferguson-a-community-reacts',
+                'id': '9f516bf1-7543-39c4-8076-dd441b459ba9',
                 'display_id': 'justice-for-ferguson-a-community-reacts',
                 'ext': 'flv',
                 'title': 'Justice for Ferguson: A Community Reacts',
                 'description': 'A BET News special.',
                 'duration': 1696,
-                'timestamp': 1416942360,
                 'upload_date': '20141125',
-                'uploader': 'admin',
                 'thumbnail': 're:(?i)^https?://.*\.jpg$',
+                'subtitles': {
+                    'en': 'mincount:2',
+                }
             },
             'params': {
                 # rtmp download
@@ -53,56 +50,32 @@ class BetIE(InfoExtractor):
         }
     ]
 
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-
-        media_url = compat_urllib_parse_unquote(self._search_regex(
-            [r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"],
-            webpage, 'media URL'))
+    _FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player"
 
-        video_id = self._search_regex(
-            r'/video/(.*)/_jcr_content/', media_url, 'video id')
+    def _get_feed_query(self, uri):
+        return compat_urllib_parse_urlencode({
+            'uuid': uri,
+        })
 
-        mrss = self._download_xml(media_url, display_id)
-
-        item = mrss.find('./channel/item')
-
-        NS_MAP = {
-            'dc': 'http://purl.org/dc/elements/1.1/',
-            'media': 'http://search.yahoo.com/mrss/',
-            'ka': 'http://kickapps.com/karss',
-        }
+    def _extract_mgid(self, webpage):
+        return self._search_regex(r'data-uri="([^"]+)', webpage, 'mgid')
 
-        title = xpath_text(item, './title', 'title')
-        description = xpath_text(
-            item, './description', 'description', fatal=False)
-
-        timestamp = parse_iso8601(xpath_text(
-            item, xpath_with_ns('./dc:date', NS_MAP),
-            'upload date', fatal=False))
-        uploader = xpath_text(
-            item, xpath_with_ns('./dc:creator', NS_MAP),
-            'uploader', fatal=False)
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
 
-        media_content = item.find(
-            xpath_with_ns('./media:content', NS_MAP))
-        duration = int_or_none(media_content.get('duration'))
-        smil_url = media_content.get('url')
+        webpage = self._download_webpage(url, display_id)
+        mgid = self._extract_mgid(webpage)
+        videos_info = self._get_videos_info(mgid)
 
-        thumbnail = media_content.find(
-            xpath_with_ns('./media:thumbnail', NS_MAP)).get('url')
+        info_dict = videos_info['entries'][0]
 
-        formats = self._extract_smil_formats(smil_url, display_id)
+        upload_date = unified_strdate(self._html_search_meta('date', webpage))
+        description = self._html_search_meta('description', webpage)
 
-        return {
-            'id': video_id,
+        info_dict.update({
             'display_id': display_id,
-            'title': title,
             'description': description,
-            'thumbnail': thumbnail,
-            'timestamp': timestamp,
-            'uploader': uploader,
-            'duration': duration,
-            'formats': formats,
-        }
+            'upload_date': upload_date,
+        })
+
+        return info_dict
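
_get_feed_query() only produces the query string; the MTV services base class is, by assumption here, responsible for splicing it onto _FEED_URL. Under that assumption the feed request ends up looking like this:

    from urllib.parse import urlencode

    FEED_URL = 'http://feeds.mtvnservices.com/od/feed/bet-mrss-player'

    def feed_url(mgid):
        # mgid comes from the page's data-uri attribute, per _extract_mgid() above.
        return '%s?%s' % (FEED_URL, urlencode({'uuid': mgid}))
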
diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py
index 59beb11bce71bfc6ef9b036ad123dc44e872d0be..b17047b399b6630fe2334aa24f0a5e97aed8506f 100644 (file)
@@ -1,34 +1,42 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import calendar
+import datetime
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import (
+    compat_etree_fromstring,
+    compat_str,
+    compat_parse_qs,
+    compat_xml_parse_error,
+)
 from ..utils import (
-    int_or_none,
-    unescapeHTML,
     ExtractorError,
+    int_or_none,
+    float_or_none,
     xpath_text,
 )
 
 
 class BiliBiliIE(InfoExtractor):
-    _VALID_URL = r'http://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)(?:/index_(?P<page_num>\d+).html)?'
+    _VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'http://www.bilibili.tv/video/av1074402/',
-        'md5': '2c301e4dab317596e837c3e7633e7d86',
+        'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
         'info_dict': {
             'id': '1554319',
             'ext': 'flv',
             'title': '【金坷垃】金泡沫',
-            'duration': 308313,
+            'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
+            'duration': 308.067,
+            'timestamp': 1398012660,
             'upload_date': '20140420',
             'thumbnail': 're:^https?://.+\.jpg',
-            'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
-            'timestamp': 1397983878,
             'uploader': '菊子桑',
+            'uploader_id': '156160',
         },
     }, {
         'url': 'http://www.bilibili.com/video/av1041170/',
@@ -36,75 +44,186 @@ class BiliBiliIE(InfoExtractor):
             'id': '1041170',
             'title': '【BD1080P】刀语【诸神&异域】',
             'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~',
-            'uploader': '枫叶逝去',
-            'timestamp': 1396501299,
         },
         'playlist_count': 9,
+    }, {
+        'url': 'http://www.bilibili.com/video/av4808130/',
+        'info_dict': {
+            'id': '4808130',
+            'title': '【长篇】哆啦A梦443【钉铛】',
+            'description': '(2016.05.27)来组合客人的脸吧&amp;amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;amp;illust_id=56912929',
+        },
+        'playlist': [{
+            'md5': '55cdadedf3254caaa0d5d27cf20a8f9c',
+            'info_dict': {
+                'id': '4808130_part1',
+                'ext': 'flv',
+                'title': '【长篇】哆啦A梦443【钉铛】',
+                'description': '(2016.05.27)来组合客人的脸吧&amp;amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;amp;illust_id=56912929',
+                'timestamp': 1464564180,
+                'upload_date': '20160529',
+                'uploader': '喜欢拉面',
+                'uploader_id': '151066',
+            },
+        }, {
+            'md5': '926f9f67d0c482091872fbd8eca7ea3d',
+            'info_dict': {
+                'id': '4808130_part2',
+                'ext': 'flv',
+                'title': '【长篇】哆啦A梦443【钉铛】',
+                'description': '(2016.05.27)来组合客人的脸吧&amp;amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;amp;illust_id=56912929',
+                'timestamp': 1464564180,
+                'upload_date': '20160529',
+                'uploader': '喜欢拉面',
+                'uploader_id': '151066',
+            },
+        }, {
+            'md5': '4b7b225b968402d7c32348c646f1fd83',
+            'info_dict': {
+                'id': '4808130_part3',
+                'ext': 'flv',
+                'title': '【长篇】哆啦A梦443【钉铛】',
+                'description': '(2016.05.27)来组合客人的脸吧&amp;amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;amp;illust_id=56912929',
+                'timestamp': 1464564180,
+                'upload_date': '20160529',
+                'uploader': '喜欢拉面',
+                'uploader_id': '151066',
+            },
+        }, {
+            'md5': '7b795e214166501e9141139eea236e91',
+            'info_dict': {
+                'id': '4808130_part4',
+                'ext': 'flv',
+                'title': '【长篇】哆啦A梦443【钉铛】',
+                'description': '(2016.05.27)来组合客人的脸吧&amp;amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;amp;illust_id=56912929',
+                'timestamp': 1464564180,
+                'upload_date': '20160529',
+                'uploader': '喜欢拉面',
+                'uploader_id': '151066',
+            },
+        }],
+    }, {
+        # Missing upload time
+        'url': 'http://www.bilibili.com/video/av1867637/',
+        'info_dict': {
+            'id': '2880301',
+            'ext': 'flv',
+            'title': '【HDTV】【喜剧】岳父岳母真难当 (2014)【法国票房冠军】',
+            'description': '一个信奉天主教的法国旧式传统资产阶级家庭中有四个女儿。三个女儿却分别找了阿拉伯、犹太、中国丈夫,老夫老妻唯独期盼剩下未嫁的小女儿能找一个信奉天主教的法国白人,结果没想到小女儿找了一位非裔黑人……【这次应该不会跳帧了】',
+            'uploader': '黑夜为猫',
+            'uploader_id': '610729',
+        },
+        'params': {
+            # Just to test metadata extraction
+            'skip_download': True,
+        },
+        'expected_warnings': ['upload time'],
     }]
 
+    # BiliBili blocks keys from time to time. The current key is extracted from
+    # the Android client
+    # TODO: find the sign algorithm used in the flash player
+    _APP_KEY = '86385cdc024c0f6c'
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
-        page_num = mobj.group('page_num') or '1'
 
-        view_data = self._download_json(
-            'http://api.bilibili.com/view?type=json&appkey=8e9fc618fbd41e28&id=%s&page=%s' % (video_id, page_num),
-            video_id)
-        if 'error' in view_data:
-            raise ExtractorError('%s said: %s' % (self.IE_NAME, view_data['error']), expected=True)
+        webpage = self._download_webpage(url, video_id)
 
-        cid = view_data['cid']
-        title = unescapeHTML(view_data['title'])
+        params = compat_parse_qs(self._search_regex(
+            [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
+             r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
+            webpage, 'player parameters'))
+        cid = params['cid'][0]
 
-        doc = self._download_xml(
-            'http://interface.bilibili.com/v_cdn_play?appkey=8e9fc618fbd41e28&cid=%s' % cid,
-            cid,
-            'Downloading page %s/%s' % (page_num, view_data['pages'])
-        )
+        info_xml_str = self._download_webpage(
+            'http://interface.bilibili.com/v_cdn_play',
+            cid, query={'appkey': self._APP_KEY, 'cid': cid},
+            note='Downloading video info page')
+
+        err_msg = None
+        durls = None
+        info_xml = None
+        try:
+            info_xml = compat_etree_fromstring(info_xml_str.encode('utf-8'))
+        except compat_xml_parse_error:
+            info_json = self._parse_json(info_xml_str, video_id, fatal=False)
+            err_msg = (info_json or {}).get('error_text')
+        else:
+            err_msg = xpath_text(info_xml, './message')
 
-        if xpath_text(doc, './result') == 'error':
-            raise ExtractorError('%s said: %s' % (self.IE_NAME, xpath_text(doc, './message')), expected=True)
+        if info_xml is not None:
+            durls = info_xml.findall('./durl')
+        if not durls:
+            if err_msg:
+                raise ExtractorError('%s said: %s' % (self.IE_NAME, err_msg), expected=True)
+            else:
+                raise ExtractorError('No videos found!')
 
         entries = []
 
-        for durl in doc.findall('./durl'):
+        for durl in durls:
             size = xpath_text(durl, ['./filesize', './size'])
             formats = [{
                 'url': durl.find('./url').text,
                 'filesize': int_or_none(size),
-                'ext': 'flv',
             }]
-            backup_urls = durl.find('./backup_url')
-            if backup_urls is not None:
-                for backup_url in backup_urls.findall('./url'):
-                    formats.append({'url': backup_url.text})
-            formats.reverse()
+            for backup_url in durl.findall('./backup_url/url'):
+                formats.append({
+                    'url': backup_url.text,
+                    # backup URLs have lower priorities
+                    'preference': -2 if 'hd.mp4' in backup_url.text else -3,
+                })
+
+            self._sort_formats(formats)
 
             entries.append({
                 'id': '%s_part%s' % (cid, xpath_text(durl, './order')),
-                'title': title,
                 'duration': int_or_none(xpath_text(durl, './length'), 1000),
                 'formats': formats,
             })
 
+        title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title')
+        description = self._html_search_meta('description', webpage)
+        datetime_str = self._html_search_regex(
+            r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False)
+        timestamp = None
+        if datetime_str:
+            timestamp = calendar.timegm(datetime.datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M').timetuple())
+
+        # TODO 'view_count' requires deobfuscating Javascript
         info = {
             'id': compat_str(cid),
             'title': title,
-            'description': view_data.get('description'),
-            'thumbnail': view_data.get('pic'),
-            'uploader': view_data.get('author'),
-            'timestamp': int_or_none(view_data.get('created')),
-            'view_count': int_or_none(view_data.get('play')),
-            'duration': int_or_none(xpath_text(doc, './timelength')),
+            'description': description,
+            'timestamp': timestamp,
+            'thumbnail': self._html_search_meta('thumbnailUrl', webpage),
+            'duration': float_or_none(xpath_text(info_xml, './timelength'), scale=1000),
         }
 
+        uploader_mobj = re.search(
+            r'<a[^>]+href="https?://space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"',
+            webpage)
+        if uploader_mobj:
+            info.update({
+                'uploader': uploader_mobj.group('name'),
+                'uploader_id': uploader_mobj.group('id'),
+            })
+
+        for entry in entries:
+            entry.update(info)
+
         if len(entries) == 1:
-            entries[0].update(info)
             return entries[0]
         else:
-            info.update({
+            for idx, entry in enumerate(entries):
+                entry['id'] = '%s_part%d' % (video_id, (idx + 1))
+
+            return {
                 '_type': 'multi_video',
                 'id': video_id,
+                'title': title,
+                'description': description,
                 'entries': entries,
-            })
-            return info
+            }
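
The upload time now comes from a <time datetime="..."> attribute, parsed with strptime and converted with calendar.timegm(), which interprets the tuple as UTC (time.mktime() would apply the local timezone instead). In isolation:

    import calendar
    import datetime

    def parse_upload_time(datetime_str):
        # e.g. '2014-04-21T15:31' from <time datetime="...">
        dt = datetime.datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M')
        return calendar.timegm(dt.timetuple())

    print(parse_upload_time('2014-04-21T15:31'))  # 1398094260
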
diff --git a/youtube_dl/extractor/biobiochiletv.py b/youtube_dl/extractor/biobiochiletv.py
new file mode 100644 (file)
index 0000000..1332281
--- /dev/null
@@ -0,0 +1,86 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import remove_end
+
+
+class BioBioChileTVIE(InfoExtractor):
+    _VALID_URL = r'https?://tv\.biobiochile\.cl/notas/(?:[^/]+/)+(?P<id>[^/]+)\.shtml'
+
+    _TESTS = [{
+        'url': 'http://tv.biobiochile.cl/notas/2015/10/21/sobre-camaras-y-camarillas-parlamentarias.shtml',
+        'md5': '26f51f03cf580265defefb4518faec09',
+        'info_dict': {
+            'id': 'sobre-camaras-y-camarillas-parlamentarias',
+            'ext': 'mp4',
+            'title': 'Sobre Cámaras y camarillas parlamentarias',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'uploader': 'Fernando Atria',
+        },
+    }, {
+        # different uploader layout
+        'url': 'http://tv.biobiochile.cl/notas/2016/03/18/natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades.shtml',
+        'md5': 'edc2e6b58974c46d5b047dea3c539ff3',
+        'info_dict': {
+            'id': 'natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades',
+            'ext': 'mp4',
+            'title': 'Natalia Valdebenito repasa a diputado Hasbún: Pasó a la categoría de hablar brutalidades',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'uploader': 'Piangella Obrador',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml',
+        'only_matching': True,
+    }, {
+        'url': 'http://tv.biobiochile.cl/notas/2015/10/21/exclusivo-hector-pinto-formador-de-chupete-revela-version-del-ex-delantero-albo.shtml',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = remove_end(self._og_search_title(webpage), ' - BioBioChile TV')
+
+        file_url = self._search_regex(
+            r'loadFWPlayerVideo\([^,]+,\s*(["\'])(?P<url>.+?)\1',
+            webpage, 'file url', group='url')
+
+        base_url = self._search_regex(
+            r'file\s*:\s*(["\'])(?P<url>.+?)\1\s*\+\s*fileURL', webpage,
+            'base url', default='http://unlimited2-cl.digitalproserver.com/bbtv/',
+            group='url')
+
+        formats = self._extract_m3u8_formats(
+            '%s%s/playlist.m3u8' % (base_url, file_url), video_id, 'mp4',
+            entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+        f = {
+            'url': '%s%s' % (base_url, file_url),
+            'format_id': 'http',
+            'protocol': 'http',
+            'preference': 1,
+        }
+        if formats:
+            f_copy = formats[-1].copy()
+            f_copy.update(f)
+            f = f_copy
+        formats.append(f)
+        self._sort_formats(formats)
+
+        thumbnail = self._og_search_thumbnail(webpage)
+        uploader = self._html_search_regex(
+            r'<a[^>]+href=["\']https?://busca\.biobiochile\.cl/author[^>]+>(.+?)</a>',
+            webpage, 'uploader', fatal=False)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'formats': formats,
+        }
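
The format handling above offers the progressive download next to the HLS renditions, clones metadata from the last (best) HLS format when one exists, and ranks the direct URL higher via 'preference'. Reduced to a helper:

    def add_http_variant(formats, base_url, file_url):
        f = {
            'url': base_url + file_url,
            'format_id': 'http',
            'protocol': 'http',
            'preference': 1,  # outranks the HLS entries after sorting
        }
        if formats:
            # Inherit width/height and friends from the best HLS rendition.
            merged = formats[-1].copy()
            merged.update(f)
            f = merged
        formats.append(f)
        return formats
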
diff --git a/youtube_dl/extractor/biqle.py b/youtube_dl/extractor/biqle.py
new file mode 100644 (file)
index 0000000..ae4579b
--- /dev/null
@@ -0,0 +1,39 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class BIQLEIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?biqle\.(?:com|org|ru)/watch/(?P<id>-?\d+_\d+)'
+    _TESTS = [{
+        'url': 'http://www.biqle.ru/watch/847655_160197695',
+        'md5': 'ad5f746a874ccded7b8f211aeea96637',
+        'info_dict': {
+            'id': '160197695',
+            'ext': 'mp4',
+            'title': 'Foo Fighters - The Pretender (Live at Wembley Stadium)',
+            'uploader': 'Andrey Rogozin',
+            'upload_date': '20110605',
+        }
+    }, {
+        'url': 'https://biqle.org/watch/-44781847_168547604',
+        'md5': '7f24e72af1db0edf7c1aaba513174f97',
+        'info_dict': {
+            'id': '168547604',
+            'ext': 'mp4',
+            'title': 'Ребенок в шоке от автоматической мойки',
+            'uploader': 'Dmitry Kotov',
+        }
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        embed_url = self._proto_relative_url(self._search_regex(
+            r'<iframe.+?src="((?:http:)?//daxab\.com/[^"]+)".*?></iframe>', webpage, 'embed url'))
+
+        return {
+            '_type': 'url_transparent',
+            'url': embed_url,
+        }
diff --git a/youtube_dl/extractor/bleacherreport.py b/youtube_dl/extractor/bleacherreport.py
index 38bda3af5a189cc7a2c8d65937a7d710edd0211f..7a8e1f60b82923b643918e43924fa64a5250cb83 100644 (file)
@@ -28,10 +28,10 @@ class BleacherReportIE(InfoExtractor):
         'add_ie': ['Ooyala'],
     }, {
         'url': 'http://bleacherreport.com/articles/2586817-aussie-golfers-get-fright-of-their-lives-after-being-chased-by-angry-kangaroo',
-        'md5': 'af5f90dc9c7ba1c19d0a3eac806bbf50',
+        'md5': '6a5cd403418c7b01719248ca97fb0692',
         'info_dict': {
             'id': '2586817',
-            'ext': 'mp4',
+            'ext': 'webm',
             'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo',
             'timestamp': 1446839961,
             'uploader': 'Sean Fay',
@@ -93,10 +93,14 @@ class BleacherReportCMSIE(AMPIE):
         'md5': '8c2c12e3af7805152675446c905d159b',
         'info_dict': {
             'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
             'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
         },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
     }]
 
     def _real_extract(self, url):
diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py
index 13343bc258532b37bf912f0648e317103b5f428d..bd538be50bc4a650d000eaffb5e292a4e8e76cbe 100644 (file)
@@ -17,6 +17,9 @@ class BloombergIE(InfoExtractor):
             'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
             'description': 'md5:a8ba0302912d03d246979735c17d2761',
         },
+        'params': {
+            'format': 'best[format_id^=hds]',
+        },
     }, {
         'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
         'only_matching': True,
diff --git a/youtube_dl/extractor/bokecc.py b/youtube_dl/extractor/bokecc.py
new file mode 100644 (file)
index 0000000..86a7f4d
--- /dev/null
@@ -0,0 +1,60 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_parse_qs
+from ..utils import ExtractorError
+
+
+class BokeCCBaseIE(InfoExtractor):
+    def _extract_bokecc_formats(self, webpage, video_id, format_id=None):
+        player_params_str = self._html_search_regex(
+            r'<(?:script|embed)[^>]+src="http://p\.bokecc\.com/player\?([^"]+)',
+            webpage, 'player params')
+
+        player_params = compat_parse_qs(player_params_str)
+
+        info_xml = self._download_xml(
+            'http://p.bokecc.com/servlet/playinfo?uid=%s&vid=%s&m=1' % (
+                player_params['siteid'][0], player_params['vid'][0]), video_id)
+
+        formats = [{
+            'format_id': format_id,
+            'url': quality.find('./copy').attrib['playurl'],
+            'preference': int(quality.attrib['value']),
+        } for quality in info_xml.findall('./video/quality')]
+
+        self._sort_formats(formats)
+
+        return formats
+
+
+class BokeCCIE(BokeCCBaseIE):
+    IE_DESC = 'CC视频'
+    _VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
+
+    _TESTS = [{
+        'url': 'http://union.bokecc.com/playvideo.bo?vid=E44D40C15E65EA30&uid=CD0C5D3C8614B28B',
+        'info_dict': {
+            'id': 'CD0C5D3C8614B28B_E44D40C15E65EA30',
+            'ext': 'flv',
+            'title': 'BokeCC Video',
+        },
+    }]
+
+    def _real_extract(self, url):
+        qs = compat_parse_qs(re.match(self._VALID_URL, url).group('query'))
+        if not qs.get('vid') or not qs.get('uid'):
+            raise ExtractorError('Invalid URL', expected=True)
+
+        video_id = '%s_%s' % (qs['uid'][0], qs['vid'][0])
+
+        webpage = self._download_webpage(url, video_id)
+
+        return {
+            'id': video_id,
+            'title': 'BokeCC Video',  # no title provided in the webpage
+            'formats': self._extract_bokecc_formats(webpage, video_id),
+        }
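
The playinfo response maps directly onto youtube-dl formats: one <quality> node per rendition, with the numeric 'value' attribute reused as the sort preference. Against a synthetic playinfo-style document (the schema is inferred from the code above):

    import xml.etree.ElementTree as ET

    SAMPLE = '''<response><video>
      <quality value="10"><copy playurl="http://cdn.example.com/low.flv"/></quality>
      <quality value="20"><copy playurl="http://cdn.example.com/high.flv"/></quality>
    </video></response>'''

    def bokecc_formats(xml_str):
        doc = ET.fromstring(xml_str)
        # Higher 'value' wins once formats are sorted by preference.
        return [{
            'url': quality.find('./copy').attrib['playurl'],
            'preference': int(quality.attrib['value']),
        } for quality in doc.findall('./video/quality')]

    print(bokecc_formats(SAMPLE))
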
diff --git a/youtube_dl/extractor/bpb.py b/youtube_dl/extractor/bpb.py
index c28e72927fefc90c320d1b11b31db5b3d7e754d1..6ad45a1e6a30bac2450743de3f0d12a2c9f2b89d 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 class BpbIE(InfoExtractor):
     IE_DESC = 'Bundeszentrale für politische Bildung'
-    _VALID_URL = r'http://www\.bpb\.de/mediathek/(?P<id>[0-9]+)/'
+    _VALID_URL = r'https?://www\.bpb\.de/mediathek/(?P<id>[0-9]+)/'
 
     _TEST = {
         'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
diff --git a/youtube_dl/extractor/br.py b/youtube_dl/extractor/br.py
index 11cf498515ba572f8ef8c7f20d5620bf50289827..ff0aa11b19a7736017992d76f13a0ba5509f2f8e 100644 (file)
@@ -29,7 +29,8 @@ class BRIE(InfoExtractor):
                 'duration': 180,
                 'uploader': 'Reinhard Weber',
                 'upload_date': '20150422',
-            }
+            },
+            'skip': '404 not found',
         },
         {
             'url': 'http://www.br.de/nachrichten/oberbayern/inhalt/muenchner-polizeipraesident-schreiber-gestorben-100.html',
@@ -40,7 +41,8 @@ class BRIE(InfoExtractor):
                 'title': 'Manfred Schreiber ist tot',
                 'description': 'md5:b454d867f2a9fc524ebe88c3f5092d97',
                 'duration': 26,
-            }
+            },
+            'skip': '404 not found',
         },
         {
             'url': 'https://www.br-klassik.de/audio/peeping-tom-premierenkritik-dance-festival-muenchen-100.html',
@@ -51,7 +53,8 @@ class BRIE(InfoExtractor):
                 'title': 'Kurzweilig und sehr bewegend',
                 'description': 'md5:0351996e3283d64adeb38ede91fac54e',
                 'duration': 296,
-            }
+            },
+            'skip': '404 not found',
         },
         {
             'url': 'http://www.br.de/radio/bayern1/service/team/videos/team-video-erdelt100.html',
diff --git a/youtube_dl/extractor/bravotv.py b/youtube_dl/extractor/bravotv.py
new file mode 100644 (file)
index 0000000..541c769
--- /dev/null
@@ -0,0 +1,31 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import smuggle_url
+
+
+class BravoTVIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+videos/(?P<id>[^/?]+)'
+    _TEST = {
+        'url': 'http://www.bravotv.com/last-chance-kitchen/season-5/videos/lck-ep-12-fishy-finale',
+        'md5': 'd60cdf68904e854fac669bd26cccf801',
+        'info_dict': {
+            'id': 'LitrBdX64qLn',
+            'ext': 'mp4',
+            'title': 'Last Chance Kitchen Returns',
+            'description': 'S13: Last Chance Kitchen Returns for Top Chef Season 13',
+            'timestamp': 1448926740,
+            'upload_date': '20151130',
+            'uploader': 'NBCU-BRAV',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        account_pid = self._search_regex(r'"account_pid"\s*:\s*"([^"]+)"', webpage, 'account pid')
+        release_pid = self._search_regex(r'"release_pid"\s*:\s*"([^"]+)"', webpage, 'release pid')
+        return self.url_result(smuggle_url(
+            'http://link.theplatform.com/s/%s/%s?mbr=true&switch=progressive' % (account_pid, release_pid),
+            {'force_smil_url': True}), 'ThePlatform', release_pid)
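
smuggle_url() is how one extractor passes private hints (here 'force_smil_url') to another across the url_result() boundary: the data rides in the URL fragment. A minimal equivalent, modelled on the encoded fragment visible in the Brightcove test URL in the next file (PID and RID below are placeholder ids):

    import json
    from urllib.parse import urlencode

    def smuggle_url(url, data):
        # The receiving extractor strips and decodes this fragment again.
        return url + '#' + urlencode({'__youtubedl_smuggle': json.dumps(data)})

    print(smuggle_url('http://link.theplatform.com/s/PID/RID?mbr=true',
                      {'force_smil_url': True}))
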
diff --git a/youtube_dl/extractor/break.py b/youtube_dl/extractor/break.py
index aa08051b168cee479a416647645acdd6543ac71d..725859b4d2d554df91ff4793a2b3d245f02c8996 100644 (file)
@@ -11,7 +11,7 @@ from ..utils import (
 
 
 class BreakIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?break\.com/video/(?:[^/]+/)*.+-(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?break\.com/video/(?:[^/]+/)*.+-(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056',
         'info_dict': {
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
index c947337f9f3d54c730487c3e050c00a029bb2d1b..ef560b592792910c2d39174eb2db129cea5590fe 100644 (file)
@@ -9,10 +9,10 @@ from ..compat import (
     compat_etree_fromstring,
     compat_parse_qs,
     compat_str,
-    compat_urllib_parse,
     compat_urllib_parse_urlparse,
     compat_urlparse,
     compat_xml_parse_error,
+    compat_HTTPError,
 )
 from ..utils import (
     determine_ext,
@@ -23,16 +23,16 @@ from ..utils import (
     js_to_json,
     int_or_none,
     parse_iso8601,
-    sanitized_Request,
     unescapeHTML,
     unsmuggle_url,
+    update_url_query,
 )
 
 
 class BrightcoveLegacyIE(InfoExtractor):
     IE_NAME = 'brightcove:legacy'
     _VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
-    _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
+    _FEDERATED_URL = 'http://c.brightcove.com/services/viewer/htmlFederated'
 
     _TESTS = [
         {
@@ -46,6 +46,9 @@ class BrightcoveLegacyIE(InfoExtractor):
                 'title': 'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
                 'uploader': '8TV',
                 'description': 'md5:a950cc4285c43e44d763d036710cd9cd',
+                'timestamp': 1368213670,
+                'upload_date': '20130510',
+                'uploader_id': '1589608506001',
             }
         },
         {
@@ -57,6 +60,9 @@ class BrightcoveLegacyIE(InfoExtractor):
                 'title': 'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges',
                 'description': 'John Rose speaks at the JVM Language Summit, August 1, 2012.',
                 'uploader': 'Oracle',
+                'timestamp': 1344975024,
+                'upload_date': '20120814',
+                'uploader_id': '1460825906',
             },
         },
         {
@@ -68,6 +74,9 @@ class BrightcoveLegacyIE(InfoExtractor):
                 'title': 'This Bracelet Acts as a Personal Thermostat',
                 'description': 'md5:547b78c64f4112766ccf4e151c20b6a0',
                 'uploader': 'Mashable',
+                'timestamp': 1382041798,
+                'upload_date': '20131017',
+                'uploader_id': '1130468786001',
             },
         },
         {
@@ -85,14 +94,17 @@ class BrightcoveLegacyIE(InfoExtractor):
         {
             # test flv videos served by akamaihd.net
             # From http://www.redbull.com/en/bike/stories/1331655643987/replay-uci-dh-world-cup-2014-from-fort-william
-            'url': 'http://c.brightcove.com/services/viewer/htmlFederated?%40videoPlayer=ref%3ABC2996102916001&linkBaseURL=http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fvideos%2F1331655630249%2Freplay-uci-fort-william-2014-dh&playerKey=AQ%7E%7E%2CAAAApYJ7UqE%7E%2Cxqr_zXk0I-zzNndy8NlHogrCb5QdyZRf&playerID=1398061561001#__youtubedl_smuggle=%7B%22Referer%22%3A+%22http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fstories%2F1331655643987%2Freplay-uci-dh-world-cup-2014-from-fort-william%22%7D',
+            'url': 'http://c.brightcove.com/services/viewer/htmlFederated?%40videoPlayer=ref%3Aevent-stream-356&linkBaseURL=http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fvideos%2F1331655630249%2Freplay-uci-fort-william-2014-dh&playerKey=AQ%7E%7E%2CAAAApYJ7UqE%7E%2Cxqr_zXk0I-zzNndy8NlHogrCb5QdyZRf&playerID=1398061561001#__youtubedl_smuggle=%7B%22Referer%22%3A+%22http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fstories%2F1331655643987%2Freplay-uci-dh-world-cup-2014-from-fort-william%22%7D',
             # The md5 checksum changes on each download
             'info_dict': {
-                'id': '2996102916001',
+                'id': '3750436379001',
                 'ext': 'flv',
                 'title': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals',
-                'uploader': 'Red Bull TV',
+                'uploader': 'RBTV Old (do not use)',
                 'description': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals',
+                'timestamp': 1409122195,
+                'upload_date': '20140827',
+                'uploader_id': '710858724001',
             },
         },
         {
@@ -106,6 +118,12 @@ class BrightcoveLegacyIE(InfoExtractor):
             'playlist_mincount': 7,
         },
     ]
+    FLV_VCODECS = {
+        1: 'SORENSON',
+        2: 'ON2',
+        3: 'H264',
+        4: 'VP8',
+    }
 
     @classmethod
     def _build_brighcove_url(cls, object_str):
@@ -136,13 +154,16 @@ class BrightcoveLegacyIE(InfoExtractor):
         else:
             flashvars = {}
 
+        data_url = object_doc.attrib.get('data', '')
+        data_url_params = compat_parse_qs(compat_urllib_parse_urlparse(data_url).query)
+
         def find_param(name):
             if name in flashvars:
                 return flashvars[name]
             node = find_xpath_attr(object_doc, './param', 'name', name)
             if node is not None:
                 return node.attrib['value']
-            return None
+            return data_url_params.get(name)
 
         params = {}
 
@@ -155,8 +176,8 @@ class BrightcoveLegacyIE(InfoExtractor):
         # Not all pages define this value
         if playerKey is not None:
             params['playerKey'] = playerKey
-        # The three fields hold the id of the video
-        videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID')
+        # These fields hold the id of the video
+        videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList')
         if videoPlayer is not None:
             params['@videoPlayer'] = videoPlayer
         linkBase = find_param('linkBaseURL')
@@ -184,8 +205,7 @@ class BrightcoveLegacyIE(InfoExtractor):
 
     @classmethod
     def _make_brightcove_url(cls, params):
-        data = compat_urllib_parse.urlencode(params)
-        return cls._FEDERATED_URL_TEMPLATE % data
+        return update_url_query(cls._FEDERATED_URL, params)
 
     @classmethod
     def _extract_brightcove_url(cls, webpage):
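
With update_url_query from youtube_dl.utils, _make_brightcove_url no longer interpolates a pre-encoded query string into a template; the helper merges a dict into the URL's existing query and re-serializes it. A rough usage sketch (base URL borrowed from the test above, parameter values invented, output key order not guaranteed):

    from youtube_dl.utils import update_url_query

    print(update_url_query(
        'http://c.brightcove.com/services/viewer/htmlFederated',
        {'playerID': '1398061561001', '@videoPlayer': '2996102916001'}))
    # -> ...htmlFederated?playerID=1398061561001&%40videoPlayer=2996102916001
    #    (the '@' is percent-encoded; parameter order may differ)
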
@@ -239,7 +259,7 @@ class BrightcoveLegacyIE(InfoExtractor):
             # We set the original url as the default 'Referer' header
             referer = smuggled_data.get('Referer', url)
             return self._get_video_info(
-                videoPlayer[0], query_str, query, referer=referer)
+                videoPlayer[0], query, referer=referer)
         elif 'playerKey' in query:
             player_key = query['playerKey']
             return self._get_playlist_info(player_key[0])
@@ -248,15 +268,14 @@ class BrightcoveLegacyIE(InfoExtractor):
                 'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?',
                 expected=True)
 
-    def _get_video_info(self, video_id, query_str, query, referer=None):
-        request_url = self._FEDERATED_URL_TEMPLATE % query_str
-        req = sanitized_Request(request_url)
+    def _get_video_info(self, video_id, query, referer=None):
+        headers = {}
         linkBase = query.get('linkBaseURL')
         if linkBase is not None:
             referer = linkBase[0]
         if referer is not None:
-            req.add_header('Referer', referer)
-        webpage = self._download_webpage(req, video_id)
+            headers['Referer'] = referer
+        webpage = self._download_webpage(self._FEDERATED_URL, video_id, headers=headers, query=query)
 
         error_msg = self._html_search_regex(
             r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage,
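
Two things changed in _get_video_info: _download_webpage in this release accepts headers and query keyword arguments, so the hand-built sanitized_Request goes away, while the Referer precedence is preserved, with linkBaseURL from the query still beating the smuggled or original URL. The precedence rule in isolation, as a sketch:

    def pick_referer(query, smuggled_referer=None):
        # linkBaseURL wins over whatever Referer the caller passed in
        link_base = query.get('linkBaseURL')
        if link_base is not None:
            return link_base[0]  # compat_parse_qs values are lists
        return smuggled_referer

    assert pick_referer({'linkBaseURL': ['http://www.redbull.com/en/bike']},
                        'http://example.com/page') == 'http://www.redbull.com/en/bike'
    assert pick_referer({}, 'http://example.com/page') == 'http://example.com/page'
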
@@ -288,15 +307,20 @@ class BrightcoveLegacyIE(InfoExtractor):
                                     playlist_title=playlist_info['mediaCollectionDTO']['displayName'])
 
     def _extract_video_info(self, video_info):
+        video_id = compat_str(video_info['id'])
+        publisher_id = video_info.get('publisherId')
         info = {
-            'id': compat_str(video_info['id']),
+            'id': video_id,
             'title': video_info['displayName'].strip(),
             'description': video_info.get('shortDescription'),
             'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
             'uploader': video_info.get('publisherName'),
+            'uploader_id': compat_str(publisher_id) if publisher_id else None,
+            'duration': float_or_none(video_info.get('length'), 1000),
+            'timestamp': int_or_none(video_info.get('creationDate'), 1000),
         }
 
-        renditions = video_info.get('renditions')
+        renditions = video_info.get('renditions', []) + video_info.get('IOSRenditions', [])
         if renditions:
             formats = []
             for rend in renditions:
@@ -308,7 +332,8 @@ class BrightcoveLegacyIE(InfoExtractor):
                     url_comp = compat_urllib_parse_urlparse(url)
                     if url_comp.path.endswith('.m3u8'):
                         formats.extend(
-                            self._extract_m3u8_formats(url, info['id'], 'mp4'))
+                            self._extract_m3u8_formats(
+                                url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
                         continue
                     elif 'akamaihd.net' in url_comp.netloc:
                         # This type of renditions are served through
@@ -317,19 +342,42 @@ class BrightcoveLegacyIE(InfoExtractor):
                         ext = 'flv'
                 if ext is None:
                     ext = determine_ext(url)
-                size = rend.get('size')
-                formats.append({
+                tbr = int_or_none(rend.get('encodingRate'), 1000)
+                a_format = {
+                    'format_id': 'http%s' % ('-%s' % tbr if tbr else ''),
                     'url': url,
                     'ext': ext,
-                    'height': rend.get('frameHeight'),
-                    'width': rend.get('frameWidth'),
-                    'filesize': size if size != 0 else None,
-                })
+                    'filesize': int_or_none(rend.get('size')) or None,
+                    'tbr': tbr,
+                }
+                if rend.get('audioOnly'):
+                    a_format.update({
+                        'vcodec': 'none',
+                    })
+                else:
+                    a_format.update({
+                        'height': int_or_none(rend.get('frameHeight')),
+                        'width': int_or_none(rend.get('frameWidth')),
+                        'vcodec': rend.get('videoCodec'),
+                    })
+
+                # m3u8 manifests with remote == false are media playlists
+                # Not calling _extract_m3u8_formats here to save network traffic
+                if ext == 'm3u8':
+                    a_format.update({
+                        'format_id': 'hls%s' % ('-%s' % tbr if tbr else ''),
+                        'ext': 'mp4',
+                        'protocol': 'm3u8_native',
+                    })
+
+                formats.append(a_format)
             self._sort_formats(formats)
             info['formats'] = formats
         elif video_info.get('FLVFullLengthURL') is not None:
             info.update({
                 'url': video_info['FLVFullLengthURL'],
+                'vcodec': self.FLV_VCODECS.get(video_info.get('FLVFullCodec')),
+                'filesize': int_or_none(video_info.get('FLVFullSize')),
             })
 
         if self._downloader.params.get('include_ads', False):
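
Brightcove reports encodingRate in bits per second, so int_or_none(..., 1000) scales it down to the kbit/s that youtube-dl stores in tbr, which in turn feeds the http-<tbr> and hls-<tbr> format ids; the trailing `or None` keeps a zero-byte size out of filesize, as before. Worked through with an invented rendition:

    from youtube_dl.utils import int_or_none

    rend = {'encodingRate': 1804000, 'size': 0}        # hypothetical values
    tbr = int_or_none(rend.get('encodingRate'), 1000)  # 1804000 b/s -> 1804
    print('http%s' % ('-%s' % tbr if tbr else ''))     # http-1804
    print(int_or_none(rend.get('size')) or None)       # 0 is falsy -> None
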
@@ -349,13 +397,13 @@ class BrightcoveLegacyIE(InfoExtractor):
                     return ad_info
 
         if 'url' not in info and not info.get('formats'):
-            raise ExtractorError('Unable to extract video url for %s' % info['id'])
+            raise ExtractorError('Unable to extract video url for %s' % video_id)
         return info
 
 
 class BrightcoveNewIE(InfoExtractor):
     IE_NAME = 'brightcove:new'
-    _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>(?:ref:)?\d+)'
+    _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>\d+|ref:[^&]+)'
     _TESTS = [{
         'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',
         'md5': 'c8100925723840d4b0d243f7025703be',
@@ -385,12 +433,21 @@ class BrightcoveNewIE(InfoExtractor):
             'formats': 'mincount:41',
         },
         'params': {
+            # m3u8 download
             'skip_download': True,
         }
     }, {
         # ref: prefixed video id
         'url': 'http://players.brightcove.net/3910869709001/21519b5c-4b3b-4363-accb-bdc8f358f823_default/index.html?videoId=ref:7069442',
         'only_matching': True,
+    }, {
+        # non-numeric ref: prefixed video id
+        'url': 'http://players.brightcove.net/710858724001/default_default/index.html?videoId=ref:event-stream-356',
+        'only_matching': True,
+    }, {
+        # unavailable video without message but with error_code
+        'url': 'http://players.brightcove.net/1305187701/c832abfb-641b-44eb-9da0-2fe76786505f_default/index.html?videoId=4377407326001',
+        'only_matching': True,
     }]
 
     @staticmethod
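
The loosened _VALID_URL is what the new only_matching tests exercise: the video-id group now takes either a numeric id or any ref: value running up to the next &. Checking just that group in isolation (pattern copied from _VALID_URL, URLs shortened and query parameters invented):

    import re

    VIDEO_ID = re.compile(r'videoId=(?P<video_id>\d+|ref:[^&]+)')
    for u in ('index.html?videoId=4463358922001',
              'index.html?videoId=ref:event-stream-356&autoplay=1',
              'index.html?videoId=ref:7069442'):
        print(VIDEO_ID.search(u).group('video_id'))
    # 4463358922001, then ref:event-stream-356, then ref:7069442
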
@@ -410,8 +467,8 @@ class BrightcoveNewIE(InfoExtractor):
 
         # Look for iframe embeds [1]
         for _, url in re.findall(
-                r'<iframe[^>]+src=(["\'])((?:https?:)//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
-            entries.append(url)
+                r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
+            entries.append(url if url.startswith('http') else 'http:' + url)
 
         # Look for embed_in_page embeds [2]
         for video_id, account_id, player_id, embed in re.findall(
@@ -420,11 +477,11 @@ class BrightcoveNewIE(InfoExtractor):
                 # According to [4] data-video-id may be prefixed with ref:
                 r'''(?sx)
                     <video[^>]+
-                        data-video-id=["\']((?:ref:)?\d+)["\'][^>]*>.*?
+                        data-video-id=["\'](\d+|ref:[^"\']+)["\'][^>]*>.*?
                     </video>.*?
                     <script[^>]+
                         src=["\'](?:https?:)?//players\.brightcove\.net/
-                        (\d+)/([\da-f-]+)_([^/]+)/index\.min\.js
+                        (\d+)/([^/]+)_([^/]+)/index(?:\.min)?\.js
                 ''', webpage):
             entries.append(
                 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
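
The iframe regex now also tolerates protocol-relative //players.brightcove.net/... sources (the added ? after (?:https?:)), and such URLs get an explicit http: prefixed before being queued; the embed_in_page regex below it is relaxed in the same spirit. The normalization step on its own (illustrative URLs):

    urls = ['//players.brightcove.net/123/abc_default/index.html?videoId=1',
            'https://players.brightcove.net/123/abc_default/index.html?videoId=2']
    print([u if u.startswith('http') else 'http:' + u for u in urls])
    # the first entry gains 'http:', the second passes through untouched
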
@@ -454,24 +511,34 @@ class BrightcoveNewIE(InfoExtractor):
                 r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
                 webpage, 'policy key', group='pk')
 
-        req = sanitized_Request(
-            'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s'
-            % (account_id, video_id),
-            headers={'Accept': 'application/json;pk=%s' % policy_key})
-        json_data = self._download_json(req, video_id)
+        api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id)
+        try:
+            json_data = self._download_json(api_url, video_id, headers={
+                'Accept': 'application/json;pk=%s' % policy_key
+            })
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+                json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
+                raise ExtractorError(
+                    json_data.get('message') or json_data['error_code'], expected=True)
+            raise
 
-        title = json_data['name']
+        title = json_data['name'].strip()
 
         formats = []
         for source in json_data.get('sources', []):
+            container = source.get('container')
             source_type = source.get('type')
             src = source.get('src')
-            if source_type == 'application/x-mpegURL':
+            if source_type == 'application/x-mpegURL' or container == 'M2TS':
                 if not src:
                     continue
                 formats.extend(self._extract_m3u8_formats(
-                    src, video_id, 'mp4', entry_protocol='m3u8_native',
-                    m3u8_id='hls', fatal=False))
+                    src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+            elif source_type == 'application/dash+xml':
+                if not src:
+                    continue
+                formats.extend(self._extract_mpd_formats(src, video_id, 'dash', fatal=False))
             else:
                 streaming_src = source.get('streaming_src')
                 stream_name, app_name = source.get('stream_name'), source.get('app_name')
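
The Playback API request carries the policy key in the Accept header rather than the URL, and on HTTP 403 the response body is a JSON array whose first element holds message and error_code fields, which the new except branch surfaces as an expected ExtractorError. A rough standalone equivalent using only the standard library (account, video and policy-key values are placeholders; the payload shape is inferred from the handling above, and RuntimeError stands in for ExtractorError):

    import json
    try:
        from urllib.request import Request, urlopen
        from urllib.error import HTTPError
    except ImportError:  # Python 2
        from urllib2 import Request, urlopen, HTTPError

    def playback_api(account_id, video_id, policy_key):
        req = Request(
            'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s'
            % (account_id, video_id),
            headers={'Accept': 'application/json;pk=%s' % policy_key})
        try:
            return json.loads(urlopen(req).read().decode('utf-8'))
        except HTTPError as e:
            if e.code == 403:
                # body like [{"error_code": "...", "message": "..."}]
                err = json.loads(e.read().decode('utf-8'))[0]
                raise RuntimeError(err.get('message') or err['error_code'])
            raise
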
@@ -479,15 +546,23 @@ class BrightcoveNewIE(InfoExtractor):
                     continue
                 tbr = float_or_none(source.get('avg_bitrate'), 1000)
                 height = int_or_none(source.get('height'))
+                width = int_or_none(source.get('width'))
                 f = {
                     'tbr': tbr,
-                    'width': int_or_none(source.get('width')),
-                    'height': height,
                     'filesize': int_or_none(source.get('size')),
-                    'container': source.get('container'),
-                    'vcodec': source.get('codec'),
-                    'ext': source.get('container').lower(),
+                    'container': container,
+                    'ext': container.lower(),
                 }
+                if width == 0 and height == 0:
+                    f.update({
+                        'vcodec': 'none',
+                    })
+                else:
+                    f.update({
+                        'width': width,
+                        'height': height,
+                        'vcodec': source.get('codec'),
+                    })
 
                 def build_format_id(kind):
                     format_id = kind
@@ -501,7 +576,7 @@ class BrightcoveNewIE(InfoExtractor):
                     f.update({
                         'url': src or streaming_src,
                         'format_id': build_format_id('http' if src else 'http-streaming'),
-                        'preference': 2 if src else 1,
+                        'source_preference': 0 if src else -1,
                     })
                 else:
                     f.update({
@@ -512,20 +587,22 @@ class BrightcoveNewIE(InfoExtractor):
                 formats.append(f)
         self._sort_formats(formats)
 
-        description = json_data.get('description')
-        thumbnail = json_data.get('thumbnail')
-        timestamp = parse_iso8601(json_data.get('published_at'))
-        duration = float_or_none(json_data.get('duration'), 1000)
-        tags = json_data.get('tags', [])
+        subtitles = {}
+        for text_track in json_data.get('text_tracks', []):
+            if text_track.get('src'):
+                subtitles.setdefault(text_track.get('srclang'), []).append({
+                    'url': text_track['src'],
+                })
 
         return {
             'id': video_id,
             'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'duration': duration,
-            'timestamp': timestamp,
+            'description': json_data.get('description'),
+            'thumbnail': json_data.get('thumbnail') or json_data.get('poster'),
+            'duration': float_or_none(json_data.get('duration'), 1000),
+            'timestamp': parse_iso8601(json_data.get('published_at')),
             'uploader_id': account_id,
             'formats': formats,
-            'tags': tags,
+            'subtitles': subtitles,
+            'tags': json_data.get('tags', []),
         }
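
The text_tracks loop emits subtitles in youtube-dl's standard shape, a dict keyed by language code whose values are lists of {'url': ...} entries, and silently drops tracks without a src. Against a hypothetical API response:

    json_data = {'text_tracks': [
        {'src': 'https://example.com/captions/en.vtt', 'srclang': 'en'},
        {'src': None, 'srclang': 'de'},  # skipped: no src
    ]}
    subtitles = {}
    for text_track in json_data.get('text_tracks', []):
        if text_track.get('src'):
            subtitles.setdefault(text_track.get('srclang'), []).append({
                'url': text_track['src'],
            })
    print(subtitles)  # {'en': [{'url': 'https://example.com/captions/en.vtt'}]}
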
diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py
index dda98059e9041c651de5a211fccb2c106b11bb75..3aec601f8e7179570088e1ea5ad1f7b6d30f219d 100644
--- a/youtube_dl/extractor/byutv.py
+++ b/youtube_dl/extractor/byutv.py
@@ -11,6 +11,7 @@ class BYUtvIE(InfoExtractor):
     _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)'
     _TEST = {
         'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
+        'md5': '05850eb8c749e2ee05ad5a1c34668493',
         'info_dict': {
             'id': 'studio-c-season-5-episode-5',
             'ext': 'mp4',
@@ -21,7 +22,8 @@ class BYUtvIE(InfoExtractor):
         },
         'params': {
             'skip_download': True,
-        }
+        },
+        'add_ie': ['Ooyala'],
     }
 
     def _real_extract(self, url):
diff --git a/youtube_dl/extractor/c56.py b/youtube_dl/extractor/c56.py
index cb96c3876b7cbf02220d06ad86a44414d69c9fa8..cac8fdcba4a9967b9aa028c29bac1d539af458ca 100644
--- a/youtube_dl/extractor/c56.py
+++ b/youtube_dl/extractor/c56.py
@@ -4,12 +4,13 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..utils import js_to_json
 
 
 class C56IE(InfoExtractor):
     _VALID_URL = r'https?://(?:(?:www|player)\.)?56\.com/(?:.+?/)?(?:v_|(?:play_album.+-))(?P<textid>.+?)\.(?:html|swf)'
     IE_NAME = '56.com'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html',
         'md5': 'e59995ac63d0457783ea05f93f12a866',
         'info_dict': {
@@ -18,12 +19,29 @@ class C56IE(InfoExtractor):
             'title': '网事知多少 第32期:车怒',
             'duration': 283.813,
         },
-    }
+    }, {
+        'url': 'http://www.56.com/u47/v_MTM5NjQ5ODc2.html',
+        'md5': '',
+        'info_dict': {
+            'id': '82247482',
+            'title': '爱的诅咒之杜鹃花开',
+        },
+        'playlist_count': 7,
+        'add_ie': ['Sohu'],
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
         text_id = mobj.group('textid')
 
+        webpage = self._download_webpage(url, text_id)
+        sohu_video_info_str = self._search_regex(
+            r'var\s+sohuVideoInfo\s*=\s*({[^}]+});', webpage, 'Sohu video info', default=None)
+        if sohu_video_info_str:
+            sohu_video_info = self._parse_json(
+                sohu_video_info_str, text_id, transform_source=js_to_json)
+            return self.url_result(sohu_video_info['url'], 'Sohu')
+
         page = self._download_json(
             'http://vxml.56.com/json/%s/' % text_id, text_id, 'Downloading video info')
 
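
js_to_json is what makes the scraped sohuVideoInfo object literal digestible: it quotes bare identifier keys and converts single-quoted strings so the result parses as plain JSON. Roughly, on an invented snippet of the same shape:

    import json
    from youtube_dl.utils import js_to_json

    src = "{url: 'http://tv.sohu.com/item/12345.shtml', vid: 82247482}"
    print(json.loads(js_to_json(src)))
    # {'url': 'http://tv.sohu.com/item/12345.shtml', 'vid': 82247482}
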
diff --git a/youtube_dl/extractor/camdemy.py b/youtube_dl/extractor/camdemy.py
index 897f3a104ce2d31aeac99e98197557ef502faf18..6ffbeabd371fd6f80a9ead1d23762f760a13ba2f 100644
--- a/youtube_dl/extractor/camdemy.py
+++ b/youtube_dl/extractor/camdemy.py
@@ -6,7 +6,7 @@ import re
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_parse,
+    compat_urllib_parse_urlencode,
     compat_urlparse,
 )
 from ..utils import (
@@ -16,7 +16,7 @@ from ..utils import (
 
 
 class CamdemyIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?camdemy\.com/media/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?camdemy\.com/media/(?P<id>\d+)'
     _TESTS = [{
         # single file
         'url': 'http://www.camdemy.com/media/5181/',
@@ -104,7 +104,7 @@ class CamdemyIE(InfoExtractor):
 
 
 class CamdemyFolderIE(InfoExtractor):
-    _VALID_URL = r'http://www.camdemy.com/folder/(?P<id>\d+)'
+    _VALID_URL = r'https?://www.camdemy.com/folder/(?P<id>\d+)'
     _TESTS = [{
         # links with trailing slash
         'url': 'http://www.camdemy.com/folder/450',
@@ -139,7 +139,7 @@ class CamdemyFolderIE(InfoExtractor):
         parsed_url = list(compat_urlparse.urlparse(url))
         query = dict(compat_urlparse.parse_qsl(parsed_url[4]))
         query.update({'displayMode': 'list'})
-        parsed_url[4] = compat_urllib_parse.urlencode(query)
+        parsed_url[4] = compat_urllib_parse_urlencode(query)
         final_url = compat_urlparse.urlunparse(parsed_url)
 
         page = self._download_webpage(final_url, folder_id)
diff --git a/youtube_dl/extractor/camwithher.py b/youtube_dl/extractor/camwithher.py
new file mode 100644
index 0000000..afbc5ea
--- /dev/null
+++ b/youtube_dl/extractor/camwithher.py
@@ -0,0 +1,87 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    parse_duration,
+    unified_strdate,
+)
+
+
+class CamWithHerIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?camwithher\.tv/view_video\.php\?.*\bviewkey=(?P<id>\w+)'
+
+    _TESTS = [{
+        'url': 'http://camwithher.tv/view_video.php?viewkey=6e9a24e2c0e842e1f177&page=&viewtype=&category=',
+        'info_dict': {
+            'id': '5644',
+            'ext': 'flv',
+            'title': 'Periscope Tease',
+            'description': 'In the clouds teasing on periscope to my favorite song',
+            'duration': 240,
+            'view_count': int,
+            'comment_count': int,
+            'uploader': 'MileenaK',
+            'upload_date': '20160322',
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }, {
+        'url': 'http://camwithher.tv/view_video.php?viewkey=6dfd8b7c97531a459937',
+        'only_matching': True,
+    }, {
+        'url': 'http://camwithher.tv/view_video.php?page=&viewkey=6e9a24e2c0e842e1f177&viewtype=&category=',
+        'only_matching': True,
+    }, {
+        'url': 'http://camwithher.tv/view_video.php?viewkey=b6c3b5bea9515d1a1fc4&page=&viewtype=&category=mv',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        flv_id = self._html_search_regex(
+            r'<a[^>]+href=["\']/download/\?v=(\d+)', webpage, 'video id')
+
+        # Video URL construction algorithm is reverse-engineered from cwhplayer.swf
+        rtmp_url = 'rtmp://camwithher.tv/clipshare/%s' % (
+            ('mp4:%s.mp4' % flv_id) if int(flv_id) > 2010 else flv_id)
+
+        title = self._html_search_regex(
+            r'<div[^>]+style="float:left"[^>]*>\s*<h2>(.+?)</h2>', webpage, 'title')
+        description = self._html_search_regex(
+            r'>Description:</span>(.+?)</div>', webpage, 'description', default=None)
+
+        runtime = self._search_regex(
+            r'Runtime\s*:\s*(.+?) \|', webpage, 'duration', default=None)
+        if runtime:
+            runtime = re.sub(r'[\s-]', '', runtime)
+        duration = parse_duration(runtime)
+        view_count = int_or_none(self._search_regex(
+            r'Views\s*:\s*(\d+)', webpage, 'view count', default=None))
+        comment_count = int_or_none(self._search_regex(
+            r'Comments\s*:\s*(\d+)', webpage, 'comment count', default=None))
+
+        uploader = self._search_regex(
+            r'Added by\s*:\s*<a[^>]+>([^<]+)</a>', webpage, 'uploader', default=None)
+        upload_date = unified_strdate(self._search_regex(
+            r'Added on\s*:\s*([\d-]+)', webpage, 'upload date', default=None))
+
+        return {
+            'id': flv_id,
+            'url': rtmp_url,
+            'ext': 'flv',
+            'no_resume': True,
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'view_count': view_count,
+            'comment_count': comment_count,
+            'uploader': uploader,
+            'upload_date': upload_date,
+        }
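
The reverse-engineered rule branches on the numeric id: clips with an id above 2010 are addressed by an mp4:<id>.mp4 stream name under the clipshare application, older ones by the bare id. Worked through for both branches (the second id is invented):

    def cwh_rtmp_url(flv_id):
        return 'rtmp://camwithher.tv/clipshare/%s' % (
            ('mp4:%s.mp4' % flv_id) if int(flv_id) > 2010 else flv_id)

    print(cwh_rtmp_url('5644'))  # rtmp://camwithher.tv/clipshare/mp4:5644.mp4
    print(cwh_rtmp_url('1337'))  # rtmp://camwithher.tv/clipshare/1337
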
diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py
index 25b2d4efe5d54e1c3264f906a3105ad05dd2ca3f..61463f249f6e4ded3b5f59831d7dba421ef9de9a 100644
--- a/youtube_dl/extractor/canalplus.py
+++ b/youtube_dl/extractor/canalplus.py
@@ -4,11 +4,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_urllib_parse_urlparse
 from ..utils import (
     ExtractorError,
     HEADRequest,
     unified_strdate,
-    url_basename,
     qualities,
     int_or_none,
 )
@@ -16,24 +16,38 @@ from ..utils import (
 
 class CanalplusIE(InfoExtractor):
     IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv'
-    _VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv|itele\.fr)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))'
+    _VALID_URL = r'''(?x)
+                        https?://
+                            (?:
+                                (?:
+                                    (?:(?:www|m)\.)?canalplus\.fr|
+                                    (?:www\.)?piwiplus\.fr|
+                                    (?:www\.)?d8\.tv|
+                                    (?:www\.)?d17\.tv|
+                                    (?:www\.)?itele\.fr
+                                )/(?:(?:[^/]+/)*(?P<display_id>[^/?#&]+))?(?:\?.*\bvid=(?P<vid>\d+))?|
+                                player\.canalplus\.fr/#/(?P<id>\d+)
+                            )
+
+                    '''
     _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s?format=json'
     _SITE_ID_MAP = {
-        'canalplus.fr': 'cplus',
-        'piwiplus.fr': 'teletoon',
-        'd8.tv': 'd8',
-        'itele.fr': 'itele',
+        'canalplus': 'cplus',
+        'piwiplus': 'teletoon',
+        'd8': 'd8',
+        'd17': 'd17',
+        'itele': 'itele',
     }
 
     _TESTS = [{
-        'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1263092',
-        'md5': '12164a6f14ff6df8bd628e8ba9b10b78',
+        'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1192814',
+        'md5': '41f438a4904f7664b91b4ed0dec969dc',
         'info_dict': {
-            'id': '1263092',
+            'id': '1192814',
             'ext': 'mp4',
-            'title': 'Le Zapping - 13/05/15',
-            'description': 'md5:09738c0d06be4b5d06a0940edb0da73f',
-            'upload_date': '20150513',
+            'title': "L'Année du Zapping 2014 - L'Année du Zapping 2014",
+            'description': "Toute l'année 2014 dans un Zapping exceptionnel !",
+            'upload_date': '20150105',
         },
     }, {
         'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190',
@@ -46,35 +60,45 @@ class CanalplusIE(InfoExtractor):
         },
         'skip': 'Only works from France',
     }, {
-        'url': 'http://www.d8.tv/d8-docs-mags/pid6589-d8-campagne-intime.html',
+        'url': 'http://www.d8.tv/d8-docs-mags/pid5198-d8-en-quete-d-actualite.html?vid=1390231',
         'info_dict': {
-            'id': '966289',
-            'ext': 'flv',
-            'title': 'Campagne intime - Documentaire exceptionnel',
-            'description': 'md5:d2643b799fb190846ae09c61e59a859f',
-            'upload_date': '20131108',
+            'id': '1390231',
+            'ext': 'mp4',
+            'title': "Vacances pas chères : prix discount ou grosses dépenses ? - En quête d'actualité",
+            'description': 'md5:edb6cf1cb4a1e807b5dd089e1ac8bfc6',
+            'upload_date': '20160512',
+        },
+        'params': {
+            'skip_download': True,
         },
-        'skip': 'videos get deleted after a while',
     }, {
-        'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559',
-        'md5': '38b8f7934def74f0d6f3ba6c036a5f82',
+        'url': 'http://www.itele.fr/chroniques/invite-bruce-toussaint/thierry-solere-nicolas-sarkozy-officialisera-sa-candidature-a-la-primaire-quand-il-le-voudra-167224',
         'info_dict': {
-            'id': '1213714',
+            'id': '1398334',
             'ext': 'mp4',
-            'title': 'Aubervilliers : un lycée en colère - Le 11/02/2015 à 06h45',
-            'description': 'md5:8216206ec53426ea6321321f3b3c16db',
-            'upload_date': '20150211',
+            'title': "L'invité de Bruce Toussaint du 07/06/2016 - ",
+            'description': 'md5:40ac7c9ad0feaeb6f605bad986f61324',
+            'upload_date': '20160607',
         },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://m.canalplus.fr/?vid=1398231',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.d17.tv/emissions/pid8303-lolywood.html?vid=1397061',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.groupdict().get('id')
+        video_id = mobj.groupdict().get('id') or mobj.groupdict().get('vid')
 
-        site_id = self._SITE_ID_MAP[mobj.group('site') or 'canal']
+        site_id = self._SITE_ID_MAP[compat_urllib_parse_urlparse(url).netloc.rsplit('.', 2)[-2]]
 
         # Beware, some subclasses do not define an id group
-        display_id = url_basename(mobj.group('path'))
+        display_id = mobj.group('display_id') or video_id
 
         if video_id is None:
             webpage = self._download_webpage(url, display_id)
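
The _SITE_ID_MAP keys lost their TLDs because the site is now read off the hostname: rsplit('.', 2) keeps at most the last three dot-separated labels and [-2] picks the registrable name, so www. and m. prefixes fall away along with the .fr or .tv suffix. Step by step on two of the test URLs above:

    try:
        from urllib.parse import urlparse  # Python 3
    except ImportError:  # Python 2
        from urlparse import urlparse

    for url in ('http://m.canalplus.fr/?vid=1398231',
                'http://www.d17.tv/emissions/pid8303-lolywood.html?vid=1397061'):
        netloc = urlparse(url).netloc     # e.g. 'm.canalplus.fr'
        print(netloc.rsplit('.', 2)[-2])  # ['m', 'canalplus', 'fr'][-2]
    # prints canalplus, then d17
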
diff --git a/youtube_dl/extractor/carambatv.py b/youtube_dl/extractor/carambatv.py
new file mode 100644
index 0000000..5797fb9
--- /dev/null
+++ b/youtube_dl/extractor/carambatv.py
@@ -0,0 +1,88 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    float_or_none,
+    int_or_none,
+    try_get,
+)
+
+
+class CarambaTVIE(InfoExtractor):
+    _VALID_URL = r'(?:carambatv:|https?://video1\.carambatv\.ru/v/)(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://video1.carambatv.ru/v/191910501',
+        'md5': '2f4a81b7cfd5ab866ee2d7270cb34a2a',
+        'info_dict': {
+            'id': '191910501',
+            'ext': 'mp4',
+            'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'duration': 2678.31,
+        },
+    }, {
+        'url': 'carambatv:191910501',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        video = self._download_json(
+            'http://video1.carambatv.ru/v/%s/videoinfo.js' % video_id,
+            video_id)
+
+        title = video['title']
+
+        base_url = video.get('video') or 'http://video1.carambatv.ru/v/%s/' % video_id
+
+        formats = [{
+            'url': base_url + f['fn'],
+            'height': int_or_none(f.get('height')),
+            'format_id': '%sp' % f['height'] if f.get('height') else None,
+        } for f in video['qualities'] if f.get('fn')]
+        self._sort_formats(formats)
+
+        thumbnail = video.get('splash')
+        duration = float_or_none(try_get(
+            video, lambda x: x['annotations'][0]['end_time'], compat_str))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'formats': formats,
+        }
+
+
+class CarambaTVPageIE(InfoExtractor):
+    _VALID_URL = r'https?://carambatv\.ru/(?:[^/]+/)+(?P<id>[^/?#&]+)'
+    _TEST = {
+        'url': 'http://carambatv.ru/movie/bad-comedian/razborka-v-manile/',
+        'md5': '',
+        'info_dict': {
+            'id': '191910501',
+            'ext': 'mp4',
+            'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 2678.31,
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_url = self._og_search_property('video:iframe', webpage, default=None)
+
+        if not video_url:
+            video_id = self._search_regex(
+                r'(?:video_id|crmb_vuid)\s*[:=]\s*["\']?(\d+)',
+                webpage, 'video id')
+            video_url = 'carambatv:%s' % video_id
+
+        return self.url_result(video_url, CarambaTVIE.ie_key())
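
try_get, used above for the duration, applies a chain of lookups and swallows the usual lookup errors (KeyError, IndexError, TypeError, AttributeError in this release's helper), returning the value only when it matches the expected type; end_time arrives as a string inside the first annotation, hence the compat_str check before float_or_none. A small demonstration on trimmed-down data:

    from youtube_dl.compat import compat_str
    from youtube_dl.utils import float_or_none, try_get

    video = {'annotations': [{'end_time': '2678.31'}]}  # trimmed videoinfo.js
    print(float_or_none(try_get(
        video, lambda x: x['annotations'][0]['end_time'], compat_str)))  # 2678.31
    print(try_get({}, lambda x: x['annotations'][0]['end_time'], compat_str))  # None
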
diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py
index d8aa31038bfb85f6e5123fe8e7831a2eb22c0c45..ff663d07947fd345a107203892cbe1811080ac6c 100644
--- a/youtube_dl/extractor/cbc.py
+++ b/youtube_dl/extractor/cbc.py
@@ -4,64 +4,66 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import js_to_json
+from ..utils import (
+    js_to_json,
+    smuggle_url,
+)
 
 
 class CBCIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?:[^/]+/)+(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?!player/)(?:[^/]+/)+(?P<id>[^/?#]+)'
     _TESTS = [{
         # with mediaId
         'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs',
+        'md5': '97e24d09672fc4cf56256d6faa6c25bc',
         'info_dict': {
             'id': '2682904050',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Don Cherry – All-Stars',
             'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.',
-            'timestamp': 1454475540,
+            'timestamp': 1454463000,
             'upload_date': '20160203',
-        },
-        'params': {
-            # rtmp download
-            'skip_download': True,
+            'uploader': 'CBCC-NEW',
         },
     }, {
         # with clipId
         'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
+        'md5': '0274a90b51a9b4971fe005c63f592f12',
         'info_dict': {
             'id': '2487345465',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Robin Williams freestyles on 90 Minutes Live',
             'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.',
-            'upload_date': '19700101',
-        },
-        'params': {
-            # rtmp download
-            'skip_download': True,
+            'upload_date': '19780210',
+            'uploader': 'CBCC-NEW',
+            'timestamp': 255977160,
         },
     }, {
         # multiple iframes
         'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot',
         'playlist': [{
+            'md5': '377572d0b49c4ce0c9ad77470e0b96b4',
             'info_dict': {
                 'id': '2680832926',
-                'ext': 'flv',
+                'ext': 'mp4',
                 'title': 'An Eagle\'s-Eye View Off Burrard Bridge',
                 'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.',
-                'upload_date': '19700101',
+                'upload_date': '20160201',
+                'timestamp': 1454342820,
+                'uploader': 'CBCC-NEW',
             },
         }, {
+            'md5': '415a0e3f586113894174dfb31aa5bb1a',
             'info_dict': {
                 'id': '2658915080',
-                'ext': 'flv',
+                'ext': 'mp4',
                 'title': 'Fly like an eagle!',
                 'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower',
-                'upload_date': '19700101',
+                'upload_date': '20150315',
+                'timestamp': 1426443984,
+                'uploader': 'CBCC-NEW',
             },
         }],
-        'params': {
-            # rtmp download
-            'skip_download': True,
-        },
     }]
 
     @classmethod
@@ -90,24 +92,54 @@ class CBCIE(InfoExtractor):
 
 class CBCPlayerIE(InfoExtractor):
     _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.cbc.ca/player/play/2683190193',
+        'md5': '64d25f841ddf4ddb28a235338af32e2c',
         'info_dict': {
             'id': '2683190193',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Gerry Runs a Sweat Shop',
             'description': 'md5:b457e1c01e8ff408d9d801c1c2cd29b0',
-            'timestamp': 1455067800,
+            'timestamp': 1455071400,
             'upload_date': '20160210',
+            'uploader': 'CBCC-NEW',
         },
-        'params': {
-            # rtmp download
-            'skip_download': True,
+    }, {
+        # Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
+        'url': 'http://www.cbc.ca/player/play/2657631896',
+        'md5': 'e5e708c34ae6fca156aafe17c43e8b75',
+        'info_dict': {
+            'id': '2657631896',
+            'ext': 'mp3',
+            'title': 'CBC Montreal is organizing its first ever community hackathon!',
+            'description': 'The modern technology we tend to depend on so heavily, is never without it\'s share of hiccups and headaches. Next weekend - CBC Montreal will be getting members of the public for its first Hackathon.',
+            'timestamp': 1425704400,
+            'upload_date': '20150307',
+            'uploader': 'CBCC-NEW',
         },
-    }
+    }, {
+        # available only when we add `formats=MPEG4,FLV,MP3` to theplatform url
+        'url': 'http://www.cbc.ca/player/play/2164402062',
+        'md5': '17a61eb813539abea40618d6323a7f82',
+        'info_dict': {
+            'id': '2164402062',
+            'ext': 'flv',
+            'title': 'Cancer survivor four times over',
+            'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.',
+            'timestamp': 1320410746,
+            'upload_date': '20111104',
+            'uploader': 'CBCC-NEW',
+        },
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        return self.url_result(
-            'http://feed.theplatform.com/f/ExhSPC/vms_5akSXx4Ng_Zn?byGuid=%s' % video_id,
-            'ThePlatformFeed', video_id)
+        return {
+            '_type': 'url_transparent',
+            'ie_key': 'ThePlatform',
+            'url': smuggle_url(
+                'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/%s?mbr=true&formats=MPEG4,FLV,MP3' % video_id, {
+                    'force_smil_url': True
+                }),
+            'id': video_id,
+        }
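
Two conventions meet in CBCPlayerIE's new return value: '_type': 'url_transparent' lets fields set here override what ThePlatform extracts, and smuggle_url piggybacks extra hints onto the URL in a fragment that the receiving extractor unwraps with unsmuggle_url. A quick round trip (guid path as above, trailing media id a placeholder):

    from youtube_dl.utils import smuggle_url, unsmuggle_url

    url = smuggle_url(
        'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/123',
        {'force_smil_url': True})
    # appends '#__youtubedl_smuggle=' plus URL-encoded JSON
    print(unsmuggle_url(url))
    # ('http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/123',
    #  {'force_smil_url': True})
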
diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py
index 40d07ab181ff8462599f89e520a1ebc1711fe1b3..a23173d6f1a9570225242692ee74d68fc061fb3d 100644
--- a/youtube_dl/extractor/cbs.py
+++ b/youtube_dl/extractor/cbs.py
@@ -1,44 +1,53 @@
 from __future__ import unicode_literals
 
-from .common import InfoExtractor
+from .theplatform import ThePlatformFeedIE
 from ..utils import (
-    sanitized_Request,
-    smuggle_url,
+    int_or_none,
+    find_xpath_attr,
 )
 
 
-class CBSIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<id>[^/]+)'
+class CBSBaseIE(ThePlatformFeedIE):
+    def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
+        closed_caption_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', 'ClosedCaptionURL')
+        return {
+            'en': [{
+                'ext': 'ttml',
+                'url': closed_caption_e.attrib['value'],
+            }]
+        } if closed_caption_e is not None and closed_caption_e.attrib.get('value') else []
+
+    def _extract_video_info(self, filter_query, video_id):
+        return self._extract_feed_info(
+            'dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id, lambda entry: {
+                'series': entry.get('cbs$SeriesTitle'),
+                'season_number': int_or_none(entry.get('cbs$SeasonNumber')),
+                'episode': entry.get('cbs$EpisodeTitle'),
+                'episode_number': int_or_none(entry.get('cbs$EpisodeNumber')),
+            }, {
+                'StreamPack': {
+                    'manifest': 'm3u',
+                }
+            })
+
+
+class CBSIE(CBSBaseIE):
+    _VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)'
 
     _TESTS = [{
         'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
         'info_dict': {
-            'id': '4JUVEwq3wUT7',
+            'id': '_u7W953k6la293J7EPTd9oHkSPs6Xn6_',
             'display_id': 'connect-chat-feat-garth-brooks',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Connect Chat feat. Garth Brooks',
             'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
             'duration': 1495,
+            'timestamp': 1385585425,
+            'upload_date': '20131127',
+            'uploader': 'CBSI-NEW',
         },
-        'params': {
-            # rtmp download
-            'skip_download': True,
-        },
-        '_skip': 'Blocked outside the US',
-    }, {
-        'url': 'http://www.cbs.com/shows/liveonletterman/artist/221752/st-vincent/',
-        'info_dict': {
-            'id': 'WWF_5KqY3PK1',
-            'display_id': 'st-vincent',
-            'ext': 'flv',
-            'title': 'Live on Letterman - St. Vincent',
-            'description': 'Live On Letterman: St. Vincent in concert from New York\'s Ed Sullivan Theater on Tuesday, July 16, 2014.',
-            'duration': 3221,
-        },
-        'params': {
-            # rtmp download
-            'skip_download': True,
-        },
+        'expected_warnings': ['Failed to download m3u8 information'],
         '_skip': 'Blocked outside the US',
     }, {
         'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/',
@@ -47,22 +56,8 @@ class CBSIE(InfoExtractor):
         'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
         'only_matching': True,
     }]
+    TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true'
 
     def _real_extract(self, url):
-        display_id = self._match_id(url)
-        request = sanitized_Request(url)
-        # Android UA is served with higher quality (720p) streams (see
-        # https://github.com/rg3/youtube-dl/issues/7490)
-        request.add_header('User-Agent', 'Mozilla/5.0 (Linux; Android 4.4; Nexus 5)')
-        webpage = self._download_webpage(request, display_id)
-        real_id = self._search_regex(
-            [r"video\.settings\.pid\s*=\s*'([^']+)';", r"cbsplayer\.pid\s*=\s*'([^']+)';"],
-            webpage, 'real video ID')
-        return {
-            '_type': 'url_transparent',
-            'ie_key': 'ThePlatform',
-            'url': smuggle_url(
-                'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true&manifest=m3u' % real_id,
-                {'force_smil_url': True}),
-            'display_id': display_id,
-        }
+        content_id = self._match_id(url)
+        return self._extract_video_info('byGuid=%s' % content_id, content_id)
diff --git a/youtube_dl/extractor/cnet.py b/youtube_dl/extractor/cbsinteractive.py
similarity index 59%
rename from youtube_dl/extractor/cnet.py
rename to youtube_dl/extractor/cbsinteractive.py
index 5c3908f72b2f94c5557aaeaaa2e19ec78716c0f0..0011c30296971486e66fc121d82f7c9be8c53164 100644
--- a/youtube_dl/extractor/cnet.py
+++ b/youtube_dl/extractor/cbsinteractive.py
@@ -1,12 +1,14 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .theplatform import ThePlatformIE
 from ..utils import int_or_none
 
 
-class CNETIE(ThePlatformIE):
-    _VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/'
+class CBSInteractiveIE(ThePlatformIE):
+    _VALID_URL = r'https?://(?:www\.)?(?P<site>cnet|zdnet)\.com/(?:videos|video/share)/(?P<id>[^/?]+)'
     _TESTS = [{
         'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
         'info_dict': {
@@ -17,6 +19,8 @@ class CNETIE(ThePlatformIE):
             'uploader_id': '6085384d-619e-11e3-b231-14feb5ca9861',
             'uploader': 'Sarah Mitroff',
             'duration': 70,
+            'timestamp': 1396479627,
+            'upload_date': '20140402',
         },
     }, {
         'url': 'http://www.cnet.com/videos/whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187/',
@@ -28,15 +32,38 @@ class CNETIE(ThePlatformIE):
             'uploader_id': 'b163284d-6b73-44fc-b3e6-3da66c392d40',
             'uploader': 'Ashley Esqueda',
             'duration': 1482,
+            'timestamp': 1433289889,
+            'upload_date': '20150603',
+        },
+    }, {
+        'url': 'http://www.zdnet.com/video/share/video-keeping-android-smartphones-and-tablets-secure/',
+        'info_dict': {
+            'id': 'bc1af9f0-a2b5-4e54-880d-0d95525781c0',
+            'ext': 'mp4',
+            'title': 'Video: Keeping Android smartphones and tablets secure',
+            'description': 'Here\'s the best way to keep Android devices secure, and what you do when they\'ve come to the end of their lives.',
+            'uploader_id': 'f2d97ea2-8175-11e2-9d12-0018fe8a00b0',
+            'uploader': 'Adrian Kingsley-Hughes',
+            'timestamp': 1448961720,
+            'upload_date': '20151201',
         },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        }
     }]
+    TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/kYEXFC/%s?mbr=true'
+    MPX_ACCOUNTS = {
+        'cnet': 2288573011,
+        'zdnet': 2387448114,
+    }
 
     def _real_extract(self, url):
-        display_id = self._match_id(url)
+        site, display_id = re.match(self._VALID_URL, url).groups()
         webpage = self._download_webpage(url, display_id)
 
         data_json = self._html_search_regex(
-            r"data-cnet-video(?:-uvp)?-options='([^']+)'",
+            r"data-(?:cnet|zdnet)-video(?:-uvp)?-options='([^']+)'",
             webpage, 'data json')
         data = self._parse_json(data_json, display_id)
         vdata = data.get('video') or data['videos'][0]
@@ -51,18 +78,15 @@ class CNETIE(ThePlatformIE):
             uploader = None
             uploader_id = None
 
-        mpx_account = data['config']['uvpConfig']['default']['mpx_account']
-
-        metadata = self.get_metadata('%s/%s' % (mpx_account, list(vdata['files'].values())[0]), video_id)
-        description = vdata.get('description') or metadata.get('description')
-        duration = int_or_none(vdata.get('duration')) or metadata.get('duration')
-
-        formats = []
-        subtitles = {}
+        media_guid_path = 'media/guid/%d/%s' % (self.MPX_ACCOUNTS[site], vdata['mpxRefId'])
+        formats, subtitles = [], {}
+        if site == 'cnet':
+            formats, subtitles = self._extract_theplatform_smil(
+                self.TP_RELEASE_URL_TEMPLATE % media_guid_path, video_id)
         for (fkey, vid) in vdata['files'].items():
             if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']:
                 continue
-            release_url = 'http://link.theplatform.com/s/%s/%s?format=SMIL&mbr=true' % (mpx_account, vid)
+            release_url = self.TP_RELEASE_URL_TEMPLATE % vid
             if fkey == 'hds':
                 release_url += '&manifest=f4m'
             tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % fkey)
@@ -70,15 +94,15 @@ class CNETIE(ThePlatformIE):
             subtitles = self._merge_subtitles(subtitles, tp_subtitles)
         self._sort_formats(formats)
 
-        return {
+        info = self.get_metadata('kYEXFC/%s' % media_guid_path, video_id)
+        info.update({
             'id': video_id,
             'display_id': display_id,
             'title': title,
-            'description': description,
-            'thumbnail': metadata.get('thumbnail'),
-            'duration': duration,
+            'duration': int_or_none(vdata.get('duration')),
             'uploader': uploader,
             'uploader_id': uploader_id,
             'subtitles': subtitles,
             'formats': formats,
-        }
+        })
+        return info
diff --git a/youtube_dl/extractor/cbslocal.py b/youtube_dl/extractor/cbslocal.py
new file mode 100644 (file)
index 0000000..74adb38
--- /dev/null
@@ -0,0 +1,84 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import calendar
+import datetime
+
+from .anvato import AnvatoIE
+from .sendtonews import SendtoNewsIE
+from ..compat import compat_urlparse
+
+
+class CBSLocalIE(AnvatoIE):
+    _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'
+
+    _TESTS = [{
+        # Anvato backend
+        'url': 'http://losangeles.cbslocal.com/2016/05/16/safety-advocates-say-fatal-car-seat-failures-are-public-health-crisis',
+        'md5': 'f0ee3081e3843f575fccef901199b212',
+        'info_dict': {
+            'id': '3401037',
+            'ext': 'mp4',
+            'title': 'Safety Advocates Say Fatal Car Seat Failures Are \'Public Health Crisis\'',
+            'description': 'Collapsing seats have been the focus of scrutiny for decades, though experts say remarkably little has been done to address the issue. Randy Paige reports.',
+            'thumbnail': 're:^https?://.*',
+            'timestamp': 1463440500,
+            'upload_date': '20160516',
+            'subtitles': {
+                'en': 'mincount:5',
+            },
+            'categories': [
+                'Stations\\Spoken Word\\KCBSTV',
+                'Syndication\\MSN',
+                'Syndication\\NDN',
+                'Syndication\\AOL',
+                'Syndication\\Yahoo',
+                'Syndication\\Tribune',
+                'Syndication\\Curb.tv',
+                'Content\\News'
+            ],
+        },
+    }, {
+        # SendtoNews embed
+        'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/',
+        'info_dict': {
+            'id': 'GxfCe0Zo7D-175909-5588',
+            'ext': 'mp4',
+            'title': 'Recap: CLE 15, CIN 6',
+            'description': '5/16/16: Indians\' bats explode for 15 runs in a win',
+            'upload_date': '20160516',
+            'timestamp': 1463433840,
+            'duration': 49,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        sendtonews_url = SendtoNewsIE._extract_url(webpage)
+        if sendtonews_url:
+            info_dict = {
+                '_type': 'url_transparent',
+                'url': compat_urlparse.urljoin(url, sendtonews_url),
+            }
+        else:
+            info_dict = self._extract_anvato_videos(webpage, display_id)
+
+        time_str = self._html_search_regex(
+            r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False)
+        timestamp = None
+        if time_str:
+            timestamp = calendar.timegm(datetime.datetime.strptime(
+                time_str, '%b %d, %Y %I:%M %p').timetuple())
+
+        info_dict.update({
+            'display_id': display_id,
+            'timestamp': timestamp,
+        })
+
+        return info_dict
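
Pairing strptime with calendar.timegm reads the scraped entry-date as UTC when converting to an epoch timestamp; plain time.mktime would apply the local timezone instead. With a date string of the shape the regex captures, chosen here so that it reproduces the first test's expected timestamp:

    import calendar
    import datetime

    time_str = 'May 16, 2016 11:15 PM'  # hypothetical entry-date text
    print(calendar.timegm(datetime.datetime.strptime(
        time_str, '%b %d, %Y %I:%M %p').timetuple()))  # 1463440500
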
diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py
index 7319ee1b734e6fc30e4cb681f66eb082ffb9622a..387537e766fc4886201285f221022181b2beeb53 100644
--- a/youtube_dl/extractor/cbsnews.py
+++ b/youtube_dl/extractor/cbsnews.py
@@ -2,16 +2,15 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from .theplatform import ThePlatformIE
+from .cbs import CBSBaseIE
 from ..utils import (
     parse_duration,
-    find_xpath_attr,
 )
 
 
-class CBSNewsIE(ThePlatformIE):
+class CBSNewsIE(CBSBaseIE):
     IE_DESC = 'CBS News'
-    _VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
+    _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
 
     _TESTS = [
         {
@@ -31,9 +30,12 @@ class CBSNewsIE(ThePlatformIE):
         {
             'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
             'info_dict': {
-                'id': 'fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack',
+                'id': 'SNJBOYzXiWBOvaLsdzwH8fmtP1SCd91Y',
                 'ext': 'mp4',
                 'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack',
+                'description': 'md5:4a6983e480542d8b333a947bfc64ddc7',
+                'upload_date': '19700101',
+                'uploader': 'CBSI-NEW',
                 'thumbnail': 're:^https?://.*\.jpg$',
                 'duration': 205,
                 'subtitles': {
@@ -49,15 +51,6 @@ class CBSNewsIE(ThePlatformIE):
         },
     ]
 
-    def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
-        closed_caption_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', 'ClosedCaptionURL')
-        return {
-            'en': [{
-                'ext': 'ttml',
-                'url': closed_caption_e.attrib['value'],
-            }]
-        } if closed_caption_e is not None and closed_caption_e.attrib.get('value') else []
-
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
@@ -68,35 +61,13 @@ class CBSNewsIE(ThePlatformIE):
             webpage, 'video JSON info'), video_id)
 
         item = video_info['item'] if 'item' in video_info else video_info
-        title = item.get('articleTitle') or item.get('hed')
-        duration = item.get('duration')
-        thumbnail = item.get('mediaImage') or item.get('thumbnail')
-
-        subtitles = {}
-        formats = []
-        for format_id in ['RtmpMobileLow', 'RtmpMobileHigh', 'Hls', 'RtmpDesktop']:
-            pid = item.get('media' + format_id)
-            if not pid:
-                continue
-            release_url = 'http://link.theplatform.com/s/dJ5BDC/%s?format=SMIL&mbr=true' % pid
-            tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % pid)
-            formats.extend(tp_formats)
-            subtitles = self._merge_subtitles(subtitles, tp_subtitles)
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'thumbnail': thumbnail,
-            'duration': duration,
-            'formats': formats,
-            'subtitles': subtitles,
-        }
+        guid = item['mpxRefId']
+        return self._extract_video_info('byGuid=%s' % guid, guid)
 
 
 class CBSNewsLiveVideoIE(InfoExtractor):
     IE_DESC = 'CBS News Live Videos'
-    _VALID_URL = r'http://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'
+    _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'
 
     _TEST = {
         'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',
@@ -122,6 +93,7 @@ class CBSNewsLiveVideoIE(InfoExtractor):
             for entry in f4m_formats:
                 # URLs without the extra param induce an 404 error
                 entry.update({'extra_param_to_segment_url': hdcore_sign})
+        self._sort_formats(f4m_formats)
 
         return {
             'id': video_id,
diff --git a/youtube_dl/extractor/cbssports.py b/youtube_dl/extractor/cbssports.py
index ae47e74ccf583ac9d821dd588f07f33ff57673db..78ca44b024bfb20dc6ce79e4ee51f3472d599711 100644
--- a/youtube_dl/extractor/cbssports.py
+++ b/youtube_dl/extractor/cbssports.py
@@ -1,30 +1,28 @@
 from __future__ import unicode_literals
 
-import re
+from .cbs import CBSBaseIE
 
-from .common import InfoExtractor
 
+class CBSSportsIE(CBSBaseIE):
+    _VALID_URL = r'https?://www\.cbssports\.com/video/player/[^/]+/(?P<id>\d+)'
 
-class CBSSportsIE(InfoExtractor):
-    _VALID_URL = r'http://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)'
-
-    _TEST = {
-        'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s',
+    _TESTS = [{
+        'url': 'http://www.cbssports.com/video/player/videos/708337219968/0/ben-simmons-the-next-lebron?-not-so-fast',
         'info_dict': {
-            'id': '_d5_GbO8p1sT',
-            'ext': 'flv',
-            'title': 'US Open flashbacks: 1990s',
-            'description': 'Bill Macatee relives the best moments in US Open history from the 1990s.',
+            'id': '708337219968',
+            'ext': 'mp4',
+            'title': 'Ben Simmons the next LeBron? Not so fast',
+            'description': 'md5:854294f627921baba1f4b9a990d87197',
+            'timestamp': 1466293740,
+            'upload_date': '20160618',
+            'uploader': 'CBSI-NEW',
         },
-    }
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        }
+    }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        section = mobj.group('section')
-        video_id = mobj.group('id')
-        all_videos = self._download_json(
-            'http://www.cbssports.com/data/video/player/getVideos/%s?as=json' % section,
-            video_id)
-        # The json file contains the info of all the videos in the section
-        video_info = next(v for v in all_videos if v['pcid'] == video_id)
-        return self.url_result('theplatform:%s' % video_info['pid'], 'ThePlatform')
+        video_id = self._match_id(url)
+        return self._extract_video_info('byId=%s' % video_id, video_id)
diff --git a/youtube_dl/extractor/ccc.py b/youtube_dl/extractor/ccc.py
index dda2c0959882c3cd3c5de56b817ccd7815ef0068..8f7f09e22dad6eda3ca08edfbf9edc118146e893 100644
--- a/youtube_dl/extractor/ccc.py
+++ b/youtube_dl/extractor/ccc.py
@@ -1,13 +1,9 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
-    parse_duration,
-    qualities,
-    unified_strdate,
+    parse_iso8601,
 )
 
 
@@ -19,14 +15,14 @@ class CCCIE(InfoExtractor):
         'url': 'https://media.ccc.de/v/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor#video',
         'md5': '3a1eda8f3a29515d27f5adb967d7e740',
         'info_dict': {
-            'id': '30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor',
+            'id': '1839',
             'ext': 'mp4',
             'title': 'Introduction to Processor Design',
-            'description': 'md5:80be298773966f66d56cb11260b879af',
+            'description': 'md5:df55f6d073d4ceae55aae6f2fd98a0ac',
             'thumbnail': 're:^https?://.*\.jpg$',
-            'view_count': int,
             'upload_date': '20131228',
-            'duration': 3660,
+            'timestamp': 1388188800,
+            'duration': 3710,
         }
     }, {
         'url': 'https://media.ccc.de/v/32c3-7368-shopshifting#download',
@@ -34,79 +30,48 @@ class CCCIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-
-        if self._downloader.params.get('prefer_free_formats'):
-            preference = qualities(['mp3', 'opus', 'mp4-lq', 'webm-lq', 'h264-sd', 'mp4-sd', 'webm-sd', 'mp4', 'webm', 'mp4-hd', 'h264-hd', 'webm-hd'])
-        else:
-            preference = qualities(['opus', 'mp3', 'webm-lq', 'mp4-lq', 'webm-sd', 'h264-sd', 'mp4-sd', 'webm', 'mp4', 'webm-hd', 'mp4-hd', 'h264-hd'])
-
-        title = self._html_search_regex(
-            r'(?s)<h1>(.*?)</h1>', webpage, 'title')
-        description = self._html_search_regex(
-            r'(?s)<h3>About</h3>(.+?)<h3>',
-            webpage, 'description', fatal=False)
-        upload_date = unified_strdate(self._html_search_regex(
-            r"(?s)<span[^>]+class='[^']*fa-calendar-o'[^>]*>(.+?)</span>",
-            webpage, 'upload date', fatal=False))
-        view_count = int_or_none(self._html_search_regex(
-            r"(?s)<span class='[^']*fa-eye'></span>(.*?)</li>",
-            webpage, 'view count', fatal=False))
-        duration = parse_duration(self._html_search_regex(
-            r'(?s)<span[^>]+class=(["\']).*?fa-clock-o.*?\1[^>]*></span>(?P<duration>.+?)</li',
-            webpage, 'duration', fatal=False, group='duration'))
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        event_id = self._search_regex("data-id='(\d+)'", webpage, 'event id')
+        event_data = self._download_json('https://media.ccc.de/public/events/%s' % event_id, event_id)
 
-        matches = re.finditer(r'''(?xs)
-            <(?:span|div)\s+class='label\s+filetype'>(?P<format>[^<]*)</(?:span|div)>\s*
-            <(?:span|div)\s+class='label\s+filetype'>(?P<lang>[^<]*)</(?:span|div)>\s*
-            <a\s+download\s+href='(?P<http_url>[^']+)'>\s*
-            (?:
-                .*?
-                <a\s+(?:download\s+)?href='(?P<torrent_url>[^']+\.torrent)'
-            )?''', webpage)
         formats = []
-        for m in matches:
-            format = m.group('format')
-            format_id = self._search_regex(
-                r'.*/([a-z0-9_-]+)/[^/]*$',
-                m.group('http_url'), 'format id', default=None)
-            if format_id:
-                format_id = m.group('lang') + '-' + format_id
-            vcodec = 'h264' if 'h264' in format_id else (
-                'none' if format_id in ('mp3', 'opus') else None
+        for recording in event_data.get('recordings', []):
+            recording_url = recording.get('recording_url')
+            if not recording_url:
+                continue
+            language = recording.get('language')
+            folder = recording.get('folder')
+            format_id = None
+            if language:
+                format_id = language
+            if folder:
+                if language:
+                    format_id += '-' + folder
+                else:
+                    format_id = folder
+            vcodec = 'h264' if 'h264' in folder else (
+                'none' if folder in ('mp3', 'opus') else None
             )
             formats.append({
                 'format_id': format_id,
-                'format': format,
-                'language': m.group('lang'),
-                'url': m.group('http_url'),
+                'url': recording_url,
+                'width': int_or_none(recording.get('width')),
+                'height': int_or_none(recording.get('height')),
+                'filesize': int_or_none(recording.get('size'), invscale=1024 * 1024),
+                'language': language,
                 'vcodec': vcodec,
-                'preference': preference(format_id),
             })
-
-            if m.group('torrent_url'):
-                formats.append({
-                    'format_id': 'torrent-%s' % (format if format_id is None else format_id),
-                    'format': '%s (torrent)' % format,
-                    'proto': 'torrent',
-                    'format_note': '(unsupported; will just download the .torrent file)',
-                    'vcodec': vcodec,
-                    'preference': -100 + preference(format_id),
-                    'url': m.group('torrent_url'),
-                })
         self._sort_formats(formats)
 
-        thumbnail = self._html_search_regex(
-            r"<video.*?poster='([^']+)'", webpage, 'thumbnail', fatal=False)
-
         return {
-            'id': video_id,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'view_count': view_count,
-            'upload_date': upload_date,
-            'duration': duration,
+            'id': event_id,
+            'display_id': display_id,
+            'title': event_data['title'],
+            'description': event_data.get('description'),
+            'thumbnail': event_data.get('thumb_url'),
+            'timestamp': parse_iso8601(event_data.get('date')),
+            'duration': int_or_none(event_data.get('length')),
+            'tags': event_data.get('tags'),
             'formats': formats,
         }
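
CCCIE now reads everything from the media.ccc.de public events API instead of scraping the talk page. A rough standalone sketch of the call it builds, assuming only the endpoint and field names visible in the diff (stdlib only, no error handling):

    import json
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib2 import urlopen         # Python 2

    event_id = '1839'  # id from the updated test case above
    event = json.loads(urlopen(
        'https://media.ccc.de/public/events/%s' % event_id).read().decode('utf-8'))
    for recording in event.get('recordings', []):
        print(recording.get('language'), recording.get('folder'),
              recording.get('recording_url'))
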
diff --git a/youtube_dl/extractor/cda.py b/youtube_dl/extractor/cda.py
new file mode 100755 (executable)
index 0000000..8af3187
--- /dev/null
@@ -0,0 +1,98 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    decode_packed_codes,
+    ExtractorError,
+    parse_duration
+)
+
+
+class CDAIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
+    _TESTS = [{
+        'url': 'http://www.cda.pl/video/5749950c',
+        'md5': '6f844bf51b15f31fae165365707ae970',
+        'info_dict': {
+            'id': '5749950c',
+            'ext': 'mp4',
+            'height': 720,
+            'title': 'Oto dlaczego przed zakrętem należy zwolnić.',
+            'duration': 39
+        }
+    }, {
+        'url': 'http://www.cda.pl/video/57413289',
+        'md5': 'a88828770a8310fc00be6c95faf7f4d5',
+        'info_dict': {
+            'id': '57413289',
+            'ext': 'mp4',
+            'title': 'Lądowanie na lotnisku na Maderze',
+            'duration': 137
+        }
+    }, {
+        'url': 'http://ebd.cda.pl/0x0/5749950c',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage('http://ebd.cda.pl/0x0/' + video_id, video_id)
+
+        if 'Ten film jest dostępny dla użytkowników premium' in webpage:
+            raise ExtractorError('This video is only available for premium users.', expected=True)
+
+        title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
+
+        formats = []
+
+        info_dict = {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'duration': None,
+        }
+
+        def extract_format(page, version):
+            unpacked = decode_packed_codes(page)
+            format_url = self._search_regex(
+                r"(?:file|url)\s*:\s*(\\?[\"'])(?P<url>http.+?)\1", unpacked,
+                '%s url' % version, fatal=False, group='url')
+            if not format_url:
+                return
+            f = {
+                'url': format_url,
+            }
+            m = re.search(
+                r'<a[^>]+data-quality="(?P<format_id>[^"]+)"[^>]+href="[^"]+"[^>]+class="[^"]*quality-btn-active[^"]*">(?P<height>[0-9]+)p',
+                page)
+            if m:
+                f.update({
+                    'format_id': m.group('format_id'),
+                    'height': int(m.group('height')),
+                })
+            info_dict['formats'].append(f)
+            if not info_dict['duration']:
+                info_dict['duration'] = parse_duration(self._search_regex(
+                    r"duration\s*:\s*(\\?[\"'])(?P<duration>.+?)\1",
+                    unpacked, 'duration', fatal=False, group='duration'))
+
+        extract_format(webpage, 'default')
+
+        for href, resolution in re.findall(
+                r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)',
+                webpage):
+            webpage = self._download_webpage(
+                href, video_id, 'Downloading %s version information' % resolution, fatal=False)
+            if not webpage:
+                # Manually report warning because empty page is returned when
+                # invalid version is requested.
+                self.report_warning('Unable to download %s version information' % resolution)
+                continue
+            extract_format(webpage, resolution)
+
+        self._sort_formats(formats)
+
+        return info_dict
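
The new CDAIE discovers alternate qualities by following the data-quality links and re-running extract_format on each fetched page. A self-contained demonstration of that discovery regex against hypothetical markup shaped like the pages it targets (the href and resolution values here are made up):

    import re

    page = ('<a data-quality="720p" href="http://ebd.cda.pl/620x368/5749950c" '
            'class="quality-btn">720p</a>')
    for href, resolution in re.findall(
            r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"'
            r'[^>]+class="quality-btn"[^>]*>([0-9]+p)',
            page):
        print(resolution, href)  # -> 720p http://ebd.cda.pl/620x368/5749950c
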
diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py
index b27b4e6708675027ccc0337f5abc44b4c480816b..5a58d1777d50297557cae49039df19cbfe15fef0 100644 (file)
@@ -5,7 +5,6 @@ import re
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_parse,
     compat_urllib_parse_unquote,
     compat_urllib_parse_urlparse,
 )
@@ -13,6 +12,7 @@ from ..utils import (
     ExtractorError,
     float_or_none,
     sanitized_Request,
+    urlencode_postdata,
 )
 
 
@@ -33,19 +33,33 @@ class CeskaTelevizeIE(InfoExtractor):
             'skip_download': True,
         },
     }, {
-        'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina',
+        'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en',
         'info_dict': {
-            'id': '61924494876844374',
+            'id': '61924494877028507',
             'ext': 'mp4',
-            'title': 'První republika: Zpěvačka z Dupárny Bobina',
-            'description': 'Sága mapující atmosféru první republiky od r. 1918 do r. 1945.',
+            'title': 'Hyde Park Civilizace: Bonus 01 - En',
+            'description': 'English Subtittles',
             'thumbnail': 're:^https?://.*\.jpg',
-            'duration': 88.4,
+            'duration': 81.3,
         },
         'params': {
             # m3u8 download
             'skip_download': True,
         },
+    }, {
+        # live stream
+        'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
+        'info_dict': {
+            'id': 402,
+            'ext': 'mp4',
+            'title': 're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+            'is_live': True,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+        'skip': 'Georestricted to Czech Republic',
     }, {
         # video with 18+ caution trailer
         'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
@@ -102,7 +116,7 @@ class CeskaTelevizeIE(InfoExtractor):
 
         req = sanitized_Request(
             'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
-            data=compat_urllib_parse.urlencode(data))
+            data=urlencode_postdata(data))
 
         req.add_header('Content-type', 'application/x-www-form-urlencoded')
         req.add_header('x-addr', '127.0.0.1')
@@ -118,18 +132,21 @@ class CeskaTelevizeIE(InfoExtractor):
         req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
         req.add_header('Referer', url)
 
-        playlist_title = self._og_search_title(webpage)
-        playlist_description = self._og_search_description(webpage)
+        playlist_title = self._og_search_title(webpage, default=None)
+        playlist_description = self._og_search_description(webpage, default=None)
 
         playlist = self._download_json(req, playlist_id)['playlist']
         playlist_len = len(playlist)
 
         entries = []
         for item in playlist:
+            is_live = item.get('type') == 'LIVE'
             formats = []
             for format_id, stream_url in item['streamUrls'].items():
                 formats.extend(self._extract_m3u8_formats(
-                    stream_url, playlist_id, 'mp4', entry_protocol='m3u8_native'))
+                    stream_url, playlist_id, 'mp4',
+                    entry_protocol='m3u8' if is_live else 'm3u8_native',
+                    fatal=False))
             self._sort_formats(formats)
 
             item_id = item.get('id') or item['assetId']
@@ -144,14 +161,22 @@ class CeskaTelevizeIE(InfoExtractor):
                 if subs:
                     subtitles = self.extract_subtitles(episode_id, subs)
 
+            if playlist_len == 1:
+                final_title = playlist_title or title
+                if is_live:
+                    final_title = self._live_title(final_title)
+            else:
+                final_title = '%s (%s)' % (playlist_title, title)
+
             entries.append({
                 'id': item_id,
-                'title': playlist_title if playlist_len == 1 else '%s (%s)' % (playlist_title, title),
+                'title': final_title,
                 'description': playlist_description if playlist_len == 1 else None,
                 'thumbnail': thumbnail,
                 'duration': duration,
                 'formats': formats,
                 'subtitles': subtitles,
+                'is_live': is_live,
             })
 
         return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
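
The CeskaTelevize change threads an is_live flag through the playlist loop: live items use the plain m3u8 protocol and get a decorated title. A reduced sketch of just the title logic added above (the real code calls InfoExtractor._live_title, which appends a timestamp; the decorator below is a placeholder):

    def final_title(playlist_title, title, playlist_len, is_live,
                    live_title=lambda t: t + ' (live)'):  # placeholder for _live_title
        if playlist_len == 1:
            t = playlist_title or title
            return live_title(t) if is_live else t
        return '%s (%s)' % (playlist_title, title)

    print(final_title('CT Sport', None, 1, True))       # -> CT Sport (live)
    print(final_title('Queer', 'Bogotart', 2, False))   # -> Queer (Bogotart)
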
diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py
index c74553dcfa7c689b7fc8d69147625b1169e1e178..34d4e61569b110b49998768f13bb81cdda75bd75 100644 (file)
@@ -20,54 +20,64 @@ class Channel9IE(InfoExtractor):
     '''
     IE_DESC = 'Channel 9'
     IE_NAME = 'channel9'
-    _VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
-
-    _TESTS = [
-        {
-            'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
-            'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
-            'info_dict': {
-                'id': 'Events/TechEd/Australia/2013/KOS002',
-                'ext': 'mp4',
-                'title': 'Developer Kick-Off Session: Stuff We Love',
-                'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
-                'duration': 4576,
-                'thumbnail': 're:http://.*\.jpg',
-                'session_code': 'KOS002',
-                'session_day': 'Day 1',
-                'session_room': 'Arena 1A',
-                'session_speakers': ['Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug', 'Mads Kristensen'],
-            },
+    _VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)'
+
+    _TESTS = [{
+        'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
+        'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
+        'info_dict': {
+            'id': 'Events/TechEd/Australia/2013/KOS002',
+            'ext': 'mp4',
+            'title': 'Developer Kick-Off Session: Stuff We Love',
+            'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
+            'duration': 4576,
+            'thumbnail': 're:http://.*\.jpg',
+            'session_code': 'KOS002',
+            'session_day': 'Day 1',
+            'session_room': 'Arena 1A',
+            'session_speakers': ['Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug',
+                                 'Mads Kristensen'],
         },
-        {
-            'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
-            'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
-            'info_dict': {
-                'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing',
-                'ext': 'mp4',
-                'title': 'Self-service BI with Power BI - nuclear testing',
-                'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
-                'duration': 1540,
-                'thumbnail': 're:http://.*\.jpg',
-                'authors': ['Mike Wilmot'],
-            },
+    }, {
+        'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
+        'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
+        'info_dict': {
+            'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing',
+            'ext': 'mp4',
+            'title': 'Self-service BI with Power BI - nuclear testing',
+            'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
+            'duration': 1540,
+            'thumbnail': 're:http://.*\.jpg',
+            'authors': ['Mike Wilmot'],
         },
-        {
-            # low quality mp4 is best
-            'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
-            'info_dict': {
-                'id': 'Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
-                'ext': 'mp4',
-                'title': 'Ranges for the Standard Library',
-                'description': 'md5:2e6b4917677af3728c5f6d63784c4c5d',
-                'duration': 5646,
-                'thumbnail': 're:http://.*\.jpg',
-            },
-            'params': {
-                'skip_download': True,
-            },
-        }
-    ]
+    }, {
+        # low quality mp4 is best
+        'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
+        'info_dict': {
+            'id': 'Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
+            'ext': 'mp4',
+            'title': 'Ranges for the Standard Library',
+            'description': 'md5:2e6b4917677af3728c5f6d63784c4c5d',
+            'duration': 5646,
+            'thumbnail': 're:http://.*\.jpg',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://channel9.msdn.com/Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b/RSS',
+        'info_dict': {
+            'id': 'Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b',
+            'title': 'Channel 9',
+        },
+        'playlist_count': 2,
+    }, {
+        'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS',
+        'only_matching': True,
+    }, {
+        'url': 'https://channel9.msdn.com/Events/Speakers/scott-hanselman/RSS?UrlSafeName=scott-hanselman',
+        'only_matching': True,
+    }]
 
     _RSS_URL = 'http://channel9.msdn.com/%s/RSS'
 
@@ -254,22 +264,30 @@ class Channel9IE(InfoExtractor):
 
         return self.playlist_result(contents)
 
-    def _extract_list(self, content_path):
-        rss = self._download_xml(self._RSS_URL % content_path, content_path, 'Downloading RSS')
+    def _extract_list(self, video_id, rss_url=None):
+        if not rss_url:
+            rss_url = self._RSS_URL % video_id
+        rss = self._download_xml(rss_url, video_id, 'Downloading RSS')
         entries = [self.url_result(session_url.text, 'Channel9')
                    for session_url in rss.findall('./channel/item/link')]
         title_text = rss.find('./channel/title').text
-        return self.playlist_result(entries, content_path, title_text)
+        return self.playlist_result(entries, video_id, title_text)
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         content_path = mobj.group('contentpath')
+        rss = mobj.group('rss')
+
+        if rss:
+            return self._extract_list(content_path, url)
 
-        webpage = self._download_webpage(url, content_path, 'Downloading web page')
+        webpage = self._download_webpage(
+            url, content_path, 'Downloading web page')
 
-        page_type_m = re.search(r'<meta name="WT.entryid" content="(?P<pagetype>[^:]+)[^"]+"/>', webpage)
-        if page_type_m is not None:
-            page_type = page_type_m.group('pagetype')
+        page_type = self._search_regex(
+            r'<meta[^>]+name=(["\'])WT\.entryid\1[^>]+content=(["\'])(?P<pagetype>[^:]+).+?\2',
+            webpage, 'page type', default=None, group='pagetype')
+        if page_type:
             if page_type == 'Entry':      # Any 'item'-like page, may contain downloadable content
                 return self._extract_entry_item(webpage, content_path)
             elif page_type == 'Session':  # Event session page, may contain downloadable content
@@ -278,6 +296,5 @@ class Channel9IE(InfoExtractor):
                 return self._extract_list(content_path)
             else:
                 raise ExtractorError('Unexpected WT.entryid %s' % page_type, expected=True)
-
         else:  # Assuming list
             return self._extract_list(content_path)
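
Channel9IE's new _VALID_URL captures an optional /RSS suffix so feed URLs can go straight to _extract_list. A quick check of how the two named groups split (pattern copied from the diff, URLs from the tests):

    import re

    _VALID_URL = (r'https?://(?:www\.)?channel9\.msdn\.com/'
                  r'(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)')

    for url in (
        'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS',
        'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
    ):
        m = re.match(_VALID_URL, url)
        print(m.group('contentpath'), bool(m.group('rss')))
    # Events/DEVintersection/DEVintersection-2016 True
    # Events/TechEd/Australia/2013/KOS002 False
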
diff --git a/youtube_dl/extractor/chaturbate.py b/youtube_dl/extractor/chaturbate.py
index 242fba311fd10380cf7ed4b67e3074eddc279cb2..b2234549e8b6747cd76d6d1936af893522e59cae 100644 (file)
@@ -48,6 +48,7 @@ class ChaturbateIE(InfoExtractor):
             raise ExtractorError('Unable to find stream URL')
 
         formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py
deleted file mode 100644 (file)
index 6d9cd8a..0000000
+++ /dev/null
@@ -1,107 +0,0 @@
-# encoding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import ExtractorError
-from .screenwavemedia import ScreenwaveMediaIE
-
-
-class CinemassacreIE(InfoExtractor):
-    _VALID_URL = 'https?://(?:www\.)?cinemassacre\.com/(?P<date_y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
-    _TESTS = [
-        {
-            'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
-            'md5': 'fde81fbafaee331785f58cd6c0d46190',
-            'info_dict': {
-                'id': 'Cinemassacre-19911',
-                'ext': 'mp4',
-                'upload_date': '20121110',
-                'title': '“Angry Video Game Nerd: The Movie” – Trailer',
-                'description': 'md5:fb87405fcb42a331742a0dce2708560b',
-            },
-        },
-        {
-            'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
-            'md5': 'd72f10cd39eac4215048f62ab477a511',
-            'info_dict': {
-                'id': 'Cinemassacre-521be8ef82b16',
-                'ext': 'mp4',
-                'upload_date': '20131002',
-                'title': 'The Mummy’s Hand (1940)',
-            },
-        },
-        {
-            # Youtube embedded video
-            'url': 'http://cinemassacre.com/2006/12/07/chronologically-confused-about-bad-movie-and-video-game-sequel-titles/',
-            'md5': 'df4cf8a1dcedaec79a73d96d83b99023',
-            'info_dict': {
-                'id': 'OEVzPCY2T-g',
-                'ext': 'mp4',
-                'title': 'AVGN: Chronologically Confused about Bad Movie and Video Game Sequel Titles',
-                'upload_date': '20061207',
-                'uploader': 'Cinemassacre',
-                'uploader_id': 'JamesNintendoNerd',
-                'description': 'md5:784734696c2b8b7f4b8625cc799e07f6',
-            }
-        },
-        {
-            # Youtube embedded video
-            'url': 'http://cinemassacre.com/2006/09/01/mckids/',
-            'md5': '6eb30961fa795fedc750eac4881ad2e1',
-            'info_dict': {
-                'id': 'FnxsNhuikpo',
-                'ext': 'mp4',
-                'upload_date': '20060901',
-                'uploader': 'Cinemassacre Extras',
-                'description': 'md5:de9b751efa9e45fbaafd9c8a1123ed53',
-                'uploader_id': 'Cinemassacre',
-                'title': 'AVGN: McKids',
-            }
-        },
-        {
-            'url': 'http://cinemassacre.com/2015/05/25/mario-kart-64-nintendo-64-james-mike-mondays/',
-            'md5': '1376908e49572389e7b06251a53cdd08',
-            'info_dict': {
-                'id': 'Cinemassacre-555779690c440',
-                'ext': 'mp4',
-                'description': 'Let’s Play Mario Kart 64 !! Mario Kart 64 is a classic go-kart racing game released for the Nintendo 64 (N64). Today James & Mike do 4 player Battle Mode with Kyle and Bootsy!',
-                'title': 'Mario Kart 64 (Nintendo 64) James & Mike Mondays',
-                'upload_date': '20150525',
-            }
-        }
-    ]
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('display_id')
-        video_date = mobj.group('date_y') + mobj.group('date_m') + mobj.group('date_d')
-
-        webpage = self._download_webpage(url, display_id)
-
-        playerdata_url = self._search_regex(
-            [
-                ScreenwaveMediaIE.EMBED_PATTERN,
-                r'<iframe[^>]+src="(?P<url>(?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
-            ],
-            webpage, 'player data URL', default=None, group='url')
-        if not playerdata_url:
-            raise ExtractorError('Unable to find player data')
-
-        video_title = self._html_search_regex(
-            r'<title>(?P<title>.+?)\|', webpage, 'title')
-        video_description = self._html_search_regex(
-            r'<div class="entry-content">(?P<description>.+?)</div>',
-            webpage, 'description', flags=re.DOTALL, fatal=False)
-        video_thumbnail = self._og_search_thumbnail(webpage)
-
-        return {
-            '_type': 'url_transparent',
-            'display_id': display_id,
-            'title': video_title,
-            'description': video_description,
-            'upload_date': video_date,
-            'thumbnail': video_thumbnail,
-            'url': playerdata_url,
-        }
diff --git a/youtube_dl/extractor/cliphunter.py b/youtube_dl/extractor/cliphunter.py
index 2996b6b09e81fcd0e04038d1744f2fdf3d54e694..19f8b397e44a679ea936ad638048ccb488dc4b93 100644 (file)
@@ -19,7 +19,7 @@ def _decode(s):
 class CliphunterIE(InfoExtractor):
     IE_NAME = 'cliphunter'
 
-    _VALID_URL = r'''(?x)http://(?:www\.)?cliphunter\.com/w/
+    _VALID_URL = r'''(?x)https?://(?:www\.)?cliphunter\.com/w/
         (?P<id>[0-9]+)/
         (?P<seo>.+?)(?:$|[#\?])
     '''
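
The http -> https? loosening above recurs throughout this commit (clipsyndicate, clubic and comcarcoff below get the same treatment); it simply lets _VALID_URL accept both schemes. A condensed demonstration, with the verbose-mode pattern above flattened and a made-up video id:

    import re

    pattern = r'https?://(?:www\.)?cliphunter\.com/w/([0-9]+)/'
    for url in ('http://www.cliphunter.com/w/1012420/x',
                'https://www.cliphunter.com/w/1012420/x'):
        print(bool(re.match(pattern, url)))  # True for both after the change
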
diff --git a/youtube_dl/extractor/cliprs.py b/youtube_dl/extractor/cliprs.py
new file mode 100644 (file)
index 0000000..4f9320e
--- /dev/null
@@ -0,0 +1,90 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    float_or_none,
+    int_or_none,
+    parse_iso8601,
+)
+
+
+class ClipRsIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?clip\.rs/(?P<id>[^/]+)/\d+'
+    _TEST = {
+        'url': 'http://www.clip.rs/premijera-frajle-predstavljaju-novi-spot-za-pesmu-moli-me-moli/3732',
+        'md5': 'c412d57815ba07b56f9edc7b5d6a14e5',
+        'info_dict': {
+            'id': '1488842.1399140381',
+            'ext': 'mp4',
+            'title': 'PREMIJERA Frajle predstavljaju novi spot za pesmu Moli me, moli',
+            'description': 'md5:56ce2c3b4ab31c5a2e0b17cb9a453026',
+            'duration': 229,
+            'timestamp': 1459850243,
+            'upload_date': '20160405',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_id = self._search_regex(
+            r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id')
+
+        response = self._download_json(
+            'http://qi.ckm.onetapi.pl/', video_id,
+            query={
+                'body[id]': video_id,
+                'body[jsonrpc]': '2.0',
+                'body[method]': 'get_asset_detail',
+                'body[params][ID_Publikacji]': video_id,
+                'body[params][Service]': 'www.onet.pl',
+                'content-type': 'application/jsonp',
+                'x-onet-app': 'player.front.onetapi.pl',
+            })
+
+        error = response.get('error')
+        if error:
+            raise ExtractorError(
+                '%s said: %s' % (self.IE_NAME, error['message']), expected=True)
+
+        video = response['result'].get('0')
+
+        formats = []
+        for _, formats_dict in video['formats'].items():
+            if not isinstance(formats_dict, dict):
+                continue
+            for format_id, format_list in formats_dict.items():
+                if not isinstance(format_list, list):
+                    continue
+                for f in format_list:
+                    if not f.get('url'):
+                        continue
+                    formats.append({
+                        'url': f['url'],
+                        'format_id': format_id,
+                        'height': int_or_none(f.get('vertical_resolution')),
+                        'width': int_or_none(f.get('horizontal_resolution')),
+                        'abr': float_or_none(f.get('audio_bitrate')),
+                        'vbr': float_or_none(f.get('video_bitrate')),
+                    })
+        self._sort_formats(formats)
+
+        meta = video.get('meta', {})
+
+        title = self._og_search_title(webpage, default=None) or meta['title']
+        description = self._og_search_description(webpage, default=None) or meta.get('description')
+        duration = meta.get('length') or meta.get('lenght')
+        timestamp = parse_iso8601(meta.get('addDate'), ' ')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'timestamp': timestamp,
+            'formats': formats,
+        }
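
ClipRsIE drives Onet's JSON-RPC-style endpoint entirely through query parameters, using the query= argument that this commit adds to _download_json (see the common.py diff further down). A standalone approximation of the URL it ends up requesting, with the parameter names copied from the diff and the mvp id taken from the test case:

    try:
        from urllib.parse import urlencode  # Python 3
    except ImportError:
        from urllib import urlencode        # Python 2

    video_id = '1488842.1399140381'
    params = {
        'body[id]': video_id,
        'body[jsonrpc]': '2.0',
        'body[method]': 'get_asset_detail',
        'body[params][ID_Publikacji]': video_id,
        'body[params][Service]': 'www.onet.pl',
        'content-type': 'application/jsonp',
        'x-onet-app': 'player.front.onetapi.pl',
    }
    print('http://qi.ckm.onetapi.pl/?' + urlencode(params))
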
diff --git a/youtube_dl/extractor/clipsyndicate.py b/youtube_dl/extractor/clipsyndicate.py
index 8306d6fb7d0d4414cff36f7b381ca9c877820f58..0b6ad895fd7841e70b7dc0dd136052ff0459dd3c 100644 (file)
@@ -8,7 +8,7 @@ from ..utils import (
 
 
 class ClipsyndicateIE(InfoExtractor):
-    _VALID_URL = r'http://(?:chic|www)\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:chic|www)\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
diff --git a/youtube_dl/extractor/closertotruth.py b/youtube_dl/extractor/closertotruth.py
new file mode 100644 (file)
index 0000000..26243d5
--- /dev/null
@@ -0,0 +1,92 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class CloserToTruthIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?closertotruth\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688',
+        'info_dict': {
+            'id': '0_zof1ktre',
+            'display_id': 'solutions-the-mind-body-problem',
+            'ext': 'mov',
+            'title': 'Solutions to the Mind-Body Problem?',
+            'upload_date': '20140221',
+            'timestamp': 1392956007,
+            'uploader_id': 'CTTXML'
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://closertotruth.com/episodes/how-do-brains-work',
+        'info_dict': {
+            'id': '0_iuxai6g6',
+            'display_id': 'how-do-brains-work',
+            'ext': 'mov',
+            'title': 'How do Brains Work?',
+            'upload_date': '20140221',
+            'timestamp': 1392956024,
+            'uploader_id': 'CTTXML'
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://closertotruth.com/interviews/1725',
+        'info_dict': {
+            'id': '1725',
+            'title': 'AyaFr-002',
+        },
+        'playlist_mincount': 2,
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        partner_id = self._search_regex(
+            r'<script[^>]+src=["\'].*?\b(?:partner_id|p)/(\d+)',
+            webpage, 'kaltura partner_id')
+
+        title = self._search_regex(
+            r'<title>(.+?)\s*\|\s*.+?</title>', webpage, 'video title')
+
+        select = self._search_regex(
+            r'(?s)<select[^>]+id="select-version"[^>]*>(.+?)</select>',
+            webpage, 'select version', default=None)
+        if select:
+            entry_ids = set()
+            entries = []
+            for mobj in re.finditer(
+                    r'<option[^>]+value=(["\'])(?P<id>[0-9a-z_]+)(?:#.+?)?\1[^>]*>(?P<title>[^<]+)',
+                    webpage):
+                entry_id = mobj.group('id')
+                if entry_id in entry_ids:
+                    continue
+                entry_ids.add(entry_id)
+                entries.append({
+                    '_type': 'url_transparent',
+                    'url': 'kaltura:%s:%s' % (partner_id, entry_id),
+                    'ie_key': 'Kaltura',
+                    'title': mobj.group('title'),
+                })
+            if entries:
+                return self.playlist_result(entries, display_id, title)
+
+        entry_id = self._search_regex(
+            r'<a[^>]+id=(["\'])embed-kaltura\1[^>]+data-kaltura=(["\'])(?P<id>[0-9a-z_]+)\2',
+            webpage, 'kaltura entry_id', group='id')
+
+        return {
+            '_type': 'url_transparent',
+            'display_id': display_id,
+            'url': 'kaltura:%s:%s' % (partner_id, entry_id),
+            'ie_key': 'Kaltura',
+            'title': title
+        }
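
CloserToTruthIE never downloads media itself; it hands each entry to the Kaltura extractor through the internal kaltura:<partner_id>:<entry_id> pseudo-URL, and _type url_transparent lets the fields set here (title, display_id) override what Kaltura extracts. A sketch of that hand-off shape, with a hypothetical partner id and the entry id from the first test:

    def kaltura_entry(partner_id, entry_id, title):
        # url_transparent defers extraction but keeps these fields
        return {
            '_type': 'url_transparent',
            'url': 'kaltura:%s:%s' % (partner_id, entry_id),
            'ie_key': 'Kaltura',
            'title': title,
        }

    print(kaltura_entry('811441', '0_zof1ktre',
                        'Solutions to the Mind-Body Problem?'))
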
diff --git a/youtube_dl/extractor/cloudy.py b/youtube_dl/extractor/cloudy.py
index 0fa720ee8745cfc728b4413b41888e17787fb5db..9a28ef35423a5dfd28295333ffc037b8919873ac 100644 (file)
@@ -6,7 +6,7 @@ import re
 from .common import InfoExtractor
 from ..compat import (
     compat_parse_qs,
-    compat_urllib_parse,
+    compat_urllib_parse_urlencode,
     compat_HTTPError,
 )
 from ..utils import (
@@ -19,7 +19,7 @@ from ..utils import (
 class CloudyIE(InfoExtractor):
     _IE_DESC = 'cloudy.ec and videoraj.ch'
     _VALID_URL = r'''(?x)
-        https?://(?:www\.)?(?P<host>cloudy\.ec|videoraj\.ch)/
+        https?://(?:www\.)?(?P<host>cloudy\.ec|videoraj\.(?:ch|to))/
         (?:v/|embed\.php\?id=)
         (?P<id>[A-Za-z0-9]+)
         '''
@@ -37,7 +37,7 @@ class CloudyIE(InfoExtractor):
             }
         },
         {
-            'url': 'http://www.videoraj.ch/v/47f399fd8bb60',
+            'url': 'http://www.videoraj.to/v/47f399fd8bb60',
             'md5': '7d0f8799d91efd4eda26587421c3c3b0',
             'info_dict': {
                 'id': '47f399fd8bb60',
@@ -64,7 +64,7 @@ class CloudyIE(InfoExtractor):
                 'errorUrl': error_url,
             })
 
-        data_url = self._API_URL % (video_host, compat_urllib_parse.urlencode(form))
+        data_url = self._API_URL % (video_host, compat_urllib_parse_urlencode(form))
         player_data = self._download_webpage(
             data_url, video_id, 'Downloading player data')
         data = compat_parse_qs(player_data)
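
cloudy.py is one of several extractors in this commit migrating from compat_urllib_parse.urlencode to the new compat_urllib_parse_urlencode alias. The real shim lives in youtube_dl/compat.py; as a first approximation (ignoring whatever extra unicode handling the real shim adds on Python 2) it is just the relocated stdlib function:

    # Assumption-level sketch of the compat alias, not the real definition.
    try:
        from urllib.parse import urlencode as compat_urllib_parse_urlencode  # Py3
    except ImportError:
        from urllib import urlencode as compat_urllib_parse_urlencode        # Py2

    print(compat_urllib_parse_urlencode({'file': 'abc123', 'errorUrl': ''}))
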
diff --git a/youtube_dl/extractor/clubic.py b/youtube_dl/extractor/clubic.py
index 1dfa7c12e19dd3151b9fdbda9c76ad4f58fa3192..2fba93543474cd7ebd53848aca62848c32bf7164 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 
 class ClubicIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?clubic\.com/video/(?:[^/]+/)*video.*-(?P<id>[0-9]+)\.html'
+    _VALID_URL = r'https?://(?:www\.)?clubic\.com/video/(?:[^/]+/)*video.*-(?P<id>[0-9]+)\.html'
 
     _TESTS = [{
         'url': 'http://www.clubic.com/video/clubic-week/video-clubic-week-2-0-le-fbi-se-lance-dans-la-photo-d-identite-448474.html',
diff --git a/youtube_dl/extractor/cnbc.py b/youtube_dl/extractor/cnbc.py
new file mode 100644 (file)
index 0000000..d354d9f
--- /dev/null
@@ -0,0 +1,36 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import smuggle_url
+
+
+class CNBCIE(InfoExtractor):
+    _VALID_URL = r'https?://video\.cnbc\.com/gallery/\?video=(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://video.cnbc.com/gallery/?video=3000503714',
+        'info_dict': {
+            'id': '3000503714',
+            'ext': 'mp4',
+            'title': 'Fighting zombies is big business',
+            'description': 'md5:0c100d8e1a7947bd2feec9a5550e519e',
+            'timestamp': 1459332000,
+            'upload_date': '20160330',
+            'uploader': 'NBCU-CNBC',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        return {
+            '_type': 'url_transparent',
+            'ie_key': 'ThePlatform',
+            'url': smuggle_url(
+                'http://link.theplatform.com/s/gZWlPC/media/guid/2408950221/%s?mbr=true&manifest=m3u' % video_id,
+                {'force_smil_url': True}),
+            'id': video_id,
+        }
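
The new CNBCIE defers to ThePlatform through smuggle_url, which piggybacks extra flags on the URL so the receiving extractor can recover them. A simplified sketch of the mechanism (the helper in youtube_dl/utils.py also url-encodes the JSON payload into a fragment parameter; this version skips that):

    import json

    def smuggle_url(url, data):  # simplified; not the real implementation
        return url + '#__youtubedl_smuggle=' + json.dumps(data)

    print(smuggle_url(
        'http://link.theplatform.com/s/gZWlPC/media/guid/2408950221/'
        '3000503714?mbr=true&manifest=m3u',
        {'force_smil_url': True}))
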
diff --git a/youtube_dl/extractor/collegehumor.py b/youtube_dl/extractor/collegehumor.py
deleted file mode 100644 (file)
index 002b240..0000000
+++ /dev/null
@@ -1,101 +0,0 @@
-from __future__ import unicode_literals
-
-import json
-import re
-
-from .common import InfoExtractor
-from ..utils import int_or_none
-
-
-class CollegeHumorIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'
-
-    _TESTS = [
-        {
-            'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
-            'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd',
-            'info_dict': {
-                'id': '6902724',
-                'ext': 'mp4',
-                'title': 'Comic-Con Cosplay Catastrophe',
-                'description': "Fans get creative this year at San Diego.  Too creative.  And yes, that's really Joss Whedon.",
-                'age_limit': 13,
-                'duration': 187,
-            },
-        }, {
-            'url': 'http://www.collegehumor.com/video/3505939/font-conference',
-            'md5': '72fa701d8ef38664a4dbb9e2ab721816',
-            'info_dict': {
-                'id': '3505939',
-                'ext': 'mp4',
-                'title': 'Font Conference',
-                'description': "This video wasn't long enough, so we made it double-spaced.",
-                'age_limit': 10,
-                'duration': 179,
-            },
-        }, {
-            # embedded youtube video
-            'url': 'http://www.collegehumor.com/embed/6950306',
-            'info_dict': {
-                'id': 'Z-bao9fg6Yc',
-                'ext': 'mp4',
-                'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!',
-                'uploader': 'Mark Dice',
-                'uploader_id': 'MarkDice',
-                'description': 'md5:62c3dab9351fac7bb44b53b69511d87f',
-                'upload_date': '20140127',
-            },
-            'params': {
-                'skip_download': True,
-            },
-            'add_ie': ['Youtube'],
-        },
-    ]
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('videoid')
-
-        jsonUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id + '.json'
-        data = json.loads(self._download_webpage(
-            jsonUrl, video_id, 'Downloading info JSON'))
-        vdata = data['video']
-        if vdata.get('youtubeId') is not None:
-            return {
-                '_type': 'url',
-                'url': vdata['youtubeId'],
-                'ie_key': 'Youtube',
-            }
-
-        AGE_LIMITS = {'nc17': 18, 'r': 18, 'pg13': 13, 'pg': 10, 'g': 0}
-        rating = vdata.get('rating')
-        if rating:
-            age_limit = AGE_LIMITS.get(rating.lower())
-        else:
-            age_limit = None  # None = No idea
-
-        PREFS = {'high_quality': 2, 'low_quality': 0}
-        formats = []
-        for format_key in ('mp4', 'webm'):
-            for qname, qurl in vdata.get(format_key, {}).items():
-                formats.append({
-                    'format_id': format_key + '_' + qname,
-                    'url': qurl,
-                    'format': format_key,
-                    'preference': PREFS.get(qname),
-                })
-        self._sort_formats(formats)
-
-        duration = int_or_none(vdata.get('duration'), 1000)
-        like_count = int_or_none(vdata.get('likes'))
-
-        return {
-            'id': video_id,
-            'title': vdata['title'],
-            'description': vdata.get('description'),
-            'thumbnail': vdata.get('thumbnail'),
-            'formats': formats,
-            'age_limit': age_limit,
-            'duration': duration,
-            'like_count': like_count,
-        }
diff --git a/youtube_dl/extractor/comcarcoff.py b/youtube_dl/extractor/comcarcoff.py
index 7dff684929721699bb3df90a5a4ce52f0552848b..747c245c844171958637213b37daec3dd03f3a7e 100644 (file)
@@ -11,7 +11,7 @@ from ..utils import (
 
 
 class ComCarCoffIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?comediansincarsgettingcoffee\.com/(?P<id>[a-z0-9\-]*)'
+    _VALID_URL = r'https?://(?:www\.)?comediansincarsgettingcoffee\.com/(?P<id>[a-z0-9\-]*)'
     _TESTS = [{
         'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',
         'info_dict': {
@@ -41,7 +41,13 @@ class ComCarCoffIE(InfoExtractor):
 
         display_id = full_data['activeVideo']['video']
         video_data = full_data.get('videos', {}).get(display_id) or full_data['singleshots'][display_id]
+
         video_id = compat_str(video_data['mediaId'])
+        title = video_data['title']
+        formats = self._extract_m3u8_formats(
+            video_data['mediaUrl'], video_id, 'mp4')
+        self._sort_formats(formats)
+
         thumbnails = [{
             'url': video_data['images']['thumb'],
         }, {
@@ -54,15 +60,14 @@ class ComCarCoffIE(InfoExtractor):
             video_data.get('duration'))
 
         return {
-            '_type': 'url_transparent',
-            'url': 'crackle:%s' % video_id,
             'id': video_id,
             'display_id': display_id,
-            'title': video_data['title'],
+            'title': title,
             'description': video_data.get('description'),
             'timestamp': timestamp,
             'duration': duration,
             'thumbnails': thumbnails,
+            'formats': formats,
             'season_number': int_or_none(video_data.get('season')),
             'episode_number': int_or_none(video_data.get('episode')),
             'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))),
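
After this change ComCarCoffIE extracts HLS formats directly instead of delegating to the crackle: scheme. A hedged end-to-end usage sketch through the public YoutubeDL API (assuming a youtube_dl checkout is importable; simulate avoids any actual download):

    from youtube_dl import YoutubeDL

    with YoutubeDL({'simulate': True, 'quiet': True}) as ydl:
        info = ydl.extract_info(
            'http://comediansincarsgettingcoffee.com/'
            'miranda-sings-happy-thanksgiving-miranda/')
        print(info.get('title'), len(info.get('formats') or []))
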
diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py
index 5b1b99675c760a7249bcdb23ff3072af86710114..2b6aaa3aa47541669a59b7936477023d54ae7437 100644 (file)
@@ -5,7 +5,7 @@ import re
 from .mtv import MTVServicesInfoExtractor
 from ..compat import (
     compat_str,
-    compat_urllib_parse,
+    compat_urllib_parse_urlencode,
 )
 from ..utils import (
     ExtractorError,
@@ -44,10 +44,10 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
     #                     or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
     _VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow)
                       |https?://(:www\.)?
-                          (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
+                          (?P<showname>thedailyshow|thecolbertreport|tosh)\.(?:cc\.)?com/
                          ((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
                           (?P<clip>
-                              (?:(?:guests/[^/]+|videos|video-playlists|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+))
+                              (?:(?:guests/[^/]+|videos|video-(?:clips|playlists)|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+))
                               |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
                               |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
                           )|
@@ -129,6 +129,9 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
     }, {
         'url': 'http://thedailyshow.cc.com/news-team/michael-che/7wnfel/we-need-to-talk-about-israel',
         'only_matching': True,
+    }, {
+        'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans',
+        'only_matching': True,
     }]
 
     _available_formats = ['3500', '2200', '1700', '1200', '750', '400']
@@ -201,7 +204,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
         # Correct cc.com in uri
         uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.com', uri)
 
-        index_url = 'http://%s.cc.com/feeds/mrss?%s' % (show_name, compat_urllib_parse.urlencode({'uri': uri}))
+        index_url = 'http://%s.cc.com/feeds/mrss?%s' % (show_name, compat_urllib_parse_urlencode({'uri': uri}))
         idoc = self._download_xml(
             index_url, epTitle,
             'Downloading show index', 'Unable to download episode index')
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 14f57563529ad5782b3c70c24d4760c78c3b334d..5a2603b509244810e5a19d8adb2b124d3a5c2d78 100644 (file)
@@ -15,14 +15,17 @@ import math
 from ..compat import (
     compat_cookiejar,
     compat_cookies,
+    compat_etree_fromstring,
     compat_getpass,
     compat_http_client,
+    compat_os_name,
+    compat_str,
     compat_urllib_error,
-    compat_urllib_parse,
+    compat_urllib_parse_urlencode,
+    compat_urllib_request,
     compat_urlparse,
-    compat_str,
-    compat_etree_fromstring,
 )
+from ..downloader.f4m import remove_encrypted_media
 from ..utils import (
     NO_DEFAULT,
     age_restricted,
@@ -42,11 +45,15 @@ from ..utils import (
     unescapeHTML,
     unified_strdate,
     url_basename,
+    xpath_element,
     xpath_text,
     xpath_with_ns,
     determine_protocol,
     parse_duration,
     mimetype2ext,
+    update_Request,
+    update_url_query,
+    parse_m3u8_attributes,
 )
 
 
@@ -104,7 +111,7 @@ class InfoExtractor(object):
                     * protocol   The protocol that will be used for the actual
                                  download, lower-case.
                                  "http", "https", "rtsp", "rtmp", "rtmpe",
-                                 "m3u8", or "m3u8_native".
+                                 "m3u8", "m3u8_native" or "http_dash_segments".
                     * preference Order number of this format. If this field is
                                  present and not None, the formats get sorted
                                  by this field, regardless of all other values.
@@ -157,12 +164,14 @@ class InfoExtractor(object):
     thumbnail:      Full URL to a video thumbnail image.
     description:    Full video description.
     uploader:       Full name of the video uploader.
-    creator:        The main artist who created the video.
+    license:        License name the video is licensed under.
+    creator:        The creator of the video.
     release_date:   The date (YYYYMMDD) when the video was released.
     timestamp:      UNIX timestamp of the moment the video became available.
     upload_date:    Video upload date (YYYYMMDD).
                     If not explicitly set, calculated from timestamp.
     uploader_id:    Nickname or id of the video uploader.
+    uploader_url:   Full URL to a personal webpage of the video uploader.
     location:       Physical location where the video was filmed.
     subtitles:      The available subtitles as a dictionary in the format
                     {language: subformats}. "subformats" is a list sorted from
@@ -225,6 +234,24 @@ class InfoExtractor(object):
     episode_number: Number of the video episode within a season, as an integer.
     episode_id:     Id of the video episode, as a unicode string.
 
+    The following fields should only be used when the media is a track or a part of
+    a music album:
+
+    track:          Title of the track.
+    track_number:   Number of the track within an album or a disc, as an integer.
+    track_id:       Id of the track (useful in case of custom indexing, e.g. 6.iii),
+                    as a unicode string.
+    artist:         Artist(s) of the track.
+    genre:          Genre(s) of the track.
+    album:          Title of the album the track belongs to.
+    album_type:     Type of the album (e.g. "Demo", "Full-length", "Split", "Compilation", etc).
+    album_artist:   List of all artists appeared on the album (e.g.
+                    "Ash Borer / Fell Voices" or "Various Artists", useful for splits
+                    and compilations).
+    disc_number:    Number of the disc or other physical medium the track belongs to,
+                    as an integer.
+    release_year:   Year (YYYY) when the album was released.
+
     Unless mentioned otherwise, the fields should be Unicode strings.
 
     Unless mentioned otherwise, None is equivalent to absence of information.
@@ -342,7 +369,7 @@ class InfoExtractor(object):
     def IE_NAME(self):
         return compat_str(type(self).__name__[:-2])
 
-    def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
+    def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
         """ Returns the response handle """
         if note is None:
             self.report_download_webpage(video_id)
@@ -351,6 +378,14 @@ class InfoExtractor(object):
                 self.to_screen('%s' % (note,))
             else:
                 self.to_screen('%s: %s' % (video_id, note))
+        if isinstance(url_or_request, compat_urllib_request.Request):
+            url_or_request = update_Request(
+                url_or_request, data=data, headers=headers, query=query)
+        else:
+            if query:
+                url_or_request = update_url_query(url_or_request, query)
+            if data is not None or headers:
+                url_or_request = sanitized_Request(url_or_request, data, headers)
         try:
             return self._downloader.urlopen(url_or_request)
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
@@ -366,13 +401,13 @@ class InfoExtractor(object):
                 self._downloader.report_warning(errmsg)
                 return False
 
-    def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None):
+    def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}):
         """ Returns a tuple (page content as string, URL handle) """
         # Strip hashes from the URL (#1038)
         if isinstance(url_or_request, (compat_str, str)):
             url_or_request = url_or_request.partition('#')[0]
 
-        urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal)
+        urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query)
         if urlh is False:
             assert not fatal
             return False
@@ -425,7 +460,7 @@ class InfoExtractor(object):
             self.to_screen('Saving request to ' + filename)
             # Working around MAX_PATH limitation on Windows (see
             # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
-            if os.name == 'nt':
+            if compat_os_name == 'nt':
                 absfilepath = os.path.abspath(filename)
                 if len(absfilepath) > 259:
                     filename = '\\\\?\\' + absfilepath
@@ -459,13 +494,13 @@ class InfoExtractor(object):
 
         return content
 
-    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None):
+    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None, data=None, headers={}, query={}):
         """ Returns the data of the page as a string """
         success = False
         try_count = 0
         while success is False:
             try:
-                res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal, encoding=encoding)
+                res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal, encoding=encoding, data=data, headers=headers, query=query)
                 success = True
             except compat_http_client.IncompleteRead as e:
                 try_count += 1
@@ -480,10 +515,10 @@ class InfoExtractor(object):
 
     def _download_xml(self, url_or_request, video_id,
                       note='Downloading XML', errnote='Unable to download XML',
-                      transform_source=None, fatal=True, encoding=None):
+                      transform_source=None, fatal=True, encoding=None, data=None, headers={}, query={}):
         """Return the xml as an xml.etree.ElementTree.Element"""
         xml_string = self._download_webpage(
-            url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding)
+            url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding, data=data, headers=headers, query=query)
         if xml_string is False:
             return xml_string
         if transform_source:
@@ -494,10 +529,10 @@ class InfoExtractor(object):
                        note='Downloading JSON metadata',
                        errnote='Unable to download JSON metadata',
                        transform_source=None,
-                       fatal=True, encoding=None):
+                       fatal=True, encoding=None, data=None, headers={}, query={}):
         json_string = self._download_webpage(
             url_or_request, video_id, note, errnote, fatal=fatal,
-            encoding=encoding)
+            encoding=encoding, data=data, headers=headers, query=query)
         if (not fatal) and json_string is False:
             return None
         return self._parse_json(
@@ -594,7 +629,7 @@ class InfoExtractor(object):
                 if mobj:
                     break
 
-        if not self._downloader.params.get('no_color') and os.name != 'nt' and sys.stderr.isatty():
+        if not self._downloader.params.get('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
             _name = '\033[0;34m%s\033[0m' % name
         else:
             _name = name
@@ -809,7 +844,7 @@ class InfoExtractor(object):
         for input in re.findall(r'(?i)<input([^>]+)>', html):
             if not re.search(r'type=(["\'])(?:hidden|submit)\1', input):
                 continue
-            name = re.search(r'name=(["\'])(?P<value>.+?)\1', input)
+            name = re.search(r'(?:name|id)=(["\'])(?P<value>.+?)\1', input)
             if not name:
                 continue
             value = re.search(r'value=(["\'])(?P<value>.*?)\1', input)
@@ -852,6 +887,7 @@ class InfoExtractor(object):
             proto_preference = 0 if determine_protocol(f) in ['http', 'https'] else -0.1
 
             if f.get('vcodec') == 'none':  # audio only
+                preference -= 50
                 if self._downloader.params.get('prefer_free_formats'):
                     ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus']
                 else:
@@ -862,6 +898,8 @@ class InfoExtractor(object):
                 except ValueError:
                     audio_ext_preference = -1
             else:
+                if f.get('acodec') == 'none':  # video only
+                    preference -= 40
                 if self._downloader.params.get('prefer_free_formats'):
                     ORDER = ['flv', 'mp4', 'webm']
                 else:
@@ -951,7 +989,7 @@ class InfoExtractor(object):
 
     def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None,
                              transform_source=lambda s: fix_xml_ampersands(s).strip(),
-                             fatal=True):
+                             fatal=True, m3u8_id=None):
         manifest = self._download_xml(
             manifest_url, video_id, 'Downloading f4m manifest',
             'Unable to download f4m manifest',
@@ -963,20 +1001,56 @@ class InfoExtractor(object):
         if manifest is False:
             return []
 
+        return self._parse_f4m_formats(
+            manifest, manifest_url, video_id, preference=preference, f4m_id=f4m_id,
+            transform_source=transform_source, fatal=fatal, m3u8_id=m3u8_id)
+
+    def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None,
+                           transform_source=lambda s: fix_xml_ampersands(s).strip(),
+                           fatal=True, m3u8_id=None):
+        # currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy
+        akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0')
+        if akamai_pv is not None and ';' in akamai_pv.text:
+            playerVerificationChallenge = akamai_pv.text.split(';')[0]
+            if playerVerificationChallenge.strip() != '':
+                return []
+
         formats = []
         manifest_version = '1.0'
         media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media')
         if not media_nodes:
             manifest_version = '2.0'
             media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
+        # Remove unsupported DRM-protected media from the final format
+        # list (see https://github.com/rg3/youtube-dl/issues/8573).
+        media_nodes = remove_encrypted_media(media_nodes)
+        if not media_nodes:
+            return formats
         base_url = xpath_text(
             manifest, ['{http://ns.adobe.com/f4m/1.0}baseURL', '{http://ns.adobe.com/f4m/2.0}baseURL'],
             'base URL', default=None)
         if base_url:
             base_url = base_url.strip()
+
+        bootstrap_info = xpath_element(
+            manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
+            'bootstrap info', default=None)
+
         for i, media_el in enumerate(media_nodes):
-            if manifest_version == '2.0':
-                media_url = media_el.attrib.get('href') or media_el.attrib.get('url')
+            tbr = int_or_none(media_el.attrib.get('bitrate'))
+            width = int_or_none(media_el.attrib.get('width'))
+            height = int_or_none(media_el.attrib.get('height'))
+            format_id = '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)]))
+            # If <bootstrapInfo> is present, the specified f4m is a
+            # stream-level manifest, and only set-level manifests may refer to
+            # external resources.  See section 11.4 and section 4 of F4M spec
+            if bootstrap_info is None:
+                media_url = None
+                # @href is introduced in 2.0, see section 11.6 of F4M spec
+                if manifest_version == '2.0':
+                    media_url = media_el.attrib.get('href')
+                if media_url is None:
+                    media_url = media_el.attrib.get('url')
                 if not media_url:
                     continue
                 manifest_url = (
@@ -986,30 +1060,43 @@ class InfoExtractor(object):
                 # since bitrates in parent manifest (this one) and media_url manifest
                 # may differ leading to inability to resolve the format by requested
                 # bitrate in f4m downloader
-                if determine_ext(manifest_url) == 'f4m':
-                    formats.extend(self._extract_f4m_formats(
-                        manifest_url, video_id, preference, f4m_id, fatal=fatal))
+                ext = determine_ext(manifest_url)
+                if ext == 'f4m':
+                    f4m_formats = self._extract_f4m_formats(
+                        manifest_url, video_id, preference=preference, f4m_id=f4m_id,
+                        transform_source=transform_source, fatal=fatal)
+                    # Sometimes a stream-level manifest contains a single media entry
+                    # that carries no quality metadata (e.g. http://matchtv.ru/#live-player).
+                    # The parent's media entry in the set-level manifest may contain
+                    # it, so we copy it from the parent in such cases.
+                    if len(f4m_formats) == 1:
+                        f = f4m_formats[0]
+                        f.update({
+                            'tbr': f.get('tbr') or tbr,
+                            'width': f.get('width') or width,
+                            'height': f.get('height') or height,
+                            'format_id': f.get('format_id') if not tbr else format_id,
+                        })
+                    formats.extend(f4m_formats)
+                    continue
+                elif ext == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        manifest_url, video_id, 'mp4', preference=preference,
+                        m3u8_id=m3u8_id, fatal=fatal))
                     continue
-            tbr = int_or_none(media_el.attrib.get('bitrate'))
             formats.append({
-                'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])),
+                'format_id': format_id,
                 'url': manifest_url,
-                'ext': 'flv',
+                'ext': 'flv' if bootstrap_info is not None else None,
                 'tbr': tbr,
-                'width': int_or_none(media_el.attrib.get('width')),
-                'height': int_or_none(media_el.attrib.get('height')),
+                'width': width,
+                'height': height,
                 'preference': preference,
             })
-        self._sort_formats(formats)
-
         return formats
 
-    def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
-                              entry_protocol='m3u8', preference=None,
-                              m3u8_id=None, note=None, errnote=None,
-                              fatal=True):
-
-        formats = [{
+    def _m3u8_meta_format(self, m3u8_url, ext=None, preference=None, m3u8_id=None):
+        return {
             'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
             'url': m3u8_url,
             'ext': ext,
@@ -1017,7 +1104,14 @@ class InfoExtractor(object):
             'preference': preference - 1 if preference else -1,
             'resolution': 'multiple',
             'format_note': 'Quality selection URL',
-        }]
+        }
+
+    def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
+                              entry_protocol='m3u8', preference=None,
+                              m3u8_id=None, note=None, errnote=None,
+                              fatal=True, live=False):
+
+        formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
 
         format_url = lambda u: (
             u
@@ -1033,11 +1127,21 @@ class InfoExtractor(object):
             return []
         m3u8_doc, urlh = res
         m3u8_url = urlh.geturl()
-        # A Media Playlist Tag MUST NOT appear in a Master Playlist
-        # https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3
-        # The EXT-X-TARGETDURATION tag is REQUIRED for every M3U8 Media Playlists
-        # https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.1
-        if '#EXT-X-TARGETDURATION' in m3u8_doc:
+
+        # We should try to extract formats only from master playlists [1], i.e.
+        # playlists that describe the available qualities. Media playlists [2],
+        # on the other hand, should be returned as is since they contain just
+        # the media without quality renditions.
+        # Fortunately, a master playlist can easily be distinguished from a media
+        # playlist by the presence of particular tags. According to [1] and [2],
+        # master playlist tags MUST NOT appear in a media playlist and vice versa.
+        # According to [3], the #EXT-X-TARGETDURATION tag is REQUIRED for every
+        # media playlist and MUST NOT appear in a master playlist, so we can
+        # reliably detect a media playlist with this criterion.
+        # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.4
+        # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3
+        # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.1
+        if '#EXT-X-TARGETDURATION' in m3u8_doc:  # media playlist, return as is
             return [{
                 'url': m3u8_url,
                 'format_id': m3u8_id,
@@ -1047,23 +1151,11 @@ class InfoExtractor(object):
             }]
         last_info = None
         last_media = None
-        kv_rex = re.compile(
-            r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')
         for line in m3u8_doc.splitlines():
             if line.startswith('#EXT-X-STREAM-INF:'):
-                last_info = {}
-                for m in kv_rex.finditer(line):
-                    v = m.group('val')
-                    if v.startswith('"'):
-                        v = v[1:-1]
-                    last_info[m.group('key')] = v
+                last_info = parse_m3u8_attributes(line)
             elif line.startswith('#EXT-X-MEDIA:'):
-                last_media = {}
-                for m in kv_rex.finditer(line):
-                    v = m.group('val')
-                    if v.startswith('"'):
-                        v = v[1:-1]
-                    last_media[m.group('key')] = v
+                last_media = parse_m3u8_attributes(line)
             elif line.startswith('#') or not line.strip():
                 continue
             else:
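
The duplicated attribute parsing for EXT-X-STREAM-INF and EXT-X-MEDIA is now factored out into utils.parse_m3u8_attributes(); a sketch that mirrors the inline logic removed above:

    import re

    def parse_m3u8_attributes(attrib):
        info = {}
        for m in re.finditer(r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
            val = m.group('val')
            if val.startswith('"'):
                val = val[1:-1]  # strip surrounding quotes
            info[m.group('key')] = val
        return info

    line = '#EXT-X-STREAM-INF:BANDWIDTH=1280000,RESOLUTION=720x404,CODECS="avc1.77.30,mp4a.40.2"'
    print(parse_m3u8_attributes(line.split(':', 1)[1]))
    # {'BANDWIDTH': '1280000', 'RESOLUTION': '720x404', 'CODECS': 'avc1.77.30,mp4a.40.2'}
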
@@ -1074,8 +1166,15 @@ class InfoExtractor(object):
                 format_id = []
                 if m3u8_id:
                     format_id.append(m3u8_id)
-                last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') != 'SUBTITLES' else None
-                format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats)))
+                last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') not in ('SUBTITLES', 'CLOSED-CAPTIONS') else None
+                # Although the specification does not mention the NAME attribute
+                # for EXT-X-STREAM-INF, it may still sometimes be present
+                stream_name = last_info.get('NAME') or last_media_name
+                # The bandwidth of live streams may vary over time, making
+                # format_id unpredictable. So it's better to keep the provided
+                # format_id intact.
+                if not live:
+                    format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats)))
                 f = {
                     'format_id': '-'.join(format_id),
                     'url': format_url(line.strip()),
@@ -1084,25 +1183,34 @@ class InfoExtractor(object):
                     'protocol': entry_protocol,
                     'preference': preference,
                 }
-                codecs = last_info.get('CODECS')
-                if codecs:
-                    # TODO: looks like video codec is not always necessarily goes first
-                    va_codecs = codecs.split(',')
-                    if va_codecs[0]:
-                        f['vcodec'] = va_codecs[0]
-                    if len(va_codecs) > 1 and va_codecs[1]:
-                        f['acodec'] = va_codecs[1]
                 resolution = last_info.get('RESOLUTION')
                 if resolution:
                     width_str, height_str = resolution.split('x')
                     f['width'] = int(width_str)
                     f['height'] = int(height_str)
+                codecs = last_info.get('CODECS')
+                if codecs:
+                    vcodec, acodec = [None] * 2
+                    va_codecs = codecs.split(',')
+                    if len(va_codecs) == 1:
+                        # Audio-only entries usually come with a single codec and
+                        # no resolution. For extra robustness we also check that
+                        # it is mp4 audio.
+                        if not resolution and va_codecs[0].startswith('mp4a'):
+                            vcodec, acodec = 'none', va_codecs[0]
+                        else:
+                            vcodec = va_codecs[0]
+                    else:
+                        vcodec, acodec = va_codecs[:2]
+                    f.update({
+                        'acodec': acodec,
+                        'vcodec': vcodec,
+                    })
                 if last_media is not None:
                     f['m3u8_media'] = last_media
                     last_media = None
                 formats.append(f)
                 last_info = {}
-        self._sort_formats(formats)
         return formats
 
     @staticmethod
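
The rewritten CODECS handling above distinguishes audio-only variants; a standalone sketch of that branch, assuming RFC 6381 style codec strings as they appear in HLS master playlists:

    def split_codecs(codecs, resolution=None):
        vcodec, acodec = None, None
        va_codecs = codecs.split(',')
        if len(va_codecs) == 1:
            # A single mp4a codec with no resolution is treated as audio-only
            if not resolution and va_codecs[0].startswith('mp4a'):
                vcodec, acodec = 'none', va_codecs[0]
            else:
                vcodec = va_codecs[0]
        else:
            vcodec, acodec = va_codecs[:2]
        return vcodec, acodec

    print(split_codecs('avc1.77.30,mp4a.40.2'))  # ('avc1.77.30', 'mp4a.40.2')
    print(split_codecs('mp4a.40.2'))             # ('none', 'mp4a.40.2')
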
@@ -1117,8 +1225,8 @@ class InfoExtractor(object):
                 out.append('{%s}%s' % (namespace, c))
         return '/'.join(out)
 
-    def _extract_smil_formats(self, smil_url, video_id, fatal=True, f4m_params=None):
-        smil = self._download_smil(smil_url, video_id, fatal=fatal)
+    def _extract_smil_formats(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None):
+        smil = self._download_smil(smil_url, video_id, fatal=fatal, transform_source=transform_source)
 
         if smil is False:
             assert not fatal
@@ -1135,10 +1243,10 @@ class InfoExtractor(object):
             return {}
         return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params)
 
-    def _download_smil(self, smil_url, video_id, fatal=True):
+    def _download_smil(self, smil_url, video_id, fatal=True, transform_source=None):
         return self._download_xml(
             smil_url, video_id, 'Downloading SMIL file',
-            'Unable to download SMIL file', fatal=fatal)
+            'Unable to download SMIL file', fatal=fatal, transform_source=transform_source)
 
     def _parse_smil(self, smil, smil_url, video_id, f4m_params=None):
         namespace = self._parse_smil_namespace(smil)
@@ -1198,21 +1306,21 @@ class InfoExtractor(object):
         m3u8_count = 0
 
         srcs = []
-        videos = smil.findall(self._xpath_ns('.//video', namespace))
-        for video in videos:
-            src = video.get('src')
+        media = smil.findall(self._xpath_ns('.//video', namespace)) + smil.findall(self._xpath_ns('.//audio', namespace))
+        for medium in media:
+            src = medium.get('src')
             if not src or src in srcs:
                 continue
             srcs.append(src)
 
-            bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
-            filesize = int_or_none(video.get('size') or video.get('fileSize'))
-            width = int_or_none(video.get('width'))
-            height = int_or_none(video.get('height'))
-            proto = video.get('proto')
-            ext = video.get('ext')
+            bitrate = float_or_none(medium.get('system-bitrate') or medium.get('systemBitrate'), 1000)
+            filesize = int_or_none(medium.get('size') or medium.get('fileSize'))
+            width = int_or_none(medium.get('width'))
+            height = int_or_none(medium.get('height'))
+            proto = medium.get('proto')
+            ext = medium.get('ext')
             src_ext = determine_ext(src)
-            streamer = video.get('streamer') or base
+            streamer = medium.get('streamer') or base
 
             if proto == 'rtmp' or streamer.startswith('rtmp'):
                 rtmp_count += 1
@@ -1259,7 +1367,7 @@ class InfoExtractor(object):
                         'plugin': 'flowplayer-3.2.0.1',
                     }
                 f4m_url += '&' if '?' in f4m_url else '?'
-                f4m_url += compat_urllib_parse.urlencode(f4m_params)
+                f4m_url += compat_urllib_parse_urlencode(f4m_params)
                 formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
                 continue
 
@@ -1276,8 +1384,6 @@ class InfoExtractor(object):
                 })
                 continue
 
-        self._sort_formats(formats)
-
         return formats
 
     def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
@@ -1288,7 +1394,7 @@ class InfoExtractor(object):
             if not src or src in urls:
                 continue
             urls.append(src)
-            ext = textstream.get('ext') or determine_ext(src) or mimetype2ext(textstream.get('type'))
+            ext = textstream.get('ext') or mimetype2ext(textstream.get('type')) or determine_ext(src)
             lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') or textstream.get('lang') or subtitles_lang
             subtitles.setdefault(lang, []).append({
                 'url': src,
@@ -1424,8 +1530,9 @@ class InfoExtractor(object):
                         continue
                     representation_attrib = adaptation_set.attrib.copy()
                     representation_attrib.update(representation.attrib)
-                    mime_type = representation_attrib.get('mimeType')
-                    content_type = mime_type.split('/')[0] if mime_type else representation_attrib.get('contentType')
+                    # According to page 41 of ISO/IEC 23009-1:2014 (the DASH spec), @mimeType is mandatory
+                    mime_type = representation_attrib['mimeType']
+                    content_type = mime_type.split('/')[0]
                     if content_type == 'text':
                         # TODO implement WebVTT downloading
                         pass
@@ -1448,6 +1555,7 @@ class InfoExtractor(object):
                         f = {
                             'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
                             'url': base_url,
+                            'ext': mimetype2ext(mime_type),
                             'width': int_or_none(representation_attrib.get('width')),
                             'height': int_or_none(representation_attrib.get('height')),
                             'tbr': int_or_none(representation_attrib.get('bandwidth'), 1000),
@@ -1466,9 +1574,16 @@ class InfoExtractor(object):
                                 representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
                             media_template = representation_ms_info['media_template']
                             media_template = media_template.replace('$RepresentationID$', representation_id)
-                            media_template = re.sub(r'\$(Number|Bandwidth)(?:%(0\d+)d)?\$', r'%(\1)\2d', media_template)
+                            media_template = re.sub(r'\$(Number|Bandwidth)\$', r'%(\1)d', media_template)
+                            media_template = re.sub(r'\$(Number|Bandwidth)%([^$]+)\$', r'%(\1)\2', media_template)
                             media_template = media_template.replace('$$', '$')
-                            representation_ms_info['segment_urls'] = [media_template % {'Number': segment_number, 'Bandwidth': representation_attrib.get('bandwidth')} for segment_number in range(representation_ms_info['start_number'], representation_ms_info['total_number'] + representation_ms_info['start_number'])]
+                            representation_ms_info['segment_urls'] = [
+                                media_template % {
+                                    'Number': segment_number,
+                                    'Bandwidth': representation_attrib.get('bandwidth')}
+                                for segment_number in range(
+                                    representation_ms_info['start_number'],
+                                    representation_ms_info['total_number'] + representation_ms_info['start_number'])]
                         if 'segment_urls' in representation_ms_info:
                             f.update({
                                 'segment_urls': representation_ms_info['segment_urls'],
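
The two-step rewrite above converts DASH template identifiers into Python format specifiers before expansion; a sketch with a hypothetical SegmentTemplate@media value:

    import re

    media_template = 'seg-$Number%05d$-$Bandwidth$.m4s'  # hypothetical template
    media_template = re.sub(r'\$(Number|Bandwidth)\$', r'%(\1)d', media_template)
    media_template = re.sub(r'\$(Number|Bandwidth)%([^$]+)\$', r'%(\1)\2', media_template)
    print([media_template % {'Number': n, 'Bandwidth': 1500000}
           for n in range(1, 3)])
    # ['seg-00001-1500000.m4s', 'seg-00002-1500000.m4s']
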
@@ -1493,7 +1608,6 @@ class InfoExtractor(object):
                             existing_format.update(f)
                     else:
                         self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
-        self._sort_formats(formats)
         return formats
 
     def _live_title(self, name):
@@ -1600,6 +1714,15 @@ class InfoExtractor(object):
     def _get_automatic_captions(self, *args, **kwargs):
         raise NotImplementedError('This method must be implemented by subclasses')
 
+    def mark_watched(self, *args, **kwargs):
+        if (self._downloader.params.get('mark_watched', False) and
+                (self._get_login_info()[0] is not None or
+                    self._downloader.params.get('cookiefile') is not None)):
+            self._mark_watched(*args, **kwargs)
+
+    def _mark_watched(self, *args, **kwargs):
+        raise NotImplementedError('This method must be implemented by subclasses')
+
 
 class SearchInfoExtractor(InfoExtractor):
     """
diff --git a/youtube_dl/extractor/commonprotocols.py b/youtube_dl/extractor/commonprotocols.py
new file mode 100644 (file)
index 0000000..5d130a1
--- /dev/null
@@ -0,0 +1,36 @@
+from __future__ import unicode_literals
+
+import os
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_parse_unquote,
+    compat_urlparse,
+)
+from ..utils import url_basename
+
+
+class RtmpIE(InfoExtractor):
+    IE_DESC = False  # Do not list
+    _VALID_URL = r'(?i)rtmp[est]?://.+'
+
+    _TESTS = [{
+        'url': 'rtmp://cp44293.edgefcs.net/ondemand?auth=daEcTdydfdqcsb8cZcDbAaCbhamacbbawaS-bw7dBb-bWG-GqpGFqCpNCnGoyL&aifp=v001&slist=public/unsecure/audio/2c97899446428e4301471a8cb72b4b97--audio--pmg-20110908-0900a_flv_aac_med_int.mp4',
+        'only_matching': True,
+    }, {
+        'url': 'rtmp://edge.live.hitbox.tv/live/dimak',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
+        title = compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': [{
+                'url': url,
+                'ext': 'flv',
+                'format_id': compat_urlparse.urlparse(url).scheme,
+            }],
+        }
index 6f92ae2ed0cd1383f66d21b3a9274be151ca32b2..e8f2b5a07591410c16fe6fe096678a12006abe48 100644 (file)
@@ -5,7 +5,7 @@ import re
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_parse,
+    compat_urllib_parse_urlencode,
     compat_urllib_parse_urlparse,
     compat_urlparse,
 )
@@ -45,7 +45,7 @@ class CondeNastIE(InfoExtractor):
         'wmagazine': 'W Magazine',
     }
 
-    _VALID_URL = r'http://(?:video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed(?:js)?)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys())
+    _VALID_URL = r'https?://(?:video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed(?:js)?)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys())
     IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
 
     EMBED_URL = r'(?:https?:)?//player\.(?P<site>%s)\.com/(?P<type>embed(?:js)?)/.+?' % '|'.join(_SITES.keys())
@@ -97,7 +97,7 @@ class CondeNastIE(InfoExtractor):
         video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id')
         player_id = self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id')
         target = self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target')
-        data = compat_urllib_parse.urlencode({'videoId': video_id,
+        data = compat_urllib_parse_urlencode({'videoId': video_id,
                                               'playerId': player_id,
                                               'target': target,
                                               })
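
This is one of many call sites in this commit migrating from compat_urllib_parse.urlencode to the compat_urllib_parse_urlencode alias; on Python 3 both resolve to urllib.parse.urlencode, the dedicated alias apparently existing so Python 2 can substitute a unicode-capable wrapper. A sketch with hypothetical values:

    from urllib.parse import urlencode

    data = urlencode({
        'videoId': 'abc123',   # hypothetical ids, for illustration only
        'playerId': 'player1',
        'target': 'temp',
    })
    print(data)  # e.g. videoId=abc123&playerId=player1&target=temp
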
diff --git a/youtube_dl/extractor/coub.py b/youtube_dl/extractor/coub.py
new file mode 100644 (file)
index 0000000..a901b8d
--- /dev/null
@@ -0,0 +1,143 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    float_or_none,
+    int_or_none,
+    parse_iso8601,
+    qualities,
+)
+
+
+class CoubIE(InfoExtractor):
+    _VALID_URL = r'(?:coub:|https?://(?:coub\.com/(?:view|embed|coubs)/|c-cdn\.coub\.com/fb-player\.swf\?.*\bcoub(?:ID|id)=))(?P<id>[\da-z]+)'
+
+    _TESTS = [{
+        'url': 'http://coub.com/view/5u5n1',
+        'info_dict': {
+            'id': '5u5n1',
+            'ext': 'mp4',
+            'title': 'The Matrix Moonwalk',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 4.6,
+            'timestamp': 1428527772,
+            'upload_date': '20150408',
+            'uploader': 'Артём Лоскутников',
+            'uploader_id': 'artyom.loskutnikov',
+            'view_count': int,
+            'like_count': int,
+            'repost_count': int,
+            'comment_count': int,
+            'age_limit': 0,
+        },
+    }, {
+        'url': 'http://c-cdn.coub.com/fb-player.swf?bot_type=vk&coubID=7w5a4',
+        'only_matching': True,
+    }, {
+        'url': 'coub:5u5n1',
+        'only_matching': True,
+    }, {
+        # longer video id
+        'url': 'http://coub.com/view/237d5l5h',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        coub = self._download_json(
+            'http://coub.com/api/v2/coubs/%s.json' % video_id, video_id)
+
+        if coub.get('error'):
+            raise ExtractorError(
+                '%s said: %s' % (self.IE_NAME, coub['error']), expected=True)
+
+        title = coub['title']
+
+        file_versions = coub['file_versions']
+
+        QUALITIES = ('low', 'med', 'high')
+
+        MOBILE = 'mobile'
+        IPHONE = 'iphone'
+        HTML5 = 'html5'
+
+        SOURCE_PREFERENCE = (MOBILE, IPHONE, HTML5)
+
+        quality_key = qualities(QUALITIES)
+        preference_key = qualities(SOURCE_PREFERENCE)
+
+        formats = []
+
+        for kind, items in file_versions.get(HTML5, {}).items():
+            if kind not in ('video', 'audio'):
+                continue
+            if not isinstance(items, dict):
+                continue
+            for quality, item in items.items():
+                if not isinstance(item, dict):
+                    continue
+                item_url = item.get('url')
+                if not item_url:
+                    continue
+                formats.append({
+                    'url': item_url,
+                    'format_id': '%s-%s-%s' % (HTML5, kind, quality),
+                    'filesize': int_or_none(item.get('size')),
+                    'vcodec': 'none' if kind == 'audio' else None,
+                    'quality': quality_key(quality),
+                    'preference': preference_key(HTML5),
+                })
+
+        iphone_url = file_versions.get(IPHONE, {}).get('url')
+        if iphone_url:
+            formats.append({
+                'url': iphone_url,
+                'format_id': IPHONE,
+                'preference': preference_key(IPHONE),
+            })
+
+        mobile_url = file_versions.get(MOBILE, {}).get('audio_url')
+        if mobile_url:
+            formats.append({
+                'url': mobile_url,
+                'format_id': '%s-audio' % MOBILE,
+                'preference': preference_key(MOBILE),
+            })
+
+        self._sort_formats(formats)
+
+        thumbnail = coub.get('picture')
+        duration = float_or_none(coub.get('duration'))
+        timestamp = parse_iso8601(coub.get('published_at') or coub.get('created_at'))
+        uploader = coub.get('channel', {}).get('title')
+        uploader_id = coub.get('channel', {}).get('permalink')
+
+        view_count = int_or_none(coub.get('views_count') or coub.get('views_increase_count'))
+        like_count = int_or_none(coub.get('likes_count'))
+        repost_count = int_or_none(coub.get('recoubs_count'))
+        comment_count = int_or_none(coub.get('comments_count'))
+
+        age_restricted = coub.get('age_restricted', coub.get('age_restricted_by_admin'))
+        if age_restricted is not None:
+            age_limit = 18 if age_restricted is True else 0
+        else:
+            age_limit = None
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'timestamp': timestamp,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'view_count': view_count,
+            'like_count': like_count,
+            'repost_count': repost_count,
+            'comment_count': comment_count,
+            'age_limit': age_limit,
+            'formats': formats,
+        }
index c7032ffa220dc94f7f7b0e419d56b43d38d6c7ba..90a64303d3282c925d1620787922622e99a8b612 100644 (file)
@@ -11,8 +11,7 @@ from math import pow, sqrt, floor
 from .common import InfoExtractor
 from ..compat import (
     compat_etree_fromstring,
-    compat_urllib_parse,
-    compat_urllib_parse_unquote,
+    compat_urllib_parse_urlencode,
     compat_urllib_request,
     compat_urlparse,
 )
@@ -27,6 +26,7 @@ from ..utils import (
     unified_strdate,
     urlencode_postdata,
     xpath_text,
+    extract_attributes,
 )
 from ..aes import (
     aes_cbc_decrypt,
@@ -54,7 +54,7 @@ class CrunchyrollBaseIE(InfoExtractor):
     def _real_initialize(self):
         self._login()
 
-    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None):
+    def _download_webpage(self, url_or_request, *args, **kwargs):
         request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
                    else sanitized_Request(url_or_request))
         # Accept-Language must be set explicitly to accept any language to avoid issues
@@ -65,8 +65,7 @@ class CrunchyrollBaseIE(InfoExtractor):
         # Crunchyroll to not work in georestriction cases in some browsers that don't place
         # the locale lang first in the header. However, allowing any language seems to work around the issue.
         request.add_header('Accept-Language', '*')
-        return super(CrunchyrollBaseIE, self)._download_webpage(
-            request, video_id, note, errnote, fatal, tries, timeout, encoding)
+        return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs)
 
     @staticmethod
     def _add_skip_wall(url):
@@ -79,7 +78,7 @@ class CrunchyrollBaseIE(InfoExtractor):
         # See https://github.com/rg3/youtube-dl/issues/7202.
         qs['skip_wall'] = ['1']
         return compat_urlparse.urlunparse(
-            parsed_url._replace(query=compat_urllib_parse.urlencode(qs, True)))
+            parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
 
 
 class CrunchyrollIE(CrunchyrollBaseIE):
@@ -307,28 +306,36 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
             r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', webpage,
             'video_uploader', fatal=False)
 
-        playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
-        playerdata_req = sanitized_Request(playerdata_url)
-        playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url})
-        playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
-        playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info')
-
-        stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, 'stream_id')
-        video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False)
-
+        available_fmts = []
+        for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
+            attrs = extract_attributes(a)
+            href = attrs.get('href')
+            if href and '/freetrial' in href:
+                continue
+            available_fmts.append(fmt)
+        if not available_fmts:
+            for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
+                available_fmts = re.findall(p, webpage)
+                if available_fmts:
+                    break
+        video_encode_ids = []
         formats = []
-        for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage):
+        for fmt in available_fmts:
             stream_quality, stream_format = self._FORMAT_IDS[fmt]
             video_format = fmt + 'p'
             streamdata_req = sanitized_Request(
                 'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s'
-                % (stream_id, stream_format, stream_quality),
-                compat_urllib_parse.urlencode({'current_page': url}).encode('utf-8'))
+                % (video_id, stream_format, stream_quality),
+                compat_urllib_parse_urlencode({'current_page': url}).encode('utf-8'))
             streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
             streamdata = self._download_xml(
                 streamdata_req, video_id,
                 note='Downloading media info for %s' % video_format)
             stream_info = streamdata.find('./{default}preload/stream_info')
+            video_encode_id = xpath_text(stream_info, './video_encode_id')
+            if video_encode_id in video_encode_ids:
+                continue
+            video_encode_ids.append(video_encode_id)
             video_url = xpath_text(stream_info, './host')
             video_play_path = xpath_text(stream_info, './file')
             if not video_url or not video_play_path:
@@ -360,6 +367,14 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
                 'ext': 'flv',
             })
             formats.append(format_info)
+        self._sort_formats(formats)
+
+        metadata = self._download_xml(
+            'http://www.crunchyroll.com/xml', video_id,
+            note='Downloading media info', query={
+                'req': 'RpcApiVideoPlayer_GetMediaMetadata',
+                'media_id': video_id,
+            })
 
         subtitles = self.extract_subtitles(video_id, webpage)
 
@@ -367,9 +382,12 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
             'id': video_id,
             'title': video_title,
             'description': video_description,
-            'thumbnail': video_thumbnail,
+            'thumbnail': xpath_text(metadata, 'episode_image_url'),
             'uploader': video_uploader,
             'upload_date': video_upload_date,
+            'series': xpath_text(metadata, 'series_title'),
+            'episode': xpath_text(metadata, 'episode_title'),
+            'episode_number': int_or_none(xpath_text(metadata, 'episode_number')),
             'subtitles': subtitles,
             'formats': formats,
         }
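
Replacing the copied-out signature with *args/**kwargs means future keyword arguments (such as the new data, headers and query) pass through this override without further edits; a sketch of the pattern:

    class Base(object):
        def fetch(self, url, video_id, note=None, query=None):
            return (url, video_id, note, query)

    class Child(Base):
        def fetch(self, url, *args, **kwargs):
            # Tweak the request, then delegate without re-declaring the
            # superclass signature, so new keyword args flow through
            return super(Child, self).fetch(url + '#lang=*', *args, **kwargs)

    print(Child().fetch('http://example.com', '123', query={'a': 'b'}))
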
index b8b9d058ddce866fa497597863c80d1de0a7f5c5..84b36f44cfac7bd45a8a7d28adb6767093a7d19b 100644 (file)
@@ -15,7 +15,7 @@ from .senateisvp import SenateISVPIE
 
 
 class CSpanIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)'
+    _VALID_URL = r'https?://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)'
     IE_DESC = 'C-SPAN'
     _TESTS = [{
         'url': 'http://www.c-span.org/video/?313572-1/HolderonV',
index 45049bf371370da6e4b64952441e76d86814fd6a..1622fc844a1b8d4794fc12694f03f37c00076f15 100644 (file)
@@ -8,7 +8,7 @@ from ..utils import parse_iso8601, ExtractorError
 class CtsNewsIE(InfoExtractor):
     IE_DESC = '華視新聞'
     # https connection failed (Connection reset)
-    _VALID_URL = r'http://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html'
+    _VALID_URL = r'https?://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html'
     _TESTS = [{
         'url': 'http://news.cts.com.tw/cts/international/201501/201501291578109.html',
         'md5': 'a9875cb790252b08431186d741beaabe',
index 36af67013bea5519f6ab62e9cec519fecd7e30b4..ebd14cb1638b6309f1522c142502c0dac5d763a2 100644 (file)
@@ -9,7 +9,7 @@ from ..utils import (
 
 
 class CWTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?cw(?:tv|seed)\.com/shows/(?:[^/]+/){2}\?play=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'
+    _VALID_URL = r'https?://(?:www\.)?cw(?:tv|seed)\.com/(?:shows/)?(?:[^/]+/){2}\?.*\bplay=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'
     _TESTS = [{
         'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?play=6b15e985-9345-4f60-baf8-56e96be57c63',
         'info_dict': {
@@ -48,6 +48,9 @@ class CWTVIE(InfoExtractor):
             # m3u8 download
             'skip_download': True,
         }
+    }, {
+        'url': 'http://cwtv.com/thecw/chroniclesofcisco/?play=8adebe35-f447-465f-ab52-e863506ff6d6',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -57,6 +60,7 @@ class CWTVIE(InfoExtractor):
 
         formats = self._extract_m3u8_formats(
             video_data['videos']['variantplaylist']['uri'], video_id, 'mp4')
+        self._sort_formats(formats)
 
         thumbnails = [{
             'url': image['uri'],
diff --git a/youtube_dl/extractor/dailymail.py b/youtube_dl/extractor/dailymail.py
new file mode 100644 (file)
index 0000000..b60a1d8
--- /dev/null
@@ -0,0 +1,61 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    determine_protocol,
+)
+
+
+class DailyMailIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/video/[^/]+/video-(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.dailymail.co.uk/video/sciencetech/video-1288527/Turn-video-impressionist-masterpiece.html',
+        'md5': '2f639d446394f53f3a33658b518b6615',
+        'info_dict': {
+            'id': '1288527',
+            'ext': 'mp4',
+            'title': 'Turn any video into an impressionist masterpiece',
+            'description': 'md5:88ddbcb504367987b2708bb38677c9d2',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        video_data = self._parse_json(self._search_regex(
+            r"data-opts='({.+?})'", webpage, 'video data'), video_id)
+        title = video_data['title']
+        video_sources = self._download_json(
+            video_data.get('sources', {}).get('url') or
+            'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id,
+            video_id)
+
+        formats = []
+        for rendition in video_sources['renditions']:
+            rendition_url = rendition.get('url')
+            if not rendition_url:
+                continue
+            tbr = int_or_none(rendition.get('encodingRate'), 1000)
+            container = rendition.get('videoContainer')
+            is_hls = container == 'M2TS'
+            protocol = 'm3u8_native' if is_hls else determine_protocol({'url': rendition_url})
+            formats.append({
+                'format_id': ('hls' if is_hls else protocol) + ('-%d' % tbr if tbr else ''),
+                'url': rendition_url,
+                'width': int_or_none(rendition.get('frameWidth')),
+                'height': int_or_none(rendition.get('frameHeight')),
+                'tbr': tbr,
+                'vcodec': rendition.get('videoCodec'),
+                'container': container,
+                'protocol': protocol,
+                'ext': 'mp4' if is_hls else None,
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': video_data.get('descr'),
+            'thumbnail': video_data.get('poster') or video_data.get('thumbnail'),
+            'formats': formats,
+        }
index c84c5105886c73f613ef3adae1a7e1831be83f1d..86024a745661dda2da9d3fb883ccf4db017a722c 100644 (file)
@@ -8,8 +8,8 @@ import itertools
 from .common import InfoExtractor
 from ..compat import (
     compat_parse_qs,
-    compat_urllib_parse,
     compat_urllib_parse_unquote,
+    compat_urllib_parse_urlencode,
     compat_urlparse,
 )
 from ..utils import (
@@ -70,7 +70,7 @@ class DaumIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = compat_urllib_parse_unquote(self._match_id(url))
-        query = compat_urllib_parse.urlencode({'vid': video_id})
+        query = compat_urllib_parse_urlencode({'vid': video_id})
         movie_data = self._download_json(
             'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json?' + query,
             video_id, 'Downloading video formats info')
@@ -86,7 +86,7 @@ class DaumIE(InfoExtractor):
         formats = []
         for format_el in movie_data['output_list']['output_list']:
             profile = format_el['profile']
-            format_query = compat_urllib_parse.urlencode({
+            format_query = compat_urllib_parse_urlencode({
                 'vid': video_id,
                 'profile': profile,
             })
index 15a1c40f7a07c36136a0142148af3f7f44f49733..efb8585e825c9f5cd746e4601380aa90d09246f7 100644 (file)
@@ -6,7 +6,7 @@ import base64
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_parse,
+    compat_urllib_parse_urlencode,
     compat_str,
 )
 from ..utils import (
@@ -15,11 +15,12 @@ from ..utils import (
     sanitized_Request,
     smuggle_url,
     unsmuggle_url,
+    urlencode_postdata,
 )
 
 
 class DCNIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?'
+    _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?'
 
     def _real_extract(self, url):
         show_id, video_id, season_id = re.match(self._VALID_URL, url).groups()
@@ -54,30 +55,32 @@ class DCNBaseIE(InfoExtractor):
             'is_live': is_live,
         }
 
-    def _extract_video_formats(self, webpage, video_id, entry_protocol):
+    def _extract_video_formats(self, webpage, video_id, m3u8_entry_protocol):
         formats = []
-        m3u8_url = self._html_search_regex(
-            r'file\s*:\s*"([^"]+)', webpage, 'm3u8 url', fatal=False)
-        if m3u8_url:
-            formats.extend(self._extract_m3u8_formats(
-                m3u8_url, video_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=None))
-
-        rtsp_url = self._search_regex(
-            r'<a[^>]+href="(rtsp://[^"]+)"', webpage, 'rtsp url', fatal=False)
-        if rtsp_url:
-            formats.append({
-                'url': rtsp_url,
-                'format_id': 'rtsp',
-            })
-
+        format_url_base = 'http' + self._html_search_regex(
+            [
+                r'file\s*:\s*"https?(://[^"]+)/playlist.m3u8',
+                r'<a[^>]+href="rtsp(://[^"]+)"'
+            ], webpage, 'format url')
+        # TODO: Current DASH formats are broken - $Time$ pattern in
+        # <SegmentTemplate> not implemented yet
+        # formats.extend(self._extract_mpd_formats(
+        #     format_url_base + '/manifest.mpd',
+        #     video_id, mpd_id='dash', fatal=False))
+        formats.extend(self._extract_m3u8_formats(
+            format_url_base + '/playlist.m3u8', video_id, 'mp4',
+            m3u8_entry_protocol, m3u8_id='hls', fatal=False))
+        formats.extend(self._extract_f4m_formats(
+            format_url_base + '/manifest.f4m',
+            video_id, f4m_id='hds', fatal=False))
         self._sort_formats(formats)
         return formats
 
 
 class DCNVideoIE(DCNBaseIE):
     IE_NAME = 'dcn:video'
-    _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/[^/]+|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?(?:video(?:/[^/]+)?|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)'
+    _TESTS = [{
         'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375',
         'info_dict':
         {
@@ -93,7 +96,10 @@ class DCNVideoIE(DCNBaseIE):
             # m3u8 download
             'skip_download': True,
         },
-    }
+    }, {
+        'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -106,7 +112,7 @@ class DCNVideoIE(DCNBaseIE):
 
         webpage = self._download_webpage(
             'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' +
-            compat_urllib_parse.urlencode({
+            compat_urllib_parse_urlencode({
                 'id': video_data['id'],
                 'user_id': video_data['user_id'],
                 'signature': video_data['signature'],
@@ -119,7 +125,7 @@ class DCNVideoIE(DCNBaseIE):
 
 class DCNLiveIE(DCNBaseIE):
     IE_NAME = 'dcn:live'
-    _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?live/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?live/(?P<id>\d+)'
 
     def _real_extract(self, url):
         channel_id = self._match_id(url)
@@ -133,7 +139,7 @@ class DCNLiveIE(DCNBaseIE):
 
         webpage = self._download_webpage(
             'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' +
-            compat_urllib_parse.urlencode({
+            compat_urllib_parse_urlencode({
                 'id': base64.b64encode(channel_data['user_id'].encode()).decode(),
                 'channelid': base64.b64encode(channel_data['id'].encode()).decode(),
                 'signature': channel_data['signature'],
@@ -146,7 +152,7 @@ class DCNLiveIE(DCNBaseIE):
 
 class DCNSeasonIE(InfoExtractor):
     IE_NAME = 'dcn:season'
-    _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))'
+    _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))'
     _TEST = {
         'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A',
         'info_dict':
@@ -174,7 +180,7 @@ class DCNSeasonIE(InfoExtractor):
         data['show_id'] = show_id
         request = sanitized_Request(
             'http://admin.mangomolo.com/analytics/index.php/plus/show',
-            compat_urllib_parse.urlencode(data),
+            urlencode_postdata(data),
             {
                 'Origin': 'http://www.dcndigital.ae',
                 'Content-Type': 'application/x-www-form-urlencoded'
index aa2c09eb686f9da5a7bedfdfe57566e9d29a0700..9099f5046a14ad7c769a6da50d813076f8b9231e 100644 (file)
@@ -6,7 +6,7 @@ from ..compat import compat_str
 
 
 class DctpTvIE(InfoExtractor):
-    _VALID_URL = r'http://www.dctp.tv/(#/)?filme/(?P<id>.+?)/$'
+    _VALID_URL = r'https?://www.dctp.tv/(#/)?filme/(?P<id>.+?)/$'
     _TEST = {
         'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
         'info_dict': {
index c3205ff5fc243c069494e9d50f3522299ea84d34..7a07f3267db874649e5bcc5228a1c7881ebe19d3 100644 (file)
@@ -41,7 +41,9 @@ class DeezerPlaylistIE(InfoExtractor):
                 'Deezer said: %s' % geoblocking_msg, expected=True)
 
         data_json = self._search_regex(
-            r'naboo\.display\(\'[^\']+\',\s*(.*?)\);\n', webpage, 'data JSON')
+            (r'__DZR_APP_STATE__\s*=\s*({.+?})\s*</script>',
+             r'naboo\.display\(\'[^\']+\',\s*(.*?)\);\n'),
+            webpage, 'data JSON')
         data = json.loads(data_json)
 
         playlist_title = data.get('DATA', {}).get('TITLE')
index 98e3aedfd08ada1300cbf3114a41022949062402..9fe144e1431941051f3f2b7134fd9eb888522e96 100644 (file)
@@ -5,7 +5,7 @@ from .common import InfoExtractor
 
 class DefenseGouvFrIE(InfoExtractor):
     IE_NAME = 'defense.gouv.fr'
-    _VALID_URL = r'http://.*?\.defense\.gouv\.fr/layout/set/ligthboxvideo/base-de-medias/webtv/(?P<id>[^/?#]*)'
+    _VALID_URL = r'https?://.*?\.defense\.gouv\.fr/layout/set/ligthboxvideo/base-de-medias/webtv/(?P<id>[^/?#]*)'
 
     _TEST = {
         'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1',
index 6cd395e1169d8253c589efe3da2d24f1632b0356..65a98d7892816e36608a3350cc00db9d7efd4cb9 100644 (file)
@@ -17,37 +17,53 @@ class DemocracynowIE(InfoExtractor):
     IE_NAME = 'democracynow'
     _TESTS = [{
         'url': 'http://www.democracynow.org/shows/2015/7/3',
-        'md5': 'fbb8fe3d7a56a5e12431ce2f9b2fab0d',
+        'md5': '3757c182d3d84da68f5c8f506c18c196',
         'info_dict': {
             'id': '2015-0703-001',
             'ext': 'mp4',
-            'title': 'July 03, 2015 - Democracy Now!',
-            'description': 'A daily independent global news hour with Amy Goodman & Juan González "What to the Slave is 4th of July?": James Earl Jones Reads Frederick Douglass\u2019 Historic Speech : "This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag : "We Shall Overcome": Remembering Folk Icon, Activist Pete Seeger in His Own Words & Songs',
+            'title': 'Daily Show',
         },
     }, {
         'url': 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree',
-        'md5': 'fbb8fe3d7a56a5e12431ce2f9b2fab0d',
         'info_dict': {
             'id': '2015-0703-001',
             'ext': 'mp4',
             'title': '"This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag',
             'description': 'md5:4d2bc4f0d29f5553c2210a4bc7761a21',
         },
+        'params': {
+            'skip_download': True,
+        },
     }]
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
+
         webpage = self._download_webpage(url, display_id)
-        description = self._og_search_description(webpage)
 
         json_data = self._parse_json(self._search_regex(
             r'<script[^>]+type="text/json"[^>]*>\s*({[^>]+})', webpage, 'json'),
             display_id)
-        video_id = None
+
+        title = json_data['title']
         formats = []
 
-        default_lang = 'en'
+        video_id = None
+
+        for key in ('file', 'audio', 'video', 'high_res_video'):
+            media_url = json_data.get(key, '')
+            if not media_url:
+                continue
+            media_url = re.sub(r'\?.*', '', compat_urlparse.urljoin(url, media_url))
+            video_id = video_id or remove_start(os.path.splitext(url_basename(media_url))[0], 'dn')
+            formats.append({
+                'url': media_url,
+                'vcodec': 'none' if key == 'audio' else None,
+            })
+
+        self._sort_formats(formats)
 
+        default_lang = 'en'
         subtitles = {}
 
         def add_subtitle_item(lang, info_dict):
@@ -67,22 +83,13 @@ class DemocracynowIE(InfoExtractor):
                 'url': compat_urlparse.urljoin(url, subtitle_item['url']),
             })
 
-        for key in ('file', 'audio', 'video'):
-            media_url = json_data.get(key, '')
-            if not media_url:
-                continue
-            media_url = re.sub(r'\?.*', '', compat_urlparse.urljoin(url, media_url))
-            video_id = video_id or remove_start(os.path.splitext(url_basename(media_url))[0], 'dn')
-            formats.append({
-                'url': media_url,
-            })
-
-        self._sort_formats(formats)
+        description = self._og_search_description(webpage, default=None)
 
         return {
             'id': video_id or display_id,
-            'title': json_data['title'],
+            'title': title,
             'description': description,
+            'thumbnail': json_data.get('image'),
             'subtitles': subtitles,
             'formats': formats,
         }
index 263532cc6e66a94c79670caa5e1600444ce909da..a4d0448c26149429ebd7d5813f432b56bf0e6020 100644 (file)
@@ -12,38 +12,46 @@ class DFBIE(InfoExtractor):
 
     _TEST = {
         'url': 'http://tv.dfb.de/video/u-19-em-stimmen-zum-spiel-gegen-russland/11633/',
-        # The md5 is different each time
+        'md5': 'ac0f98a52a330f700b4b3034ad240649',
         'info_dict': {
             'id': '11633',
             'display_id': 'u-19-em-stimmen-zum-spiel-gegen-russland',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'U 19-EM: Stimmen zum Spiel gegen Russland',
             'upload_date': '20150714',
         },
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        display_id = mobj.group('display_id')
+        display_id, video_id = re.match(self._VALID_URL, url).groups()
 
-        webpage = self._download_webpage(url, display_id)
         player_info = self._download_xml(
             'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id,
             display_id)
         video_info = player_info.find('video')
-
-        f4m_info = self._download_xml(
-            self._proto_relative_url(video_info.find('url').text.strip()), display_id)
-        token_el = f4m_info.find('token')
-        manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'
-        formats = self._extract_f4m_formats(manifest_url, display_id)
+        stream_access_url = self._proto_relative_url(video_info.find('url').text.strip())
+
+        formats = []
+        # see http://tv.dfb.de/player/js/ajax.js for the method to extract m3u8 formats
+        for sa_url in (stream_access_url, stream_access_url + '&area=&format=iphone'):
+            stream_access_info = self._download_xml(sa_url, display_id)
+            token_el = stream_access_info.find('token')
+            manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth']
+            if '.f4m' in manifest_url:
+                formats.extend(self._extract_f4m_formats(
+                    manifest_url + '&hdcore=3.2.0',
+                    display_id, f4m_id='hds', fatal=False))
+            else:
+                formats.extend(self._extract_m3u8_formats(
+                    manifest_url, display_id, 'mp4',
+                    'm3u8_native', m3u8_id='hls', fatal=False))
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
             'display_id': display_id,
             'title': video_info.find('title').text,
-            'thumbnail': self._og_search_thumbnail(webpage),
+            'thumbnail': 'http://tv.dfb.de/images/%s_640x360.jpg' % video_id,
             'upload_date': unified_strdate(video_info.find('time_date').text),
             'formats': formats,
         }
index ce680a9f3192832a7fb9e35956382f85bc60d043..55853f76f91e97db19423c6cf8c1b8b56e006447 100644 (file)
@@ -9,7 +9,7 @@ from ..compat import compat_str
 
 
 class DiscoveryIE(InfoExtractor):
-    _VALID_URL = r'''(?x)http://(?:www\.)?(?:
+    _VALID_URL = r'''(?x)https?://(?:www\.)?(?:
             discovery|
             investigationdiscovery|
             discoverylife|
@@ -33,6 +33,7 @@ class DiscoveryIE(InfoExtractor):
             'duration': 156,
             'timestamp': 1302032462,
             'upload_date': '20110405',
+            'uploader_id': '103207',
         },
         'params': {
             'skip_download': True,  # requires ffmpeg
@@ -54,7 +55,11 @@ class DiscoveryIE(InfoExtractor):
             'upload_date': '20140725',
             'timestamp': 1406246400,
             'duration': 116,
+            'uploader_id': '103207',
         },
+        'params': {
+            'skip_download': True,  # requires ffmpeg
+        }
     }]
 
     def _real_extract(self, url):
@@ -63,18 +68,30 @@ class DiscoveryIE(InfoExtractor):
 
         video_title = info.get('playlist_title') or info.get('video_title')
 
-        entries = [{
-            'id': compat_str(video_info['id']),
-            'formats': self._extract_m3u8_formats(
-                video_info['src'], display_id, 'mp4', 'm3u8_native', m3u8_id='hls',
-                note='Download m3u8 information for video %d' % (idx + 1)),
-            'title': video_info['title'],
-            'description': video_info.get('description'),
-            'duration': parse_duration(video_info.get('video_length')),
-            'webpage_url': video_info.get('href') or video_info.get('url'),
-            'thumbnail': video_info.get('thumbnailURL'),
-            'alt_title': video_info.get('secondary_title'),
-            'timestamp': parse_iso8601(video_info.get('publishedDate')),
-        } for idx, video_info in enumerate(info['playlist'])]
+        entries = []
+
+        for idx, video_info in enumerate(info['playlist']):
+            subtitles = {}
+            caption_url = video_info.get('captionsUrl')
+            if caption_url:
+                subtitles = {
+                    'en': [{
+                        'url': caption_url,
+                    }]
+                }
+
+            entries.append({
+                '_type': 'url_transparent',
+                'url': 'http://players.brightcove.net/103207/default_default/index.html?videoId=ref:%s' % video_info['referenceId'],
+                'id': compat_str(video_info['id']),
+                'title': video_info['title'],
+                'description': video_info.get('description'),
+                'duration': parse_duration(video_info.get('video_length')),
+                'webpage_url': video_info.get('href') or video_info.get('url'),
+                'thumbnail': video_info.get('thumbnailURL'),
+                'alt_title': video_info.get('secondary_title'),
+                'timestamp': parse_iso8601(video_info.get('publishedDate')),
+                'subtitles': subtitles,
+            })
 
         return self.playlist_result(entries, display_id, video_title)
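
For reference, the '_type': 'url_transparent' entries above delegate extraction to the Brightcove player URL while letting the fields collected here (title, duration, subtitles, ...) override whatever the delegated extractor returns. A reduced sketch of one entry, assuming the same playlist-JSON field names as the hunk:

    # Sketch only: one url_transparent entry built from a playlist item.
    def make_entry(video_info):
        subtitles = {}
        caption_url = video_info.get('captionsUrl')
        if caption_url:
            # single hard-wired English track, as in the hunk above
            subtitles['en'] = [{'url': caption_url}]
        return {
            '_type': 'url_transparent',  # resolved by another extractor
            'url': 'http://players.brightcove.net/103207/default_default/'
                   'index.html?videoId=ref:%s' % video_info['referenceId'],
            'id': str(video_info['id']),
            'title': video_info['title'],
            'subtitles': subtitles,
        }

    print(make_entry({'id': 1, 'referenceId': 'ref123', 'title': 'Demo'}))
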
diff --git a/youtube_dl/extractor/dispeak.py b/youtube_dl/extractor/dispeak.py
new file mode 100644 (file)
index 0000000..a78cb8a
--- /dev/null
@@ -0,0 +1,114 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    parse_duration,
+    remove_end,
+    xpath_element,
+    xpath_text,
+)
+
+
+class DigitallySpeakingIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:evt\.dispeak|events\.digitallyspeaking)\.com/(?:[^/]+/)+xml/(?P<id>[^.]+)\.xml'
+
+    _TESTS = [{
+        # From http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface
+        'url': 'http://evt.dispeak.com/ubm/gdc/sf16/xml/840376_BQRC.xml',
+        'md5': 'a8efb6c31ed06ca8739294960b2dbabd',
+        'info_dict': {
+            'id': '840376_BQRC',
+            'ext': 'mp4',
+            'title': 'Tenacious Design and The Interface of \'Destiny\'',
+        },
+    }, {
+        # From http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC
+        'url': 'http://events.digitallyspeaking.com/gdc/sf11/xml/12396_1299111843500GMPX.xml',
+        'only_matching': True,
+    }]
+
+    def _parse_mp4(self, metadata):
+        video_formats = []
+        video_root = None
+
+        mp4_video = xpath_text(metadata, './mp4video', default=None)
+        if mp4_video is not None:
+            mobj = re.match(r'(?P<root>https?://.*?/).*', mp4_video)
+            video_root = mobj.group('root')
+        if video_root is None:
+            http_host = xpath_text(metadata, 'httpHost', default=None)
+            if http_host:
+                video_root = 'http://%s/' % http_host
+        if video_root is None:
+            # Hard-coded in http://evt.dispeak.com/ubm/gdc/sf16/custom/player2.js
+            # Works for GPUTechConf, too
+            video_root = 'http://s3-2u.digitallyspeaking.com/'
+
+        formats = metadata.findall('./MBRVideos/MBRVideo')
+        if not formats:
+            return None
+        for a_format in formats:
+            stream_name = xpath_text(a_format, 'streamName', fatal=True)
+            video_path = re.match(r'mp4\:(?P<path>.*)', stream_name).group('path')
+            url = video_root + video_path
+            vbr = xpath_text(a_format, 'bitrate')
+            video_formats.append({
+                'url': url,
+                'vbr': int_or_none(vbr),
+            })
+        return video_formats
+
+    def _parse_flv(self, metadata):
+        formats = []
+        akamai_url = xpath_text(metadata, './akamaiHost', fatal=True)
+        audios = metadata.findall('./audios/audio')
+        for audio in audios:
+            formats.append({
+                'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
+                'play_path': remove_end(audio.get('url'), '.flv'),
+                'ext': 'flv',
+                'vcodec': 'none',
+                'format_id': audio.get('code'),
+            })
+        slide_video_path = xpath_text(metadata, './slideVideo', fatal=True)
+        formats.append({
+            'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
+            'play_path': remove_end(slide_video_path, '.flv'),
+            'ext': 'flv',
+            'format_note': 'slide deck video',
+            'quality': -2,
+            'preference': -2,
+            'format_id': 'slides',
+        })
+        speaker_video_path = xpath_text(metadata, './speakerVideo', fatal=True)
+        formats.append({
+            'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
+            'play_path': remove_end(speaker_video_path, '.flv'),
+            'ext': 'flv',
+            'format_note': 'speaker video',
+            'quality': -1,
+            'preference': -1,
+            'format_id': 'speaker',
+        })
+        return formats
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        xml_description = self._download_xml(url, video_id)
+        metadata = xpath_element(xml_description, 'metadata')
+
+        video_formats = self._parse_mp4(metadata)
+        if video_formats is None:
+            video_formats = self._parse_flv(metadata)
+
+        return {
+            'id': video_id,
+            'formats': video_formats,
+            'title': xpath_text(metadata, 'title', fatal=True),
+            'duration': parse_duration(xpath_text(metadata, 'endTime')),
+            'creator': xpath_text(metadata, 'speaker'),
+        }
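
The fallback chain in _parse_mp4 above is worth isolating: prefer the scheme-plus-host of the sample <mp4video> URL, else a root built from <httpHost>, else the hard-coded S3 root lifted from the player script. A standalone sketch under those assumptions:

    # Sketch only: the three-step video-root deduction from _parse_mp4.
    import re

    def deduce_video_root(mp4_video=None, http_host=None):
        if mp4_video:
            mobj = re.match(r'(?P<root>https?://.*?/).*', mp4_video)
            if mobj:
                return mobj.group('root')
        if http_host:
            return 'http://%s/' % http_host
        # last resort, hard-coded in the player script (see comment above)
        return 'http://s3-2u.digitallyspeaking.com/'

    print(deduce_video_root('http://host.example/path/clip.mp4'))  # http://host.example/
    print(deduce_video_root(http_host='cdn.example'))               # http://cdn.example/
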
index 373b3b4b4735d8544128c48a10037eed3c570e5d..ce6962755831a8c6f853271ad49112fee4fcbc63 100644 (file)
@@ -10,7 +10,7 @@ from ..compat import (compat_str, compat_basestring)
 
 class DouyuTVIE(InfoExtractor):
     IE_DESC = '斗鱼'
-    _VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(?P<id>[A-Za-z0-9]+)'
     _TESTS = [{
         'url': 'http://www.douyutv.com/iseven',
         'info_dict': {
@@ -18,7 +18,7 @@ class DouyuTVIE(InfoExtractor):
             'display_id': 'iseven',
             'ext': 'flv',
             'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
-            'description': 'md5:c93d6692dde6fe33809a46edcbecca44',
+            'description': 're:.*m7show@163\.com.*',
             'thumbnail': 're:^https?://.*\.jpg$',
             'uploader': '7师傅',
             'uploader_id': '431925',
@@ -26,7 +26,7 @@ class DouyuTVIE(InfoExtractor):
         },
         'params': {
             'skip_download': True,
-        }
+        },
     }, {
         'url': 'http://www.douyutv.com/85982',
         'info_dict': {
@@ -42,7 +42,27 @@ class DouyuTVIE(InfoExtractor):
         },
         'params': {
             'skip_download': True,
-        }
+        },
+        'skip': 'Room not found',
+    }, {
+        'url': 'http://www.douyutv.com/17732',
+        'info_dict': {
+            'id': '17732',
+            'display_id': '17732',
+            'ext': 'flv',
+            'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'description': 're:.*m7show@163\.com.*',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'uploader': '7师傅',
+            'uploader_id': '431925',
+            'is_live': True,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://www.douyu.com/xiaocang',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -55,13 +75,28 @@ class DouyuTVIE(InfoExtractor):
             room_id = self._html_search_regex(
                 r'"room_id"\s*:\s*(\d+),', page, 'room id')
 
-        prefix = 'room/%s?aid=android&client_sys=android&time=%d' % (
-            room_id, int(time.time()))
-
-        auth = hashlib.md5((prefix + '1231').encode('ascii')).hexdigest()
-        config = self._download_json(
-            'http://www.douyutv.com/api/v1/%s&auth=%s' % (prefix, auth),
-            video_id)
+        config = None
+        # Douyu API sometimes returns error "Unable to load the requested class: eticket_redis_cache"
+        # Retry with different parameters, since identical parameters reproduce the same error
+        for i in range(5):
+            prefix = 'room/%s?aid=android&client_sys=android&time=%d' % (
+                room_id, int(time.time()))
+            auth = hashlib.md5((prefix + '1231').encode('ascii')).hexdigest()
+
+            config_page = self._download_webpage(
+                'http://www.douyutv.com/api/v1/%s&auth=%s' % (prefix, auth),
+                video_id)
+            try:
+                config = self._parse_json(config_page, video_id)  # fatal, so invalid JSON raises and triggers the retry below
+            except ExtractorError:
+                # Wait some time before retrying to get a different time() value
+                self._sleep(1, video_id, msg_template='%(video_id)s: Error occurred. '
+                                                      'Waiting for %(timeout)s seconds before retrying')
+                continue
+            else:
+                break
+        if config is None:
+            raise ExtractorError('Unable to fetch API result')
 
         data = config['data']
 
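The signing scheme that retry loop leans on is simple: auth is the MD5 hex digest of the request path, which embeds the current Unix time, concatenated with the constant salt '1231'; sleeping a second and rebuilding the prefix therefore produces a genuinely different request. Standalone sketch of the URL construction:

    # Sketch only: rebuilds the signed API URL from the hunk above.
    import hashlib
    import time

    def signed_api_url(room_id):
        prefix = 'room/%s?aid=android&client_sys=android&time=%d' % (
            room_id, int(time.time()))
        auth = hashlib.md5((prefix + '1231').encode('ascii')).hexdigest()
        return 'http://www.douyutv.com/api/v1/%s&auth=%s' % (prefix, auth)

    print(signed_api_url('17732'))
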
index 6cda56a7fdd045ef032bb496fadd8462c3839ecd..5790553f38ca29107bad44317fedb271dce0883a 100644 (file)
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
+import json
+import re
 import time
 
 from .common import InfoExtractor
-from ..utils import int_or_none
+from ..compat import compat_urlparse
+from ..utils import (
+    int_or_none,
+    update_url_query,
+)
 
 
 class DPlayIE(InfoExtractor):
-    _VALID_URL = r'http://www\.dplay\.se/[^/]+/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?P<domain>it\.dplay\.com|www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)'
 
-    _TEST = {
+    _TESTS = [{
+        # geo restricted, via direct unsigned hls URL
+        'url': 'http://it.dplay.com/take-me-out/stagione-1-episodio-25/',
+        'info_dict': {
+            'id': '1255600',
+            'display_id': 'stagione-1-episodio-25',
+            'ext': 'mp4',
+            'title': 'Episodio 25',
+            'description': 'md5:cae5f40ad988811b197d2d27a53227eb',
+            'duration': 2761,
+            'timestamp': 1454701800,
+            'upload_date': '20160205',
+            'creator': 'RTIT',
+            'series': 'Take me out',
+            'season_number': 1,
+            'episode_number': 25,
+            'age_limit': 0,
+        },
+        'expected_warnings': ['Unable to download f4m manifest'],
+    }, {
+        # non geo restricted, via secure api, unsigned download hls URL
         'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/',
         'info_dict': {
             'id': '3172',
-            'ext': 'mp4',
             'display_id': 'season-1-svensken-lar-sig-njuta-av-livet',
+            'ext': 'mp4',
             'title': 'Svensken lär sig njuta av livet',
+            'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8',
             'duration': 2650,
+            'timestamp': 1365454320,
+            'upload_date': '20130408',
+            'creator': 'Kanal 5 (Home)',
+            'series': 'Nugammalt - 77 händelser som format Sverige',
+            'season_number': 1,
+            'episode_number': 1,
+            'age_limit': 0,
         },
-    }
+    }, {
+        # geo restricted, via secure api, unsigned download hls URL
+        'url': 'http://www.dplay.dk/mig-og-min-mor/season-6-episode-12/',
+        'info_dict': {
+            'id': '70816',
+            'display_id': 'season-6-episode-12',
+            'ext': 'mp4',
+            'title': 'Episode 12',
+            'description': 'md5:9c86e51a93f8a4401fc9641ef9894c90',
+            'duration': 2563,
+            'timestamp': 1429696800,
+            'upload_date': '20150422',
+            'creator': 'Kanal 4 (Home)',
+            'series': 'Mig og min mor',
+            'season_number': 6,
+            'episode_number': 12,
+            'age_limit': 0,
+        },
+    }, {
+        # geo restricted, via direct unsigned hls URL
+        'url': 'http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
-        display_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('id')
+        domain = mobj.group('domain')
+
         webpage = self._download_webpage(url, display_id)
+
         video_id = self._search_regex(
-            r'data-video-id="(\d+)"', webpage, 'video id')
+            r'data-video-id=["\'](\d+)', webpage, 'video id')
 
         info = self._download_json(
-            'http://www.dplay.se/api/v2/ajax/videos?video_id=' + video_id,
+            'http://%s/api/v2/ajax/videos?video_id=%s' % (domain, video_id),
             video_id)['data'][0]
 
-        self._set_cookie(
-            'secure.dplay.se', 'dsc-geo',
-            '{"countryCode":"NL","expiry":%d}' % ((time.time() + 20 * 60) * 1000))
-        # TODO: consider adding support for 'stream_type=hds', it seems to
-        # require setting some cookies
-        manifest_url = self._download_json(
-            'https://secure.dplay.se/secure/api/v2/user/authorization/stream/%s?stream_type=hls' % video_id,
-            video_id, 'Getting manifest url for hls stream')['hls']
-        formats = self._extract_m3u8_formats(
-            manifest_url, video_id, ext='mp4', entry_protocol='m3u8_native')
+        title = info['title']
+
+        PROTOCOLS = ('hls', 'hds')
+        formats = []
+
+        def extract_formats(protocol, manifest_url):
+            if protocol == 'hls':
+                m3u8_formats = self._extract_m3u8_formats(
+                    manifest_url, video_id, ext='mp4',
+                    entry_protocol='m3u8_native', m3u8_id=protocol, fatal=False)
+                # Sometimes final URLs inside m3u8 are unsigned, let's fix this
+                # ourselves
+                query = compat_urlparse.parse_qs(compat_urlparse.urlparse(manifest_url).query)
+                for m3u8_format in m3u8_formats:
+                    m3u8_format['url'] = update_url_query(m3u8_format['url'], query)
+                formats.extend(m3u8_formats)
+            elif protocol == 'hds':
+                formats.extend(self._extract_f4m_formats(
+                    manifest_url + '&hdcore=3.8.0&plugin=flowplayer-3.8.0.0',
+                    video_id, f4m_id=protocol, fatal=False))
+
+        domain_tld = domain.split('.')[-1]
+        if domain_tld in ('se', 'dk', 'no'):
+            for protocol in PROTOCOLS:
+                # Providing dsc-geo allows bypassing geo restriction in some cases
+                self._set_cookie(
+                    'secure.dplay.%s' % domain_tld, 'dsc-geo',
+                    json.dumps({
+                        'countryCode': domain_tld.upper(),
+                        'expiry': (time.time() + 20 * 60) * 1000,
+                    }))
+                stream = self._download_json(
+                    'https://secure.dplay.%s/secure/api/v2/user/authorization/stream/%s?stream_type=%s'
+                    % (domain_tld, video_id, protocol), video_id,
+                    'Downloading %s stream JSON' % protocol, fatal=False)
+                if stream and stream.get(protocol):
+                    extract_formats(protocol, stream[protocol])
+
+        # The last resort is to try direct unsigned hls/hds URLs from the info dictionary.
+        # Sometimes this works even when the secure API with dsc-geo has failed (e.g.
+        # http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/).
+        if not formats:
+            for protocol in PROTOCOLS:
+                if info.get(protocol):
+                    extract_formats(protocol, info[protocol])
+
+        self._sort_formats(formats)
+
+        subtitles = {}
+        for lang in ('se', 'sv', 'da', 'nl', 'no'):
+            for format_id in ('web_vtt', 'vtt', 'srt'):
+                subtitle_url = info.get('subtitles_%s_%s' % (lang, format_id))
+                if subtitle_url:
+                    subtitles.setdefault(lang, []).append({'url': subtitle_url})
 
         return {
             'id': video_id,
             'display_id': display_id,
-            'title': info['title'],
-            'formats': formats,
+            'title': title,
+            'description': info.get('video_metadata_longDescription'),
             'duration': int_or_none(info.get('video_metadata_length'), scale=1000),
+            'timestamp': int_or_none(info.get('video_publish_date')),
+            'creator': info.get('video_metadata_homeChannel'),
+            'series': info.get('video_metadata_show'),
+            'season_number': int_or_none(info.get('season')),
+            'episode_number': int_or_none(info.get('episode')),
+            'age_limit': int_or_none(info.get('minimum_age')),
+            'formats': formats,
+            'subtitles': subtitles,
         }
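
Two details above deserve a note: the dsc-geo cookie is a JSON blob whose countryCode mirrors the site TLD and whose expiry is a JavaScript-style millisecond timestamp, and the manifest URL's signing query gets re-applied to variant URLs that come back unsigned. A stdlib-only sketch of both (update_url_query in the hunk does the merge more carefully):

    # Sketch only: stand-ins for the cookie payload and URL re-signing.
    import json
    import time
    try:
        from urllib.parse import urlparse, urlunparse  # Python 3
    except ImportError:
        from urlparse import urlparse, urlunparse  # Python 2

    def dsc_geo_cookie(domain_tld):
        return json.dumps({
            'countryCode': domain_tld.upper(),         # e.g. 'SE'
            'expiry': (time.time() + 20 * 60) * 1000,  # ms, 20 minutes out
        })

    def resign(variant_url, manifest_url):
        signing_query = urlparse(manifest_url).query
        parts = urlparse(variant_url)
        merged = '&'.join(q for q in (parts.query, signing_query) if q)
        return urlunparse(parts._replace(query=merged))

    print(dsc_geo_cookie('se'))
    print(resign('http://cdn.example/v/360/index.m3u8',
                 'http://cdn.example/master.m3u8?hdnts=exp=2~hmac=ab'))
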
index d35e88881d859555d20914240fff733508a4ac42..3b6529f4b108052e3019c8e400bbc2cd0eb5a9a1 100644 (file)
@@ -6,7 +6,6 @@ import itertools
 from .amp import AMPIE
 from ..compat import (
     compat_HTTPError,
-    compat_urllib_parse,
     compat_urlparse,
 )
 from ..utils import (
@@ -14,6 +13,7 @@ from ..utils import (
     clean_html,
     int_or_none,
     sanitized_Request,
+    urlencode_postdata
 )
 
 
@@ -50,7 +50,7 @@ class DramaFeverBaseIE(AMPIE):
         }
 
         request = sanitized_Request(
-            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
+            self._LOGIN_URL, urlencode_postdata(login_form))
         response = self._download_webpage(
             request, None, 'Logging in as %s' % username)
 
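The one-line change above swaps the compat_urllib_parse.urlencode(...).encode('utf-8') dance for the urlencode_postdata helper from youtube_dl/utils.py. A minimal stand-in with the same contract (mapping in, bytes out, usable as a POST body), for illustration only:

    # Sketch only: approximates the urlencode_postdata helper.
    try:
        from urllib.parse import urlencode  # Python 3
    except ImportError:
        from urllib import urlencode  # Python 2

    def urlencode_postdata(fields):
        # POST bodies must be bytes, hence the explicit encode
        return urlencode(fields).encode('utf-8')

    print(urlencode_postdata({'username': 'user', 'password': 'pass'}))
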
index 028144f20b3458e61ed214703d5f6fd17ab79871..0040e70d4929828ebf2dc7dc74199ed639dcfebd 100644 (file)
@@ -7,7 +7,7 @@ from .zdf import ZDFIE
 
 class DreiSatIE(ZDFIE):
     IE_NAME = '3sat'
-    _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
+    _VALID_URL = r'(?:https?://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
     _TESTS = [
         {
             'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
diff --git a/youtube_dl/extractor/dump.py b/youtube_dl/extractor/dump.py
deleted file mode 100644 (file)
index ff78d4f..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-# encoding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-
-
-class DumpIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?dump\.com/(?P<id>[a-zA-Z0-9]+)/'
-
-    _TEST = {
-        'url': 'http://www.dump.com/oneus/',
-        'md5': 'ad71704d1e67dfd9e81e3e8b42d69d99',
-        'info_dict': {
-            'id': 'oneus',
-            'ext': 'flv',
-            'title': "He's one of us.",
-            'thumbnail': 're:^https?://.*\.jpg$',
-        },
-    }
-
-    def _real_extract(self, url):
-        m = re.match(self._VALID_URL, url)
-        video_id = m.group('id')
-
-        webpage = self._download_webpage(url, video_id)
-        video_url = self._search_regex(
-            r's1.addVariable\("file",\s*"([^"]+)"', webpage, 'video URL')
-
-        title = self._og_search_title(webpage)
-        thumbnail = self._og_search_thumbnail(webpage)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'url': video_url,
-            'thumbnail': thumbnail,
-        }
index c1a4bc757f78770179f332d651227f47cceb8a99..974c69dbc75fcb29bd57e30432fa466182b68743 100644 (file)
@@ -15,7 +15,7 @@ class DVTVIE(InfoExtractor):
     IE_NAME = 'dvtv'
     IE_DESC = 'http://video.aktualne.cz/'
 
-    _VALID_URL = r'http://video\.aktualne\.cz/(?:[^/]+/)+r~(?P<id>[0-9a-f]{32})'
+    _VALID_URL = r'https?://video\.aktualne\.cz/(?:[^/]+/)+r~(?P<id>[0-9a-f]{32})'
 
     _TESTS = [{
         'url': 'http://video.aktualne.cz/dvtv/vondra-o-ceskem-stoleti-pri-pohledu-na-havla-mi-bylo-trapne/r~e5efe9ca855511e4833a0025900fea04/',
diff --git a/youtube_dl/extractor/dw.py b/youtube_dl/extractor/dw.py
new file mode 100644 (file)
index 0000000..d740652
--- /dev/null
@@ -0,0 +1,108 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    unified_strdate,
+)
+from ..compat import compat_urlparse
+
+
+class DWIE(InfoExtractor):
+    IE_NAME = 'dw'
+    _VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+(?:av|e)-(?P<id>\d+)'
+    _TESTS = [{
+        # video
+        'url': 'http://www.dw.com/en/intelligent-light/av-19112290',
+        'md5': '7372046e1815c5a534b43f3c3c36e6e9',
+        'info_dict': {
+            'id': '19112290',
+            'ext': 'mp4',
+            'title': 'Intelligent light',
+            'description': 'md5:90e00d5881719f2a6a5827cb74985af1',
+            'upload_date': '20160311',
+        }
+    }, {
+        # audio
+        'url': 'http://www.dw.com/en/worldlink-my-business/av-19111941',
+        'md5': '2814c9a1321c3a51f8a7aeb067a360dd',
+        'info_dict': {
+            'id': '19111941',
+            'ext': 'mp3',
+            'title': 'WorldLink: My business',
+            'description': 'md5:bc9ca6e4e063361e21c920c53af12405',
+            'upload_date': '20160311',
+        }
+    }, {
+        # DW documentaries, only last for one or two weeks
+        'url': 'http://www.dw.com/en/documentaries-welcome-to-the-90s-2016-05-21/e-19220158-9798',
+        'md5': '56b6214ef463bfb9a3b71aeb886f3cf1',
+        'info_dict': {
+            'id': '19274438',
+            'ext': 'mp4',
+            'title': 'Welcome to the 90s – Hip Hop',
+            'description': 'Welcome to the 90s - The Golden Decade of Hip Hop',
+            'upload_date': '20160521',
+        },
+        'skip': 'Video removed',
+    }]
+
+    def _real_extract(self, url):
+        media_id = self._match_id(url)
+        webpage = self._download_webpage(url, media_id)
+        hidden_inputs = self._hidden_inputs(webpage)
+        title = hidden_inputs['media_title']
+        media_id = hidden_inputs.get('media_id') or media_id
+
+        if hidden_inputs.get('player_type') == 'video' and hidden_inputs.get('stream_file') == '1':
+            formats = self._extract_smil_formats(
+                'http://www.dw.com/smil/v-%s' % media_id, media_id,
+                transform_source=lambda s: s.replace(
+                    'rtmp://tv-od.dw.de/flash/',
+                    'http://tv-download.dw.de/dwtv_video/flv/'))
+            self._sort_formats(formats)
+        else:
+            formats = [{'url': hidden_inputs['file_name']}]
+
+        upload_date = hidden_inputs.get('display_date')
+        if not upload_date:
+            upload_date = self._html_search_regex(
+                r'<span[^>]+class="date">([0-9.]+)\s*\|', webpage,
+                'upload date', default=None)
+            upload_date = unified_strdate(upload_date)
+
+        return {
+            'id': media_id,
+            'title': title,
+            'description': self._og_search_description(webpage),
+            'thumbnail': hidden_inputs.get('preview_image'),
+            'duration': int_or_none(hidden_inputs.get('file_duration')),
+            'upload_date': upload_date,
+            'formats': formats,
+        }
+
+
+class DWArticleIE(InfoExtractor):
+    IE_NAME = 'dw:article'
+    _VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+a-(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://www.dw.com/en/no-hope-limited-options-for-refugees-in-idomeni/a-19111009',
+        'md5': '8ca657f9d068bbef74d6fc38b97fc869',
+        'info_dict': {
+            'id': '19105868',
+            'ext': 'mp4',
+            'title': 'The harsh life of refugees in Idomeni',
+            'description': 'md5:196015cc7e48ebf474db9399420043c7',
+            'upload_date': '20160310',
+        }
+    }
+
+    def _real_extract(self, url):
+        article_id = self._match_id(url)
+        webpage = self._download_webpage(url, article_id)
+        hidden_inputs = self._hidden_inputs(webpage)
+        media_id = hidden_inputs['media_id']
+        media_path = self._search_regex(r'href="([^"]+av-%s)"\s+class="overlayLink"' % media_id, webpage, 'media url')
+        media_url = compat_urlparse.urljoin(url, media_path)
+        return self.url_result(media_url, 'DW', media_id)
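
The transform_source hook in DWIE above is the interesting bit: the SMIL manifest is patched before parsing so that RTMP stream URLs point at their HTTP download mirror, turning rtmp-only formats into plain downloads. The substitution itself is just a string replace:

    # Sketch only: the rewrite applied to the SMIL text before parsing.
    def rewrite_smil(smil_text):
        return smil_text.replace(
            'rtmp://tv-od.dw.de/flash/',
            'http://tv-download.dw.de/dwtv_video/flv/')

    print(rewrite_smil('<video src="rtmp://tv-od.dw.de/flash/clip.flv"/>'))
    # -> <video src="http://tv-download.dw.de/dwtv_video/flv/clip.flv"/>
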
index 7bbf617d468f9d5295ee2e1123452679a21a7e4b..113a4966fb3c444f5759e617e36b293ee88e6872 100644 (file)
@@ -4,9 +4,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_HTTPError
 from ..utils import (
     ExtractorError,
     int_or_none,
+    url_basename,
 )
 
 
@@ -21,7 +23,7 @@ class EaglePlatformIE(InfoExtractor):
     _TESTS = [{
         # http://lenta.ru/news/2015/03/06/navalny/
         'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201',
-        'md5': '70f5187fb620f2c1d503b3b22fd4efe3',
+        # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
         'info_dict': {
             'id': '227304',
             'ext': 'mp4',
@@ -36,7 +38,7 @@ class EaglePlatformIE(InfoExtractor):
         # http://muz-tv.ru/play/7129/
         # http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true
         'url': 'eagleplatform:media.clipyou.ru:12820',
-        'md5': '90b26344ba442c8e44aa4cf8f301164a',
+        'md5': '358597369cf8ba56675c1df15e7af624',
         'info_dict': {
             'id': '12820',
             'ext': 'mp4',
@@ -55,8 +57,13 @@ class EaglePlatformIE(InfoExtractor):
             raise ExtractorError(' '.join(response['errors']), expected=True)
 
     def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'):
-        response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note)
-        self._handle_error(response)
+        try:
+            response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note)
+        except ExtractorError as ee:
+            if isinstance(ee.cause, compat_HTTPError):
+                response = self._parse_json(ee.cause.read().decode('utf-8'), video_id)
+                self._handle_error(response)
+            raise
         return response
 
     def _get_video_url(self, url_or_request, video_id, note='Downloading JSON metadata'):
@@ -84,17 +91,33 @@ class EaglePlatformIE(InfoExtractor):
 
         secure_m3u8 = self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:')
 
+        formats = []
+
         m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON')
-        formats = self._extract_m3u8_formats(
+        m3u8_formats = self._extract_m3u8_formats(
             m3u8_url, video_id,
             'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
+        formats.extend(m3u8_formats)
 
         mp4_url = self._get_video_url(
             # Secure mp4 URL is constructed according to Player.prototype.mp4 from
             # http://lentaru.media.eagleplatform.com/player/player.js
             re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4', secure_m3u8),
             video_id, 'Downloading mp4 JSON')
-        formats.append({'url': mp4_url, 'format_id': 'mp4'})
+        mp4_url_basename = url_basename(mp4_url)
+        for m3u8_format in m3u8_formats:
+            mobj = re.search(r'/([^/]+)/index\.m3u8', m3u8_format['url'])
+            if mobj:
+                http_format = m3u8_format.copy()
+                video_url = mp4_url.replace(mp4_url_basename, mobj.group(1))
+                if not self._is_valid_url(video_url, video_id):
+                    continue
+                http_format.update({
+                    'url': video_url,
+                    'format_id': m3u8_format['format_id'].replace('hls', 'http'),
+                    'protocol': 'http',
+                })
+                formats.append(http_format)
 
         self._sort_formats(formats)
 
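The HLS-to-progressive mapping above assumes each variant .../<stream>/index.m3u8 has a direct MP4 sibling reachable by swapping <stream> in for the basename of the generic mp4 URL, with the format id mirrored from hls to http. An isolated sketch (url_basename approximated with posixpath):

    # Sketch only: derives one progressive format from an HLS variant.
    import posixpath
    import re

    def http_format_from_hls(m3u8_format, mp4_url):
        mobj = re.search(r'/([^/]+)/index\.m3u8', m3u8_format['url'])
        if not mobj:
            return None
        basename = posixpath.basename(mp4_url)
        http_format = dict(m3u8_format)
        http_format.update({
            'url': mp4_url.replace(basename, mobj.group(1)),
            'format_id': m3u8_format['format_id'].replace('hls', 'http'),
            'protocol': 'http',
        })
        return http_format

    print(http_format_from_hls(
        {'url': 'http://cdn.example/v/360p/index.m3u8', 'format_id': 'hls-360'},
        'http://cdn.example/v/video.mp4'))
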
index b6bfd2b2dedc5388ef383a3cd8853bbb0c541f68..c97682cd367edebfd9fc6a476ad073cb03240054 100644 (file)
@@ -4,10 +4,10 @@ from .common import InfoExtractor
 
 
 class EbaumsWorldIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.ebaumsworld\.com/video/watch/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?ebaumsworld\.com/videos/[^/]+/(?P<id>\d+)'
 
     _TEST = {
-        'url': 'http://www.ebaumsworld.com/video/watch/83367677/',
+        'url': 'http://www.ebaumsworld.com/videos/a-giant-python-opens-the-door/83367677/',
         'info_dict': {
             'id': '83367677',
             'ext': 'mp4',
index d2d94049d368e74413d93ca40628e3d5174a7675..6b7cc652fe43c60cb8d8326f1cf6bd0c51fbd59f 100644 (file)
@@ -7,7 +7,7 @@ from .common import InfoExtractor
 
 
 class EchoMskIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?echo\.msk\.ru/sounds/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?echo\.msk\.ru/sounds/(?P<id>\d+)'
     _TEST = {
         'url': 'http://www.echo.msk.ru/sounds/1464134.html',
         'md5': '2e44b3b78daff5b458e4dbc37f191f7c',
index 00a69e6312aede6069e062c6abff29137939daa9..8c725a4e631860584781b116e72b02dd05813fc2 100644 (file)
@@ -9,7 +9,7 @@ class ElPaisIE(InfoExtractor):
     _VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])'
     IE_DESC = 'El País'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html',
         'md5': '98406f301f19562170ec071b83433d55',
         'info_dict': {
@@ -19,30 +19,41 @@ class ElPaisIE(InfoExtractor):
             'description': 'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.',
             'upload_date': '20140206',
         }
-    }
+    }, {
+        'url': 'http://elcomidista.elpais.com/elcomidista/2016/02/24/articulo/1456340311_668921.html#?id_externo_nwl=newsletter_diaria20160303t',
+        'md5': '3bd5b09509f3519d7d9e763179b013de',
+        'info_dict': {
+            'id': '1456340311_668921',
+            'ext': 'mp4',
+            'title': 'Cómo hacer el mejor café con cafetera italiana',
+            'description': 'Que sí, que las cápsulas son cómodas. Pero si le pides algo más a la vida, quizá deberías aprender a usar bien la cafetera italiana. No tienes más que ver este vídeo y seguir sus siete normas básicas.',
+            'upload_date': '20160303',
+        }
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         prefix = self._html_search_regex(
-            r'var url_cache = "([^"]+)";', webpage, 'URL prefix')
+            r'var\s+url_cache\s*=\s*"([^"]+)";', webpage, 'URL prefix')
         video_suffix = self._search_regex(
-            r"URLMediaFile = url_cache \+ '([^']+)'", webpage, 'video URL')
+            r"(?:URLMediaFile|urlVideo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", webpage, 'video URL')
         video_url = prefix + video_suffix
         thumbnail_suffix = self._search_regex(
-            r"URLMediaStill = url_cache \+ '([^']+)'", webpage, 'thumbnail URL',
-            fatal=False)
+            r"(?:URLMediaStill|urlFotogramaFijo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'",
+            webpage, 'thumbnail URL', fatal=False)
         thumbnail = (
             None if thumbnail_suffix is None
             else prefix + thumbnail_suffix)
         title = self._html_search_regex(
-            '<h2 class="entry-header entry-title.*?>(.*?)</h2>',
+            (r"tituloVideo\s*=\s*'([^']+)'", webpage, 'title',
+            (r"tituloVideo\s*=\s*'([^']+)'",
             webpage, 'title')
-        date_str = self._search_regex(
+        upload_date = unified_strdate(self._search_regex(
             r'<p class="date-header date-int updated"\s+title="([^"]+)">',
-            webpage, 'upload date', fatal=False)
-        upload_date = (None if date_str is None else unified_strdate(date_str))
+            webpage, 'upload date', default=None) or self._html_search_meta(
+            'datePublished', webpage, 'timestamp'))
 
         return {
             'id': video_id,
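
The scraping pattern above amounts to a shared url_cache prefix plus per-asset suffixes, with alternations in the regexes because old and new page layouts use different variable names (URLMediaFile vs urlVideo_N, URLMediaStill vs urlFotogramaFijo_N). A reduced sketch against an invented page snippet:

    # Sketch only: prefix + suffix assembly with the layout alternation.
    import re

    PAGE = '''
    var url_cache = "http://ep00.epimg.net/";
    urlVideo_1 = url_cache + 'video/clip.mp4';
    '''

    prefix = re.search(r'var\s+url_cache\s*=\s*"([^"]+)";', PAGE).group(1)
    suffix = re.search(
        r"(?:URLMediaFile|urlVideo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'",
        PAGE).group(1)
    print(prefix + suffix)  # http://ep00.epimg.net/video/clip.mp4
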
index e4180701d7d5fe7f538d029e8ffb27235b6135df..e5e57d48518d3dd3999dad650d0c32406079ce33 100644 (file)
@@ -1,21 +1,13 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..utils import (
-    url_basename,
-)
 
 
 class EngadgetIE(InfoExtractor):
-    _VALID_URL = r'''(?x)https?://www.engadget.com/
-        (?:video(?:/5min)?/(?P<id>\d+)|
-            [\d/]+/.*?)
-        '''
+    _VALID_URL = r'https?://www\.engadget\.com/video/(?P<id>\d+)'
 
     _TEST = {
-        'url': 'http://www.engadget.com/video/5min/518153925/',
+        'url': 'http://www.engadget.com/video/518153925/',
         'md5': 'c6820d4828a5064447a4d9fc73f312c9',
         'info_dict': {
             'id': '518153925',
@@ -27,15 +19,4 @@ class EngadgetIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-
-        if video_id is not None:
-            return self.url_result('5min:%s' % video_id)
-        else:
-            title = url_basename(url)
-            webpage = self._download_webpage(url, title)
-            ids = re.findall(r'<iframe[^>]+?playList=(\d+)', webpage)
-            return {
-                '_type': 'playlist',
-                'title': title,
-                'entries': [self.url_result('5min:%s' % vid) for vid in ids]
-            }
+        return self.url_result('5min:%s' % video_id)
index e006921ec3f8d2a0aff0e6bb0595148469b1c256..ac5d0fe2426a8e3fb213b2bb6d8f59fa8acc5fba 100644 (file)
@@ -11,8 +11,8 @@ from ..utils import (
 
 
 class EpornerIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\d+)/(?P<display_id>[\w-]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)/(?P<display_id>[\w-]+)'
+    _TESTS = [{
         'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
         'md5': '39d486f046212d8e1b911c52ab4691f8',
         'info_dict': {
@@ -23,8 +23,12 @@ class EpornerIE(InfoExtractor):
             'duration': 1838,
             'view_count': int,
             'age_limit': 18,
-        }
-    }
+        },
+    }, {
+        # New (May 2016) URL layout
+        'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
index 7fcd0151d8efdfd2b2378c0097b363e563e3171b..297f8a6f5fa4371415554bfe6c44d0745c262491 100644 (file)
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse
+from ..compat import compat_urllib_parse_urlencode
 from ..utils import (
     ExtractorError,
     unescapeHTML
@@ -43,7 +43,7 @@ class EroProfileIE(InfoExtractor):
         if username is None:
             return
 
-        query = compat_urllib_parse.urlencode({
+        query = compat_urllib_parse_urlencode({
             'username': username,
             'password': password,
             'url': 'http://www.eroprofile.com/',
index db4b263bcbf40a9cb133d2a9729e4fe07292bae3..66c08bec47d8aa639cf758bb3e083b9772230c76 100644 (file)
@@ -8,6 +8,7 @@ class ESPNIE(InfoExtractor):
     _VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P<id>[^/]+)'
     _TESTS = [{
         'url': 'http://espn.go.com/video/clip?id=10365079',
+        'md5': '60e5d097a523e767d06479335d1bdc58',
         'info_dict': {
             'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
             'ext': 'mp4',
@@ -15,21 +16,22 @@ class ESPNIE(InfoExtractor):
             'description': None,
         },
         'params': {
-            # m3u8 download
             'skip_download': True,
         },
+        'add_ie': ['OoyalaExternal'],
     }, {
         # intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season
         'url': 'http://espn.go.com/video/clip?id=2743663',
+        'md5': 'f4ac89b59afc7e2d7dbb049523df6768',
         'info_dict': {
             'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg',
             'ext': 'mp4',
             'title': 'Must-See Moments: Best of the MLS season',
         },
         'params': {
-            # m3u8 download
             'skip_download': True,
         },
+        'add_ie': ['OoyalaExternal'],
     }, {
         'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
         'only_matching': True,
index 0c0fe6d652b11c2d30746f7d4da232ee4a1d65e3..09ed4f2b5644c5c8d55ea944d98e1684acacc125 100644 (file)
@@ -8,7 +8,7 @@ from .common import InfoExtractor
 class ExfmIE(InfoExtractor):
     IE_NAME = 'exfm'
     IE_DESC = 'ex.fm'
-    _VALID_URL = r'http://(?:www\.)?ex\.fm/song/(?P<id>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?ex\.fm/song/(?P<id>[^/]+)'
     _SOUNDCLOUD_URL = r'http://(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream'
     _TESTS = [
         {
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
new file mode 100644 (file)
index 0000000..6fc5a18
--- /dev/null
@@ -0,0 +1,1064 @@
+# flake8: noqa
+from __future__ import unicode_literals
+
+from .abc import ABCIE
+from .abc7news import Abc7NewsIE
+from .abcnews import (
+    AbcNewsIE,
+    AbcNewsVideoIE,
+)
+from .academicearth import AcademicEarthCourseIE
+from .acast import (
+    ACastIE,
+    ACastChannelIE,
+)
+from .addanime import AddAnimeIE
+from .adobetv import (
+    AdobeTVIE,
+    AdobeTVShowIE,
+    AdobeTVChannelIE,
+    AdobeTVVideoIE,
+)
+from .adultswim import AdultSwimIE
+from .aenetworks import AENetworksIE
+from .afreecatv import AfreecaTVIE
+from .aftonbladet import AftonbladetIE
+from .airmozilla import AirMozillaIE
+from .aljazeera import AlJazeeraIE
+from .alphaporno import AlphaPornoIE
+from .animeondemand import AnimeOnDemandIE
+from .anitube import AnitubeIE
+from .anysex import AnySexIE
+from .aol import (
+    AolIE,
+    AolFeaturesIE,
+)
+from .allocine import AllocineIE
+from .aparat import AparatIE
+from .appleconnect import AppleConnectIE
+from .appletrailers import (
+    AppleTrailersIE,
+    AppleTrailersSectionIE,
+)
+from .archiveorg import ArchiveOrgIE
+from .ard import (
+    ARDIE,
+    ARDMediathekIE,
+)
+from .arte import (
+    ArteTvIE,
+    ArteTVPlus7IE,
+    ArteTVCreativeIE,
+    ArteTVConcertIE,
+    ArteTVInfoIE,
+    ArteTVFutureIE,
+    ArteTVCinemaIE,
+    ArteTVDDCIE,
+    ArteTVMagazineIE,
+    ArteTVEmbedIE,
+    ArteTVPlaylistIE,
+)
+from .atresplayer import AtresPlayerIE
+from .atttechchannel import ATTTechChannelIE
+from .audimedia import AudiMediaIE
+from .audioboom import AudioBoomIE
+from .audiomack import AudiomackIE, AudiomackAlbumIE
+from .azubu import AzubuIE, AzubuLiveIE
+from .baidu import BaiduVideoIE
+from .bambuser import BambuserIE, BambuserChannelIE
+from .bandcamp import BandcampIE, BandcampAlbumIE
+from .bbc import (
+    BBCCoUkIE,
+    BBCCoUkArticleIE,
+    BBCCoUkIPlayerPlaylistIE,
+    BBCCoUkPlaylistIE,
+    BBCIE,
+)
+from .beeg import BeegIE
+from .behindkink import BehindKinkIE
+from .beatportpro import BeatportProIE
+from .bet import BetIE
+from .bigflix import BigflixIE
+from .bild import BildIE
+from .bilibili import BiliBiliIE
+from .biobiochiletv import BioBioChileTVIE
+from .biqle import BIQLEIE
+from .bleacherreport import (
+    BleacherReportIE,
+    BleacherReportCMSIE,
+)
+from .blinkx import BlinkxIE
+from .bloomberg import BloombergIE
+from .bokecc import BokeCCIE
+from .bpb import BpbIE
+from .br import BRIE
+from .bravotv import BravoTVIE
+from .breakcom import BreakIE
+from .brightcove import (
+    BrightcoveLegacyIE,
+    BrightcoveNewIE,
+)
+from .buzzfeed import BuzzFeedIE
+from .byutv import BYUtvIE
+from .c56 import C56IE
+from .camdemy import (
+    CamdemyIE,
+    CamdemyFolderIE
+)
+from .camwithher import CamWithHerIE
+from .canalplus import CanalplusIE
+from .canalc2 import Canalc2IE
+from .canvas import CanvasIE
+from .carambatv import (
+    CarambaTVIE,
+    CarambaTVPageIE,
+)
+from .cbc import (
+    CBCIE,
+    CBCPlayerIE,
+)
+from .cbs import CBSIE
+from .cbslocal import CBSLocalIE
+from .cbsinteractive import CBSInteractiveIE
+from .cbsnews import (
+    CBSNewsIE,
+    CBSNewsLiveVideoIE,
+)
+from .cbssports import CBSSportsIE
+from .ccc import CCCIE
+from .cda import CDAIE
+from .ceskatelevize import CeskaTelevizeIE
+from .channel9 import Channel9IE
+from .chaturbate import ChaturbateIE
+from .chilloutzone import ChilloutzoneIE
+from .chirbit import (
+    ChirbitIE,
+    ChirbitProfileIE,
+)
+from .cinchcast import CinchcastIE
+from .cliprs import ClipRsIE
+from .clipfish import ClipfishIE
+from .cliphunter import CliphunterIE
+from .clipsyndicate import ClipsyndicateIE
+from .closertotruth import CloserToTruthIE
+from .cloudy import CloudyIE
+from .clubic import ClubicIE
+from .clyp import ClypIE
+from .cmt import CMTIE
+from .cnbc import CNBCIE
+from .cnn import (
+    CNNIE,
+    CNNBlogsIE,
+    CNNArticleIE,
+)
+from .coub import CoubIE
+from .collegerama import CollegeRamaIE
+from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
+from .comcarcoff import ComCarCoffIE
+from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
+from .commonprotocols import RtmpIE
+from .condenast import CondeNastIE
+from .cracked import CrackedIE
+from .crackle import CrackleIE
+from .criterion import CriterionIE
+from .crooksandliars import CrooksAndLiarsIE
+from .crunchyroll import (
+    CrunchyrollIE,
+    CrunchyrollShowPlaylistIE
+)
+from .cspan import CSpanIE
+from .ctsnews import CtsNewsIE
+from .cultureunplugged import CultureUnpluggedIE
+from .cwtv import CWTVIE
+from .dailymail import DailyMailIE
+from .dailymotion import (
+    DailymotionIE,
+    DailymotionPlaylistIE,
+    DailymotionUserIE,
+    DailymotionCloudIE,
+)
+from .daum import (
+    DaumIE,
+    DaumClipIE,
+    DaumPlaylistIE,
+    DaumUserIE,
+)
+from .dbtv import DBTVIE
+from .dcn import (
+    DCNIE,
+    DCNVideoIE,
+    DCNLiveIE,
+    DCNSeasonIE,
+)
+from .dctp import DctpTvIE
+from .deezer import DeezerPlaylistIE
+from .democracynow import DemocracynowIE
+from .dfb import DFBIE
+from .dhm import DHMIE
+from .dotsub import DotsubIE
+from .douyutv import DouyuTVIE
+from .dplay import DPlayIE
+from .dramafever import (
+    DramaFeverIE,
+    DramaFeverSeriesIE,
+)
+from .dreisat import DreiSatIE
+from .drbonanza import DRBonanzaIE
+from .drtuber import DrTuberIE
+from .drtv import DRTVIE
+from .dvtv import DVTVIE
+from .dumpert import DumpertIE
+from .defense import DefenseGouvFrIE
+from .discovery import DiscoveryIE
+from .dispeak import DigitallySpeakingIE
+from .dropbox import DropboxIE
+from .dw import (
+    DWIE,
+    DWArticleIE,
+)
+from .eagleplatform import EaglePlatformIE
+from .ebaumsworld import EbaumsWorldIE
+from .echomsk import EchoMskIE
+from .ehow import EHowIE
+from .eighttracks import EightTracksIE
+from .einthusan import EinthusanIE
+from .eitb import EitbIE
+from .ellentv import (
+    EllenTVIE,
+    EllenTVClipsIE,
+)
+from .elpais import ElPaisIE
+from .embedly import EmbedlyIE
+from .engadget import EngadgetIE
+from .eporner import EpornerIE
+from .eroprofile import EroProfileIE
+from .escapist import EscapistIE
+from .espn import ESPNIE
+from .esri import EsriVideoIE
+from .europa import EuropaIE
+from .everyonesmixtape import EveryonesMixtapeIE
+from .exfm import ExfmIE
+from .expotv import ExpoTVIE
+from .extremetube import ExtremeTubeIE
+from .eyedotv import EyedoTVIE
+from .facebook import FacebookIE
+from .faz import FazIE
+from .fc2 import FC2IE
+from .fczenit import FczenitIE
+from .firstpost import FirstpostIE
+from .firsttv import FirstTVIE
+from .fivemin import FiveMinIE
+from .fivetv import FiveTVIE
+from .fktv import FKTVIE
+from .flickr import FlickrIE
+from .folketinget import FolketingetIE
+from .footyroom import FootyRoomIE
+from .formula1 import Formula1IE
+from .fourtube import FourTubeIE
+from .fox import FOXIE
+from .foxgay import FoxgayIE
+from .foxnews import FoxNewsIE
+from .foxsports import FoxSportsIE
+from .franceculture import (
+    FranceCultureIE,
+    FranceCultureEmissionIE,
+)
+from .franceinter import FranceInterIE
+from .francetv import (
+    PluzzIE,
+    FranceTvInfoIE,
+    FranceTVIE,
+    GenerationQuoiIE,
+    CultureboxIE,
+)
+from .freesound import FreesoundIE
+from .freespeech import FreespeechIE
+from .freevideo import FreeVideoIE
+from .funimation import FunimationIE
+from .funnyordie import FunnyOrDieIE
+from .gameinformer import GameInformerIE
+from .gamekings import GamekingsIE
+from .gameone import (
+    GameOneIE,
+    GameOnePlaylistIE,
+)
+from .gamersyde import GamersydeIE
+from .gamespot import GameSpotIE
+from .gamestar import GameStarIE
+from .gazeta import GazetaIE
+from .gdcvault import GDCVaultIE
+from .generic import GenericIE
+from .gfycat import GfycatIE
+from .giantbomb import GiantBombIE
+from .giga import GigaIE
+from .glide import GlideIE
+from .globo import (
+    GloboIE,
+    GloboArticleIE,
+)
+from .godtube import GodTubeIE
+from .godtv import GodTVIE
+from .goldenmoustache import GoldenMoustacheIE
+from .golem import GolemIE
+from .googledrive import GoogleDriveIE
+from .googleplus import GooglePlusIE
+from .googlesearch import GoogleSearchIE
+from .goshgay import GoshgayIE
+from .gputechconf import GPUTechConfIE
+from .groupon import GrouponIE
+from .hark import HarkIE
+from .hbo import HBOIE
+from .hearthisat import HearThisAtIE
+from .heise import HeiseIE
+from .hellporno import HellPornoIE
+from .helsinki import HelsinkiIE
+from .hentaistigma import HentaiStigmaIE
+from .historicfilms import HistoricFilmsIE
+from .hitbox import HitboxIE, HitboxLiveIE
+from .hornbunny import HornBunnyIE
+from .hotnewhiphop import HotNewHipHopIE
+from .hotstar import HotStarIE
+from .howcast import HowcastIE
+from .howstuffworks import HowStuffWorksIE
+from .huffpost import HuffPostIE
+from .hypem import HypemIE
+from .iconosquare import IconosquareIE
+from .ign import (
+    IGNIE,
+    OneUPIE,
+    PCMagIE,
+)
+from .imdb import (
+    ImdbIE,
+    ImdbListIE
+)
+from .imgur import (
+    ImgurIE,
+    ImgurAlbumIE,
+)
+from .ina import InaIE
+from .indavideo import (
+    IndavideoIE,
+    IndavideoEmbedIE,
+)
+from .infoq import InfoQIE
+from .instagram import InstagramIE, InstagramUserIE
+from .internetvideoarchive import InternetVideoArchiveIE
+from .iprima import IPrimaIE
+from .iqiyi import IqiyiIE
+from .ir90tv import Ir90TvIE
+from .ivi import (
+    IviIE,
+    IviCompilationIE
+)
+from .ivideon import IvideonIE
+from .izlesene import IzleseneIE
+from .jeuxvideo import JeuxVideoIE
+from .jove import JoveIE
+from .jwplatform import JWPlatformIE
+from .jpopsukitv import JpopsukiIE
+from .kaltura import KalturaIE
+from .kanalplay import KanalPlayIE
+from .kankan import KankanIE
+from .karaoketv import KaraoketvIE
+from .karrierevideos import KarriereVideosIE
+from .keezmovies import KeezMoviesIE
+from .khanacademy import KhanAcademyIE
+from .kickstarter import KickStarterIE
+from .keek import KeekIE
+from .konserthusetplay import KonserthusetPlayIE
+from .kontrtube import KontrTubeIE
+from .krasview import KrasViewIE
+from .ku6 import Ku6IE
+from .kusi import KUSIIE
+from .kuwo import (
+    KuwoIE,
+    KuwoAlbumIE,
+    KuwoChartIE,
+    KuwoSingerIE,
+    KuwoCategoryIE,
+    KuwoMvIE,
+)
+from .la7 import LA7IE
+from .laola1tv import Laola1TvIE
+from .learnr import LearnrIE
+from .lecture2go import Lecture2GoIE
+from .lemonde import LemondeIE
+from .leeco import (
+    LeIE,
+    LePlaylistIE,
+    LetvCloudIE,
+)
+from .libraryofcongress import LibraryOfCongressIE
+from .libsyn import LibsynIE
+from .lifenews import (
+    LifeNewsIE,
+    LifeEmbedIE,
+)
+from .limelight import (
+    LimelightMediaIE,
+    LimelightChannelIE,
+    LimelightChannelListIE,
+)
+from .litv import LiTVIE
+from .liveleak import LiveLeakIE
+from .livestream import (
+    LivestreamIE,
+    LivestreamOriginalIE,
+    LivestreamShortenerIE,
+)
+from .lnkgo import LnkGoIE
+from .localnews8 import LocalNews8IE
+from .lovehomeporn import LoveHomePornIE
+from .lrt import LRTIE
+from .lynda import (
+    LyndaIE,
+    LyndaCourseIE
+)
+from .m6 import M6IE
+from .macgamestore import MacGameStoreIE
+from .mailru import MailRuIE
+from .makerschannel import MakersChannelIE
+from .makertv import MakerTVIE
+from .matchtv import MatchTVIE
+from .mdr import MDRIE
+from .metacafe import MetacafeIE
+from .metacritic import MetacriticIE
+from .mgoon import MgoonIE
+from .mgtv import MGTVIE
+from .microsoftvirtualacademy import (
+    MicrosoftVirtualAcademyIE,
+    MicrosoftVirtualAcademyCourseIE,
+)
+from .minhateca import MinhatecaIE
+from .ministrygrid import MinistryGridIE
+from .minoto import MinotoIE
+from .miomio import MioMioIE
+from .mit import TechTVMITIE, MITIE, OCWMITIE
+from .mitele import MiTeleIE
+from .mixcloud import (
+    MixcloudIE,
+    MixcloudUserIE,
+    MixcloudPlaylistIE,
+    MixcloudStreamIE,
+)
+from .mlb import MLBIE
+from .mnet import MnetIE
+from .mpora import MporaIE
+from .moevideo import MoeVideoIE
+from .mofosex import MofosexIE
+from .mojvideo import MojvideoIE
+from .moniker import MonikerIE
+from .morningstar import MorningstarIE
+from .motherless import MotherlessIE
+from .motorsport import MotorsportIE
+from .movieclips import MovieClipsIE
+from .moviezine import MoviezineIE
+from .mtv import (
+    MTVIE,
+    MTVServicesEmbeddedIE,
+    MTVIggyIE,
+    MTVDEIE,
+)
+from .muenchentv import MuenchenTVIE
+from .musicplayon import MusicPlayOnIE
+from .mwave import MwaveIE, MwaveMeetGreetIE
+from .myspace import MySpaceIE, MySpaceAlbumIE
+from .myspass import MySpassIE
+from .myvi import MyviIE
+from .myvideo import MyVideoIE
+from .myvidster import MyVidsterIE
+from .nationalgeographic import (
+    NationalGeographicIE,
+    NationalGeographicChannelIE,
+)
+from .naver import NaverIE
+from .nba import NBAIE
+from .nbc import (
+    CSNNEIE,
+    NBCIE,
+    NBCNewsIE,
+    NBCSportsIE,
+    NBCSportsVPlayerIE,
+)
+from .ndr import (
+    NDRIE,
+    NJoyIE,
+    NDREmbedBaseIE,
+    NDREmbedIE,
+    NJoyEmbedIE,
+)
+from .ndtv import NDTVIE
+from .netzkino import NetzkinoIE
+from .nerdcubed import NerdCubedFeedIE
+from .neteasemusic import (
+    NetEaseMusicIE,
+    NetEaseMusicAlbumIE,
+    NetEaseMusicSingerIE,
+    NetEaseMusicListIE,
+    NetEaseMusicMvIE,
+    NetEaseMusicProgramIE,
+    NetEaseMusicDjRadioIE,
+)
+from .newgrounds import NewgroundsIE
+from .newstube import NewstubeIE
+from .nextmedia import (
+    NextMediaIE,
+    NextMediaActionNewsIE,
+    AppleDailyIE,
+)
+from .nextmovie import NextMovieIE
+from .nfb import NFBIE
+from .nfl import NFLIE
+from .nhl import (
+    NHLVideocenterIE,
+    NHLNewsIE,
+    NHLVideocenterCategoryIE,
+    NHLIE,
+)
+from .nick import (
+    NickIE,
+    NickDeIE,
+)
+from .niconico import NiconicoIE, NiconicoPlaylistIE
+from .ninegag import NineGagIE
+from .noco import NocoIE
+from .normalboots import NormalbootsIE
+from .nosvideo import NosVideoIE
+from .nova import NovaIE
+from .novamov import (
+    AuroraVidIE,
+    CloudTimeIE,
+    NowVideoIE,
+    VideoWeedIE,
+    WholeCloudIE,
+)
+from .nowness import (
+    NownessIE,
+    NownessPlaylistIE,
+    NownessSeriesIE,
+)
+from .nowtv import (
+    NowTVIE,
+    NowTVListIE,
+)
+from .noz import NozIE
+from .npo import (
+    NPOIE,
+    NPOLiveIE,
+    NPORadioIE,
+    NPORadioFragmentIE,
+    SchoolTVIE,
+    VPROIE,
+    WNLIE
+)
+from .npr import NprIE
+from .nrk import (
+    NRKIE,
+    NRKPlaylistIE,
+    NRKSkoleIE,
+    NRKTVIE,
+)
+from .ntvde import NTVDeIE
+from .ntvru import NTVRuIE
+from .nytimes import (
+    NYTimesIE,
+    NYTimesArticleIE,
+)
+from .nuvid import NuvidIE
+from .odnoklassniki import OdnoklassnikiIE
+from .oktoberfesttv import OktoberfestTVIE
+from .onionstudios import OnionStudiosIE
+from .ooyala import (
+    OoyalaIE,
+    OoyalaExternalIE,
+)
+from .openload import OpenloadIE
+from .ora import OraTVIE
+from .orf import (
+    ORFTVthekIE,
+    ORFOE1IE,
+    ORFFM4IE,
+    ORFIPTVIE,
+)
+from .pandoratv import PandoraTVIE
+from .parliamentliveuk import ParliamentLiveUKIE
+from .patreon import PatreonIE
+from .pbs import PBSIE
+from .people import PeopleIE
+from .periscope import (
+    PeriscopeIE,
+    PeriscopeUserIE,
+)
+from .philharmoniedeparis import PhilharmonieDeParisIE
+from .phoenix import PhoenixIE
+from .photobucket import PhotobucketIE
+from .pinkbike import PinkbikeIE
+from .pladform import PladformIE
+from .played import PlayedIE
+from .playfm import PlayFMIE
+from .plays import PlaysTVIE
+from .playtvak import PlaytvakIE
+from .playvid import PlayvidIE
+from .playwire import PlaywireIE
+from .pluralsight import (
+    PluralsightIE,
+    PluralsightCourseIE,
+)
+from .podomatic import PodomaticIE
+from .porn91 import Porn91IE
+from .pornhd import PornHdIE
+from .pornhub import (
+    PornHubIE,
+    PornHubPlaylistIE,
+    PornHubUserVideosIE,
+)
+from .pornotube import PornotubeIE
+from .pornovoisines import PornoVoisinesIE
+from .pornoxo import PornoXOIE
+from .presstv import PressTVIE
+from .primesharetv import PrimeShareTVIE
+from .promptfile import PromptFileIE
+from .prosiebensat1 import ProSiebenSat1IE
+from .puls4 import Puls4IE
+from .pyvideo import PyvideoIE
+from .qqmusic import (
+    QQMusicIE,
+    QQMusicSingerIE,
+    QQMusicAlbumIE,
+    QQMusicToplistIE,
+    QQMusicPlaylistIE,
+)
+from .r7 import (
+    R7IE,
+    R7ArticleIE,
+)
+from .radiocanada import (
+    RadioCanadaIE,
+    RadioCanadaAudioVideoIE,
+)
+from .radiode import RadioDeIE
+from .radiojavan import RadioJavanIE
+from .radiobremen import RadioBremenIE
+from .radiofrance import RadioFranceIE
+from .rai import (
+    RaiTVIE,
+    RaiIE,
+)
+from .rbmaradio import RBMARadioIE
+from .rds import RDSIE
+from .redtube import RedTubeIE
+from .regiotv import RegioTVIE
+from .restudy import RestudyIE
+from .reuters import ReutersIE
+from .reverbnation import ReverbNationIE
+from .revision3 import (
+    Revision3EmbedIE,
+    Revision3IE,
+)
+from .rice import RICEIE
+from .ringtv import RingTVIE
+from .ro220 import Ro220IE
+from .rockstargames import RockstarGamesIE
+from .rottentomatoes import RottenTomatoesIE
+from .roxwel import RoxwelIE
+from .rtbf import RTBFIE
+from .rte import RteIE, RteRadioIE
+from .rtlnl import RtlNlIE
+from .rtl2 import RTL2IE
+from .rtp import RTPIE
+from .rts import RTSIE
+from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
+from .rtvnh import RTVNHIE
+from .ruhd import RUHDIE
+from .ruleporn import RulePornIE
+from .rutube import (
+    RutubeIE,
+    RutubeChannelIE,
+    RutubeEmbedIE,
+    RutubeMovieIE,
+    RutubePersonIE,
+)
+from .rutv import RUTVIE
+from .ruutu import RuutuIE
+from .sandia import SandiaIE
+from .safari import (
+    SafariIE,
+    SafariApiIE,
+    SafariCourseIE,
+)
+from .sapo import SapoIE
+from .savefrom import SaveFromIE
+from .sbs import SBSIE
+from .scivee import SciVeeIE
+from .screencast import ScreencastIE
+from .screencastomatic import ScreencastOMaticIE
+from .screenjunkies import ScreenJunkiesIE
+from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE
+from .seeker import SeekerIE
+from .senateisvp import SenateISVPIE
+from .sendtonews import SendtoNewsIE
+from .servingsys import ServingSysIE
+from .sexu import SexuIE
+from .shahid import ShahidIE
+from .shared import SharedIE
+from .sharesix import ShareSixIE
+from .sina import SinaIE
+from .skynewsarabia import (
+    SkyNewsArabiaIE,
+    SkyNewsArabiaArticleIE,
+)
+from .slideshare import SlideshareIE
+from .slutload import SlutloadIE
+from .smotri import (
+    SmotriIE,
+    SmotriCommunityIE,
+    SmotriUserIE,
+    SmotriBroadcastIE,
+)
+from .snotr import SnotrIE
+from .sohu import SohuIE
+from .soundcloud import (
+    SoundcloudIE,
+    SoundcloudSetIE,
+    SoundcloudUserIE,
+    SoundcloudPlaylistIE,
+    SoundcloudSearchIE
+)
+from .soundgasm import (
+    SoundgasmIE,
+    SoundgasmProfileIE
+)
+from .southpark import (
+    SouthParkIE,
+    SouthParkDeIE,
+    SouthParkDkIE,
+    SouthParkEsIE,
+    SouthParkNlIE
+)
+from .spankbang import SpankBangIE
+from .spankwire import SpankwireIE
+from .spiegel import SpiegelIE, SpiegelArticleIE
+from .spiegeltv import SpiegeltvIE
+from .spike import SpikeIE
+from .stitcher import StitcherIE
+from .sport5 import Sport5IE
+from .sportbox import (
+    SportBoxIE,
+    SportBoxEmbedIE,
+)
+from .sportdeutschland import SportDeutschlandIE
+from .sportschau import SportschauIE
+from .srgssr import (
+    SRGSSRIE,
+    SRGSSRPlayIE,
+)
+from .srmediathek import SRMediathekIE
+from .ssa import SSAIE
+from .stanfordoc import StanfordOpenClassroomIE
+from .steam import SteamIE
+from .streamcloud import StreamcloudIE
+from .streamcz import StreamCZIE
+from .streetvoice import StreetVoiceIE
+from .sunporno import SunPornoIE
+from .svt import (
+    SVTIE,
+    SVTPlayIE,
+)
+from .swrmediathek import SWRMediathekIE
+from .syfy import SyfyIE
+from .sztvhu import SztvHuIE
+from .tagesschau import (
+    TagesschauPlayerIE,
+    TagesschauIE,
+)
+from .tapely import TapelyIE
+from .tass import TassIE
+from .tdslifeway import TDSLifewayIE
+from .teachertube import (
+    TeacherTubeIE,
+    TeacherTubeUserIE,
+)
+from .teachingchannel import TeachingChannelIE
+from .teamcoco import TeamcocoIE
+from .techtalks import TechTalksIE
+from .ted import TEDIE
+from .tele13 import Tele13IE
+from .telebruxelles import TeleBruxellesIE
+from .telecinco import TelecincoIE
+from .telegraaf import TelegraafIE
+from .telemb import TeleMBIE
+from .teletask import TeleTaskIE
+from .telewebion import TelewebionIE
+from .testurl import TestURLIE
+from .tf1 import TF1IE
+from .theintercept import TheInterceptIE
+from .theplatform import (
+    ThePlatformIE,
+    ThePlatformFeedIE,
+)
+from .thescene import TheSceneIE
+from .thesixtyone import TheSixtyOneIE
+from .thestar import TheStarIE
+from .thisamericanlife import ThisAmericanLifeIE
+from .thisav import ThisAVIE
+from .threeqsdn import ThreeQSDNIE
+from .tinypic import TinyPicIE
+from .tlc import TlcDeIE
+from .tmz import (
+    TMZIE,
+    TMZArticleIE,
+)
+from .tnaflix import (
+    TNAFlixNetworkEmbedIE,
+    TNAFlixIE,
+    EMPFlixIE,
+    MovieFapIE,
+)
+from .toggle import ToggleIE
+from .thvideo import (
+    THVideoIE,
+    THVideoPlaylistIE
+)
+from .toutv import TouTvIE
+from .toypics import ToypicsUserIE, ToypicsIE
+from .traileraddict import TrailerAddictIE
+from .trilulilu import TriluliluIE
+from .trollvids import TrollvidsIE
+from .trutube import TruTubeIE
+from .tube8 import Tube8IE
+from .tubitv import TubiTvIE
+from .tudou import (
+    TudouIE,
+    TudouPlaylistIE,
+    TudouAlbumIE,
+)
+from .tumblr import TumblrIE
+from .tunein import (
+    TuneInClipIE,
+    TuneInStationIE,
+    TuneInProgramIE,
+    TuneInTopicIE,
+    TuneInShortenerIE,
+)
+from .turbo import TurboIE
+from .tutv import TutvIE
+from .tv2 import (
+    TV2IE,
+    TV2ArticleIE,
+)
+from .tv3 import TV3IE
+from .tv4 import TV4IE
+from .tvc import (
+    TVCIE,
+    TVCArticleIE,
+)
+from .tvigle import TvigleIE
+from .tvland import TVLandIE
+from .tvp import (
+    TVPIE,
+    TVPSeriesIE,
+)
+from .tvplay import TVPlayIE
+from .tweakers import TweakersIE
+from .twentyfourvideo import TwentyFourVideoIE
+from .twentymin import TwentyMinutenIE
+from .twentytwotracks import (
+    TwentyTwoTracksIE,
+    TwentyTwoTracksGenreIE
+)
+from .twitch import (
+    TwitchVideoIE,
+    TwitchChapterIE,
+    TwitchVodIE,
+    TwitchProfileIE,
+    TwitchPastBroadcastsIE,
+    TwitchStreamIE,
+    TwitchClipsIE,
+)
+from .twitter import (
+    TwitterCardIE,
+    TwitterIE,
+    TwitterAmplifyIE,
+)
+from .udemy import (
+    UdemyIE,
+    UdemyCourseIE
+)
+from .udn import UDNEmbedIE
+from .digiteka import DigitekaIE
+from .unistra import UnistraIE
+from .urort import UrortIE
+from .usatoday import USATodayIE
+from .ustream import UstreamIE, UstreamChannelIE
+from .ustudio import (
+    UstudioIE,
+    UstudioEmbedIE,
+)
+from .varzesh3 import Varzesh3IE
+from .vbox7 import Vbox7IE
+from .veehd import VeeHDIE
+from .veoh import VeohIE
+from .vessel import VesselIE
+from .vesti import VestiIE
+from .vevo import (
+    VevoIE,
+    VevoPlaylistIE,
+)
+from .vgtv import (
+    BTArticleIE,
+    BTVestlendingenIE,
+    VGTVIE,
+)
+from .vh1 import VH1IE
+from .vice import (
+    ViceIE,
+    ViceShowIE,
+)
+from .viddler import ViddlerIE
+from .videodetective import VideoDetectiveIE
+from .videofyme import VideofyMeIE
+from .videomega import VideoMegaIE
+from .videomore import (
+    VideomoreIE,
+    VideomoreVideoIE,
+    VideomoreSeasonIE,
+)
+from .videopremium import VideoPremiumIE
+from .videott import VideoTtIE
+from .vidio import VidioIE
+from .vidme import (
+    VidmeIE,
+    VidmeUserIE,
+    VidmeUserLikesIE,
+)
+from .vidzi import VidziIE
+from .vier import VierIE, VierVideosIE
+from .viewlift import (
+    ViewLiftIE,
+    ViewLiftEmbedIE,
+)
+from .viewster import ViewsterIE
+from .viidea import ViideaIE
+from .vimeo import (
+    VimeoIE,
+    VimeoAlbumIE,
+    VimeoChannelIE,
+    VimeoGroupsIE,
+    VimeoLikesIE,
+    VimeoOndemandIE,
+    VimeoReviewIE,
+    VimeoUserIE,
+    VimeoWatchLaterIE,
+)
+from .vimple import VimpleIE
+from .vine import (
+    VineIE,
+    VineUserIE,
+)
+from .viki import (
+    VikiIE,
+    VikiChannelIE,
+)
+from .vk import (
+    VKIE,
+    VKUserVideosIE,
+)
+from .vlive import VLiveIE
+from .vodlocker import VodlockerIE
+from .voicerepublic import VoiceRepublicIE
+from .voxmedia import VoxMediaIE
+from .vporn import VpornIE
+from .vrt import VRTIE
+from .vube import VubeIE
+from .vuclip import VuClipIE
+from .walla import WallaIE
+from .washingtonpost import (
+    WashingtonPostIE,
+    WashingtonPostArticleIE,
+)
+from .wat import WatIE
+from .watchindianporn import WatchIndianPornIE
+from .wdr import (
+    WDRIE,
+    WDRMobileIE,
+)
+from .webofstories import (
+    WebOfStoriesIE,
+    WebOfStoriesPlaylistIE,
+)
+from .weiqitv import WeiqiTVIE
+from .wimp import WimpIE
+from .wistia import WistiaIE
+from .worldstarhiphop import WorldStarHipHopIE
+from .wrzuta import (
+    WrzutaIE,
+    WrzutaPlaylistIE,
+)
+from .wsj import WSJIE
+from .xbef import XBefIE
+from .xboxclips import XboxClipsIE
+from .xfileshare import XFileShareIE
+from .xhamster import (
+    XHamsterIE,
+    XHamsterEmbedIE,
+)
+from .xiami import (
+    XiamiSongIE,
+    XiamiAlbumIE,
+    XiamiArtistIE,
+    XiamiCollectionIE
+)
+from .xminus import XMinusIE
+from .xnxx import XNXXIE
+from .xstream import XstreamIE
+from .xtube import XTubeUserIE, XTubeIE
+from .xuite import XuiteIE
+from .xvideos import XVideosIE
+from .xxxymovies import XXXYMoviesIE
+from .yahoo import (
+    YahooIE,
+    YahooSearchIE,
+)
+from .yam import YamIE
+from .yandexmusic import (
+    YandexMusicTrackIE,
+    YandexMusicAlbumIE,
+    YandexMusicPlaylistIE,
+)
+from .yesjapan import YesJapanIE
+from .yinyuetai import YinYueTaiIE
+from .ynet import YnetIE
+from .youjizz import YouJizzIE
+from .youku import (
+    YoukuIE,
+    YoukuShowIE,
+)
+from .youporn import YouPornIE
+from .yourupload import YourUploadIE
+from .youtube import (
+    YoutubeIE,
+    YoutubeChannelIE,
+    YoutubeFavouritesIE,
+    YoutubeHistoryIE,
+    YoutubeLiveIE,
+    YoutubePlaylistIE,
+    YoutubePlaylistsIE,
+    YoutubeRecommendedIE,
+    YoutubeSearchDateIE,
+    YoutubeSearchIE,
+    YoutubeSearchURLIE,
+    YoutubeShowIE,
+    YoutubeSubscriptionsIE,
+    YoutubeTruncatedIDIE,
+    YoutubeTruncatedURLIE,
+    YoutubeUserIE,
+    YoutubeWatchLaterIE,
+)
+from .zapiks import ZapiksIE
+from .zdf import ZDFIE, ZDFChannelIE
+from .zingmp3 import (
+    ZingMp3SongIE,
+    ZingMp3AlbumIE,
+)
+from .zippcast import ZippCastIE
diff --git a/youtube_dl/extractor/eyedotv.py b/youtube_dl/extractor/eyedotv.py
new file mode 100644
index 0000000..2f30351
--- /dev/null
+++ b/youtube_dl/extractor/eyedotv.py
@@ -0,0 +1,64 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    xpath_text,
+    parse_duration,
+    ExtractorError,
+)
+
+
+class EyedoTVIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?eyedo\.tv/[^/]+/(?:#!/)?Live/Detail/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'https://www.eyedo.tv/en-US/#!/Live/Detail/16301',
+        'md5': 'ba14f17995cdfc20c36ba40e21bf73f7',
+        'info_dict': {
+            'id': '16301',
+            'ext': 'mp4',
+            'title': 'Journée du conseil scientifique de l\'Afnic 2015',
+            'description': 'md5:4abe07293b2f73efc6e1c37028d58c98',
+            'uploader': 'Afnic Live',
+            'uploader_id': '8023',
+        }
+    }
+    _ROOT_URL = 'http://live.eyedo.net:1935/'
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        video_data = self._download_xml('http://eyedo.tv/api/live/GetLive/%s' % video_id, video_id)
+
+        def _add_ns(path):
+            return self._xpath_ns(path, 'http://schemas.datacontract.org/2004/07/EyeDo.Core.Implementation.Web.ViewModels.Api')
+
+        title = xpath_text(video_data, _add_ns('Titre'), 'title', True)
+        state_live_code = xpath_text(video_data, _add_ns('StateLiveCode'), 'state live code', True)
+        if state_live_code == 'avenir':
+            raise ExtractorError(
+                '%s said: We\'re sorry, but this video is not yet available.' % self.IE_NAME,
+                expected=True)
+
+        is_live = state_live_code == 'live'
+        m3u8_url = None
+        # http://eyedo.tv/Content/Html5/Scripts/html5view.js
+        if is_live:
+            if xpath_text(video_data, 'Cdn') == 'true':
+                m3u8_url = 'http://rrr.sz.xlcdn.com/?account=eyedo&file=A%s&type=live&service=wowza&protocol=http&output=playlist.m3u8' % video_id
+            else:
+                m3u8_url = self._ROOT_URL + 'w/%s/eyedo_720p/playlist.m3u8' % video_id
+        else:
+            m3u8_url = self._ROOT_URL + 'replay-w/%s/mp4:%s.mp4/playlist.m3u8' % (video_id, video_id)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': self._extract_m3u8_formats(
+                m3u8_url, video_id, 'mp4', 'm3u8' if is_live else 'm3u8_native'),
+            'description': xpath_text(video_data, _add_ns('Description')),
+            'duration': parse_duration(xpath_text(video_data, _add_ns('Duration'))),
+            'uploader': xpath_text(video_data, _add_ns('Createur')),
+            'uploader_id': xpath_text(video_data, _add_ns('CreateurId')),
+            'chapter': xpath_text(video_data, _add_ns('ChapitreTitre')),
+            'chapter_id': xpath_text(video_data, _add_ns('ChapitreId')),
+        }
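
A standalone sketch of the manifest selection this new extractor performs (following the html5view.js logic referenced in the comment). The endpoint strings are copied from the code above; the helper name and plain-value arguments are invented for illustration:

    # Hypothetical helper mirroring EyedoTVIE's m3u8 URL branching;
    # 'avenir' (not yet started) events are rejected before this point.
    ROOT_URL = 'http://live.eyedo.net:1935/'

    def pick_m3u8_url(video_id, state_live_code, cdn):
        if state_live_code != 'live':
            # finished events replay from a dedicated Wowza application
            return ROOT_URL + 'replay-w/%s/mp4:%s.mp4/playlist.m3u8' % (video_id, video_id)
        if cdn:
            # live events relayed through the CDN
            return ('http://rrr.sz.xlcdn.com/?account=eyedo&file=A%s&type=live'
                    '&service=wowza&protocol=http&output=playlist.m3u8' % video_id)
        # live events served straight from the origin
        return ROOT_URL + 'w/%s/eyedo_720p/playlist.m3u8' % video_id

    print(pick_m3u8_url('16301', 'replay', False))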
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
index 0a9a5ca718ce171ef211fee6f04b4685452add1e..9b87b37ae54da724c360e85429de804f29413bc6 100644
@@ -34,9 +34,12 @@ class FacebookIE(InfoExtractor):
                                 video/video\.php|
                                 photo\.php|
                                 video\.php|
-                                video/embed
-                            )\?(?:.*?)(?:v|video_id)=|
-                            [^/]+/videos/(?:[^/]+/)?
+                                video/embed|
+                                story\.php
+                            )\?(?:.*?)(?:v|video_id|story_fbid)=|
+                            [^/]+/videos/(?:[^/]+/)?|
+                            [^/]+/posts/|
+                            groups/[^/]+/permalink/
                         )|
                     facebook:
                 )
@@ -49,6 +52,8 @@ class FacebookIE(InfoExtractor):
 
     _CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
 
+    _VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
+
     _TESTS = [{
         'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
         'md5': '6a40d33c0eccbb1af76cf0485a052659',
@@ -80,6 +85,33 @@ class FacebookIE(InfoExtractor):
             'title': 'When you post epic content on instagram.com/433 8 million followers, this is ...',
             'uploader': 'Demy de Zeeuw',
         },
+    }, {
+        'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
+        'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6',
+        'info_dict': {
+            'id': '544765982287235',
+            'ext': 'mp4',
+            'title': '"What are you doing running in the snow?"',
+            'uploader': 'FailArmy',
+        }
+    }, {
+        'url': 'https://m.facebook.com/story.php?story_fbid=1035862816472149&id=116132035111903',
+        'md5': '1deb90b6ac27f7efcf6d747c8a27f5e3',
+        'info_dict': {
+            'id': '1035862816472149',
+            'ext': 'mp4',
+            'title': 'What the Flock Is Going On In New Zealand  Credit: ViralHog',
+            'uploader': 'S. Saint',
+        },
+    }, {
+        'note': 'swf params escaped',
+        'url': 'https://www.facebook.com/barackobama/posts/10153664894881749',
+        'md5': '97ba073838964d12c70566e0085c2b91',
+        'info_dict': {
+            'id': '10153664894881749',
+            'ext': 'mp4',
+            'title': 'Facebook video #10153664894881749',
+        },
     }, {
         'url': 'https://www.facebook.com/video.php?v=10204634152394104',
         'only_matching': True,
@@ -92,6 +124,9 @@ class FacebookIE(InfoExtractor):
     }, {
         'url': 'facebook:544765982287235',
         'only_matching': True,
+    }, {
+        'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/',
+        'only_matching': True,
     }]
 
     def _login(self):
@@ -160,19 +195,19 @@ class FacebookIE(InfoExtractor):
     def _real_initialize(self):
         self._login()
 
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        req = sanitized_Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
+    def _extract_from_url(self, url, video_id, fatal_if_no_video=True):
+        req = sanitized_Request(url)
         req.add_header('User-Agent', self._CHROME_USER_AGENT)
         webpage = self._download_webpage(req, video_id)
 
         video_data = None
 
-        BEFORE = '{swf.addParam(param[0], param[1]);});\n'
+        BEFORE = '{swf.addParam(param[0], param[1]);});'
         AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
-        m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
+        m = re.search(re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER), webpage)
         if m:
-            data = dict(json.loads(m.group(1)))
+            swf_params = m.group(1).replace('\\\\', '\\').replace('\\"', '"')
+            data = dict(json.loads(swf_params))
             params_raw = compat_urllib_parse_unquote(data['params'])
             video_data = json.loads(params_raw)['video_data']
 
@@ -185,13 +220,15 @@ class FacebookIE(InfoExtractor):
 
         if not video_data:
             server_js_data = self._parse_json(self._search_regex(
-                r'handleServerJS\(({.+})\);', webpage, 'server js data'), video_id)
+                r'handleServerJS\(({.+})\);', webpage, 'server js data', default='{}'), video_id)
             for item in server_js_data.get('instances', []):
                 if item[1][0] == 'VideoConfig':
                     video_data = video_data_list2dict(item[2][0]['videoData'])
                     break
 
         if not video_data:
+            if not fatal_if_no_video:
+                return webpage, False
             m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
             if m_msg is not None:
                 raise ExtractorError(
@@ -202,16 +239,21 @@ class FacebookIE(InfoExtractor):
 
         formats = []
         for format_id, f in video_data.items():
+            if f and isinstance(f, dict):
+                f = [f]
             if not f or not isinstance(f, list):
                 continue
             for quality in ('sd', 'hd'):
                 for src_type in ('src', 'src_no_ratelimit'):
                     src = f[0].get('%s_%s' % (quality, src_type))
                     if src:
+                        preference = -10 if format_id == 'progressive' else 0
+                        if quality == 'hd':
+                            preference += 5
                         formats.append({
                             'format_id': '%s_%s_%s' % (format_id, quality, src_type),
                             'url': src,
-                            'preference': -10 if format_id == 'progressive' else 0,
+                            'preference': preference,
                         })
             dash_manifest = f[0].get('dash_manifest')
             if dash_manifest:
@@ -234,39 +276,36 @@ class FacebookIE(InfoExtractor):
             video_title = 'Facebook video #%s' % video_id
         uploader = clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
 
-        return {
+        info_dict = {
             'id': video_id,
             'title': video_title,
             'formats': formats,
             'uploader': uploader,
         }
 
-
-class FacebookPostIE(InfoExtractor):
-    IE_NAME = 'facebook:post'
-    _VALID_URL = r'https?://(?:\w+\.)?facebook\.com/[^/]+/posts/(?P<id>\d+)'
-    _TEST = {
-        'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
-        'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6',
-        'info_dict': {
-            'id': '544765982287235',
-            'ext': 'mp4',
-            'title': '"What are you doing running in the snow?"',
-            'uploader': 'FailArmy',
-        }
-    }
+        return webpage, info_dict
 
     def _real_extract(self, url):
-        post_id = self._match_id(url)
+        video_id = self._match_id(url)
+
+        real_url = self._VIDEO_PAGE_TEMPLATE % video_id if url.startswith('facebook:') else url
+        webpage, info_dict = self._extract_from_url(real_url, video_id, fatal_if_no_video=False)
 
-        webpage = self._download_webpage(url, post_id)
+        if info_dict:
+            return info_dict
 
-        entries = [
-            self.url_result('facebook:%s' % video_id, FacebookIE.ie_key())
-            for video_id in self._parse_json(
-                self._search_regex(
-                    r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])',
-                    webpage, 'video ids', group='ids'),
-                post_id)]
+        if '/posts/' in url:
+            entries = [
+                self.url_result('facebook:%s' % vid, FacebookIE.ie_key())
+                for vid in self._parse_json(
+                    self._search_regex(
+                        r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])',
+                        webpage, 'video ids', group='ids'),
+                    video_id)]
 
-        return self.playlist_result(entries, post_id)
+            return self.playlist_result(entries, video_id)
+        else:
+            _, info_dict = self._extract_from_url(
+                self._VIDEO_PAGE_TEMPLATE % video_id,
+                video_id, fatal_if_no_video=True)
+            return info_dict
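
The swf-params change is the subtle part of this rewrite: the JSON blob sits inside JavaScript source, so the newline after BEFORE may be literal or escaped, and quotes and backslashes arrive backslash-escaped. A rough, self-contained illustration of the unescaping step, on a fabricated payload:

    import json

    # Escaped form as it would appear in the page's JavaScript (made up):
    escaped = '[[\\"params\\",\\"pipe%3A123\\"]]'
    # The two replace() calls from _extract_from_url above:
    swf_params = escaped.replace('\\\\', '\\').replace('\\"', '"')
    data = dict(json.loads(swf_params))
    print(data['params'])  # pipe%3A123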
diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py
index 9580f5c0c5d1f4eb9bd54eb76126744322ae85e9..c7d69ff1f980de46bd4ecce96e4ac301b1f1be59 100644
@@ -5,19 +5,18 @@ import hashlib
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_parse,
     compat_urllib_request,
     compat_urlparse,
 )
 from ..utils import (
-    encode_dict,
     ExtractorError,
     sanitized_Request,
+    urlencode_postdata,
 )
 
 
 class FC2IE(InfoExtractor):
-    _VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)*content/(?P<id>[^/]+)'
+    _VALID_URL = r'^https?://video\.fc2\.com/(?:[^/]+/)*content/(?P<id>[^/]+)'
     IE_NAME = 'fc2'
     _NETRC_MACHINE = 'fc2'
     _TESTS = [{
@@ -57,7 +56,7 @@ class FC2IE(InfoExtractor):
             'Submit': ' Login ',
         }
 
-        login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('utf-8')
+        login_data = urlencode_postdata(login_form_strs)
         request = sanitized_Request(
             'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data)
 
diff --git a/youtube_dl/extractor/fczenit.py b/youtube_dl/extractor/fczenit.py
index f1f150ef2ce41defbcee841d86fe4f9ada34d25d..8d1010b88c83dcbfd3e71e9f20275bf6fb9c9d21 100644
@@ -1,20 +1,19 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
+from ..compat import compat_urlparse
 
 
 class FczenitIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?fc-zenit\.ru/video/gl(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?fc-zenit\.ru/video/(?P<id>[0-9]+)'
     _TEST = {
-        'url': 'http://fc-zenit.ru/video/gl6785/',
-        'md5': '458bacc24549173fe5a5aa29174a5606',
+        'url': 'http://fc-zenit.ru/video/41044/',
+        'md5': '0e3fab421b455e970fa1aa3891e57df0',
         'info_dict': {
-            'id': '6785',
+            'id': '41044',
             'ext': 'mp4',
-            'title': '«Зенит-ТВ»: как Олег Шатов играл против «Урала»',
+            'title': 'Так пишется история: казанский разгром ЦСКА на «Зенит-ТВ»',
         },
     }
 
@@ -22,15 +21,23 @@ class FczenitIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        video_title = self._html_search_regex(r'<div class=\"photoalbum__title\">([^<]+)', webpage, 'title')
+        video_title = self._html_search_regex(
+            r'<[^>]+class=\"photoalbum__title\">([^<]+)', webpage, 'title')
+
+        video_items = self._parse_json(self._search_regex(
+            r'arrPath\s*=\s*JSON\.parse\(\'(.+)\'\)', webpage, 'video items'),
+            video_id)
 
-        bitrates_raw = self._html_search_regex(r'bitrates:.*\n(.*)\]', webpage, 'video URL')
-        bitrates = re.findall(r'url:.?\'(.+?)\'.*?bitrate:.?([0-9]{3}?)', bitrates_raw)
+        def merge_dicts(*dicts):
+            ret = {}
+            for a_dict in dicts:
+                ret.update(a_dict)
+            return ret
 
         formats = [{
-            'url': furl,
-            'tbr': tbr,
-        } for furl, tbr in bitrates]
+            'url': compat_urlparse.urljoin(url, video_url),
+            'tbr': int(tbr),
+        } for tbr, video_url in merge_dicts(*video_items).items()]
 
         self._sort_formats(formats)
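
A standalone sketch of the rewritten format extraction: arrPath on the page is a list of single-entry {bitrate: url} dicts (the sample values below are invented), which the new merge_dicts() helper folds into one mapping before the formats are built:

    def merge_dicts(*dicts):
        ret = {}
        for a_dict in dicts:
            ret.update(a_dict)
        return ret

    video_items = [{'300': 'http://fc-zenit.ru/video/41044_300.mp4'},
                   {'700': 'http://fc-zenit.ru/video/41044_700.mp4'}]
    formats = [{'url': video_url, 'tbr': int(tbr)}
               for tbr, video_url in merge_dicts(*video_items).items()]
    print(sorted(formats, key=lambda f: f['tbr']))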
 
diff --git a/youtube_dl/extractor/firstpost.py b/youtube_dl/extractor/firstpost.py
index 298227d5793770c82d8868256d655fa7ea3dc31c..e8936cb2468f78bd2c1b59008a13e9411204380e 100644
@@ -4,7 +4,7 @@ from .common import InfoExtractor
 
 
 class FirstpostIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html'
+    _VALID_URL = r'https?://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html'
 
     _TEST = {
         'url': 'http://www.firstpost.com/india/india-to-launch-indigenous-aircraft-carrier-monday-1025403.html',
diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py
index 510d4b108944d1f220c45ddc2fbe85cdad6114ca..88bca100763337011a444369543a1479296e097e 100644
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import int_or_none
+from ..compat import compat_xpath
+from ..utils import (
+    int_or_none,
+    qualities,
+    unified_strdate,
+    xpath_attr,
+    xpath_element,
+    xpath_text,
+    xpath_with_ns,
+)
 
 
 class FirstTVIE(InfoExtractor):
     IE_NAME = '1tv'
     IE_DESC = 'Первый канал'
-    _VALID_URL = r'http://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>.+)'
+    _VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+p?(?P<id>\d+)'
 
     _TESTS = [{
-        'url': 'http://www.1tv.ru/videoarchive/73390',
-        'md5': '777f525feeec4806130f4f764bc18a4f',
+        # single format via video_materials.json API
+        'url': 'http://www.1tv.ru/prj/inprivate/vypusk/35930',
+        'md5': '82a2777648acae812d58b3f5bd42882b',
         'info_dict': {
-            'id': '73390',
+            'id': '35930',
             'ext': 'mp4',
-            'title': 'Олимпийские канатные дороги',
-            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+            'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015',
+            'description': 'md5:357933adeede13b202c7c21f91b871b2',
             'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
-            'duration': 149,
-            'like_count': int,
-            'dislike_count': int,
+            'upload_date': '20150212',
+            'duration': 2694,
         },
-        'skip': 'Only works from Russia',
     }, {
-        'url': 'http://www.1tv.ru/prj/inprivate/vypusk/35930',
-        'md5': 'a1b6b60d530ebcf8daacf4565762bbaf',
+        # multiple formats via video_materials.json API
+        'url': 'http://www.1tv.ru/video_archive/projects/dobroeutro/p113641',
         'info_dict': {
-            'id': '35930',
+            'id': '113641',
             'ext': 'mp4',
-            'title': 'Наедине со всеми. Людмила Сенчина',
-            'description': 'md5:89553aed1d641416001fe8d450f06cb9',
+            'title': 'Весенняя аллергия. Доброе утро. Фрагмент выпуска от 07.04.2016',
+            'description': 'md5:8dcebb3dded0ff20fade39087fd1fee2',
             'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
-            'duration': 2694,
+            'upload_date': '20160407',
+            'duration': 179,
+            'formats': 'mincount:3',
+        },
+        'params': {
+            'skip_download': True,
         },
-        'skip': 'Only works from Russia',
+    }, {
+        # single format only available via ONE_ONLINE_VIDEOS.archive_single_xml API
+        'url': 'http://www.1tv.ru/video_archive/series/f7552/p47038',
+        'md5': '519d306c5b5669761fd8906c39dbee23',
+        'info_dict': {
+            'id': '47038',
+            'ext': 'mp4',
+            'title': '"Побег". Второй сезон. 3 серия',
+            'description': 'md5:3abf8f6b9bce88201c33e9a3d794a00b',
+            'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
+            'upload_date': '20120516',
+            'duration': 3080,
+        },
+    }, {
+        'url': 'http://www.1tv.ru/videoarchive/9967',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        webpage = self._download_webpage(url, video_id, 'Downloading page')
+        # Videos with multiple formats only available via this API
+        video = self._download_json(
+            'http://www.1tv.ru/video_materials.json?legacy_id=%s' % video_id,
+            video_id, fatal=False)
 
-        video_url = self._html_search_regex(
-            r'''(?s)(?:jwplayer\('flashvideoportal_1'\)\.setup\({|var\s+playlistObj\s*=).*?'file'\s*:\s*'([^']+)'.*?}\);''',
-            webpage, 'video URL')
+        description, thumbnail, upload_date, duration = [None] * 4
 
-        title = self._html_search_regex(
-            [r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
-             r"'title'\s*:\s*'([^']+)'"], webpage, 'title')
-        description = self._html_search_regex(
-            r'<div class="descr">\s*<div>&nbsp;</div>\s*<p>([^<]*)</p></div>',
-            webpage, 'description', default=None) or self._html_search_meta(
-                'description', webpage, 'description')
+        if video:
+            item = video[0]
+            title = item['title']
+            quality = qualities(('ld', 'sd', 'hd', ))
+            formats = [{
+                'url': f['src'],
+                'format_id': f.get('name'),
+                'quality': quality(f.get('name')),
+            } for f in item['mbr'] if f.get('src')]
+            thumbnail = item.get('poster')
+        else:
+            # Some videos are not available via video_materials.json
+            video = self._download_xml(
+                'http://www.1tv.ru/owa/win/ONE_ONLINE_VIDEOS.archive_single_xml?pid=%s' % video_id,
+                video_id)
+
+            NS_MAP = {
+                'media': 'http://search.yahoo.com/mrss/',
+            }
 
-        thumbnail = self._og_search_thumbnail(webpage)
-        duration = self._og_search_property(
-            'video:duration', webpage,
-            'video duration', fatal=False)
+            item = xpath_element(video, './channel/item', fatal=True)
+            title = xpath_text(item, './title', fatal=True)
+            formats = [{
+                'url': content.attrib['url'],
+            } for content in item.findall(
+                compat_xpath(xpath_with_ns('./media:content', NS_MAP))) if content.attrib.get('url')]
+            thumbnail = xpath_attr(
+                item, xpath_with_ns('./media:thumbnail', NS_MAP), 'url')
 
-        like_count = self._html_search_regex(
-            r'title="Понравилось".*?/></label> \[(\d+)\]',
-            webpage, 'like count', default=None)
-        dislike_count = self._html_search_regex(
-            r'title="Не понравилось".*?/></label> \[(\d+)\]',
-            webpage, 'dislike count', default=None)
+        self._sort_formats(formats)
+
+        webpage = self._download_webpage(url, video_id, 'Downloading page', fatal=False)
+        if webpage:
+            title = self._html_search_regex(
+                (r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
+                 r"'title'\s*:\s*'([^']+)'"),
+                webpage, 'title', default=None) or title
+            description = self._html_search_regex(
+                r'<div class="descr">\s*<div>&nbsp;</div>\s*<p>([^<]*)</p></div>',
+                webpage, 'description', default=None) or self._html_search_meta(
+                'description', webpage, 'description')
+            thumbnail = thumbnail or self._og_search_thumbnail(webpage)
+            duration = int_or_none(self._html_search_meta(
+                'video:duration', webpage, 'video duration', fatal=False))
+            upload_date = unified_strdate(self._html_search_meta(
+                'ya:ovs:upload_date', webpage, 'upload date', fatal=False))
 
         return {
             'id': video_id,
-            'url': video_url,
             'thumbnail': thumbnail,
             'title': title,
             'description': description,
+            'upload_date': upload_date,
             'duration': int_or_none(duration),
-            'like_count': int_or_none(like_count),
-            'dislike_count': int_or_none(dislike_count),
+            'formats': formats
         }
diff --git a/youtube_dl/extractor/fivemin.py b/youtube_dl/extractor/fivemin.py
index 2955965d908c15f21b3f8880993530779f97ec15..6b834541636533d808ce396ae456f980f989c731 100644
@@ -1,9 +1,11 @@
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_parse,
     compat_parse_qs,
+    compat_urllib_parse_urlencode,
     compat_urllib_parse_urlparse,
     compat_urlparse,
 )
@@ -16,12 +18,7 @@ from ..utils import (
 
 class FiveMinIE(InfoExtractor):
     IE_NAME = '5min'
-    _VALID_URL = r'''(?x)
-        (?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=|
-            https?://(?:(?:massively|www)\.)?joystiq\.com/video/|
-            5min:)
-        (?P<id>\d+)
-        '''
+    _VALID_URL = r'(?:5min:(?P<id>\d+)(?::(?P<sid>\d+))?|https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?P<query>.*))'
 
     _TESTS = [
         {
@@ -45,6 +42,7 @@ class FiveMinIE(InfoExtractor):
                 'title': 'How to Make a Next-Level Fruit Salad',
                 'duration': 184,
             },
+            'skip': 'no longer available',
         },
     ]
     _ERRORS = {
@@ -91,20 +89,33 @@ class FiveMinIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        sid = mobj.group('sid')
+
+        if mobj.group('query'):
+            qs = compat_parse_qs(mobj.group('query'))
+            if not qs.get('playList'):
+                raise ExtractorError('Invalid URL', expected=True)
+            video_id = qs['playList'][0]
+            if qs.get('sid'):
+                sid = qs['sid'][0]
+
         embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id
-        embed_page = self._download_webpage(embed_url, video_id,
-                                            'Downloading embed page')
-        sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
-        query = compat_urllib_parse.urlencode({
-            'func': 'GetResults',
-            'playlist': video_id,
-            'sid': sid,
-            'isPlayerSeed': 'true',
-            'url': embed_url,
-        })
+        if not sid:
+            embed_page = self._download_webpage(embed_url, video_id,
+                                                'Downloading embed page')
+            sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
+
         response = self._download_json(
-            'https://syn.5min.com/handlers/SenseHandler.ashx?' + query,
+            'https://syn.5min.com/handlers/SenseHandler.ashx?' +
+            compat_urllib_parse_urlencode({
+                'func': 'GetResults',
+                'playlist': video_id,
+                'sid': sid,
+                'isPlayerSeed': 'true',
+                'url': embed_url,
+            }),
             video_id)
         if not response['success']:
             raise ExtractorError(
@@ -118,9 +129,7 @@ class FiveMinIE(InfoExtractor):
         parsed_video_url = compat_urllib_parse_urlparse(compat_parse_qs(
             compat_urllib_parse_urlparse(info['EmbededURL']).query)['videoUrl'][0])
         for rendition in info['Renditions']:
-            if rendition['RenditionType'] == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(rendition['Url'], video_id, m3u8_id='hls'))
-            elif rendition['RenditionType'] == 'aac':
+            if rendition['RenditionType'] == 'aac' or rendition['RenditionType'] == 'm3u8':
                 continue
             else:
                 rendition_url = compat_urlparse.urlunparse(parsed_video_url._replace(path=replace_extension(parsed_video_url.path.replace('//', '/%s/' % rendition['ID']), rendition['RenditionType'])))
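
A stdlib equivalent of the new query handling: playList and sid are now read directly off the PlayerSeed.js URL, so the embed page only has to be fetched when sid is absent. The URL below is shaped like the supported links but is fabricated:

    try:
        from urllib.parse import urlparse, parse_qs  # Python 3
    except ImportError:
        from urlparse import urlparse, parse_qs  # Python 2

    url = 'https://embed.5min.com/Scripts/PlayerSeed.js?playList=518013791&sid=577'
    qs = parse_qs(urlparse(url).query)
    video_id = qs['playList'][0]
    sid = qs['sid'][0] if qs.get('sid') else None
    print(video_id, sid)  # 518013791 577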
diff --git a/youtube_dl/extractor/fktv.py b/youtube_dl/extractor/fktv.py
index 5f6e65daed2d5dc2c18c09a97450ae2a9c88e2df..a3a2915998dc1cc2fca8f5ccdf6cec6cac0d528b 100644
@@ -10,7 +10,7 @@ from ..utils import (
 
 class FKTVIE(InfoExtractor):
     IE_NAME = 'fernsehkritik.tv'
-    _VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?'
+    _VALID_URL = r'https?://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?'
 
     _TEST = {
         'url': 'http://fernsehkritik.tv/folge-1',
diff --git a/youtube_dl/extractor/flickr.py b/youtube_dl/extractor/flickr.py
index 18f439df978b59e8d48454300498e1de337c3ad4..a8e1bf42a433fd87f638e8b34ce5ab68464a9252 100644
@@ -1,7 +1,7 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse
+from ..compat import compat_urllib_parse_urlencode
 from ..utils import (
     ExtractorError,
     int_or_none,
@@ -24,13 +24,28 @@ class FlickrIE(InfoExtractor):
             'upload_date': '20110423',
             'uploader_id': '10922353@N03',
             'uploader': 'Forest Wander',
+            'uploader_url': 'https://www.flickr.com/photos/forestwander-nature-pictures/',
             'comment_count': int,
             'view_count': int,
             'tags': list,
+            'license': 'Attribution-ShareAlike',
         }
     }
-
     _API_BASE_URL = 'https://api.flickr.com/services/rest?'
+    # https://help.yahoo.com/kb/flickr/SLN25525.html
+    _LICENSES = {
+        '0': 'All Rights Reserved',
+        '1': 'Attribution-NonCommercial-ShareAlike',
+        '2': 'Attribution-NonCommercial',
+        '3': 'Attribution-NonCommercial-NoDerivs',
+        '4': 'Attribution',
+        '5': 'Attribution-ShareAlike',
+        '6': 'Attribution-NoDerivs',
+        '7': 'No known copyright restrictions',
+        '8': 'United States government work',
+        '9': 'Public Domain Dedication (CC0)',
+        '10': 'Public Domain Work',
+    }
 
     def _call_api(self, method, video_id, api_key, note, secret=None):
         query = {
@@ -42,7 +57,7 @@ class FlickrIE(InfoExtractor):
         }
         if secret:
             query['secret'] = secret
-        data = self._download_json(self._API_BASE_URL + compat_urllib_parse.urlencode(query), video_id, note)
+        data = self._download_json(self._API_BASE_URL + compat_urllib_parse_urlencode(query), video_id, note)
         if data['stat'] != 'ok':
             raise ExtractorError(data['message'])
         return data
@@ -75,6 +90,9 @@ class FlickrIE(InfoExtractor):
             self._sort_formats(formats)
 
             owner = video_info.get('owner', {})
+            uploader_id = owner.get('nsid')
+            uploader_path = owner.get('path_alias') or uploader_id
+            uploader_url = 'https://www.flickr.com/photos/%s/' % uploader_path if uploader_path else None
 
             return {
                 'id': video_id,
@@ -83,11 +101,13 @@ class FlickrIE(InfoExtractor):
                 'formats': formats,
                 'timestamp': int_or_none(video_info.get('dateuploaded')),
                 'duration': int_or_none(video_info.get('video', {}).get('duration')),
-                'uploader_id': owner.get('nsid'),
+                'uploader_id': uploader_id,
                 'uploader': owner.get('realname'),
+                'uploader_url': uploader_url,
                 'comment_count': int_or_none(video_info.get('comments', {}).get('_content')),
                 'view_count': int_or_none(video_info.get('views')),
-                'tags': [tag.get('_content') for tag in video_info.get('tags', {}).get('tag', [])]
+                'tags': [tag.get('_content') for tag in video_info.get('tags', {}).get('tag', [])],
+                'license': self._LICENSES.get(video_info.get('license')),
             }
         else:
             raise ExtractorError('not a video', expected=True)
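
A usage sketch for the new license mapping: the Flickr API returns the license as a numeric string, which the extractor translates to a human-readable name. The table below is an abridged copy of the one added above, and the video_info dict is fabricated:

    _LICENSES = {
        '0': 'All Rights Reserved',
        '4': 'Attribution',
        '5': 'Attribution-ShareAlike',
        '9': 'Public Domain Dedication (CC0)',
    }

    video_info = {'license': '5'}  # shape of the API response field
    print(_LICENSES.get(video_info.get('license')))  # Attribution-ShareAlike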
diff --git a/youtube_dl/extractor/footyroom.py b/youtube_dl/extractor/footyroom.py
index 370fd006fe015e4ab1a017d10fd8784b33ba034a..d2503ae2eff3d2e46497bbcba356af11db665452 100644
@@ -5,7 +5,7 @@ from .common import InfoExtractor
 
 
 class FootyRoomIE(InfoExtractor):
-    _VALID_URL = r'http://footyroom\.com/(?P<id>[^/]+)'
+    _VALID_URL = r'https?://footyroom\.com/(?P<id>[^/]+)'
     _TESTS = [{
         'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/',
         'info_dict': {
diff --git a/youtube_dl/extractor/formula1.py b/youtube_dl/extractor/formula1.py
new file mode 100644
index 0000000..322c41e
--- /dev/null
+++ b/youtube_dl/extractor/formula1.py
@@ -0,0 +1,26 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class Formula1IE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?formula1\.com/content/fom-website/en/video/\d{4}/\d{1,2}/(?P<id>.+?)\.html'
+    _TEST = {
+        'url': 'http://www.formula1.com/content/fom-website/en/video/2016/5/Race_highlights_-_Spain_2016.html',
+        'md5': '8c79e54be72078b26b89e0e111c0502b',
+        'info_dict': {
+            'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV',
+            'ext': 'flv',
+            'title': 'Race highlights - Spain 2016',
+        },
+        'add_ie': ['Ooyala'],
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        ooyala_embed_code = self._search_regex(
+            r'data-videoid="([^"]+)"', webpage, 'ooyala embed code')
+        return self.url_result(
+            'ooyala:%s' % ooyala_embed_code, 'Ooyala', ooyala_embed_code)
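
The whole new extractor is a thin delegation: it scrapes the Ooyala embed code out of a data-videoid attribute, then hands off through an ooyala: URL. The scraping step, rebuilt with plain re on a fabricated HTML snippet:

    import re

    webpage = '<div data-videoid="JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV"></div>'
    ooyala_embed_code = re.search(r'data-videoid="([^"]+)"', webpage).group(1)
    print('ooyala:%s' % ooyala_embed_code)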
diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py
index fa05af50d99ba1e580ba631717515f184bc838de..95c1abf94bde4cf6ff9cbe5ffe383106db2a2237 100644
@@ -16,6 +16,9 @@ class FOXIE(InfoExtractor):
             'title': 'Official Trailer: Gotham',
             'description': 'Tracing the rise of the great DC Comics Super-Villains and vigilantes, Gotham reveals an entirely new chapter that has never been told.',
             'duration': 129,
+            'timestamp': 1400020798,
+            'upload_date': '20140513',
+            'uploader': 'NEWA-FNG-FOXCOM',
         },
         'add_ie': ['ThePlatform'],
     }
diff --git a/youtube_dl/extractor/foxgay.py b/youtube_dl/extractor/foxgay.py
index 08b8ea36235993f78ce7a4ba05ac05f255ee4ea7..70c1a815d3121bf048da9510a00abf10dc516126 100644
@@ -4,7 +4,7 @@ from .common import InfoExtractor
 
 
 class FoxgayIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P<id>\d+)\.shtml'
+    _VALID_URL = r'https?://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P<id>\d+)\.shtml'
     _TEST = {
         'url': 'http://foxgay.com/videos/fuck-turkish-style-2582.shtml',
         'md5': '80d72beab5d04e1655a56ad37afe6841',
diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py
index 318ac013d44b9ca8ce9de5c77d67b2cd3c9bb1e1..b04da2415246974c4959c6baa7745e550c0c9fa4 100644
@@ -18,8 +18,8 @@ class FoxNewsIE(AMPIE):
                 'title': 'Frozen in Time',
                 'description': '16-year-old girl is size of toddler',
                 'duration': 265,
-                'timestamp': 1304411491,
-                'upload_date': '20110503',
+                'timestamp': 1304411491,
+                'upload_date': '20110503',
                 'thumbnail': 're:^https?://.*\.jpg$',
             },
         },
@@ -32,10 +32,14 @@ class FoxNewsIE(AMPIE):
                 'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal",
                 'description': "Congressman discusses president's plan",
                 'duration': 292,
-                'timestamp': 1417662047,
-                'upload_date': '20141204',
+                'timestamp': 1417662047,
+                'upload_date': '20141204',
                 'thumbnail': 're:^https?://.*\.jpg$',
             },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
         },
         {
             'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com',
diff --git a/youtube_dl/extractor/foxsports.py b/youtube_dl/extractor/foxsports.py
index df7665176ec3827f836e18b8ca46e3fec7c97c3b..a3bb98377cf4feb769d89769c40fe7098ae20743 100644
@@ -1,7 +1,10 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import smuggle_url
+from ..utils import (
+    smuggle_url,
+    update_url_query,
+)
 
 
 class FoxSportsIE(InfoExtractor):
@@ -9,11 +12,15 @@ class FoxSportsIE(InfoExtractor):
 
     _TEST = {
         'url': 'http://www.foxsports.com/video?vid=432609859715',
+        'md5': 'b49050e955bebe32c301972e4012ac17',
         'info_dict': {
-            'id': 'gA0bHB3Ladz3',
-            'ext': 'flv',
+            'id': 'i0qKWsk3qJaM',
+            'ext': 'mp4',
             'title': 'Courtney Lee on going up 2-0 in series vs. Blazers',
             'description': 'Courtney Lee talks about Memphis being focused.',
+            'upload_date': '20150423',
+            'timestamp': 1429761109,
+            'uploader': 'NEWA-FNG-FOXSPORTS',
         },
         'add_ie': ['ThePlatform'],
     }
@@ -28,5 +35,8 @@ class FoxSportsIE(InfoExtractor):
                 r"data-player-config='([^']+)'", webpage, 'data player config'),
             video_id)
 
-        return self.url_result(smuggle_url(
-            config['releaseURL'] + '&manifest=f4m', {'force_smil_url': True}))
+        return self.url_result(smuggle_url(update_url_query(
+            config['releaseURL'], {
+                'mbr': 'true',
+                'switch': 'http',
+            }), {'force_smil_url': True}))
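
What update_url_query() accomplishes here, sketched with the stdlib: merge the extra mbr/switch parameters into the ThePlatform releaseURL before it is smuggled on. The releaseURL below is a made-up placeholder, and this is an approximation of the utils helper, not a copy of it:

    try:
        from urllib.parse import urlparse, urlunparse, parse_qsl, urlencode
    except ImportError:
        from urlparse import urlparse, urlunparse, parse_qsl
        from urllib import urlencode

    def update_url_query(url, query):
        parsed = urlparse(url)
        qs = dict(parse_qsl(parsed.query))
        qs.update(query)  # new keys win over existing ones
        return urlunparse(parsed._replace(query=urlencode(qs)))

    print(update_url_query(
        'http://link.theplatform.com/s/example/abcDEF?format=SMIL',
        {'mbr': 'true', 'switch': 'http'}))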
diff --git a/youtube_dl/extractor/franceinter.py b/youtube_dl/extractor/franceinter.py
index 0388ba00c2a7ab5bfa4cf622c359c697615daec4..2369f868da4a39b1cf84c7cee6a5830859484082 100644
@@ -6,7 +6,7 @@ from ..utils import int_or_none
 
 
 class FranceInterIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://www.franceinter.fr/player/reecouter?play=793962',
         'md5': '4764932e466e6f6c79c317d2e74f6884',
diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
index 3f4ac30939cd38a7294f0d8eb815cb58c6f2fbcd..ad94e31f346cc97cd71ad1be9f6983a16b6df209 100644
@@ -60,28 +60,31 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
                     video_id, 'Downloading f4m manifest token', fatal=False)
                 if f4m_url:
                     formats.extend(self._extract_f4m_formats(
-                        f4m_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id, 1, format_id))
+                        f4m_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44',
+                        video_id, f4m_id=format_id, fatal=False))
             elif ext == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4', m3u8_id=format_id))
+                formats.extend(self._extract_m3u8_formats(
+                    video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id=format_id, fatal=False))
             elif video_url.startswith('rtmp'):
                 formats.append({
                     'url': video_url,
                     'format_id': 'rtmp-%s' % format_id,
                     'ext': 'flv',
-                    'preference': 1,
                 })
             else:
-                formats.append({
-                    'url': video_url,
-                    'format_id': format_id,
-                    'preference': -1,
-                })
+                if self._is_valid_url(video_url, video_id, format_id):
+                    formats.append({
+                        'url': video_url,
+                        'format_id': format_id,
+                    })
         self._sort_formats(formats)
 
         title = info['titre']
         subtitle = info.get('sous_titre')
         if subtitle:
             title += ' - %s' % subtitle
+        title = title.strip()
 
         subtitles = {}
         subtitles_list = [{
@@ -125,13 +128,13 @@ class PluzzIE(FranceTVBaseInfoExtractor):
 
 class FranceTvInfoIE(FranceTVBaseInfoExtractor):
     IE_NAME = 'francetvinfo.fr'
-    _VALID_URL = r'https?://(?:www|mobile)\.francetvinfo\.fr/.*/(?P<title>.+)\.html'
+    _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/.*/(?P<title>.+)\.html'
 
     _TESTS = [{
         'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
         'info_dict': {
             'id': '84981923',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Soir 3',
             'upload_date': '20130826',
             'timestamp': 1377548400,
@@ -139,6 +142,10 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
                 'fr': 'mincount:2',
             },
         },
+        'params': {
+            # m3u8 downloads
+            'skip_download': True,
+        },
     }, {
         'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html',
         'info_dict': {
@@ -155,11 +162,32 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
         'url': 'http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html',
         'md5': 'f485bda6e185e7d15dbc69b72bae993e',
         'info_dict': {
-            'id': '556e03339473995ee145930c',
+            'id': 'NI_173343',
             'ext': 'mp4',
             'title': 'Les entreprises familiales : le secret de la réussite',
             'thumbnail': 're:^https?://.*\.jpe?g$',
-        }
+            'timestamp': 1433273139,
+            'upload_date': '20150602',
+        },
+        'params': {
+            # m3u8 downloads
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://france3-regions.francetvinfo.fr/bretagne/cotes-d-armor/thalassa-echappee-breizh-ce-venredi-dans-les-cotes-d-armor-954961.html',
+        'md5': 'f485bda6e185e7d15dbc69b72bae993e',
+        'info_dict': {
+            'id': 'NI_657393',
+            'ext': 'mp4',
+            'title': 'Olivier Monthus, réalisateur de "Bretagne, le choix de l’Armor"',
+            'description': 'md5:a3264114c9d29aeca11ced113c37b16c',
+            'thumbnail': 're:^https?://.*\.jpe?g$',
+            'timestamp': 1458300695,
+            'upload_date': '20160318',
+        },
+        'params': {
+            'skip_download': True,
+        },
     }]
 
     def _real_extract(self, url):
@@ -172,7 +200,9 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
             return self.url_result(dmcloud_url, 'DailymotionCloud')
 
         video_id, catalogue = self._search_regex(
-            r'id-video=([^@]+@[^"]+)', webpage, 'video id').split('@')
+            (r'id-video=([^@]+@[^"]+)',
+             r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"'),
+            webpage, 'video id').split('@')
         return self._extract_video(video_id, catalogue)
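
The format loop reworked above now degrades gracefully: manifest downloads are non-fatal, and plain HTTP URLs are only kept after an _is_valid_url() probe. A compressed illustration of the dispatch order, not the real extractor (determine_ext is a crude stand-in for the utils helper, and the sample URLs are fabricated):

    def determine_ext(url):
        return url.rpartition('.')[2].lower()

    def classify(video_url, format_id):
        ext = determine_ext(video_url)
        if ext == 'f4m':
            return 'HDS manifest, fetched with fatal=False'
        elif ext == 'm3u8':
            return 'HLS manifest (m3u8_native), fetched with fatal=False'
        elif video_url.startswith('rtmp'):
            return 'rtmp-%s format, flv container' % format_id
        return '%s kept only if _is_valid_url() succeeds' % format_id

    for u, f in [('http://example/manifest.f4m', 'hds'),
                 ('http://example/master.m3u8', 'hls_v5'),
                 ('rtmp://example/stream', 'rtmp'),
                 ('http://example/video.mp4', 'http')]:
        print(classify(u, f))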
 
 
diff --git a/youtube_dl/extractor/freespeech.py b/youtube_dl/extractor/freespeech.py
index c210177f7297e174d38988a2e62f379a9a478305..1477708bbec14c38bf0db7801d09d68a22ff1546 100644
@@ -14,7 +14,7 @@ class FreespeechIE(InfoExtractor):
         'url': 'https://www.freespeech.org/video/obama-romney-campaign-colorado-ahead-debate-0',
         'info_dict': {
             'id': 'poKsVCZ64uU',
-            'ext': 'mp4',
+            'ext': 'webm',
             'title': 'Obama, Romney Campaign in Colorado Ahead of Debate',
             'description': 'Obama, Romney Campaign in Colorado Ahead of Debate',
             'uploader': 'freespeechtv',
diff --git a/youtube_dl/extractor/freevideo.py b/youtube_dl/extractor/freevideo.py
index c7bec027bbe37d8c13f645ae918655caeb5a53cd..cd8423a6faff4431c310e1258396bbb58dc92a14 100644
@@ -5,7 +5,7 @@ from ..utils import ExtractorError
 
 
 class FreeVideoIE(InfoExtractor):
-    _VALID_URL = r'^http://www.freevideo.cz/vase-videa/(?P<id>[^.]+)\.html(?:$|[?#])'
+    _VALID_URL = r'^https?://www.freevideo.cz/vase-videa/(?P<id>[^.]+)\.html(?:$|[?#])'
 
     _TEST = {
         'url': 'http://www.freevideo.cz/vase-videa/vysukany-zadecek-22033.html',
diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py
index 0f37ed7863c93da4813a6ac6be2d6402fc7bbd0a..0ad0d9b6a9fe789228487e861139fa2166d88767 100644
@@ -2,10 +2,13 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_HTTPError,
+    compat_urllib_parse_unquote_plus,
+)
 from ..utils import (
     clean_html,
     determine_ext,
-    encode_dict,
     int_or_none,
     sanitized_Request,
     ExtractorError,
@@ -28,6 +31,7 @@ class FunimationIE(InfoExtractor):
             'description': 'md5:1769f43cd5fc130ace8fd87232207892',
             'thumbnail': 're:https?://.*\.jpg',
         },
+        'skip': 'Access without user interaction is forbidden by CloudFlare, and video removed',
     }, {
         'url': 'http://www.funimation.com/shows/hacksign/videos/official/role-play',
         'info_dict': {
@@ -38,6 +42,7 @@ class FunimationIE(InfoExtractor):
             'description': 'md5:b602bdc15eef4c9bbb201bb6e6a4a2dd',
             'thumbnail': 're:https?://.*\.jpg',
         },
+        'skip': 'Access without user interaction is forbidden by CloudFlare',
     }, {
         'url': 'http://www.funimation.com/shows/attack-on-titan-junior-high/videos/promotional/broadcast-dub-preview',
         'info_dict': {
@@ -48,18 +53,49 @@ class FunimationIE(InfoExtractor):
             'description': 'md5:f8ec49c0aff702a7832cd81b8a44f803',
             'thumbnail': 're:https?://.*\.(?:jpg|png)',
         },
+        'skip': 'Access without user interaction is forbidden by CloudFlare',
     }]
 
+    _LOGIN_URL = 'http://www.funimation.com/login'
+
+    def _download_webpage(self, *args, **kwargs):
+        try:
+            return super(FunimationIE, self)._download_webpage(*args, **kwargs)
+        except ExtractorError as ee:
+            if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
+                response = ee.cause.read()
+                if b'>Please complete the security check to access<' in response:
+                    raise ExtractorError(
+                        'Access to funimation.com is blocked by CloudFlare. '
+                        'Please browse to http://www.funimation.com/, solve '
+                        'the reCAPTCHA, export browser cookies to a text file,'
+                        ' and then try again with --cookies YOUR_COOKIE_FILE.',
+                        expected=True)
+            raise
+
+    def _extract_cloudflare_session_ua(self, url):
+        ci_session_cookie = self._get_cookies(url).get('ci_session')
+        if ci_session_cookie:
+            ci_session = compat_urllib_parse_unquote_plus(ci_session_cookie.value)
+            # ci_session is a string serialized by PHP function serialize()
+            # This case is simple enough to use regular expressions only
+            return self._search_regex(
+                r'"user_agent";s:\d+:"([^"]+)"', ci_session, 'user agent',
+                default=None)
+
     def _login(self):
         (username, password) = self._get_login_info()
         if username is None:
             return
-        data = urlencode_postdata(encode_dict({
+        data = urlencode_postdata({
             'email_field': username,
             'password_field': password,
-        }))
-        login_request = sanitized_Request('http://www.funimation.com/login', data, headers={
-            'User-Agent': 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0',
+        })
+        user_agent = self._extract_cloudflare_session_ua(self._LOGIN_URL)
+        if not user_agent:
+            user_agent = 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0'
+        login_request = sanitized_Request(self._LOGIN_URL, data, headers={
+            'User-Agent': user_agent,
             'Content-Type': 'application/x-www-form-urlencoded'
         })
         login_page = self._download_webpage(
@@ -104,11 +140,16 @@ class FunimationIE(InfoExtractor):
             ('mobile', 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'),
         )
 
+        user_agent = self._extract_cloudflare_session_ua(url)
+        if user_agent:
+            USER_AGENTS = ((None, user_agent),)
+
         for kind, user_agent in USER_AGENTS:
             request = sanitized_Request(url)
             request.add_header('User-Agent', user_agent)
             webpage = self._download_webpage(
-                request, display_id, 'Downloading %s webpage' % kind)
+                request, display_id,
+                'Downloading %s webpage' % kind if kind else 'Downloading webpage')
 
             playlist = self._parse_json(
                 self._search_regex(
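
A sketch of what the new _extract_cloudflare_session_ua() relies on: ci_session is a URL-quoted, PHP-serialize()d blob, and since only the user_agent field matters, a regex over the serialized text suffices. The cookie value here is a fabricated minimal example:

    import re
    try:
        from urllib.parse import unquote_plus  # Python 3
    except ImportError:
        from urllib import unquote_plus  # Python 2

    cookie = 'a%3A1%3A%7Bs%3A10%3A%22user_agent%22%3Bs%3A11%3A%22Mozilla%2F5.0%22%3B%7D'
    ci_session = unquote_plus(cookie)  # a:1:{s:10:"user_agent";s:11:"Mozilla/5.0";}
    user_agent = re.search(r'"user_agent";s:\d+:"([^"]+)"', ci_session).group(1)
    print(user_agent)  # Mozilla/5.0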
diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py
index 4c4a87e2a3337bfb5de955a329c7edba2c338ad2..8c5ffc9e84cec305e9fc813a6366b360b7e36230 100644
@@ -46,8 +46,8 @@ class FunnyOrDieIE(InfoExtractor):
         links.sort(key=lambda link: 1 if link[1] == 'mp4' else 0)
 
         m3u8_url = self._search_regex(
-            r'<source[^>]+src=(["\'])(?P<url>.+?/master\.m3u8)\1',
-            webpage, 'm3u8 url', default=None, group='url')
+            r'<source[^>]+src=(["\'])(?P<url>.+?/master\.m3u8[^"\']*)\1',
+            webpage, 'm3u8 url', group='url')
 
         formats = []
 
diff --git a/youtube_dl/extractor/gameinformer.py b/youtube_dl/extractor/gameinformer.py
index 25870c131534dce64cda034a424cdd1aff3a033d..a66e309de6993210052d09c4107fe26a168f1b33 100644
@@ -2,42 +2,27 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import int_or_none
 
 
 class GameInformerIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?gameinformer\.com/(?:[^/]+/)*(?P<id>.+)\.aspx'
     _TEST = {
         'url': 'http://www.gameinformer.com/b/features/archive/2015/09/26/replay-animal-crossing.aspx',
+        'md5': '292f26da1ab4beb4c9099f1304d2b071',
         'info_dict': {
             'id': '4515472681001',
-            'ext': 'm3u8',
+            'ext': 'mp4',
             'title': 'Replay - Animal Crossing',
             'description': 'md5:2e211891b215c85d061adc7a4dd2d930',
-            'timestamp': 1443457610706,
-        },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
+            'timestamp': 1443457610,
+            'upload_date': '20150928',
+            'uploader_id': '694940074001',
         },
     }
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/694940074001/default_default/index.html?videoId=%s'
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
-
-        bc_api_url = self._search_regex(r"getVideo\('([^']+)'", webpage, 'brightcove api url')
-        json_data = self._download_json(
-            bc_api_url + '&video_fields=id,name,shortDescription,publishedDate,videoStillURL,length,IOSRenditions',
-            display_id)
-
-        return {
-            'id': compat_str(json_data['id']),
-            'display_id': display_id,
-            'url': json_data['IOSRenditions'][0]['url'],
-            'title': json_data['name'],
-            'description': json_data.get('shortDescription'),
-            'timestamp': int_or_none(json_data.get('publishedDate')),
-            'duration': int_or_none(json_data.get('length')),
-        }
+        brightcove_id = self._search_regex(r"getVideo\('[^']+video_id=(\d+)", webpage, 'brightcove id')
+        return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
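
The rewrite drops the legacy Brightcove JSON API entirely: the numeric id is scraped out of the page's getVideo() call and routed to the BrightcoveNew extractor through the player URL template. The id extraction on a fabricated snippet:

    import re

    BRIGHTCOVE_URL_TEMPLATE = ('http://players.brightcove.net/694940074001/'
                               'default_default/index.html?videoId=%s')

    webpage = "getVideo('//api.brightcove.com/services/viewer?&video_id=4515472681001')"
    brightcove_id = re.search(r"getVideo\('[^']+video_id=(\d+)", webpage).group(1)
    print(BRIGHTCOVE_URL_TEMPLATE % brightcove_id)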
index f6b9046f943c702262897002b07c418e3e666b3d..cbcddcb7cd116a05a8b294c06988586aec955c51 100644 (file)
@@ -10,7 +10,7 @@ from .youtube import YoutubeIE
 
 
 class GamekingsIE(InfoExtractor):
-    _VALID_URL = r'http://www\.gamekings\.nl/(?:videos|nieuws)/(?P<id>[^/]+)'
+    _VALID_URL = r'https?://www\.gamekings\.nl/(?:videos|nieuws)/(?P<id>[^/]+)'
     _TESTS = [{
         # YouTube embed video
         'url': 'http://www.gamekings.nl/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
index b3f1bafcc37ee98f1c5b89a644909f3ee0a32049..621257c9f72383a5c6133001953a7e51cc5df435 100644 (file)
@@ -1,20 +1,20 @@
 from __future__ import unicode_literals
 
 import re
-import json
 
-from .common import InfoExtractor
+from .once import OnceIE
 from ..compat import (
     compat_urllib_parse_unquote,
-    compat_urlparse,
 )
 from ..utils import (
     unescapeHTML,
+    url_basename,
+    dict_get,
 )
 
 
-class GameSpotIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?'
+class GameSpotIE(OnceIE):
+    _VALID_URL = r'https?://(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?'
     _TESTS = [{
         'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
         'md5': 'b2a30deaa8654fcccd43713a6b6a4825',
@@ -39,29 +39,73 @@ class GameSpotIE(InfoExtractor):
         webpage = self._download_webpage(url, page_id)
         data_video_json = self._search_regex(
             r'data-video=["\'](.*?)["\']', webpage, 'data video')
-        data_video = json.loads(unescapeHTML(data_video_json))
+        data_video = self._parse_json(unescapeHTML(data_video_json), page_id)
         streams = data_video['videoStreams']
 
+        manifest_url = None
         formats = []
         f4m_url = streams.get('f4m_stream')
-        if f4m_url is not None:
-            # Transform the manifest url to a link to the mp4 files
-            # they are used in mobile devices.
-            f4m_path = compat_urlparse.urlparse(f4m_url).path
-            QUALITIES_RE = r'((,\d+)+,?)'
-            qualities = self._search_regex(QUALITIES_RE, f4m_path, 'qualities').strip(',').split(',')
-            http_path = f4m_path[1:].split('/', 1)[1]
-            http_template = re.sub(QUALITIES_RE, r'%s', http_path)
-            http_template = http_template.replace('.csmil/manifest.f4m', '')
-            http_template = compat_urlparse.urljoin(
-                'http://video.gamespotcdn.com/', http_template)
-            for q in qualities:
-                formats.append({
-                    'url': http_template % q,
-                    'ext': 'mp4',
-                    'format_id': q,
-                })
-        else:
+        if f4m_url:
+            manifest_url = f4m_url
+            formats.extend(self._extract_f4m_formats(
+                f4m_url + '?hdcore=3.7.0', page_id, f4m_id='hds', fatal=False))
+        m3u8_formats = []  # referenced below even when no HLS manifest is found
+        m3u8_url = streams.get('m3u8_stream')
+        if m3u8_url:
+            manifest_url = m3u8_url
+            m3u8_formats = self._extract_m3u8_formats(
+                m3u8_url, page_id, 'mp4', 'm3u8_native',
+                m3u8_id='hls', fatal=False)
+            formats.extend(m3u8_formats)
+        progressive_url = dict_get(
+            streams, ('progressive_hd', 'progressive_high', 'progressive_low'))
+        if progressive_url and manifest_url:
+            qualities_basename = self._search_regex(
+                r'/([^/]+)\.csmil/',
+                manifest_url, 'qualities basename', default=None)
+            if qualities_basename:
+                QUALITIES_RE = r'((,\d+)+,?)'
+                qualities = self._search_regex(
+                    QUALITIES_RE, qualities_basename,
+                    'qualities', default=None)
+                if qualities:
+                    qualities = list(map(int, qualities.strip(',').split(',')))
+                    qualities.sort()
+                    http_template = re.sub(QUALITIES_RE, r'%d', qualities_basename)
+                    http_url_basename = url_basename(progressive_url)
+                    if m3u8_formats:
+                        self._sort_formats(m3u8_formats)
+                        m3u8_formats = list(filter(
+                            lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
+                            m3u8_formats))
+                    if len(qualities) == len(m3u8_formats):
+                        for q, m3u8_format in zip(qualities, m3u8_formats):
+                            f = m3u8_format.copy()
+                            f.update({
+                                'url': progressive_url.replace(
+                                    http_url_basename, http_template % q),
+                                'format_id': f['format_id'].replace('hls', 'http'),
+                                'protocol': 'http',
+                            })
+                            formats.append(f)
+                    else:
+                        for q in qualities:
+                            formats.append({
+                                'url': progressive_url.replace(
+                                    http_url_basename, http_template % q),
+                                'ext': 'mp4',
+                                'format_id': 'http-%d' % q,
+                                'tbr': q,
+                            })
+
+        onceux_json = self._search_regex(
+            r'data-onceux-options=["\'](.*?)["\']', webpage, 'onceux options', default=None)
+        if onceux_json:
+            onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri')
+            if onceux_url:
+                formats.extend(self._extract_once_formats(re.sub(
+                    r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url).replace('ads/vmap/', '')))
+
+        if not formats:
             for quality in ['sd', 'hd']:
                 # It's actually a link to a flv file
                 flv_url = streams.get('f4m_{0}'.format(quality))
@@ -71,6 +115,7 @@ class GameSpotIE(InfoExtractor):
                         'ext': 'flv',
                         'format_id': quality,
                     })
+        self._sort_formats(formats)
 
         return {
             'id': data_video['guid'],
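
The interesting step in the GameSpot hunk above is deriving progressive HTTP formats from the quality list embedded in the manifest's .csmil basename. A hedged reconstruction with made-up URLs (a plain rsplit stands in for url_basename()):

```python
import re

manifest_url = 'http://cdn.example.com/vids/2300/trailer_,300,600,1000,.mp4.csmil/master.m3u8'
progressive_url = 'http://cdn.example.com/vids/2300/trailer_600.mp4'

QUALITIES_RE = r'((,\d+)+,?)'
basename = re.search(r'/([^/]+)\.csmil/', manifest_url).group(1)   # 'trailer_,300,600,1000,.mp4'
qualities = sorted(int(q) for q in re.search(QUALITIES_RE, basename).group(0).strip(',').split(','))
http_template = re.sub(QUALITIES_RE, '%d', basename)               # 'trailer_%d.mp4'

http_url_basename = progressive_url.rsplit('/', 1)[1]              # stand-in for url_basename()
urls = [progressive_url.replace(http_url_basename, http_template % q) for q in qualities]
assert urls[0].endswith('trailer_300.mp4') and urls[-1].endswith('trailer_1000.mp4')
```
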
index 590ccf5266d61e67772a1276a83bfdb6919abc63..69058a5835f2bac0d1e56ce0917909df0fb9a92b 100644 (file)
@@ -13,7 +13,7 @@ from ..utils import (
 
 
 class GameStarIE(InfoExtractor):
-    _VALID_URL = r'http://www\.gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html'
+    _VALID_URL = r'https?://www\.gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html'
     _TEST = {
         'url': 'http://www.gamestar.de/videos/trailer,3/hobbit-3-die-schlacht-der-fuenf-heere,76110.html',
         'md5': '96974ecbb7fd8d0d20fca5a00810cea7',
diff --git a/youtube_dl/extractor/gametrailers.py b/youtube_dl/extractor/gametrailers.py
deleted file mode 100644 (file)
index c3f031d..0000000
+++ /dev/null
@@ -1,62 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
-    int_or_none,
-    parse_age_limit,
-    url_basename,
-)
-
-
-class GametrailersIE(InfoExtractor):
-    _VALID_URL = r'http://www\.gametrailers\.com/videos/view/[^/]+/(?P<id>.+)'
-
-    _TEST = {
-        'url': 'http://www.gametrailers.com/videos/view/gametrailers-com/116437-Just-Cause-3-Review',
-        'md5': 'f28c4efa0bdfaf9b760f6507955b6a6a',
-        'info_dict': {
-            'id': '2983958',
-            'ext': 'mp4',
-            'display_id': '116437-Just-Cause-3-Review',
-            'title': 'Just Cause 3 - Review',
-            'description': 'It\'s a lot of fun to shoot at things and then watch them explode in Just Cause 3, but should there be more to the experience than that?',
-        },
-    }
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-        title = self._html_search_regex(
-            r'<title>(.+?)\|', webpage, 'title').strip()
-        embed_url = self._proto_relative_url(
-            self._search_regex(
-                r'src=\'(//embed.gametrailers.com/embed/[^\']+)\'', webpage,
-                'embed url'),
-            scheme='http:')
-        video_id = url_basename(embed_url)
-        embed_page = self._download_webpage(embed_url, video_id)
-        embed_vars_json = self._search_regex(
-            r'(?s)var embedVars = (\{.*?\})\s*</script>', embed_page,
-            'embed vars')
-        info = self._parse_json(embed_vars_json, video_id)
-
-        formats = []
-        for media in info['media']:
-            if media['mediaPurpose'] == 'play':
-                formats.append({
-                    'url': media['uri'],
-                    'height': media['height'],
-                    'width:': media['width'],
-                })
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'display_id': display_id,
-            'title': title,
-            'formats': formats,
-            'thumbnail': info.get('thumbUri'),
-            'description': self._og_search_description(webpage),
-            'duration': int_or_none(info.get('videoLengthInSeconds')),
-            'age_limit': parse_age_limit(info.get('audienceRating')),
-        }
index ea32b621c390c390e22ebf8a6010304466700a4a..18ef5c252a9adc0ac2a1e6ae6806d2ea9b5b2546 100644 (file)
@@ -7,7 +7,7 @@ from .common import InfoExtractor
 
 
 class GazetaIE(InfoExtractor):
-    _VALID_URL = r'(?P<url>https?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:(?:main|\d{4}/\d{2}/\d{2})/)?(?P<id>[A-Za-z0-9-_.]+)\.s?html)'
+    _VALID_URL = r'(?P<url>https?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:main/)*(?:\d{4}/\d{2}/\d{2}/)?(?P<id>[A-Za-z0-9-_.]+)\.s?html)'
     _TESTS = [{
         'url': 'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml',
         'md5': 'd49c9bdc6e5a7888f27475dc215ee789',
@@ -18,9 +18,19 @@ class GazetaIE(InfoExtractor):
             'description': 'md5:38617526050bd17b234728e7f9620a71',
             'thumbnail': 're:^https?://.*\.jpg',
         },
+        'skip': 'video not found',
     }, {
         'url': 'http://www.gazeta.ru/lifestyle/video/2015/03/08/master-klass_krasivoi_byt._delaem_vesennii_makiyazh.shtml',
         'only_matching': True,
+    }, {
+        'url': 'http://www.gazeta.ru/video/main/main/2015/06/22/platit_ili_ne_platit_po_isku_yukosa.shtml',
+        'md5': '37f19f78355eb2f4256ee1688359f24c',
+        'info_dict': {
+            'id': '252048',
+            'ext': 'mp4',
+            'title': '"Если по иску ЮКОСа придется платить, это будет большой удар по бюджету"',
+        },
+        'add_ie': ['EaglePlatform'],
     }]
 
     def _real_extract(self, url):
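
The relaxed Gazeta _VALID_URL accepts repeated main/ segments and an optional date path. Checking it against the two test URLs from this diff:

```python
import re

VALID_URL = r'(?P<url>https?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:main/)*(?:\d{4}/\d{2}/\d{2}/)?(?P<id>[A-Za-z0-9-_.]+)\.s?html)'

for u in (
    'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml',
    'http://www.gazeta.ru/video/main/main/2015/06/22/platit_ili_ne_platit_po_isku_yukosa.shtml',
):
    assert re.match(VALID_URL, u)
```
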
index 3befd3e7b8f9ffeb48a31a284b5971b3a8a5cfab..3136427db39a2f1739fa0a791bd2cc85f1eedd02 100644 (file)
@@ -3,11 +3,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse
 from ..utils import (
-    remove_end,
     HEADRequest,
     sanitized_Request,
+    urlencode_postdata,
 )
 
 
@@ -51,63 +50,33 @@ class GDCVaultIE(InfoExtractor):
         {
             'url': 'http://gdcvault.com/play/1020791/',
             'only_matching': True,
-        }
+        },
+        {
+            # Hard-coded hostname
+            'url': 'http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface',
+            'md5': 'a8efb6c31ed06ca8739294960b2dbabd',
+            'info_dict': {
+                'id': '1023460',
+                'ext': 'mp4',
+                'display_id': 'Tenacious-Design-and-The-Interface',
+                'title': 'Tenacious Design and The Interface of \'Destiny\'',
+            },
+        },
+        {
+            # Multiple audios
+            'url': 'http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC',
+            'info_dict': {
+                'id': '1014631',
+                'ext': 'flv',
+                'title': 'How to Create a Good Game - From My Experience of Designing Pac-Man',
+            },
+            'params': {
+                'skip_download': True,  # Requires rtmpdump
+                'format': 'jp',  # The Japanese audio
+            }
+        },
     ]
 
-    def _parse_mp4(self, xml_description):
-        video_formats = []
-        mp4_video = xml_description.find('./metadata/mp4video')
-        if mp4_video is None:
-            return None
-
-        mobj = re.match(r'(?P<root>https?://.*?/).*', mp4_video.text)
-        video_root = mobj.group('root')
-        formats = xml_description.findall('./metadata/MBRVideos/MBRVideo')
-        for format in formats:
-            mobj = re.match(r'mp4\:(?P<path>.*)', format.find('streamName').text)
-            url = video_root + mobj.group('path')
-            vbr = format.find('bitrate').text
-            video_formats.append({
-                'url': url,
-                'vbr': int(vbr),
-            })
-        return video_formats
-
-    def _parse_flv(self, xml_description):
-        formats = []
-        akamai_url = xml_description.find('./metadata/akamaiHost').text
-        audios = xml_description.find('./metadata/audios')
-        if audios is not None:
-            for audio in audios:
-                formats.append({
-                    'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
-                    'play_path': remove_end(audio.get('url'), '.flv'),
-                    'ext': 'flv',
-                    'vcodec': 'none',
-                    'format_id': audio.get('code'),
-                })
-        slide_video_path = xml_description.find('./metadata/slideVideo').text
-        formats.append({
-            'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
-            'play_path': remove_end(slide_video_path, '.flv'),
-            'ext': 'flv',
-            'format_note': 'slide deck video',
-            'quality': -2,
-            'preference': -2,
-            'format_id': 'slides',
-        })
-        speaker_video_path = xml_description.find('./metadata/speakerVideo').text
-        formats.append({
-            'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
-            'play_path': remove_end(speaker_video_path, '.flv'),
-            'ext': 'flv',
-            'format_note': 'speaker video',
-            'quality': -1,
-            'preference': -1,
-            'format_id': 'speaker',
-        })
-        return formats
-
     def _login(self, webpage_url, display_id):
         (username, password) = self._get_login_info()
         if username is None or password is None:
@@ -123,7 +92,7 @@ class GDCVaultIE(InfoExtractor):
             'password': password,
         }
 
-        request = sanitized_Request(login_url, compat_urllib_parse.urlencode(login_form))
+        request = sanitized_Request(login_url, urlencode_postdata(login_form))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')
         self._download_webpage(request, display_id, 'Logging in')
         start_page = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page')
@@ -159,9 +128,10 @@ class GDCVaultIE(InfoExtractor):
                 'title': title,
             }
 
+        PLAYER_REGEX = r'<iframe src="(?P<xml_root>.+?)/player.*?\.html.*?".*?</iframe>'
+
         xml_root = self._html_search_regex(
-            r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>',
-            start_page, 'xml root', default=None)
+            PLAYER_REGEX, start_page, 'xml root', default=None)
         if xml_root is None:
             # Probably need to authenticate
             login_res = self._login(webpage_url, display_id)
@@ -171,27 +141,21 @@ class GDCVaultIE(InfoExtractor):
                 start_page = login_res
                 # Grab the url from the authenticated page
                 xml_root = self._html_search_regex(
-                    r'<iframe src="(.*?)player.html.*?".*?</iframe>',
-                    start_page, 'xml root')
+                    PLAYER_REGEX, start_page, 'xml root')
 
         xml_name = self._html_search_regex(
             r'<iframe src=".*?\?xml=(.+?\.xml).*?".*?</iframe>',
             start_page, 'xml filename', default=None)
         if xml_name is None:
             # Fallback to the older format
-            xml_name = self._html_search_regex(r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename')
-
-        xml_description_url = xml_root + 'xml/' + xml_name
-        xml_description = self._download_xml(xml_description_url, display_id)
-
-        video_title = xml_description.find('./metadata/title').text
-        video_formats = self._parse_mp4(xml_description)
-        if video_formats is None:
-            video_formats = self._parse_flv(xml_description)
+            xml_name = self._html_search_regex(
+                r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>',
+                start_page, 'xml filename')
 
         return {
+            '_type': 'url_transparent',
             'id': video_id,
             'display_id': display_id,
-            'title': video_title,
-            'formats': video_formats,
+            'url': '%s/xml/%s' % (xml_root, xml_name),
+            'ie_key': 'DigitallySpeaking',
         }
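
GDCVault now returns a url_transparent result instead of parsing the DigitallySpeaking XML itself; fields it knows locally (id, display_id) are merged over whatever the delegate extracts. A sketch of the result shape, not the extractor itself:

```python
def make_delegating_result(video_id, display_id, xml_root, xml_name):
    # A url_transparent result defers extraction to another IE (here
    # DigitallySpeaking) while keeping locally-known fields, which are
    # merged over the delegate's output.
    return {
        '_type': 'url_transparent',
        'id': video_id,
        'display_id': display_id,
        'url': '%s/xml/%s' % (xml_root, xml_name),
        'ie_key': 'DigitallySpeaking',
    }
```

The gputechconf.py hunk further down makes the same move, so the MBRVideo/akamaiHost XML parsing lives in a single DigitallySpeaking extractor instead of being duplicated.
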
index c6bf8d270f2dc07c3f0e1e3bf8732804393437a1..4aa24061c0cb97f8c36145e4a8b94ba83b8e33e6 100644 (file)
@@ -47,10 +47,11 @@ from .senateisvp import SenateISVPIE
 from .svt import SVTIE
 from .pornhub import PornHubIE
 from .xhamster import XHamsterEmbedIE
+from .tnaflix import TNAFlixNetworkEmbedIE
 from .vimeo import VimeoIE
 from .dailymotion import DailymotionCloudIE
 from .onionstudios import OnionStudiosIE
-from .snagfilms import SnagFilmsEmbedIE
+from .viewlift import ViewLiftEmbedIE
 from .screenwavemedia import ScreenwaveMediaIE
 from .mtv import MTVServicesEmbeddedIE
 from .pladform import PladformIE
@@ -58,6 +59,11 @@ from .videomore import VideomoreIE
 from .googledrive import GoogleDriveIE
 from .jwplatform import JWPlatformIE
 from .digiteka import DigitekaIE
+from .instagram import InstagramIE
+from .liveleak import LiveLeakIE
+from .threeqsdn import ThreeQSDNIE
+from .theplatform import ThePlatformIE
+from .vessel import VesselIE
 
 
 class GenericIE(InfoExtractor):
@@ -102,7 +108,8 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,  # infinite live stream
             },
             'expected_warnings': [
-                r'501.*Not Implemented'
+                r'501.*Not Implemented',
+                r'400.*Bad Request',
             ],
         },
         # Direct link with incorrect MIME type
@@ -233,11 +240,41 @@ class GenericIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'car-20120827-manifest',
                 'formats': 'mincount:9',
+                'upload_date': '20130904',
             },
             'params': {
                 'format': 'bestvideo',
             },
         },
+        # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
+        {
+            'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
+            'info_dict': {
+                'id': 'content',
+                'ext': 'mp4',
+                'title': 'content',
+                'formats': 'mincount:8',
+            },
+            'params': {
+                # m3u8 downloads
+                'skip_download': True,
+            }
+        },
+        # m3u8 served with Content-Type: text/plain
+        {
+            'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
+            'info_dict': {
+                'id': 'index',
+                'ext': 'mp4',
+                'title': 'index',
+                'upload_date': '20140720',
+                'formats': 'mincount:11',
+            },
+            'params': {
+                # m3u8 downloads
+                'skip_download': True,
+            }
+        },
         # google redirect
         {
             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
@@ -375,19 +412,6 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,
             },
         },
-        # multiple ooyala embeds on SBN network websites
-        {
-            'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
-            'info_dict': {
-                'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
-                'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
-            },
-            'playlist_mincount': 3,
-            'params': {
-                'skip_download': True,
-            },
-            'add_ie': ['Ooyala'],
-        },
         # embed.ly video
         {
             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
@@ -576,7 +600,11 @@ class GenericIE(InfoExtractor):
                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
                 'ext': 'mp4',
                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
+                'description': 'md5:faf028e48a461b8b7fad38f1e104b119',
                 'uploader': 'Spi0n',
+                'uploader_id': 'xgditw',
+                'upload_date': '20140425',
+                'timestamp': 1398441542,
             },
             'add_ie': ['Dailymotion'],
         },
@@ -599,13 +627,13 @@ class GenericIE(InfoExtractor):
         },
         # MTVServices embed
         {
-            'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
-            'md5': '35727f82f58c76d996fc188f9755b0d5',
+            'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
+            'md5': 'ca1aef97695ef2c1d6973256a57e5252',
             'info_dict': {
-                'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
+                'id': '769f7ec0-0692-4d62-9b45-0d88074bffc1',
                 'ext': 'mp4',
-                'title': 'Review',
-                'description': 'Mario\'s life in the fast lane has never looked so good.',
+                'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
+                'description': 'Two valets share their love for movie star Liam Neesons.',
             },
         },
         # YouTube embed via <data-embed-url="">
@@ -691,15 +719,18 @@ class GenericIE(InfoExtractor):
         },
         # Wistia embed
         {
-            'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
-            'md5': '8788b683c777a5cf25621eaf286d0c23',
+            'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
+            'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
             'info_dict': {
-                'id': '1cfaf6b7ea',
+                'id': '6e2wtrbdaf',
                 'ext': 'mov',
-                'title': 'md5:51364a8d3d009997ba99656004b5e20d',
-                'duration': 643.0,
-                'filesize': 182808282,
-                'uploader': 'education-portal.com',
+                'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
+                'description': 'a Paywall Videos video from Remilon',
+                'duration': 644.072,
+                'uploader': 'study.com',
+                'timestamp': 1459678540,
+                'upload_date': '20160403',
+                'filesize': 24687186,
             },
         },
         {
@@ -708,10 +739,29 @@ class GenericIE(InfoExtractor):
             'info_dict': {
                 'id': 'uxjb0lwrcz',
                 'ext': 'mp4',
-                'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
+                'title': 'Conversation about Hexagonal Rails Part 1',
+                'description': 'a Martin Fowler video from ThoughtWorks',
                 'duration': 1715.0,
                 'uploader': 'thoughtworks.wistia.com',
+                'timestamp': 1401832161,
+                'upload_date': '20140603',
+            },
+        },
+        # Wistia standard embed (async)
+        {
+            'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
+            'info_dict': {
+                'id': '807fafadvk',
+                'ext': 'mp4',
+                'title': 'Drip Brennan Dunn Workshop',
+                'description': 'a JV Webinars video from getdrip-1',
+                'duration': 4986.95,
+                'timestamp': 1463607249,
+                'upload_date': '20160518',
             },
+            'params': {
+                'skip_download': True,
+            }
         },
         # Soundcloud embed
         {
@@ -735,6 +785,19 @@ class GenericIE(InfoExtractor):
                 'title': 'Rosetta #CometLanding webcast HL 10',
             }
         },
+        # Another Livestream embed, without 'new.' in URL
+        {
+            'url': 'https://www.freespeech.org/',
+            'info_dict': {
+                'id': '123537347',
+                'ext': 'mp4',
+                'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            },
+            'params': {
+                # Live stream
+                'skip_download': True,
+            },
+        },
         # LazyYT
         {
             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
@@ -819,18 +882,6 @@ class GenericIE(InfoExtractor):
                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
             }
         },
-        # Kaltura embed
-        {
-            'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
-            'info_dict': {
-                'id': '1_eergr3h1',
-                'ext': 'mp4',
-                'upload_date': '20150226',
-                'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
-                'timestamp': int,
-                'title': 'John Carlson Postgame 2/25/15',
-            },
-        },
         # Kaltura embed (different embed code)
         {
             'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
@@ -856,9 +907,23 @@ class GenericIE(InfoExtractor):
                 'uploader_id': 'echojecka',
             },
         },
+        # Kaltura embed with single quotes
+        {
+            'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
+            'info_dict': {
+                'id': '0_izeg5utt',
+                'ext': 'mp4',
+                'title': '35871',
+                'timestamp': 1355743100,
+                'upload_date': '20121217',
+                'uploader_id': 'batchUser',
+            },
+            'add_ie': ['Kaltura'],
+        },
         # Eagle.Platform embed (generic URL)
         {
             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
+            # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
             'info_dict': {
                 'id': '227304',
                 'ext': 'mp4',
@@ -873,6 +938,7 @@ class GenericIE(InfoExtractor):
         # ClipYou (Eagle.Platform) embed (custom URL)
         {
             'url': 'http://muz-tv.ru/play/7129/',
+            # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
             'info_dict': {
                 'id': '12820',
                 'ext': 'mp4',
@@ -961,18 +1027,36 @@ class GenericIE(InfoExtractor):
                 'ext': 'flv',
                 'title': "PFT Live: New leader in the 'new-look' defense",
                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
+                'uploader': 'NBCU-SPORTS',
+                'upload_date': '20140107',
+                'timestamp': 1389118457,
+            },
+        },
+        # NBC News embed
+        {
+            'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
+            'md5': '1aa589c675898ae6d37a17913cf68d66',
+            'info_dict': {
+                'id': '701714499682',
+                'ext': 'mp4',
+                'title': 'PREVIEW: On Assignment: David Letterman',
+                'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
             },
         },
         # UDN embed
         {
-            'url': 'http://www.udn.com/news/story/7314/822787',
+            'url': 'https://video.udn.com/news/300346',
             'md5': 'fd2060e988c326991037b9aff9df21a6',
             'info_dict': {
                 'id': '300346',
                 'ext': 'mp4',
                 'title': '中一中男師變性 全校師生力挺',
                 'thumbnail': 're:^https?://.*\.jpg$',
-            }
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
         },
         # Ooyala embed
         {
@@ -989,20 +1073,6 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,
             }
         },
-        # Contains a SMIL manifest
-        {
-            'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
-            'info_dict': {
-                'id': 'file',
-                'ext': 'flv',
-                'title': '+ Football: Lottery Champions League Europe',
-                'uploader': 'www.telewebion.com',
-            },
-            'params': {
-                # rtmpe downloads
-                'skip_download': True,
-            }
-        },
         # Brightcove URL in single quotes
         {
             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
@@ -1013,6 +1083,9 @@ class GenericIE(InfoExtractor):
                 'title': 'SN Presents: Russell Martin, World Citizen',
                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
                 'uploader': 'Rogers Sportsnet',
+                'uploader_id': '1704050871',
+                'upload_date': '20150525',
+                'timestamp': 1432570283,
             },
         },
         # Dailymotion Cloud video
@@ -1093,7 +1166,60 @@ class GenericIE(InfoExtractor):
                 # m3u8 downloads
                 'skip_download': True,
             }
-        }
+        },
+        # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
+        # This video can't be played in browsers if Flash is disabled and the UA is set to iPhone, which is actually a false alarm
+        {
+            'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
+            'info_dict': {
+                'id': '4785848093001',
+                'ext': 'mp4',
+                'title': 'The Cardinal Pell Interview',
+                'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
+                'uploader': 'GlobeCast Australia - GlobeStream',
+                'uploader_id': '2733773828001',
+                'upload_date': '20160304',
+                'timestamp': 1457083087,
+            },
+            'params': {
+                # m3u8 downloads
+                'skip_download': True,
+            },
+        },
+        # Another form of arte.tv embed
+        {
+            'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
+            'md5': '850bfe45417ddf221288c88a0cffe2e2',
+            'info_dict': {
+                'id': '030273-562_PLUS7-F',
+                'ext': 'mp4',
+                'title': 'ARTE Reportage - Nulle part, en France',
+                'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
+                'upload_date': '20160409',
+            },
+        },
+        # LiveLeak embed
+        {
+            'url': 'http://www.wykop.pl/link/3088787/',
+            'md5': 'ace83b9ed19b21f68e1b50e844fdf95d',
+            'info_dict': {
+                'id': '874_1459135191',
+                'ext': 'mp4',
+                'title': 'Man shows poor quality of new apartment building',
+                'description': 'The wall is like a sand pile.',
+                'uploader': 'Lake8737',
+            }
+        },
+        # Duplicated embedded video URLs
+        {
+            'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
+            'info_dict': {
+                'id': '149298443_480_16c25b74_2',
+                'ext': 'mp4',
+                'title': 'vs. Blue Orange Spring Game',
+                'uploader': 'www.hudl.com',
+            },
+        },
     ]
 
     def report_following_redirect(self, new_url):
@@ -1241,28 +1367,31 @@ class GenericIE(InfoExtractor):
             full_response = self._request_webpage(request, video_id)
             head_response = full_response
 
+        info_dict = {
+            'id': video_id,
+            'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
+            'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
+        }
+
         # Check for direct link to a video
-        content_type = head_response.headers.get('Content-Type', '')
-        m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>.+)$', content_type)
+        content_type = head_response.headers.get('Content-Type', '').lower()
+        m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
         if m:
-            upload_date = unified_strdate(
-                head_response.headers.get('Last-Modified'))
-            formats = []
-            if m.group('format_id').endswith('mpegurl'):
+            format_id = m.group('format_id')
+            if format_id.endswith('mpegurl'):
                 formats = self._extract_m3u8_formats(url, video_id, 'mp4')
+            elif format_id == 'f4m':
+                formats = self._extract_f4m_formats(url, video_id)
             else:
                 formats = [{
                     'format_id': m.group('format_id'),
                     'url': url,
                     'vcodec': 'none' if m.group('type') == 'audio' else None
                 }]
-            return {
-                'id': video_id,
-                'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
-                'direct': True,
-                'formats': formats,
-                'upload_date': upload_date,
-            }
+                info_dict['direct'] = True
+            self._sort_formats(formats)
+            info_dict['formats'] = formats
+            return info_dict
 
         if not self._downloader.params.get('test', False) and not is_intentional:
             force = self._downloader.params.get('force_generic_extractor', False)
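
In the Content-Type sniffing above, the header is now lowercased and the format id stops at `;` or whitespace, so a charset suffix (as in the new audio/x-mpegURL test) no longer defeats the mpegurl check. A small demo of the regex against an invented set of header values:

```python
import re

CT_RE = r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)'

for content_type, expected in (
    ('video/mp4', 'mp4'),
    ('application/vnd.apple.mpegurl; charset=utf-8', 'vnd.apple.mpegurl'),
    ('audio/x-mpegURL; charset=utf-8', 'x-mpegurl'),
    ('text/html; charset=utf-8', None),
):
    m = re.match(CT_RE, content_type.lower())
    assert (m.group('format_id') if m else None) == expected
```
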
@@ -1282,21 +1411,24 @@ class GenericIE(InfoExtractor):
             request.add_header('Accept-Encoding', '*')
             full_response = self._request_webpage(request, video_id)
 
+        first_bytes = full_response.read(512)
+
+        # Is it an M3U playlist?
+        if first_bytes.startswith(b'#EXTM3U'):
+            info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
+            self._sort_formats(info_dict['formats'])
+            return info_dict
+
         # Maybe it's a direct link to a video?
         # Be careful not to download the whole thing!
-        first_bytes = full_response.read(512)
         if not is_html(first_bytes):
             self._downloader.report_warning(
                 'URL could be a direct video link, returning it as such.')
-            upload_date = unified_strdate(
-                head_response.headers.get('Last-Modified'))
-            return {
-                'id': video_id,
-                'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
+            info_dict.update({
                 'direct': True,
                 'url': url,
-                'upload_date': upload_date,
-            }
+            })
+            return info_dict
 
         webpage = self._webpage_read_content(
             full_response, url, video_id, prefix=first_bytes)
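
Moving the 512-byte read before the HTML heuristic lets an #EXTM3U magic prefix win even when the playlist is served as text/plain. A sketch of the resulting decision order, with a stubbed is_html():

```python
def classify(first_bytes, is_html):
    # Order matters: a text/plain-labelled .m3u8 would otherwise fall
    # through to the "direct video link" warning path.
    if first_bytes.startswith(b'#EXTM3U'):
        return 'm3u8'
    if not is_html(first_bytes):
        return 'direct'
    return 'webpage'

assert classify(b'#EXTM3U\n#EXT-X-VERSION:3\n', lambda b: False) == 'm3u8'
```
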
@@ -1309,16 +1441,20 @@ class GenericIE(InfoExtractor):
             if doc.tag == 'rss':
                 return self._extract_rss(url, video_id, doc)
             elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
-                return self._parse_smil(doc, url, video_id)
+                smil = self._parse_smil(doc, url, video_id)
+                self._sort_formats(smil['formats'])
+                return smil
             elif doc.tag == '{http://xspf.org/ns/0/}playlist':
                 return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
             elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
-                return {
-                    'id': video_id,
-                    'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
-                    'formats': self._parse_mpd_formats(
-                        doc, video_id, mpd_base_url=url.rpartition('/')[0]),
-                }
+                info_dict['formats'] = self._parse_mpd_formats(
+                    doc, video_id, mpd_base_url=url.rpartition('/')[0])
+                self._sort_formats(info_dict['formats'])
+                return info_dict
+            elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
+                info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
+                self._sort_formats(info_dict['formats'])
+                return info_dict
         except compat_xml_parse_error:
             pass
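
The XML dispatch above gains an f4m manifest branch alongside rss/smil/xspf/MPD. A self-contained sketch of the root-tag classification it performs:

```python
import re
import xml.etree.ElementTree as etree

def classify_xml(body):
    # Mirrors the dispatch above: the root tag (with optional XML
    # namespace) decides which parser handles the document.
    doc = etree.fromstring(body)
    if doc.tag == 'rss':
        return 'rss'
    if re.match(r'^(?:{[^}]+})?smil$', doc.tag):
        return 'smil'
    if doc.tag == '{http://xspf.org/ns/0/}playlist':
        return 'xspf'
    if re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
        return 'mpd'
    if re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
        return 'f4m'
    return None

assert classify_xml(b'<manifest xmlns="http://ns.adobe.com/f4m/1.0"/>') == 'f4m'
```
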
 
@@ -1338,7 +1474,8 @@ class GenericIE(InfoExtractor):
         #   Site Name | Video Title
         #   Video Title - Tagline | Site Name
         # and so on and so forth; it's just not practical
-        video_title = self._html_search_regex(
+        video_title = self._og_search_title(
+            webpage, default=None) or self._html_search_regex(
             r'(?s)<title>(.*?)</title>', webpage, 'video title',
             default='video')
 
@@ -1356,6 +1493,9 @@ class GenericIE(InfoExtractor):
         video_uploader = self._search_regex(
             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
 
+        video_description = self._og_search_description(webpage, default=None)
+        video_thumbnail = self._og_search_thumbnail(webpage, default=None)
+
         # Helper method
         def _playlist_from_matches(matches, getter=None, ie=None):
             urlrs = orderedSet(
@@ -1386,6 +1526,16 @@ class GenericIE(InfoExtractor):
         if bc_urls:
             return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
 
+        # Look for ThePlatform embeds
+        tp_urls = ThePlatformIE._extract_urls(webpage)
+        if tp_urls:
+            return _playlist_from_matches(tp_urls, ie='ThePlatform')
+
+        # Look for Vessel embeds
+        vessel_urls = VesselIE._extract_urls(webpage)
+        if vessel_urls:
+            return _playlist_from_matches(vessel_urls, ie=VesselIE.ie_key())
+
         # Look for embedded rtl.nl player
         matches = re.findall(
             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
@@ -1454,21 +1604,26 @@ class GenericIE(InfoExtractor):
                 'url': embed_url,
                 'ie_key': 'Wistia',
                 'uploader': video_uploader,
-                'title': video_title,
-                'id': video_id,
             }
 
         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
         if match:
             return {
                 '_type': 'url_transparent',
-                'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
+                'url': 'wistia:%s' % match.group('id'),
                 'ie_key': 'Wistia',
                 'uploader': video_uploader,
-                'title': video_title,
-                'id': match.group('id')
             }
 
+        match = re.search(
+            r'''(?sx)
+                <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
+                <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
+            ''', webpage)
+        if match:
+            return self.url_result(self._proto_relative_url(
+                'wistia:%s' % match.group('id')), 'Wistia')
+
         # Look for SVT player
         svt_url = SVTIE._extract_url(webpage)
         if svt_url:
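
The new third Wistia branch recognizes the standard async embed: the E-v1.js loader script followed by a div carrying a wistia_async_<id> class. Exercising the same pattern against an invented snippet (the id is the one from the getdrip test above):

```python
import re

WISTIA_ASYNC_RE = r'''(?sx)
    <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
    <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
'''

html = (
    '<script src="//fast.wistia.com/assets/external/E-v1.js" async></script>'
    '<div class="wistia_embed wistia_async_807fafadvk" style="width:640px"></div>'
)
assert re.search(WISTIA_ASYNC_RE, html).group('id') == '807fafadvk'
```
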
@@ -1633,6 +1788,11 @@ class GenericIE(InfoExtractor):
         if xhamster_urls:
             return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
 
+        # Look for embedded TNAFlixNetwork player
+        tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
+        if tnaflix_urls:
+            return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
+
         # Look for embedded Tvigle player
         mobj = re.search(
             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
@@ -1653,7 +1813,7 @@ class GenericIE(InfoExtractor):
 
         # Look for embedded arte.tv player
         mobj = re.search(
-            r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
+            r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
             webpage)
         if mobj is not None:
             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
@@ -1683,14 +1843,6 @@ class GenericIE(InfoExtractor):
             url = unescapeHTML(mobj.group('url'))
             return self.url_result(url)
 
-        # Look for embedded vulture.com player
-        mobj = re.search(
-            r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
-            webpage)
-        if mobj is not None:
-            url = unescapeHTML(mobj.group('url'))
-            return self.url_result(url, ie='Vulture')
-
         # Look for embedded mtvservices player
         mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
         if mtvservices_url:
@@ -1739,7 +1891,7 @@ class GenericIE(InfoExtractor):
             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
 
         mobj = re.search(
-            r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
+            r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
             webpage)
         if mobj is not None:
             return self.url_result(mobj.group('url'), 'Livestream')
@@ -1751,7 +1903,7 @@ class GenericIE(InfoExtractor):
             return self.url_result(mobj.group('url'), 'Zapiks')
 
         # Look for Kaltura embeds
-        mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or
+        mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?(?P<q1>['\"])wid(?P=q1)\s*:\s*(?P<q2>['\"])_?(?P<partner_id>[^'\"]+)(?P=q2),.*?(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*(?P<q4>['\"])(?P<id>[^'\"]+)(?P=q4),", webpage) or
                 re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
         if mobj is not None:
             return self.url_result(smuggle_url(
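
The reworked kWidget pattern uses named quote groups with (?P=...) backreferences so single- and double-quoted keys both match. A simplified cut of that idea (the real pattern also allows thumbEmbed/Embed spellings and more context between the keys), checked against invented snippets:

```python
import re

KWIDGET_RE = (r"kWidget\.embed\(\{.*?(?P<q1>['\"])wid(?P=q1)\s*:\s*"
              r"(?P<q2>['\"])_?(?P<partner_id>[^'\"]+)(?P=q2)")

for snippet in (
    "kWidget.embed({'wid': '_270482', 'entry_id': '1_abc'",
    'kWidget.embed({"wid": "_270482", "entry_id": "1_abc"',
):
    assert re.search(KWIDGET_RE, snippet, re.DOTALL).group('partner_id') == '270482'
```
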
@@ -1803,6 +1955,12 @@ class GenericIE(InfoExtractor):
         if nbc_sports_url:
             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
 
+        # Look for NBC News embeds
+        nbc_news_embed_url = re.search(
+            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage)
+        if nbc_news_embed_url:
+            return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews')
+
         # Look for Google Drive embeds
         google_drive_url = GoogleDriveIE._extract_url(webpage)
         if google_drive_url:
@@ -1830,10 +1988,10 @@ class GenericIE(InfoExtractor):
         if onionstudios_url:
             return self.url_result(onionstudios_url)
 
-        # Look for SnagFilms embeds
-        snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage)
-        if snagfilms_url:
-            return self.url_result(snagfilms_url)
+        # Look for ViewLift embeds
+        viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
+        if viewlift_url:
+            return self.url_result(viewlift_url)
 
         # Look for JWPlatform embeds
         jwplatform_url = JWPlatformIE._extract_url(webpage)
@@ -1870,6 +2028,38 @@ class GenericIE(InfoExtractor):
                 self._proto_relative_url(unescapeHTML(mobj.group(1))),
                 'AdobeTVVideo')
 
+        # Look for Vine embeds
+        mobj = re.search(
+            r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
+            webpage)
+        if mobj is not None:
+            return self.url_result(
+                self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
+
+        # Look for Instagram embeds
+        instagram_embed_url = InstagramIE._extract_embed_url(webpage)
+        if instagram_embed_url is not None:
+            return self.url_result(
+                self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
+
+        # Look for LiveLeak embeds
+        liveleak_url = LiveLeakIE._extract_url(webpage)
+        if liveleak_url:
+            return self.url_result(liveleak_url, 'LiveLeak')
+
+        # Look for 3Q SDN embeds
+        threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
+        if threeqsdn_url:
+            return {
+                '_type': 'url_transparent',
+                'ie_key': ThreeQSDNIE.ie_key(),
+                'url': self._proto_relative_url(threeqsdn_url),
+                'title': video_title,
+                'description': video_description,
+                'thumbnail': video_thumbnail,
+                'uploader': video_uploader,
+            }
+
         def check_video(vurl):
             if YoutubeIE.suitable(vurl):
                 return True
@@ -1950,7 +2140,8 @@ class GenericIE(InfoExtractor):
             raise UnsupportedError(url)
 
         entries = []
-        for video_url in found:
+        for video_url in orderedSet(found):
+            video_url = unescapeHTML(video_url)
             video_url = video_url.replace('\\/', '/')
             video_url = compat_urlparse.urljoin(url, video_url)
             video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
@@ -1979,9 +2170,14 @@ class GenericIE(InfoExtractor):
                 entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
             elif ext == 'mpd':
                 entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
+            elif ext == 'f4m':
+                entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
             else:
                 entry_info_dict['url'] = video_url
 
+            if entry_info_dict.get('formats'):
+                self._sort_formats(entry_info_dict['formats'])
+
             entries.append(entry_info_dict)
 
         if len(entries) == 1:
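
Two small hardening steps land in the final URL loop above: found URLs are de-duplicated with orderedSet() and HTML-entity-unescaped before the usual \/ and relative-URL normalisation. A sketch with stand-in helpers (a naive entity replace stands in for unescapeHTML):

```python
def ordered_set(seq):
    # Stand-in for youtube_dl.utils.orderedSet: dedupe, keep first-seen order.
    seen = set()
    return [x for x in seq if not (x in seen or seen.add(x))]

found = ['http:\\/\\/a.example\\/v.mp4', 'http:\\/\\/a.example\\/v.mp4', '/w.mp4?t=1&amp;s=2']
urls = [u.replace('&amp;', '&').replace('\\/', '/') for u in ordered_set(found)]
assert urls == ['http://a.example/v.mp4', '/w.mp4?t=1&s=2']
```
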
index 9561ed5fbaa25404654303956a676b000da2af67..62ff84835c87b28d18ace1afa5eee19f894d198d 100644 (file)
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..utils import unified_strdate
 
 
 class GlideIE(InfoExtractor):
@@ -15,26 +16,38 @@ class GlideIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Damon Timm\'s Glide message',
             'thumbnail': 're:^https?://.*?\.cloudfront\.net/.*\.jpg$',
+            'uploader': 'Damon Timm',
+            'upload_date': '20140919',
         }
     }
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
+
         webpage = self._download_webpage(url, video_id)
+
         title = self._html_search_regex(
-            r'<title>(.*?)</title>', webpage, 'title')
-        video_url = self.http_scheme() + self._search_regex(
-            r'<source src="(.*?)" type="video/mp4">', webpage, 'video URL')
-        thumbnail_url = self._search_regex(
-            r'<img id="video-thumbnail" src="(.*?)"',
-            webpage, 'thumbnail url', fatal=False)
-        thumbnail = (
-            thumbnail_url if thumbnail_url is None
-            else self.http_scheme() + thumbnail_url)
+            r'<title>(.+?)</title>', webpage, 'title')
+        video_url = self._proto_relative_url(self._search_regex(
+            r'<source[^>]+src=(["\'])(?P<url>.+?)\1',
+            webpage, 'video URL', default=None,
+            group='url')) or self._og_search_video_url(webpage)
+        thumbnail = self._proto_relative_url(self._search_regex(
+            r'<img[^>]+id=["\']video-thumbnail["\'][^>]+src=(["\'])(?P<url>.+?)\1',
+            webpage, 'thumbnail url', default=None,
+            group='url')) or self._og_search_thumbnail(webpage)
+        uploader = self._search_regex(
+            r'<div[^>]+class=["\']info-name["\'][^>]*>([^<]+)',
+            webpage, 'uploader', fatal=False)
+        upload_date = unified_strdate(self._search_regex(
+            r'<div[^>]+class="info-date"[^>]*>([^<]+)',
+            webpage, 'upload date', fatal=False))
 
         return {
             'id': video_id,
             'title': title,
             'url': video_url,
             'thumbnail': thumbnail,
+            'uploader': uploader,
+            'upload_date': upload_date,
         }
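
Glide's extraction now chains a non-fatal regex (default=None) with an OpenGraph fallback via `or`, and normalises protocol-relative URLs. A sketch of the idiom with hypothetical stand-ins for _search_regex() and _proto_relative_url():

```python
import re

def proto_relative(url, scheme='http:'):
    # stand-in for _proto_relative_url()
    return scheme + url if url and url.startswith('//') else url

def search(pattern, webpage):
    m = re.search(pattern, webpage)
    return m.group('url') if m else None  # mimics default=None

webpage = '<source src="//media.example.com/v.mp4" type="video/mp4">'
video_url = proto_relative(
    search(r'<source[^>]+src=(["\'])(?P<url>.+?)\1', webpage)) or 'og:video fallback'
assert video_url == 'http://media.example.com/v.mp4'
```
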
diff --git a/youtube_dl/extractor/godtv.py b/youtube_dl/extractor/godtv.py
new file mode 100644 (file)
index 0000000..c5d3b4e
--- /dev/null
@@ -0,0 +1,66 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .ooyala import OoyalaIE
+from ..utils import js_to_json
+
+
+class GodTVIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?god\.tv(?:/[^/]+)*/(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'http://god.tv/jesus-image/video/jesus-conference-2016/randy-needham',
+        'info_dict': {
+            'id': 'lpd3g2MzE6D1g8zFAKz8AGpxWcpu6o_3',
+            'ext': 'mp4',
+            'title': 'Randy Needham',
+            'duration': 3615.08,
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }, {
+        'url': 'http://god.tv/playlist/bible-study',
+        'info_dict': {
+            'id': 'bible-study',
+        },
+        'playlist_mincount': 37,
+    }, {
+        'url': 'http://god.tv/node/15097',
+        'only_matching': True,
+    }, {
+        'url': 'http://god.tv/live/africa',
+        'only_matching': True,
+    }, {
+        'url': 'http://god.tv/liveevents',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        settings = self._parse_json(
+            self._search_regex(
+                r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
+                webpage, 'settings', default='{}'),
+            display_id, transform_source=js_to_json, fatal=False)
+
+        ooyala_id = None
+
+        if settings:
+            playlist = settings.get('playlist')
+            if playlist and isinstance(playlist, list):
+                entries = [
+                    OoyalaIE._build_url_result(video['content_id'])
+                    for video in playlist if video.get('content_id')]
+                if entries:
+                    return self.playlist_result(entries, display_id)
+            ooyala_id = settings.get('ooyala', {}).get('content_id')
+
+        if not ooyala_id:
+            ooyala_id = self._search_regex(
+                r'["\']content_id["\']\s*:\s*(["\'])(?P<id>[\w-]+)\1',
+                webpage, 'ooyala id', group='id')
+
+        return OoyalaIE._build_url_result(ooyala_id)
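
The new GodTV extractor pulls its Ooyala ids out of the page's Drupal settings blob. A minimal sketch of that extraction; the real code additionally routes the blob through js_to_json() to tolerate JS-style object literals, and the content id below is the one from the test:

```python
import json
import re

webpage = 'jQuery.extend(Drupal.settings, {"ooyala": {"content_id": "lpd3g2MzE6D1g8zFAKz8AGpxWcpu6o_3"}});'
raw = re.search(r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', webpage).group(1)
settings = json.loads(raw)
assert settings['ooyala']['content_id'].startswith('lpd3g2')
```
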
index 37be34091895392b014a8fe2167d9c708ca60e71..766fc26d0f01145bdd2456a221940fa60ece6953 100644 (file)
@@ -10,8 +10,8 @@ from ..utils import (
 
 
 class GoogleDriveIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28})'
-    _TEST = {
+    _VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})'
+    _TESTS = [{
         'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
         'md5': '881f7700aec4f538571fa1e0eed4a7b6',
         'info_dict': {
@@ -20,7 +20,11 @@ class GoogleDriveIE(InfoExtractor):
             'title': 'Big Buck Bunny.mp4',
             'duration': 46,
         }
-    }
+    }, {
+        # video id is longer than 28 characters
+        'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
+        'only_matching': True,
+    }]
     _FORMATS_EXT = {
         '5': 'flv',
         '6': 'flv',
@@ -43,7 +47,7 @@ class GoogleDriveIE(InfoExtractor):
     @staticmethod
     def _extract_url(webpage):
         mobj = re.search(
-            r'<iframe[^>]+src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})',
+            r'<iframe[^>]+src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28,})',
             webpage)
         if mobj:
             return 'https://drive.google.com/file/d/%s' % mobj.group('id')
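
The only functional change here is {28} becoming {28,}: newer Drive file ids are longer than the classic 28 characters (see the new only_matching test). A quick check of both id styles from this diff:

```python
import re

ID_RE = r'(?:uc\?.*?id=|file/d/)(?P<id>[a-zA-Z0-9_-]{28,})'

old_style = 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit'
new_style = 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit'
for u in (old_style, new_style):
    assert re.search(ID_RE, u)  # {28,} keeps the 28-char ids and accepts longer ones
```
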
index 1d9166455aae935f1eb51777d170e0f6259ffd4e..0c015141fa322465b1476e035f87da223555b211 100644 (file)
@@ -14,13 +14,13 @@ class GoshgayIE(InfoExtractor):
     _VALID_URL = r'https?://www\.goshgay\.com/video(?P<id>\d+?)($|/)'
     _TEST = {
         'url': 'http://www.goshgay.com/video299069/diesel_sfw_xxx_video',
-        'md5': '027fcc54459dff0feb0bc06a7aeda680',
+        'md5': '4b6db9a0a333142eb9f15913142b0ed1',
         'info_dict': {
             'id': '299069',
             'ext': 'flv',
             'title': 'DIESEL SFW XXX Video',
             'thumbnail': 're:^http://.*\.jpg$',
-            'duration': 79,
+            'duration': 80,
             'age_limit': 18,
         }
     }
@@ -47,5 +47,5 @@ class GoshgayIE(InfoExtractor):
             'title': title,
             'thumbnail': thumbnail,
             'duration': duration,
-            'age_limit': self._family_friendly_search(webpage),
+            'age_limit': 18,
         }
index 145b55bf3e019277d1e8ef958aacb90015cad737..73dc62c494e4a69f5499841164dce0c90aaa3656 100644 (file)
@@ -2,12 +2,6 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import (
-    xpath_element,
-    xpath_text,
-    int_or_none,
-    parse_duration,
-)
 
 
 class GPUTechConfIE(InfoExtractor):
@@ -27,29 +21,15 @@ class GPUTechConfIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        root_path = self._search_regex(r'var\s+rootPath\s*=\s*"([^"]+)', webpage, 'root path', 'http://evt.dispeak.com/nvidia/events/gtc15/')
-        xml_file_id = self._search_regex(r'var\s+xmlFileId\s*=\s*"([^"]+)', webpage, 'xml file id')
-
-        doc = self._download_xml('%sxml/%s.xml' % (root_path, xml_file_id), video_id)
-
-        metadata = xpath_element(doc, 'metadata')
-        http_host = xpath_text(metadata, 'httpHost', 'http host', True)
-        mbr_videos = xpath_element(metadata, 'MBRVideos')
-
-        formats = []
-        for mbr_video in mbr_videos.findall('MBRVideo'):
-            stream_name = xpath_text(mbr_video, 'streamName')
-            if stream_name:
-                formats.append({
-                    'url': 'http://%s/%s' % (http_host, stream_name.replace('mp4:', '')),
-                    'tbr': int_or_none(xpath_text(mbr_video, 'bitrate')),
-                })
-        self._sort_formats(formats)
+        root_path = self._search_regex(
+            r'var\s+rootPath\s*=\s*"([^"]+)', webpage, 'root path',
+            default='http://evt.dispeak.com/nvidia/events/gtc15/')
+        xml_file_id = self._search_regex(
+            r'var\s+xmlFileId\s*=\s*"([^"]+)', webpage, 'xml file id')
 
         return {
+            '_type': 'url_transparent',
             'id': video_id,
-            'title': xpath_text(metadata, 'title'),
-            'duration': parse_duration(xpath_text(metadata, 'endTime')),
-            'creator': xpath_text(metadata, 'speaker'),
-            'formats': formats,
+            'url': '%sxml/%s.xml' % (root_path, xml_file_id),
+            'ie_key': 'DigitallySpeaking',
         }
diff --git a/youtube_dl/extractor/groupon.py b/youtube_dl/extractor/groupon.py
index 63c05b6a6f96dfa4437f15cd77524ab3d89e1018..a6da909310a5591fe39a68244142a46fb24ce65d 100644
@@ -4,7 +4,7 @@ from .common import InfoExtractor
 
 
 class GrouponIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.groupon\.com/deals/(?P<id>[^?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?groupon\.com/deals/(?P<id>[^/?#&]+)'
 
     _TEST = {
         'url': 'https://www.groupon.com/deals/bikram-yoga-huntington-beach-2#ooid=tubGNycTo_9Uxg82uESj4i61EYX8nyuf',
@@ -14,17 +14,27 @@ class GrouponIE(InfoExtractor):
             'description': 'Studio kept at 105 degrees and 40% humidity with anti-microbial and anti-slip Flotex flooring; certified instructors',
         },
         'playlist': [{
+            'md5': '42428ce8a00585f9bc36e49226eae7a1',
             'info_dict': {
-                'id': 'tubGNycTo_9Uxg82uESj4i61EYX8nyuf',
+                'id': 'fk6OhWpXgIQ',
                 'ext': 'mp4',
-                'title': 'Bikram Yoga Huntington Beach | Orange County',
+                'title': 'Bikram Yoga Huntington Beach | Orange County !tubGNycTo@9Uxg82uESj4i61EYX8nyuf',
                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
-                'duration': 44.961,
+                'duration': 45,
+                'upload_date': '20160405',
+                'uploader_id': 'groupon',
+                'uploader': 'Groupon',
             },
+            'add_ie': ['Youtube'],
         }],
         'params': {
-            'skip_download': 'HLS',
-        }
+            'skip_download': True,
+        },
+    }
+
+    _PROVIDERS = {
+        'ooyala': ('ooyala:%s', 'Ooyala'),
+        'youtube': ('%s', 'Youtube'),
     }
 
     def _real_extract(self, url):
@@ -32,16 +42,21 @@ class GrouponIE(InfoExtractor):
         webpage = self._download_webpage(url, playlist_id)
 
         payload = self._parse_json(self._search_regex(
-            r'var\s+payload\s*=\s*(.*?);\n', webpage, 'payload'), playlist_id)
+            r'(?:var\s+|window\.)payload\s*=\s*(.*?);\n', webpage, 'payload'), playlist_id)
         videos = payload['carousel'].get('dealVideos', [])
         entries = []
         for v in videos:
-            if v.get('provider') != 'OOYALA':
+            provider = v.get('provider')
+            video_id = v.get('media') or v.get('id') or v.get('baseURL')
+            if not provider or not video_id:
+                continue
+            url_pattern, ie_key = self._PROVIDERS.get(provider.lower(), (None, None))
+            if not url_pattern:
                 self.report_warning(
                     '%s: Unsupported video provider %s, skipping video' %
-                    (playlist_id, v.get('provider')))
+                    (playlist_id, provider))
                 continue
-            entries.append(self.url_result('ooyala:%s' % v['media']))
+            entries.append(self.url_result(url_pattern % video_id, ie_key))
 
         return {
             '_type': 'playlist',
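
A minimal sketch of how the new _PROVIDERS table resolves a playlist entry; note the (None, None) fallback, which unknown providers need in order to reach the warning branch instead of failing to unpack:

    _PROVIDERS = {
        'ooyala': ('ooyala:%s', 'Ooyala'),
        'youtube': ('%s', 'Youtube'),
    }

    def resolve(provider, video_id):
        url_pattern, ie_key = _PROVIDERS.get(provider.lower(), (None, None))
        if not url_pattern:
            return None  # the extractor warns and skips this entry
        return url_pattern % video_id, ie_key

    print(resolve('YOUTUBE', 'fk6OhWpXgIQ'))  # ('fk6OhWpXgIQ', 'Youtube')
    print(resolve('vimeo', 'abc123'))         # None (unsupported provider)
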
diff --git a/youtube_dl/extractor/hbo.py b/youtube_dl/extractor/hbo.py
new file mode 100644
index 0000000..dad0f39
--- /dev/null
+++ b/youtube_dl/extractor/hbo.py
@@ -0,0 +1,122 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    xpath_text,
+    xpath_element,
+    int_or_none,
+    parse_duration,
+)
+
+
+class HBOIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839',
+        'md5': '1c33253f0c7782142c993c0ba62a8753',
+        'info_dict': {
+            'id': '1437839',
+            'ext': 'mp4',
+            'title': 'Ep. 64 Clip: Encryption',
+        }
+    }
+    _FORMATS_INFO = {
+        '1920': {
+            'width': 1280,
+            'height': 720,
+        },
+        '640': {
+            'width': 768,
+            'height': 432,
+        },
+        'highwifi': {
+            'width': 640,
+            'height': 360,
+        },
+        'high3g': {
+            'width': 640,
+            'height': 360,
+        },
+        'medwifi': {
+            'width': 400,
+            'height': 224,
+        },
+        'med3g': {
+            'width': 400,
+            'height': 224,
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        video_data = self._download_xml(
+            'http://render.lv3.hbo.com/data/content/global/videos/data/%s.xml' % video_id, video_id)
+        title = xpath_text(video_data, 'title', 'title', True)
+
+        formats = []
+        for source in xpath_element(video_data, 'videos', 'sources', True):
+            if source.tag == 'size':
+                path = xpath_text(source, './/path')
+                if not path:
+                    continue
+                width = source.attrib.get('width')
+                format_info = self._FORMATS_INFO.get(width, {})
+                height = format_info.get('height')
+                fmt = {
+                    'url': path,
+                    'format_id': 'http%s' % ('-%dp' % height if height else ''),
+                    'width': format_info.get('width'),
+                    'height': height,
+                }
+                rtmp = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', path)
+                if rtmp:
+                    fmt.update({
+                        'url': rtmp.group('url'),
+                        'play_path': rtmp.group('playpath'),
+                        'app': rtmp.group('app'),
+                        'ext': 'flv',
+                        'format_id': fmt['format_id'].replace('http', 'rtmp'),
+                    })
+                formats.append(fmt)
+            else:
+                video_url = source.text
+                if not video_url:
+                    continue
+                if source.tag == 'tarball':
+                    formats.extend(self._extract_m3u8_formats(
+                        video_url.replace('.tar', '/base_index_w8.m3u8'),
+                        video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+                else:
+                    format_info = self._FORMATS_INFO.get(source.tag, {})
+                    formats.append({
+                        'format_id': 'http-%s' % source.tag,
+                        'url': video_url,
+                        'width': format_info.get('width'),
+                        'height': format_info.get('height'),
+                    })
+        self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
+
+        thumbnails = []
+        card_sizes = xpath_element(video_data, 'titleCardSizes')
+        if card_sizes is not None:
+            for size in card_sizes:
+                path = xpath_text(size, 'path')
+                if not path:
+                    continue
+                width = int_or_none(size.get('width'))
+                thumbnails.append({
+                    'id': width,
+                    'url': path,
+                    'width': width,
+                })
+
+        return {
+            'id': video_id,
+            'title': title,
+            'duration': parse_duration(xpath_text(video_data, 'duration/tv14')),
+            'formats': formats,
+            'thumbnails': thumbnails,
+        }
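
The RTMP branch splits a stream path into base URL, application and play path with a single anchored regex. Applied to an invented sample path (real feed values are not shown in this diff):

    import re

    path = 'rtmpe://cp123456.edgefcs.net/ondemand/mp4:videos/series/clip_640.mp4'
    rtmp = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', path)
    print(rtmp.group('url'))       # rtmpe://cp123456.edgefcs.net/ondemand
    print(rtmp.group('app'))       # ondemand
    print(rtmp.group('playpath'))  # mp4:videos/series/clip_640.mp4
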
diff --git a/youtube_dl/extractor/hearthis.py b/youtube_dl/extractor/hearthis.py
index 7d8698655666f8de4e8850ac2684a16dd28810af..2564538820e7d534adc24fd8c967ee44490e0dc3 100644
@@ -7,6 +7,7 @@ from .common import InfoExtractor
 from ..compat import compat_urlparse
 from ..utils import (
     HEADRequest,
+    KNOWN_EXTENSIONS,
     sanitized_Request,
     str_to_int,
     urlencode_postdata,
@@ -17,7 +18,7 @@ from ..utils import (
 class HearThisAtIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?hearthis\.at/(?P<artist>[^/]+)/(?P<title>[A-Za-z0-9\-]+)/?$'
     _PLAYLIST_URL = 'https://hearthis.at/playlist.php'
-    _TEST = {
+    _TESTS = [{
         'url': 'https://hearthis.at/moofi/dr-kreep',
         'md5': 'ab6ec33c8fed6556029337c7885eb4e0',
         'info_dict': {
@@ -26,7 +27,7 @@ class HearThisAtIE(InfoExtractor):
             'title': 'Moofi - Dr. Kreep',
             'thumbnail': 're:^https?://.*\.jpg$',
             'timestamp': 1421564134,
-            'description': 'Creepy Patch. Mutable Instruments Braids Vowel + Formant Mode.',
+            'description': 'Listen to Dr. Kreep by Moofi on hearthis.at - Modular, Eurorack, Mutable Intruments Braids, Valhalla-DSP',
             'upload_date': '20150118',
             'comment_count': int,
             'view_count': int,
@@ -34,7 +35,25 @@ class HearThisAtIE(InfoExtractor):
             'duration': 71,
             'categories': ['Experimental'],
         }
-    }
+    }, {
+        # 'download' link redirects to the original webpage
+        'url': 'https://hearthis.at/twitchsf/dj-jim-hopkins-totally-bitchin-80s-dance-mix/',
+        'md5': '5980ceb7c461605d30f1f039df160c6e',
+        'info_dict': {
+            'id': '811296',
+            'ext': 'mp3',
+            'title': 'TwitchSF - DJ Jim Hopkins -  Totally Bitchin\' 80\'s Dance Mix!',
+            'description': 'Listen to DJ Jim Hopkins -  Totally Bitchin\' 80\'s Dance Mix! by TwitchSF on hearthis.at - Dance',
+            'upload_date': '20160328',
+            'timestamp': 1459186146,
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'comment_count': int,
+            'view_count': int,
+            'like_count': int,
+            'duration': 4360,
+            'categories': ['Dance'],
+        },
+    }]
 
     def _real_extract(self, url):
         m = re.match(self._VALID_URL, url)
@@ -90,13 +109,14 @@ class HearThisAtIE(InfoExtractor):
             ext_handle = self._request_webpage(
                 ext_req, display_id, note='Determining extension')
             ext = urlhandle_detect_ext(ext_handle)
-            formats.append({
-                'format_id': 'download',
-                'vcodec': 'none',
-                'ext': ext,
-                'url': download_url,
-                'preference': 2,  # Usually better quality
-            })
+            if ext in KNOWN_EXTENSIONS:
+                formats.append({
+                    'format_id': 'download',
+                    'vcodec': 'none',
+                    'ext': ext,
+                    'url': download_url,
+                    'preference': 2,  # Usually better quality
+                })
         self._sort_formats(formats)
 
         return {
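
The KNOWN_EXTENSIONS guard drops the 'download' format whenever the sniffed extension is not a real media extension, which is exactly the redirect-to-webpage case the new test documents. Roughly, assuming the youtube_dl package is importable:

    from youtube_dl.utils import KNOWN_EXTENSIONS

    for ext in ('mp3', 'wav', 'html'):
        print(ext, ext in KNOWN_EXTENSIONS)  # mp3/wav pass, html is rejected
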
diff --git a/youtube_dl/extractor/hotnewhiphop.py b/youtube_dl/extractor/hotnewhiphop.py
index 31e219945412398909053ff464245763a671ae19..9db5652096acc5ead0cb926791d731d0f6f35565 100644
@@ -3,16 +3,16 @@ from __future__ import unicode_literals
 import base64
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse
 from ..utils import (
     ExtractorError,
     HEADRequest,
     sanitized_Request,
+    urlencode_postdata,
 )
 
 
 class HotNewHipHopIE(InfoExtractor):
-    _VALID_URL = r'http://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html'
+    _VALID_URL = r'https?://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html'
     _TEST = {
         'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html',
         'md5': '2c2cd2f76ef11a9b3b581e8b232f3d96',
@@ -35,7 +35,7 @@ class HotNewHipHopIE(InfoExtractor):
                 r'"contentUrl" content="(.*?)"', webpage, 'content URL')
             return self.url_result(video_url, ie='Youtube')
 
-        reqdata = compat_urllib_parse.urlencode([
+        reqdata = urlencode_postdata([
             ('mediaType', 's'),
             ('mediaId', video_id),
         ])
diff --git a/youtube_dl/extractor/howcast.py b/youtube_dl/extractor/howcast.py
index e8f51e545bfd2b89a251e1a4fbbeefe80aa371f9..7e36b85ad586984dfb761e4518b23d2b4a074bf7 100644
@@ -8,7 +8,7 @@ class HowcastIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?howcast\.com/videos/(?P<id>\d+)'
     _TEST = {
         'url': 'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
-        'md5': '8b743df908c42f60cf6496586c7f12c3',
+        'md5': '7d45932269a288149483144f01b99789',
         'info_dict': {
             'id': '390161',
             'ext': 'mp4',
@@ -19,9 +19,9 @@ class HowcastIE(InfoExtractor):
             'duration': 56.823,
         },
         'params': {
-            # m3u8 download
             'skip_download': True,
         },
+        'add_ie': ['Ooyala'],
     }
 
     def _real_extract(self, url):
diff --git a/youtube_dl/extractor/howstuffworks.py b/youtube_dl/extractor/howstuffworks.py
index 663e6632a194d8ee271a0c031a921d7eed139005..65ba2a48b069bd67d2b3382f2d87bc1160145612 100644
@@ -6,6 +6,7 @@ from ..utils import (
     int_or_none,
     js_to_json,
     unescapeHTML,
+    determine_ext,
 )
 
 
@@ -23,6 +24,7 @@ class HowStuffWorksIE(InfoExtractor):
                 'thumbnail': 're:^https?://.*\.jpg$',
                 'duration': 161,
             },
+            'skip': 'Video broken',
         },
         {
             'url': 'http://adventure.howstuffworks.com/7199-survival-zone-food-and-water-in-the-savanna-video.htm',
@@ -39,7 +41,7 @@ class HowStuffWorksIE(InfoExtractor):
             'url': 'http://entertainment.howstuffworks.com/arts/2706-sword-swallowing-1-by-dan-meyer-video.htm',
             'info_dict': {
                 'id': '440011',
-                'ext': 'flv',
+                'ext': 'mp4',
                 'title': 'Sword Swallowing #1 by Dan Meyer',
                 'description': 'Video footage (1 of 3) used by permission of the owner Dan Meyer through Sword Swallowers Association International <www.swordswallow.org>',
                 'display_id': 'sword-swallowing-1-by-dan-meyer',
@@ -63,13 +65,19 @@ class HowStuffWorksIE(InfoExtractor):
         video_id = clip_info['content_id']
         formats = []
         m3u8_url = clip_info.get('m3u8')
-        if m3u8_url:
-            formats += self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
+        if m3u8_url and determine_ext(m3u8_url) == 'm3u8':
+            formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=True))
+        flv_url = clip_info.get('flv_url')
+        if flv_url:
+            formats.append({
+                'url': flv_url,
+                'format_id': 'flv',
+            })
         for video in clip_info.get('mp4', []):
             formats.append({
                 'url': video['src'],
-                'format_id': video['bitrate'],
-                'vbr': int(video['bitrate'].rstrip('k')),
+                'format_id': 'mp4-%s' % video['bitrate'],
+                'vbr': int_or_none(video['bitrate'].rstrip('k')),
             })
 
         if not formats:
@@ -102,6 +110,6 @@ class HowStuffWorksIE(InfoExtractor):
             'title': unescapeHTML(clip_info['clip_title']),
             'description': unescapeHTML(clip_info.get('caption')),
             'thumbnail': clip_info.get('video_still_url'),
-            'duration': clip_info.get('duration'),
+            'duration': int_or_none(clip_info.get('duration')),
             'formats': formats,
         }
diff --git a/youtube_dl/extractor/huffpost.py b/youtube_dl/extractor/huffpost.py
index a38eae421a9199b578b3a724d205b13e6367c67a..059073749e67605464b6159b9391f71eb5a6052d 100644
@@ -4,6 +4,7 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
+    determine_ext,
     parse_duration,
     unified_strdate,
 )
@@ -29,7 +30,12 @@ class HuffPostIE(InfoExtractor):
             'description': 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG\'s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC\'s delay of Kenya\'s President and more.  ',
             'duration': 1549,
             'upload_date': '20140124',
-        }
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+        'expected_warnings': ['HTTP Error 404: Not Found'],
     }
 
     def _real_extract(self, url):
@@ -45,7 +51,7 @@ class HuffPostIE(InfoExtractor):
         description = data.get('description')
 
         thumbnails = []
-        for url in data['images'].values():
+        for url in filter(None, data['images'].values()):
             m = re.match('.*-([0-9]+x[0-9]+)\.', url)
             if not m:
                 continue
@@ -54,13 +60,25 @@ class HuffPostIE(InfoExtractor):
                 'resolution': m.group(1),
             })
 
-        formats = [{
-            'format': key,
-            'format_id': key.replace('/', '.'),
-            'ext': 'mp4',
-            'url': url,
-            'vcodec': 'none' if key.startswith('audio/') else None,
-        } for key, url in data.get('sources', {}).get('live', {}).items()]
+        formats = []
+        sources = data.get('sources', {})
+        live_sources = list(sources.get('live', {}).items()) + list(sources.get('live_again', {}).items())
+        for key, url in live_sources:
+            ext = determine_ext(url)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
+            elif ext == 'f4m':
+                formats.extend(self._extract_f4m_formats(
+                    url + '?hdcore=2.9.5', video_id, f4m_id='hds', fatal=False))
+            else:
+                formats.append({
+                    'format': key,
+                    'format_id': key.replace('/', '.'),
+                    'ext': 'mp4',
+                    'url': url,
+                    'vcodec': 'none' if key.startswith('audio/') else None,
+                })
 
         if not formats and data.get('fivemin_id'):
             return self.url_result('5min:%s' % data['fivemin_id'])
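
The rewritten loop routes each live source by its URL extension instead of assuming a progressive MP4. A sketch of the dispatch with invented URLs:

    from youtube_dl.utils import determine_ext

    for url in ('http://example.com/live/master.m3u8',
                'http://example.com/live/manifest.f4m',
                'http://example.com/clips/video.mp4'):
        ext = determine_ext(url)
        handler = {'m3u8': 'HLS', 'f4m': 'HDS'}.get(ext, 'plain HTTP format')
        print(url, '->', handler)
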
diff --git a/youtube_dl/extractor/hypem.py b/youtube_dl/extractor/hypem.py
index b3706fe6d6cd8dfabb8d8b614baf1a6e12ea75d9..f7c9130540e51a75a83052704d61403b488b25f6 100644
@@ -4,7 +4,7 @@ import json
 import time
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse
+from ..compat import compat_urllib_parse_urlencode
 from ..utils import (
     ExtractorError,
     sanitized_Request,
@@ -12,7 +12,7 @@ from ..utils import (
 
 
 class HypemIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?hypem\.com/track/(?P<id>[^/]+)/'
+    _VALID_URL = r'https?://(?:www\.)?hypem\.com/track/(?P<id>[^/]+)/'
     _TEST = {
         'url': 'http://hypem.com/track/1v6ga/BODYWORK+-+TAME',
         'md5': 'b9cc91b5af8995e9f0c1cee04c575828',
@@ -28,7 +28,7 @@ class HypemIE(InfoExtractor):
         track_id = self._match_id(url)
 
         data = {'ax': 1, 'ts': time.time()}
-        request = sanitized_Request(url + '?' + compat_urllib_parse.urlencode(data))
+        request = sanitized_Request(url + '?' + compat_urllib_parse_urlencode(data))
         response, urlh = self._download_webpage_handle(
             request, track_id, 'Downloading webpage with the url')
 
diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py
index 02e1e428e9e41ba75a2ff5c37c7cf0732682c111..0acce9f4c2525a62e3d3ad22b16743737dbb5b07 100644
@@ -1,10 +1,10 @@
 from __future__ import unicode_literals
 
 import re
-import json
 
 from .common import InfoExtractor
 from ..utils import (
+    mimetype2ext,
     qualities,
 )
 
@@ -12,9 +12,9 @@ from ..utils import (
 class ImdbIE(InfoExtractor):
     IE_NAME = 'imdb'
     IE_DESC = 'Internet Movie Database trailers'
-    _VALID_URL = r'http://(?:www|m)\.imdb\.com/video/imdb/vi(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video/[^/]+/|title/tt\d+.*?#lb-)vi(?P<id>\d+)'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.imdb.com/video/imdb/vi2524815897',
         'info_dict': {
             'id': '2524815897',
@@ -22,7 +22,16 @@ class ImdbIE(InfoExtractor):
             'title': 'Ice Age: Continental Drift Trailer (No. 2) - IMDb',
             'description': 'md5:9061c2219254e5d14e03c25c98e96a81',
         }
-    }
+    }, {
+        'url': 'http://www.imdb.com/video/_/vi2524815897',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.imdb.com/title/tt1667889/?ref_=ext_shr_eml_vi#lb-vi2524815897',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.imdb.com/title/tt1667889/#lb-vi2524815897',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -42,19 +51,33 @@ class ImdbIE(InfoExtractor):
             for f_url, f_name in extra_formats]
         format_pages.append(player_page)
 
-        quality = qualities(['SD', '480p', '720p'])
+        quality = qualities(('SD', '480p', '720p', '1080p'))
         formats = []
         for format_page in format_pages:
             json_data = self._search_regex(
                 r'<script[^>]+class="imdb-player-data"[^>]*?>(.*?)</script>',
                 format_page, 'json data', flags=re.DOTALL)
-            info = json.loads(json_data)
-            format_info = info['videoPlayerObject']['video']
-            f_id = format_info['ffname']
+            info = self._parse_json(json_data, video_id, fatal=False)
+            if not info:
+                continue
+            format_info = info.get('videoPlayerObject', {}).get('video', {})
+            if not format_info:
+                continue
+            video_info_list = format_info.get('videoInfoList')
+            if not video_info_list or not isinstance(video_info_list, list):
+                continue
+            video_info = video_info_list[0]
+            if not video_info or not isinstance(video_info, dict):
+                continue
+            video_url = video_info.get('videoUrl')
+            if not video_url:
+                continue
+            format_id = format_info.get('ffname')
             formats.append({
-                'format_id': f_id,
-                'url': format_info['videoInfoList'][0]['videoUrl'],
-                'quality': quality(f_id),
+                'format_id': format_id,
+                'url': video_url,
+                'ext': mimetype2ext(video_info.get('videoMimeType')),
+                'quality': quality(format_id),
             })
         self._sort_formats(formats)
 
@@ -70,7 +93,7 @@ class ImdbIE(InfoExtractor):
 class ImdbListIE(InfoExtractor):
     IE_NAME = 'imdb:list'
     IE_DESC = 'Internet Movie Database lists'
-    _VALID_URL = r'http://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'
+    _VALID_URL = r'https?://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'
     _TEST = {
         'url': 'http://www.imdb.com/list/JFs9NWw6XI0',
         'info_dict': {
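
For context, utils.qualities() turns the preference tuple into a lookup function: the returned callable maps a format id to its index in the tuple, and -1 for unknown ids, so '1080p' now sorts above '720p':

    from youtube_dl.utils import qualities

    quality = qualities(('SD', '480p', '720p', '1080p'))
    print(quality('SD'), quality('720p'), quality('1080p'), quality('4K'))
    # 0 2 3 -1
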
diff --git a/youtube_dl/extractor/indavideo.py b/youtube_dl/extractor/indavideo.py
index 12fb5e8e1dcb1e5dfba8057a1496edcbe0f61f82..c6f080484a99f43614f104ead8023e8e57609cda 100644
@@ -60,7 +60,8 @@ class IndavideoEmbedIE(InfoExtractor):
 
         formats = [{
             'url': video_url,
-            'height': self._search_regex(r'\.(\d{3,4})\.mp4$', video_url, 'height', default=None),
+            'height': int_or_none(self._search_regex(
+                r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None)),
         } for video_url in video_urls]
         self._sort_formats(formats)
 
@@ -73,7 +74,7 @@ class IndavideoEmbedIE(InfoExtractor):
             'url': self._proto_relative_url(thumbnail)
         } for thumbnail in video.get('thumbnails', [])]
 
-        tags = [tag['title'] for tag in video.get('tags', [])]
+        tags = [tag['title'] for tag in video.get('tags') or []]
 
         return {
             'id': video.get('id') or video_id,
diff --git a/youtube_dl/extractor/infoq.py b/youtube_dl/extractor/infoq.py
index 016af2084b3301bccf6f6191df66b2eec84968a4..cca0b8a9323c0d2412c65610a3acb3ef2943ba6f 100644
@@ -4,15 +4,12 @@ from __future__ import unicode_literals
 
 import base64
 
-from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse_unquote,
-    compat_parse_qs,
-)
+from ..compat import compat_urllib_parse_unquote
 from ..utils import determine_ext
+from .bokecc import BokeCCBaseIE
 
 
-class InfoQIE(InfoExtractor):
+class InfoQIE(BokeCCBaseIE):
     _VALID_URL = r'https?://(?:www\.)?infoq\.com/(?:[^/]+/)+(?P<id>[^/]+)'
 
     _TESTS = [{
@@ -38,26 +35,6 @@ class InfoQIE(InfoExtractor):
         },
     }]
 
-    def _extract_bokecc_videos(self, webpage, video_id):
-        # TODO: bokecc.com is a Chinese video cloud platform
-        # It should have an independent extractor but I don't have other
-        # examples using bokecc
-        player_params_str = self._html_search_regex(
-            r'<script[^>]+src="http://p\.bokecc\.com/player\?([^"]+)',
-            webpage, 'player params', default=None)
-
-        player_params = compat_parse_qs(player_params_str)
-
-        info_xml = self._download_xml(
-            'http://p.bokecc.com/servlet/playinfo?uid=%s&vid=%s&m=1' % (
-                player_params['siteid'][0], player_params['vid'][0]), video_id)
-
-        return [{
-            'format_id': 'bokecc',
-            'url': quality.find('./copy').attrib['playurl'],
-            'preference': int(quality.attrib['value']),
-        } for quality in info_xml.findall('./video/quality')]
-
     def _extract_rtmp_videos(self, webpage):
         # The server URL is hardcoded
         video_url = 'rtmpe://video.infoq.com/cfx/st/'
@@ -101,7 +78,7 @@ class InfoQIE(InfoExtractor):
 
         if '/cn/' in url:
             # for China videos, HTTP video URL exists but always fails with 403
-            formats = self._extract_bokecc_videos(webpage, video_id)
+            formats = self._extract_bokecc_formats(webpage, video_id)
         else:
             formats = self._extract_rtmp_videos(webpage) + self._extract_http_videos(webpage)
 
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py
index ed3e0711815bc36a6b8702c994087ae5827f5dab..fc0197ae19d2c3db4a504268256400d6ef81702d 100644
@@ -4,23 +4,32 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
+    get_element_by_attribute,
     int_or_none,
     limit_length,
+    lowercase_escape,
+    try_get,
 )
 
 
 class InstagramIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?instagram\.com/p/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/p/(?P<id>[^/?#&]+))'
     _TESTS = [{
         'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
         'md5': '0d2da106a9d2631273e192b372806516',
         'info_dict': {
             'id': 'aye83DjauH',
             'ext': 'mp4',
-            'uploader_id': 'naomipq',
             'title': 'Video by naomipq',
             'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
-        }
+            'thumbnail': 're:^https?://.*\.jpg',
+            'timestamp': 1371748545,
+            'upload_date': '20130620',
+            'uploader_id': 'naomipq',
+            'uploader': 'Naomi Leonor Phan-Quang',
+            'like_count': int,
+            'comment_count': int,
+        },
     }, {
         # missing description
         'url': 'https://www.instagram.com/p/BA-pQFBG8HZ/?taken-by=britneyspears',
@@ -29,6 +38,13 @@ class InstagramIE(InfoExtractor):
             'ext': 'mp4',
             'uploader_id': 'britneyspears',
             'title': 'Video by britneyspears',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'timestamp': 1453760977,
+            'upload_date': '20160125',
+            'uploader_id': 'britneyspears',
+            'uploader': 'Britney Spears',
+            'like_count': int,
+            'comment_count': int,
         },
         'params': {
             'skip_download': True,
@@ -36,25 +52,86 @@ class InstagramIE(InfoExtractor):
     }, {
         'url': 'https://instagram.com/p/-Cmh1cukG2/',
         'only_matching': True,
+    }, {
+        'url': 'http://instagram.com/p/9o6LshA7zy/embed/',
+        'only_matching': True,
     }]
 
+    @staticmethod
+    def _extract_embed_url(webpage):
+        mobj = re.search(
+            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?instagram\.com/p/[^/]+/embed.*?)\1',
+            webpage)
+        if mobj:
+            return mobj.group('url')
+
+        blockquote_el = get_element_by_attribute(
+            'class', 'instagram-media', webpage)
+        if blockquote_el is None:
+            return
+
+        mobj = re.search(
+            r'<a[^>]+href=([\'"])(?P<link>[^\'"]+)\1', blockquote_el)
+        if mobj:
+            return mobj.group('link')
+
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        url = mobj.group('url')
 
         webpage = self._download_webpage(url, video_id)
-        uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
-                                         webpage, 'uploader id', fatal=False)
-        desc = self._search_regex(
-            r'"caption":"(.+?)"', webpage, 'description', default=None)
+
+        (video_url, description, thumbnail, timestamp, uploader,
+         uploader_id, like_count, comment_count) = [None] * 8
+
+        shared_data = self._parse_json(
+            self._search_regex(
+                r'window\._sharedData\s*=\s*({.+?});',
+                webpage, 'shared data', default='{}'),
+            video_id, fatal=False)
+        if shared_data:
+            media = try_get(
+                shared_data, lambda x: x['entry_data']['PostPage'][0]['media'], dict)
+            if media:
+                video_url = media.get('video_url')
+                description = media.get('caption')
+                thumbnail = media.get('display_src')
+                timestamp = int_or_none(media.get('date'))
+                uploader = media.get('owner', {}).get('full_name')
+                uploader_id = media.get('owner', {}).get('username')
+                like_count = int_or_none(media.get('likes', {}).get('count'))
+                comment_count = int_or_none(media.get('comments', {}).get('count'))
+
+        if not video_url:
+            video_url = self._og_search_video_url(webpage, secure=False)
+
+        if not uploader_id:
+            uploader_id = self._search_regex(
+                r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"',
+                webpage, 'uploader id', fatal=False)
+
+        if not description:
+            description = self._search_regex(
+                r'"caption"\s*:\s*"(.+?)"', webpage, 'description', default=None)
+            if description is not None:
+                description = lowercase_escape(description)
+
+        if not thumbnail:
+            thumbnail = self._og_search_thumbnail(webpage)
 
         return {
             'id': video_id,
-            'url': self._og_search_video_url(webpage, secure=False),
+            'url': video_url,
             'ext': 'mp4',
             'title': 'Video by %s' % uploader_id,
-            'thumbnail': self._og_search_thumbnail(webpage),
+            'description': description,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
             'uploader_id': uploader_id,
-            'description': desc,
+            'uploader': uploader,
+            'like_count': like_count,
+            'comment_count': comment_count,
         }
 
 
@@ -136,7 +213,7 @@ class InstagramUserIE(InfoExtractor):
 
             if not page['items']:
                 break
-            max_id = page['items'][-1]['id']
+            max_id = page['items'][-1]['id'].split('_')[0]
             media_url = (
                 'http://instagram.com/%s/media?max_id=%s' % (
                     uploader_id, max_id))
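
try_get() is what keeps the deep dive into window._sharedData safe: the lambda is attempted, and a KeyError/IndexError/TypeError from a changed page layout yields None instead of crashing. A toy run with a made-up payload:

    from youtube_dl.utils import try_get

    shared_data = {'entry_data': {'PostPage': [{'media': {'video_url': 'https://example.com/v.mp4'}}]}}
    media = try_get(shared_data, lambda x: x['entry_data']['PostPage'][0]['media'], dict)
    print(media and media.get('video_url'))  # https://example.com/v.mp4
    print(try_get({}, lambda x: x['entry_data']['PostPage'][0]['media'], dict))  # None
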
diff --git a/youtube_dl/extractor/internetvideoarchive.py b/youtube_dl/extractor/internetvideoarchive.py
index 483cc6f9e62da3bc272ba66efc540b95c17116e7..45add007fd99c8bd80f16c1becfb42cf403d45d5 100644
@@ -1,93 +1,91 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..compat import (
+    compat_parse_qs,
     compat_urlparse,
-    compat_urllib_parse,
 )
 from ..utils import (
-    xpath_with_ns,
+    determine_ext,
+    int_or_none,
+    xpath_text,
 )
 
 
 class InternetVideoArchiveIE(InfoExtractor):
-    _VALID_URL = r'https?://video\.internetvideoarchive\.net/flash/players/.*?\?.*?publishedid.*?'
+    _VALID_URL = r'https?://video\.internetvideoarchive\.net/(?:player|flash/players)/.*?\?.*?publishedid.*?'
 
     _TEST = {
-        'url': 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?customerid=69249&publishedid=452693&playerid=247',
+        'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?customerid=69249&publishedid=194487&reporttag=vdbetatitle&playerid=641&autolist=0&domain=www.videodetective.com&maxrate=high&minrate=low&socialplayer=false',
         'info_dict': {
-            'id': '452693',
+            'id': '194487',
             'ext': 'mp4',
-            'title': 'SKYFALL',
-            'description': 'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.',
-            'duration': 152,
+            'title': 'KICK-ASS 2',
+            'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
         },
     }
 
     @staticmethod
-    def _build_url(query):
-        return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query
+    def _build_json_url(query):
+        return 'http://video.internetvideoarchive.net/player/6/configuration.ashx?' + query
 
     @staticmethod
-    def _clean_query(query):
-        NEEDED_ARGS = ['publishedid', 'customerid']
-        query_dic = compat_urlparse.parse_qs(query)
-        cleaned_dic = dict((k, v[0]) for (k, v) in query_dic.items() if k in NEEDED_ARGS)
-        # Other player ids return m3u8 urls
-        cleaned_dic['playerid'] = '247'
-        cleaned_dic['videokbrate'] = '100000'
-        return compat_urllib_parse.urlencode(cleaned_dic)
+    def _build_xml_url(query):
+        return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query
 
     def _real_extract(self, url):
         query = compat_urlparse.urlparse(url).query
-        query_dic = compat_urlparse.parse_qs(query)
+        query_dic = compat_parse_qs(query)
         video_id = query_dic['publishedid'][0]
-        url = self._build_url(query)
 
-        flashconfiguration = self._download_xml(url, video_id,
-                                                'Downloading flash configuration')
-        file_url = flashconfiguration.find('file').text
-        file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
-        # Replace some of the parameters in the query to get the best quality
-        # and http links (no m3u8 manifests)
-        file_url = re.sub(r'(?<=\?)(.+)$',
-                          lambda m: self._clean_query(m.group()),
-                          file_url)
-        info = self._download_xml(file_url, video_id,
-                                  'Downloading video info')
-        item = info.find('channel/item')
+        if '/player/' in url:
+            configuration = self._download_json(url, video_id)
+
+            # There are multiple videos in the playlist while only the first one
+            # matches the video played in browsers
+            video_info = configuration['playlist'][0]
+
+            formats = []
+            for source in video_info['sources']:
+                file_url = source['file']
+                if determine_ext(file_url) == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        file_url, video_id, ext='mp4', m3u8_id='hls'))
+                else:
+                    a_format = {
+                        'url': file_url,
+                    }
+
+                    if source.get('label') and source['label'][-4:] == ' kbs':
+                        tbr = int_or_none(source['label'][:-4])
+                        a_format.update({
+                            'tbr': tbr,
+                            'format_id': 'http-%d' % tbr,
+                        })
+                    formats.append(a_format)
 
-        def _bp(p):
-            return xpath_with_ns(
-                p,
-                {
-                    'media': 'http://search.yahoo.com/mrss/',
-                    'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats',
-                }
-            )
-        formats = []
-        for content in item.findall(_bp('media:group/media:content')):
-            attr = content.attrib
-            f_url = attr['url']
-            width = int(attr['width'])
-            bitrate = int(attr['bitrate'])
-            format_id = '%d-%dk' % (width, bitrate)
-            formats.append({
-                'format_id': format_id,
-                'url': f_url,
-                'width': width,
-                'tbr': bitrate,
-            })
+            self._sort_formats(formats)
 
-        self._sort_formats(formats)
+            title = video_info['title']
+            description = video_info.get('description')
+            thumbnail = video_info.get('image')
+        else:
+            configuration = self._download_xml(url, video_id)
+            formats = [{
+                'url': xpath_text(configuration, './file', 'file URL', fatal=True),
+            }]
+            thumbnail = xpath_text(configuration, './image', 'thumbnail')
+            title = 'InternetVideoArchive video %s' % video_id
+            description = None
 
         return {
             'id': video_id,
-            'title': item.find('title').text,
+            'title': title,
             'formats': formats,
-            'thumbnail': item.find(_bp('media:thumbnail')).attrib['url'],
-            'description': item.find('description').text,
-            'duration': int(attr['duration']),
+            'thumbnail': thumbnail,
+            'description': description,
         }
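
HTTP sources in the JSON player config carry a bitrate label that the new code parses into tbr. With invented labels:

    from youtube_dl.utils import int_or_none

    for label in ('750 kbs', '1500 kbs', 'HD'):
        if label[-4:] == ' kbs':
            print(label, '->', int_or_none(label[:-4]))  # 750, 1500
        else:
            print(label, '-> no tbr')
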
diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py
index 61a0de47283f426879d2fdad1c3620ac979ac0d0..788bbe0d5c44177b5a943da9f9c3c3adf46a77b1 100644
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -6,6 +6,8 @@ import time
 
 from .common import InfoExtractor
 from ..utils import (
+    determine_ext,
+    js_to_json,
     sanitized_Request,
 )
 
@@ -30,8 +32,7 @@ class IPrimaIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
 
@@ -43,9 +44,42 @@ class IPrimaIE(InfoExtractor):
         req.add_header('Referer', url)
         playerpage = self._download_webpage(req, video_id, note='Downloading player')
 
-        m3u8_url = self._search_regex(r"'src': '([^']+\.m3u8)'", playerpage, 'm3u8 url')
+        formats = []
 
-        formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
+        def extract_formats(format_url, format_key=None, lang=None):
+            ext = determine_ext(format_url)
+            new_formats = []
+            if format_key == 'hls' or ext == 'm3u8':
+                new_formats = self._extract_m3u8_formats(
+                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id='hls', fatal=False)
+            elif format_key == 'dash' or ext == 'mpd':
+                new_formats = self._extract_mpd_formats(
+                    format_url, video_id, mpd_id='dash', fatal=False)
+            if lang:
+                for f in new_formats:
+                    if not f.get('language'):
+                        f['language'] = lang
+            formats.extend(new_formats)
+
+        options = self._parse_json(
+            self._search_regex(
+                r'(?s)var\s+playerOptions\s*=\s*({.+?});',
+                playerpage, 'player options', default='{}'),
+            video_id, transform_source=js_to_json, fatal=False)
+        if options:
+            for key, tracks in options.get('tracks', {}).items():
+                if not isinstance(tracks, list):
+                    continue
+                for track in tracks:
+                    src = track.get('src')
+                    if src:
+                        extract_formats(src, key.lower(), track.get('lang'))
+
+        if not formats:
+            for _, src in re.findall(r'src["\']\s*:\s*(["\'])(.+?)\1', playerpage):
+                extract_formats(src)
 
         self._sort_formats(formats)
 
diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py
index c3e33009a00a5174138c08ae3aec78ae9b7c899a..ddcb3c916e6a0610484dc5ceddbd84b507e761fd 100644
@@ -14,10 +14,11 @@ from .common import InfoExtractor
 from ..compat import (
     compat_parse_qs,
     compat_str,
-    compat_urllib_parse,
+    compat_urllib_parse_urlencode,
     compat_urllib_parse_urlparse,
 )
 from ..utils import (
+    decode_packed_codes,
     ExtractorError,
     ohdave_rsa_encrypt,
     remove_start,
@@ -126,43 +127,11 @@ class IqiyiSDK(object):
 
 
 class IqiyiSDKInterpreter(object):
-    BASE62_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
-
     def __init__(self, sdk_code):
         self.sdk_code = sdk_code
 
-    @classmethod
-    def base62(cls, num):
-        if num == 0:
-            return '0'
-        ret = ''
-        while num:
-            ret = cls.BASE62_TABLE[num % 62] + ret
-            num = num // 62
-        return ret
-
-    def decode_eval_codes(self):
-        self.sdk_code = self.sdk_code[5:-3]
-
-        mobj = re.search(
-            r"'([^']+)',62,(\d+),'([^']+)'\.split\('\|'\),[^,]+,{}",
-            self.sdk_code)
-        obfucasted_code, count, symbols = mobj.groups()
-        count = int(count)
-        symbols = symbols.split('|')
-        symbol_table = {}
-
-        while count:
-            count -= 1
-            b62count = self.base62(count)
-            symbol_table[b62count] = symbols[count] or b62count
-
-        self.sdk_code = re.sub(
-            r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
-            obfucasted_code)
-
     def run(self, target, ip, timestamp):
-        self.decode_eval_codes()
+        self.sdk_code = decode_packed_codes(self.sdk_code)
 
         functions = re.findall(r'input=([a-zA-Z0-9]+)\(input', self.sdk_code)
 
@@ -196,7 +165,7 @@ class IqiyiIE(InfoExtractor):
     IE_NAME = 'iqiyi'
     IE_DESC = '爱奇艺'
 
-    _VALID_URL = r'http://(?:[^.]+\.)?iqiyi\.com/.+\.html'
+    _VALID_URL = r'https?://(?:(?:[^.]+\.)?iqiyi\.com|www\.pps\.tv)/.+\.html'
 
     _NETRC_MACHINE = 'iqiyi'
 
@@ -304,6 +273,9 @@ class IqiyiIE(InfoExtractor):
             'title': '灌篮高手 国语版',
         },
         'playlist_count': 101,
+    }, {
+        'url': 'http://www.pps.tv/w_19rrbav0ph.html',
+        'only_matching': True,
     }]
 
     _FORMATS_MAP = [
@@ -315,6 +287,13 @@ class IqiyiIE(InfoExtractor):
         ('10', 'h1'),
     ]
 
+    AUTH_API_ERRORS = {
+        # No preview available (不允许试看鉴权失败)
+        'Q00505': 'This video requires a VIP account',
+        # End of preview time (试看结束鉴权失败)
+        'Q00506': 'Needs a VIP account for full video',
+    }
+
     def _real_initialize(self):
         self._login()
 
@@ -353,7 +332,7 @@ class IqiyiIE(InfoExtractor):
             'bird_t': timestamp,
         }
         validation_result = self._download_json(
-            'http://kylin.iqiyi.com/validate?' + compat_urllib_parse.urlencode(validation_params), None,
+            'http://kylin.iqiyi.com/validate?' + compat_urllib_parse_urlencode(validation_params), None,
             note='Validate credentials', errnote='Unable to validate credentials')
 
         MSG_MAP = {
@@ -399,12 +378,19 @@ class IqiyiIE(InfoExtractor):
             auth_req, video_id,
             note='Downloading video authentication JSON',
             errnote='Unable to download video authentication JSON')
-        if auth_result['code'] == 'Q00506':  # requires a VIP account
+
+        code = auth_result.get('code')
+        msg = self.AUTH_API_ERRORS.get(code) or auth_result.get('msg') or code
+        if code == 'Q00506':
             if do_report_warning:
-                self.report_warning('Needs a VIP account for full video')
+                self.report_warning(msg)
             return False
+        if 'data' not in auth_result:
+            if msg is not None:
+                raise ExtractorError('%s said: %s' % (self.IE_NAME, msg), expected=True)
+            raise ExtractorError('Unexpected error from Iqiyi auth API')
 
-        return auth_result
+        return auth_result['data']
 
     def construct_video_urls(self, data, video_id, _uuid, tvid):
         def do_xor(x, y):
@@ -480,14 +466,14 @@ class IqiyiIE(InfoExtractor):
                         need_vip_warning_report = False
                         break
                     param.update({
-                        't': auth_result['data']['t'],
+                        't': auth_result['t'],
                         # cid is hard-coded in com/qiyi/player/core/player/RuntimeData.as
                         'cid': 'afbe8fd3d73448c9',
                         'vid': video_id,
-                        'QY00001': auth_result['data']['u'],
+                        'QY00001': auth_result['u'],
                     })
                 api_video_url += '?' if '?' not in api_video_url else '&'
-                api_video_url += compat_urllib_parse.urlencode(param)
+                api_video_url += compat_urllib_parse_urlencode(param)
                 js = self._download_json(
                     api_video_url, video_id,
                     note='Download video info of segment %d for format %s' % (segment_index + 1, format_id))
@@ -519,20 +505,23 @@ class IqiyiIE(InfoExtractor):
             'enc': md5_text(enc_key + tail),
             'qyid': _uuid,
             'tn': random.random(),
-            'um': 0,
+            # In iQiyi's flash player, um is set to 1 if there's a logged-in user
+            # Some 1080P formats are only available with a logged-in user.
+            # Here force um=1 to trick the iQiyi server
+            'um': 1,
             'authkey': md5_text(md5_text('') + tail),
             'k_tag': 1,
         }
 
         api_url = 'http://cache.video.qiyi.com/vms' + '?' + \
-            compat_urllib_parse.urlencode(param)
+            compat_urllib_parse_urlencode(param)
         raw_data = self._download_json(api_url, video_id)
         return raw_data
 
-    def get_enc_key(self, swf_url, video_id):
+    def get_enc_key(self, video_id):
         # TODO: automatic key extraction
         # last update at 2016-01-22 for Zombie::bite
-        enc_key = '6ab6d0280511493ba85594779759d4ed'
+        enc_key = '4a1caba4b4465345366f28da7c117d20'
         return enc_key
 
     def _extract_playlist(self, webpage):
@@ -582,11 +571,9 @@ class IqiyiIE(InfoExtractor):
             r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid')
         video_id = self._search_regex(
             r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id')
-        swf_url = self._search_regex(
-            r'(http://[^\'"]+MainPlayer[^.]+\.swf)', webpage, 'swf player URL')
         _uuid = uuid.uuid4().hex
 
-        enc_key = self.get_enc_key(swf_url, video_id)
+        enc_key = self.get_enc_key(video_id)
 
         raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid)
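
The base62 helper removed above now lives behind utils.decode_packed_codes; reproduced as a self-contained reference, since it is what maps symbol-table indices back to identifiers in the P.A.C.K.E.R.-obfuscated SDK code:

    BASE62_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'

    def base62(num):
        if num == 0:
            return '0'
        ret = ''
        while num:
            ret = BASE62_TABLE[num % 62] + ret
            num = num // 62
        return ret

    print(base62(0), base62(61), base62(62), base62(1000))  # 0 Z 10 g8
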
 
diff --git a/youtube_dl/extractor/ivideon.py b/youtube_dl/extractor/ivideon.py
index 617dc8c071d0ab983a74a95184a47ee5a6f82525..3ca824f7984f3cac8615644c5babbcacb4e5c4a3 100644
@@ -5,7 +5,7 @@ import re
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_parse,
+    compat_urllib_parse_urlencode,
     compat_urlparse,
 )
 from ..utils import qualities
@@ -62,7 +62,7 @@ class IvideonIE(InfoExtractor):
         quality = qualities(self._QUALITIES)
 
         formats = [{
-            'url': 'https://streaming.ivideon.com/flv/live?%s' % compat_urllib_parse.urlencode({
+            'url': 'https://streaming.ivideon.com/flv/live?%s' % compat_urllib_parse_urlencode({
                 'server': server_id,
                 'camera': camera_id,
                 'sessionId': 'demo',
diff --git a/youtube_dl/extractor/izlesene.py b/youtube_dl/extractor/izlesene.py
index bc226fa67c064b991674a510b1eba54d40dc67e0..aa0728abc0155fa6abbe8e2a88de18dd89d85138 100644
@@ -29,7 +29,7 @@ class IzleseneIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'Sevinçten Çıldırtan Doğum Günü Hediyesi',
                 'description': 'md5:253753e2655dde93f59f74b572454f6d',
-                'thumbnail': 're:^http://.*\.jpg',
+                'thumbnail': 're:^https?://.*\.jpg',
                 'uploader_id': 'pelikzzle',
                 'timestamp': int,
                 'upload_date': '20140702',
@@ -44,8 +44,7 @@ class IzleseneIE(InfoExtractor):
                 'id': '17997',
                 'ext': 'mp4',
                 'title': 'Tarkan Dortmund 2006 Konseri',
-                'description': 'Tarkan Dortmund 2006 Konseri',
-                'thumbnail': 're:^http://.*\.jpg',
+                'thumbnail': 're:^https://.*\.jpg',
                 'uploader_id': 'parlayankiz',
                 'timestamp': int,
                 'upload_date': '20061112',
@@ -62,7 +61,7 @@ class IzleseneIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         title = self._og_search_title(webpage)
-        description = self._og_search_description(webpage)
+        description = self._og_search_description(webpage, default=None)
         thumbnail = self._proto_relative_url(
             self._og_search_thumbnail(webpage), scheme='http:')
 
diff --git a/youtube_dl/extractor/jadorecettepub.py b/youtube_dl/extractor/jadorecettepub.py
deleted file mode 100644
index 063e86d..0000000
--- a/youtube_dl/extractor/jadorecettepub.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# coding: utf-8
-
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from .youtube import YoutubeIE
-
-
-class JadoreCettePubIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?jadorecettepub\.com/[0-9]{4}/[0-9]{2}/(?P<id>.*?)\.html'
-
-    _TEST = {
-        'url': 'http://www.jadorecettepub.com/2010/12/star-wars-massacre-par-les-japonais.html',
-        'md5': '401286a06067c70b44076044b66515de',
-        'info_dict': {
-            'id': 'jLMja3tr7a4',
-            'ext': 'mp4',
-            'title': 'La pire utilisation de Star Wars',
-            'description': "Jadorecettepub.com vous a gratifié de plusieurs pubs géniales utilisant Star Wars et Dark Vador plus particulièrement... Mais l'heure est venue de vous proposer une version totalement massacrée, venue du Japon.  Quand les Japonais détruisent l'image de Star Wars pour vendre du thon en boite, ça promet...",
-        },
-    }
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('id')
-
-        webpage = self._download_webpage(url, display_id)
-
-        title = self._html_search_regex(
-            r'<span style="font-size: x-large;"><b>(.*?)</b></span>',
-            webpage, 'title')
-        description = self._html_search_regex(
-            r'(?s)<div id="fb-root">(.*?)<script>', webpage, 'description',
-            fatal=False)
-        real_url = self._search_regex(
-            r'\[/postlink\](.*)endofvid', webpage, 'video URL')
-        video_id = YoutubeIE.extract_id(real_url)
-
-        return {
-            '_type': 'url_transparent',
-            'url': real_url,
-            'id': video_id,
-            'title': title,
-            'description': description,
-        }
diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py
index eef7daa299813219c5211aefe2051a1160238319..1a4227f6b4b0ef7370b0f09613ef9d4b8916b435 100644
@@ -8,7 +8,7 @@ from .common import InfoExtractor
 
 
 class JeuxVideoIE(InfoExtractor):
-    _VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)\.htm'
+    _VALID_URL = r'https?://.*?\.jeuxvideo\.com/.*/(.*?)\.htm'
 
     _TESTS = [{
         'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
@@ -30,7 +30,7 @@ class JeuxVideoIE(InfoExtractor):
         webpage = self._download_webpage(url, title)
         title = self._html_search_meta('name', webpage) or self._og_search_title(webpage)
         config_url = self._html_search_regex(
-            r'data-src="(/contenu/medias/video.php.*?)"',
+            r'data-src(?:set-video)?="(/contenu/medias/video.php.*?)"',
             webpage, 'config URL')
         config_url = 'http://www.jeuxvideo.com' + config_url
 
diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py
index 8e90d59868c380d1f486d838d8b6f3283bdc2b58..e44e31104f55fc72daa6884f09bcf8faab390568 100644
@@ -4,67 +4,124 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+    determine_ext,
+    float_or_none,
+    int_or_none,
+)
 
 
-class JWPlatformIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://content\.jwplatform\.com/(?:feeds|players|jw6)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
-    _TEST = {
-        'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
-        'md5': 'fa8899fa601eb7c83a64e9d568bdf325',
-        'info_dict': {
-            'id': 'nPripu9l',
-            'ext': 'mov',
-            'title': 'Big Buck Bunny Trailer',
-            'description': 'Big Buck Bunny is a short animated film by the Blender Institute. It is made using free and open source software.',
-            'upload_date': '20081127',
-            'timestamp': 1227796140,
-        }
-    }
-
+class JWPlatformBaseIE(InfoExtractor):
     @staticmethod
-    def _extract_url(webpage):
+    def _find_jwplayer_data(webpage):
+        # TODO: Merge this with JWPlayer-related codes in generic.py
+
         mobj = re.search(
-            r'<script[^>]+?src=["\'](?P<url>(?:https?:)?//content.jwplatform.com/players/[a-zA-Z0-9]{8})',
+            r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\((?P<options>[^)]+)\)',
             webpage)
         if mobj:
-            return mobj.group('url')
+            return mobj.group('options')
 
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        json_data = self._download_json('http://content.jwplatform.com/feeds/%s.json' % video_id, video_id)
-        video_data = json_data['playlist'][0]
-        subtitles = {}
-        for track in video_data['tracks']:
-            if track['kind'] == 'captions':
-                subtitles[track['label']] = [{'url': self._proto_relative_url(track['file'])}]
+    def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
+        jwplayer_data = self._parse_json(
+            self._find_jwplayer_data(webpage), video_id)
+        return self._parse_jwplayer_data(
+            jwplayer_data, video_id, *args, **kwargs)
+
+    def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True, m3u8_id=None, rtmp_params=None):
+        # JWPlayer backward compatibility: flattened playlists
+        # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
+        if 'playlist' not in jwplayer_data:
+            jwplayer_data = {'playlist': [jwplayer_data]}
+
+        video_data = jwplayer_data['playlist'][0]
+
+        # JWPlayer backward compatibility: flattened sources
+        # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
+        if 'sources' not in video_data:
+            video_data['sources'] = [video_data]
 
         formats = []
         for source in video_data['sources']:
             source_url = self._proto_relative_url(source['file'])
             source_type = source.get('type') or ''
-            if source_type == 'application/vnd.apple.mpegurl':
+            if source_type in ('application/vnd.apple.mpegurl', 'hls') or determine_ext(source_url) == 'm3u8':
                 formats.extend(self._extract_m3u8_formats(
-                    source_url, video_id, 'mp4', 'm3u8_native', fatal=False))
+                    source_url, video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
             elif source_type.startswith('audio'):
                 formats.append({
                     'url': source_url,
                     'vcodec': 'none',
                 })
             else:
-                formats.append({
+                a_format = {
                     'url': source_url,
                     'width': int_or_none(source.get('width')),
                     'height': int_or_none(source.get('height')),
-                })
+                }
+                if source_url.startswith('rtmp'):
+                    a_format['ext'] = 'flv'
+
+                    # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
+                    # of jwplayer.flash.swf
+                    rtmp_url_parts = re.split(
+                        r'((?:mp4|mp3|flv):)', source_url, 1)
+                    if len(rtmp_url_parts) == 3:
+                        rtmp_url, prefix, play_path = rtmp_url_parts
+                        a_format.update({
+                            'url': rtmp_url,
+                            'play_path': prefix + play_path,
+                        })
+                    if rtmp_params:
+                        a_format.update(rtmp_params)
+                formats.append(a_format)
         self._sort_formats(formats)
 
+        subtitles = {}
+        tracks = video_data.get('tracks')
+        if tracks and isinstance(tracks, list):
+            for track in tracks:
+                if track.get('file') and track.get('kind') == 'captions':
+                    subtitles.setdefault(track.get('label') or 'en', []).append({
+                        'url': self._proto_relative_url(track['file'])
+                    })
+
         return {
             'id': video_id,
-            'title': video_data['title'],
+            'title': video_data['title'] if require_title else video_data.get('title'),
             'description': video_data.get('description'),
             'thumbnail': self._proto_relative_url(video_data.get('image')),
             'timestamp': int_or_none(video_data.get('pubdate')),
+            'duration': float_or_none(jwplayer_data.get('duration')),
             'subtitles': subtitles,
             'formats': formats,
         }
+
+
+class JWPlatformIE(JWPlatformBaseIE):
+    _VALID_URL = r'(?:https?://content\.jwplatform\.com/(?:feeds|players|jw6)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
+    _TEST = {
+        'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
+        'md5': 'fa8899fa601eb7c83a64e9d568bdf325',
+        'info_dict': {
+            'id': 'nPripu9l',
+            'ext': 'mov',
+            'title': 'Big Buck Bunny Trailer',
+            'description': 'Big Buck Bunny is a short animated film by the Blender Institute. It is made using free and open source software.',
+            'upload_date': '20081127',
+            'timestamp': 1227796140,
+        }
+    }
+
+    @staticmethod
+    def _extract_url(webpage):
+        mobj = re.search(
+            r'<script[^>]+?src=["\'](?P<url>(?:https?:)?//content\.jwplatform\.com/players/[a-zA-Z0-9]{8})',
+            webpage)
+        if mobj:
+            return mobj.group('url')
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        json_data = self._download_json('http://content.jwplatform.com/feeds/%s.json' % video_id, video_id)
+        return self._parse_jwplayer_data(json_data, video_id)
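
The two backward-compatibility shims above (a bare item standing in for a playlist, a bare source standing in for a sources list) and the RTMP URL split are easier to see in isolation. A minimal sketch of both, outside the extractor, with illustrative inputs; note that assigning item['sources'] = [item] makes the structure self-referential, exactly as in the diff:

    import re

    def normalize_jwplayer_config(options):
        # A flattened config is a single playlist item ...
        if 'playlist' not in options:
            options = {'playlist': [options]}
        item = options['playlist'][0]
        # ... and a flattened item is a single source.
        if 'sources' not in item:
            item['sources'] = [item]
        return options

    def split_rtmp_url(source_url):
        # 'rtmp://host/app/mp4:some/path' -> ('rtmp://host/app/', 'mp4:some/path')
        parts = re.split(r'((?:mp4|mp3|flv):)', source_url, 1)
        if len(parts) == 3:
            rtmp_url, prefix, play_path = parts
            return rtmp_url, prefix + play_path
        return source_url, None

    print(split_rtmp_url('rtmp://host/app/mp4:clips/trailer'))
    # ('rtmp://host/app/', 'mp4:clips/trailer')
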
index ccbc39c665412980e6b6104e83ffaf2e8574517f..a65697ff558864f36cc5e8b8f82f959b19ea16fc 100644 (file)
@@ -6,8 +6,9 @@ import base64
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_parse,
+    compat_urllib_parse_urlencode,
     compat_urlparse,
+    compat_parse_qs,
 )
 from ..utils import (
     clean_html,
@@ -20,21 +21,17 @@ from ..utils import (
 class KalturaIE(InfoExtractor):
     _VALID_URL = r'''(?x)
                 (?:
-                    kaltura:(?P<partner_id_s>\d+):(?P<id_s>[0-9a-z_]+)|
+                    kaltura:(?P<partner_id>\d+):(?P<id>[0-9a-z_]+)|
                     https?://
                         (:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/
                         (?:
                             (?:
                                 # flash player
-                                index\.php/kwidget/
-                                (?:[^/]+/)*?wid/_(?P<partner_id>\d+)/
-                                (?:[^/]+/)*?entry_id/(?P<id>[0-9a-z_]+)|
+                                index\.php/kwidget|
                                 # html5 player
-                                html5/html5lib/
-                                (?:[^/]+/)*?entry_id/(?P<id_html5>[0-9a-z_]+)
-                                .*\?.*\bwid=_(?P<partner_id_html5>\d+)
+                                html5/html5lib/[^/]+/mwEmbedFrame\.php
                             )
-                        )
+                        )(?:/(?P<path>[^?]+))?(?:\?(?P<query>.*))?
                 )
                 '''
     _API_BASE = 'http://cdnapi.kaltura.com/api_v3/index.php?'
@@ -74,7 +71,7 @@ class KalturaIE(InfoExtractor):
                 for k, v in a.items():
                     params['%d:%s' % (i, k)] = v
 
-        query = compat_urllib_parse.urlencode(params)
+        query = compat_urllib_parse_urlencode(params)
         url = self._API_BASE + query
         data = self._download_json(url, video_id, *args, **kwargs)
 
@@ -127,10 +124,41 @@ class KalturaIE(InfoExtractor):
         url, smuggled_data = unsmuggle_url(url, {})
 
         mobj = re.match(self._VALID_URL, url)
-        partner_id = mobj.group('partner_id_s') or mobj.group('partner_id') or mobj.group('partner_id_html5')
-        entry_id = mobj.group('id_s') or mobj.group('id') or mobj.group('id_html5')
-
-        info, flavor_assets = self._get_video_info(entry_id, partner_id)
+        partner_id, entry_id = mobj.group('partner_id', 'id')
+        ks = None
+        if partner_id and entry_id:
+            info, flavor_assets = self._get_video_info(entry_id, partner_id)
+        else:
+            path, query = mobj.group('path', 'query')
+            if not path and not query:
+                raise ExtractorError('Invalid URL', expected=True)
+            params = {}
+            if query:
+                params = compat_parse_qs(query)
+            if path:
+                path_segments = path.split('/')
+                params.update(dict(zip(path_segments[::2], [[v] for v in path_segments[1::2]])))
+            if 'wid' in params:
+                partner_id = params['wid'][0][1:]
+            elif 'p' in params:
+                partner_id = params['p'][0]
+            else:
+                raise ExtractorError('Invalid URL', expected=True)
+            if 'entry_id' in params:
+                entry_id = params['entry_id'][0]
+                info, flavor_assets = self._get_video_info(entry_id, partner_id)
+            elif 'uiconf_id' in params and 'flashvars[referenceId]' in params:
+                reference_id = params['flashvars[referenceId]'][0]
+                webpage = self._download_webpage(url, reference_id)
+                entry_data = self._parse_json(self._search_regex(
+                    r'window\.kalturaIframePackageData\s*=\s*({.*});',
+                    webpage, 'kalturaIframePackageData'),
+                    reference_id)['entryResult']
+                info, flavor_assets = entry_data['meta'], entry_data['contextData']['flavorAssets']
+                entry_id = info['id']
+            else:
+                raise ExtractorError('Invalid URL', expected=True)
+            ks = params.get('flashvars[ks]', [None])[0]
 
         source_url = smuggled_data.get('source_url')
         if source_url:
@@ -140,14 +168,19 @@ class KalturaIE(InfoExtractor):
         else:
             referrer = None
 
+        def sign_url(unsigned_url):
+            if ks:
+                unsigned_url += '/ks/%s' % ks
+            if referrer:
+                unsigned_url += '?referrer=%s' % referrer
+            return unsigned_url
+
         formats = []
         for f in flavor_assets:
             # Continue if asset is not ready
             if f['status'] != 2:
                 continue
-            video_url = '%s/flavorId/%s' % (info['dataUrl'], f['id'])
-            if referrer:
-                video_url += '?referrer=%s' % referrer
+            video_url = sign_url('%s/flavorId/%s' % (info['dataUrl'], f['id']))
             formats.append({
                 'format_id': '%(fileExt)s-%(bitrate)s' % f,
                 'ext': f.get('fileExt'),
@@ -160,9 +193,7 @@ class KalturaIE(InfoExtractor):
                 'width': int_or_none(f.get('width')),
                 'url': video_url,
             })
-        m3u8_url = info['dataUrl'].replace('format/url', 'format/applehttp')
-        if referrer:
-            m3u8_url += '?referrer=%s' % referrer
+        m3u8_url = sign_url(info['dataUrl'].replace('format/url', 'format/applehttp'))
         formats.extend(self._extract_m3u8_formats(
             m3u8_url, entry_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
 
index 06daf5a89ce3ffde4d71d7dc8ceee9441840b72b..a6050c4de3e1695ac26bd1a21bab981a52755c21 100644 (file)
@@ -2,39 +2,63 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote_plus
-from ..utils import (
-    js_to_json,
-)
 
 
 class KaraoketvIE(InfoExtractor):
-    _VALID_URL = r'http://karaoketv\.co\.il/\?container=songs&id=(?P<id>[0-9]+)'
+    _VALID_URL = r'http://www\.karaoketv\.co\.il/[^/]+/(?P<id>\d+)'
     _TEST = {
-        'url': 'http://karaoketv.co.il/?container=songs&id=171568',
+        'url': 'http://www.karaoketv.co.il/%D7%A9%D7%99%D7%A8%D7%99_%D7%A7%D7%A8%D7%99%D7%95%D7%A7%D7%99/58356/%D7%90%D7%99%D7%96%D7%95%D7%9F',
         'info_dict': {
-            'id': '171568',
-            'ext': 'mp4',
-            'title': 'אל העולם שלך - רותם כהן - שרים קריוקי',
+            'id': '58356',
+            'ext': 'flv',
+            'title': 'קריוקי של איזון',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
         }
     }
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
+
         webpage = self._download_webpage(url, video_id)
+        api_page_url = self._search_regex(
+            r'<iframe[^>]+src=(["\'])(?P<url>https?://www\.karaoke\.co\.il/api_play\.php\?.+?)\1',
+            webpage, 'API play URL', group='url')
+
+        api_page = self._download_webpage(api_page_url, video_id)
+        video_cdn_url = self._search_regex(
+            r'<iframe[^>]+src=(["\'])(?P<url>https?://www\.video-cdn\.com/embed/iframe/.+?)\1',
+            api_page, 'video cdn URL', group='url')
+
+        video_cdn = self._download_webpage(video_cdn_url, video_id)
+        play_path = self._parse_json(
+            self._search_regex(
+                r'var\s+options\s*=\s*({.+?});', video_cdn, 'options'),
+            video_id)['clip']['url']
 
-        page_video_url = self._og_search_video_url(webpage, video_id)
-        config_json = compat_urllib_parse_unquote_plus(self._search_regex(
-            r'config=(.*)', page_video_url, 'configuration'))
+        settings = self._parse_json(
+            self._search_regex(
+                r'var\s+settings\s*=\s*({.+?});', video_cdn, 'servers', default='{}'),
+            video_id, fatal=False) or {}
 
-        urls_info_json = self._download_json(
-            config_json, video_id, 'Downloading configuration',
-            transform_source=js_to_json)
+        servers = settings.get('servers')
+        if not servers or not isinstance(servers, list):
+            servers = ('wowzail.video-cdn.com:80/vodcdn', )
 
-        url = urls_info_json['playlist'][0]['url']
+        formats = [{
+            'url': 'rtmp://%s' % server if not server.startswith('rtmp') else server,
+            'play_path': play_path,
+            'app': 'vodcdn',
+            'page_url': video_cdn_url,
+            'player_url': 'http://www.video-cdn.com/assets/flowplayer/flowplayer.commercial-3.2.18.swf',
+            'rtmp_real_time': True,
+            'ext': 'flv',
+        } for server in servers]
 
         return {
             'id': video_id,
             'title': self._og_search_title(webpage),
-            'url': url,
+            'formats': formats,
         }
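
Both iframe lookups above use the same regex idiom: capture the opening quote in a group and backreference it with \1, so the match ends at the matching quote regardless of which quote style the page uses. A standalone demonstration (the HTML snippet is made up):

    import re

    html = "<iframe src='https://www.karaoke.co.il/api_play.php?id=1'></iframe>"
    mobj = re.search(
        r'<iframe[^>]+src=(["\'])(?P<url>https?://.+?)\1', html)
    print(mobj.group('url'))  # https://www.karaoke.co.il/api_play.php?id=1
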
index bed94bc9338d158c77087d4e74ef341aa236f94f..c05263e6165159320376939c252af7dea7aeadb2 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 
 class KarriereVideosIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?karrierevideos\.at(?:/[^/]+)+/(?P<id>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?karrierevideos\.at(?:/[^/]+)+/(?P<id>[^/]+)'
     _TESTS = [{
         'url': 'http://www.karrierevideos.at/berufsvideos/mittlere-hoehere-schulen/altenpflegerin',
         'info_dict': {
@@ -52,9 +52,12 @@ class KarriereVideosIE(InfoExtractor):
 
         video_id = self._search_regex(
             r'/config/video/(.+?)\.xml', webpage, 'video id')
+        # Server returns malformed headers
+        # Force Accept-Encoding: * to prevent gzipped results
         playlist = self._download_xml(
             'http://www.karrierevideos.at/player-playlist.xml.php?p=%s' % video_id,
-            video_id, transform_source=fix_xml_ampersands)
+            video_id, transform_source=fix_xml_ampersands,
+            headers={'Accept-Encoding': '*'})
 
         NS_MAP = {
             'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'
index 08a671fa86a007d3327ef03c257f1b943bd425db..61739efa7a4c3b84892083eab10237c23eb69e3d 100644 (file)
@@ -14,10 +14,10 @@ class KhanAcademyIE(InfoExtractor):
 
     _TESTS = [{
         'url': 'http://www.khanacademy.org/video/one-time-pad',
-        'md5': '7021db7f2d47d4fff89b13177cb1e8f4',
+        'md5': '7b391cce85e758fb94f763ddc1bbb979',
         'info_dict': {
             'id': 'one-time-pad',
-            'ext': 'mp4',
+            'ext': 'webm',
             'title': 'The one-time pad',
             'description': 'The perfect cipher',
             'duration': 176,
index a59c529f4c90d8f3211a783ec5e4e2d3f7be9d84..704bd7b34554af60dfec9b811251f5270cbd1f55 100644 (file)
@@ -13,7 +13,7 @@ from ..utils import (
 class KontrTubeIE(InfoExtractor):
     IE_NAME = 'kontrtube'
     IE_DESC = 'KontrTube.ru - Труба зовёт'
-    _VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/(?P<display_id>[^/]+)/'
+    _VALID_URL = r'https?://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/(?P<display_id>[^/]+)/'
 
     _TEST = {
         'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/',
index a602980a141f3f8ccce026eaddc8b383e7894352..a574408e55b6a5ee251d94ca1d0346b9e34ac0b8 100644 (file)
@@ -4,7 +4,7 @@ from .common import InfoExtractor
 
 
 class Ku6IE(InfoExtractor):
-    _VALID_URL = r'http://v\.ku6\.com/show/(?P<id>[a-zA-Z0-9\-\_]+)(?:\.)*html'
+    _VALID_URL = r'https?://v\.ku6\.com/show/(?P<id>[a-zA-Z0-9\-\_]+)(?:\.)*html'
     _TEST = {
         'url': 'http://v.ku6.com/show/JG-8yS14xzBr4bCn1pu0xw...html',
         'md5': '01203549b9efbb45f4b87d55bdea1ed1',
diff --git a/youtube_dl/extractor/kusi.py b/youtube_dl/extractor/kusi.py
new file mode 100644 (file)
index 0000000..12cc56e
--- /dev/null
@@ -0,0 +1,99 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import random
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote_plus
+from ..utils import (
+    int_or_none,
+    float_or_none,
+    timeconvert,
+    update_url_query,
+    xpath_text,
+)
+
+
+class KUSIIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?kusi\.com/(?P<path>story/.+|video\?clipId=(?P<clipId>\d+))'
+    _TESTS = [{
+        'url': 'http://www.kusi.com/story/31183873/turko-files-case-closed-put-on-hold',
+        'md5': 'f926e7684294cf8cb7bdf8858e1b3988',
+        'info_dict': {
+            'id': '12203019',
+            'ext': 'mp4',
+            'title': 'Turko Files: Case Closed! & Put On Hold!',
+            'duration': 231.0,
+            'upload_date': '20160210',
+            'timestamp': 1455087571,
+            'thumbnail': 're:^https?://.*\.jpg$'
+        },
+    }, {
+        'url': 'http://kusi.com/video?clipId=12203019',
+        'info_dict': {
+            'id': '12203019',
+            'ext': 'mp4',
+            'title': 'Turko Files: Case Closed! & Put On Hold!',
+            'duration': 231.0,
+            'upload_date': '20160210',
+            'timestamp': 1455087571,
+            'thumbnail': 're:^https?://.*\.jpg$'
+        },
+        'params': {
+            'skip_download': True,  # Same as previous one
+        },
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        clip_id = mobj.group('clipId')
+        video_id = clip_id or mobj.group('path')
+
+        webpage = self._download_webpage(url, video_id)
+
+        if clip_id is None:
+            video_id = clip_id = self._html_search_regex(
+                r'"clipId"\s*,\s*"(\d+)"', webpage, 'clip id')
+
+        affiliate_id = self._search_regex(
+            r'affiliateId\s*:\s*\'([^\']+)\'', webpage, 'affiliate id')
+
+        # See __Packages/worldnow/model/GalleryModel.as of WNGallery.swf
+        xml_url = update_url_query('http://www.kusi.com/build.asp', {
+            'buildtype': 'buildfeaturexmlrequest',
+            'featureType': 'Clip',
+            'featureid': clip_id,
+            'affiliateno': affiliate_id,
+            'clientgroupid': '1',
+            'rnd': int(round(random.random() * 1000000)),
+        })
+
+        doc = self._download_xml(xml_url, video_id)
+
+        video_title = xpath_text(doc, 'HEADLINE', fatal=True)
+        duration = float_or_none(xpath_text(doc, 'DURATION'), scale=1000)
+        description = xpath_text(doc, 'ABSTRACT')
+        thumbnail = xpath_text(doc, './THUMBNAILIMAGE/FILENAME')
+        creation_time = timeconvert(xpath_text(doc, 'rfc822creationdate'))
+
+        quality_options = doc.find('{http://search.yahoo.com/mrss/}group').findall('{http://search.yahoo.com/mrss/}content')
+        formats = []
+        for quality in quality_options:
+            formats.append({
+                'url': compat_urllib_parse_unquote_plus(quality.attrib['url']),
+                'height': int_or_none(quality.attrib.get('height')),
+                'width': int_or_none(quality.attrib.get('width')),
+                'vbr': float_or_none(quality.attrib.get('bitratebits'), scale=1000),
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video_title,
+            'description': description,
+            'duration': duration,
+            'formats': formats,
+            'thumbnail': thumbnail,
+            'timestamp': creation_time,
+        }
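
The build.asp request above is assembled with update_url_query from youtube_dl.utils, which merges a dict of parameters into a URL's query string. Roughly, and with an illustrative clip id:

    from youtube_dl.utils import update_url_query

    xml_url = update_url_query('http://www.kusi.com/build.asp', {
        'buildtype': 'buildfeaturexmlrequest',
        'featureType': 'Clip',
        'featureid': '12203019',  # illustrative clip id
    })
    # http://www.kusi.com/build.asp?buildtype=...&featureid=12203019
    # (parameter order in the result may vary)
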
index f641edef8ada91cf1a4b458f388f7f51ce56fb3f..0221fb9191879cef1d57e42238b5d86737cdd621 100644 (file)
@@ -2,13 +2,13 @@
 from __future__ import unicode_literals
 
 import re
-import itertools
 
 from .common import InfoExtractor
 from ..utils import (
     get_element_by_id,
     clean_html,
     ExtractorError,
+    InAdvancePagedList,
     remove_start,
 )
 
@@ -23,16 +23,29 @@ class KuwoBaseIE(InfoExtractor):
         {'format': 'aac', 'ext': 'aac', 'abr': 48, 'preference': 10}
     ]
 
-    def _get_formats(self, song_id):
+    def _get_formats(self, song_id, tolerate_ip_deny=False):
         formats = []
         for file_format in self._FORMATS:
+            headers = {}
+            cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
+            if cn_verification_proxy:
+                headers['Ytdl-request-proxy'] = cn_verification_proxy
+
+            query = {
+                'format': file_format['ext'],
+                'br': file_format.get('br', ''),
+                'rid': 'MUSIC_%s' % song_id,
+                'type': 'convert_url',
+                'response': 'url'
+            }
+
             song_url = self._download_webpage(
-                'http://antiserver.kuwo.cn/anti.s?format=%s&br=%s&rid=MUSIC_%s&type=convert_url&response=url' %
-                (file_format['ext'], file_format.get('br', ''), song_id),
+                'http://antiserver.kuwo.cn/anti.s',
                 song_id, note='Download %s url info' % file_format['format'],
+                query=query, headers=headers,
             )
 
-            if song_url == 'IPDeny':
+            if song_url == 'IPDeny' and not tolerate_ip_deny:
                 raise ExtractorError('This song is blocked in this region', expected=True)
 
             if song_url.startswith('http://') or song_url.startswith('https://'):
@@ -43,14 +56,14 @@ class KuwoBaseIE(InfoExtractor):
                     'preference': file_format['preference'],
                     'abr': file_format.get('abr'),
                 })
-        self._sort_formats(formats)
+
         return formats
 
 
 class KuwoIE(KuwoBaseIE):
     IE_NAME = 'kuwo:song'
     IE_DESC = '酷我音乐'
-    _VALID_URL = r'http://www\.kuwo\.cn/yinyue/(?P<id>\d+?)/'
+    _VALID_URL = r'https?://www\.kuwo\.cn/yinyue/(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://www.kuwo.cn/yinyue/635632/',
         'info_dict': {
@@ -68,12 +81,16 @@ class KuwoIE(KuwoBaseIE):
             'id': '6446136',
             'ext': 'mp3',
             'title': '心',
+            'description': 'md5:5d0e947b242c35dc0eb1d2fce9fbf02c',
             'creator': 'IU',
             'upload_date': '20150518',
         },
         'params': {
             'format': 'mp3-320'
         },
+    }, {
+        'url': 'http://www.kuwo.cn/yinyue/3197154?catalog=yueku2016',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -85,18 +102,19 @@ class KuwoIE(KuwoBaseIE):
             raise ExtractorError('this song has been offline because of copyright issues', expected=True)
 
         song_name = self._html_search_regex(
-            r'(?s)class="(?:[^"\s]+\s+)*title(?:\s+[^"\s]+)*".*?<h1[^>]+title="([^"]+)"', webpage, 'song name')
-        singer_name = self._html_search_regex(
-            r'<div[^>]+class="s_img">\s*<a[^>]+title="([^>]+)"',
-            webpage, 'singer name', fatal=False)
+            r'<p[^>]+id="lrcName">([^<]+)</p>', webpage, 'song name')
+        singer_name = remove_start(self._html_search_regex(
+            r'<a[^>]+href="http://www\.kuwo\.cn/artist/content\?name=([^"]+)">',
+            webpage, 'singer name', fatal=False), '歌手')
         lrc_content = clean_html(get_element_by_id('lrcContent', webpage))
         if lrc_content == '暂无':     # indicates no lyrics
             lrc_content = None
 
         formats = self._get_formats(song_id)
+        self._sort_formats(formats)
 
         album_id = self._html_search_regex(
-            r'<p[^>]+class="album"[^<]+<a[^>]+href="http://www\.kuwo\.cn/album/(\d+)/"',
+            r'<a[^>]+href="http://www\.kuwo\.cn/album/(\d+)/"',
             webpage, 'album id', fatal=False)
 
         publish_time = None
@@ -125,13 +143,13 @@ class KuwoIE(KuwoBaseIE):
 class KuwoAlbumIE(InfoExtractor):
     IE_NAME = 'kuwo:album'
     IE_DESC = '酷我音乐 - 专辑'
-    _VALID_URL = r'http://www\.kuwo\.cn/album/(?P<id>\d+?)/'
+    _VALID_URL = r'https?://www\.kuwo\.cn/album/(?P<id>\d+?)/'
     _TEST = {
         'url': 'http://www.kuwo.cn/album/502294/',
         'info_dict': {
             'id': '502294',
-            'title': 'M',
-            'description': 'md5:6a7235a84cc6400ec3b38a7bdaf1d60c',
+            'title': 'Made\xa0Series\xa0《M》',
+            'description': 'md5:d463f0d8a0ff3c3ea3d6ed7452a9483f',
         },
         'playlist_count': 2,
     }
@@ -161,13 +179,11 @@ class KuwoAlbumIE(InfoExtractor):
 class KuwoChartIE(InfoExtractor):
     IE_NAME = 'kuwo:chart'
     IE_DESC = '酷我音乐 - 排行榜'
-    _VALID_URL = r'http://yinyue\.kuwo\.cn/billboard_(?P<id>[^.]+).htm'
+    _VALID_URL = r'https?://yinyue\.kuwo\.cn/billboard_(?P<id>[^.]+)\.htm'
     _TEST = {
         'url': 'http://yinyue.kuwo.cn/billboard_香港中文龙虎榜.htm',
         'info_dict': {
             'id': '香港中文龙虎榜',
-            'title': '香港中文龙虎榜',
-            'description': 're:\d{4}第\d{2}期',
         },
         'playlist_mincount': 10,
     }
@@ -178,30 +194,24 @@ class KuwoChartIE(InfoExtractor):
             url, chart_id, note='Download chart info',
             errnote='Unable to get chart info')
 
-        chart_name = self._html_search_regex(
-            r'<h1[^>]+class="unDis">([^<]+)</h1>', webpage, 'chart name')
-
-        chart_desc = self._html_search_regex(
-            r'<p[^>]+class="tabDef">(\d{4}第\d{2}期)</p>', webpage, 'chart desc')
-
         entries = [
             self.url_result(song_url, 'Kuwo') for song_url in re.findall(
-                r'<a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)/"', webpage)
+                r'<a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)', webpage)
         ]
-        return self.playlist_result(entries, chart_id, chart_name, chart_desc)
+        return self.playlist_result(entries, chart_id)
 
 
 class KuwoSingerIE(InfoExtractor):
     IE_NAME = 'kuwo:singer'
     IE_DESC = '酷我音乐 - 歌手'
-    _VALID_URL = r'http://www\.kuwo\.cn/mingxing/(?P<id>[^/]+)'
+    _VALID_URL = r'https?://www\.kuwo\.cn/mingxing/(?P<id>[^/]+)'
     _TESTS = [{
         'url': 'http://www.kuwo.cn/mingxing/bruno+mars/',
         'info_dict': {
             'id': 'bruno+mars',
-            'title': 'Bruno Mars',
+            'title': 'Bruno\xa0Mars',
         },
-        'playlist_count': 10,
+        'playlist_mincount': 329,
     }, {
         'url': 'http://www.kuwo.cn/mingxing/Ali/music.htm',
         'info_dict': {
@@ -212,6 +222,8 @@ class KuwoSingerIE(InfoExtractor):
         'skip': 'Regularly stalls travis build',  # See https://travis-ci.org/rg3/youtube-dl/jobs/78878540
     }]
 
+    PAGE_SIZE = 15
+
     def _real_extract(self, url):
         singer_id = self._match_id(url)
         webpage = self._download_webpage(
@@ -219,25 +231,28 @@ class KuwoSingerIE(InfoExtractor):
             errnote='Unable to get singer info')
 
         singer_name = self._html_search_regex(
-            r'<div class="title clearfix">\s*<h1>([^<]+)<span', webpage, 'singer name'
-        )
+            r'<h1>([^<]+)</h1>', webpage, 'singer name')
 
-        entries = []
-        first_page_only = False if re.search(r'/music(?:_\d+)?\.htm', url) else True
-        for page_num in itertools.count(1):
+        artist_id = self._html_search_regex(
+            r'data-artistid="(\d+)"', webpage, 'artist id')
+
+        page_count = int(self._html_search_regex(
+            r'data-page="(\d+)"', webpage, 'page count'))
+
+        def page_func(page_num):
             webpage = self._download_webpage(
-                'http://www.kuwo.cn/mingxing/%s/music_%d.htm' % (singer_id, page_num),
-                singer_id, note='Download song list page #%d' % page_num,
-                errnote='Unable to get song list page #%d' % page_num)
+                'http://www.kuwo.cn/artist/contentMusicsAjax',
+                singer_id, note='Download song list page #%d' % (page_num + 1),
+                errnote='Unable to get song list page #%d' % (page_num + 1),
+                query={'artistId': artist_id, 'pn': page_num, 'rn': self.PAGE_SIZE})
 
-            entries.extend([
+            return [
                 self.url_result(song_url, 'Kuwo') for song_url in re.findall(
-                    r'<p[^>]+class="m_name"><a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)/',
+                    r'<div[^>]+class="name"><a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)',
                     webpage)
-            ][:10 if first_page_only else None])
+            ]
 
-            if first_page_only or not re.search(r'<a[^>]+href="[^"]+">下一页</a>', webpage):
-                break
+        entries = InAdvancePagedList(page_func, page_count, self.PAGE_SIZE)
 
         return self.playlist_result(entries, singer_id, singer_name)
 
@@ -245,7 +260,7 @@ class KuwoSingerIE(InfoExtractor):
 class KuwoCategoryIE(InfoExtractor):
     IE_NAME = 'kuwo:category'
     IE_DESC = '酷我音乐 - 分类'
-    _VALID_URL = r'http://yinyue\.kuwo\.cn/yy/cinfo_(?P<id>\d+?).htm'
+    _VALID_URL = r'https?://yinyue\.kuwo\.cn/yy/cinfo_(?P<id>\d+?)\.htm'
     _TEST = {
         'url': 'http://yinyue.kuwo.cn/yy/cinfo_86375.htm',
         'info_dict': {
@@ -253,7 +268,7 @@ class KuwoCategoryIE(InfoExtractor):
             'title': '八十年代精选',
             'description': '这些都是属于八十年代的回忆!',
         },
-        'playlist_count': 30,
+        'playlist_mincount': 24,
     }
 
     def _real_extract(self, url):
@@ -268,6 +283,8 @@ class KuwoCategoryIE(InfoExtractor):
         category_desc = remove_start(
             get_element_by_id('intro', webpage).strip(),
             '%s简介:' % category_name)
+        if category_desc == '暂无':
+            category_desc = None
 
         jsonm = self._parse_json(self._html_search_regex(
             r'var\s+jsonm\s*=\s*([^;]+);', webpage, 'category songs'), category_id)
@@ -282,14 +299,20 @@ class KuwoCategoryIE(InfoExtractor):
 class KuwoMvIE(KuwoBaseIE):
     IE_NAME = 'kuwo:mv'
     IE_DESC = '酷我音乐 - MV'
-    _VALID_URL = r'http://www\.kuwo\.cn/mv/(?P<id>\d+?)/'
+    _VALID_URL = r'https?://www\.kuwo\.cn/mv/(?P<id>\d+?)/'
     _TEST = {
         'url': 'http://www.kuwo.cn/mv/6480076/',
         'info_dict': {
             'id': '6480076',
-            'ext': 'mkv',
-            'title': '我们家MV',
-            'creator': '2PM',
+            'ext': 'mp4',
+            'title': 'My HouseMV',
+            'creator': 'PM02:00',
+        },
+        # In this video, music URLs (anti.s) are blocked outside China and
+        # USA, while the MV URL (mvurl) is available globally, so force the MV
+        # URL for consistent results in different countries
+        'params': {
+            'format': 'mv',
         },
     }
     _FORMATS = KuwoBaseIE._FORMATS + [
@@ -312,7 +335,17 @@ class KuwoMvIE(KuwoBaseIE):
         else:
             raise ExtractorError('Unable to find song or singer names')
 
-        formats = self._get_formats(song_id)
+        formats = self._get_formats(song_id, tolerate_ip_deny=True)
+
+        mv_url = self._download_webpage(
+            'http://www.kuwo.cn/yy/st/mvurl?rid=MUSIC_%s' % song_id,
+            song_id, note='Download %s MV URL' % song_id)
+        formats.append({
+            'url': mv_url,
+            'format_id': 'mv',
+        })
+
+        self._sort_formats(formats)
 
         return {
             'id': song_id,
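
KuwoSingerIE now returns an InAdvancePagedList instead of eagerly crawling every page: the page count is known up front, and youtube-dl calls page_func only for the slices it actually needs (for example with --playlist-items). A minimal sketch of the pattern with fake entries, assuming youtube_dl.utils is importable:

    from youtube_dl.utils import InAdvancePagedList

    PAGE_SIZE = 15

    def page_func(page_num):  # zero-based page index
        # A real implementation would download one Ajax page here.
        return ['entry-%d-%d' % (page_num, i) for i in range(PAGE_SIZE)]

    entries = InAdvancePagedList(page_func, 3, PAGE_SIZE)
    print(entries.getslice(0, 2))  # ['entry-0-0', 'entry-0-1']
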
index 5d8ebbeb3d6c01fc5493751b7a89623877bba7c3..2fab38079aac0c5f20a1772d52fa52642cb520bf 100644 (file)
@@ -5,7 +5,7 @@ import re
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_parse,
+    compat_urllib_parse_urlencode,
     compat_urlparse,
 )
 from ..utils import (
@@ -19,7 +19,7 @@ from ..utils import (
 
 
 class Laola1TvIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/[^/]+/(?P<slug>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/(?P<kind>[^/]+)/(?P<slug>[^/?#&]+)'
     _TESTS = [{
         'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
         'info_dict': {
@@ -33,7 +33,7 @@ class Laola1TvIE(InfoExtractor):
         },
         'params': {
             'skip_download': True,
-        }
+        },
     }, {
         'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie',
         'info_dict': {
@@ -47,17 +47,37 @@ class Laola1TvIE(InfoExtractor):
         },
         'params': {
             'skip_download': True,
-        }
+        },
+    }, {
+        'url': 'http://www.laola1.tv/de-de/livestream/2016-03-22-belogorie-belgorod-trentino-diatec-lde',
+        'info_dict': {
+            'id': '487850',
+            'display_id': '2016-03-22-belogorie-belgorod-trentino-diatec-lde',
+            'ext': 'flv',
+            'title': 'Belogorie BELGOROD - TRENTINO Diatec',
+            'upload_date': '20160322',
+            'uploader': 'CEV - Europäischer Volleyball Verband',
+            'is_live': True,
+            'categories': ['Volleyball'],
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'skip': 'This live stream has already finished.',
     }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         display_id = mobj.group('slug')
+        kind = mobj.group('kind')
         lang = mobj.group('lang')
         portal = mobj.group('portal')
 
         webpage = self._download_webpage(url, display_id)
 
+        if 'Dieser Livestream ist bereits beendet.' in webpage:
+            raise ExtractorError('This live stream has already finished.', expected=True)
+
         iframe_url = self._search_regex(
             r'<iframe[^>]*?id="videoplayer"[^>]*?src="([^"]+)"',
             webpage, 'iframe url')
@@ -74,7 +94,7 @@ class Laola1TvIE(InfoExtractor):
 
         hd_doc = self._download_xml(
             'http://www.laola1.tv/server/hd_video.php?%s'
-            % compat_urllib_parse.urlencode({
+            % compat_urllib_parse_urlencode({
                 'play': video_id,
                 'partner': partner_id,
                 'portal': portal,
@@ -85,12 +105,17 @@ class Laola1TvIE(InfoExtractor):
         _v = lambda x, **k: xpath_text(hd_doc, './/video/' + x, **k)
         title = _v('title', fatal=True)
 
+        VS_TARGETS = {
+            'video': '2',
+            'livestream': '17',
+        }
+
         req = sanitized_Request(
             'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access?%s' %
-            compat_urllib_parse.urlencode({
+            compat_urllib_parse_urlencode({
                 'videoId': video_id,
-                'target': '2',
-                'label': 'laola1tv',
+                'target': VS_TARGETS.get(kind, '2'),
+                'label': _v('label'),
                 'area': _v('area'),
             }),
             urlencode_postdata(
@@ -109,6 +134,7 @@ class Laola1TvIE(InfoExtractor):
         formats = self._extract_f4m_formats(
             '%s?hdnea=%s&hdcore=3.2.0' % (token_attrib['url'], token_auth),
             video_id, f4m_id='hds')
+        self._sort_formats(formats)
 
         categories_str = _v('meta_sports')
         categories = categories_str.split(',') if categories_str else []
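
This commit repeatedly swaps compat_urllib_parse.urlencode for the flat compat_urllib_parse_urlencode alias; both serialize a dict into a query string. The stdlib equivalent, sketched with the hd_video.php parameters used above (values illustrative):

    try:
        from urllib.parse import urlencode  # Python 3
    except ImportError:
        from urllib import urlencode  # Python 2

    query = urlencode({
        'play': '227883',  # illustrative video id
        'partner': '1',
        'portal': 'de',
    })
    print('http://www.laola1.tv/server/hd_video.php?' + query)
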
diff --git a/youtube_dl/extractor/learnr.py b/youtube_dl/extractor/learnr.py
new file mode 100644 (file)
index 0000000..1435e09
--- /dev/null
@@ -0,0 +1,33 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class LearnrIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?learnr\.pro/view/video/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.learnr.pro/view/video/51624-web-development-tutorial-for-beginners-1-how-to-build-webpages-with-html-css-javascript',
+        'md5': '3719fdf0a68397f49899e82c308a89de',
+        'info_dict': {
+            'id': '51624',
+            'ext': 'mp4',
+            'title': 'Web Development Tutorial for Beginners (#1) - How to build webpages with HTML, CSS, Javascript',
+            'description': 'md5:b36dbfa92350176cdf12b4d388485503',
+            'uploader': 'LearnCode.academy',
+            'uploader_id': 'learncodeacademy',
+            'upload_date': '20131021',
+        },
+        'add_ie': ['Youtube'],
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        return {
+            '_type': 'url_transparent',
+            'url': self._search_regex(
+                r"videoId\s*:\s*'([^']+)'", webpage, 'youtube id'),
+            'id': video_id,
+        }
index 40a3d23468636877cc485ac9e064ee3527a3dcb2..81b5d41be4a676e55c795fe233591913e7a691c8 100644 (file)
@@ -6,6 +6,7 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     determine_ext,
+    determine_protocol,
     parse_duration,
     int_or_none,
 )
@@ -18,10 +19,14 @@ class Lecture2GoIE(InfoExtractor):
         'md5': 'ac02b570883020d208d405d5a3fd2f7f',
         'info_dict': {
             'id': '17473',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': '2 - Endliche Automaten und reguläre Sprachen',
             'creator': 'Frank Heitmann',
             'duration': 5220,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
         }
     }
 
@@ -32,14 +37,18 @@ class Lecture2GoIE(InfoExtractor):
         title = self._html_search_regex(r'<em[^>]+class="title">(.+)</em>', webpage, 'title')
 
         formats = []
-        for url in set(re.findall(r'"src","([^"]+)"', webpage)):
+        for url in set(re.findall(r'var\s+playerUri\d+\s*=\s*"([^"]+)"', webpage)):
             ext = determine_ext(url)
+            protocol = determine_protocol({'url': url})
             if ext == 'f4m':
-                formats.extend(self._extract_f4m_formats(url, video_id))
+                formats.extend(self._extract_f4m_formats(url, video_id, f4m_id='hds'))
             elif ext == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(url, video_id))
+                formats.extend(self._extract_m3u8_formats(url, video_id, ext='mp4', m3u8_id='hls'))
             else:
+                if protocol == 'rtmp':
+                    continue  # XXX: currently broken
                 formats.append({
+                    'format_id': protocol,
                     'url': url,
                 })
 
similarity index 81%
rename from youtube_dl/extractor/letv.py
rename to youtube_dl/extractor/leeco.py
index 9665ece89c9bc4cd64707ca3f7516749e4a33e91..63f581cd9fe44013ce8ec85ce540d04d0df73713 100644 (file)
@@ -1,36 +1,39 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import base64
 import datetime
+import hashlib
 import re
 import time
-import base64
-import hashlib
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_parse,
     compat_ord,
     compat_str,
+    compat_urllib_parse_urlencode,
 )
 from ..utils import (
     determine_ext,
+    encode_data_uri,
     ExtractorError,
+    int_or_none,
+    orderedSet,
     parse_iso8601,
     sanitized_Request,
-    int_or_none,
     str_or_none,
-    encode_data_uri,
     url_basename,
 )
 
 
-class LetvIE(InfoExtractor):
+class LeIE(InfoExtractor):
     IE_DESC = '乐视网'
-    _VALID_URL = r'http://www\.letv\.com/ptv/vplay/(?P<id>\d+).html'
+    _VALID_URL = r'https?://(?:www\.le\.com/ptv/vplay|sports\.le\.com/video)/(?P<id>\d+)\.html'
+
+    _URL_TEMPLATE = 'http://www.le.com/ptv/vplay/%s.html'
 
     _TESTS = [{
-        'url': 'http://www.letv.com/ptv/vplay/22005890.html',
+        'url': 'http://www.le.com/ptv/vplay/22005890.html',
         'md5': 'edadcfe5406976f42f9f266057ee5e40',
         'info_dict': {
             'id': '22005890',
@@ -42,7 +45,7 @@ class LetvIE(InfoExtractor):
             'hls_prefer_native': True,
         },
     }, {
-        'url': 'http://www.letv.com/ptv/vplay/1415246.html',
+        'url': 'http://www.le.com/ptv/vplay/1415246.html',
         'info_dict': {
             'id': '1415246',
             'ext': 'mp4',
@@ -54,7 +57,7 @@ class LetvIE(InfoExtractor):
         },
     }, {
         'note': 'This video is available only in Mainland China, thus a proxy is needed',
-        'url': 'http://www.letv.com/ptv/vplay/1118082.html',
+        'url': 'http://www.le.com/ptv/vplay/1118082.html',
         'md5': '2424c74948a62e5f31988438979c5ad1',
         'info_dict': {
             'id': '1118082',
@@ -66,6 +69,9 @@ class LetvIE(InfoExtractor):
             'hls_prefer_native': True,
         },
         'skip': 'Only available in China',
+    }, {
+        'url': 'http://sports.le.com/video/25737697.html',
+        'only_matching': True,
     }]
 
     @staticmethod
@@ -94,17 +100,16 @@ class LetvIE(InfoExtractor):
             return encrypted_data
         encrypted_data = encrypted_data[5:]
 
-        _loc4_ = bytearray()
-        while encrypted_data:
-            b = compat_ord(encrypted_data[0])
-            _loc4_.extend([b // 16, b & 0x0f])
-            encrypted_data = encrypted_data[1:]
+        _loc4_ = bytearray(2 * len(encrypted_data))
+        for idx, val in enumerate(encrypted_data):
+            b = compat_ord(val)
+            _loc4_[2 * idx] = b // 16
+            _loc4_[2 * idx + 1] = b % 16
         idx = len(_loc4_) - 11
         _loc4_ = _loc4_[idx:] + _loc4_[:idx]
-        _loc7_ = bytearray()
-        while _loc4_:
-            _loc7_.append(_loc4_[0] * 16 + _loc4_[1])
-            _loc4_ = _loc4_[2:]
+        _loc7_ = bytearray(len(encrypted_data))
+        for i in range(len(encrypted_data)):
+            _loc7_[i] = _loc4_[2 * i] * 16 + _loc4_[2 * i + 1]
 
         return bytes(_loc7_)
 
@@ -117,10 +122,10 @@ class LetvIE(InfoExtractor):
             'splatid': 101,
             'format': 1,
             'tkey': self.calc_time_key(int(time.time())),
-            'domain': 'www.letv.com'
+            'domain': 'www.le.com'
         }
         play_json_req = sanitized_Request(
-            'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params)
+            'http://api.le.com/mms/out/video/playJson?' + compat_urllib_parse_urlencode(params)
         )
         cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
         if cn_verification_proxy:
@@ -149,7 +154,7 @@ class LetvIE(InfoExtractor):
         for format_id in formats:
             if format_id in dispatch:
                 media_url = playurl['domain'][0] + dispatch[format_id][0]
-                media_url += '&' + compat_urllib_parse.urlencode({
+                media_url += '&' + compat_urllib_parse_urlencode({
                     'm3v': 1,
                     'format': 1,
                     'expect': 3,
@@ -193,26 +198,51 @@ class LetvIE(InfoExtractor):
         }
 
 
-class LetvTvIE(InfoExtractor):
-    _VALID_URL = r'http://www.letv.com/tv/(?P<id>\d+).html'
+class LePlaylistIE(InfoExtractor):
+    _VALID_URL = r'https?://[a-z]+\.le\.com/(?!video)[a-z]+/(?P<id>[a-z0-9_]+)'
+
     _TESTS = [{
-        'url': 'http://www.letv.com/tv/46177.html',
+        'url': 'http://www.le.com/tv/46177.html',
         'info_dict': {
             'id': '46177',
             'title': '美人天下',
             'description': 'md5:395666ff41b44080396e59570dbac01c'
         },
         'playlist_count': 35
+    }, {
+        'url': 'http://tv.le.com/izt/wuzetian/index.html',
+        'info_dict': {
+            'id': 'wuzetian',
+            'title': '武媚娘传奇',
+            'description': 'md5:e12499475ab3d50219e5bba00b3cb248'
+        },
+        # This playlist contains some extra videos other than the drama itself
+        'playlist_mincount': 96
+    }, {
+        'url': 'http://tv.le.com/pzt/lswjzzjc/index.shtml',
+        # This series is moved to http://www.le.com/tv/10005297.html
+        'only_matching': True,
+    }, {
+        'url': 'http://www.le.com/comic/92063.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://list.le.com/listn/c1009_sc532002_d2_p1_o1.html',
+        'only_matching': True,
     }]
 
+    @classmethod
+    def suitable(cls, url):
+        return False if LeIE.suitable(url) else super(LePlaylistIE, cls).suitable(url)
+
     def _real_extract(self, url):
         playlist_id = self._match_id(url)
         page = self._download_webpage(url, playlist_id)
 
-        media_urls = list(set(re.findall(
-            r'http://www.letv.com/ptv/vplay/\d+.html', page)))
-        entries = [self.url_result(media_url, ie='Letv')
-                   for media_url in media_urls]
+        # Currently old domain names are still used in playlists
+        media_ids = orderedSet(re.findall(
+            r'<a[^>]+href="http://www\.letv\.com/ptv/vplay/(\d+)\.html', page))
+        entries = [self.url_result(LeIE._URL_TEMPLATE % media_id, ie='Le')
+                   for media_id in media_ids]
 
         title = self._html_search_meta('keywords', page,
                                        fatal=False).split(',')[0]
@@ -222,31 +252,9 @@ class LetvTvIE(InfoExtractor):
                                     playlist_description=description)
 
 
-class LetvPlaylistIE(LetvTvIE):
-    _VALID_URL = r'http://tv.letv.com/[a-z]+/(?P<id>[a-z]+)/index.s?html'
-    _TESTS = [{
-        'url': 'http://tv.letv.com/izt/wuzetian/index.html',
-        'info_dict': {
-            'id': 'wuzetian',
-            'title': '武媚娘传奇',
-            'description': 'md5:e12499475ab3d50219e5bba00b3cb248'
-        },
-        # This playlist contains some extra videos other than the drama itself
-        'playlist_mincount': 96
-    }, {
-        'url': 'http://tv.letv.com/pzt/lswjzzjc/index.shtml',
-        'info_dict': {
-            'id': 'lswjzzjc',
-            # The title should be "劲舞青春", but I can't find a simple way to
-            # determine the playlist title
-            'title': '乐视午间自制剧场',
-            'description': 'md5:b1eef244f45589a7b5b1af9ff25a4489'
-        },
-        'playlist_mincount': 7
-    }]
-
-
 class LetvCloudIE(InfoExtractor):
+    # Most of *.letv.com was changed to *.le.com on 2016/01/02,
+    # but yuntv.letv.com was kept, so keep the extractor name as well
     IE_DESC = '乐视云'
     _VALID_URL = r'https?://yuntv\.letv\.com/bcloud.html\?.+'
 
@@ -300,7 +308,7 @@ class LetvCloudIE(InfoExtractor):
             }
             self.sign_data(data)
             return self._download_json(
-                'http://api.letvcloud.com/gpc.php?' + compat_urllib_parse.urlencode(data),
+                'http://api.letvcloud.com/gpc.php?' + compat_urllib_parse_urlencode(data),
                 media_id, 'Downloading playJson data for type %s' % cf)
 
         play_json = get_play_json(cf, time.time())
@@ -327,7 +335,7 @@ class LetvCloudIE(InfoExtractor):
             formats.append({
                 'url': url,
                 'ext': determine_ext(decoded_url),
-                'format_id': int_or_none(play_url.get('vtype')),
+                'format_id': str_or_none(play_url.get('vtype')),
                 'format_note': str_or_none(play_url.get('definition')),
                 'width': int_or_none(play_url.get('vwidth')),
                 'height': int_or_none(play_url.get('vheight')),
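
The rewritten decrypt loop above trades byte-by-byte slicing of a shrinking buffer for preallocated bytearrays, but the transform is unchanged: split every byte into two 4-bit nibbles, rotate the nibble sequence so its last 11 entries come first, then re-pair the nibbles into bytes. A standalone sketch of that transform, assuming the 5-byte header has already been stripped as in the extractor:

    def rotate_nibbles(data):
        data = bytearray(data)
        # Split every byte into its high and low nibble.
        nibbles = bytearray(2 * len(data))
        for idx, b in enumerate(data):
            nibbles[2 * idx] = b // 16
            nibbles[2 * idx + 1] = b % 16
        # Move the last 11 nibbles to the front.
        pivot = len(nibbles) - 11
        nibbles = nibbles[pivot:] + nibbles[:pivot]
        # Re-pair the nibbles into bytes.
        return bytes(bytearray(
            nibbles[2 * i] * 16 + nibbles[2 * i + 1]
            for i in range(len(data))))
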
diff --git a/youtube_dl/extractor/libraryofcongress.py b/youtube_dl/extractor/libraryofcongress.py
new file mode 100644 (file)
index 0000000..0a94366
--- /dev/null
@@ -0,0 +1,143 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+from ..utils import (
+    determine_ext,
+    float_or_none,
+    int_or_none,
+    parse_filesize,
+)
+
+
+class LibraryOfCongressIE(InfoExtractor):
+    IE_NAME = 'loc'
+    IE_DESC = 'Library of Congress'
+    _VALID_URL = r'https?://(?:www\.)?loc\.gov/(?:item/|today/cyberlc/feature_wdesc\.php\?.*\brec=)(?P<id>[0-9]+)'
+    _TESTS = [{
+        # embedded via <div class="media-player"
+        'url': 'http://loc.gov/item/90716351/',
+        'md5': '353917ff7f0255aa6d4b80a034833de8',
+        'info_dict': {
+            'id': '90716351',
+            'ext': 'mp4',
+            'title': "Pa's trip to Mars",
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 0,
+            'view_count': int,
+        },
+    }, {
+        # webcast embedded via mediaObjectId
+        'url': 'https://www.loc.gov/today/cyberlc/feature_wdesc.php?rec=5578',
+        'info_dict': {
+            'id': '5578',
+            'ext': 'mp4',
+            'title': 'Help! Preservation Training Needs Here, There & Everywhere',
+            'duration': 3765,
+            'view_count': int,
+            'subtitles': 'mincount:1',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        # with direct download links
+        'url': 'https://www.loc.gov/item/78710669/',
+        'info_dict': {
+            'id': '78710669',
+            'ext': 'mp4',
+            'title': 'La vie et la passion de Jesus-Christ',
+            'duration': 0,
+            'view_count': int,
+            'formats': 'mincount:4',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        media_id = self._search_regex(
+            (r'id=(["\'])media-player-(?P<id>.+?)\1',
+             r'<video[^>]+id=(["\'])uuid-(?P<id>.+?)\1',
+             r'<video[^>]+data-uuid=(["\'])(?P<id>.+?)\1',
+             r'mediaObjectId\s*:\s*(["\'])(?P<id>.+?)\1'),
+            webpage, 'media id', group='id')
+
+        data = self._download_json(
+            'https://media.loc.gov/services/v1/media?id=%s&context=json' % media_id,
+            video_id)['mediaObject']
+
+        derivative = data['derivatives'][0]
+        media_url = derivative['derivativeUrl']
+
+        title = derivative.get('shortName') or data.get('shortName') or self._og_search_title(
+            webpage)
+
+        # The following algorithm was extracted from the setAVSource JS
+        # function found in the webpage
+        media_url = media_url.replace('rtmp', 'https')
+
+        is_video = data.get('mediaType', 'v').lower() == 'v'
+        ext = determine_ext(media_url)
+        if ext not in ('mp4', 'mp3'):
+            media_url += '.mp4' if is_video else '.mp3'
+
+        formats = []
+        if 'vod/mp4:' in media_url:
+            formats.append({
+                'url': media_url.replace('vod/mp4:', 'hls-vod/media/') + '.m3u8',
+                'format_id': 'hls',
+                'ext': 'mp4',
+                'protocol': 'm3u8_native',
+                'quality': 1,
+            })
+        elif 'vod/mp3:' in media_url:
+            formats.append({
+                'url': media_url.replace('vod/mp3:', ''),
+                'vcodec': 'none',
+            })
+
+        download_urls = set()
+        for m in re.finditer(
+                r'<option[^>]+value=(["\'])(?P<url>.+?)\1[^>]+data-file-download=[^>]+>\s*(?P<id>.+?)(?:(?:&nbsp;|\s+)\((?P<size>.+?)\))?\s*<', webpage):
+            format_id = m.group('id').lower()
+            if format_id == 'gif':
+                continue
+            download_url = m.group('url')
+            if download_url in download_urls:
+                continue
+            download_urls.add(download_url)
+            formats.append({
+                'url': download_url,
+                'format_id': format_id,
+                'filesize_approx': parse_filesize(m.group('size')),
+            })
+
+        self._sort_formats(formats)
+
+        duration = float_or_none(data.get('duration'))
+        view_count = int_or_none(data.get('viewCount'))
+
+        subtitles = {}
+        cc_url = data.get('ccUrl')
+        if cc_url:
+            subtitles.setdefault('en', []).append({
+                'url': cc_url,
+                'ext': 'ttml',
+            })
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': self._og_search_thumbnail(webpage, default=None),
+            'duration': duration,
+            'view_count': view_count,
+            'formats': formats,
+            'subtitles': subtitles,
+        }
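
The download-link loop above keeps a set of URLs it has already emitted, so repeated <option> entries cannot produce duplicate formats. The same pattern in miniature (data made up):

    seen_urls = set()
    formats = []
    for fmt_id, url in [('mp4', 'http://x/a.mp4'),
                        ('mp4', 'http://x/a.mp4'),
                        ('mp3', 'http://x/a.mp3')]:
        if url in seen_urls:
            continue  # duplicate link, skip
        seen_urls.add(url)
        formats.append({'format_id': fmt_id, 'url': url})
    print(len(formats))  # 2
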
index f8cbca7b36afab1890b71806d6761bbe67d7d924..c2b4490c49044bea3426dc48b873218aa98146b1 100644 (file)
@@ -7,124 +7,164 @@ from .common import InfoExtractor
 from ..compat import compat_urlparse
 from ..utils import (
     determine_ext,
+    ExtractorError,
     int_or_none,
+    parse_iso8601,
     remove_end,
-    unified_strdate,
-    ExtractorError,
 )
 
 
 class LifeNewsIE(InfoExtractor):
-    IE_NAME = 'lifenews'
-    IE_DESC = 'LIFE | NEWS'
-    _VALID_URL = r'http://lifenews\.ru/(?:mobile/)?(?P<section>news|video)/(?P<id>\d+)'
+    IE_NAME = 'life'
+    IE_DESC = 'Life.ru'
+    _VALID_URL = r'https?://life\.ru/t/[^/]+/(?P<id>\d+)'
 
     _TESTS = [{
-        'url': 'http://lifenews.ru/news/126342',
-        'md5': 'e1b50a5c5fb98a6a544250f2e0db570a',
+        # single video embedded via video/source
+        'url': 'https://life.ru/t/новости/98736',
+        'md5': '77c95eaefaca216e32a76a343ad89d23',
         'info_dict': {
-            'id': '126342',
+            'id': '98736',
             'ext': 'mp4',
-            'title': 'МВД разыскивает мужчин, оставивших в IKEA сумку с автоматом',
-            'description': 'Камеры наблюдения гипермаркета зафиксировали троих мужчин, спрятавших оружейный арсенал в камере хранения.',
-            'thumbnail': 're:http://.*\.jpg',
-            'upload_date': '20140130',
+            'title': 'Мужчина нашел дома архив оборонного завода',
+            'description': 'md5:3b06b1b39b5e2bea548e403d99b8bf26',
+            'timestamp': 1344154740,
+            'upload_date': '20120805',
+            'view_count': int,
         }
     }, {
-        # video in <iframe>
-        'url': 'http://lifenews.ru/news/152125',
+        # single video embedded via iframe
+        'url': 'https://life.ru/t/новости/152125',
         'md5': '77d19a6f0886cd76bdbf44b4d971a273',
         'info_dict': {
             'id': '152125',
             'ext': 'mp4',
             'title': 'В Сети появилось видео захвата «Правым сектором» колхозных полей ',
             'description': 'Жители двух поселков Днепропетровской области не простили радикалам угрозу лишения плодородных земель и пошли в лобовую. ',
+            'timestamp': 1427961840,
             'upload_date': '20150402',
+            'view_count': int,
         }
     }, {
-        'url': 'http://lifenews.ru/news/153461',
-        'md5': '9b6ef8bc0ffa25aebc8bdb40d89ab795',
+        # two videos embedded via iframe
+        'url': 'https://life.ru/t/новости/153461',
         'info_dict': {
             'id': '153461',
-            'ext': 'mp4',
             'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве',
             'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
-            'upload_date': '20150505',
-        }
+            'timestamp': 1430825520,
+            'view_count': int,
+        },
+        'playlist': [{
+            'md5': '9b6ef8bc0ffa25aebc8bdb40d89ab795',
+            'info_dict': {
+                'id': '153461-video1',
+                'ext': 'mp4',
+                'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 1)',
+                'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
+                'timestamp': 1430825520,
+                'upload_date': '20150505',
+            },
+        }, {
+            'md5': 'ebb3bf3b1ce40e878d0d628e93eb0322',
+            'info_dict': {
+                'id': '153461-video2',
+                'ext': 'mp4',
+                'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 2)',
+                'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
+                'timestamp': 1430825520,
+                'upload_date': '20150505',
+            },
+        }],
+    }, {
+        'url': 'https://life.ru/t/новости/213035',
+        'only_matching': True,
     }, {
-        'url': 'http://lifenews.ru/video/13035',
+        'url': 'https://life.ru/t/%D0%BD%D0%BE%D0%B2%D0%BE%D1%81%D1%82%D0%B8/153461',
+        'only_matching': True,
+    }, {
+        'url': 'https://life.ru/t/новости/411489/manuel_vals_nazval_frantsiiu_tsieliu_nomier_odin_dlia_ighil',
         'only_matching': True,
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        section = mobj.group('section')
-
-        webpage = self._download_webpage(
-            'http://lifenews.ru/%s/%s' % (section, video_id),
-            video_id, 'Downloading page')
-
-        videos = re.findall(r'<video.*?poster="(?P<poster>[^"]+)".*?src="(?P<video>[^"]+)".*?></video>', webpage)
-        iframe_link = self._html_search_regex(
-            '<iframe[^>]+src=["\']([^"\']+)["\']', webpage, 'iframe link', default=None)
-        if not videos and not iframe_link:
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_urls = re.findall(
+            r'<video[^>]+><source[^>]+src=["\'](.+?)["\']', webpage)
+
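+        # Videos embedded through the embed.life.ru player; these are
+        # delegated to LifeEmbedIE below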
+        iframe_links = re.findall(
+            r'<iframe[^>]+src=["\']((?:https?:)?//embed\.life\.ru/embed/.+?)["\']',
+            webpage)
+
+        if not video_urls and not iframe_links:
             raise ExtractorError('No media links available for %s' % video_id)
 
         title = remove_end(
             self._og_search_title(webpage),
-            ' - Первый по срочным новостям — LIFE | NEWS')
+            ' - Life.ru')
 
         description = self._og_search_description(webpage)
 
         view_count = self._html_search_regex(
-            r'<div class=\'views\'>\s*(\d+)\s*</div>', webpage, 'view count', fatal=False)
-        comment_count = self._html_search_regex(
-            r'=\'commentCount\'[^>]*>\s*(\d+)\s*<',
-            webpage, 'comment count', fatal=False)
+            r'<div[^>]+class=(["\']).*?\bhits-count\b.*?\1[^>]*>\s*(?P<value>\d+)\s*</div>',
+            webpage, 'view count', fatal=False, group='value')
 
-        upload_date = self._html_search_regex(
-            r'<time[^>]*datetime=\'([^\']+)\'', webpage, 'upload date', fatal=False)
-        if upload_date is not None:
-            upload_date = unified_strdate(upload_date)
+        timestamp = parse_iso8601(self._search_regex(
+            r'<time[^>]+datetime=(["\'])(?P<value>.+?)\1',
+            webpage, 'upload date', fatal=False, group='value'))
 
         common_info = {
             'description': description,
             'view_count': int_or_none(view_count),
-            'comment_count': int_or_none(comment_count),
-            'upload_date': upload_date,
+            'timestamp': timestamp,
         }
 
-        def make_entry(video_id, media, video_number=None):
+        def make_entry(video_id, video_url, index=None):
             cur_info = dict(common_info)
             cur_info.update({
-                'id': video_id,
-                'url': media[1],
-                'thumbnail': media[0],
-                'title': title if video_number is None else '%s-video%s' % (title, video_number),
+                'id': video_id if not index else '%s-video%s' % (video_id, index),
+                'url': video_url,
+                'title': title if not index else '%s (Видео %s)' % (title, index),
             })
             return cur_info
 
-        if iframe_link:
-            iframe_link = self._proto_relative_url(iframe_link, 'http:')
-            cur_info = dict(common_info)
-            cur_info.update({
-                '_type': 'url_transparent',
-                'id': video_id,
-                'title': title,
-                'url': iframe_link,
-            })
+        def make_video_entry(video_id, video_url, index=None):
+            video_url = compat_urlparse.urljoin(url, video_url)
+            return make_entry(video_id, video_url, index)
+
+        def make_iframe_entry(video_id, video_url, index=None):
+            video_url = self._proto_relative_url(video_url, 'http:')
+            cur_info = make_entry(video_id, video_url, index)
+            cur_info['_type'] = 'url_transparent'
             return cur_info
 
-        if len(videos) == 1:
-            return make_entry(video_id, videos[0])
-        else:
-            return [make_entry(video_id, media, video_number + 1) for video_number, media in enumerate(videos)]
+        if len(video_urls) == 1 and not iframe_links:
+            return make_video_entry(video_id, video_urls[0])
+
+        if len(iframe_links) == 1 and not video_urls:
+            return make_iframe_entry(video_id, iframe_links[0])
+
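+        # Multiple media items: number direct videos first, then continue
+        # the numbering across the iframe embeds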
+        entries = []
+
+        if video_urls:
+            for num, video_url in enumerate(video_urls, 1):
+                entries.append(make_video_entry(video_id, video_url, num))
+
+        if iframe_links:
+            for num, iframe_link in enumerate(iframe_links, len(video_urls) + 1):
+                entries.append(make_iframe_entry(video_id, iframe_link, num))
+
+        playlist = common_info.copy()
+        playlist.update(self.playlist_result(entries, video_id, title, description))
+        return playlist
 
 
 class LifeEmbedIE(InfoExtractor):
     IE_NAME = 'life:embed'
-    _VALID_URL = r'http://embed\.life\.ru/embed/(?P<id>[\da-f]{32})'
+    _VALID_URL = r'https?://embed\.life\.ru/embed/(?P<id>[\da-f]{32})'
 
     _TEST = {
         'url': 'http://embed.life.ru/embed/e50c2dec2867350528e2574c899b8291',
@@ -148,7 +188,8 @@ class LifeEmbedIE(InfoExtractor):
             ext = determine_ext(video_url)
             if ext == 'm3u8':
                 formats.extend(self._extract_m3u8_formats(
-                    video_url, video_id, 'mp4', m3u8_id='m3u8'))
+                    video_url, video_id, 'mp4',
+                    entry_protocol='m3u8_native', m3u8_id='m3u8'))
             else:
                 formats.append({
                     'url': video_url,
index 1a0625ac3e0eeefa5ca18e968ae69e210a960662..5d2c3e256740d865e1c0be2c8e2808a9ee97ee43 100644 (file)
@@ -98,13 +98,19 @@ class LimelightBaseIE(InfoExtractor):
         } for thumbnail in properties.get('thumbnails', []) if thumbnail.get('url')]
 
         subtitles = {}
-        for caption in properties.get('captions', {}):
+        for caption in properties.get('captions', []):
             lang = caption.get('language_code')
             subtitles_url = caption.get('url')
             if lang and subtitles_url:
-                subtitles[lang] = [{
+                subtitles.setdefault(lang, []).append({
                     'url': subtitles_url,
-                }]
+                })
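+        # A standalone TTML closed-captions file may also be present; it is
+        # filed as an 'en' subtitle track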
+        closed_captions_url = properties.get('closed_captions_url')
+        if closed_captions_url:
+            subtitles.setdefault('en', []).append({
+                'url': closed_captions_url,
+                'ext': 'ttml',
+            })
 
         return {
             'id': video_id,
@@ -123,7 +129,18 @@ class LimelightBaseIE(InfoExtractor):
 
 class LimelightMediaIE(LimelightBaseIE):
     IE_NAME = 'limelight'
-    _VALID_URL = r'(?:limelight:media:|http://link\.videoplatform\.limelight\.com/media/\??\bmediaId=)(?P<id>[a-z0-9]{32})'
+    _VALID_URL = r'''(?x)
+                        (?:
+                            limelight:media:|
+                            https?://
+                                (?:
+                                    link\.videoplatform\.limelight\.com/media/|
+                                    assets\.delvenetworks\.com/player/loader\.swf
+                                )
+                                \?.*?\bmediaId=
+                        )
+                        (?P<id>[a-z0-9]{32})
+                    '''
     _TESTS = [{
         'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86',
         'info_dict': {
@@ -158,6 +175,9 @@ class LimelightMediaIE(LimelightBaseIE):
             # rtmp download
             'skip_download': True,
         },
+    }, {
+        'url': 'https://assets.delvenetworks.com/player/loader.swf?mediaId=8018a574f08d416e95ceaccae4ba0452',
+        'only_matching': True,
     }]
     _PLAYLIST_SERVICE_PATH = 'media'
     _API_PATH = 'media'
@@ -176,15 +196,29 @@ class LimelightMediaIE(LimelightBaseIE):
 
 class LimelightChannelIE(LimelightBaseIE):
     IE_NAME = 'limelight:channel'
-    _VALID_URL = r'(?:limelight:channel:|http://link\.videoplatform\.limelight\.com/media/\??\bchannelId=)(?P<id>[a-z0-9]{32})'
-    _TEST = {
+    _VALID_URL = r'''(?x)
+                        (?:
+                            limelight:channel:|
+                            https?://
+                                (?:
+                                    link\.videoplatform\.limelight\.com/media/|
+                                    assets\.delvenetworks\.com/player/loader\.swf
+                                )
+                                \?.*?\bchannelId=
+                        )
+                        (?P<id>[a-z0-9]{32})
+                    '''
+    _TESTS = [{
         'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082',
         'info_dict': {
             'id': 'ab6a524c379342f9b23642917020c082',
             'title': 'Javascript Sample Code',
         },
         'playlist_mincount': 3,
-    }
+    }, {
+        'url': 'http://assets.delvenetworks.com/player/loader.swf?channelId=ab6a524c379342f9b23642917020c082',
+        'only_matching': True,
+    }]
     _PLAYLIST_SERVICE_PATH = 'channel'
     _API_PATH = 'channels'
 
@@ -207,15 +241,29 @@ class LimelightChannelIE(LimelightBaseIE):
 
 class LimelightChannelListIE(LimelightBaseIE):
     IE_NAME = 'limelight:channel_list'
-    _VALID_URL = r'(?:limelight:channel_list:|http://link\.videoplatform\.limelight\.com/media/\?.*?\bchannelListId=)(?P<id>[a-z0-9]{32})'
-    _TEST = {
+    _VALID_URL = r'''(?x)
+                        (?:
+                            limelight:channel_list:|
+                            https?://
+                                (?:
+                                    link\.videoplatform\.limelight\.com/media/|
+                                    assets\.delvenetworks\.com/player/loader\.swf
+                                )
+                                \?.*?\bchannelListId=
+                        )
+                        (?P<id>[a-z0-9]{32})
+                    '''
+    _TESTS = [{
         'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b',
         'info_dict': {
             'id': '301b117890c4465c8179ede21fd92e2b',
             'title': 'Website - Hero Player',
         },
         'playlist_mincount': 2,
-    }
+    }, {
+        'url': 'https://assets.delvenetworks.com/player/loader.swf?channelListId=301b117890c4465c8179ede21fd92e2b',
+        'only_matching': True,
+    }]
     _PLAYLIST_SERVICE_PATH = 'channel_list'
 
     def _real_extract(self, url):
diff --git a/youtube_dl/extractor/litv.py b/youtube_dl/extractor/litv.py
new file mode 100644 (file)
index 0000000..3356d01
--- /dev/null
@@ -0,0 +1,137 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    smuggle_url,
+    unsmuggle_url,
+)
+
+
+class LiTVIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.litv\.tv/vod/[^/]+/content\.do\?.*?\bid=(?P<id>[^&]+)'
+
+    _URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?id=%s'
+
+    _TESTS = [{
+        'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
+        'info_dict': {
+            'id': 'VOD00041606',
+            'title': '花千骨',
+        },
+        'playlist_count': 50,
+    }, {
+        'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
+        'info_dict': {
+            'id': 'VOD00041610',
+            'ext': 'mp4',
+            'title': '花千骨第1集',
+            'thumbnail': 're:https?://.*\.jpg$',
+            'description': 'md5:c7017aa144c87467c4fb2909c4b05d6f',
+            'episode_number': 1,
+        },
+        'params': {
+            'noplaylist': True,
+            'skip_download': True,  # m3u8 download
+        },
+        'skip': 'Georestricted to Taiwan',
+    }]
+
+    def _extract_playlist(self, season_list, video_id, vod_data, view_data, prompt=True):
+        episode_title = view_data['title']
+        content_id = season_list['contentId']
+
+        if prompt:
+            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (content_id, video_id))
+
+        all_episodes = [
+            self.url_result(smuggle_url(
+                self._URL_TEMPLATE % (view_data['contentType'], episode['contentId']),
+                {'force_noplaylist': True}))  # To prevent infinite recursion
+            for episode in season_list['episode']]
+
+        return self.playlist_result(all_episodes, content_id, episode_title)
+
+    def _real_extract(self, url):
+        url, data = unsmuggle_url(url, {})
+
+        video_id = self._match_id(url)
+
+        noplaylist = self._downloader.params.get('noplaylist')
+        noplaylist_prompt = True
+        if 'force_noplaylist' in data:
+            noplaylist = data['force_noplaylist']
+            noplaylist_prompt = False
+
+        webpage = self._download_webpage(url, video_id)
+
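+        # Collect the JS "viewData.<name> = '<value>'" assignments on the
+        # page into a dict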
+        view_data = dict(map(lambda t: (t[0], t[2]), re.findall(
+            r'viewData\.([a-zA-Z]+)\s*=\s*(["\'])([^"\']+)\2',
+            webpage)))
+
+        vod_data = self._parse_json(self._search_regex(
+            r'var\s+vod\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'),
+            video_id)
+
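+        # A non-empty season list means the id belongs to a series, so offer
+        # the whole playlist unless --no-playlist is in effect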
+        season_list = list(vod_data.get('seasonList', {}).values())
+        if season_list:
+            if not noplaylist:
+                return self._extract_playlist(
+                    season_list[0], video_id, vod_data, view_data,
+                    prompt=noplaylist_prompt)
+
+            if noplaylist_prompt:
+                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+
+        # In browsers the getMainUrl request is always issued. Usually this
+        # endpoint gives the same result as the data embedded in the webpage.
+        # If georestricted, there is no embedded data, so an extra request is
+        # necessary to get the error code
+        video_data = self._parse_json(self._search_regex(
+            r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
+            webpage, 'video data', default='{}'), video_id)
+        if not video_data:
+            payload = {
+                'assetId': view_data['assetId'],
+                'watchDevices': vod_data['watchDevices'],
+                'contentType': view_data['contentType'],
+            }
+            video_data = self._download_json(
+                'https://www.litv.tv/vod/getMainUrl', video_id,
+                data=json.dumps(payload).encode('utf-8'),
+                headers={'Content-Type': 'application/json'})
+
+        if not video_data.get('fullpath'):
+            error_msg = video_data.get('errorMessage')
+            if error_msg == 'vod.error.outsideregionerror':
+                self.raise_geo_restricted('This video is available in Taiwan only')
+            if error_msg:
+                raise ExtractorError('%s said: %s' % (self.IE_NAME, error_msg), expected=True)
+            raise ExtractorError('Unexpected result from %s' % self.IE_NAME)
+
+        formats = self._extract_m3u8_formats(
+            video_data['fullpath'], video_id, ext='mp4', m3u8_id='hls')
+        for a_format in formats:
+            # LiTV HLS segments don't like compression
+            a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = True
+
+        title = view_data['title'] + view_data.get('secondaryMark', '')
+        description = view_data.get('description')
+        thumbnail = view_data.get('imageFile')
+        categories = [item['name'] for item in vod_data.get('category', [])]
+        episode = int_or_none(view_data.get('episode'))
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'categories': categories,
+            'episode_number': episode,
+        }
index 4684994e1726fc1785de3b8df6061f2eaf278ed8..ea0565ac05099aab8c05609aee4140a1b4c2c1c7 100644 (file)
@@ -17,7 +17,8 @@ class LiveLeakIE(InfoExtractor):
             'ext': 'flv',
             'description': 'extremely bad day for this guy..!',
             'uploader': 'ljfriel2',
-            'title': 'Most unlucky car accident'
+            'title': 'Most unlucky car accident',
+            'thumbnail': 're:^https?://.*\.jpg$'
         }
     }, {
         'url': 'http://www.liveleak.com/view?i=f93_1390833151',
@@ -28,6 +29,7 @@ class LiveLeakIE(InfoExtractor):
             'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
             'uploader': 'ARD_Stinkt',
             'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
+            'thumbnail': 're:^https?://.*\.jpg$'
         }
     }, {
         'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
@@ -49,10 +51,19 @@ class LiveLeakIE(InfoExtractor):
             'ext': 'mp4',
             'description': 'Happened on 27.7.2014. \r\nAt 0:53 you can see people still swimming at near beach.',
             'uploader': 'bony333',
-            'title': 'Crazy Hungarian tourist films close call waterspout in Croatia'
+            'title': 'Crazy Hungarian tourist films close call waterspout in Croatia',
+            'thumbnail': 're:^https?://.*\.jpg$'
         }
     }]
 
+    @staticmethod
+    def _extract_url(webpage):
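+        # Map a LiveLeak ll_embed iframe found on an arbitrary page back to
+        # the canonical view URL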
+        mobj = re.search(
+            r'<iframe[^>]+src="https?://(?:\w+\.)?liveleak\.com/ll_embed\?(?:.*?)i=(?P<id>[\w_]+)(?:.*)',
+            webpage)
+        if mobj:
+            return 'http://www.liveleak.com/view?i=%s' % mobj.group('id')
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
@@ -64,6 +75,7 @@ class LiveLeakIE(InfoExtractor):
         age_limit = int_or_none(self._search_regex(
             r'you confirm that you are ([0-9]+) years and over.',
             webpage, 'age limit', default=None))
+        video_thumbnail = self._og_search_thumbnail(webpage)
 
         sources_raw = self._search_regex(
             r'(?s)sources:\s*(\[.*?\]),', webpage, 'video URLs', default=None)
@@ -116,4 +128,5 @@ class LiveLeakIE(InfoExtractor):
             'uploader': video_uploader,
             'formats': formats,
             'age_limit': age_limit,
+            'thumbnail': video_thumbnail,
         }
index 38fb3d9e4166f5f4a188ab2436c27240a8b04283..bc7894bf13ed29963aa1dad7880cf8549be1ca77 100644 (file)
@@ -14,6 +14,7 @@ from ..utils import (
     xpath_with_ns,
     xpath_text,
     orderedSet,
+    update_url_query,
     int_or_none,
     float_or_none,
     parse_iso8601,
@@ -64,7 +65,7 @@ class LivestreamIE(InfoExtractor):
     def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
         base_ele = find_xpath_attr(
             smil, self._xpath_ns('.//meta', namespace), 'name', 'httpBase')
-        base = base_ele.get('content') if base_ele else 'http://livestreamvod-f.akamaihd.net/'
+        base = base_ele.get('content') if base_ele is not None else 'http://livestreamvod-f.akamaihd.net/'
 
         formats = []
         video_nodes = smil.findall(self._xpath_ns('.//video', namespace))
@@ -72,7 +73,10 @@ class LivestreamIE(InfoExtractor):
         for vn in video_nodes:
             tbr = int_or_none(vn.attrib.get('system-bitrate'), 1000)
             furl = (
-                '%s%s?v=3.0.3&fp=WIN%%2014,0,0,145' % (base, vn.attrib['src']))
+                update_url_query(compat_urlparse.urljoin(base, vn.attrib['src']), {
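+                    # update_url_query percent-encodes its values, so the
+                    # flash-player version string is passed in decoded form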
+                    'v': '3.0.3',
+                    'fp': 'WIN 14,0,0,145',
+                }))
             if 'clipBegin' in vn.attrib:
                 furl += '&ssek=' + vn.attrib['clipBegin']
             formats.append({
@@ -146,7 +150,7 @@ class LivestreamIE(InfoExtractor):
         }
 
     def _extract_stream_info(self, stream_info):
-        broadcast_id = stream_info['broadcast_id']
+        broadcast_id = compat_str(stream_info['broadcast_id'])
         is_live = stream_info.get('is_live')
 
         formats = []
@@ -199,9 +203,10 @@ class LivestreamIE(InfoExtractor):
             if not videos_info:
                 break
             for v in videos_info:
+                v_id = compat_str(v['id'])
                 entries.append(self.url_result(
-                    'http://livestream.com/accounts/%s/events/%s/videos/%s' % (account_id, event_id, v['id']),
-                    'Livestream', v['id'], v['caption']))
+                    'http://livestream.com/accounts/%s/events/%s/videos/%s' % (account_id, event_id, v_id),
+                    'Livestream', v_id, v.get('caption')))
             last_video = videos_info[-1]['id']
         return self.playlist_result(entries, event_id, event_data['full_name'])
 
diff --git a/youtube_dl/extractor/localnews8.py b/youtube_dl/extractor/localnews8.py
new file mode 100644 (file)
index 0000000..aad3961
--- /dev/null
@@ -0,0 +1,47 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class LocalNews8IE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?localnews8\.com/(?:[^/]+/)*(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.localnews8.com/news/rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings/35183304',
+        'md5': 'be4d48aea61aa2bde7be2ee47691ad20',
+        'info_dict': {
+            'id': '35183304',
+            'display_id': 'rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings',
+            'ext': 'mp4',
+            'title': 'Rexburg business turns carbon fiber scraps into wedding ring',
+            'description': 'The process was first invented by Lamborghini and less than a dozen companies around the world use it.',
+            'duration': 153,
+            'timestamp': 1441844822,
+            'upload_date': '20150910',
+            'uploader_id': 'api',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id')
+
+        webpage = self._download_webpage(url, display_id)
+
+        partner_id = self._search_regex(
+            r'partnerId\s*[:=]\s*(["\'])(?P<id>\d+)\1',
+            webpage, 'partner id', group='id')
+        kaltura_id = self._search_regex(
+            r'videoIdString\s*[:=]\s*(["\'])kaltura:(?P<id>[0-9a-z_]+)\1',
+            webpage, 'video id', group='id')
+
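+        # Hand extraction off to the Kaltura extractor using the partner and
+        # entry ids scraped above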
+        return {
+            '_type': 'url_transparent',
+            'url': 'kaltura:%s:%s' % (partner_id, kaltura_id),
+            'ie_key': 'Kaltura',
+            'id': video_id,
+            'display_id': display_id,
+        }
index 863efd896b12afa1b674dfb1bcde6d6ffe0214fb..1072405b30c7663d19ddc4df86f858d94952fda5 100644 (file)
@@ -37,6 +37,7 @@ class LRTIE(InfoExtractor):
             r'file\s*:\s*(["\'])(?P<url>.+?)\1\s*\+\s*location\.hash\.substring\(1\)',
             webpage, 'm3u8 url', group='url')
         formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
+        self._sort_formats(formats)
 
         thumbnail = self._og_search_thumbnail(webpage)
         description = self._og_search_description(webpage)
index d4e1ae99d4d91c887cfc2d55c29d8211fc48a80f..2d50400326215e91ea0efc6130ec172437d7439c 100644 (file)
 from __future__ import unicode_literals
 
 import re
-import json
 
 from .common import InfoExtractor
 from ..compat import (
+    compat_HTTPError,
     compat_str,
-    compat_urllib_parse,
+    compat_urlparse,
 )
 from ..utils import (
     ExtractorError,
-    clean_html,
     int_or_none,
-    sanitized_Request,
+    urlencode_postdata,
 )
 
 
 class LyndaBaseIE(InfoExtractor):
-    _LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
+    _SIGNIN_URL = 'https://www.lynda.com/signin'
+    _PASSWORD_URL = 'https://www.lynda.com/signin/password'
+    _USER_URL = 'https://www.lynda.com/signin/user'
     _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
     _NETRC_MACHINE = 'lynda'
 
     def _real_initialize(self):
         self._login()
 
+    @staticmethod
+    def _check_error(response, key_or_keys):
+        keys = [key_or_keys] if isinstance(key_or_keys, compat_str) else key_or_keys
+        for key in keys:
+            error = response.get(key)
+            if error:
+                raise ExtractorError('Unable to login: %s' % error, expected=True)
+
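+    # One round of the multi-step AJAX signin: post the form's hidden inputs
+    # plus extra_form_data and return the JSON response with the action URL used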
+    def _login_step(self, form_html, fallback_action_url, extra_form_data, note, referrer_url):
+        action_url = self._search_regex(
+            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_html,
+            'post url', default=fallback_action_url, group='url')
+
+        if not action_url.startswith('http'):
+            action_url = compat_urlparse.urljoin(self._SIGNIN_URL, action_url)
+
+        form_data = self._hidden_inputs(form_html)
+        form_data.update(extra_form_data)
+
+        try:
+            response = self._download_json(
+                action_url, None, note,
+                data=urlencode_postdata(form_data),
+                headers={
+                    'Referer': referrer_url,
+                    'X-Requested-With': 'XMLHttpRequest',
+                })
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
+                response = self._parse_json(e.cause.read().decode('utf-8'), None)
+                self._check_error(response, ('email', 'password'))
+            raise
+
+        self._check_error(response, 'ErrorMessage')
+
+        return response, action_url
+
     def _login(self):
         username, password = self._get_login_info()
         if username is None:
             return
 
-        login_form = {
-            'username': username.encode('utf-8'),
-            'password': password.encode('utf-8'),
-            'remember': 'false',
-            'stayPut': 'false'
-        }
-        request = sanitized_Request(
-            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
-        login_page = self._download_webpage(
-            request, None, 'Logging in as %s' % username)
-
-        # Not (yet) logged in
-        m = re.search(r'loginResultJson\s*=\s*\'(?P<json>[^\']+)\';', login_page)
-        if m is not None:
-            response = m.group('json')
-            response_json = json.loads(response)
-            state = response_json['state']
-
-            if state == 'notlogged':
-                raise ExtractorError(
-                    'Unable to login, incorrect username and/or password',
-                    expected=True)
-
-            # This is when we get popup:
-            # > You're already logged in to lynda.com on two devices.
-            # > If you log in here, we'll log you out of another device.
-            # So, we need to confirm this.
-            if state == 'conflicted':
-                confirm_form = {
-                    'username': '',
-                    'password': '',
-                    'resolve': 'true',
-                    'remember': 'false',
-                    'stayPut': 'false',
-                }
-                request = sanitized_Request(
-                    self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form).encode('utf-8'))
-                login_page = self._download_webpage(
-                    request, None,
-                    'Confirming log in and log out from another device')
-
-        if all(not re.search(p, login_page) for p in ('isLoggedIn\s*:\s*true', r'logout\.aspx', r'>Log out<')):
-            if 'login error' in login_page:
-                mobj = re.search(
-                    r'(?s)<h1[^>]+class="topmost">(?P<title>[^<]+)</h1>\s*<div>(?P<description>.+?)</div>',
-                    login_page)
-                if mobj:
-                    raise ExtractorError(
-                        'lynda returned error: %s - %s'
-                        % (mobj.group('title'), clean_html(mobj.group('description'))),
-                        expected=True)
-            raise ExtractorError('Unable to log in')
-
-    def _logout(self):
-        username, _ = self._get_login_info()
-        if username is None:
+        # Step 1: download signin page
+        signin_page = self._download_webpage(
+            self._SIGNIN_URL, None, 'Downloading signin page')
+
+        # Already logged in
+        if any(re.search(p, signin_page) for p in (
+                r'isLoggedIn\s*:\s*true', r'logout\.aspx', r'>Log out<')):
             return
 
-        self._download_webpage(
-            'http://www.lynda.com/ajax/logout.aspx', None,
-            'Logging out', 'Unable to log out', fatal=False)
+        # Step 2: submit email
+        signin_form = self._search_regex(
+            r'(?s)(<form[^>]+data-form-name=["\']signin["\'][^>]*>.+?</form>)',
+            signin_page, 'signin form')
+        signin_page, signin_url = self._login_step(
+            signin_form, self._PASSWORD_URL, {'email': username},
+            'Submitting email', self._SIGNIN_URL)
+
+        # Step 3: submit password
+        password_form = signin_page['body']
+        self._login_step(
+            password_form, self._USER_URL, {'email': username, 'password': password},
+            'Submitting password', signin_url)
 
 
 class LyndaIE(LyndaBaseIE):
     IE_NAME = 'lynda'
     IE_DESC = 'lynda.com videos'
     _VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(?P<id>\d+)'
-    _NETRC_MACHINE = 'lynda'
 
     _TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'
 
@@ -214,14 +212,12 @@ class LyndaCourseIE(LyndaBaseIE):
             'http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
             course_id, 'Downloading course JSON')
 
-        self._logout()
-
         if course.get('Status') == 'NotFound':
             raise ExtractorError(
                 'Course %s does not exist' % course_id, expected=True)
 
         unaccessible_videos = 0
-        videos = []
+        entries = []
 
         # Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided
         # by single video API anymore
@@ -231,20 +227,23 @@ class LyndaCourseIE(LyndaBaseIE):
                 if video.get('HasAccess') is False:
                     unaccessible_videos += 1
                     continue
-                if video.get('ID'):
-                    videos.append(video['ID'])
+                video_id = video.get('ID')
+                if video_id:
+                    entries.append({
+                        '_type': 'url_transparent',
+                        'url': 'http://www.lynda.com/%s/%s-4.html' % (course_path, video_id),
+                        'ie_key': LyndaIE.ie_key(),
+                        'chapter': chapter.get('Title'),
+                        'chapter_number': int_or_none(chapter.get('ChapterIndex')),
+                        'chapter_id': compat_str(chapter.get('ID')),
+                    })
 
         if unaccessible_videos > 0:
             self._downloader.report_warning(
                 '%s videos are only available for members (or paid members) and will not be downloaded. '
                 % unaccessible_videos + self._ACCOUNT_CREDENTIALS_HINT)
 
-        entries = [
-            self.url_result(
-                'http://www.lynda.com/%s/%s-4.html' % (course_path, video_id),
-                'Lynda')
-            for video_id in videos]
-
         course_title = course.get('Title')
+        course_description = course.get('Description')
 
-        return self.playlist_result(entries, course_id, course_title)
+        return self.playlist_result(entries, course_id, course_title, course_description)
index 7e025831b51d611f00e248bda637b4ae8f35efb6..d5945ad66b3a784263fb1c5106534081b1f04913 100644 (file)
@@ -8,7 +8,7 @@ from .common import InfoExtractor
 
 class M6IE(InfoExtractor):
     IE_NAME = 'm6'
-    _VALID_URL = r'http://(?:www\.)?m6\.fr/[^/]+/videos/(?P<id>\d+)-[^\.]+\.html'
+    _VALID_URL = r'https?://(?:www\.)?m6\.fr/[^/]+/videos/(?P<id>\d+)-[^\.]+\.html'
 
     _TEST = {
         'url': 'http://www.m6.fr/emission-les_reines_du_shopping/videos/11323908-emeline_est_la_reine_du_shopping_sur_le_theme_ma_fete_d_8217_anniversaire.html',
index 71085f279fb4b9d2a40d866785e75d6d6c419674..9a7098c43c600a3cc3ed697252bc784d9a9cf5b7 100644 (file)
@@ -13,7 +13,7 @@ from ..utils import (
 class MailRuIE(InfoExtractor):
     IE_NAME = 'mailru'
     IE_DESC = 'Видео@Mail.Ru'
-    _VALID_URL = r'http://(?:www\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)'
+    _VALID_URL = r'https?://(?:(?:www|m)\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)'
 
     _TESTS = [
         {
@@ -61,6 +61,10 @@ class MailRuIE(InfoExtractor):
                 'duration': 6001,
             },
             'skip': 'Not accessible from Travis CI server',
+        },
+        {
+            'url': 'http://m.my.mail.ru/mail/3sktvtr/video/_myvideo/138.html',
+            'only_matching': True,
         }
     ]
 
diff --git a/youtube_dl/extractor/makerschannel.py b/youtube_dl/extractor/makerschannel.py
new file mode 100644 (file)
index 0000000..f5d00e6
--- /dev/null
@@ -0,0 +1,40 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class MakersChannelIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?makerschannel\.com/.*(?P<id_type>video|production)_id=(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://makerschannel.com/en/zoomin/community-highlights?video_id=849',
+        'md5': '624a512c6969236b5967bf9286345ad1',
+        'info_dict': {
+            'id': '849',
+            'ext': 'mp4',
+            'title': 'Landing a bus on a plane is an epic win',
+            'uploader': 'ZoomIn',
+            'description': 'md5:cd9cca2ea7b69b78be81d07020c97139',
+        }
+    }
+
+    def _real_extract(self, url):
+        id_type, url_id = re.match(self._VALID_URL, url).groups()
+        webpage = self._download_webpage(url, url_id)
+        video_data = self._html_search_regex(r'<div([^>]+data-%s-id="%s"[^>]+)>' % (id_type, url_id), webpage, 'video data')
+
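+        # All metadata lives in data-* attributes of the matched <div>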
+        def extract_data_val(attr, fatal=False):
+            return self._html_search_regex(r'data-%s\s*=\s*"([^"]+)"' % attr, video_data, attr, fatal=fatal)
+        minoto_id = self._search_regex(r'/id/([a-zA-Z0-9]+)', extract_data_val('video-src', True), 'minoto id')
+
+        return {
+            '_type': 'url_transparent',
+            'url': 'minoto:%s' % minoto_id,
+            'id': extract_data_val('video-id', True),
+            'title': extract_data_val('title', True),
+            'description': extract_data_val('description'),
+            'thumbnail': extract_data_val('image'),
+            'uploader': extract_data_val('channel'),
+        }
diff --git a/youtube_dl/extractor/malemotion.py b/youtube_dl/extractor/malemotion.py
deleted file mode 100644 (file)
index 92511a6..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote
-
-
-class MalemotionIE(InfoExtractor):
-    _VALID_URL = r'https?://malemotion\.com/video/(.+?)\.(?P<id>.+?)(#|$)'
-    _TEST = {
-        'url': 'http://malemotion.com/video/bete-de-concours.ltc',
-        'md5': '3013e53a0afbde2878bc39998c33e8a5',
-        'info_dict': {
-            'id': 'ltc',
-            'ext': 'mp4',
-            'title': 'Bête de Concours',
-            'age_limit': 18,
-        },
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-
-        video_url = compat_urllib_parse_unquote(self._search_regex(
-            r'<source type="video/mp4" src="(.+?)"', webpage, 'video URL'))
-        video_title = self._html_search_regex(
-            r'<title>(.*?)</title', webpage, 'title')
-        video_thumbnail = self._search_regex(
-            r'<video .+?poster="(.+?)"', webpage, 'thumbnail', fatal=False)
-
-        formats = [{
-            'url': video_url,
-            'ext': 'mp4',
-            'format_id': 'mp4',
-            'preference': 1,
-        }]
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'formats': formats,
-            'title': video_title,
-            'thumbnail': video_thumbnail,
-            'age_limit': 18,
-        }
index 28e0dfe63348082f65df20463ab8a2001d8592fa..33b0b539fa9dfde80274d983aa003ea7b39e6622 100644 (file)
@@ -4,16 +4,12 @@ from __future__ import unicode_literals
 import random
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse
-from ..utils import (
-    sanitized_Request,
-    xpath_text,
-)
+from ..utils import xpath_text
 
 
 class MatchTVIE(InfoExtractor):
-    _VALID_URL = r'https?://matchtv\.ru/?#live-player'
-    _TEST = {
+    _VALID_URL = r'https?://matchtv\.ru(?:/on-air|/?#live-player)'
+    _TESTS = [{
         'url': 'http://matchtv.ru/#live-player',
         'info_dict': {
             'id': 'matchtv-live',
@@ -24,12 +20,16 @@ class MatchTVIE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
-    }
+    }, {
+        'url': 'http://matchtv.ru/on-air/',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = 'matchtv-live'
-        request = sanitized_Request(
-            'http://player.matchtv.ntvplus.tv/player/smil?%s' % compat_urllib_parse.urlencode({
+        video_url = self._download_json(
+            'http://player.matchtv.ntvplus.tv/player/smil', video_id,
+            query={
                 'ts': '',
                 'quality': 'SD',
                 'contentId': '561d2c0df7159b37178b4567',
@@ -40,13 +40,13 @@ class MatchTVIE(InfoExtractor):
                 'contentType': 'channel',
                 'timeShift': '0',
                 'platform': 'portal',
-            }),
+            },
             headers={
                 'Referer': 'http://player.matchtv.ntvplus.tv/embed-player/NTVEmbedPlayer.swf',
-            })
-        video_url = self._download_json(request, video_id)['data']['videoUrl']
+            })['data']['videoUrl']
         f4m_url = xpath_text(self._download_xml(video_url, video_id), './to')
         formats = self._extract_f4m_formats(f4m_url, video_id)
+        self._sort_formats(formats)
         return {
             'id': video_id,
             'title': self._live_title('Матч ТВ - Прямой эфир'),
index 425fc9e2a69b93879eb71a32eae2a042d97770cc..2100583df46ab7955846f8e3b08467d13ed3440e 100644 (file)
@@ -14,7 +14,7 @@ from ..utils import (
 
 class MDRIE(InfoExtractor):
     IE_DESC = 'MDR.DE and KiKA'
-    _VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z]+(?P<id>\d+)(?:_.+?)?\.html'
+    _VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z]+-?(?P<id>\d+)(?:_.+?)?\.html'
 
     _TESTS = [{
         # MDR regularly deletes its videos
@@ -49,8 +49,8 @@ class MDRIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Beutolomäus und der geheime Weihnachtswunsch',
             'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
-            'timestamp': 1419047100,
-            'upload_date': '20141220',
+            'timestamp': 1450950000,
+            'upload_date': '20151224',
             'duration': 4628,
             'uploader': 'KIKA',
         },
@@ -60,6 +60,9 @@ class MDRIE(InfoExtractor):
     }, {
         'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html',
         'only_matching': True,
+    }, {
+        'url': 'http://www.mdr.de/mediathek/mdr-videos/a/video-1334.html',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -68,8 +71,8 @@ class MDRIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         data_url = self._search_regex(
-            r'dataURL\s*:\s*(["\'])(?P<url>/.+/(?:video|audio)[0-9]+-avCustom\.xml)\1',
-            webpage, 'data url', group='url')
+            r'(?:dataURL|playerXml(?:["\'])?)\s*:\s*(["\'])(?P<url>.+/(?:video|audio)-?[0-9]+-avCustom\.xml)\1',
+            webpage, 'data url', group='url').replace('\\/', '/')
 
         doc = self._download_xml(
             compat_urlparse.urljoin(url, data_url), video_id)
index 67d6271e1ad107aceddaa2c8b4bd96558426bfce..b6f00cc25ff9c2769176e551b33bd5901c121f64 100644 (file)
@@ -5,7 +5,6 @@ import re
 from .common import InfoExtractor
 from ..compat import (
     compat_parse_qs,
-    compat_urllib_parse,
     compat_urllib_parse_unquote,
 )
 from ..utils import (
@@ -13,11 +12,12 @@ from ..utils import (
     ExtractorError,
     int_or_none,
     sanitized_Request,
+    urlencode_postdata,
 )
 
 
 class MetacafeIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
+    _VALID_URL = r'https?://(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
     _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
     _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
     IE_NAME = 'metacafe'
@@ -81,6 +81,9 @@ class MetacafeIE(InfoExtractor):
                 'title': 'Open: This is Face the Nation, February 9',
                 'description': 'md5:8a9ceec26d1f7ed6eab610834cc1a476',
                 'duration': 96,
+                'uploader': 'CBSI-NEW',
+                'upload_date': '20140209',
+                'timestamp': 1391959800,
             },
             'params': {
                 # rtmp download
@@ -117,7 +120,7 @@ class MetacafeIE(InfoExtractor):
             'filters': '0',
             'submit': "Continue - I'm over 18",
         }
-        request = sanitized_Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
+        request = sanitized_Request(self._FILTER_POST, urlencode_postdata(disclaimer_form))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')
         self.report_age_confirmation()
         self._download_webpage(request, None, False, 'Unable to confirm age')
index e30320569805aedaa6694ae54f9086909593f7a4..444ec0310877e8377f78e88b07fd110ca9e6aa0d 100644 (file)
@@ -11,7 +11,7 @@ from ..utils import (
 class MetacriticIE(InfoExtractor):
     _VALID_URL = r'https?://www\.metacritic\.com/.+?/trailers/(?P<id>\d+)'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
         'info_dict': {
             'id': '3698222',
@@ -20,7 +20,17 @@ class MetacriticIE(InfoExtractor):
             'description': 'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
             'duration': 221,
         },
-    }
+        'skip': 'Not providing trailers anymore',
+    }, {
+        'url': 'http://www.metacritic.com/game/playstation-4/tales-from-the-borderlands-a-telltale-game-series/trailers/5740315',
+        'info_dict': {
+            'id': '5740315',
+            'ext': 'mp4',
+            'title': 'Tales from the Borderlands - Finale: The Vault of the Traveler',
+            'description': 'In the final episode of the season, all hell breaks loose. Jack is now in control of Helios\' systems, and he\'s ready to reclaim his rightful place as king of Hyperion (with or without you).',
+            'duration': 114,
+        },
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/mgtv.py b/youtube_dl/extractor/mgtv.py
new file mode 100644 (file)
index 0000000..9fbc74f
--- /dev/null
@@ -0,0 +1,64 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class MGTVIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.mgtv\.com/v/(?:[^/]+/)*(?P<id>\d+)\.html'
+    IE_DESC = '芒果TV'
+
+    _TEST = {
+        'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
+        'md5': '1bdadcf760a0b90946ca68ee9a2db41a',
+        'info_dict': {
+            'id': '3116640',
+            'ext': 'mp4',
+            'title': '我是歌手第四季双年巅峰会:韩红李玟“双王”领军对抗',
+            'description': '我是歌手第四季双年巅峰会',
+            'duration': 7461,
+            'thumbnail': 're:^https?://.*\.jpg$',
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        api_data = self._download_json(
+            'http://v.api.mgtv.com/player/video', video_id,
+            query={'video_id': video_id})['data']
+        info = api_data['info']
+
+        formats = []
+        for idx, stream in enumerate(api_data['stream']):
+            stream_url = stream.get('url')
+            if not stream_url:
+                continue
+            tbr = int_or_none(self._search_regex(
+                r'(\d+)\.mp4', stream_url, 'tbr', default=None))
+
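+            # Each stream yields two formats: the HLS playlist as-is and, with
+            # the /playlist.m3u8 suffix stripped and pno=1031, a direct HTTP URL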
+            def extract_format(stream_url, format_id, idx, query={}):
+                format_info = self._download_json(
+                    stream_url, video_id,
+                    note='Downloading video info for format %s' % (format_id or '#%d' % idx),
+                    query=query)
+                return {
+                    'format_id': format_id,
+                    'url': format_info['info'],
+                    'ext': 'mp4',
+                    'tbr': tbr,
+                }
+
+            formats.append(extract_format(
+                stream_url, 'hls-%d' % tbr if tbr else None, idx * 2))
+            formats.append(extract_format(stream_url.replace(
+                '/playlist.m3u8', ''), 'http-%d' % tbr if tbr else None, idx * 2 + 1, {'pno': 1031}))
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': info['title'].strip(),
+            'formats': formats,
+            'description': info.get('desc'),
+            'duration': int_or_none(info.get('duration')),
+            'thumbnail': info.get('thumb'),
+        }
diff --git a/youtube_dl/extractor/microsoftvirtualacademy.py b/youtube_dl/extractor/microsoftvirtualacademy.py
new file mode 100644 (file)
index 0000000..afd3e98
--- /dev/null
@@ -0,0 +1,192 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_xpath,
+)
+from ..utils import (
+    int_or_none,
+    parse_duration,
+    smuggle_url,
+    unsmuggle_url,
+    xpath_text,
+)
+
+
+class MicrosoftVirtualAcademyBaseIE(InfoExtractor):
+    def _extract_base_url(self, course_id, display_id):
+        return self._download_json(
+            'https://api-mlxprod.microsoft.com/services/products/anonymous/%s' % course_id,
+            display_id, 'Downloading course base URL')
+
+    def _extract_chapter_and_title(self, title):
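+        # Titles look like "<chapter number> | <title>"; split off the leading
+        # chapter number when present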
+        if not title:
+            return None, None
+        m = re.search(r'(?P<chapter>\d+)\s*\|\s*(?P<title>.+)', title)
+        return (int(m.group('chapter')), m.group('title')) if m else (None, title)
+
+
+class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE):
+    IE_NAME = 'mva'
+    IE_DESC = 'Microsoft Virtual Academy videos'
+    _VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/[^/?#&]+-)(?P<course_id>\d+)(?::|\?l=)(?P<id>[\da-zA-Z]+_\d+)' % IE_NAME
+
+    _TESTS = [{
+        'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788?l=gfVXISmEB_6804984382',
+        'md5': '7826c44fc31678b12ad8db11f6b5abb9',
+        'info_dict': {
+            'id': 'gfVXISmEB_6804984382',
+            'ext': 'mp4',
+            'title': 'Course Introduction',
+            'formats': 'mincount:3',
+            'subtitles': {
+                'en': [{
+                    'ext': 'ttml',
+                }],
+            },
+        }
+    }, {
+        'url': 'mva:11788:gfVXISmEB_6804984382',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        url, smuggled_data = unsmuggle_url(url, {})
+
+        mobj = re.match(self._VALID_URL, url)
+        course_id = mobj.group('course_id')
+        video_id = mobj.group('id')
+
+        base_url = smuggled_data.get('base_url') or self._extract_base_url(course_id, video_id)
+
+        settings = self._download_xml(
+            '%s/content/content_%s/videosettings.xml?v=1' % (base_url, video_id),
+            video_id, 'Downloading video settings XML')
+
+        _, title = self._extract_chapter_and_title(xpath_text(
+            settings, './/Title', 'title', fatal=True))
+
+        formats = []
+
+        for sources in settings.findall(compat_xpath('.//MediaSources')):
+            if sources.get('videoType') == 'smoothstreaming':
+                continue
+            for source in sources.findall(compat_xpath('./MediaSource')):
+                video_url = source.text
+                if not video_url or not video_url.startswith('http'):
+                    continue
+                video_mode = source.get('videoMode')
+                height = int_or_none(self._search_regex(
+                    r'^(\d+)[pP]$', video_mode or '', 'height', default=None))
+                codec = source.get('codec')
+                acodec, vcodec = [None] * 2
+                if codec:
+                    codecs = codec.split(',')
+                    if len(codecs) == 2:
+                        acodec, vcodec = codecs
+                    elif len(codecs) == 1:
+                        vcodec = codecs[0]
+                formats.append({
+                    'url': video_url,
+                    'format_id': video_mode,
+                    'height': height,
+                    'acodec': acodec,
+                    'vcodec': vcodec,
+                })
+        self._sort_formats(formats)
+
+        subtitles = {}
+        for source in settings.findall(compat_xpath('.//MarkerResourceSource')):
+            subtitle_url = source.text
+            if not subtitle_url:
+                continue
+            subtitles.setdefault('en', []).append({
+                'url': '%s/%s' % (base_url, subtitle_url),
+                'ext': source.get('type'),
+            })
+
+        return {
+            'id': video_id,
+            'title': title,
+            'subtitles': subtitles,
+            'formats': formats
+        }
+
+
+class MicrosoftVirtualAcademyCourseIE(MicrosoftVirtualAcademyBaseIE):
+    IE_NAME = 'mva:course'
+    IE_DESC = 'Microsoft Virtual Academy courses'
+    _VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/(?P<display_id>[^/?#&]+)-)(?P<id>\d+)' % IE_NAME
+
+    _TESTS = [{
+        'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788',
+        'info_dict': {
+            'id': '11788',
+            'title': 'Microsoft Azure Fundamentals: Virtual Machines',
+        },
+        'playlist_count': 36,
+    }, {
+        # with emphasized chapters
+        'url': 'https://mva.microsoft.com/en-US/training-courses/developing-windows-10-games-with-construct-2-16335',
+        'info_dict': {
+            'id': '16335',
+            'title': 'Developing Windows 10 Games with Construct 2',
+        },
+        'playlist_count': 10,
+    }, {
+        'url': 'https://www.microsoftvirtualacademy.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788',
+        'only_matching': True,
+    }, {
+        'url': 'mva:course:11788',
+        'only_matching': True,
+    }]
+
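+    # URLs that also carry a specific video id are handled by
+    # MicrosoftVirtualAcademyIE instead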
+    @classmethod
+    def suitable(cls, url):
+        return False if MicrosoftVirtualAcademyIE.suitable(url) else super(
+            MicrosoftVirtualAcademyCourseIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        course_id = mobj.group('id')
+        display_id = mobj.group('display_id')
+
+        base_url = self._extract_base_url(course_id, display_id)
+
+        manifest = self._download_json(
+            '%s/imsmanifestlite.json' % base_url,
+            display_id, 'Downloading course manifest JSON')['manifest']
+
+        organization = manifest['organizations']['organization'][0]
+
+        entries = []
+        for chapter in organization['item']:
+            chapter_number, chapter_title = self._extract_chapter_and_title(chapter.get('title'))
+            chapter_id = chapter.get('@identifier')
+            for item in chapter.get('item', []):
+                item_id = item.get('@identifier')
+                if not item_id:
+                    continue
+                metadata = item.get('resource', {}).get('metadata') or {}
+                if metadata.get('learningresourcetype') != 'Video':
+                    continue
+                _, title = self._extract_chapter_and_title(item.get('title'))
+                duration = parse_duration(metadata.get('duration'))
+                description = metadata.get('description')
+                entries.append({
+                    '_type': 'url_transparent',
+                    'url': smuggle_url(
+                        'mva:%s:%s' % (course_id, item_id), {'base_url': base_url}),
+                    'title': title,
+                    'description': description,
+                    'duration': duration,
+                    'chapter': chapter_title,
+                    'chapter_number': chapter_number,
+                    'chapter_id': chapter_id,
+                })
+
+        title = organization.get('title') or manifest.get('metadata', {}).get('title')
+
+        return self.playlist_result(entries, course_id, title)
index e46b23a6f73990e14e73d911cb2e8253fba08a85..e6730b75a68d27c16e694fedaac088d27a0ab1ec 100644 (file)
@@ -2,12 +2,12 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse
 from ..utils import (
     int_or_none,
     parse_duration,
     parse_filesize,
     sanitized_Request,
+    urlencode_postdata,
 )
 
 
@@ -39,7 +39,7 @@ class MinhatecaIE(InfoExtractor):
         ]
         req = sanitized_Request(
             'http://minhateca.com.br/action/License/Download',
-            data=compat_urllib_parse.urlencode(token_data))
+            data=urlencode_postdata(token_data))
         req.add_header('Content-Type', 'application/x-www-form-urlencoded')
         data = self._download_json(
             req, video_id, note='Downloading metadata')
index 949ad11db2ecd0c53e5cb4c361bc43aa779cb1e6..e48eba3fa7343bbdf964be583a680affa5ad29fa 100644 (file)
@@ -1,8 +1,5 @@
 from __future__ import unicode_literals
 
-import json
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
@@ -20,21 +17,28 @@ class MinistryGridIE(InfoExtractor):
             'id': '3453494717001',
             'ext': 'mp4',
             'title': 'The Gospel by Numbers',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'upload_date': '20140410',
             'description': 'Coming soon from T4G 2014!',
-            'uploader': 'LifeWay Christian Resources (MG)',
+            'uploader_id': '2034960640001',
+            'timestamp': 1397145591,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
         },
+        'add_ie': ['TDSLifeway'],
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
-        portlets_json = self._search_regex(
-            r'Liferay\.Portlet\.list=(\[.+?\])', webpage, 'portlet list')
-        portlets = json.loads(portlets_json)
+        portlets = self._parse_json(self._search_regex(
+            r'Liferay\.Portlet\.list=(\[.+?\])', webpage, 'portlet list'),
+            video_id)
         pl_id = self._search_regex(
-            r'<!--\s*p_l_id - ([0-9]+)<br>', webpage, 'p_l_id')
+            r'getPlid:function\(\){return"(\d+)"}', webpage, 'p_l_id')
 
         for i, portlet in enumerate(portlets):
             portlet_url = 'http://www.ministrygrid.com/c/portal/render_portlet?p_l_id=%s&p_p_id=%s' % (pl_id, portlet)
@@ -46,12 +50,8 @@ class MinistryGridIE(InfoExtractor):
                 r'<iframe.*?src="([^"]+)"', portlet_code, 'video iframe',
                 default=None)
             if video_iframe_url:
-                surl = smuggle_url(
-                    video_iframe_url, {'force_videoid': video_id})
-                return {
-                    '_type': 'url',
-                    'id': video_id,
-                    'url': surl,
-                }
+                return self.url_result(
+                    smuggle_url(video_iframe_url, {'force_videoid': video_id}),
+                    video_id=video_id)
 
         raise ExtractorError('Could not find video iframe in any portlets')
diff --git a/youtube_dl/extractor/minoto.py b/youtube_dl/extractor/minoto.py
new file mode 100644 (file)
index 0000000..959a105
--- /dev/null
@@ -0,0 +1,56 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class MinotoIE(InfoExtractor):
+    _VALID_URL = r'(?:minoto:|https?://(?:play|iframe|embed)\.minoto-video\.com/(?P<player_id>[0-9]+)/)(?P<id>[a-zA-Z0-9]+)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        player_id = mobj.group('player_id') or '1'
+        video_id = mobj.group('id')
+        video_data = self._download_json('http://play.minoto-video.com/%s/%s.js' % (player_id, video_id), video_id)
+        video_metadata = video_data['video-metadata']
+        formats = []
+        for fmt in video_data['video-files']:
+            fmt_url = fmt.get('url')
+            if not fmt_url:
+                continue
+            container = fmt.get('container')
+            if container == 'hls':
+                formats.extend(self._extract_m3u8_formats(fmt_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+            else:
+                fmt_profile = fmt.get('profile') or {}
+                f = {
+                    'format_id': fmt_profile.get('name-short'),
+                    'format_note': fmt_profile.get('name'),
+                    'url': fmt_url,
+                    'container': container,
+                    'tbr': int_or_none(fmt.get('bitrate')),
+                    'filesize': int_or_none(fmt.get('filesize')),
+                    'width': int_or_none(fmt.get('width')),
+                    'height': int_or_none(fmt.get('height')),
+                }
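+                # 'codecs' looks like a comma-separated '<video codec>,<audio codec>' pair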
+                codecs = fmt.get('codecs')
+                if codecs:
+                    codecs = codecs.split(',')
+                    if len(codecs) == 2:
+                        f.update({
+                            'vcodec': codecs[0],
+                            'acodec': codecs[1],
+                        })
+                formats.append(f)
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video_metadata['title'],
+            'description': video_metadata.get('description'),
+            'thumbnail': video_metadata.get('video-poster', {}).get('url'),
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/mit.py b/youtube_dl/extractor/mit.py
index 29ca45778a17654c4d2125ceda177b71cffca8a8..1aea78d118a84a135494214da54c3c2c21465bc9 100644 (file)
@@ -91,7 +91,7 @@ class MITIE(TechTVMITIE):
 
 class OCWMITIE(InfoExtractor):
     IE_NAME = 'ocw.mit.edu'
-    _VALID_URL = r'^http://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
+    _VALID_URL = r'^https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
     _BASE_URL = 'http://ocw.mit.edu/'
 
     _TESTS = [
@@ -99,7 +99,7 @@ class OCWMITIE(InfoExtractor):
             'url': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/',
             'info_dict': {
                 'id': 'EObHWIEKGjA',
-                'ext': 'mp4',
+                'ext': 'webm',
                 'title': 'Lecture 7: Multiple Discrete Random Variables: Expectations, Conditioning, Independence',
                 'description': 'In this lecture, the professor discussed multiple random variables, expectations, and binomial distribution.',
                 'upload_date': '20121109',
diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py
index c595f20775efd8e4aed348e4886ac55209e2c6e2..5a00cd3975b46fbd0f191ecd8e82fb09ea78490a 100644 (file)
@@ -1,33 +1,57 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_parse,
+    compat_urllib_parse_urlencode,
     compat_urlparse,
 )
 from ..utils import (
-    encode_dict,
     get_element_by_attribute,
     int_or_none,
+    remove_start,
 )
 
 
 class MiTeleIE(InfoExtractor):
     IE_DESC = 'mitele.es'
-    _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
+    _VALID_URL = r'https?://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
 
     _TESTS = [{
         'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
-        'md5': '0ff1a13aebb35d9bc14081ff633dd324',
+        # MD5 is unstable
         'info_dict': {
             'id': '0NF1jJnxS1Wu3pHrmvFyw2',
             'display_id': 'programa-144',
             'ext': 'flv',
             'title': 'Tor, la web invisible',
             'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
+            'series': 'Diario de',
+            'season': 'La redacción',
+            'episode': 'Programa 144',
             'thumbnail': 're:(?i)^https?://.*\.jpg$',
             'duration': 2913,
         },
+    }, {
+        # no explicit title
+        'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/temporada-6/programa-226/',
+        'info_dict': {
+            'id': 'eLZSwoEd1S3pVyUm8lc6F',
+            'display_id': 'programa-226',
+            'ext': 'flv',
+            'title': 'Cuarto Milenio - Temporada 6 - Programa 226',
+            'description': 'md5:50daf9fadefa4e62d9fc866d0c015701',
+            'series': 'Cuarto Milenio',
+            'season': 'Temporada 6',
+            'episode': 'Programa 226',
+            'thumbnail': 're:(?i)^https?://.*\.jpg$',
+            'duration': 7312,
+        },
+        'params': {
+            'skip_download': True,
+        },
     }]
 
     def _real_extract(self, url):
@@ -60,7 +84,7 @@ class MiTeleIE(InfoExtractor):
                 'sta': '0',
             }
             media = self._download_json(
-                '%s/?%s' % (gat, compat_urllib_parse.urlencode(encode_dict(token_data))),
+                '%s/?%s' % (gat, compat_urllib_parse_urlencode(token_data)),
                 display_id, 'Downloading %s JSON' % location['loc'])
             file_ = media.get('file')
             if not file_:
@@ -68,9 +92,25 @@ class MiTeleIE(InfoExtractor):
             formats.extend(self._extract_f4m_formats(
                 file_ + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
                 display_id, f4m_id=loc))
+        self._sort_formats(formats)
 
         title = self._search_regex(
-            r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>', webpage, 'title')
+            r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>',
+            webpage, 'title', default=None)
+
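+        # the header spans carry series/season/episode; they also serve as a title fallback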
+        mobj = re.search(r'''(?sx)
+                            class="Destacado-text"[^>]*>.*?<h1>\s*
+                            <span>(?P<series>[^<]+)</span>\s*
+                            <span>(?P<season>[^<]+)</span>\s*
+                            <span>(?P<episode>[^<]+)</span>''', webpage)
+        series, season, episode = mobj.groups() if mobj else [None] * 3
+
+        if not title:
+            if mobj:
+                title = '%s - %s - %s' % (series, season, episode)
+            else:
+                title = remove_start(self._search_regex(
+                    r'<title>([^<]+)</title>', webpage, 'title'), 'Ver online ')
 
         video_id = self._search_regex(
             r'data-media-id\s*=\s*"([^"]+)"', webpage,
@@ -83,6 +123,9 @@ class MiTeleIE(InfoExtractor):
             'display_id': display_id,
             'title': title,
             'description': get_element_by_attribute('class', 'text', webpage),
+            'series': series,
+            'season': season,
+            'episode': episode,
             'thumbnail': thumbnail,
             'duration': duration,
             'formats': formats,
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py
index c2b7ed9abbd27a2b2c8e0d9d95c59e387630180a..560fe188b675a619785332eea285484fa85154bf 100644 (file)
@@ -1,25 +1,35 @@
 from __future__ import unicode_literals
 
+import base64
+import functools
+import itertools
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote
+from ..compat import (
+    compat_chr,
+    compat_ord,
+    compat_urllib_parse_unquote,
+    compat_urlparse,
+)
 from ..utils import (
+    clean_html,
     ExtractorError,
-    HEADRequest,
+    OnDemandPagedList,
+    parse_count,
     str_to_int,
 )
 
 
 class MixcloudIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/([^/]+)'
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)'
     IE_NAME = 'mixcloud'
 
     _TESTS = [{
         'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
         'info_dict': {
             'id': 'dholbach-cryptkeeper',
-            'ext': 'mp3',
+            'ext': 'm4a',
             'title': 'Cryptkeeper',
             'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
             'uploader': 'Daniel Holbach',
@@ -37,22 +47,22 @@ class MixcloudIE(InfoExtractor):
             'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
             'uploader': 'Gilles Peterson Worldwide',
             'uploader_id': 'gillespeterson',
-            'thumbnail': 're:https?://.*/images/',
+            'thumbnail': 're:https?://.*',
             'view_count': int,
             'like_count': int,
         },
     }]
 
-    def _check_url(self, url, track_id, ext):
-        try:
-            # We only want to know if the request succeed
-            # don't download the whole file
-            self._request_webpage(
-                HEADRequest(url), track_id,
-                'Trying %s URL' % ext)
-            return True
-        except ExtractorError:
-            return False
+    # See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js
+    @staticmethod
+    def _decrypt_play_info(play_info):
+        KEY = 'pleasedontdownloadourmusictheartistswontgetpaid'
+
+        play_info = base64.b64decode(play_info.encode('ascii'))
+
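+        # XOR each byte of the base64-decoded payload with the repeating key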
+        return ''.join([
+            compat_chr(compat_ord(ch) ^ compat_ord(KEY[idx % len(KEY)]))
+            for idx, ch in enumerate(play_info)])
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -62,14 +72,19 @@ class MixcloudIE(InfoExtractor):
 
         webpage = self._download_webpage(url, track_id)
 
-        preview_url = self._search_regex(
-            r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url')
-        song_url = re.sub(r'audiocdn(\d+)', r'stream\1', preview_url)
-        song_url = song_url.replace('/previews/', '/c/originals/')
-        if not self._check_url(song_url, track_id, 'mp3'):
-            song_url = song_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
-            if not self._check_url(song_url, track_id, 'm4a'):
-                raise ExtractorError('Unable to extract track url')
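+        # a disabled cloudcast shows a notice; raise it only when decryption yields no stream URL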
+        message = self._html_search_regex(
+            r'(?s)<div[^>]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)',
+            webpage, 'error message', default=None)
+
+        encrypted_play_info = self._search_regex(
+            r'm-play-info="([^"]+)"', webpage, 'play info')
+        play_info = self._parse_json(
+            self._decrypt_play_info(encrypted_play_info), track_id)
+
+        if message and 'stream_url' not in play_info:
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
+
+        song_url = play_info['stream_url']
 
         PREFIX = (
             r'm-play-on-spacebar[^>]+'
@@ -85,13 +100,13 @@ class MixcloudIE(InfoExtractor):
         uploader_id = self._search_regex(
             r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
         description = self._og_search_description(webpage)
-        like_count = str_to_int(self._search_regex(
-            r'\bbutton-favorite\b[^>]+m-ajax-toggle-count="([^"]+)"',
-            webpage, 'like count', fatal=False))
+        like_count = parse_count(self._search_regex(
+            r'\bbutton-favorite[^>]+>.*?<span[^>]+class=["\']toggle-number[^>]+>\s*([^<]+)',
+            webpage, 'like count', default=None))
         view_count = str_to_int(self._search_regex(
             [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
              r'/listeners/?">([0-9,.]+)</a>'],
-            webpage, 'play count', fatal=False))
+            webpage, 'play count', default=None))
 
         return {
             'id': track_id,
@@ -104,3 +119,201 @@ class MixcloudIE(InfoExtractor):
             'view_count': view_count,
             'like_count': like_count,
         }
+
+
+class MixcloudPlaylistBaseIE(InfoExtractor):
+    _PAGE_SIZE = 24
+
+    def _find_urls_in_page(self, page):
+        for url in re.findall(r'm-play-button m-url="(?P<url>[^"]+)"', page):
+            yield self.url_result(
+                compat_urlparse.urljoin('https://www.mixcloud.com', clean_html(url)),
+                MixcloudIE.ie_key())
+
+    def _fetch_tracks_page(self, path, video_id, page_name, current_page, real_page_number=None):
+        real_page_number = real_page_number or current_page + 1
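+        # list pages are plain HTML fragments fetched via ?page=N&list=main&_ajax=1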
+        return self._download_webpage(
+            'https://www.mixcloud.com/%s/' % path, video_id,
+            note='Download %s (page %d)' % (page_name, current_page + 1),
+            errnote='Unable to download %s' % page_name,
+            query={'page': real_page_number, 'list': 'main', '_ajax': '1'},
+            headers={'X-Requested-With': 'XMLHttpRequest'})
+
+    def _tracks_page_func(self, page, video_id, page_name, current_page):
+        resp = self._fetch_tracks_page(page, video_id, page_name, current_page)
+
+        for item in self._find_urls_in_page(resp):
+            yield item
+
+    def _get_user_description(self, page_content):
+        return self._html_search_regex(
+            r'<div[^>]+class="description-text"[^>]*>(.+?)</div>',
+            page_content, 'user description', fatal=False)
+
+
+class MixcloudUserIE(MixcloudPlaylistBaseIE):
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/(?P<type>uploads|favorites|listens)?/?$'
+    IE_NAME = 'mixcloud:user'
+
+    _TESTS = [{
+        'url': 'http://www.mixcloud.com/dholbach/',
+        'info_dict': {
+            'id': 'dholbach_uploads',
+            'title': 'Daniel Holbach (uploads)',
+            'description': 'md5:327af72d1efeb404a8216c27240d1370',
+        },
+        'playlist_mincount': 11,
+    }, {
+        'url': 'http://www.mixcloud.com/dholbach/uploads/',
+        'info_dict': {
+            'id': 'dholbach_uploads',
+            'title': 'Daniel Holbach (uploads)',
+            'description': 'md5:327af72d1efeb404a8216c27240d1370',
+        },
+        'playlist_mincount': 11,
+    }, {
+        'url': 'http://www.mixcloud.com/dholbach/favorites/',
+        'info_dict': {
+            'id': 'dholbach_favorites',
+            'title': 'Daniel Holbach (favorites)',
+            'description': 'md5:327af72d1efeb404a8216c27240d1370',
+        },
+        'params': {
+            'playlist_items': '1-100',
+        },
+        'playlist_mincount': 100,
+    }, {
+        'url': 'http://www.mixcloud.com/dholbach/listens/',
+        'info_dict': {
+            'id': 'dholbach_listens',
+            'title': 'Daniel Holbach (listens)',
+            'description': 'md5:327af72d1efeb404a8216c27240d1370',
+        },
+        'params': {
+            'playlist_items': '1-100',
+        },
+        'playlist_mincount': 100,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        user_id = mobj.group('user')
+        list_type = mobj.group('type')
+
+        # if only a profile URL was supplied, default to downloading all uploads
+        if list_type is None:
+            list_type = 'uploads'
+
+        video_id = '%s_%s' % (user_id, list_type)
+
+        profile = self._download_webpage(
+            'https://www.mixcloud.com/%s/' % user_id, video_id,
+            note='Downloading user profile',
+            errnote='Unable to download user profile')
+
+        username = self._og_search_title(profile)
+        description = self._get_user_description(profile)
+
+        entries = OnDemandPagedList(
+            functools.partial(
+                self._tracks_page_func,
+                '%s/%s' % (user_id, list_type), video_id, 'list of %s' % list_type),
+            self._PAGE_SIZE, use_cache=True)
+
+        return self.playlist_result(
+            entries, video_id, '%s (%s)' % (username, list_type), description)
+
+
+class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/playlists/(?P<playlist>[^/]+)/?$'
+    IE_NAME = 'mixcloud:playlist'
+
+    _TESTS = [{
+        'url': 'https://www.mixcloud.com/RedBullThre3style/playlists/tokyo-finalists-2015/',
+        'info_dict': {
+            'id': 'RedBullThre3style_tokyo-finalists-2015',
+            'title': 'National Champions 2015',
+            'description': 'md5:6ff5fb01ac76a31abc9b3939c16243a3',
+        },
+        'playlist_mincount': 16,
+    }, {
+        'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/',
+        'info_dict': {
+            'id': 'maxvibes_jazzcat-on-ness-radio',
+            'title': 'Jazzcat on Ness Radio',
+            'description': 'md5:7bbbf0d6359a0b8cda85224be0f8f263',
+        },
+        'playlist_mincount': 23
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        user_id = mobj.group('user')
+        playlist_id = mobj.group('playlist')
+        video_id = '%s_%s' % (user_id, playlist_id)
+
+        profile = self._download_webpage(
+            url, user_id,
+            note='Downloading playlist page',
+            errnote='Unable to download playlist page')
+
+        description = self._get_user_description(profile)
+        playlist_title = self._html_search_regex(
+            r'<span[^>]+class="[^"]*list-playlist-title[^"]*"[^>]*>(.*?)</span>',
+            profile, 'playlist title')
+
+        entries = OnDemandPagedList(
+            functools.partial(
+                self._tracks_page_func,
+                '%s/playlists/%s' % (user_id, playlist_id), video_id, 'tracklist'),
+            self._PAGE_SIZE)
+
+        return self.playlist_result(entries, video_id, playlist_title, description)
+
+
+class MixcloudStreamIE(MixcloudPlaylistBaseIE):
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/stream/?$'
+    IE_NAME = 'mixcloud:stream'
+
+    _TEST = {
+        'url': 'https://www.mixcloud.com/FirstEar/stream/',
+        'info_dict': {
+            'id': 'FirstEar',
+            'title': 'First Ear',
+            'description': 'Curators of good music\nfirstearmusic.com',
+        },
+        'playlist_mincount': 192,
+    }
+
+    def _real_extract(self, url):
+        user_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, user_id)
+
+        entries = []
+        prev_page_url = None
+
+        def _handle_page(page):
+            entries.extend(self._find_urls_in_page(page))
+            return self._search_regex(
+                r'm-next-page-url="([^"]+)"', page,
+                'next page URL', default=None)
+
+        next_page_url = _handle_page(webpage)
+
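+        # follow m-next-page-url until it disappears or stops changing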
+        for idx in itertools.count(0):
+            if not next_page_url or prev_page_url == next_page_url:
+                break
+
+            prev_page_url = next_page_url
+            current_page = int(self._search_regex(
+                r'\?page=(\d+)', next_page_url, 'next page number'))
+
+            next_page_url = _handle_page(self._fetch_tracks_page(
+                '%s/stream' % user_id, user_id, 'stream', idx,
+                real_page_number=current_page))
+
+        username = self._og_search_title(webpage)
+        description = self._get_user_description(webpage)
+
+        return self.playlist_result(entries, user_id, username, description)
diff --git a/youtube_dl/extractor/mnet.py b/youtube_dl/extractor/mnet.py
new file mode 100644 (file)
index 0000000..e3f42e7
--- /dev/null
@@ -0,0 +1,81 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    parse_duration,
+    parse_iso8601,
+)
+
+
+class MnetIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?mnet\.(?:com|interest\.me)/tv/vod/(?:.*?\bclip_id=)?(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'http://www.mnet.com/tv/vod/171008',
+        'info_dict': {
+            'id': '171008',
+            'title': 'SS_이해인@히든박스',
+            'description': 'md5:b9efa592c3918b615ba69fe9f8a05c55',
+            'duration': 88,
+            'upload_date': '20151231',
+            'timestamp': 1451564040,
+            'age_limit': 0,
+            'thumbnails': 'mincount:5',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'ext': 'flv',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://mnet.interest.me/tv/vod/172790',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.mnet.com/tv/vod/vod_view.asp?clip_id=172790&tabMenu=',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        info = self._download_json(
+            'http://content.api.mnet.com/player/vodConfig?id=%s&ctype=CLIP' % video_id,
+            video_id, 'Downloading vod config JSON')['data']['info']
+
+        title = info['title']
+
+        rtmp_info = self._download_json(
+            info['cdn'], video_id, 'Downloading vod cdn JSON')
+
+        formats = [{
+            'url': rtmp_info['serverurl'] + rtmp_info['fileurl'],
+            'ext': 'flv',
+            'page_url': url,
+            'player_url': 'http://flvfile.mnet.com/service/player/201602/cjem_player_tv.swf?v=201602191318',
+        }]
+
+        description = info.get('ment')
+        duration = parse_duration(info.get('time'))
+        timestamp = parse_iso8601(info.get('date'), delimiter=' ')
+        age_limit = info.get('adult')
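+        # 'adult' is a flag: 'N' means all ages, anything else is treated as 18+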
+        if age_limit is not None:
+            age_limit = 0 if age_limit == 'N' else 18
+        thumbnails = [{
+            'id': thumb_format,
+            'url': thumb['url'],
+            'width': int_or_none(thumb.get('width')),
+            'height': int_or_none(thumb.get('height')),
+        } for thumb_format, thumb in info.get('cover', {}).items() if thumb.get('url')]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'timestamp': timestamp,
+            'age_limit': age_limit,
+            'thumbnails': thumbnails,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/moevideo.py b/youtube_dl/extractor/moevideo.py
index d930b96343bd081f029ad1bb7ce1ceee3c241f59..978d5d5bfeaf5ff64b7279343876a3177c43339a 100644 (file)
@@ -5,11 +5,11 @@ import json
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse
 from ..utils import (
     ExtractorError,
     int_or_none,
     sanitized_Request,
+    urlencode_postdata,
 )
 
 
@@ -77,7 +77,7 @@ class MoeVideoIE(InfoExtractor):
             ],
         ]
         r_json = json.dumps(r)
-        post = compat_urllib_parse.urlencode({'r': r_json})
+        post = urlencode_postdata({'r': r_json})
         req = sanitized_Request(self._API_URL, post)
         req.add_header('Content-type', 'application/x-www-form-urlencoded')
 
diff --git a/youtube_dl/extractor/moniker.py b/youtube_dl/extractor/moniker.py
index f6bf94f2f6b3d6868fa098994a2d275f8bb59a99..b208820fe64970b3f7b362dba13c534a6955686d 100644 (file)
@@ -5,11 +5,11 @@ import os.path
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse
 from ..utils import (
     ExtractorError,
     remove_start,
     sanitized_Request,
+    urlencode_postdata,
 )
 
 
@@ -88,7 +88,7 @@ class MonikerIE(InfoExtractor):
             fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage)
             data = dict(fields)
 
-            post = compat_urllib_parse.urlencode(data)
+            post = urlencode_postdata(data)
             headers = {
                 b'Content-Type': b'application/x-www-form-urlencoded',
             }
diff --git a/youtube_dl/extractor/mooshare.py b/youtube_dl/extractor/mooshare.py
deleted file mode 100644 (file)
index 7cc7f05..0000000
+++ /dev/null
@@ -1,110 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_urllib_parse
-from ..utils import (
-    ExtractorError,
-    sanitized_Request,
-)
-
-
-class MooshareIE(InfoExtractor):
-    IE_NAME = 'mooshare'
-    IE_DESC = 'Mooshare.biz'
-    _VALID_URL = r'http://(?:www\.)?mooshare\.biz/(?P<id>[\da-z]{12})'
-
-    _TESTS = [
-        {
-            'url': 'http://mooshare.biz/8dqtk4bjbp8g',
-            'md5': '4e14f9562928aecd2e42c6f341c8feba',
-            'info_dict': {
-                'id': '8dqtk4bjbp8g',
-                'ext': 'mp4',
-                'title': 'Comedy Football 2011 - (part 1-2)',
-                'duration': 893,
-            },
-        },
-        {
-            'url': 'http://mooshare.biz/aipjtoc4g95j',
-            'info_dict': {
-                'id': 'aipjtoc4g95j',
-                'ext': 'mp4',
-                'title': 'Orange Caramel  Dashing Through the Snow',
-                'duration': 212,
-            },
-            'params': {
-                # rtmp download
-                'skip_download': True,
-            }
-        }
-    ]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        page = self._download_webpage(url, video_id, 'Downloading page')
-
-        if re.search(r'>Video Not Found or Deleted<', page) is not None:
-            raise ExtractorError('Video %s does not exist' % video_id, expected=True)
-
-        hash_key = self._html_search_regex(r'<input type="hidden" name="hash" value="([^"]+)">', page, 'hash')
-        title = self._html_search_regex(r'(?m)<div class="blockTitle">\s*<h2>Watch ([^<]+)</h2>', page, 'title')
-
-        download_form = {
-            'op': 'download1',
-            'id': video_id,
-            'hash': hash_key,
-        }
-
-        request = sanitized_Request(
-            'http://mooshare.biz/%s' % video_id, compat_urllib_parse.urlencode(download_form))
-        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
-
-        self._sleep(5, video_id)
-
-        video_page = self._download_webpage(request, video_id, 'Downloading video page')
-
-        thumbnail = self._html_search_regex(r'image:\s*"([^"]+)",', video_page, 'thumbnail', fatal=False)
-        duration_str = self._html_search_regex(r'duration:\s*"(\d+)",', video_page, 'duration', fatal=False)
-        duration = int(duration_str) if duration_str is not None else None
-
-        formats = []
-
-        # SD video
-        mobj = re.search(r'(?m)file:\s*"(?P<url>[^"]+)",\s*provider:', video_page)
-        if mobj is not None:
-            formats.append({
-                'url': mobj.group('url'),
-                'format_id': 'sd',
-                'format': 'SD',
-            })
-
-        # HD video
-        mobj = re.search(r'\'hd-2\': { file: \'(?P<url>[^\']+)\' },', video_page)
-        if mobj is not None:
-            formats.append({
-                'url': mobj.group('url'),
-                'format_id': 'hd',
-                'format': 'HD',
-            })
-
-        # rtmp video
-        mobj = re.search(r'(?m)file: "(?P<playpath>[^"]+)",\s*streamer: "(?P<rtmpurl>rtmp://[^"]+)",', video_page)
-        if mobj is not None:
-            formats.append({
-                'url': mobj.group('rtmpurl'),
-                'play_path': mobj.group('playpath'),
-                'rtmp_live': False,
-                'ext': 'mp4',
-                'format_id': 'rtmp',
-                'format': 'HD',
-            })
-
-        return {
-            'id': video_id,
-            'title': title,
-            'thumbnail': thumbnail,
-            'duration': duration,
-            'formats': formats,
-        }
diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py
index 97d5da626a7a5d2555ac3107eb89d1a4fd11b510..5e1a8a71a93aa28962d7f260af966d10cf8e9f7a 100644 (file)
@@ -5,62 +5,73 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
+    ExtractorError,
     str_to_int,
     unified_strdate,
 )
 
 
 class MotherlessIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/)?(?P<id>[A-Z0-9]+)'
-    _TESTS = [
-        {
-            'url': 'http://motherless.com/AC3FFE1',
-            'md5': '310f62e325a9fafe64f68c0bccb6e75f',
-            'info_dict': {
-                'id': 'AC3FFE1',
-                'ext': 'mp4',
-                'title': 'Fucked in the ass while playing PS3',
-                'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
-                'upload_date': '20100913',
-                'uploader_id': 'famouslyfuckedup',
-                'thumbnail': 're:http://.*\.jpg',
-                'age_limit': 18,
-            }
-        },
-        {
-            'url': 'http://motherless.com/532291B',
-            'md5': 'bc59a6b47d1f958e61fbd38a4d31b131',
-            'info_dict': {
-                'id': '532291B',
-                'ext': 'mp4',
-                'title': 'Amazing girl playing the omegle game, PERFECT!',
-                'categories': ['Amateur', 'webcam', 'omegle', 'pink', 'young', 'masturbate', 'teen', 'game', 'hairy'],
-                'upload_date': '20140622',
-                'uploader_id': 'Sulivana7x',
-                'thumbnail': 're:http://.*\.jpg',
-                'age_limit': 18,
-            }
+    _VALID_URL = r'https?://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/)?(?P<id>[A-Z0-9]+)'
+    _TESTS = [{
+        'url': 'http://motherless.com/AC3FFE1',
+        'md5': '310f62e325a9fafe64f68c0bccb6e75f',
+        'info_dict': {
+            'id': 'AC3FFE1',
+            'ext': 'mp4',
+            'title': 'Fucked in the ass while playing PS3',
+            'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
+            'upload_date': '20100913',
+            'uploader_id': 'famouslyfuckedup',
+            'thumbnail': 're:http://.*\.jpg',
+            'age_limit': 18,
+        }
+    }, {
+        'url': 'http://motherless.com/532291B',
+        'md5': 'bc59a6b47d1f958e61fbd38a4d31b131',
+        'info_dict': {
+            'id': '532291B',
+            'ext': 'mp4',
+            'title': 'Amazing girl playing the omegle game, PERFECT!',
+            'categories': ['Amateur', 'webcam', 'omegle', 'pink', 'young', 'masturbate', 'teen',
+                           'game', 'hairy'],
+            'upload_date': '20140622',
+            'uploader_id': 'Sulivana7x',
+            'thumbnail': 're:http://.*\.jpg',
+            'age_limit': 18,
         },
-        {
-            'url': 'http://motherless.com/g/cosplay/633979F',
-            'md5': '0b2a43f447a49c3e649c93ad1fafa4a0',
-            'info_dict': {
-                'id': '633979F',
-                'ext': 'mp4',
-                'title': 'Turtlette',
-                'categories': ['superheroine heroine  superher'],
-                'upload_date': '20140827',
-                'uploader_id': 'shade0230',
-                'thumbnail': 're:http://.*\.jpg',
-                'age_limit': 18,
-            }
+        'skip': '404',
+    }, {
+        'url': 'http://motherless.com/g/cosplay/633979F',
+        'md5': '0b2a43f447a49c3e649c93ad1fafa4a0',
+        'info_dict': {
+            'id': '633979F',
+            'ext': 'mp4',
+            'title': 'Turtlette',
+            'categories': ['superheroine heroine  superher'],
+            'upload_date': '20140827',
+            'uploader_id': 'shade0230',
+            'thumbnail': 're:http://.*\.jpg',
+            'age_limit': 18,
         }
-    ]
+    }, {
+        # no keywords
+        'url': 'http://motherless.com/8B4BBC1',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
+        if any(p in webpage for p in (
+                '<title>404 - MOTHERLESS.COM<',
+                ">The page you're looking for cannot be found.<")):
+            raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+
+        if '>The content you are trying to view is for friends only.' in webpage:
+            raise ExtractorError('Video %s is for friends only' % video_id, expected=True)
+
         title = self._html_search_regex(
             r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
         video_url = self._html_search_regex(
@@ -86,7 +97,7 @@ class MotherlessIE(InfoExtractor):
             r'"thumb-member-username">\s+<a href="/m/([^"]+)"',
             webpage, 'uploader_id')
 
-        categories = self._html_search_meta('keywords', webpage)
+        categories = self._html_search_meta('keywords', webpage, default=None)
         if categories:
             categories = [cat.strip() for cat in categories.split(',')]
 
diff --git a/youtube_dl/extractor/motorsport.py b/youtube_dl/extractor/motorsport.py
index c1a482dba39fb98efdb28e85b681565eb58e3f9e..370328b362c2a0661925d054be121a7216dc94c7 100644 (file)
@@ -9,7 +9,7 @@ from ..compat import (
 
 class MotorsportIE(InfoExtractor):
     IE_DESC = 'motorsport.com'
-    _VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])'
+    _VALID_URL = r'https?://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])'
     _TEST = {
         'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/',
         'info_dict': {
diff --git a/youtube_dl/extractor/movieclips.py b/youtube_dl/extractor/movieclips.py
index 1564cb71f6844a3e83fd958ddd0c192be835fc3a..d0cb8278e9860591a3bcb6e711998ae204c4962d 100644 (file)
@@ -2,39 +2,48 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import sanitized_Request
+from ..utils import (
+    smuggle_url,
+    float_or_none,
+    parse_iso8601,
+    update_url_query,
+)
 
 
 class MovieClipsIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www.)?movieclips\.com/videos/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www.)?movieclips\.com/videos/.+-(?P<id>\d+)(?:\?|$)'
     _TEST = {
-        'url': 'http://www.movieclips.com/videos/warcraft-trailer-1-561180739597?autoPlay=true&playlistId=5',
+        'url': 'http://www.movieclips.com/videos/warcraft-trailer-1-561180739597',
+        'md5': '42b5a0352d4933a7bd54f2104f481244',
         'info_dict': {
             'id': 'pKIGmG83AqD9',
-            'display_id': 'warcraft-trailer-1-561180739597',
             'ext': 'mp4',
             'title': 'Warcraft Trailer 1',
             'description': 'Watch Trailer 1 from Warcraft (2016). Legendary’s WARCRAFT is a 3D epic adventure of world-colliding conflict based.',
             'thumbnail': 're:^https?://.*\.jpg$',
+            'timestamp': 1446843055,
+            'upload_date': '20151106',
+            'uploader': 'Movieclips',
         },
         'add_ie': ['ThePlatform'],
     }
 
     def _real_extract(self, url):
-        display_id = self._match_id(url)
-
-        req = sanitized_Request(url)
-        # it doesn't work if it thinks the browser it's too old
-        req.add_header('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/43.0 (Chrome)')
-        webpage = self._download_webpage(req, display_id)
-        theplatform_link = self._html_search_regex(r'src="(http://player.theplatform.com/p/.*?)"', webpage, 'theplatform link')
-        title = self._html_search_regex(r'<title[^>]*>([^>]+)-\s*\d+\s*|\s*Movieclips.com</title>', webpage, 'title')
-        description = self._html_search_meta('description', webpage)
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
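+        # the page state lives in a __REACT_ENGINE__ JS object; pick this clip from its playlist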
+        video = next(v for v in self._parse_json(self._search_regex(
+            r'var\s+__REACT_ENGINE__\s*=\s*({.+});',
+            webpage, 'react engine'), video_id)['playlist']['videos'] if v['id'] == video_id)
 
         return {
             '_type': 'url_transparent',
-            'url': theplatform_link,
-            'title': title,
-            'display_id': display_id,
-            'description': description,
+            'ie_key': 'ThePlatform',
+            'url': smuggle_url(update_url_query(
+                video['contentUrl'], {'mbr': 'true'}), {'force_smil_url': True}),
+            'title': self._og_search_title(webpage),
+            'description': self._html_search_meta('description', webpage),
+            'duration': float_or_none(video.get('duration')),
+            'timestamp': parse_iso8601(video.get('dateCreated')),
+            'thumbnail': video.get('defaultImage'),
+            'uploader': video.get('provider'),
         }
diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py
index ed068365d3d4936d35e2e62146f1dcbec750728b..dd06395891ac02ce81b40efd471776cb080d328d 100644 (file)
@@ -4,8 +4,9 @@ import re
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_parse,
+    compat_urllib_parse_urlencode,
     compat_str,
+    compat_xpath,
 )
 from ..utils import (
     ExtractorError,
@@ -17,6 +18,7 @@ from ..utils import (
     unescapeHTML,
     url_basename,
     RegexNotFoundError,
+    xpath_text,
 )
 
 
@@ -83,9 +85,10 @@ class MTVServicesInfoExtractor(InfoExtractor):
                 rtmp_video_url = rendition.find('./src').text
                 if rtmp_video_url.endswith('siteunavail.png'):
                     continue
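+                # URLs rewritten to rtmp are FLV streams; keep the reported ext otherwise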
+                new_url = self._transform_rtmp_url(rtmp_video_url)
                 formats.append({
-                    'ext': ext,
-                    'url': self._transform_rtmp_url(rtmp_video_url),
+                    'ext': 'flv' if new_url.startswith('rtmp') else ext,
+                    'url': new_url,
                     'format_id': rendition.get('bitrate'),
                     'width': int(rendition.get('width')),
                     'height': int(rendition.get('height')),
@@ -130,11 +133,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
             message += item.text
             raise ExtractorError(message, expected=True)
 
-        description_node = itemdoc.find('description')
-        if description_node is not None:
-            description = description_node.text.strip()
-        else:
-            description = None
+        description = xpath_text(itemdoc, 'description')
 
         title_el = None
         if title_el is None:
@@ -142,9 +141,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
                 itemdoc, './/{http://search.yahoo.com/mrss/}category',
                 'scheme', 'urn:mtvn:video_title')
         if title_el is None:
-            title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title')
+            title_el = itemdoc.find(compat_xpath('.//{http://search.yahoo.com/mrss/}title'))
         if title_el is None:
-            title_el = itemdoc.find('.//title') or itemdoc.find('./title')
+            title_el = itemdoc.find(compat_xpath('.//title'))
             if title_el.text is None:
                 title_el = None
 
@@ -174,7 +173,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
         data = {'uri': uri}
         if self._LANG:
             data['lang'] = self._LANG
-        return compat_urllib_parse.urlencode(data)
+        return compat_urllib_parse_urlencode(data)
 
     def _get_videos_info(self, uri):
         video_id = self._id_from_uri(uri)
diff --git a/youtube_dl/extractor/musicplayon.py b/youtube_dl/extractor/musicplayon.py
index 50d92b50ae5ec2fa49e45cc64aea7f08cc21ccea..2174e5665778b590055c06255a91c030cb579d29 100644 (file)
@@ -1,17 +1,21 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..utils import int_or_none
+from ..compat import compat_urlparse
+from ..utils import (
+    int_or_none,
+    js_to_json,
+    mimetype2ext,
+)
 
 
 class MusicPlayOnIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:.+?\.)?musicplayon\.com/play(?:-touch)?\?(?:v|pl=100&play)=(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:.+?\.)?musicplayon\.com/play(?:-touch)?\?(?:v|pl=\d+&play)=(?P<id>\d+)'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://en.musicplayon.com/play?v=433377',
+        'md5': '00cdcdea1726abdf500d1e7fd6dd59bb',
         'info_dict': {
             'id': '433377',
             'ext': 'mp4',
@@ -20,15 +24,16 @@ class MusicPlayOnIE(InfoExtractor):
             'duration': 342,
             'uploader': 'ultrafish',
         },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
-    }
+    }, {
+        'url': 'http://en.musicplayon.com/play?pl=102&play=442629',
+        'only_matching': True,
+    }]
+
+    _URL_TEMPLATE = 'http://en.musicplayon.com/play?v=%s'
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
+        url = self._URL_TEMPLATE % video_id
 
         page = self._download_webpage(url, video_id)
 
@@ -40,28 +45,14 @@ class MusicPlayOnIE(InfoExtractor):
         uploader = self._html_search_regex(
             r'<div>by&nbsp;<a href="[^"]+" class="purple">([^<]+)</a></div>', page, 'uploader', fatal=False)
 
-        formats = [
-            {
-                'url': 'http://media0-eu-nl.musicplayon.com/stream-mobile?id=%s&type=.mp4' % video_id,
-                'ext': 'mp4',
-            }
-        ]
-
-        manifest = self._download_webpage(
-            'http://en.musicplayon.com/manifest.m3u8?v=%s' % video_id, video_id, 'Downloading manifest')
-
-        for entry in manifest.split('#')[1:]:
-            if entry.startswith('EXT-X-STREAM-INF:'):
-                meta, url, _ = entry.split('\n')
-                params = dict(param.split('=') for param in meta.split(',')[1:])
-                formats.append({
-                    'url': url,
-                    'ext': 'mp4',
-                    'tbr': int(params['BANDWIDTH']),
-                    'width': int(params['RESOLUTION'].split('x')[1]),
-                    'height': int(params['RESOLUTION'].split('x')[-1]),
-                    'format_note': params['NAME'].replace('"', '').strip(),
-                })
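+        # the player setup embeds its source list as a JS literal; js_to_json makes it parseable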
+        sources = self._parse_json(
+            self._search_regex(r'setup\[\'_sources\'\]\s*=\s*([^;]+);', page, 'video sources'),
+            video_id, transform_source=js_to_json)
+        formats = [{
+            'url': compat_urlparse.urljoin(url, source['src']),
+            'ext': mimetype2ext(source.get('type')),
+            'format_note': source.get('data-res'),
+        } for source in sources]
 
         return {
             'id': video_id,
diff --git a/youtube_dl/extractor/muzu.py b/youtube_dl/extractor/muzu.py
deleted file mode 100644 (file)
index 1e9cf8d..0000000
+++ /dev/null
@@ -1,65 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-)
-
-
-class MuzuTVIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.muzu\.tv/(.+?)/(.+?)/(?P<id>\d+)'
-    IE_NAME = 'muzu.tv'
-
-    _TEST = {
-        'url': 'http://www.muzu.tv/defected/marcashken-featuring-sos-cat-walk-original-mix-music-video/1981454/',
-        'md5': '98f8b2c7bc50578d6a0364fff2bfb000',
-        'info_dict': {
-            'id': '1981454',
-            'ext': 'mp4',
-            'title': 'Cat Walk (Original Mix)',
-            'description': 'md5:90e868994de201b2570e4e5854e19420',
-            'uploader': 'MarcAshken featuring SOS',
-        },
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        info_data = compat_urllib_parse.urlencode({
-            'format': 'json',
-            'url': url,
-        })
-        info = self._download_json(
-            'http://www.muzu.tv/api/oembed/?%s' % info_data,
-            video_id, 'Downloading video info')
-
-        player_info = self._download_json(
-            'http://player.muzu.tv/player/playerInit?ai=%s' % video_id,
-            video_id, 'Downloading player info')
-        video_info = player_info['videos'][0]
-        for quality in ['1080', '720', '480', '360']:
-            if video_info.get('v%s' % quality):
-                break
-
-        data = compat_urllib_parse.urlencode({
-            'ai': video_id,
-            # Even if each time you watch a video the hash changes,
-            # it seems to work for different videos, and it will work
-            # even if you use any non empty string as a hash
-            'viewhash': 'VBNff6djeV4HV5TRPW5kOHub2k',
-            'device': 'web',
-            'qv': quality,
-        })
-        video_url_info = self._download_json(
-            'http://player.muzu.tv/player/requestVideo?%s' % data,
-            video_id, 'Downloading video url')
-        video_url = video_url_info['url']
-
-        return {
-            'id': video_id,
-            'title': info['title'],
-            'url': video_url,
-            'thumbnail': info['thumbnail_url'],
-            'description': info['description'],
-            'uploader': info['author_name'],
-        }
diff --git a/youtube_dl/extractor/mwave.py b/youtube_dl/extractor/mwave.py
index 66b5231979ce8399c38074a9a14a2b4ea2114ee8..a103e0323a6c62e4b0d283afdb6d4f5662bb1869 100644 (file)
@@ -10,9 +10,10 @@ from ..utils import (
 
 class MwaveIE(InfoExtractor):
     _VALID_URL = r'https?://mwave\.interest\.me/mnettv/videodetail\.m\?searchVideoDetailVO\.clip_id=(?P<id>[0-9]+)'
+    _URL_TEMPLATE = 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=%s'
     _TEST = {
         'url': 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=168859',
-        'md5': 'c930e27b7720aaa3c9d0018dfc8ff6cc',
+        # md5 is unstable
         'info_dict': {
             'id': '168859',
             'ext': 'flv',
@@ -56,3 +57,28 @@ class MwaveIE(InfoExtractor):
             'view_count': int_or_none(vod_info.get('hit')),
             'formats': formats,
         }
+
+
+class MwaveMeetGreetIE(InfoExtractor):
+    _VALID_URL = r'https?://mwave\.interest\.me/meetgreet/view/(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://mwave.interest.me/meetgreet/view/256',
+        'info_dict': {
+            'id': '173294',
+            'ext': 'flv',
+            'title': '[MEET&GREET] Park BoRam',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'uploader': 'Mwave',
+            'duration': 3634,
+            'view_count': int,
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
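+        # meet & greet pages embed a regular Mwave clip; resolve its id and delegate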
+        clip_id = self._html_search_regex(
+            r'<iframe[^>]+src="/mnettv/ifr_clip\.m\?searchVideoDetailVO\.clip_id=(\d+)',
+            webpage, 'clip ID')
+        clip_url = MwaveIE._URL_TEMPLATE % clip_id
+        return self.url_result(clip_url, 'Mwave', clip_id)
diff --git a/youtube_dl/extractor/myspace.py b/youtube_dl/extractor/myspace.py
index 83414a2325586d7319c06247fa037c42bb2b199a..0d5238d777ad00ab13e84a69474d42b360cdecc1 100644 (file)
@@ -2,13 +2,13 @@
 from __future__ import unicode_literals
 
 import re
-import json
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_str,
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    parse_iso8601,
 )
-from ..utils import ExtractorError
 
 
 class MySpaceIE(InfoExtractor):
@@ -24,6 +24,8 @@ class MySpaceIE(InfoExtractor):
                 'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.',
                 'uploader': 'Five Minutes to the Stage',
                 'uploader_id': 'fiveminutestothestage',
+                'timestamp': 1414108751,
+                'upload_date': '20141023',
             },
             'params': {
                 # rtmp download
@@ -64,7 +66,7 @@ class MySpaceIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'Starset - First Light',
                 'description': 'md5:2d5db6c9d11d527683bcda818d332414',
-                'uploader': 'Jacob Soren',
+                'uploader': 'Yumi K',
                 'uploader_id': 'SorenPromotions',
                 'upload_date': '20140725',
             }
@@ -78,6 +80,19 @@ class MySpaceIE(InfoExtractor):
         player_url = self._search_regex(
             r'playerSwf":"([^"?]*)', webpage, 'player URL')
 
+        def rtmp_format_from_stream_url(stream_url, width=None, height=None):
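+            # stream URLs pack the RTMP base and the play path separated by ';'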
+            rtmp_url, play_path = stream_url.split(';', 1)
+            return {
+                'format_id': 'rtmp',
+                'url': rtmp_url,
+                'play_path': play_path,
+                'player_url': player_url,
+                'protocol': 'rtmp',
+                'ext': 'flv',
+                'width': width,
+                'height': height,
+            }
+
         if mobj.group('mediatype').startswith('music/song'):
             # songs don't store any useful info in the 'context' variable
             song_data = self._search_regex(
@@ -93,8 +108,8 @@ class MySpaceIE(InfoExtractor):
                 return self._search_regex(
                     r'''data-%s=([\'"])(?P<data>.*?)\1''' % name,
                     song_data, name, default='', group='data')
-            streamUrl = search_data('stream-url')
-            if not streamUrl:
+            stream_url = search_data('stream-url')
+            if not stream_url:
                 vevo_id = search_data('vevo-id')
                 youtube_id = search_data('youtube-id')
                 if vevo_id:
@@ -106,36 +121,47 @@ class MySpaceIE(InfoExtractor):
                 else:
                     raise ExtractorError(
                         'Found song but don\'t know how to download it')
-            info = {
+            return {
                 'id': video_id,
                 'title': self._og_search_title(webpage),
                 'uploader': search_data('artist-name'),
                 'uploader_id': search_data('artist-username'),
                 'thumbnail': self._og_search_thumbnail(webpage),
+                'duration': int_or_none(search_data('duration')),
+                'formats': [rtmp_format_from_stream_url(stream_url)]
             }
         else:
-            context = json.loads(self._search_regex(
-                r'context = ({.*?});', webpage, 'context'))
-            video = context['video']
-            streamUrl = video['streamUrl']
-            info = {
-                'id': compat_str(video['mediaId']),
+            video = self._parse_json(self._search_regex(
+                r'context = ({.*?});', webpage, 'context'),
+                video_id)['video']
+            formats = []
+            hls_stream_url = video.get('hlsStreamUrl')
+            if hls_stream_url:
+                formats.append({
+                    'format_id': 'hls',
+                    'url': hls_stream_url,
+                    'protocol': 'm3u8_native',
+                    'ext': 'mp4',
+                })
+            stream_url = video.get('streamUrl')
+            if stream_url:
+                formats.append(rtmp_format_from_stream_url(
+                    stream_url,
+                    int_or_none(video.get('width')),
+                    int_or_none(video.get('height'))))
+            self._sort_formats(formats)
+            return {
+                'id': video_id,
                 'title': video['title'],
-                'description': video['description'],
-                'thumbnail': video['imageUrl'],
-                'uploader': video['artistName'],
-                'uploader_id': video['artistUsername'],
+                'description': video.get('description'),
+                'thumbnail': video.get('imageUrl'),
+                'uploader': video.get('artistName'),
+                'uploader_id': video.get('artistUsername'),
+                'duration': int_or_none(video.get('duration')),
+                'timestamp': parse_iso8601(video.get('dateAdded')),
+                'formats': formats,
             }
 
-        rtmp_url, play_path = streamUrl.split(';', 1)
-        info.update({
-            'url': rtmp_url,
-            'play_path': play_path,
-            'player_url': player_url,
-            'ext': 'flv',
-        })
-        return info
-
 
 class MySpaceAlbumIE(InfoExtractor):
     IE_NAME = 'MySpace:album'
diff --git a/youtube_dl/extractor/myspass.py b/youtube_dl/extractor/myspass.py
index f936b92bbdbb031485fd66fb8dc802b3ba9ff9b3..1ca7b1a9e958c221f44c48bced04c314c0957f8c 100644 (file)
@@ -11,7 +11,7 @@ from ..utils import (
 
 
 class MySpassIE(InfoExtractor):
-    _VALID_URL = r'http://www\.myspass\.de/.*'
+    _VALID_URL = r'https?://www\.myspass\.de/.*'
     _TEST = {
         'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
         'md5': '0b49f4844a068f8b33f4b7c88405862b',
diff --git a/youtube_dl/extractor/myvideo.py b/youtube_dl/extractor/myvideo.py
index 1e21cf98a9a415a33a4bb563dd296928c27426b6..6d447a4935e49cd3c4f7525fff6ffe5e9883656e 100644 (file)
@@ -9,8 +9,8 @@ import json
 from .common import InfoExtractor
 from ..compat import (
     compat_ord,
-    compat_urllib_parse,
     compat_urllib_parse_unquote,
+    compat_urllib_parse_urlencode,
 )
 from ..utils import (
     ExtractorError,
@@ -20,7 +20,7 @@ from ..utils import (
 
 class MyVideoIE(InfoExtractor):
     _WORKING = False
-    _VALID_URL = r'http://(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/(?P<id>[0-9]+)/[^?/]+.*'
+    _VALID_URL = r'https?://(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/(?P<id>[0-9]+)/[^?/]+.*'
     IE_NAME = 'myvideo'
     _TEST = {
         'url': 'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
@@ -112,7 +112,7 @@ class MyVideoIE(InfoExtractor):
                 encxml = compat_urllib_parse_unquote(b)
         if not params.get('domain'):
             params['domain'] = 'www.myvideo.de'
-        xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params))
+        xmldata_url = '%s?%s' % (encxml, compat_urllib_parse_urlencode(params))
         if 'flash_playertype=MTV' in xmldata_url:
             self._downloader.report_warning('avoiding MTV player')
             xmldata_url = (
diff --git a/youtube_dl/extractor/myvidster.py b/youtube_dl/extractor/myvidster.py
index a94ab8358cacc51094ab791ace648ec062eb5f94..731c245428103b3ea96f5c396b063afadac82702 100644 (file)
@@ -4,7 +4,7 @@ from .common import InfoExtractor
 
 
 class MyVidsterIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?myvidster\.com/video/(?P<id>\d+)/'
+    _VALID_URL = r'https?://(?:www\.)?myvidster\.com/video/(?P<id>\d+)/'
 
     _TEST = {
         'url': 'http://www.myvidster.com/video/32059805/Hot_chemistry_with_raw_love_making',
diff --git a/youtube_dl/extractor/nationalgeographic.py b/youtube_dl/extractor/nationalgeographic.py
index 6fc9e7b050c75ced3262ebad9ba92d74ed6b5d7d..72251866303885f6cd9b040ec9ad3f042d8add6a 100644 (file)
@@ -4,30 +4,40 @@ from .common import InfoExtractor
 from ..utils import (
     smuggle_url,
     url_basename,
+    update_url_query,
 )
 
 
 class NationalGeographicIE(InfoExtractor):
-    _VALID_URL = r'http://video\.nationalgeographic\.com/.*?'
+    IE_NAME = 'natgeo'
+    _VALID_URL = r'https?://video\.nationalgeographic\.com/.*?'
 
     _TESTS = [
         {
             'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo',
+            'md5': '730855d559abbad6b42c2be1fa584917',
             'info_dict': {
-                'id': '4DmDACA6Qtk_',
-                'ext': 'flv',
+                'id': '0000014b-70a1-dd8c-af7f-f7b559330001',
+                'ext': 'mp4',
                 'title': 'Mating Crabs Busted by Sharks',
                 'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3',
+                'timestamp': 1423523799,
+                'upload_date': '20150209',
+                'uploader': 'NAGS',
             },
             'add_ie': ['ThePlatform'],
         },
         {
             'url': 'http://video.nationalgeographic.com/wild/when-sharks-attack/the-real-jaws',
+            'md5': '6a3105eb448c070503b3105fb9b320b5',
             'info_dict': {
-                'id': '_JeBD_D7PlS5',
-                'ext': 'flv',
+                'id': 'ngc-I0IauNSWznb_UV008GxSbwY35BZvgi2e',
+                'ext': 'mp4',
                 'title': 'The Real Jaws',
                 'description': 'md5:8d3e09d9d53a85cd397b4b21b2c77be6',
+                'timestamp': 1433772632,
+                'upload_date': '20150608',
+                'uploader': 'NAGS',
             },
             'add_ie': ['ThePlatform'],
         },
@@ -37,18 +47,67 @@ class NationalGeographicIE(InfoExtractor):
         name = url_basename(url)
 
         webpage = self._download_webpage(url, name)
-        feed_url = self._search_regex(
-            r'data-feed-url="([^"]+)"', webpage, 'feed url')
         guid = self._search_regex(
             r'id="(?:videoPlayer|player-container)"[^>]+data-guid="([^"]+)"',
             webpage, 'guid')
 
-        feed = self._download_xml('%s?byGuid=%s' % (feed_url, guid), name)
-        content = feed.find('.//{http://search.yahoo.com/mrss/}content')
-        theplatform_id = url_basename(content.attrib.get('url'))
+        return {
+            '_type': 'url_transparent',
+            'ie_key': 'ThePlatform',
+            'url': smuggle_url(
+                'http://link.theplatform.com/s/ngs/media/guid/2423130747/%s?mbr=true' % guid,
+                {'force_smil_url': True}),
+            'id': guid,
+        }
 
-        return self.url_result(smuggle_url(
-            'http://link.theplatform.com/s/ngs/%s?format=SMIL&formats=MPEG4&manifest=f4m' % theplatform_id,
-            # For some reason, the normal links don't work and we must force
-            # the use of f4m
-            {'force_smil_url': True}))
+
+class NationalGeographicChannelIE(InfoExtractor):
+    IE_NAME = 'natgeo:channel'
+    _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?[^/]+/videos/(?P<id>[^/?]+)'
+
+    _TESTS = [
+        {
+            'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/videos/uncovering-a-universal-knowledge/',
+            'md5': '518c9aa655686cf81493af5cc21e2a04',
+            'info_dict': {
+                'id': 'nB5vIAfmyllm',
+                'ext': 'mp4',
+                'title': 'Uncovering a Universal Knowledge',
+                'description': 'md5:1a89148475bf931b3661fcd6ddb2ae3a',
+                'timestamp': 1458680907,
+                'upload_date': '20160322',
+                'uploader': 'NEWA-FNG-NGTV',
+            },
+            'add_ie': ['ThePlatform'],
+        },
+        {
+            'url': 'http://channel.nationalgeographic.com/wild/destination-wild/videos/the-stunning-red-bird-of-paradise/',
+            'md5': 'c4912f656b4cbe58f3e000c489360989',
+            'info_dict': {
+                'id': '3TmMv9OvGwIR',
+                'ext': 'mp4',
+                'title': 'The Stunning Red Bird of Paradise',
+                'description': 'md5:7bc8cd1da29686be4d17ad1230f0140c',
+                'timestamp': 1459362152,
+                'upload_date': '20160330',
+                'uploader': 'NEWA-FNG-NGTV',
+            },
+            'add_ie': ['ThePlatform'],
+        },
+    ]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        release_url = self._search_regex(
+            r'video_auth_playlist_url\s*=\s*"([^"]+)"',
+            webpage, 'release url')
+
+        return {
+            '_type': 'url_transparent',
+            'ie_key': 'ThePlatform',
+            'url': smuggle_url(
+                update_url_query(release_url, {'mbr': 'true', 'switch': 'http'}),
+                {'force_smil_url': True}),
+            'display_id': display_id,
+        }
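Both National Geographic extractors now stop short of downloading media themselves and instead return a `url_transparent` result pointing at ThePlatform: the delegate extractor resolves the formats, while fields set here (`id`, `display_id`) are kept. A minimal sketch of the pattern, with a hypothetical wrapper extractor (the theplatform.com path is a placeholder):

    from .common import InfoExtractor
    from ..utils import smuggle_url

    class ExampleWrapperIE(InfoExtractor):           # hypothetical extractor
        _VALID_URL = r'https?://example\.com/video/(?P<id>\w+)'

        def _real_extract(self, url):
            video_id = self._match_id(url)
            return {
                '_type': 'url_transparent',          # delegate, but keep our fields
                'ie_key': 'ThePlatform',             # pin the downstream extractor
                'url': smuggle_url(
                    'http://link.theplatform.com/s/example/%s?mbr=true' % video_id,
                    {'force_smil_url': True}),       # hints that survive the hand-off
                'id': video_id,
            }

smuggle_url packs the extra dict into the URL fragment so it rides along to the next extractor, which unpacks it with unsmuggle_url.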
diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py
index 1f5fc21452322792d0a94bc75feec84d6c826858..6d6f69b440a4b91d95c42210b2e597aca99144f6 100644 (file)
@@ -5,7 +5,7 @@ import re
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_parse,
+    compat_urllib_parse_urlencode,
     compat_urlparse,
 )
 from ..utils import (
@@ -53,8 +53,8 @@ class NaverIE(InfoExtractor):
             raise ExtractorError('couldn\'t extract vid and key')
         vid = m_id.group(1)
         key = m_id.group(2)
-        query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key, })
-        query_urls = compat_urllib_parse.urlencode({
+        query = compat_urllib_parse_urlencode({'vid': vid, 'inKey': key, })
+        query_urls = compat_urllib_parse_urlencode({
             'masterVid': vid,
             'protocol': 'p2p',
             'inKey': key,
diff --git a/youtube_dl/extractor/nba.py b/youtube_dl/extractor/nba.py
index a071378b6d1dc18cefe3d76f98c3b30d0fe8a880..d896b0d04810655c1d7c993819b88e7b32029832 100644 (file)
@@ -1,18 +1,26 @@
 from __future__ import unicode_literals
 
+import functools
+import os.path
 import re
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_parse_urlencode,
+    compat_urlparse,
+)
 from ..utils import (
-    parse_duration,
     int_or_none,
+    OnDemandPagedList,
+    parse_duration,
+    remove_start,
     xpath_text,
     xpath_attr,
 )
 
 
 class NBAIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?P<path>(?:[^/]+/)?video/(?P<id>[^?]*?))/?(?:/index\.html)?(?:\?.*)?$'
+    _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?P<path>(?:[^/]+/)+(?P<id>[^?]*?))/?(?:/index\.html)?(?:\?.*)?$'
     _TESTS = [{
         'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
         'md5': '9e7729d3010a9c71506fd1248f74e4f4',
@@ -44,14 +52,101 @@ class NBAIE(InfoExtractor):
             'timestamp': 1432134543,
             'upload_date': '20150520',
         }
+    }, {
+        'url': 'http://www.nba.com/clippers/news/doc-rivers-were-not-trading-blake',
+        'info_dict': {
+            'id': '1455672027478-Doc_Feb16_720',
+            'ext': 'mp4',
+            'title': 'Practice: Doc Rivers - 2/16/16',
+            'description': 'Head Coach Doc Rivers addresses the media following practice.',
+            'upload_date': '20160217',
+            'timestamp': 1455672000,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#',
+        'info_dict': {
+            'id': 'timberwolves',
+            'title': 'Shootaround Access - Dec. 12 | Andrew Wiggins',
+        },
+        'playlist_count': 30,
+        'params': {
+            # Downloading the whole playlist takes too long
+            'playlist_items': '1-30',
+        },
+    }, {
+        'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#',
+        'info_dict': {
+            'id': 'Wigginsmp4',
+            'ext': 'mp4',
+            'title': 'Shootaround Access - Dec. 12 | Andrew Wiggins',
+            'description': 'Wolves rookie Andrew Wiggins addresses the media after Friday\'s shootaround.',
+            'upload_date': '20141212',
+            'timestamp': 1418418600,
+        },
+        'params': {
+            'noplaylist': True,
+            # m3u8 download
+            'skip_download': True,
+        },
     }]
 
+    _PAGE_SIZE = 30
+
+    def _fetch_page(self, team, video_id, page):
+        search_url = 'http://searchapp2.nba.com/nba-search/query.jsp?' + compat_urllib_parse_urlencode({
+            'type': 'teamvideo',
+            'start': page * self._PAGE_SIZE + 1,
+            'npp': (page + 1) * self._PAGE_SIZE + 1,
+            'sort': 'recent',
+            'output': 'json',
+            'site': team,
+        })
+        results = self._download_json(
+            search_url, video_id, note='Downloading page %d of playlist data' % page)['results'][0]
+        for item in results:
+            yield self.url_result(compat_urlparse.urljoin('http://www.nba.com/', item['url']))
+
+    def _extract_playlist(self, orig_path, video_id, webpage):
+        team = orig_path.split('/')[0]
+
+        if self._downloader.params.get('noplaylist'):
+            self.to_screen('Downloading just video because of --no-playlist')
+            video_path = self._search_regex(
+                r'nbaVideoCore\.firstVideo\s*=\s*\'([^\']+)\';', webpage, 'video path')
+            video_url = 'http://www.nba.com/%s/video/%s' % (team, video_path)
+            return self.url_result(video_url)
+
+        self.to_screen('Downloading playlist - add --no-playlist to just download video')
+        playlist_title = self._og_search_title(webpage, fatal=False)
+        entries = OnDemandPagedList(
+            functools.partial(self._fetch_page, team, video_id),
+            self._PAGE_SIZE, use_cache=True)
+
+        return self.playlist_result(entries, team, playlist_title)
+
     def _real_extract(self, url):
         path, video_id = re.match(self._VALID_URL, url).groups()
+        orig_path = path
         if path.startswith('nba/'):
             path = path[3:]
+
+        if 'video/' not in path:
+            webpage = self._download_webpage(url, video_id)
+            path = remove_start(self._search_regex(r'data-videoid="([^"]+)"', webpage, 'video id'), '/')
+
+            if path == '{{id}}':
+                return self._extract_playlist(orig_path, video_id, webpage)
+
+            # See prepareContentId() of pkgCvp.js
+            if path.startswith('video/teams'):
+                path = 'video/channels/proxy/' + path[6:]
+
         video_info = self._download_xml('http://www.nba.com/%s.xml' % path, video_id)
-        video_id = xpath_text(video_info, 'slug')
+        video_id = os.path.splitext(xpath_text(video_info, 'slug'))[0]
         title = xpath_text(video_info, 'headline')
         description = xpath_text(video_info, 'description')
         duration = parse_duration(xpath_text(video_info, 'length'))
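The new playlist path is built on `OnDemandPagedList`, which wraps the `_fetch_page` callback so that slicing (e.g. `--playlist-items 1-30`) only issues the page requests it actually needs, with `use_cache=True` avoiding refetches. Roughly, assuming the utility's behaviour as used above:

    import functools
    from youtube_dl.utils import OnDemandPagedList

    def fetch_page(tag, page):       # page numbers start at 0
        print('fetching page %d' % page)
        return [{'id': '%s-%d' % (tag, page * 3 + i)} for i in range(3)]

    entries = OnDemandPagedList(
        functools.partial(fetch_page, 'demo'), 3, use_cache=True)

    # Slicing only triggers the fetches needed to cover the range:
    print(entries.getslice(0, 4))    # fetches pages 0 and 1, nothing more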
diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py
index 2202cfa334765653b44e180306d8e40e0be2049d..f694e210b1dadceb030cb24f6498abe30de5b976 100644 (file)
@@ -3,9 +3,8 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_HTTPError
+from .theplatform import ThePlatformIE
 from ..utils import (
-    ExtractorError,
     find_xpath_attr,
     lowercase_escape,
     smuggle_url,
@@ -24,6 +23,9 @@ class NBCIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
                 'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
+                'timestamp': 1424246400,
+                'upload_date': '20150218',
+                'uploader': 'NBCU-COM',
             },
             'params': {
                 # m3u8 download
@@ -47,6 +49,9 @@ class NBCIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'Star Wars Teaser',
                 'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442',
+                'timestamp': 1417852800,
+                'upload_date': '20141206',
+                'uploader': 'NBCU-COM',
             },
             'params': {
                 # m3u8 download
@@ -58,6 +63,23 @@ class NBCIE(InfoExtractor):
             # This video has expired but with an escaped embedURL
             'url': 'http://www.nbc.com/parenthood/episode-guide/season-5/just-like-at-home/515',
             'only_matching': True,
+        },
+        {
+            # HLS streams requires the 'hdnea3' cookie
+            'url': 'http://www.nbc.com/Kings/video/goliath/n1806',
+            'info_dict': {
+                'id': 'n1806',
+                'ext': 'mp4',
+                'title': 'Goliath',
+                'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.',
+                'timestamp': 1237100400,
+                'upload_date': '20090315',
+                'uploader': 'NBCU-COM',
+            },
+            'params': {
+                'skip_download': True,
+            },
+            'skip': 'Only works from US',
         }
     ]
 
@@ -75,6 +97,7 @@ class NBCIE(InfoExtractor):
             theplatform_url = 'http:' + theplatform_url
         return {
             '_type': 'url_transparent',
+            'ie_key': 'ThePlatform',
             'url': smuggle_url(theplatform_url, {'source_url': url}),
             'id': video_id,
         }
@@ -90,6 +113,9 @@ class NBCSportsVPlayerIE(InfoExtractor):
             'ext': 'flv',
             'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
             'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
+            'timestamp': 1426270238,
+            'upload_date': '20150313',
+            'uploader': 'NBCU-SPORTS',
         }
     }, {
         'url': 'http://vplayer.nbcsports.com/p/BxmELC/nbc_embedshare/select/_hqLjQ95yx8Z',
@@ -112,7 +138,7 @@ class NBCSportsVPlayerIE(InfoExtractor):
 
 class NBCSportsIE(InfoExtractor):
     # Does not include https because its certificate is invalid
-    _VALID_URL = r'http://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
+    _VALID_URL = r'https?://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
 
     _TEST = {
         'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
@@ -121,6 +147,9 @@ class NBCSportsIE(InfoExtractor):
             'ext': 'flv',
             'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
             'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
+            'uploader': 'NBCU-SPORTS',
+            'upload_date': '20150330',
+            'timestamp': 1427726529,
         }
     }
 
@@ -131,10 +160,37 @@ class NBCSportsIE(InfoExtractor):
             NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer')
 
 
-class NBCNewsIE(InfoExtractor):
-    _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
+class CSNNEIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.csnne\.com/video/(?P<id>[0-9a-z-]+)'
+
+    _TEST = {
+        'url': 'http://www.csnne.com/video/snc-evening-update-wright-named-red-sox-no-5-starter',
+        'info_dict': {
+            'id': 'yvBLLUgQ8WU0',
+            'ext': 'mp4',
+            'title': 'SNC evening update: Wright named Red Sox\' No. 5 starter.',
+            'description': 'md5:1753cfee40d9352b19b4c9b3e589b9e3',
+            'timestamp': 1459369979,
+            'upload_date': '20160330',
+            'uploader': 'NBCU-SPORTS',
+        }
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        return {
+            '_type': 'url_transparent',
+            'ie_key': 'ThePlatform',
+            'url': self._html_search_meta('twitter:player:stream', webpage),
+            'display_id': display_id,
+        }
+
+
+class NBCNewsIE(ThePlatformIE):
+    _VALID_URL = r'''(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/
         (?:video/.+?/(?P<id>\d+)|
-        (?:watch|feature|nightly-news)/[^/]+/(?P<title>.+))
+        ([^/]+/)*(?:.*-)?(?P<mpx_id>[^/?]+))
         '''
 
     _TESTS = [
@@ -149,40 +205,92 @@ class NBCNewsIE(InfoExtractor):
             },
         },
         {
-            'url': 'http://www.nbcnews.com/feature/edward-snowden-interview/how-twitter-reacted-snowden-interview-n117236',
-            'md5': 'b2421750c9f260783721d898f4c42063',
+            'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
+            'md5': 'af1adfa51312291a017720403826bb64',
             'info_dict': {
-                'id': 'I1wpAI_zmhsQ',
+                'id': '269389891880',
                 'ext': 'mp4',
                 'title': 'How Twitter Reacted To The Snowden Interview',
                 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
+                'uploader': 'NBCU-NEWS',
+                'timestamp': 1401363060,
+                'upload_date': '20140529',
             },
-            'add_ie': ['ThePlatform'],
         },
         {
             'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156',
             'md5': 'fdbf39ab73a72df5896b6234ff98518a',
             'info_dict': {
-                'id': 'Wjf9EDR3A_60',
+                'id': '529953347624',
                 'ext': 'mp4',
                 'title': 'FULL EPISODE: Family Business',
                 'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
             },
+            'skip': 'This page is unavailable.',
         },
         {
             'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
-            'md5': 'b5dda8cddd8650baa0dcb616dd2cf60d',
+            'md5': '73135a2e0ef819107bbb55a5a9b2a802',
             'info_dict': {
-                'id': 'sekXqyTVnmN3',
+                'id': '394064451844',
                 'ext': 'mp4',
                 'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
                 'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
+                'timestamp': 1423104900,
+                'uploader': 'NBCU-NEWS',
+                'upload_date': '20150205',
+            },
+        },
+        {
+            'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
+            'md5': 'a49e173825e5fcd15c13fc297fced39d',
+            'info_dict': {
+                'id': '529953347624',
+                'ext': 'mp4',
+                'title': 'Volkswagen U.S. Chief:\xa0 We Have Totally Screwed Up',
+                'description': 'md5:c8be487b2d80ff0594c005add88d8351',
+                'upload_date': '20150922',
+                'timestamp': 1442917800,
+                'uploader': 'NBCU-NEWS',
+            },
+        },
+        {
+            'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
+            'md5': '118d7ca3f0bea6534f119c68ef539f71',
+            'info_dict': {
+                'id': '669831235788',
+                'ext': 'mp4',
+                'title': 'See the aurora borealis from space in stunning new NASA video',
+                'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
+                'upload_date': '20160420',
+                'timestamp': 1461152093,
+                'uploader': 'NBCU-NEWS',
+            },
+        },
+        {
+            'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
+            'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
+            'info_dict': {
+                'id': '314487875924',
+                'ext': 'mp4',
+                'title': 'The chaotic GOP immigration vote',
+                'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'timestamp': 1406937606,
+                'upload_date': '20140802',
+                'uploader': 'NBCU-NEWS',
+                'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'],
             },
         },
         {
             'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952',
             'only_matching': True,
         },
+        {
+            # From http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html
+            'url': 'http://www.nbcnews.com/widget/video-embed/701714499682',
+            'only_matching': True,
+        },
     ]
 
     def _real_extract(self, url):
@@ -202,72 +310,28 @@ class NBCNewsIE(InfoExtractor):
             }
         else:
             # "feature" and "nightly-news" pages use theplatform.com
-            title = mobj.group('title')
-            webpage = self._download_webpage(url, title)
-            bootstrap_json = self._search_regex(
-                r'var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$',
-                webpage, 'bootstrap json', flags=re.MULTILINE)
-            bootstrap = self._parse_json(bootstrap_json, video_id)
-            info = bootstrap['results'][0]['video']
-            mpxid = info['mpxId']
-
-            base_urls = [
-                info['fallbackPlaylistUrl'],
-                info['associatedPlaylistUrl'],
-            ]
-
-            for base_url in base_urls:
-                if not base_url:
-                    continue
-                playlist_url = base_url + '?form=MPXNBCNewsAPI'
-
-                try:
-                    all_videos = self._download_json(playlist_url, title)
-                except ExtractorError as ee:
-                    if isinstance(ee.cause, compat_HTTPError):
-                        continue
-                    raise
-
-                if not all_videos or 'videos' not in all_videos:
-                    continue
-
-                try:
-                    info = next(v for v in all_videos['videos'] if v['mpxId'] == mpxid)
-                    break
-                except StopIteration:
-                    continue
-
-            if info is None:
-                raise ExtractorError('Could not find video in playlists')
+            video_id = mobj.group('mpx_id')
+            if not video_id.isdigit():
+                webpage = self._download_webpage(url, video_id)
+                info = None
+                bootstrap_json = self._search_regex(
+                    [r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$',
+                     r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"'],
+                    webpage, 'bootstrap json', default=None)
+                bootstrap = self._parse_json(
+                    bootstrap_json, video_id, transform_source=unescapeHTML)
+                if 'results' in bootstrap:
+                    info = bootstrap['results'][0]['video']
+                elif 'video' in bootstrap:
+                    info = bootstrap['video']
+                else:
+                    info = bootstrap
+                video_id = info['mpxId']
 
             return {
-                '_type': 'url',
-                # We get the best quality video
-                'url': info['videoAssets'][-1]['publicUrl'],
-                'ie_key': 'ThePlatform',
+                '_type': 'url_transparent',
+                'id': video_id,
+                # http://feed.theplatform.com/f/2E2eJC/nbcnews also works
+                'url': 'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews?byId=%s' % video_id,
+                'ie_key': 'ThePlatformFeed',
             }
-
-
-class MSNBCIE(InfoExtractor):
-    # https URLs redirect to corresponding http ones
-    _VALID_URL = r'http://www\.msnbc\.com/[^/]+/watch/(?P<id>[^/]+)'
-    _TEST = {
-        'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
-        'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
-        'info_dict': {
-            'id': 'n_hayes_Aimm_140801_272214',
-            'ext': 'mp4',
-            'title': 'The chaotic GOP immigration vote',
-            'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
-            'thumbnail': 're:^https?://.*\.jpg$',
-            'timestamp': 1406937606,
-            'upload_date': '20140802',
-            'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'],
-        },
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-        embed_url = self._html_search_meta('embedURL', webpage)
-        return self.url_result(embed_url)
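With the standalone MSNBCIE removed, its test moves under NBCNewsIE, whose widened `_VALID_URL` now covers nbcnews.com, today.com and msnbc.com. The pattern does double duty: legacy `/video/<show>/<digits>` links fill the `id` group, while modern slug URLs end in the numeric mpx id and fill `mpx_id`, which is what lets `_real_extract` skip the webpage download entirely when that tail is already numeric. A quick check of the verbose pattern:

    import re

    _VALID_URL = r'''(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/
        (?:video/.+?/(?P<id>\d+)|
        ([^/]+/)*(?:.*-)?(?P<mpx_id>[^/?]+))
        '''

    m = re.match(_VALID_URL, 'http://www.today.com/video/see-the-aurora-'
                 'borealis-from-space-in-stunning-new-nasa-video-669831235788')
    assert m.group('mpx_id') == '669831235788'
    assert m.group('mpx_id').isdigit()    # -> no webpage fetch needed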
diff --git a/youtube_dl/extractor/ndtv.py b/youtube_dl/extractor/ndtv.py
index 2a1ca80df797f0abe63cc6327c5e283965865f70..96528f6499d1e02c5208e61fe8abd1f606b29392 100644 (file)
@@ -1,19 +1,18 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import (
-    month_by_name,
     int_or_none,
+    remove_end,
+    unified_strdate,
 )
 
 
 class NDTVIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?ndtv\.com/video/player/[^/]*/[^/]*/(?P<id>[a-z0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?ndtv\.com/video/(?:[^/]+/)+[^/?^&]+-(?P<id>\d+)'
 
     _TEST = {
-        'url': 'http://www.ndtv.com/video/player/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal/300710',
+        'url': 'http://www.ndtv.com/video/news/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal-300710',
         'md5': '39f992dbe5fb531c395d8bbedb1e5e88',
         'info_dict': {
             'id': '300710',
@@ -22,7 +21,7 @@ class NDTVIE(InfoExtractor):
             'description': 'md5:ab2d4b4a6056c5cb4caa6d729deabf02',
             'upload_date': '20131208',
             'duration': 1327,
-            'thumbnail': 'http://i.ndtvimg.com/video/images/vod/medium/2013-12/big_300710_1386518307.jpg',
+            'thumbnail': 're:https?://.*\.jpg',
         },
     }
 
@@ -30,36 +29,19 @@ class NDTVIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
+        title = remove_end(self._og_search_title(webpage), ' - NDTV')
+
         filename = self._search_regex(
             r"__filename='([^']+)'", webpage, 'video filename')
-        video_url = ('http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' %
-                     filename)
+        video_url = 'http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' % filename
 
         duration = int_or_none(self._search_regex(
             r"__duration='([^']+)'", webpage, 'duration', fatal=False))
 
-        date_m = re.search(r'''(?x)
-            <p\s+class="vod_dateline">\s*
-                Published\s+On:\s*
-                (?P<monthname>[A-Za-z]+)\s+(?P<day>[0-9]+),\s*(?P<year>[0-9]+)
-            ''', webpage)
-        upload_date = None
-
-        if date_m is not None:
-            month = month_by_name(date_m.group('monthname'))
-            if month is not None:
-                upload_date = '%s%02d%02d' % (
-                    date_m.group('year'), month, int(date_m.group('day')))
-
-        description = self._og_search_description(webpage)
-        READ_MORE = ' (Read more)'
-        if description.endswith(READ_MORE):
-            description = description[:-len(READ_MORE)]
+        upload_date = unified_strdate(self._html_search_meta(
+            'publish-date', webpage, 'upload date', fatal=False))
 
-        title = self._og_search_title(webpage)
-        TITLE_SUFFIX = ' - NDTV'
-        if title.endswith(TITLE_SUFFIX):
-            title = title[:-len(TITLE_SUFFIX)]
+        description = remove_end(self._og_search_description(webpage), ' (Read more)')
 
         return {
             'id': video_id,
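The two hand-rolled suffix strippers (for the ' - NDTV' title tail and the ' (Read more)' description tail) collapse into the `remove_end` utility, which amounts to the following, re-implemented here only for illustration:

    def remove_end(s, end):
        # drop a known suffix if present; pass anything else through untouched
        if s is not None and s.endswith(end):
            return s[:-len(end)]
        return s

    assert remove_end('Some Headline - NDTV', ' - NDTV') == 'Some Headline'
    assert remove_end(None, ' - NDTV') is None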
diff --git a/youtube_dl/extractor/nerdist.py b/youtube_dl/extractor/nerdist.py
deleted file mode 100644 (file)
index c6dc34b..0000000
+++ /dev/null
@@ -1,80 +0,0 @@
-# encoding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-
-from ..utils import (
-    determine_ext,
-    parse_iso8601,
-    xpath_text,
-)
-
-
-class NerdistIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?nerdist\.com/vepisode/(?P<id>[^/?#]+)'
-    _TEST = {
-        'url': 'http://www.nerdist.com/vepisode/exclusive-which-dc-characters-w',
-        'md5': '3698ed582931b90d9e81e02e26e89f23',
-        'info_dict': {
-            'display_id': 'exclusive-which-dc-characters-w',
-            'id': 'RPHpvJyr',
-            'ext': 'mp4',
-            'title': 'Your TEEN TITANS Revealed! Who\'s on the show?',
-            'thumbnail': 're:^https?://.*/thumbs/.*\.jpg$',
-            'description': 'Exclusive: Find out which DC Comics superheroes will star in TEEN TITANS Live-Action TV Show on Nerdist News with Jessica Chobot!',
-            'uploader': 'Eric Diaz',
-            'upload_date': '20150202',
-            'timestamp': 1422892808,
-        }
-    }
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-
-        video_id = self._search_regex(
-            r'''(?x)<script\s+(?:type="text/javascript"\s+)?
-                src="https?://content\.nerdist\.com/players/([a-zA-Z0-9_]+)-''',
-            webpage, 'video ID')
-        timestamp = parse_iso8601(self._html_search_meta(
-            'shareaholic:article_published_time', webpage, 'upload date'))
-        uploader = self._html_search_meta(
-            'shareaholic:article_author_name', webpage, 'article author')
-
-        doc = self._download_xml(
-            'http://content.nerdist.com/jw6/%s.xml' % video_id, video_id)
-        video_info = doc.find('.//item')
-        title = xpath_text(video_info, './title', fatal=True)
-        description = xpath_text(video_info, './description')
-        thumbnail = xpath_text(
-            video_info, './{http://rss.jwpcdn.com/}image', 'thumbnail')
-
-        formats = []
-        for source in video_info.findall('./{http://rss.jwpcdn.com/}source'):
-            vurl = source.attrib['file']
-            ext = determine_ext(vurl)
-            if ext == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
-                    vurl, video_id, entry_protocol='m3u8_native', ext='mp4',
-                    preference=0))
-            elif ext == 'smil':
-                formats.extend(self._extract_smil_formats(
-                    vurl, video_id, fatal=False
-                ))
-            else:
-                formats.append({
-                    'format_id': ext,
-                    'url': vurl,
-                })
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'display_id': display_id,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'timestamp': timestamp,
-            'formats': formats,
-            'uploader': uploader,
-        }
diff --git a/youtube_dl/extractor/neteasemusic.py b/youtube_dl/extractor/neteasemusic.py
index 7830616f8fb9498c058504d52c199c6a69a9d98b..978a05841ce68161330f9db24169dd330e51efc1 100644 (file)
@@ -8,7 +8,7 @@ import re
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_parse,
+    compat_urllib_parse_urlencode,
     compat_str,
     compat_itertools_count,
 )
@@ -89,6 +89,7 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
             'timestamp': 1431878400,
             'description': 'md5:a10a54589c2860300d02e1de821eb2ef',
         },
+        'skip': 'Blocked outside Mainland China',
     }, {
         'note': 'No lyrics translation.',
         'url': 'http://music.163.com/#/song?id=29822014',
@@ -101,6 +102,7 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
             'timestamp': 1419523200,
             'description': 'md5:a4d8d89f44656af206b7b2555c0bce6c',
         },
+        'skip': 'Blocked outside Mainland China',
     }, {
         'note': 'No lyrics.',
         'url': 'http://music.163.com/song?id=17241424',
@@ -112,6 +114,7 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
             'upload_date': '20080211',
             'timestamp': 1202745600,
         },
+        'skip': 'Blocked outside Mainland China',
     }, {
         'note': 'Has translated name.',
         'url': 'http://music.163.com/#/song?id=22735043',
@@ -124,7 +127,8 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
             'upload_date': '20100127',
             'timestamp': 1264608000,
             'alt_title': '说出愿望吧(Genie)',
-        }
+        },
+        'skip': 'Blocked outside Mainland China',
     }]
 
     def _process_lyrics(self, lyrics_info):
@@ -153,7 +157,7 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
             'ids': '[%s]' % song_id
         }
         info = self.query_api(
-            'song/detail?' + compat_urllib_parse.urlencode(params),
+            'song/detail?' + compat_urllib_parse_urlencode(params),
             song_id, 'Downloading song info')['songs'][0]
 
         formats = self.extract_formats(info)
@@ -192,6 +196,7 @@ class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
             'title': 'B\'day',
         },
         'playlist_count': 23,
+        'skip': 'Blocked outside Mainland China',
     }
 
     def _real_extract(self, url):
@@ -223,6 +228,7 @@ class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
             'title': '张惠妹 - aMEI;阿密特',
         },
         'playlist_count': 50,
+        'skip': 'Blocked outside Mainland China',
     }, {
         'note': 'Singer has translated name.',
         'url': 'http://music.163.com/#/artist?id=124098',
@@ -231,6 +237,7 @@ class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
             'title': '李昇基 - 이승기',
         },
         'playlist_count': 50,
+        'skip': 'Blocked outside Mainland China',
     }]
 
     def _real_extract(self, url):
@@ -266,6 +273,7 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE):
             'description': 'md5:12fd0819cab2965b9583ace0f8b7b022'
         },
         'playlist_count': 99,
+        'skip': 'Blocked outside Mainland China',
     }, {
         'note': 'Toplist/Charts sample',
         'url': 'http://music.163.com/#/discover/toplist?id=3733003',
@@ -275,6 +283,7 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE):
             'description': 'md5:73ec782a612711cadc7872d9c1e134fc',
         },
         'playlist_count': 50,
+        'skip': 'Blocked outside Mainland China',
     }]
 
     def _real_extract(self, url):
@@ -314,6 +323,7 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
             'creator': '白雅言',
             'upload_date': '20150520',
         },
+        'skip': 'Blocked outside Mainland China',
     }
 
     def _real_extract(self, url):
@@ -357,6 +367,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
             'upload_date': '20150613',
             'duration': 900,
         },
+        'skip': 'Blocked outside Mainland China',
     }, {
         'note': 'This program has accompanying songs.',
         'url': 'http://music.163.com/#/program?id=10141022',
@@ -366,6 +377,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
             'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
         },
         'playlist_count': 4,
+        'skip': 'Blocked outside Mainland China',
     }, {
         'note': 'This program has accompanying songs.',
         'url': 'http://music.163.com/#/program?id=10141022',
@@ -379,7 +391,8 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
         },
         'params': {
             'noplaylist': True
-        }
+        },
+        'skip': 'Blocked outside Mainland China',
     }]
 
     def _real_extract(self, url):
@@ -438,6 +451,7 @@ class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
             'description': 'md5:766220985cbd16fdd552f64c578a6b15'
         },
         'playlist_mincount': 40,
+        'skip': 'Blocked outside Mainland China',
     }
     _PAGE_SIZE = 1000
 
diff --git a/youtube_dl/extractor/newgrounds.py b/youtube_dl/extractor/newgrounds.py
index cd117b04edeff88d90842f2ed8e15a8c43bde714..7059403239ce19ac8b2861fa4af0dde93c98467b 100644 (file)
@@ -7,8 +7,8 @@ from .common import InfoExtractor
 
 
 class NewgroundsIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/audio/listen/(?P<id>[0-9]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:audio/listen|portal/view)/(?P<id>[0-9]+)'
+    _TESTS = [{
         'url': 'http://www.newgrounds.com/audio/listen/549479',
         'md5': 'fe6033d297591288fa1c1f780386f07a',
         'info_dict': {
@@ -17,7 +17,16 @@ class NewgroundsIE(InfoExtractor):
             'title': 'B7 - BusMode',
             'uploader': 'Burn7',
         }
-    }
+    }, {
+        'url': 'http://www.newgrounds.com/portal/view/673111',
+        'md5': '3394735822aab2478c31b1004fe5e5bc',
+        'info_dict': {
+            'id': '673111',
+            'ext': 'mp4',
+            'title': 'Dancin',
+            'uploader': 'Squirrelman82',
+        },
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -25,9 +34,11 @@ class NewgroundsIE(InfoExtractor):
         webpage = self._download_webpage(url, music_id)
 
         title = self._html_search_regex(
-            r',"name":"([^"]+)",', webpage, 'music title')
+            r'<title>([^>]+)</title>', webpage, 'title')
+
         uploader = self._html_search_regex(
-            r',"artist":"([^"]+)",', webpage, 'music uploader')
+            [r',"artist":"([^"]+)",', r'[\'"]owner[\'"]\s*:\s*[\'"]([^\'"]+)[\'"],'],
+            webpage, 'uploader')
 
         music_url_json_string = self._html_search_regex(
             r'({"url":"[^"]+"),', webpage, 'music url') + '}'
diff --git a/youtube_dl/extractor/newstube.py b/youtube_dl/extractor/newstube.py
index 5a9e73cd66a1b1224bdec848722f5e9d14f65c38..0092b85ceaa27e9b190a05ea1d4dff299351b6c7 100644 (file)
@@ -4,24 +4,24 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+)
 
 
 class NewstubeIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?newstube\.ru/media/(?P<id>.+)'
     _TEST = {
         'url': 'http://www.newstube.ru/media/telekanal-cnn-peremestil-gorod-slavyansk-v-krym',
+        'md5': '801eef0c2a9f4089fa04e4fe3533abdc',
         'info_dict': {
             'id': '728e0ef2-e187-4012-bac0-5a081fdcb1f6',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Телеканал CNN переместил город Славянск в Крым',
             'description': 'md5:419a8c9f03442bc0b0a794d689360335',
             'duration': 31.05,
         },
-        'params': {
-            # rtmp download
-            'skip_download': True,
-        },
     }
 
     def _real_extract(self, url):
@@ -62,7 +62,6 @@ class NewstubeIE(InfoExtractor):
             server = media_location.find(ns('./Server')).text
             app = media_location.find(ns('./App')).text
             media_id = stream_info.find(ns('./Id')).text
-            quality_id = stream_info.find(ns('./QualityId')).text
             name = stream_info.find(ns('./Name')).text
             width = int(stream_info.find(ns('./Width')).text)
             height = int(stream_info.find(ns('./Height')).text)
@@ -74,12 +73,38 @@ class NewstubeIE(InfoExtractor):
                 'rtmp_conn': ['S:%s' % session_id, 'S:%s' % media_id, 'S:n2'],
                 'page_url': url,
                 'ext': 'flv',
-                'format_id': quality_id,
-                'format_note': name,
+                'format_id': 'rtmp' + ('-%s' % name if name else ''),
                 'width': width,
                 'height': height,
             })
 
+        sources_data = self._download_json(
+            'http://www.newstube.ru/player2/getsources?guid=%s' % video_guid,
+            video_guid, fatal=False)
+        if sources_data:
+            for source in sources_data.get('Sources', []):
+                source_url = source.get('Src')
+                if not source_url:
+                    continue
+                height = int_or_none(source.get('Height'))
+                f = {
+                    'format_id': 'http' + ('-%dp' % height if height else ''),
+                    'url': source_url,
+                    'width': int_or_none(source.get('Width')),
+                    'height': height,
+                }
+                source_type = source.get('Type')
+                if source_type:
+                    mobj = re.search(r'codecs="([^,]+),\s*([^"]+)"', source_type)
+                    if mobj:
+                        vcodec, acodec = mobj.groups()
+                        f.update({
+                            'vcodec': vcodec,
+                            'acodec': acodec,
+                        })
+                formats.append(f)
+
+        self._check_formats(formats, video_guid)
         self._sort_formats(formats)
 
         return {
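The new HTTP-sources branch above recovers codec names from a MIME `Type` value of the form `video/mp4; codecs="..."`. The same two-group regex in isolation, run against a made-up but typical value:

    import re

    source_type = 'video/mp4; codecs="avc1.42E01E, mp4a.40.2"'
    mobj = re.search(r'codecs="([^,]+),\s*([^"]+)"', source_type)
    if mobj:
        vcodec, acodec = mobj.groups()
        print(vcodec, acodec)    # avc1.42E01E mp4a.40.2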
diff --git a/youtube_dl/extractor/nextmedia.py b/youtube_dl/extractor/nextmedia.py
index d1688457f28d298e7de2921dcf00e3fba6c087a2..aae7aeeebb8e2adebd2669bcd899caec3432275d 100644 (file)
@@ -7,7 +7,7 @@ from ..utils import parse_iso8601
 
 class NextMediaIE(InfoExtractor):
     IE_DESC = '蘋果日報'
-    _VALID_URL = r'http://hk.apple.nextmedia.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)'
+    _VALID_URL = r'https?://hk.apple.nextmedia.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://hk.apple.nextmedia.com/realtime/news/20141108/53109199',
         'md5': 'dff9fad7009311c421176d1ac90bfe4f',
@@ -68,7 +68,7 @@ class NextMediaIE(InfoExtractor):
 
 class NextMediaActionNewsIE(NextMediaIE):
     IE_DESC = '蘋果日報 - 動新聞'
-    _VALID_URL = r'http://hk.dv.nextmedia.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+'
+    _VALID_URL = r'https?://hk.dv.nextmedia.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+'
     _TESTS = [{
         'url': 'http://hk.dv.nextmedia.com/actionnews/hit/20150121/19009428/20061460',
         'md5': '05fce8ffeed7a5e00665d4b7cf0f9201',
@@ -93,7 +93,7 @@ class NextMediaActionNewsIE(NextMediaIE):
 
 class AppleDailyIE(NextMediaIE):
     IE_DESC = '臺灣蘋果日報'
-    _VALID_URL = r'http://(www|ent).appledaily.com.tw/(?:animation|appledaily|enews|realtimenews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
+    _VALID_URL = r'https?://(www|ent).appledaily.com.tw/(?:animation|appledaily|enews|realtimenews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
     _TESTS = [{
         'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694',
         'md5': 'a843ab23d150977cc55ef94f1e2c1e4d',
diff --git a/youtube_dl/extractor/nextmovie.py b/youtube_dl/extractor/nextmovie.py
index 657ae77a0112328361433a9eb46b7daeb82ed8c6..9ccd7d774f9df3084a0429f2ff27ef2e8c866d6f 100644 (file)
@@ -2,7 +2,7 @@
 from __future__ import unicode_literals
 
 from .mtv import MTVServicesInfoExtractor
-from ..compat import compat_urllib_parse
+from ..compat import compat_urllib_parse_urlencode
 
 
 class NextMovieIE(MTVServicesInfoExtractor):
@@ -20,7 +20,7 @@ class NextMovieIE(MTVServicesInfoExtractor):
     }]
 
     def _get_feed_query(self, uri):
-        return compat_urllib_parse.urlencode({
+        return compat_urllib_parse_urlencode({
             'feed': '1505',
             'mgid': uri,
         })
diff --git a/youtube_dl/extractor/nfb.py b/youtube_dl/extractor/nfb.py
index 5bd15f7a72f5aeb49d91391e11ddffaa1a52f44f..adcc636bc32c062fec74074044783e452a3725d9 100644 (file)
@@ -1,8 +1,14 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse
-from ..utils import sanitized_Request
+from ..utils import (
+    clean_html,
+    determine_ext,
+    int_or_none,
+    qualities,
+    urlencode_postdata,
+    xpath_text,
+)
 
 
 class NFBIE(InfoExtractor):
@@ -14,12 +20,12 @@ class NFBIE(InfoExtractor):
         'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny',
         'info_dict': {
             'id': 'qallunaat_why_white_people_are_funny',
-            'ext': 'mp4',
+            'ext': 'flv',
             'title': 'Qallunaat! Why White People Are Funny ',
-            'description': 'md5:836d8aff55e087d04d9f6df554d4e038',
+            'description': 'md5:6b8e32dde3abf91e58857b174916620c',
             'duration': 3128,
+            'creator': 'Mark Sandiford',
             'uploader': 'Mark Sandiford',
-            'uploader_id': 'mark-sandiford',
         },
         'params': {
             # rtmp download
@@ -29,65 +35,78 @@ class NFBIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        page = self._download_webpage(
-            'https://www.nfb.ca/film/%s' % video_id, video_id,
-            'Downloading film page')
 
-        uploader_id = self._html_search_regex(r'<a class="director-link" href="/explore-all-directors/([^/]+)/"',
-                                              page, 'director id', fatal=False)
-        uploader = self._html_search_regex(r'<em class="director-name" itemprop="name">([^<]+)</em>',
-                                           page, 'director name', fatal=False)
-
-        request = sanitized_Request(
+        config = self._download_xml(
             'https://www.nfb.ca/film/%s/player_config' % video_id,
-            compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii'))
-        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
-        request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf')
-
-        config = self._download_xml(request, video_id, 'Downloading player config XML')
+            video_id, 'Downloading player config XML',
+            data=urlencode_postdata({'getConfig': 'true'}),
+            headers={
+                'Content-Type': 'application/x-www-form-urlencoded',
+                'X-NFB-Referer': 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf'
+            })
 
-        title = None
-        description = None
-        thumbnail = None
-        duration = None
-        formats = []
-
-        def extract_thumbnail(media):
-            thumbnails = {}
-            for asset in media.findall('assets/asset'):
-                thumbnails[asset.get('quality')] = asset.find('default/url').text
-            if not thumbnails:
-                return None
-            if 'high' in thumbnails:
-                return thumbnails['high']
-            return list(thumbnails.values())[0]
+        title, description, thumbnail, duration, uploader, author = [None] * 6
+        thumbnails, formats = [[]] * 2
+        subtitles = {}
 
         for media in config.findall('./player/stream/media'):
             if media.get('type') == 'posterImage':
-                thumbnail = extract_thumbnail(media)
+                quality_key = qualities(('low', 'high'))
+                thumbnails = []
+                for asset in media.findall('assets/asset'):
+                    asset_url = xpath_text(asset, 'default/url', default=None)
+                    if not asset_url:
+                        continue
+                    quality = asset.get('quality')
+                    thumbnails.append({
+                        'url': asset_url,
+                        'id': quality,
+                        'preference': quality_key(quality),
+                    })
             elif media.get('type') == 'video':
-                duration = int(media.get('duration'))
-                title = media.find('title').text
-                description = media.find('description').text
-                # It seems assets always go from lower to better quality, so no need to sort
+                title = xpath_text(media, 'title', fatal=True)
                 for asset in media.findall('assets/asset'):
-                    for x in asset:
+                    quality = asset.get('quality')
+                    height = int_or_none(self._search_regex(
+                        r'^(\d+)[pP]$', quality or '', 'height', default=None))
+                    for node in asset:
+                        streamer = xpath_text(node, 'streamerURI', default=None)
+                        if not streamer:
+                            continue
+                        play_path = xpath_text(node, 'url', default=None)
+                        if not play_path:
+                            continue
                         formats.append({
-                            'url': x.find('streamerURI').text,
-                            'app': x.find('streamerURI').text.split('/', 3)[3],
-                            'play_path': x.find('url').text,
+                            'url': streamer,
+                            'app': streamer.split('/', 3)[3],
+                            'play_path': play_path,
                             'rtmp_live': False,
-                            'ext': 'mp4',
-                            'format_id': '%s-%s' % (x.tag, asset.get('quality')),
+                            'ext': 'flv',
+                            'format_id': '%s-%s' % (node.tag, quality) if quality else node.tag,
+                            'height': height,
                         })
+                self._sort_formats(formats)
+                description = clean_html(xpath_text(media, 'description'))
+                uploader = xpath_text(media, 'author')
+                duration = int_or_none(media.get('duration'))
+                for subtitle in media.findall('./subtitles/subtitle'):
+                    subtitle_url = xpath_text(subtitle, 'url', default=None)
+                    if not subtitle_url:
+                        continue
+                    lang = xpath_text(subtitle, 'lang', default='en')
+                    subtitles.setdefault(lang, []).append({
+                        'url': subtitle_url,
+                        'ext': (subtitle.get('format') or determine_ext(subtitle_url)).lower(),
+                    })
 
         return {
             'id': video_id,
             'title': title,
             'description': description,
-            'thumbnail': thumbnail,
+            'thumbnails': thumbnails,
             'duration': duration,
+            'creator': uploader,
             'uploader': uploader,
-            'uploader_id': uploader_id,
             'formats': formats,
+            'subtitles': subtitles,
         }
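The poster-image rework ranks the `low`/`high` variants with the `qualities` utility, which maps a known quality name to its position in an ordered tuple and pushes unknown names below all of them; in short:

    from youtube_dl.utils import qualities

    quality_key = qualities(('low', 'high'))
    print(quality_key('low'))       # 0
    print(quality_key('high'))      # 1
    print(quality_key('other'))     # -1, sorts below both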
diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py
index 8d5ce46ad6bd2c5bdac2aff9998e8f278fca6376..b04d2111312d5a9e956762b10a30422ae5a8cd64 100644 (file)
@@ -7,11 +7,16 @@ import os
 from .common import InfoExtractor
 from ..compat import (
     compat_urlparse,
-    compat_urllib_parse,
-    compat_urllib_parse_urlparse
+    compat_urllib_parse_urlencode,
+    compat_urllib_parse_urlparse,
+    compat_str,
 )
 from ..utils import (
     unified_strdate,
+    determine_ext,
+    int_or_none,
+    parse_iso8601,
+    parse_duration,
 )
 
 
@@ -38,7 +43,7 @@ class NHLBaseInfoExtractor(InfoExtractor):
             parsed_url = compat_urllib_parse_urlparse(initial_video_url)
             filename, ext = os.path.splitext(parsed_url.path)
             path = '%s_sd%s' % (filename, ext)
-            data = compat_urllib_parse.urlencode({
+            data = compat_urllib_parse_urlencode({
                 'type': 'fvod',
                 'path': compat_urlparse.urlunparse(parsed_url[:2] + (path,) + parsed_url[3:])
             })
@@ -70,8 +75,8 @@ class NHLBaseInfoExtractor(InfoExtractor):
         return ret
 
 
-class NHLIE(NHLBaseInfoExtractor):
-    IE_NAME = 'nhl.com'
+class NHLVideocenterIE(NHLBaseInfoExtractor):
+    IE_NAME = 'nhl.com:videocenter'
     _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console|embed)?(?:\?(?:.*?[?&])?)(?:id|hlg|playlist)=(?P<id>[-0-9a-zA-Z,]+)'
 
     _TESTS = [{
@@ -186,8 +191,8 @@ class NHLNewsIE(NHLBaseInfoExtractor):
         return self._real_extract_video(video_id)
 
 
-class NHLVideocenterIE(NHLBaseInfoExtractor):
-    IE_NAME = 'nhl.com:videocenter'
+class NHLVideocenterCategoryIE(NHLBaseInfoExtractor):
+    IE_NAME = 'nhl.com:videocenter:category'
     IE_DESC = 'NHL videocenter category'
     _VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?[^(id=)]*catid=(?P<catid>[0-9]+)(?![&?]id=).*?)?$'
     _TEST = {
@@ -211,7 +216,7 @@ class NHLVideocenterIE(NHLBaseInfoExtractor):
             r'tab0"[^>]*?>(.*?)</td>',
             webpage, 'playlist title', flags=re.DOTALL).lower().capitalize()
 
-        data = compat_urllib_parse.urlencode({
+        data = compat_urllib_parse_urlencode({
             'cid': cat_id,
             # This is the default value
             'count': 12,
@@ -236,3 +241,86 @@ class NHLVideocenterIE(NHLBaseInfoExtractor):
             'id': cat_id,
             'entries': [self._extract_video(v) for v in videos],
         }
+
+
+class NHLIE(InfoExtractor):
+    IE_NAME = 'nhl.com'
+    _VALID_URL = r'https?://(?:www\.)?nhl\.com/([^/]+/)*c-(?P<id>\d+)'
+    _TESTS = [{
+        # type=video
+        'url': 'https://www.nhl.com/video/anisimov-cleans-up-mess/t-277752844/c-43663503',
+        'md5': '0f7b9a8f986fb4b4eeeece9a56416eaf',
+        'info_dict': {
+            'id': '43663503',
+            'ext': 'mp4',
+            'title': 'Anisimov cleans up mess',
+            'description': 'md5:a02354acdfe900e940ce40706939ca63',
+            'timestamp': 1461288600,
+            'upload_date': '20160422',
+        },
+    }, {
+        # type=article
+        'url': 'https://www.nhl.com/news/dennis-wideman-suspended/c-278258934',
+        'md5': '1f39f4ea74c1394dea110699a25b366c',
+        'info_dict': {
+            'id': '40784403',
+            'ext': 'mp4',
+            'title': 'Wideman suspended by NHL',
+            'description': 'Flames defenseman Dennis Wideman was banned 20 games for violation of Rule 40 (Physical Abuse of Officials)',
+            'upload_date': '20160204',
+            'timestamp': 1454544904,
+        },
+    }]
+
+    def _real_extract(self, url):
+        tmp_id = self._match_id(url)
+        video_data = self._download_json(
+            'https://nhl.bamcontent.com/nhl/id/v1/%s/details/web-v1.json' % tmp_id,
+            tmp_id)
+        if video_data.get('type') == 'article':
+            video_data = video_data['media']
+
+        video_id = compat_str(video_data['id'])
+        title = video_data['title']
+
+        formats = []
+        for playback in video_data.get('playbacks', []):
+            playback_url = playback.get('url')
+            if not playback_url:
+                continue
+            ext = determine_ext(playback_url)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    playback_url, video_id, 'mp4', 'm3u8_native',
+                    m3u8_id=playback.get('name', 'hls'), fatal=False))
+            else:
+                height = int_or_none(playback.get('height'))
+                formats.append({
+                    'format_id': playback.get('name', 'http' + ('-%dp' % height if height else '')),
+                    'url': playback_url,
+                    'width': int_or_none(playback.get('width')),
+                    'height': height,
+                })
+        self._sort_formats(formats, ('preference', 'width', 'height', 'tbr', 'format_id'))
+
+        thumbnails = []
+        for thumbnail_id, thumbnail_data in video_data.get('image', {}).get('cuts', {}).items():
+            thumbnail_url = thumbnail_data.get('src')
+            if not thumbnail_url:
+                continue
+            thumbnails.append({
+                'id': thumbnail_id,
+                'url': thumbnail_url,
+                'width': int_or_none(thumbnail_data.get('width')),
+                'height': int_or_none(thumbnail_data.get('height')),
+            })
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': video_data.get('description'),
+            'timestamp': parse_iso8601(video_data.get('date')),
+            'duration': parse_duration(video_data.get('duration')),
+            'thumbnails': thumbnails,
+            'formats': formats,
+        }
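The NHL rewrite passes an explicit field order to `_sort_formats`, overriding the default ranking with `('preference', 'width', 'height', 'tbr', 'format_id')`. Conceptually that is a stable multi-key ascending sort, best format last, something like this simplified sketch (the real helper has extra rules for missing values, protocols and codecs):

    formats = [
        {'format_id': 'hls-540p', 'width': 960, 'height': 540, 'tbr': 1800},
        {'format_id': 'hls-720p', 'width': 1280, 'height': 720, 'tbr': 3000},
    ]
    formats.sort(key=lambda f: tuple(
        f.get(field) or -1 for field in ('preference', 'width', 'height', 'tbr')))
    # youtube-dl sorts worst-first, so the default pick is formats[-1]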
diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py
index b62819ae529f2731e29d91d1071077af4fd03815..e960137913ff76f6c63d4d1e650e7d95d3747d3a 100644 (file)
@@ -2,7 +2,8 @@
 from __future__ import unicode_literals
 
 from .mtv import MTVServicesInfoExtractor
-from ..compat import compat_urllib_parse
+from ..compat import compat_urllib_parse_urlencode
+from ..utils import update_url_query
 
 
 class NickIE(MTVServicesInfoExtractor):
@@ -54,10 +55,33 @@ class NickIE(MTVServicesInfoExtractor):
     }]
 
     def _get_feed_query(self, uri):
-        return compat_urllib_parse.urlencode({
+        return compat_urllib_parse_urlencode({
             'feed': 'nick_arc_player_prime',
             'mgid': uri,
         })
 
     def _extract_mgid(self, webpage):
         return self._search_regex(r'data-contenturi="([^"]+)', webpage, 'mgid')
+
+
+class NickDeIE(MTVServicesInfoExtractor):
+    IE_NAME = 'nick.de'
+    _VALID_URL = r'https?://(?:www\.)?nick\.de/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'http://www.nick.de/playlist/3773-top-videos/videos/episode/17306-zu-wasser-und-zu-land-rauchende-erdnusse',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.nick.de/shows/342-icarly',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        mrss_url = update_url_query(self._search_regex(
+            r'data-mrss=(["\'])(?P<url>http.+?)\1', webpage, 'mrss url', group='url'),
+            {'siteKey': 'nick.de'})
+
+        return self._get_videos_info_from_url(mrss_url, video_id)
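`update_url_query`, used above to pin `siteKey=nick.de` onto the scraped MRSS URL, merges new parameters into whatever query string the URL already carries instead of blindly appending `?...`. For example:

    from youtube_dl.utils import update_url_query

    print(update_url_query(
        'http://example.com/feed?mgid=abc', {'siteKey': 'nick.de'}))
    # http://example.com/feed?mgid=abc&siteKey=nick.de (parameter order may vary)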
diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py
index 586e52a4a4f49151c55ed99baf9cadd136863bbd..dd75a48afcc9dfa4a728c600c836741785056770 100644 (file)
@@ -7,11 +7,10 @@ import datetime
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_parse,
+    compat_urllib_parse_urlencode,
     compat_urlparse,
 )
 from ..utils import (
-    encode_dict,
     ExtractorError,
     int_or_none,
     parse_duration,
@@ -19,6 +18,7 @@ from ..utils import (
     sanitized_Request,
     xpath_text,
     determine_ext,
+    urlencode_postdata,
 )
 
 
@@ -101,7 +101,7 @@ class NiconicoIE(InfoExtractor):
             'mail': username,
             'password': password,
         }
-        login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('utf-8')
+        login_data = urlencode_postdata(login_form_strs)
         request = sanitized_Request(
             'https://secure.nicovideo.jp/secure/login', login_data)
         login_results = self._download_webpage(
@@ -141,7 +141,7 @@ class NiconicoIE(InfoExtractor):
                 r'\'thumbPlayKey\'\s*:\s*\'(.*?)\'', ext_player_info, 'thumbPlayKey')
 
             # Get flv info
-            flv_info_data = compat_urllib_parse.urlencode({
+            flv_info_data = compat_urllib_parse_urlencode({
                 'k': thumb_play_key,
                 'v': video_id
             })
index d440313d545b18723614d8eb9b8dc738a4cf476c..06f2bda07dd5db2c54e1e0492f244dbf0fc5a526 100644 (file)
@@ -8,7 +8,6 @@ import hashlib
 from .common import InfoExtractor
 from ..compat import (
     compat_str,
-    compat_urllib_parse,
     compat_urlparse,
 )
 from ..utils import (
@@ -18,11 +17,12 @@ from ..utils import (
     float_or_none,
     parse_iso8601,
     sanitized_Request,
+    urlencode_postdata,
 )
 
 
 class NocoIE(InfoExtractor):
-    _VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
     _LOGIN_URL = 'http://noco.tv/do.php'
     _API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s'
     _SUB_LANG_TEMPLATE = '&sub_lang=%s'
@@ -75,7 +75,7 @@ class NocoIE(InfoExtractor):
             'username': username,
             'password': password,
         }
-        request = sanitized_Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
+        request = sanitized_Request(self._LOGIN_URL, urlencode_postdata(login_form))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')
 
         login = self._download_json(request, None, 'Logging in as %s' % username)
index 5952d136f7b3efd3e9f91843ba12de6a13d989ba..af44c3bb5714bc0079e3d1307782a8ff1fe5ba84 100644 (file)
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from .screenwavemedia import ScreenwaveMediaIE
 
 from ..utils import (
     unified_strdate,
@@ -9,10 +10,9 @@ from ..utils import (
 
 
 class NormalbootsIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?normalboots\.com/video/(?P<id>[0-9a-z-]*)/?$'
+    _VALID_URL = r'https?://(?:www\.)?normalboots\.com/video/(?P<id>[0-9a-z-]*)/?$'
     _TEST = {
         'url': 'http://normalboots.com/video/home-alone-games-jontron/',
-        'md5': '8bf6de238915dd501105b44ef5f1e0f6',
         'info_dict': {
             'id': 'home-alone-games-jontron',
             'ext': 'mp4',
@@ -22,9 +22,10 @@ class NormalbootsIE(InfoExtractor):
             'upload_date': '20140125',
         },
         'params': {
-            # rtmp download
+            # m3u8 download
             'skip_download': True,
         },
+        'add_ie': ['ScreenwaveMedia'],
     }
 
     def _real_extract(self, url):
@@ -38,16 +39,15 @@ class NormalbootsIE(InfoExtractor):
             r'<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
             webpage, 'date', fatal=False))
 
-        player_url = self._html_search_regex(
-            r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"',
-            webpage, 'player url')
-        player_page = self._download_webpage(player_url, video_id)
-        video_url = self._html_search_regex(
-            r"file:\s'(?P<file>[^']+\.mp4)'", player_page, 'file')
+        screenwavemedia_url = self._html_search_regex(
+            ScreenwaveMediaIE.EMBED_PATTERN, webpage, 'screenwave URL',
+            group='url')
 
         return {
+            '_type': 'url_transparent',
             'id': video_id,
-            'url': video_url,
+            'url': screenwavemedia_url,
+            'ie_key': ScreenwaveMediaIE.ie_key(),
             'title': self._og_search_title(webpage),
             'description': self._og_search_description(webpage),
             'thumbnail': self._og_search_thumbnail(webpage),
index 3f9c776ef665ab47624eeab7ba60f5754dbf213e..17671ad398b9e9a8148bceff74db678969d26d3f 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 class NovaIE(InfoExtractor):
     IE_DESC = 'TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz'
-    _VALID_URL = 'http://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)'
+    _VALID_URL = r'https?://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)'
     _TESTS = [{
         'url': 'http://tvnoviny.nova.cz/clanek/novinky/co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou.html?utm_source=tvnoviny&utm_medium=cpfooter&utm_campaign=novaplus',
         'info_dict': {
index d68c1ad7923ac56c6bef68343a20df24e4aa5ffb..3bbd4735502e113fcc46a07981ff5863c52fef15 100644 (file)
@@ -7,7 +7,6 @@ from ..compat import compat_urlparse
 from ..utils import (
     ExtractorError,
     NO_DEFAULT,
-    encode_dict,
     sanitized_Request,
     urlencode_postdata,
 )
@@ -17,7 +16,14 @@ class NovaMovIE(InfoExtractor):
     IE_NAME = 'novamov'
     IE_DESC = 'NovaMov'
 
-    _VALID_URL_TEMPLATE = r'http://(?:(?:www\.)?%(host)s/(?:file|video|mobile/#/videos)/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<id>[a-z\d]{13})'
+    _VALID_URL_TEMPLATE = r'''(?x)
+                            http://
+                                (?:
+                                    (?:www\.)?%(host)s/(?:file|video|mobile/\#/videos)/|
+                                    (?:(?:embed|www)\.)%(host)s/embed(?:\.php|/)?\?(?:.*?&)?\bv=
+                                )
+                                (?P<id>[a-z\d]{13})
+                            '''
     _VALID_URL = _VALID_URL_TEMPLATE % {'host': 'novamov\.com'}
 
     _HOST = 'www.novamov.com'
@@ -28,17 +34,7 @@ class NovaMovIE(InfoExtractor):
     _DESCRIPTION_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>[^<]+</h3><p>([^<]+)</p>'
     _URL_TEMPLATE = 'http://%s/video/%s'
 
-    _TEST = {
-        'url': 'http://www.novamov.com/video/4rurhn9x446jj',
-        'md5': '7205f346a52bbeba427603ba10d4b935',
-        'info_dict': {
-            'id': '4rurhn9x446jj',
-            'ext': 'flv',
-            'title': 'search engine optimization',
-            'description': 'search engine optimization is used to rank the web page in the google search engine'
-        },
-        'skip': '"Invalid token" errors abound (in web interface as well as youtube-dl, there is nothing we can do about it.)'
-    }
+    _TEST = None
 
     def _check_existence(self, webpage, video_id):
         if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
@@ -73,7 +69,7 @@ class NovaMovIE(InfoExtractor):
             if not post_url.startswith('http'):
                 post_url = compat_urlparse.urljoin(url, post_url)
             request = sanitized_Request(
-                post_url, urlencode_postdata(encode_dict(fields)))
+                post_url, urlencode_postdata(fields))
             request.add_header('Content-Type', 'application/x-www-form-urlencoded')
             request.add_header('Referer', post_url)
             webpage = self._download_webpage(
@@ -82,7 +78,7 @@ class NovaMovIE(InfoExtractor):
 
         filekey = extract_filekey()
 
-        title = self._html_search_regex(self._TITLE_REGEX, webpage, 'title', fatal=False)
+        title = self._html_search_regex(self._TITLE_REGEX, webpage, 'title')
         description = self._html_search_regex(self._DESCRIPTION_REGEX, webpage, 'description', default='', fatal=False)
 
         api_response = self._download_webpage(
@@ -188,3 +184,29 @@ class CloudTimeIE(NovaMovIE):
     _TITLE_REGEX = r'<div[^>]+class=["\']video_det["\'][^>]*>\s*<strong>([^<]+)</strong>'
 
     _TEST = None
+
+
+class AuroraVidIE(NovaMovIE):
+    IE_NAME = 'auroravid'
+    IE_DESC = 'AuroraVid'
+
+    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'auroravid\.to'}
+
+    _HOST = 'www.auroravid.to'
+
+    _FILE_DELETED_REGEX = r'This file no longer exists on our servers!<'
+
+    _TESTS = [{
+        'url': 'http://www.auroravid.to/video/4rurhn9x446jj',
+        'md5': '7205f346a52bbeba427603ba10d4b935',
+        'info_dict': {
+            'id': '4rurhn9x446jj',
+            'ext': 'flv',
+            'title': 'search engine optimization',
+            'description': 'search engine optimization is used to rank the web page in the google search engine'
+        },
+        'skip': '"Invalid token" errors abound (in web interface as well as youtube-dl, there is nothing we can do about it.)'
+    }, {
+        'url': 'http://www.auroravid.to/embed/?v=4rurhn9x446jj',
+        'only_matching': True,
+    }]
index 446f5901c1701166fa1a345827d839a25f6d98af..74860eb2054e4f685b4b52c89149e49563ffe230 100644 (file)
@@ -63,8 +63,11 @@ class NownessIE(NownessBaseIE):
             'title': 'Candor: The Art of Gesticulation',
             'description': 'Candor: The Art of Gesticulation',
             'thumbnail': 're:^https?://.*\.jpg',
-            'uploader': 'Nowness',
+            'timestamp': 1446745676,
+            'upload_date': '20151105',
+            'uploader_id': '2385340575001',
         },
+        'add_ie': ['BrightcoveNew'],
     }, {
         'url': 'https://cn.nowness.com/story/kasper-bjorke-ft-jaakko-eino-kalevi-tnr',
         'md5': 'e79cf125e387216f86b2e0a5b5c63aa3',
@@ -74,8 +77,11 @@ class NownessIE(NownessBaseIE):
             'title': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
             'description': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
             'thumbnail': 're:^https?://.*\.jpg',
-            'uploader': 'Nowness',
+            'timestamp': 1407315371,
+            'upload_date': '20140806',
+            'uploader_id': '2385340575001',
         },
+        'add_ie': ['BrightcoveNew'],
     }, {
         # vimeo
         'url': 'https://www.nowness.com/series/nowness-picks/jean-luc-godard-supercut',
@@ -90,6 +96,7 @@ class NownessIE(NownessBaseIE):
             'uploader': 'Cinema Sem Lei',
             'uploader_id': 'cinemasemlei',
         },
+        'add_ie': ['Vimeo'],
     }]
 
     def _real_extract(self, url):
index 0ffb44b47eb673821f013e6696d495226711c2dd..c47a33d1570537aedc0dc3c2415a5f138fbdc5bb 100644 (file)
@@ -2,10 +2,15 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote
+from ..compat import (
+    compat_urllib_parse_unquote,
+    compat_xpath,
+)
 from ..utils import (
     int_or_none,
+    find_xpath_attr,
     xpath_text,
+    update_url_query,
 )
 
 
@@ -45,18 +50,33 @@ class NozIE(InfoExtractor):
         duration = int_or_none(xpath_text(
             doc, './/article/movie/file/duration'))
         formats = []
-        for qnode in doc.findall('.//article/movie/file/qualities/qual'):
-            video_node = qnode.find('./html_urls/video_url[@format="video/mp4"]')
-            if video_node is None:
-                continue  # auto
-            formats.append({
-                'url': video_node.text,
-                'format_name': xpath_text(qnode, './name'),
-                'format_id': xpath_text(qnode, './id'),
-                'height': int_or_none(xpath_text(qnode, './height')),
-                'width': int_or_none(xpath_text(qnode, './width')),
-                'tbr': int_or_none(xpath_text(qnode, './bitrate'), scale=1000),
-            })
+        for qnode in doc.findall(compat_xpath('.//article/movie/file/qualities/qual')):
+            http_url_ele = find_xpath_attr(
+                qnode, './html_urls/video_url', 'format', 'video/mp4')
+            http_url = http_url_ele.text if http_url_ele is not None else None
+            if http_url:
+                formats.append({
+                    'url': http_url,
+                    'format_name': xpath_text(qnode, './name'),
+                    'format_id': '%s-%s' % ('http', xpath_text(qnode, './id')),
+                    'height': int_or_none(xpath_text(qnode, './height')),
+                    'width': int_or_none(xpath_text(qnode, './width')),
+                    'tbr': int_or_none(xpath_text(qnode, './bitrate'), scale=1000),
+                })
+            else:
+                f4m_url = xpath_text(qnode, 'url_hd2')
+                if f4m_url:
+                    formats.extend(self._extract_f4m_formats(
+                        update_url_query(f4m_url, {'hdcore': '3.4.0'}),
+                        video_id, f4m_id='hds', fatal=False))
+                m3u8_url_ele = find_xpath_attr(
+                    qnode, './html_urls/video_url',
+                    'format', 'application/vnd.apple.mpegurl')
+                m3u8_url = m3u8_url_ele.text if m3u8_url_ele is not None else None
+                if m3u8_url:
+                    formats.extend(self._extract_m3u8_formats(
+                        m3u8_url, video_id, 'mp4', 'm3u8_native',
+                        m3u8_id='hls', fatal=False))
         self._sort_formats(formats)
 
         return {
index 125c7010b9206bce25688c5c3cbad576753f70f0..1777aa10b537a8359b8acb4a5b19f5d169cb00d6 100644 (file)
@@ -1,7 +1,7 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse
+from ..compat import compat_urllib_parse_urlencode
 from ..utils import (
     int_or_none,
     qualities,
@@ -9,7 +9,7 @@ from ..utils import (
 
 
 class NprIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?npr\.org/player/v2/mediaPlayer\.html\?.*\bid=(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?npr\.org/player/v2/mediaPlayer\.html\?.*\bid=(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://www.npr.org/player/v2/mediaPlayer.html?id=449974205',
         'info_dict': {
@@ -38,7 +38,7 @@ class NprIE(InfoExtractor):
         playlist_id = self._match_id(url)
 
         config = self._download_json(
-            'http://api.npr.org/query?%s' % compat_urllib_parse.urlencode({
+            'http://api.npr.org/query?%s' % compat_urllib_parse_urlencode({
                 'id': playlist_id,
                 'fields': 'titles,audio,show',
                 'format': 'json',
index a126f5054fbee3c178cf19915caec9fdb03df297..6ded5bd456fa86bf16e1762601889b46f2d68fe9 100644 (file)
@@ -4,90 +4,222 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_urlparse
+from ..compat import compat_urllib_parse_unquote
 from ..utils import (
-    determine_ext,
     ExtractorError,
-    float_or_none,
+    int_or_none,
+    parse_age_limit,
     parse_duration,
-    unified_strdate,
 )
 
 
-class NRKIE(InfoExtractor):
-    _VALID_URL = r'(?:nrk:|https?://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)'
-
-    _TESTS = [
-        {
-            'url': 'http://www.nrk.no/video/PS*150533',
-            'md5': 'bccd850baebefe23b56d708a113229c2',
-            'info_dict': {
-                'id': '150533',
-                'ext': 'flv',
-                'title': 'Dompap og andre fugler i Piip-Show',
-                'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
-                'duration': 263,
-            }
-        },
-        {
-            'url': 'http://www.nrk.no/video/PS*154915',
-            'md5': '0b1493ba1aae7d9579a5ad5531bc395a',
-            'info_dict': {
-                'id': '154915',
-                'ext': 'flv',
-                'title': 'Slik høres internett ut når du er blind',
-                'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
-                'duration': 20,
-            }
-        },
-    ]
+class NRKBaseIE(InfoExtractor):
+    def _extract_formats(self, manifest_url, video_id, fatal=True):
+        formats = []
+        formats.extend(self._extract_f4m_formats(
+            manifest_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81',
+            video_id, f4m_id='hds', fatal=fatal))
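+        # Assumption: Akamai HD serves the same stream over both HDS (/z/
+        # path, manifest.f4m) and HLS (/i/ path, master.m3u8), hence the
+        # URL rewrite below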
+        formats.extend(self._extract_m3u8_formats(manifest_url.replace(
+            'akamaihd.net/z/', 'akamaihd.net/i/').replace('/manifest.f4m', '/master.m3u8'),
+            video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=fatal))
+        return formats
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
         data = self._download_json(
-            'http://v8.psapi.nrk.no/mediaelement/%s' % video_id,
-            video_id, 'Downloading media JSON')
+            'http://%s/mediaelement/%s' % (self._API_HOST, video_id),
+            video_id, 'Downloading mediaelement JSON')
+
+        title = data.get('fullTitle') or data.get('mainTitle') or data['title']
+        video_id = data.get('id') or video_id
+
+        entries = []
+
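+        # A programme may be split into several media assets (multi-part
+        # broadcasts); each playable part becomes its own playlist entry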
+        media_assets = data.get('mediaAssets')
+        if media_assets and isinstance(media_assets, list):
+            def video_id_and_title(idx):
+                return ((video_id, title) if len(media_assets) == 1
+                        else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx)))
+            for num, asset in enumerate(media_assets, 1):
+                asset_url = asset.get('url')
+                if not asset_url:
+                    continue
+                formats = self._extract_formats(asset_url, video_id, fatal=False)
+                if not formats:
+                    continue
+                self._sort_formats(formats)
+                entry_id, entry_title = video_id_and_title(num)
+                duration = parse_duration(asset.get('duration'))
+                subtitles = {}
+                for subtitle in ('webVtt', 'timedText'):
+                    subtitle_url = asset.get('%sSubtitlesUrl' % subtitle)
+                    if subtitle_url:
+                        subtitles.setdefault('no', []).append({
+                            'url': compat_urllib_parse_unquote(subtitle_url)
+                        })
+                entries.append({
+                    'id': asset.get('carrierId') or entry_id,
+                    'title': entry_title,
+                    'duration': duration,
+                    'subtitles': subtitles,
+                    'formats': formats,
+                })
 
-        media_url = data.get('mediaUrl')
+        if not entries:
+            media_url = data.get('mediaUrl')
+            if media_url:
+                formats = self._extract_formats(media_url, video_id)
+                self._sort_formats(formats)
+                duration = parse_duration(data.get('duration'))
+                entries = [{
+                    'id': video_id,
+                    'title': title,
+                    'duration': duration,
+                    'formats': formats,
+                }]
 
-        if not media_url:
-            if data['usageRights']['isGeoBlocked']:
+        if not entries:
+            if data.get('usageRights', {}).get('isGeoBlocked'):
                 raise ExtractorError(
                     'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
                     expected=True)
 
-        if determine_ext(media_url) == 'f4m':
-            formats = self._extract_f4m_formats(
-                media_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', video_id, f4m_id='hds')
-        else:
-            formats = [{
-                'url': media_url,
-                'ext': 'flv',
-            }]
-
-        duration = parse_duration(data.get('duration'))
+        conviva = data.get('convivaStatistics') or {}
+        series = conviva.get('seriesName') or data.get('seriesTitle')
+        episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
 
+        thumbnails = None
         images = data.get('images')
-        if images:
-            thumbnails = images['webImages']
-            thumbnails.sort(key=lambda image: image['pixelWidth'])
-            thumbnail = thumbnails[-1]['imageUrl']
-        else:
-            thumbnail = None
-
-        return {
-            'id': video_id,
-            'title': data['title'],
-            'description': data['description'],
-            'duration': duration,
-            'thumbnail': thumbnail,
-            'formats': formats,
+        if images and isinstance(images, dict):
+            web_images = images.get('webImages')
+            if isinstance(web_images, list):
+                thumbnails = [{
+                    'url': image['imageUrl'],
+                    'width': int_or_none(image.get('width')),
+                    'height': int_or_none(image.get('height')),
+                } for image in web_images if image.get('imageUrl')]
+
+        description = data.get('description')
+
+        common_info = {
+            'description': description,
+            'series': series,
+            'episode': episode,
+            'age_limit': parse_age_limit(data.get('legalAge')),
+            'thumbnails': thumbnails,
+        }
+
+        vcodec = 'none' if data.get('mediaType') == 'Audio' else None
+
+        # TODO: extract chapters when https://github.com/rg3/youtube-dl/pull/9409 is merged
+
+        for entry in entries:
+            entry.update(common_info)
+            for f in entry['formats']:
+                f['vcodec'] = vcodec
+
+        return self.playlist_result(entries, video_id, title, description)
+
+
+class NRKIE(NRKBaseIE):
+    _VALID_URL = r'(?:nrk:|https?://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)'
+    _API_HOST = 'v8.psapi.nrk.no'
+    _TESTS = [{
+        # video
+        'url': 'http://www.nrk.no/video/PS*150533',
+        'md5': '2f7f6eeb2aacdd99885f355428715cfa',
+        'info_dict': {
+            'id': '150533',
+            'ext': 'mp4',
+            'title': 'Dompap og andre fugler i Piip-Show',
+            'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
+            'duration': 263,
         }
+    }, {
+        # audio
+        'url': 'http://www.nrk.no/video/PS*154915',
+        # MD5 is unstable
+        'info_dict': {
+            'id': '154915',
+            'ext': 'flv',
+            'title': 'Slik høres internett ut når du er blind',
+            'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
+            'duration': 20,
+        }
+    }]
+
+
+class NRKTVIE(NRKBaseIE):
+    IE_DESC = 'NRK TV and NRK Radio'
+    _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
+    _API_HOST = 'psapi-we.nrk.no'
+
+    _TESTS = [{
+        'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
+        'md5': '4e9ca6629f09e588ed240fb11619922a',
+        'info_dict': {
+            'id': 'MUHH48000314AA',
+            'ext': 'mp4',
+            'title': '20 spørsmål 23.05.2014',
+            'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
+            'duration': 1741,
+        },
+    }, {
+        'url': 'https://tv.nrk.no/program/mdfp15000514',
+        'md5': '43d0be26663d380603a9cf0c24366531',
+        'info_dict': {
+            'id': 'MDFP15000514CA',
+            'ext': 'mp4',
+            'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014',
+            'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db',
+            'duration': 4605,
+        },
+    }, {
+        # single playlist video
+        'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
+        'md5': 'adbd1dbd813edaf532b0a253780719c2',
+        'info_dict': {
+            'id': 'MSPO40010515-part2',
+            'ext': 'flv',
+            'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
+            'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
+        },
+        'skip': 'Only works from Norway',
+    }, {
+        'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
+        'playlist': [{
+            'md5': '9480285eff92d64f06e02a5367970a7a',
+            'info_dict': {
+                'id': 'MSPO40010515-part1',
+                'ext': 'flv',
+                'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)',
+                'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
+            },
+        }, {
+            'md5': 'adbd1dbd813edaf532b0a253780719c2',
+            'info_dict': {
+                'id': 'MSPO40010515-part2',
+                'ext': 'flv',
+                'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
+                'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
+            },
+        }],
+        'info_dict': {
+            'id': 'MSPO40010515',
+            'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn',
+            'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
+            'duration': 6947.52,
+        },
+        'skip': 'Only works from Norway',
+    }, {
+        'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
+        'only_matching': True,
+    }]
 
 
 class NRKPlaylistIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video)(?:[^/]+/)+(?P<id>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P<id>[^/]+)'
 
     _TESTS = [{
         'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
@@ -126,177 +258,36 @@ class NRKPlaylistIE(InfoExtractor):
             entries, playlist_id, playlist_title, playlist_description)
 
 
-class NRKTVIE(InfoExtractor):
-    IE_DESC = 'NRK TV and NRK Radio'
-    _VALID_URL = r'(?P<baseurl>https?://(?:tv|radio)\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
+class NRKSkoleIE(InfoExtractor):
+    IE_DESC = 'NRK Skole'
+    _VALID_URL = r'https?://(?:www\.)?nrk\.no/skole/?\?.*\bmediaId=(?P<id>\d+)'
 
-    _TESTS = [
-        {
-            'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
-            'info_dict': {
-                'id': 'MUHH48000314',
-                'ext': 'mp4',
-                'title': '20 spørsmål',
-                'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
-                'upload_date': '20140523',
-                'duration': 1741.52,
-            },
-            'params': {
-                # m3u8 download
-                'skip_download': True,
-            },
-        },
-        {
-            'url': 'https://tv.nrk.no/program/mdfp15000514',
-            'info_dict': {
-                'id': 'mdfp15000514',
-                'ext': 'mp4',
-                'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting',
-                'description': 'md5:654c12511f035aed1e42bdf5db3b206a',
-                'upload_date': '20140524',
-                'duration': 4605.08,
-            },
-            'params': {
-                # m3u8 download
-                'skip_download': True,
-            },
-        },
-        {
-            # single playlist video
-            'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
-            'md5': 'adbd1dbd813edaf532b0a253780719c2',
-            'info_dict': {
-                'id': 'MSPO40010515-part2',
-                'ext': 'flv',
-                'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
-                'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
-                'upload_date': '20150106',
-            },
-            'skip': 'Only works from Norway',
-        },
-        {
-            'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
-            'playlist': [
-                {
-                    'md5': '9480285eff92d64f06e02a5367970a7a',
-                    'info_dict': {
-                        'id': 'MSPO40010515-part1',
-                        'ext': 'flv',
-                        'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)',
-                        'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
-                        'upload_date': '20150106',
-                    },
-                },
-                {
-                    'md5': 'adbd1dbd813edaf532b0a253780719c2',
-                    'info_dict': {
-                        'id': 'MSPO40010515-part2',
-                        'ext': 'flv',
-                        'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
-                        'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
-                        'upload_date': '20150106',
-                    },
-                },
-            ],
-            'info_dict': {
-                'id': 'MSPO40010515',
-                'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn',
-                'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
-                'upload_date': '20150106',
-                'duration': 6947.5199999999995,
-            },
-            'skip': 'Only works from Norway',
+    _TESTS = [{
+        'url': 'https://www.nrk.no/skole/?page=search&q=&mediaId=14099',
+        'md5': '6bc936b01f9dd8ed45bc58b252b2d9b6',
+        'info_dict': {
+            'id': '6021',
+            'ext': 'mp4',
+            'title': 'Genetikk og eneggede tvillinger',
+            'description': 'md5:3aca25dcf38ec30f0363428d2b265f8d',
+            'duration': 399,
         },
-        {
-            'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
-            'only_matching': True,
-        }
-    ]
-
-    def _extract_f4m(self, manifest_url, video_id):
-        return self._extract_f4m_formats(
-            manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id, f4m_id='hds')
+    }, {
+        'url': 'https://www.nrk.no/skole/?page=objectives&subject=naturfag&objective=K15114&mediaId=19355',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        part_id = mobj.group('part_id')
-        base_url = mobj.group('baseurl')
-
-        webpage = self._download_webpage(url, video_id)
-
-        title = self._html_search_meta(
-            'title', webpage, 'title')
-        description = self._html_search_meta(
-            'description', webpage, 'description')
-
-        thumbnail = self._html_search_regex(
-            r'data-posterimage="([^"]+)"',
-            webpage, 'thumbnail', fatal=False)
-        upload_date = unified_strdate(self._html_search_meta(
-            'rightsfrom', webpage, 'upload date', fatal=False))
-        duration = float_or_none(self._html_search_regex(
-            r'data-duration="([^"]+)"',
-            webpage, 'duration', fatal=False))
-
-        # playlist
-        parts = re.findall(
-            r'<a href="#del=(\d+)"[^>]+data-argument="([^"]+)">([^<]+)</a>', webpage)
-        if parts:
-            entries = []
-            for current_part_id, stream_url, part_title in parts:
-                if part_id and current_part_id != part_id:
-                    continue
-                video_part_id = '%s-part%s' % (video_id, current_part_id)
-                formats = self._extract_f4m(stream_url, video_part_id)
-                entries.append({
-                    'id': video_part_id,
-                    'title': part_title,
-                    'description': description,
-                    'thumbnail': thumbnail,
-                    'upload_date': upload_date,
-                    'formats': formats,
-                })
-            if part_id:
-                if entries:
-                    return entries[0]
-            else:
-                playlist = self.playlist_result(entries, video_id, title, description)
-                playlist.update({
-                    'thumbnail': thumbnail,
-                    'upload_date': upload_date,
-                    'duration': duration,
-                })
-                return playlist
+        video_id = self._match_id(url)
 
-        formats = []
+        webpage = self._download_webpage(
+            'https://mimir.nrk.no/plugin/1.0/static?mediaId=%s' % video_id,
+            video_id)
 
-        f4m_url = re.search(r'data-media="([^"]+)"', webpage)
-        if f4m_url:
-            formats.extend(self._extract_f4m(f4m_url.group(1), video_id))
-
-        m3u8_url = re.search(r'data-hls-media="([^"]+)"', webpage)
-        if m3u8_url:
-            formats.extend(self._extract_m3u8_formats(m3u8_url.group(1), video_id, 'mp4', m3u8_id='hls'))
-        self._sort_formats(formats)
-
-        subtitles_url = self._html_search_regex(
-            r'data-subtitlesurl\s*=\s*(["\'])(?P<url>.+?)\1',
-            webpage, 'subtitle URL', default=None, group='url')
-        subtitles = {}
-        if subtitles_url:
-            subtitles['no'] = [{
-                'ext': 'ttml',
-                'url': compat_urlparse.urljoin(base_url, subtitles_url),
-            }]
-
-        return {
-            'id': video_id,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'upload_date': upload_date,
-            'duration': duration,
-            'formats': formats,
-            'subtitles': subtitles,
-        }
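+        # The mimir plugin page embeds the actual NRK programme ID in an
+        # application/json <script> block (activeMedia.psId); hand it off
+        # to NRKIE via an nrk: URL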
+        nrk_id = self._parse_json(
+            self._search_regex(
+                r'<script[^>]+type=["\']application/json["\'][^>]*>({.+?})</script>',
+                webpage, 'application json'),
+            video_id)['activeMedia']['psId']
+
+        return self.url_result('nrk:%s' % nrk_id)
index 2cd924d059dafd9aa3734697c9c4a396b2bb01f6..0895d7ea4cb88f805605a55cb0c1fe56ff1d475d 100644 (file)
@@ -11,7 +11,7 @@ from ..utils import (
 
 class NTVRuIE(InfoExtractor):
     IE_NAME = 'ntv.ru'
-    _VALID_URL = r'http://(?:www\.)?ntv\.ru/(?P<id>.+)'
+    _VALID_URL = r'https?://(?:www\.)?ntv\.ru/(?P<id>.+)'
 
     _TESTS = [
         {
index 9fa7cefadc79ef1d8bda971dc52483a0b8d998eb..ab6bfcd7f485218d19034930607d35d1665e7406 100644 (file)
@@ -5,8 +5,6 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     parse_duration,
-    sanitized_Request,
-    unified_strdate,
 )
 
 
@@ -20,7 +18,6 @@ class NuvidIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Horny babes show their awesome bodeis and',
             'duration': 129,
-            'upload_date': '20140508',
             'age_limit': 18,
         }
     }
@@ -28,28 +25,31 @@ class NuvidIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        formats = []
+        page_url = 'http://m.nuvid.com/video/%s' % video_id
+        webpage = self._download_webpage(
+            page_url, video_id, 'Downloading video page')
+        # When dwnld_speed exists and has a value larger than the MP4 file's
+        # bitrate, Nuvid returns the MP4 URL
+        # Its unit is 100 bytes/millisecond; see mobile-nuvid-min.js for the algorithm
+        self._set_cookie('nuvid.com', 'dwnld_speed', '10.0')
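+        # 10.0 * 100 bytes/ms = 1000 kB/s (~8 Mbit/s), which should
+        # comfortably exceed the bitrate of any MP4 served here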
+        mp4_webpage = self._download_webpage(
+            page_url, video_id, 'Downloading video page for MP4 format')
 
-        for dwnld_speed, format_id in [(0, '3gp'), (5, 'mp4')]:
-            request = sanitized_Request(
-                'http://m.nuvid.com/play/%s' % video_id)
-            request.add_header('Cookie', 'skip_download_page=1; dwnld_speed=%d; adv_show=1' % dwnld_speed)
-            webpage = self._download_webpage(
-                request, video_id, 'Downloading %s page' % format_id)
-            video_url = self._html_search_regex(
-                r'<a\s+href="([^"]+)"\s+class="b_link">', webpage, '%s video URL' % format_id, fatal=False)
-            if not video_url:
-                continue
+        html5_video_re = r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']'
+        video_url = self._html_search_regex(html5_video_re, webpage, 'video URL')
+        mp4_video_url = self._html_search_regex(html5_video_re, mp4_webpage, 'MP4 video URL')
+        formats = [{
+            'url': video_url,
+        }]
+        if mp4_video_url != video_url:
             formats.append({
-                'url': video_url,
-                'format_id': format_id,
+                'url': mp4_video_url,
             })
 
-        webpage = self._download_webpage(
-            'http://m.nuvid.com/video/%s' % video_id, video_id, 'Downloading video page')
         title = self._html_search_regex(
             [r'<span title="([^"]+)">',
-             r'<div class="thumb-holder video">\s*<h5[^>]*>([^<]+)</h5>'], webpage, 'title').strip()
+             r'<div class="thumb-holder video">\s*<h5[^>]*>([^<]+)</h5>',
+             r'<span[^>]+class="title_thumb">([^<]+)</span>'], webpage, 'title').strip()
         thumbnails = [
             {
                 'url': thumb_url,
@@ -57,9 +57,8 @@ class NuvidIE(InfoExtractor):
         ]
         thumbnail = thumbnails[0]['url'] if thumbnails else None
         duration = parse_duration(self._html_search_regex(
-            r'<i class="fa fa-clock-o"></i>\s*(\d{2}:\d{2})', webpage, 'duration', fatal=False))
-        upload_date = unified_strdate(self._html_search_regex(
-            r'<i class="fa fa-user"></i>\s*(\d{4}-\d{2}-\d{2})', webpage, 'upload date', fatal=False))
+            [r'<i class="fa fa-clock-o"></i>\s*(\d{2}:\d{2})',
+             r'<span[^>]+class="view_time">([^<]+)</span>'], webpage, 'duration', fatal=False))
 
         return {
             'id': video_id,
@@ -67,7 +66,6 @@ class NuvidIE(InfoExtractor):
             'thumbnails': thumbnails,
             'thumbnail': thumbnail,
             'duration': duration,
-            'upload_date': upload_date,
             'age_limit': 18,
             'formats': formats,
         }
index 7f254b867da66f70a79ff7aac5d81eb6f37bd997..681683e86f54e796f1c954de2c0cb374016fe303 100644 (file)
@@ -18,8 +18,9 @@ class NYTimesBaseIE(InfoExtractor):
         description = video_data.get('summary')
         duration = float_or_none(video_data.get('duration'), 1000)
 
-        uploader = video_data['byline']
-        timestamp = parse_iso8601(video_data['publication_date'][:-8])
+        uploader = video_data.get('byline')
+        publication_date = video_data.get('publication_date')
+        timestamp = parse_iso8601(publication_date[:-8]) if publication_date else None
 
         def get_file_size(file_size):
             if isinstance(file_size, int):
@@ -37,7 +38,7 @@ class NYTimesBaseIE(InfoExtractor):
                 'width': int_or_none(video.get('width')),
                 'height': int_or_none(video.get('height')),
                 'filesize': get_file_size(video.get('fileSize')),
-            } for video in video_data['renditions']
+            } for video in video_data['renditions'] if video.get('url')
         ]
         self._sort_formats(formats)
 
@@ -46,7 +47,7 @@ class NYTimesBaseIE(InfoExtractor):
                 'url': 'http://www.nytimes.com/%s' % image['url'],
                 'width': int_or_none(image.get('width')),
                 'height': int_or_none(image.get('height')),
-            } for image in video_data['images']
+            } for image in video_data.get('images', []) if image.get('url')
         ]
 
         return {
index f9e064a60e445668200b759ca4e0ad1a6f7c28ab..986708e75e45f7f24f656f319767e6adbd9504ea 100644 (file)
@@ -2,7 +2,11 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote
+from ..compat import (
+    compat_parse_qs,
+    compat_urllib_parse_unquote,
+    compat_urllib_parse_urlparse,
+)
 from ..utils import (
     ExtractorError,
     unified_strdate,
@@ -32,7 +36,7 @@ class OdnoklassnikiIE(InfoExtractor):
         'skip': 'Video has been blocked',
     }, {
         # metadataUrl
-        'url': 'http://ok.ru/video/63567059965189-0',
+        'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
         'md5': '9676cf86eff5391d35dea675d224e131',
         'info_dict': {
             'id': '63567059965189-0',
@@ -44,6 +48,7 @@ class OdnoklassnikiIE(InfoExtractor):
             'uploader': '☭ Андрей Мещанинов ☭',
             'like_count': int,
             'age_limit': 0,
+            'start_time': 5,
         },
     }, {
         # YouTube embed (metadataUrl, provider == USER_YOUTUBE)
@@ -60,6 +65,22 @@ class OdnoklassnikiIE(InfoExtractor):
             'uploader': 'Алина П',
             'age_limit': 0,
         },
+    }, {
+        # YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field)
+        'url': 'http://ok.ru/video/62036049272859-0',
+        'info_dict': {
+            'id': '62036049272859-0',
+            'ext': 'mp4',
+            'title': 'МУЗЫКА     ДОЖДЯ .',
+            'description': 'md5:6f1867132bd96e33bf53eda1091e8ed0',
+            'upload_date': '20120106',
+            'uploader_id': '473534735899',
+            'uploader': 'МARINA D',
+            'age_limit': 0,
+        },
+        'params': {
+            'skip_download': True,
+        },
     }, {
         'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
         'only_matching': True,
@@ -78,6 +99,9 @@ class OdnoklassnikiIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
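+        # ok.ru supports deep links via the fromTime query parameter, which
+        # appears to carry the start offset in seconds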
+        start_time = int_or_none(compat_parse_qs(
+            compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0])
+
         video_id = self._match_id(url)
 
         webpage = self._download_webpage(
@@ -106,7 +130,14 @@ class OdnoklassnikiIE(InfoExtractor):
                 video_id, 'Downloading metadata JSON')
 
         movie = metadata['movie']
-        title = movie['title']
+
+        # Some embedded videos may not contain a title in the movie dict
+        # (e.g. http://ok.ru/video/62036049272859-0), so a missing title is
+        # allowed here; it will be extracted later by the extractor that
+        # processes the actual embed.
+        provider = metadata.get('provider')
+        title = movie['title'] if provider == 'UPLOADED_ODKL' else movie.get('title')
+
         thumbnail = movie.get('poster')
         duration = int_or_none(movie.get('duration'))
 
@@ -135,9 +166,10 @@ class OdnoklassnikiIE(InfoExtractor):
             'uploader_id': uploader_id,
             'like_count': like_count,
             'age_limit': age_limit,
+            'start_time': start_time,
         }
 
-        if metadata.get('provider') == 'USER_YOUTUBE':
+        if provider == 'USER_YOUTUBE':
             info.update({
                 '_type': 'url_transparent',
                 'url': movie['contentId'],
diff --git a/youtube_dl/extractor/once.py b/youtube_dl/extractor/once.py
new file mode 100644 (file)
index 0000000..1bf96ea
--- /dev/null
@@ -0,0 +1,42 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class OnceIE(InfoExtractor):
+    _VALID_URL = r'https?://.+?\.unicornmedia\.com/now/[^/]+/[^/]+/(?P<domain_id>[^/]+)/(?P<application_id>[^/]+)/(?:[^/]+/)?(?P<media_item_id>[^/]+)/content\.(?:once|m3u8|mp4)'
+    ADAPTIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/master/playlist/%s/%s/%s/content.m3u8'
+    PROGRESSIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/media/progressive/%s/%s/%s/%s/content.mp4'
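+    # The two URL templates above appear to share the domain, application and
+    # media item IDs; the progressive URL additionally needs a rendition ID,
+    # which is recovered below from each HLS variant playlist URL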
+
+    def _extract_once_formats(self, url):
+        domain_id, application_id, media_item_id = re.match(
+            OnceIE._VALID_URL, url).groups()
+        formats = self._extract_m3u8_formats(
+            self.ADAPTIVE_URL_TEMPLATE % (
+                domain_id, application_id, media_item_id),
+            media_item_id, 'mp4', m3u8_id='hls', fatal=False)
+        progressive_formats = []
+        for adaptive_format in formats:
+            # Prevent advertisement from embedding into m3u8 playlist (see
+            # https://github.com/rg3/youtube-dl/issues/8893#issuecomment-199912684)
+            adaptive_format['url'] = re.sub(
+                r'\badsegmentlength=\d+', r'adsegmentlength=0', adaptive_format['url'])
+            rendition_id = self._search_regex(
+                r'/now/media/playlist/[^/]+/[^/]+/([^/]+)',
+                adaptive_format['url'], 'rendition id', default=None)
+            if rendition_id:
+                progressive_format = adaptive_format.copy()
+                progressive_format.update({
+                    'url': self.PROGRESSIVE_URL_TEMPLATE % (
+                        domain_id, application_id, rendition_id, media_item_id),
+                    'format_id': adaptive_format['format_id'].replace(
+                        'hls', 'http'),
+                    'protocol': 'http',
+                })
+                progressive_formats.append(progressive_format)
+        self._check_formats(progressive_formats, media_item_id)
+        formats.extend(progressive_formats)
+        return formats
index 0f1f448fe3126670932b371498b73b0bf0a7924e..d7b13a0f1fbc53deba74e47f6d1eefc0bdee8be2 100644 (file)
@@ -4,7 +4,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import determine_ext
+from ..utils import (
+    determine_ext,
+    int_or_none,
+)
 
 
 class OnionStudiosIE(InfoExtractor):
@@ -17,7 +20,7 @@ class OnionStudiosIE(InfoExtractor):
             'id': '2937',
             'ext': 'mp4',
             'title': 'Hannibal charges forward, stops for a cocktail',
-            'description': 'md5:545299bda6abf87e5ec666548c6a9448',
+            'description': 'md5:e786add7f280b7f0fe237b64cc73df76',
             'thumbnail': 're:^https?://.*\.jpg$',
             'uploader': 'The A.V. Club',
             'uploader_id': 'TheAVClub',
@@ -42,9 +45,19 @@ class OnionStudiosIE(InfoExtractor):
 
         formats = []
         for src in re.findall(r'<source[^>]+src="([^"]+)"', webpage):
-            if determine_ext(src) != 'm3u8':  # m3u8 always results in 403
+            ext = determine_ext(src)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+            else:
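+                # progressive URLs appear to end in <height>.<ext>
+                # (e.g. a hypothetical .../720.mp4)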
+                height = int_or_none(self._search_regex(
+                    r'/(\d+)\.%s' % ext, src, 'height', default=None))
                 formats.append({
+                    'format_id': ext + ('-%sp' % height if height else ''),
                     'url': src,
+                    'height': height,
+                    'ext': ext,
+                    'preference': 1,
                 })
         self._sort_formats(formats)
 
@@ -52,7 +65,7 @@ class OnionStudiosIE(InfoExtractor):
             r'share_title\s*=\s*(["\'])(?P<title>[^\1]+?)\1',
             webpage, 'title', group='title')
         description = self._search_regex(
-            r'share_description\s*=\s*(["\'])(?P<description>[^\1]+?)\1',
+            r'share_description\s*=\s*(["\'])(?P<description>[^\'"]+?)\1',
             webpage, 'description', default=None, group='description')
         thumbnail = self._search_regex(
             r'poster\s*=\s*(["\'])(?P<thumbnail>[^\1]+?)\1',
index 20b984288ac7bba05bef7d4a34d5f01b5cb0f851..2038a6ba5001283e786905a23c429d2418762515 100644 (file)
@@ -8,78 +8,88 @@ from ..utils import (
     float_or_none,
     ExtractorError,
     unsmuggle_url,
+    determine_ext,
 )
-from ..compat import compat_urllib_parse
+from ..compat import compat_urllib_parse_urlencode
 
 
 class OoyalaBaseIE(InfoExtractor):
     _PLAYER_BASE = 'http://player.ooyala.com/'
     _CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/'
-    _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v1/authorization/embed_code/%s/%s?'
+    _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?'
 
     def _extract(self, content_tree_url, video_id, domain='example.org'):
         content_tree = self._download_json(content_tree_url, video_id)['content_tree']
         metadata = content_tree[list(content_tree)[0]]
         embed_code = metadata['embed_code']
         pcode = metadata.get('asset_pcode') or embed_code
-        video_info = {
-            'id': embed_code,
-            'title': metadata['title'],
-            'description': metadata.get('description'),
-            'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'),
-            'duration': float_or_none(metadata.get('duration'), 1000),
-        }
+        title = metadata['title']
+
+        auth_data = self._download_json(
+            self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) +
+            compat_urllib_parse_urlencode({
+                'domain': domain,
+                'supportedFormats': 'mp4,rtmp,m3u8,hds',
+            }), video_id)
+
+        cur_auth_data = auth_data['authorization_data'][embed_code]
 
         urls = []
         formats = []
-        for supported_format in ('mp4', 'm3u8', 'hds', 'rtmp'):
-            auth_data = self._download_json(
-                self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) +
-                compat_urllib_parse.urlencode({
-                    'domain': domain,
-                    'supportedFormats': supported_format
-                }),
-                video_id, 'Downloading %s JSON' % supported_format)
-
-            cur_auth_data = auth_data['authorization_data'][embed_code]
-
-            if cur_auth_data['authorized']:
-                for stream in cur_auth_data['streams']:
-                    url = base64.b64decode(
-                        stream['url']['data'].encode('ascii')).decode('utf-8')
-                    if url in urls:
-                        continue
-                    urls.append(url)
-                    delivery_type = stream['delivery_type']
-                    if delivery_type == 'hls' or '.m3u8' in url:
-                        formats.extend(self._extract_m3u8_formats(
-                            url, embed_code, 'mp4', 'm3u8_native',
-                            m3u8_id='hls', fatal=False))
-                    elif delivery_type == 'hds' or '.f4m' in url:
-                        formats.extend(self._extract_f4m_formats(
-                            url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False))
-                    elif '.smil' in url:
-                        formats.extend(self._extract_smil_formats(
-                            url, embed_code, fatal=False))
-                    else:
-                        formats.append({
-                            'url': url,
-                            'ext': stream.get('delivery_type'),
-                            'vcodec': stream.get('video_codec'),
-                            'format_id': delivery_type,
-                            'width': int_or_none(stream.get('width')),
-                            'height': int_or_none(stream.get('height')),
-                            'abr': int_or_none(stream.get('audio_bitrate')),
-                            'vbr': int_or_none(stream.get('video_bitrate')),
-                            'fps': float_or_none(stream.get('framerate')),
-                        })
-            else:
-                raise ExtractorError('%s said: %s' % (
-                    self.IE_NAME, cur_auth_data['message']), expected=True)
+        if cur_auth_data['authorized']:
+            for stream in cur_auth_data['streams']:
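+                # stream URLs are delivered base64-encoded; decode them and
+                # skip duplicates across delivery types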
+                s_url = base64.b64decode(
+                    stream['url']['data'].encode('ascii')).decode('utf-8')
+                if s_url in urls:
+                    continue
+                urls.append(s_url)
+                ext = determine_ext(s_url, None)
+                delivery_type = stream['delivery_type']
+                if delivery_type == 'hls' or ext == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        s_url, embed_code, 'mp4', 'm3u8_native',
+                        m3u8_id='hls', fatal=False))
+                elif delivery_type == 'hds' or ext == 'f4m':
+                    formats.extend(self._extract_f4m_formats(
+                        s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False))
+                elif ext == 'smil':
+                    formats.extend(self._extract_smil_formats(
+                        s_url, embed_code, fatal=False))
+                else:
+                    formats.append({
+                        'url': s_url,
+                        'ext': ext or stream.get('delivery_type'),
+                        'vcodec': stream.get('video_codec'),
+                        'format_id': delivery_type,
+                        'width': int_or_none(stream.get('width')),
+                        'height': int_or_none(stream.get('height')),
+                        'abr': int_or_none(stream.get('audio_bitrate')),
+                        'vbr': int_or_none(stream.get('video_bitrate')),
+                        'fps': float_or_none(stream.get('framerate')),
+                    })
+        else:
+            raise ExtractorError('%s said: %s' % (
+                self.IE_NAME, cur_auth_data['message']), expected=True)
         self._sort_formats(formats)
 
-        video_info['formats'] = formats
-        return video_info
+        subtitles = {}
+        for lang, sub in metadata.get('closed_captions_vtt', {}).get('captions', {}).items():
+            sub_url = sub.get('url')
+            if not sub_url:
+                continue
+            subtitles[lang] = [{
+                'url': sub_url,
+            }]
+
+        return {
+            'id': embed_code,
+            'title': title,
+            'description': metadata.get('description'),
+            'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'),
+            'duration': float_or_none(metadata.get('duration'), 1000),
+            'subtitles': subtitles,
+            'formats': formats,
+        }
 
 
 class OoyalaIE(OoyalaBaseIE):
@@ -96,6 +106,8 @@ class OoyalaIE(OoyalaBaseIE):
                 'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
                 'duration': 853.386,
             },
+            # The video in the original webpage now uses PlayWire
+            'skip': 'Ooyala said: movie expired',
         }, {
             # Only available for ipad
             'url': 'http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py
new file mode 100644 (file)
index 0000000..6415b8f
--- /dev/null
@@ -0,0 +1,130 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_chr
+from ..utils import (
+    determine_ext,
+    encode_base_n,
+    ExtractorError,
+    mimetype2ext,
+)
+
+
+class OpenloadIE(InfoExtractor):
+    _VALID_URL = r'https://openload\.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
+
+    _TESTS = [{
+        'url': 'https://openload.co/f/kUEfGclsU9o',
+        'md5': 'bf1c059b004ebc7a256f89408e65c36e',
+        'info_dict': {
+            'id': 'kUEfGclsU9o',
+            'ext': 'mp4',
+            'title': 'skyrim_no-audio_1080.mp4',
+            'thumbnail': 're:^https?://.*\.jpg$',
+        },
+    }, {
+        'url': 'https://openload.co/embed/kUEfGclsU9o/skyrim_no-audio_1080.mp4',
+        'only_matching': True,
+    }, {
+        'url': 'https://openload.io/f/ZAn6oz-VZGE/',
+        'only_matching': True,
+    }, {
+        'url': 'https://openload.co/f/_-ztPaZtMhM/',
+        'only_matching': True,
+    }, {
+        # unavailable via https://openload.co/f/Sxz5sADo82g/, different layout
+        # for title and ext
+        'url': 'https://openload.co/embed/Sxz5sADo82g/',
+        'only_matching': True,
+    }]
+
+    @staticmethod
+    def openload_level2_debase(m):
+        radix, num = int(m.group(1)) + 27, int(m.group(2))
+        return '"' + encode_base_n(num, radix) + '"'
+
+    @classmethod
+    def openload_level2(cls, txt):
+        # The function name is ǃ \u01c3
+        # Using escaped unicode literals does not work in Python 3.2
+        return re.sub(r'ǃ\((\d+),(\d+)\)', cls.openload_level2_debase, txt, flags=re.UNICODE).replace('"+"', '')
+
+    # Openload uses a variant of aadecode
+    # openload_decode and related functions are originally written by
+    # vitas@matfyz.cz and released with public domain
+    # See https://github.com/rg3/youtube-dl/issues/8489
+    @classmethod
+    def openload_decode(cls, txt):
+        symbol_table = [
+            ('_', '(゚Д゚) [゚Θ゚]'),
+            ('a', '(゚Д゚) [゚ω゚ノ]'),
+            ('b', '(゚Д゚) [゚Θ゚ノ]'),
+            ('c', '(゚Д゚) [\'c\']'),
+            ('d', '(゚Д゚) [゚ー゚ノ]'),
+            ('e', '(゚Д゚) [゚Д゚ノ]'),
+            ('f', '(゚Д゚) [1]'),
+
+            ('o', '(゚Д゚) [\'o\']'),
+            ('u', '(o゚ー゚o)'),
+            ('c', '(゚Д゚) [\'c\']'),
+
+            ('7', '((゚ー゚) + (o^_^o))'),
+            ('6', '((o^_^o) +(o^_^o) +(c^_^o))'),
+            ('5', '((゚ー゚) + (゚Θ゚))'),
+            ('4', '(-~3)'),
+            ('3', '(-~-~1)'),
+            ('2', '(-~1)'),
+            ('1', '(-~0)'),
+            ('0', '((c^_^o)-(c^_^o))'),
+        ]
+        delim = '(゚Д゚)[゚ε゚]+'
+        ret = ''
+        for aachar in txt.split(delim):
+            for val, pat in symbol_table:
+                aachar = aachar.replace(pat, val)
+            aachar = aachar.replace('+ ', '')
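+            # a bare leading number is an octal char code; a 'u'-prefixed
+            # hex number is a hexadecimal char code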
+            m = re.match(r'^\d+', aachar)
+            if m:
+                ret += compat_chr(int(m.group(0), 8))
+            else:
+                m = re.match(r'^u([\da-f]+)', aachar)
+                if m:
+                    ret += compat_chr(int(m.group(1), 16))
+        return cls.openload_level2(ret)
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        if 'File not found' in webpage:
+            raise ExtractorError('File not found', expected=True)
+
+        code = self._search_regex(
+            r'</video>\s*</div>\s*<script[^>]+>[^>]+</script>\s*<script[^>]+>([^<]+)</script>',
+            webpage, 'JS code')
+
+        decoded = self.openload_decode(code)
+
+        video_url = self._search_regex(
+            r'return\s+"(https?://[^"]+)"', decoded, 'video URL')
+
+        title = self._og_search_title(webpage, default=None) or self._search_regex(
+            r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
+            'title', default=None) or self._html_search_meta(
+            'description', webpage, 'title', fatal=True)
+
+        ext = mimetype2ext(self._search_regex(
+            r'window\.vt\s*=\s*(["\'])(?P<mimetype>.+?)\1', decoded,
+            'mimetype', default=None, group='mimetype')) or determine_ext(
+            video_url, 'mp4')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'ext': ext,
+            'thumbnail': self._og_search_thumbnail(webpage, default=None),
+            'url': video_url,
+        }
index 8545fb1b88cbf29ae1acb999566ee9041335dc4a..1d42be39b3303c95952a8ec54a34abbb9d09f0b1 100644 (file)
@@ -12,8 +12,8 @@ from ..utils import (
 
 
 class OraTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?ora\.tv/([^/]+/)*(?P<id>[^/\?#]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?(?:ora\.tv|unsafespeech\.com)/([^/]+/)*(?P<id>[^/\?#]+)'
+    _TESTS = [{
         'url': 'https://www.ora.tv/larrykingnow/2015/12/16/vine-youtube-stars-zach-king-king-bach-on-their-viral-videos-0_36jupg6090pq',
         'md5': 'fa33717591c631ec93b04b0e330df786',
         'info_dict': {
@@ -22,7 +22,10 @@ class OraTVIE(InfoExtractor):
             'title': 'Vine & YouTube Stars Zach King & King Bach On Their Viral Videos!',
             'description': 'md5:ebbc5b1424dd5dba7be7538148287ac1',
         }
-    }
+    }, {
+        'url': 'http://www.unsafespeech.com/video/2016/5/10/student-self-censorship-and-the-thought-police-on-university-campuses-0_6622bnkppw4d',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
index 958eb398b992e4d3b4278ebe362f71074e87c19b..4e3864f0d7bd1f6b63c560e8b04ae38844a283f9 100644 (file)
@@ -137,7 +137,7 @@ class ORFTVthekIE(InfoExtractor):
 class ORFOE1IE(InfoExtractor):
     IE_NAME = 'orf:oe1'
     IE_DESC = 'Radio Österreich 1'
-    _VALID_URL = r'http://oe1\.orf\.at/(?:programm/|konsole.*?#\?track_id=)(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://oe1\.orf\.at/(?:programm/|konsole.*?#\?track_id=)(?P<id>[0-9]+)'
 
     # Audio on ORF radio is only available for 7 days, so we can't add tests.
     _TEST = {
@@ -171,7 +171,7 @@ class ORFOE1IE(InfoExtractor):
 class ORFFM4IE(InfoExtractor):
     IE_NAME = 'orf:fm4'
     IE_DESC = 'radio FM4'
-    _VALID_URL = r'http://fm4\.orf\.at/(?:7tage/?#|player/)(?P<date>[0-9]+)/(?P<show>\w+)'
+    _VALID_URL = r'https?://fm4\.orf\.at/(?:7tage/?#|player/)(?P<date>[0-9]+)/(?P<show>\w+)'
 
     _TEST = {
         'url': 'http://fm4.orf.at/player/20160110/IS/',
@@ -185,6 +185,7 @@ class ORFFM4IE(InfoExtractor):
             'timestamp': 1452456073,
             'upload_date': '20160110',
         },
+        'skip': 'Live streams on FM4 are deleted after a short time',
     }
 
     def _real_extract(self, url):
@@ -222,7 +223,7 @@ class ORFFM4IE(InfoExtractor):
 class ORFIPTVIE(InfoExtractor):
     IE_NAME = 'orf:iptv'
     IE_DESC = 'iptv.ORF.at'
-    _VALID_URL = r'http://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)'
+    _VALID_URL = r'https?://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)'
 
     _TEST = {
         'url': 'http://iptv.orf.at/stories/2275236/',
index ec8876c28551af6e717ac49cbd22da46d096c67c..22975066516a0d37e74c9c520dd4faf0a68305a6 100644 (file)
@@ -65,7 +65,7 @@ class PatreonIE(InfoExtractor):
 
         request = sanitized_Request(
             'https://www.patreon.com/processLogin',
-            compat_urllib_parse.urlencode(login_form).encode('utf-8')
+            compat_urllib_parse_urlencode(login_form).encode('utf-8')
         )
         login_page = self._download_webpage(request, None, note='Logging in as %s' % username)
 
index f43e3a146e7bd35d9a99ab730289f4a1d4f5b91c..81918ac6e455313c59a7b56b6c63a5bae4699e57 100644 (file)
@@ -196,7 +196,7 @@ class PBSIE(InfoExtractor):
     _TESTS = [
         {
             'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
-            'md5': 'ce1888486f0908d555a8093cac9a7362',
+            'md5': '173dc391afd361fa72eab5d3d918968d',
             'info_dict': {
                 'id': '2365006249',
                 'ext': 'mp4',
@@ -204,13 +204,10 @@ class PBSIE(InfoExtractor):
                 'description': 'md5:36f341ae62e251b8f5bd2b754b95a071',
                 'duration': 3190,
             },
-            'params': {
-                'skip_download': True,  # requires ffmpeg
-            },
         },
         {
             'url': 'http://www.pbs.org/wgbh/pages/frontline/losing-iraq/',
-            'md5': '143c98aa54a346738a3d78f54c925321',
+            'md5': '6f722cb3c3982186d34b0f13374499c7',
             'info_dict': {
                 'id': '2365297690',
                 'ext': 'mp4',
@@ -218,9 +215,6 @@ class PBSIE(InfoExtractor):
                 'description': 'md5:4d3eaa01f94e61b3e73704735f1196d9',
                 'duration': 5050,
             },
-            'params': {
-                'skip_download': True,  # requires ffmpeg
-            }
         },
         {
             'url': 'http://www.pbs.org/newshour/bb/education-jan-june12-cyberschools_02-23/',
@@ -244,9 +238,6 @@ class PBSIE(InfoExtractor):
                 'duration': 6559,
                 'thumbnail': 're:^https?://.*\.jpg$',
             },
-            'params': {
-                'skip_download': True,  # requires ffmpeg
-            },
         },
         {
             'url': 'http://www.pbs.org/wgbh/nova/earth/killer-typhoon.html',
@@ -262,9 +253,6 @@ class PBSIE(InfoExtractor):
                 'upload_date': '20140122',
                 'age_limit': 10,
             },
-            'params': {
-                'skip_download': True,  # requires ffmpeg
-            },
         },
         {
             'url': 'http://www.pbs.org/wgbh/pages/frontline/united-states-of-secrets/',
@@ -290,6 +278,7 @@ class PBSIE(InfoExtractor):
         },
         {
             'url': 'http://www.pbs.org/video/2365245528/',
+            'md5': '115223d41bd55cda8ae5cd5ed4e11497',
             'info_dict': {
                 'id': '2365245528',
                 'display_id': '2365245528',
@@ -299,15 +288,13 @@ class PBSIE(InfoExtractor):
                 'duration': 6851,
                 'thumbnail': 're:^https?://.*\.jpg$',
             },
-            'params': {
-                'skip_download': True,  # requires ffmpeg
-            },
         },
         {
             # Video embedded in iframe containing angle brackets as attribute's value (e.g.
             # "<iframe style='position: absolute;<br />\ntop: 0; left: 0;' ...", see
             # https://github.com/rg3/youtube-dl/issues/7059)
             'url': 'http://www.pbs.org/food/features/a-chefs-life-season-3-episode-5-prickly-business/',
+            'md5': '84ced42850d78f1d4650297356e95e6f',
             'info_dict': {
                 'id': '2365546844',
                 'display_id': 'a-chefs-life-season-3-episode-5-prickly-business',
@@ -317,9 +304,6 @@ class PBSIE(InfoExtractor):
                 'duration': 1480,
                 'thumbnail': 're:^https?://.*\.jpg$',
             },
-            'params': {
-                'skip_download': True,  # requires ffmpeg
-            },
         },
         {
             # Frontline video embedded via flp2012.js
@@ -340,6 +324,7 @@ class PBSIE(InfoExtractor):
         {
             # Serves hd only via widget/partnerplayer page
             'url': 'http://www.pbs.org/video/2365641075/',
+            'md5': 'acfd4c400b48149a44861cb16dd305cf',
             'info_dict': {
                 'id': '2365641075',
                 'ext': 'mp4',
@@ -348,9 +333,6 @@ class PBSIE(InfoExtractor):
                 'thumbnail': 're:^https?://.*\.jpg$',
                 'formats': 'mincount:8',
             },
-            'params': {
-                'skip_download': True,  # requires ffmpeg
-            },
         },
         {
             'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',
@@ -494,6 +476,7 @@ class PBSIE(InfoExtractor):
                         info = video_info
 
         formats = []
+        http_url = None
         for num, redirect in enumerate(redirects):
             redirect_id = redirect.get('eeid')
 
@@ -514,13 +497,32 @@ class PBSIE(InfoExtractor):
 
             if determine_ext(format_url) == 'm3u8':
                 formats.extend(self._extract_m3u8_formats(
-                    format_url, display_id, 'mp4', preference=1, m3u8_id='hls'))
+                    format_url, display_id, 'mp4', m3u8_id='hls', fatal=False))
             else:
                 formats.append({
                     'url': format_url,
                     'format_id': redirect_id,
                 })
+                if re.search(r'^https?://.*(?:\d+k|baseline)', format_url):
+                    http_url = format_url
         self._remove_duplicate_formats(formats)
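+        # collect the HLS video variants; their metadata seeds the derived
+        # progressive HTTP formats below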
+        m3u8_formats = list(filter(
+            lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
+            formats))
+        if http_url:
+            for m3u8_format in m3u8_formats:
+                bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None)
+                # extract only the formats that we know will be available over HTTP:
+                # https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications
+                if not bitrate or bitrate not in ('400k', '800k', '1200k', '2500k'):
+                    continue
+                f = m3u8_format.copy()
+                f.update({
+                    'url': re.sub(r'\d+k|baseline', bitrate, http_url),
+                    'format_id': m3u8_format['format_id'].replace('hls', 'http'),
+                    'protocol': 'http',
+                })
+                formats.append(f)
         self._sort_formats(formats)
 
         rating_str = info.get('rating')
@@ -535,6 +537,19 @@ class PBSIE(InfoExtractor):
                 'ext': 'ttml',
                 'url': closed_captions_url,
             }]
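+            # sibling caption files live under sequential IDs:
+            # <id>_Encoded.dfxp, <id+1>_Encoded.srt, <id+2>_Encoded.vtt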
+            mobj = re.search(r'/(\d+)_Encoded\.dfxp', closed_captions_url)
+            if mobj:
+                ttml_caption_suffix, ttml_caption_id = mobj.group(0, 1)
+                ttml_caption_id = int(ttml_caption_id)
+                subtitles['en'].extend([{
+                    'url': closed_captions_url.replace(
+                        ttml_caption_suffix, '/%d_Encoded.srt' % (ttml_caption_id + 1)),
+                    'ext': 'srt',
+                }, {
+                    'url': closed_captions_url.replace(
+                        ttml_caption_suffix, '/%d_Encoded.vtt' % (ttml_caption_id + 2)),
+                    'ext': 'vtt',
+                }])
 
         # info['title'] is often incomplete (e.g. 'Full Episode', 'Episode 5', etc)
         # Try turning it to 'program - title' naming scheme if possible
diff --git a/youtube_dl/extractor/people.py b/youtube_dl/extractor/people.py
new file mode 100644 (file)
index 0000000..9ecdbc1
--- /dev/null
@@ -0,0 +1,32 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class PeopleIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?people\.com/people/videos/0,,(?P<id>\d+),00\.html'
+
+    _TEST = {
+        'url': 'http://www.people.com/people/videos/0,,20995451,00.html',
+        'info_dict': {
+            'id': 'ref:20995451',
+            'ext': 'mp4',
+            'title': 'Astronaut Love Triangle Victim Speaks Out: “The Crime in 2007 Hasn’t Defined Us”',
+            'description': 'Colleen Shipman speaks to PEOPLE for the first time about life after the attack',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'duration': 246.318,
+            'timestamp': 1458720585,
+            'upload_date': '20160323',
+            'uploader_id': '416418724',
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'add_ie': ['BrightcoveNew'],
+    }
+
+    def _real_extract(self, url):
+        return self.url_result(
+            'http://players.brightcove.net/416418724/default_default/index.html?videoId=ref:%s'
+            % self._match_id(url), 'BrightcoveNew')
index 514e9b4339be43b509f9c9a8a6d2b87187e5f056..c23b314e79df70e6b115fee6fb91386345d00ad4 100644 (file)
@@ -2,11 +2,15 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import parse_iso8601
+from ..utils import (
+    parse_iso8601,
+    unescapeHTML,
+)
 
 
 class PeriscopeIE(InfoExtractor):
     IE_DESC = 'Periscope'
+    IE_NAME = 'periscope'
     _VALID_URL = r'https?://(?:www\.)?periscope\.tv/[^/]+/(?P<id>[^/?#]+)'
     # Live example URLs can be found at http://onperiscope.com/
     _TESTS = [{
@@ -41,8 +45,11 @@ class PeriscopeIE(InfoExtractor):
         broadcast = broadcast_data['broadcast']
         status = broadcast['status']
 
-        uploader = broadcast.get('user_display_name') or broadcast_data.get('user', {}).get('display_name')
-        uploader_id = broadcast.get('user_id') or broadcast_data.get('user', {}).get('id')
+        user = broadcast_data.get('user', {})
+
+        uploader = broadcast.get('user_display_name') or user.get('display_name')
+        uploader_id = (broadcast.get('username') or user.get('username') or
+                       broadcast.get('user_id') or user.get('id'))
 
         title = '%s - %s' % (uploader, status) if uploader else status
         state = broadcast.get('state').lower()
@@ -79,3 +86,43 @@ class PeriscopeIE(InfoExtractor):
             'thumbnails': thumbnails,
             'formats': formats,
         }
+
+
+class PeriscopeUserIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.periscope\.tv/(?P<id>[^/]+)/?$'
+    IE_DESC = 'Periscope user videos'
+    IE_NAME = 'periscope:user'
+
+    _TEST = {
+        'url': 'https://www.periscope.tv/LularoeHusbandMike/',
+        'info_dict': {
+            'id': 'LularoeHusbandMike',
+            'title': 'LULAROE HUSBAND MIKE',
+            'description': 'md5:6cf4ec8047768098da58e446e82c82f0',
+        },
+        # Periscope only shows videos in the last 24 hours, so it's possible to
+        # get 0 videos
+        'playlist_mincount': 0,
+    }
+
+    def _real_extract(self, url):
+        user_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, user_id)
+
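+        # the page embeds its state as HTML-escaped JSON in a data-store attribute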
+        data_store = self._parse_json(
+            unescapeHTML(self._search_regex(
+                r'data-store=(["\'])(?P<data>.+?)\1',
+                webpage, 'data store', default='{}', group='data')),
+            user_id)
+
+        user = data_store.get('User', {}).get('user', {})
+        title = user.get('display_name') or user.get('username')
+        description = user.get('description')
+
+        entries = [
+            self.url_result(
+                'https://www.periscope.tv/%s/%s' % (user_id, broadcast['id']))
+            for broadcast in data_store.get('UserBroadcastHistory', {}).get('broadcasts', [])]
+
+        return self.playlist_result(entries, user_id, title, description)
index 6e60e5fe98920c64d310c3610194d98b01790ceb..f1008ae514f78f6c843e399031135afb00f5f23f 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 class PhilharmonieDeParisIE(InfoExtractor):
     IE_DESC = 'Philharmonie de Paris'
-    _VALID_URL = r'http://live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)(?P<id>\d+)'
+    _VALID_URL = r'https?://live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://live.philharmoniedeparis.fr/concert/1032066.html',
         'info_dict': {
index 788411ccc18082f59588d40704900c26dba1fe21..6c8bbe1d95c3c4972baa4c956ad1a62ef6518e2d 100644 (file)
@@ -8,7 +8,7 @@ from ..compat import compat_urllib_parse_unquote
 
 
 class PhotobucketIE(InfoExtractor):
-    _VALID_URL = r'http://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'
+    _VALID_URL = r'https?://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'
     _TEST = {
         'url': 'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0',
         'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99',
diff --git a/youtube_dl/extractor/planetaplay.py b/youtube_dl/extractor/planetaplay.py
deleted file mode 100644 (file)
index 06505e9..0000000
+++ /dev/null
@@ -1,61 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import ExtractorError
-
-
-class PlanetaPlayIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?planetaplay\.com/\?sng=(?P<id>[0-9]+)'
-    _API_URL = 'http://planetaplay.com/action/playlist/?sng={0:}'
-    _THUMBNAIL_URL = 'http://planetaplay.com/img/thumb/{thumb:}'
-    _TEST = {
-        'url': 'http://planetaplay.com/?sng=3586',
-        'md5': '9d569dceb7251a4e01355d5aea60f9db',
-        'info_dict': {
-            'id': '3586',
-            'ext': 'flv',
-            'title': 'md5:e829428ee28b1deed00de90de49d1da1',
-        },
-        'skip': 'Not accessible from Travis CI server',
-    }
-
-    _SONG_FORMATS = {
-        'lq': (0, 'http://www.planetaplay.com/videoplayback/{med_hash:}'),
-        'hq': (1, 'http://www.planetaplay.com/videoplayback/hi/{med_hash:}'),
-    }
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        response = self._download_json(
-            self._API_URL.format(video_id), video_id)['response']
-        try:
-            data = response.get('data')[0]
-        except IndexError:
-            raise ExtractorError(
-                '%s: failed to get the playlist' % self.IE_NAME, expected=True)
-
-        title = '{song_artists:} - {sng_name:}'.format(**data)
-        thumbnail = self._THUMBNAIL_URL.format(**data)
-
-        formats = []
-        for format_id, (quality, url_template) in self._SONG_FORMATS.items():
-            formats.append({
-                'format_id': format_id,
-                'url': url_template.format(**data),
-                'quality': quality,
-                'ext': 'flv',
-            })
-
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'formats': formats,
-            'thumbnail': thumbnail,
-        }
index 2856af96f49cf7928a0dd4fe79ccd287592fb3c1..57c875ef05dd74a429ea55313b0d956ef7e5ec56 100644 (file)
@@ -5,10 +5,10 @@ import re
 import os.path
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse
 from ..utils import (
     ExtractorError,
     sanitized_Request,
+    urlencode_postdata,
 )
 
 
@@ -40,7 +40,7 @@ class PlayedIE(InfoExtractor):
 
         self._sleep(2, video_id)
 
-        post = compat_urllib_parse.urlencode(data)
+        post = urlencode_postdata(data)
         headers = {
             b'Content-Type': b'application/x-www-form-urlencoded',
         }
index e360404f7270ebc607ae1b651fcc50445e811659..1e8096a259ad5568d87b96bd566f646ae641862f 100644 (file)
@@ -4,7 +4,7 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..compat import (
     compat_urlparse,
-    compat_urllib_parse,
+    compat_urllib_parse_urlencode,
 )
 from ..utils import (
     ExtractorError,
@@ -106,7 +106,7 @@ class PlaytvakIE(InfoExtractor):
         })
 
         info_url = compat_urlparse.urlunparse(
-            parsed_url._replace(query=compat_urllib_parse.urlencode(qs, True)))
+            parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
 
         json_info = self._download_json(
             info_url, video_id,
index 6d138ef25d2d5cec02a012f5a06af085a6c35d26..0bc7431189a0eed819fb85a6fbbdc1558a4b84ed 100644 (file)
@@ -4,9 +4,8 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
-    xpath_text,
+    dict_get,
     float_or_none,
-    int_or_none,
 )
 
 
@@ -23,6 +22,19 @@ class PlaywireIE(InfoExtractor):
             'duration': 145.94,
         },
     }, {
+        # m3u8 in f4m
+        'url': 'http://config.playwire.com/21772/videos/v2/4840492/zeus.json',
+        'info_dict': {
+            'id': '4840492',
+            'ext': 'mp4',
+            'title': 'ITV EL SHOW FULL',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }, {
+        # Multiple resolutions while bitrates missing
         'url': 'http://cdn.playwire.com/11625/embed/85228.html',
         'only_matching': True,
     }, {
@@ -48,25 +60,10 @@ class PlaywireIE(InfoExtractor):
         thumbnail = content.get('poster')
         src = content['media']['f4m']
 
-        f4m = self._download_xml(src, video_id)
-        base_url = xpath_text(f4m, './{http://ns.adobe.com/f4m/1.0}baseURL', 'base url', fatal=True)
-        formats = []
-        for media in f4m.findall('./{http://ns.adobe.com/f4m/1.0}media'):
-            media_url = media.get('url')
-            if not media_url:
-                continue
-            tbr = int_or_none(media.get('bitrate'))
-            width = int_or_none(media.get('width'))
-            height = int_or_none(media.get('height'))
-            f = {
-                'url': '%s/%s' % (base_url, media.attrib['url']),
-                'tbr': tbr,
-                'width': width,
-                'height': height,
-            }
-            if not (tbr or width or height):
-                f['quality'] = 1 if '-hd.' in media_url else 0
-            formats.append(f)
+        formats = self._extract_f4m_formats(src, video_id, m3u8_id='hls')
+        for a_format in formats:
+            if not dict_get(a_format, ['tbr', 'width', 'height']):
+                a_format['quality'] = 1 if '-hd.' in a_format['url'] else 0
         self._sort_formats(formats)
 
         return {
index 12e1c2862c2a7964b5613d7e207853c80a862b24..9aab7764559523e3bbc4a82f5b6ab9b71056be59 100644 (file)
@@ -8,7 +8,6 @@ import collections
 from .common import InfoExtractor
 from ..compat import (
     compat_str,
-    compat_urllib_parse,
     compat_urlparse,
 )
 from ..utils import (
@@ -17,6 +16,7 @@ from ..utils import (
     parse_duration,
     qualities,
     sanitized_Request,
+    urlencode_postdata,
 )
 
 
@@ -64,8 +64,8 @@ class PluralsightIE(PluralsightBaseIE):
         login_form = self._hidden_inputs(login_page)
 
         login_form.update({
-            'Username': username.encode('utf-8'),
-            'Password': password.encode('utf-8'),
+            'Username': username,
+            'Password': password,
         })
 
         post_url = self._search_regex(
@@ -76,7 +76,7 @@ class PluralsightIE(PluralsightBaseIE):
             post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
 
         request = sanitized_Request(
-            post_url, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
+            post_url, urlencode_postdata(login_form))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')
 
         response = self._download_webpage(
@@ -279,13 +279,18 @@ class PluralsightCourseIE(PluralsightBaseIE):
             course_id, 'Downloading course data JSON')
 
         entries = []
-        for module in course_data:
+        for num, module in enumerate(course_data, 1):
             for clip in module.get('clips', []):
                 player_parameters = clip.get('playerParameters')
                 if not player_parameters:
                     continue
-                entries.append(self.url_result(
-                    '%s/training/player?%s' % (self._API_BASE, player_parameters),
-                    'Pluralsight'))
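+                # expose each module as a chapter on the resulting entries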
+                entries.append({
+                    '_type': 'url_transparent',
+                    'url': '%s/training/player?%s' % (self._API_BASE, player_parameters),
+                    'ie_key': PluralsightIE.ie_key(),
+                    'chapter': module.get('title'),
+                    'chapter_number': num,
+                    'chapter_id': module.get('moduleRef'),
+                })
 
         return self.playlist_result(entries, course_id, title, description)
index 3e15533e9d445e96884974af398e0461438bcc8b..9894f32620c1692830df023423ae02a6199121b1 100644 (file)
@@ -1,7 +1,10 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 
-from ..compat import compat_urllib_parse
+from ..compat import (
+    compat_urllib_parse_unquote,
+    compat_urllib_parse_urlencode,
+)
 from .common import InfoExtractor
 from ..utils import (
     parse_duration,
@@ -28,9 +31,10 @@ class Porn91IE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        url = 'http://91porn.com/view_video.php?viewkey=%s' % video_id
         self._set_cookie('91porn.com', 'language', 'cn_CN')
-        webpage = self._download_webpage(url, video_id, 'get HTML content')
+
+        webpage = self._download_webpage(
+            'http://91porn.com/view_video.php?viewkey=%s' % video_id, video_id)
 
         if '作为游客,你每天只可观看10个视频' in webpage:
             raise ExtractorError('91 Porn says: Daily limit 10 videos exceeded', expected=True)
@@ -46,7 +50,7 @@ class Porn91IE(InfoExtractor):
             r'so.addVariable\(\'seccode\',\'([^\']+)\'', webpage, 'sec code')
         max_vid = self._search_regex(
             r'so.addVariable\(\'max_vid\',\'(\d+)\'', webpage, 'max vid')
-        url_params = compat_urllib_parse.urlencode({
+        url_params = compat_urllib_parse_urlencode({
             'VID': file_id,
             'mp4': '1',
             'seccode': sec_code,
@@ -54,8 +58,9 @@ class Porn91IE(InfoExtractor):
         })
         info_cn = self._download_webpage(
             'http://91porn.com/getfile.php?' + url_params, video_id,
-            'get real video url')
-        video_url = self._search_regex(r'file=([^&]+)&', info_cn, 'url')
+            'Downloading real video url')
+        video_url = compat_urllib_parse_unquote(self._search_regex(
+            r'file=([^&]+)&', info_cn, 'url'))
 
         duration = parse_duration(self._search_regex(
             r'时长:\s*</span>\s*(\d+:\d+)', webpage, 'duration', fatal=False))
index 57c78ba52a994a9c2aff224470b86b913702241f..8df12eec0d44c371d99b536b55694cfd2211f9d0 100644 (file)
@@ -1,19 +1,32 @@
 from __future__ import unicode_literals
 
 import re
-import json
 
 from .common import InfoExtractor
 from ..utils import (
+    ExtractorError,
     int_or_none,
     js_to_json,
-    qualities,
 )
 
 
 class PornHdIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)(?:/(?P<display_id>.+))?'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)(?:/(?P<display_id>.+))?'
+    _TESTS = [{
+        'url': 'http://www.pornhd.com/videos/9864/selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video',
+        'md5': 'c8b964b1f0a4b5f7f28ae3a5c9f86ad5',
+        'info_dict': {
+            'id': '9864',
+            'display_id': 'selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video',
+            'ext': 'mp4',
+            'title': 'Restroom selfie masturbation',
+            'description': 'md5:3748420395e03e31ac96857a8f125b2b',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'view_count': int,
+            'age_limit': 18,
+        }
+    }, {
+        # removed video
         'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
         'md5': '956b8ca569f7f4d8ec563e2c41598441',
         'info_dict': {
@@ -25,8 +38,9 @@ class PornHdIE(InfoExtractor):
             'thumbnail': 're:^https?://.*\.jpg',
             'view_count': int,
             'age_limit': 18,
-        }
-    }
+        },
+        'skip': 'Not available anymore',
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -38,28 +52,38 @@ class PornHdIE(InfoExtractor):
         title = self._html_search_regex(
             [r'<span[^>]+class=["\']video-name["\'][^>]*>([^<]+)',
              r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title')
-        description = self._html_search_regex(
-            r'<div class="description">([^<]+)</div>', webpage, 'description', fatal=False)
-        view_count = int_or_none(self._html_search_regex(
-            r'(\d+) views\s*</span>', webpage, 'view count', fatal=False))
-        thumbnail = self._search_regex(
-            r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
 
-        quality = qualities(['sd', 'hd'])
-        sources = json.loads(js_to_json(self._search_regex(
+        sources = self._parse_json(js_to_json(self._search_regex(
             r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}[;,)]",
-            webpage, 'sources')))
+            webpage, 'sources', default='{}')), video_id)
+
+        if not sources:
+            message = self._html_search_regex(
+                r'(?s)<(div|p)[^>]+class="no-video"[^>]*>(?P<value>.+?)</\1',
+                webpage, 'error message', group='value')
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
+
         formats = []
-        for qname, video_url in sources.items():
+        for format_id, video_url in sources.items():
             if not video_url:
                 continue
+            height = int_or_none(self._search_regex(
+                r'^(\d+)[pP]', format_id, 'height', default=None))
             formats.append({
                 'url': video_url,
-                'format_id': qname,
-                'quality': quality(qname),
+                'format_id': format_id,
+                'height': height,
             })
         self._sort_formats(formats)
 
+        description = self._html_search_regex(
+            r'<(div|p)[^>]+class="description"[^>]*>(?P<value>[^<]+)</\1',
+            webpage, 'description', fatal=False, group='value')
+        view_count = int_or_none(self._html_search_regex(
+            r'(\d+) views\s*<', webpage, 'view count', fatal=False))
+        thumbnail = self._search_regex(
+            r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
+
         return {
             'id': video_id,
             'display_id': display_id,
index 5a55c25e78cf4b5ad95c05e5856f639d9bdd8617..6d57e1d35eb567b26f65dea8cc74c653f955c08f 100644 (file)
@@ -1,10 +1,13 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
+import itertools
 import os
 import re
 
 from .common import InfoExtractor
 from ..compat import (
+    compat_HTTPError,
     compat_urllib_parse_unquote,
     compat_urllib_parse_unquote_plus,
     compat_urllib_parse_urlparse,
@@ -12,6 +15,7 @@ from ..compat import (
 from ..utils import (
     ExtractorError,
     int_or_none,
+    orderedSet,
     sanitized_Request,
     str_to_int,
 )
@@ -36,7 +40,25 @@ class PornHubIE(InfoExtractor):
             'dislike_count': int,
             'comment_count': int,
             'age_limit': 18,
-        }
+        },
+    }, {
+        # non-ASCII title
+        'url': 'http://www.pornhub.com/view_video.php?viewkey=1331683002',
+        'info_dict': {
+            'id': '1331683002',
+            'ext': 'mp4',
+            'title': '重庆婷婷女王足交',
+            'uploader': 'cj397186295',
+            'duration': 1753,
+            'view_count': int,
+            'like_count': int,
+            'dislike_count': int,
+            'comment_count': int,
+            'age_limit': 18,
+        },
+        'params': {
+            'skip_download': True,
+        },
     }, {
         'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
         'only_matching': True,
@@ -73,19 +95,25 @@ class PornHubIE(InfoExtractor):
                 'PornHub said: %s' % error_msg,
                 expected=True, video_id=video_id)
 
+        # video_title from flashvars contains whitespace instead of non-ASCII
+        # characters (see http://www.pornhub.com/view_video.php?viewkey=1331683002),
+        # so we no longer rely on it.
+        title = self._html_search_meta(
+            'twitter:title', webpage, default=None) or self._search_regex(
+            (r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)',
+             r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1',
+             r'shareTitle\s*=\s*(["\'])(?P<title>.+?)\1'),
+            webpage, 'title', group='title')
+
         flashvars = self._parse_json(
             self._search_regex(
-                r'var\s+flashv1ars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
+                r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
             video_id)
         if flashvars:
-            video_title = flashvars.get('video_title')
             thumbnail = flashvars.get('image_url')
             duration = int_or_none(flashvars.get('video_duration'))
         else:
-            video_title, thumbnail, duration = [None] * 3
-
-        if not video_title:
-            video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
+            title, thumbnail, duration = [None] * 3
 
         video_uploader = self._html_search_regex(
             r'(?s)From:&nbsp;.+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<',
@@ -134,7 +162,7 @@ class PornHubIE(InfoExtractor):
         return {
             'id': video_id,
             'uploader': video_uploader,
-            'title': video_title,
+            'title': title,
             'thumbnail': thumbnail,
             'duration': duration,
             'view_count': view_count,
@@ -149,9 +177,12 @@ class PornHubIE(InfoExtractor):
 class PornHubPlaylistBaseIE(InfoExtractor):
     def _extract_entries(self, webpage):
         return [
-            self.url_result('http://www.pornhub.com/%s' % video_url, PornHubIE.ie_key())
-            for video_url in set(re.findall(
-                r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"', webpage))
+            self.url_result(
+                'http://www.pornhub.com/%s' % video_url,
+                PornHubIE.ie_key(), video_title=title)
+            for video_url, title in orderedSet(re.findall(
+                r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
+                webpage))
         ]
 
     def _real_extract(self, url):
@@ -185,16 +216,31 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE):
 class PornHubUserVideosIE(PornHubPlaylistBaseIE):
     _VALID_URL = r'https?://(?:www\.)?pornhub\.com/users/(?P<id>[^/]+)/videos'
     _TESTS = [{
-        'url': 'http://www.pornhub.com/users/rushandlia/videos',
+        'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
         'info_dict': {
-            'id': 'rushandlia',
+            'id': 'zoe_ph',
         },
-        'playlist_mincount': 13,
+        'playlist_mincount': 171,
+    }, {
+        'url': 'http://www.pornhub.com/users/rushandlia/videos',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
         user_id = self._match_id(url)
 
-        webpage = self._download_webpage(url, user_id)
-
-        return self.playlist_result(self._extract_entries(webpage), user_id)
+        entries = []
+        for page_num in itertools.count(1):
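+            # page until a 404 or an empty page signals the end of the list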
+            try:
+                webpage = self._download_webpage(
+                    url, user_id, 'Downloading page %d' % page_num,
+                    query={'page': page_num})
+            except ExtractorError as e:
+                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
+                    break
+                raise
+            page_entries = self._extract_entries(webpage)
+            if not page_entries:
+                break
+            entries.extend(page_entries)
+
+        return self.playlist_result(entries, user_id)
index 1a53fd71c068626e3d91e737c9b295f4e0a1b0a9..6b51e5c5400ee59859eb0d29cb740a31f34f3a96 100644 (file)
@@ -13,7 +13,7 @@ from ..utils import (
 
 
 class PornoVoisinesIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?pornovoisines\.com/showvideo/(?P<id>\d+)/(?P<display_id>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?pornovoisines\.com/showvideo/(?P<id>\d+)/(?P<display_id>[^/]+)'
 
     _VIDEO_URL_TEMPLATE = 'http://stream%d.pornovoisines.com' \
         '/static/media/video/transcoded/%s-640x360-1000-trscded.mp4'
diff --git a/youtube_dl/extractor/presstv.py b/youtube_dl/extractor/presstv.py
new file mode 100644 (file)
index 0000000..2da93ed
--- /dev/null
@@ -0,0 +1,74 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import remove_start
+
+
+class PressTVIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?presstv\.ir/[^/]+/(?P<y>\d+)/(?P<m>\d+)/(?P<d>\d+)/(?P<id>\d+)/(?P<display_id>[^/]+)?'
+
+    _TEST = {
+        'url': 'http://www.presstv.ir/Detail/2016/04/09/459911/Australian-sewerage-treatment-facility-/',
+        'md5': '5d7e3195a447cb13e9267e931d8dd5a5',
+        'info_dict': {
+            'id': '459911',
+            'display_id': 'Australian-sewerage-treatment-facility-',
+            'ext': 'mp4',
+            'title': 'Organic mattresses used to clean waste water',
+            'upload_date': '20160409',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'description': 'md5:20002e654bbafb6908395a5c0cfcd125'
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id') or video_id
+
+        webpage = self._download_webpage(url, display_id)
+
+        # extract video URL from webpage
+        video_url = self._hidden_inputs(webpage)['inpPlayback']
+
+        # build list of available formats
+        # specified in http://www.presstv.ir/Scripts/playback.js
+        base_url = 'http://192.99.219.222:82/presstv'
+        _formats = [
+            (180, '_low200.mp4'),
+            (360, '_low400.mp4'),
+            (720, '_low800.mp4'),
+            (1080, '.mp4')
+        ]
+
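+        # video_url ends in '.mp4'; drop that extension and append the
+        # per-quality suffix from the table above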
+        formats = [{
+            'url': base_url + video_url[:-4] + extension,
+            'format_id': '%dp' % height,
+            'height': height,
+        } for height, extension in _formats]
+
+        # extract video metadata
+        title = remove_start(
+            self._html_search_meta('title', webpage, fatal=True), 'PressTV-')
+
+        thumbnail = self._og_search_thumbnail(webpage)
+        description = self._og_search_description(webpage)
+
+        upload_date = '%04d%02d%02d' % (
+            int(mobj.group('y')),
+            int(mobj.group('m')),
+            int(mobj.group('d')),
+        )
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': thumbnail,
+            'upload_date': upload_date,
+            'description': description
+        }
index 85aae95765370249023d8202b9d51c44acb99a97..0c1024772c860404209bd3b3f2a183d90c41face 100644 (file)
@@ -1,10 +1,10 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse
 from ..utils import (
     ExtractorError,
     sanitized_Request,
+    urlencode_postdata,
 )
 
 
@@ -42,7 +42,7 @@ class PrimeShareTVIE(InfoExtractor):
         self._sleep(wait_time, video_id)
 
         req = sanitized_Request(
-            url, compat_urllib_parse.urlencode(fields), headers)
+            url, urlencode_postdata(fields), headers)
         video_page = self._download_webpage(
             req, video_id, 'Downloading video page')
 
index d5357283addc5e1faafebff51044083eeb4fafa3..f93bd19ff6dde40c87672b4fd18a3f1aab11382e 100644 (file)
@@ -4,11 +4,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse
 from ..utils import (
     determine_ext,
     ExtractorError,
     sanitized_Request,
+    urlencode_postdata,
 )
 
 
@@ -34,7 +34,7 @@ class PromptFileIE(InfoExtractor):
                                  expected=True)
 
         fields = self._hidden_inputs(webpage)
-        post = compat_urllib_parse.urlencode(fields)
+        post = urlencode_postdata(fields)
         req = sanitized_Request(url, post)
         req.add_header('Content-type', 'application/x-www-form-urlencoded')
         webpage = self._download_webpage(
index 670e6950f3fcc67e78ecd466475e7317c8dfd826..07d49d489d6779b0f6bb7bd12bc610497c576c2e 100644 (file)
@@ -5,9 +5,7 @@ import re
 
 from hashlib import sha1
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-)
+from ..compat import compat_urllib_parse_urlencode
 from ..utils import (
     ExtractorError,
     determine_ext,
@@ -235,7 +233,7 @@ class ProSiebenSat1IE(InfoExtractor):
         client_name = 'kolibri-2.0.19-splec4'
         client_location = url
 
-        videos_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos?%s' % compat_urllib_parse.urlencode({
+        videos_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos?%s' % compat_urllib_parse_urlencode({
             'access_token': access_token,
             'client_location': client_location,
             'client_name': client_name,
@@ -256,7 +254,7 @@ class ProSiebenSat1IE(InfoExtractor):
         client_id = g[:2] + sha1(''.join([clip_id, g, access_token, client_location, g, client_name])
                                  .encode('utf-8')).hexdigest()
 
-        sources_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources?%s' % (clip_id, compat_urllib_parse.urlencode({
+        sources_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources?%s' % (clip_id, compat_urllib_parse_urlencode({
             'access_token': access_token,
             'client_id': client_id,
             'client_location': client_location,
@@ -270,7 +268,7 @@ class ProSiebenSat1IE(InfoExtractor):
                                           client_location, source_ids_str, g, client_name])
                                  .encode('utf-8')).hexdigest()
 
-        url_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url?%s' % (clip_id, compat_urllib_parse.urlencode({
+        url_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url?%s' % (clip_id, compat_urllib_parse_urlencode({
             'access_token': access_token,
             'client_id': client_id,
             'client_location': client_location,
index cce84b9e4d95e53731f01d334830faac9f1e008d..fca30e1aae5b35f9ef439fccc8396b5127f79aa9 100644 (file)
@@ -40,7 +40,7 @@ class Puls4IE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         error_message = self._html_search_regex(
-            r'<div class="message-error">(.+?)</div>',
+            r'<div[^>]+class="message-error"[^>]*>(.+?)</div>',
             webpage, 'error message', default=None)
         if error_message:
             raise ExtractorError(
index 6d5732d45c3d3e22d085319ff45449881ac73ad2..cc0416cb81eb23ed87d1dae0cdf2573a6df8936a 100644 (file)
@@ -7,19 +7,19 @@ from .common import InfoExtractor
 
 
 class PyvideoIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
+    _VALID_URL = r'https?://(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
 
     _TESTS = [
         {
             'url': 'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
-            'md5': 'de317418c8bc76b1fd8633e4f32acbc6',
+            'md5': '520915673e53a5c5d487c36e0c4d85b5',
             'info_dict': {
                 'id': '24_4WWkSmNo',
-                'ext': 'mp4',
+                'ext': 'webm',
                 'title': 'Become a logging expert in 30 minutes',
                 'description': 'md5:9665350d466c67fb5b1598de379021f7',
                 'upload_date': '20130320',
-                'uploader': 'NextDayVideo',
+                'uploader': 'Next Day Video',
                 'uploader_id': 'NextDayVideo',
             },
             'add_ie': ['Youtube'],
index 45a3c41c59438b8784434fb2c5dbfb9cef3c1674..ff0af9543c2b5e5527f406958e9ae5ae4d1adbda 100644 (file)
@@ -18,7 +18,7 @@ from ..utils import (
 class QQMusicIE(InfoExtractor):
     IE_NAME = 'qqmusic'
     IE_DESC = 'QQ音乐'
-    _VALID_URL = r'http://y.qq.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)'
+    _VALID_URL = r'https?://y\.qq\.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)'
     _TESTS = [{
         'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD',
         'md5': '9ce1c1c8445f561506d2e3cfb0255705',
@@ -172,7 +172,7 @@ class QQPlaylistBaseIE(InfoExtractor):
 class QQMusicSingerIE(QQPlaylistBaseIE):
     IE_NAME = 'qqmusic:singer'
     IE_DESC = 'QQ音乐 - 歌手'
-    _VALID_URL = r'http://y.qq.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)'
+    _VALID_URL = r'https?://y\.qq\.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)'
     _TEST = {
         'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2',
         'info_dict': {
@@ -217,7 +217,7 @@ class QQMusicSingerIE(QQPlaylistBaseIE):
 class QQMusicAlbumIE(QQPlaylistBaseIE):
     IE_NAME = 'qqmusic:album'
     IE_DESC = 'QQ音乐 - 专辑'
-    _VALID_URL = r'http://y.qq.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)'
+    _VALID_URL = r'https?://y\.qq\.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)'
 
     _TESTS = [{
         'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1',
@@ -260,7 +260,7 @@ class QQMusicAlbumIE(QQPlaylistBaseIE):
 class QQMusicToplistIE(QQPlaylistBaseIE):
     IE_NAME = 'qqmusic:toplist'
     IE_DESC = 'QQ音乐 - 排行榜'
-    _VALID_URL = r'http://y\.qq\.com/#type=toplist&p=(?P<id>(top|global)_[0-9]+)'
+    _VALID_URL = r'https?://y\.qq\.com/#type=toplist&p=(?P<id>(top|global)_[0-9]+)'
 
     _TESTS = [{
         'url': 'http://y.qq.com/#type=toplist&p=global_123',
@@ -314,7 +314,7 @@ class QQMusicToplistIE(QQPlaylistBaseIE):
 class QQMusicPlaylistIE(QQPlaylistBaseIE):
     IE_NAME = 'qqmusic:playlist'
     IE_DESC = 'QQ音乐 - 歌单'
-    _VALID_URL = r'http://y\.qq\.com/#type=taoge&id=(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://y\.qq\.com/#type=taoge&id=(?P<id>[0-9]+)'
 
     _TESTS = [{
         'url': 'http://y.qq.com/#type=taoge&id=3462654915',
diff --git a/youtube_dl/extractor/quickvid.py b/youtube_dl/extractor/quickvid.py
deleted file mode 100644 (file)
index f414e23..0000000
+++ /dev/null
@@ -1,54 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import (
-    compat_urlparse,
-)
-from ..utils import (
-    determine_ext,
-    int_or_none,
-)
-
-
-class QuickVidIE(InfoExtractor):
-    _VALID_URL = r'https?://(www\.)?quickvid\.org/watch\.php\?v=(?P<id>[a-zA-Z_0-9-]+)'
-    _TEST = {
-        'url': 'http://quickvid.org/watch.php?v=sUQT3RCG8dx',
-        'md5': 'c0c72dd473f260c06c808a05d19acdc5',
-        'info_dict': {
-            'id': 'sUQT3RCG8dx',
-            'ext': 'mp4',
-            'title': 'Nick Offerman\'s Summer Reading Recap',
-            'thumbnail': 're:^https?://.*\.(?:png|jpg|gif)$',
-            'view_count': int,
-        },
-        'skip': 'Not accessible from Travis CI server',
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-
-        title = self._html_search_regex(r'<h2>(.*?)</h2>', webpage, 'title')
-        view_count = int_or_none(self._html_search_regex(
-            r'(?s)<div id="views">(.*?)</div>',
-            webpage, 'view count', fatal=False))
-        video_code = self._search_regex(
-            r'(?s)<video id="video"[^>]*>(.*?)</video>', webpage, 'video code')
-        formats = [
-            {
-                'url': compat_urlparse.urljoin(url, src),
-                'format_id': determine_ext(src, None),
-            } for src in re.findall('<source\s+src="([^"]+)"', video_code)
-        ]
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'formats': formats,
-            'thumbnail': self._og_search_thumbnail(webpage),
-            'view_count': view_count,
-        }
index 976c8feec657f8de731d3ffadfa09189ed1628cf..069dbfaed0638e396d024ec81d5142d18f9ad90f 100644 (file)
@@ -2,22 +2,19 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import (
-    js_to_json,
-    unescapeHTML,
-    int_or_none,
-)
+from ..utils import int_or_none
 
 
 class R7IE(InfoExtractor):
-    _VALID_URL = r'''(?x)https?://
+    _VALID_URL = r'''(?x)
+                        https?://
                         (?:
                             (?:[a-zA-Z]+)\.r7\.com(?:/[^/]+)+/idmedia/|
                             noticias\.r7\.com(?:/[^/]+)+/[^/]+-|
                             player\.r7\.com/video/i/
                         )
                         (?P<id>[\da-f]{24})
-                        '''
+                    '''
     _TESTS = [{
         'url': 'http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html',
         'md5': '403c4e393617e8e8ddc748978ee8efde',
@@ -25,6 +22,7 @@ class R7IE(InfoExtractor):
             'id': '54e7050b0cf2ff57e0279389',
             'ext': 'mp4',
             'title': 'Policiais humilham suspeito à beira da morte: "Morre com dignidade"',
+            'description': 'md5:01812008664be76a6479aa58ec865b72',
             'thumbnail': 're:^https?://.*\.jpg$',
             'duration': 98,
             'like_count': int,
@@ -44,45 +42,72 @@ class R7IE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        webpage = self._download_webpage(
-            'http://player.r7.com/video/i/%s' % video_id, video_id)
+        video = self._download_json(
+            'http://player-api.r7.com/video/i/%s' % video_id, video_id)
 
-        item = self._parse_json(js_to_json(self._search_regex(
-            r'(?s)var\s+item\s*=\s*({.+?});', webpage, 'player')), video_id)
-
-        title = unescapeHTML(item['title'])
-        thumbnail = item.get('init', {}).get('thumbUri')
-        duration = None
-
-        statistics = item.get('statistics', {})
-        like_count = int_or_none(statistics.get('likes'))
-        view_count = int_or_none(statistics.get('views'))
+        title = video['title']
 
         formats = []
-        for format_key, format_dict in item['playlist'][0].items():
-            src = format_dict.get('src')
-            if not src:
-                continue
-            format_id = format_dict.get('format') or format_key
-            if duration is None:
-                duration = format_dict.get('duration')
-            if '.f4m' in src:
-                formats.extend(self._extract_f4m_formats(src, video_id, preference=-1))
-            elif src.endswith('.m3u8'):
-                formats.extend(self._extract_m3u8_formats(src, video_id, 'mp4', preference=-2))
-            else:
-                formats.append({
-                    'url': src,
-                    'format_id': format_id,
-                })
+        media_url_hls = video.get('media_url_hls')
+        if media_url_hls:
+            formats.extend(self._extract_m3u8_formats(
+                media_url_hls, video_id, 'mp4', entry_protocol='m3u8_native',
+                m3u8_id='hls', fatal=False))
+        media_url = video.get('media_url')
+        if media_url:
+            f = {
+                'url': media_url,
+                'format_id': 'http',
+            }
+            # m3u8 format always matches the http format, let's copy metadata from
+            # one to another
+            m3u8_formats = list(filter(
+                lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
+                formats))
+            if len(m3u8_formats) == 1:
+                f_copy = m3u8_formats[0].copy()
+                f_copy.update(f)
+                f_copy['protocol'] = 'http'
+                f = f_copy
+            formats.append(f)
         self._sort_formats(formats)
 
+        description = video.get('description')
+        thumbnail = video.get('thumb')
+        duration = int_or_none(video.get('media_duration'))
+        like_count = int_or_none(video.get('likes'))
+        view_count = int_or_none(video.get('views'))
+
         return {
             'id': video_id,
             'title': title,
+            'description': description,
             'thumbnail': thumbnail,
             'duration': duration,
             'like_count': like_count,
             'view_count': view_count,
             'formats': formats,
         }
+
+
+class R7ArticleIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:[a-zA-Z]+)\.r7\.com/(?:[^/]+/)+[^/?#&]+-(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://tv.r7.com/record-play/balanco-geral/videos/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-16102015',
+        'only_matching': True,
+    }
+
+    @classmethod
+    def suitable(cls, url):
+        return False if R7IE.suitable(url) else super(R7ArticleIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        video_id = self._search_regex(
+            r'<div[^>]+(?:id=["\']player-|class=["\']embed["\'][^>]+id=["\'])([\da-f]{24})',
+            webpage, 'video id')
+
+        return self.url_result('http://player.r7.com/video/i/%s' % video_id, R7IE.ie_key())
diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py
new file mode 100644 (file)
index 0000000..4f05bbd
--- /dev/null
@@ -0,0 +1,130 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    xpath_text,
+    find_xpath_attr,
+    determine_ext,
+    int_or_none,
+    unified_strdate,
+    xpath_element,
+    ExtractorError,
+)
+
+
+class RadioCanadaIE(InfoExtractor):
+    IE_NAME = 'radiocanada'
+    _VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272',
+        'info_dict': {
+            'id': '7184272',
+            'ext': 'flv',
+            'title': 'Le parcours du tireur capté sur vidéo',
+            'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa',
+            'upload_date': '20141023',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        app_code, video_id = re.match(self._VALID_URL, url).groups()
+
+        formats = []
+        # TODO: extract m3u8 and f4m formats
+        # m3u8 formats can be extracted using the 'ipad' device_type, but the
+        # server returns a 403 error code when ffmpeg tries to download segments
+        # f4m formats can be extracted using the 'flashhd' device_type, but they
+        # produce an unplayable file
+        for device_type in ('flash',):
+            v_data = self._download_xml(
+                'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx',
+                video_id, note='Downloading %s XML' % device_type, query={
+                    'appCode': app_code,
+                    'idMedia': video_id,
+                    'connectionType': 'broadband',
+                    'multibitrate': 'true',
+                    'deviceType': device_type,
+                    # paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction
+                    'paysJ391wsHjbOJwvCs26toz': 'CA',
+                    'bypasslock': 'NZt5K62gRqfc',
+                })
+            v_url = xpath_text(v_data, 'url')
+            if not v_url:
+                continue
+            if v_url == 'null':
+                raise ExtractorError('%s said: %s' % (
+                    self.IE_NAME, xpath_text(v_data, 'message')), expected=True)
+            ext = determine_ext(v_url)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    v_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+            elif ext == 'f4m':
+                formats.extend(self._extract_f4m_formats(v_url, video_id, f4m_id='hds', fatal=False))
+            else:
+                bitrates = xpath_element(v_data, 'bitrates')
+                for url_e in bitrates.findall('url'):
+                    tbr = int_or_none(url_e.get('bitrate'))
+                    if not tbr:
+                        continue
+                    formats.append({
+                        'format_id': 'rtmp-%d' % tbr,
+                        'url': re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url),
+                        'ext': 'flv',
+                        'protocol': 'rtmp',
+                        'width': int_or_none(url_e.get('width')),
+                        'height': int_or_none(url_e.get('height')),
+                        'tbr': tbr,
+                    })
+        self._sort_formats(formats)
+
+        metadata = self._download_xml(
+            'http://api.radio-canada.ca/metaMedia/v1/index.ashx',
+            video_id, note='Downloading metadata XML', query={
+                'appCode': app_code,
+                'idMedia': video_id,
+            })
+
+        def get_meta(name):
+            el = find_xpath_attr(metadata, './/Meta', 'name', name)
+            return el.text if el is not None else None
+
+        return {
+            'id': video_id,
+            'title': get_meta('Title'),
+            'description': get_meta('Description') or get_meta('ShortDescription'),
+            'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'),
+            'duration': int_or_none(get_meta('length')),
+            'series': get_meta('Emission'),
+            'season_number': int_or_none(get_meta('SrcSaison')),
+            'episode_number': int_or_none(get_meta('SrcEpisode')),
+            'upload_date': unified_strdate(get_meta('Date')),
+            'formats': formats,
+        }
+
+
+class RadioCanadaAudioVideoIE(InfoExtractor):
+    IE_NAME = 'radiocanada:audiovideo'
+    _VALID_URL = r'https?://ici\.radio-canada\.ca/audio-video/media-(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',
+        'info_dict': {
+            'id': '7527184',
+            'ext': 'flv',
+            'title': 'Barack Obama au Vietnam',
+            'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam',
+            'upload_date': '20160523',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        return self.url_result('radiocanada:medianet:%s' % self._match_id(url))
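
RadioCanadaAudioVideoIE hands off through the internal radiocanada: scheme, and the leading alternation in RadioCanadaIE._VALID_URL accepts that scheme alongside the public mediaconsole URL, so both routes land in the same extractor. A quick standalone check, using the regex from the diff and sample URLs from the tests above:

import re

_VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)'

for url in ('radiocanada:medianet:7527184',
            'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272'):
    app_code, video_id = re.match(_VALID_URL, url).groups()
    print(app_code, video_id)  # medianet 7527184, then medianet 7184272
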
index 884c284206cb73303a6631695872ce79ecf7795e..ec4fa6e602ea779dd6d3a530ea6cfb639eee3cf4 100644 (file)
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import(
+from ..utils import (
     unified_strdate,
     str_to_int,
 )
index a4dc5c335e152ce47424a73263dc208d942d31ab..e36ce1aa1940deafd5a633bec814e7462008c3b1 100644 (file)
@@ -18,7 +18,7 @@ from ..utils import (
 
 
 class RaiTVIE(InfoExtractor):
-    _VALID_URL = r'http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/(?:[^/]+/)+media/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html'
+    _VALID_URL = r'https?://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/(?:[^/]+/)+media/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html'
     _TESTS = [
         {
             'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
@@ -175,7 +175,7 @@ class RaiTVIE(InfoExtractor):
 
 
 class RaiIE(InfoExtractor):
-    _VALID_URL = r'http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html'
+    _VALID_URL = r'https?://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html'
     _TESTS = [
         {
             'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html',
index d6054d7175fd49a22117dd357bea7905f6e739be..721fc3a9e2d2b3431051ea00982f72ae1d98ff65 100644 (file)
@@ -1,11 +1,16 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    str_to_int,
+    unified_strdate,
+)
 
 
 class RedTubeIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://www.redtube.com/66418',
         'md5': '7b8c22b5e7098a3e1c09709df1126d2d',
@@ -13,6 +18,9 @@ class RedTubeIE(InfoExtractor):
             'id': '66418',
             'ext': 'mp4',
             'title': 'Sucked on a toilet',
+            'upload_date': '20120831',
+            'duration': 596,
+            'view_count': int,
             'age_limit': 18,
         }
     }
@@ -24,12 +32,39 @@ class RedTubeIE(InfoExtractor):
         if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']):
             raise ExtractorError('Video %s has been removed' % video_id, expected=True)
 
-        video_url = self._html_search_regex(
-            r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
-        video_title = self._html_search_regex(
-            r'<h1 class="videoTitle[^"]*">(.+?)</h1>',
-            webpage, 'title')
-        video_thumbnail = self._og_search_thumbnail(webpage)
+        title = self._html_search_regex(
+            (r'<h1 class="videoTitle[^"]*">(?P<title>.+?)</h1>',
+             r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'),
+            webpage, 'title', group='title')
+
+        formats = []
+        sources = self._parse_json(
+            self._search_regex(
+                r'sources\s*:\s*({.+?})', webpage, 'source', default='{}'),
+            video_id, fatal=False)
+        if sources and isinstance(sources, dict):
+            for format_id, format_url in sources.items():
+                if format_url:
+                    formats.append({
+                        'url': format_url,
+                        'format_id': format_id,
+                        'height': int_or_none(format_id),
+                    })
+        else:
+            video_url = self._html_search_regex(
+                r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
+            formats.append({'url': video_url})
+        self._sort_formats(formats)
+
+        thumbnail = self._og_search_thumbnail(webpage)
+        upload_date = unified_strdate(self._search_regex(
+            r'<span[^>]+class="added-time"[^>]*>ADDED ([^<]+)<',
+            webpage, 'upload date', fatal=False))
+        duration = int_or_none(self._search_regex(
+            r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
+        view_count = str_to_int(self._search_regex(
+            r'<span[^>]*>VIEWS</span></td>\s*<td>([\d,.]+)',
+            webpage, 'view count', fatal=False))
 
         # No self-labeling, but they describe themselves as
         # "Home of Videos Porno"
@@ -37,9 +72,12 @@ class RedTubeIE(InfoExtractor):
 
         return {
             'id': video_id,
-            'url': video_url,
             'ext': 'mp4',
-            'title': video_title,
-            'thumbnail': video_thumbnail,
+            'title': title,
+            'thumbnail': thumbnail,
+            'upload_date': upload_date,
+            'duration': duration,
+            'view_count': view_count,
             'age_limit': age_limit,
+            'formats': formats,
         }
index b17c2bfc06b7bd63a1abc4bc112c6692d98b9756..fd50065d4ad05d668919f2c0392e99f3ab637032 100644 (file)
@@ -31,6 +31,7 @@ class RestudyIE(InfoExtractor):
         formats = self._extract_smil_formats(
             'https://www.restudy.dk/awsmedia/SmilDirectory/video_%s.xml' % video_id,
             video_id)
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
diff --git a/youtube_dl/extractor/reuters.py b/youtube_dl/extractor/reuters.py
new file mode 100644 (file)
index 0000000..961d504
--- /dev/null
@@ -0,0 +1,69 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    js_to_json,
+    int_or_none,
+    unescapeHTML,
+)
+
+
+class ReutersIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?reuters\.com/.*?\?.*?videoId=(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.reuters.com/video/2016/05/20/san-francisco-police-chief-resigns?videoId=368575562',
+        'md5': '8015113643a0b12838f160b0b81cc2ee',
+        'info_dict': {
+            'id': '368575562',
+            'ext': 'mp4',
+            'title': 'San Francisco police chief resigns',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(
+            'http://www.reuters.com/assets/iframe/yovideo?videoId=%s' % video_id, video_id)
+        video_data = js_to_json(self._search_regex(
+            r'(?s)Reuters\.yovideo\.drawPlayer\(({.*?})\);',
+            webpage, 'video data'))
+
+        def get_json_value(key, fatal=False):
+            return self._search_regex(r'"%s"\s*:\s*"([^"]+)"' % key, video_data, key, fatal=fatal)
+
+        title = unescapeHTML(get_json_value('title', fatal=True))
+        mmid, fid = re.search(r',/(\d+)\?f=(\d+)', get_json_value('flv', fatal=True)).groups()
+
+        mas_data = self._download_json(
+            'http://mas-e.cds1.yospace.com/mas/%s/%s?trans=json' % (mmid, fid),
+            video_id, transform_source=js_to_json)
+        formats = []
+        for f in mas_data:
+            f_url = f.get('url')
+            if not f_url:
+                continue
+            method = f.get('method')
+            if method == 'hls':
+                formats.extend(self._extract_m3u8_formats(
+                    f_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+            else:
+                container = f.get('container')
+                ext = '3gp' if method == 'mobile' else container
+                formats.append({
+                    'format_id': ext,
+                    'url': f_url,
+                    'ext': ext,
+                    'container': container if method != 'mobile' else None,
+                })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': get_json_value('thumb'),
+            'duration': int_or_none(get_json_value('seconds')),
+            'formats': formats,
+        }
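
get_json_value() above skips a full JSON parse and lifts scalar string fields straight out of the player-config blob with a regex, which suffices because every field it needs is a quoted string. A standalone sketch with an invented video_data sample:

import re

video_data = '{"title": "San Francisco police chief resigns", "seconds": "76"}'


def get_json_value(key):
    m = re.search(r'"%s"\s*:\s*"([^"]+)"' % key, video_data)
    return m.group(1) if m else None


print(get_json_value('title'))    # San Francisco police chief resigns
print(get_json_value('seconds'))  # 76
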
index b1b8800b97c9eb8caad2c03f999f1bc8f304c4da..833d8a2f0d3813014224e39a8d2d41fb0e51d515 100644 (file)
@@ -13,13 +13,69 @@ from ..utils import (
 )
 
 
+class Revision3EmbedIE(InfoExtractor):
+    IE_NAME = 'revision3:embed'
+    _VALID_URL = r'(?:revision3:(?:(?P<playlist_type>[^:]+):)?|https?://(?:(?:(?:www|embed)\.)?(?:revision3|animalist)|(?:(?:api|embed)\.)?seekernetwork)\.com/player/embed\?videoId=)(?P<playlist_id>\d+)'
+    _TEST = {
+        'url': 'http://api.seekernetwork.com/player/embed?videoId=67558',
+        'md5': '83bcd157cab89ad7318dd7b8c9cf1306',
+        'info_dict': {
+            'id': '67558',
+            'ext': 'mp4',
+            'title': 'The Pros & Cons Of Zoos',
+            'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?',
+            'uploader_id': 'dnews',
+            'uploader': 'DNews',
+        }
+    }
+    _API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        playlist_id = mobj.group('playlist_id')
+        playlist_type = mobj.group('playlist_type') or 'video_id'
+        video_data = self._download_json(
+            'http://revision3.com/api/getPlaylist.json', playlist_id, query={
+                'api_key': self._API_KEY,
+                'codecs': 'h264,vp8,theora',
+                playlist_type: playlist_id,
+            })['items'][0]
+
+        formats = []
+        for vcodec, media in video_data['media'].items():
+            for quality_id, quality in media.items():
+                if quality_id == 'hls':
+                    formats.extend(self._extract_m3u8_formats(
+                        quality['url'], playlist_id, 'mp4',
+                        'm3u8_native', m3u8_id='hls', fatal=False))
+                else:
+                    formats.append({
+                        'url': quality['url'],
+                        'format_id': '%s-%s' % (vcodec, quality_id),
+                        'tbr': int_or_none(quality.get('bitrate')),
+                        'vcodec': vcodec,
+                    })
+        self._sort_formats(formats)
+
+        return {
+            'id': playlist_id,
+            'title': unescapeHTML(video_data['title']),
+            'description': unescapeHTML(video_data.get('summary')),
+            'uploader': video_data.get('show', {}).get('name'),
+            'uploader_id': video_data.get('show', {}).get('slug'),
+            'duration': int_or_none(video_data.get('duration')),
+            'formats': formats,
+        }
+
+
 class Revision3IE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:revision3|testtube|animalist)\.com)/(?P<id>[^/]+(?:/[^/?#]+)?)'
+    IE_NAME = 'revision'
+    _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:revision3|animalist)\.com)/(?P<id>[^/]+(?:/[^/?#]+)?)'
     _TESTS = [{
         'url': 'http://www.revision3.com/technobuffalo/5-google-predictions-for-2016',
         'md5': 'd94a72d85d0a829766de4deb8daaf7df',
         'info_dict': {
-            'id': '73034',
+            'id': '71089',
             'display_id': 'technobuffalo/5-google-predictions-for-2016',
             'ext': 'webm',
             'title': '5 Google Predictions for 2016',
@@ -31,89 +87,76 @@ class Revision3IE(InfoExtractor):
             'uploader_id': 'technobuffalo',
         }
     }, {
-        'url': 'http://testtube.com/brainstuff',
-        'info_dict': {
-            'id': '251',
-            'title': 'BrainStuff',
-            'description': 'Whether the topic is popcorn or particle physics, you can count on the HowStuffWorks team to explore-and explain-the everyday science in the world around us on BrainStuff.',
-        },
-        'playlist_mincount': 93,
+        # Show
+        'url': 'http://revision3.com/variant',
+        'only_matching': True,
     }, {
-        'url': 'https://testtube.com/dnews/5-weird-ways-plants-can-eat-animals?utm_source=FB&utm_medium=DNews&utm_campaign=DNewsSocial',
-        'info_dict': {
-            'id': '60163',
-            'display_id': 'dnews/5-weird-ways-plants-can-eat-animals',
-            'duration': 275,
-            'ext': 'webm',
-            'title': '5 Weird Ways Plants Can Eat Animals',
-            'description': 'Why have some plants evolved to eat meat?',
-            'upload_date': '20150120',
-            'timestamp': 1421763300,
-            'uploader': 'DNews',
-            'uploader_id': 'dnews',
-        },
+        # Tag
+        'url': 'http://revision3.com/vr',
+        'only_matching': True,
     }]
     _PAGE_DATA_TEMPLATE = 'http://www.%s/apiProxy/ddn/%s?domain=%s'
-    _API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62'
 
     def _real_extract(self, url):
         domain, display_id = re.match(self._VALID_URL, url).groups()
+        site = domain.split('.')[0]
         page_info = self._download_json(
             self._PAGE_DATA_TEMPLATE % (domain, display_id, domain), display_id)
 
-        if page_info['data']['type'] == 'episode':
-            episode_data = page_info['data']
-            video_id = compat_str(episode_data['video']['data']['id'])
-            video_data = self._download_json(
-                'http://revision3.com/api/getPlaylist.json?api_key=%s&codecs=h264,vp8,theora&video_id=%s' % (self._API_KEY, video_id),
-                video_id)['items'][0]
-
-            formats = []
-            for vcodec, media in video_data['media'].items():
-                for quality_id, quality in media.items():
-                    if quality_id == 'hls':
-                        formats.extend(self._extract_m3u8_formats(
-                            quality['url'], video_id, 'mp4',
-                            'm3u8_native', m3u8_id='hls', fatal=False))
-                    else:
-                        formats.append({
-                            'url': quality['url'],
-                            'format_id': '%s-%s' % (vcodec, quality_id),
-                            'tbr': int_or_none(quality.get('bitrate')),
-                            'vcodec': vcodec,
-                        })
-            self._sort_formats(formats)
+        page_data = page_info['data']
+        page_type = page_data['type']
+        if page_type in ('episode', 'embed'):
+            show_data = page_data['show']['data']
+            page_id = compat_str(page_data['id'])
+            video_id = compat_str(page_data['video']['data']['id'])
 
             preference = qualities(['mini', 'small', 'medium', 'large'])
             thumbnails = [{
                 'url': image_url,
                 'id': image_id,
                 'preference': preference(image_id)
-            } for image_id, image_url in video_data.get('images', {}).items()]
+            } for image_id, image_url in page_data.get('images', {}).items()]
 
-            return {
-                'id': video_id,
+            info = {
+                'id': page_id,
                 'display_id': display_id,
-                'title': unescapeHTML(video_data['title']),
-                'description': unescapeHTML(video_data.get('summary')),
-                'timestamp': parse_iso8601(episode_data.get('publishTime'), ' '),
-                'author': episode_data.get('author'),
-                'uploader': video_data.get('show', {}).get('name'),
-                'uploader_id': video_data.get('show', {}).get('slug'),
-                'duration': int_or_none(video_data.get('duration')),
+                'title': unescapeHTML(page_data['name']),
+                'description': unescapeHTML(page_data.get('summary')),
+                'timestamp': parse_iso8601(page_data.get('publishTime'), ' '),
+                'author': page_data.get('author'),
+                'uploader': show_data.get('name'),
+                'uploader_id': show_data.get('slug'),
                 'thumbnails': thumbnails,
-                'formats': formats,
+                'extractor_key': site,
             }
+
+            if page_type == 'embed':
+                info.update({
+                    '_type': 'url_transparent',
+                    'url': page_data['video']['data']['embed'],
+                })
+                return info
+
+            info.update({
+                '_type': 'url_transparent',
+                'url': 'revision3:%s' % video_id,
+            })
+            return info
         else:
-            show_data = page_info['show']['data']
+            list_data = page_info[page_type]['data']
             episodes_data = page_info['episodes']['data']
             num_episodes = page_info['meta']['totalEpisodes']
             processed_episodes = 0
             entries = []
             page_num = 1
             while True:
-                entries.extend([self.url_result(
-                    'http://%s/%s/%s' % (domain, display_id, episode['slug'])) for episode in episodes_data])
+                entries.extend([{
+                    '_type': 'url',
+                    'url': 'http://%s%s' % (domain, episode['path']),
+                    'id': compat_str(episode['id']),
+                    'ie_key': 'Revision3',
+                    'extractor_key': site,
+                } for episode in episodes_data])
                 processed_episodes += len(episodes_data)
                 if processed_episodes == num_episodes:
                     break
@@ -123,5 +166,5 @@ class Revision3IE(InfoExtractor):
                     display_id)['episodes']['data']
 
             return self.playlist_result(
-                entries, compat_str(show_data['id']),
-                show_data.get('name'), show_data.get('summary'))
+                entries, compat_str(list_data['id']),
+                list_data.get('name'), list_data.get('summary'))
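
The playlist branch above keeps requesting pages until the number of processed episodes reaches meta.totalEpisodes. A hedged sketch of that loop with the paged API call replaced by canned data:

pages = [
    [{'id': 1, 'path': '/a'}, {'id': 2, 'path': '/b'}],
    [{'id': 3, 'path': '/c'}],
]
num_episodes = 3

entries = []
processed_episodes = 0
page_num = 1
episodes_data = pages[page_num - 1]
while True:
    entries.extend(episodes_data)
    processed_episodes += len(episodes_data)
    if processed_episodes == num_episodes:
        break
    page_num += 1
    episodes_data = pages[page_num - 1]  # stands in for the next API page
print(len(entries))  # 3
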
diff --git a/youtube_dl/extractor/rice.py b/youtube_dl/extractor/rice.py
new file mode 100644 (file)
index 0000000..f855719
--- /dev/null
@@ -0,0 +1,116 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_parse_qs
+from ..utils import (
+    xpath_text,
+    xpath_element,
+    int_or_none,
+    parse_iso8601,
+    ExtractorError,
+)
+
+
+class RICEIE(InfoExtractor):
+    _VALID_URL = r'https?://mediahub\.rice\.edu/app/[Pp]ortal/video\.aspx\?(?P<query>.+)'
+    _TEST = {
+        'url': 'https://mediahub.rice.edu/app/Portal/video.aspx?PortalID=25ffd62c-3d01-4b29-8c70-7c94270efb3e&DestinationID=66bc9434-03bd-4725-b47e-c659d8d809db&ContentID=YEWIvbhb40aqdjMD1ALSqw',
+        'md5': '9b83b4a2eead4912dc3b7fac7c449b6a',
+        'info_dict': {
+            'id': 'YEWIvbhb40aqdjMD1ALSqw',
+            'ext': 'mp4',
+            'title': 'Active Learning in Archeology',
+            'upload_date': '20140616',
+            'timestamp': 1402926346,
+        }
+    }
+    _NS = 'http://schemas.datacontract.org/2004/07/ensembleVideo.Data.Service.Contracts.Models.Player.Config'
+
+    def _real_extract(self, url):
+        qs = compat_parse_qs(re.match(self._VALID_URL, url).group('query'))
+        if not qs.get('PortalID') or not qs.get('DestinationID') or not qs.get('ContentID'):
+            raise ExtractorError('Invalid URL', expected=True)
+
+        portal_id = qs['PortalID'][0]
+        playlist_id = qs['DestinationID'][0]
+        content_id = qs['ContentID'][0]
+
+        content_data = self._download_xml('https://mediahub.rice.edu/api/portal/GetContentTitle', content_id, query={
+            'portalId': portal_id,
+            'playlistId': playlist_id,
+            'contentId': content_id
+        })
+        metadata = xpath_element(content_data, './/metaData', fatal=True)
+        title = xpath_text(metadata, 'primaryTitle', fatal=True)
+        encodings = xpath_element(content_data, './/encodings', fatal=True)
+        player_data = self._download_xml('https://mediahub.rice.edu/api/player/GetPlayerConfig', content_id, query={
+            'temporaryLinkId': xpath_text(encodings, 'temporaryLinkId', fatal=True),
+            'contentId': content_id,
+        })
+
+        common_fmt = {}
+        dimensions = xpath_text(encodings, 'dimensions')
+        if dimensions:
+            wh = dimensions.split('x')
+            if len(wh) == 2:
+                common_fmt.update({
+                    'width': int_or_none(wh[0]),
+                    'height': int_or_none(wh[1]),
+                })
+
+        formats = []
+        rtsp_path = xpath_text(player_data, self._xpath_ns('RtspPath', self._NS))
+        if rtsp_path:
+            fmt = {
+                'url': rtsp_path,
+                'format_id': 'rtsp',
+            }
+            fmt.update(common_fmt)
+            formats.append(fmt)
+        for source in player_data.findall(self._xpath_ns('.//Source', self._NS)):
+            video_url = xpath_text(source, self._xpath_ns('File', self._NS))
+            if not video_url:
+                continue
+            if '.m3u8' in video_url:
+                formats.extend(self._extract_m3u8_formats(video_url, content_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+            else:
+                fmt = {
+                    'url': video_url,
+                    'format_id': video_url.split(':')[0],
+                }
+                fmt.update(common_fmt)
+                rtmp = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', video_url)
+                if rtmp:
+                    fmt.update({
+                        'url': rtmp.group('url'),
+                        'play_path': rtmp.group('playpath'),
+                        'app': rtmp.group('app'),
+                        'ext': 'flv',
+                    })
+                formats.append(fmt)
+        self._sort_formats(formats)
+
+        thumbnails = []
+        for content_asset in content_data.findall('.//contentAssets'):
+            asset_type = xpath_text(content_asset, 'type')
+            if asset_type == 'image':
+                image_url = xpath_text(content_asset, 'httpPath')
+                if not image_url:
+                    continue
+                thumbnails.append({
+                    'id': xpath_text(content_asset, 'ID'),
+                    'url': image_url,
+                })
+
+        return {
+            'id': content_id,
+            'title': title,
+            'description': xpath_text(metadata, 'abstract'),
+            'duration': int_or_none(xpath_text(metadata, 'duration')),
+            'timestamp': parse_iso8601(xpath_text(metadata, 'dateUpdated')),
+            'thumbnails': thumbnails,
+            'formats': formats,
+        }
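
RICEIE refuses to touch the network unless all three query parameters are present. A standalone sketch of that validation using the stdlib functions behind compat_parse_qs (sample parameter values invented):

try:
    from urllib.parse import parse_qs  # Python 3
except ImportError:
    from urlparse import parse_qs  # Python 2

query = 'PortalID=25ffd62c&DestinationID=66bc9434&ContentID=YEWIvbhb40aqdjMD1ALSqw'
qs = parse_qs(query)
if not all(qs.get(k) for k in ('PortalID', 'DestinationID', 'ContentID')):
    raise ValueError('Invalid URL')
portal_id = qs['PortalID'][0]
playlist_id = qs['DestinationID'][0]
content_id = qs['ContentID'][0]
print(portal_id, playlist_id, content_id)
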
index 50875807577d076cd81c2021aa7695334ef272da..2c2c707bd36ad3f737072bf1f9011027e0514bd9 100644 (file)
@@ -6,7 +6,7 @@ from .common import InfoExtractor
 
 
 class RingTVIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)'
     _TEST = {
         'url': 'http://ringtv.craveonline.com/news/310833-luis-collazo-says-victor-ortiz-better-not-quit-on-jan-30',
         'md5': 'd25945f5df41cdca2d2587165ac28720',
diff --git a/youtube_dl/extractor/rockstargames.py b/youtube_dl/extractor/rockstargames.py
new file mode 100644 (file)
index 0000000..48128e2
--- /dev/null
@@ -0,0 +1,69 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    parse_iso8601,
+)
+
+
+class RockstarGamesIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?rockstargames\.com/videos(?:/video/|#?/?\?.*\bvideo=)(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.rockstargames.com/videos/video/11544/',
+        'md5': '03b5caa6e357a4bd50e3143fc03e5733',
+        'info_dict': {
+            'id': '11544',
+            'ext': 'mp4',
+            'title': 'Further Adventures in Finance and Felony Trailer',
+            'description': 'md5:6d31f55f30cb101b5476c4a379e324a3',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'timestamp': 1464876000,
+            'upload_date': '20160602',
+        }
+    }, {
+        'url': 'http://www.rockstargames.com/videos#/?video=48',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        video = self._download_json(
+            'https://www.rockstargames.com/videoplayer/videos/get-video.json',
+            video_id, query={
+                'id': video_id,
+                'locale': 'en_us',
+            })['video']
+
+        title = video['title']
+
+        formats = []
+        for video in video['files_processed']['video/mp4']:
+            if not video.get('src'):
+                continue
+            resolution = video.get('resolution')
+            height = int_or_none(self._search_regex(
+                r'^(\d+)[pP]$', resolution or '', 'height', default=None))
+            formats.append({
+                'url': self._proto_relative_url(video['src']),
+                'format_id': resolution,
+                'height': height,
+            })
+
+        if not formats:
+            youtube_id = video.get('youtube_id')
+            if youtube_id:
+                return self.url_result(youtube_id, 'Youtube')
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': video.get('description'),
+            'thumbnail': self._proto_relative_url(video.get('screencap')),
+            'timestamp': parse_iso8601(video.get('created')),
+            'formats': formats,
+        }
index e8bb20a0803700937875355d2f854d1de88cea1a..f9cd48790c3b4a92b82bf1880020d53a074b1434 100644 (file)
@@ -1,11 +1,11 @@
 from __future__ import unicode_literals
 
-from .videodetective import VideoDetectiveIE
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from .internetvideoarchive import InternetVideoArchiveIE
 
 
-# It just uses the same method as videodetective.com,
-# the internetvideoarchive.com is extracted from the og:video property
-class RottenTomatoesIE(VideoDetectiveIE):
+class RottenTomatoesIE(InfoExtractor):
     _VALID_URL = r'https?://www\.rottentomatoes\.com/m/[^/]+/trailers/(?P<id>\d+)'
 
     _TEST = {
@@ -13,7 +13,19 @@ class RottenTomatoesIE(VideoDetectiveIE):
         'info_dict': {
             'id': '613340',
             'ext': 'mp4',
-            'title': 'TOY STORY 3',
-            'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.',
+            'title': 'Toy Story 3',
         },
     }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        og_video = self._og_search_video_url(webpage)
+        query = compat_urlparse.urlparse(og_video).query
+
+        return {
+            '_type': 'url_transparent',
+            'url': InternetVideoArchiveIE._build_xml_url(query),
+            'ie_key': InternetVideoArchiveIE.ie_key(),
+            'title': self._og_search_title(webpage),
+        }
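
The url_transparent result above delegates the actual extraction to InternetVideoArchiveIE while letting the fields set here, such as the properly cased title, override whatever the inner extractor returns. A simplified model of those merge semantics (an assumption for illustration, not YoutubeDL's actual implementation):

def merge_url_transparent(outer, inner):
    merged = dict(inner)
    for k, v in outer.items():
        if k not in ('_type', 'url', 'ie_key') and v is not None:
            merged[k] = v
    return merged


outer = {'_type': 'url_transparent', 'ie_key': 'InternetVideoArchive',
         'url': 'http://example.com/inner', 'title': 'Toy Story 3'}
inner = {'id': '613340', 'ext': 'mp4', 'title': 'TOY STORY 3'}
print(merge_url_transparent(outer, inner)['title'])  # Toy Story 3
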
index e42b319a3e224aa6b078cad7756e5c44b7f620d8..28cc5522d89083cec2ad7631d51fb0aa0798ccbd 100644 (file)
@@ -4,12 +4,18 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
-    unescapeHTML,
+    ExtractorError,
 )
 
 
 class RTBFIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?rtbf\.be/(?:video/[^?]+\?.*\bid=|ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=)(?P<id>\d+)'
+    _VALID_URL = r'''(?x)
+        https?://(?:www\.)?rtbf\.be/
+        (?:
+            video/[^?]+\?.*\bid=|
+            ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=|
+            auvio/[^/]+\?.*id=
+        )(?P<id>\d+)'''
     _TESTS = [{
         'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',
         'md5': '799f334ddf2c0a582ba80c44655be570',
@@ -17,7 +23,11 @@ class RTBFIE(InfoExtractor):
             'id': '1921274',
             'ext': 'mp4',
             'title': 'Les Diables au coeur (épisode 2)',
+            'description': 'Football - Diables Rouges',
             'duration': 3099,
+            'upload_date': '20140425',
+            'timestamp': 1398456336,
+            'uploader': 'rtbfsport',
         }
     }, {
         # geo restricted
@@ -26,45 +36,63 @@ class RTBFIE(InfoExtractor):
     }, {
         'url': 'http://www.rtbf.be/ouftivi/niouzz?videoId=2055858',
         'only_matching': True,
+    }, {
+        'url': 'http://www.rtbf.be/auvio/detail_jeudi-en-prime-siegfried-bracke?id=2102996',
+        'only_matching': True,
     }]
-
+    _IMAGE_HOST = 'http://ds1.ds.static.rtbf.be'
+    _PROVIDERS = {
+        'YOUTUBE': 'Youtube',
+        'DAILYMOTION': 'Dailymotion',
+        'VIMEO': 'Vimeo',
+    }
     _QUALITIES = [
-        ('mobile', 'mobile'),
-        ('web', 'SD'),
-        ('url', 'MD'),
+        ('mobile', 'SD'),
+        ('web', 'MD'),
         ('high', 'HD'),
     ]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
+        data = self._download_json(
+            'http://www.rtbf.be/api/media/video?method=getVideoDetail&args[]=%s' % video_id, video_id)
 
-        webpage = self._download_webpage(
-            'http://www.rtbf.be/video/embed?id=%s' % video_id, video_id)
+        error = data.get('error')
+        if error:
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
 
-        data = self._parse_json(
-            unescapeHTML(self._search_regex(
-                r'data-media="([^"]+)"', webpage, 'data video')),
-            video_id)
+        data = data['data']
+
+        provider = data.get('provider')
+        if provider in self._PROVIDERS:
+            return self.url_result(data['url'], self._PROVIDERS[provider])
 
-        if data.get('provider').lower() == 'youtube':
-            video_url = data.get('downloadUrl') or data.get('url')
-            return self.url_result(video_url, 'Youtube')
         formats = []
         for key, format_id in self._QUALITIES:
-            format_url = data['sources'].get(key)
+            format_url = data.get(key + 'Url')
             if format_url:
                 formats.append({
                     'format_id': format_id,
                     'url': format_url,
                 })
 
+        thumbnails = []
+        for thumbnail_id, thumbnail_url in data.get('thumbnail', {}).items():
+            if thumbnail_id != 'default':
+                thumbnails.append({
+                    'url': self._IMAGE_HOST + thumbnail_url,
+                    'id': thumbnail_id,
+                })
+
         return {
             'id': video_id,
             'formats': formats,
             'title': data['title'],
             'description': data.get('description') or data.get('subtitle'),
-            'thumbnail': data.get('thumbnail'),
+            'thumbnails': thumbnails,
             'duration': data.get('duration') or data.get('realDuration'),
             'timestamp': int_or_none(data.get('created')),
             'view_count': int_or_none(data.get('viewCount')),
+            'uploader': data.get('channel'),
+            'tags': data.get('tags'),
         }
index 042bc8dab9a1d6b273c94e03b88f152a49217e89..ebe563ebb89e86e28a6bf55669cd066aca44d851 100644 (file)
@@ -39,9 +39,14 @@ class RteIE(InfoExtractor):
         duration = float_or_none(self._html_search_meta(
             'duration', webpage, 'duration', fatal=False), 1000)
 
-        thumbnail_id = self._search_regex(
-            r'<meta name="thumbnail" content="uri:irus:(.*?)" />', webpage, 'thumbnail')
-        thumbnail = 'http://img.rasset.ie/' + thumbnail_id + '.jpg'
+        thumbnail = None
+        thumbnail_meta = self._html_search_meta('thumbnail', webpage)
+        if thumbnail_meta:
+            thumbnail_id = self._search_regex(
+                r'uri:irus:(.+)', thumbnail_meta,
+                'thumbnail id', fatal=False)
+            if thumbnail_id:
+                thumbnail = 'http://img.rasset.ie/%s.jpg' % thumbnail_id
 
         feeds_url = self._html_search_meta('feeds-prefix', webpage, 'feeds url') + video_id
         json_string = self._download_json(feeds_url, video_id)
@@ -49,6 +54,7 @@ class RteIE(InfoExtractor):
         # f4m_url = server + relative_url
         f4m_url = json_string['shows'][0]['media:group'][0]['rte:server'] + json_string['shows'][0]['media:group'][0]['url']
         f4m_formats = self._extract_f4m_formats(f4m_url, video_id)
+        self._sort_formats(f4m_formats)
 
         return {
             'id': video_id,
index 543d94417f6d52f9cb5f4bd4508d5bcf3d4b984e..4d612b5e3c1c1bfceec034d476cd5c0952da5b8c 100644 (file)
@@ -20,18 +20,19 @@ class RtlNlIE(InfoExtractor):
         (?P<id>[0-9a-f-]+)'''
 
     _TESTS = [{
-        'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677',
-        'md5': 'cc16baa36a6c169391f0764fa6b16654',
+        'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
+        'md5': '473d1946c1fdd050b2c0161a4b13c373',
         'info_dict': {
-            'id': '6e4203a6-0a5e-3596-8424-c599a59e0677',
+            'id': '82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
             'ext': 'mp4',
-            'title': 'RTL Nieuws - Laat',
-            'description': 'md5:6b61f66510c8889923b11f2778c72dc5',
-            'timestamp': 1408051800,
-            'upload_date': '20140814',
-            'duration': 576.880,
+            'title': 'RTL Nieuws',
+            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+            'timestamp': 1461951000,
+            'upload_date': '20160429',
+            'duration': 1167.96,
         },
     }, {
+        # best format available a3t
         'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
         'md5': 'dea7474214af1271d91ef332fb8be7ea',
         'info_dict': {
@@ -39,18 +40,19 @@ class RtlNlIE(InfoExtractor):
             'ext': 'mp4',
             'timestamp': 1424039400,
             'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag',
-            'thumbnail': 're:^https?://screenshots\.rtl\.nl/system/thumb/sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$',
+            'thumbnail': 're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$',
             'upload_date': '20150215',
             'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
         }
     }, {
         # empty synopsis and missing episodes (see https://github.com/rg3/youtube-dl/issues/6275)
+        # best format available nettv
         'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a/autoplay=false',
         'info_dict': {
             'id': 'f536aac0-1dc3-4314-920e-3bd1c5b3811a',
             'ext': 'mp4',
             'title': 'RTL Nieuws - Meer beelden van overval juwelier',
-            'thumbnail': 're:^https?://screenshots\.rtl\.nl/system/thumb/sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$',
+            'thumbnail': 're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$',
             'timestamp': 1437233400,
             'upload_date': '20150718',
             'duration': 30.474,
@@ -94,22 +96,46 @@ class RtlNlIE(InfoExtractor):
         videopath = material['videopath']
         m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath
 
-        formats = self._extract_m3u8_formats(m3u8_url, uuid, ext='mp4')
+        formats = self._extract_m3u8_formats(
+            m3u8_url, uuid, 'mp4', m3u8_id='hls', fatal=False)
 
         video_urlpart = videopath.split('/adaptive/')[1][:-5]
         PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4'
 
-        formats.extend([
-            {
-                'url': PG_URL_TEMPLATE % ('a2m', video_urlpart),
-                'format_id': 'pg-sd',
-            },
-            {
-                'url': PG_URL_TEMPLATE % ('a3m', video_urlpart),
-                'format_id': 'pg-hd',
-                'quality': 0,
+        PG_FORMATS = (
+            ('a2t', 512, 288),
+            ('a3t', 704, 400),
+            ('nettv', 1280, 720),
+        )
+
+        def pg_format(format_id, width, height):
+            return {
+                'url': PG_URL_TEMPLATE % (format_id, video_urlpart),
+                'format_id': 'pg-%s' % format_id,
+                'protocol': 'http',
+                'width': width,
+                'height': height,
             }
-        ])
+
+        if not formats:
+            formats = [pg_format(*pg_tuple) for pg_tuple in PG_FORMATS]
+        else:
+            pg_formats = []
+            for format_id, width, height in PG_FORMATS:
+                try:
+                    # Find the hls format with the same height as this
+                    # progressive format and copy its metadata over.
+                    f = next(f for f in formats if f.get('height') == height)
+                    # hls formats may have invalid width
+                    f['width'] = width
+                    f_copy = f.copy()
+                    f_copy.update(pg_format(format_id, width, height))
+                    pg_formats.append(f_copy)
+                except StopIteration:
+                    # Missing hls format does mean that no progressive format with
+                    # such width and height exists either.
+                    pass
+            formats.extend(pg_formats)
         self._sort_formats(formats)
 
         thumbnails = []
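
The pg_formats branch above mirrors HLS metadata onto the progressive MP4 renditions by matching heights, and silently drops any rendition with no HLS counterpart. A standalone sketch with invented sample formats:

formats = [{'format_id': 'hls-720', 'height': 720, 'vcodec': 'avc1', 'tbr': 2500}]
PG_FORMATS = (
    ('a2t', 512, 288),
    ('a3t', 704, 400),
    ('nettv', 1280, 720),
)

pg_formats = []
for format_id, width, height in PG_FORMATS:
    try:
        f = next(f for f in formats if f.get('height') == height)
        f_copy = f.copy()
        f_copy.update({
            'format_id': 'pg-%s' % format_id,
            'url': 'http://pg.example.com/%s.mp4' % format_id,  # placeholder URL
            'protocol': 'http',
            'width': width,
            'height': height,
        })
        pg_formats.append(f_copy)
    except StopIteration:
        # no matching hls rendition, so no progressive rendition either
        pass
print([f['format_id'] for f in pg_formats])  # ['pg-nettv']
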
index 603d7bd00620cef13cd957a54053dc7f77010e05..f11e3588b0796718e2ecbe316b53b968a08df98c 100644 (file)
@@ -6,13 +6,16 @@ import re
 import time
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_struct_unpack,
+)
 from ..utils import (
     ExtractorError,
     float_or_none,
     remove_end,
+    remove_start,
     sanitized_Request,
     std_headers,
-    struct_unpack,
 )
 
 
@@ -20,7 +23,7 @@ def _decrypt_url(png):
     encrypted_data = base64.b64decode(png.encode('utf-8'))
     text_index = encrypted_data.find(b'tEXt')
     text_chunk = encrypted_data[text_index - 4:]
-    length = struct_unpack('!I', text_chunk[:4])[0]
+    length = compat_struct_unpack('!I', text_chunk[:4])[0]
     # Use bytearray to get integers when iterating in both python 2.x and 3.x
     data = bytearray(text_chunk[8:8 + length])
     data = [chr(b) for b in data if b != 0]
@@ -61,7 +64,7 @@ def _decrypt_url(png):
 class RTVEALaCartaIE(InfoExtractor):
     IE_NAME = 'rtve.es:alacarta'
     IE_DESC = 'RTVE a la carta'
-    _VALID_URL = r'http://www\.rtve\.es/(m/)?alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'
+    _VALID_URL = r'https?://www\.rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
@@ -84,6 +87,9 @@ class RTVEALaCartaIE(InfoExtractor):
     }, {
         'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
         'only_matching': True,
+    }, {
+        'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/',
+        'only_matching': True,
     }]
 
     def _real_initialize(self):
@@ -178,14 +184,14 @@ class RTVEInfantilIE(InfoExtractor):
 class RTVELiveIE(InfoExtractor):
     IE_NAME = 'rtve.es:live'
     IE_DESC = 'RTVE.es live streams'
-    _VALID_URL = r'http://www\.rtve\.es/(?:deportes/directo|noticias|television)/(?P<id>[a-zA-Z0-9-]+)'
+    _VALID_URL = r'https?://www\.rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'
 
     _TESTS = [{
-        'url': 'http://www.rtve.es/noticias/directo-la-1/',
+        'url': 'http://www.rtve.es/directo/la-1/',
         'info_dict': {
-            'id': 'directo-la-1',
-            'ext': 'flv',
-            'title': 're:^La 1 de TVE [0-9]{4}-[0-9]{2}-[0-9]{2}Z[0-9]{6}$',
+            'id': 'la-1',
+            'ext': 'mp4',
+            'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2}Z[0-9]{6}$',
         },
         'params': {
             'skip_download': 'live stream',
@@ -198,23 +204,21 @@ class RTVELiveIE(InfoExtractor):
         video_id = mobj.group('id')
 
         webpage = self._download_webpage(url, video_id)
-        player_url = self._search_regex(
-            r'<param name="movie" value="([^"]+)"/>', webpage, 'player URL')
-        title = remove_end(self._og_search_title(webpage), ' en directo')
+        title = remove_end(self._og_search_title(webpage), ' en directo en RTVE.es')
+        title = remove_start(title, 'Estoy viendo ')
         title += ' ' + time.strftime('%Y-%m-%dZ%H%M%S', start_time)
 
         vidplayer_id = self._search_regex(
-            r' id="vidplayer([0-9]+)"', webpage, 'internal video ID')
-        png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % vidplayer_id
+            r'playerId=player([0-9]+)', webpage, 'internal video ID')
+        png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/amonet/videos/%s.png' % vidplayer_id
         png = self._download_webpage(png_url, video_id, 'Downloading url information')
-        video_url = _decrypt_url(png)
+        m3u8_url = _decrypt_url(png)
+        formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
-            'ext': 'flv',
             'title': title,
-            'url': video_url,
-            'app': 'rtve-live-live?ovpfv=2.1.2',
-            'player_url': player_url,
-            'rtmp_live': True,
+            'formats': formats,
+            'is_live': True,
         }
index 7c9d4b0cd6fe09d4e792df3da25c2059f9b056e6..4896d09d666e687010ae3cb6ebe0e2bfaec537d6 100644 (file)
@@ -38,6 +38,7 @@ class RTVNHIE(InfoExtractor):
                     item['file'], video_id, ext='mp4', entry_protocol='m3u8_native'))
             elif item.get('type') == '':
                 formats.append({'url': item['file']})
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
index 0e470e73f538fd60d7ed34cbe515042f6abc078b..1f7c262993c8ce7e0d602f612fc6316e80052f66 100644 (file)
@@ -5,7 +5,7 @@ from .common import InfoExtractor
 
 
 class RUHDIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?ruhd\.ru/play\.php\?vid=(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?ruhd\.ru/play\.php\?vid=(?P<id>\d+)'
     _TEST = {
         'url': 'http://www.ruhd.ru/play.php?vid=207',
         'md5': 'd1a9ec4edf8598e3fbd92bb16072ba83',
index c5c47d01ecef917ce037fc0ede75dd2b5e770138..9ca4ae147cb1e3c430de3abd9fd0927aaee2ed5a 100644 (file)
@@ -122,7 +122,7 @@ class RutubeEmbedIE(InfoExtractor):
 class RutubeChannelIE(InfoExtractor):
     IE_NAME = 'rutube:channel'
     IE_DESC = 'Rutube channels'
-    _VALID_URL = r'http://rutube\.ru/tags/video/(?P<id>\d+)'
+    _VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://rutube.ru/tags/video/1800/',
         'info_dict': {
@@ -156,7 +156,7 @@ class RutubeChannelIE(InfoExtractor):
 class RutubeMovieIE(RutubeChannelIE):
     IE_NAME = 'rutube:movie'
     IE_DESC = 'Rutube movies'
-    _VALID_URL = r'http://rutube\.ru/metainfo/tv/(?P<id>\d+)'
+    _VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)'
     _TESTS = []
 
     _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json'
@@ -174,7 +174,7 @@ class RutubeMovieIE(RutubeChannelIE):
 class RutubePersonIE(RutubeChannelIE):
     IE_NAME = 'rutube:person'
     IE_DESC = 'Rutube person videos'
-    _VALID_URL = r'http://rutube\.ru/video/person/(?P<id>\d+)'
+    _VALID_URL = r'https?://rutube\.ru/video/person/(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://rutube.ru/video/person/313878/',
         'info_dict': {
index f7fe1feceeccbb3c7dc6860f0ee5d6a9c10a57df..a2379eb04c2e6744a49f315ebee2a0c9fb0170f6 100644 (file)
@@ -14,7 +14,7 @@ class RUTVIE(InfoExtractor):
     IE_DESC = 'RUTV.RU'
     _VALID_URL = r'''(?x)
         https?://player\.(?:rutv\.ru|vgtrk\.com)/
-            (?P<path>flash2v/container\.swf\?id=
+            (?P<path>flash\d+v/container\.swf\?id=
             |iframe/(?P<type>swf|video|live)/id/
             |index/iframe/cast_id/)
             (?P<id>\d+)'''
@@ -109,7 +109,7 @@ class RUTVIE(InfoExtractor):
             return mobj.group('url')
 
         mobj = re.search(
-            r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://player\.(?:rutv\.ru|vgtrk\.com)/flash2v/container\.swf\?id=.+?\2)',
+            r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://player\.(?:rutv\.ru|vgtrk\.com)/flash\d+v/container\.swf\?id=.+?\2)',
             webpage)
         if mobj:
             return mobj.group('url')
@@ -119,7 +119,7 @@ class RUTVIE(InfoExtractor):
         video_id = mobj.group('id')
         video_path = mobj.group('path')
 
-        if video_path.startswith('flash2v'):
+        if re.match(r'flash\d+v', video_path):
             video_type = 'video'
         elif video_path.startswith('iframe'):
             video_type = mobj.group('type')
@@ -168,7 +168,7 @@ class RUTVIE(InfoExtractor):
                         'play_path': mobj.group('playpath'),
                         'app': mobj.group('app'),
                         'page_url': 'http://player.rutv.ru',
-                        'player_url': 'http://player.rutv.ru/flash2v/osmf.swf?i=22',
+                        'player_url': 'http://player.rutv.ru/flash3v/osmf.swf?i=22',
                         'rtmp_live': True,
                         'ext': 'flv',
                         'vbr': int(quality),
index 7de7b7273523ea8a43a6d22e8ab684afb4fc5875..6ba91f202baadbfd72160cc739efde868a60d421 100644 (file)
@@ -4,14 +4,13 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from .brightcove import BrightcoveLegacyIE
 
 from ..utils import (
     ExtractorError,
     sanitized_Request,
-    smuggle_url,
     std_headers,
     urlencode_postdata,
+    update_url_query,
 )
 
 
@@ -20,28 +19,30 @@ class SafariBaseIE(InfoExtractor):
     _SUCCESSFUL_LOGIN_REGEX = r'<a href="/accounts/logout/"[^>]*>Sign Out</a>'
     _NETRC_MACHINE = 'safari'
 
-    _API_BASE = 'https://www.safaribooksonline.com/api/v1/book'
+    _API_BASE = 'https://www.safaribooksonline.com/api/v1'
     _API_FORMAT = 'json'
 
     LOGGED_IN = False
 
     def _real_initialize(self):
-        # We only need to log in once for courses or individual videos
-        if not self.LOGGED_IN:
-            self._login()
-            SafariBaseIE.LOGGED_IN = True
+        self._login()
 
     def _login(self):
+        # We only need to log in once for courses or individual videos
+        if self.LOGGED_IN:
+            return
+
         (username, password) = self._get_login_info()
         if username is None:
-            self.raise_login_required('safaribooksonline.com account is required')
+            return
 
-        headers = std_headers
+        headers = std_headers.copy()
         if 'Referer' not in headers:
             headers['Referer'] = self._LOGIN_URL
+        login_page_request = sanitized_Request(self._LOGIN_URL, headers=headers)
 
         login_page = self._download_webpage(
-            self._LOGIN_URL, None,
+            login_page_request, None,
             'Downloading login form')
 
         csrf = self._html_search_regex(
@@ -66,35 +67,27 @@ class SafariBaseIE(InfoExtractor):
                 'Login failed; make sure your credentials are correct and try again.',
                 expected=True)
 
+        SafariBaseIE.LOGGED_IN = True
+
         self.to_screen('Login successful')
 
 
 class SafariIE(SafariBaseIE):
     IE_NAME = 'safari'
     IE_DESC = 'safaribooksonline.com online video'
-    _VALID_URL = r'''(?x)https?://
-                            (?:www\.)?safaribooksonline\.com/
-                                (?:
-                                    library/view/[^/]+|
-                                    api/v1/book
-                                )/
-                                (?P<course_id>[^/]+)/
-                                    (?:chapter(?:-content)?/)?
-                                (?P<part>part\d+)\.html
-    '''
+    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>part\d+)\.html'
 
     _TESTS = [{
         'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
-        'md5': '5b0c4cc1b3c1ba15dda7344085aa5592',
+        'md5': 'dcc5a425e79f2564148652616af1f2a3',
         'info_dict': {
-            'id': '2842601850001',
+            'id': '0_qbqx90ic',
             'ext': 'mp4',
-            'title': 'Introduction',
+            'title': 'Introduction to Hadoop Fundamentals LiveLessons',
+            'timestamp': 1437758058,
+            'upload_date': '20150724',
+            'uploader_id': 'stork',
         },
-        'skip': 'Requires safaribooksonline account credentials',
-    }, {
-        'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
-        'only_matching': True,
     }, {
         # non-digits in course id
         'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html',
@@ -103,18 +96,55 @@ class SafariIE(SafariBaseIE):
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        course_id = mobj.group('course_id')
-        part = mobj.group('part')
+        video_id = '%s/%s' % (mobj.group('course_id'), mobj.group('part'))
+
+        webpage = self._download_webpage(url, video_id)
+        reference_id = self._search_regex(
+            r'data-reference-id=(["\'])(?P<id>.+?)\1',
+            webpage, 'kaltura reference id', group='id')
+        partner_id = self._search_regex(
+            r'data-partner-id=(["\'])(?P<id>.+?)\1',
+            webpage, 'kaltura widget id', group='id')
+        ui_id = self._search_regex(
+            r'data-ui-id=(["\'])(?P<id>.+?)\1',
+            webpage, 'kaltura uiconf id', group='id')
+
+        query = {
+            'wid': '_%s' % partner_id,
+            'uiconf_id': ui_id,
+            'flashvars[referenceId]': reference_id,
+        }
+
+        if self.LOGGED_IN:
+            kaltura_session = self._download_json(
+                '%s/player/kaltura_session/?reference_id=%s' % (self._API_BASE, reference_id),
+                video_id, 'Downloading kaltura session JSON',
+                'Unable to download kaltura session JSON', fatal=False)
+            if kaltura_session:
+                session = kaltura_session.get('session')
+                if session:
+                    query['flashvars[ks]'] = session
+
+        return self.url_result(update_url_query(
+            'https://cdnapisec.kaltura.com/html5/html5lib/v2.37.1/mwEmbedFrame.php', query),
+            'Kaltura')
+
 
-        webpage = self._download_webpage(
-            '%s/%s/chapter-content/%s.html' % (self._API_BASE, course_id, part),
-            part)
+class SafariApiIE(SafariBaseIE):
+    IE_NAME = 'safari:api'
+    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>part\d+)\.html'
 
-        bc_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
-        if not bc_url:
-            raise ExtractorError('Could not extract Brightcove URL from %s' % url, expected=True)
+    _TEST = {
+        'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
+        'only_matching': True,
+    }
 
-        return self.url_result(smuggle_url(bc_url, {'Referer': url}), 'BrightcoveLegacy')
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        part = self._download_json(
+            url, '%s/%s' % (mobj.group('course_id'), mobj.group('part')),
+            'Downloading part JSON')
+        return self.url_result(part['web_url'], SafariIE.ie_key())
 
 
 class SafariCourseIE(SafariBaseIE):
@@ -140,7 +170,7 @@ class SafariCourseIE(SafariBaseIE):
         course_id = self._match_id(url)
 
         course_json = self._download_json(
-            '%s/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT),
+            '%s/book/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT),
             course_id, 'Downloading course JSON')
 
         if 'chapters' not in course_json:
@@ -148,7 +178,7 @@ class SafariCourseIE(SafariBaseIE):
                 'No chapters found for course %s' % course_id, expected=True)
 
         entries = [
-            self.url_result(chapter, 'Safari')
+            self.url_result(chapter, SafariApiIE.ie_key())
             for chapter in course_json['chapters']]
 
         course_title = course_json['title']
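
The rewritten SafariIE no longer scrapes a Brightcove URL; it reads the Kaltura reference/partner/uiconf ids off the page and assembles an mwEmbedFrame URL that is delegated to the Kaltura extractor. A minimal sketch of that hand-off using youtube-dl's update_url_query helper (the id values below are hypothetical placeholders, not real Safari ids):

    from youtube_dl.utils import update_url_query

    query = {
        'wid': '_1926081',                      # '_' + data-partner-id (placeholder)
        'uiconf_id': '29375172',                # data-ui-id (placeholder)
        'flashvars[referenceId]': 'some-ref',   # data-reference-id (placeholder)
    }
    # Produces the embed URL that url_result() then hands to the 'Kaltura' extractor
    print(update_url_query(
        'https://cdnapisec.kaltura.com/html5/html5lib/v2.37.1/mwEmbedFrame.php', query))
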
index d6ee2d9e2245475d236c12fb6967af68558d8598..96472fbc44e9a78654ae7c136e9f7e4a31751a13 100644 (file)
@@ -2,6 +2,10 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..utils import (
+    smuggle_url,
+    ExtractorError,
+)
 
 
 class SBSIE(InfoExtractor):
@@ -20,6 +24,9 @@ class SBSIE(InfoExtractor):
             'description': 'md5:f250a9856fca50d22dec0b5b8015f8a5',
             'thumbnail': 're:http://.*\.jpg',
             'duration': 308,
+            'timestamp': 1408613220,
+            'upload_date': '20140821',
+            'uploader': 'SBSC',
         },
     }, {
         'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed',
@@ -31,21 +38,29 @@ class SBSIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
+        player_params = self._download_json(
+            'http://www.sbs.com.au/api/video_pdkvars/id/%s?form=json' % video_id, video_id)
 
-        webpage = self._download_webpage(
-            'http://www.sbs.com.au/ondemand/video/single/%s?context=web' % video_id, video_id)
-
-        player_params = self._parse_json(
-            self._search_regex(
-                r'(?s)var\s+playerParams\s*=\s*({.+?});', webpage, 'playerParams'),
-            video_id)
+        error = player_params.get('error')
+        if error:
+            error_message = 'Sorry, The video you are looking for does not exist.'
+            video_data = error.get('results') or {}
+            error_code = error.get('errorCode')
+            if error_code == 'ComingSoon':
+                error_message = '%s is not yet available.' % video_data.get('title', '')
+            elif error_code in ('Forbidden', 'intranetAccessOnly'):
+                error_message = 'Sorry, This video cannot be accessed via this website'
+            elif error_code == 'Expired':
+                error_message = 'Sorry, %s is no longer available.' % video_data.get('title', '')
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
 
         urls = player_params['releaseUrls']
-        theplatform_url = (urls.get('progressive') or urls.get('standard') or
-                           urls.get('html') or player_params['relatedItemsURL'])
+        theplatform_url = (urls.get('progressive') or urls.get('html') or
+                           urls.get('standard') or player_params['relatedItemsURL'])
 
         return {
             '_type': 'url_transparent',
+            'ie_key': 'ThePlatform',
             'id': video_id,
-            'url': theplatform_url,
+            'url': smuggle_url(self._proto_relative_url(theplatform_url), {'force_smil_url': True}),
         }
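
The SBS rewrite delegates to ThePlatform via a url_transparent result and smuggles {'force_smil_url': True} into the URL so the receiving extractor can pick it up. A small illustration of the round trip with youtube-dl's smuggle_url/unsmuggle_url pair (the URL is a placeholder):

    from youtube_dl.utils import smuggle_url, unsmuggle_url

    smuggled = smuggle_url(
        'http://link.theplatform.com/s/some/path', {'force_smil_url': True})
    url, data = unsmuggle_url(smuggled)
    print(url)   # the original URL
    print(data)  # {'force_smil_url': True}
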
index 3bf93c870b2bc30c3baf9567a64d06171558f06b..b1ca12fdee1c012de789ebfaf15f03f04e73f768 100644 (file)
@@ -18,6 +18,7 @@ class SciVeeIE(InfoExtractor):
             'title': 'Adam Arkin at the 2014 DOE JGI Genomics of Energy & Environment Meeting',
             'description': 'md5:81f1710638e11a481358fab1b11059d7',
         },
+        'skip': 'Not accessible from Travis CI server',
     }
 
     def _real_extract(self, url):
index dfd897ba3a3f0a7297164fb315e4543bb597d678..3566317008712d8e378eec36c437cbc39fdc1cdc 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 
 class ScreencastIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.screencast\.com/t/(?P<id>[a-zA-Z0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?screencast\.com/t/(?P<id>[a-zA-Z0-9]+)'
     _TESTS = [{
         'url': 'http://www.screencast.com/t/3ZEjQXlT',
         'md5': '917df1c13798a3e96211dd1561fded83',
@@ -53,8 +53,10 @@ class ScreencastIE(InfoExtractor):
             'description': 'md5:7b9f393bc92af02326a5c5889639eab0',
             'thumbnail': 're:^https?://.*\.(?:gif|jpg)$',
         }
-    },
-    ]
+    }, {
+        'url': 'http://screencast.com/t/aAB3iowa',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -94,8 +96,9 @@ class ScreencastIE(InfoExtractor):
         title = self._og_search_title(webpage, default=None)
         if title is None:
             title = self._html_search_regex(
-                [r'<b>Title:</b> ([^<]*)</div>',
-                 r'class="tabSeperator">></span><span class="tabText">(.*?)<'],
+                [r'<b>Title:</b> ([^<]+)</div>',
+                 r'class="tabSeperator">></span><span class="tabText">(.+?)<',
+                 r'<title>([^<]+)</title>'],
                 webpage, 'title')
         thumbnail = self._og_search_thumbnail(webpage)
         description = self._og_search_description(webpage, default=None)
index 05337421ca4210af5a9a797f22c112bb663a0960..7a88a42cd84dbfd9f343567dffb5f462c10329b7 100644 (file)
@@ -1,15 +1,11 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-from .common import InfoExtractor
-from ..compat import compat_urlparse
-from ..utils import (
-    ExtractorError,
-    js_to_json,
-)
+from .jwplatform import JWPlatformBaseIE
+from ..utils import js_to_json
 
 
-class ScreencastOMaticIE(InfoExtractor):
+class ScreencastOMaticIE(JWPlatformBaseIE):
     _VALID_URL = r'https?://screencast-o-matic\.com/watch/(?P<id>[0-9a-zA-Z]+)'
     _TEST = {
         'url': 'http://screencast-o-matic.com/watch/c2lD3BeOPl',
@@ -20,6 +16,7 @@ class ScreencastOMaticIE(InfoExtractor):
             'title': 'Welcome to 3-4 Philosophy @ DECV!',
             'thumbnail': 're:^https?://.*\.jpg$',
             'description': 'as the title says! also: some general info re 1) VCE philosophy and 2) distance learning.',
+            'duration': 369.163,
         }
     }
 
@@ -27,23 +24,14 @@ class ScreencastOMaticIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        setup_js = self._search_regex(
-            r"(?s)jwplayer\('mp4Player'\).setup\((\{.*?\})\);",
-            webpage, 'setup code')
-        data = self._parse_json(setup_js, video_id, transform_source=js_to_json)
-        try:
-            video_data = next(
-                m for m in data['modes'] if m.get('type') == 'html5')
-        except StopIteration:
-            raise ExtractorError('Could not find any video entries!')
-        video_url = compat_urlparse.urljoin(url, video_data['config']['file'])
-        thumbnail = data.get('image')
+        jwplayer_data = self._parse_json(
+            self._search_regex(
+                r"(?s)jwplayer\('mp4Player'\).setup\((\{.*?\})\);", webpage, 'setup code'),
+            video_id, transform_source=js_to_json)
 
-        return {
-            'id': video_id,
+        info_dict = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False)
+        info_dict.update({
             'title': self._og_search_title(webpage),
             'description': self._og_search_description(webpage),
-            'url': video_url,
-            'ext': 'mp4',
-            'thumbnail': thumbnail,
-        }
+        })
+        return info_dict
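
This extractor, like several others in this release, parses a raw jwplayer('...').setup({...}) blob by first normalizing the JavaScript object literal with js_to_json. A quick standalone illustration (the setup string is a made-up fragment):

    import json
    from youtube_dl.utils import js_to_json

    setup = "{file: 'video.mp4', image: 'thumb.jpg', duration: 369.163}"
    # js_to_json quotes the bare keys and converts the single-quoted strings,
    # yielding a blob that json.loads accepts
    print(json.loads(js_to_json(setup)))
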
index f2af15f6b43ef5cd205db383cd01c283069dd05b..dd0a6ba19d4ef3b9397af6d977277256cbc0e1e9 100644 (file)
@@ -11,7 +11,7 @@ from ..utils import (
 
 
 class ScreenJunkiesIE(InfoExtractor):
-    _VALID_URL = r'http://www.screenjunkies.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)'
+    _VALID_URL = r'https?://www.screenjunkies.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)'
     _TESTS = [{
         'url': 'http://www.screenjunkies.com/video/best-quentin-tarantino-movie-2841915',
         'md5': '5c2b686bec3d43de42bde9ec047536b0',
index 2cf210e0d609a219f6ac2cbd205311cc166afa29..40333c825443f1b349e8e40c7a08faff7f261e48 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 
 class ScreenwaveMediaIE(InfoExtractor):
-    _VALID_URL = r'https?://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=(?P<id>[A-Za-z0-9-]+)'
+    _VALID_URL = r'(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=(?P<id>[A-Za-z0-9-]+)'
     EMBED_PATTERN = r'src=(["\'])(?P<url>(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=.+?)\1'
     _TESTS = [{
         'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911',
@@ -70,25 +70,27 @@ class ScreenwaveMediaIE(InfoExtractor):
 
         formats = []
         for source in sources:
-            if source['type'] == 'hls':
-                formats.extend(self._extract_m3u8_formats(source['file'], video_id, ext='mp4'))
+            file_ = source.get('file')
+            if not file_:
+                continue
+            if source.get('type') == 'hls':
+                formats.extend(self._extract_m3u8_formats(file_, video_id, ext='mp4'))
             else:
-                file_ = source.get('file')
-                if not file_:
-                    continue
-                format_label = source.get('label')
                 format_id = self._search_regex(
                     r'_(.+?)\.[^.]+$', file_, 'format id', default=None)
+                if not self._is_valid_url(file_, video_id, format_id or 'video'):
+                    continue
+                format_label = source.get('label')
                 height = int_or_none(self._search_regex(
                     r'^(\d+)[pP]', format_label, 'height', default=None))
                 formats.append({
-                    'url': source['file'],
+                    'url': file_,
                     'format_id': format_id,
                     'format': format_label,
                     'ext': source.get('type'),
                     'height': height,
                 })
-        self._sort_formats(formats)
+        self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id'))
 
         return {
             'id': video_id,
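
The reworked loop now skips sources without a file URL, probes the remaining ones with _is_valid_url, and derives a format id like '720p' from the file name plus a numeric height from the label. The two regexes are easy to check in isolation (the file URL below is a hypothetical example):

    import re
    from youtube_dl.utils import int_or_none

    file_url = 'http://player.screenwavemedia.com/videos/Example_720p.mp4'  # placeholder
    format_id = re.search(r'_(.+?)\.[^.]+$', file_url).group(1)
    height = int_or_none(re.search(r'^(\d+)[pP]', '720p').group(1))
    print(format_id, height)  # 720p 720
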
diff --git a/youtube_dl/extractor/seeker.py b/youtube_dl/extractor/seeker.py
new file mode 100644 (file)
index 0000000..3b9c65e
--- /dev/null
@@ -0,0 +1,57 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class SeekerIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?seeker\.com/(?P<display_id>.*)-(?P<article_id>\d+)\.html'
+    _TESTS = [{
+        # player.loadRevision3Item
+        'url': 'http://www.seeker.com/should-trump-be-required-to-release-his-tax-returns-1833805621.html',
+        'md5': '30c1dc4030cc715cf05b423d0947ac18',
+        'info_dict': {
+            'id': '76243',
+            'ext': 'webm',
+            'title': 'Should Trump Be Required To Release His Tax Returns?',
+            'description': 'Donald Trump has been secretive about his "big," "beautiful" tax returns. So what can we learn if he decides to release them?',
+            'uploader': 'Seeker Daily',
+            'uploader_id': 'seekerdaily',
+        }
+    }, {
+        'url': 'http://www.seeker.com/changes-expected-at-zoos-following-recent-gorilla-lion-shootings-1834116536.html',
+        'playlist': [
+            {
+                'md5': '83bcd157cab89ad7318dd7b8c9cf1306',
+                'info_dict': {
+                    'id': '67558',
+                    'ext': 'mp4',
+                    'title': 'The Pros & Cons Of Zoos',
+                    'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?',
+                    'uploader': 'DNews',
+                    'uploader_id': 'dnews',
+                },
+            }
+        ],
+        'info_dict': {
+            'id': '1834116536',
+            'title': 'After Gorilla Killing, Changes Ahead for Zoos',
+            'description': 'The largest association of zoos and others are hoping to learn from recent incidents that led to the shooting deaths of a gorilla and two lions.',
+        },
+    }]
+
+    def _real_extract(self, url):
+        display_id, article_id = re.match(self._VALID_URL, url).groups()
+        webpage = self._download_webpage(url, display_id)
+        mobj = re.search(r"player\.loadRevision3Item\('([^']+)'\s*,\s*(\d+)\);", webpage)
+        if mobj:
+            playlist_type, playlist_id = mobj.groups()
+            return self.url_result(
+                'revision3:%s:%s' % (playlist_type, playlist_id), 'Revision3Embed', playlist_id)
+        else:
+            entries = [self.url_result('revision3:video_id:%s' % video_id, 'Revision3Embed', video_id) for video_id in re.findall(
+                r'<iframe[^>]+src=[\'"](?:https?:)?//api\.seekernetwork\.com/player/embed\?videoId=(\d+)', webpage)]
+            return self.playlist_result(
+                entries, article_id, self._og_search_title(webpage), self._og_search_description(webpage))
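
The new SeekerIE recognizes two embed styles; the first is a player.loadRevision3Item(...) call whose two arguments become a revision3:<type>:<id> pseudo-URL for the Revision3Embed extractor. Checking that regex against a simplified page fragment:

    import re

    webpage = "player.loadRevision3Item('video_id', 76243);"  # simplified fragment
    mobj = re.search(r"player\.loadRevision3Item\('([^']+)'\s*,\s*(\d+)\);", webpage)
    playlist_type, playlist_id = mobj.groups()
    print('revision3:%s:%s' % (playlist_type, playlist_id))  # revision3:video_id:76243
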
index 4d3b585228570769f1c4ece0955ff2e6c0b6b73c..c5f474dd1d8a5040a5368de7f2aa050658f7a984 100644 (file)
@@ -48,7 +48,7 @@ class SenateISVPIE(InfoExtractor):
         ['arch', '', 'http://ussenate-f.akamaihd.net/']
     ]
     _IE_NAME = 'senate.gov'
-    _VALID_URL = r'http://www\.senate\.gov/isvp/?\?(?P<qs>.+)'
+    _VALID_URL = r'https?://www\.senate\.gov/isvp/?\?(?P<qs>.+)'
     _TESTS = [{
         'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',
         'info_dict': {
diff --git a/youtube_dl/extractor/sendtonews.py b/youtube_dl/extractor/sendtonews.py
new file mode 100644 (file)
index 0000000..1c636f6
--- /dev/null
@@ -0,0 +1,86 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .jwplatform import JWPlatformBaseIE
+from ..compat import compat_parse_qs
+from ..utils import (
+    ExtractorError,
+    parse_duration,
+)
+
+
+class SendtoNewsIE(JWPlatformBaseIE):
+    _VALID_URL = r'https?://embed\.sendtonews\.com/player/embed\.php\?(?P<query>[^#]+)'
+
+    _TEST = {
+        # From http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/
+        'url': 'http://embed.sendtonews.com/player/embed.php?SK=GxfCe0Zo7D&MK=175909&PK=5588&autoplay=on&sound=yes',
+        'info_dict': {
+            'id': 'GxfCe0Zo7D-175909-5588',
+            'ext': 'mp4',
+            'title': 'Recap: CLE 15, CIN 6',
+            'description': '5/16/16: Indians\' bats explode for 15 runs in a win',
+            'duration': 49,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }
+
+    _URL_TEMPLATE = '//embed.sendtonews.com/player/embed.php?SK=%s&MK=%s&PK=%s'
+
+    @classmethod
+    def _extract_url(cls, webpage):
+        mobj = re.search(r'''(?x)<script[^>]+src=([\'"])
+            (?:https?:)?//embed\.sendtonews\.com/player/responsiveembed\.php\?
+                .*\bSC=(?P<SC>[0-9a-zA-Z-]+).*
+            \1>''', webpage)
+        if mobj:
+            sk, mk, pk = mobj.group('SC').split('-')
+            return cls._URL_TEMPLATE % (sk, mk, pk)
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        params = compat_parse_qs(mobj.group('query'))
+
+        if 'SK' not in params or 'MK' not in params or 'PK' not in params:
+            raise ExtractorError('Invalid URL', expected=True)
+
+        video_id = '-'.join([params['SK'][0], params['MK'][0], params['PK'][0]])
+
+        webpage = self._download_webpage(url, video_id)
+
+        jwplayer_data_str = self._search_regex(
+            r'jwplayer\("[^"]+"\)\.setup\((.+?)\);', webpage, 'JWPlayer data')
+        js_vars = {
+            'w': 1024,
+            'h': 768,
+            'modeVar': 'html5',
+        }
+        for name, val in js_vars.items():
+            js_val = '%d' % val if isinstance(val, int) else '"%s"' % val
+            jwplayer_data_str = jwplayer_data_str.replace(':%s,' % name, ':%s,' % js_val)
+
+        info_dict = self._parse_jwplayer_data(
+            self._parse_json(jwplayer_data_str, video_id),
+            video_id, require_title=False, rtmp_params={'no_resume': True})
+
+        title = self._html_search_regex(
+            r'<div[^>]+class="embedTitle">([^<]+)</div>', webpage, 'title')
+        description = self._html_search_regex(
+            r'<div[^>]+class="embedSubTitle">([^<]+)</div>', webpage,
+            'description', fatal=False)
+        duration = parse_duration(self._html_search_regex(
+            r'<div[^>]+class="embedDetails">([0-9:]+)', webpage,
+            'duration', fatal=False))
+
+        info_dict.update({
+            'title': title,
+            'description': description,
+            'duration': duration,
+        })
+
+        return info_dict
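
Before the setup blob can be parsed as JSON, the page's free JavaScript variables (w, h, modeVar) have to be replaced with literal values; the extractor does this with plain string substitution. A standalone run of that step on a made-up blob:

    js_vars = {
        'w': 1024,
        'h': 768,
        'modeVar': 'html5',
    }
    s = '{"width":w,"height":h,"mode":modeVar,"autostart":true}'  # made-up blob
    for name, val in js_vars.items():
        # integers are inserted bare, strings get double quotes for valid JSON
        js_val = '%d' % val if isinstance(val, int) else '"%s"' % val
        s = s.replace(':%s,' % name, ':%s,' % js_val)
    print(s)  # {"width":1024,"height":768,"mode":"html5","autostart":true}
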
index 6365a8779d74e2ac9d82ce83c32c404d51e64b2e..a99b2a8e7be1bc9de8a01d6ae2de6fb36055703c 100644 (file)
@@ -1,7 +1,5 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 
 
@@ -14,7 +12,7 @@ class SexuIE(InfoExtractor):
             'id': '961791',
             'ext': 'mp4',
             'title': 'md5:4d05a19a5fc049a63dbbaf05fb71d91b',
-            'description': 'md5:c5ed8625eb386855d5a7967bd7b77a54',
+            'description': 'md5:2b75327061310a3afb3fbd7d09e2e403',
             'categories': list,  # NSFW
             'thumbnail': 're:https?://.*\.jpg$',
             'age_limit': 18,
@@ -25,13 +23,18 @@ class SexuIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        quality_arr = self._search_regex(
-            r'sources:\s*\[([^\]]+)\]', webpage, 'forrmat string')
+        jwvideo = self._parse_json(
+            self._search_regex(r'\.setup\(\s*({.+?})\s*\);', webpage, 'jwvideo'),
+            video_id)
+
+        sources = jwvideo['sources']
+
         formats = [{
-            'url': fmt[0].replace('\\', ''),
-            'format_id': fmt[1],
-            'height': int(fmt[1][:3]),
-        } for fmt in re.findall(r'"file":"([^"]+)","label":"([^"]+)"', quality_arr)]
+            'url': source['file'].replace('\\', ''),
+            'format_id': source.get('label'),
+            'height': self._search_regex(
+                r'^(\d+)[pP]', source.get('label', ''), 'height', default=None),
+        } for source in sources if source.get('file')]
         self._sort_formats(formats)
 
         title = self._html_search_regex(
@@ -40,9 +43,7 @@ class SexuIE(InfoExtractor):
         description = self._html_search_meta(
             'description', webpage, 'description')
 
-        thumbnail = self._html_search_regex(
-            r'image:\s*"([^"]+)"',
-            webpage, 'thumbnail', fatal=False)
+        thumbnail = jwvideo.get('image')
 
         categories_str = self._html_search_meta(
             'keywords', webpage, 'categories')
index 1178b7a2781eb8809b99c78cce9fe80a33cafdd6..d95ea06be56844deb3c34276fb1c4e7e18d64173 100644 (file)
@@ -2,7 +2,7 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse
+from ..compat import compat_urllib_parse_urlencode
 from ..utils import (
     ExtractorError,
     int_or_none,
@@ -77,11 +77,12 @@ class ShahidIE(InfoExtractor):
             raise ExtractorError('This video is DRM protected.', expected=True)
 
         formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4')
+        self._sort_formats(formats)
 
         video = self._download_json(
             '%s/%s/%s?%s' % (
                 api_vars['url'], api_vars['playerType'], api_vars['id'],
-                compat_urllib_parse.urlencode({
+                compat_urllib_parse_urlencode({
                     'apiKey': 'sh@hid0nlin3',
                     'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
                 })),
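
This release replaces compat_urllib_parse.urlencode with two purpose-specific helpers throughout the codebase: compat_urllib_parse_urlencode for query strings (as in shahid here) and urlencode_postdata for POST bodies (as in shared, sharesix, smotri, and streamcloud below). The practical difference is the return type:

    from youtube_dl.compat import compat_urllib_parse_urlencode
    from youtube_dl.utils import urlencode_postdata

    fields = {'method_free': 'Free'}
    print(compat_urllib_parse_urlencode(fields))  # 'method_free=Free' (text, for URLs)
    print(urlencode_postdata(fields))             # b'method_free=Free' (bytes, for POST data)
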
index 8eda3c8648a093213426437e151d217a1115da92..e7e5f653eb2117936f568195e508f9e7778b1085 100644 (file)
@@ -3,17 +3,17 @@ from __future__ import unicode_literals
 import base64
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse
 from ..utils import (
     ExtractorError,
     int_or_none,
     sanitized_Request,
+    urlencode_postdata,
 )
 
 
 class SharedIE(InfoExtractor):
     IE_DESC = 'shared.sx and vivo.sx'
-    _VALID_URL = r'http://(?:shared|vivo)\.sx/(?P<id>[\da-z]{10})'
+    _VALID_URL = r'https?://(?:shared|vivo)\.sx/(?P<id>[\da-z]{10})'
 
     _TESTS = [{
         'url': 'http://shared.sx/0060718775',
@@ -45,7 +45,7 @@ class SharedIE(InfoExtractor):
 
         download_form = self._hidden_inputs(webpage)
         request = sanitized_Request(
-            url, compat_urllib_parse.urlencode(download_form))
+            url, urlencode_postdata(download_form))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')
 
         video_page = self._download_webpage(
index f1ea9bdb208c79cb59c189c5a3b17a9e1dec460a..9cce5ceb43b71877202a77067458a39e5a810432 100644 (file)
@@ -4,10 +4,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse
 from ..utils import (
     parse_duration,
     sanitized_Request,
+    urlencode_postdata,
 )
 
 
@@ -47,7 +47,7 @@ class ShareSixIE(InfoExtractor):
         fields = {
             'method_free': 'Free'
         }
-        post = compat_urllib_parse.urlencode(fields)
+        post = urlencode_postdata(fields)
         req = sanitized_Request(url, post)
         req.add_header('Content-type', 'application/x-www-form-urlencoded')
 
index b2258a0f64c9f985edb5354ae5c11a381ebcc8af..8fc66732af70f4db5305fdc891c5142afd5c97c7 100644 (file)
@@ -4,28 +4,35 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse
-from ..utils import sanitized_Request
+from ..utils import (
+    HEADRequest,
+    ExtractorError,
+    int_or_none,
+    update_url_query,
+    qualities,
+    get_element_by_attribute,
+    clean_html,
+)
 
 
 class SinaIE(InfoExtractor):
-    _VALID_URL = r'''(?x)https?://(.*?\.)?video\.sina\.com\.cn/
-                        (
-                            (.+?/(((?P<pseudo_id>\d+).html)|(.*?(\#|(vid=)|b/)(?P<id>\d+?)($|&|\-))))
-                            |
+    _VALID_URL = r'''(?x)https?://(?:.*?\.)?video\.sina\.com\.cn/
+                        (?:
+                            (?:view/|.*\#)(?P<video_id>\d+)|
+                            .+?/(?P<pseudo_id>[^/?#]+)(?:\.s?html)|
                             # This is used by external sites like Weibo
-                            (api/sinawebApi/outplay.php/(?P<token>.+?)\.swf)
+                            api/sinawebApi/outplay.php/(?P<token>.+?)\.swf
                         )
                   '''
 
     _TESTS = [
         {
-            'url': 'http://video.sina.com.cn/news/vlist/zt/chczlj2013/?opsubject_id=top12#110028898',
-            'md5': 'd65dd22ddcf44e38ce2bf58a10c3e71f',
+            'url': 'http://video.sina.com.cn/news/spj/topvideoes20160504/?opsubject_id=top1#250576622',
+            'md5': 'd38433e2fc886007729735650ae4b3e9',
             'info_dict': {
-                'id': '110028898',
-                'ext': 'flv',
-                'title': '《中国新闻》 朝鲜要求巴拿马立即释放被扣船员',
+                'id': '250576622',
+                'ext': 'mp4',
+                'title': '现场:克鲁兹宣布退选 特朗普将稳获提名',
             }
         },
         {
@@ -35,37 +42,74 @@ class SinaIE(InfoExtractor):
                 'ext': 'flv',
                 'title': '军方提高对朝情报监视级别',
             },
+            'skip': 'the page does not exist or has been deleted',
+        },
+        {
+            'url': 'http://video.sina.com.cn/view/250587748.html',
+            'md5': '3d1807a25c775092aab3bc157fff49b4',
+            'info_dict': {
+                'id': '250587748',
+                'ext': 'mp4',
+                'title': '瞬间泪目:8年前汶川地震珍贵视频首曝光',
+            },
         },
     ]
 
-    def _extract_video(self, video_id):
-        data = compat_urllib_parse.urlencode({'vid': video_id})
-        url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data,
-                                     video_id, 'Downloading video url')
-        image_page = self._download_webpage(
-            'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data,
-            video_id, 'Downloading thumbnail info')
-
-        return {'id': video_id,
-                'url': url_doc.find('./durl/url').text,
-                'ext': 'flv',
-                'title': url_doc.find('./vname').text,
-                'thumbnail': image_page.split('=')[1],
-                }
-
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        if mobj.group('token') is not None:
-            # The video id is in the redirected url
-            self.to_screen('Getting video id')
-            request = sanitized_Request(url)
-            request.get_method = lambda: 'HEAD'
-            (_, urlh) = self._download_webpage_handle(request, 'NA', False)
-            return self._real_extract(urlh.geturl())
-        elif video_id is None:
-            pseudo_id = mobj.group('pseudo_id')
-            webpage = self._download_webpage(url, pseudo_id)
-            video_id = self._search_regex(r'vid:\'(\d+?)\'', webpage, 'video id')
 
-        return self._extract_video(video_id)
+        video_id = mobj.group('video_id')
+        if not video_id:
+            if mobj.group('token') is not None:
+                # The video id is in the redirected url
+                self.to_screen('Getting video id')
+                request = HEADRequest(url)
+                (_, urlh) = self._download_webpage_handle(request, 'NA', False)
+                return self._real_extract(urlh.geturl())
+            else:
+                pseudo_id = mobj.group('pseudo_id')
+                webpage = self._download_webpage(url, pseudo_id)
+                error = get_element_by_attribute('class', 'errtitle', webpage)
+                if error:
+                    raise ExtractorError('%s said: %s' % (
+                        self.IE_NAME, clean_html(error)), expected=True)
+                video_id = self._search_regex(
+                    r"video_id\s*:\s*'(\d+)'", webpage, 'video id')
+
+        video_data = self._download_json(
+            'http://s.video.sina.com.cn/video/h5play',
+            video_id, query={'video_id': video_id})
+        if video_data['code'] != 1:
+            raise ExtractorError('%s said: %s' % (
+                self.IE_NAME, video_data['message']), expected=True)
+        else:
+            video_data = video_data['data']
+            title = video_data['title']
+            description = video_data.get('description')
+            if description:
+                description = description.strip()
+
+            preference = qualities(['cif', 'sd', 'hd', 'fhd', 'ffd'])
+            formats = []
+            for quality_id, quality in video_data.get('videos', {}).get('mp4', {}).items():
+                file_api = quality.get('file_api')
+                file_id = quality.get('file_id')
+                if not file_api or not file_id:
+                    continue
+                formats.append({
+                    'format_id': quality_id,
+                    'url': update_url_query(file_api, {'vid': file_id}),
+                    'preference': preference(quality_id),
+                    'ext': 'mp4',
+                })
+            self._sort_formats(formats)
+
+            return {
+                'id': video_id,
+                'title': title,
+                'description': description,
+                'thumbnail': video_data.get('image'),
+                'duration': int_or_none(video_data.get('length')),
+                'timestamp': int_or_none(video_data.get('create_time')),
+                'formats': formats,
+            }
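
The format preference for Sina's mp4 variants comes from youtube-dl's qualities helper, which turns an ordered list of quality ids into a ranking function. A quick demonstration:

    from youtube_dl.utils import qualities

    preference = qualities(['cif', 'sd', 'hd', 'fhd', 'ffd'])
    print(preference('hd'))   # 2 - position in the list
    print(preference('fhd'))  # 3 - ranks above 'hd'
    print(preference('uhd'))  # -1 - unknown ids sort last
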
index 015ef75f3e9c36fa6e7447a78721ffd4d0bff74d..5c3fd0fece8dc8b32a3d05bea8ed4dedf430f1c5 100644 (file)
@@ -7,12 +7,12 @@ import hashlib
 import uuid
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse
 from ..utils import (
     ExtractorError,
     int_or_none,
     sanitized_Request,
     unified_strdate,
+    urlencode_postdata,
 )
 
 
@@ -175,7 +175,7 @@ class SmotriIE(InfoExtractor):
             video_form['pass'] = hashlib.md5(video_password.encode('utf-8')).hexdigest()
 
         request = sanitized_Request(
-            'http://smotri.com/video/view/url/bot/', compat_urllib_parse.urlencode(video_form))
+            'http://smotri.com/video/view/url/bot/', urlencode_postdata(video_form))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')
 
         video = self._download_json(request, video_id, 'Downloading video JSON')
@@ -338,7 +338,7 @@ class SmotriBroadcastIE(InfoExtractor):
             }
 
             request = sanitized_Request(
-                broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form))
+                broadcast_url + '/?no_redirect=1', urlencode_postdata(login_form))
             request.add_header('Content-Type', 'application/x-www-form-urlencoded')
             broadcast_page = self._download_webpage(
                 request, broadcast_id, 'Logging in and confirming age')
index ea8fc258d1e7f361bc2a75360dc4c65774674622..49e5d09ae450d11bb567a2fe95ecba55998c8b42 100644 (file)
@@ -6,7 +6,7 @@ import re
 from .common import InfoExtractor
 from ..compat import (
     compat_str,
-    compat_urllib_parse,
+    compat_urllib_parse_urlencode,
 )
 from ..utils import (
     ExtractorError,
@@ -170,7 +170,7 @@ class SohuIE(InfoExtractor):
                     if retries > 0:
                         download_note += ' (retry #%d)' % retries
                     part_info = self._parse_json(self._download_webpage(
-                        'http://%s/?%s' % (allot, compat_urllib_parse.urlencode(params)),
+                        'http://%s/?%s' % (allot, compat_urllib_parse_urlencode(params)),
                         video_id, download_note), video_id)
 
                     video_url = part_info['url']
index 1efb2b980cc83bc5ffa3b14c912188973b4574b7..194dabc71d84072fc64afd50baa3b80467c0808f 100644 (file)
@@ -11,10 +11,9 @@ from .common import (
 from ..compat import (
     compat_str,
     compat_urlparse,
-    compat_urllib_parse,
+    compat_urllib_parse_urlencode,
 )
 from ..utils import (
-    encode_dict,
     ExtractorError,
     int_or_none,
     unified_strdate,
@@ -393,7 +392,7 @@ class SoundcloudUserIE(SoundcloudIE):
         query = COMMON_QUERY.copy()
         query['offset'] = 0
 
-        next_href = base_url + '?' + compat_urllib_parse.urlencode(query)
+        next_href = base_url + '?' + compat_urllib_parse_urlencode(query)
 
         entries = []
         for i in itertools.count():
@@ -424,7 +423,7 @@ class SoundcloudUserIE(SoundcloudIE):
             qs = compat_urlparse.parse_qs(parsed_next_href.query)
             qs.update(COMMON_QUERY)
             next_href = compat_urlparse.urlunparse(
-                parsed_next_href._replace(query=compat_urllib_parse.urlencode(qs, True)))
+                parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True)))
 
         return {
             '_type': 'playlist',
@@ -460,7 +459,7 @@ class SoundcloudPlaylistIE(SoundcloudIE):
         if token:
             data_dict['secret_token'] = token
 
-        data = compat_urllib_parse.urlencode(data_dict)
+        data = compat_urllib_parse_urlencode(data_dict)
         data = self._download_json(
             base_url + data, playlist_id, 'Downloading playlist')
 
@@ -500,7 +499,7 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
         query['client_id'] = self._CLIENT_ID
         query['linked_partitioning'] = '1'
         query['offset'] = 0
-        data = compat_urllib_parse.urlencode(encode_dict(query))
+        data = compat_urllib_parse_urlencode(query)
         next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data)
 
         collected_results = 0
diff --git a/youtube_dl/extractor/space.py b/youtube_dl/extractor/space.py
deleted file mode 100644 (file)
index ebb5d6e..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from .brightcove import BrightcoveLegacyIE
-from ..utils import RegexNotFoundError, ExtractorError
-
-
-class SpaceIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:(?:www|m)\.)?space\.com/\d+-(?P<title>[^/\.\?]*?)-video\.html'
-    _TEST = {
-        'add_ie': ['BrightcoveLegacy'],
-        'url': 'http://www.space.com/23373-huge-martian-landforms-detail-revealed-by-european-probe-video.html',
-        'info_dict': {
-            'id': '2780937028001',
-            'ext': 'mp4',
-            'title': 'Huge Martian Landforms\' Detail Revealed By European Probe | Video',
-            'description': 'md5:db81cf7f3122f95ed234b631a6ea1e61',
-            'uploader': 'TechMedia Networks',
-        },
-    }
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        title = mobj.group('title')
-        webpage = self._download_webpage(url, title)
-        try:
-            # Some videos require the playerKey field, which isn't define in
-            # the BrightcoveExperience object
-            brightcove_url = self._og_search_video_url(webpage)
-        except RegexNotFoundError:
-            # Other videos works fine with the info from the object
-            brightcove_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
-        if brightcove_url is None:
-            raise ExtractorError(
-                'The webpage does not contain a video', expected=True)
-        return self.url_result(brightcove_url, BrightcoveLegacyIE.ie_key())
index 692fd78e886c0a6a932adce4659f2564beeab7e6..92a7120a3242e732ceb58f51b4391a5efbc569d8 100644 (file)
@@ -96,20 +96,18 @@ class SpankwireIE(InfoExtractor):
         formats = []
         for height, video_url in zip(heights, video_urls):
             path = compat_urllib_parse_urlparse(video_url).path
-            _, quality = path.split('/')[4].split('_')[:2]
-            f = {
+            m = re.search(r'/(?P<height>\d+)[pP]_(?P<tbr>\d+)[kK]', path)
+            if m:
+                tbr = int(m.group('tbr'))
+                height = int(m.group('height'))
+            else:
+                tbr = None
+            formats.append({
                 'url': video_url,
+                'format_id': '%dp' % height,
                 'height': height,
-            }
-            tbr = self._search_regex(r'^(\d+)[Kk]$', quality, 'tbr', default=None)
-            if tbr:
-                f.update({
-                    'tbr': int(tbr),
-                    'format_id': '%dp' % height,
-                })
-            else:
-                f['format_id'] = quality
-            formats.append(f)
+                'tbr': tbr,
+            })
         self._sort_formats(formats)
 
         age_limit = self._rta_search(webpage)
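
Instead of splitting the path on underscores, the new Spankwire code extracts height and total bitrate together with one named-group regex (falling back to tbr=None when it does not match), which tolerates paths where the quality token is not at a fixed position. Against a hypothetical CDN path:

    import re

    path = '/cdn/201606/720P_4000K_12345.mp4'  # hypothetical path
    m = re.search(r'/(?P<height>\d+)[pP]_(?P<tbr>\d+)[kK]', path)
    print(int(m.group('height')), int(m.group('tbr')))  # 720 4000
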
index dfe50ed4585b0fe876b8a300edd00a453ae4b690..7e67833062d0a21d2c663b1b5d24246d653f0116 100644 (file)
@@ -8,7 +8,7 @@ from ..utils import ExtractorError
 
 
 class Sport5IE(InfoExtractor):
-    _VALID_URL = r'http://(?:www|vod)?\.sport5\.co\.il/.*\b(?:Vi|docID)=(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www|vod)?\.sport5\.co\.il/.*\b(?:Vi|docID)=(?P<id>\d+)'
     _TESTS = [
         {
             'url': 'http://vod.sport5.co.il/?Vc=147&Vi=176331&Page=1',
index 86d509ae5351a3cc15be66dda9f485d63ec166ba..e5c28ae890ee61536052a5716677d486d0a5b43e 100644 (file)
@@ -6,6 +6,7 @@ import re
 from .common import InfoExtractor
 from ..compat import compat_urlparse
 from ..utils import (
+    js_to_json,
     unified_strdate,
 )
 
@@ -94,18 +95,32 @@ class SportBoxEmbedIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
-        hls = self._search_regex(
-            r"sportboxPlayer\.jwplayer_common_params\.file\s*=\s*['\"]([^'\"]+)['\"]",
-            webpage, 'hls file')
+        formats = []
 
-        formats = self._extract_m3u8_formats(hls, video_id, 'mp4')
+        def cleanup_js(code):
+            # desktop_advert_config contains complex JavaScript and we don't need it
+            return js_to_json(re.sub(r'desktop_advert_config.*', '', code))
 
-        title = self._search_regex(
-            r'sportboxPlayer\.node_title\s*=\s*"([^"]+)"', webpage, 'title')
+        jwplayer_data = self._parse_json(self._search_regex(
+            r'(?s)player\.setup\(({.+?})\);', webpage, 'jwplayer settings'), video_id,
+            transform_source=cleanup_js)
 
-        thumbnail = self._search_regex(
-            r'sportboxPlayer\.jwplayer_common_params\.image\s*=\s*"([^"]+)"',
-            webpage, 'thumbnail', default=None)
+        hls_url = jwplayer_data.get('hls_url')
+        if hls_url:
+            formats.extend(self._extract_m3u8_formats(
+                hls_url, video_id, ext='mp4', m3u8_id='hls'))
+
+        rtsp_url = jwplayer_data.get('rtsp_url')
+        if rtsp_url:
+            formats.append({
+                'url': rtsp_url,
+                'format_id': 'rtsp',
+            })
+
+        self._sort_formats(formats)
+
+        title = jwplayer_data['node_title']
+        thumbnail = jwplayer_data.get('image_url')
 
         return {
             'id': video_id,
diff --git a/youtube_dl/extractor/sportschau.py b/youtube_dl/extractor/sportschau.py
new file mode 100644 (file)
index 0000000..0d7925a
--- /dev/null
@@ -0,0 +1,38 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .wdr import WDRBaseIE
+from ..utils import get_element_by_attribute
+
+
+class SportschauIE(WDRBaseIE):
+    IE_NAME = 'Sportschau'
+    _VALID_URL = r'https?://(?:www\.)?sportschau\.de/(?:[^/]+/)+video-?(?P<id>[^/#?]+)\.html'
+    _TEST = {
+        'url': 'http://www.sportschau.de/uefaeuro2016/videos/video-dfb-team-geht-gut-gelaunt-ins-spiel-gegen-polen-100.html',
+        'info_dict': {
+            'id': 'mdb-1140188',
+            'display_id': 'dfb-team-geht-gut-gelaunt-ins-spiel-gegen-polen-100',
+            'ext': 'mp4',
+            'title': 'DFB-Team geht gut gelaunt ins Spiel gegen Polen',
+            'description': 'Vor dem zweiten Gruppenspiel gegen Polen herrscht gute Stimmung im deutschen Team. Insbesondere Bastian Schweinsteiger strotzt vor Optimismus nach seinem Tor gegen die Ukraine.',
+            'upload_date': '20160615',
+        },
+        'skip': 'Geo-restricted to Germany',
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+        title = get_element_by_attribute('class', 'headline', webpage)
+        description = self._html_search_meta('description', webpage, 'description')
+
+        info = self._extract_wdr_video(webpage, video_id)
+
+        info.update({
+            'title': title,
+            'description': description,
+        })
+
+        return info
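
The new SportschauIE takes its title from the page markup with get_element_by_attribute, a small HTML helper from youtube_dl.utils that returns the content of the first tag carrying the given attribute/value pair. A self-contained call on a stub fragment:

    from youtube_dl.utils import get_element_by_attribute

    html = '<h2 class="headline">DFB-Team geht gut gelaunt ins Spiel gegen Polen</h2>'
    print(get_element_by_attribute('class', 'headline', html))
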
index 13101c7146244181f62a634b773da446e2f5e79a..54d1843f2200d0cef7fa2e7b192f673d316c5f18 100644 (file)
@@ -8,7 +8,7 @@ from ..utils import (
 
 
 class SSAIE(InfoExtractor):
-    _VALID_URL = r'http://ssa\.nls\.uk/film/(?P<id>\d+)'
+    _VALID_URL = r'https?://ssa\.nls\.uk/film/(?P<id>\d+)'
     _TEST = {
         'url': 'http://ssa.nls.uk/film/3561',
         'info_dict': {
index 77841b94686a27feb00968e5f3b67729504cee6c..6a6bb90c493a92fc2e644e2a550547460d899ca4 100644 (file)
@@ -4,15 +4,17 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse
-from ..utils import sanitized_Request
+from ..utils import (
+    ExtractorError,
+    urlencode_postdata,
+)
 
 
 class StreamcloudIE(InfoExtractor):
     IE_NAME = 'streamcloud.eu'
     _VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)(?:/(?P<fname>[^#?]*)\.html)?'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
         'md5': '6bea4c7fa5daaacc2a946b7146286686',
         'info_dict': {
@@ -21,7 +23,10 @@ class StreamcloudIE(InfoExtractor):
             'title': 'youtube-dl test video  \'/\\ ä ↭',
         },
         'skip': 'Only available from the EU'
-    }
+    }, {
+        'url': 'http://streamcloud.eu/ua8cmfh1nbe6/NSHIP-148--KUC-NG--H264-.mp4.html',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -29,26 +34,36 @@ class StreamcloudIE(InfoExtractor):
 
         orig_webpage = self._download_webpage(url, video_id)
 
+        if '>File Not Found<' in orig_webpage:
+            raise ExtractorError(
+                'Video %s does not exist' % video_id, expected=True)
+
         fields = re.findall(r'''(?x)<input\s+
             type="(?:hidden|submit)"\s+
             name="([^"]+)"\s+
             (?:id="[^"]+"\s+)?
             value="([^"]*)"
             ''', orig_webpage)
-        post = compat_urllib_parse.urlencode(fields)
 
         self._sleep(12, video_id)
-        headers = {
-            b'Content-Type': b'application/x-www-form-urlencoded',
-        }
-        req = sanitized_Request(url, post, headers)
 
         webpage = self._download_webpage(
-            req, video_id, note='Downloading video page ...')
-        title = self._html_search_regex(
-            r'<h1[^>]*>([^<]+)<', webpage, 'title')
-        video_url = self._search_regex(
-            r'file:\s*"([^"]+)"', webpage, 'video URL')
+            url, video_id, data=urlencode_postdata(fields), headers={
+                b'Content-Type': b'application/x-www-form-urlencoded',
+            })
+
+        try:
+            title = self._html_search_regex(
+                r'<h1[^>]*>([^<]+)<', webpage, 'title')
+            video_url = self._search_regex(
+                r'file:\s*"([^"]+)"', webpage, 'video URL')
+        except ExtractorError:
+            message = self._html_search_regex(
+                r'(?s)<div[^>]+class=(["\']).*?msgboxinfo.*?\1[^>]*>(?P<message>.+?)</div>',
+                webpage, 'message', default=None, group='message')
+            if message:
+                raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
+            raise
         thumbnail = self._search_regex(
             r'image:\s*"([^"]+)"', webpage, 'thumbnail URL', fatal=False)
 
index 6a57fa60a5a2ea877f65a1af045f14d05377c1a9..e529051d100b8024007229200648ea259b3d1677 100644 (file)
@@ -14,7 +14,6 @@ class StreetVoiceIE(InfoExtractor):
         'info_dict': {
             'id': '94440',
             'ext': 'mp3',
-            'filesize': 4167053,
             'title': '輸',
             'description': 'Crispy脆樂團 - 輸',
             'thumbnail': 're:^https?://.*\.jpg$',
@@ -32,20 +31,19 @@ class StreetVoiceIE(InfoExtractor):
         song_id = self._match_id(url)
 
         song = self._download_json(
-            'http://streetvoice.com/music/api/song/%s' % song_id, song_id)
+            'https://streetvoice.com/api/v1/public/song/%s/' % song_id, song_id, data=b'')
 
         title = song['name']
-        author = song['musician']['name']
+        author = song['user']['nickname']
 
         return {
             'id': song_id,
             'url': song['file'],
-            'filesize': song.get('size'),
             'title': title,
             'description': '%s - %s' % (author, title),
             'thumbnail': self._proto_relative_url(song.get('image'), 'http:'),
             'duration': song.get('length'),
             'upload_date': unified_strdate(song.get('created_at')),
             'uploader': author,
-            'uploader_id': compat_str(song['musician']['id']),
+            'uploader_id': compat_str(song['user']['id']),
         }
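
The StreetVoice change passes data=b'' to the JSON download; an empty byte string is enough to turn the underlying urllib request into a POST, presumably what the new API endpoint expects. Illustrated with the standard library (Python 3 shown; the compat layer behaves the same on Python 2):

    import urllib.request

    req = urllib.request.Request(
        'https://streetvoice.com/api/v1/public/song/94440/', data=b'')
    print(req.get_method())  # 'POST' - any non-None data switches the verb
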
index 399c3b8eedbe57006a07eb187f1a78c4f090e6af..67f56fab8cb299d6644c8765fbeaaec1562e4b7f 100644 (file)
@@ -6,56 +6,80 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     determine_ext,
+    dict_get,
+    int_or_none,
+    try_get,
 )
 
 
 class SVTBaseIE(InfoExtractor):
-    def _extract_video(self, url, video_id):
-        info = self._download_json(url, video_id)
-
-        title = info['context']['title']
-        thumbnail = info['context'].get('thumbnailImage')
-
-        video_info = info['video']
+    def _extract_video(self, video_info, video_id):
         formats = []
         for vr in video_info['videoReferences']:
+            player_type = vr.get('playerType')
             vurl = vr['url']
             ext = determine_ext(vurl)
             if ext == 'm3u8':
                 formats.extend(self._extract_m3u8_formats(
                     vurl, video_id,
                     ext='mp4', entry_protocol='m3u8_native',
-                    m3u8_id=vr.get('playerType')))
+                    m3u8_id=player_type, fatal=False))
             elif ext == 'f4m':
                 formats.extend(self._extract_f4m_formats(
                     vurl + '?hdcore=3.3.0', video_id,
-                    f4m_id=vr.get('playerType')))
+                    f4m_id=player_type, fatal=False))
+            elif ext == 'mpd':
+                if player_type == 'dashhbbtv':
+                    formats.extend(self._extract_mpd_formats(
+                        vurl, video_id, mpd_id=player_type, fatal=False))
             else:
                 formats.append({
-                    'format_id': vr.get('playerType'),
+                    'format_id': player_type,
                     'url': vurl,
                 })
+        if not formats and video_info.get('rights', {}).get('geoBlockedSweden'):
+            self.raise_geo_restricted('This video is only available in Sweden')
         self._sort_formats(formats)
 
         subtitles = {}
-        subtitle_references = video_info.get('subtitleReferences')
+        subtitle_references = dict_get(video_info, ('subtitles', 'subtitleReferences'))
         if isinstance(subtitle_references, list):
             for sr in subtitle_references:
                 subtitle_url = sr.get('url')
+                subtitle_lang = sr.get('language', 'sv')
                 if subtitle_url:
-                    subtitles.setdefault('sv', []).append({'url': subtitle_url})
+                    if determine_ext(subtitle_url) == 'm3u8':
+                        # TODO(yan12125): handle WebVTT in m3u8 manifests
+                        continue
+
+                    subtitles.setdefault(subtitle_lang, []).append({'url': subtitle_url})
 
-        duration = video_info.get('materialLength')
-        age_limit = 18 if video_info.get('inappropriateForChildren') else 0
+        title = video_info.get('title')
+
+        series = video_info.get('programTitle')
+        season_number = int_or_none(video_info.get('season'))
+        episode = video_info.get('episodeTitle')
+        episode_number = int_or_none(video_info.get('episodeNumber'))
+
+        duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration')))
+        age_limit = None
+        adult = dict_get(
+            video_info, ('inappropriateForChildren', 'blockedForChildren'),
+            skip_false_values=False)
+        if adult is not None:
+            age_limit = 18 if adult else 0
 
         return {
             'id': video_id,
             'title': title,
             'formats': formats,
             'subtitles': subtitles,
-            'thumbnail': thumbnail,
             'duration': duration,
             'age_limit': age_limit,
+            'series': series,
+            'season_number': season_number,
+            'episode': episode,
+            'episode_number': episode_number,
         }
 
 
@@ -63,11 +87,11 @@ class SVTIE(SVTBaseIE):
     _VALID_URL = r'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P<widget_id>\d+)&.*?\barticleId=(?P<id>\d+)'
     _TEST = {
         'url': 'http://www.svt.se/wd?widgetId=23991&sectionId=541&articleId=2900353&type=embed&contextSectionId=123&autostart=false',
-        'md5': '9648197555fc1b49e3dc22db4af51d46',
+        'md5': '33e9a5d8f646523ce0868ecfb0eed77d',
         'info_dict': {
             'id': '2900353',
-            'ext': 'flv',
-            'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
+            'ext': 'mp4',
+            'title': 'Stjärnorna skojar till det - under SVT-intervjun',
             'duration': 27,
             'age_limit': 0,
         },
@@ -84,15 +108,20 @@ class SVTIE(SVTBaseIE):
         mobj = re.match(self._VALID_URL, url)
         widget_id = mobj.group('widget_id')
         article_id = mobj.group('id')
-        return self._extract_video(
+
+        info = self._download_json(
             'http://www.svt.se/wd?widgetId=%s&articleId=%s&format=json&type=embed&output=json' % (widget_id, article_id),
             article_id)
 
+        info_dict = self._extract_video(info['video'], article_id)
+        info_dict['title'] = info['context']['title']
+        return info_dict
+
 
 class SVTPlayIE(SVTBaseIE):
     IE_DESC = 'SVT Play and Öppet arkiv'
-    _VALID_URL = r'https?://(?:www\.)?(?P<host>svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)'
+    _TESTS = [{
         'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2',
         'md5': '2b6704fe4a28801e1a098bbf3c5ac611',
         'info_dict': {
@@ -108,12 +137,47 @@ class SVTPlayIE(SVTBaseIE):
                 }]
             },
         },
-    }
+    }, {
+        # geo restricted to Sweden
+        'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        host = mobj.group('host')
-        return self._extract_video(
-            'http://www.%s.se/video/%s?output=json' % (host, video_id),
-            video_id)
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        data = self._parse_json(
+            self._search_regex(
+                r'root\["__svtplay"\]\s*=\s*([^;]+);',
+                webpage, 'embedded data', default='{}'),
+            video_id, fatal=False)
+
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        if data:
+            video_info = try_get(
+                data, lambda x: x['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'],
+                dict)
+            if video_info:
+                info_dict = self._extract_video(video_info, video_id)
+                info_dict.update({
+                    'title': data['context']['dispatcher']['stores']['MetaStore']['title'],
+                    'thumbnail': thumbnail,
+                })
+                return info_dict
+
+        video_id = self._search_regex(
+            r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
+            webpage, 'video id', default=None)
+
+        if video_id:
+            data = self._download_json(
+                'http://www.svt.se/videoplayer-api/video/%s' % video_id, video_id)
+            info_dict = self._extract_video(data, video_id)
+            if not info_dict.get('title'):
+                info_dict['title'] = re.sub(
+                    r'\s*\|\s*.+?$', '',
+                    info_dict.get('episode') or self._og_search_title(webpage))
+            return info_dict
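
The refactored SVT base extractor leans on two lenient lookup helpers from youtube_dl.utils: dict_get, which tries several keys in order (optionally accepting falsy values), and try_get, which walks a nested structure without raising. A small demonstration with stub data:

    from youtube_dl.utils import dict_get, try_get

    video_info = {'contentDuration': 27, 'blockedForChildren': False}
    print(dict_get(video_info, ('materialLength', 'contentDuration')))  # 27
    # skip_false_values=False lets the explicit False through
    print(dict_get(video_info, ('inappropriateForChildren', 'blockedForChildren'),
                   skip_false_values=False))                            # False
    data = {'context': {'dispatcher': {'stores': {}}}}
    print(try_get(data, lambda x: x['context']['dispatcher']['stores'], dict))  # {}
    print(try_get(data, lambda x: x['missing']['key']))                 # None
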
index aa5964acb6b3f40b0d663bd2169ac6aec0c210ae..f562aa6d386ee891f4ab3a724bef53e20a6cec92 100644 (file)
@@ -5,7 +5,7 @@ from .common import InfoExtractor
 
 
 class SztvHuIE(InfoExtractor):
-    _VALID_URL = r'http://(?:(?:www\.)?sztv\.hu|www\.tvszombathely\.hu)/(?:[^/]+)/.+-(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:(?:www\.)?sztv\.hu|www\.tvszombathely\.hu)/(?:[^/]+)/.+-(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://sztv.hu/hirek/cserkeszek-nepszerusitettek-a-kornyezettudatos-eletmodot-a-savaria-teren-20130909',
         'md5': 'a6df607b11fb07d0e9f2ad94613375cb',
index 73e7657d4bec7b1bc37753923744d92b769d8843..136e18f96cadf7bd5701e32b0a3bc7c8767e324e 100644 (file)
@@ -4,42 +4,178 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import parse_filesize
+from ..utils import (
+    determine_ext,
+    js_to_json,
+    parse_iso8601,
+    parse_filesize,
+)
+
+
+class TagesschauPlayerIE(InfoExtractor):
+    IE_NAME = 'tagesschau:player'
+    _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?P<kind>audio|video)/(?P=kind)-(?P<id>\d+)~player(?:_[^/?#&]+)?\.html'
+
+    _TESTS = [{
+        'url': 'http://www.tagesschau.de/multimedia/video/video-179517~player.html',
+        'md5': '8d09548d5c15debad38bee3a4d15ca21',
+        'info_dict': {
+            'id': '179517',
+            'ext': 'mp4',
+            'title': 'Marie Kristin Boese, ARD Berlin, über den zukünftigen Kurs der AfD',
+            'thumbnail': 're:^https?:.*\.jpg$',
+            'formats': 'mincount:6',
+        },
+    }, {
+        'url': 'https://www.tagesschau.de/multimedia/audio/audio-29417~player.html',
+        'md5': '76e6eec6ebd40740671cf0a2c88617e5',
+        'info_dict': {
+            'id': '29417',
+            'ext': 'mp3',
+            'title': 'Trabi - Bye, bye Rennpappe',
+            'thumbnail': 're:^https?:.*\.jpg$',
+            'formats': 'mincount:2',
+        },
+    }, {
+        'url': 'http://www.tagesschau.de/multimedia/audio/audio-29417~player_autoplay-true.html',
+        'only_matching': True,
+    }]
+
+    _FORMATS = {
+        'xs': {'quality': 0},
+        's': {'width': 320, 'height': 180, 'quality': 1},
+        'm': {'width': 512, 'height': 288, 'quality': 2},
+        'l': {'width': 960, 'height': 540, 'quality': 3},
+        'xl': {'width': 1280, 'height': 720, 'quality': 4},
+        'xxl': {'quality': 5},
+    }
+
+    def _extract_via_api(self, kind, video_id):
+        info = self._download_json(
+            'https://www.tagesschau.de/api/multimedia/{0}/{0}-{1}.json'.format(kind, video_id),
+            video_id)
+        title = info['headline']
+        formats = []
+        for media in info['mediadata']:
+            for format_id, format_url in media.items():
+                if determine_ext(format_url) == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        format_url, video_id, 'mp4',
+                        entry_protocol='m3u8_native', m3u8_id='hls'))
+                else:
+                    formats.append({
+                        'url': format_url,
+                        'format_id': format_id,
+                        'vcodec': 'none' if kind == 'audio' else None,
+                    })
+        self._sort_formats(formats)
+        timestamp = parse_iso8601(info.get('date'))
+        return {
+            'id': video_id,
+            'title': title,
+            'timestamp': timestamp,
+            'formats': formats,
+        }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        # kind = mobj.group('kind').lower()
+        # if kind == 'video':
+        #     return self._extract_via_api(kind, video_id)
+
+        # The JSON API does not provide some audio formats (e.g. ogg), thus
+        # extracting audio via the webpage
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._og_search_title(webpage).strip()
+        formats = []
+
+        for media_json in re.findall(r'({src\s*:\s*["\']http[^}]+type\s*:[^}]+})', webpage):
+            media = self._parse_json(js_to_json(media_json), video_id, fatal=False)
+            if not media:
+                continue
+            src = media.get('src')
+            if not src:
+                return
+            quality = media.get('quality')
+            kind = media.get('type', '').split('/')[0]
+            ext = determine_ext(src)
+            f = {
+                'url': src,
+                'format_id': '%s_%s' % (quality, ext) if quality else ext,
+                'ext': ext,
+                'vcodec': 'none' if kind == 'audio' else None,
+            }
+            f.update(self._FORMATS.get(quality, {}))
+            formats.append(f)
+
+        self._sort_formats(formats)
+
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
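
The loop above parses JW-player-style media objects that the pages embed as JavaScript literals rather than JSON, which is why each match is round-tripped through js_to_json() before JSON parsing. A minimal standalone sketch of that step, assuming youtube-dl is importable as a library and using an invented page fragment:

    import json
    import re

    from youtube_dl.utils import js_to_json

    # hypothetical page fragment mimicking the embedded player setup
    WEBPAGE = '''jwplayer().setup({src : "http://media.tagesschau.de/video/demo.h264.mp4",
        type : "video/mp4", quality : "l"});'''

    for media_json in re.findall(r'({src\s*:\s*["\']http[^}]+type\s*:[^}]+})', WEBPAGE):
        # js_to_json() quotes the bare keys, turning the literal into valid JSON
        media = json.loads(js_to_json(media_json))
        print(media['src'], media.get('quality'))
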
 
 
 class TagesschauIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?:[^/]+/)*?[^/#?]+?(?P<id>-?[0-9]+)(?:~_[^/#?]+?)?\.html'
+    _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/(?P<path>[^/]+/(?:[^/]+/)*?(?P<id>[^/#?]+?(?:-?[0-9]+)?))(?:~_?[^/#?]+?)?\.html'
 
     _TESTS = [{
         'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html',
-        'md5': '917a228bc7df7850783bc47979673a09',
+        'md5': 'f7c27a0eff3bfe8c7727e65f8fe1b1e6',
         'info_dict': {
-            'id': '102143',
+            'id': 'video-102143',
             'ext': 'mp4',
             'title': 'Regierungsumbildung in Athen: Neue Minister in Griechenland vereidigt',
-            'description': 'md5:171feccd9d9b3dd54d05d501568f6359',
+            'description': '18.07.2015 20:10 Uhr',
             'thumbnail': 're:^https?:.*\.jpg$',
         },
     }, {
         'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html',
         'md5': '3c54c1f6243d279b706bde660ceec633',
         'info_dict': {
-            'id': '5727',
+            'id': 'ts-5727',
             'ext': 'mp4',
-            'description': 'md5:695c01bfd98b7e313c501386327aea59',
             'title': 'Sendung: tagesschau \t04.12.2014 20:00 Uhr',
+            'description': 'md5:695c01bfd98b7e313c501386327aea59',
+            'thumbnail': 're:^https?:.*\.jpg$',
+        },
+    }, {
+        # exclusive audio
+        'url': 'http://www.tagesschau.de/multimedia/audio/audio-29417.html',
+        'md5': '76e6eec6ebd40740671cf0a2c88617e5',
+        'info_dict': {
+            'id': 'audio-29417',
+            'ext': 'mp3',
+            'title': 'Trabi - Bye, bye Rennpappe',
+            'description': 'md5:8687dda862cbbe2cfb2df09b56341317',
             'thumbnail': 're:^https?:.*\.jpg$',
         },
     }, {
-        'url': 'http://www.tagesschau.de/multimedia/politikimradio/audio-18407.html',
-        'md5': 'aef45de271c4bf0a5db834aa40bf774c',
+        # audio in article
+        'url': 'http://www.tagesschau.de/inland/bnd-303.html',
+        'md5': 'e0916c623e85fc1d2b26b78f299d3958',
         'info_dict': {
-            'id': '18407',
+            'id': 'bnd-303',
             'ext': 'mp3',
-            'title': 'Flüchtlingsdebatte: Hitzig, aber wenig hilfreich',
-            'description': 'Flüchtlingsdebatte: Hitzig, aber wenig hilfreich',
+            'title': 'Viele Baustellen für neuen BND-Chef',
+            'description': 'md5:1e69a54be3e1255b2b07cdbce5bcd8b4',
             'thumbnail': 're:^https?:.*\.jpg$',
         },
+    }, {
+        'url': 'http://www.tagesschau.de/inland/afd-parteitag-135.html',
+        'info_dict': {
+            'id': 'afd-parteitag-135',
+            'title': 'Möchtegern-Underdog mit Machtanspruch',
+        },
+        'playlist_count': 2,
     }, {
         'url': 'http://www.tagesschau.de/multimedia/sendung/tsg-3771.html',
         'only_matching': True,
@@ -61,88 +197,108 @@ class TagesschauIE(InfoExtractor):
     }, {
         'url': 'http://www.tagesschau.de/multimedia/video/video-102303~_bab-sendung-211.html',
         'only_matching': True,
+    }, {
+        'url': 'http://www.tagesschau.de/100sekunden/index.html',
+        'only_matching': True,
+    }, {
+        # playlist article with collapsing sections
+        'url': 'http://www.tagesschau.de/wirtschaft/faq-freihandelszone-eu-usa-101.html',
+        'only_matching': True,
     }]
 
-    _FORMATS = {
-        's': {'width': 256, 'height': 144, 'quality': 1},
-        'm': {'width': 512, 'height': 288, 'quality': 2},
-        'l': {'width': 960, 'height': 544, 'quality': 3},
-    }
+    @classmethod
+    def suitable(cls, url):
+        return False if TagesschauPlayerIE.suitable(url) else super(TagesschauIE, cls).suitable(url)
+
+    def _extract_formats(self, download_text, media_kind):
+        links = re.finditer(
+            r'<div class="button" title="(?P<title>[^"]*)"><a href="(?P<url>[^"]+)">(?P<name>.+?)</a></div>',
+            download_text)
+        formats = []
+        for l in links:
+            link_url = l.group('url')
+            if not link_url:
+                continue
+            format_id = self._search_regex(
+                r'.*/[^/.]+\.([^/]+)\.[^/.]+$', link_url, 'format ID',
+                default=determine_ext(link_url))
+            format = {
+                'format_id': format_id,
+                'url': link_url,
+                'format_name': l.group('name'),
+            }
+            title = l.group('title')
+            if title:
+                if media_kind.lower() == 'video':
+                    m = re.match(
+                        r'''(?x)
+                            Video:\s*(?P<vcodec>[a-zA-Z0-9/._-]+)\s*&\#10;
+                            (?P<width>[0-9]+)x(?P<height>[0-9]+)px&\#10;
+                            (?P<vbr>[0-9]+)kbps&\#10;
+                            Audio:\s*(?P<abr>[0-9]+)kbps,\s*(?P<audio_desc>[A-Za-z\.0-9]+)&\#10;
+                            Gr&ouml;&szlig;e:\s*(?P<filesize_approx>[0-9.,]+\s+[a-zA-Z]*B)''',
+                        title)
+                    if m:
+                        format.update({
+                            'format_note': m.group('audio_desc'),
+                            'vcodec': m.group('vcodec'),
+                            'width': int(m.group('width')),
+                            'height': int(m.group('height')),
+                            'abr': int(m.group('abr')),
+                            'vbr': int(m.group('vbr')),
+                            'filesize_approx': parse_filesize(m.group('filesize_approx')),
+                        })
+                else:
+                    m = re.match(
+                        r'(?P<format>.+?)-Format\s*:\s*(?P<abr>\d+)kbps\s*,\s*(?P<note>.+)',
+                        title)
+                    if m:
+                        format.update({
+                            'format_note': '%s, %s' % (m.group('format'), m.group('note')),
+                            'vcodec': 'none',
+                            'abr': int(m.group('abr')),
+                        })
+            formats.append(format)
+        self._sort_formats(formats)
+        return formats
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id') or mobj.group('path')
         display_id = video_id.lstrip('-')
+
         webpage = self._download_webpage(url, display_id)
 
-        player_url = self._html_search_meta(
-            'twitter:player', webpage, 'player URL', default=None)
-        if player_url:
-            playerpage = self._download_webpage(
-                player_url, display_id, 'Downloading player page')
-
-            formats = []
-            for media in re.finditer(
-                    r'''(?x)
-                        (?P<q_url>["\'])(?P<url>http://media.+?)(?P=q_url)
-                        ,\s*type:(?P<q_type>["\'])(?P<type>video|audio)/(?P<ext>.+?)(?P=q_type)
-                        (?:,\s*quality:(?P<q_quality>["\'])(?P<quality>.+?)(?P=q_quality))?
-                    ''', playerpage):
-                url = media.group('url')
-                type_ = media.group('type')
-                ext = media.group('ext')
-                res = media.group('quality')
-                f = {
-                    'format_id': '%s_%s' % (res, ext) if res else ext,
-                    'url': url,
-                    'ext': ext,
-                    'vcodec': 'none' if type_ == 'audio' else None,
-                }
-                f.update(self._FORMATS.get(res, {}))
-                formats.append(f)
-            thumbnail = self._og_search_thumbnail(playerpage)
-            title = self._og_search_title(webpage).strip()
-            description = self._og_search_description(webpage).strip()
-        else:
+        title = self._html_search_regex(
+            r'<span[^>]*class="headline"[^>]*>(.+?)</span>',
+            webpage, 'title', default=None) or self._og_search_title(webpage)
+
+        DOWNLOAD_REGEX = r'(?s)<p>Wir bieten dieses (?P<kind>Video|Audio) in folgenden Formaten zum Download an:</p>\s*<div class="controls">(?P<links>.*?)</div>\s*<p>'
+
+        webpage_type = self._og_search_property('type', webpage, default=None)
+        if webpage_type == 'website':  # Article
+            entries = []
+            for num, (entry_title, media_kind, download_text) in enumerate(re.findall(
+                    r'(?s)<p[^>]+class="infotext"[^>]*>\s*(?:<a[^>]+>)?\s*<strong>(.+?)</strong>.*?</p>.*?%s' % DOWNLOAD_REGEX,
+                    webpage), 1):
+                entries.append({
+                    'id': '%s-%d' % (display_id, num),
+                    'title': entry_title,
+                    'formats': self._extract_formats(download_text, media_kind),
+                })
+            if len(entries) > 1:
+                return self.playlist_result(entries, display_id, title)
+            formats = entries[0]['formats']
+        else:  # Assume single video
             download_text = self._search_regex(
-                r'(?s)<p>Wir bieten dieses Video in folgenden Formaten zum Download an:</p>\s*<div class="controls">(.*?)</div>\s*<p>',
-                webpage, 'download links')
-            links = re.finditer(
-                r'<div class="button" title="(?P<title>[^"]*)"><a href="(?P<url>[^"]+)">(?P<name>.+?)</a></div>',
-                download_text)
-            formats = []
-            for l in links:
-                format_id = self._search_regex(
-                    r'.*/[^/.]+\.([^/]+)\.[^/.]+', l.group('url'), 'format ID')
-                format = {
-                    'format_id': format_id,
-                    'url': l.group('url'),
-                    'format_name': l.group('name'),
-                }
-                m = re.match(
-                    r'''(?x)
-                        Video:\s*(?P<vcodec>[a-zA-Z0-9/._-]+)\s*&\#10;
-                        (?P<width>[0-9]+)x(?P<height>[0-9]+)px&\#10;
-                        (?P<vbr>[0-9]+)kbps&\#10;
-                        Audio:\s*(?P<abr>[0-9]+)kbps,\s*(?P<audio_desc>[A-Za-z\.0-9]+)&\#10;
-                        Gr&ouml;&szlig;e:\s*(?P<filesize_approx>[0-9.,]+\s+[a-zA-Z]*B)''',
-                    l.group('title'))
-                if m:
-                    format.update({
-                        'format_note': m.group('audio_desc'),
-                        'vcodec': m.group('vcodec'),
-                        'width': int(m.group('width')),
-                        'height': int(m.group('height')),
-                        'abr': int(m.group('abr')),
-                        'vbr': int(m.group('vbr')),
-                        'filesize_approx': parse_filesize(m.group('filesize_approx')),
-                    })
-                formats.append(format)
-            thumbnail = self._og_search_thumbnail(webpage)
-            description = self._html_search_regex(
-                r'(?s)<p class="teasertext">(.*?)</p>',
-                webpage, 'description', default=None)
-            title = self._html_search_regex(
-                r'<span class="headline".*?>(.*?)</span>', webpage, 'title')
+                DOWNLOAD_REGEX, webpage, 'download links', group='links')
+            media_kind = self._search_regex(
+                DOWNLOAD_REGEX, webpage, 'media kind', default='Video', group='kind')
+            formats = self._extract_formats(download_text, media_kind)
+        thumbnail = self._og_search_thumbnail(webpage)
+        description = self._html_search_regex(
+            r'(?s)<p class="teasertext">(.*?)</p>',
+            webpage, 'description', default=None)
 
         self._sort_formats(formats)
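
The rewritten TagesschauIE matches nearly any tagesschau.de HTML page, so it must explicitly step aside for ~player URLs via the suitable() override above; youtube-dl probes extractors in order and takes the first whose suitable() returns true. A toy illustration of the hand-off pattern (invented classes, not the real extractors):

    import re

    class InfoExtractor(object):
        _VALID_URL = None

        @classmethod
        def suitable(cls, url):
            return re.match(cls._VALID_URL, url) is not None

    class PlayerIE(InfoExtractor):
        _VALID_URL = r'https?://example\.com/video-\d+~player\.html'

    class ArticleIE(InfoExtractor):
        _VALID_URL = r'https?://example\.com/[^/]+\.html'

        @classmethod
        def suitable(cls, url):
            # defer player pages to the more specific extractor
            return False if PlayerIE.suitable(url) else super(ArticleIE, cls).suitable(url)

    assert PlayerIE.suitable('http://example.com/video-1~player.html')
    assert not ArticleIE.suitable('http://example.com/video-1~player.html')
    assert ArticleIE.suitable('http://example.com/some-article.html')
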
 
diff --git a/youtube_dl/extractor/tdslifeway.py b/youtube_dl/extractor/tdslifeway.py
new file mode 100644 (file)
index 0000000..4d1f5c8
--- /dev/null
@@ -0,0 +1,33 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class TDSLifewayIE(InfoExtractor):
+    _VALID_URL = r'https?://tds\.lifeway\.com/v1/trainingdeliverysystem/courses/(?P<id>\d+)/index\.html'
+
+    _TEST = {
+        # From http://www.ministrygrid.com/training-viewer/-/training/t4g-2014-conference/the-gospel-by-numbers-4/the-gospel-by-numbers
+        'url': 'http://tds.lifeway.com/v1/trainingdeliverysystem/courses/3453494717001/index.html?externalRegistration=AssetId%7C34F466F1-78F3-4619-B2AB-A8EFFA55E9E9%21InstanceId%7C0%21UserId%7Caaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa&grouping=http%3A%2F%2Flifeway.com%2Fvideo%2F3453494717001&activity_id=http%3A%2F%2Flifeway.com%2Fvideo%2F3453494717001&content_endpoint=http%3A%2F%2Ftds.lifeway.com%2Fv1%2Ftrainingdeliverysystem%2FScormEngineInterface%2FTCAPI%2Fcontent%2F&actor=%7B%22name%22%3A%5B%22Guest%20Guest%22%5D%2C%22account%22%3A%5B%7B%22accountServiceHomePage%22%3A%22http%3A%2F%2Fscorm.lifeway.com%2F%22%2C%22accountName%22%3A%22aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa%22%7D%5D%2C%22objectType%22%3A%22Agent%22%7D&content_token=462a50b2-b6f9-4970-99b1-930882c499fb&registration=93d6ec8e-7f7b-4ed3-bbc8-a857913c0b2a&externalConfiguration=access%7CFREE%21adLength%7C-1%21assignOrgId%7C4AE36F78-299A-425D-91EF-E14A899B725F%21assignOrgParentId%7C%21courseId%7C%21isAnonymous%7Cfalse%21previewAsset%7Cfalse%21previewLength%7C-1%21previewMode%7Cfalse%21royalty%7CFREE%21sessionId%7C671422F9-8E79-48D4-9C2C-4EE6111EA1CD%21trackId%7C&auth=Basic%20OjhmZjk5MDBmLTBlYTMtNDJhYS04YjFlLWE4MWQ3NGNkOGRjYw%3D%3D&endpoint=http%3A%2F%2Ftds.lifeway.com%2Fv1%2Ftrainingdeliverysystem%2FScormEngineInterface%2FTCAPI%2F',
+        'info_dict': {
+            'id': '3453494717001',
+            'ext': 'mp4',
+            'title': 'The Gospel by Numbers',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'upload_date': '20140410',
+            'description': 'Coming soon from T4G 2014!',
+            'uploader_id': '2034960640001',
+            'timestamp': 1397145591,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+        'add_ie': ['BrightcoveNew'],
+    }
+
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/2034960640001/default_default/index.html?videoId=%s'
+
+    def _real_extract(self, url):
+        brightcove_id = self._match_id(url)
+        return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
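
TDSLifewayIE never even fetches the page: it maps the course ID onto a Brightcove player URL and hands extraction off through url_result(). The result dict that helper builds is, in simplified form:

    def url_result(url, ie=None, video_id=None):
        # simplified InfoExtractor.url_result: a '_type': 'url' entry tells the
        # core to re-dispatch this URL to another extractor (ie_key names it)
        result = {'_type': 'url', 'url': url, 'ie_key': ie}
        if video_id is not None:
            result['id'] = video_id
        return result

    TEMPLATE = 'http://players.brightcove.net/2034960640001/default_default/index.html?videoId=%s'
    print(url_result(TEMPLATE % '3453494717001', 'BrightcoveNew', '3453494717001'))
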
index e0477382ceabea0769bd0575ceb1f350ce8c0911..d14d93e3ab1ae87902dc275e1208964a86b6b840 100644 (file)
@@ -11,6 +11,7 @@ class TeachingChannelIE(InfoExtractor):
 
     _TEST = {
         'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution',
+        'md5': '3d6361864d7cac20b57c8784da17166f',
         'info_dict': {
             'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM',
             'ext': 'mp4',
@@ -19,9 +20,9 @@ class TeachingChannelIE(InfoExtractor):
             'duration': 422.255,
         },
         'params': {
-            # m3u8 download
             'skip_download': True,
         },
+        'add_ie': ['Ooyala'],
     }
 
     def _real_extract(self, url):
index d1b7264b4ca4a0cb72e491da26d7f5bbc1cc66b7..79a7789200e34e1e457d9cd69cdabb495e3548c3 100644 (file)
@@ -16,7 +16,7 @@ from ..compat import compat_ord
 
 
 class TeamcocoIE(InfoExtractor):
-    _VALID_URL = r'http://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)'
+    _VALID_URL = r'https?://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)'
     _TESTS = [
         {
             'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
@@ -88,7 +88,7 @@ class TeamcocoIE(InfoExtractor):
         preload_codes = self._html_search_regex(
             r'(function.+)setTimeout\(function\(\)\{playlist',
             webpage, 'preload codes')
-        base64_fragments = re.findall(r'"([a-zA-z0-9+/=]+)"', preload_codes)
+        base64_fragments = re.findall(r'"([a-zA-Z0-9+/=]+)"', preload_codes)
         base64_fragments.remove('init')
 
         def _check_sequence(cur_fragments):
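
The one-character fix above is easy to miss but real: inside a character class, A-z also spans the ASCII codes between 'Z' and 'a', so the old pattern accepted six extra characters that can never appear in base64. A quick check:

    import re

    extra = [c for c in map(chr, range(32, 127))
             if re.match(r'[a-zA-z0-9+/=]', c) and not re.match(r'[a-zA-Z0-9+/=]', c)]
    print(extra)  # ['[', '\\', ']', '^', '_', '`']
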
index a48d77c309dcd1f9984cd0a6c71b7af574ca5498..451cde76d2e757fcdfb30ad96847b16aa4d156ff 100644 (file)
@@ -27,7 +27,7 @@ class TEDIE(InfoExtractor):
         '''
     _TESTS = [{
         'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
-        'md5': 'fc94ac279feebbce69f21c0c6ee82810',
+        'md5': '0de43ac406aa3e4ea74b66c9c7789b13',
         'info_dict': {
             'id': '102',
             'ext': 'mp4',
@@ -37,21 +37,26 @@ class TEDIE(InfoExtractor):
                             'consciousness, but that half the time our brains are '
                             'actively fooling us.'),
             'uploader': 'Dan Dennett',
-            'width': 854,
+            'width': 853,
             'duration': 1308,
         }
     }, {
         'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
-        'md5': '226f4fb9c62380d11b7995efa4c87994',
+        'md5': 'b899ac15e345fb39534d913f7606082b',
         'info_dict': {
-            'id': 'vishal-sikka-the-beauty-and-power-of-algorithms',
+            'id': 'tSVI8ta_P4w',
             'ext': 'mp4',
             'title': 'Vishal Sikka: The beauty and power of algorithms',
             'thumbnail': 're:^https?://.+\.jpg',
-            'description': 'Adaptive, intelligent, and consistent, algorithms are emerging as the ultimate app for everything from matching consumers to products to assessing medical diagnoses. Vishal Sikka shares his appreciation for the algorithm, charting both its inherent beauty and its growing power.',
-        }
+            'description': 'md5:6261fdfe3e02f4f579cbbfc00aff73f4',
+            'upload_date': '20140122',
+            'uploader_id': 'TEDInstitute',
+            'uploader': 'TED Institute',
+        },
+        'add_ie': ['Youtube'],
     }, {
         'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best',
+        'md5': '71b3ab2f4233012dce09d515c9c39ce2',
         'info_dict': {
             'id': '1972',
             'ext': 'mp4',
@@ -73,7 +78,7 @@ class TEDIE(InfoExtractor):
         'add_ie': ['Youtube'],
         'info_dict': {
             'id': '_ZG8HBuDjgc',
-            'ext': 'mp4',
+            'ext': 'webm',
             'title': 'Douglas Adams: Parrots the Universe and Everything',
             'description': 'md5:01ad1e199c49ac640cb1196c0e9016af',
             'uploader': 'University of California Television (UCTV)',
@@ -102,9 +107,9 @@ class TEDIE(InfoExtractor):
     }]
 
     _NATIVE_FORMATS = {
-        'low': {'preference': 1, 'width': 320, 'height': 180},
-        'medium': {'preference': 2, 'width': 512, 'height': 288},
-        'high': {'preference': 3, 'width': 854, 'height': 480},
+        'low': {'width': 320, 'height': 180},
+        'medium': {'width': 512, 'height': 288},
+        'high': {'width': 854, 'height': 480},
     }
 
     def _extract_info(self, webpage):
@@ -171,15 +176,21 @@ class TEDIE(InfoExtractor):
                 if finfo:
                     f.update(finfo)
 
+        http_url = None
         for format_id, resources in talk_info['resources'].items():
             if format_id == 'h264':
                 for resource in resources:
+                    h264_url = resource.get('file')
+                    if not h264_url:
+                        continue
                     bitrate = int_or_none(resource.get('bitrate'))
                     formats.append({
-                        'url': resource['file'],
+                        'url': h264_url,
                         'format_id': '%s-%sk' % (format_id, bitrate),
                         'tbr': bitrate,
                     })
+                    if re.search(r'\d+k', h264_url):
+                        http_url = h264_url
             elif format_id == 'rtmp':
                 streamer = talk_info.get('streamer')
                 if not streamer:
@@ -195,16 +206,24 @@ class TEDIE(InfoExtractor):
                         'tbr': int_or_none(resource.get('bitrate')),
                     })
             elif format_id == 'hls':
-                hls_formats = self._extract_m3u8_formats(
-                    resources.get('stream'), video_name, 'mp4', m3u8_id=format_id)
-                for f in hls_formats:
-                    if f.get('format_id') == 'hls-meta':
-                        continue
-                    if not f.get('height'):
-                        f['vcodec'] = 'none'
-                    else:
-                        f['acodec'] = 'none'
-                formats.extend(hls_formats)
+                formats.extend(self._extract_m3u8_formats(
+                    resources.get('stream'), video_name, 'mp4', m3u8_id=format_id, fatal=False))
+
+        m3u8_formats = list(filter(
+            lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
+            formats))
+        if http_url:
+            for m3u8_format in m3u8_formats:
+                bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None)
+                if not bitrate:
+                    continue
+                f = m3u8_format.copy()
+                f.update({
+                    'url': re.sub(r'\d+k', bitrate, http_url),
+                    'format_id': m3u8_format['format_id'].replace('hls', 'http'),
+                    'protocol': 'http',
+                })
+                formats.append(f)
 
         audio_download = talk_info.get('audioDownload')
         if audio_download:
@@ -212,7 +231,6 @@ class TEDIE(InfoExtractor):
                 'url': audio_download,
                 'format_id': 'audio',
                 'vcodec': 'none',
-                'preference': -0.5,
             })
 
         self._sort_formats(formats)
@@ -254,7 +272,11 @@ class TEDIE(InfoExtractor):
 
         config_json = self._html_search_regex(
             r'"pages\.jwplayer"\s*,\s*({.+?})\s*\)\s*</script>',
-            webpage, 'config')
+            webpage, 'config', default=None)
+        if not config_json:
+            embed_url = self._search_regex(
+                r"<iframe[^>]+class='pages-video-embed__video__object'[^>]+src='([^']+)'", webpage, 'embed url')
+            return self.url_result(self._proto_relative_url(embed_url))
         config = json.loads(config_json)['config']
         video_url = config['video']['url']
         thumbnail = config.get('image', {}).get('url')
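
The HTTP-format synthesis in the hunk above works because TED's HLS variant URLs and its direct-download URLs share the same 64k/180k/320k bitrate tokens; the extractor copies each HLS variant and rewrites the token into the known progressive URL. The core of the idea, with invented URLs:

    import re

    http_url = 'http://download.ted.com/talks/DemoTalk-320k.mp4'  # hypothetical direct URL
    hls_variants = [
        'http://hls.ted.com/talks/demo/video/64k.m3u8',
        'http://hls.ted.com/talks/demo/video/180k.m3u8',
    ]

    for variant_url in hls_variants:
        m = re.search(r'(\d+k)', variant_url)
        if m:
            # reuse the direct URL, swapping in this variant's bitrate token
            print(re.sub(r'\d+k', m.group(1), http_url))
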
index 4e860db0a906f7892a2f155483bd66f7e34cecc2..a29a64b6d5d2fbcec5667902eba259cefb679125 100644 (file)
@@ -11,7 +11,7 @@ from ..utils import (
 
 
 class Tele13IE(InfoExtractor):
-    _VALID_URL = r'^http://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)'
+    _VALID_URL = r'^https?://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)'
     _TESTS = [
         {
             'url': 'http://www.t13.cl/videos/actualidad/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
index a3d05f97d681b6cb4da6adf179a4f0a5744e5123..eefecc490c5d13476259497e79f7a3ebe68caee7 100644 (file)
@@ -1,11 +1,13 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 
 
 class TeleBruxellesIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?telebruxelles\.be/(news|sport|dernier-jt)/?(?P<id>[^/#?]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:telebruxelles|bx1)\.be/(news|sport|dernier-jt)/?(?P<id>[^/#?]+)'
     _TESTS = [{
         'url': 'http://www.telebruxelles.be/news/auditions-devant-parlement-francken-galant-tres-attendus/',
         'md5': '59439e568c9ee42fb77588b2096b214f',
@@ -39,18 +41,18 @@ class TeleBruxellesIE(InfoExtractor):
         webpage = self._download_webpage(url, display_id)
 
         article_id = self._html_search_regex(
-            r"<article id=\"post-(\d+)\"", webpage, 'article ID')
+            r"<article id=\"post-(\d+)\"", webpage, 'article ID', default=None)
         title = self._html_search_regex(
             r'<h1 class=\"entry-title\">(.*?)</h1>', webpage, 'title')
-        description = self._og_search_description(webpage)
+        description = self._og_search_description(webpage, default=None)
 
         rtmp_url = self._html_search_regex(
-            r"file: \"(rtmp://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}/vod/mp4:\" \+ \"\w+\" \+ \".mp4)\"",
+            r'file\s*:\s*"(rtmp://[^/]+/vod/mp4:"\s*\+\s*"[^"]+"\s*\+\s*".mp4)"',
             webpage, 'RTMP url')
-        rtmp_url = rtmp_url.replace("\" + \"", "")
+        rtmp_url = re.sub(r'"\s*\+\s*"', '', rtmp_url)
 
         return {
-            'id': article_id,
+            'id': article_id or display_id,
             'display_id': display_id,
             'title': title,
             'description': description,
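
The page emits the RTMP URL as a JavaScript string concatenation, so the extractor matches the whole expression and then deletes the " + " junctions; the tightened pattern also tolerates arbitrary spacing around the +. For instance:

    import re

    # value shaped like what the pages embed (stream name invented)
    js_value = 'rtmp://1.2.3.4:1935/vod/mp4:" + "stream_name" + ".mp4'
    print(re.sub(r'"\s*\+\s*"', '', js_value))
    # -> rtmp://1.2.3.4:1935/vod/mp4:stream_name.mp4
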
index 2c8e9b9410f09f23349584e7945bed64e5dd3a76..4b4b740b44d325ffb8a8a5c6cba848b0c99ced13 100644 (file)
@@ -5,8 +5,8 @@ import json
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_parse,
     compat_urllib_parse_unquote,
+    compat_urllib_parse_urlencode,
     compat_urlparse,
 )
 from ..utils import (
@@ -74,7 +74,7 @@ class TelecincoIE(InfoExtractor):
         info_el = self._download_xml(info_url, episode).find('./video/info')
 
         video_link = info_el.find('videoUrl/link').text
-        token_query = compat_urllib_parse.urlencode({'id': video_link})
+        token_query = compat_urllib_parse_urlencode({'id': video_link})
         token_info = self._download_json(
             embed_data['flashvars']['ov_tk'] + '?' + token_query,
             episode,
@@ -82,6 +82,7 @@ class TelecincoIE(InfoExtractor):
         )
         formats = self._extract_m3u8_formats(
             token_info['tokenizedUrl'], episode, ext='mp4', entry_protocol='m3u8_native')
+        self._sort_formats(formats)
 
         return {
             'id': embed_data['videoId'],
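
The compat_urllib_parse_urlencode swap reflects the Python 2/3 split: urlencode lives in urllib on 2 and urllib.parse on 3, and youtube-dl funnels both through one compat name (the real shim in compat.py additionally handles unicode values on Python 2). Roughly:

    try:
        from urllib.parse import urlencode as compat_urllib_parse_urlencode  # Python 3
    except ImportError:
        from urllib import urlencode as compat_urllib_parse_urlencode  # Python 2

    token_query = compat_urllib_parse_urlencode({'id': 'http://example.com/video.mp4'})
    print(token_query)  # id=http%3A%2F%2Fexample.com%2Fvideo.mp4
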
index 6f8333cfc0d40aee4d3637ed1e58867da1277e9f..9092e9b853637d14d54a2c15c524b12e35e91a30 100644 (file)
@@ -2,14 +2,16 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import remove_end
+from ..utils import (
+    determine_ext,
+    remove_end,
+)
 
 
 class TelegraafIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?telegraaf\.nl/tv/(?:[^/]+/)+(?P<id>\d+)/[^/]+\.html'
     _TEST = {
         'url': 'http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html',
-        'md5': '83245a9779bcc4a24454bfd53c65b6dc',
         'info_dict': {
             'id': '24353229',
             'ext': 'mp4',
@@ -18,18 +20,60 @@ class TelegraafIE(InfoExtractor):
             'thumbnail': 're:^https?://.*\.jpg$',
             'duration': 33,
         },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
     }
 
     def _real_extract(self, url):
-        playlist_id = self._match_id(url)
+        video_id = self._match_id(url)
 
-        webpage = self._download_webpage(url, playlist_id)
+        webpage = self._download_webpage(url, video_id)
 
+        player_url = self._html_search_regex(
+            r'<iframe[^>]+src="([^"]+)"', webpage, 'player URL')
+        player_page = self._download_webpage(
+            player_url, video_id, note='Downloading player webpage')
         playlist_url = self._search_regex(
-            r"iframe\.loadPlayer\('([^']+)'", webpage, 'player')
+            r'playlist\s*:\s*"([^"]+)"', player_page, 'playlist URL')
+        playlist_data = self._download_json(playlist_url, video_id)
+
+        item = playlist_data['items'][0]
+        formats = []
+        locations = item['locations']
+        for location in locations.get('adaptive', []):
+            manifest_url = location['src']
+            ext = determine_ext(manifest_url)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    manifest_url, video_id, ext='mp4', m3u8_id='hls'))
+            elif ext == 'mpd':
+                # TODO: Current DASH formats are broken - $Time$ pattern in
+                # <SegmentTemplate> not implemented yet
+                continue
+            else:
+                self.report_warning('Unknown adaptive format %s' % ext)
+        for location in locations.get('progressive', []):
+            formats.append({
+                'url': location['sources'][0]['src'],
+                'width': location.get('width'),
+                'height': location.get('height'),
+                'format_id': 'http-%s' % location['label'],
+            })
+
+        self._sort_formats(formats)
 
-        entries = self._extract_xspf_playlist(playlist_url, playlist_id)
         title = remove_end(self._og_search_title(webpage), ' - VIDEO')
         description = self._og_search_description(webpage)
+        duration = item.get('duration')
+        thumbnail = item.get('poster')
 
-        return self.playlist_result(entries, playlist_id, title, description)
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'formats': formats,
+            'duration': duration,
+            'thumbnail': thumbnail,
+        }
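
The rewritten extractor dispatches on the manifest extension: m3u8 locations are expanded into per-variant HLS formats, mpd is skipped until $Time$ SegmentTemplate support lands, and anything else triggers a warning. determine_ext, which drives that dispatch, takes the extension of the URL path and ignores the query string; a sketch assuming youtube-dl is importable, with invented URLs:

    from youtube_dl.utils import determine_ext

    for src in ('http://cdn.example.com/v/master.m3u8?token=abc',
                'http://cdn.example.com/v/manifest.mpd',
                'http://cdn.example.com/v/progressive-720.mp4'):
        print(src, '->', determine_ext(src))
    # -> m3u8, mpd, mp4
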
diff --git a/youtube_dl/extractor/telewebion.py b/youtube_dl/extractor/telewebion.py
new file mode 100644 (file)
index 0000000..77916c6
--- /dev/null
@@ -0,0 +1,55 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class TelewebionIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.telewebion\.com/#!/episode/(?P<id>\d+)'
+
+    _TEST = {
+        'url': 'http://www.telewebion.com/#!/episode/1263668/',
+        'info_dict': {
+            'id': '1263668',
+            'ext': 'mp4',
+            'title': 'قرعه\u200cکشی لیگ قهرمانان اروپا',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'view_count': int,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        secure_token = self._download_webpage(
+            'http://m.s2.telewebion.com/op/op?action=getSecurityToken', video_id)
+        episode_details = self._download_json(
+            'http://m.s2.telewebion.com/op/op', video_id,
+            query={'action': 'getEpisodeDetails', 'episode_id': video_id})
+
+        m3u8_url = 'http://m.s1.telewebion.com/smil/%s.m3u8?filepath=%s&m3u8=1&secure_token=%s' % (
+            video_id, episode_details['file_path'], secure_token)
+        formats = self._extract_m3u8_formats(
+            m3u8_url, video_id, ext='mp4', m3u8_id='hls')
+
+        picture_paths = [
+            episode_details.get('picture_path'),
+            episode_details.get('large_picture_path'),
+        ]
+
+        thumbnails = [{
+            'url': picture_path,
+            'preference': idx,
+        } for idx, picture_path in enumerate(picture_paths) if picture_path is not None]
+
+        return {
+            'id': video_id,
+            'title': episode_details['title'],
+            'formats': formats,
+            'thumbnails': thumbnails,
+            'view_count': episode_details.get('view_count'),
+        }
diff --git a/youtube_dl/extractor/tenplay.py b/youtube_dl/extractor/tenplay.py
deleted file mode 100644 (file)
index 02a31a6..0000000
+++ /dev/null
@@ -1,90 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
-    int_or_none,
-    float_or_none,
-)
-
-
-class TenPlayIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?ten(play)?\.com\.au/.+'
-    _TEST = {
-        'url': 'http://tenplay.com.au/ten-insider/extra/season-2013/tenplay-tv-your-way',
-        'info_dict': {
-            'id': '2695695426001',
-            'ext': 'flv',
-            'title': 'TENplay: TV your way',
-            'description': 'Welcome to a new TV experience. Enjoy a taste of the TENplay benefits.',
-            'timestamp': 1380150606.889,
-            'upload_date': '20130925',
-            'uploader': 'TENplay',
-        },
-        'params': {
-            'skip_download': True,  # Requires rtmpdump
-        }
-    }
-
-    _video_fields = [
-        'id', 'name', 'shortDescription', 'longDescription', 'creationDate',
-        'publishedDate', 'lastModifiedDate', 'customFields', 'videoStillURL',
-        'thumbnailURL', 'referenceId', 'length', 'playsTotal',
-        'playsTrailingWeek', 'renditions', 'captioning', 'startDate', 'endDate']
-
-    def _real_extract(self, url):
-        webpage = self._download_webpage(url, url)
-        video_id = self._html_search_regex(
-            r'videoID: "(\d+?)"', webpage, 'video_id')
-        api_token = self._html_search_regex(
-            r'apiToken: "([a-zA-Z0-9-_\.]+?)"', webpage, 'api_token')
-        title = self._html_search_regex(
-            r'<meta property="og:title" content="\s*(.*?)\s*"\s*/?\s*>',
-            webpage, 'title')
-
-        json = self._download_json('https://api.brightcove.com/services/library?command=find_video_by_id&video_id=%s&token=%s&video_fields=%s' % (video_id, api_token, ','.join(self._video_fields)), title)
-
-        formats = []
-        for rendition in json['renditions']:
-            url = rendition['remoteUrl'] or rendition['url']
-            protocol = 'rtmp' if url.startswith('rtmp') else 'http'
-            ext = 'flv' if protocol == 'rtmp' else rendition['videoContainer'].lower()
-
-            if protocol == 'rtmp':
-                url = url.replace('&mp4:', '')
-
-                tbr = int_or_none(rendition.get('encodingRate'), 1000)
-
-            formats.append({
-                'format_id': '_'.join(
-                    ['rtmp', rendition['videoContainer'].lower(),
-                     rendition['videoCodec'].lower(), '%sk' % tbr]),
-                'width': int_or_none(rendition['frameWidth']),
-                'height': int_or_none(rendition['frameHeight']),
-                'tbr': tbr,
-                'filesize': int_or_none(rendition['size']),
-                'protocol': protocol,
-                'ext': ext,
-                'vcodec': rendition['videoCodec'].lower(),
-                'container': rendition['videoContainer'].lower(),
-                'url': url,
-            })
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'display_id': json['referenceId'],
-            'title': json['name'],
-            'description': json['shortDescription'] or json['longDescription'],
-            'formats': formats,
-            'thumbnails': [{
-                'url': json['videoStillURL']
-            }, {
-                'url': json['thumbnailURL']
-            }],
-            'thumbnail': json['videoStillURL'],
-            'duration': float_or_none(json.get('length'), 1000),
-            'timestamp': float_or_none(json.get('creationDate'), 1000),
-            'uploader': json.get('customFields', {}).get('production_company_distributor') or 'TENplay',
-            'view_count': int_or_none(json.get('playsTotal')),
-        }
index e1a64e284db99e3e749e09ab97f4fc8b8b039eb5..e595c4a69b3f03361abc05f6bca61adecb61cf36 100644 (file)
@@ -6,7 +6,7 @@ from .common import InfoExtractor
 
 class TF1IE(InfoExtractor):
     """TF1 uses the wat.tv player."""
-    _VALID_URL = r'http://(?:(?:videos|www|lci)\.tf1|www\.tfou)\.fr/(?:[^/]+/)*(?P<id>.+?)\.html'
+    _VALID_URL = r'https?://(?:(?:videos|www|lci)\.tf1|(?:www\.)?(?:tfou|ushuaiatv|histoire|tvbreizh))\.fr/(?:[^/]+/)*(?P<id>[^/?#.]+)'
     _TESTS = [{
         'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
         'info_dict': {
diff --git a/youtube_dl/extractor/theonion.py b/youtube_dl/extractor/theonion.py
deleted file mode 100644 (file)
index 10239c9..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-
-
-class TheOnionIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<id>[0-9]+)/?'
-    _TEST = {
-        'url': 'http://www.theonion.com/video/man-wearing-mm-jacket-gods-image,36918/',
-        'md5': '19eaa9a39cf9b9804d982e654dc791ee',
-        'info_dict': {
-            'id': '2133',
-            'ext': 'mp4',
-            'title': 'Man Wearing M&M Jacket Apparently Made In God\'s Image',
-            'description': 'md5:cc12448686b5600baae9261d3e180910',
-            'thumbnail': 're:^https?://.*\.jpg\?\d+$',
-        }
-    }
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-
-        video_id = self._search_regex(
-            r'"videoId":\s(\d+),', webpage, 'video ID')
-        title = self._og_search_title(webpage)
-        description = self._og_search_description(webpage)
-        thumbnail = self._og_search_thumbnail(webpage)
-
-        sources = re.findall(r'<source src="([^"]+)" type="([^"]+)"', webpage)
-        formats = []
-        for src, type_ in sources:
-            if type_ == 'video/mp4':
-                formats.append({
-                    'format_id': 'mp4_sd',
-                    'preference': 1,
-                    'url': src,
-                })
-            elif type_ == 'video/webm':
-                formats.append({
-                    'format_id': 'webm_sd',
-                    'preference': 0,
-                    'url': src,
-                })
-            elif type_ == 'application/x-mpegURL':
-                formats.extend(
-                    self._extract_m3u8_formats(src, display_id, preference=-1))
-            else:
-                self.report_warning(
-                    'Encountered unexpected format: %s' % type_)
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'display_id': display_id,
-            'title': title,
-            'formats': formats,
-            'thumbnail': thumbnail,
-            'description': description,
-        }
index 93d8715716d00be7e15c53afbe804a3703b38e68..07d222ae3c1b53dba7c3886922886d44a09a0d8b 100644 (file)
@@ -8,7 +8,7 @@ import binascii
 import hashlib
 
 
-from .common import InfoExtractor
+from .once import OnceIE
 from ..compat import (
     compat_parse_qs,
     compat_urllib_parse_urlparse,
@@ -20,40 +20,43 @@ from ..utils import (
     int_or_none,
     sanitized_Request,
     unsmuggle_url,
+    update_url_query,
     xpath_with_ns,
     mimetype2ext,
+    find_xpath_attr,
 )
 
 default_ns = 'http://www.w3.org/2005/SMIL21/Language'
 _x = lambda p: xpath_with_ns(p, {'smil': default_ns})
 
 
-class ThePlatformBaseIE(InfoExtractor):
+class ThePlatformBaseIE(OnceIE):
     def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
-        meta = self._download_xml(smil_url, video_id, note=note)
-        try:
-            error_msg = next(
-                n.attrib['abstract']
-                for n in meta.findall(_x('.//smil:ref'))
-                if n.attrib.get('title') == 'Geographic Restriction' or n.attrib.get('title') == 'Expired')
-        except StopIteration:
-            pass
-        else:
-            raise ExtractorError(error_msg, expected=True)
+        meta = self._download_xml(smil_url, video_id, note=note, query={'format': 'SMIL'})
+        error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src')
+        if error_element is not None and error_element.attrib['src'].startswith(
+                'http://link.theplatform.com/s/errorFiles/Unavailable.'):
+            raise ExtractorError(error_element.attrib['abstract'], expected=True)
 
-        formats = self._parse_smil_formats(
+        smil_formats = self._parse_smil_formats(
             meta, smil_url, video_id, namespace=default_ns,
             # the parameters are from syfy.com, other sites may use others,
             # they also work for nbc.com
             f4m_params={'g': 'UXWGVKRWHFSP', 'hdcore': '3.0.3'},
             transform_rtmp_url=lambda streamer, src: (streamer, 'mp4:' + src))
 
-        for _format in formats:
-            ext = determine_ext(_format['url'])
-            if ext == 'once':
-                _format['ext'] = 'mp4'
+        formats = []
+        for _format in smil_formats:
+            if OnceIE.suitable(_format['url']):
+                formats.extend(self._extract_once_formats(_format['url']))
+            else:
+                media_url = _format['url']
+                if determine_ext(media_url) == 'm3u8':
+                    hdnea2 = self._get_cookies(media_url).get('hdnea2')
+                    if hdnea2:
+                        _format['url'] = update_url_query(media_url, {'hdnea3': hdnea2.value})
 
-        self._sort_formats(formats)
+                formats.append(_format)
 
         subtitles = self._parse_smil_subtitles(meta, default_ns)
 
@@ -79,13 +82,15 @@ class ThePlatformBaseIE(InfoExtractor):
             'description': info['description'],
             'thumbnail': info['defaultThumbnailUrl'],
             'duration': int_or_none(info.get('duration'), 1000),
+            'timestamp': int_or_none(info.get('pubDate'), 1000) or None,
+            'uploader': info.get('billingCode'),
         }
 
 
 class ThePlatformIE(ThePlatformBaseIE):
     _VALID_URL = r'''(?x)
         (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
-           (?:(?P<media>(?:(?:[^/]+/)+select/)?media/)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
+           (?:(?:(?:[^/]+/)+select/)?(?P<media>media/(?:guid/\d+/)?)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
          |theplatform:)(?P<id>[^/\?&]+)'''
 
     _TESTS = [{
@@ -97,6 +102,9 @@ class ThePlatformIE(ThePlatformBaseIE):
             'title': 'Blackberry\'s big, bold Z30',
             'description': 'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.',
             'duration': 247,
+            'timestamp': 1383239700,
+            'upload_date': '20131031',
+            'uploader': 'CBSI-NEW',
         },
         'params': {
             # rtmp download
@@ -110,6 +118,9 @@ class ThePlatformIE(ThePlatformBaseIE):
             'ext': 'flv',
             'description': 'md5:ac330c9258c04f9d7512cf26b9595409',
             'title': 'Tesla Model S: A second step towards a cleaner motoring future',
+            'timestamp': 1426176191,
+            'upload_date': '20150312',
+            'uploader': 'CBSI-NEW',
         },
         'params': {
             # rtmp download
@@ -122,13 +133,14 @@ class ThePlatformIE(ThePlatformBaseIE):
             'ext': 'mp4',
             'description': 'md5:644ad9188d655b742f942bf2e06b002d',
             'title': 'HIGHLIGHTS: USA bag first ever series Cup win',
+            'uploader': 'EGSM',
         }
     }, {
         'url': 'http://player.theplatform.com/p/NnzsPC/widget/select/media/4Y0TlYUr_ZT7',
         'only_matching': True,
     }, {
         'url': 'http://player.theplatform.com/p/2E2eJC/nbcNewsOffsite?guid=tdy_or_siri_150701',
-        'md5': '734f3790fb5fc4903da391beeebc4836',
+        'md5': 'fb96bb3d85118930a5b055783a3bd992',
         'info_dict': {
             'id': 'tdy_or_siri_150701',
             'ext': 'mp4',
@@ -138,7 +150,7 @@ class ThePlatformIE(ThePlatformBaseIE):
             'thumbnail': 're:^https?://.*\.jpg$',
             'timestamp': 1435752600,
             'upload_date': '20150701',
-            'categories': ['Today/Shows/Orange Room', 'Today/Sections/Money', 'Today/Topics/Tech', "Today/Topics/Editor's picks"],
+            'uploader': 'NBCU-NEWS',
         },
     }, {
         # From http://www.nbc.com/the-blacklist/video/sir-crispin-crandall/2928790?onid=137781#vc137781=1
@@ -147,6 +159,22 @@ class ThePlatformIE(ThePlatformBaseIE):
         'only_matching': True,
     }]
 
+    @classmethod
+    def _extract_urls(cls, webpage):
+        m = re.search(
+            r'''(?x)
+                    <meta\s+
+                        property=(["'])(?:og:video(?::(?:secure_)?url)?|twitter:player)\1\s+
+                        content=(["'])(?P<url>https?://player\.theplatform\.com/p/.+?)\2
+            ''', webpage)
+        if m:
+            return [m.group('url')]
+
+        matches = re.findall(
+            r'<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
+        if matches:
+            return list(zip(*matches))[1]
+
     @staticmethod
     def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
         flags = '10' if include_qs else '00'
@@ -155,11 +183,11 @@ class ThePlatformIE(ThePlatformBaseIE):
         def str_to_hex(str):
             return binascii.b2a_hex(str.encode('ascii')).decode('ascii')
 
-        def hex_to_str(hex):
-            return binascii.a2b_hex(hex)
+        def hex_to_bytes(hex):
+            return binascii.a2b_hex(hex.encode('ascii'))
 
-        relative_path = url.split('http://link.theplatform.com/s/')[1].split('?')[0]
-        clear_text = hex_to_str(flags + expiration_date + str_to_hex(relative_path))
+        relative_path = re.match(r'https?://link\.theplatform\.com/s/([^?]+)', url).group(1)
+        clear_text = hex_to_bytes(flags + expiration_date + str_to_hex(relative_path))
         checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest()
         sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
         return '%s&sig=%s' % (url, sig)
@@ -174,10 +202,10 @@ class ThePlatformIE(ThePlatformBaseIE):
         if not provider_id:
             provider_id = 'dJ5BDC'
 
-        path = provider_id
+        path = provider_id + '/'
         if mobj.group('media'):
-            path += '/media'
-        path += '/' + video_id
+            path += mobj.group('media')
+        path += video_id
 
         qs_dict = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
         if 'guid' in qs_dict:
@@ -216,7 +244,7 @@ class ThePlatformIE(ThePlatformBaseIE):
                 webpage, 'smil url', group='url')
             path = self._search_regex(
                 r'link\.theplatform\.com/s/((?:[^/?#&]+/)+[^/?#&]+)', smil_url, 'path')
-            smil_url += '?' if '?' not in smil_url else '&' + 'formats=m3u,mpeg4&format=SMIL'
+            smil_url += ('?' if '?' not in smil_url else '&') + 'formats=m3u,mpeg4'
         elif mobj.group('config'):
             config_url = url + '&form=json'
             config_url = config_url.replace('swf/', 'config/')
@@ -226,15 +254,16 @@ class ThePlatformIE(ThePlatformBaseIE):
                 release_url = config['releaseUrl']
             else:
                 release_url = 'http://link.theplatform.com/s/%s?mbr=true' % path
-            smil_url = release_url + '&format=SMIL&formats=MPEG4&manifest=f4m'
+            smil_url = release_url + '&formats=MPEG4&manifest=f4m'
         else:
-            smil_url = 'http://link.theplatform.com/s/%s/meta.smil?format=smil&mbr=true' % path
+            smil_url = 'http://link.theplatform.com/s/%s?mbr=true' % path
 
         sig = smuggled_data.get('sig')
         if sig:
             smil_url = self._sign_url(smil_url, sig['key'], sig['secret'])
 
         formats, subtitles = self._extract_theplatform_smil(smil_url, video_id)
+        self._sort_formats(formats)
 
         ret = self.get_metadata(path, video_id)
         combined_subtitles = self._merge_subtitles(ret.get('subtitles', {}), subtitles)
@@ -248,12 +277,12 @@ class ThePlatformIE(ThePlatformBaseIE):
 
 
 class ThePlatformFeedIE(ThePlatformBaseIE):
-    _URL_TEMPLATE = '%s//feed.theplatform.com/f/%s/%s?form=json&byGuid=%s'
-    _VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*byGuid=(?P<id>[a-zA-Z0-9_]+)'
-    _TEST = {
+    _URL_TEMPLATE = '%s//feed.theplatform.com/f/%s/%s?form=json&%s'
+    _VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*(?P<filter>by(?:Gui|I)d=(?P<id>[\w-]+))'
+    _TESTS = [{
         # From http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=n_hardball_5biden_140207
         'url': 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207',
-        'md5': '22d2b84f058d3586efcd99e57d59d314',
+        'md5': '6e32495b5073ab414471b615c5ded394',
         'info_dict': {
             'id': 'n_hardball_5biden_140207',
             'ext': 'mp4',
@@ -264,33 +293,40 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
             'timestamp': 1391824260,
             'duration': 467.0,
             'categories': ['MSNBC/Issues/Democrats', 'MSNBC/Issues/Elections/Election 2016'],
+            'uploader': 'NBCU-NEWS',
         },
-    }
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-
-        video_id = mobj.group('id')
-        provider_id = mobj.group('provider_id')
-        feed_id = mobj.group('feed_id')
+    }]
 
-        real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, video_id)
-        feed = self._download_json(real_url, video_id)
-        entry = feed['entries'][0]
+    def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}):
+        real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query)
+        entry = self._download_json(real_url, video_id)['entries'][0]
 
         formats = []
         subtitles = {}
         first_video_id = None
         duration = None
+        asset_types = []
         for item in entry['media$content']:
-            smil_url = item['plfile$url'] + '&format=SMIL&mbr=true'
+            smil_url = item['plfile$url']
             cur_video_id = ThePlatformIE._match_id(smil_url)
             if first_video_id is None:
                 first_video_id = cur_video_id
                 duration = float_or_none(item.get('plfile$duration'))
-            cur_formats, cur_subtitles = self._extract_theplatform_smil(smil_url, video_id, 'Downloading SMIL data for %s' % cur_video_id)
-            formats.extend(cur_formats)
-            subtitles = self._merge_subtitles(subtitles, cur_subtitles)
+            for asset_type in item['plfile$assetTypes']:
+                if asset_type in asset_types:
+                    continue
+                asset_types.append(asset_type)
+                query = {
+                    'mbr': 'true',
+                    'formats': item['plfile$format'],
+                    'assetTypes': asset_type,
+                }
+                if asset_type in asset_types_query:
+                    query.update(asset_types_query[asset_type])
+                cur_formats, cur_subtitles = self._extract_theplatform_smil(update_url_query(
+                    smil_url, query), video_id, 'Downloading SMIL data for %s' % asset_type)
+                formats.extend(cur_formats)
+                subtitles = self._merge_subtitles(subtitles, cur_subtitles)
 
         self._sort_formats(formats)
 
@@ -314,5 +350,17 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
             'timestamp': timestamp,
             'categories': categories,
         })
+        if custom_fields:
+            ret.update(custom_fields(entry))
 
         return ret
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+
+        video_id = mobj.group('id')
+        provider_id = mobj.group('provider_id')
+        feed_id = mobj.group('feed_id')
+        filter_query = mobj.group('filter')
+
+        return self._extract_feed_info(provider_id, feed_id, filter_query, video_id)
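
_sign_url above implements thePlatform's link signing: a flags byte and a hex expiry timestamp are concatenated with the hex-encoded URL path, HMAC-SHA1-signed, and the whole token is appended as sig=. The hex_to_bytes rename is also a Python 3 fix, since binascii.a2b_hex wants ASCII bytes there. Condensed into a standalone sketch (key material invented):

    import binascii
    import hashlib
    import hmac
    import re
    import time

    def sign_url(url, sig_key, sig_secret, life=600):
        flags = '00'  # '10' would also sign the query string

        def str_to_hex(s):
            return binascii.b2a_hex(s.encode('ascii')).decode('ascii')

        expiration_date = '%x' % (int(time.time()) + life)
        relative_path = re.match(r'https?://link\.theplatform\.com/s/([^?]+)', url).group(1)
        clear_text = binascii.a2b_hex(
            (flags + expiration_date + str_to_hex(relative_path)).encode('ascii'))
        checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest()
        return '%s&sig=%s' % (url, flags + expiration_date + checksum + str_to_hex(sig_secret))

    print(sign_url('http://link.theplatform.com/s/dJ5BDC/media/xyz?mbr=true', 'key', 'secret'))
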
diff --git a/youtube_dl/extractor/thescene.py b/youtube_dl/extractor/thescene.py
new file mode 100644 (file)
index 0000000..3e4e140
--- /dev/null
@@ -0,0 +1,52 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+from ..compat import compat_urlparse
+from ..utils import qualities
+
+
+class TheSceneIE(InfoExtractor):
+    _VALID_URL = r'https://thescene\.com/watch/[^/]+/(?P<id>[^/#?]+)'
+
+    _TEST = {
+        'url': 'https://thescene.com/watch/vogue/narciso-rodriguez-spring-2013-ready-to-wear',
+        'info_dict': {
+            'id': '520e8faac2b4c00e3c6e5f43',
+            'ext': 'mp4',
+            'title': 'Narciso Rodriguez: Spring 2013 Ready-to-Wear',
+            'display_id': 'narciso-rodriguez-spring-2013-ready-to-wear',
+        },
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        player_url = compat_urlparse.urljoin(
+            url,
+            self._html_search_regex(
+                r'id=\'js-player-script\'[^>]+src=\'(.+?)\'', webpage, 'player url'))
+
+        player = self._download_webpage(player_url, display_id)
+        info = self._parse_json(
+            self._search_regex(
+                r'(?m)var\s+video\s+=\s+({.+?});$', player, 'info json'),
+            display_id)
+
+        qualities_order = qualities(('low', 'high'))
+        formats = [{
+            'format_id': '{0}-{1}'.format(f['type'].split('/')[0], f['quality']),
+            'url': f['src'],
+            'quality': qualities_order(f['quality']),
+        } for f in info['sources'][0]]
+        self._sort_formats(formats)
+
+        return {
+            'id': info['id'],
+            'display_id': display_id,
+            'title': info['title'],
+            'formats': formats,
+            'thumbnail': info.get('poster_frame'),
+        }
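
qualities(), used above to rank the 'low'/'high' labels, builds an ordering function from a worst-to-best tuple; unknown labels sort below everything. It is essentially (mirroring youtube_dl/utils.py):

    def qualities(quality_ids):
        def q(qid):
            try:
                return quality_ids.index(qid)
            except ValueError:
                return -1
        return q

    order = qualities(('low', 'high'))
    print(order('low'), order('high'), order('4k'))  # 0 1 -1
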
index d8b1fd2813eadc3d17a17a6d46766b3c9c4ea37a..d63aef5dea9a8543f2a919b19321582f20e8df86 100644 (file)
@@ -12,7 +12,7 @@ class TheSixtyOneIE(InfoExtractor):
             s|
             song/comments/list|
             song
-        )/(?P<id>[A-Za-z0-9]+)/?$'''
+        )/(?:[^/]+/)?(?P<id>[A-Za-z0-9]+)/?$'''
     _SONG_URL_TEMPLATE = 'http://thesixtyone.com/s/{0:}'
     _SONG_FILE_URL_TEMPLATE = 'http://{audio_server:}/thesixtyone_production/audio/{0:}_stream'
     _THUMBNAIL_URL_TEMPLATE = '{photo_base_url:}_desktop'
@@ -45,6 +45,10 @@ class TheSixtyOneIE(InfoExtractor):
             'url': 'http://www.thesixtyone.com/song/SrE3zD7s1jt/',
             'only_matching': True,
         },
+        {
+            'url': 'http://www.thesixtyone.com/maryatmidnight/song/StrawberriesandCream/yvWtLp0c4GQ/',
+            'only_matching': True,
+        },
     ]
 
     _DECODE_MAP = {
diff --git a/youtube_dl/extractor/thestar.py b/youtube_dl/extractor/thestar.py
new file mode 100644 (file)
index 0000000..ba1380a
--- /dev/null
@@ -0,0 +1,35 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .brightcove import BrightcoveLegacyIE
+from ..compat import compat_parse_qs
+
+
+class TheStarIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?thestar\.com/(?:[^/]+/)*(?P<id>.+)\.html'
+    _TEST = {
+        'url': 'http://www.thestar.com/life/2016/02/01/mankind-why-this-woman-started-a-men-s-skincare-line.html',
+        'md5': '2c62dd4db2027e35579fefb97a8b6554',
+        'info_dict': {
+            'id': '4732393888001',
+            'ext': 'mp4',
+            'title': 'Mankind: Why this woman started a men\'s skin care line',
+            'description': 'Robert Cribb talks to Young Lee, the founder of Uncle Peter\'s MAN.',
+            'uploader_id': '794267642001',
+            'timestamp': 1454353482,
+            'upload_date': '20160201',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        }
+    }
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/794267642001/default_default/index.html?videoId=%s'
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
+        brightcove_id = compat_parse_qs(brightcove_legacy_url)['@videoPlayer'][0]
+        return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
diff --git a/youtube_dl/extractor/threeqsdn.py b/youtube_dl/extractor/threeqsdn.py
new file mode 100644 (file)
index 0000000..c77a079
--- /dev/null
@@ -0,0 +1,139 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    js_to_json,
+    mimetype2ext,
+)
+
+
+class ThreeQSDNIE(InfoExtractor):
+    IE_NAME = '3qsdn'
+    IE_DESC = '3Q SDN'
+    _VALID_URL = r'https?://playout\.3qsdn\.com/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+    _TESTS = [{
+        # ondemand from http://www.philharmonie.tv/veranstaltung/26/
+        'url': 'http://playout.3qsdn.com/0280d6b9-1215-11e6-b427-0cc47a188158?protocol=http',
+        'md5': 'ab040e37bcfa2e0c079f92cb1dd7f6cd',
+        'info_dict': {
+            'id': '0280d6b9-1215-11e6-b427-0cc47a188158',
+            'ext': 'mp4',
+            'title': '0280d6b9-1215-11e6-b427-0cc47a188158',
+            'is_live': False,
+        },
+        'expected_warnings': ['Failed to download MPD manifest'],
+    }, {
+        # live video stream
+        'url': 'https://playout.3qsdn.com/d755d94b-4ab9-11e3-9162-0025907ad44f?js=true',
+        'info_dict': {
+            'id': 'd755d94b-4ab9-11e3-9162-0025907ad44f',
+            'ext': 'mp4',
+            'title': 'd755d94b-4ab9-11e3-9162-0025907ad44f',
+            'is_live': False,
+        },
+    }, {
+        # live audio stream
+        'url': 'http://playout.3qsdn.com/9edf36e0-6bf2-11e2-a16a-9acf09e2db48',
+        'only_matching': True,
+    }, {
+        # live audio stream with some 404 URLs
+        'url': 'http://playout.3qsdn.com/ac5c3186-777a-11e2-9c30-9acf09e2db48',
+        'only_matching': True,
+    }, {
+        # geo restricted with 'This content is not available in your country'
+        'url': 'http://playout.3qsdn.com/d63a3ffe-75e8-11e2-9c30-9acf09e2db48',
+        'only_matching': True,
+    }, {
+        # geo restricted with 'playout.3qsdn.com/forbidden'
+        'url': 'http://playout.3qsdn.com/8e330f26-6ae2-11e2-a16a-9acf09e2db48',
+        'only_matching': True,
+    }, {
+        # live video with rtmp link
+        'url': 'https://playout.3qsdn.com/6092bb9e-8f72-11e4-a173-002590c750be',
+        'only_matching': True,
+    }]
+
+    @staticmethod
+    def _extract_url(webpage):
+        mobj = re.search(
+            r'<iframe[^>]+\b(?:data-)?src=(["\'])(?P<url>%s.*?)\1' % ThreeQSDNIE._VALID_URL, webpage)
+        if mobj:
+            return mobj.group('url')
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        js = self._download_webpage(
+            'http://playout.3qsdn.com/%s' % video_id, video_id,
+            query={'js': 'true'})
+
+        if any(p in js for p in (
+                '>This content is not available in your country',
+                'playout.3qsdn.com/forbidden')):
+            self.raise_geo_restricted()
+
+        stream_content = self._search_regex(
+            r'streamContent\s*:\s*(["\'])(?P<content>.+?)\1', js,
+            'stream content', default='demand', group='content')
+
+        live = stream_content == 'live'
+
+        stream_type = self._search_regex(
+            r'streamType\s*:\s*(["\'])(?P<type>audio|video)\1', js,
+            'stream type', default='video', group='type')
+
+        formats = []
+        urls = set()
+
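+        # Deduplicate URLs and dispatch on MIME type or extension: DASH (mpd), HLS (m3u8), HDS (f4m), otherwise a direct HTTP/RTSP URL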
+        def extract_formats(item_url, item={}):
+            if not item_url or item_url in urls:
+                return
+            urls.add(item_url)
+            type_ = item.get('type')
+            ext = determine_ext(item_url, default_ext=None)
+            if type_ == 'application/dash+xml' or ext == 'mpd':
+                formats.extend(self._extract_mpd_formats(
+                    item_url, video_id, mpd_id='mpd', fatal=False))
+            elif type_ in ('application/vnd.apple.mpegURL', 'application/x-mpegurl') or ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    item_url, video_id, 'mp4',
+                    entry_protocol='m3u8' if live else 'm3u8_native',
+                    m3u8_id='hls', fatal=False))
+            elif ext == 'f4m':
+                formats.extend(self._extract_f4m_formats(
+                    item_url, video_id, f4m_id='hds', fatal=False))
+            else:
+                if not self._is_valid_url(item_url, video_id):
+                    return
+                formats.append({
+                    'url': item_url,
+                    'format_id': item.get('quality'),
+                    'ext': 'mp4' if item_url.startswith('rtsp') else mimetype2ext(type_) or ext,
+                    'vcodec': 'none' if stream_type == 'audio' else None,
+                })
+
+        for item_js in re.findall(r'({.*?\b(?:src|source)\s*:\s*["\'].+?})', js):
+            f = self._parse_json(
+                item_js, video_id, transform_source=js_to_json, fatal=False)
+            if not f:
+                continue
+            extract_formats(f.get('src'), f)
+
+        # A more relaxed pass that collects additional URLs and acts
+        # as a future-proof fallback
+        for _, src in re.findall(r'\b(?:src|source)\s*:\s*(["\'])((?:https?|rtsp)://.+?)\1', js):
+            extract_formats(src)
+
+        self._sort_formats(formats)
+
+        title = self._live_title(video_id) if live else video_id
+
+        return {
+            'id': video_id,
+            'title': title,
+            'is_live': live,
+            'formats': formats,
+        }
index 496f15d80b478f94bc2aac86c3d20417e2b09925..406f4a826623c0335e973a0f6dd79744fb32e982 100644 (file)
@@ -10,7 +10,7 @@ from ..utils import (
 
 
 class THVideoIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?thvideo\.tv/(?:v/th|mobile\.php\?cid=)(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?thvideo\.tv/(?:v/th|mobile\.php\?cid=)(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://thvideo.tv/v/th1987/',
         'md5': 'fa107b1f73817e325e9433505a70db50',
index e036b8cdf1e6ca6ad4277a4c3d22e79361322703..c43cace24d5bfd107328944d0bd290594ec06b3f 100644 (file)
@@ -9,7 +9,7 @@ from ..utils import ExtractorError
 class TinyPicIE(InfoExtractor):
     IE_NAME = 'tinypic'
     IE_DESC = 'tinypic.com videos'
-    _VALID_URL = r'http://(?:.+?\.)?tinypic\.com/player\.php\?v=(?P<id>[^&]+)&s=\d+'
+    _VALID_URL = r'https?://(?:.+?\.)?tinypic\.com/player\.php\?v=(?P<id>[^&]+)&s=\d+'
 
     _TESTS = [
         {
index adc05ed5f077594302482257e35efa764e2d1773..abad3ff64b5e519414615d3dd3cf8da345e9a2f3 100644 (file)
@@ -4,12 +4,12 @@ import re
 
 from .common import InfoExtractor
 from .brightcove import BrightcoveLegacyIE
-from ..compat import compat_urlparse
+from ..compat import compat_parse_qs
 
 
 class TlcDeIE(InfoExtractor):
     IE_NAME = 'tlc.de'
-    _VALID_URL = r'http://www\.tlc\.de/sendungen/[^/]+/videos/(?P<title>[^/?]+)'
+    _VALID_URL = r'https?://www\.tlc\.de/(?:[^/]+/)*videos/(?P<title>[^/?#]+)?(?:.*#(?P<id>\d+))?'
 
     _TEST = {
         'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001',
@@ -17,32 +17,23 @@ class TlcDeIE(InfoExtractor):
             'id': '3235167922001',
             'ext': 'mp4',
             'title': 'Breaking Amish: Die Welt da draußen',
-            'uploader': 'Discovery Networks - Germany',
             'description': (
                 'Vier Amische und eine Mennonitin wagen in New York'
                 '  den Sprung in ein komplett anderes Leben. Begleitet sie auf'
                 ' ihrem spannenden Weg.'),
+            'timestamp': 1396598084,
+            'upload_date': '20140404',
+            'uploader_id': '1659832546',
         },
     }
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1659832546/default_default/index.html?videoId=%s'
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        title = mobj.group('title')
-        webpage = self._download_webpage(url, title)
-        iframe_url = self._search_regex(
-            '<iframe src="(http://www\.tlc\.de/wp-content/.+?)"', webpage,
-            'iframe url')
-        # Otherwise we don't get the correct 'BrightcoveExperience' element,
-        # example: http://www.tlc.de/sendungen/cake-boss/videos/cake-boss-cannoli-drama/
-        iframe_url = iframe_url.replace('.htm?', '.php?')
-        url_fragment = compat_urlparse.urlparse(url).fragment
-        if url_fragment:
-            # Since the fragment is not send to the server, we always get the same iframe
-            iframe_url = re.sub(r'playlist=(\d+)', 'playlist=%s' % url_fragment, iframe_url)
-        iframe = self._download_webpage(iframe_url, title)
-
-        return {
-            '_type': 'url',
-            'url': BrightcoveLegacyIE._extract_brightcove_url(iframe),
-            'ie': BrightcoveLegacyIE.ie_key(),
-        }
+        brightcove_id = mobj.group('id')
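+        # The video id normally rides in the URL fragment; otherwise fall back to scraping the legacy Brightcove URL from the page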
+        if not brightcove_id:
+            title = mobj.group('title')
+            webpage = self._download_webpage(url, title)
+            brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
+            brightcove_id = compat_parse_qs(brightcove_legacy_url)['@videoPlayer'][0]
+        return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
index 49516abca690721a83dee5044bb2cdd6540d4a07..78174178e6ef69362462f96f997b7a37a640a275 100644 (file)
@@ -71,12 +71,16 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
-        display_id = mobj.group('display_id')
+        display_id = mobj.group('display_id') if 'display_id' in mobj.groupdict() else video_id
 
         webpage = self._download_webpage(url, display_id)
 
         cfg_url = self._proto_relative_url(self._html_search_regex(
-            self._CONFIG_REGEX, webpage, 'flashvars.config'), 'http:')
+            self._CONFIG_REGEX, webpage, 'flashvars.config', default=None), 'http:')
+
+        if not cfg_url:
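+            # Fall back to deriving the config URL from the page's hidden vkey/nkey inputs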
+            inputs = self._hidden_inputs(webpage)
+            cfg_url = 'https://cdn-fck.tnaflix.com/tnaflix/%s.fid?key=%s' % (inputs['vkey'], inputs['nkey'])
 
         cfg_xml = self._download_xml(
             cfg_url, display_id, 'Downloading metadata',
@@ -117,7 +121,7 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
         title = self._html_search_regex(
             self._TITLE_REGEX, webpage, 'title') if self._TITLE_REGEX else self._og_search_title(webpage)
 
-        age_limit = self._rta_search(webpage)
+        age_limit = self._rta_search(webpage) or 18
 
         duration = parse_duration(self._html_search_meta(
             'duration', webpage, 'duration', default=None))
@@ -132,7 +136,7 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
         average_rating = float_or_none(extract_field(self._AVERAGE_RATING_REGEX, 'average rating'))
 
         categories_str = extract_field(self._CATEGORIES_REGEX, 'categories')
-        categories = categories_str.split(', ') if categories_str is not None else []
+        categories = [c.strip() for c in categories_str.split(',')] if categories_str is not None else []
 
         return {
             'id': video_id,
@@ -152,17 +156,48 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
         }
 
 
+class TNAFlixNetworkEmbedIE(TNAFlixNetworkBaseIE):
+    _VALID_URL = r'https?://player\.(?:tna|emp)flix\.com/video/(?P<id>\d+)'
+
+    _TITLE_REGEX = r'<title>([^<]+)</title>'
+
+    _TESTS = [{
+        'url': 'https://player.tnaflix.com/video/6538',
+        'info_dict': {
+            'id': '6538',
+            'display_id': '6538',
+            'ext': 'mp4',
+            'title': 'Educational xxx video',
+            'thumbnail': 're:https?://.*\.jpg$',
+            'age_limit': 18,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://player.empflix.com/video/33051',
+        'only_matching': True,
+    }]
+
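+    # Returns player iframe URLs found in a webpage so that embeds can be handled elsewhere (e.g. by the generic extractor)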
+    @staticmethod
+    def _extract_urls(webpage):
+        return [url for _, url in re.findall(
+            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.(?:tna|emp)flix\.com/video/\d+)\1',
+            webpage)]
+
+
 class TNAFlixIE(TNAFlixNetworkBaseIE):
     _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/[^/]+/(?P<display_id>[^/]+)/video(?P<id>\d+)'
 
     _TITLE_REGEX = r'<title>(.+?) - TNAFlix Porn Videos</title>'
-    _DESCRIPTION_REGEX = r'<h3 itemprop="description">([^<]+)</h3>'
-    _UPLOADER_REGEX = r'(?s)<span[^>]+class="infoTitle"[^>]*>Uploaded By:</span>(.+?)<div'
+    _DESCRIPTION_REGEX = r'<meta[^>]+name="description"[^>]+content="([^"]+)"'
+    _UPLOADER_REGEX = r'<i>\s*Verified Member\s*</i>\s*<h1>(.+?)</h1>'
+    _CATEGORIES_REGEX = r'(?s)<span[^>]*>Categories:</span>(.+?)</div>'
 
     _TESTS = [{
         # anonymous uploader, no categories
         'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878',
-        'md5': 'ecf3498417d09216374fc5907f9c6ec0',
+        'md5': '7e569419fe6d69543d01e6be22f5f7c4',
         'info_dict': {
             'id': '553878',
             'display_id': 'Carmella-Decesare-striptease',
@@ -171,17 +206,16 @@ class TNAFlixIE(TNAFlixNetworkBaseIE):
             'thumbnail': 're:https?://.*\.jpg$',
             'duration': 91,
             'age_limit': 18,
-            'uploader': 'Anonymous',
-            'categories': [],
+            'categories': ['Porn Stars'],
         }
     }, {
         # non-anonymous uploader, categories
         'url': 'https://www.tnaflix.com/teen-porn/Educational-xxx-video/video6538',
-        'md5': '0f5d4d490dbfd117b8607054248a07c0',
+        'md5': 'fcba2636572895aba116171a899a5658',
         'info_dict': {
             'id': '6538',
             'display_id': 'Educational-xxx-video',
-            'ext': 'mp4',
+            'ext': 'flv',
             'title': 'Educational xxx video',
             'description': 'md5:b4fab8f88a8621c8fabd361a173fe5b8',
             'thumbnail': 're:https?://.*\.jpg$',
index 2756f56d3a94ae8f2bed64aa39acf4d45616366b..2579ba8c67498c91aa117c6853b83f391ccb3ba6 100644 (file)
@@ -41,7 +41,7 @@ class ToypicsIE(InfoExtractor):
 
 class ToypicsUserIE(InfoExtractor):
     IE_DESC = 'Toypics user profile'
-    _VALID_URL = r'http://videos\.toypics\.net/(?P<username>[^/?]+)(?:$|[?#])'
+    _VALID_URL = r'https?://videos\.toypics\.net/(?P<username>[^/?]+)(?:$|[?#])'
     _TEST = {
         'url': 'http://videos.toypics.net/Mikey',
         'info_dict': {
index 0e01b15fcc51ed41d6ace902058e8446ad5625fd..747370d12d7fc8fd1c66b1ac101db0ba01c963e5 100644 (file)
@@ -7,7 +7,7 @@ from .common import InfoExtractor
 
 class TrailerAddictIE(InfoExtractor):
     _WORKING = False
-    _VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)'
+    _VALID_URL = r'(?:https?://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)'
     _TEST = {
         'url': 'http://www.traileraddict.com/trailer/prince-avalanche/trailer',
         'md5': '41365557f3c8c397d091da510e73ceb4',
index d239949a668a3cfb1d3267bf0be8780c2bda6569..65770562309186acc95c33c2165d3d50ae8f5f36 100644 (file)
@@ -7,7 +7,7 @@ from .nuevo import NuevoBaseIE
 
 
 class TrollvidsIE(NuevoBaseIE):
-    _VALID_URL = r'http://(?:www\.)?trollvids\.com/video/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?trollvids\.com/video/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
     IE_NAME = 'trollvids'
     _TEST = {
         'url': 'http://trollvids.com/video/2349002/%E3%80%90MMD-R-18%E3%80%91%E3%82%AC%E3%83%BC%E3%83%AB%E3%83%95%E3%83%AC%E3%83%B3%E3%83%89-carrymeoff',
index 6d78b5dfea0030f125062ad8ef46a1c6e731e4ea..c6572defbcf7a732d58d5fa3003993d063a8be3e 100644 (file)
@@ -1,31 +1,32 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import codecs
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse
 from ..utils import (
     ExtractorError,
     int_or_none,
     sanitized_Request,
+    urlencode_postdata,
+    parse_iso8601,
 )
 
 
 class TubiTvIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?tubitv\.com/video\?id=(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?tubitv\.com/video/(?P<id>[0-9]+)'
     _LOGIN_URL = 'http://tubitv.com/login'
     _NETRC_MACHINE = 'tubitv'
     _TEST = {
-        'url': 'http://tubitv.com/video?id=54411&title=The_Kitchen_Musical_-_EP01',
+        'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday',
         'info_dict': {
-            'id': '54411',
+            'id': '283829',
             'ext': 'mp4',
-            'title': 'The Kitchen Musical - EP01',
-            'thumbnail': 're:^https?://.*\.png$',
-            'description': 'md5:37532716166069b353e8866e71fefae7',
-            'duration': 2407,
+            'title': 'The Comedian at The Friday',
+            'description': 'A stand up comedian is forced to look at the decisions in his life while on a one week trip to the west coast.',
+            'uploader': 'Indie Rights Films',
+            'upload_date': '20160111',
+            'timestamp': 1452555979,
         },
         'params': {
             'skip_download': 'HLS download',
@@ -41,7 +42,7 @@ class TubiTvIE(InfoExtractor):
             'username': username,
             'password': password,
         }
-        payload = compat_urllib_parse.urlencode(form_data).encode('utf-8')
+        payload = urlencode_postdata(form_data)
         request = sanitized_Request(self._LOGIN_URL, payload)
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')
         login_page = self._download_webpage(
@@ -55,26 +56,31 @@ class TubiTvIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
+        video_data = self._download_json(
+            'http://tubitv.com/oz/videos/%s/content' % video_id, video_id)
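+        # The endpoint uses terse keys: 'n' (title), 'mh' (HLS master playlist), 'sb' (subtitles); 'ph', 'd', 's', 'u' and 'on' are mapped below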
+        title = video_data['n']
 
-        webpage = self._download_webpage(url, video_id)
-        if re.search(r"<(?:DIV|div) class='login-required-screen'>", webpage):
-            self.raise_login_required('This video requires login')
+        formats = self._extract_m3u8_formats(
+            video_data['mh'], video_id, 'mp4', 'm3u8_native')
+        self._sort_formats(formats)
 
-        title = self._og_search_title(webpage)
-        description = self._og_search_description(webpage)
-        thumbnail = self._og_search_thumbnail(webpage)
-        duration = int_or_none(self._html_search_meta(
-            'video:duration', webpage, 'duration'))
-
-        apu = self._search_regex(r"apu='([^']+)'", webpage, 'apu')
-        m3u8_url = codecs.decode(apu, 'rot_13')[::-1]
-        formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
+        subtitles = {}
+        for sub in video_data.get('sb', []):
+            sub_url = sub.get('u')
+            if not sub_url:
+                continue
+            subtitles.setdefault(sub.get('l', 'en'), []).append({
+                'url': sub_url,
+            })
 
         return {
             'id': video_id,
             'title': title,
             'formats': formats,
-            'thumbnail': thumbnail,
-            'description': description,
-            'duration': duration,
+            'subtitles': subtitles,
+            'thumbnail': video_data.get('ph'),
+            'description': video_data.get('d'),
+            'duration': int_or_none(video_data.get('s')),
+            'timestamp': parse_iso8601(video_data.get('u')),
+            'uploader': video_data.get('on'),
         }
index f56b66d06f2f7323b60d2fc4788d26cb3274ef9e..bb8b8e23424e7943f2133028aca187d4fcffeab9 100644 (file)
@@ -5,7 +5,9 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
+    ExtractorError,
     int_or_none,
+    InAdvancePagedList,
     float_or_none,
     unescapeHTML,
 )
@@ -45,11 +47,27 @@ class TudouIE(InfoExtractor):
 
     _PLAYER_URL = 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf'
 
+    # Translated from tudou/tools/TVCHelper.as in PortalPlayer_193.swf
+    # 0001, 0002 and 4001 are not included as they indicate temporary issues
+    TVC_ERRORS = {
+        '0003': 'The video is deleted or does not exist',
+        '1001': 'This video is unavailable due to licensing issues',
+        '1002': 'This video is unavailable as it\'s under review',
+        '1003': 'This video is unavailable as it\'s under review',
+        '3001': 'Password required',
+        '5001': 'This video is available in Mainland China only due to licensing issues',
+        '7001': 'This video is unavailable',
+        '8001': 'This video is unavailable due to licensing issues',
+    }
+
     def _url_for_id(self, video_id, quality=None):
         info_url = 'http://v2.tudou.com/f?id=' + compat_str(video_id)
         if quality:
             info_url += '&hd' + quality
         xml_data = self._download_xml(info_url, video_id, 'Opening the info XML page')
+        error = xml_data.attrib.get('error')
+        if error is not None:
+            raise ExtractorError('Tudou said: %s' % error, expected=True)
         final_url = xml_data.text
         return final_url
 
@@ -62,6 +80,15 @@ class TudouIE(InfoExtractor):
         if youku_vcode:
             return self.url_result('youku:' + youku_vcode, ie='Youku')
 
+        if not item_data.get('itemSegs'):
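+            # No playable segments: surface a meaningful TVC error message if one is known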
+            tvc_code = item_data.get('tvcCode')
+            if tvc_code:
+                err_msg = self.TVC_ERRORS.get(tvc_code)
+                if err_msg:
+                    raise ExtractorError('Tudou said: %s' % err_msg, expected=True)
+                raise ExtractorError('Unexpected error %s returned from Tudou' % tvc_code)
+            raise ExtractorError('Unexpected error returned from Tudou')
+
         title = unescapeHTML(item_data['kw'])
         description = item_data.get('desc')
         thumbnail_url = item_data.get('pic')
@@ -75,15 +102,16 @@ class TudouIE(InfoExtractor):
         quality = sorted(filter(lambda k: k.isdigit(), segments.keys()),
                          key=lambda k: int(k))[-1]
         parts = segments[quality]
-        result = []
         len_parts = len(parts)
         if len_parts > 1:
             self.to_screen('%s: found %s parts' % (video_id, len_parts))
-        for part in parts:
+
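+        # Resolve each part's final URL lazily, one part per page, so info URLs are only fetched when actually needed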
+        def part_func(partnum):
+            part = parts[partnum]
             part_id = part['k']
             final_url = self._url_for_id(part_id, quality)
             ext = (final_url.split('?')[0]).split('.')[-1]
-            part_info = {
+            return [{
                 'id': '%s' % part_id,
                 'url': final_url,
                 'ext': ext,
@@ -97,12 +125,13 @@ class TudouIE(InfoExtractor):
                 'http_headers': {
                     'Referer': self._PLAYER_URL,
                 },
-            }
-            result.append(part_info)
+            }]
+
+        entries = InAdvancePagedList(part_func, len_parts, 1)
 
         return {
             '_type': 'multi_video',
-            'entries': result,
+            'entries': entries,
             'id': video_id,
             'title': title,
         }
index 4f844706d365950d6e1d04e237a5c20df7a1cafc..4d8b57111897f3c936e11f55fcea60d6a6bd30d6 100644 (file)
@@ -8,7 +8,7 @@ from ..utils import int_or_none
 
 
 class TumblrIE(InfoExtractor):
-    _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/(?:post|video)/(?P<id>[0-9]+)(?:$|[/?#])'
+    _VALID_URL = r'https?://(?P<blog_name>[^/?#&]+)\.tumblr\.com/(?:post|video)/(?P<id>[0-9]+)(?:$|[/?#])'
     _TESTS = [{
         'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes',
         'md5': '479bb068e5b16462f5176a6828829767',
@@ -67,6 +67,34 @@ class TumblrIE(InfoExtractor):
             'uploader_id': 'user32021558',
         },
         'add_ie': ['Vimeo'],
+    }, {
+        'url': 'http://sutiblr.tumblr.com/post/139638707273',
+        'md5': '2dd184b3669e049ba40563a7d423f95c',
+        'info_dict': {
+            'id': 'ir7qBEIKqvq',
+            'ext': 'mp4',
+            'title': 'Vine by sutiblr',
+            'alt_title': 'Vine by sutiblr',
+            'uploader': 'sutiblr',
+            'uploader_id': '1198993975374495744',
+            'upload_date': '20160220',
+            'like_count': int,
+            'comment_count': int,
+            'repost_count': int,
+        },
+        'add_ie': ['Vine'],
+    }, {
+        'url': 'http://vitasidorkina.tumblr.com/post/134652425014/joskriver-victoriassecret-invisibility-or',
+        'md5': '01c12ceb82cbf6b2fe0703aa56b3ad72',
+        'info_dict': {
+            'id': '-7LnUPGlSo',
+            'ext': 'mp4',
+            'title': 'Video by victoriassecret',
+            'description': 'Invisibility or flight…which superpower would YOU choose? #VSFashionShow #ThisOrThat',
+            'uploader_id': 'victoriassecret',
+            'thumbnail': 're:^https?://.*\.jpg'
+        },
+        'add_ie': ['Instagram'],
     }]
 
     def _real_extract(self, url):
index 8322cc14da821f4615a4a8038904039b01c18827..ae4cfaec29b493c3b8b8e11705629901a07a2bf2 100644 (file)
@@ -1,7 +1,7 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import json
+import re
 
 from .common import InfoExtractor
 from ..utils import ExtractorError
@@ -27,10 +27,9 @@ class TuneInBaseIE(InfoExtractor):
         if not streams_url.startswith('http://'):
             streams_url = compat_urlparse.urljoin(url, streams_url)
 
-        stream_data = self._download_webpage(
-            streams_url, content_id, note='Downloading stream data')
-        streams = json.loads(self._search_regex(
-            r'\((.*)\);', stream_data, 'stream info'))['Streams']
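+        # The endpoint returns JSONP; strip the wrapping callback so the payload parses as plain JSON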
+        streams = self._download_json(
+            streams_url, content_id, note='Downloading stream data',
+            transform_source=lambda s: re.sub(r'^\s*\((.*)\);\s*$', r'\1', s))['Streams']
 
         is_live = None
         formats = []
index 1457e524e810c8bda02795c1b8dd78e95c47802e..86bb7915db170ecf4c75fdc2b960160a65daa1c0 100644 (file)
@@ -14,7 +14,7 @@ from ..utils import (
 
 
 class TV2IE(InfoExtractor):
-    _VALID_URL = 'http://(?:www\.)?tv2\.no/v/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?tv2\.no/v/(?P<id>\d+)'
     _TEST = {
         'url': 'http://www.tv2.no/v/916509/',
         'info_dict': {
@@ -100,7 +100,7 @@ class TV2IE(InfoExtractor):
 
 
 class TV2ArticleIE(InfoExtractor):
-    _VALID_URL = 'http://(?:www\.)?tv2\.no/(?:a|\d{4}/\d{2}/\d{2}(/[^/]+)+)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?tv2\.no/(?:a|\d{4}/\d{2}/\d{2}(/[^/]+)+)/(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://www.tv2.no/2015/05/16/nyheter/alesund/krim/pingvin/6930542',
         'info_dict': {
diff --git a/youtube_dl/extractor/tv3.py b/youtube_dl/extractor/tv3.py
new file mode 100644 (file)
index 0000000..3867ec9
--- /dev/null
@@ -0,0 +1,34 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class TV3IE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?tv3\.co\.nz/(?P<id>[^/]+)/tabid/\d+/articleID/\d+/MCat/\d+/Default\.aspx'
+    _TEST = {
+        'url': 'http://www.tv3.co.nz/MOTORSPORT-SRS-SsangYong-Hampton-Downs-Round-3/tabid/3692/articleID/121615/MCat/2915/Default.aspx',
+        'info_dict': {
+            'id': '4659127992001',
+            'ext': 'mp4',
+            'title': 'CRC Motorsport: SRS SsangYong Hampton Downs Round 3 - S2015 Ep3',
+            'description': 'SsangYong Racing Series returns for Round 3 with drivers from New Zealand and Australia taking to the grid at Hampton Downs raceway.',
+            'uploader_id': '3812193411001',
+            'upload_date': '20151213',
+            'timestamp': 1449975272,
+        },
+        'expected_warnings': [
+            'Failed to download MPD manifest'
+        ],
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/3812193411001/default_default/index.html?videoId=%s'
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
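+        # The page embeds a legacy Brightcove <object>; its @videoPlayer param carries the video id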
+        brightcove_id = self._search_regex(r'<param\s*name="@videoPlayer"\s*value="(\d+)"', webpage, 'brightcove id')
+        return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
index 3a4f393fcf6d79f3f42970db7aab853d5efedf84..4065354ddde2c63698908dfac81dc98cac77e79d 100644 (file)
@@ -11,7 +11,7 @@ from ..utils import (
 
 
 class TVCIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?tvc\.ru/video/iframe/id/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?tvc\.ru/video/iframe/id/(?P<id>\d+)'
     _TEST = {
         'url': 'http://www.tvc.ru/video/iframe/id/74622/isPlay/false/id_stat/channel/?acc_video_id=/channel/brand/id/17/show/episodes/episode_id/39702',
         'md5': 'bbc5ff531d1e90e856f60fc4b3afd708',
@@ -64,7 +64,7 @@ class TVCIE(InfoExtractor):
 
 
 class TVCArticleIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?tvc\.ru/(?!video/iframe/id/)(?P<id>[^?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?tvc\.ru/(?!video/iframe/id/)(?P<id>[^?#]+)'
     _TESTS = [{
         'url': 'http://www.tvc.ru/channel/brand/id/29/show/episodes/episode_id/39702/',
         'info_dict': {
index dc3a8334a6b335143dff417d805a26df412d8783..ead4c00c79bb453585b4ba18c67f7535bcc69254 100644 (file)
@@ -58,7 +58,9 @@ class TvigleIE(InfoExtractor):
         if not video_id:
             webpage = self._download_webpage(url, display_id)
             video_id = self._html_search_regex(
-                r'class="video-preview current_playing" id="(\d+)">',
+                (r'<div[^>]+class=["\']player["\'][^>]+id=["\'](\d+)',
+                 r'var\s+cloudId\s*=\s*["\'](\d+)',
+                 r'class="video-preview current_playing" id="(\d+)"'),
                 webpage, 'video id')
 
         video_data = self._download_json(
@@ -81,10 +83,10 @@ class TvigleIE(InfoExtractor):
 
         formats = []
         for vcodec, fmts in item['videos'].items():
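+            # Skip the 'hls' variants; only direct HTTP formats are extracted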
+            if vcodec == 'hls':
+                continue
             for format_id, video_url in fmts.items():
                 if format_id == 'm3u8':
-                    formats.extend(self._extract_m3u8_formats(
-                        video_url, video_id, 'mp4', m3u8_id=vcodec))
                     continue
                 height = self._search_regex(
                     r'^(\d+)[pP]$', format_id, 'height', default=None)
index f57d609d43eecb13f3bb43ecc042107b5cad50bd..5070082da7ba3b34078a01bd214d02a9e8dcac33 100644 (file)
@@ -1,25 +1,24 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
 
 from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    clean_html,
+    get_element_by_attribute,
+    ExtractorError,
+)
 
 
-class TvpIE(InfoExtractor):
-    IE_NAME = 'tvp.pl'
-    _VALID_URL = r'https?://(?:vod|www)\.tvp\.pl/.*/(?P<id>\d+)$'
+class TVPIE(InfoExtractor):
+    IE_NAME = 'tvp'
+    IE_DESC = 'Telewizja Polska'
+    _VALID_URL = r'https?://[^/]+\.tvp\.(?:pl|info)/(?:(?!\d+/)[^/]+/)*(?P<id>\d+)'
 
     _TESTS = [{
-        'url': 'http://vod.tvp.pl/filmy-fabularne/filmy-za-darmo/ogniem-i-mieczem/wideo/odc-2/4278035',
-        'md5': 'cdd98303338b8a7f7abab5cd14092bf2',
-        'info_dict': {
-            'id': '4278035',
-            'ext': 'wmv',
-            'title': 'Ogniem i mieczem, odc. 2',
-        },
-    }, {
-        'url': 'http://vod.tvp.pl/seriale/obyczajowe/czas-honoru/sezon-1-1-13/i-seria-odc-13/194536',
+        'url': 'http://vod.tvp.pl/194536/i-seria-odc-13',
         'md5': '8aa518c15e5cc32dfe8db400dc921fbb',
         'info_dict': {
             'id': '194536',
@@ -28,7 +27,7 @@ class TvpIE(InfoExtractor):
         },
     }, {
         'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176',
-        'md5': 'c3b15ed1af288131115ff17a17c19dda',
+        'md5': 'b0005b542e5b4de643a9690326ab1257',
         'info_dict': {
             'id': '17916176',
             'ext': 'mp4',
@@ -36,12 +35,22 @@ class TvpIE(InfoExtractor):
         },
     }, {
         'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272',
-        'md5': 'c3b15ed1af288131115ff17a17c19dda',
-        'info_dict': {
-            'id': '17834272',
-            'ext': 'mp4',
-            'title': 'Na sygnale, odc. 39',
-        },
+        'only_matching': True,
+    }, {
+        'url': 'http://wiadomosci.tvp.pl/25169746/24052016-1200',
+        'only_matching': True,
+    }, {
+        'url': 'http://krakow.tvp.pl/25511623/25lecie-mck-wyjatkowe-miejsce-na-mapie-krakowa',
+        'only_matching': True,
+    }, {
+        'url': 'http://teleexpress.tvp.pl/25522307/wierni-wzieli-udzial-w-procesjach',
+        'only_matching': True,
+    }, {
+        'url': 'http://sport.tvp.pl/25522165/krychowiak-uspokaja-w-sprawie-kontuzji-dwa-tygodnie-to-maksimum',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.tvp.info/25511919/trwa-rewolucja-wladza-zdecydowala-sie-na-pogwalcenie-konstytucji',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -50,6 +59,11 @@ class TvpIE(InfoExtractor):
         webpage = self._download_webpage(
             'http://www.tvp.pl/sess/tvplayer.php?object_id=%s' % video_id, video_id)
 
+        error_message = get_element_by_attribute('class', 'msg error', webpage)
+        if error_message:
+            raise ExtractorError('%s said: %s' % (
+                self.IE_NAME, clean_html(error_message)), expected=True)
+
         title = self._search_regex(
             r'name\s*:\s*([\'"])Title\1\s*,\s*value\s*:\s*\1(?P<title>.+?)\1',
             webpage, 'title', group='title')
@@ -63,24 +77,50 @@ class TvpIE(InfoExtractor):
             r"poster\s*:\s*'([^']+)'", webpage, 'thumbnail', default=None)
 
         video_url = self._search_regex(
-            r'0:{src:([\'"])(?P<url>.*?)\1', webpage, 'formats', group='url', default=None)
-        if not video_url:
+            r'0:{src:([\'"])(?P<url>.*?)\1', webpage,
+            'formats', group='url', default=None)
+        if not video_url or 'material_niedostepny.mp4' in video_url:
             video_url = self._download_json(
                 'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s' % video_id,
                 video_id)['video_url']
 
-        ext = video_url.rsplit('.', 1)[-1]
-        if ext != 'ism/manifest':
-            if '/' in ext:
-                ext = 'mp4'
+        formats = []
+        video_url_base = self._search_regex(
+            r'(https?://.+?/video)(?:\.(?:ism|f4m|m3u8)|-\d+\.mp4)',
+            video_url, 'video base url', default=None)
+        if video_url_base:
+            # TODO: Current DASH formats are broken - $Time$ pattern in
+            # <SegmentTemplate> not implemented yet
+            # formats.extend(self._extract_mpd_formats(
+            #     video_url_base + '.ism/video.mpd',
+            #     video_id, mpd_id='dash', fatal=False))
+            formats.extend(self._extract_f4m_formats(
+                video_url_base + '.ism/video.f4m',
+                video_id, f4m_id='hds', fatal=False))
+            m3u8_formats = self._extract_m3u8_formats(
+                video_url_base + '.ism/video.m3u8', video_id,
+                'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
+            self._sort_formats(m3u8_formats)
+            m3u8_formats = list(filter(
+                lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
+                m3u8_formats))
+            formats.extend(m3u8_formats)
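+            # For each HLS rendition, probe a progressive MP4 counterpart at <base>-N.mp4 (N starting from 2)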
+            for i, m3u8_format in enumerate(m3u8_formats, 2):
+                http_url = '%s-%d.mp4' % (video_url_base, i)
+                if self._is_valid_url(http_url, video_id):
+                    f = m3u8_format.copy()
+                    f.update({
+                        'url': http_url,
+                        'format_id': f['format_id'].replace('hls', 'http'),
+                        'protocol': 'http',
+                    })
+                    formats.append(f)
+        else:
             formats = [{
                 'format_id': 'direct',
                 'url': video_url,
-                'ext': ext,
+                'ext': determine_ext(video_url, 'mp4'),
             }]
-        else:
-            m3u8_url = re.sub('([^/]*)\.ism/manifest', r'\1.ism/\1.m3u8', video_url)
-            formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
 
         self._sort_formats(formats)
 
@@ -92,8 +132,8 @@ class TvpIE(InfoExtractor):
         }
 
 
-class TvpSeriesIE(InfoExtractor):
-    IE_NAME = 'tvp.pl:Series'
+class TVPSeriesIE(InfoExtractor):
+    IE_NAME = 'tvp:series'
     _VALID_URL = r'https?://vod\.tvp\.pl/(?:[^/]+/){2}(?P<id>[^/]+)/?$'
 
     _TESTS = [{
@@ -127,7 +167,7 @@ class TvpSeriesIE(InfoExtractor):
         videos_paths = re.findall(
             '(?s)class="shortTitle">.*?href="(/[^"]+)', playlist)
         entries = [
-            self.url_result('http://vod.tvp.pl%s' % v_path, ie=TvpIE.ie_key())
+            self.url_result('http://vod.tvp.pl%s' % v_path, ie=TVPIE.ie_key())
             for v_path in videos_paths]
 
         return {
index b4683de542dffbcc839f162d282285b3cb5d02f3..df70a6b230a4217261f3f69a3e1213a88f07afbf 100644 (file)
@@ -13,7 +13,7 @@ from ..utils import (
 
 class TVPlayIE(InfoExtractor):
     IE_DESC = 'TV3Play and related services'
-    _VALID_URL = r'''(?x)http://(?:www\.)?
+    _VALID_URL = r'''(?x)https?://(?:www\.)?
         (?:tvplay\.lv/parraides|
            tv3play\.lt/programos|
            play\.tv3\.lt/programos|
index e03e2dbaa42f23a5107a50c67e7c12d9f378600b..4025edf02b4ca5b8c42e3324fb094fe43141ce68 100644 (file)
@@ -47,7 +47,8 @@ class TwentyFourVideoIE(InfoExtractor):
 
         title = self._og_search_title(webpage)
         description = self._html_search_regex(
-            r'<span itemprop="description">([^<]+)</span>', webpage, 'description', fatal=False)
+            r'<(p|span)[^>]+itemprop="description"[^>]*>(?P<description>[^<]+)</\1>',
+            webpage, 'description', fatal=False, group='description')
         thumbnail = self._og_search_thumbnail(webpage)
         duration = int_or_none(self._og_search_property(
             'duration', webpage, 'duration', fatal=False))
index ca7d953b8e2733d404ebe9f7cd90e7e28d083abe..b721ecb0a106a710b6d140d7d21309307196a684 100644 (file)
@@ -32,7 +32,22 @@ class TwentyMinutenIE(InfoExtractor):
             'title': '«Wir müssen mutig nach vorne schauen»',
             'description': 'Kein Land sei innovativer als die Schweiz, sagte Johann Schneider-Ammann in seiner Neujahrsansprache. Das Land müsse aber seine Hausaufgaben machen.',
             'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg'
-        }
+        },
+        'skip': '"This video is no longer available" is shown both on the web page and in the downloaded file.',
+    }, {
+        # YouTube embed
+        'url': 'http://www.20min.ch/ro/sports/football/story/Il-marque-une-bicyclette-de-plus-de-30-metres--21115184',
+        'md5': 'cec64d59aa01c0ed9dbba9cf639dd82f',
+        'info_dict': {
+            'id': 'ivM7A7SpDOs',
+            'ext': 'mp4',
+            'title': 'GOLAZO DE CHILENA DE JAVI GÓMEZ, FINALISTA AL BALÓN DE CLM 2016',
+            'description': 'md5:903c92fbf2b2f66c09de514bc25e9f5a',
+            'upload_date': '20160424',
+            'uploader': 'RTVCM Castilla-La Mancha',
+            'uploader_id': 'RTVCM',
+        },
+        'add_ie': ['Youtube'],
     }, {
         'url': 'http://www.20min.ch/videotv/?cid=44&vid=468738',
         'only_matching': True,
@@ -48,6 +63,12 @@ class TwentyMinutenIE(InfoExtractor):
 
         webpage = self._download_webpage(url, display_id)
 
+        youtube_url = self._html_search_regex(
+            r'<iframe[^>]+src="((?:https?:)?//www\.youtube\.com/embed/[^"]+)"',
+            webpage, 'YouTube embed URL', default=None)
+        if youtube_url is not None:
+            return self.url_result(youtube_url, 'Youtube')
+
         title = self._html_search_regex(
             r'<h1>.*?<span>(.+?)</span></h1>',
             webpage, 'title', default=None)
index 69882da6337bc6f20d0578f6d612e799429db3e8..20919774d0bcf65705e1f379a433667e2a06b7e9 100644 (file)
@@ -9,17 +9,19 @@ from .common import InfoExtractor
 from ..compat import (
     compat_parse_qs,
     compat_str,
-    compat_urllib_parse,
+    compat_urllib_parse_urlencode,
     compat_urllib_parse_urlparse,
     compat_urlparse,
 )
 from ..utils import (
-    encode_dict,
     ExtractorError,
     int_or_none,
+    js_to_json,
+    orderedSet,
     parse_duration,
     parse_iso8601,
     sanitized_Request,
+    urlencode_postdata,
 )
 
 
@@ -81,7 +83,7 @@ class TwitchBaseIE(InfoExtractor):
             post_url = compat_urlparse.urljoin(redirect_url, post_url)
 
         request = sanitized_Request(
-            post_url, compat_urllib_parse.urlencode(encode_dict(login_form)).encode('utf-8'))
+            post_url, urlencode_postdata(login_form))
         request.add_header('Referer', redirect_url)
         response = self._download_webpage(
             request, None, 'Logging in as %s' % username)
@@ -170,6 +172,7 @@ class TwitchVideoIE(TwitchItemBaseIE):
             'title': 'Worlds Semifinals - Star Horn Royal Club vs. OMG',
         },
         'playlist_mincount': 12,
+        'skip': 'HTTP Error 404: Not Found',
     }
 
 
@@ -186,6 +189,7 @@ class TwitchChapterIE(TwitchItemBaseIE):
             'title': 'ACRL Off Season - Sports Cars @ Nordschleife',
         },
         'playlist_mincount': 3,
+        'skip': 'HTTP Error 404: Not Found',
     }, {
         'url': 'http://www.twitch.tv/tsm_theoddone/c/2349361',
         'only_matching': True,
@@ -249,14 +253,15 @@ class TwitchVodIE(TwitchItemBaseIE):
         formats = self._extract_m3u8_formats(
             '%s/vod/%s?%s' % (
                 self._USHER_BASE, item_id,
-                compat_urllib_parse.urlencode({
+                compat_urllib_parse_urlencode({
                     'allow_source': 'true',
+                    'allow_audio_only': 'true',
                     'allow_spectre': 'true',
                     'player': 'twitchweb',
                     'nauth': access_token['token'],
                     'nauthsig': access_token['sig'],
                 })),
-            item_id, 'mp4')
+            item_id, 'mp4', entry_protocol='m3u8_native')
 
         self._prefer_source(formats)
         info['formats'] = formats
@@ -281,17 +286,37 @@ class TwitchPlaylistBaseIE(TwitchBaseIE):
         entries = []
         offset = 0
         limit = self._PAGE_LIMIT
+        broken_paging_detected = False
+        counter_override = None
         for counter in itertools.count(1):
             response = self._download_json(
                 self._PLAYLIST_URL % (channel_id, offset, limit),
-                channel_id, 'Downloading %s videos JSON page %d' % (self._PLAYLIST_TYPE, counter))
+                channel_id,
+                'Downloading %s videos JSON page %s'
+                % (self._PLAYLIST_TYPE, counter_override or counter))
             page_entries = self._extract_playlist_page(response)
             if not page_entries:
                 break
+            total = int_or_none(response.get('_total'))
+            # Since the beginning of March 2016 twitch's paging mechanism
+            # has been completely broken on the twitch side: it simply ignores
+            # the limit and returns as many videos as the offset value.
+            # Work around this by requesting all videos at once.
+            # Upd: the pagination bug was fixed by twitch on 15.03.2016.
+            if not broken_paging_detected and total and len(page_entries) > limit:
+                self.report_warning(
+                    'Twitch pagination is broken on the twitch side, requesting all videos at once',
+                    channel_id)
+                broken_paging_detected = True
+                offset = total
+                counter_override = '(all at once)'
+                continue
             entries.extend(page_entries)
+            if broken_paging_detected or total and len(page_entries) >= total:
+                break
             offset += limit
         return self.playlist_result(
-            [self.url_result(entry) for entry in set(entries)],
+            [self.url_result(entry) for entry in orderedSet(entries)],
             channel_id, channel_name)
 
     def _extract_playlist_page(self, response):
@@ -333,31 +358,6 @@ class TwitchPastBroadcastsIE(TwitchPlaylistBaseIE):
     }
 
 
-class TwitchBookmarksIE(TwitchPlaylistBaseIE):
-    IE_NAME = 'twitch:bookmarks'
-    _VALID_URL = r'%s/(?P<id>[^/]+)/profile/bookmarks/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
-    _PLAYLIST_URL = '%s/api/bookmark/?user=%%s&offset=%%d&limit=%%d' % TwitchBaseIE._API_BASE
-    _PLAYLIST_TYPE = 'bookmarks'
-
-    _TEST = {
-        'url': 'http://www.twitch.tv/ognos/profile/bookmarks',
-        'info_dict': {
-            'id': 'ognos',
-            'title': 'Ognos',
-        },
-        'playlist_mincount': 3,
-    }
-
-    def _extract_playlist_page(self, response):
-        entries = []
-        for bookmark in response.get('bookmarks', []):
-            video = bookmark.get('video')
-            if not video:
-                continue
-            entries.append(video['url'])
-        return entries
-
-
 class TwitchStreamIE(TwitchBaseIE):
     IE_NAME = 'twitch:stream'
     _VALID_URL = r'%s/(?P<id>[^/#?]+)/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
@@ -411,6 +411,7 @@ class TwitchStreamIE(TwitchBaseIE):
 
         query = {
             'allow_source': 'true',
+            'allow_audio_only': 'true',
             'p': random.randint(1000000, 10000000),
             'player': 'twitchweb',
             'segment_preference': '4',
@@ -419,7 +420,7 @@ class TwitchStreamIE(TwitchBaseIE):
         }
         formats = self._extract_m3u8_formats(
             '%s/api/channel/hls/%s.m3u8?%s'
-            % (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query)),
+            % (self._USHER_BASE, channel_id, compat_urllib_parse_urlencode(query)),
             channel_id, 'mp4')
         self._prefer_source(formats)
 
@@ -454,3 +455,45 @@ class TwitchStreamIE(TwitchBaseIE):
             'formats': formats,
             'is_live': True,
         }
+
+
+class TwitchClipsIE(InfoExtractor):
+    IE_NAME = 'twitch:clips'
+    _VALID_URL = r'https?://clips\.twitch\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+
+    _TEST = {
+        'url': 'https://clips.twitch.tv/ea/AggressiveCobraPoooound',
+        'md5': '761769e1eafce0ffebfb4089cb3847cd',
+        'info_dict': {
+            'id': 'AggressiveCobraPoooound',
+            'ext': 'mp4',
+            'title': 'EA Play 2016 Live from the Novo Theatre',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'creator': 'EA',
+            'uploader': 'stereotype_',
+            'uploader_id': 'stereotype_',
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
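+        # Clip metadata is embedded in the page as a JavaScript object assigned to clipInfo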
+        clip = self._parse_json(
+            self._search_regex(
+                r'(?s)clipInfo\s*=\s*({.+?});', webpage, 'clip info'),
+            video_id, transform_source=js_to_json)
+
+        video_url = clip['clip_video_url']
+        title = clip['channel_title']
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'creator': clip.get('broadcaster_display_name') or clip.get('broadcaster_login'),
+            'uploader': clip.get('curator_login'),
+            'uploader_id': clip.get('curator_display_name'),
+        }
index 5d2b5ec3515277980f2da45a91a0c73fae17923d..b7384298619608ab879337326b1e6719962932e3 100644 (file)
@@ -5,12 +5,12 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
+    determine_ext,
     float_or_none,
     xpath_text,
     remove_end,
     int_or_none,
     ExtractorError,
-    sanitized_Request,
 )
 
 
@@ -22,7 +22,7 @@ class TwitterBaseIE(InfoExtractor):
 
 class TwitterCardIE(TwitterBaseIE):
     IE_NAME = 'twitter:card'
-    _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/cards/tfw/v1/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/(?:cards/tfw/v1|videos/tweet)/(?P<id>\d+)'
     _TESTS = [
         {
             'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
@@ -30,7 +30,7 @@ class TwitterCardIE(TwitterBaseIE):
             'info_dict': {
                 'id': '560070183650213889',
                 'ext': 'mp4',
-                'title': 'TwitterCard',
+                'title': 'Twitter Card',
                 'thumbnail': 're:^https?://.*\.jpg$',
                 'duration': 30.033,
             }
@@ -41,7 +41,7 @@ class TwitterCardIE(TwitterBaseIE):
             'info_dict': {
                 'id': '623160978427936768',
                 'ext': 'mp4',
-                'title': 'TwitterCard',
+                'title': 'Twitter Card',
                 'thumbnail': 're:^https?://.*\.jpg',
                 'duration': 80.155,
             },
@@ -53,7 +53,7 @@ class TwitterCardIE(TwitterBaseIE):
                 'id': 'dq4Oj5quskI',
                 'ext': 'mp4',
                 'title': 'Ubuntu 11.10 Overview',
-                'description': 'Take a quick peek at what\'s new and improved in Ubuntu 11.10.\n\nOnce installed take a look at 10 Things to Do After Installing: http://www.omgubuntu.co.uk/2011/10/10-things-to-do-after-installing-ubuntu-11-10/',
+                'description': 'Take a quick peek at what\'s new and improved in Ubuntu 11.10.\n\nOnce installed take a look at 10 Things to Do After Installing: http://www.omgubuntu.co.uk/2011/10/10...',
                 'upload_date': '20111013',
                 'uploader': 'OMG! Ubuntu!',
                 'uploader_id': 'omgubuntu',
@@ -72,63 +72,107 @@ class TwitterCardIE(TwitterBaseIE):
                 'title': 'Vine by ArsenalTerje',
             },
             'add_ie': ['Vine'],
-        }
+        }, {
+            'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
+            'md5': '3846d0a07109b5ab622425449b59049d',
+            'info_dict': {
+                'id': '705235433198714880',
+                'ext': 'mp4',
+                'title': 'Twitter web player',
+                'thumbnail': 're:^https?://.*\.jpg',
+            },
+        },
     ]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        # Different formats served for different User-Agents
-        USER_AGENTS = [
-            'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',  # mp4
-            'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0',  # webm
-        ]
-
         config = None
         formats = []
-        for user_agent in USER_AGENTS:
-            request = sanitized_Request(url)
-            request.add_header('User-Agent', user_agent)
-            webpage = self._download_webpage(request, video_id)
-
-            iframe_url = self._html_search_regex(
-                r'<iframe[^>]+src="((?:https?:)?//(?:www.youtube.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"',
-                webpage, 'video iframe', default=None)
-            if iframe_url:
-                return self.url_result(iframe_url)
-
-            config = self._parse_json(self._html_search_regex(
-                r'data-player-config="([^"]+)"', webpage, 'data player config'),
-                video_id)
-            if 'playlist' not in config:
-                if 'vmapUrl' in config:
-                    formats.append({
-                        'url': self._get_vmap_video_url(config['vmapUrl'], video_id),
-                    })
-                    break   # same video regardless of UA
-                continue
-
-            video_url = config['playlist'][0]['source']
-
-            f = {
-                'url': video_url,
-            }
+        duration = None
 
+        webpage = self._download_webpage(url, video_id)
+
+        iframe_url = self._html_search_regex(
+            r'<iframe[^>]+src="((?:https?:)?//(?:www.youtube.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"',
+            webpage, 'video iframe', default=None)
+        if iframe_url:
+            return self.url_result(iframe_url)
+
+        config = self._parse_json(self._html_search_regex(
+            r'data-(?:player-)?config="([^"]+)"', webpage, 'data player config'),
+            video_id)
+
+        if config.get('source_type') == 'vine':
+            return self.url_result(config['player_url'], 'Vine')
+
+        def _search_dimensions_in_video_url(a_format, video_url):
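+            # Twitter video URLs often embed the resolution in the path as /<width>x<height>/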
             m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
             if m:
-                f.update({
+                a_format.update({
                     'width': int(m.group('width')),
                     'height': int(m.group('height')),
                 })
-            formats.append(f)
+
+        video_url = config.get('video_url') or config.get('playlist', [{}])[0].get('source')
+
+        if video_url:
+            if determine_ext(video_url) == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls'))
+            else:
+                f = {
+                    'url': video_url,
+                }
+
+                _search_dimensions_in_video_url(f, video_url)
+
+                formats.append(f)
+
+        vmap_url = config.get('vmapUrl') or config.get('vmap_url')
+        if vmap_url:
+            formats.append({
+                'url': self._get_vmap_video_url(vmap_url, video_id),
+            })
+
+        media_info = None
+
+        for entity in config.get('status', {}).get('entities', []):
+            if 'mediaInfo' in entity:
+                media_info = entity['mediaInfo']
+
+        if media_info:
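+            # Dispatch each variant by URL suffix: .m3u8 (HLS), .mpd (DASH), otherwise a progressive download keyed by bitrate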
+            for media_variant in media_info['variants']:
+                media_url = media_variant['url']
+                if media_url.endswith('.m3u8'):
+                    formats.extend(self._extract_m3u8_formats(media_url, video_id, ext='mp4', m3u8_id='hls'))
+                elif media_url.endswith('.mpd'):
+                    formats.extend(self._extract_mpd_formats(media_url, video_id, mpd_id='dash'))
+                else:
+                    vbr = int_or_none(media_variant.get('bitRate'), scale=1000)
+                    a_format = {
+                        'url': media_url,
+                        'format_id': 'http-%d' % vbr if vbr else 'http',
+                        'vbr': vbr,
+                    }
+                    # Reported bitRate may be zero
+                    if not a_format['vbr']:
+                        del a_format['vbr']
+
+                    _search_dimensions_in_video_url(a_format, media_url)
+
+                    formats.append(a_format)
+
+            duration = float_or_none(media_info.get('duration', {}).get('nanos'), scale=1e9)
+
         self._sort_formats(formats)
 
-        thumbnail = config.get('posterImageUrl')
-        duration = float_or_none(config.get('duration'))
+        title = self._search_regex(r'<title>([^<]+)</title>', webpage, 'title')
+        thumbnail = config.get('posterImageUrl') or config.get('image_src')
+        duration = float_or_none(config.get('duration')) or duration
 
         return {
             'id': video_id,
-            'title': 'TwitterCard',
+            'title': title,
             'thumbnail': thumbnail,
             'duration': duration,
             'formats': formats,
@@ -142,17 +186,18 @@ class TwitterIE(InfoExtractor):
 
     _TESTS = [{
         'url': 'https://twitter.com/freethenipple/status/643211948184596480',
-        # MD5 checksums are different in different places
         'info_dict': {
             'id': '643211948184596480',
             'ext': 'mp4',
             'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
             'thumbnail': 're:^https?://.*\.jpg',
-            'duration': 12.922,
             'description': 'FREE THE NIPPLE on Twitter: "FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ"',
             'uploader': 'FREE THE NIPPLE',
             'uploader_id': 'freethenipple',
         },
+        'params': {
+            'skip_download': True,  # requires ffmpeg
+        },
     }, {
         'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
         'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
@@ -166,6 +211,7 @@ class TwitterIE(InfoExtractor):
             'uploader_id': 'giphz',
         },
         'expected_warnings': ['height', 'width'],
+        'skip': 'Account suspended',
     }, {
         'url': 'https://twitter.com/starwars/status/665052190608723968',
         'md5': '39b7199856dee6cd4432e72c74bc69d4',
@@ -177,6 +223,61 @@ class TwitterIE(InfoExtractor):
             'uploader_id': 'starwars',
             'uploader': 'Star Wars',
         },
+    }, {
+        'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
+        'info_dict': {
+            'id': '705235433198714880',
+            'ext': 'mp4',
+            'title': 'Brent Yarina - Khalil Iverson\'s missed highlight dunk. And made highlight dunk. In one highlight.',
+            'description': 'Brent Yarina on Twitter: "Khalil Iverson\'s missed highlight dunk. And made highlight dunk. In one highlight."',
+            'uploader_id': 'BTNBrentYarina',
+            'uploader': 'Brent Yarina',
+        },
+        'params': {
+            # Same video as https://twitter.com/i/videos/tweet/705235433198714880,
+            # which is covered by the TwitterCardIE tests above
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
+        'md5': '',
+        'info_dict': {
+            'id': '700207533655363584',
+            'ext': 'mp4',
+            'title': 'Donte The Dumbass - BEAT PROD: @suhmeduh #Damndaniel',
+            'description': 'Donte The Dumbass on Twitter: "BEAT PROD: @suhmeduh  https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'uploader': 'Donte The Dumbass',
+            'uploader_id': 'jaydingeer',
+        },
+        'params': {
+            'skip_download': True,  # requires ffmpeg
+        },
+    }, {
+        'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
+        'md5': '89a15ed345d13b86e9a5a5e051fa308a',
+        'info_dict': {
+            'id': 'MIOxnrUteUd',
+            'ext': 'mp4',
+            'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
+            'uploader': 'TAKUMA',
+            'uploader_id': '1004126642786242560',
+            'upload_date': '20140615',
+        },
+        'add_ie': ['Vine'],
+    }, {
+        'url': 'https://twitter.com/captainamerica/status/719944021058060289',
+        'info_dict': {
+            'id': '719944021058060289',
+            'ext': 'mp4',
+            'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
+            'description': 'Captain America on Twitter: "@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI"',
+            'uploader_id': 'captainamerica',
+            'uploader': 'Captain America',
+        },
+        'params': {
+            'skip_download': True,  # requires ffmpeg
+        },
     }]
 
     def _real_extract(self, url):
@@ -184,7 +285,11 @@ class TwitterIE(InfoExtractor):
         user_id = mobj.group('user_id')
         twid = mobj.group('id')
 
-        webpage = self._download_webpage(self._TEMPLATE_URL % (user_id, twid), twid)
+        webpage, urlh = self._download_webpage_handle(
+            self._TEMPLATE_URL % (user_id, twid), twid)
+
+        if 'twitter.com/account/suspended' in urlh.geturl():
+            raise ExtractorError('Account suspended by Twitter.', expected=True)
 
         username = remove_end(self._og_search_title(webpage), ' on Twitter')
 
@@ -201,17 +306,6 @@ class TwitterIE(InfoExtractor):
             'title': username + ' - ' + title,
         }
 
-        card_id = self._search_regex(
-            r'["\']/i/cards/tfw/v1/(\d+)', webpage, 'twitter card url', default=None)
-        if card_id:
-            card_url = 'https://twitter.com/i/cards/tfw/v1/' + card_id
-            info.update({
-                '_type': 'url_transparent',
-                'ie_key': 'TwitterCard',
-                'url': card_url,
-            })
-            return info
-
         mobj = re.search(r'''(?x)
             <video[^>]+class="animated-gif"(?P<more_info>[^>]+)>\s*
                 <source[^>]+video-src="(?P<url>[^"]+)"
@@ -234,6 +328,15 @@ class TwitterIE(InfoExtractor):
             })
             return info
 
+        if 'class="PlayableMedia' in webpage:
+            info.update({
+                '_type': 'url_transparent',
+                'ie_key': 'TwitterCard',
+                'url': '%s//twitter.com/i/videos/tweet/%s' % (self.http_scheme(), twid),
+            })
+
+            return info
+
         raise ExtractorError('There\'s no video in this tweet.')
 
 
diff --git a/youtube_dl/extractor/ubu.py b/youtube_dl/extractor/ubu.py
deleted file mode 100644 (file)
index d502377..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
-    int_or_none,
-    qualities,
-)
-
-
-class UbuIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?ubu\.com/film/(?P<id>[\da-z_-]+)\.html'
-    _TEST = {
-        'url': 'http://ubu.com/film/her_noise.html',
-        'md5': '138d5652618bf0f03878978db9bef1ee',
-        'info_dict': {
-            'id': 'her_noise',
-            'ext': 'm4v',
-            'title': 'Her Noise - The Making Of (2007)',
-            'duration': 3600,
-        },
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-
-        title = self._html_search_regex(
-            r'<title>.+?Film &amp; Video: ([^<]+)</title>', webpage, 'title')
-
-        duration = int_or_none(self._html_search_regex(
-            r'Duration: (\d+) minutes', webpage, 'duration', fatal=False),
-            invscale=60)
-
-        formats = []
-        FORMAT_REGEXES = [
-            ('sq', r"'flashvars'\s*,\s*'file=([^']+)'"),
-            ('hq', r'href="(http://ubumexico\.centro\.org\.mx/video/[^"]+)"'),
-        ]
-        preference = qualities([fid for fid, _ in FORMAT_REGEXES])
-        for format_id, format_regex in FORMAT_REGEXES:
-            m = re.search(format_regex, webpage)
-            if m:
-                formats.append({
-                    'url': m.group(1),
-                    'format_id': format_id,
-                    'preference': preference(format_id),
-                })
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'duration': duration,
-            'formats': formats,
-        }
diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py
index f5b5e7fd6775b4194ca18265f4c78f639a5f092a..89b86955913587c3c09474fdffaab8ad338bb26a 100644 (file)
@@ -1,23 +1,37 @@
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..compat import (
     compat_HTTPError,
-    compat_urllib_parse,
     compat_urllib_request,
+    compat_urlparse,
 )
 from ..utils import (
+    determine_ext,
+    extract_attributes,
     ExtractorError,
     float_or_none,
     int_or_none,
     sanitized_Request,
     unescapeHTML,
+    urlencode_postdata,
 )
 
 
 class UdemyIE(InfoExtractor):
     IE_NAME = 'udemy'
-    _VALID_URL = r'https?://www\.udemy\.com/(?:[^#]+#/lecture/|lecture/view/?\?lectureId=)(?P<id>\d+)'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        www\.udemy\.com/
+                        (?:
+                            [^#]+\#/lecture/|
+                            lecture/view/?\?lectureId=|
+                            [^/]+/learn/v4/t/lecture/
+                        )
+                        (?P<id>\d+)
+                    '''
     _LOGIN_URL = 'https://www.udemy.com/join/login-popup/?displayType=ajax&showSkipButton=1'
     _ORIGIN_URL = 'https://www.udemy.com'
     _NETRC_MACHINE = 'udemy'
@@ -33,36 +47,55 @@ class UdemyIE(InfoExtractor):
             'duration': 579.29,
         },
         'skip': 'Requires udemy account credentials',
+    }, {
+        # new URL schema
+        'url': 'https://www.udemy.com/electric-bass-right-from-the-start/learn/v4/t/lecture/4580906',
+        'only_matching': True,
     }]
 
-    def _enroll_course(self, webpage, course_id):
+    def _extract_course_info(self, webpage, video_id):
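+        # Course metadata is embedded as JSON inside an Angular ng-init attribute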
+        course = self._parse_json(
+            unescapeHTML(self._search_regex(
+                r'ng-init=["\'].*\bcourse=({.+?});', webpage, 'course', default='{}')),
+            video_id, fatal=False) or {}
+        course_id = course.get('id') or self._search_regex(
+            (r'&quot;id&quot;\s*:\s*(\d+)', r'data-course-id=["\'](\d+)'),
+            webpage, 'course id')
+        return course_id, course.get('title')
+
+    def _enroll_course(self, base_url, webpage, course_id):
+        def combine_url(base_url, url):
+            return compat_urlparse.urljoin(base_url, url) if not url.startswith('http') else url
+
         checkout_url = unescapeHTML(self._search_regex(
-            r'href=(["\'])(?P<url>https?://(?:www\.)?udemy\.com/payment/checkout/.+?)\1',
+            r'href=(["\'])(?P<url>(?:https?://(?:www\.)?udemy\.com)?/payment/checkout/.+?)\1',
             webpage, 'checkout url', group='url', default=None))
         if checkout_url:
             raise ExtractorError(
                 'Course %s is not free. You have to pay for it before you can download. '
-                'Use this URL to confirm purchase: %s' % (course_id, checkout_url), expected=True)
+                'Use this URL to confirm purchase: %s'
+                % (course_id, combine_url(base_url, checkout_url)),
+                expected=True)
 
         enroll_url = unescapeHTML(self._search_regex(
-            r'href=(["\'])(?P<url>https?://(?:www\.)?udemy\.com/course/subscribe/.+?)\1',
+            r'href=(["\'])(?P<url>(?:https?://(?:www\.)?udemy\.com)?/course/subscribe/.+?)\1',
             webpage, 'enroll url', group='url', default=None))
         if enroll_url:
-            webpage = self._download_webpage(enroll_url, course_id, 'Enrolling in the course')
+            webpage = self._download_webpage(
+                combine_url(base_url, enroll_url),
+                course_id, 'Enrolling in the course',
+                headers={'Referer': base_url})
             if '>You have enrolled in' in webpage:
                 self.to_screen('%s: Successfully enrolled in the course' % course_id)
 
     def _download_lecture(self, course_id, lecture_id):
         return self._download_json(
-            'https://www.udemy.com/api-2.0/users/me/subscribed-courses/%s/lectures/%s?%s' % (
-                course_id, lecture_id, compat_urllib_parse.urlencode({
-                    'video_only': '',
-                    'auto_play': '',
-                    'fields[lecture]': 'title,description,asset',
-                    'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,data',
-                    'instructorPreviewMode': 'False',
-                })),
-            lecture_id, 'Downloading lecture JSON')
+            'https://www.udemy.com/api-2.0/users/me/subscribed-courses/%s/lectures/%s?'
+            % (course_id, lecture_id),
+            lecture_id, 'Downloading lecture JSON', query={
+                'fields[lecture]': 'title,description,view_html,asset',
+                'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,data',
+            })
 
     def _handle_error(self, response):
         if not isinstance(response, dict):
@@ -75,7 +108,7 @@ class UdemyIE(InfoExtractor):
                 error_str += ' - %s' % error_data.get('formErrors')
             raise ExtractorError(error_str, expected=True)
 
-    def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'):
+    def _download_json(self, url_or_request, *args, **kwargs):
         headers = {
             'X-Udemy-Snail-Case': 'true',
             'X-Requested-With': 'XMLHttpRequest',
@@ -93,7 +126,7 @@ class UdemyIE(InfoExtractor):
         else:
             url_or_request = sanitized_Request(url_or_request, headers=headers)
 
-        response = super(UdemyIE, self)._download_json(url_or_request, video_id, note)
+        response = super(UdemyIE, self)._download_json(url_or_request, *args, **kwargs)
         self._handle_error(response)
         return response
 
@@ -109,7 +142,9 @@ class UdemyIE(InfoExtractor):
             self._LOGIN_URL, None, 'Downloading login popup')
 
         def is_logged(webpage):
-            return any(p in webpage for p in ['href="https://www.udemy.com/user/logout/', '>Logout<'])
+            return any(re.search(p, webpage) for p in (
+                r'href=["\'](?:https://www\.udemy\.com)?/user/logout/',
+                r'>Logout<'))
 
         # already logged in
         if is_logged(login_popup):
@@ -118,17 +153,17 @@ class UdemyIE(InfoExtractor):
         login_form = self._form_hidden_inputs('login-form', login_popup)
 
         login_form.update({
-            'email': username.encode('utf-8'),
-            'password': password.encode('utf-8'),
+            'email': username,
+            'password': password,
         })
 
-        request = sanitized_Request(
-            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
-        request.add_header('Referer', self._ORIGIN_URL)
-        request.add_header('Origin', self._ORIGIN_URL)
-
         response = self._download_webpage(
-            request, None, 'Logging in as %s' % username)
+            self._LOGIN_URL, None, 'Logging in as %s' % username,
+            data=urlencode_postdata(login_form),
+            headers={
+                'Referer': self._ORIGIN_URL,
+                'Origin': self._ORIGIN_URL,
+            })
 
         if not is_logged(response):
             error = self._html_search_regex(
@@ -143,15 +178,14 @@ class UdemyIE(InfoExtractor):
 
         webpage = self._download_webpage(url, lecture_id)
 
-        course_id = self._search_regex(
-            r'data-course-id=["\'](\d+)', webpage, 'course id')
+        course_id, _ = self._extract_course_info(webpage, lecture_id)
 
         try:
             lecture = self._download_lecture(course_id, lecture_id)
         except ExtractorError as e:
             # Error could possibly mean we are not enrolled in the course
             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
-                self._enroll_course(webpage, course_id)
+                self._enroll_course(url, webpage, course_id)
                 lecture = self._download_lecture(course_id, lecture_id)
             else:
                 raise
@@ -161,12 +195,12 @@ class UdemyIE(InfoExtractor):
 
         asset = lecture['asset']
 
-        asset_type = asset.get('assetType') or asset.get('asset_type')
+        asset_type = asset.get('asset_type') or asset.get('assetType')
         if asset_type != 'Video':
             raise ExtractorError(
                 'Lecture %s is not a video' % lecture_id, expected=True)
 
-        stream_url = asset.get('streamUrl') or asset.get('stream_url')
+        stream_url = asset.get('stream_url') or asset.get('streamUrl')
         if stream_url:
             youtube_url = self._search_regex(
                 r'(https?://www\.youtube\.com/watch\?v=.*)', stream_url, 'youtube URL', default=None)
@@ -174,43 +208,92 @@ class UdemyIE(InfoExtractor):
                 return self.url_result(youtube_url, 'Youtube')
 
         video_id = asset['id']
-        thumbnail = asset.get('thumbnailUrl') or asset.get('thumbnail_url')
+        thumbnail = asset.get('thumbnail_url') or asset.get('thumbnailUrl')
         duration = float_or_none(asset.get('data', {}).get('duration'))
-        outputs = asset.get('data', {}).get('outputs', {})
 
         formats = []
-        for format_ in asset.get('download_urls', {}).get('Video', []):
-            video_url = format_.get('file')
-            if not video_url:
-                continue
-            format_id = format_.get('label')
-            f = {
-                'url': format_['file'],
-                'height': int_or_none(format_id),
+
+        def extract_output_format(src):
+            return {
+                'url': src['url'],
+                'format_id': '%sp' % (src.get('height') or format_id),
+                'width': int_or_none(src.get('width')),
+                'height': int_or_none(src.get('height')),
+                'vbr': int_or_none(src.get('video_bitrate_in_kbps')),
+                'vcodec': src.get('video_codec'),
+                'fps': int_or_none(src.get('frame_rate')),
+                'abr': int_or_none(src.get('audio_bitrate_in_kbps')),
+                'acodec': src.get('audio_codec'),
+                'asr': int_or_none(src.get('audio_sample_rate')),
+                'tbr': int_or_none(src.get('total_bitrate_in_kbps')),
+                'filesize': int_or_none(src.get('file_size_in_bytes')),
             }
-            if format_id:
-                # Some videos contain additional metadata (e.g.
-                # https://www.udemy.com/ios9-swift/learn/#/lecture/3383208)
-                output = outputs.get(format_id)
-                if isinstance(output, dict):
-                    f.update({
-                        'format_id': '%sp' % (output.get('label') or format_id),
-                        'width': int_or_none(output.get('width')),
-                        'height': int_or_none(output.get('height')),
-                        'vbr': int_or_none(output.get('video_bitrate_in_kbps')),
-                        'vcodec': output.get('video_codec'),
-                        'fps': int_or_none(output.get('frame_rate')),
-                        'abr': int_or_none(output.get('audio_bitrate_in_kbps')),
-                        'acodec': output.get('audio_codec'),
-                        'asr': int_or_none(output.get('audio_sample_rate')),
-                        'tbr': int_or_none(output.get('total_bitrate_in_kbps')),
-                        'filesize': int_or_none(output.get('file_size_in_bytes')),
-                    })
+
+        outputs = asset.get('data', {}).get('outputs')
+        if not isinstance(outputs, dict):
+            outputs = {}
+
+        def add_output_format_meta(f, key):
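+            # Merge codec/bitrate details from outputs, letting values already present in f win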
+            output = outputs.get(key)
+            if isinstance(output, dict):
+                output_format = extract_output_format(output)
+                output_format.update(f)
+                return output_format
+            return f
+
+        download_urls = asset.get('download_urls')
+        if isinstance(download_urls, dict):
+            video = download_urls.get('Video')
+            if isinstance(video, list):
+                for format_ in video:
+                    video_url = format_.get('file')
+                    if not video_url:
+                        continue
+                    format_id = format_.get('label')
+                    f = {
+                        'url': format_['file'],
+                        'format_id': '%sp' % format_id,
+                        'height': int_or_none(format_id),
+                    }
+                    if format_id:
+                        # Some videos contain additional metadata (e.g.
+                        # https://www.udemy.com/ios9-swift/learn/#/lecture/3383208)
+                        f = add_output_format_meta(f, format_id)
+                    formats.append(f)
+
+        view_html = lecture.get('view_html')
+        if view_html:
+            view_html_urls = set()
+            for source in re.findall(r'<source[^>]+>', view_html):
+                attributes = extract_attributes(source)
+                src = attributes.get('src')
+                if not src:
+                    continue
+                res = attributes.get('data-res')
+                height = int_or_none(res)
+                if src in view_html_urls:
+                    continue
+                view_html_urls.add(src)
+                if attributes.get('type') == 'application/x-mpegURL' or determine_ext(src) == 'm3u8':
+                    m3u8_formats = self._extract_m3u8_formats(
+                        src, video_id, 'mp4', entry_protocol='m3u8_native',
+                        m3u8_id='hls', fatal=False)
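+                    # The HLS fragment path encodes height and bitrate, e.g. /hls_720_1500/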
+                    for f in m3u8_formats:
+                        m = re.search(r'/hls_(?P<height>\d{3,4})_(?P<tbr>\d{2,})/', f['url'])
+                        if m:
+                            if not f.get('height'):
+                                f['height'] = int(m.group('height'))
+                            if not f.get('tbr'):
+                                f['tbr'] = int(m.group('tbr'))
+                    formats.extend(m3u8_formats)
                 else:
-                    f['format_id'] = '%sp' % format_id
-            formats.append(f)
+                    formats.append(add_output_format_meta({
+                        'url': src,
+                        'format_id': '%dp' % height if height else None,
+                        'height': height,
+                    }, res))
 
-        self._sort_formats(formats)
+        self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id'))
 
         return {
             'id': video_id,
@@ -224,7 +307,7 @@ class UdemyIE(InfoExtractor):
 
 class UdemyCourseIE(UdemyIE):
     IE_NAME = 'udemy:course'
-    _VALID_URL = r'https?://www\.udemy\.com/(?P<id>[\da-z-]+)'
+    _VALID_URL = r'https?://www\.udemy\.com/(?P<id>[^/?#&]+)'
     _TESTS = []
 
     @classmethod
@@ -236,29 +319,34 @@ class UdemyCourseIE(UdemyIE):
 
         webpage = self._download_webpage(url, course_path)
 
-        response = self._download_json(
-            'https://www.udemy.com/api-1.1/courses/%s' % course_path,
-            course_path, 'Downloading course JSON')
-
-        course_id = response['id']
-        course_title = response.get('title')
+        course_id, title = self._extract_course_info(webpage, course_path)
 
-        self._enroll_course(webpage, course_id)
+        self._enroll_course(url, webpage, course_id)
 
         response = self._download_json(
-            'https://www.udemy.com/api-1.1/courses/%s/curriculum' % course_id,
-            course_id, 'Downloading course curriculum')
+            'https://www.udemy.com/api-2.0/courses/%s/cached-subscriber-curriculum-items' % course_id,
+            course_id, 'Downloading course curriculum', query={
+                'fields[chapter]': 'title,object_index',
+                'fields[lecture]': 'title,asset',
+                'page_size': '1000',
+            })
 
         entries = []
-        chapter, chapter_number = None, None
-        for asset in response:
-            asset_type = asset.get('assetType') or asset.get('asset_type')
-            if asset_type == 'Video':
-                asset_id = asset.get('id')
-                if asset_id:
+        chapter, chapter_number = [None] * 2
+        for entry in response['results']:
+            clazz = entry.get('_class')
+            if clazz == 'lecture':
+                asset = entry.get('asset')
+                if isinstance(asset, dict):
+                    asset_type = asset.get('asset_type') or asset.get('assetType')
+                    if asset_type != 'Video':
+                        continue
+                lecture_id = entry.get('id')
+                if lecture_id:
                     entry = {
                         '_type': 'url_transparent',
-                        'url': 'https://www.udemy.com/%s/#/lecture/%s' % (course_path, asset['id']),
+                        'url': 'https://www.udemy.com/%s/learn/v4/t/lecture/%s' % (course_path, entry['id']),
+                        'title': entry.get('title'),
                         'ie_key': UdemyIE.ie_key(),
                     }
                     if chapter_number:
@@ -266,8 +354,8 @@ class UdemyCourseIE(UdemyIE):
                     if chapter:
                         entry['chapter'] = chapter
                     entries.append(entry)
-            elif asset.get('type') == 'chapter':
-                chapter_number = asset.get('index') or asset.get('object_index')
-                chapter = asset.get('title')
+            elif clazz == 'chapter':
+                chapter_number = entry.get('object_index')
+                chapter = entry.get('title')
 
-        return self.playlist_result(entries, course_id, course_title)
+        return self.playlist_result(entries, course_id, title)
diff --git a/youtube_dl/extractor/udn.py b/youtube_dl/extractor/udn.py
index ee35b7227372c0ddc128dfc694577578f9fc6009..57dd73aef6f6254f22cdcd814e2d76b20c75b847 100644 (file)
@@ -2,10 +2,13 @@
 from __future__ import unicode_literals
 
 import json
+import re
+
 from .common import InfoExtractor
 from ..utils import (
+    determine_ext,
+    int_or_none,
     js_to_json,
-    ExtractorError,
 )
 from ..compat import compat_urlparse
 
@@ -16,13 +19,16 @@ class UDNEmbedIE(InfoExtractor):
     _VALID_URL = r'https?:' + _PROTOCOL_RELATIVE_VALID_URL
     _TESTS = [{
         'url': 'http://video.udn.com/embed/news/300040',
-        'md5': 'de06b4c90b042c128395a88f0384817e',
         'info_dict': {
             'id': '300040',
             'ext': 'mp4',
             'title': '生物老師男變女 全校挺"做自己"',
             'thumbnail': 're:^https?://.*\.jpg$',
-        }
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
     }, {
         'url': 'https://video.udn.com/embed/news/300040',
         'only_matching': True,
@@ -38,39 +44,53 @@ class UDNEmbedIE(InfoExtractor):
         page = self._download_webpage(url, video_id)
 
         options = json.loads(js_to_json(self._html_search_regex(
-            r'var options\s*=\s*([^;]+);', page, 'video urls dictionary')))
+            r'var\s+options\s*=\s*([^;]+);', page, 'video urls dictionary')))
 
         video_urls = options['video']
 
         if video_urls.get('youtube'):
             return self.url_result(video_urls.get('youtube'), 'Youtube')
 
-        try:
-            del video_urls['youtube']
-        except KeyError:
-            pass
+        formats = []
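+        # Each entry maps a format type to an API endpoint that returns the actual media URL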
+        for video_type, api_url in video_urls.items():
+            if not api_url:
+                continue
 
-        formats = [{
-            'url': self._download_webpage(
+            video_url = self._download_webpage(
                 compat_urlparse.urljoin(url, api_url), video_id,
-                'retrieve url for %s video' % video_type),
-            'format_id': video_type,
-            'preference': 0 if video_type == 'mp4' else -1,
-        } for video_type, api_url in video_urls.items() if api_url]
+                note='retrieve url for %s video' % video_type)
 
-        if not formats:
-            raise ExtractorError('No videos found', expected=True)
+            ext = determine_ext(video_url)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    video_url, video_id, ext='mp4', m3u8_id='hls'))
+            elif ext == 'f4m':
+                formats.extend(self._extract_f4m_formats(
+                    video_url, video_id, f4m_id='hds'))
+            else:
+                mobj = re.search(r'_(?P<height>\d+)p_(?P<tbr>\d+)\.mp4', video_url)
+                a_format = {
+                    'url': video_url,
+                    # video_type may be 'mp4', which confuses YoutubeDL
+                    'format_id': 'http-' + video_type,
+                }
+                if mobj:
+                    a_format.update({
+                        'height': int_or_none(mobj.group('height')),
+                        'tbr': int_or_none(mobj.group('tbr')),
+                    })
+                formats.append(a_format)
 
         self._sort_formats(formats)
 
-        thumbnail = None
-
-        if options.get('gallery') and len(options['gallery']):
-            thumbnail = options['gallery'][0].get('original')
+        thumbnails = [{
+            'url': img_url,
+            'id': img_type,
+        } for img_type, img_url in options.get('gallery', [{}])[0].items() if img_url]
 
         return {
             'id': video_id,
             'formats': formats,
             'title': options['title'],
-            'thumbnail': thumbnail
+            'thumbnails': thumbnails,
         }
diff --git a/youtube_dl/extractor/unistra.py b/youtube_dl/extractor/unistra.py
index 594bee4f9a681f928f37887270b062b6e7079514..a724cdbef8620821e65cb22324fcfcb33c1a3b4c 100644 (file)
@@ -7,7 +7,7 @@ from ..utils import qualities
 
 
 class UnistraIE(InfoExtractor):
-    _VALID_URL = r'http://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(?P<id>\d+)'
+    _VALID_URL = r'https?://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(?P<id>\d+)'
 
     _TESTS = [
         {
@@ -49,6 +49,7 @@ class UnistraIE(InfoExtractor):
                 'format_id': format_id,
                 'quality': quality(format_id)
             })
+        self._sort_formats(formats)
 
         title = self._html_search_regex(
             r'<title>UTV - (.*?)</', webpage, 'title')
diff --git a/youtube_dl/extractor/usatoday.py b/youtube_dl/extractor/usatoday.py
new file mode 100644 (file)
index 0000000..e5678dc
--- /dev/null
@@ -0,0 +1,48 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    get_element_by_attribute,
+    parse_duration,
+    update_url_query,
+    ExtractorError,
+)
+from ..compat import compat_str
+
+
+class USATodayIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?usatoday\.com/(?:[^/]+/)*(?P<id>[^?/#]+)'
+    _TEST = {
+        'url': 'http://www.usatoday.com/media/cinematic/video/81729424/us-france-warn-syrian-regime-ahead-of-new-peace-talks/',
+        'md5': '4d40974481fa3475f8bccfd20c5361f8',
+        'info_dict': {
+            'id': '81729424',
+            'ext': 'mp4',
+            'title': 'US, France warn Syrian regime ahead of new peace talks',
+            'timestamp': 1457891045,
+            'description': 'md5:7e50464fdf2126b0f533748d3c78d58f',
+            'uploader_id': '29906170001',
+            'upload_date': '20160313',
+        }
+    }
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/29906170001/38a9eecc-bdd8-42a3-ba14-95397e48b3f8_default/index.html?videoId=%s'
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
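+        # Requesting the page with ajax=true yields a response that carries the ui-video-data element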
+        webpage = self._download_webpage(update_url_query(url, {'ajax': 'true'}), display_id)
+        ui_video_data = get_element_by_attribute('class', 'ui-video-data', webpage)
+        if not ui_video_data:
+            raise ExtractorError('no video on the webpage', expected=True)
+        video_data = self._parse_json(ui_video_data, display_id)
+
+        return {
+            '_type': 'url_transparent',
+            'url': self.BRIGHTCOVE_URL_TEMPLATE % video_data['brightcove_id'],
+            'id': compat_str(video_data['id']),
+            'title': video_data['title'],
+            'thumbnail': video_data.get('thumbnail'),
+            'description': video_data.get('description'),
+            'duration': parse_duration(video_data.get('length')),
+            'ie_key': 'BrightcoveNew',
+        }
diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py
index b5fe753d7115923d16ed6cf7de34c8723368f82f..54605d863027968a4a15c5358b9f98539c69c4b3 100644 (file)
@@ -41,6 +41,12 @@ class UstreamIE(InfoExtractor):
             'uploader': 'sportscanadatv',
         },
         'skip': 'This Pro Broadcaster has chosen to remove this video from the ustream.tv site.',
+    }, {
+        'url': 'http://www.ustream.tv/embed/10299409',
+        'info_dict': {
+            'id': '10299409',
+        },
+        'playlist_count': 3,
     }]
 
     def _real_extract(self, url):
@@ -55,10 +61,12 @@ class UstreamIE(InfoExtractor):
         if m.group('type') == 'embed':
             video_id = m.group('id')
             webpage = self._download_webpage(url, video_id)
-            desktop_video_id = self._html_search_regex(
-                r'ContentVideoIds=\["([^"]*?)"\]', webpage, 'desktop_video_id')
-            desktop_url = 'http://www.ustream.tv/recorded/' + desktop_video_id
-            return self.url_result(desktop_url, 'Ustream')
+            content_video_ids = self._parse_json(self._search_regex(
+                r'ustream\.vars\.offAirContentVideoIds=([^;]+);', webpage,
+                'content video IDs'), video_id)
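+            # An embed page may reference several recorded videos; expose them as a playlist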
+            return self.playlist_result(
+                map(lambda u: self.url_result('http://www.ustream.tv/recorded/' + u, 'Ustream'), content_video_ids),
+                video_id)
 
         params = self._download_json(
             'https://api.ustream.tv/videos/%s.json' % video_id, video_id)
diff --git a/youtube_dl/extractor/ustudio.py b/youtube_dl/extractor/ustudio.py
new file mode 100644 (file)
index 0000000..3484a20
--- /dev/null
@@ -0,0 +1,125 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    unified_strdate,
+    unescapeHTML,
+)
+
+
+class UstudioIE(InfoExtractor):
+    IE_NAME = 'ustudio'
+    _VALID_URL = r'https?://(?:(?:www|v1)\.)?ustudio\.com/video/(?P<id>[^/]+)/(?P<display_id>[^/?#&]+)'
+    _TEST = {
+        'url': 'http://ustudio.com/video/Uxu2my9bgSph/san_francisco_golden_gate_bridge',
+        'md5': '58bbfca62125378742df01fc2abbdef6',
+        'info_dict': {
+            'id': 'Uxu2my9bgSph',
+            'display_id': 'san_francisco_golden_gate_bridge',
+            'ext': 'mp4',
+            'title': 'San Francisco: Golden Gate Bridge',
+            'description': 'md5:23925500697f2c6d4830e387ba51a9be',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'upload_date': '20111107',
+            'uploader': 'Tony Farley',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id, display_id = re.match(self._VALID_URL, url).groups()
+
+        config = self._download_xml(
+            'http://v1.ustudio.com/embed/%s/ustudio/config.xml' % video_id,
+            display_id)
+
+        def extract(kind):
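+            # Collect url/width/height for every quality node of the given kind ('video' or 'image')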
+            return [{
+                'url': unescapeHTML(item.attrib['url']),
+                'width': int_or_none(item.get('width')),
+                'height': int_or_none(item.get('height')),
+            } for item in config.findall('./qualities/quality/%s' % kind) if item.get('url')]
+
+        formats = extract('video')
+        self._sort_formats(formats)
+
+        webpage = self._download_webpage(url, display_id)
+
+        title = self._og_search_title(webpage)
+        upload_date = unified_strdate(self._search_regex(
+            r'(?s)Uploaded by\s*.+?\s*on\s*<span>([^<]+)</span>',
+            webpage, 'upload date', fatal=False))
+        uploader = self._search_regex(
+            r'Uploaded by\s*<a[^>]*>([^<]+)<',
+            webpage, 'uploader', fatal=False)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': self._og_search_description(webpage),
+            'thumbnails': extract('image'),
+            'upload_date': upload_date,
+            'uploader': uploader,
+            'formats': formats,
+        }
+
+
+class UstudioEmbedIE(InfoExtractor):
+    IE_NAME = 'ustudio:embed'
+    _VALID_URL = r'https?://(?:(?:app|embed)\.)?ustudio\.com/embed/(?P<uid>[^/]+)/(?P<id>[^/]+)'
+    _TEST = {
+        'url': 'http://app.ustudio.com/embed/DeN7VdYRDKhP/Uw7G1kMCe65T',
+        'md5': '47c0be52a09b23a7f40de9469cec58f4',
+        'info_dict': {
+            'id': 'Uw7G1kMCe65T',
+            'ext': 'mp4',
+            'title': '5 Things IT Should Know About Video',
+            'description': 'md5:93d32650884b500115e158c5677d25ad',
+            'uploader_id': 'DeN7VdYRDKhP',
+        }
+    }
+
+    def _real_extract(self, url):
+        uploader_id, video_id = re.match(self._VALID_URL, url).groups()
+        video_data = self._download_json(
+            'http://app.ustudio.com/embed/%s/%s/config.json' % (uploader_id, video_id),
+            video_id)['videos'][0]
+        title = video_data['name']
+
+        formats = []
+        for ext, qualities in video_data.get('transcodes', {}).items():
+            for quality in qualities:
+                quality_url = quality.get('url')
+                if not quality_url:
+                    continue
+                height = int_or_none(quality.get('height'))
+                formats.append({
+                    'format_id': '%s-%dp' % (ext, height) if height else ext,
+                    'url': quality_url,
+                    'width': int_or_none(quality.get('width')),
+                    'height': height,
+                })
+        self._sort_formats(formats)
+
+        thumbnails = []
+        for image in video_data.get('images', []):
+            image_url = image.get('url')
+            if not image_url:
+                continue
+            thumbnails.append({
+                'url': image_url,
+            })
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': video_data.get('description'),
+            'duration': int_or_none(video_data.get('duration')),
+            'uploader_id': uploader_id,
+            'tags': video_data.get('keywords'),
+            'thumbnails': thumbnails,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/varzesh3.py b/youtube_dl/extractor/varzesh3.py
index 9369abaf8f7bdfa2b220c39d02f9460dbab711c2..84698371a8ab2daf77faae1684141eb32425f232 100644 (file)
@@ -2,11 +2,19 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_parse_urlparse,
+    compat_parse_qs,
+)
+from ..utils import (
+    clean_html,
+    remove_start,
+)
 
 
 class Varzesh3IE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?video\.varzesh3\.com/(?:[^/]+/)+(?P<id>[^/]+)/?'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://video.varzesh3.com/germany/bundesliga/5-%D9%88%D8%A7%DA%A9%D9%86%D8%B4-%D8%A8%D8%B1%D8%AA%D8%B1-%D8%AF%D8%B1%D9%88%D8%A7%D8%B2%D9%87%E2%80%8C%D8%A8%D8%A7%D9%86%D8%A7%D9%86%D8%9B%D9%87%D9%81%D8%AA%D9%87-26-%D8%A8%D9%88%D9%86%D8%AF%D8%B3/',
         'md5': '2a933874cb7dce4366075281eb49e855',
         'info_dict': {
@@ -15,8 +23,19 @@ class Varzesh3IE(InfoExtractor):
             'title': '۵ واکنش برتر دروازه‌بانان؛هفته ۲۶ بوندسلیگا',
             'description': 'فصل ۲۰۱۵-۲۰۱۴',
             'thumbnail': 're:^https?://.*\.jpg$',
-        }
-    }
+        },
+        'skip': 'HTTP 404 Error',
+    }, {
+        'url': 'http://video.varzesh3.com/video/112785/%D8%AF%D9%84%D9%87-%D8%B9%D9%84%DB%8C%D8%9B-%D8%B3%D8%AA%D8%A7%D8%B1%D9%87-%D9%86%D9%88%D8%B8%D9%87%D9%88%D8%B1-%D9%84%DB%8C%DA%AF-%D8%A8%D8%B1%D8%AA%D8%B1-%D8%AC%D8%B2%DB%8C%D8%B1%D9%87',
+        'md5': '841b7cd3afbc76e61708d94e53a4a4e7',
+        'info_dict': {
+            'id': '112785',
+            'ext': 'mp4',
+            'title': 'دله علی؛ ستاره نوظهور لیگ برتر جزیره',
+            'description': 'فوتبال 120',
+        },
+        'expected_warnings': ['description'],
+    }]
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
@@ -26,15 +45,30 @@ class Varzesh3IE(InfoExtractor):
         video_url = self._search_regex(
             r'<source[^>]+src="([^"]+)"', webpage, 'video url')
 
-        title = self._og_search_title(webpage)
+        title = remove_start(self._html_search_regex(
+            r'<title>([^<]+)</title>', webpage, 'title'), 'ویدیو ورزش 3 | ')
+
         description = self._html_search_regex(
             r'(?s)<div class="matn">(.+?)</div>',
-            webpage, 'description', fatal=False)
-        thumbnail = self._og_search_thumbnail(webpage)
+            webpage, 'description', default=None)
+        if description is None:
+            description = clean_html(self._html_search_meta('description', webpage))
+
+        thumbnail = self._og_search_thumbnail(webpage, default=None)
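+        # Fall back to the preview image passed to the Facebook sharer link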
+        if thumbnail is None:
+            fb_sharer_url = self._search_regex(
+                r'<a[^>]+href="(https?://www\.facebook\.com/sharer/sharer\.php\?[^"]+)"',
+                webpage, 'facebook sharer URL', fatal=False)
+            sharer_params = compat_parse_qs(compat_urllib_parse_urlparse(fb_sharer_url).query)
+            thumbnail = sharer_params.get('p[images][0]', [None])[0]
 
         video_id = self._search_regex(
             r"<link[^>]+rel='(?:canonical|shortlink)'[^>]+href='/\?p=([^']+)'",
-            webpage, display_id, default=display_id)
+            webpage, display_id, default=None)
+        if video_id is None:
+            video_id = self._search_regex(
+                r'var\s+VideoId\s*=\s*(\d+);', webpage, 'video id',
+                default=display_id)
 
         return {
             'url': video_url,
diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py
index 3794bcded273235f3fc2a77097b1ca6e48fab348..dff1bb70281b9a1cd69d9507b963e4c622a29044 100644 (file)
@@ -2,18 +2,16 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-    compat_urlparse,
-)
+from ..compat import compat_urlparse
 from ..utils import (
     ExtractorError,
     sanitized_Request,
+    urlencode_postdata,
 )
 
 
 class Vbox7IE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?vbox7\.com/play:(?P<id>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?vbox7\.com/play:(?P<id>[^/]+)'
     _TEST = {
         'url': 'http://vbox7.com/play:249bb972c2',
         'md5': '99f65c0c9ef9b682b97313e052734c3f',
@@ -48,7 +46,7 @@ class Vbox7IE(InfoExtractor):
                                         webpage, 'title').split('/')[0].strip()
 
         info_url = 'http://vbox7.com/play/magare.do'
-        data = compat_urllib_parse.urlencode({'as3': '1', 'vid': video_id})
+        data = urlencode_postdata({'as3': '1', 'vid': video_id})
         info_request = sanitized_Request(info_url, data)
         info_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
         info_response = self._download_webpage(info_request, video_id, 'Downloading info webpage')
diff --git a/youtube_dl/extractor/veoh.py b/youtube_dl/extractor/veoh.py
index 9633f7ffeec865c69c77a0e2d7475399a998d44a..0f5d6873808ed2dce5cde2e6239b6973cf809367 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 
 class VeohIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/(?P<id>(?:v|yapi-)[\da-zA-Z]+)'
+    _VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/(?P<id>(?:v|yapi-)[\da-zA-Z]+)'
 
     _TESTS = [
         {
@@ -37,6 +37,7 @@ class VeohIE(InfoExtractor):
                 'uploader': 'afp-news',
                 'duration': 123,
             },
+            'skip': 'This video has been deleted.',
         },
         {
             'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',
diff --git a/youtube_dl/extractor/vessel.py b/youtube_dl/extractor/vessel.py
index 1a0ff3395598027ebd8de05a609faca987c14e9e..2cd617b91ce4a4a7eba0a639c0956dca3e168576 100644 (file)
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 
 import json
+import re
 
 from .common import InfoExtractor
 from ..utils import (
@@ -12,11 +13,11 @@ from ..utils import (
 
 
 class VesselIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?vessel\.com/videos/(?P<id>[0-9a-zA-Z]+)'
+    _VALID_URL = r'https?://(?:www\.)?vessel\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z]+)'
     _API_URL_TEMPLATE = 'https://www.vessel.com/api/view/items/%s'
     _LOGIN_URL = 'https://www.vessel.com/api/account/login'
     _NETRC_MACHINE = 'vessel'
-    _TEST = {
+    _TESTS = [{
         'url': 'https://www.vessel.com/videos/HDN7G5UMs',
         'md5': '455cdf8beb71c6dd797fd2f3818d05c4',
         'info_dict': {
@@ -28,7 +29,16 @@ class VesselIE(InfoExtractor):
             'description': 'Did Nvidia pull out all the stops on the Titan X, or does its performance leave something to be desired?',
             'timestamp': int,
         },
-    }
+    }, {
+        'url': 'https://www.vessel.com/embed/G4U7gUJ6a?w=615&h=346',
+        'only_matching': True,
+    }]
+
+    @staticmethod
+    def _extract_urls(webpage):
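+        # Lets the generic extractor discover Vessel embeds on third-party pages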
+        return [url for _, url in re.findall(
+            r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?vessel\.com/embed/[0-9a-zA-Z]+.*?)\1',
+            webpage)]
 
     @staticmethod
     def make_json_request(url, data):
@@ -98,16 +108,24 @@ class VesselIE(InfoExtractor):
 
         formats = []
         for f in video_asset.get('sources', []):
-            if f['name'] == 'hls-index':
+            location = f.get('location')
+            if not location:
+                continue
+            name = f.get('name')
+            if name == 'hls-index':
                 formats.extend(self._extract_m3u8_formats(
-                    f['location'], video_id, ext='mp4', m3u8_id='m3u8'))
+                    location, video_id, ext='mp4',
+                    entry_protocol='m3u8_native', m3u8_id='m3u8', fatal=False))
+            elif name == 'dash-index':
+                formats.extend(self._extract_mpd_formats(
+                    location, video_id, mpd_id='dash', fatal=False))
             else:
                 formats.append({
-                    'format_id': f['name'],
+                    'format_id': name,
                     'tbr': f.get('bitrate'),
                     'height': f.get('height'),
                     'width': f.get('width'),
-                    'url': f['location'],
+                    'url': location,
                 })
         self._sort_formats(formats)
 
diff --git a/youtube_dl/extractor/vesti.py b/youtube_dl/extractor/vesti.py
index a0c59a2e0e1cb8fca2e0e3eb3ec2e4edce2918bb..cb64ae0bd07cdca051eb3aa10550840a296ded85 100644 (file)
@@ -10,7 +10,7 @@ from .rutv import RUTVIE
 
 class VestiIE(InfoExtractor):
     IE_DESC = 'Вести.Ru'
-    _VALID_URL = r'http://(?:.+?\.)?vesti\.ru/(?P<id>.+)'
+    _VALID_URL = r'https?://(?:.+?\.)?vesti\.ru/(?P<id>.+)'
 
     _TESTS = [
         {
diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py
index 152fef42e2aa052acea07f909eee66cf842e87c5..388b4debee27d7331ae7dc351338e3829e539071 100644 (file)
@@ -3,7 +3,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_etree_fromstring
+from ..compat import (
+    compat_etree_fromstring,
+    compat_str,
+    compat_urlparse,
+)
 from ..utils import (
     ExtractorError,
     int_or_none,
@@ -12,13 +16,22 @@ from ..utils import (
 )
 
 
-class VevoIE(InfoExtractor):
+class VevoBaseIE(InfoExtractor):
+    def _extract_json(self, webpage, video_id, item):
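+        # The page embeds a JSON store in window.__INITIAL_STORE__; pick one item from its 'default' branch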
+        return self._parse_json(
+            self._search_regex(
+                r'window\.__INITIAL_STORE__\s*=\s*({.+?});\s*</script>',
+                webpage, 'initial store'),
+            video_id)['default'][item]
+
+
+class VevoIE(VevoBaseIE):
     '''
     Accepts urls from vevo.com or in the format 'vevo:{id}'
     (currently used by MTVIE and MySpaceIE)
     '''
     _VALID_URL = r'''(?x)
-        (?:https?://www\.vevo\.com/watch/(?:[^/]+/(?:[^/]+/)?)?|
+        (?:https?://www\.vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?|
            https?://cache\.vevo\.com/m/html/embed\.html\?video=|
            https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
            vevo:)
@@ -30,11 +43,15 @@ class VevoIE(InfoExtractor):
         'info_dict': {
             'id': 'GB1101300280',
             'ext': 'mp4',
-            'title': 'Somebody to Die For',
+            'title': 'Hurts - Somebody to Die For',
+            'timestamp': 1372057200,
             'upload_date': '20130624',
             'uploader': 'Hurts',
-            'timestamp': 1372057200,
+            'track': 'Somebody to Die For',
+            'artist': 'Hurts',
+            'genre': 'Pop',
         },
+        'expected_warnings': ['Unable to download SMIL file'],
     }, {
         'note': 'v3 SMIL format',
         'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
@@ -42,23 +59,31 @@ class VevoIE(InfoExtractor):
         'info_dict': {
             'id': 'USUV71302923',
             'ext': 'mp4',
-            'title': 'I Wish I Could Break Your Heart',
+            'title': 'Cassadee Pope - I Wish I Could Break Your Heart',
+            'timestamp': 1392796919,
             'upload_date': '20140219',
             'uploader': 'Cassadee Pope',
-            'timestamp': 1392796919,
+            'track': 'I Wish I Could Break Your Heart',
+            'artist': 'Cassadee Pope',
+            'genre': 'Country',
         },
+        'expected_warnings': ['Unable to download SMIL file'],
     }, {
         'note': 'Age-limited video',
         'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282',
         'info_dict': {
             'id': 'USRV81300282',
             'ext': 'mp4',
-            'title': 'Tunnel Vision (Explicit)',
-            'upload_date': '20130703',
+            'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
             'age_limit': 18,
-            'uploader': 'Justin Timberlake',
             'timestamp': 1372888800,
+            'upload_date': '20130703',
+            'uploader': 'Justin Timberlake',
+            'track': 'Tunnel Vision (Explicit)',
+            'artist': 'Justin Timberlake',
+            'genre': 'Pop',
         },
+        'expected_warnings': ['Unable to download SMIL file'],
     }, {
         'note': 'No video_info',
         'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000',
@@ -66,12 +91,36 @@ class VevoIE(InfoExtractor):
         'info_dict': {
             'id': 'USUV71503000',
             'ext': 'mp4',
-            'title': 'Till I Die',
-            'upload_date': '20151207',
+            'title': 'K Camp - Till I Die',
             'age_limit': 18,
-            'uploader': 'K Camp',
             'timestamp': 1449468000,
+            'upload_date': '20151207',
+            'uploader': 'K Camp',
+            'track': 'Till I Die',
+            'artist': 'K Camp',
+            'genre': 'Rap/Hip-Hop',
+        },
+    }, {
+        'note': 'Only available via webpage',
+        'url': 'http://www.vevo.com/watch/GBUV71600656',
+        'md5': '67e79210613865b66a47c33baa5e37fe',
+        'info_dict': {
+            'id': 'GBUV71600656',
+            'ext': 'mp4',
+            'title': 'ABC - Viva Love',
+            'age_limit': 0,
+            'timestamp': 1461830400,
+            'upload_date': '20160428',
+            'uploader': 'ABC',
+            'track': 'Viva Love',
+            'artist': 'ABC',
+            'genre': 'Pop',
         },
+        'expected_warnings': ['Failed to download video versions info'],
+    }, {
+        # no genres available
+        'url': 'http://www.vevo.com/watch/INS171400764',
+        'only_matching': True,
     }]
     _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com'
     _SOURCE_TYPES = {
@@ -140,42 +189,41 @@ class VevoIE(InfoExtractor):
             errnote='Unable to retrieve oauth token')
 
         if 'THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION' in webpage:
-            raise ExtractorError(
-                '%s said: This page is currently unavailable in your region.' % self.IE_NAME, expected=True)
+            self.raise_geo_restricted(
+                '%s said: This page is currently unavailable in your region' % self.IE_NAME)
 
         auth_info = self._parse_json(webpage, video_id)
         self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['access_token']
 
-    def _call_api(self, path, video_id, note, errnote, fatal=True):
-        return self._download_json(self._api_url_template % path, video_id, note, errnote)
+    def _call_api(self, path, *args, **kwargs):
+        return self._download_json(self._api_url_template % path, *args, **kwargs)
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
+        json_url = 'http://api.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
         response = self._download_json(
-            json_url, video_id, 'Downloading video info', 'Unable to download info')
+            json_url, video_id, 'Downloading video info',
+            'Unable to download info', fatal=False) or {}
         video_info = response.get('video') or {}
-        video_versions = video_info.get('videoVersions')
+        artist = None
+        featured_artist = None
         uploader = None
-        timestamp = None
         view_count = None
         formats = []
 
         if not video_info:
-            if response.get('statusCode') != 909:
+            try:
+                self._initialize_api(video_id)
+            except ExtractorError:
                 ytid = response.get('errorInfo', {}).get('ytid')
                 if ytid:
                     self.report_warning(
                         'Video is geoblocked, trying with the YouTube video %s' % ytid)
                     return self.url_result(ytid, 'Youtube', ytid)
 
-                if 'statusMessage' in response:
-                    raise ExtractorError('%s said: %s' % (
-                        self.IE_NAME, response['statusMessage']), expected=True)
-                raise ExtractorError('Unable to extract videos')
+                raise
 
-            self._initialize_api(video_id)
             video_info = self._call_api(
                 'video/%s' % video_id, video_id, 'Downloading api video info',
                 'Failed to download video info')
@@ -183,12 +231,19 @@ class VevoIE(InfoExtractor):
             video_versions = self._call_api(
                 'video/%s/streams' % video_id, video_id,
                 'Downloading video versions info',
-                'Failed to download video versions info')
+                'Failed to download video versions info',
+                fatal=False)
+
+            # Some videos are only available via webpage (e.g.
+            # https://github.com/rg3/youtube-dl/issues/9366)
+            if not video_versions:
+                webpage = self._download_webpage(url, video_id)
+                video_versions = self._extract_json(webpage, video_id, 'streams')[video_id][0]
 
             timestamp = parse_iso8601(video_info.get('releaseDate'))
             artists = video_info.get('artists')
             if artists:
-                uploader = artists[0]['name']
+                artist = uploader = artists[0]['name']
             view_count = int_or_none(video_info.get('views', {}).get('total'))
 
             for video_version in video_versions:
@@ -241,7 +296,11 @@ class VevoIE(InfoExtractor):
                 scale=1000)
             artists = video_info.get('mainArtists')
             if artists:
-                uploader = artists[0]['artistName']
+                artist = uploader = artists[0]['artistName']
+
+            featured_artists = video_info.get('featuredArtists')
+            if featured_artists:
+                featured_artist = featured_artists[0]['artistName']
 
             smil_parsed = False
             for video_version in video_info['videoVersions']:
@@ -278,7 +337,15 @@ class VevoIE(InfoExtractor):
                         smil_parsed = True
         self._sort_formats(formats)
 
-        title = video_info['title']
+        track = video_info['title']
+        if featured_artist:
+            artist = '%s ft. %s' % (artist, featured_artist)
+        title = '%s - %s' % (artist, track) if artist else track
+
+        genres = video_info.get('genres')
+        genre = (
+            genres[0] if genres and isinstance(genres, list) and
+            isinstance(genres[0], compat_str) else None)
 
         is_explicit = video_info.get('isExplicit')
         if is_explicit is True:
@@ -300,4 +367,75 @@ class VevoIE(InfoExtractor):
             'duration': duration,
             'view_count': view_count,
             'age_limit': age_limit,
+            'track': track,
+            'artist': uploader,
+            'genre': genre,
         }
+
+
+class VevoPlaylistIE(VevoBaseIE):
+    _VALID_URL = r'https?://www\.vevo\.com/watch/(?P<kind>playlist|genre)/(?P<id>[^/?#&]+)'
+
+    _TESTS = [{
+        'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29',
+        'info_dict': {
+            'id': 'dadbf4e7-b99f-4184-9670-6f0e547b6a29',
+            'title': 'Best-Of: Birdman',
+        },
+        'playlist_count': 10,
+    }, {
+        'url': 'http://www.vevo.com/watch/genre/rock',
+        'info_dict': {
+            'id': 'rock',
+            'title': 'Rock',
+        },
+        'playlist_count': 20,
+    }, {
+        'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29?index=0',
+        'md5': '32dcdfddddf9ec6917fc88ca26d36282',
+        'info_dict': {
+            'id': 'USCMV1100073',
+            'ext': 'mp4',
+            'title': 'Birdman - Y.U. MAD',
+            'timestamp': 1323417600,
+            'upload_date': '20111209',
+            'uploader': 'Birdman',
+            'track': 'Y.U. MAD',
+            'artist': 'Birdman',
+            'genre': 'Rap/Hip-Hop',
+        },
+        'expected_warnings': ['Unable to download SMIL file'],
+    }, {
+        'url': 'http://www.vevo.com/watch/genre/rock?index=0',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        playlist_id = mobj.group('id')
+        playlist_kind = mobj.group('kind')
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+        index = qs.get('index', [None])[0]
+
+        if index:
+            video_id = self._search_regex(
+                r'<meta[^>]+content=(["\'])vevo://video/(?P<id>.+?)\1[^>]*>',
+                webpage, 'video id', default=None, group='id')
+            if video_id:
+                return self.url_result('vevo:%s' % video_id, VevoIE.ie_key())
+
+        playlists = self._extract_json(webpage, playlist_id, '%ss' % playlist_kind)
+
+        playlist = (list(playlists.values())[0]
+                    if playlist_kind == 'playlist' else playlists[playlist_id])
+
+        entries = [
+            self.url_result('vevo:%s' % src, VevoIE.ie_key())
+            for src in playlist['isrcs']]
+
+        return self.playlist_result(
+            entries, playlist.get('playlistId') or playlist_id,
+            playlist.get('name'), playlist.get('description'))
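
The ?index=N shortcut added to VevoPlaylistIE resolves to a single video before
any playlist parsing happens. A minimal sketch of that query-string handling,
using the Python 3 stdlib in place of the compat_urlparse wrappers:

    from urllib.parse import parse_qs, urlparse

    def playlist_index(url):
        # parse_qs maps every key to a list of values; take the first,
        # or None when the parameter is absent.
        qs = parse_qs(urlparse(url).query)
        return qs.get('index', [None])[0]

    assert playlist_index('http://www.vevo.com/watch/genre/rock?index=0') == '0'
    assert playlist_index('http://www.vevo.com/watch/genre/rock') is None

parse_qs keeps values as strings, so index=0 is still truthy; that is why the
?index=0 test above yields a single-video result rather than a playlist.
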
index 14e945d494cd2f6e5f3b3e6a03ff6ebb076826dd..b11cd254c7da9c8c780dedd2b2db120f8025c74b 100644 (file)
@@ -20,6 +20,7 @@ class VGTVIE(XstreamIE):
         'aftenbladet.no/tv': 'satv',
         'fvn.no/fvntv': 'fvntv',
         'aftenposten.no/webtv': 'aptv',
+        'ap.vgtv.no/webtv': 'aptv',
     }
 
     _APP_NAME_TO_VENDOR = {
@@ -35,7 +36,7 @@ class VGTVIE(XstreamIE):
                     (?P<host>
                         %s
                     )
-                    /
+                    /?
                     (?:
                         \#!/(?:video|live)/|
                         embed?.*id=
@@ -107,19 +108,27 @@ class VGTVIE(XstreamIE):
             'md5': 'fd828cd29774a729bf4d4425fe192972',
             'info_dict': {
                 'id': '21039',
-                'ext': 'mov',
+                'ext': 'mp4',
                 'title': 'TRAILER: «SWEATSHOP» - I can´t take any more',
                 'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',
                 'duration': 66,
                 'timestamp': 1417002452,
                 'upload_date': '20141126',
                 'view_count': int,
-            }
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
         },
         {
             'url': 'http://www.bt.no/tv/#!/video/100250/norling-dette-er-forskjellen-paa-1-divisjon-og-eliteserien',
             'only_matching': True,
         },
+        {
+            'url': 'http://ap.vgtv.no/webtv#!/video/111084/de-nye-bysyklene-lettere-bedre-gir-stoerre-hjul-og-feste-til-mobil',
+            'only_matching': True,
+        },
     ]
 
     def _real_extract(self, url):
@@ -144,8 +153,6 @@ class VGTVIE(XstreamIE):
         if len(video_id) == 5:
             if appname == 'bttv':
                 info = self._extract_video_info('btno', video_id)
-            elif appname == 'aptv':
-                info = self._extract_video_info('ap', video_id)
 
         streams = data['streamUrls']
         stream_type = data.get('streamType')
@@ -207,7 +214,7 @@ class VGTVIE(XstreamIE):
 class BTArticleIE(InfoExtractor):
     IE_NAME = 'bt:article'
     IE_DESC = 'Bergens Tidende Articles'
-    _VALID_URL = 'http://(?:www\.)?bt\.no/(?:[^/]+/)+(?P<id>[^/]+)-\d+\.html'
+    _VALID_URL = r'https?://(?:www\.)?bt\.no/(?:[^/]+/)+(?P<id>[^/]+)-\d+\.html'
     _TEST = {
         'url': 'http://www.bt.no/nyheter/lokalt/Kjemper-for-internatet-1788214.html',
         'md5': '2acbe8ad129b3469d5ae51b1158878df',
@@ -234,7 +241,7 @@ class BTArticleIE(InfoExtractor):
 class BTVestlendingenIE(InfoExtractor):
     IE_NAME = 'bt:vestlendingen'
     IE_DESC = 'Bergens Tidende - Vestlendingen'
-    _VALID_URL = 'http://(?:www\.)?bt\.no/spesial/vestlendingen/#!/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?bt\.no/spesial/vestlendingen/#!/(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://www.bt.no/spesial/vestlendingen/#!/86588',
         'md5': 'd7d17e3337dc80de6d3a540aefbe441b',
index 3db6286e48c1402ff210d8f4cc666dac1a918a86..e2b2ce0981cc8767ade2f5ef4c8bc52759b86af3 100644 (file)
@@ -1,31 +1,47 @@
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
-from .ooyala import OoyalaIE
 from ..utils import ExtractorError
 
 
 class ViceIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)+(?P<id>.+)'
-
-    _TESTS = [
-        {
-            'url': 'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
-            'info_dict': {
-                'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
-                'ext': 'mp4',
-                'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
-                'duration': 725.983,
-            },
-            'params': {
-                # Requires ffmpeg (m3u8 manifest)
-                'skip_download': True,
-            },
-        }, {
-            'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
-            'only_matching': True,
-        }
-    ]
+    _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?videos?/(?P<id>[^/?#&]+)'
+
+    _TESTS = [{
+        'url': 'http://www.vice.com/video/cowboy-capitalists-part-1',
+        'md5': 'e9d77741f9e42ba583e683cd170660f7',
+        'info_dict': {
+            'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
+            'ext': 'flv',
+            'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
+            'duration': 725.983,
+        },
+        'add_ie': ['Ooyala'],
+    }, {
+        'url': 'http://www.vice.com/video/how-to-hack-a-car',
+        'md5': '6fb2989a3fed069fb8eab3401fc2d3c9',
+        'info_dict': {
+            'id': '3jstaBeXgAs',
+            'ext': 'mp4',
+            'title': 'How to Hack a Car: Phreaked Out (Episode 2)',
+            'description': 'md5:ee95453f7ff495db8efe14ae8bf56f30',
+            'uploader_id': 'MotherboardTV',
+            'uploader': 'Motherboard',
+            'upload_date': '20140529',
+        },
+        'add_ie': ['Youtube'],
+    }, {
+        'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.vice.com/ru/video/big-night-out-ibiza-clive-martin-229',
+        'only_matching': True,
+    }, {
+        'url': 'https://munchies.vice.com/en/videos/watch-the-trailer-for-our-new-series-the-pizza-show',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -33,8 +49,43 @@ class ViceIE(InfoExtractor):
         try:
             embed_code = self._search_regex(
                 r'embedCode=([^&\'"]+)', webpage,
-                'ooyala embed code')
-            ooyala_url = OoyalaIE._url_for_embed_code(embed_code)
+                'ooyala embed code', default=None)
+            if embed_code:
+                return self.url_result('ooyala:%s' % embed_code, 'Ooyala')
+            youtube_id = self._search_regex(
+                r'data-youtube-id="([^"]+)"', webpage, 'youtube id')
+            return self.url_result(youtube_id, 'Youtube')
         except ExtractorError:
             raise ExtractorError('The page doesn\'t contain a video', expected=True)
-        return self.url_result(ooyala_url, ie='Ooyala')
+
+
+class ViceShowIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?show/(?P<id>[^/?#&]+)'
+
+    _TEST = {
+        'url': 'https://munchies.vice.com/en/show/fuck-thats-delicious-2',
+        'info_dict': {
+            'id': 'fuck-thats-delicious-2',
+            'title': "Fuck, That's Delicious",
+            'description': 'Follow the culinary adventures of rapper Action Bronson during his ongoing world tour.',
+        },
+        'playlist_count': 17,
+    }
+
+    def _real_extract(self, url):
+        show_id = self._match_id(url)
+        webpage = self._download_webpage(url, show_id)
+
+        entries = [
+            self.url_result(video_url, ViceIE.ie_key())
+            for video_url, _ in re.findall(
+                r'<h2[^>]+class="article-title"[^>]+data-id="\d+"[^>]*>\s*<a[^>]+href="(%s.*?)"'
+                % ViceIE._VALID_URL, webpage)]
+
+        title = self._search_regex(
+            r'<title>(.+?)</title>', webpage, 'title', default=None)
+        if title:
+            title = re.sub(r'(.+)\s*\|\s*.+$', r'\1', title).strip()
+        description = self._html_search_meta('description', webpage, 'description')
+
+        return self.playlist_result(entries, show_id, title, description)
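
ViceShowIE embeds ViceIE._VALID_URL inside a larger href pattern. Since
_VALID_URL carries its own capturing group, re.findall yields tuples rather
than bare strings, hence the 'for video_url, _ in ...' unpacking above. A toy
demonstration of the same pattern shape, run against hypothetical HTML:

    import re

    VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?videos?/(?P<id>[^/?#&]+)'
    html = '<a href="https://munchies.vice.com/en/videos/some-episode">'

    # The outer (...) wraps VALID_URL, which brings its own (?P<id>...) group,
    # so findall returns (href, id) tuples.
    for href, video_id in re.findall(r'href="(%s)"' % VALID_URL, html):
        print(href, video_id)
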
index 6bfbd4d85d1810d94398111562b4fcbc15ba215f..8d92aee878d3ad0c0d5725db755451c88e527f66 100644 (file)
@@ -2,7 +2,7 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_parse,
+    compat_urllib_parse_urlencode,
     compat_urlparse,
 )
 from ..utils import (
@@ -93,7 +93,7 @@ class ViddlerIE(InfoExtractor):
         headers = {'Referer': 'http://static.cdn-ec.viddler.com/js/arpeggio/v2/embed.html'}
         request = sanitized_Request(
             'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?%s'
-            % compat_urllib_parse.urlencode(query), None, headers)
+            % compat_urllib_parse_urlencode(query), None, headers)
         data = self._download_json(request, video_id)['video']
 
         formats = []
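
The switch from compat_urllib_parse.urlencode to compat_urllib_parse_urlencode
here (and in vimeo.py further down) flattens the compat layer to one name per
symbol. Roughly what the alias resolves to on each interpreter; a sketch only,
the real compat.py also papers over Unicode handling on Python 2:

    try:
        from urllib.parse import urlencode as compat_urllib_parse_urlencode  # Python 3
    except ImportError:
        from urllib import urlencode as compat_urllib_parse_urlencode  # Python 2

    query = {'video_id': 'abc123', 'key': 'html5'}
    print(compat_urllib_parse_urlencode(query))  # e.g. video_id=abc123&key=html5
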
index 0ffc7ff7dc9185a3a3ec5c0fd14d302872662dda..2ed5d964344211c22d2260b1946273772434db8b 100644 (file)
@@ -14,8 +14,11 @@ class VideoDetectiveIE(InfoExtractor):
             'id': '194487',
             'ext': 'mp4',
             'title': 'KICK-ASS 2',
-            'description': 'md5:65ba37ad619165afac7d432eaded6013',
-            'duration': 138,
+            'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
         },
     }
 
@@ -24,4 +27,4 @@ class VideoDetectiveIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
         og_video = self._og_search_video_url(webpage)
         query = compat_urlparse.urlparse(og_video).query
-        return self.url_result(InternetVideoArchiveIE._build_url(query), ie=InternetVideoArchiveIE.ie_key())
+        return self.url_result(InternetVideoArchiveIE._build_json_url(query), ie=InternetVideoArchiveIE.ie_key())
index 5e2e7cbacc4d52a91dd3a789b76874fffac069db..4f0dcd18c7f28ab17aec58c814d53fd8ae21e7ac 100644 (file)
@@ -4,11 +4,13 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import sanitized_Request
+from ..utils import (
+    decode_packed_codes,
+    sanitized_Request,
+)
 
 
 class VideoMegaIE(InfoExtractor):
-    _WORKING = False
     _VALID_URL = r'(?:videomega:|https?://(?:www\.)?videomega\.tv/(?:(?:view|iframe|cdn)\.php)?\?ref=)(?P<id>[A-Za-z0-9]+)'
     _TESTS = [{
         'url': 'http://videomega.tv/cdn.php?ref=AOSQBJYKIDDIKYJBQSOA',
@@ -42,8 +44,10 @@ class VideoMegaIE(InfoExtractor):
             r'(?:^[Vv]ideo[Mm]ega\.tv\s-\s*|\s*-\svideomega\.tv$)', '', title)
         thumbnail = self._search_regex(
             r'<video[^>]+?poster="([^"]+)"', webpage, 'thumbnail', fatal=False)
+
+        real_codes = decode_packed_codes(webpage)
         video_url = self._search_regex(
-            r'<source[^>]+?src="([^"]+)"', webpage, 'video URL')
+            r'"src"\s*,\s*"([^"]+)"', real_codes, 'video URL')
 
         return {
             'id': video_id,
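
Both this extractor and vidzi.py below now run the page through
utils.decode_packed_codes before hunting for the stream URL, because the player
setup is wrapped in Dean Edwards' eval(function(p,a,c,k,e,d){...}) packer. A
toy decoder showing the core idea; the real helper also locates the packed blob
with a regex, and the argument names simply mirror the packer's convention:

    import re

    def base_n(num, base, digits='0123456789abcdefghijklmnopqrstuvwxyz'):
        # Render num in the packer's base-`base` alphabet.
        return ('' if num < base else base_n(num // base, base)) + digits[num % base]

    def decode_packed(p, a, c, k):
        # Every word in p is a base-`a` index into the symbol list k;
        # an empty symbol means the token stands for itself.
        table = {base_n(i, a): (k[i] or base_n(i, a)) for i in range(c)}
        return re.sub(r'\b\w+\b', lambda m: table.get(m.group(0), m.group(0)), p)

    # '0' -> 'player', '1' -> 'setup' in a two-symbol table:
    print(decode_packed('0.1()', 36, 2, ['player', 'setup']))  # player.setup()
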
index 0bd1e1eec5212f9b2bb101870765b014c062d011..04e95c66e91eec8368d325d1086ac59dddf7656f 100644 (file)
@@ -111,6 +111,7 @@ class VideomoreIE(InfoExtractor):
 
         video_url = xpath_text(video, './/video_url', 'video url', fatal=True)
         formats = self._extract_f4m_formats(video_url, video_id, f4m_id='hds')
+        self._sort_formats(formats)
 
         data = self._download_json(
             'http://videomore.ru/video/tracks/%s.json' % video_id,
index 2cd36508a2dfef07a963109327349e7c453ab298..0f798711bca7ebc25c893f82746ed3b1a49ff778 100644 (file)
@@ -14,7 +14,7 @@ class VideoTtIE(InfoExtractor):
     _WORKING = False
     ID_NAME = 'video.tt'
     IE_DESC = 'video.tt - Your True Tube'
-    _VALID_URL = r'http://(?:www\.)?video\.tt/(?:(?:video|embed)/|watch_video\.php\?v=)(?P<id>[\da-zA-Z]{9})'
+    _VALID_URL = r'https?://(?:www\.)?video\.tt/(?:(?:video|embed)/|watch_video\.php\?v=)(?P<id>[\da-zA-Z]{9})'
 
     _TESTS = [{
         'url': 'http://www.video.tt/watch_video.php?v=amd5YujV8',
diff --git a/youtube_dl/extractor/vidio.py b/youtube_dl/extractor/vidio.py
new file mode 100644 (file)
index 0000000..6898042
--- /dev/null
@@ -0,0 +1,73 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class VidioIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?vidio\.com/watch/(?P<id>\d+)-(?P<display_id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015',
+        'md5': 'cd2801394afc164e9775db6a140b91fe',
+        'info_dict': {
+            'id': '165683',
+            'display_id': 'dj_ambred-booyah-live-2015',
+            'ext': 'mp4',
+            'title': 'DJ_AMBRED - Booyah (Live 2015)',
+            'description': 'md5:27dc15f819b6a78a626490881adbadf8',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 149,
+            'like_count': int,
+        },
+    }, {
+        'url': 'https://www.vidio.com/watch/77949-south-korea-test-fires-missile-that-can-strike-all-of-the-north',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id, display_id = mobj.group('id', 'display_id')
+
+        webpage = self._download_webpage(url, display_id)
+
+        title = self._og_search_title(webpage)
+
+        m3u8_url, duration, thumbnail = [None] * 3
+
+        clips = self._parse_json(
+            self._html_search_regex(
+                r'data-json-clips\s*=\s*(["\'])(?P<data>\[.+?\])\1',
+                webpage, 'video data', default='[]', group='data'),
+            display_id, fatal=False)
+        if clips:
+            clip = clips[0]
+            m3u8_url = clip.get('sources', [{}])[0].get('file')
+            duration = clip.get('clip_duration')
+            thumbnail = clip.get('image')
+
+        m3u8_url = m3u8_url or self._search_regex(
+            r'data(?:-vjs)?-clip-hls-url=(["\'])(?P<url>.+?)\1', webpage, 'hls url')
+        formats = self._extract_m3u8_formats(m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native')
+
+        duration = int_or_none(duration or self._search_regex(
+            r'data-video-duration=(["\'])(?P<duration>\d+)\1', webpage, 'duration'))
+        thumbnail = thumbnail or self._og_search_thumbnail(webpage)
+
+        like_count = int_or_none(self._search_regex(
+            (r'<span[^>]+data-comment-vote-count=["\'](\d+)',
+             r'<span[^>]+class=["\'].*?\blike(?:__|-)count\b.*?["\'][^>]*>\s*(\d+)'),
+            webpage, 'like count', fatal=False))
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': self._og_search_description(webpage),
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'like_count': like_count,
+            'formats': formats,
+        }
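
The like_count lookup above hands _search_regex a tuple of patterns, which are
tried in order until one matches; useful when a site ships several markups at
once. A plain-re sketch of that fallback behaviour (search_first is a
hypothetical stand-in for the helper):

    import re

    def search_first(patterns, text):
        # Try each pattern in order; the first match wins.
        for pattern in patterns:
            m = re.search(pattern, text)
            if m:
                return m.group(1)
        return None

    html = '<span class="like-count">42</span>'
    print(search_first(
        (r'data-comment-vote-count=["\'](\d+)',
         r'class=["\'][^"\']*\blike-count\b[^"\']*["\'][^>]*>\s*(\d+)'),
        html))  # 42
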
index 7c6e98026823e0df415664ad0b4d6995c9e2f673..3c78fb3d5a071a6f49dec7467e620c0b8a01ded9 100644 (file)
@@ -1,11 +1,14 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-from .common import InfoExtractor
-from ..utils import smuggle_url
+from .jwplatform import JWPlatformBaseIE
+from ..utils import (
+    decode_packed_codes,
+    js_to_json,
+)
 
 
-class VidziIE(InfoExtractor):
+class VidziIE(JWPlatformBaseIE):
     _VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?P<id>\w+)'
     _TEST = {
         'url': 'http://vidzi.tv/cghql9yq6emu.html',
@@ -14,7 +17,6 @@ class VidziIE(InfoExtractor):
             'id': 'cghql9yq6emu',
             'ext': 'mp4',
             'title': 'youtube-dl test video  1\\\\2\'3/4<5\\\\6ä7↭',
-            'uploader': 'vidzi.tv',
         },
         'params': {
             # m3u8 download
@@ -29,11 +31,12 @@ class VidziIE(InfoExtractor):
         title = self._html_search_regex(
             r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')
 
-        # Vidzi now uses jwplayer, which can be handled by GenericIE
-        return {
-            '_type': 'url_transparent',
-            'id': video_id,
-            'title': title,
-            'url': smuggle_url(url, {'to_generic': True}),
-            'ie_key': 'Generic',
-        }
+        code = decode_packed_codes(webpage).replace('\\\'', '\'')
+        jwplayer_data = self._parse_json(
+            self._search_regex(r'setup\(([^)]+)\)', code, 'jwplayer data'),
+            video_id, transform_source=js_to_json)
+
+        info_dict = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False)
+        info_dict['title'] = title
+
+        return info_dict
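
After unpacking, VidziIE grabs the argument of jwplayer's setup(...) call and
parses it. A compressed illustration; the literal below is already valid JSON,
so the sketch can skip the js_to_json pass the real code needs for
JavaScript-style object syntax:

    import json
    import re

    code = 'player.setup({"file": "http://example.com/v.m3u8", "image": "t.jpg"})'
    jwplayer_data = json.loads(
        re.search(r'setup\(([^)]+)\)', code).group(1))
    print(jwplayer_data['file'])  # http://example.com/v.m3u8
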
index c76c20614e49b7468234aed68a985f6476fe1d0a..6645c6186dbff315e850f22ae793677803cbbf9b 100644 (file)
@@ -50,6 +50,7 @@ class VierIE(InfoExtractor):
 
         playlist_url = 'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8' % (application, filename)
         formats = self._extract_m3u8_formats(playlist_url, display_id, 'mp4')
+        self._sort_formats(formats)
 
         title = self._og_search_title(webpage, default=display_id)
         description = self._og_search_description(webpage, default=None)
similarity index 80%
rename from youtube_dl/extractor/snagfilms.py
rename to youtube_dl/extractor/viewlift.py
index 6977afb27850ff908f1c08fcc4ecc672b7b5cc9f..19500eba84f1bf7b4fdf7c59b8b56ca7e5b91efc 100644 (file)
@@ -13,8 +13,12 @@ from ..utils import (
 )
 
 
-class SnagFilmsEmbedIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:(?:www|embed)\.)?snagfilms\.com/embed/player\?.*\bfilmId=(?P<id>[\da-f-]{36})'
+class ViewLiftBaseIE(InfoExtractor):
+    _DOMAINS_REGEX = r'(?:snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|monumentalsportsnetwork|vayafilm)\.com|kesari\.tv'
+
+
+class ViewLiftEmbedIE(ViewLiftBaseIE):
+    _VALID_URL = r'https?://(?:(?:www|embed)\.)?(?:%s)/embed/player\?.*\bfilmId=(?P<id>[\da-f-]{36})' % ViewLiftBaseIE._DOMAINS_REGEX
     _TESTS = [{
         'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831&w=500',
         'md5': '2924e9215c6eff7a55ed35b72276bd93',
@@ -40,7 +44,7 @@ class SnagFilmsEmbedIE(InfoExtractor):
     @staticmethod
     def _extract_url(webpage):
         mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:embed\.)?snagfilms\.com/embed/player.+?)\1',
+            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:embed\.)?(?:%s)/embed/player.+?)\1' % ViewLiftBaseIE._DOMAINS_REGEX,
             webpage)
         if mobj:
             return mobj.group('url')
@@ -55,6 +59,7 @@ class SnagFilmsEmbedIE(InfoExtractor):
                 'Film %s is not playable in your area.' % video_id, expected=True)
 
         formats = []
+        has_bitrate = False
         for source in self._parse_json(js_to_json(self._search_regex(
                 r'(?s)sources:\s*(\[.+?\]),', webpage, 'json')), video_id):
             file_ = source.get('file')
@@ -63,22 +68,25 @@ class SnagFilmsEmbedIE(InfoExtractor):
             type_ = source.get('type')
             ext = determine_ext(file_)
             format_id = source.get('label') or ext
-            if all(v == 'm3u8' for v in (type_, ext)):
+            if all(v in ('m3u8', 'hls') for v in (type_, ext)):
                 formats.extend(self._extract_m3u8_formats(
                     file_, video_id, 'mp4', m3u8_id='hls'))
             else:
                 bitrate = int_or_none(self._search_regex(
                     [r'(\d+)kbps', r'_\d{1,2}x\d{1,2}_(\d{3,})\.%s' % ext],
                     file_, 'bitrate', default=None))
+                if not has_bitrate and bitrate:
+                    has_bitrate = True
                 height = int_or_none(self._search_regex(
                     r'^(\d+)[pP]$', format_id, 'height', default=None))
                 formats.append({
                     'url': file_,
-                    'format_id': format_id,
+                    'format_id': 'http-%s%s' % (format_id, ('-%dk' % bitrate if bitrate else '')),
                     'tbr': bitrate,
                     'height': height,
                 })
-        self._sort_formats(formats)
+        field_preference = None if has_bitrate else ('height', 'tbr', 'format_id')
+        self._sort_formats(formats, field_preference)
 
         title = self._search_regex(
             [r"title\s*:\s*'([^']+)'", r'<title>([^<]+)</title>'],
@@ -91,8 +99,8 @@ class SnagFilmsEmbedIE(InfoExtractor):
         }
 
 
-class SnagFilmsIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?snagfilms\.com/(?:films/title|show)/(?P<id>[^?#]+)'
+class ViewLiftIE(ViewLiftBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?(?P<domain>%s)/(?:films/title|show|(?:news/)?videos?)/(?P<id>[^?#]+)' % ViewLiftBaseIE._DOMAINS_REGEX
     _TESTS = [{
         'url': 'http://www.snagfilms.com/films/title/lost_for_life',
         'md5': '19844f897b35af219773fd63bdec2942',
@@ -127,10 +135,20 @@ class SnagFilmsIE(InfoExtractor):
         # Film is not available.
         'url': 'http://www.snagfilms.com/show/augie_alone/flirting',
         'only_matching': True,
+    }, {
+        'url': 'http://www.winnersview.com/videos/the-good-son',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.kesari.tv/news/video/1461919076414',
+        'only_matching': True,
+    }, {
+        # Was once Kaltura embed
+        'url': 'https://www.monumentalsportsnetwork.com/videos/john-carlson-postgame-2-25-15',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
-        display_id = self._match_id(url)
+        domain, display_id = re.match(self._VALID_URL, url).groups()
 
         webpage = self._download_webpage(url, display_id)
 
@@ -170,7 +188,7 @@ class SnagFilmsIE(InfoExtractor):
 
         return {
             '_type': 'url_transparent',
-            'url': 'http://embed.snagfilms.com/embed/player?filmId=%s' % film_id,
+            'url': 'http://%s/embed/player?filmId=%s' % (domain, film_id),
             'id': film_id,
             'display_id': display_id,
             'title': title,
@@ -178,4 +196,5 @@ class SnagFilmsIE(InfoExtractor):
             'thumbnail': thumbnail,
             'duration': duration,
             'categories': categories,
+            'ie_key': 'ViewLiftEmbed',
         }
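
The has_bitrate flag introduced above decides how _sort_formats ranks the HTTP
formats: if no bitrate could be parsed out of any file URL, sorting falls back
to height. The decision in isolation, as a sketch:

    def field_preference_for(formats):
        # With at least one real bitrate the default ordering (which weighs
        # tbr) is fine; otherwise prefer height, then tbr, then format_id.
        has_bitrate = any(f.get('tbr') for f in formats)
        return None if has_bitrate else ('height', 'tbr', 'format_id')

    print(field_preference_for([{'height': 720}]))  # ('height', 'tbr', 'format_id')
    print(field_preference_for([{'tbr': 1200}]))    # None
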
index fe94a479339035dfd9a70386e903e5c3770fdfea..a93196a0772fd5588dd2f55c327427d00e814eb4 100644 (file)
@@ -1,10 +1,11 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..compat import (
     compat_HTTPError,
-    compat_urllib_parse,
     compat_urllib_parse_unquote,
 )
 from ..utils import (
@@ -14,6 +15,7 @@ from ..utils import (
     parse_iso8601,
     sanitized_Request,
     HEADRequest,
+    url_basename,
 )
 
 
@@ -75,11 +77,11 @@ class ViewsterIE(InfoExtractor):
 
     _ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01'
 
-    def _download_json(self, url, video_id, note='Downloading JSON metadata', fatal=True):
+    def _download_json(self, url, video_id, note='Downloading JSON metadata', fatal=True, query={}):
         request = sanitized_Request(url)
         request.add_header('Accept', self._ACCEPT_HEADER)
         request.add_header('Auth-token', self._AUTH_TOKEN)
-        return super(ViewsterIE, self)._download_json(request, video_id, note, fatal=fatal)
+        return super(ViewsterIE, self)._download_json(request, video_id, note, fatal=fatal, query=query)
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -114,43 +116,85 @@ class ViewsterIE(InfoExtractor):
             return self.playlist_result(entries, video_id, title, description)
 
         formats = []
-        for media_type in ('application/f4m+xml', 'application/x-mpegURL', 'video/mp4'):
-            media = self._download_json(
-                'https://public-api.viewster.com/movies/%s/video?mediaType=%s'
-                % (entry_id, compat_urllib_parse.quote(media_type)),
-                video_id, 'Downloading %s JSON' % media_type, fatal=False)
-            if not media:
-                continue
-            video_url = media.get('Uri')
-            if not video_url:
-                continue
-            ext = determine_ext(video_url)
-            if ext == 'f4m':
-                video_url += '&' if '?' in video_url else '?'
-                video_url += 'hdcore=3.2.0&plugin=flowplayer-3.2.0.1'
-                formats.extend(self._extract_f4m_formats(
-                    video_url, video_id, f4m_id='hds'))
-            elif ext == 'm3u8':
-                m3u8_formats = self._extract_m3u8_formats(
-                    video_url, video_id, 'mp4', m3u8_id='hls',
-                    fatal=False)  # m3u8 sometimes fail
-                if m3u8_formats:
-                    formats.extend(m3u8_formats)
-            else:
-                format_id = media.get('Bitrate')
-                f = {
-                    'url': video_url,
-                    'format_id': 'mp4-%s' % format_id,
-                    'height': int_or_none(media.get('Height')),
-                    'width': int_or_none(media.get('Width')),
-                    'preference': 1,
-                }
-                if format_id and not f['height']:
-                    f['height'] = int_or_none(self._search_regex(
-                        r'^(\d+)[pP]$', format_id, 'height', default=None))
-                formats.append(f)
-
-        if not formats and not info.get('LanguageSets') and not info.get('VODSettings'):
+        for language_set in info.get('LanguageSets', []):
+            manifest_url = None
+            m3u8_formats = []
+            audio = language_set.get('Audio') or ''
+            subtitle = language_set.get('Subtitle') or ''
+            base_format_id = audio
+            if subtitle:
+                base_format_id += '-%s' % subtitle
+
+            def concat(suffix, sep='-'):
+                return (base_format_id + '%s%s' % (sep, suffix)) if base_format_id else suffix
+
+            for media_type in ('application/f4m+xml', 'application/x-mpegURL', 'video/mp4'):
+                media = self._download_json(
+                    'https://public-api.viewster.com/movies/%s/video' % entry_id,
+                    video_id, 'Downloading %s JSON' % concat(media_type, ' '), fatal=False, query={
+                        'mediaType': media_type,
+                        'language': audio,
+                        'subtitle': subtitle,
+                    })
+                if not media:
+                    continue
+                video_url = media.get('Uri')
+                if not video_url:
+                    continue
+                ext = determine_ext(video_url)
+                if ext == 'f4m':
+                    manifest_url = video_url
+                    video_url += '&' if '?' in video_url else '?'
+                    video_url += 'hdcore=3.2.0&plugin=flowplayer-3.2.0.1'
+                    formats.extend(self._extract_f4m_formats(
+                        video_url, video_id, f4m_id=concat('hds')))
+                elif ext == 'm3u8':
+                    manifest_url = video_url
+                    m3u8_formats = self._extract_m3u8_formats(
+                        video_url, video_id, 'mp4', m3u8_id=concat('hls'),
+                        fatal=False)  # m3u8 sometimes fails
+                    if m3u8_formats:
+                        formats.extend(m3u8_formats)
+                else:
+                    qualities_basename = self._search_regex(
+                        r'/([^/]+)\.csmil/',
+                        manifest_url, 'qualities basename', default=None)
+                    if not qualities_basename:
+                        continue
+                    QUALITIES_RE = r'((,\d+k)+,?)'
+                    qualities = self._search_regex(
+                        QUALITIES_RE, qualities_basename,
+                        'qualities', default=None)
+                    if not qualities:
+                        continue
+                    qualities = list(map(lambda q: int(q[:-1]), qualities.strip(',').split(',')))
+                    qualities.sort()
+                    http_template = re.sub(QUALITIES_RE, r'%dk', qualities_basename)
+                    http_url_basename = url_basename(video_url)
+                    if m3u8_formats:
+                        self._sort_formats(m3u8_formats)
+                        m3u8_formats = list(filter(
+                            lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
+                            m3u8_formats))
+                    if len(qualities) == len(m3u8_formats):
+                        for q, m3u8_format in zip(qualities, m3u8_formats):
+                            f = m3u8_format.copy()
+                            f.update({
+                                'url': video_url.replace(http_url_basename, http_template % q),
+                                'format_id': f['format_id'].replace('hls', 'http'),
+                                'protocol': 'http',
+                            })
+                            formats.append(f)
+                    else:
+                        for q in qualities:
+                            formats.append({
+                                'url': video_url.replace(http_url_basename, http_template % q),
+                                'ext': 'mp4',
+                                'format_id': 'http-%d' % q,
+                                'tbr': q,
+                            })
+
+        if not formats and not info.get('VODSettings'):
             self.raise_geo_restricted()
 
         self._sort_formats(formats)
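
The new HTTP branch above derives progressive download URLs from the .csmil
basename of the manifest, whose comma-separated segment lists the available
bitrates. A condensed sketch of that template trick; function and variable
names here are illustrative:

    import re

    QUALITIES_RE = r'((,\d+k)+,?)'

    def http_urls(manifest_basename, sample_url):
        # 'video_,400k,800k,1200k,.mp4' -> template 'video_%dk.mp4'
        m = re.search(QUALITIES_RE, manifest_basename)
        if not m:
            return []
        qualities = sorted(int(q[:-1]) for q in m.group(1).strip(',').split(','))
        template = re.sub(QUALITIES_RE, '%dk', manifest_basename)
        base = sample_url.rsplit('/', 1)[-1]
        return [sample_url.replace(base, template % q) for q in qualities]

    print(http_urls('video_,400k,800k,1200k,.mp4',
                    'http://cdn.example.com/video_400k.mp4'))
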
index 315984bf9dbd298304d4211c3793f3aaf5d267e2..a4f914d1449ad1ad4fd38938fe81591a703d6120 100644 (file)
@@ -15,7 +15,7 @@ from ..utils import (
 
 
 class ViideaIE(InfoExtractor):
-    _VALID_URL = r'''(?x)http://(?:www\.)?(?:
+    _VALID_URL = r'''(?x)https?://(?:www\.)?(?:
             videolectures\.net|
             flexilearn\.viidea\.net|
             presentations\.ocwconsortium\.org|
@@ -151,6 +151,7 @@ class ViideaIE(InfoExtractor):
                 smil_url = '%s/%s/video/%s/smil.xml' % (base_url, lecture_slug, part_id)
                 smil = self._download_smil(smil_url, lecture_id)
                 info = self._parse_smil(smil, smil_url, lecture_id)
+                self._sort_formats(info['formats'])
                 info['id'] = lecture_id if not multipart else '%s_part%s' % (lecture_id, part_id)
                 info['display_id'] = lecture_slug if not multipart else '%s_part%s' % (lecture_slug, part_id)
                 if multipart:
index 433fc9914a1d59fbc6743e8e82815b429a695cc5..efa15e0b633f56dc24c15eeb4e63889cbb084ab3 100644 (file)
@@ -101,10 +101,13 @@ class VikiBaseIE(InfoExtractor):
             self.report_warning('Unable to get session token, login has probably failed')
 
     @staticmethod
-    def dict_selection(dict_obj, preferred_key):
+    def dict_selection(dict_obj, preferred_key, allow_fallback=True):
         if preferred_key in dict_obj:
             return dict_obj.get(preferred_key)
 
+        if not allow_fallback:
+            return
+
         filtered_dict = list(filter(None, [dict_obj.get(k) for k in dict_obj.keys()]))
         return filtered_dict[0] if filtered_dict else None
 
@@ -127,7 +130,7 @@ class VikiIE(VikiBaseIE):
     }, {
         # clip
         'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference',
-        'md5': '86c0b5dbd4d83a6611a79987cc7a1989',
+        'md5': 'feea2b1d7b3957f70886e6dfd8b8be84',
         'info_dict': {
             'id': '1067139v',
             'ext': 'mp4',
@@ -156,17 +159,18 @@ class VikiIE(VikiBaseIE):
         'params': {
             # m3u8 download
             'skip_download': True,
-        }
+        },
+        'skip': 'Blocked in the US',
     }, {
         # episode
         'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
-        'md5': '190f3ef426005ba3a080a63325955bc3',
+        'md5': '1f54697dabc8f13f31bf06bb2e4de6db',
         'info_dict': {
             'id': '44699v',
             'ext': 'mp4',
             'title': 'Boys Over Flowers - Episode 1',
-            'description': 'md5:52617e4f729c7d03bfd4bcbbb6e946f2',
-            'duration': 4155,
+            'description': 'md5:b89cf50038b480b88b5b3c93589a9076',
+            'duration': 4204,
             'timestamp': 1270496524,
             'upload_date': '20100405',
             'uploader': 'group8',
@@ -176,13 +180,13 @@ class VikiIE(VikiBaseIE):
     }, {
         # youtube external
         'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
-        'md5': '216d1afdc0c64d1febc1e9f2bd4b864b',
+        'md5': '63f8600c1da6f01b7640eee7eca4f1da',
         'info_dict': {
             'id': '50562v',
-            'ext': 'mp4',
+            'ext': 'webm',
             'title': 'Poor Nastya [COMPLETE] - Episode 1',
             'description': '',
-            'duration': 607,
+            'duration': 606,
             'timestamp': 1274949505,
             'upload_date': '20101213',
             'uploader': 'ad14065n',
@@ -196,7 +200,7 @@ class VikiIE(VikiBaseIE):
     }, {
         # non-English description
         'url': 'http://www.viki.com/videos/158036v-love-in-magic',
-        'md5': '1713ae35df5a521b31f6dc40730e7c9c',
+        'md5': '013dc282714e22acf9447cad14ff1208',
         'info_dict': {
             'id': '158036v',
             'ext': 'mp4',
@@ -217,7 +221,7 @@ class VikiIE(VikiBaseIE):
 
         self._check_errors(video)
 
-        title = self.dict_selection(video.get('titles', {}), 'en')
+        title = self.dict_selection(video.get('titles', {}), 'en', allow_fallback=False)
         if not title:
             title = 'Episode %d' % video.get('number') if video.get('type') == 'episode' else video.get('id') or video_id
             container_titles = video.get('container', {}).get('titles', {})
@@ -302,7 +306,7 @@ class VikiChannelIE(VikiBaseIE):
             'title': 'Boys Over Flowers',
             'description': 'md5:ecd3cff47967fe193cff37c0bec52790',
         },
-        'playlist_count': 70,
+        'playlist_mincount': 71,
     }, {
         'url': 'http://www.viki.com/tv/1354c-poor-nastya-complete',
         'info_dict': {
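
dict_selection gains an allow_fallback switch so the title lookup can insist on
the English key instead of silently grabbing whatever language happens to be
present. The helper restated standalone, with a small usage example:

    def dict_selection(dict_obj, preferred_key, allow_fallback=True):
        # Prefer one key; optionally fall back to the first non-empty value.
        if preferred_key in dict_obj:
            return dict_obj.get(preferred_key)
        if not allow_fallback:
            return None
        filtered = [v for v in dict_obj.values() if v]
        return filtered[0] if filtered else None

    titles = {'ko': '꽃보다 남자', 'ja': ''}
    print(dict_selection(titles, 'en', allow_fallback=False))  # None
    print(dict_selection(titles, 'en'))                        # 꽃보다 남자
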
index 3049dffb6c98ed95251262bd3013add99f4b41db..d9c9852d463d2ee533e3447f536a1a665e06ddd8 100644 (file)
@@ -8,14 +8,15 @@ import itertools
 from .common import InfoExtractor
 from ..compat import (
     compat_HTTPError,
+    compat_str,
     compat_urlparse,
 )
 from ..utils import (
     determine_ext,
-    encode_dict,
     ExtractorError,
     InAdvancePagedList,
     int_or_none,
+    NO_DEFAULT,
     RegexNotFoundError,
     sanitized_Request,
     smuggle_url,
@@ -25,6 +26,7 @@ from ..utils import (
     urlencode_postdata,
     unescapeHTML,
     parse_filesize,
+    try_get,
 )
 
 
@@ -42,19 +44,39 @@ class VimeoBaseInfoExtractor(InfoExtractor):
         self.report_login()
         webpage = self._download_webpage(self._LOGIN_URL, None, False)
         token, vuid = self._extract_xsrft_and_vuid(webpage)
-        data = urlencode_postdata(encode_dict({
+        data = urlencode_postdata({
             'action': 'login',
             'email': username,
             'password': password,
             'service': 'vimeo',
             'token': token,
-        }))
+        })
         login_request = sanitized_Request(self._LOGIN_URL, data)
         login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
         login_request.add_header('Referer', self._LOGIN_URL)
         self._set_vimeo_cookie('vuid', vuid)
         self._download_webpage(login_request, None, False, 'Wrong login info')
 
+    def _verify_video_password(self, url, video_id, webpage):
+        password = self._downloader.params.get('videopassword')
+        if password is None:
+            raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
+        token, vuid = self._extract_xsrft_and_vuid(webpage)
+        data = urlencode_postdata({
+            'password': password,
+            'token': token,
+        })
+        if url.startswith('http://'):
+            # vimeo only supports https now, but the user can give an http url
+            url = url.replace('http://', 'https://')
+        password_request = sanitized_Request(url + '/password', data)
+        password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+        password_request.add_header('Referer', url)
+        self._set_vimeo_cookie('vuid', vuid)
+        return self._download_webpage(
+            password_request, video_id,
+            'Verifying the password', 'Wrong password')
+
     def _extract_xsrft_and_vuid(self, webpage):
         xsrft = self._search_regex(
             r'(?:(?P<q1>["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)',
@@ -67,21 +89,96 @@ class VimeoBaseInfoExtractor(InfoExtractor):
     def _set_vimeo_cookie(self, name, value):
         self._set_cookie('vimeo.com', name, value)
 
+    def _vimeo_sort_formats(self, formats):
+        # Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps
+        # at the same time without actual units specified. This lead to wrong sorting.
+        self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'format_id'))
+
+    def _parse_config(self, config, video_id):
+        # Extract title
+        video_title = config['video']['title']
+
+        # Extract uploader, uploader_url and uploader_id
+        video_uploader = config['video'].get('owner', {}).get('name')
+        video_uploader_url = config['video'].get('owner', {}).get('url')
+        video_uploader_id = video_uploader_url.split('/')[-1] if video_uploader_url else None
+
+        # Extract video thumbnail
+        video_thumbnail = config['video'].get('thumbnail')
+        if video_thumbnail is None:
+            video_thumbs = config['video'].get('thumbs')
+            if video_thumbs and isinstance(video_thumbs, dict):
+                _, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1]
+
+        # Extract video duration
+        video_duration = int_or_none(config['video'].get('duration'))
+
+        formats = []
+        config_files = config['video'].get('files') or config['request'].get('files', {})
+        for f in config_files.get('progressive', []):
+            video_url = f.get('url')
+            if not video_url:
+                continue
+            formats.append({
+                'url': video_url,
+                'format_id': 'http-%s' % f.get('quality'),
+                'width': int_or_none(f.get('width')),
+                'height': int_or_none(f.get('height')),
+                'fps': int_or_none(f.get('fps')),
+                'tbr': int_or_none(f.get('bitrate')),
+            })
+        m3u8_url = config_files.get('hls', {}).get('url')
+        if m3u8_url:
+            formats.extend(self._extract_m3u8_formats(
+                m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+
+        subtitles = {}
+        text_tracks = config['request'].get('text_tracks')
+        if text_tracks:
+            for tt in text_tracks:
+                subtitles[tt['lang']] = [{
+                    'ext': 'vtt',
+                    'url': 'https://vimeo.com' + tt['url'],
+                }]
+
+        return {
+            'title': video_title,
+            'uploader': video_uploader,
+            'uploader_id': video_uploader_id,
+            'uploader_url': video_uploader_url,
+            'thumbnail': video_thumbnail,
+            'duration': video_duration,
+            'formats': formats,
+            'subtitles': subtitles,
+        }
+
 
 class VimeoIE(VimeoBaseInfoExtractor):
     """Information extractor for vimeo.com."""
 
     # _VALID_URL matches Vimeo URLs
     _VALID_URL = r'''(?x)
-        https?://
-        (?:(?:www|(?P<player>player))\.)?
-        vimeo(?P<pro>pro)?\.com/
-        (?!channels/[^/?#]+/?(?:$|[?#])|album/)
-        (?:.*?/)?
-        (?:(?:play_redirect_hls|moogaloop\.swf)\?clip_id=)?
-        (?:videos?/)?
-        (?P<id>[0-9]+)
-        /?(?:[?&].*)?(?:[#].*)?$'''
+                    https?://
+                        (?:
+                            (?:
+                                www|
+                                (?P<player>player)
+                            )
+                            \.
+                        )?
+                        vimeo(?P<pro>pro)?\.com/
+                        (?!(?:channels|album)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
+                        (?:.*?/)?
+                        (?:
+                            (?:
+                                play_redirect_hls|
+                                moogaloop\.swf)\?clip_id=
+                            )?
+                        (?:videos?/)?
+                        (?P<id>[0-9]+)
+                        (?:/[\da-f]+)?
+                        /?(?:[?&].*)?(?:[#].*)?$
+                    '''
     IE_NAME = 'vimeo'
     _TESTS = [
         {
@@ -93,6 +190,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
                 'title': "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
                 'description': 'md5:2d3305bad981a06ff79f027f19865021',
                 'upload_date': '20121220',
+                'uploader_url': 're:https?://(?:www\.)?vimeo\.com/user7108434',
                 'uploader_id': 'user7108434',
                 'uploader': 'Filippo Valsorda',
                 'duration': 10,
@@ -105,6 +203,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
             'info_dict': {
                 'id': '68093876',
                 'ext': 'mp4',
+                'uploader_url': 're:https?://(?:www\.)?vimeo\.com/openstreetmapus',
                 'uploader_id': 'openstreetmapus',
                 'uploader': 'OpenStreetMap US',
                 'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
@@ -121,6 +220,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
                 'ext': 'mp4',
                 'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software 2012',
                 'uploader': 'The BLN & Business of Software',
+                'uploader_url': 're:https?://(?:www\.)?vimeo\.com/theblnbusinessofsoftware',
                 'uploader_id': 'theblnbusinessofsoftware',
                 'duration': 3610,
                 'description': None,
@@ -135,10 +235,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
                 'ext': 'mp4',
                 'title': 'youtube-dl password protected test video',
                 'upload_date': '20130614',
+                'uploader_url': 're:https?://(?:www\.)?vimeo\.com/user18948128',
                 'uploader_id': 'user18948128',
                 'uploader': 'Jaime Marquínez Ferrándiz',
                 'duration': 10,
-                'description': 'This is "youtube-dl password protected test video" by Jaime Marquínez Ferrándiz on Vimeo, the home for high quality videos and the people\u2026',
+                'description': 'This is "youtube-dl password protected test video" by  on Vimeo, the home for high quality videos and the people who love them.',
             },
             'params': {
                 'videopassword': 'youtube-dl',
@@ -147,13 +248,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
         {
             'url': 'http://vimeo.com/channels/keypeele/75629013',
             'md5': '2f86a05afe9d7abc0b9126d229bbe15d',
-            'note': 'Video is freely available via original URL '
-                    'and protected with password when accessed via http://vimeo.com/75629013',
             'info_dict': {
                 'id': '75629013',
                 'ext': 'mp4',
                 'title': 'Key & Peele: Terrorist Interrogation',
                 'description': 'md5:8678b246399b070816b12313e8b4eb5c',
+                'uploader_url': 're:https?://(?:www\.)?vimeo\.com/atencio',
                 'uploader_id': 'atencio',
                 'uploader': 'Peter Atencio',
                 'upload_date': '20130927',
@@ -169,6 +269,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
                 'title': 'The New Vimeo Player (You Know, For Videos)',
                 'description': 'md5:2ec900bf97c3f389378a96aee11260ea',
                 'upload_date': '20131015',
+                'uploader_url': 're:https?://(?:www\.)?vimeo\.com/staff',
                 'uploader_id': 'staff',
                 'uploader': 'Vimeo Staff',
                 'duration': 62,
@@ -183,23 +284,48 @@ class VimeoIE(VimeoBaseInfoExtractor):
                 'ext': 'mp4',
                 'title': 'Pier Solar OUYA Official Trailer',
                 'uploader': 'Tulio Gonçalves',
+                'uploader_url': 're:https?://(?:www\.)?vimeo\.com/user28849593',
                 'uploader_id': 'user28849593',
             },
         },
         {
             # contains original format
             'url': 'https://vimeo.com/33951933',
-            'md5': '53c688fa95a55bf4b7293d37a89c5c53',
+            'md5': '2d9f5475e0537f013d0073e812ab89e6',
             'info_dict': {
                 'id': '33951933',
                 'ext': 'mp4',
                 'title': 'FOX CLASSICS - Forever Classic ID - A Full Minute',
                 'uploader': 'The DMCI',
+                'uploader_url': 're:https?://(?:www\.)?vimeo\.com/dmci',
                 'uploader_id': 'dmci',
                 'upload_date': '20111220',
                 'description': 'md5:ae23671e82d05415868f7ad1aec21147',
             },
         },
+        {
+            # only available via https://vimeo.com/channels/tributes/6213729 and
+            # not via https://vimeo.com/6213729
+            'url': 'https://vimeo.com/channels/tributes/6213729',
+            'info_dict': {
+                'id': '6213729',
+                'ext': 'mp4',
+                'title': 'Vimeo Tribute: The Shining',
+                'uploader': 'Casey Donahue',
+                'uploader_url': 're:https?://(?:www\.)?vimeo\.com/caseydonahue',
+                'uploader_id': 'caseydonahue',
+                'upload_date': '20090821',
+                'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6',
+            },
+            'params': {
+                'skip_download': True,
+            },
+            'expected_warnings': ['Unable to download JSON metadata'],
+        },
+        {
+            'url': 'http://vimeo.com/moogaloop.swf?clip_id=2539741',
+            'only_matching': True,
+        },
         {
             'url': 'https://vimeo.com/109815029',
             'note': 'Video not completely processed, "failed" seed status',
@@ -209,11 +335,19 @@ class VimeoIE(VimeoBaseInfoExtractor):
             'url': 'https://vimeo.com/groups/travelhd/videos/22439234',
             'only_matching': True,
         },
+        {
+            'url': 'https://vimeo.com/album/2632481/video/79010983',
+            'only_matching': True,
+        },
         {
             # source file returns 403: Forbidden
             'url': 'https://vimeo.com/7809605',
             'only_matching': True,
         },
+        {
+            'url': 'https://vimeo.com/160743502/abd0e13fb4',
+            'only_matching': True,
+        }
     ]
 
     @staticmethod
@@ -231,47 +365,26 @@ class VimeoIE(VimeoBaseInfoExtractor):
         if mobj:
             return mobj.group(1)
 
-    def _verify_video_password(self, url, video_id, webpage):
-        password = self._downloader.params.get('videopassword')
-        if password is None:
-            raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
-        token, vuid = self._extract_xsrft_and_vuid(webpage)
-        data = urlencode_postdata(encode_dict({
-            'password': password,
-            'token': token,
-        }))
-        if url.startswith('http://'):
-            # vimeo only supports https now, but the user can give an http url
-            url = url.replace('http://', 'https://')
-        password_request = sanitized_Request(url + '/password', data)
-        password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
-        password_request.add_header('Referer', url)
-        self._set_vimeo_cookie('vuid', vuid)
-        return self._download_webpage(
-            password_request, video_id,
-            'Verifying the password', 'Wrong password')
-
     def _verify_player_video_password(self, url, video_id):
         password = self._downloader.params.get('videopassword')
         if password is None:
             raise ExtractorError('This video is protected by a password, use the --video-password option')
-        data = urlencode_postdata(encode_dict({'password': password}))
+        data = urlencode_postdata({'password': password})
         pass_url = url + '/check-password'
         password_request = sanitized_Request(pass_url, data)
         password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+        password_request.add_header('Referer', url)
         return self._download_json(
             password_request, video_id,
-            'Verifying the password',
-            'Wrong password')
+            'Verifying the password', 'Wrong password')
 
     def _real_initialize(self):
         self._login()
 
     def _real_extract(self, url):
         url, data = unsmuggle_url(url, {})
-        headers = std_headers
+        headers = std_headers.copy()
         if 'http_headers' in data:
-            headers = headers.copy()
             headers.update(data['http_headers'])
         if 'Referer' not in headers:
             headers['Referer'] = url
@@ -282,11 +395,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
         orig_url = url
         if mobj.group('pro') or mobj.group('player'):
             url = 'https://player.vimeo.com/video/' + video_id
-        else:
+        elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')):
             url = 'https://vimeo.com/' + video_id
 
         # Retrieve video webpage to extract further information
-        request = sanitized_Request(url, None, headers)
+        request = sanitized_Request(url, headers=headers)
         try:
             webpage = self._download_webpage(request, video_id)
         except ExtractorError as ee:
@@ -360,27 +473,24 @@ class VimeoIE(VimeoBaseInfoExtractor):
             if config.get('view') == 4:
                 config = self._verify_player_video_password(url, video_id)
 
-        if '>You rented this title.<' in webpage:
+        def is_rented():
+            if '>You rented this title.<' in webpage:
+                return True
+            if config.get('user', {}).get('purchased'):
+                return True
+            label = try_get(
+                config, lambda x: x['video']['vod']['purchase_options'][0]['label_string'], compat_str)
+            if label and label.startswith('You rented this'):
+                return True
+            return False
+
+        if is_rented():
             feature_id = config.get('video', {}).get('vod', {}).get('feature_id')
             if feature_id and not data.get('force_feature_id', False):
                 return self.url_result(smuggle_url(
                     'https://player.vimeo.com/player/%s' % feature_id,
                     {'force_feature_id': True}), 'Vimeo')
 
-        # Extract title
-        video_title = config['video']['title']
-
-        # Extract uploader and uploader_id
-        video_uploader = config['video']['owner']['name']
-        video_uploader_id = config['video']['owner']['url'].split('/')[-1] if config['video']['owner']['url'] else None
-
-        # Extract video thumbnail
-        video_thumbnail = config['video'].get('thumbnail')
-        if video_thumbnail is None:
-            video_thumbs = config['video'].get('thumbs')
-            if video_thumbs and isinstance(video_thumbs, dict):
-                _, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1]
-
         # Extract video description
 
         video_description = self._html_search_regex(
@@ -400,9 +510,6 @@ class VimeoIE(VimeoBaseInfoExtractor):
         if not video_description and not mobj.group('player'):
             self._downloader.report_warning('Cannot find video description')
 
-        # Extract video duration
-        video_duration = int_or_none(config['video'].get('duration'))
-
         # Extract upload date
         video_upload_date = None
         mobj = re.search(r'<time[^>]+datetime="([^"]+)"', webpage)
@@ -440,52 +547,54 @@ class VimeoIE(VimeoBaseInfoExtractor):
                             'format_id': source_name,
                             'preference': 1,
                         })
-        config_files = config['video'].get('files') or config['request'].get('files', {})
-        for f in config_files.get('progressive', []):
-            video_url = f.get('url')
-            if not video_url:
-                continue
-            formats.append({
-                'url': video_url,
-                'format_id': 'http-%s' % f.get('quality'),
-                'width': int_or_none(f.get('width')),
-                'height': int_or_none(f.get('height')),
-                'fps': int_or_none(f.get('fps')),
-                'tbr': int_or_none(f.get('bitrate')),
-            })
-        m3u8_url = config_files.get('hls', {}).get('url')
-        if m3u8_url:
-            formats.extend(self._extract_m3u8_formats(
-                m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
-        # Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps
-        # at the same time without actual units specified. This lead to wrong sorting.
-        self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'format_id'))
-
-        subtitles = {}
-        text_tracks = config['request'].get('text_tracks')
-        if text_tracks:
-            for tt in text_tracks:
-                subtitles[tt['lang']] = [{
-                    'ext': 'vtt',
-                    'url': 'https://vimeo.com' + tt['url'],
-                }]
 
-        return {
+        info_dict = self._parse_config(config, video_id)
+        formats.extend(info_dict['formats'])
+        self._vimeo_sort_formats(formats)
+        info_dict.update({
             'id': video_id,
-            'uploader': video_uploader,
-            'uploader_id': video_uploader_id,
+            'formats': formats,
             'upload_date': video_upload_date,
-            'title': video_title,
-            'thumbnail': video_thumbnail,
             'description': video_description,
-            'duration': video_duration,
-            'formats': formats,
             'webpage_url': url,
             'view_count': view_count,
             'like_count': like_count,
             'comment_count': comment_count,
-            'subtitles': subtitles,
-        }
+        })
+
+        return info_dict
+
+
+class VimeoOndemandIE(VimeoBaseInfoExtractor):
+    IE_NAME = 'vimeo:ondemand'
+    _VALID_URL = r'https?://(?:www\.)?vimeo\.com/ondemand/(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        # ondemand video not available via https://vimeo.com/id
+        'url': 'https://vimeo.com/ondemand/20704',
+        'md5': 'c424deda8c7f73c1dfb3edd7630e2f35',
+        'info_dict': {
+            'id': '105442900',
+            'ext': 'mp4',
+            'title': 'המעבדה - במאי יותם פלדמן',
+            'uploader': 'גם סרטים',
+            'uploader_url': 're:https?://(?:www\.)?vimeo\.com/gumfilms',
+            'uploader_id': 'gumfilms',
+        },
+    }, {
+        'url': 'https://vimeo.com/ondemand/nazmaalik',
+        'only_matching': True,
+    }, {
+        'url': 'https://vimeo.com/ondemand/141692381',
+        'only_matching': True,
+    }, {
+        'url': 'https://vimeo.com/ondemand/thelastcolony/150274832',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        return self.url_result(self._og_search_video_url(webpage), VimeoIE.ie_key())
 
 
 class VimeoChannelIE(VimeoBaseInfoExtractor):
@@ -523,7 +632,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
         token, vuid = self._extract_xsrft_and_vuid(webpage)
         fields['token'] = token
         fields['password'] = password
-        post = urlencode_postdata(encode_dict(fields))
+        post = urlencode_postdata(fields)
         password_path = self._search_regex(
             r'action="([^"]+)"', login_form, 'password URL')
         password_url = compat_urlparse.urljoin(page_url, password_path)
@@ -547,8 +656,21 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
                 webpage = self._login_list_password(page_url, list_id, webpage)
                 yield self._extract_list_title(webpage)
 
-            for video_id in re.findall(r'id="clip_(\d+?)"', webpage):
-                yield self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo')
+            # Try extracting href first since not all videos are available via
+            # the short https://vimeo.com/id URL (e.g. https://vimeo.com/channels/tributes/6213729)
+            clips = re.findall(
+                r'id="clip_(\d+)"[^>]*>\s*<a[^>]+href="(/(?:[^/]+/)*\1)', webpage)
+            if clips:
+                for video_id, video_url in clips:
+                    yield self.url_result(
+                        compat_urlparse.urljoin(base_url, video_url),
+                        VimeoIE.ie_key(), video_id=video_id)
+            # More relaxed fallback
+            else:
+                for video_id in re.findall(r'id=["\']clip_(\d+)', webpage):
+                    yield self.url_result(
+                        'https://vimeo.com/%s' % video_id,
+                        VimeoIE.ie_key(), video_id=video_id)
 
             if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
                 break
@@ -585,7 +707,7 @@ class VimeoUserIE(VimeoChannelIE):
 
 class VimeoAlbumIE(VimeoChannelIE):
     IE_NAME = 'vimeo:album'
-    _VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)'
+    _VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)(?:$|[?#]|/(?!video))'
     _TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
     _TESTS = [{
         'url': 'https://vimeo.com/album/2632481',
@@ -605,6 +727,13 @@ class VimeoAlbumIE(VimeoChannelIE):
         'params': {
             'videopassword': 'youtube-dl',
         }
+    }, {
+        'url': 'https://vimeo.com/album/2632481/sort:plays/format:thumbnail',
+        'only_matching': True,
+    }, {
+        # TODO: respect page number
+        'url': 'https://vimeo.com/album/2632481/page:2/sort:plays/format:thumbnail',
+        'only_matching': True,
     }]
 
     def _page_url(self, base_url, pagenum):
@@ -636,7 +765,7 @@ class VimeoGroupsIE(VimeoAlbumIE):
         return self._extract_videos(name, 'https://vimeo.com/groups/%s' % name)
 
 
-class VimeoReviewIE(InfoExtractor):
+class VimeoReviewIE(VimeoBaseInfoExtractor):
     IE_NAME = 'vimeo:review'
     IE_DESC = 'Review pages on vimeo'
     _VALID_URL = r'https://vimeo\.com/[^/]+/review/(?P<id>[^/]+)'
@@ -648,6 +777,7 @@ class VimeoReviewIE(InfoExtractor):
             'ext': 'mp4',
             'title': "DICK HARDWICK 'Comedian'",
             'uploader': 'Richard Hardwick',
+            'uploader_id': 'user21297594',
         }
     }, {
         'note': 'video player needs Referer',
@@ -660,14 +790,45 @@ class VimeoReviewIE(InfoExtractor):
             'uploader': 'DevWeek Events',
             'duration': 2773,
             'thumbnail': 're:^https?://.*\.jpg$',
+            'uploader_id': 'user22258446',
         }
+    }, {
+        'note': 'Password protected',
+        'url': 'https://vimeo.com/user37284429/review/138823582/c4d865efde',
+        'info_dict': {
+            'id': '138823582',
+            'ext': 'mp4',
+            'title': 'EFFICIENT PICKUP MASTERCLASS MODULE 1',
+            'uploader': 'TMB',
+            'uploader_id': 'user37284429',
+        },
+        'params': {
+            'videopassword': 'holygrail',
+        },
     }]
 
+    def _real_initialize(self):
+        self._login()
+
+    def _get_config_url(self, webpage_url, video_id, video_password_verified=False):
+        webpage = self._download_webpage(webpage_url, video_id)
+        config_url = self._html_search_regex(
+            r'data-config-url="([^"]+)"', webpage, 'config URL',
+            default=NO_DEFAULT if video_password_verified else None)
+        if config_url is None:
+            self._verify_video_password(webpage_url, video_id, webpage)
+            config_url = self._get_config_url(
+                webpage_url, video_id, video_password_verified=True)
+        return config_url
+
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        player_url = 'https://player.vimeo.com/player/' + video_id
-        return self.url_result(player_url, 'Vimeo', video_id)
+        video_id = self._match_id(url)
+        config_url = self._get_config_url(url, video_id)
+        config = self._download_json(config_url, video_id)
+        info_dict = self._parse_config(config, video_id)
+        self._vimeo_sort_formats(info_dict['formats'])
+        info_dict['id'] = video_id
+        return info_dict
 
 
 class VimeoWatchLaterIE(VimeoChannelIE):
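
The new VimeoReviewIE._get_config_url above is a retry-once pattern: look for data-config-url, and if it is missing on the first pass, verify the video password and look again, this time treating absence as a hard error (NO_DEFAULT). A minimal runnable sketch of the same control flow, with hypothetical fetch/verify stand-ins rather than youtube-dl APIs:

    def find_config_url(page):
        # stand-in for the data-config-url regex; None when the attribute is absent
        return page.get('config_url')

    def verify_password(page):
        # stand-in for _verify_video_password; pretend success reveals the URL
        page['config_url'] = 'https://player.example.com/video/123/config'

    def get_config_url(page, password_verified=False):
        config_url = find_config_url(page)
        if config_url is None:
            if password_verified:
                # second pass: fail loudly instead of recursing forever
                raise RuntimeError('config URL missing even after password check')
            verify_password(page)
            return get_config_url(page, password_verified=True)
        return config_url

    print(get_config_url({}))  # https://player.example.com/video/123/config
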
index a6a6cc47955f6aae482d8022bf52d4d233fb955f..5b801849cae1c26cd40049e625c65715c7e58010 100644 (file)
@@ -24,6 +24,7 @@ class VineIE(InfoExtractor):
             'upload_date': '20130519',
             'uploader': 'Jack Dorsey',
             'uploader_id': '76',
+            'view_count': int,
             'like_count': int,
             'comment_count': int,
             'repost_count': int,
@@ -39,6 +40,7 @@ class VineIE(InfoExtractor):
             'upload_date': '20140815',
             'uploader': 'Mars Ruiz',
             'uploader_id': '1102363502380728320',
+            'view_count': int,
             'like_count': int,
             'comment_count': int,
             'repost_count': int,
@@ -54,6 +56,7 @@ class VineIE(InfoExtractor):
             'upload_date': '20130430',
             'uploader': 'Z3k3',
             'uploader_id': '936470460173008896',
+            'view_count': int,
             'like_count': int,
             'comment_count': int,
             'repost_count': int,
@@ -71,6 +74,7 @@ class VineIE(InfoExtractor):
             'upload_date': '20150705',
             'uploader': 'Pimry_zaa',
             'uploader_id': '1135760698325307392',
+            'view_count': int,
             'like_count': int,
             'comment_count': int,
             'repost_count': int,
@@ -109,6 +113,7 @@ class VineIE(InfoExtractor):
             'upload_date': unified_strdate(data.get('created')),
             'uploader': username,
             'uploader_id': data.get('userIdStr'),
+            'view_count': int_or_none(data.get('loops', {}).get('count')),
             'like_count': int_or_none(data.get('likes', {}).get('count')),
             'comment_count': int_or_none(data.get('comments', {}).get('count')),
             'repost_count': int_or_none(data.get('reposts', {}).get('count')),
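
Each of the new view_count lines relies on the same defensive lookup: data.get('loops', {}).get('count') never raises when the nested object is missing, and int_or_none maps an absent or malformed value to None. A small sketch (int_or_none is simplified here relative to youtube_dl.utils.int_or_none):

    def int_or_none(v):
        # simplified: the real helper also supports scale/default arguments
        try:
            return int(v) if v is not None else None
        except (TypeError, ValueError):
            return None

    data = {'loops': {'count': 1024}}   # hypothetical Vine API payload
    empty = {}                          # payload missing the 'loops' object

    print(int_or_none(data.get('loops', {}).get('count')))   # 1024
    print(int_or_none(empty.get('loops', {}).get('count')))  # None, no KeyError
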
index 0805e3c083937d7f58acbe4b872d41c19f8d1c02..cfc5ffd8b02082d1f6d3035ba7b02dc230b8b2ed 100644 (file)
@@ -3,19 +3,18 @@ from __future__ import unicode_literals
 
 import re
 import json
+import sys
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_str,
-    compat_urllib_parse,
-)
+from ..compat import compat_str
 from ..utils import (
     ExtractorError,
+    int_or_none,
     orderedSet,
-    sanitized_Request,
     str_to_int,
     unescapeHTML,
     unified_strdate,
+    urlencode_postdata,
 )
 from .vimeo import VimeoIE
 from .pladform import PladformIE
@@ -27,12 +26,16 @@ class VKIE(InfoExtractor):
     _VALID_URL = r'''(?x)
                     https?://
                         (?:
-                            (?:m\.)?vk\.com/video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|
+                            (?:
+                                (?:m\.)?vk\.com/video_|
+                                (?:www\.)?daxab.com/
+                            )
+                            ext\.php\?(?P<embed_query>.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+).*)|
                             (?:
                                 (?:m\.)?vk\.com/(?:.+?\?.*?z=)?video|
-                                (?:www\.)?biqle\.ru/watch/
+                                (?:www\.)?daxab.com/embed/
                             )
-                            (?P<videoid>[^s].*?)(?:\?(?:.*\blist=(?P<list_id>[\da-f]+))?|%2F|$)
+                            (?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>[\da-f]+))?
                         )
                     '''
     _NETRC_MACHINE = 'vk'
@@ -76,7 +79,8 @@ class VKIE(InfoExtractor):
                 'duration': 101,
                 'upload_date': '20120730',
                 'view_count': int,
-            }
+            },
+            'skip': 'This video has been removed from public access.',
         },
         {
             # VIDEO NOW REMOVED
@@ -141,16 +145,29 @@ class VKIE(InfoExtractor):
             'url': 'https://vk.com/video276849682_170681728',
             'info_dict': {
                 'id': 'V3K4mi0SYkc',
-                'ext': 'mp4',
+                'ext': 'webm',
                 'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
-                'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
-                'duration': 179,
+                'description': 'md5:d9903938abdc74c738af77f527ca0596',
+                'duration': 178,
                 'upload_date': '20130116',
                 'uploader': "Children's Joy Foundation",
                 'uploader_id': 'thecjf',
                 'view_count': int,
             },
         },
+        {
+            # video key is extra_data not url\d+
+            'url': 'http://vk.com/video-110305615_171782105',
+            'md5': 'e13fcda136f99764872e739d13fac1d1',
+            'info_dict': {
+                'id': '171782105',
+                'ext': 'mp4',
+                'title': 'S-Dance, репетиции к The way show',
+                'uploader': 'THE WAY SHOW | 17 апреля',
+                'upload_date': '20160207',
+                'view_count': int,
+            },
+        },
         {
             # removed video, just testing that we match the pattern
             'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
@@ -161,11 +178,6 @@ class VKIE(InfoExtractor):
             'url': 'https://vk.com/video205387401_164765225',
             'only_matching': True,
         },
-        {
-            # vk wrapper
-            'url': 'http://www.biqle.ru/watch/847655_160197695',
-            'only_matching': True,
-        },
         {
             # pladform embed
             'url': 'https://vk.com/video-76116461_171554880',
@@ -178,7 +190,7 @@ class VKIE(InfoExtractor):
         if username is None:
             return
 
-        login_page = self._download_webpage(
+        login_page, url_handle = self._download_webpage_handle(
             'https://vk.com', None, 'Downloading login page')
 
         login_form = self._hidden_inputs(login_page)
@@ -188,11 +200,26 @@ class VKIE(InfoExtractor):
             'pass': password.encode('cp1251'),
         })
 
-        request = sanitized_Request(
-            'https://login.vk.com/?act=login',
-            compat_urllib_parse.urlencode(login_form).encode('utf-8'))
+        # https://new.vk.com/ serves two identical remixlhk cookies in the
+        # Set-Cookie header and expects the first one to be set rather than
+        # the second (see
+        # https://github.com/rg3/youtube-dl/issues/9841#issuecomment-227871201).
+        # Per RFC 6265 the cookie that comes later should be the one stored in
+        # the cookie jar, which is what actually happens.
+        # Work around this VK quirk by manually resetting the remixlhk cookie
+        # to the first value.
+        cookies = url_handle.headers.get('Set-Cookie')
+        if sys.version_info[0] >= 3:
+            cookies = cookies.encode('iso-8859-1')
+        cookies = cookies.decode('utf-8')
+        remixlhk = re.search(r'remixlhk=(.+?);.*?\bdomain=(.+?)(?:[,;]|$)', cookies)
+        if remixlhk:
+            value, domain = remixlhk.groups()
+            self._set_cookie(domain, 'remixlhk', value)
+
         login_page = self._download_webpage(
-            request, None, note='Logging in as %s' % username)
+            'https://login.vk.com/?act=login', None,
+            note='Logging in as %s' % username,
+            data=urlencode_postdata(login_form))
 
         if re.search(r'onLoginFailed', login_page):
             raise ExtractorError(
@@ -205,20 +232,21 @@ class VKIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('videoid')
 
-        if not video_id:
+        if video_id:
+            info_url = 'https://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id
+            # Some videos (removed?) can only be downloaded with list id specified
+            list_id = mobj.group('list_id')
+            if list_id:
+                info_url += '&list=%s' % list_id
+        else:
+            info_url = 'http://vk.com/video_ext.php?' + mobj.group('embed_query')
             video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))
 
-        info_url = 'https://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id
-
-        # Some videos (removed?) can only be downloaded with list id specified
-        list_id = mobj.group('list_id')
-        if list_id:
-            info_url += '&list=%s' % list_id
-
         info_page = self._download_webpage(info_url, video_id)
 
         error_message = self._html_search_regex(
-            r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>',
+            [r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>',
+                r'(?s)<div[^>]+id="video_ext_msg"[^>]*>(.+?)</div>'],
             info_page, 'error message', default=None)
         if error_message:
             raise ExtractorError(error_message, expected=True)
@@ -293,17 +321,22 @@ class VKIE(InfoExtractor):
         view_count = None
         views = self._html_search_regex(
             r'"mv_views_count_number"[^>]*>(.+?\bviews?)<',
-            info_page, 'view count', fatal=False)
+            info_page, 'view count', default=None)
         if views:
             view_count = str_to_int(self._search_regex(
                 r'([\d,.]+)', views, 'view count', fatal=False))
 
-        formats = [{
-            'format_id': k,
-            'url': v,
-            'width': int(k[len('url'):]),
-        } for k, v in data.items()
-            if k.startswith('url')]
+        formats = []
+        for k, v in data.items():
+            if not k.startswith('url') and not k.startswith('cache') and k != 'extra_data' or not v:
+                continue
+            height = int_or_none(self._search_regex(
+                r'^(?:url|cache)(\d+)', k, 'height', default=None))
+            formats.append({
+                'format_id': k,
+                'url': v,
+                'height': height,
+            })
         self._sort_formats(formats)
 
         return {
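
The remixlhk workaround above boils down to: scan the raw Set-Cookie header with a regex and explicitly re-set the first matching cookie so it beats the later duplicate. A standalone sketch using the same regex on a made-up header value:

    import re

    # hypothetical header carrying two cookies of the same name; the first is wanted
    set_cookie = ('remixlhk=first; domain=.vk.com; path=/, '
                  'remixlhk=second; domain=.login.vk.com; path=/')

    m = re.search(r'remixlhk=(.+?);.*?\bdomain=(.+?)(?:[,;]|$)', set_cookie)
    if m:
        value, domain = m.groups()
        # in the extractor: self._set_cookie(domain, 'remixlhk', value)
        print(value, domain)  # first .vk.com
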
index 9e2aa58bd94270aee79800ec2d4d4ac18bad28e8..8d671cca767d4592a5428f7d3ad855e952df5353 100644 (file)
@@ -1,13 +1,17 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..utils import (
     dict_get,
+    ExtractorError,
     float_or_none,
     int_or_none,
+    remove_start,
 )
-from ..compat import compat_urllib_parse
+from ..compat import compat_urllib_parse_urlencode
 
 
 class VLiveIE(InfoExtractor):
@@ -19,7 +23,7 @@ class VLiveIE(InfoExtractor):
         'info_dict': {
             'id': '1326',
             'ext': 'mp4',
-            'title': "[V] Girl's Day's Broadcast",
+            'title': "[V LIVE] Girl's Day's Broadcast",
             'creator': "Girl's Day",
             'view_count': int,
         },
@@ -31,19 +35,65 @@ class VLiveIE(InfoExtractor):
         webpage = self._download_webpage(
             'http://www.vlive.tv/video/%s' % video_id, video_id)
 
-        long_video_id = self._search_regex(
-            r'vlive\.tv\.video\.ajax\.request\.handler\.init\(\s*"[0-9]+"\s*,\s*"[^"]*"\s*,\s*"([^"]+)"',
-            webpage, 'long video id')
+        video_params = self._search_regex(
+            r'\bvlive\.video\.init\(([^)]+)\)',
+            webpage, 'video params')
+        status, _, _, live_params, long_video_id, key = re.split(
+            r'"\s*,\s*"', video_params)[2:8]
+        status = remove_start(status, 'PRODUCT_')
+
+        if status == 'LIVE_ON_AIR' or status == 'BIG_EVENT_ON_AIR':
+            live_params = self._parse_json('"%s"' % live_params, video_id)
+            live_params = self._parse_json(live_params, video_id)
+            return self._live(video_id, webpage, live_params)
+        elif status == 'VOD_ON_AIR' or status == 'BIG_EVENT_INTRO':
+            if long_video_id and key:
+                return self._replay(video_id, webpage, long_video_id, key)
+            else:
+                status = 'COMING_SOON'
 
-        key = self._search_regex(
-            r'vlive\.tv\.video\.ajax\.request\.handler\.init\(\s*"[0-9]+"\s*,\s*"[^"]*"\s*,\s*"[^"]+"\s*,\s*"([^"]+)"',
-            webpage, 'key')
+        if status == 'LIVE_END':
+            raise ExtractorError('Uploading for replay. Please wait...',
+                                 expected=True)
+        elif status == 'COMING_SOON':
+            raise ExtractorError('Coming soon!', expected=True)
+        elif status == 'CANCELED':
+            raise ExtractorError('We are sorry, '
+                                 'but the live broadcast has been canceled.',
+                                 expected=True)
+        else:
+            raise ExtractorError('Unknown status %s' % status)
 
+    def _get_common_fields(self, webpage):
         title = self._og_search_title(webpage)
+        creator = self._html_search_regex(
+            r'<div[^>]+class="info_area"[^>]*>\s*<a\s+[^>]*>([^<]+)',
+            webpage, 'creator', fatal=False)
+        thumbnail = self._og_search_thumbnail(webpage)
+        return {
+            'title': title,
+            'creator': creator,
+            'thumbnail': thumbnail,
+        }
+
+    def _live(self, video_id, webpage, live_params):
+        formats = []
+        for vid in live_params.get('resolutions', []):
+            formats.extend(self._extract_m3u8_formats(
+                vid['cdnUrl'], video_id, 'mp4',
+                m3u8_id=vid.get('name'),
+                fatal=False, live=True))
+        self._sort_formats(formats)
+
+        return dict(self._get_common_fields(webpage),
+                    id=video_id,
+                    formats=formats,
+                    is_live=True)
 
+    def _replay(self, video_id, webpage, long_video_id, key):
         playinfo = self._download_json(
             'http://global.apis.naver.com/rmcnmv/rmcnmv/vod_play_videoInfo.json?%s'
-            % compat_urllib_parse.urlencode({
+            % compat_urllib_parse_urlencode({
                 'videoId': long_video_id,
                 'key': key,
                 'ptc': 'http',
@@ -62,11 +112,6 @@ class VLiveIE(InfoExtractor):
         } for vid in playinfo.get('videos', {}).get('list', []) if vid.get('source')]
         self._sort_formats(formats)
 
-        thumbnail = self._og_search_thumbnail(webpage)
-        creator = self._html_search_regex(
-            r'<div[^>]+class="info_area"[^>]*>\s*<strong[^>]+class="name"[^>]*>([^<]+)</strong>',
-            webpage, 'creator', fatal=False)
-
         view_count = int_or_none(playinfo.get('meta', {}).get('count'))
 
         subtitles = {}
@@ -77,12 +122,8 @@ class VLiveIE(InfoExtractor):
                     'ext': 'vtt',
                     'url': caption['source']}]
 
-        return {
-            'id': video_id,
-            'title': title,
-            'creator': creator,
-            'thumbnail': thumbnail,
-            'view_count': view_count,
-            'formats': formats,
-            'subtitles': subtitles,
-        }
+        return dict(self._get_common_fields(webpage),
+                    id=video_id,
+                    formats=formats,
+                    view_count=view_count,
+                    subtitles=subtitles)
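
The replacement parsing logic above treats the vlive.video.init(...) call as a comma-separated list of quoted arguments: split on the quote-comma-quote boundary, slice positions 2..7, and strip the PRODUCT_ prefix from the status. A sketch against a made-up argument string in the same positional order:

    import re

    # hypothetical init payload; only positions 2, 5, 6 and 7 matter here
    video_params = ('"VOD", "1326", "PRODUCT_VOD_ON_AIR", "x", "y", '
                    '"{}", "LONG_VIDEO_ID", "KEY", "tail"')

    status, _, _, live_params, long_video_id, key = re.split(
        r'"\s*,\s*"', video_params)[2:8]
    if status.startswith('PRODUCT_'):
        status = status[len('PRODUCT_'):]   # what remove_start() does

    print(status, long_video_id, key)  # VOD_ON_AIR LONG_VIDEO_ID KEY
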
index a97995a6dfd92383c2d25ea7250142404176ecad..a938a4007ead91a25ca84b43307ce16e1787e2e6 100644 (file)
@@ -2,11 +2,11 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse
 from ..utils import (
     ExtractorError,
     NO_DEFAULT,
     sanitized_Request,
+    urlencode_postdata,
 )
 
 
@@ -38,7 +38,7 @@ class VodlockerIE(InfoExtractor):
 
         if fields['op'] == 'download1':
             self._sleep(3, video_id)  # they do detect when requests happen too fast!
-            post = compat_urllib_parse.urlencode(fields)
+            post = urlencode_postdata(fields)
             req = sanitized_Request(url, post)
             req.add_header('Content-type', 'application/x-www-form-urlencoded')
             webpage = self._download_webpage(
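
The compat_urllib_parse.urlencode to urlencode_postdata swap here (and in the vimeo and vk hunks above) centralizes one detail: POST bodies must be bytes. A simplified sketch of what the helper does, assuming Python 3 (the real youtube_dl.utils.urlencode_postdata also covers Python 2):

    from urllib.parse import urlencode

    def urlencode_postdata(fields):
        # urlencode the form fields, then encode to bytes for use as request data
        return urlencode(fields).encode('ascii')

    print(urlencode_postdata({'op': 'download1', 'id': 'abc'}))
    # b'op=download1&id=abc'
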
index 93d15a556dedb6e0589dc5f393e0a01ad5b4a8a0..4f1a99a8989d736c1de572e6372b022544102f87 100644 (file)
@@ -3,7 +3,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_urlparse
+from ..compat import (
+    compat_str,
+    compat_urlparse,
+)
 from ..utils import (
     ExtractorError,
     determine_ext,
@@ -16,13 +19,13 @@ class VoiceRepublicIE(InfoExtractor):
     _VALID_URL = r'https?://voicerepublic\.com/(?:talks|embed)/(?P<id>[0-9a-z-]+)'
     _TESTS = [{
         'url': 'http://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state',
-        'md5': '0554a24d1657915aa8e8f84e15dc9353',
+        'md5': 'b9174d651323f17783000876347116e3',
         'info_dict': {
             'id': '2296',
             'display_id': 'watching-the-watchers-building-a-sousveillance-state',
             'ext': 'm4a',
             'title': 'Watching the Watchers: Building a Sousveillance State',
-            'description': 'md5:715ba964958afa2398df615809cfecb1',
+            'description': 'Secret surveillance programs have metadata too. The people and companies that operate secret surveillance programs can be surveilled.',
             'thumbnail': 're:^https?://.*\.(?:png|jpg)$',
             'duration': 1800,
             'view_count': int,
@@ -52,7 +55,7 @@ class VoiceRepublicIE(InfoExtractor):
         if data:
             title = data['title']
             description = data.get('teaser')
-            talk_id = data.get('talk_id') or display_id
+            talk_id = compat_str(data.get('talk_id') or display_id)
             talk = data['talk']
             duration = int_or_none(talk.get('duration'))
             formats = [{
diff --git a/youtube_dl/extractor/voxmedia.py b/youtube_dl/extractor/voxmedia.py
new file mode 100644 (file)
index 0000000..b1b32ad
--- /dev/null
@@ -0,0 +1,136 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+
+
+class VoxMediaIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?(?:theverge|vox|sbnation|eater|polygon|curbed|racked)\.com/(?:[^/]+/)*(?P<id>[^/?]+)'
+    _TESTS = [{
+        'url': 'http://www.theverge.com/2014/6/27/5849272/material-world-how-google-discovered-what-software-is-made-of',
+        'md5': '73856edf3e89a711e70d5cf7cb280b37',
+        'info_dict': {
+            'id': '11eXZobjrG8DCSTgrNjVinU-YmmdYjhe',
+            'ext': 'mp4',
+            'title': 'Google\'s new material design direction',
+            'description': 'md5:2f44f74c4d14a1f800ea73e1c6832ad2',
+        },
+        'add_ie': ['Ooyala'],
+    }, {
+        # data-ooyala-id
+        'url': 'http://www.theverge.com/2014/10/21/7025853/google-nexus-6-hands-on-photos-video-android-phablet',
+        'md5': 'd744484ff127884cd2ba09e3fa604e4b',
+        'info_dict': {
+            'id': 'RkZXU4cTphOCPDMZg5oEounJyoFI0g-B',
+            'ext': 'mp4',
+            'title': 'The Nexus 6: hands-on with Google\'s phablet',
+            'description': 'md5:87a51fe95ff8cea8b5bdb9ac7ae6a6af',
+        },
+        'add_ie': ['Ooyala'],
+    }, {
+        # volume embed
+        'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill',
+        'md5': '375c483c5080ab8cd85c9c84cfc2d1e4',
+        'info_dict': {
+            'id': 'wydzk3dDpmRz7PQoXRsTIX6XTkPjYL0b',
+            'ext': 'mp4',
+            'title': 'The new frontier of LGBTQ civil rights, explained',
+            'description': 'md5:0dc58e94a465cbe91d02950f770eb93f',
+        },
+        'add_ie': ['Ooyala'],
+    }, {
+        # youtube embed
+        'url': 'http://www.vox.com/2016/3/24/11291692/robot-dance',
+        'md5': '83b3080489fb103941e549352d3e0977',
+        'info_dict': {
+            'id': 'FcNHTJU1ufM',
+            'ext': 'mp4',
+            'title': 'How "the robot" became the greatest novelty dance of all time',
+            'description': 'md5:b081c0d588b8b2085870cda55e6da176',
+            'upload_date': '20160324',
+            'uploader_id': 'voxdotcom',
+            'uploader': 'Vox',
+        },
+        'add_ie': ['Youtube'],
+    }, {
+        # SBN.VideoLinkset.entryGroup multiple ooyala embeds
+        'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
+        'info_dict': {
+            'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
+            'title': '25 lies you will tell yourself on National Signing Day',
+            'description': 'It\'s the most self-delusional time of the year, and everyone\'s gonna tell the same lies together!',
+        },
+        'playlist': [{
+            'md5': '721fededf2ab74ae4176c8c8cbfe092e',
+            'info_dict': {
+                'id': 'p3cThlMjE61VDi_SD9JlIteSNPWVDBB9',
+                'ext': 'mp4',
+                'title': 'Buddy Hield vs Steph Curry (and the world)',
+                'description': 'Let’s dissect only the most important Final Four storylines.',
+            },
+        }, {
+            'md5': 'bf0c5cc115636af028be1bab79217ea9',
+            'info_dict': {
+                'id': 'BmbmVjMjE6esPHxdALGubTrouQ0jYLHj',
+                'ext': 'mp4',
+                'title': 'Chasing Cinderella 2016: Syracuse basketball',
+                'description': 'md5:e02d56b026d51aa32c010676765a690d',
+            },
+        }],
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = compat_urllib_parse_unquote(self._download_webpage(url, display_id))
+
+        def create_entry(provider_video_id, provider_video_type, title=None, description=None):
+            return {
+                '_type': 'url_transparent',
+                'url': provider_video_id if provider_video_type == 'youtube' else '%s:%s' % (provider_video_type, provider_video_id),
+                'title': title or self._og_search_title(webpage),
+                'description': description or self._og_search_description(webpage),
+            }
+
+        entries = []
+        entries_data = self._search_regex([
+            r'Chorus\.VideoContext\.addVideo\((\[{.+}\])\);',
+            r'var\s+entry\s*=\s*({.+});',
+            r'SBN\.VideoLinkset\.entryGroup\(\s*(\[.+\])',
+        ], webpage, 'video data', default=None)
+        if entries_data:
+            entries_data = self._parse_json(entries_data, display_id)
+            if isinstance(entries_data, dict):
+                entries_data = [entries_data]
+            for video_data in entries_data:
+                provider_video_id = video_data.get('provider_video_id')
+                provider_video_type = video_data.get('provider_video_type')
+                if provider_video_id and provider_video_type:
+                    entries.append(create_entry(
+                        provider_video_id, provider_video_type,
+                        video_data.get('title'), video_data.get('description')))
+
+        provider_video_id = self._search_regex(
+            r'data-ooyala-id="([^"]+)"', webpage, 'ooyala id', default=None)
+        if provider_video_id:
+            entries.append(create_entry(provider_video_id, 'ooyala'))
+
+        volume_uuid = self._search_regex(
+            r'data-volume-uuid="([^"]+)"', webpage, 'volume uuid', default=None)
+        if volume_uuid:
+            volume_webpage = self._download_webpage(
+                'http://volume.vox-cdn.com/embed/%s' % volume_uuid, volume_uuid)
+            video_data = self._parse_json(self._search_regex(
+                r'Volume\.createVideo\(({.+})\s*,\s*{.*}\s*,\s*\[.*\]\s*,\s*{.*}\);', volume_webpage, 'video data'), volume_uuid)
+            for provider_video_type in ('ooyala', 'youtube'):
+                provider_video_id = video_data.get('%s_id' % provider_video_type)
+                if provider_video_id:
+                    description = video_data.get('description_long') or video_data.get('description_short')
+                    entries.append(create_entry(
+                        provider_video_id, provider_video_type, video_data.get('title_short'), description))
+                    break
+
+        if len(entries) == 1:
+            return entries[0]
+        else:
+            return self.playlist_result(entries, display_id, self._og_search_title(webpage), self._og_search_description(webpage))
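
Of the three regexes fed to _search_regex above, one yields a single JSON object and the others yield arrays, so the code wraps a lone dict in a list before iterating. A runnable sketch with made-up payloads:

    import json

    payloads = (
        '{"provider_video_id": "abc", "provider_video_type": "ooyala"}',
        '[{"provider_video_id": "a1", "provider_video_type": "youtube"},'
        ' {"provider_video_id": "b2", "provider_video_type": "ooyala"}]',
    )
    for raw in payloads:
        entries_data = json.loads(raw)
        if isinstance(entries_data, dict):   # normalize: always iterate a list
            entries_data = [entries_data]
        print([(e['provider_video_type'], e['provider_video_id'])
               for e in entries_data])
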
index 92c90e5172e89b98c3309bb01dc9787f15c24859..1557a0e0406ebfb75c2b5b4583c74f05c5dd2cc7 100644 (file)
@@ -4,6 +4,7 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
+    ExtractorError,
     parse_duration,
     str_to_int,
 )
@@ -27,7 +28,8 @@ class VpornIE(InfoExtractor):
                 'duration': 393,
                 'age_limit': 18,
                 'view_count': int,
-            }
+            },
+            'skip': 'video removed',
         },
         {
             'url': 'http://www.vporn.com/female/hana-shower/523564/',
@@ -40,7 +42,7 @@ class VpornIE(InfoExtractor):
                 'description': 'Hana showers at the bathroom.',
                 'thumbnail': 're:^https?://.*\.jpg$',
                 'uploader': 'Hmmmmm',
-                'categories': ['Big Boobs', 'Erotic', 'Teen', 'Female'],
+                'categories': ['Big Boobs', 'Erotic', 'Teen', 'Female', '720p'],
                 'duration': 588,
                 'age_limit': 18,
                 'view_count': int,
@@ -55,6 +57,10 @@ class VpornIE(InfoExtractor):
 
         webpage = self._download_webpage(url, display_id)
 
+        errmsg = 'This video has been deleted due to Copyright Infringement or by the account owner!'
+        if errmsg in webpage:
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True)
+
         title = self._html_search_regex(
             r'videoname\s*=\s*\'([^\']+)\'', webpage, 'title').strip()
         description = self._html_search_regex(
index 2b6bae89bd2a450c9babe1ea77e299b806b752c1..8e35f24e81e7e61240898ea3259914737365fd2f 100644 (file)
@@ -4,7 +4,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import float_or_none
+from ..utils import (
+    determine_ext,
+    float_or_none,
+)
 
 
 class VRTIE(InfoExtractor):
@@ -52,6 +55,11 @@ class VRTIE(InfoExtractor):
                 'duration': 661,
             }
         },
+        {
+            # YouTube video
+            'url': 'http://deredactie.be/cm/vrtnieuws/videozone/nieuws/cultuurenmedia/1.2622957',
+            'only_matching': True,
+        },
         {
             'url': 'http://cobra.canvas.be/cm/cobra/videozone/rubriek/film-videozone/1.2377055',
             'only_matching': True,
@@ -66,7 +74,17 @@ class VRTIE(InfoExtractor):
         video_id = self._search_regex(
             r'data-video-id="([^"]+)_[^"]+"', webpage, 'video id', fatal=False)
 
+        src = self._search_regex(
+            r'data-video-src="([^"]+)"', webpage, 'video src', default=None)
+
+        video_type = self._search_regex(
+            r'data-video-type="([^"]+)"', webpage, 'video type', default=None)
+
+        if video_type == 'YouTubeVideo':
+            return self.url_result(src, 'Youtube')
+
         formats = []
+
         mobj = re.search(
             r'data-video-iphone-server="(?P<server>[^"]+)"\s+data-video-iphone-path="(?P<path>[^"]+)"',
             webpage)
@@ -74,11 +92,15 @@ class VRTIE(InfoExtractor):
             formats.extend(self._extract_m3u8_formats(
                 '%s/%s' % (mobj.group('server'), mobj.group('path')),
                 video_id, 'mp4', m3u8_id='hls', fatal=False))
-        mobj = re.search(r'data-video-src="(?P<src>[^"]+)"', webpage)
-        if mobj:
-            formats.extend(self._extract_f4m_formats(
-                '%s/manifest.f4m' % mobj.group('src'),
-                video_id, f4m_id='hds', fatal=False))
+
+        if src:
+            if determine_ext(src) == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    src, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id='hls', fatal=False))
+            else:
+                formats.extend(self._extract_f4m_formats(
+                    '%s/manifest.f4m' % src, video_id, f4m_id='hds', fatal=False))
 
         if not formats and 'data-video-geoblocking="true"' in webpage:
             self.raise_geo_restricted('This video is only available in Belgium')
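
The new src handling dispatches on the URL's apparent extension: .m3u8 goes to the HLS extractor, anything else is assumed to be an HDS base URL and gets /manifest.f4m appended. A sketch with a simplified determine_ext (the real youtube_dl.utils.determine_ext is more defensive):

    def determine_ext(url):
        # simplified: extension = text after the final dot, if it looks like one
        ext = url.rpartition('.')[2]
        return ext if ext.isalnum() else None

    for src in ('http://cdn.example.be/stream.m3u8',
                'http://cdn.example.be/vod/stream'):
        if determine_ext(src) == 'm3u8':
            print('HLS:', src)
        else:
            print('HDS:', '%s/manifest.f4m' % src)
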
index 149e364677fcab4d0374479c4b96ff741277b17e..10ca6acb12469f85267405f9431b9508c0537e57 100644 (file)
@@ -15,7 +15,7 @@ from ..utils import (
 class VubeIE(InfoExtractor):
     IE_NAME = 'vube'
     IE_DESC = 'Vube.com'
-    _VALID_URL = r'http://vube\.com/(?:[^/]+/)+(?P<id>[\da-zA-Z]{10})\b'
+    _VALID_URL = r'https?://vube\.com/(?:[^/]+/)+(?P<id>[\da-zA-Z]{10})\b'
 
     _TESTS = [
         {
index a6d9b5fee1f4864d82c7f8bb83e87884c96afe3b..eaa888f005cc61c53b8f45c3e3b93633083b17ed 100644 (file)
@@ -14,7 +14,7 @@ from ..utils import (
 
 
 class VuClipIE(InfoExtractor):
-    _VALID_URL = r'http://(?:m\.)?vuclip\.com/w\?.*?cid=(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:m\.)?vuclip\.com/w\?.*?cid=(?P<id>[0-9]+)'
 
     _TEST = {
         'url': 'http://m.vuclip.com/w?cid=922692425&fid=70295&z=1010&nvar&frm=index.html',
diff --git a/youtube_dl/extractor/vulture.py b/youtube_dl/extractor/vulture.py
deleted file mode 100644 (file)
index faa167e..0000000
+++ /dev/null
@@ -1,69 +0,0 @@
-from __future__ import unicode_literals
-
-import json
-import os.path
-import re
-
-from .common import InfoExtractor
-from ..utils import (
-    int_or_none,
-    parse_iso8601,
-)
-
-
-class VultureIE(InfoExtractor):
-    IE_NAME = 'vulture.com'
-    _VALID_URL = r'https?://video\.vulture\.com/video/(?P<display_id>[^/]+)/'
-    _TEST = {
-        'url': 'http://video.vulture.com/video/Mindy-Kaling-s-Harvard-Speech/player?layout=compact&read_more=1',
-        'md5': '8d997845642a2b5152820f7257871bc8',
-        'info_dict': {
-            'id': '6GHRQL3RV7MSD1H4',
-            'ext': 'mp4',
-            'title': 'kaling-speech-2-MAGNIFY STANDARD CONTAINER REVISED',
-            'uploader_id': 'Sarah',
-            'thumbnail': 're:^http://.*\.jpg$',
-            'timestamp': 1401288564,
-            'upload_date': '20140528',
-            'description': 'Uplifting and witty, as predicted.',
-            'duration': 1015,
-        }
-    }
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('display_id')
-
-        webpage = self._download_webpage(url, display_id)
-        query_string = self._search_regex(
-            r"queryString\s*=\s*'([^']+)'", webpage, 'query string')
-        video_id = self._search_regex(
-            r'content=([^&]+)', query_string, 'video ID')
-        query_url = 'http://video.vulture.com/embed/player/container/1000/1000/?%s' % query_string
-
-        query_webpage = self._download_webpage(
-            query_url, display_id, note='Downloading query page')
-        params_json = self._search_regex(
-            r'(?sm)new MagnifyEmbeddablePlayer\({.*?contentItem:\s*(\{.*?\})\n?,\n',
-            query_webpage,
-            'player params')
-        params = json.loads(params_json)
-
-        upload_timestamp = parse_iso8601(params['posted'].replace(' ', 'T'))
-        uploader_id = params.get('user', {}).get('handle')
-
-        media_item = params['media_item']
-        title = os.path.splitext(media_item['title'])[0]
-        duration = int_or_none(media_item.get('duration_seconds'))
-
-        return {
-            'id': video_id,
-            'display_id': display_id,
-            'url': media_item['pipeline_xid'],
-            'title': title,
-            'timestamp': upload_timestamp,
-            'thumbnail': params.get('thumbnail_url'),
-            'uploader_id': uploader_id,
-            'description': params.get('description'),
-            'duration': duration,
-        }
index 24efbd6e6341ba5aa73e5df11cb9af36f941da43..8b9488340368ea0292fa2614336778099c9eb11e 100644 (file)
@@ -11,7 +11,7 @@ from ..utils import (
 
 
 class WallaIE(InfoExtractor):
-    _VALID_URL = r'http://vod\.walla\.co\.il/[^/]+/(?P<id>\d+)/(?P<display_id>.+)'
+    _VALID_URL = r'https?://vod\.walla\.co\.il/[^/]+/(?P<id>\d+)/(?P<display_id>.+)'
     _TEST = {
         'url': 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one',
         'info_dict': {
index ec8b999983f6ae89a3bf53909e9d70a463f87f52..839cad986cbbf4edc8f73ca5639e780f210163c2 100644 (file)
@@ -11,7 +11,96 @@ from ..utils import (
 
 
 class WashingtonPostIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
+    IE_NAME = 'washingtonpost'
+    _VALID_URL = r'(?:washingtonpost:|https?://(?:www\.)?washingtonpost\.com/video/(?:[^/]+/)*)(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+    _TEST = {
+        'url': 'https://www.washingtonpost.com/video/c/video/480ba4ee-1ec7-11e6-82c2-a7dcb313287d',
+        'md5': '6f537e1334b714eb15f9563bd4b9cdfa',
+        'info_dict': {
+            'id': '480ba4ee-1ec7-11e6-82c2-a7dcb313287d',
+            'ext': 'mp4',
+            'title': 'Egypt finds belongings, debris from plane crash',
+            'description': 'md5:a17ceee432f215a5371388c1f680bd86',
+            'upload_date': '20160520',
+            'uploader': 'Reuters',
+            'timestamp': 1463778452,
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        video_data = self._download_json(
+            'http://www.washingtonpost.com/posttv/c/videojson/%s?resType=jsonp' % video_id,
+            video_id, transform_source=strip_jsonp)[0]['contentConfig']
+        title = video_data['title']
+
+        urls = []
+        formats = []
+        for s in video_data.get('streams', []):
+            s_url = s.get('url')
+            if not s_url or s_url in urls:
+                continue
+            urls.append(s_url)
+            video_type = s.get('type')
+            if video_type == 'smil':
+                continue
+            elif video_type in ('ts', 'hls') and ('_master.m3u8' in s_url or '_mobile.m3u8' in s_url):
+                m3u8_formats = self._extract_m3u8_formats(
+                    s_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
+                for m3u8_format in m3u8_formats:
+                    width = m3u8_format.get('width')
+                    if not width:
+                        continue
+                    vbr = self._search_regex(
+                        r'%d_%d_(\d+)' % (width, m3u8_format['height']), m3u8_format['url'], 'vbr', default=None)
+                    if vbr:
+                        m3u8_format.update({
+                            'vbr': int_or_none(vbr),
+                        })
+                formats.extend(m3u8_formats)
+            else:
+                width = int_or_none(s.get('width'))
+                vbr = int_or_none(s.get('bitrate'))
+                has_width = width != 0
+                formats.append({
+                    'format_id': (
+                        '%s-%d-%d' % (video_type, width, vbr)
+                        if width
+                        else video_type),
+                    'vbr': vbr if has_width else None,
+                    'width': width,
+                    'height': int_or_none(s.get('height')),
+                    'acodec': s.get('audioCodec'),
+                    'vcodec': s.get('videoCodec') if has_width else 'none',
+                    'filesize': int_or_none(s.get('fileSize')),
+                    'url': s_url,
+                    'ext': 'mp4',
+                    'protocol': 'm3u8_native' if video_type in ('ts', 'hls') else None,
+                })
+        source_media_url = video_data.get('sourceMediaURL')
+        if source_media_url:
+            formats.append({
+                'format_id': 'source_media',
+                'url': source_media_url,
+            })
+        self._sort_formats(
+            formats, ('width', 'height', 'vbr', 'filesize', 'tbr', 'format_id'))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': video_data.get('blurb'),
+            'uploader': video_data.get('credits', {}).get('source'),
+            'formats': formats,
+            'duration': int_or_none(video_data.get('videoDuration'), 100),
+            'timestamp': int_or_none(
+                video_data.get('dateConfig', {}).get('dateFirstPublished'), 1000),
+        }
+
+
+class WashingtonPostArticleIE(InfoExtractor):
+    IE_NAME = 'washingtonpost:article'
+    _VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/(?:[^/]+/)*(?P<id>[^/?#]+)'
     _TESTS = [{
         'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
         'info_dict': {
@@ -63,6 +152,10 @@ class WashingtonPostIE(InfoExtractor):
         }]
     }]
 
+    @classmethod
+    def suitable(cls, url):
+        return False if WashingtonPostIE.suitable(url) else super(WashingtonPostArticleIE, cls).suitable(url)
+
     def _real_extract(self, url):
         page_id = self._match_id(url)
         webpage = self._download_webpage(url, page_id)
@@ -74,54 +167,7 @@ class WashingtonPostIE(InfoExtractor):
                 <div\s+class="posttv-video-embed[^>]*?data-uuid=|
                 data-video-uuid=
             )"([^"]+)"''', webpage)
-        entries = []
-        for i, uuid in enumerate(uuids, start=1):
-            vinfo_all = self._download_json(
-                'http://www.washingtonpost.com/posttv/c/videojson/%s?resType=jsonp' % uuid,
-                page_id,
-                transform_source=strip_jsonp,
-                note='Downloading information of video %d/%d' % (i, len(uuids))
-            )
-            vinfo = vinfo_all[0]['contentConfig']
-            uploader = vinfo.get('credits', {}).get('source')
-            timestamp = int_or_none(
-                vinfo.get('dateConfig', {}).get('dateFirstPublished'), 1000)
-
-            formats = [{
-                'format_id': (
-                    '%s-%s-%s' % (s.get('type'), s.get('width'), s.get('bitrate'))
-                    if s.get('width')
-                    else s.get('type')),
-                'vbr': s.get('bitrate') if s.get('width') != 0 else None,
-                'width': s.get('width'),
-                'height': s.get('height'),
-                'acodec': s.get('audioCodec'),
-                'vcodec': s.get('videoCodec') if s.get('width') != 0 else 'none',
-                'filesize': s.get('fileSize'),
-                'url': s.get('url'),
-                'ext': 'mp4',
-                'preference': -100 if s.get('type') == 'smil' else None,
-                'protocol': {
-                    'MP4': 'http',
-                    'F4F': 'f4m',
-                }.get(s.get('type')),
-            } for s in vinfo.get('streams', [])]
-            source_media_url = vinfo.get('sourceMediaURL')
-            if source_media_url:
-                formats.append({
-                    'format_id': 'source_media',
-                    'url': source_media_url,
-                })
-            self._sort_formats(formats)
-            entries.append({
-                'id': uuid,
-                'title': vinfo['title'],
-                'description': vinfo.get('blurb'),
-                'uploader': uploader,
-                'formats': formats,
-                'duration': int_or_none(vinfo.get('videoDuration'), 100),
-                'timestamp': timestamp,
-            })
+        entries = [self.url_result('washingtonpost:%s' % uuid, 'WashingtonPost', uuid) for uuid in uuids]
 
         return {
             '_type': 'playlist',
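
The vbr recovery in the new WashingtonPostIE assumes HLS variant URLs embed width_height_bitrate; when the pattern matches, the bitrate is grafted onto the format dict. A sketch against a made-up variant URL:

    import re

    # hypothetical variant entry as produced by _extract_m3u8_formats
    m3u8_format = {'width': 640, 'height': 360,
                   'url': 'https://example.com/video_640_360_800.mp4/master.m3u8'}

    m = re.search(r'%d_%d_(\d+)' % (m3u8_format['width'], m3u8_format['height']),
                  m3u8_format['url'])
    if m:
        m3u8_format['vbr'] = int(m.group(1))
    print(m3u8_format['vbr'])  # 800
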
index 37cf3d3097c94b39f1b66ab11e0e651579f8d533..de7d6b55935cd5fd8edb4c83c581505c2f0f4214 100644 (file)
@@ -2,25 +2,26 @@
 from __future__ import unicode_literals
 
 import re
-import hashlib
 
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
     ExtractorError,
     unified_strdate,
+    HEADRequest,
+    float_or_none,
 )
 
 
 class WatIE(InfoExtractor):
-    _VALID_URL = r'(?:wat:(?P<real_id>\d{8})|http://www\.wat\.tv/video/(?P<display_id>.*)-(?P<short_id>.*?)_.*?\.html)'
+    _VALID_URL = r'(?:wat:|https?://(?:www\.)?wat\.tv/video/.*-)(?P<id>[0-9a-z]+)'
     IE_NAME = 'wat.tv'
     _TESTS = [
         {
             'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html',
-            'md5': 'ce70e9223945ed26a8056d413ca55dc9',
+            'md5': '83d882d9de5c9d97f0bb2c6273cde56a',
             'info_dict': {
                 'id': '11713067',
-                'display_id': 'soupe-figues-l-orange-aux-epices',
                 'ext': 'mp4',
                 'title': 'Soupe de figues à l\'orange et aux épices',
                 'description': 'Retrouvez l\'émission "Petits plats en équilibre", diffusée le 18 août 2014.',
@@ -33,7 +34,6 @@ class WatIE(InfoExtractor):
             'md5': 'fbc84e4378165278e743956d9c1bf16b',
             'info_dict': {
                 'id': '11713075',
-                'display_id': 'gregory-lemarchal-voix-ange',
                 'ext': 'mp4',
                 'title': 'Grégory Lemarchal, une voix d\'ange depuis 10 ans (1/3)',
                 'description': 'md5:b7a849cf16a2b733d9cd10c52906dee3',
@@ -44,96 +44,85 @@ class WatIE(InfoExtractor):
         },
     ]
 
-    def download_video_info(self, real_id):
-        # 'contentv4' is used in the website, but it also returns the related
-        # videos, we don't need them
-        info = self._download_json('http://www.wat.tv/interface/contentv3/' + real_id, real_id)
-        return info['media']
-
     def _real_extract(self, url):
-        def real_id_for_chapter(chapter):
-            return chapter['tc_start'].split('-')[0]
-        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('display_id')
-        real_id = mobj.group('real_id')
-        if not real_id:
-            short_id = mobj.group('short_id')
-            webpage = self._download_webpage(url, display_id or short_id)
-            real_id = self._search_regex(r'xtpage = ".*-(.*?)";', webpage, 'real id')
+        video_id = self._match_id(url)
+        video_id = video_id if video_id.isdigit() and len(video_id) > 6 else compat_str(int(video_id, 36))
 
-        video_info = self.download_video_info(real_id)
+        # 'contentv4' is used on the website, but it also returns related
+        # videos, which we don't need
+        video_info = self._download_json(
+            'http://www.wat.tv/interface/contentv3/' + video_id, video_id)['media']
 
         error_desc = video_info.get('error_desc')
         if error_desc:
             raise ExtractorError(
                 '%s returned error: %s' % (self.IE_NAME, error_desc), expected=True)
 
-        geo_list = video_info.get('geoList')
-        country = geo_list[0] if geo_list else ''
-
         chapters = video_info['chapters']
         first_chapter = chapters[0]
-        files = video_info['files']
-        first_file = files[0]
 
-        if real_id_for_chapter(first_chapter) != real_id:
-            self.to_screen('Multipart video detected')
-            chapter_urls = []
-            for chapter in chapters:
-                chapter_id = real_id_for_chapter(chapter)
-                # Yes, when we this chapter is processed by WatIE,
-                # it will download the info again
-                chapter_info = self.download_video_info(chapter_id)
-                chapter_urls.append(chapter_info['url'])
-            entries = [self.url_result(chapter_url) for chapter_url in chapter_urls]
-            return self.playlist_result(entries, real_id, video_info['title'])
+        def video_id_for_chapter(chapter):
+            return chapter['tc_start'].split('-')[0]
 
-        upload_date = None
-        if 'date_diffusion' in first_chapter:
-            upload_date = unified_strdate(first_chapter['date_diffusion'])
+        if video_id_for_chapter(first_chapter) != video_id:
+            self.to_screen('Multipart video detected')
+            entries = [self.url_result('wat:%s' % video_id_for_chapter(chapter)) for chapter in chapters]
+            return self.playlist_result(entries, video_id, video_info['title'])
         # Otherwise we can continue and extract just one part, we have to use
-        # the short id for getting the video url
-
-        formats = [{
-            'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
-            'format_id': 'Mobile',
-        }]
-
-        fmts = [('SD', 'web')]
-        if first_file.get('hasHD'):
-            fmts.append(('HD', 'webhd'))
-
-        def compute_token(param):
-            timestamp = '%08x' % int(self._download_webpage(
-                'http://www.wat.tv/servertime', real_id,
-                'Downloading server time').split('|')[0])
-            magic = '9b673b13fa4682ed14c3cfa5af5310274b514c4133e9b3a81e6e3aba009l2564'
-            return '%s/%s' % (hashlib.md5((magic + param + timestamp).encode('ascii')).hexdigest(), timestamp)
-
-        for fmt in fmts:
-            webid = '/%s/%s' % (fmt[1], real_id)
-            video_url = self._download_webpage(
-                'http://www.wat.tv/get%s?token=%s&getURL=1&country=%s' % (webid, compute_token(webid), country),
-                real_id,
-                'Downloading %s video URL' % fmt[0],
-                'Failed to download %s video URL' % fmt[0],
-                False)
-            if not video_url:
+        # the video id for getting the video url
+
+        date_diffusion = first_chapter.get('date_diffusion')
+        upload_date = unified_strdate(date_diffusion) if date_diffusion else None
+
+        def extract_url(path_template, url_type):
+            req_url = 'http://www.wat.tv/get/%s' % (path_template % video_id)
+            head = self._request_webpage(HEADRequest(req_url), video_id, 'Extracting %s url' % url_type)
+            red_url = head.geturl()
+            if req_url == red_url:
+                raise ExtractorError(
+                    '%s said: Sorry, this video is not available from your country.' % self.IE_NAME,
+                    expected=True)
+            return red_url
+
+        m3u8_url = extract_url('ipad/%s.m3u8', 'm3u8')
+        http_url = extract_url('android5/%s.mp4', 'http')
+
+        formats = []
+        m3u8_formats = self._extract_m3u8_formats(
+            m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
+        formats.extend(m3u8_formats)
+        formats.extend(self._extract_f4m_formats(
+            m3u8_url.replace('ios.', 'web.').replace('.m3u8', '.f4m'),
+            video_id, f4m_id='hds', fatal=False))
+        for m3u8_format in m3u8_formats:
+            mobj = re.search(
+                r'audio.*?%3D(\d+)(?:-video.*?%3D(\d+))?', m3u8_format['url'])
+            if not mobj:
                 continue
-            formats.append({
-                'url': video_url,
-                'ext': 'mp4',
-                'format_id': fmt[0],
+            abr, vbr = mobj.groups()
+            abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000)
+            m3u8_format.update({
+                'vbr': vbr,
+                'abr': abr,
+            })
+            if not vbr or not abr:
+                continue
+            f = m3u8_format.copy()
+            f.update({
+                'url': re.sub(r'%s-\d+00-\d+' % video_id, '%s-%d00-%d' % (video_id, round(vbr / 100), round(abr)), http_url),
+                'format_id': f['format_id'].replace('hls', 'http'),
+                'protocol': 'http',
             })
+            formats.append(f)
+        self._sort_formats(formats)
 
         return {
-            'id': real_id,
-            'display_id': display_id,
+            'id': video_id,
             'title': first_chapter['title'],
             'thumbnail': first_chapter['preview'],
             'description': first_chapter['description'],
             'view_count': video_info['views'],
             'upload_date': upload_date,
-            'duration': first_file['duration'],
+            'duration': video_info['files'][0]['duration'],
             'formats': formats,
         }
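
The id normalization at the top of the new _real_extract converts wat.tv's short base-36 page token into the numeric video id. Checking it against the first test above, whose URL token is 6z1uz and whose expected id is 11713067:

    video_id = '6z1uz'   # token captured by _VALID_URL from the first test URL
    if not (video_id.isdigit() and len(video_id) > 6):
        video_id = str(int(video_id, 36))
    print(video_id)  # 11713067
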
similarity index 54%
rename from youtube_dl/extractor/sexykarma.py
rename to youtube_dl/extractor/watchindianporn.py
index e33483674439fb2fcfcdce60a9de6a6ca328dfdd..5d3b5bdb4cb904acabea0864dd76be8a0cc62c30 100644 (file)
@@ -11,61 +11,27 @@ from ..utils import (
 )
 
 
-class SexyKarmaIE(InfoExtractor):
-    IE_DESC = 'Sexy Karma and Watch Indian Porn'
-    _VALID_URL = r'https?://(?:www\.)?(?:sexykarma\.com|watchindianporn\.net)/(?:[^/]+/)*video/(?P<display_id>[^/]+)-(?P<id>[a-zA-Z0-9]+)\.html'
-    _TESTS = [{
-        'url': 'http://www.sexykarma.com/gonewild/video/taking-a-quick-pee-yHI70cOyIHt.html',
-        'md5': 'b9798e7d1ef1765116a8f516c8091dbd',
+class WatchIndianPornIE(InfoExtractor):
+    IE_DESC = 'Watch Indian Porn'
+    _VALID_URL = r'https?://(?:www\.)?watchindianporn\.net/(?:[^/]+/)*video/(?P<display_id>[^/]+)-(?P<id>[a-zA-Z0-9]+)\.html'
+    _TEST = {
+        'url': 'http://www.watchindianporn.net/video/hot-milf-from-kerala-shows-off-her-gorgeous-large-breasts-on-camera-RZa2avywNPa.html',
+        'md5': '249589a164dde236ec65832bfce17440',
         'info_dict': {
-            'id': 'yHI70cOyIHt',
-            'display_id': 'taking-a-quick-pee',
+            'id': 'RZa2avywNPa',
+            'display_id': 'hot-milf-from-kerala-shows-off-her-gorgeous-large-breasts-on-camera',
             'ext': 'mp4',
-            'title': 'Taking a quick pee.',
+            'title': 'Hot milf from kerala shows off her gorgeous large breasts on camera',
             'thumbnail': 're:^https?://.*\.jpg$',
-            'uploader': 'wildginger7',
-            'upload_date': '20141008',
-            'duration': 22,
+            'uploader': 'LoveJay',
+            'upload_date': '20160428',
+            'duration': 226,
             'view_count': int,
             'comment_count': int,
             'categories': list,
             'age_limit': 18,
         }
-    }, {
-        'url': 'http://www.sexykarma.com/gonewild/video/pot-pixie-tribute-8Id6EZPbuHf.html',
-        'md5': 'dd216c68d29b49b12842b9babe762a5d',
-        'info_dict': {
-            'id': '8Id6EZPbuHf',
-            'display_id': 'pot-pixie-tribute',
-            'ext': 'mp4',
-            'title': 'pot_pixie tribute',
-            'thumbnail': 're:^https?://.*\.jpg$',
-            'uploader': 'banffite',
-            'upload_date': '20141013',
-            'duration': 16,
-            'view_count': int,
-            'comment_count': int,
-            'categories': list,
-            'age_limit': 18,
-        }
-    }, {
-        'url': 'http://www.watchindianporn.net/video/desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number-dW2mtctxJfs.html',
-        'md5': '9afb80675550406ed9a63ac2819ef69d',
-        'info_dict': {
-            'id': 'dW2mtctxJfs',
-            'display_id': 'desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number',
-            'ext': 'mp4',
-            'title': 'Desi dancer namrata stripping completely nude and dancing on a hot number',
-            'thumbnail': 're:^https?://.*\.jpg$',
-            'uploader': 'Don',
-            'upload_date': '20140213',
-            'duration': 83,
-            'view_count': int,
-            'comment_count': int,
-            'categories': list,
-            'age_limit': 18,
-        }
-    }]
+    }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -109,6 +75,9 @@ class SexyKarmaIE(InfoExtractor):
             'id': video_id,
             'display_id': display_id,
             'url': video_url,
+            'http_headers': {
+                'Referer': url,
+            },
             'title': title,
             'thumbnail': thumbnail,
             'uploader': uploader,
diff --git a/youtube_dl/extractor/wayofthemaster.py b/youtube_dl/extractor/wayofthemaster.py
deleted file mode 100644 (file)
index af7bb8b..0000000
--- a/youtube_dl/extractor/wayofthemaster.py
+++ /dev/null
@@ -1,52 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-
-
-class WayOfTheMasterIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.wayofthemaster\.com/([^/?#]*/)*(?P<id>[^/?#]+)\.s?html(?:$|[?#])'
-
-    _TEST = {
-        'url': 'http://www.wayofthemaster.com/hbks.shtml',
-        'md5': '5316b57487ada8480606a93cb3d18d24',
-        'info_dict': {
-            'id': 'hbks',
-            'ext': 'mp4',
-            'title': 'Intelligent Design vs. Evolution',
-        },
-    }
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        webpage = self._download_webpage(url, video_id)
-
-        title = self._search_regex(
-            r'<img src="images/title_[^"]+".*?alt="([^"]+)"',
-            webpage, 'title', default=None)
-        if title is None:
-            title = self._html_search_regex(
-                r'<title>(.*?)</title>', webpage, 'page title')
-
-        url_base = self._search_regex(
-            r'<param\s+name="?movie"?\s+value=".*?/wotm_videoplayer_highlow[0-9]*\.swf\?vid=([^"]+)"',
-            webpage, 'URL base')
-        formats = [{
-            'format_id': 'low',
-            'quality': 1,
-            'url': url_base + '_low.mp4',
-        }, {
-            'format_id': 'high',
-            'quality': 2,
-            'url': url_base + '_high.mp4',
-        }]
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'formats': formats,
-        }
diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py
index a851578e075589e6b36d828135f9919a9f1d0c38..390f9e8302f392a25c83af2520cb30b854500632 100644 (file)
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dl/extractor/wdr.py
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 
-import itertools
 import re
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_parse_qs,
-    compat_urlparse,
-)
 from ..utils import (
+    determine_ext,
+    ExtractorError,
+    js_to_json,
+    strip_jsonp,
     unified_strdate,
-    qualities,
+    update_url_query,
+    urlhandle_detect_ext,
 )
 
 
-class WDRIE(InfoExtractor):
-    _PLAYER_REGEX = '-(?:video|audio)player(?:_size-[LMS])?'
-    _VALID_URL = r'(?P<url>https?://www\d?\.(?:wdr\d?|funkhauseuropa)\.de/)(?P<id>.+?)(?P<player>%s)?\.html' % _PLAYER_REGEX
+class WDRBaseIE(InfoExtractor):
+    def _extract_wdr_video(self, webpage, display_id):
+        # for wdr.de, the data-extension is in a tag with the class "mediaLink"
+        # for wdr.de radio players, it is in a tag with the class "wdrrPlayerPlayBtn"
+        # for wdrmaus, it is in a link to the page inside a multiline "videoLink" tag
+        json_metadata = self._html_search_regex(
+            r'class=(?:"(?:mediaLink|wdrrPlayerPlayBtn)\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"',
+            webpage, 'media link', default=None, flags=re.MULTILINE)
+
+        if not json_metadata:
+            return
+
+        media_link_obj = self._parse_json(json_metadata, display_id,
+                                          transform_source=js_to_json)
+        jsonp_url = media_link_obj['mediaObj']['url']
+
+        metadata = self._download_json(
+            jsonp_url, 'metadata', transform_source=strip_jsonp)
+
+        metadata_tracker_data = metadata['trackerData']
+        metadata_media_resource = metadata['mediaResource']
+
+        formats = []
+
+        # check if the metadata contains a direct URL to a file
+        for kind, media_resource in metadata_media_resource.items():
+            if kind not in ('dflt', 'alt'):
+                continue
+
+            for tag_name, medium_url in media_resource.items():
+                if tag_name not in ('videoURL', 'audioURL'):
+                    continue
+
+                ext = determine_ext(medium_url)
+                if ext == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        medium_url, display_id, 'mp4', 'm3u8_native',
+                        m3u8_id='hls'))
+                elif ext == 'f4m':
+                    manifest_url = update_url_query(
+                        medium_url, {'hdcore': '3.2.0', 'plugin': 'aasp-3.2.0.77.18'})
+                    formats.extend(self._extract_f4m_formats(
+                        manifest_url, display_id, f4m_id='hds', fatal=False))
+                elif ext == 'smil':
+                    formats.extend(self._extract_smil_formats(
+                        medium_url, 'stream', fatal=False))
+                else:
+                    a_format = {
+                        'url': medium_url
+                    }
+                    if ext == 'unknown_video':
+                        urlh = self._request_webpage(
+                            medium_url, display_id, note='Determining extension')
+                        ext = urlhandle_detect_ext(urlh)
+                        a_format['ext'] = ext
+                    formats.append(a_format)
+
+        self._sort_formats(formats)
+
+        subtitles = {}
+        caption_url = metadata_media_resource.get('captionURL')
+        if caption_url:
+            subtitles['de'] = [{
+                'url': caption_url,
+                'ext': 'ttml',
+            }]
+
+        title = metadata_tracker_data['trackerClipTitle']
+
+        return {
+            'id': metadata_tracker_data.get('trackerClipId', display_id),
+            'display_id': display_id,
+            'title': title,
+            'alt_title': metadata_tracker_data.get('trackerClipSubcategory'),
+            'formats': formats,
+            'subtitles': subtitles,
+            'upload_date': unified_strdate(metadata_tracker_data.get('trackerClipAirTime')),
+        }
+
+
+class WDRIE(WDRBaseIE):
+    _CURRENT_MAUS_URL = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/){1,2}[^/?#]+\.php5'
+    _PAGE_REGEX = r'/(?:mediathek/)?[^/]+/(?P<type>[^/]+)/(?P<display_id>.+)\.html'
+    _VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL
 
     _TESTS = [
         {
-            'url': 'http://www1.wdr.de/mediathek/video/sendungen/servicezeit/videoservicezeit560-videoplayer_size-L.html',
+            'url': 'http://www1.wdr.de/mediathek/video/sendungen/doku-am-freitag/video-geheimnis-aachener-dom-100.html',
+            # HDS download, MD5 is unstable
             'info_dict': {
-                'id': 'mdb-362427',
+                'id': 'mdb-1058683',
                 'ext': 'flv',
-                'title': 'Servicezeit',
-                'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb',
-                'upload_date': '20140310',
-                'is_live': False
+                'display_id': 'doku-am-freitag/video-geheimnis-aachener-dom-100',
+                'title': 'Geheimnis Aachener Dom',
+                'alt_title': 'Doku am Freitag',
+                'upload_date': '20160304',
+                'description': 'md5:87be8ff14d8dfd7a7ee46f0299b52318',
+                'is_live': False,
+                'subtitles': {'de': [{
+                    'url': 'http://ondemand-ww.wdr.de/medp/fsk0/105/1058683/1058683_12220974.xml',
+                    'ext': 'ttml',
+                }]},
             },
-            'params': {
-                'skip_download': True,
+        },
+        {
+            'url': 'http://www1.wdr.de/mediathek/audio/wdr3/wdr3-gespraech-am-samstag/audio-schriftstellerin-juli-zeh-100.html',
+            'md5': 'f4c1f96d01cf285240f53ea4309663d8',
+            'info_dict': {
+                'id': 'mdb-1072000',
+                'ext': 'mp3',
+                'display_id': 'wdr3-gespraech-am-samstag/audio-schriftstellerin-juli-zeh-100',
+                'title': 'Schriftstellerin Juli Zeh',
+                'alt_title': 'WDR 3 Gespräch am Samstag',
+                'upload_date': '20160312',
+                'description': 'md5:e127d320bc2b1f149be697ce044a3dd7',
+                'is_live': False,
+                'subtitles': {}
             },
-            'skip': 'Page Not Found',
         },
         {
-            'url': 'http://www1.wdr.de/themen/av/videomargaspiegelisttot101-videoplayer.html',
+            'url': 'http://www1.wdr.de/mediathek/video/live/index.html',
             'info_dict': {
-                'id': 'mdb-363194',
-                'ext': 'flv',
-                'title': 'Marga Spiegel ist tot',
-                'description': 'md5:2309992a6716c347891c045be50992e4',
-                'upload_date': '20140311',
-                'is_live': False
+                'id': 'mdb-103364',
+                'ext': 'mp4',
+                'display_id': 'index',
+                'title': r're:^WDR Fernsehen im Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+                'alt_title': 'WDR Fernsehen Live',
+                'upload_date': None,
+                'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9',
+                'is_live': True,
+                'subtitles': {}
             },
             'params': {
-                'skip_download': True,
+                'skip_download': True,  # m3u8 download
             },
-            'skip': 'Page Not Found',
         },
         {
-            'url': 'http://www1.wdr.de/themen/kultur/audioerlebtegeschichtenmargaspiegel100-audioplayer.html',
-            'md5': '83e9e8fefad36f357278759870805898',
+            'url': 'http://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html',
+            'playlist_mincount': 8,
             'info_dict': {
-                'id': 'mdb-194332',
-                'ext': 'mp3',
-                'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)',
-                'description': 'md5:2309992a6716c347891c045be50992e4',
-                'upload_date': '20091129',
-                'is_live': False
+                'id': 'aktuelle-stunde/aktuelle-stunde-120',
             },
         },
         {
-            'url': 'http://www.funkhauseuropa.de/av/audioflaviacoelhoamaramar100-audioplayer.html',
-            'md5': '99a1443ff29af19f6c52cf6f4dc1f4aa',
+            'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
             'info_dict': {
-                'id': 'mdb-478135',
-                'ext': 'mp3',
-                'title': 'Flavia Coelho: Amar é Amar',
-                'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a',
-                'upload_date': '20140717',
-                'is_live': False
+                'id': 'mdb-1096487',
+                'ext': 'flv',
+                'upload_date': 're:^[0-9]{8}$',
+                'title': 're:^Die Sendung mit der Maus vom [0-9.]{10}$',
+                'description': '- Die Sendung mit der Maus -',
             },
-            'skip': 'Page Not Found',
+            'skip': 'The id changes from week to week because of the new episode'
         },
         {
-            'url': 'http://www1.wdr.de/mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100.html',
-            'playlist_mincount': 146,
+            'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/achterbahn.php5',
+            'md5': '803138901f6368ee497b4d195bb164f2',
             'info_dict': {
-                'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100',
-            }
+                'id': 'mdb-186083',
+                'ext': 'mp4',
+                'upload_date': '20130919',
+                'title': 'Sachgeschichte - Achterbahn ',
+                'description': '- Die Sendung mit der Maus -',
+            },
         },
         {
-            'url': 'http://www1.wdr.de/mediathek/video/livestream/index.html',
+            'url': 'http://www1.wdr.de/radio/player/radioplayer116~_layout-popupVersion.html',
+            # Live stream, MD5 unstable
             'info_dict': {
-                'id': 'mdb-103364',
-                'title': 're:^WDR Fernsehen Live [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
-                'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9',
+                'id': 'mdb-869971',
                 'ext': 'flv',
-                'upload_date': '20150101',
-                'is_live': True
-            },
-            'params': {
-                'skip_download': True,
+                'title': 'Funkhaus Europa Livestream',
+                'description': 'md5:2309992a6716c347891c045be50992e4',
+                'upload_date': '20160101',
             },
         }
     ]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        page_url = mobj.group('url')
-        page_id = mobj.group('id')
+        url_type = mobj.group('type')
+        page_url = mobj.group('page_url')
+        display_id = mobj.group('display_id')
+        webpage = self._download_webpage(url, display_id)
 
-        webpage = self._download_webpage(url, page_id)
+        info_dict = self._extract_wdr_video(webpage, display_id)
 
-        if mobj.group('player') is None:
+        if not info_dict:
             entries = [
-                self.url_result(page_url + href, 'WDR')
+                self.url_result(page_url + href[0], 'WDR')
                 for href in re.findall(
-                    r'<a href="/?(.+?%s\.html)" rel="nofollow"' % self._PLAYER_REGEX,
+                    r'<a href="(%s)"[^>]+data-extension=' % self._PAGE_REGEX,
                     webpage)
             ]
 
             if entries:  # Playlist page
-                return self.playlist_result(entries, page_id)
+                return self.playlist_result(entries, playlist_id=display_id)
 
-            # Overview page
-            entries = []
-            for page_num in itertools.count(2):
-                hrefs = re.findall(
-                    r'<li class="mediathekvideo"\s*>\s*<img[^>]*>\s*<a href="(/mediathek/video/[^"]+)"',
-                    webpage)
-                entries.extend(
-                    self.url_result(page_url + href, 'WDR')
-                    for href in hrefs)
-                next_url_m = re.search(
-                    r'<li class="nextToLast">\s*<a href="([^"]+)"', webpage)
-                if not next_url_m:
-                    break
-                next_url = page_url + next_url_m.group(1)
-                webpage = self._download_webpage(
-                    next_url, page_id,
-                    note='Downloading playlist page %d' % page_num)
-            return self.playlist_result(entries, page_id)
-
-        flashvars = compat_parse_qs(self._html_search_regex(
-            r'<param name="flashvars" value="([^"]+)"', webpage, 'flashvars'))
+            raise ExtractorError('No downloadable streams found', expected=True)
 
-        page_id = flashvars['trackerClipId'][0]
-        video_url = flashvars['dslSrc'][0]
-        title = flashvars['trackerClipTitle'][0]
-        thumbnail = flashvars['startPicture'][0] if 'startPicture' in flashvars else None
-        is_live = flashvars.get('isLive', ['0'])[0] == '1'
+        is_live = url_type == 'live'
 
         if is_live:
-            title = self._live_title(title)
-
-        if 'trackerClipAirTime' in flashvars:
-            upload_date = flashvars['trackerClipAirTime'][0]
-        else:
-            upload_date = self._html_search_meta(
-                'DC.Date', webpage, 'upload date')
-
-        if upload_date:
-            upload_date = unified_strdate(upload_date)
-
-        formats = []
-        preference = qualities(['S', 'M', 'L', 'XL'])
-
-        if video_url.endswith('.f4m'):
-            formats.extend(self._extract_f4m_formats(
-                video_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', page_id,
-                f4m_id='hds', fatal=False))
-        elif video_url.endswith('.smil'):
-            formats.extend(self._extract_smil_formats(
-                video_url, page_id, False, {
-                    'hdcore': '3.3.0',
-                    'plugin': 'aasp-3.3.0.99.43',
-                }))
-        else:
-            formats.append({
-                'url': video_url,
-                'http_headers': {
-                    'User-Agent': 'mobile',
-                },
+            info_dict.update({
+                'title': self._live_title(info_dict['title']),
+                'upload_date': None,
             })
+        elif 'upload_date' not in info_dict:
+            info_dict['upload_date'] = unified_strdate(self._html_search_meta('DC.Date', webpage, 'upload date'))
 
-        m3u8_url = self._search_regex(
-            r'rel="adaptiv"[^>]+href="([^"]+)"',
-            webpage, 'm3u8 url', default=None)
-        if m3u8_url:
-            formats.extend(self._extract_m3u8_formats(
-                m3u8_url, page_id, 'mp4', 'm3u8_native',
-                m3u8_id='hls', fatal=False))
+        info_dict.update({
+            'description': self._html_search_meta('Description', webpage),
+            'is_live': is_live,
+        })
 
-        direct_urls = re.findall(
-            r'rel="web(S|M|L|XL)"[^>]+href="([^"]+)"', webpage)
-        if direct_urls:
-            for quality, video_url in direct_urls:
-                formats.append({
-                    'url': video_url,
-                    'preference': preference(quality),
-                    'http_headers': {
-                        'User-Agent': 'mobile',
-                    },
-                })
-
-        self._sort_formats(formats)
-
-        description = self._html_search_meta('Description', webpage, 'description')
-
-        return {
-            'id': page_id,
-            'formats': formats,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'upload_date': upload_date,
-            'is_live': is_live
-        }
+        return info_dict
 
 
 class WDRMobileIE(InfoExtractor):
@@ -241,81 +262,3 @@ class WDRMobileIE(InfoExtractor):
                 'User-Agent': 'mobile',
             },
         }
-
-
-class WDRMausIE(InfoExtractor):
-    _VALID_URL = 'http://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P<id>[^/?#]+)(?:/index\.php5|(?<!index)\.php5|/(?:$|[?#]))'
-    IE_DESC = 'Sendung mit der Maus'
-    _TESTS = [{
-        'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
-        'info_dict': {
-            'id': 'aktuelle-sendung',
-            'ext': 'mp4',
-            'thumbnail': 're:^http://.+\.jpg',
-            'upload_date': 're:^[0-9]{8}$',
-            'title': 're:^[0-9.]{10} - Aktuelle Sendung$',
-        }
-    }, {
-        'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/40_jahre_maus.php5',
-        'md5': '3b1227ca3ed28d73ec5737c65743b2a3',
-        'info_dict': {
-            'id': '40_jahre_maus',
-            'ext': 'mp4',
-            'thumbnail': 're:^http://.+\.jpg',
-            'upload_date': '20131007',
-            'title': '12.03.2011 - 40 Jahre Maus',
-        }
-    }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, video_id)
-        param_code = self._html_search_regex(
-            r'<a href="\?startVideo=1&amp;([^"]+)"', webpage, 'parameters')
-
-        title_date = self._search_regex(
-            r'<div class="sendedatum"><p>Sendedatum:\s*([0-9\.]+)</p>',
-            webpage, 'air date')
-        title_str = self._html_search_regex(
-            r'<h1>(.*?)</h1>', webpage, 'title')
-        title = '%s - %s' % (title_date, title_str)
-        upload_date = unified_strdate(
-            self._html_search_meta('dc.date', webpage))
-
-        fields = compat_parse_qs(param_code)
-        video_url = fields['firstVideo'][0]
-        thumbnail = compat_urlparse.urljoin(url, fields['startPicture'][0])
-
-        formats = [{
-            'format_id': 'rtmp',
-            'url': video_url,
-        }]
-
-        jscode = self._download_webpage(
-            'http://www.wdrmaus.de/codebase/js/extended-medien.min.js',
-            video_id, fatal=False,
-            note='Downloading URL translation table',
-            errnote='Could not download URL translation table')
-        if jscode:
-            for m in re.finditer(
-                    r"stream:\s*'dslSrc=(?P<stream>[^']+)',\s*download:\s*'(?P<dl>[^']+)'\s*\}",
-                    jscode):
-                if video_url.startswith(m.group('stream')):
-                    http_url = video_url.replace(
-                        m.group('stream'), m.group('dl'))
-                    formats.append({
-                        'format_id': 'http',
-                        'url': http_url,
-                    })
-                    break
-
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'formats': formats,
-            'thumbnail': thumbnail,
-            'upload_date': upload_date,
-        }
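
The rewritten WDR code above downloads the mediaObj URL as JSONP and unwraps it with strip_jsonp before reading trackerData and mediaResource; a minimal sketch with a hypothetical callback and payload:

    import json

    from youtube_dl.utils import strip_jsonp

    jsonp = 'callback({"trackerData": {"trackerClipTitle": "Geheimnis Aachener Dom"}});'  # hypothetical
    metadata = json.loads(strip_jsonp(jsonp))
    print(metadata['trackerData']['trackerClipTitle'])
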
diff --git a/youtube_dl/extractor/webofstories.py b/youtube_dl/extractor/webofstories.py
index 2037d9b3d57cd5876d85e9552ffcc9f387fcc975..7aea47ed52f7f64032034ab43d51dbe524bff2b3 100644 (file)
--- a/youtube_dl/extractor/webofstories.py
+++ b/youtube_dl/extractor/webofstories.py
@@ -12,38 +12,52 @@ class WebOfStoriesIE(InfoExtractor):
     _VIDEO_DOMAIN = 'http://eu-mobile.webofstories.com/'
     _GREAT_LIFE_STREAMER = 'rtmp://eu-cdn1.webofstories.com/cfx/st/'
     _USER_STREAMER = 'rtmp://eu-users.webofstories.com/cfx/st/'
-    _TESTS = [
-        {
-            'url': 'http://www.webofstories.com/play/hans.bethe/71',
-            'md5': '373e4dd915f60cfe3116322642ddf364',
-            'info_dict': {
-                'id': '4536',
-                'ext': 'mp4',
-                'title': 'The temperature of the sun',
-                'thumbnail': 're:^https?://.*\.jpg$',
-                'description': 'Hans Bethe talks about calculating the temperature of the sun',
-                'duration': 238,
-            }
+    _TESTS = [{
+        'url': 'http://www.webofstories.com/play/hans.bethe/71',
+        'md5': '373e4dd915f60cfe3116322642ddf364',
+        'info_dict': {
+            'id': '4536',
+            'ext': 'mp4',
+            'title': 'The temperature of the sun',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'description': 'Hans Bethe talks about calculating the temperature of the sun',
+            'duration': 238,
+        }
+    }, {
+        'url': 'http://www.webofstories.com/play/55908',
+        'md5': '2985a698e1fe3211022422c4b5ed962c',
+        'info_dict': {
+            'id': '55908',
+            'ext': 'mp4',
+            'title': 'The story of Gemmata obscuriglobus',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'description': 'Planctomycete talks about The story of Gemmata obscuriglobus',
+            'duration': 169,
+        },
+        'skip': 'notfound',
+    }, {
+        # malformed og:title meta
+        'url': 'http://www.webofstories.com/play/54215?o=MS',
+        'info_dict': {
+            'id': '54215',
+            'ext': 'mp4',
+            'title': '"A Leg to Stand On"',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'description': 'Oliver Sacks talks about the death and resurrection of a limb',
+            'duration': 97,
         },
-        {
-            'url': 'http://www.webofstories.com/play/55908',
-            'md5': '2985a698e1fe3211022422c4b5ed962c',
-            'info_dict': {
-                'id': '55908',
-                'ext': 'mp4',
-                'title': 'The story of Gemmata obscuriglobus',
-                'thumbnail': 're:^https?://.*\.jpg$',
-                'description': 'Planctomycete talks about The story of Gemmata obscuriglobus',
-                'duration': 169,
-            }
+        'params': {
+            'skip_download': True,
         },
-    ]
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
-        title = self._og_search_title(webpage)
+        # Sometimes og:title meta is malformed
+        title = self._og_search_title(webpage, default=None) or self._html_search_regex(
+            r'(?s)<strong>Title:\s*</strong>(.+?)<', webpage, 'title')
         description = self._html_search_meta('description', webpage)
         thumbnail = self._og_search_thumbnail(webpage)
 
diff --git a/youtube_dl/extractor/weibo.py b/youtube_dl/extractor/weibo.py
deleted file mode 100644 (file)
index 20bb039..0000000
--- a/youtube_dl/extractor/weibo.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-
-
-class WeiboIE(InfoExtractor):
-    """
-    The videos in Weibo come from different sites, this IE just finds the link
-    to the external video and returns it.
-    """
-    _VALID_URL = r'https?://video\.weibo\.com/v/weishipin/t_(?P<id>.+?)\.htm'
-
-    _TEST = {
-        'url': 'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm',
-        'info_dict': {
-            'id': '98322879',
-            'ext': 'flv',
-            'title': '魔声耳机最新广告“All Eyes On Us”',
-        },
-        'params': {
-            'skip_download': True,
-        },
-        'add_ie': ['Sina'],
-    }
-
-    # Additional example videos from different sites
-    # Youku: http://video.weibo.com/v/weishipin/t_zQGDWQ8.htm
-    # 56.com: http://video.weibo.com/v/weishipin/t_zQ44HxN.htm
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
-        video_id = mobj.group('id')
-        info_url = 'http://video.weibo.com/?s=v&a=play_list&format=json&mix_video_id=t_%s' % video_id
-        info = self._download_json(info_url, video_id)
-
-        videos_urls = map(lambda v: v['play_page_url'], info['result']['data'])
-        # Prefer sina video since they have thumbnails
-        videos_urls = sorted(videos_urls, key=lambda u: 'video.sina.com' in u)
-        player_url = videos_urls[-1]
-        m_sina = re.match(r'https?://video\.sina\.com\.cn/v/b/(\d+)-\d+\.html',
-                          player_url)
-        if m_sina is not None:
-            self.to_screen('Sina video detected')
-            sina_id = m_sina.group(1)
-            player_url = 'http://you.video.sina.com.cn/swf/quotePlayer.swf?vid=%s' % sina_id
-        return self.url_result(player_url)
diff --git a/youtube_dl/extractor/weiqitv.py b/youtube_dl/extractor/weiqitv.py
index e333ae345b16e6ff6a25d2f57559c970eaf601d9..3dafbeec2c5f7ba0b2e18ec621c67966214d3307 100644 (file)
--- a/youtube_dl/extractor/weiqitv.py
+++ b/youtube_dl/extractor/weiqitv.py
@@ -6,7 +6,7 @@ from .common import InfoExtractor
 
 class WeiqiTVIE(InfoExtractor):
     IE_DESC = 'WQTV'
-    _VALID_URL = r'http://www\.weiqitv\.com/index/video_play\?videoId=(?P<id>[A-Za-z0-9]+)'
+    _VALID_URL = r'https?://www\.weiqitv\.com/index/video_play\?videoId=(?P<id>[A-Za-z0-9]+)'
 
     _TESTS = [{
         'url': 'http://www.weiqitv.com/index/video_play?videoId=53c744f09874f0e76a8b46f3',
diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py
index 041ff6c555123d44c97bc63810d2aa7903ec069e..54eb5142793827f8b733592d22b979d326593bee 100644 (file)
--- a/youtube_dl/extractor/wimp.py
+++ b/youtube_dl/extractor/wimp.py
@@ -1,29 +1,33 @@
 from __future__ import unicode_literals
 
-from .common import InfoExtractor
 from .youtube import YoutubeIE
+from .jwplatform import JWPlatformBaseIE
 
 
-class WimpIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?wimp\.com/(?P<id>[^/]+)'
+class WimpIE(JWPlatformBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?wimp\.com/(?P<id>[^/]+)'
     _TESTS = [{
-        'url': 'http://www.wimp.com/maruexhausted/',
+        'url': 'http://www.wimp.com/maru-is-exhausted/',
         'md5': 'ee21217ffd66d058e8b16be340b74883',
         'info_dict': {
-            'id': 'maruexhausted',
+            'id': 'maru-is-exhausted',
             'ext': 'mp4',
             'title': 'Maru is exhausted.',
             'description': 'md5:57e099e857c0a4ea312542b684a869b8',
         }
     }, {
         'url': 'http://www.wimp.com/clowncar/',
-        'md5': '4e2986c793694b55b37cf92521d12bb4',
+        'md5': '5c31ad862a90dc5b1f023956faec13fe',
         'info_dict': {
-            'id': 'clowncar',
-            'ext': 'mp4',
-            'title': 'It\'s like a clown car.',
-            'description': 'md5:0e56db1370a6e49c5c1d19124c0d2fb2',
+            'id': 'cG4CEr2aiSg',
+            'ext': 'webm',
+            'title': 'Basset hound clown car...incredible!',
+            'description': '5 of my Bassets crawled in this dog loo! www.bellinghambassets.com\n\nFor licensing/usage please contact: licensing(at)jukinmediadotcom',
+            'upload_date': '20140303',
+            'uploader': 'Gretchen Hoey',
+            'uploader_id': 'gretchenandjeff1',
         },
+        'add_ie': ['Youtube'],
     }]
 
     def _real_extract(self, url):
@@ -41,14 +45,13 @@ class WimpIE(InfoExtractor):
                 'ie_key': YoutubeIE.ie_key(),
             }
 
-        video_url = self._search_regex(
-            r'<video[^>]+>\s*<source[^>]+src=(["\'])(?P<url>.+?)\1',
-            webpage, 'video URL', group='url')
+        info_dict = self._extract_jwplayer_data(
+            webpage, video_id, require_title=False)
 
-        return {
+        info_dict.update({
             'id': video_id,
-            'url': video_url,
             'title': self._og_search_title(webpage),
-            'thumbnail': self._og_search_thumbnail(webpage),
             'description': self._og_search_description(webpage),
-        }
+        })
+
+        return info_dict
diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py
index fdb16d91c25ae4a3d3c8314a76050d09cf86ef3a..c634b8decddf8fdb15649b05e8f49ad9efc36254 100644 (file)
--- a/youtube_dl/extractor/wistia.py
+++ b/youtube_dl/extractor/wistia.py
@@ -3,63 +3,96 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
-    sanitized_Request,
+    int_or_none,
+    float_or_none,
 )
 
 
 class WistiaIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)'
-    _API_URL = 'http://fast.wistia.com/embed/medias/{0:}.json'
+    _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.net/embed/iframe/)(?P<id>[a-z0-9]+)'
+    _API_URL = 'http://fast.wistia.com/embed/medias/%s.json'
+    _IFRAME_URL = 'http://fast.wistia.net/embed/iframe/%s'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
         'md5': 'cafeb56ec0c53c18c97405eecb3133df',
         'info_dict': {
             'id': 'sh7fpupwlt',
             'ext': 'mov',
             'title': 'Being Resourceful',
+            'description': 'a Clients From Hell Video Series video from worldwidewebhosting',
+            'upload_date': '20131204',
+            'timestamp': 1386185018,
             'duration': 117,
         },
-    }
+    }, {
+        'url': 'wistia:sh7fpupwlt',
+        'only_matching': True,
+    }, {
+        # with hls video
+        'url': 'wistia:807fafadvk',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        request = sanitized_Request(self._API_URL.format(video_id))
-        request.add_header('Referer', url)  # Some videos require this.
-        data_json = self._download_json(request, video_id)
+        data_json = self._download_json(
+            self._API_URL % video_id, video_id,
+            # Some videos require this.
+            headers={
+                'Referer': url if url.startswith('http') else self._IFRAME_URL % video_id,
+            })
+
         if data_json.get('error'):
-            raise ExtractorError('Error while getting the playlist',
-                                 expected=True)
+            raise ExtractorError(
+                'Error while getting the playlist', expected=True)
+
         data = data_json['media']
+        title = data['name']
 
         formats = []
         thumbnails = []
-        for atype, a in data['assets'].items():
-            if atype == 'still':
-                thumbnails.append({
-                    'url': a['url'],
-                    'resolution': '%dx%d' % (a['width'], a['height']),
-                })
+        for a in data['assets']:
+            aurl = a.get('url')
+            if not aurl:
                 continue
-            if atype == 'preview':
+            astatus = a.get('status')
+            atype = a.get('type')
+            if (astatus is not None and astatus != 2) or atype in ('preview', 'storyboard'):
                 continue
-            formats.append({
-                'format_id': atype,
-                'url': a['url'],
-                'width': a['width'],
-                'height': a['height'],
-                'filesize': a['size'],
-                'ext': a['ext'],
-                'preference': 1 if atype == 'original' else None,
-            })
+            elif atype in ('still', 'still_image'):
+                thumbnails.append({
+                    'url': aurl,
+                    'width': int_or_none(a.get('width')),
+                    'height': int_or_none(a.get('height')),
+                })
+            else:
+                aext = a.get('ext')
+                is_m3u8 = a.get('container') == 'm3u8' or aext == 'm3u8'
+                formats.append({
+                    'format_id': atype,
+                    'url': aurl,
+                    'tbr': int_or_none(a.get('bitrate')),
+                    'vbr': int_or_none(a.get('opt_vbitrate')),
+                    'width': int_or_none(a.get('width')),
+                    'height': int_or_none(a.get('height')),
+                    'filesize': int_or_none(a.get('size')),
+                    'vcodec': a.get('codec'),
+                    'container': a.get('container'),
+                    'ext': 'mp4' if is_m3u8 else aext,
+                    'protocol': 'm3u8' if is_m3u8 else None,
+                    'preference': 1 if atype == 'original' else None,
+                })
 
         self._sort_formats(formats)
 
         return {
             'id': video_id,
-            'title': data['name'],
+            'title': title,
+            'description': data.get('seoDescription'),
             'formats': formats,
             'thumbnails': thumbnails,
-            'duration': data.get('duration'),
+            'duration': float_or_none(data.get('duration')),
+            'timestamp': int_or_none(data.get('createdAt')),
         }
diff --git a/youtube_dl/extractor/wrzuta.py b/youtube_dl/extractor/wrzuta.py
index c427649211079715a5510eef3eaf35981bdb1034..bdd7097baec16afb2a3c83dbdde0e89ebc713a69 100644 (file)
--- a/youtube_dl/extractor/wrzuta.py
+++ b/youtube_dl/extractor/wrzuta.py
@@ -5,8 +5,10 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
+    ExtractorError,
     int_or_none,
     qualities,
+    remove_start,
 )
 
 
@@ -26,16 +28,17 @@ class WrzutaIE(InfoExtractor):
             'uploader_id': 'laboratoriumdextera',
             'description': 'md5:7fb5ef3c21c5893375fda51d9b15d9cd',
         },
+        'skip': 'Redirected to wrzuta.pl',
     }, {
-        'url': 'http://jolka85.wrzuta.pl/audio/063jOPX5ue2/liber_natalia_szroeder_-_teraz_ty',
-        'md5': 'bc78077859bea7bcfe4295d7d7fc9025',
+        'url': 'http://vexling.wrzuta.pl/audio/01xBFabGXu6/james_horner_-_into_the_na_39_vi_world_bonus',
+        'md5': 'f80564fb5a2ec6ec59705ae2bf2ba56d',
         'info_dict': {
-            'id': '063jOPX5ue2',
-            'ext': 'ogg',
-            'title': 'Liber & Natalia Szroeder - Teraz Ty',
-            'duration': 203,
-            'uploader_id': 'jolka85',
-            'description': 'md5:2d2b6340f9188c8c4cd891580e481096',
+            'id': '01xBFabGXu6',
+            'ext': 'mp3',
+            'title': 'James Horner - Into The Na\'vi World [Bonus]',
+            'description': 'md5:30a70718b2cd9df3120fce4445b0263b',
+            'duration': 95,
+            'uploader_id': 'vexling',
         },
     }]
 
@@ -45,7 +48,10 @@ class WrzutaIE(InfoExtractor):
         typ = mobj.group('typ')
         uploader = mobj.group('uploader')
 
-        webpage = self._download_webpage(url, video_id)
+        webpage, urlh = self._download_webpage_handle(url, video_id)
+
+        if urlh.geturl() == 'http://www.wrzuta.pl/':
+            raise ExtractorError('Video removed', expected=True)
 
         quality = qualities(['SD', 'MQ', 'HQ', 'HD'])
 
@@ -80,3 +86,73 @@ class WrzutaIE(InfoExtractor):
             'description': self._og_search_description(webpage),
             'age_limit': embedpage.get('minimalAge', 0),
         }
+
+
+class WrzutaPlaylistIE(InfoExtractor):
+    """
+        This class covers extraction of Wrzuta playlist entries.
+        The extraction proceeds in the following steps:
+        * collect the playlist size
+        * download all entries provided on the playlist webpage
+          (the playlist is split across two pages: the first is reached
+          directly from the webpage, the second is downloaded on demand
+          by an AJAX call and rendered from the AJAX response)
+        * if the number of extracted entries has not reached the total
+          number of entries, use the AJAX call to collect the remaining
+          entries
+    """
+
+    IE_NAME = 'wrzuta.pl:playlist'
+    _VALID_URL = r'https?://(?P<uploader>[0-9a-zA-Z]+)\.wrzuta\.pl/playlista/(?P<id>[0-9a-zA-Z]+)'
+    _TESTS = [{
+        'url': 'http://miromak71.wrzuta.pl/playlista/7XfO4vE84iR/moja_muza',
+        'playlist_mincount': 14,
+        'info_dict': {
+            'id': '7XfO4vE84iR',
+            'title': 'Moja muza',
+        },
+    }, {
+        'url': 'http://heroesf70.wrzuta.pl/playlista/6Nj3wQHx756/lipiec_-_lato_2015_muzyka_swiata',
+        'playlist_mincount': 144,
+        'info_dict': {
+            'id': '6Nj3wQHx756',
+            'title': 'Lipiec - Lato 2015 Muzyka Świata',
+        },
+    }, {
+        'url': 'http://miromak71.wrzuta.pl/playlista/7XfO4vE84iR',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        playlist_id = mobj.group('id')
+        uploader = mobj.group('uploader')
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        playlist_size = int_or_none(self._html_search_regex(
+            (r'<div[^>]+class=["\']playlist-counter["\'][^>]*>\d+/(\d+)',
+             r'<div[^>]+class=["\']all-counter["\'][^>]*>(.+?)</div>'),
+            webpage, 'playlist size', default=None))
+
+        playlist_title = remove_start(
+            self._og_search_title(webpage), 'Playlista: ')
+
+        entries = []
+        if playlist_size:
+            entries = [
+                self.url_result(entry_url)
+                for _, entry_url in re.findall(
+                    r'<a[^>]+href=(["\'])(http.+?)\1[^>]+class=["\']playlist-file-page',
+                    webpage)]
+            if playlist_size > len(entries):
+                playlist_content = self._download_json(
+                    'http://%s.wrzuta.pl/xhr/get_playlist_offset/%s' % (uploader, playlist_id),
+                    playlist_id,
+                    'Downloading playlist JSON',
+                    'Unable to download playlist JSON')
+                entries.extend([
+                    self.url_result(entry['filelink'])
+                    for entry in playlist_content.get('files', []) if entry.get('filelink')])
+
+        return self.playlist_result(entries, playlist_id, playlist_title)
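
Two helpers used above in one small sketch: remove_start strips the 'Playlista: ' prefix from og:title, and the size check decides whether the second, AJAX-rendered half of the playlist must still be fetched (hypothetical counts):

    from youtube_dl.utils import remove_start

    print(remove_start('Playlista: Moja muza', 'Playlista: '))  # -> Moja muza

    playlist_size, entries = 144, ['entry'] * 30  # hypothetical
    if playlist_size > len(entries):
        print('%d entries still to fetch via get_playlist_offset' % (playlist_size - len(entries)))
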
diff --git a/youtube_dl/extractor/wsj.py b/youtube_dl/extractor/wsj.py
index 5a897371d1d69a95e08f7b4da4d457b3236e09cc..a83e68b17f53ca06fa4b76af6e3aa560fb23e107 100644 (file)
--- a/youtube_dl/extractor/wsj.py
+++ b/youtube_dl/extractor/wsj.py
@@ -4,16 +4,22 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
+    float_or_none,
     unified_strdate,
 )
 
 
 class WSJIE(InfoExtractor):
-    _VALID_URL = r'https?://video-api\.wsj\.com/api-video/player/iframe\.html\?guid=(?P<id>[a-zA-Z0-9-]+)'
+    _VALID_URL = r'''(?x)https?://
+        (?:
+            video-api\.wsj\.com/api-video/player/iframe\.html\?guid=|
+            (?:www\.)?wsj\.com/video/[^/]+/
+        )
+        (?P<id>[a-zA-Z0-9-]+)'''
     IE_DESC = 'Wall Street Journal'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://video-api.wsj.com/api-video/player/iframe.html?guid=1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
-        'md5': '9747d7a6ebc2f4df64b981e1dde9efa9',
+        'md5': 'e230a5bb249075e40793b655a54a02e4',
         'info_dict': {
             'id': '1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
             'ext': 'mp4',
@@ -24,65 +30,60 @@ class WSJIE(InfoExtractor):
             'duration': 90,
             'title': 'Bills Coach Rex Ryan Updates His Old Jets Tattoo',
         },
-    }
+    }, {
+        'url': 'http://www.wsj.com/video/can-alphabet-build-a-smarter-city/359DDAA8-9AC1-489C-82E6-0429C1E430E0.html',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        bitrates = [128, 174, 264, 320, 464, 664, 1264]
         api_url = (
             'http://video-api.wsj.com/api-video/find_all_videos.asp?'
-            'type=guid&count=1&query=%s&'
-            'fields=hls,adZone,thumbnailList,guid,state,secondsUntilStartTime,'
-            'author,description,name,linkURL,videoStillURL,duration,videoURL,'
-            'adCategory,catastrophic,linkShortURL,doctypeID,youtubeID,'
-            'titletag,rssURL,wsj-section,wsj-subsection,allthingsd-section,'
-            'allthingsd-subsection,sm-section,sm-subsection,provider,'
-            'formattedCreationDate,keywords,keywordsOmniture,column,editor,'
-            'emailURL,emailPartnerID,showName,omnitureProgramName,'
-            'omnitureVideoFormat,linkRelativeURL,touchCastID,'
-            'omniturePublishDate,%s') % (
-                video_id, ','.join('video%dkMP4Url' % br for br in bitrates))
+            'type=guid&count=1&query=%s&fields=type,hls,videoMP4List,'
+            'thumbnailList,author,description,name,duration,videoURL,'
+            'titletag,formattedCreationDate,keywords,editor' % video_id)
         info = self._download_json(api_url, video_id)['items'][0]
-
-        # Thumbnails are conveniently in the correct format already
-        thumbnails = info.get('thumbnailList')
-        creator = info.get('author')
-        uploader_id = info.get('editor')
-        categories = info.get('keywords')
-        duration = int_or_none(info.get('duration'))
-        upload_date = unified_strdate(
-            info.get('formattedCreationDate'), day_first=False)
         title = info.get('name', info.get('titletag'))
 
-        formats = [{
-            'format_id': 'f4m',
-            'format_note': 'f4m (meta URL)',
-            'url': info['videoURL'],
-        }]
-        if info.get('hls'):
+        formats = []
+
+        f4m_url = info.get('videoURL')
+        if f4m_url:
+            formats.extend(self._extract_f4m_formats(
+                f4m_url, video_id, f4m_id='hds', fatal=False))
+
+        m3u8_url = info.get('hls')
+        if m3u8_url:
             formats.extend(self._extract_m3u8_formats(
                 info['hls'], video_id, ext='mp4',
-                preference=0, entry_protocol='m3u8_native'))
-        for br in bitrates:
-            field = 'video%dkMP4Url' % br
-            if info.get(field):
-                formats.append({
-                    'format_id': 'mp4-%d' % br,
-                    'container': 'mp4',
-                    'tbr': br,
-                    'url': info[field],
-                })
+                entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
+
+        for v in info.get('videoMP4List', []):
+            mp4_url = v.get('url')
+            if not mp4_url:
+                continue
+            tbr = int_or_none(v.get('bitrate'))
+            formats.append({
+                'url': mp4_url,
+                'format_id': 'http' + ('-%d' % tbr if tbr else ''),
+                'tbr': tbr,
+                'width': int_or_none(v.get('width')),
+                'height': int_or_none(v.get('height')),
+                'fps': float_or_none(v.get('fps')),
+            })
         self._sort_formats(formats)
 
         return {
             'id': video_id,
             'formats': formats,
-            'thumbnails': thumbnails,
-            'creator': creator,
-            'uploader_id': uploader_id,
-            'duration': duration,
-            'upload_date': upload_date,
+            # Thumbnails are conveniently in the correct format already
+            'thumbnails': info.get('thumbnailList'),
+            'creator': info.get('author'),
+            'uploader_id': info.get('editor'),
+            'duration': int_or_none(info.get('duration')),
+            'upload_date': unified_strdate(info.get(
+                'formattedCreationDate'), day_first=False),
             'title': title,
-            'categories': categories,
+            'categories': info.get('keywords'),
         }
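
The rewritten WSJ return dict leans on youtube-dl's lenient conversion helpers, which yield None on missing or malformed input instead of raising; for example:

    from youtube_dl.utils import float_or_none, int_or_none

    print(int_or_none('90'))       # 90
    print(int_or_none(None))       # None
    print(float_or_none('29.97'))  # 29.97, as used for 'fps' above
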
diff --git a/youtube_dl/extractor/xbef.py b/youtube_dl/extractor/xbef.py
index 4ff99e5ca37fb8f4f0b663cc99761c31e75f1cf4..e4a2baad22534d772a90b8ec5832c11833f10281 100644 (file)
--- a/youtube_dl/extractor/xbef.py
+++ b/youtube_dl/extractor/xbef.py
@@ -5,7 +5,7 @@ from ..compat import compat_urllib_parse_unquote
 
 
 class XBefIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?xbef\.com/video/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?xbef\.com/video/(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://xbef.com/video/5119-glamourous-lesbians-smoking-drinking-and-fucking',
         'md5': 'a478b565baff61634a98f5e5338be995',
diff --git a/youtube_dl/extractor/xboxclips.py b/youtube_dl/extractor/xboxclips.py
index 236ff403bd08f941a2eb023cd41c3bb21c49d4c3..b113ab1c4891fdf96898d359cf38779d61b394f8 100644 (file)
--- a/youtube_dl/extractor/xboxclips.py
+++ b/youtube_dl/extractor/xboxclips.py
@@ -12,7 +12,7 @@ from ..utils import (
 class XboxClipsIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?xboxclips\.com/(?:video\.php\?.*vid=|[^/]+/)(?P<id>[\w-]{36})'
     _TEST = {
-        'url': 'https://xboxclips.com/video.php?uid=2533274823424419&gamertag=Iabdulelah&vid=074a69a9-5faf-46aa-b93b-9909c1720325',
+        'url': 'http://xboxclips.com/video.php?uid=2533274823424419&gamertag=Iabdulelah&vid=074a69a9-5faf-46aa-b93b-9909c1720325',
         'md5': 'fbe1ec805e920aeb8eced3c3e657df5d',
         'info_dict': {
             'id': '074a69a9-5faf-46aa-b93b-9909c1720325',
diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py
index a3236e66cdba09235ff94960bc8a47984e6c3eb7..995aada0d1565ccc76f2fa2c7654e3f53f834d91 100644 (file)
--- a/youtube_dl/extractor/xfileshare.py
+++ b/youtube_dl/extractor/xfileshare.py
@@ -4,31 +4,45 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse
 from ..utils import (
+    decode_packed_codes,
     ExtractorError,
-    encode_dict,
     int_or_none,
+    NO_DEFAULT,
     sanitized_Request,
+    urlencode_postdata,
 )
 
 
 class XFileShareIE(InfoExtractor):
-    IE_DESC = 'XFileShare based sites: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net, filehoot.com and vidto.me'
-    _VALID_URL = r'''(?x)
-        https?://(?P<host>(?:www\.)?
-            (?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in|realvid\.net|filehoot\.com|vidto\.me))/
-        (?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?
-    '''
-
-    _FILE_NOT_FOUND_REGEX = r'>(?:404 - )?File Not Found<'
+    _SITES = (
+        ('daclips.in', 'DaClips'),
+        ('filehoot.com', 'FileHoot'),
+        ('gorillavid.in', 'GorillaVid'),
+        ('movpod.in', 'MovPod'),
+        ('powerwatch.pw', 'PowerWatch'),
+        ('rapidvideo.ws', 'Rapidvideo.ws'),
+        ('thevideobee.to', 'TheVideoBee'),
+        ('vidto.me', 'Vidto'),
+        ('streamin.to', 'Streamin.To'),
+        ('xvidstage.com', 'XVIDSTAGE'),
+    )
+
+    IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
+    _VALID_URL = (r'https?://(?P<host>(?:www\.)?(?:%s))/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
+                  % '|'.join(re.escape(site) for site in list(zip(*_SITES))[0]))
+
+    _FILE_NOT_FOUND_REGEXES = (
+        r'>(?:404 - )?File Not Found<',
+        r'>The file was removed by administrator<',
+    )
 
     _TESTS = [{
         'url': 'http://gorillavid.in/06y9juieqpmi',
         'md5': '5ae4a3580620380619678ee4875893ba',
         'info_dict': {
             'id': '06y9juieqpmi',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Rebecca Black My Moment Official Music Video Reaction-6GK87Rc8bzQ',
             'thumbnail': 're:http://.*\.jpg',
         },
@@ -44,25 +58,6 @@ class XFileShareIE(InfoExtractor):
             'title': 'Micro Pig piglets ready on 16th July 2009-bG0PdrCdxUc',
             'thumbnail': 're:http://.*\.jpg',
         }
-    }, {
-        # video with countdown timeout
-        'url': 'http://fastvideo.in/1qmdn1lmsmbw',
-        'md5': '8b87ec3f6564a3108a0e8e66594842ba',
-        'info_dict': {
-            'id': '1qmdn1lmsmbw',
-            'ext': 'mp4',
-            'title': 'Man of Steel - Trailer',
-            'thumbnail': 're:http://.*\.jpg',
-        },
-    }, {
-        'url': 'http://realvid.net/ctn2y6p2eviw',
-        'md5': 'b2166d2cf192efd6b6d764c18fd3710e',
-        'info_dict': {
-            'id': 'ctn2y6p2eviw',
-            'ext': 'flv',
-            'title': 'rdx 1955',
-            'thumbnail': 're:http://.*\.jpg',
-        },
     }, {
         'url': 'http://movpod.in/0wguyyxi1yca',
         'only_matching': True,
@@ -73,7 +68,8 @@ class XFileShareIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'youtube-dl test video \'äBaW_jenozKc.mp4.mp4',
             'thumbnail': 're:http://.*\.jpg',
-        }
+        },
+        'skip': 'Video removed',
     }, {
         'url': 'http://vidto.me/ku5glz52nqe1.html',
         'info_dict': {
@@ -81,6 +77,24 @@ class XFileShareIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'test'
         }
+    }, {
+        'url': 'http://powerwatch.pw/duecjibvicbu',
+        'info_dict': {
+            'id': 'duecjibvicbu',
+            'ext': 'mp4',
+            'title': 'Big Buck Bunny trailer',
+        },
+    }, {
+        'url': 'http://xvidstage.com/e0qcnl03co6z',
+        'info_dict': {
+            'id': 'e0qcnl03co6z',
+            'ext': 'mp4',
+            'title': 'Chucky Prank 2015.mp4',
+        },
+    }, {
+        # removed by administrator
+        'url': 'http://xvidstage.com/amfy7atlkx25',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -90,7 +104,7 @@ class XFileShareIE(InfoExtractor):
         url = 'http://%s/%s' % (mobj.group('host'), video_id)
         webpage = self._download_webpage(url, video_id)
 
-        if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None:
+        if any(re.search(p, webpage) for p in self._FILE_NOT_FOUND_REGEXES):
             raise ExtractorError('Video %s does not exist' % video_id, expected=True)
 
         fields = self._hidden_inputs(webpage)
@@ -102,7 +116,7 @@ class XFileShareIE(InfoExtractor):
             if countdown:
                 self._sleep(countdown, video_id)
 
-            post = compat_urllib_parse.urlencode(encode_dict(fields))
+            post = urlencode_postdata(fields)
 
             req = sanitized_Request(url, post)
             req.add_header('Content-type', 'application/x-www-form-urlencoded')
@@ -112,13 +126,27 @@ class XFileShareIE(InfoExtractor):
         title = (self._search_regex(
             [r'style="z-index: [0-9]+;">([^<]+)</span>',
              r'<td nowrap>([^<]+)</td>',
+             r'h4-fine[^>]*>([^<]+)<',
              r'>Watch (.+) ',
              r'<h2 class="video-page-head">([^<]+)</h2>'],
             webpage, 'title', default=None) or self._og_search_title(webpage)).strip()
-        video_url = self._search_regex(
-            [r'file\s*:\s*["\'](http[^"\']+)["\'],',
-             r'file_link\s*=\s*\'(https?:\/\/[0-9a-zA-z.\/\-_]+)'],
-            webpage, 'file url')
+
+        def extract_video_url(default=NO_DEFAULT):
+            return self._search_regex(
+                (r'file\s*:\s*(["\'])(?P<url>http.+?)\1,',
+                 r'file_link\s*=\s*(["\'])(?P<url>http.+?)\1',
+                 r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http.+?)\2\)',
+                 r'<embed[^>]+src=(["\'])(?P<url>http.+?)\1'),
+                webpage, 'file url', default=default, group='url')
+
+        video_url = extract_video_url(default=None)
+
+        if not video_url:
+            webpage = decode_packed_codes(self._search_regex(
+                r"(}\('(.+)',(\d+),(\d+),'[^']*\b(?:file|embed)\b[^']*'\.split\('\|'\))",
+                webpage, 'packed code'))
+            video_url = extract_video_url()
+
         thumbnail = self._search_regex(
             r'image\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'thumbnail', default=None)
 
diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py
index fd43e8854c994e5b15c661ce49636853c41d2ecd..bd8e1af2e0f6c25fc44aea36c23b813b092b4438 100644 (file)
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -4,6 +4,7 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
+    dict_get,
     float_or_none,
     int_or_none,
     unified_strdate,
@@ -11,37 +12,52 @@ from ..utils import (
 
 
 class XHamsterIE(InfoExtractor):
-    _VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
-    _TESTS = [
-        {
-            'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
-            'info_dict': {
-                'id': '1509445',
-                'ext': 'mp4',
-                'title': 'FemaleAgent Shy beauty takes the bait',
-                'upload_date': '20121014',
-                'uploader': 'Ruseful2011',
-                'duration': 893.52,
-                'age_limit': 18,
-            }
+    _VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.*?)\.html(?:\?.*)?'
+    _TESTS = [{
+        'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
+        'md5': '8281348b8d3c53d39fffb377d24eac4e',
+        'info_dict': {
+            'id': '1509445',
+            'ext': 'mp4',
+            'title': 'FemaleAgent Shy beauty takes the bait',
+            'upload_date': '20121014',
+            'uploader': 'Ruseful2011',
+            'duration': 893.52,
+            'age_limit': 18,
         },
-        {
-            'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
-            'info_dict': {
-                'id': '2221348',
-                'ext': 'mp4',
-                'title': 'Britney Spears  Sexy Booty',
-                'upload_date': '20130914',
-                'uploader': 'jojo747400',
-                'duration': 200.48,
-                'age_limit': 18,
-            }
+    }, {
+        'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
+        'info_dict': {
+            'id': '2221348',
+            'ext': 'mp4',
+            'title': 'Britney Spears  Sexy Booty',
+            'upload_date': '20130914',
+            'uploader': 'jojo747400',
+            'duration': 200.48,
+            'age_limit': 18,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        # empty seo
+        'url': 'http://xhamster.com/movies/5667973/.html',
+        'info_dict': {
+            'id': '5667973',
+            'ext': 'mp4',
+            'title': '....',
+            'upload_date': '20160208',
+            'uploader': 'parejafree',
+            'duration': 72.0,
+            'age_limit': 18,
         },
-        {
-            'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html',
-            'only_matching': True,
+        'params': {
+            'skip_download': True,
         },
-    ]
+    }, {
+        'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         def extract_video_url(webpage, name):
@@ -169,7 +185,13 @@ class XHamsterEmbedIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         video_url = self._search_regex(
-            r'href="(https?://xhamster\.com/movies/%s/[^"]+\.html[^"]*)"' % video_id,
-            webpage, 'xhamster url')
+            r'href="(https?://xhamster\.com/movies/%s/[^"]*\.html[^"]*)"' % video_id,
+            webpage, 'xhamster url', default=None)
+
+        if not video_url:
+            vars = self._parse_json(
+                self._search_regex(r'vars\s*:\s*({.+?})\s*,\s*\n', webpage, 'vars'),
+                video_id)
+            video_url = dict_get(vars, ('downloadLink', 'homepageLink', 'commentsLink', 'shareUrl'))
 
         return self.url_result(video_url, 'XHamster')
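
The embed extractor now falls back to the player's JSON vars object and lets dict_get pick the first usable link. Roughly, the youtube_dl.utils helper behaves like this sketch (the real implementation may differ in detail):

def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    # return the value of the first listed key that is present and,
    # by default, truthy; fall back to `default` otherwise
    if isinstance(key_or_keys, (list, tuple)):
        for key in key_or_keys:
            if key not in d or d[key] is None or skip_false_values and not d[key]:
                continue
            return d[key]
        return default
    return d.get(key_or_keys, default)

# e.g. dict_get(vars, ('downloadLink', 'homepageLink', 'commentsLink', 'shareUrl'))
# yields the first of those links that the page actually carries.
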
diff --git a/youtube_dl/extractor/xiami.py b/youtube_dl/extractor/xiami.py
new file mode 100644
index 0000000..a6dfc4a
--- /dev/null
+++ b/youtube_dl/extractor/xiami.py
@@ -0,0 +1,169 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+from ..utils import int_or_none
+
+
+class XiamiBaseIE(InfoExtractor):
+    _API_BASE_URL = 'http://www.xiami.com/song/playlist/cat/json/id'
+
+    def _download_webpage(self, *args, **kwargs):
+        webpage = super(XiamiBaseIE, self)._download_webpage(*args, **kwargs)
+        if '>Xiami is currently not available in your country.<' in webpage:
+            self.raise_geo_restricted('Xiami is currently not available in your country')
+        return webpage
+
+    def _extract_track(self, track, track_id=None):
+        title = track['title']
+        track_url = self._decrypt(track['location'])
+
+        subtitles = {}
+        lyrics_url = track.get('lyric_url') or track.get('lyric')
+        if lyrics_url and lyrics_url.startswith('http'):
+            subtitles['origin'] = [{'url': lyrics_url}]
+
+        return {
+            'id': track.get('song_id') or track_id,
+            'url': track_url,
+            'title': title,
+            'thumbnail': track.get('pic') or track.get('album_pic'),
+            'duration': int_or_none(track.get('length')),
+            'creator': track.get('artist', '').split(';')[0],
+            'track': title,
+            'album': track.get('album_name'),
+            'artist': track.get('artist'),
+            'subtitles': subtitles,
+        }
+
+    def _extract_tracks(self, item_id, typ=None):
+        playlist = self._download_json(
+            '%s/%s%s' % (self._API_BASE_URL, item_id, '/type/%s' % typ if typ else ''), item_id)
+        return [
+            self._extract_track(track, item_id)
+            for track in playlist['data']['trackList']]
+
+    @staticmethod
+    def _decrypt(origin):
+        n = int(origin[0])
+        origin = origin[1:]
+        short_lenth = len(origin) // n
+        long_num = len(origin) - short_lenth * n
+        l = tuple()
+        for i in range(0, n):
+            length = short_lenth
+            if i < long_num:
+                length += 1
+            l += (origin[0:length], )
+            origin = origin[length:]
+        ans = ''
+        for i in range(0, short_lenth + 1):
+            for j in range(0, n):
+                if len(l[j]) > i:
+                    ans += l[j][i]
+        return compat_urllib_parse_unquote(ans).replace('^', '0')
+
+
+class XiamiSongIE(XiamiBaseIE):
+    IE_NAME = 'xiami:song'
+    IE_DESC = '虾米音乐'
+    _VALID_URL = r'https?://(?:www\.)?xiami\.com/song/(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'http://www.xiami.com/song/1775610518',
+        'md5': '521dd6bea40fd5c9c69f913c232cb57e',
+        'info_dict': {
+            'id': '1775610518',
+            'ext': 'mp3',
+            'title': 'Woman',
+            'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
+            'duration': 265,
+            'creator': 'HONNE',
+            'track': 'Woman',
+            'album': 'Woman',
+            'artist': 'HONNE',
+            'subtitles': {
+                'origin': [{
+                    'ext': 'lrc',
+                }],
+            },
+        },
+        'skip': 'Georestricted',
+    }, {
+        'url': 'http://www.xiami.com/song/1775256504',
+        'md5': '932a3abd45c6aa2b1fdbe028fcb4c4fc',
+        'info_dict': {
+            'id': '1775256504',
+            'ext': 'mp3',
+            'title': '悟空',
+            'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
+            'duration': 200,
+            'creator': '戴荃',
+            'track': '悟空',
+            'album': '悟空',
+            'artist': '戴荃',
+            'subtitles': {
+                'origin': [{
+                    'ext': 'lrc',
+                }],
+            },
+        },
+        'skip': 'Georestricted',
+    }]
+
+    def _real_extract(self, url):
+        return self._extract_tracks(self._match_id(url))[0]
+
+
+class XiamiPlaylistBaseIE(XiamiBaseIE):
+    def _real_extract(self, url):
+        item_id = self._match_id(url)
+        return self.playlist_result(self._extract_tracks(item_id, self._TYPE), item_id)
+
+
+class XiamiAlbumIE(XiamiPlaylistBaseIE):
+    IE_NAME = 'xiami:album'
+    IE_DESC = '虾米音乐 - 专辑'
+    _VALID_URL = r'https?://(?:www\.)?xiami\.com/album/(?P<id>[0-9]+)'
+    _TYPE = '1'
+    _TESTS = [{
+        'url': 'http://www.xiami.com/album/2100300444',
+        'info_dict': {
+            'id': '2100300444',
+        },
+        'playlist_count': 10,
+        'skip': 'Georestricted',
+    }, {
+        'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9',
+        'only_matching': True,
+    }]
+
+
+class XiamiArtistIE(XiamiPlaylistBaseIE):
+    IE_NAME = 'xiami:artist'
+    IE_DESC = '虾米音乐 - 歌手'
+    _VALID_URL = r'https?://(?:www\.)?xiami\.com/artist/(?P<id>[0-9]+)'
+    _TYPE = '2'
+    _TEST = {
+        'url': 'http://www.xiami.com/artist/2132?spm=0.0.0.0.dKaScp',
+        'info_dict': {
+            'id': '2132',
+        },
+        'playlist_count': 20,
+        'skip': 'Georestricted',
+    }
+
+
+class XiamiCollectionIE(XiamiPlaylistBaseIE):
+    IE_NAME = 'xiami:collection'
+    IE_DESC = '虾米音乐 - 精选集'
+    _VALID_URL = r'https?://(?:www\.)?xiami\.com/collect/(?P<id>[0-9]+)'
+    _TYPE = '3'
+    _TEST = {
+        'url': 'http://www.xiami.com/collect/156527391?spm=a1z1s.2943601.6856193.12.4jpBnr',
+        'info_dict': {
+            'id': '156527391',
+        },
+        'playlist_mincount': 29,
+        'skip': 'Georestricted',
+    }
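
The new _decrypt above undoes a simple columnar transposition: the first character of the stored string is the number of rows, the rest is the percent-encoded URL written row by row and read back column by column, with '0' masked as '^'. A standalone round-trip sketch follows; the encrypt side is a hypothetical inverse, written only to demonstrate the scheme:

try:
    from urllib.parse import quote, unquote  # Python 3
except ImportError:  # Python 2
    from urllib import quote, unquote

def xiami_decrypt(origin):
    # mirrors XiamiBaseIE._decrypt: the leading digit is the row count
    n = int(origin[0])
    body = origin[1:]
    short_len = len(body) // n
    long_rows = len(body) - short_len * n  # the first rows are one char longer
    rows = []
    for i in range(n):
        width = short_len + 1 if i < long_rows else short_len
        rows.append(body[:width])
        body = body[width:]
    # read the grid column by column
    flat = ''.join(
        rows[j][i]
        for i in range(short_len + 1)
        for j in range(n)
        if i < len(rows[j]))
    return unquote(flat).replace('^', '0')

def xiami_encrypt(url, n=2):
    # hypothetical inverse: write the percent-encoded URL column-wise,
    # then read it back row by row and prepend the row count
    flat = quote(url, safe='/:.').replace('0', '^')
    short_len = len(flat) // n
    long_rows = len(flat) - short_len * n
    widths = [short_len + 1 if j < long_rows else short_len for j in range(n)]
    rows = [[''] * w for w in widths]
    chars = iter(flat)
    for i in range(short_len + 1):
        for j in range(n):
            if i < widths[j]:
                rows[j][i] = next(chars)
    return str(n) + ''.join(''.join(row) for row in rows)

assert xiami_decrypt(xiami_encrypt('http://example.com/track01.mp3')) == 'http://example.com/track01.mp3'
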
diff --git a/youtube_dl/extractor/xminus.py b/youtube_dl/extractor/xminus.py
index 7c9d8af6f2585207347d58d08fc607ebf4d28900..36e5ead1e690db9bb0c1c1a64650a69c784bbe76 100644
@@ -2,15 +2,15 @@
 from __future__ import unicode_literals
 
 import re
+import time
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_chr,
     compat_ord,
 )
 from ..utils import (
     int_or_none,
-    parse_filesize,
+    parse_duration,
 )
 
 
@@ -22,7 +22,7 @@ class XMinusIE(InfoExtractor):
         'info_dict': {
             'id': '4542',
             'ext': 'mp3',
-            'title': 'Леонид Агутин-Песенка шофера',
+            'title': 'Леонид Агутин-Песенка шофёра',
             'duration': 156,
             'tbr': 320,
             'filesize_approx': 5900000,
@@ -36,38 +36,41 @@ class XMinusIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         artist = self._html_search_regex(
-            r'minus_track\.artist="(.+?)"', webpage, 'artist')
+            r'<a[^>]+href="/artist/\d+">([^<]+)</a>', webpage, 'artist')
         title = artist + '-' + self._html_search_regex(
-            r'minus_track\.title="(.+?)"', webpage, 'title')
-        duration = int_or_none(self._html_search_regex(
-            r'minus_track\.dur_sec=\'([0-9]*?)\'',
+            r'<span[^>]+class="minustrack-full-title(?:\s+[^"]+)?"[^>]*>([^<]+)', webpage, 'title')
+        duration = parse_duration(self._html_search_regex(
+            r'<span[^>]+class="player-duration(?:\s+[^"]+)?"[^>]*>([^<]+)',
             webpage, 'duration', fatal=False))
-        filesize_approx = parse_filesize(self._html_search_regex(
-            r'<div id="finfo"[^>]*>\s*↓\s*([0-9.]+\s*[a-zA-Z][bB])',
-            webpage, 'approximate filesize', fatal=False))
-        tbr = int_or_none(self._html_search_regex(
-            r'<div class="quality[^"]*"></div>\s*([0-9]+)\s*kbps',
-            webpage, 'bitrate', fatal=False))
+        mobj = re.search(
+            r'<div[^>]+class="dw-info(?:\s+[^"]+)?"[^>]*>(?P<tbr>\d+)\s*кбит/c\s+(?P<filesize>[0-9.]+)\s*мб</div>',
+            webpage)
+        tbr = filesize_approx = None
+        if mobj:
+            filesize_approx = float(mobj.group('filesize')) * 1000000
+            tbr = float(mobj.group('tbr'))
         view_count = int_or_none(self._html_search_regex(
-            r'<div class="quality.*?► ([0-9]+)',
+            r'<span><[^>]+class="icon-chart-bar".*?>(\d+)</span>',
             webpage, 'view count', fatal=False))
         description = self._html_search_regex(
-            r'(?s)<div id="song_texts">(.*?)</div><br',
+            r'(?s)<pre[^>]+id="lyrics-original"[^>]*>(.*?)</pre>',
             webpage, 'song lyrics', fatal=False)
         if description:
             description = re.sub(' *\r *', '\n', description)
 
-        enc_token = self._html_search_regex(
-            r'minus_track\.s?tkn="(.+?)"', webpage, 'enc_token')
-        token = ''.join(
-            c if pos == 3 else compat_chr(compat_ord(c) - 1)
-            for pos, c in enumerate(reversed(enc_token)))
-        video_url = 'http://x-minus.org/dwlf/%s/%s.mp3' % (video_id, token)
+        k = self._search_regex(
+            r'<div[^>]+id="player-bottom"[^>]+data-k="([^"]+)">', webpage,
+            'encoded data')
+        h = time.time() / 3600
+        a = sum(map(int, [compat_ord(c) for c in k])) + int(video_id) + h
+        video_url = 'http://x-minus.me/dl/minus?id=%s&tkn2=%df%d' % (video_id, a, h)
 
         return {
             'id': video_id,
             'title': title,
             'url': video_url,
+            # The extension is unknown until actual downloading
+            'ext': 'mp3',
             'duration': duration,
             'filesize_approx': filesize_approx,
             'tbr': tbr,
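
The rewritten extractor derives the download token from the player's data-k attribute: its character-code sum, the numeric track id and the current hour since the epoch are folded into tkn2. A sketch of the same arithmetic, with the endpoint taken from the diff above:

import time

def build_download_url(video_id, data_k):
    h = time.time() / 3600  # hours since the epoch, kept fractional
    a = sum(ord(c) for c in data_k) + int(video_id) + h
    # %d truncates both floats, matching the extractor's formatting
    return 'http://x-minus.me/dl/minus?id=%s&tkn2=%df%d' % (video_id, a, h)
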
diff --git a/youtube_dl/extractor/xnxx.py b/youtube_dl/extractor/xnxx.py
index 5a41f8ffa0c5a46a3d0431a6aac8e93ba8ca1cb9..bcb140305559a164f56392dd33eab4d5c7b0bab5 100644
@@ -6,17 +6,23 @@ from ..compat import compat_urllib_parse_unquote
 
 
 class XNXXIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:video|www)\.xnxx\.com/video(?P<id>[0-9]+)/(.*)'
-    _TEST = {
-        'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_',
-        'md5': '0831677e2b4761795f68d417e0b7b445',
+    _VALID_URL = r'https?://(?:video|www)\.xnxx\.com/video-?(?P<id>[0-9a-z]+)/'
+    _TESTS = [{
+        'url': 'http://www.xnxx.com/video-55awb78/skyrim_test_video',
+        'md5': 'ef7ecee5af78f8b03dca2cf31341d3a0',
         'info_dict': {
-            'id': '1135332',
+            'id': '55awb78',
             'ext': 'flv',
-            'title': 'lida » Naked Funny Actress  (5)',
+            'title': 'Skyrim Test Video',
             'age_limit': 18,
-        }
-    }
+        },
+    }, {
+        'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.xnxx.com/video-55awb78/',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
diff --git a/youtube_dl/extractor/xuite.py b/youtube_dl/extractor/xuite.py
index 2466410faaba4e0047fe26099ee936b68dcb9e34..0be8932ad36fbc02615bf6081972a621048283b3 100644
@@ -66,6 +66,7 @@ class XuiteIE(InfoExtractor):
             'uploader_id': '242127761',
             'categories': ['電玩動漫'],
         },
+        'skip': 'Video removed',
     }, {
         'url': 'http://vlog.xuite.net/play/S1dDUjdyLTMyOTc3NjcuZmx2/%E5%AD%AB%E7%87%95%E5%A7%BF-%E7%9C%BC%E6%B7%9A%E6%88%90%E8%A9%A9',
         'only_matching': True,
diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py
index 710ad5041988b0e1c932b135af91a27036dfd664..1dfe031cab8e9ccf4fa6fb23fbd90d40759ac930 100644
@@ -8,7 +8,6 @@ from ..utils import (
     clean_html,
     ExtractorError,
     determine_ext,
-    sanitized_Request,
 )
 
 
@@ -25,8 +24,6 @@ class XVideosIE(InfoExtractor):
         }
     }
 
-    _ANDROID_USER_AGENT = 'Mozilla/5.0 (Linux; Android 4.0.4; Galaxy Nexus Build/IMM76B) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.133 Mobile Safari/535.19'
-
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
@@ -35,31 +32,34 @@ class XVideosIE(InfoExtractor):
         if mobj:
             raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True)
 
-        video_url = compat_urllib_parse_unquote(
-            self._search_regex(r'flv_url=(.+?)&', webpage, 'video URL'))
         video_title = self._html_search_regex(
             r'<title>(.*?)\s+-\s+XVID', webpage, 'title')
         video_thumbnail = self._search_regex(
             r'url_bigthumb=(.+?)&amp', webpage, 'thumbnail', fatal=False)
 
-        formats = [{
-            'url': video_url,
-        }]
+        formats = []
 
-        android_req = sanitized_Request(url)
-        android_req.add_header('User-Agent', self._ANDROID_USER_AGENT)
-        android_webpage = self._download_webpage(android_req, video_id, fatal=False)
+        video_url = compat_urllib_parse_unquote(self._search_regex(
+            r'flv_url=(.+?)&', webpage, 'video URL', default=''))
+        if video_url:
+            formats.append({'url': video_url})
 
-        if android_webpage is not None:
-            player_params_str = self._search_regex(
-                'mobileReplacePlayerDivTwoQual\(([^)]+)\)',
-                android_webpage, 'player parameters', default='')
-            player_params = list(map(lambda s: s.strip(' \''), player_params_str.split(',')))
-            if player_params:
-                formats.extend([{
-                    'url': param,
-                    'preference': -10,
-                } for param in player_params if determine_ext(param) == 'mp4'])
+        player_args = self._search_regex(
+            r'(?s)new\s+HTML5Player\((.+?)\)', webpage, 'html5 player', default=None)
+        if player_args:
+            for arg in player_args.split(','):
+                format_url = self._search_regex(
+                    r'(["\'])(?P<url>https?://.+?)\1', arg, 'url',
+                    default=None, group='url')
+                if not format_url:
+                    continue
+                ext = determine_ext(format_url)
+                if ext == 'mp4':
+                    formats.append({'url': format_url})
+                elif ext == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        format_url, video_id, 'mp4',
+                        entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
 
         self._sort_formats(formats)
 
@@ -67,7 +67,6 @@ class XVideosIE(InfoExtractor):
             'id': video_id,
             'formats': formats,
             'title': video_title,
-            'ext': 'flv',
             'thumbnail': video_thumbnail,
             'age_limit': 18,
         }
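
The removed Android user-agent round trip is replaced by parsing the page's own HTML5Player constructor: every quoted URL among its arguments becomes a format candidate, dispatched on extension. A self-contained sketch of the same idea, where determine_ext is approximated by a crude suffix check:

import re

def html5_player_formats(webpage):
    formats = []
    mobj = re.search(r'(?s)new\s+HTML5Player\((.+?)\)', webpage)
    if not mobj:
        return formats
    for arg in mobj.group(1).split(','):
        m = re.search(r'(["\'])(?P<url>https?://.+?)\1', arg)
        if not m:
            continue
        url = m.group('url')
        ext = url.split('?')[0].rpartition('.')[2].lower()
        if ext == 'mp4':
            formats.append({'url': url})
        elif ext == 'm3u8':
            # the real extractor expands this into individual HLS formats
            formats.append({'url': url, 'protocol': 'm3u8_native'})
    return formats
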
diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py
index 4c6142927d38c088c5b03306439bd624f6e209e7..927a964a4c5036094d657a634c6df8f8874e00af 100644
@@ -8,6 +8,7 @@ import re
 from .common import InfoExtractor, SearchInfoExtractor
 from ..compat import (
     compat_urllib_parse,
+    compat_urllib_parse_urlencode,
     compat_urlparse,
 )
 from ..utils import (
@@ -23,7 +24,7 @@ from .nbc import NBCSportsVPlayerIE
 
 class YahooIE(InfoExtractor):
     IE_DESC = 'Yahoo screen and movies'
-    _VALID_URL = r'(?P<url>(?P<host>https?://(?:[a-zA-Z]{2}\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?P<display_id>.+)?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html)'
+    _VALID_URL = r'(?P<url>(?P<host>https?://(?:[a-zA-Z]{2}\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?P<display_id>.+)?-(?P<id>[0-9]+)(?:-[a-z]+)?(?:\.html)?)'
     _TESTS = [
         {
             'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
@@ -37,7 +38,7 @@ class YahooIE(InfoExtractor):
         },
         {
             'url': 'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
-            'md5': 'd6e6fc6e1313c608f316ddad7b82b306',
+            'md5': 'c3466d2b6d5dd6b9f41ba9ed04c24b23',
             'info_dict': {
                 'id': 'd1dedf8c-d58c-38c3-8963-e899929ae0a9',
                 'ext': 'mp4',
@@ -48,7 +49,7 @@ class YahooIE(InfoExtractor):
         },
         {
             'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed',
-            'md5': '60e8ac193d8fb71997caa8fce54c6460',
+            'md5': '75ffabdb87c16d4ffe8c036dc4d1c136',
             'info_dict': {
                 'id': '4fe78544-8d48-39d8-97cd-13f205d9fcdb',
                 'ext': 'mp4',
@@ -58,15 +59,15 @@ class YahooIE(InfoExtractor):
             }
         },
         {
-            'url': 'https://tw.screen.yahoo.com/election-2014-askmayor/敢問市長-黃秀霜批賴清德-非常高傲-033009720.html',
-            'md5': '3a09cf59349cfaddae1797acc3c087fc',
+            'url': 'https://tw.news.yahoo.com/%E6%95%A2%E5%95%8F%E5%B8%82%E9%95%B7%20%E9%BB%83%E7%A7%80%E9%9C%9C%E6%89%B9%E8%B3%B4%E6%B8%85%E5%BE%B7%20%E9%9D%9E%E5%B8%B8%E9%AB%98%E5%82%B2-034024051.html',
+            'md5': '9035d38f88b1782682a3e89f985be5bb',
             'info_dict': {
                 'id': 'cac903b3-fcf4-3c14-b632-643ab541712f',
                 'ext': 'mp4',
                 'title': '敢問市長/黃秀霜批賴清德「非常高傲」',
                 'description': '直言台南沒捷運 交通居五都之末',
                 'duration': 396,
-            }
+            },
         },
         {
             'url': 'https://uk.screen.yahoo.com/editor-picks/cute-raccoon-freed-drain-using-091756545.html',
@@ -88,17 +89,32 @@ class YahooIE(InfoExtractor):
                 'title': 'Program that makes hockey more affordable not offered in Manitoba',
                 'description': 'md5:c54a609f4c078d92b74ffb9bf1f496f4',
                 'duration': 121,
-            }
+            },
+            'skip': 'Video gone',
         }, {
             'url': 'https://ca.finance.yahoo.com/news/hackers-sony-more-trouble-well-154609075.html',
-            'md5': '226a895aae7e21b0129e2a2006fe9690',
             'info_dict': {
-                'id': 'e624c4bc-3389-34de-9dfc-025f74943409',
-                'ext': 'mp4',
-                'title': '\'The Interview\' TV Spot: War',
-                'description': 'The Interview',
-                'duration': 30,
-            }
+                'id': '154609075',
+            },
+            'playlist': [{
+                'md5': 'f8e336c6b66f503282e5f719641d6565',
+                'info_dict': {
+                    'id': 'e624c4bc-3389-34de-9dfc-025f74943409',
+                    'ext': 'mp4',
+                    'title': '\'The Interview\' TV Spot: War',
+                    'description': 'The Interview',
+                    'duration': 30,
+                },
+            }, {
+                'md5': '958bcb90b4d6df71c56312137ee1cd5a',
+                'info_dict': {
+                    'id': '1fc8ada0-718e-3abe-a450-bf31f246d1a9',
+                    'ext': 'mp4',
+                    'title': '\'The Interview\' TV Spot: Guys',
+                    'description': 'The Interview',
+                    'duration': 30,
+                },
+            }],
         }, {
             'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
             'md5': '88e209b417f173d86186bef6e4d1f160',
@@ -118,10 +134,11 @@ class YahooIE(InfoExtractor):
                 'title': 'Connect the Dots: Dark Side of Virgo',
                 'description': 'md5:1428185051cfd1949807ad4ff6d3686a',
                 'duration': 201,
-            }
+            },
+            'skip': 'Domain name in.lifestyle.yahoo.com gone',
         }, {
             'url': 'https://www.yahoo.com/movies/v/true-story-trailer-173000497.html',
-            'md5': '989396ae73d20c6f057746fb226aa215',
+            'md5': 'b17ac378b1134fa44370fb27db09a744',
             'info_dict': {
                 'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1',
                 'ext': 'mp4',
@@ -140,6 +157,9 @@ class YahooIE(InfoExtractor):
                 'ext': 'flv',
                 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
                 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
+                'upload_date': '20150313',
+                'uploader': 'NBCU-SPORTS',
+                'timestamp': 1426270238,
             }
         }, {
             'url': 'https://tw.news.yahoo.com/-100120367.html',
@@ -147,7 +167,7 @@ class YahooIE(InfoExtractor):
         }, {
             # Query result is embedded in webpage, but explicit request to video API fails with geo restriction
             'url': 'https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html',
-            'md5': '4fbafb9c9b6f07aa8f870629f6671b35',
+            'md5': '1ddbf7c850777548438e5c4f147c7b8c',
             'info_dict': {
                 'id': '1f32853c-a271-3eef-8cb6-f6d6872cb504',
                 'ext': 'mp4',
@@ -165,6 +185,17 @@ class YahooIE(InfoExtractor):
                 'description': 'While they play feuding fathers in \'Daddy\'s Home,\' star Will Ferrell & Mark Wahlberg share their true feelings on parenthood.',
             },
         },
+        {
+            # config['models']['applet_model']['data']['sapi'] has no query
+            'url': 'https://www.yahoo.com/music/livenation/event/galactic-2016',
+            'md5': 'dac0c72d502bc5facda80c9e6d5c98db',
+            'info_dict': {
+                'id': 'a6015640-e9e5-3efb-bb60-05589a183919',
+                'ext': 'mp4',
+                'description': 'Galactic',
+                'title': 'Dolla Diva (feat. Maggie Koerner)',
+            },
+        },
     ]
 
     def _real_extract(self, url):
@@ -173,19 +204,26 @@ class YahooIE(InfoExtractor):
         page_id = mobj.group('id')
         url = mobj.group('url')
         host = mobj.group('host')
-        webpage = self._download_webpage(url, display_id)
+        webpage, urlh = self._download_webpage_handle(url, display_id)
+        if 'err=404' in urlh.geturl():
+            raise ExtractorError('Video gone', expected=True)
 
         # Look for iframed media first
-        iframe_m = re.search(r'<iframe[^>]+src="(/video/.+?-\d+\.html\?format=embed.*?)"', webpage)
-        if iframe_m:
+        entries = []
+        iframe_urls = re.findall(r'<iframe[^>]+src="(/video/.+?-\d+\.html\?format=embed.*?)"', webpage)
+        for idx, iframe_url in enumerate(iframe_urls):
             iframepage = self._download_webpage(
-                host + iframe_m.group(1), display_id, 'Downloading iframe webpage')
+                host + iframe_url, display_id,
+                note='Downloading iframe webpage for video #%d' % idx)
             items_json = self._search_regex(
                 r'mediaItems: (\[.+?\])$', iframepage, 'items', flags=re.MULTILINE, default=None)
             if items_json:
                 items = json.loads(items_json)
                 video_id = items[0]['id']
-                return self._get_info(video_id, display_id, webpage)
+                entries.append(self._get_info(video_id, display_id, webpage))
+        if entries:
+            return self.playlist_result(entries, page_id)
+
         # Look for NBCSports iframes
         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
         if nbc_sports_url:
@@ -201,7 +239,7 @@ class YahooIE(InfoExtractor):
             config = self._parse_json(config_json, display_id, fatal=False)
             if config:
                 sapi = config.get('models', {}).get('applet_model', {}).get('data', {}).get('sapi')
-                if sapi:
+                if sapi and 'query' in sapi:
                     return self._extract_info(display_id, sapi, webpage)
 
         items_json = self._search_regex(
@@ -303,9 +341,9 @@ class YahooIE(InfoExtractor):
         region = self._search_regex(
             r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"',
             webpage, 'region', fatal=False, default='US')
-        data = compat_urllib_parse.urlencode({
+        data = compat_urllib_parse_urlencode({
             'protocol': 'http',
-            'region': region,
+            'region': region.upper(),
         })
         query_url = (
             'https://video.media.yql.yahoo.com/v1/video/sapi/streams/'
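
Several files in this commit (this one, plus youku.py and youtube.py below) switch from compat_urllib_parse.urlencode to a dedicated compat_urllib_parse_urlencode alias. In spirit the alias papers over the Python 2/3 split roughly like this sketch; the real compat shim also deals with unicode quirks on Python 2:

try:
    from urllib.parse import urlencode as compat_urllib_parse_urlencode  # Python 3
except ImportError:  # Python 2
    from urllib import urlencode as compat_urllib_parse_urlencode

data = compat_urllib_parse_urlencode({'protocol': 'http', 'region': 'US'})
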
diff --git a/youtube_dl/extractor/yam.py b/youtube_dl/extractor/yam.py
index 001ee17b6f93d457bdc2fbdaf802b61ef19e1b41..63bbc06346a04b385c722eaae22d0ff5c41445f4 100644
@@ -15,7 +15,7 @@ from ..utils import (
 
 class YamIE(InfoExtractor):
     IE_DESC = '蕃薯藤yam天空部落'
-    _VALID_URL = r'http://mymedia.yam.com/m/(?P<id>\d+)'
+    _VALID_URL = r'https?://mymedia.yam.com/m/(?P<id>\d+)'
 
     _TESTS = [{
         # An audio hosted on Yam
diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py
index d3cc1a29fa473fee2f58e91323774633be00fc4b..b37d0eab66b53ab45ff38dcac91598079f08a275 100644
@@ -5,18 +5,48 @@ import re
 import hashlib
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_str,
-    compat_urllib_parse,
-)
+from ..compat import compat_str
 from ..utils import (
+    ExtractorError,
     int_or_none,
     float_or_none,
-    sanitized_Request,
 )
 
 
-class YandexMusicTrackIE(InfoExtractor):
+class YandexMusicBaseIE(InfoExtractor):
+    @staticmethod
+    def _handle_error(response):
+        if isinstance(response, dict):
+            error = response.get('error')
+            if error:
+                raise ExtractorError(error, expected=True)
+            if response.get('type') == 'captcha' or 'captcha' in response:
+                YandexMusicBaseIE._raise_captcha()
+
+    @staticmethod
+    def _raise_captcha():
+        raise ExtractorError(
+            'YandexMusic has considered youtube-dl requests automated and '
+            'asks you to solve a CAPTCHA. You can either wait for some '
+            'time until unblocked and optionally use --sleep-interval '
+            'in future or alternatively you can go to https://music.yandex.ru/ '
+            'solve CAPTCHA, then export cookies and pass cookie file to '
+            'youtube-dl with --cookies',
+            expected=True)
+
+    def _download_webpage(self, *args, **kwargs):
+        webpage = super(YandexMusicBaseIE, self)._download_webpage(*args, **kwargs)
+        if 'Нам очень жаль, но&nbsp;запросы, поступившие с&nbsp;вашего IP-адреса, похожи на&nbsp;автоматические.' in webpage:
+            self._raise_captcha()
+        return webpage
+
+    def _download_json(self, *args, **kwargs):
+        response = super(YandexMusicBaseIE, self)._download_json(*args, **kwargs)
+        self._handle_error(response)
+        return response
+
+
+class YandexMusicTrackIE(YandexMusicBaseIE):
     IE_NAME = 'yandexmusic:track'
     IE_DESC = 'Яндекс.Музыка - Трек'
     _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
@@ -27,10 +57,16 @@ class YandexMusicTrackIE(InfoExtractor):
         'info_dict': {
             'id': '4878838',
             'ext': 'mp3',
-            'title': 'Carlo Ambrosio - Gypsy Eyes 1',
+            'title': 'Carlo Ambrosio & Fabio Di Bari, Carlo Ambrosio - Gypsy Eyes 1',
             'filesize': 4628061,
             'duration': 193.04,
-        }
+            'track': 'Gypsy Eyes 1',
+            'album': 'Gypsy Soul',
+            'album_artist': 'Carlo Ambrosio',
+            'artist': 'Carlo Ambrosio & Fabio Di Bari, Carlo Ambrosio',
+            'release_year': '2009',
+        },
+        'skip': 'Travis CI servers blocked by YandexMusic',
     }
 
     def _get_track_url(self, storage_dir, track_id):
@@ -52,16 +88,45 @@ class YandexMusicTrackIE(InfoExtractor):
             thumbnail = cover_uri.replace('%%', 'orig')
             if not thumbnail.startswith('http'):
                 thumbnail = 'http://' + thumbnail
-        return {
+
+        track_title = track['title']
+        track_info = {
             'id': track['id'],
             'ext': 'mp3',
             'url': self._get_track_url(track['storageDir'], track['id']),
-            'title': '%s - %s' % (track['artists'][0]['name'], track['title']),
             'filesize': int_or_none(track.get('fileSize')),
             'duration': float_or_none(track.get('durationMs'), 1000),
             'thumbnail': thumbnail,
+            'track': track_title,
         }
 
+        def extract_artist(artist_list):
+            if artist_list and isinstance(artist_list, list):
+                artists_names = [a['name'] for a in artist_list if a.get('name')]
+                if artists_names:
+                    return ', '.join(artists_names)
+
+        albums = track.get('albums')
+        if albums and isinstance(albums, list):
+            album = albums[0]
+            if isinstance(album, dict):
+                year = album.get('year')
+                track_info.update({
+                    'album': album.get('title'),
+                    'album_artist': extract_artist(album.get('artists')),
+                    'release_year': compat_str(year) if year else None,
+                })
+
+        track_artist = extract_artist(track.get('artists'))
+        if track_artist:
+            track_info.update({
+                'artist': track_artist,
+                'title': '%s - %s' % (track_artist, track_title),
+            })
+        else:
+            track_info['title'] = track_title
+        return track_info
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         album_id, track_id = mobj.group('album_id'), mobj.group('id')
@@ -73,7 +138,7 @@ class YandexMusicTrackIE(InfoExtractor):
         return self._get_track_info(track)
 
 
-class YandexMusicPlaylistBaseIE(InfoExtractor):
+class YandexMusicPlaylistBaseIE(YandexMusicBaseIE):
     def _build_playlist(self, tracks):
         return [
             self.url_result(
@@ -93,6 +158,7 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
             'title': 'Carlo Ambrosio - Gypsy Soul (2009)',
         },
         'playlist_count': 50,
+        'skip': 'Travis CI servers blocked by YandexMusic',
     }
 
     def _real_extract(self, url):
@@ -115,7 +181,7 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
 class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
     IE_NAME = 'yandexmusic:playlist'
     IE_DESC = 'Яндекс.Музыка - Плейлист'
-    _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/users/[^/]+/playlists/(?P<id>\d+)'
+    _VALID_URL = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by)/users/(?P<user>[^/]+)/playlists/(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
@@ -125,6 +191,7 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
             'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
         },
         'playlist_count': 6,
+        'skip': 'Travis CI servers blocked by YandexMusic',
     }, {
         # playlist exceeding the limit of 150 tracks shipped with webpage (see
         # https://github.com/rg3/youtube-dl/issues/6666)
@@ -133,46 +200,64 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
             'id': '1036',
             'title': 'Музыка 90-х',
         },
-        'playlist_count': 310,
+        'playlist_mincount': 300,
+        'skip': 'Travis CI servers blocked by YandexMusic',
     }]
 
     def _real_extract(self, url):
-        playlist_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, playlist_id)
-
-        mu = self._parse_json(
-            self._search_regex(
-                r'var\s+Mu\s*=\s*({.+?});\s*</script>', webpage, 'player'),
-            playlist_id)
-
-        playlist = mu['pageData']['playlist']
-        tracks, track_ids = playlist['tracks'], playlist['trackIds']
-
-        # tracks dictionary shipped with webpage is limited to 150 tracks,
+        mobj = re.match(self._VALID_URL, url)
+        tld = mobj.group('tld')
+        user = mobj.group('user')
+        playlist_id = mobj.group('id')
+
+        playlist = self._download_json(
+            'https://music.yandex.%s/handlers/playlist.jsx' % tld,
+            playlist_id, 'Downloading missing tracks JSON',
+            fatal=False,
+            headers={
+                'Referer': url,
+                'X-Requested-With': 'XMLHttpRequest',
+                'X-Retpath-Y': url,
+            },
+            query={
+                'owner': user,
+                'kinds': playlist_id,
+                'light': 'true',
+                'lang': tld,
+                'external-domain': 'music.yandex.%s' % tld,
+                'overembed': 'false',
+            })['playlist']
+
+        tracks, track_ids = playlist['tracks'], list(map(compat_str, playlist['trackIds']))
+
+        # tracks dictionary shipped with playlist.jsx API is limited to 150 tracks,
         # missing tracks should be retrieved manually.
         if len(tracks) < len(track_ids):
-            present_track_ids = set([compat_str(track['id']) for track in tracks if track.get('id')])
-            missing_track_ids = set(map(compat_str, track_ids)) - set(present_track_ids)
-            request = sanitized_Request(
-                'https://music.yandex.ru/handlers/track-entries.jsx',
-                compat_urllib_parse.urlencode({
+            present_track_ids = set([
+                compat_str(track['id'])
+                for track in tracks if track.get('id')])
+            missing_track_ids = [
+                track_id for track_id in track_ids
+                if track_id not in present_track_ids]
+            missing_tracks = self._download_json(
+                'https://music.yandex.%s/handlers/track-entries.jsx' % tld,
+                playlist_id, 'Downloading missing tracks JSON',
+                fatal=False,
+                headers={
+                    'Referer': url,
+                    'X-Requested-With': 'XMLHttpRequest',
+                },
+                query={
                     'entries': ','.join(missing_track_ids),
-                    'lang': mu.get('settings', {}).get('lang', 'en'),
-                    'external-domain': 'music.yandex.ru',
+                    'lang': tld,
+                    'external-domain': 'music.yandex.%s' % tld,
                     'overembed': 'false',
-                    'sign': mu.get('authData', {}).get('user', {}).get('sign'),
                     'strict': 'true',
-                }).encode('utf-8'))
-            request.add_header('Referer', url)
-            request.add_header('X-Requested-With', 'XMLHttpRequest')
-
-            missing_tracks = self._download_json(
-                request, playlist_id, 'Downloading missing tracks JSON', fatal=False)
+                })
             if missing_tracks:
                 tracks.extend(missing_tracks)
 
         return self.playlist_result(
             self._build_playlist(tracks),
             compat_str(playlist_id),
-            playlist['title'], playlist.get('description'))
+            playlist.get('title'), playlist.get('description'))
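
The new YandexMusicBaseIE illustrates a pattern worth noting: overriding _download_webpage and _download_json in a base class funnels every request through one place that recognizes the service's CAPTCHA wall, so the concrete extractors stay oblivious to it. A trimmed sketch of the pattern, with a hypothetical class name and assuming it lives alongside the other extractors in the package:

from .common import InfoExtractor
from ..utils import ExtractorError


class CaptchaAwareIE(InfoExtractor):
    def _raise_captcha(self):
        raise ExtractorError(
            'The service asks for a CAPTCHA; wait a while or pass --cookies',
            expected=True)

    def _download_webpage(self, *args, **kwargs):
        webpage = super(CaptchaAwareIE, self)._download_webpage(*args, **kwargs)
        if 'похожи на&nbsp;автоматические' in webpage:  # blocking-page marker
            self._raise_captcha()
        return webpage  # the page must still reach the caller

    def _download_json(self, *args, **kwargs):
        response = super(CaptchaAwareIE, self)._download_json(*args, **kwargs)
        if isinstance(response, dict) and response.get('type') == 'captcha':
            self._raise_captcha()
        return response
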
diff --git a/youtube_dl/extractor/ynet.py b/youtube_dl/extractor/ynet.py
index 869f3e8190ca0b751366a85f142a0b49fe294fa1..0d943c3432a57570afc229ab4efce66b2118f763 100644
@@ -9,7 +9,7 @@ from ..compat import compat_urllib_parse_unquote_plus
 
 
 class YnetIE(InfoExtractor):
-    _VALID_URL = r'http://(?:.+?\.)?ynet\.co\.il/(?:.+?/)?0,7340,(?P<id>L(?:-[0-9]+)+),00\.html'
+    _VALID_URL = r'https?://(?:.+?\.)?ynet\.co\.il/(?:.+?/)?0,7340,(?P<id>L(?:-[0-9]+)+),00\.html'
     _TESTS = [
         {
             'url': 'http://hot.ynet.co.il/home/0,7340,L-11659-99244,00.html',
@@ -41,10 +41,12 @@ class YnetIE(InfoExtractor):
         m = re.search(r'ynet - HOT -- (["\']+)(?P<title>.+?)\1', title)
         if m:
             title = m.group('title')
+        formats = self._extract_f4m_formats(f4m_url, video_id)
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
             'title': title,
-            'formats': self._extract_f4m_formats(f4m_url, video_id),
+            'formats': formats,
             'thumbnail': self._og_search_thumbnail(webpage),
         }
diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py
index 900eb2abac2bcc346eb8b07782ac017a25a44c24..147608ebebbbe8b71dda64e2daf974362868c50c 100644
@@ -2,17 +2,20 @@
 from __future__ import unicode_literals
 
 import base64
+import itertools
 import random
+import re
 import string
 import time
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_parse,
+    compat_urllib_parse_urlencode,
     compat_ord,
 )
 from ..utils import (
     ExtractorError,
+    get_element_by_attribute,
     sanitized_Request,
 )
 
@@ -64,6 +67,14 @@ class YoukuIE(InfoExtractor):
         'params': {
             'videopassword': '100600',
         },
+    }, {
+        # /play/get.json contains streams with "channel_type":"tail"
+        'url': 'http://v.youku.com/v_show/id_XOTUxMzg4NDMy.html',
+        'info_dict': {
+            'id': 'XOTUxMzg4NDMy',
+            'title': '我的世界☆明月庄主☆车震猎杀☆杀人艺术Minecraft',
+        },
+        'playlist_count': 6,
     }]
 
     def construct_video_urls(self, data):
@@ -92,6 +103,8 @@ class YoukuIE(InfoExtractor):
 
         fileid_dict = {}
         for stream in data['stream']:
+            if stream.get('channel_type') == 'tail':
+                continue
             format = stream.get('stream_type')
             fileid = stream['stream_fileid']
             fileid_dict[format] = fileid
@@ -117,6 +130,8 @@ class YoukuIE(InfoExtractor):
         # generate video_urls
         video_urls_dict = {}
         for stream in data['stream']:
+            if stream.get('channel_type') == 'tail':
+                continue
             format = stream.get('stream_type')
             video_urls = []
             for dt in stream['segs']:
@@ -138,7 +153,7 @@ class YoukuIE(InfoExtractor):
                     '_00' + \
                     '/st/' + self.parse_ext_l(format) + \
                     '/fileid/' + get_fileid(format, n) + '?' + \
-                    compat_urllib_parse.urlencode(param)
+                    compat_urllib_parse_urlencode(param)
                 video_urls.append(video_url)
             video_urls_dict[format] = video_urls
 
@@ -253,6 +268,8 @@ class YoukuIE(InfoExtractor):
             # which one has all
         } for i in range(max(len(v.get('segs')) for v in data['stream']))]
         for stream in data['stream']:
+            if stream.get('channel_type') == 'tail':
+                continue
             fm = stream.get('stream_type')
             video_urls = video_urls_dict[fm]
             for video_url, seg, entry in zip(video_urls, stream['segs'], entries):
@@ -261,6 +278,8 @@ class YoukuIE(InfoExtractor):
                     'format_id': self.get_format_name(fm),
                     'ext': self.parse_ext_l(fm),
                     'filesize': int(seg['size']),
+                    'width': stream.get('width'),
+                    'height': stream.get('height'),
                 })
 
         return {
@@ -269,3 +288,52 @@ class YoukuIE(InfoExtractor):
             'title': title,
             'entries': entries,
         }
+
+
+class YoukuShowIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?youku\.com/show_page/id_(?P<id>[0-9a-z]+)\.html'
+    IE_NAME = 'youku:show'
+
+    _TEST = {
+        'url': 'http://www.youku.com/show_page/id_zc7c670be07ff11e48b3f.html',
+        'info_dict': {
+            'id': 'zc7c670be07ff11e48b3f',
+            'title': '花千骨 未删减版',
+            'description': 'md5:578d4f2145ae3f9128d9d4d863312910',
+        },
+        'playlist_count': 50,
+    }
+
+    _PAGE_SIZE = 40
+
+    def _find_videos_in_page(self, webpage):
+        videos = re.findall(
+            r'<li><a[^>]+href="(?P<url>https?://v\.youku\.com/[^"]+)"[^>]+title="(?P<title>[^"]+)"', webpage)
+        return [
+            self.url_result(video_url, YoukuIE.ie_key(), title)
+            for video_url, title in videos]
+
+    def _real_extract(self, url):
+        show_id = self._match_id(url)
+        webpage = self._download_webpage(url, show_id)
+
+        entries = self._find_videos_in_page(webpage)
+
+        playlist_title = self._html_search_regex(
+            r'<span[^>]+class="name">([^<]+)</span>', webpage, 'playlist title', fatal=False)
+        detail_div = get_element_by_attribute('class', 'detail', webpage) or ''
+        playlist_description = self._html_search_regex(
+            r'<span[^>]+style="display:none"[^>]*>([^<]+)</span>',
+            detail_div, 'playlist description', fatal=False)
+
+        for idx in itertools.count(1):
+            episodes_page = self._download_webpage(
+                'http://www.youku.com/show_episode/id_%s.html' % show_id,
+                show_id, query={'divid': 'reload_%d' % (idx * self._PAGE_SIZE + 1)},
+                note='Downloading episodes page %d' % idx)
+            new_entries = self._find_videos_in_page(episodes_page)
+            entries.extend(new_entries)
+            if len(new_entries) < self._PAGE_SIZE:
+                break
+
+        return self.playlist_result(entries, show_id, playlist_title, playlist_description)
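
YoukuShowIE pages through show_episode with itertools.count and stops as soon as a page comes back short, the usual last-page signal when the total count is unknown. The loop in isolation, with fetch_page standing in for the download-and-parse step:

import itertools

PAGE_SIZE = 40

def collect_entries(fetch_page, first_page_entries):
    # fetch_page(offset) is assumed to return the parsed entries of one page
    entries = list(first_page_entries)
    for idx in itertools.count(1):
        new_entries = fetch_page(idx * PAGE_SIZE + 1)  # 1-based divid offset
        entries.extend(new_entries)
        if len(new_entries) < PAGE_SIZE:
            break  # a short page means there is nothing after it
    return entries
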
diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py
index b29baafc441c220b4128c9363f341f7159b8df93..0df2d76ee198d5d6ae1914f078cc96accec2d17e 100644
@@ -17,7 +17,7 @@ class YouPornIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?youporn\.com/watch/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
     _TESTS = [{
         'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
-        'md5': '71ec5fcfddacf80f495efa8b6a8d9a89',
+        'md5': '3744d24c50438cf5b6f6d59feb5055c2',
         'info_dict': {
             'id': '505835',
             'display_id': 'sex-ed-is-it-safe-to-masturbate-daily',
@@ -75,7 +75,7 @@ class YouPornIE(InfoExtractor):
         links = []
 
         sources = self._search_regex(
-            r'sources\s*:\s*({.+?})', webpage, 'sources', default=None)
+            r'(?s)sources\s*:\s*({.+?})', webpage, 'sources', default=None)
         if sources:
             for _, link in re.findall(r'[^:]+\s*:\s*(["\'])(http.+?)\1', sources):
                 links.append(link)
@@ -101,8 +101,9 @@ class YouPornIE(InfoExtractor):
             }
             # Video URL's path looks like this:
             #  /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
+            #  /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
             # We will benefit from it by extracting some metadata
-            mobj = re.search(r'/(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+/', video_url)
+            mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+/', video_url)
             if mobj:
                 height = int(mobj.group('height'))
                 bitrate = int(mobj.group('bitrate'))
@@ -120,21 +121,21 @@ class YouPornIE(InfoExtractor):
             webpage, 'thumbnail', fatal=False, group='thumbnail')
 
         uploader = self._html_search_regex(
-            r'(?s)<div[^>]+class=["\']videoInfoBy(?:\s+[^"\']+)?["\'][^>]*>\s*By:\s*</div>(.+?)</(?:a|div)>',
+            r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
             webpage, 'uploader', fatal=False)
         upload_date = unified_strdate(self._html_search_regex(
-            r'(?s)<div[^>]+class=["\']videoInfoTime["\'][^>]*>(.+?)</div>',
+            r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>',
             webpage, 'upload date', fatal=False))
 
         age_limit = self._rta_search(webpage)
 
         average_rating = int_or_none(self._search_regex(
-            r'<div[^>]+class=["\']videoInfoRating["\'][^>]*>\s*<div[^>]+class=["\']videoRatingPercentage["\'][^>]*>(\d+)%</div>',
+            r'<div[^>]+class=["\']videoRatingPercentage["\'][^>]*>(\d+)%</div>',
             webpage, 'average rating', fatal=False))
 
         view_count = str_to_int(self._search_regex(
-            r'(?s)<div[^>]+class=["\']videoInfoViews["\'][^>]*>.*?([\d,.]+)\s*</div>',
-            webpage, 'view count', fatal=False))
+            r'(?s)<div[^>]+class=(["\']).*?\bvideoInfoViews\b.*?\1[^>]*>.*?(?P<count>[\d,.]+)<',
+            webpage, 'view count', fatal=False, group='count'))
         comment_count = str_to_int(self._search_regex(
             r'>All [Cc]omments? \(([\d,.]+)\)',
             webpage, 'comment count', fatal=False))
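
The loosened pattern above (the leading slash is dropped) recognizes both path flavours documented in the comment. A small sketch of the metadata it recovers from such a path:

import re

def path_metadata(video_url):
    mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+/', video_url)
    if not mobj:
        return {}
    return {
        'height': int(mobj.group('height')),
        'tbr': int(mobj.group('bitrate')),
    }

# path_metadata('/201012/17/505835/vl_240p_240k_505835/clip.mp4')
# -> {'height': 240, 'tbr': 240}
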
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index e24dd3e5b6ddbab40f128f29574777631c44ef80..c8d54f22a80ac447179305f50a85b42c483b946e 100644
@@ -6,6 +6,7 @@ from __future__ import unicode_literals
 import itertools
 import json
 import os.path
+import random
 import re
 import time
 import traceback
@@ -16,16 +17,15 @@ from ..swfinterp import SWFInterpreter
 from ..compat import (
     compat_chr,
     compat_parse_qs,
-    compat_urllib_parse,
     compat_urllib_parse_unquote,
     compat_urllib_parse_unquote_plus,
+    compat_urllib_parse_urlencode,
     compat_urllib_parse_urlparse,
     compat_urlparse,
     compat_str,
 )
 from ..utils import (
     clean_html,
-    encode_dict,
     error_to_compat_str,
     ExtractorError,
     float_or_none,
@@ -44,6 +44,7 @@ from ..utils import (
     unified_strdate,
     unsmuggle_url,
     uppercase_escape,
+    urlencode_postdata,
     ISO3166Utils,
 )
 
@@ -115,7 +116,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
             'hl': 'en_US',
         }
 
-        login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('ascii')
+        login_data = urlencode_postdata(login_form_strs)
 
         req = sanitized_Request(self._LOGIN_URL, login_data)
         login_results = self._download_webpage(
@@ -124,6 +125,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
         if login_results is False:
             return False
 
+        error_msg = self._html_search_regex(
+            r'<[^>]+id="errormsg_0_Passwd"[^>]*>([^<]+)<',
+            login_results, 'error message', default=None)
+        if error_msg:
+            raise ExtractorError('Unable to login: %s' % error_msg, expected=True)
+
         if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
             raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
 
@@ -148,7 +155,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
                 'TrustDevice': 'on',
             })
 
-            tfa_data = compat_urllib_parse.urlencode(encode_dict(tfa_form_strs)).encode('ascii')
+            tfa_data = urlencode_postdata(tfa_form_strs)
 
             tfa_req = sanitized_Request(self._TWOFACTOR_URL, tfa_data)
             tfa_results = self._download_webpage(
@@ -233,7 +240,9 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 
 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
     def _process_page(self, content):
-        for playlist_id in orderedSet(re.findall(r'href="/?playlist\?list=([0-9A-Za-z-_]{10,})"', content)):
+        for playlist_id in orderedSet(re.findall(
+                r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
+                content)):
             yield self.url_result(
                 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
 
@@ -267,7 +276,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                          ))
                          |(?:
                             youtu\.be|                                        # just youtu.be/xxxx
-                            vid\.plus                                         # or vid.plus/xxxx
+                            vid\.plus|                                        # or vid.plus/xxxx
+                            zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
                          )/
                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                          )
@@ -308,6 +318,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 
         # Apple HTTP Live Streaming
+        '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
@@ -333,6 +344,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
+        '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
+        '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
 
         # Dash webm
         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
@@ -382,7 +395,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
                 'uploader': 'Philipp Hagemeister',
                 'uploader_id': 'phihag',
+                'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/phihag',
                 'upload_date': '20121002',
+                'license': 'Standard YouTube License',
                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
                 'categories': ['Science & Technology'],
                 'tags': ['youtube-dl'],
@@ -401,12 +416,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'upload_date': '20120506',
                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
                 'alt_title': 'I Love It (feat. Charli XCX)',
-                'description': 'md5:782e8651347686cba06e58f71ab51773',
+                'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
                 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
                          'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
                          'iconic ep', 'iconic', 'love', 'it'],
                 'uploader': 'Icona Pop',
                 'uploader_id': 'IconaPop',
+                'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/IconaPop',
+                'license': 'Standard YouTube License',
                 'creator': 'Icona Pop',
             }
         },
@@ -422,6 +439,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:64249768eec3bc4276236606ea996373',
                 'uploader': 'justintimberlakeVEVO',
                 'uploader_id': 'justintimberlakeVEVO',
+                'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
+                'license': 'Standard YouTube License',
                 'creator': 'Justin Timberlake',
                 'age_limit': 18,
             }
@@ -437,6 +456,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
                 'uploader': 'SET India',
                 'uploader_id': 'setindia',
+                'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/setindia',
+                'license': 'Standard YouTube License',
                 'age_limit': 18,
             }
         },
@@ -449,7 +470,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
                 'uploader': 'Philipp Hagemeister',
                 'uploader_id': 'phihag',
+                'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/phihag',
                 'upload_date': '20121002',
+                'license': 'Standard YouTube License',
                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
                 'categories': ['Science & Technology'],
                 'tags': ['youtube-dl'],
@@ -468,14 +491,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'ext': 'm4a',
                 'upload_date': '20121002',
                 'uploader_id': '8KVIDEO',
+                'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
                 'description': '',
                 'uploader': '8KVIDEO',
+                'license': 'Standard YouTube License',
                 'title': 'UHDTV TEST 8K VIDEO.mp4'
             },
             'params': {
                 'youtube_include_dash_manifest': True,
                 'format': '141',
             },
+            'skip': 'format 141 not served anymore',
         },
         # DASH manifest with encrypted signature
         {
@@ -488,10 +514,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader': 'AfrojackVEVO',
                 'uploader_id': 'AfrojackVEVO',
                 'upload_date': '20131011',
+                'license': 'Standard YouTube License',
             },
             'params': {
                 'youtube_include_dash_manifest': True,
-                'format': '141',
+                'format': '141/bestaudio[ext=m4a]',
             },
         },
         # JS player signature function name containing $
@@ -506,11 +533,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader': 'TaylorSwiftVEVO',
                 'uploader_id': 'TaylorSwiftVEVO',
                 'upload_date': '20140818',
+                'license': 'Standard YouTube License',
                 'creator': 'Taylor Swift',
             },
             'params': {
                 'youtube_include_dash_manifest': True,
-                'format': '141',
+                'format': '141/bestaudio[ext=m4a]',
             },
         },
         # Controversy video
@@ -522,6 +550,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'upload_date': '20100909',
                 'uploader': 'The Amazing Atheist',
                 'uploader_id': 'TheAmazingAtheist',
+                'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
+                'license': 'Standard YouTube License',
                 'title': 'Burning Everyone\'s Koran',
                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
             }
@@ -536,7 +566,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
                 'uploader': 'The Witcher',
                 'uploader_id': 'WitcherGame',
+                'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
                 'upload_date': '20140605',
+                'license': 'Standard YouTube License',
                 'age_limit': 18,
             },
         },
@@ -550,7 +582,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
                 'uploader': 'LloydVEVO',
                 'uploader_id': 'LloydVEVO',
+                'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
                 'upload_date': '20110629',
+                'license': 'Standard YouTube License',
                 'age_limit': 18,
             },
         },
@@ -562,9 +596,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'ext': 'mp4',
                 'upload_date': '20100430',
                 'uploader_id': 'deadmau5',
+                'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/deadmau5',
                 'creator': 'deadmau5',
                 'description': 'md5:12c56784b8032162bb936a5f76d55360',
                 'uploader': 'deadmau5',
+                'license': 'Standard YouTube License',
                 'title': 'Deadmau5 - Some Chords (HD)',
                 'alt_title': 'Some Chords',
             },
@@ -580,8 +616,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'ext': 'mp4',
                 'upload_date': '20150827',
                 'uploader_id': 'olympic',
+                'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/olympic',
+                'license': 'Standard YouTube License',
                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
-                'uploader': 'Olympics',
+                'uploader': 'Olympic',
                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
             },
             'params': {
@@ -597,8 +635,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'stretched_ratio': 16 / 9.,
                 'upload_date': '20110310',
                 'uploader_id': 'AllenMeow',
+                'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
                 'uploader': '孫艾倫',
+                'license': 'Standard YouTube License',
                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
             },
         },
@@ -629,8 +669,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
                 'upload_date': '20150625',
                 'uploader_id': 'dorappi2000',
+                'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
                 'uploader': 'dorappi2000',
-                'formats': 'mincount:33',
+                'license': 'Standard YouTube License',
+                'formats': 'mincount:32',
             },
         },
         # DASH manifest with segment_list
@@ -644,12 +686,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader': 'Airtek',
                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
+                'license': 'Standard YouTube License',
                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
             },
             'params': {
                 'youtube_include_dash_manifest': True,
                 'format': '135',  # bestvideo
-            }
+            },
+            'skip': 'This live event has ended.',
         },
         {
             # Multifeed videos (multiple cameras), URL is for Main Camera
@@ -668,6 +712,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     'upload_date': '20150721',
                     'uploader': 'Beer Games Beer',
                     'uploader_id': 'beergamesbeer',
+                    'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
+                    'license': 'Standard YouTube License',
                 },
             }, {
                 'info_dict': {
@@ -678,6 +724,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     'upload_date': '20150721',
                     'uploader': 'Beer Games Beer',
                     'uploader_id': 'beergamesbeer',
+                    'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
+                    'license': 'Standard YouTube License',
                 },
             }, {
                 'info_dict': {
@@ -688,6 +736,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     'upload_date': '20150721',
                     'uploader': 'Beer Games Beer',
                     'uploader_id': 'beergamesbeer',
+                    'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
+                    'license': 'Standard YouTube License',
                 },
             }, {
                 'info_dict': {
@@ -698,6 +748,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     'upload_date': '20150721',
                     'uploader': 'Beer Games Beer',
                     'uploader_id': 'beergamesbeer',
+                    'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
+                    'license': 'Standard YouTube License',
                 },
             }],
             'params': {
@@ -712,11 +764,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
             },
             'playlist_count': 2,
+            'skip': 'Not multifeed anymore',
         },
         {
             'url': 'http://vid.plus/FlRa-iH7PGw',
             'only_matching': True,
         },
+        {
+            'url': 'http://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
+            'only_matching': True,
+        },
         {
             # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
             # Also tests cut-off URL expansion in video description (see
@@ -731,7 +788,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
                 'upload_date': '20151119',
                 'uploader_id': 'IronSoulElf',
+                'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
                 'uploader': 'IronSoulElf',
+                'license': 'Standard YouTube License',
                 'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',
             },
             'params': {
@@ -758,6 +817,43 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'params': {
                 'skip_download': True,
             },
+            'skip': 'This video does not exist.',
+        },
+        {
+            # Video licensed under Creative Commons
+            'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
+            'info_dict': {
+                'id': 'M4gD1WSo5mA',
+                'ext': 'mp4',
+                'title': 'md5:e41008789470fc2533a3252216f1c1d1',
+                'description': 'md5:a677553cf0840649b731a3024aeff4cc',
+                'upload_date': '20150127',
+                'uploader_id': 'BerkmanCenter',
+                'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
+                'uploader': 'BerkmanCenter',
+                'license': 'Creative Commons Attribution license (reuse allowed)',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
+        {
+            # Channel-like uploader_url
+            'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
+            'info_dict': {
+                'id': 'eQcmzGIKrzg',
+                'ext': 'mp4',
+                'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
+                'description': 'md5:dda0d780d5a6e120758d1711d062a867',
+                'upload_date': '20151119',
+                'uploader': 'Bernie 2016',
+                'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
+                'uploader_url': 're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
+                'license': 'Creative Commons Attribution license (reuse allowed)',
+            },
+            'params': {
+                'skip_download': True,
+            },
         },
         {
             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
@@ -930,7 +1026,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 continue
             sub_formats = []
             for ext in self._SUBTITLE_FORMATS:
-                params = compat_urllib_parse.urlencode({
+                params = compat_urllib_parse_urlencode({
                     'lang': lang,
                     'v': video_id,
                     'fmt': ext,
@@ -975,40 +1071,67 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             return {}
         try:
             args = player_config['args']
-            caption_url = args['ttsurl']
-            if not caption_url:
-                self._downloader.report_warning(err_msg)
-                return {}
-            timestamp = args['timestamp']
-            # We get the available subtitles
-            list_params = compat_urllib_parse.urlencode({
-                'type': 'list',
-                'tlangs': 1,
-                'asrs': 1,
-            })
-            list_url = caption_url + '&' + list_params
-            caption_list = self._download_xml(list_url, video_id)
-            original_lang_node = caption_list.find('track')
-            if original_lang_node is None:
-                self._downloader.report_warning('Video doesn\'t have automatic captions')
-                return {}
-            original_lang = original_lang_node.attrib['lang_code']
-            caption_kind = original_lang_node.attrib.get('kind', '')
+            caption_url = args.get('ttsurl')
+            if caption_url:
+                timestamp = args['timestamp']
+                # We get the available subtitles
+                list_params = compat_urllib_parse_urlencode({
+                    'type': 'list',
+                    'tlangs': 1,
+                    'asrs': 1,
+                })
+                list_url = caption_url + '&' + list_params
+                caption_list = self._download_xml(list_url, video_id)
+                original_lang_node = caption_list.find('track')
+                if original_lang_node is None:
+                    self._downloader.report_warning('Video doesn\'t have automatic captions')
+                    return {}
+                original_lang = original_lang_node.attrib['lang_code']
+                caption_kind = original_lang_node.attrib.get('kind', '')
+
+                sub_lang_list = {}
+                for lang_node in caption_list.findall('target'):
+                    sub_lang = lang_node.attrib['lang_code']
+                    sub_formats = []
+                    for ext in self._SUBTITLE_FORMATS:
+                        params = compat_urllib_parse_urlencode({
+                            'lang': original_lang,
+                            'tlang': sub_lang,
+                            'fmt': ext,
+                            'ts': timestamp,
+                            'kind': caption_kind,
+                        })
+                        sub_formats.append({
+                            'url': caption_url + '&' + params,
+                            'ext': ext,
+                        })
+                    sub_lang_list[sub_lang] = sub_formats
+                return sub_lang_list
+
+            # Some videos don't provide ttsurl but rather caption_tracks and
+            # caption_translation_languages (e.g. 20LmZk1hakA)
+            caption_tracks = args['caption_tracks']
+            caption_translation_languages = args['caption_translation_languages']
+            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
+            parsed_caption_url = compat_urllib_parse_urlparse(caption_url)
+            caption_qs = compat_parse_qs(parsed_caption_url.query)
 
             sub_lang_list = {}
-            for lang_node in caption_list.findall('target'):
-                sub_lang = lang_node.attrib['lang_code']
+            for lang in caption_translation_languages.split(','):
+                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
+                sub_lang = lang_qs.get('lc', [None])[0]
+                if not sub_lang:
+                    continue
                 sub_formats = []
                 for ext in self._SUBTITLE_FORMATS:
-                    params = compat_urllib_parse.urlencode({
-                        'lang': original_lang,
-                        'tlang': sub_lang,
-                        'fmt': ext,
-                        'ts': timestamp,
-                        'kind': caption_kind,
+                    caption_qs.update({
+                        'tlang': [sub_lang],
+                        'fmt': [ext],
                     })
+                    sub_url = compat_urlparse.urlunparse(parsed_caption_url._replace(
+                        query=compat_urllib_parse_urlencode(caption_qs, True)))
                     sub_formats.append({
-                        'url': caption_url + '&' + params,
+                        'url': sub_url,
                         'ext': ext,
                     })
                 sub_lang_list[sub_lang] = sub_formats
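For reference, the caption_tracks branch can be exercised on its own. Below is a minimal standalone sketch using the Python 3 standard-library equivalents of the compat_* aliases; the sample values are invented but shaped like the real args fields:

    from urllib.parse import parse_qs, unquote_plus, urlencode, urlparse, urlunparse

    # Invented stand-ins for args['caption_tracks'] / args['caption_translation_languages']
    caption_tracks = 'u=https%3A%2F%2Fexample.com%2Fapi%2Ftimedtext%3Fv%3DXXXXXXXXXXX%26lang%3Den'
    caption_translation_languages = 'lc%3Dde%26n%3DGerman,lc%3Dfr%26n%3DFrench'

    caption_url = parse_qs(caption_tracks.split(',')[0])['u'][0]
    parsed = urlparse(caption_url)
    caption_qs = parse_qs(parsed.query)

    subtitles = {}
    for lang in caption_translation_languages.split(','):
        sub_lang = parse_qs(unquote_plus(lang)).get('lc', [None])[0]
        if not sub_lang:
            continue
        caption_qs.update({'tlang': [sub_lang], 'fmt': ['vtt']})
        subtitles[sub_lang] = urlunparse(
            parsed._replace(query=urlencode(caption_qs, doseq=True)))
    # subtitles == {'de': 'https://example.com/api/timedtext?...&tlang=de&fmt=vtt', 'fr': ...}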
@@ -1019,6 +1142,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             self._downloader.report_warning(err_msg)
             return {}
 
+    def _mark_watched(self, video_id, video_info):
+        playback_url = video_info.get('videostats_playback_base_url', [None])[0]
+        if not playback_url:
+            return
+        parsed_playback_url = compat_urlparse.urlparse(playback_url)
+        qs = compat_urlparse.parse_qs(parsed_playback_url.query)
+
+        # The cpn generation algorithm is reverse engineered from base.js;
+        # in fact it works even with a dummy cpn.
+        CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
+        cpn = ''.join((CPN_ALPHABET[random.randint(0, 255) & 63] for _ in range(0, 16)))
+
+        qs.update({
+            'ver': ['2'],
+            'cpn': [cpn],
+        })
+        playback_url = compat_urlparse.urlunparse(
+            parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
+
+        self._download_webpage(
+            playback_url, video_id, 'Marking watched',
+            'Unable to mark watched', fatal=False)
+
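The watch ping is easy to reproduce outside the extractor. A sketch of the cpn construction and query rewrite, with standard-library names standing in for the compat_* aliases:

    import random
    from urllib.parse import parse_qs, urlencode, urlparse, urlunparse

    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'

    def watch_ping_url(playback_url):
        # 16 symbols drawn from a 64-character alphabet, as in _mark_watched above
        cpn = ''.join(random.choice(CPN_ALPHABET) for _ in range(16))
        parsed = urlparse(playback_url)
        qs = parse_qs(parsed.query)
        qs.update({'ver': ['2'], 'cpn': [cpn]})
        return urlunparse(parsed._replace(query=urlencode(qs, doseq=True)))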
     @classmethod
     def extract_id(cls, url):
         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
@@ -1098,7 +1244,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             # this can be viewed without logging into Youtube
             url = proto + '://www.youtube.com/embed/%s' % video_id
             embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
-            data = compat_urllib_parse.urlencode({
+            data = compat_urllib_parse_urlencode({
                 'video_id': video_id,
                 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
                 'sts': self._search_regex(
@@ -1186,10 +1332,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         if video_description:
             video_description = re.sub(r'''(?x)
                 <a\s+
-                    (?:[a-zA-Z-]+="[^"]+"\s+)*?
+                    (?:[a-zA-Z-]+="[^"]*"\s+)*?
                     (?:title|href)="([^"]+)"\s+
-                    (?:[a-zA-Z-]+="[^"]+"\s+)*?
-                    class="(?:yt-uix-redirect-link|yt-uix-sessionlink[^"]*)"[^>]*>
+                    (?:[a-zA-Z-]+="[^"]*"\s+)*?
+                    class="[^"]*"[^>]*>
                 [^<]+\.{3}\s*
                 </a>
             ''', r'\1', video_description)
@@ -1245,9 +1391,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
         # uploader_id
         video_uploader_id = None
-        mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
+        video_uploader_url = None
+        mobj = re.search(
+            r'<link itemprop="url" href="(?P<uploader_url>https?://www.youtube.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
+            video_webpage)
         if mobj is not None:
-            video_uploader_id = mobj.group(1)
+            video_uploader_id = mobj.group('uploader_id')
+            video_uploader_url = mobj.group('uploader_url')
         else:
             self._downloader.report_warning('unable to extract uploader nickname')
 
@@ -1275,6 +1425,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
         upload_date = unified_strdate(upload_date)
 
+        video_license = self._html_search_regex(
+            r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
+            video_webpage, 'license', default=None)
+
         m_music = re.search(
             r'<h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*<ul[^>]*>\s*<li>(?P<title>.+?) by (?P<creator>.+?)(?:\(.+?\))?</li',
             video_webpage)
@@ -1348,6 +1502,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
             if 'rtmpe%3Dyes' in encoded_url_map:
                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
+            formats_spec = {}
+            fmt_list = video_info.get('fmt_list', [''])[0]
+            if fmt_list:
+                for fmt in fmt_list.split(','):
+                    spec = fmt.split('/')
+                    if len(spec) > 1:
+                        width_height = spec[1].split('x')
+                        if len(width_height) == 2:
+                            formats_spec[spec[0]] = {
+                                'resolution': spec[1],
+                                'width': int_or_none(width_height[0]),
+                                'height': int_or_none(width_height[1]),
+                            }
             formats = []
             for url_data_str in encoded_url_map.split(','):
                 url_data = compat_parse_qs(url_data_str)
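fmt_list is a comma-separated list of itag/resolution records (real records carry extra '/'-separated fields, which this code ignores). A worked example with an invented value:

    fmt_list = '22/1280x720/9/0/115,18/640x360/9/0/115'
    formats_spec = {}
    for fmt in fmt_list.split(','):
        spec = fmt.split('/')
        if len(spec) > 1:
            width_height = spec[1].split('x')
            if len(width_height) == 2:
                formats_spec[spec[0]] = {
                    'resolution': spec[1],
                    'width': int(width_height[0]),
                    'height': int(width_height[1]),
                }
    # formats_spec['22'] == {'resolution': '1280x720', 'width': 1280, 'height': 720}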
@@ -1416,6 +1583,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 }
                 if format_id in self._formats:
                     dct.update(self._formats[format_id])
+                if format_id in formats_spec:
+                    dct.update(formats_spec[format_id])
 
                 # Some itags are not included in DASH manifest thus corresponding formats will
                 # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
@@ -1528,11 +1697,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
         self._sort_formats(formats)
 
+        self.mark_watched(video_id, video_info)
+
         return {
             'id': video_id,
             'uploader': video_uploader,
             'uploader_id': video_uploader_id,
+            'uploader_url': video_uploader_url,
             'upload_date': upload_date,
+            'license': video_license,
             'creator': video_creator,
             'title': video_title,
             'alt_title': video_alt_title,
@@ -1657,20 +1830,32 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
     def _extract_mix(self, playlist_id):
         # The mixes are generated from a single video
         # the id of the playlist is just 'RD' + video_id
-        url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
-        webpage = self._download_webpage(
-            url, playlist_id, 'Downloading Youtube mix')
+        ids = []
+        last_id = playlist_id[-11:]
+        for n in itertools.count(1):
+            url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
+            webpage = self._download_webpage(
+                url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
+            new_ids = orderedSet(re.findall(
+                r'''(?xs)data-video-username=".*?".*?
+                           href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
+                webpage))
+            # Fetch new pages until all the videos are repeated; it seems
+            # there are always 51 unique videos.
+            new_ids = [_id for _id in new_ids if _id not in ids]
+            if not new_ids:
+                break
+            ids.extend(new_ids)
+            last_id = ids[-1]
+
+        url_results = self._ids_to_results(ids)
+
         search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
         title_span = (
             search_title('playlist-title') or
             search_title('title long-title') or
             search_title('title'))
         title = clean_html(title_span)
-        ids = orderedSet(re.findall(
-            r'''(?xs)data-video-username=".*?".*?
-                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
-            webpage))
-        url_results = self._ids_to_results(ids)
 
         return self.playlist_result(url_results, playlist_id, title)
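The stop condition above generalizes to any endless-playlist scraper: request pages seeded with the last known entry until a page contributes nothing new. A sketch with the page fetch abstracted behind a hypothetical callable:

    import itertools

    def collect_mix_ids(fetch_page_ids, first_id):
        # fetch_page_ids(seed_id) -> ordered list of video ids on that page
        ids = []
        last_id = first_id
        for _ in itertools.count(1):
            new_ids = [i for i in fetch_page_ids(last_id) if i not in ids]
            if not new_ids:
                return ids
            ids.extend(new_ids)
            last_id = ids[-1]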
 
@@ -1723,7 +1908,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
         if video:
             return video
 
-        if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
+        if playlist_id.startswith(('RD', 'UL', 'PU')):
             # Mixes require a custom extraction process
             return self._extract_mix(playlist_id)
 
@@ -1757,7 +1942,8 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
 
     @classmethod
     def suitable(cls, url):
-        return False if YoutubePlaylistsIE.suitable(url) else super(YoutubeChannelIE, cls).suitable(url)
+        return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
+                else super(YoutubeChannelIE, cls).suitable(url))
 
     def _real_extract(self, url):
         channel_id = self._match_id(url)
@@ -1806,7 +1992,7 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
 
 class YoutubeUserIE(YoutubeChannelIE):
     IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
-    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
+    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:user/|c/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
     _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'
     IE_NAME = 'youtube:user'
 
@@ -1819,19 +2005,67 @@ class YoutubeUserIE(YoutubeChannelIE):
     }, {
         'url': 'ytuser:phihag',
         'only_matching': True,
+    }, {
+        'url': 'https://www.youtube.com/c/gametrailers',
+        'only_matching': True,
     }]
 
     @classmethod
     def suitable(cls, url):
         # Don't return True if the url can be extracted with other youtube
         # extractors; the regex is too permissive and it would match.
-        other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
-        if any(ie.suitable(url) for ie in other_ies):
+        other_yt_ies = iter(klass for (name, klass) in globals().items() if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
+        if any(ie.suitable(url) for ie in other_yt_ies):
             return False
         else:
             return super(YoutubeUserIE, cls).suitable(url)
 
 
+class YoutubeLiveIE(YoutubeBaseInfoExtractor):
+    IE_DESC = 'YouTube.com live streams'
+    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+))/live'
+    IE_NAME = 'youtube:live'
+
+    _TESTS = [{
+        'url': 'http://www.youtube.com/user/TheYoungTurks/live',
+        'info_dict': {
+            'id': 'a48o2S1cPoo',
+            'ext': 'mp4',
+            'title': 'The Young Turks - Live Main Show',
+            'uploader': 'The Young Turks',
+            'uploader_id': 'TheYoungTurks',
+            'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
+            'upload_date': '20150715',
+            'license': 'Standard YouTube License',
+            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
+            'categories': ['News & Politics'],
+            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
+            'like_count': int,
+            'dislike_count': int,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        channel_id = mobj.group('id')
+        base_url = mobj.group('base_url')
+        webpage = self._download_webpage(url, channel_id, fatal=False)
+        if webpage:
+            page_type = self._og_search_property(
+                'type', webpage, 'page type', default=None)
+            video_id = self._html_search_meta(
+                'videoId', webpage, 'video id', default=None)
+            if page_type == 'video' and video_id and re.match(r'^[0-9A-Za-z_-]{11}$', video_id):
+                return self.url_result(video_id, YoutubeIE.ie_key())
+        return self.url_result(base_url)
+
+
 class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
     IE_DESC = 'YouTube.com user/channel playlists'
     _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
@@ -1885,7 +2119,7 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
                 'spf': 'navigate',
             }
             url_query.update(self._EXTRA_QUERY_ARGS)
-            result_url = 'https://www.youtube.com/results?' + compat_urllib_parse.urlencode(url_query)
+            result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
             data = self._download_json(
                 result_url, video_id='query "%s"' % query,
                 note='Downloading page %s' % pagenum,
@@ -1914,10 +2148,11 @@ class YoutubeSearchDateIE(YoutubeSearchIE):
     _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
 
 
-class YoutubeSearchURLIE(InfoExtractor):
+class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor):
     IE_DESC = 'YouTube.com search URLs'
     IE_NAME = 'youtube:search_url'
     _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
+    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
     _TESTS = [{
         'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
         'playlist_mincount': 5,
@@ -1932,32 +2167,8 @@ class YoutubeSearchURLIE(InfoExtractor):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         query = compat_urllib_parse_unquote_plus(mobj.group('query'))
-
         webpage = self._download_webpage(url, query)
-        result_code = self._search_regex(
-            r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML')
-
-        part_codes = re.findall(
-            r'(?s)<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*>(.*?)</h3>', result_code)
-        entries = []
-        for part_code in part_codes:
-            part_title = self._html_search_regex(
-                [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], part_code, 'item title', fatal=False)
-            part_url_snippet = self._html_search_regex(
-                r'(?s)href="([^"]+)"', part_code, 'item URL')
-            part_url = compat_urlparse.urljoin(
-                'https://www.youtube.com/', part_url_snippet)
-            entries.append({
-                '_type': 'url',
-                'url': part_url,
-                'title': part_title,
-            })
-
-        return {
-            '_type': 'playlist',
-            'entries': entries,
-            'title': query,
-        }
+        return self.playlist_result(self._process_page(webpage), playlist_title=query)
 
 
 class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py
index c619a75e2a2d6617fd083c44bf05114d9e4c83bc..2ef17727592405b7bb20b378403d82470b52ce2f 100644 (file)
@@ -85,6 +85,13 @@ class ZDFIE(InfoExtractor):
         uploader = xpath_text(doc, './/details/originChannelTitle', 'uploader')
         uploader_id = xpath_text(doc, './/details/originChannelId', 'uploader id')
         upload_date = unified_strdate(xpath_text(doc, './/details/airtime', 'upload date'))
+        subtitles = {}
+        captions_url = doc.find('.//caption/url')
+        if captions_url is not None:
+            subtitles['de'] = [{
+                'url': captions_url.text,
+                'ext': 'ttml',
+            }]
 
         def xml_to_thumbnails(fnode):
             thumbnails = []
@@ -137,6 +144,10 @@ class ZDFIE(InfoExtractor):
                 formats.extend(self._extract_smil_formats(
                     video_url, video_id, fatal=False))
             elif ext == 'm3u8':
+                # the certificates are misconfigured (see
+                # https://github.com/rg3/youtube-dl/issues/8665)
+                if video_url.startswith('https://'):
+                    continue
                 formats.extend(self._extract_m3u8_formats(
                     video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
             elif ext == 'f4m':
@@ -186,6 +197,7 @@ class ZDFIE(InfoExtractor):
             'uploader_id': uploader_id,
             'upload_date': upload_date,
             'formats': formats,
+            'subtitles': subtitles,
         }
 
     def _real_extract(self, url):
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index a7440c58242079ea1c6874e1bed0abe756fdc814..9737f70021d3285a4e8df616467b764de1a91fa2 100644 (file)
@@ -232,7 +232,7 @@ class JSInterpreter(object):
     def extract_function(self, funcname):
         func_m = re.search(
             r'''(?x)
-                (?:function\s+%s|[{;,]%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
+                (?:function\s+%s|[{;,]\s*%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
                 \((?P<args>[^)]*)\)\s*
                 \{(?P<code>[^}]+)\}''' % (
                 re.escape(funcname), re.escape(funcname), re.escape(funcname)),
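The practical effect of the added \s* is easiest to see against sample input. A quick self-check with the updated pattern (the function name and JS snippets are invented):

    import re

    funcname = 'xy'
    func_re = re.compile(
        r'''(?x)
            (?:function\s+%s|[{;,]\s*%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
            \((?P<args>[^)]*)\)\s*
            \{(?P<code>[^}]+)\}''' % ((re.escape(funcname),) * 3))

    assert func_re.search('function xy(n){return n-1}')
    # The \s* after [{;,] is what lets a spaced separator match now:
    assert func_re.search('a=1; xy = function(n){return n+1}')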
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index 3afa8bb6faac3610dfc3695fe27d6006ff9339b5..99ce4131fdfaacbbbca07805fbc86c8563ea95fe 100644 (file)
@@ -170,6 +170,14 @@ def parseOpts(overrideArguments=None):
         action='store_const', dest='extract_flat', const='in_playlist',
         default=False,
         help='Do not extract the videos of a playlist, only list them.')
+    general.add_option(
+        '--mark-watched',
+        action='store_true', dest='mark_watched', default=False,
+        help='Mark videos watched (YouTube only)')
+    general.add_option(
+        '--no-mark-watched',
+        action='store_false', dest='mark_watched', default=False,
+        help='Do not mark videos watched (YouTube only)')
     general.add_option(
         '--no-color', '--no-colors',
         action='store_true', dest='no_color',
@@ -180,7 +188,10 @@ def parseOpts(overrideArguments=None):
     network.add_option(
         '--proxy', dest='proxy',
         default=None, metavar='URL',
-        help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
+        help='Use the specified HTTP/HTTPS/SOCKS proxy. To enable experimental '
+             'SOCKS proxy, specify a proper scheme. For example '
+             'socks5://127.0.0.1:1080/. Pass in an empty string (--proxy "") '
+             'for direct connection')
     network.add_option(
         '--socket-timeout',
         dest='socket_timeout', type=float, default=None, metavar='SECONDS',
@@ -384,13 +395,17 @@ def parseOpts(overrideArguments=None):
 
     downloader = optparse.OptionGroup(parser, 'Download Options')
     downloader.add_option(
-        '-r', '--rate-limit',
-        dest='ratelimit', metavar='LIMIT',
+        '-r', '--limit-rate', '--rate-limit',
+        dest='ratelimit', metavar='RATE',
         help='Maximum download rate in bytes per second (e.g. 50K or 4.2M)')
     downloader.add_option(
         '-R', '--retries',
         dest='retries', metavar='RETRIES', default=10,
         help='Number of retries (default is %default), or "infinite".')
+    downloader.add_option(
+        '--fragment-retries',
+        dest='fragment_retries', metavar='RETRIES', default=10,
+        help='Number of retries for a fragment (default is %default), or "infinite" (DASH only)')
     downloader.add_option(
         '--buffer-size',
         dest='buffersize', metavar='SIZE', default='1024',
@@ -413,8 +428,12 @@ def parseOpts(overrideArguments=None):
         help='Set file xattribute ytdl.filesize with expected filesize (experimental)')
     downloader.add_option(
         '--hls-prefer-native',
-        dest='hls_prefer_native', action='store_true',
-        help='Use the native HLS downloader instead of ffmpeg (experimental)')
+        dest='hls_prefer_native', action='store_true', default=None,
+        help='Use the native HLS downloader instead of ffmpeg')
+    downloader.add_option(
+        '--hls-prefer-ffmpeg',
+        dest='hls_prefer_native', action='store_false', default=None,
+        help='Use ffmpeg instead of the native HLS downloader')
     downloader.add_option(
         '--hls-use-mpegts',
         dest='hls_use_mpegts', action='store_true',
@@ -649,7 +668,7 @@ def parseOpts(overrideArguments=None):
         action='store_true', dest='writeannotations', default=False,
         help='Write video annotations to a .annotations.xml file')
     filesystem.add_option(
-        '--load-info',
+        '--load-info-json', '--load-info',
         dest='load_info_filename', metavar='FILE',
         help='JSON file containing the video information (created with the "--write-info-json" option)')
     filesystem.add_option(
@@ -712,7 +731,7 @@ def parseOpts(overrideArguments=None):
     postproc.add_option(
         '--embed-subs',
         action='store_true', dest='embedsubtitles', default=False,
-        help='Embed subtitles in the video (only for mkv and mp4 videos)')
+        help='Embed subtitles in the video (only for mp4, webm and mkv videos)')
     postproc.add_option(
         '--embed-thumbnail',
         action='store_true', dest='embedthumbnail', default=False,
diff --git a/youtube_dl/postprocessor/__init__.py b/youtube_dl/postprocessor/__init__.py
index 0d8ef6ca26c6ef7f1b7b402b387d20eebd3f8a8f..3ea5183999d5ed2adacbb05bccc6af93e8ac6750 100644 (file)
@@ -6,6 +6,7 @@ from .ffmpeg import (
     FFmpegEmbedSubtitlePP,
     FFmpegExtractAudioPP,
     FFmpegFixupStretchedPP,
+    FFmpegFixupM3u8PP,
     FFmpegFixupM4aPP,
     FFmpegMergerPP,
     FFmpegMetadataPP,
@@ -26,6 +27,7 @@ __all__ = [
     'ExecAfterDownloadPP',
     'FFmpegEmbedSubtitlePP',
     'FFmpegExtractAudioPP',
+    'FFmpegFixupM3u8PP',
     'FFmpegFixupM4aPP',
     'FFmpegFixupStretchedPP',
     'FFmpegMergerPP',
diff --git a/youtube_dl/postprocessor/execafterdownload.py b/youtube_dl/postprocessor/execafterdownload.py
index 74f66d669c0679a9eece06b1924ecc9f5dae00d2..90630c2d7391de9fd288662c8f207433702f8c99 100644 (file)
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 import subprocess
 
 from .common import PostProcessor
-from ..compat import shlex_quote
+from ..compat import compat_shlex_quote
 from ..utils import PostProcessingError
 
 
@@ -17,7 +17,7 @@ class ExecAfterDownloadPP(PostProcessor):
         if '{}' not in cmd:
             cmd += ' {}'
 
-        cmd = cmd.replace('{}', shlex_quote(information['filepath']))
+        cmd = cmd.replace('{}', compat_shlex_quote(information['filepath']))
 
         self._downloader.to_screen('[exec] Executing command: %s' % cmd)
         retCode = subprocess.call(cmd, shell=True)
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
index 380bc6f292f2390fdf5dabd131a20184ba8334df..fa99b0c2a6aa4d39fbffdcbafd4926431794c55b 100644 (file)
@@ -25,6 +25,19 @@ from ..utils import (
 )
 
 
+EXT_TO_OUT_FORMATS = {
+    "aac": "adts",
+    "m4a": "ipod",
+    "mka": "matroska",
+    "mkv": "matroska",
+    "mpg": "mpeg",
+    "ogv": "ogg",
+    "ts": "mpegts",
+    "wma": "asf",
+    "wmv": "asf",
+}
+
+
 class FFmpegPostProcessorError(PostProcessingError):
     pass
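ffmpeg selects its muxer from the -f flag rather than from the output extension, and extensions such as m4a or ts do not name a muxer directly; that mismatch is what the table translates. A hypothetical lookup helper (assumes the EXT_TO_OUT_FORMATS table above is in scope):

    def output_format_args(path):
        ext = path.rsplit('.', 1)[-1].lower()
        fmt = EXT_TO_OUT_FORMATS.get(ext)
        # e.g. output_format_args('audio.m4a') -> ['-f', 'ipod', 'audio.m4a']
        return (['-f', fmt] if fmt else []) + [path]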
 
@@ -162,7 +175,8 @@ class FFmpegPostProcessor(PostProcessor):
         # Always use 'file:' because the filename may contain ':' (ffmpeg
         # interprets that as a protocol) or can start with '-' (-- is broken in
         # ffmpeg, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details)
-        return 'file:' + fn
+        # Also leave '-' intact in order not to break streaming to stdout.
+        return 'file:' + fn if fn != '-' else fn
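The ternary matters because ffmpeg treats anything before the first ':' in a filename as a protocol. A tiny self-check of the rule as now written:

    def ffmpeg_filename_argument(fn):
        # 'file:' defuses a ':' inside the name; a bare '-' must survive
        # untouched so that streaming to stdout keeps working
        return 'file:' + fn if fn != '-' else fn

    assert ffmpeg_filename_argument('http:video.mp4') == 'file:http:video.mp4'
    assert ffmpeg_filename_argument('-') == '-'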
 
 
 class FFmpegExtractAudioPP(FFmpegPostProcessor):
@@ -318,17 +332,34 @@ class FFmpegVideoConvertorPP(FFmpegPostProcessor):
 
 class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
     def run(self, information):
-        if information['ext'] not in ['mp4', 'mkv']:
-            self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 or mkv files')
+        if information['ext'] not in ('mp4', 'webm', 'mkv'):
+            self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4, webm or mkv files')
             return [], information
         subtitles = information.get('requested_subtitles')
         if not subtitles:
             self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to embed')
             return [], information
 
-        sub_langs = list(subtitles.keys())
         filename = information['filepath']
-        sub_filenames = [subtitles_filename(filename, lang, sub_info['ext']) for lang, sub_info in subtitles.items()]
+
+        ext = information['ext']
+        sub_langs = []
+        sub_filenames = []
+        webm_vtt_warn = False
+
+        for lang, sub_info in subtitles.items():
+            sub_ext = sub_info['ext']
+            if ext != 'webm' or sub_ext == 'vtt':
+                sub_langs.append(lang)
+                sub_filenames.append(subtitles_filename(filename, lang, sub_ext))
+            else:
+                if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt':
+                    webm_vtt_warn = True
+                    self._downloader.to_screen('[ffmpeg] Only WebVTT subtitles can be embedded in webm files')
+
+        if not sub_langs:
+            return [], information
+
         input_files = [filename] + sub_filenames
 
         opts = [
@@ -358,23 +389,30 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
 class FFmpegMetadataPP(FFmpegPostProcessor):
     def run(self, info):
         metadata = {}
-        if info.get('title') is not None:
-            metadata['title'] = info['title']
-        if info.get('upload_date') is not None:
-            metadata['date'] = info['upload_date']
-        if info.get('artist') is not None:
-            metadata['artist'] = info['artist']
-        elif info.get('uploader') is not None:
-            metadata['artist'] = info['uploader']
-        elif info.get('uploader_id') is not None:
-            metadata['artist'] = info['uploader_id']
-        if info.get('description') is not None:
-            metadata['description'] = info['description']
-            metadata['comment'] = info['description']
-        if info.get('webpage_url') is not None:
-            metadata['purl'] = info['webpage_url']
-        if info.get('album') is not None:
-            metadata['album'] = info['album']
+
+        def add(meta_list, info_list=None):
+            if not info_list:
+                info_list = meta_list
+            if not isinstance(meta_list, (list, tuple)):
+                meta_list = (meta_list,)
+            if not isinstance(info_list, (list, tuple)):
+                info_list = (info_list,)
+            for info_f in info_list:
+                if info.get(info_f) is not None:
+                    for meta_f in meta_list:
+                        metadata[meta_f] = info[info_f]
+                    break
+
+        add('title', ('track', 'title'))
+        add('date', 'upload_date')
+        add(('description', 'comment'), 'description')
+        add('purl', 'webpage_url')
+        add('track', 'track_number')
+        add('artist', ('artist', 'creator', 'uploader', 'uploader_id'))
+        add('genre')
+        add('album')
+        add('album_artist')
+        add('disc', 'disc_number')
 
         if not metadata:
             self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add')
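The fallback order is the point of this rewrite. A self-contained rendition of add() bound to a toy info dict (values invented):

    info = {'uploader': 'Some Channel', 'upload_date': '20160625'}
    metadata = {}

    def add(meta_list, info_list=None):
        if not info_list:
            info_list = meta_list
        if not isinstance(meta_list, (list, tuple)):
            meta_list = (meta_list,)
        if not isinstance(info_list, (list, tuple)):
            info_list = (info_list,)
        for info_f in info_list:
            if info.get(info_f) is not None:
                for meta_f in meta_list:
                    metadata[meta_f] = info[info_f]
                break

    add('date', 'upload_date')
    add('artist', ('artist', 'creator', 'uploader', 'uploader_id'))
    # 'artist' and 'creator' are absent, so the first present fallback wins:
    # metadata == {'date': '20160625', 'artist': 'Some Channel'}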
@@ -391,10 +429,6 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
         for (name, value) in metadata.items():
             options.extend(['-metadata', '%s=%s' % (name, value)])
 
-        # https://github.com/rg3/youtube-dl/issues/8350
-        if info.get('protocol') == 'm3u8_native' or info.get('protocol') == 'm3u8' and self._downloader.params.get('hls_prefer_native', False):
-            options.extend(['-bsf:a', 'aac_adtstoasc'])
-
         self._downloader.to_screen('[ffmpeg] Adding metadata to \'%s\'' % filename)
         self.run_ffmpeg(filename, temp_filename, options)
         os.remove(encodeFilename(filename))
@@ -467,6 +501,21 @@ class FFmpegFixupM4aPP(FFmpegPostProcessor):
         return [], info
 
 
+class FFmpegFixupM3u8PP(FFmpegPostProcessor):
+    def run(self, info):
+        filename = info['filepath']
+        temp_filename = prepend_extension(filename, 'temp')
+
+        options = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
+        self._downloader.to_screen('[ffmpeg] Fixing malformed aac bitstream in "%s"' % filename)
+        self.run_ffmpeg(filename, temp_filename, options)
+
+        os.remove(encodeFilename(filename))
+        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+
+        return [], info
+
+
 class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
     def __init__(self, downloader=None, format=None):
         super(FFmpegSubtitlesConvertorPP, self).__init__(downloader)
@@ -495,7 +544,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
             sub_filenames.append(old_file)
             new_file = subtitles_filename(filename, lang, new_ext)
 
-            if ext == 'dfxp' or ext == 'ttml':
+            if ext in ('dfxp', 'ttml', 'tt'):
                 self._downloader.report_warning(
                     'You have requested to convert dfxp (TTML) subtitles into another format, '
                     'which results in style information loss')
diff --git a/youtube_dl/postprocessor/xattrpp.py b/youtube_dl/postprocessor/xattrpp.py
index 480d48d057400fafb0acdfc8492fca31b1d2f674..e39ca60aa08326b6f05814ff800bb09c75755e48 100644 (file)
@@ -6,6 +6,7 @@ import sys
 import errno
 
 from .common import PostProcessor
+from ..compat import compat_os_name
 from ..utils import (
     check_executable,
     hyphenate_date,
@@ -73,7 +74,7 @@ class XAttrMetadataPP(PostProcessor):
                     raise XAttrMetadataError(e.errno, e.strerror)
 
         except ImportError:
-            if os.name == 'nt':
+            if compat_os_name == 'nt':
                 # Write xattrs to NTFS Alternate Data Streams:
                 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
                 def write_xattr(path, key, value):
@@ -168,7 +169,7 @@ class XAttrMetadataPP(PostProcessor):
                     'Unable to write extended attributes due to too long values.')
             else:
                 msg = 'This filesystem doesn\'t support extended attributes. '
-                if os.name == 'nt':
+                if compat_os_name == 'nt':
                     msg += 'You need to use NTFS.'
                 else:
                     msg += '(You may have to enable them in your /etc/fstab)'
diff --git a/youtube_dl/socks.py b/youtube_dl/socks.py
new file mode 100644 (file)
index 0000000..fd49d74
--- /dev/null
@@ -0,0 +1,271 @@
+# Public Domain SOCKS proxy protocol implementation
+# Adapted from https://gist.github.com/bluec0re/cafd3764412967417fd3
+
+from __future__ import unicode_literals
+
+# References:
+# SOCKS4 protocol http://www.openssh.com/txt/socks4.protocol
+# SOCKS4A protocol http://www.openssh.com/txt/socks4a.protocol
+# SOCKS5 protocol https://tools.ietf.org/html/rfc1928
+# SOCKS5 username/password authentication https://tools.ietf.org/html/rfc1929
+
+import collections
+import socket
+
+from .compat import (
+    compat_ord,
+    compat_struct_pack,
+    compat_struct_unpack,
+)
+
+__author__ = 'Timo Schmid <coding@timoschmid.de>'
+
+SOCKS4_VERSION = 4
+SOCKS4_REPLY_VERSION = 0x00
+# Excerpt from SOCKS4A protocol:
+# if the client cannot resolve the destination host's domain name to find its
+# IP address, it should set the first three bytes of DSTIP to NULL and the last
+# byte to a non-zero value.
+SOCKS4_DEFAULT_DSTIP = compat_struct_pack('!BBBB', 0, 0, 0, 0xFF)
+
+SOCKS5_VERSION = 5
+SOCKS5_USER_AUTH_VERSION = 0x01
+SOCKS5_USER_AUTH_SUCCESS = 0x00
+
+
+class Socks4Command(object):
+    CMD_CONNECT = 0x01
+    CMD_BIND = 0x02
+
+
+class Socks5Command(Socks4Command):
+    CMD_UDP_ASSOCIATE = 0x03
+
+
+class Socks5Auth(object):
+    AUTH_NONE = 0x00
+    AUTH_GSSAPI = 0x01
+    AUTH_USER_PASS = 0x02
+    AUTH_NO_ACCEPTABLE = 0xFF  # For server response
+
+
+class Socks5AddressType(object):
+    ATYP_IPV4 = 0x01
+    ATYP_DOMAINNAME = 0x03
+    ATYP_IPV6 = 0x04
+
+
+class ProxyError(IOError):
+    ERR_SUCCESS = 0x00
+
+    def __init__(self, code=None, msg=None):
+        if code is not None and msg is None:
+            msg = self.CODES.get(code) or 'unknown error'
+        super(ProxyError, self).__init__(code, msg)
+
+
+class InvalidVersionError(ProxyError):
+    def __init__(self, expected_version, got_version):
+        msg = ('Invalid response version from server. Expected {0:02x} got '
+               '{1:02x}'.format(expected_version, got_version))
+        super(InvalidVersionError, self).__init__(0, msg)
+
+
+class Socks4Error(ProxyError):
+    ERR_SUCCESS = 90
+
+    CODES = {
+        91: 'request rejected or failed',
+        92: 'request rejected because SOCKS server cannot connect to identd on the client',
+        93: 'request rejected because the client program and identd report different user-ids'
+    }
+
+
+class Socks5Error(ProxyError):
+    ERR_GENERAL_FAILURE = 0x01
+
+    CODES = {
+        0x01: 'general SOCKS server failure',
+        0x02: 'connection not allowed by ruleset',
+        0x03: 'Network unreachable',
+        0x04: 'Host unreachable',
+        0x05: 'Connection refused',
+        0x06: 'TTL expired',
+        0x07: 'Command not supported',
+        0x08: 'Address type not supported',
+        0xFE: 'unknown username or invalid password',
+        0xFF: 'all offered authentication methods were rejected'
+    }
+
+
+class ProxyType(object):
+    SOCKS4 = 0
+    SOCKS4A = 1
+    SOCKS5 = 2
+
+Proxy = collections.namedtuple('Proxy', (
+    'type', 'host', 'port', 'username', 'password', 'remote_dns'))
+
+
+class sockssocket(socket.socket):
+    def __init__(self, *args, **kwargs):
+        self._proxy = None
+        super(sockssocket, self).__init__(*args, **kwargs)
+
+    def setproxy(self, proxytype, addr, port, rdns=True, username=None, password=None):
+        assert proxytype in (ProxyType.SOCKS4, ProxyType.SOCKS4A, ProxyType.SOCKS5)
+
+        self._proxy = Proxy(proxytype, addr, port, username, password, rdns)
+
+    def recvall(self, cnt):
+        data = b''
+        while len(data) < cnt:
+            cur = self.recv(cnt - len(data))
+            if not cur:
+                raise IOError('{0} bytes missing'.format(cnt - len(data)))
+            data += cur
+        return data
+
+    def _recv_bytes(self, cnt):
+        data = self.recvall(cnt)
+        return compat_struct_unpack('!{0}B'.format(cnt), data)
+
+    @staticmethod
+    def _len_and_data(data):
+        return compat_struct_pack('!B', len(data)) + data
+
+    def _check_response_version(self, expected_version, got_version):
+        if got_version != expected_version:
+            self.close()
+            raise InvalidVersionError(expected_version, got_version)
+
+    def _resolve_address(self, destaddr, default, use_remote_dns):
+        try:
+            return socket.inet_aton(destaddr)
+        except socket.error:
+            if use_remote_dns and self._proxy.remote_dns:
+                return default
+            else:
+                return socket.inet_aton(socket.gethostbyname(destaddr))
+
+    def _setup_socks4(self, address, is_4a=False):
+        destaddr, port = address
+
+        ipaddr = self._resolve_address(destaddr, SOCKS4_DEFAULT_DSTIP, use_remote_dns=is_4a)
+
+        packet = compat_struct_pack('!BBH', SOCKS4_VERSION, Socks4Command.CMD_CONNECT, port) + ipaddr
+
+        username = (self._proxy.username or '').encode('utf-8')
+        packet += username + b'\x00'
+
+        if is_4a and self._proxy.remote_dns:
+            packet += destaddr.encode('utf-8') + b'\x00'
+
+        self.sendall(packet)
+
+        version, resp_code, dstport, dsthost = compat_struct_unpack('!BBHI', self.recvall(8))
+
+        self._check_response_version(SOCKS4_REPLY_VERSION, version)
+
+        if resp_code != Socks4Error.ERR_SUCCESS:
+            self.close()
+            raise Socks4Error(resp_code)
+
+        return (dsthost, dstport)
+
+    def _setup_socks4a(self, address):
+        return self._setup_socks4(address, is_4a=True)
+
+    def _socks5_auth(self):
+        packet = compat_struct_pack('!B', SOCKS5_VERSION)
+
+        auth_methods = [Socks5Auth.AUTH_NONE]
+        if self._proxy.username and self._proxy.password:
+            auth_methods.append(Socks5Auth.AUTH_USER_PASS)
+
+        packet += compat_struct_pack('!B', len(auth_methods))
+        packet += compat_struct_pack('!{0}B'.format(len(auth_methods)), *auth_methods)
+
+        self.sendall(packet)
+
+        version, method = self._recv_bytes(2)
+
+        self._check_response_version(SOCKS5_VERSION, version)
+
+        if method == Socks5Auth.AUTH_NO_ACCEPTABLE:
+            self.close()
+            raise Socks5Error(method)
+
+        if method == Socks5Auth.AUTH_USER_PASS:
+            username = self._proxy.username.encode('utf-8')
+            password = self._proxy.password.encode('utf-8')
+            packet = compat_struct_pack('!B', SOCKS5_USER_AUTH_VERSION)
+            packet += self._len_and_data(username) + self._len_and_data(password)
+            self.sendall(packet)
+
+            version, status = self._recv_bytes(2)
+
+            self._check_response_version(SOCKS5_USER_AUTH_VERSION, version)
+
+            if status != SOCKS5_USER_AUTH_SUCCESS:
+                self.close()
+                raise Socks5Error(Socks5Error.ERR_GENERAL_FAILURE)
+
+    def _setup_socks5(self, address):
+        destaddr, port = address
+
+        ipaddr = self._resolve_address(destaddr, None, use_remote_dns=True)
+
+        self._socks5_auth()
+
+        reserved = 0
+        packet = compat_struct_pack('!BBB', SOCKS5_VERSION, Socks5Command.CMD_CONNECT, reserved)
+        if ipaddr is None:
+            destaddr = destaddr.encode('utf-8')
+            packet += compat_struct_pack('!B', Socks5AddressType.ATYP_DOMAINNAME)
+            packet += self._len_and_data(destaddr)
+        else:
+            packet += compat_struct_pack('!B', Socks5AddressType.ATYP_IPV4) + ipaddr
+        packet += compat_struct_pack('!H', port)
+
+        self.sendall(packet)
+
+        version, status, reserved, atype = self._recv_bytes(4)
+
+        self._check_response_version(SOCKS5_VERSION, version)
+
+        if status != Socks5Error.ERR_SUCCESS:
+            self.close()
+            raise Socks5Error(status)
+
+        if atype == Socks5AddressType.ATYP_IPV4:
+            destaddr = self.recvall(4)
+        elif atype == Socks5AddressType.ATYP_DOMAINNAME:
+            alen = compat_ord(self.recv(1))
+            destaddr = self.recvall(alen)
+        elif atype == Socks5AddressType.ATYP_IPV6:
+            destaddr = self.recvall(16)
+        destport = compat_struct_unpack('!H', self.recvall(2))[0]
+
+        return (destaddr, destport)
+
+    def _make_proxy(self, connect_func, address):
+        if not self._proxy:
+            return connect_func(self, address)
+
+        result = connect_func(self, (self._proxy.host, self._proxy.port))
+        if result != 0 and result is not None:
+            return result
+        setup_funcs = {
+            ProxyType.SOCKS4: self._setup_socks4,
+            ProxyType.SOCKS4A: self._setup_socks4a,
+            ProxyType.SOCKS5: self._setup_socks5,
+        }
+        setup_funcs[self._proxy.type](address)
+        return result
+
+    def connect(self, address):
+        self._make_proxy(socket.socket.connect, address)
+
+    def connect_ex(self, address):
+        return self._make_proxy(socket.socket.connect_ex, address)
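sockssocket drops in for a regular socket, with the handshake performed inside connect(). A minimal usage sketch, assuming a SOCKS5 proxy listening on 127.0.0.1:1080:

    import socket
    from youtube_dl.socks import ProxyType, sockssocket

    s = sockssocket(socket.AF_INET, socket.SOCK_STREAM)
    s.setproxy(ProxyType.SOCKS5, '127.0.0.1', 1080, rdns=True)
    s.connect(('example.com', 80))  # SOCKS5 handshake happens in here
    s.sendall(b'GET / HTTP/1.0\r\nHost: example.com\r\n\r\n')
    print(s.recv(4096))
    s.close()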
diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py
index 06c1d6cc1755ef022aa78967d4b651e21fd66618..7cf490aa43a878b3c377bea0b173c7a2b170c2c7 100644 (file)
@@ -4,10 +4,12 @@ import collections
 import io
 import zlib
 
-from .compat import compat_str
+from .compat import (
+    compat_str,
+    compat_struct_unpack,
+)
 from .utils import (
     ExtractorError,
-    struct_unpack,
 )
 
 
@@ -23,17 +25,17 @@ def _extract_tags(file_contents):
             file_contents[:1])
 
     # Determine number of bits in framesize rectangle
-    framesize_nbits = struct_unpack('!B', content[:1])[0] >> 3
+    framesize_nbits = compat_struct_unpack('!B', content[:1])[0] >> 3
     framesize_len = (5 + 4 * framesize_nbits + 7) // 8
 
     pos = framesize_len + 2 + 2
     while pos < len(content):
-        header16 = struct_unpack('<H', content[pos:pos + 2])[0]
+        header16 = compat_struct_unpack('<H', content[pos:pos + 2])[0]
         pos += 2
         tag_code = header16 >> 6
         tag_len = header16 & 0x3f
         if tag_len == 0x3f:
-            tag_len = struct_unpack('<I', content[pos:pos + 4])[0]
+            tag_len = compat_struct_unpack('<I', content[pos:pos + 4])[0]
             pos += 4
         assert pos + tag_len <= len(content), \
             ('Tag %d ends at %d+%d - that\'s longer than the file (%d)'
@@ -101,7 +103,7 @@ def _read_int(reader):
     for _ in range(5):
         buf = reader.read(1)
         assert len(buf) == 1
-        b = struct_unpack('<B', buf)[0]
+        b = compat_struct_unpack('<B', buf)[0]
         res = res | ((b & 0x7f) << shift)
         if b & 0x80 == 0:
             break
@@ -127,7 +129,7 @@ def _s24(reader):
     bs = reader.read(3)
     assert len(bs) == 3
     last_byte = b'\xff' if (ord(bs[2:3]) >= 0x80) else b'\x00'
-    return struct_unpack('<i', bs + last_byte)[0]
+    return compat_struct_unpack('<i', bs + last_byte)[0]
 
 
 def _read_string(reader):
@@ -146,7 +148,7 @@ def _read_bytes(count, reader):
 
 def _read_byte(reader):
     resb = _read_bytes(1, reader=reader)
-    res = struct_unpack('<B', resb)[0]
+    res = compat_struct_unpack('<B', resb)[0]
     return res
 
 
diff --git a/youtube_dl/update.py b/youtube_dl/update.py
index 676ebe1c42d1d6b54eb50bfc3f087e6fee8e20f0..ebce9666a21465b53b93ccd0bd263b29349720a0 100644 (file)
@@ -83,11 +83,8 @@ def update_self(to_screen, verbose, opener):
 
     print_notes(to_screen, versions_info['versions'])
 
-    filename = sys.argv[0]
-    # Py2EXE: Filename could be different
-    if hasattr(sys, 'frozen') and not os.path.isfile(filename):
-        if os.path.isfile(filename + '.exe'):
-            filename += '.exe'
+    # sys.executable is set to the full pathname of the exe-file for py2exe
+    filename = sys.executable if hasattr(sys, 'frozen') else sys.argv[0]
 
     if not os.access(filename, os.W_OK):
         to_screen('ERROR: no write permissions on %s' % filename)
@@ -95,7 +92,7 @@ def update_self(to_screen, verbose, opener):
 
     # Py2EXE
     if hasattr(sys, 'frozen'):
-        exe = os.path.abspath(filename)
+        exe = filename
         directory = os.path.dirname(exe)
         if not os.access(directory, os.W_OK):
             to_screen('ERROR: no write permissions on %s' % directory)
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 6978a10e4cfa9b89874f5ec1da15c30a3e6d7e99..562031fe110a9a6962c6e5efed2bb311f3059979 100644 (file)
@@ -14,8 +14,8 @@ import email.utils
 import errno
 import functools
 import gzip
-import itertools
 import io
+import itertools
 import json
 import locale
 import math
@@ -24,9 +24,8 @@ import os
 import pipes
 import platform
 import re
-import ssl
 import socket
-import struct
+import ssl
 import subprocess
 import sys
 import tempfile
@@ -35,29 +34,49 @@ import xml.etree.ElementTree
 import zlib
 
 from .compat import (
+    compat_HTMLParser,
     compat_basestring,
     compat_chr,
     compat_etree_fromstring,
     compat_html_entities,
+    compat_html_entities_html5,
     compat_http_client,
     compat_kwargs,
     compat_parse_qs,
+    compat_shlex_quote,
     compat_socket_create_connection,
     compat_str,
+    compat_struct_pack,
     compat_urllib_error,
     compat_urllib_parse,
+    compat_urllib_parse_urlencode,
     compat_urllib_parse_urlparse,
+    compat_urllib_parse_unquote_plus,
     compat_urllib_request,
     compat_urlparse,
-    shlex_quote,
+    compat_xpath,
 )
 
+from .socks import (
+    ProxyType,
+    sockssocket,
+)
+
+
+def register_socks_protocols():
+    # "Register" SOCKS protocols
+    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
+    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
+    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
+        if scheme not in compat_urlparse.uses_netloc:
+            compat_urlparse.uses_netloc.append(scheme)
+
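+# Illustrative expectation (not asserted by this commit): once registered,
+# SOCKS proxy URLs expose host and port via the parsed netloc, e.g.
+#   compat_urlparse.urlparse('socks5://127.0.0.1:1080').port == 1080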
 
 # This is not clearly defined otherwise
 compiled_regex_type = type(re.compile(''))
 
 std_headers = {
-    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/44.0 (Chrome)',
+    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/47.0 (Chrome)',
     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
     'Accept-Encoding': 'gzip, deflate',
@@ -86,6 +105,11 @@ KNOWN_EXTENSIONS = (
     'wav',
     'f4f', 'f4m', 'm3u8', 'smil')
 
+# needed for sanitizing filenames in restricted mode
+ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
+                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'],
+                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy')))
+
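+# For example, restricted-mode sanitization transliterates accents instead of
+# dropping them: sanitize_filename('Éric', restricted=True) == 'Eric'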
 
 def preferredencoding():
     """Get preferred encoding.
@@ -160,18 +184,11 @@ if sys.version_info >= (2, 7):
     def find_xpath_attr(node, xpath, key, val=None):
         """ Find the xpath xpath[@key=val] """
         assert re.match(r'^[a-zA-Z_-]+$', key)
-        if val:
-            assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
         expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
         return node.find(expr)
 else:
     def find_xpath_attr(node, xpath, key, val=None):
-        # Here comes the crazy part: In 2.6, if the xpath is a unicode,
-        # .//node does not match if a node is a direct child of . !
-        if isinstance(xpath, compat_str):
-            xpath = xpath.encode('ascii')
-
-        for f in node.findall(xpath):
+        for f in node.findall(compat_xpath(xpath)):
             if key not in f.attrib:
                 continue
             if val is None or f.attrib.get(key) == val:
@@ -196,9 +213,7 @@ def xpath_with_ns(path, ns_map):
 
 def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
     def _find_xpath(xpath):
-        if sys.version_info < (2, 7):  # Crazy 2.6
-            xpath = xpath.encode('ascii')
-        return node.find(xpath)
+        return node.find(compat_xpath(xpath))
 
     if isinstance(xpath, (str, compat_str)):
         n = _find_xpath(xpath)
@@ -257,9 +272,9 @@ def get_element_by_attribute(attribute, value, html):
 
     m = re.search(r'''(?xs)
         <([a-zA-Z0-9:._-]+)
-         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
+         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'))*?
          \s+%s=['"]?%s['"]?
-         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
+         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'))*?
         \s*>
         (?P<content>.*?)
         </\1>
@@ -275,6 +290,38 @@ def get_element_by_attribute(attribute, value, html):
     return unescapeHTML(res)
 
 
+class HTMLAttributeParser(compat_HTMLParser):
+    """Trivial HTML parser to gather the attributes for a single element"""
+    def __init__(self):
+        self.attrs = {}
+        compat_HTMLParser.__init__(self)
+
+    def handle_starttag(self, tag, attrs):
+        self.attrs = dict(attrs)
+
+
+def extract_attributes(html_element):
+    """Given a string for an HTML element such as
+    <el
+         a="foo" B="bar" c="&98;az" d=boz
+         empty= noval entity="&amp;"
+         sq='"' dq="'"
+    >
+    Decode and return a dictionary of attributes.
+    {
+        'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
+        'empty': '', 'noval': None, 'entity': '&',
+        'sq': '"', 'dq': '\''
+    }.
+    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
+    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
+    """
+    parser = HTMLAttributeParser()
+    parser.feed(html_element)
+    parser.close()
+    return parser.attrs
+
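+# A minimal usage sketch, mirroring the docstring above (HTMLParser
+# lowercases attribute names):
+#   extract_attributes('<el a="foo" B="bar" noval>') == {'a': 'foo', 'b': 'bar', 'noval': None}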
+
 def clean_html(html):
     """Clean an HTML snippet into a readable string"""
 
@@ -339,6 +386,8 @@ def sanitize_filename(s, restricted=False, is_id=False):
     Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
     """
     def replace_insane(char):
+        if restricted and char in ACCENT_CHARS:
+            return ACCENT_CHARS[char]
         if char == '?' or ord(char) < 32 or ord(char) == 127:
             return ''
         elif char == '"':
@@ -391,9 +440,12 @@ def sanitize_path(s):
 
 # Prepend protocol-less URLs with `http:` scheme in order to mitigate the number of
 # unwanted failures due to missing protocol
+def sanitize_url(url):
+    return 'http:%s' % url if url.startswith('//') else url
+
+
 def sanitized_Request(url, *args, **kwargs):
-    return compat_urllib_request.Request(
-        'http:%s' % url if url.startswith('//') else url, *args, **kwargs)
+    return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
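+
+# e.g. sanitize_url('//example.com/playlist') == 'http://example.com/playlist';
+# URLs that already carry a scheme pass through unchanged.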
 
 
 def orderedSet(iterable):
@@ -405,12 +457,19 @@ def orderedSet(iterable):
     return res
 
 
-def _htmlentity_transform(entity):
+def _htmlentity_transform(entity_with_semicolon):
     """Transforms an HTML entity to a character."""
+    entity = entity_with_semicolon[:-1]
+
     # Known non-numeric HTML entity
     if entity in compat_html_entities.name2codepoint:
         return compat_chr(compat_html_entities.name2codepoint[entity])
 
+    # TODO: HTML5 allows entities without a semicolon. For example,
+    # '&Eacuteric' should be decoded as 'Éric'.
+    if entity_with_semicolon in compat_html_entities_html5:
+        return compat_html_entities_html5[entity_with_semicolon]
+
     mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
     if mobj is not None:
         numstr = mobj.group(1)
@@ -435,7 +494,7 @@ def unescapeHTML(s):
     assert type(s) == compat_str
 
     return re.sub(
-        r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s)
+        r'&([^;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
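+
+# With the semicolon now part of the match, both named and numeric entities
+# decode as expected, e.g.:
+#   unescapeHTML('&eacute;clair &#38; co') == 'éclair & co'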
 
 
 def get_subprocess_encoding():
@@ -467,6 +526,10 @@ def encodeFilename(s, for_subprocess=False):
     if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
         return s
 
+    # Jython uses Unicode strings for filenames even though it reports itself as Python 2.x compatible
+    if sys.platform.startswith('java'):
+        return s
+
     return s.encode(get_subprocess_encoding(), 'ignore')
 
 
@@ -712,8 +775,15 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
         self._params = params
 
     def http_open(self, req):
+        conn_class = compat_http_client.HTTPConnection
+
+        socks_proxy = req.headers.get('Ytdl-socks-proxy')
+        if socks_proxy:
+            conn_class = make_socks_conn_class(conn_class, socks_proxy)
+            del req.headers['Ytdl-socks-proxy']
+
         return self.do_open(functools.partial(
-            _create_http_connection, self, compat_http_client.HTTPConnection, False),
+            _create_http_connection, self, conn_class, False),
             req)
 
     @staticmethod
@@ -745,12 +815,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
 
         # Substitute URL if any change after escaping
         if url != url_escaped:
-            req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
-            new_req = req_type(
-                url_escaped, data=req.data, headers=req.headers,
-                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
-            new_req.timeout = req.timeout
-            req = new_req
+            req = update_Request(req, url=url_escaped)
 
         for h, v in std_headers.items():
             # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
@@ -804,9 +869,13 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
                 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                 if sys.version_info >= (3, 0):
                     location = location.encode('iso-8859-1').decode('utf-8')
+                else:
+                    location = location.decode('utf-8')
                 location_escaped = escape_url(location)
                 if location != location_escaped:
                     del resp.headers['Location']
+                    if sys.version_info < (3, 0):
+                        location_escaped = location_escaped.encode('utf-8')
                     resp.headers['Location'] = location_escaped
         return resp
 
@@ -814,6 +883,49 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
     https_response = http_response
 
 
+def make_socks_conn_class(base_class, socks_proxy):
+    assert issubclass(base_class, (
+        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
+
+    url_components = compat_urlparse.urlparse(socks_proxy)
+    if url_components.scheme.lower() == 'socks5':
+        socks_type = ProxyType.SOCKS5
+    elif url_components.scheme.lower() in ('socks', 'socks4'):
+        socks_type = ProxyType.SOCKS4
+    elif url_components.scheme.lower() == 'socks4a':
+        socks_type = ProxyType.SOCKS4A
+
+    def unquote_if_non_empty(s):
+        if not s:
+            return s
+        return compat_urllib_parse_unquote_plus(s)
+
+    proxy_args = (
+        socks_type,
+        url_components.hostname, url_components.port or 1080,
+        True,  # Remote DNS
+        unquote_if_non_empty(url_components.username),
+        unquote_if_non_empty(url_components.password),
+    )
+
+    class SocksConnection(base_class):
+        def connect(self):
+            self.sock = sockssocket()
+            self.sock.setproxy(*proxy_args)
+            if type(self.timeout) in (int, float):
+                self.sock.settimeout(self.timeout)
+            self.sock.connect((self.host, self.port))
+
+            if isinstance(self, compat_http_client.HTTPSConnection):
+                if hasattr(self, '_context'):  # Python > 2.6
+                    self.sock = self._context.wrap_socket(
+                        self.sock, server_hostname=self.host)
+                else:
+                    self.sock = ssl.wrap_socket(self.sock)
+
+    return SocksConnection
+
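+# Hypothetical usage sketch (the proxy address is illustrative, not from this
+# commit):
+#   ConnClass = make_socks_conn_class(
+#       compat_http_client.HTTPConnection, 'socks5://127.0.0.1:1080')
+#   ConnClass('example.com', 80).request('GET', '/')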
+
 class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
     def __init__(self, params, https_conn_class=None, *args, **kwargs):
         compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
@@ -822,12 +934,20 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
 
     def https_open(self, req):
         kwargs = {}
+        conn_class = self._https_conn_class
+
         if hasattr(self, '_context'):  # python > 2.6
             kwargs['context'] = self._context
         if hasattr(self, '_check_hostname'):  # python 3.x
             kwargs['check_hostname'] = self._check_hostname
+
+        socks_proxy = req.headers.get('Ytdl-socks-proxy')
+        if socks_proxy:
+            conn_class = make_socks_conn_class(conn_class, socks_proxy)
+            del req.headers['Ytdl-socks-proxy']
+
         return self.do_open(functools.partial(
-            _create_http_connection, self, self._https_conn_class, True),
+            _create_http_connection, self, conn_class, True),
             req, **kwargs)
 
 
@@ -905,9 +1025,9 @@ def unified_strdate(date_str, day_first=True):
         '%d %b %Y',
         '%B %d %Y',
         '%b %d %Y',
-        '%b %dst %Y %I:%M%p',
-        '%b %dnd %Y %I:%M%p',
-        '%b %dth %Y %I:%M%p',
+        '%b %dst %Y %I:%M',
+        '%b %dnd %Y %I:%M',
+        '%b %dth %Y %I:%M',
         '%Y %m %d',
         '%Y-%m-%d',
         '%Y/%m/%d',
@@ -927,6 +1047,7 @@ def unified_strdate(date_str, day_first=True):
         format_expressions.extend([
             '%d-%m-%Y',
             '%d.%m.%Y',
+            '%d.%m.%y',
             '%d/%m/%Y',
             '%d/%m/%y',
             '%d/%m/%Y %H:%M:%S',
@@ -947,7 +1068,10 @@ def unified_strdate(date_str, day_first=True):
     if upload_date is None:
         timetuple = email.utils.parsedate_tz(date_str)
         if timetuple:
-            upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
+            try:
+                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
+            except ValueError:
+                pass
     if upload_date is not None:
         return compat_str(upload_date)
 
@@ -1158,7 +1282,7 @@ def bytes_to_intlist(bs):
 def intlist_to_bytes(xs):
     if not xs:
         return b''
-    return struct_pack('%dB' % len(xs), *xs)
+    return compat_struct_pack('%dB' % len(xs), *xs)
 
 
 # Cross-platform file locking
@@ -1217,13 +1341,23 @@ if sys.platform == 'win32':
             raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
 
 else:
-    import fcntl
+    # Some platforms, such as Jython, are missing fcntl
+    try:
+        import fcntl
 
-    def _lock_file(f, exclusive):
-        fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
+        def _lock_file(f, exclusive):
+            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
 
-    def _unlock_file(f):
-        fcntl.flock(f, fcntl.LOCK_UN)
+        def _unlock_file(f):
+            fcntl.flock(f, fcntl.LOCK_UN)
+    except ImportError:
+        UNSUPPORTED_MSG = 'file locking is not supported on this platform'
+
+        def _lock_file(f, exclusive):
+            raise IOError(UNSUPPORTED_MSG)
+
+        def _unlock_file(f):
+            raise IOError(UNSUPPORTED_MSG)
 
 
 class locked_file(object):
@@ -1276,7 +1410,7 @@ def shell_quote(args):
 def smuggle_url(url, data):
     """ Pass additional data in a URL for internal use. """
 
-    sdata = compat_urllib_parse.urlencode(
+    sdata = compat_urllib_parse_urlencode(
         {'__youtubedl_smuggle': json.dumps(data)})
     return url + '#' + sdata
 
@@ -1304,6 +1438,17 @@ def format_bytes(bytes):
     return '%.2f%s' % (converted, suffix)
 
 
+def lookup_unit_table(unit_table, s):
+    units_re = '|'.join(re.escape(u) for u in unit_table)
+    m = re.match(
+        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
+    if not m:
+        return None
+    num_str = m.group('num').replace(',', '.')
+    mult = unit_table[m.group('unit')]
+    return int(float(num_str) * mult)
+
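+# e.g., with a caller-supplied table (values illustrative):
+#   lookup_unit_table({'KiB': 1024, 'MiB': 1024 ** 2}, '1,5 MiB') == 1572864
+# A decimal comma is normalized to a point before conversion.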
+
 def parse_filesize(s):
     if s is None:
         return None
@@ -1347,15 +1492,28 @@ def parse_filesize(s):
         'Yb': 1000 ** 8,
     }
 
-    units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE)
-    m = re.match(
-        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
-    if not m:
+    return lookup_unit_table(_UNIT_TABLE, s)
+
+
+def parse_count(s):
+    if s is None:
         return None
 
-    num_str = m.group('num').replace(',', '.')
-    mult = _UNIT_TABLE[m.group('unit')]
-    return int(float(num_str) * mult)
+    s = s.strip()
+
+    if re.match(r'^[\d,.]+$', s):
+        return str_to_int(s)
+
+    _UNIT_TABLE = {
+        'k': 1000,
+        'K': 1000,
+        'm': 1000 ** 2,
+        'M': 1000 ** 2,
+        'kk': 1000 ** 2,
+        'KK': 1000 ** 2,
+    }
+
+    return lookup_unit_table(_UNIT_TABLE, s)
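+
+# e.g. parse_count('1.2M') == 1200000 and parse_count('1,000') == 1000;
+# the doubled 'kk'/'KK' suffixes are likewise treated as millions.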
 
 
 def month_by_name(name):
@@ -1387,6 +1545,12 @@ def fix_xml_ampersands(xml_str):
 
 def setproctitle(title):
     assert isinstance(title, compat_str)
+
+    # ctypes in Jython is not complete
+    # http://bugs.jython.org/issue2148
+    if sys.platform.startswith('java'):
+        return
+
     try:
         libc = ctypes.cdll.LoadLibrary('libc.so.6')
     except OSError:
@@ -1401,15 +1565,11 @@ def setproctitle(title):
 
 
 def remove_start(s, start):
-    if s.startswith(start):
-        return s[len(start):]
-    return s
+    return s[len(start):] if s is not None and s.startswith(start) else s
 
 
 def remove_end(s, end):
-    if s.endswith(end):
-        return s[:-len(end)]
-    return s
+    return s[:-len(end)] if s is not None and s.endswith(end) else s
 
 
 def remove_quotes(s):
@@ -1472,44 +1632,46 @@ def parse_duration(s):
 
     s = s.strip()
 
-    m = re.match(
-        r'''(?ix)(?:P?T)?
-        (?:
-            (?P<only_mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*|
-            (?P<only_hours>[0-9.]+)\s*(?:hours?)|
-
-            \s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?\.?|minutes?)\s*|
-            (?:
+    days, hours, mins, secs, ms = [None] * 5
+    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?$', s)
+    if m:
+        days, hours, mins, secs, ms = m.groups()
+    else:
+        m = re.match(
+            r'''(?ix)(?:P?T)?
                 (?:
-                    (?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
-                    (?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*
+                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                 )?
-                (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
-            )?
-            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
-        )$''', s)
-    if not m:
-        return None
-    res = 0
-    if m.group('only_mins'):
-        return float_or_none(m.group('only_mins'), invscale=60)
-    if m.group('only_hours'):
-        return float_or_none(m.group('only_hours'), invscale=60 * 60)
-    if m.group('secs'):
-        res += int(m.group('secs'))
-    if m.group('mins_reversed'):
-        res += int(m.group('mins_reversed')) * 60
-    if m.group('mins'):
-        res += int(m.group('mins')) * 60
-    if m.group('hours'):
-        res += int(m.group('hours')) * 60 * 60
-    if m.group('hours_reversed'):
-        res += int(m.group('hours_reversed')) * 60 * 60
-    if m.group('days'):
-        res += int(m.group('days')) * 24 * 60 * 60
-    if m.group('ms'):
-        res += float(m.group('ms'))
-    return res
+                (?:
+                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
+                )?
+                (?:
+                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
+                )?
+                (?:
+                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
+                )?$''', s)
+        if m:
+            days, hours, mins, secs, ms = m.groups()
+        else:
+            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)$', s)
+            if m:
+                hours, mins = m.groups()
+            else:
+                return None
+
+    duration = 0
+    if secs:
+        duration += float(secs)
+    if mins:
+        duration += float(mins) * 60
+    if hours:
+        duration += float(hours) * 60 * 60
+    if days:
+        duration += float(days) * 24 * 60 * 60
+    if ms:
+        duration += float(ms)
+    return duration
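+
+# A few expected conversions (illustrative, per the branches above):
+#   parse_duration('1:30') == 90.0
+#   parse_duration('2h30m') == 9000.0
+#   parse_duration('3 min') == 180.0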
 
 
 def prepend_extension(filename, ext, expected_real_ext=None):
@@ -1570,9 +1732,12 @@ class PagedList(object):
 
 
 class OnDemandPagedList(PagedList):
-    def __init__(self, pagefunc, pagesize):
+    def __init__(self, pagefunc, pagesize, use_cache=False):
         self._pagefunc = pagefunc
         self._pagesize = pagesize
+        self._use_cache = use_cache
+        if use_cache:
+            self._cache = {}
 
     def getslice(self, start=0, end=None):
         res = []
@@ -1582,7 +1747,13 @@ class OnDemandPagedList(PagedList):
             if start >= nextfirstid:
                 continue
 
-            page_results = list(self._pagefunc(pagenum))
+            page_results = None
+            if self._use_cache:
+                page_results = self._cache.get(pagenum)
+            if page_results is None:
+                page_results = list(self._pagefunc(pagenum))
+            if self._use_cache:
+                self._cache[pagenum] = page_results
 
             startv = (
                 start % self._pagesize
@@ -1668,29 +1839,13 @@ def escape_url(url):
     """Escape URL as suggested by RFC 3986"""
     url_parsed = compat_urllib_parse_urlparse(url)
     return url_parsed._replace(
+        netloc=url_parsed.netloc.encode('idna').decode('ascii'),
         path=escape_rfc3986(url_parsed.path),
         params=escape_rfc3986(url_parsed.params),
         query=escape_rfc3986(url_parsed.query),
         fragment=escape_rfc3986(url_parsed.fragment)
     ).geturl()
 
-try:
-    struct.pack('!I', 0)
-except TypeError:
-    # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
-    def struct_pack(spec, *args):
-        if isinstance(spec, compat_str):
-            spec = spec.encode('ascii')
-        return struct.pack(spec, *args)
-
-    def struct_unpack(spec, *args):
-        if isinstance(spec, compat_str):
-            spec = spec.encode('ascii')
-        return struct.unpack(spec, *args)
-else:
-    struct_pack = struct.pack
-    struct_unpack = struct.unpack
-
 
 def read_batch_urls(batch_fd):
     def fixup(url):
@@ -1709,13 +1864,31 @@ def read_batch_urls(batch_fd):
 
 
 def urlencode_postdata(*args, **kargs):
-    return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')
+    return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
 
 
-def encode_dict(d, encoding='utf-8'):
-    def encode(v):
-        return v.encode(encoding) if isinstance(v, compat_basestring) else v
-    return dict((encode(k), encode(v)) for k, v in d.items())
+def update_url_query(url, query):
+    if not query:
+        return url
+    parsed_url = compat_urlparse.urlparse(url)
+    qs = compat_parse_qs(parsed_url.query)
+    qs.update(query)
+    return compat_urlparse.urlunparse(parsed_url._replace(
+        query=compat_urllib_parse_urlencode(qs, True)))
+
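+# e.g. update_url_query('http://example.com/path?a=1', {'b': '2'}) yields
+# 'http://example.com/path?a=1&b=2' (parameter order is not guaranteed, as
+# the existing query is round-tripped through a dict).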
+
+def update_Request(req, url=None, data=None, headers={}, query={}):
+    req_headers = req.headers.copy()
+    req_headers.update(headers)
+    req_data = data or req.data
+    req_url = update_url_query(url or req.get_full_url(), query)
+    req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
+    new_req = req_type(
+        req_url, data=req_data, headers=req_headers,
+        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
+    if hasattr(req, 'timeout'):
+        new_req.timeout = req.timeout
+    return new_req
 
 
 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
@@ -1728,6 +1901,16 @@ def dict_get(d, key_or_keys, default=None, skip_false_values=True):
     return d.get(key_or_keys, default)
 
 
+def try_get(src, getter, expected_type=None):
+    try:
+        v = getter(src)
+    except (AttributeError, KeyError, TypeError, IndexError):
+        pass
+    else:
+        if expected_type is None or isinstance(v, expected_type):
+            return v
+
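+# e.g. try_get({'a': [42]}, lambda x: x['a'][0], int) == 42, while
+# try_get({}, lambda x: x['a']['b']) returns None instead of raising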
+
 def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
     return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
 
@@ -1750,7 +1933,7 @@ def parse_age_limit(s):
 
 def strip_jsonp(code):
     return re.sub(
-        r'(?s)^[a-zA-Z0-9_.]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
+        r'(?s)^[a-zA-Z0-9_.$]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
 
 
 def js_to_json(code):
@@ -1758,24 +1941,38 @@ def js_to_json(code):
         v = m.group(0)
         if v in ('true', 'false', 'null'):
             return v
-        if v.startswith('"'):
-            v = re.sub(r"\\'", "'", v[1:-1])
-        elif v.startswith("'"):
-            v = v[1:-1]
-            v = re.sub(r"\\\\|\\'|\"", lambda m: {
-                '\\\\': '\\\\',
-                "\\'": "'",
+        elif v.startswith('/*') or v == ',':
+            return ""
+
+        if v[0] in ("'", '"'):
+            v = re.sub(r'(?s)\\.|"', lambda m: {
                 '"': '\\"',
-            }[m.group(0)], v)
+                "\\'": "'",
+                '\\\n': '',
+                '\\x': '\\u00',
+            }.get(m.group(0), m.group(0)), v[1:-1])
+
+        INTEGER_TABLE = (
+            (r'^0[xX][0-9a-fA-F]+', 16),
+            (r'^0+[0-7]+', 8),
+        )
+
+        for regex, base in INTEGER_TABLE:
+            im = re.match(regex, v)
+            if im:
+                i = int(im.group(0), base)
+                return '"%d":' % i if v.endswith(':') else '%d' % i
+
         return '"%s"' % v
 
-    res = re.sub(r'''(?x)
-        "(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"|
-        '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
-        [a-zA-Z_][.a-zA-Z_0-9]*
+    return re.sub(r'''(?sx)
+        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
+        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
+        /\*.*?\*/|,(?=\s*[\]}])|
+        [a-zA-Z_][.a-zA-Z_0-9]*|
+        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?|
+        [0-9]+(?=\s*:)
         ''', fix_kv, code)
-    res = re.sub(r',(\s*[\]}])', lambda m: m.group(1), res)
-    return res
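+
+# Illustrative conversions handled by the rewritten pattern:
+#   js_to_json("{abc: true, 'def': 0x0A,}") == '{"abc": true, "def": 10}'
+# i.e. bare keys are quoted, hex/octal integers are decimalized, comments
+# and trailing commas are dropped.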
 
 
 def qualities(quality_ids):
@@ -1823,7 +2020,7 @@ def ytdl_is_updateable():
 
 def args_to_str(args):
     # Get a short string representation for a subprocess command
-    return ' '.join(shlex_quote(a) for a in args)
+    return ' '.join(compat_shlex_quote(a) for a in args)
 
 
 def error_to_compat_str(err):
@@ -1836,8 +2033,14 @@ def error_to_compat_str(err):
 
 
 def mimetype2ext(mt):
+    if mt is None:
+        return None
+
     ext = {
         'audio/mp4': 'm4a',
+        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here we use
+        # .mp3 as it's the most popular one
+        'audio/mpeg': 'mp3',
     }.get(mt)
     if ext is not None:
         return ext
@@ -1858,11 +2061,7 @@ def mimetype2ext(mt):
 
 
 def urlhandle_detect_ext(url_handle):
-    try:
-        url_handle.headers
-        getheader = lambda h: url_handle.headers[h]
-    except AttributeError:  # Python < 3
-        getheader = url_handle.info().getheader
+    getheader = url_handle.headers.get
 
     cd = getheader('Content-Disposition')
     if cd:
@@ -2036,6 +2235,7 @@ def dfxp2srt(dfxp_data):
     _x = functools.partial(xpath_with_ns, ns_map={
         'ttml': 'http://www.w3.org/ns/ttml',
         'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
+        'ttaf1_0604': 'http://www.w3.org/2006/04/ttaf1',
     })
 
     class TTMLPElementParser(object):
@@ -2062,7 +2262,7 @@ def dfxp2srt(dfxp_data):
 
     dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))
     out = []
-    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p')
+    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall(_x('.//ttaf1_0604:p')) or dfxp.findall('.//p')
 
     if not paras:
         raise ValueError('Invalid dfxp/TTML subtitle')
@@ -2591,6 +2791,10 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
 
         if proxy == '__noproxy__':
             return None  # No Proxy
+        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
+            req.add_header('Ytdl-socks-proxy', proxy)
+            # youtube-dl's http/https handlers wrap the socket with SOCKS themselves
+            return None
         return compat_urllib_request.ProxyHandler.proxy_open(
             self, req, proxy, type)
 
@@ -2610,3 +2814,50 @@ def ohdave_rsa_encrypt(data, exponent, modulus):
     payload = int(binascii.hexlify(data[::-1]), 16)
     encrypted = pow(payload, exponent, modulus)
     return '%x' % encrypted
+
+
+def encode_base_n(num, n, table=None):
+    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
+    if not table:
+        table = FULL_TABLE[:n]
+
+    if n > len(table):
+        raise ValueError('base %d exceeds table length %d' % (n, len(table)))
+
+    if num == 0:
+        return table[0]
+
+    ret = ''
+    while num:
+        ret = table[num % n] + ret
+        num = num // n
+    return ret
+
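+# e.g. encode_base_n(255, 16) == 'ff' and encode_base_n(255, 2) == '11111111';
+# a custom alphabet can be supplied via table.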
+
+def decode_packed_codes(code):
+    mobj = re.search(
+        r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)",
+        code)
+    obfuscated_code, base, count, symbols = mobj.groups()
+    base = int(base)
+    count = int(count)
+    symbols = symbols.split('|')
+    symbol_table = {}
+
+    while count:
+        count -= 1
+        base_n_count = encode_base_n(count, base)
+        symbol_table[base_n_count] = symbols[count] or base_n_count
+
+    return re.sub(
+        r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
+        obfuscated_code)
+
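+# A toy example of the packed-JavaScript input this unpacks (constructed for
+# illustration):
+#   decode_packed_codes("}('0 1',2,2,'hello|world'.split('|')") == 'hello world'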
+
+def parse_m3u8_attributes(attrib):
+    info = {}
+    for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
+        if val.startswith('"'):
+            val = val[1:-1]
+        info[key] = val
+    return info
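+
+# e.g. parsing an EXT-X-STREAM-INF attribute list (illustrative values):
+#   parse_m3u8_attributes('BANDWIDTH=1280000,CODECS="mp4a.40.2"') \
+#       == {'BANDWIDTH': '1280000', 'CODECS': 'mp4a.40.2'}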
index 7a3df6a26fc7d73fb9c3b7c34d9923208d3d112e..2b7a4c98dd2ded70a49a2455ec1309ad921c512e 100644 (file)
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2016.02.22'
+__version__ = '2016.06.25'