Raphaël G. Git Repositories - youtubedl/commitdiff
Imported Upstream version 2015.01.16
author Rogério Brito <rbrito@ime.usp.br>
Fri, 16 Jan 2015 17:58:13 +0000 (15:58 -0200)
committer Rogério Brito <rbrito@ime.usp.br>
Fri, 16 Jan 2015 17:58:13 +0000 (15:58 -0200)
290 files changed:
Makefile
README.md
README.txt
devscripts/gh-pages/update-sites.py
devscripts/make_contributing.py [new file with mode: 0755]
devscripts/make_supportedsites.py [new file with mode: 0644]
devscripts/prepare_manpage.py
docs/supportedsites.md [new file with mode: 0644]
test/helper.py
test/swftests/ArrayAccess.swf
test/swftests/ClassCall.swf
test/swftests/ClassConstruction.swf
test/swftests/ConstArrayAccess.swf
test/swftests/ConstantInt.swf
test/swftests/DictCall.swf
test/swftests/EqualsOperator.swf
test/swftests/LocalVars.swf
test/swftests/MemberAssignment.swf
test/swftests/NeOperator.swf
test/swftests/PrivateCall.swf
test/swftests/PrivateVoidCall.swf
test/swftests/StaticAssignment.swf
test/swftests/StaticRetrieval.swf
test/swftests/StringBasics.swf
test/swftests/StringCharCodeAt.swf
test/swftests/StringConversion.swf
test/test_InfoExtractor.py
test/test_YoutubeDL.py
test/test_age_restriction.py
test/test_download.py
test/test_subtitles.py
test/test_unicode_literals.py
test/test_utils.py
test/test_write_info_json.py [deleted file]
youtube-dl
youtube-dl.1
youtube-dl.bash-completion
youtube-dl.fish
youtube-dl.zsh
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/compat.py
youtube_dl/downloader/common.py
youtube_dl/downloader/f4m.py
youtube_dl/downloader/hls.py
youtube_dl/downloader/http.py
youtube_dl/downloader/mplayer.py
youtube_dl/downloader/rtmp.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/adobetv.py [new file with mode: 0644]
youtube_dl/extractor/adultswim.py
youtube_dl/extractor/aljazeera.py [new file with mode: 0644]
youtube_dl/extractor/allocine.py
youtube_dl/extractor/alphaporno.py [new file with mode: 0644]
youtube_dl/extractor/aol.py
youtube_dl/extractor/appletrailers.py
youtube_dl/extractor/archiveorg.py
youtube_dl/extractor/arte.py
youtube_dl/extractor/atresplayer.py [new file with mode: 0644]
youtube_dl/extractor/atttechchannel.py [new file with mode: 0644]
youtube_dl/extractor/audiomack.py
youtube_dl/extractor/auengine.py
youtube_dl/extractor/azubu.py [new file with mode: 0644]
youtube_dl/extractor/bambuser.py
youtube_dl/extractor/bandcamp.py
youtube_dl/extractor/bbccouk.py
youtube_dl/extractor/behindkink.py
youtube_dl/extractor/bet.py [new file with mode: 0644]
youtube_dl/extractor/bilibili.py
youtube_dl/extractor/bliptv.py
youtube_dl/extractor/breakcom.py
youtube_dl/extractor/brightcove.py
youtube_dl/extractor/buzzfeed.py
youtube_dl/extractor/canalplus.py
youtube_dl/extractor/ceskatelevize.py
youtube_dl/extractor/channel9.py
youtube_dl/extractor/cinchcast.py [new file with mode: 0644]
youtube_dl/extractor/cnet.py
youtube_dl/extractor/cnn.py
youtube_dl/extractor/collegerama.py [new file with mode: 0644]
youtube_dl/extractor/comcarcoff.py [new file with mode: 0644]
youtube_dl/extractor/comedycentral.py
youtube_dl/extractor/common.py
youtube_dl/extractor/commonmistakes.py [new file with mode: 0644]
youtube_dl/extractor/condenast.py
youtube_dl/extractor/crunchyroll.py
youtube_dl/extractor/cspan.py
youtube_dl/extractor/dailymotion.py
youtube_dl/extractor/daum.py
youtube_dl/extractor/dbtv.py
youtube_dl/extractor/discovery.py
youtube_dl/extractor/drbonanza.py [new file with mode: 0644]
youtube_dl/extractor/drtv.py
youtube_dl/extractor/dvtv.py [new file with mode: 0644]
youtube_dl/extractor/ebaumsworld.py
youtube_dl/extractor/echomsk.py [new file with mode: 0644]
youtube_dl/extractor/ehow.py
youtube_dl/extractor/eighttracks.py
youtube_dl/extractor/ellentv.py
youtube_dl/extractor/elpais.py
youtube_dl/extractor/engadget.py
youtube_dl/extractor/eroprofile.py [new file with mode: 0644]
youtube_dl/extractor/escapist.py
youtube_dl/extractor/everyonesmixtape.py
youtube_dl/extractor/extremetube.py
youtube_dl/extractor/facebook.py
youtube_dl/extractor/fc2.py
youtube_dl/extractor/firedrive.py
youtube_dl/extractor/fivemin.py
youtube_dl/extractor/fktv.py
youtube_dl/extractor/fourtube.py
youtube_dl/extractor/foxgay.py [new file with mode: 0644]
youtube_dl/extractor/foxnews.py [new file with mode: 0644]
youtube_dl/extractor/franceculture.py
youtube_dl/extractor/francetv.py
youtube_dl/extractor/gameone.py
youtube_dl/extractor/gamespot.py
youtube_dl/extractor/gdcvault.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/giantbomb.py [new file with mode: 0644]
youtube_dl/extractor/giga.py [new file with mode: 0644]
youtube_dl/extractor/goldenmoustache.py
youtube_dl/extractor/golem.py
youtube_dl/extractor/googlesearch.py
youtube_dl/extractor/gorillavid.py
youtube_dl/extractor/goshgay.py
youtube_dl/extractor/groupon.py [new file with mode: 0644]
youtube_dl/extractor/hellporno.py [new file with mode: 0644]
youtube_dl/extractor/helsinki.py
youtube_dl/extractor/hitbox.py [new file with mode: 0644]
youtube_dl/extractor/hostingbulk.py
youtube_dl/extractor/howstuffworks.py
youtube_dl/extractor/huffpost.py
youtube_dl/extractor/hypem.py
youtube_dl/extractor/imdb.py
youtube_dl/extractor/infoq.py
youtube_dl/extractor/internetvideoarchive.py
youtube_dl/extractor/iprima.py
youtube_dl/extractor/ivi.py
youtube_dl/extractor/karaoketv.py [new file with mode: 0644]
youtube_dl/extractor/keek.py
youtube_dl/extractor/keezmovies.py
youtube_dl/extractor/khanacademy.py
youtube_dl/extractor/kontrtube.py
youtube_dl/extractor/livestream.py
youtube_dl/extractor/lnkgo.py [new file with mode: 0644]
youtube_dl/extractor/lrt.py
youtube_dl/extractor/lynda.py
youtube_dl/extractor/malemotion.py
youtube_dl/extractor/metacafe.py
youtube_dl/extractor/minhateca.py [new file with mode: 0644]
youtube_dl/extractor/mit.py
youtube_dl/extractor/mitele.py
youtube_dl/extractor/mixcloud.py
youtube_dl/extractor/moevideo.py
youtube_dl/extractor/mofosex.py
youtube_dl/extractor/moniker.py
youtube_dl/extractor/mooshare.py
youtube_dl/extractor/motorsport.py
youtube_dl/extractor/movieclips.py
youtube_dl/extractor/mtv.py
youtube_dl/extractor/myspace.py
youtube_dl/extractor/myspass.py
youtube_dl/extractor/myvidster.py [new file with mode: 0644]
youtube_dl/extractor/naver.py
youtube_dl/extractor/nba.py
youtube_dl/extractor/nbc.py
youtube_dl/extractor/nerdcubed.py [new file with mode: 0644]
youtube_dl/extractor/netzkino.py [new file with mode: 0644]
youtube_dl/extractor/nfb.py
youtube_dl/extractor/nfl.py
youtube_dl/extractor/nhl.py
youtube_dl/extractor/niconico.py
youtube_dl/extractor/ninegag.py
youtube_dl/extractor/noco.py
youtube_dl/extractor/normalboots.py
youtube_dl/extractor/nosvideo.py
youtube_dl/extractor/novamov.py
youtube_dl/extractor/nowvideo.py
youtube_dl/extractor/npo.py
youtube_dl/extractor/nrk.py
youtube_dl/extractor/ntv.py
youtube_dl/extractor/nuvid.py
youtube_dl/extractor/ooyala.py
youtube_dl/extractor/openfilm.py [new file with mode: 0644]
youtube_dl/extractor/orf.py
youtube_dl/extractor/pbs.py
youtube_dl/extractor/photobucket.py
youtube_dl/extractor/played.py
youtube_dl/extractor/playfm.py
youtube_dl/extractor/playvid.py
youtube_dl/extractor/pornhd.py
youtube_dl/extractor/pornhub.py
youtube_dl/extractor/pornotube.py
youtube_dl/extractor/promptfile.py
youtube_dl/extractor/prosiebensat1.py
youtube_dl/extractor/quickvid.py
youtube_dl/extractor/radiobremen.py [new file with mode: 0644]
youtube_dl/extractor/radiode.py [new file with mode: 0644]
youtube_dl/extractor/rai.py
youtube_dl/extractor/restudy.py [new file with mode: 0644]
youtube_dl/extractor/rte.py [new file with mode: 0644]
youtube_dl/extractor/rtlnl.py
youtube_dl/extractor/rtp.py [new file with mode: 0644]
youtube_dl/extractor/rts.py
youtube_dl/extractor/rutube.py
youtube_dl/extractor/screencast.py
youtube_dl/extractor/screencastomatic.py [new file with mode: 0644]
youtube_dl/extractor/screenwavemedia.py [moved from youtube_dl/extractor/cinemassacre.py with 50% similarity]
youtube_dl/extractor/sexykarma.py
youtube_dl/extractor/shared.py
youtube_dl/extractor/sharesix.py
youtube_dl/extractor/sina.py
youtube_dl/extractor/slideshare.py
youtube_dl/extractor/smotri.py
youtube_dl/extractor/sockshare.py
youtube_dl/extractor/sohu.py
youtube_dl/extractor/soulanime.py [new file with mode: 0644]
youtube_dl/extractor/soundcloud.py
youtube_dl/extractor/spankwire.py
youtube_dl/extractor/spiegel.py
youtube_dl/extractor/sportdeutschland.py
youtube_dl/extractor/streamcloud.py
youtube_dl/extractor/streamcz.py
youtube_dl/extractor/sunporno.py
youtube_dl/extractor/tagesschau.py
youtube_dl/extractor/tapely.py
youtube_dl/extractor/teachertube.py
youtube_dl/extractor/ted.py
youtube_dl/extractor/telecinco.py
youtube_dl/extractor/teletask.py [new file with mode: 0644]
youtube_dl/extractor/tenplay.py
youtube_dl/extractor/tf1.py
youtube_dl/extractor/theplatform.py
youtube_dl/extractor/tlc.py
youtube_dl/extractor/tmz.py
youtube_dl/extractor/tnaflix.py
youtube_dl/extractor/tube8.py
youtube_dl/extractor/tudou.py
youtube_dl/extractor/tunein.py
youtube_dl/extractor/tutv.py
youtube_dl/extractor/tvigle.py
youtube_dl/extractor/tvp.py
youtube_dl/extractor/tvplay.py
youtube_dl/extractor/twentyfourvideo.py [new file with mode: 0644]
youtube_dl/extractor/twitch.py
youtube_dl/extractor/udemy.py
youtube_dl/extractor/urort.py
youtube_dl/extractor/ustream.py
youtube_dl/extractor/vbox7.py
youtube_dl/extractor/veehd.py
youtube_dl/extractor/veoh.py
youtube_dl/extractor/vevo.py
youtube_dl/extractor/vgtv.py
youtube_dl/extractor/videodetective.py
youtube_dl/extractor/videomega.py
youtube_dl/extractor/vier.py [new file with mode: 0644]
youtube_dl/extractor/viki.py
youtube_dl/extractor/vimple.py
youtube_dl/extractor/vine.py
youtube_dl/extractor/vk.py
youtube_dl/extractor/vodlocker.py
youtube_dl/extractor/vube.py
youtube_dl/extractor/vuclip.py
youtube_dl/extractor/washingtonpost.py
youtube_dl/extractor/wdr.py
youtube_dl/extractor/webofstories.py [new file with mode: 0644]
youtube_dl/extractor/wistia.py
youtube_dl/extractor/xbef.py
youtube_dl/extractor/xboxclips.py
youtube_dl/extractor/xhamster.py
youtube_dl/extractor/xminus.py
youtube_dl/extractor/xnxx.py
youtube_dl/extractor/xtube.py
youtube_dl/extractor/xvideos.py
youtube_dl/extractor/xxxymovies.py [new file with mode: 0644]
youtube_dl/extractor/yahoo.py
youtube_dl/extractor/yesjapan.py [new file with mode: 0644]
youtube_dl/extractor/ynet.py
youtube_dl/extractor/youporn.py
youtube_dl/extractor/youtube.py
youtube_dl/extractor/zdf.py
youtube_dl/options.py
youtube_dl/postprocessor/__init__.py
youtube_dl/postprocessor/execafterdownload.py
youtube_dl/postprocessor/ffmpeg.py
youtube_dl/swfinterp.py
youtube_dl/update.py
youtube_dl/utils.py
youtube_dl/version.py

index 3e1debc7e1a9184b82a5bc6528b0f8dbdfd7e6f4..5780798793cf2915807d28791a8cb59d40e9dcf2 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
-all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
+all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
 
 clean:
-       rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part
+       rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json CONTRIBUTING.md.tmp
 
 cleanall: clean
        rm -f youtube-dl youtube-dl.exe
@@ -35,13 +35,22 @@ install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtu
        install -d $(DESTDIR)$(SYSCONFDIR)/fish/completions
        install -m 644 youtube-dl.fish $(DESTDIR)$(SYSCONFDIR)/fish/completions/youtube-dl.fish
 
+codetest:
+       flake8 .
+
 test:
        #nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose --processes 4 test
        nosetests --verbose test
+       $(MAKE) codetest
+
+ot: offlinetest
+
+offlinetest: codetest
+       nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations --exclude test_youtube_lists
 
 tar: youtube-dl.tar.gz
 
-.PHONY: all clean install test tar bash-completion pypi-files zsh-completion fish-completion
+.PHONY: all clean install test tar bash-completion pypi-files zsh-completion fish-completion ot offlinetest codetest supportedsites
 
 pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1 youtube-dl.fish
 
@@ -54,7 +63,13 @@ youtube-dl: youtube_dl/*.py youtube_dl/*/*.py
        chmod a+x youtube-dl
 
 README.md: youtube_dl/*.py youtube_dl/*/*.py
-       COLUMNS=80 python -m youtube_dl --help | python devscripts/make_readme.py
+       COLUMNS=80 python youtube_dl/__main__.py --help | python devscripts/make_readme.py
+
+CONTRIBUTING.md: README.md
+       python devscripts/make_contributing.py README.md CONTRIBUTING.md
+
+supportedsites:
+       python devscripts/make_supportedsites.py docs/supportedsites.md
 
 README.txt: README.md
        pandoc -f markdown -t plain README.md -o README.txt
index d6e7ff902c86021db324cb3b9d06d441b8673cad..078e9df828393989f931e53b587503e6547c37b3 100644 (file)
--- a/README.md
+++ b/README.md
@@ -1,7 +1,15 @@
 youtube-dl - download videos from youtube.com or other video platforms
 
-# SYNOPSIS
-**youtube-dl** [OPTIONS] URL [URL...]
+- [INSTALLATION](#installation)
+- [DESCRIPTION](#description)
+- [OPTIONS](#options)
+- [CONFIGURATION](#configuration)
+- [OUTPUT TEMPLATE](#output-template)
+- [VIDEO SELECTION](#video-selection)
+- [FAQ](#faq)
+- [DEVELOPER INSTRUCTIONS](#developer-instructions)
+- [BUGS](#bugs)
+- [COPYRIGHT](#copyright)
 
 # INSTALLATION
 
@@ -34,6 +42,8 @@ YouTube.com and a few more sites. It requires the Python interpreter, version
 your Unix box, on Windows or on Mac OS X. It is released to the public domain,
 which means you can modify it, redistribute it or use it however you like.
 
+    youtube-dl [OPTIONS] URL [URL...]
+
 # OPTIONS
     -h, --help                       print this help text and exit
     --version                        print program version and exit
@@ -50,10 +60,6 @@ which means you can modify it, redistribute it or use it however you like.
                                      they would handle
     --extractor-descriptions         Output descriptions of all supported
                                      extractors
-    --proxy URL                      Use the specified HTTP/HTTPS proxy. Pass in
-                                     an empty string (--proxy "") for direct
-                                     connection
-    --socket-timeout None            Time to wait before giving up, in seconds
     --default-search PREFIX          Use this prefix for unqualified URLs. For
                                      example "gvsearch2:" downloads two videos
                                      from google videos for  youtube-dl "large
@@ -65,13 +71,25 @@ which means you can modify it, redistribute it or use it however you like.
                                      this is not possible instead of searching.
     --ignore-config                  Do not read configuration files. When given
                                      in the global configuration file /etc
-                                     /youtube-dl.conf: do not read the user
-                                     configuration in ~/.config/youtube-dl.conf
-                                     (%APPDATA%/youtube-dl/config.txt on
-                                     Windows)
+                                     /youtube-dl.conf: Do not read the user
+                                     configuration in ~/.config/youtube-
+                                     dl/config (%APPDATA%/youtube-dl/config.txt
+                                     on Windows)
     --flat-playlist                  Do not extract the videos of a playlist,
                                      only list them.
 
+## Network Options:
+    --proxy URL                      Use the specified HTTP/HTTPS proxy. Pass in
+                                     an empty string (--proxy "") for direct
+                                     connection
+    --socket-timeout SECONDS         Time to wait before giving up, in seconds
+    --source-address IP              Client-side IP address to bind to
+                                     (experimental)
+    -4, --force-ipv4                 Make all connections via IPv4
+                                     (experimental)
+    -6, --force-ipv6                 Make all connections via IPv6
+                                     (experimental)
+
 ## Video Selection:
     --playlist-start NUMBER          playlist video to start at (default is 1)
     --playlist-end NUMBER            playlist video to end at (default is last)
@@ -113,12 +131,12 @@ which means you can modify it, redistribute it or use it however you like.
                                      size. By default, the buffer size is
                                      automatically resized from an initial value
                                      of SIZE.
+    --playlist-reverse               Download playlist videos in reverse order
 
 ## Filesystem Options:
     -a, --batch-file FILE            file containing URLs to download ('-' for
                                      stdin)
     --id                             use only video ID in file name
-    -A, --auto-number                number downloaded files starting from 00000
     -o, --output TEMPLATE            output filename template. Use %(title)s to
                                      get the title, %(uploader)s for the
                                      uploader name, %(uploader_id)s for the
@@ -152,6 +170,9 @@ which means you can modify it, redistribute it or use it however you like.
     --restrict-filenames             Restrict filenames to only ASCII
                                      characters, and avoid "&" and spaces in
                                      filenames
+    -A, --auto-number                [deprecated; use  -o
+                                     "%(autonumber)s-%(title)s.%(ext)s" ] number
+                                     downloaded files starting from 00000
     -t, --title                      [deprecated] use title in file name
                                      (default)
     -l, --literal                    [deprecated] alias of --title
@@ -206,6 +227,8 @@ which means you can modify it, redistribute it or use it however you like.
                                      for each command-line argument. If the URL
                                      refers to a playlist, dump the whole
                                      playlist information in a single line.
+    --print-json                     Be quiet and print the video information as
+                                     JSON (video is still being downloaded).
     --newline                        output progress bar as new lines
     --no-progress                    do not print progress bar
     --console-title                  display progress in console titlebar
@@ -216,6 +239,10 @@ which means you can modify it, redistribute it or use it however you like.
                                      files in the current directory to debug
                                      problems
     --print-traffic                  Display sent and read HTTP traffic
+    -C, --call-home                  Contact the youtube-dl server for
+                                     debugging.
+    --no-call-home                   Do NOT contact the youtube-dl server for
+                                     debugging.
 
 ## Workarounds:
     --encoding ENCODING              Force the specified encoding (experimental)
@@ -235,14 +262,15 @@ which means you can modify it, redistribute it or use it however you like.
 
 ## Video Format Options:
     -f, --format FORMAT              video format code, specify the order of
-                                     preference using slashes: -f 22/17/18 .  -f
-                                     mp4 , -f m4a and  -f flv  are also
-                                     supported. You can also use the special
-                                     names "best", "bestvideo", "bestaudio",
-                                     "worst", "worstvideo" and "worstaudio". By
-                                     default, youtube-dl will pick the best
-                                     quality. Use commas to download multiple
-                                     audio formats, such as -f
+                                     preference using slashes, as in -f 22/17/18
+                                     .  Instead of format codes, you can select
+                                     by extension for the extensions aac, m4a,
+                                     mp3, mp4, ogg, wav, webm. You can also use
+                                     the special names "best", "bestvideo",
+                                     "bestaudio", "worst".  By default, youtube-
+                                     dl will pick the best quality. Use commas
+                                     to download multiple audio formats, such as
+                                     -f
                                      136/137/mp4/bestvideo,140/m4a/bestaudio.
                                      You can merge the video and audio of two
                                      formats into a single file using -f <video-
@@ -256,6 +284,10 @@ which means you can modify it, redistribute it or use it however you like.
     -F, --list-formats               list all available formats
     --youtube-skip-dash-manifest     Do not download the DASH manifest on
                                      YouTube videos
+    --merge-output-format FORMAT     If a merge is required (e.g.
+                                     bestvideo+bestaudio), output to given
+                                     container format. One of mkv, mp4, ogg,
+                                     webm, flv. Ignored if no merge is required
 
 ## Subtitle Options:
     --write-sub                      write subtitle file
@@ -302,6 +334,11 @@ which means you can modify it, redistribute it or use it however you like.
     --add-metadata                   write metadata to the video file
     --xattrs                         write metadata to the video file's xattrs
                                      (using dublin core and xdg standards)
+    --fixup POLICY                   (experimental) Automatically correct known
+                                     faults of the file. One of never (do
+                                     nothing), warn (only emit a warning),
+                                     detect_or_warn (check whether we can do
+                                     anything about it, warn otherwise)
     --prefer-avconv                  Prefer avconv over ffmpeg for running the
                                      postprocessors (default)
     --prefer-ffmpeg                  Prefer ffmpeg over avconv for running the
@@ -313,7 +350,7 @@ which means you can modify it, redistribute it or use it however you like.
 
 # CONFIGURATION
 
-You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<Yourname>\youtube-dl.conf`.
+You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<user name>\youtube-dl.conf`.
 
 # OUTPUT TEMPLATE
 
@@ -407,9 +444,15 @@ Apparently YouTube requires you to pass a CAPTCHA test if you download too much.
 
 Once the video is fully downloaded, use any video player, such as [vlc](http://www.videolan.org) or [mplayer](http://www.mplayerhq.hu/).
 
-### The links provided by youtube-dl -g are not working anymore
+### I extracted a video URL with -g, but it does not play on another machine / in my webbrowser.
+
+It depends a lot on the service. In many cases, requests for the video (to download/play it) must come from the same IP address and with the same cookies. Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. Some sites also require a common user agent to be used; use `--dump-user-agent` to see the one in use by youtube-dl.
+
+It may be beneficial to use IPv6; in some cases, the restrictions are only applied to IPv4. Some services (sometimes only for a subset of videos) do not restrict the video URL by IP address, cookie, or user-agent, but these are the exception rather than the rule.
 
-The URLs youtube-dl outputs require the downloader to have the correct cookies. Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. Some sites also require a common user agent to be used, use `--dump-user-agent` to see the one in use by youtube-dl.
+Please bear in mind that some URL protocols are **not** supported by browsers out of the box, including RTMP. If you are using -g, your own downloader must support these as well.
+
+If you want to play the video on a machine that is not running youtube-dl, you can relay the video content from the machine that runs youtube-dl. You can use `-o -` to let youtube-dl stream a video to stdout, or simply allow the player to download the files written by youtube-dl in turn.
 
 ### ERROR: no fmt_url_map or conn information found in video info
 
@@ -436,6 +479,22 @@ Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unz
 
 To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29).
 
+### On Windows, how should I set up ffmpeg and youtube-dl? Where should I put the exe files?
+
+If you put youtube-dl and ffmpeg in the same directory that you're running the command from, it will work, but that's rather cumbersome.
+
+To make a different directory work - either for ffmpeg, or for youtube-dl, or for both - simply create the directory (say, `C:\bin`, or `C:\Users\<User name>\bin`), put all the executables directly in there, and then [set your PATH environment variable](https://www.java.com/en/download/help/path.xml) to include that directory.
+
+From then on, after restarting your shell, you will be able to access both youtube-dl and ffmpeg (and youtube-dl will be able to find ffmpeg) by simply typing `youtube-dl` or `ffmpeg`, no matter what directory you're in.
+
+### How can I detect whether a given URL is supported by youtube-dl?
+
+For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports a URL of a service in that list as unsupported. In that case, simply report a bug.
+
+It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
+
+If you want to find out whether a given URL is supported, simply call youtube-dl with it. If you get no videos back, chances are the URL is either not referring to a video or unsupported. You can find out which by examining the output (if you run youtube-dl on the console) or catching an `UnsupportedError` exception if you run it from a Python program.
+
 # DEVELOPER INSTRUCTIONS
 
 Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
@@ -526,23 +585,61 @@ youtube-dl makes the best effort to be a good command-line program, and thus sho
 
 From a Python program, you can embed youtube-dl in a more powerful fashion, like this:
 
-    import youtube_dl
+```python
+import youtube_dl
 
-    ydl_opts = {}
-    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
-        ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
+ydl_opts = {}
+with youtube_dl.YoutubeDL(ydl_opts) as ydl:
+    ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
+```
 
 Most likely, you'll want to use various options. For a list of what can be done, have a look at [youtube_dl/YoutubeDL.py](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L69). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
 
+Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
+
+```python
+import youtube_dl
+
+
+class MyLogger(object):
+    def debug(self, msg):
+        pass
+
+    def warning(self, msg):
+        pass
+
+    def error(self, msg):
+        print(msg)
+
+
+def my_hook(d):
+    if d['status'] == 'finished':
+        print('Done downloading, now converting ...')
+
+
+ydl_opts = {
+    'format': 'bestaudio/best',
+    'postprocessors': [{
+        'key': 'FFmpegExtractAudio',
+        'preferredcodec': 'mp3',
+        'preferredquality': '192',
+    }],
+    'logger': MyLogger(),
+    'progress_hooks': [my_hook],
+}
+with youtube_dl.YoutubeDL(ydl_opts) as ydl:
+    ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
+```
+
 # BUGS
 
-Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues> . Unless you were prompted so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email.
+Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues> . Unless you were prompted to do so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel #youtube-dl on freenode.
 
-Please include the full output of the command when run with `--verbose`. The output (including the first lines) contain important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
+**Please include the full output of youtube-dl when run with `-v`**.
 
-For discussions, join us in the irc channel #youtube-dl on freenode.
+The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
 
-When you submit a request, please re-read it once to avoid a couple of mistakes (you can and should use this as a checklist):
+Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist):
 
 ### Is the description of the issue itself sufficient?
 
index 601fe9ae25a8edd9d06492e3610b101651225ef6..5e2e5f75822e4fabc02b9cca3a16e0e80f2672a4 100644 (file)
--- a/README.txt
+++ b/README.txt
@@ -1,9 +1,15 @@
 youtube-dl - download videos from youtube.com or other video platforms
 
-SYNOPSIS
-========
-
-youtube-dl OPTIONS URL [URL...]
+-   INSTALLATION
+-   DESCRIPTION
+-   OPTIONS
+-   CONFIGURATION
+-   OUTPUT TEMPLATE
+-   VIDEO SELECTION
+-   FAQ
+-   DEVELOPER INSTRUCTIONS
+-   BUGS
+-   COPYRIGHT
 
 INSTALLATION
 ============
@@ -44,6 +50,8 @@ work on your Unix box, on Windows or on Mac OS X. It is released to the
 public domain, which means you can modify it, redistribute it or use it
 however you like.
 
+    youtube-dl [OPTIONS] URL [URL...]
+
 OPTIONS
 =======
 
@@ -62,10 +70,6 @@ OPTIONS
                                      they would handle
     --extractor-descriptions         Output descriptions of all supported
                                      extractors
-    --proxy URL                      Use the specified HTTP/HTTPS proxy. Pass in
-                                     an empty string (--proxy "") for direct
-                                     connection
-    --socket-timeout None            Time to wait before giving up, in seconds
     --default-search PREFIX          Use this prefix for unqualified URLs. For
                                      example "gvsearch2:" downloads two videos
                                      from google videos for  youtube-dl "large
@@ -77,13 +81,27 @@ OPTIONS
                                      this is not possible instead of searching.
     --ignore-config                  Do not read configuration files. When given
                                      in the global configuration file /etc
-                                     /youtube-dl.conf: do not read the user
-                                     configuration in ~/.config/youtube-dl.conf
-                                     (%APPDATA%/youtube-dl/config.txt on
-                                     Windows)
+                                     /youtube-dl.conf: Do not read the user
+                                     configuration in ~/.config/youtube-
+                                     dl/config (%APPDATA%/youtube-dl/config.txt
+                                     on Windows)
     --flat-playlist                  Do not extract the videos of a playlist,
                                      only list them.
 
+Network Options:
+----------------
+
+    --proxy URL                      Use the specified HTTP/HTTPS proxy. Pass in
+                                     an empty string (--proxy "") for direct
+                                     connection
+    --socket-timeout SECONDS         Time to wait before giving up, in seconds
+    --source-address IP              Client-side IP address to bind to
+                                     (experimental)
+    -4, --force-ipv4                 Make all connections via IPv4
+                                     (experimental)
+    -6, --force-ipv6                 Make all connections via IPv6
+                                     (experimental)
+
 Video Selection:
 ----------------
 
@@ -129,6 +147,7 @@ Download Options:
                                      size. By default, the buffer size is
                                      automatically resized from an initial value
                                      of SIZE.
+    --playlist-reverse               Download playlist videos in reverse order
 
 Filesystem Options:
 -------------------
@@ -136,7 +155,6 @@ Filesystem Options:
     -a, --batch-file FILE            file containing URLs to download ('-' for
                                      stdin)
     --id                             use only video ID in file name
-    -A, --auto-number                number downloaded files starting from 00000
     -o, --output TEMPLATE            output filename template. Use %(title)s to
                                      get the title, %(uploader)s for the
                                      uploader name, %(uploader_id)s for the
@@ -170,6 +188,9 @@ Filesystem Options:
     --restrict-filenames             Restrict filenames to only ASCII
                                      characters, and avoid "&" and spaces in
                                      filenames
+    -A, --auto-number                [deprecated; use  -o
+                                     "%(autonumber)s-%(title)s.%(ext)s" ] number
+                                     downloaded files starting from 00000
     -t, --title                      [deprecated] use title in file name
                                      (default)
     -l, --literal                    [deprecated] alias of --title
@@ -226,6 +247,8 @@ Verbosity / Simulation Options:
                                      for each command-line argument. If the URL
                                      refers to a playlist, dump the whole
                                      playlist information in a single line.
+    --print-json                     Be quiet and print the video information as
+                                     JSON (video is still being downloaded).
     --newline                        output progress bar as new lines
     --no-progress                    do not print progress bar
     --console-title                  display progress in console titlebar
@@ -236,6 +259,10 @@ Verbosity / Simulation Options:
                                      files in the current directory to debug
                                      problems
     --print-traffic                  Display sent and read HTTP traffic
+    -C, --call-home                  Contact the youtube-dl server for
+                                     debugging.
+    --no-call-home                   Do NOT contact the youtube-dl server for
+                                     debugging.
 
 Workarounds:
 ------------
@@ -259,14 +286,15 @@ Video Format Options:
 ---------------------
 
     -f, --format FORMAT              video format code, specify the order of
-                                     preference using slashes: -f 22/17/18 .  -f
-                                     mp4 , -f m4a and  -f flv  are also
-                                     supported. You can also use the special
-                                     names "best", "bestvideo", "bestaudio",
-                                     "worst", "worstvideo" and "worstaudio". By
-                                     default, youtube-dl will pick the best
-                                     quality. Use commas to download multiple
-                                     audio formats, such as -f
+                                     preference using slashes, as in -f 22/17/18
+                                     .  Instead of format codes, you can select
+                                     by extension for the extensions aac, m4a,
+                                     mp3, mp4, ogg, wav, webm. You can also use
+                                     the special names "best", "bestvideo",
+                                     "bestaudio", "worst".  By default, youtube-
+                                     dl will pick the best quality. Use commas
+                                     to download multiple audio formats, such as
+                                     -f
                                      136/137/mp4/bestvideo,140/m4a/bestaudio.
                                      You can merge the video and audio of two
                                      formats into a single file using -f <video-
@@ -280,6 +308,10 @@ Video Format Options:
     -F, --list-formats               list all available formats
     --youtube-skip-dash-manifest     Do not download the DASH manifest on
                                      YouTube videos
+    --merge-output-format FORMAT     If a merge is required (e.g.
+                                     bestvideo+bestaudio), output to given
+                                     container format. One of mkv, mp4, ogg,
+                                     webm, flv. Ignored if no merge is required
 
 Subtitle Options:
 -----------------
@@ -332,6 +364,11 @@ Post-processing Options:
     --add-metadata                   write metadata to the video file
     --xattrs                         write metadata to the video file's xattrs
                                      (using dublin core and xdg standards)
+    --fixup POLICY                   (experimental) Automatically correct known
+                                     faults of the file. One of never (do
+                                     nothing), warn (only emit a warning),
+                                     detect_or_warn (check whether we can do
+                                     anything about it, warn otherwise)
     --prefer-avconv                  Prefer avconv over ffmpeg for running the
                                      postprocessors (default)
     --prefer-ffmpeg                  Prefer ffmpeg over avconv for running the
@@ -348,7 +385,8 @@ You can configure youtube-dl by placing default arguments (such as
 --extract-audio --no-mtime to always extract the audio and not copy the
 mtime) into /etc/youtube-dl.conf and/or ~/.config/youtube-dl/config. On
 Windows, the configuration file locations are
-%APPDATA%\youtube-dl\config.txt and C:\Users\<Yourname>\youtube-dl.conf.
+%APPDATA%\youtube-dl\config.txt and
+C:\Users\<user name>\youtube-dl.conf.
 
 OUTPUT TEMPLATE
 ===============
@@ -497,13 +535,29 @@ I have downloaded a video but how can I play it?
 Once the video is fully downloaded, use any video player, such as vlc or
 mplayer.
 
-The links provided by youtube-dl -g are not working anymore
+I extracted a video URL with -g, but it does not play on another machine / in my webbrowser.
+
+It depends a lot on the service. In many cases, requests for the video
+(to download/play it) must come from the same IP address and with the
+same cookies. Use the --cookies option to write the required cookies
+into a file, and advise your downloader to read cookies from that file.
+Some sites also require a common user agent to be used, use
+--dump-user-agent to see the one in use by youtube-dl.
+
+It may be beneficial to use IPv6; in some cases, the restrictions are
+only applied to IPv4. Some services (sometimes only for a subset of
+videos) do not restrict the video URL by IP address, cookie, or
+user-agent, but these are the exception rather than the rule.
 
-The URLs youtube-dl outputs require the downloader to have the correct
-cookies. Use the --cookies option to write the required cookies into a
-file, and advise your downloader to read cookies from that file. Some
-sites also require a common user agent to be used, use --dump-user-agent
-to see the one in use by youtube-dl.
+Please bear in mind that some URL protocols are not supported by
+browsers out of the box, including RTMP. If you are using -g, your own
+downloader must support these as well.
+
+If you want to play the video on a machine that is not running
+youtube-dl, you can relay the video content from the machine that runs
+youtube-dl. You can use -o - to let youtube-dl stream a video to stdout,
+or simply allow the player to download the files written by youtube-dl
+in turn.
 
 ERROR: no fmt_url_map or conn information found in video info
 
@@ -540,6 +594,45 @@ The exe throws a Runtime error from Visual C++
 To run the exe you need to install first the Microsoft Visual C++ 2008
 Redistributable Package.
 
+On Windows, how should I set up ffmpeg and youtube-dl? Where should I put the exe files?
+
+If you put youtube-dl and ffmpeg in the same directory that you're
+running the command from, it will work, but that's rather cumbersome.
+
+To make a different directory work - either for ffmpeg, or for
+youtube-dl, or for both - simply create the directory (say, C:\bin, or
+C:\Users\<User name>\bin), put all the executables directly in there,
+and then set your PATH environment variable to include that directory.
+
+From then on, after restarting your shell, you will be able to access
+both youtube-dl and ffmpeg (and youtube-dl will be able to find ffmpeg)
+by simply typing youtube-dl or ffmpeg, no matter what directory you're
+in.
+
+How can I detect whether a given URL is supported by youtube-dl?
+
+For one, have a look at the list of supported sites. Note that it can
+sometimes happen that the site changes its URL scheme (say, from
+http://example.com/video/1234567 to http://example.com/v/1234567 ) and
+youtube-dl reports a URL of a service in that list as unsupported. In
+that case, simply report a bug.
+
+It is not possible to detect whether a URL is supported or not. That's
+because youtube-dl contains a generic extractor which matches all URLs.
+You may be tempted to disable, exclude, or remove the generic extractor,
+but the generic extractor not only allows users to extract videos from
+lots of websites that embed a video from another service, but may also
+be used to extract video from a service that it's hosting itself.
+Therefore, we neither recommend nor support disabling, excluding, or
+removing the generic extractor.
+
+If you want to find out whether a given URL is supported, simply call
+youtube-dl with it. If you get no videos back, chances are the URL is
+either not referring to a video or unsupported. You can find out which
+by examining the output (if you run youtube-dl on the console) or
+catching an UnsupportedError exception if you run it from a Python
+program.
+
 DEVELOPER INSTRUCTIONS
 ======================
 
@@ -654,32 +747,72 @@ any problems parsing its output, feel free to create a report.
 From a Python program, you can embed youtube-dl in a more powerful
 fashion, like this:
 
-    import youtube_dl
+``` {.python}
+import youtube_dl
 
-    ydl_opts = {}
-    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
-        ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
+ydl_opts = {}
+with youtube_dl.YoutubeDL(ydl_opts) as ydl:
+    ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
+```
 
 Most likely, you'll want to use various options. For a list of what can
 be done, have a look at youtube_dl/YoutubeDL.py. For a start, if you
 want to intercept youtube-dl's output, set a logger object.
 
+Here's a more complete example of a program that outputs only errors
+(and a short message after the download is finished), and
+downloads/converts the video to an mp3 file:
+
+``` {.python}
+import youtube_dl
+
+
+class MyLogger(object):
+    def debug(self, msg):
+        pass
+
+    def warning(self, msg):
+        pass
+
+    def error(self, msg):
+        print(msg)
+
+
+def my_hook(d):
+    if d['status'] == 'finished':
+        print('Done downloading, now converting ...')
+
+
+ydl_opts = {
+    'format': 'bestaudio/best',
+    'postprocessors': [{
+        'key': 'FFmpegExtractAudio',
+        'preferredcodec': 'mp3',
+        'preferredquality': '192',
+    }],
+    'logger': MyLogger(),
+    'progress_hooks': [my_hook],
+}
+with youtube_dl.YoutubeDL(ydl_opts) as ydl:
+    ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
+```
+
 BUGS
 ====
 
 Bugs and suggestions should be reported at:
 https://github.com/rg3/youtube-dl/issues . Unless you were prompted so
 or there is another pertinent reason (e.g. GitHub fails to accept the
-bug report), please do not send bug reports via personal email.
+bug report), please do not send bug reports via personal email. For
+discussions, join us in the irc channel #youtube-dl on freenode.
+
+Please include the full output of youtube-dl when run with -v.
 
-Please include the full output of the command when run with --verbose.
 The output (including the first lines) contains important debugging
 information. Issues without the full output are often not reproducible
 and therefore do not get solved in short order, if ever.
 
-For discussions, join us in the irc channel #youtube-dl on freenode.
-
-When you submit a request, please re-read it once to avoid a couple of
+Please re-read your issue once again to avoid a couple of common
 mistakes (you can and should use this as a checklist):
 
 Is the description of the issue itself sufficient?
index f0f0481c781ab40f8386de5c87a99c6468e00708..d3ef5f0b50daa56513118f55d5b636e5f46552a0 100755 (executable)
@@ -16,7 +16,7 @@ def main():
         template = tmplf.read()
 
     ie_htmls = []
-    for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower()):
+    for ie in youtube_dl.list_extractors(age_limit=None):
         ie_html = '<b>{}</b>'.format(ie.IE_NAME)
         ie_desc = getattr(ie, 'IE_DESC', None)
         if ie_desc is False:
diff --git a/devscripts/make_contributing.py b/devscripts/make_contributing.py
new file mode 100755 (executable)
index 0000000..5e454a4
--- /dev/null
@@ -0,0 +1,32 @@
+#!/usr/bin/env python
+from __future__ import unicode_literals
+
+import io
+import optparse
+import re
+
+
+def main():
+    parser = optparse.OptionParser(usage='%prog INFILE OUTFILE')
+    options, args = parser.parse_args()
+    if len(args) != 2:
+        parser.error('Expected an input and an output filename')
+
+    infile, outfile = args
+
+    with io.open(infile, encoding='utf-8') as inf:
+        readme = inf.read()
+
+    bug_text = re.search(
+        r'(?s)#\s*BUGS\s*[^\n]*\s*(.*?)#\s*COPYRIGHT', readme).group(1)
+    dev_text = re.search(
+        r'(?s)(#\s*DEVELOPER INSTRUCTIONS.*?)#\s*EMBEDDING YOUTUBE-DL',
+        readme).group(1)
+
+    out = bug_text + dev_text
+
+    with io.open(outfile, 'w', encoding='utf-8') as outf:
+        outf.write(out)
+
+if __name__ == '__main__':
+    main()
diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py
new file mode 100644 (file)
index 0000000..3df4385
--- /dev/null
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+from __future__ import unicode_literals
+
+import io
+import optparse
+import os
+import sys
+
+
+# Import youtube_dl
+ROOT_DIR = os.path.join(os.path.dirname(__file__), '..')
+sys.path.append(ROOT_DIR)
+import youtube_dl
+
+
+def main():
+    parser = optparse.OptionParser(usage='%prog OUTFILE.md')
+    options, args = parser.parse_args()
+    if len(args) != 1:
+        parser.error('Expected an output filename')
+
+    outfile, = args
+
+    def gen_ies_md(ies):
+        for ie in ies:
+            ie_md = '**{0}**'.format(ie.IE_NAME)
+            ie_desc = getattr(ie, 'IE_DESC', None)
+            if ie_desc is False:
+                continue
+            if ie_desc is not None:
+                ie_md += ': {0}'.format(ie.IE_DESC)
+            if not ie.working():
+                ie_md += ' (Currently broken)'
+            yield ie_md
+
+    ies = sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower())
+    out = '# Supported sites\n' + ''.join(
+        ' - ' + md + '\n'
+        for md in gen_ies_md(ies))
+
+    with io.open(outfile, 'w', encoding='utf-8') as outf:
+        outf.write(out)
+
+if __name__ == '__main__':
+    main()
index f66bebfea6de2cec60a10f943380b9247e5c3d60..7ece37754d1003ba4cbe63fed109eef00711adcd 100644 (file)
@@ -11,8 +11,19 @@ README_FILE = os.path.join(ROOT_DIR, 'README.md')
 with io.open(README_FILE, encoding='utf-8') as f:
     readme = f.read()
 
-PREFIX = '%YOUTUBE-DL(1)\n\n# NAME\n'
-readme = re.sub(r'(?s)# INSTALLATION.*?(?=# DESCRIPTION)', '', readme)
+PREFIX = '''%YOUTUBE-DL(1)
+
+# NAME
+
+youtube\-dl \- download videos from youtube.com or other video platforms
+
+# SYNOPSIS
+
+**youtube-dl** \[OPTIONS\] URL [URL...]
+
+'''
+readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
+readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
 readme = PREFIX + readme
 
 if sys.version_info < (3, 0):
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
new file mode 100644 (file)
index 0000000..dbbf4a7
--- /dev/null
@@ -0,0 +1,500 @@
+# Supported sites
+ - **1up.com**
+ - **220.ro**
+ - **24video**
+ - **3sat**
+ - **4tube**
+ - **56.com**
+ - **5min**
+ - **8tracks**
+ - **9gag**
+ - **abc.net.au**
+ - **AcademicEarth:Course**
+ - **AddAnime**
+ - **AdobeTV**
+ - **AdultSwim**
+ - **Aftonbladet**
+ - **AlJazeera**
+ - **Allocine**
+ - **anitube.se**
+ - **AnySex**
+ - **Aparat**
+ - **AppleTrailers**
+ - **archive.org**: archive.org videos
+ - **ARD**
+ - **ARD:mediathek**
+ - **arte.tv**
+ - **arte.tv:+7**
+ - **arte.tv:concert**
+ - **arte.tv:creative**
+ - **arte.tv:ddc**
+ - **arte.tv:embed**
+ - **arte.tv:future**
+ - **audiomack**
+ - **AUEngine**
+ - **Azubu**
+ - **bambuser**
+ - **bambuser:channel**
+ - **Bandcamp**
+ - **Bandcamp:album**
+ - **bbc.co.uk**: BBC iPlayer
+ - **Beeg**
+ - **BehindKink**
+ - **Bet**
+ - **Bild**: Bild.de
+ - **BiliBili**
+ - **blinkx**
+ - **blip.tv:user**
+ - **BlipTV**
+ - **Bloomberg**
+ - **Bpb**: Bundeszentrale für politische Bildung
+ - **BR**: Bayerischer Rundfunk Mediathek
+ - **Break**
+ - **Brightcove**
+ - **BuzzFeed**
+ - **BYUtv**
+ - **Canal13cl**
+ - **canalc2.tv**
+ - **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
+ - **CBS**
+ - **CBSNews**: CBS News
+ - **CeskaTelevize**
+ - **channel9**: Channel 9
+ - **Chilloutzone**
+ - **Cinchcast**
+ - **Cinemassacre**
+ - **clipfish**
+ - **cliphunter**
+ - **Clipsyndicate**
+ - **Cloudy**
+ - **Clubic**
+ - **cmt.com**
+ - **CNET**
+ - **CNN**
+ - **CNNBlogs**
+ - **CollegeHumor**
+ - **ComCarCoff**
+ - **ComedyCentral**
+ - **ComedyCentralShows**: The Daily Show / The Colbert Report
+ - **CondeNast**: Condé Nast media group: Condé Nast, GQ, Glamour, Vanity Fair, Vogue, W Magazine, WIRED
+ - **Cracked**
+ - **Criterion**
+ - **Crunchyroll**
+ - **crunchyroll:playlist**
+ - **CSpan**: C-SPAN
+ - **culturebox.francetvinfo.fr**
+ - **dailymotion**
+ - **dailymotion:playlist**
+ - **dailymotion:user**
+ - **daum.net**
+ - **DBTV**
+ - **DeezerPlaylist**
+ - **defense.gouv.fr**
+ - **Discovery**
+ - **divxstage**: DivxStage
+ - **Dotsub**
+ - **Dropbox**
+ - **DrTuber**
+ - **DRTV**
+ - **Dump**
+ - **dvtv**: http://video.aktualne.cz/
+ - **EbaumsWorld**
+ - **eHow**
+ - **Einthusan**
+ - **eitb.tv**
+ - **EllenTV**
+ - **EllenTV:clips**
+ - **ElPais**: El País
+ - **EMPFlix**
+ - **Engadget**
+ - **Eporner**
+ - **Escapist**
+ - **EveryonesMixtape**
+ - **exfm**: ex.fm
+ - **ExpoTV**
+ - **ExtremeTube**
+ - **facebook**
+ - **faz.net**
+ - **fc2**
+ - **fernsehkritik.tv**
+ - **fernsehkritik.tv:postecke**
+ - **Firedrive**
+ - **Firstpost**
+ - **firsttv**: Видеоархив - Первый канал
+ - **Flickr**
+ - **Folketinget**: Folketinget (ft.dk; Danish parliament)
+ - **Foxgay**
+ - **FoxNews**
+ - **france2.fr:generation-quoi**
+ - **FranceCulture**
+ - **FranceInter**
+ - **francetv**: France 2, 3, 4, 5 and Ô
+ - **francetvinfo.fr**
+ - **Freesound**
+ - **freespeech.org**
+ - **FreeVideo**
+ - **FunnyOrDie**
+ - **Gamekings**
+ - **GameOne**
+ - **gameone:playlist**
+ - **GameSpot**
+ - **GameStar**
+ - **Gametrailers**
+ - **GDCVault**
+ - **generic**: Generic downloader that works on some sites
+ - **GiantBomb**
+ - **Glide**: Glide mobile video messages (glide.me)
+ - **Globo**
+ - **GodTube**
+ - **GoldenMoustache**
+ - **Golem**
+ - **GorillaVid**: GorillaVid.in, daclips.in, movpod.in and fastvideo.in
+ - **Goshgay**
+ - **Grooveshark**
+ - **Groupon**
+ - **Hark**
+ - **Heise**
+ - **Helsinki**: helsinki.fi
+ - **HentaiStigma**
+ - **HornBunny**
+ - **HostingBulk**
+ - **HotNewHipHop**
+ - **Howcast**
+ - **HowStuffWorks**
+ - **HuffPost**: Huffington Post
+ - **Hypem**
+ - **Iconosquare**
+ - **ign.com**
+ - **imdb**: Internet Movie Database trailers
+ - **imdb:list**: Internet Movie Database lists
+ - **Ina**
+ - **InfoQ**
+ - **Instagram**
+ - **instagram:user**: Instagram user profile
+ - **InternetVideoArchive**
+ - **IPrima**
+ - **ivi**: ivi.ru
+ - **ivi:compilation**: ivi.ru compilations
+ - **Izlesene**
+ - **JadoreCettePub**
+ - **JeuxVideo**
+ - **Jove**
+ - **jpopsuki.tv**
+ - **Jukebox**
+ - **Kankan**
+ - **keek**
+ - **KeezMovies**
+ - **KhanAcademy**
+ - **KickStarter**
+ - **kontrtube**: KontrTube.ru - Труба зовёт
+ - **KrasView**: Красвью
+ - **Ku6**
+ - **la7.tv**
+ - **Laola1Tv**
+ - **lifenews**: LIFE | NEWS
+ - **LiveLeak**
+ - **livestream**
+ - **livestream:original**
+ - **lrt.lt**
+ - **lynda**: lynda.com videos
+ - **lynda:course**: lynda.com online courses
+ - **m6**
+ - **macgamestore**: MacGameStore trailers
+ - **mailru**: Видео@Mail.Ru
+ - **Malemotion**
+ - **MDR**
+ - **metacafe**
+ - **Metacritic**
+ - **Mgoon**
+ - **Minhateca**
+ - **MinistryGrid**
+ - **mitele.es**
+ - **mixcloud**
+ - **MLB**
+ - **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
+ - **Mofosex**
+ - **Mojvideo**
+ - **Moniker**: allmyvideos.net and vidspot.net
+ - **mooshare**: Mooshare.biz
+ - **Morningstar**: morningstar.com
+ - **Motherless**
+ - **Motorsport**: motorsport.com
+ - **MovieClips**
+ - **Moviezine**
+ - **movshare**: MovShare
+ - **MPORA**
+ - **MTV**
+ - **mtviggy.com**
+ - **mtvservices:embedded**
+ - **MuenchenTV**: münchen.tv
+ - **MusicPlayOn**
+ - **MusicVault**
+ - **muzu.tv**
+ - **MySpace**
+ - **MySpace:album**
+ - **MySpass**
+ - **myvideo**
+ - **MyVidster**
+ - **Naver**
+ - **NBA**
+ - **NBC**
+ - **NBCNews**
+ - **ndr**: NDR.de - Mediathek
+ - **NDTV**
+ - **NerdCubedFeed**
+ - **Newgrounds**
+ - **Newstube**
+ - **nfb**: National Film Board of Canada
+ - **nfl.com**
+ - **nhl.com**
+ - **nhl.com:videocenter**: NHL videocenter category
+ - **niconico**: ニコニコ動画
+ - **NiconicoPlaylist**
+ - **Noco**
+ - **Normalboots**
+ - **NosVideo**
+ - **novamov**: NovaMov
+ - **Nowness**
+ - **nowvideo**: NowVideo
+ - **npo.nl**
+ - **NRK**
+ - **NRKTV**
+ - **NTV**
+ - **Nuvid**
+ - **NYTimes**
+ - **ocw.mit.edu**
+ - **OktoberfestTV**
+ - **on.aol.com**
+ - **Ooyala**
+ - **orf:oe1**: Radio Österreich 1
+ - **orf:tvthek**: ORF TVthek
+ - **ORFFM4**: radio FM4
+ - **parliamentlive.tv**: UK parliament videos
+ - **Patreon**
+ - **PBS**
+ - **Phoenix**
+ - **Photobucket**
+ - **PlanetaPlay**
+ - **play.fm**
+ - **played.to**
+ - **Playvid**
+ - **plus.google**: Google Plus
+ - **pluzz.francetv.fr**
+ - **podomatic**
+ - **PornHd**
+ - **PornHub**
+ - **Pornotube**
+ - **PornoXO**
+ - **PromptFile**
+ - **prosiebensat1**: ProSiebenSat.1 Digital
+ - **Pyvideo**
+ - **QuickVid**
+ - **radio.de**
+ - **radiofrance**
+ - **Rai**
+ - **RBMARadio**
+ - **RedTube**
+ - **Restudy**
+ - **ReverbNation**
+ - **RingTV**
+ - **RottenTomatoes**
+ - **Roxwel**
+ - **RTBF**
+ - **RTLnow**
+ - **rtlxl.nl**
+ - **RTP**
+ - **RTS**: RTS.ch
+ - **rtve.es:alacarta**: RTVE a la carta
+ - **rtve.es:live**: RTVE.es live streams
+ - **RUHD**
+ - **rutube**: Rutube videos
+ - **rutube:channel**: Rutube channels
+ - **rutube:movie**: Rutube movies
+ - **rutube:person**: Rutube person videos
+ - **RUTV**: RUTV.RU
+ - **Sapo**: SAPO Vídeos
+ - **savefrom.net**
+ - **SBS**: sbs.com.au
+ - **SciVee**
+ - **screen.yahoo:search**: Yahoo screen search
+ - **Screencast**
+ - **ScreencastOMatic**
+ - **ScreenwaveMedia**
+ - **ServingSys**
+ - **Sexu**
+ - **SexyKarma**: Sexy Karma and Watch Indian Porn
+ - **Shared**
+ - **ShareSix**
+ - **Sina**
+ - **Slideshare**
+ - **Slutload**
+ - **smotri**: Smotri.com
+ - **smotri:broadcast**: Smotri.com broadcasts
+ - **smotri:community**: Smotri.com community videos
+ - **smotri:user**: Smotri.com user videos
+ - **Snotr**
+ - **Sockshare**
+ - **Sohu**
+ - **soundcloud**
+ - **soundcloud:playlist**
+ - **soundcloud:set**
+ - **soundcloud:user**
+ - **Soundgasm**
+ - **southpark.cc.com**
+ - **southpark.de**
+ - **Space**
+ - **Spankwire**
+ - **Spiegel**
+ - **Spiegel:Article**: Articles on spiegel.de
+ - **Spiegeltv**
+ - **Spike**
+ - **Sport5**
+ - **SportBox**
+ - **SportDeutschland**
+ - **SRMediathek**: Süddeutscher Rundfunk
+ - **stanfordoc**: Stanford Open ClassRoom
+ - **Steam**
+ - **streamcloud.eu**
+ - **StreamCZ**
+ - **SunPorno**
+ - **SWRMediathek**
+ - **Syfy**
+ - **SztvHu**
+ - **Tagesschau**
+ - **Tapely**
+ - **Tass**
+ - **teachertube**: teachertube.com videos
+ - **teachertube:user:collection**: teachertube.com user and collection videos
+ - **TeachingChannel**
+ - **Teamcoco**
+ - **TeamFour**
+ - **TechTalks**
+ - **techtv.mit.edu**
+ - **TED**
+ - **tegenlicht.vpro.nl**
+ - **TeleBruxelles**
+ - **telecinco.es**
+ - **TeleMB**
+ - **TenPlay**
+ - **TF1**
+ - **TheOnion**
+ - **ThePlatform**
+ - **TheSixtyOne**
+ - **ThisAV**
+ - **THVideo**
+ - **THVideoPlaylist**
+ - **tinypic**: tinypic.com videos
+ - **tlc.com**
+ - **tlc.de**
+ - **TMZ**
+ - **TNAFlix**
+ - **tou.tv**
+ - **Toypics**: Toypics user profile
+ - **ToypicsUser**: Toypics user profile
+ - **TrailerAddict** (Currently broken)
+ - **Trilulilu**
+ - **TruTube**
+ - **Tube8**
+ - **Tudou**
+ - **Tumblr**
+ - **TuneIn**
+ - **Turbo**
+ - **Tutv**
+ - **tv.dfb.de**
+ - **tvigle**: Интернет-телевидение Tvigle.ru
+ - **tvp.pl**
+ - **TVPlay**: TV3Play and related services
+ - **Twitch**
+ - **Ubu**
+ - **udemy**
+ - **udemy:course**
+ - **Unistra**
+ - **Urort**: NRK P3 Urørt
+ - **ustream**
+ - **ustream:channel**
+ - **Vbox7**
+ - **VeeHD**
+ - **Veoh**
+ - **Vesti**: Вести.Ru
+ - **Vevo**
+ - **VGTV**
+ - **vh1.com**
+ - **Vice**
+ - **Viddler**
+ - **video.google:search**: Google Video search
+ - **video.mit.edu**
+ - **VideoBam**
+ - **VideoDetective**
+ - **videofy.me**
+ - **videolectures.net**
+ - **VideoMega**
+ - **VideoPremium**
+ - **VideoTt**: video.tt - Your True Tube
+ - **videoweed**: VideoWeed
+ - **Vidme**
+ - **Vidzi**
+ - **viki**
+ - **vimeo**
+ - **vimeo:album**
+ - **vimeo:channel**
+ - **vimeo:group**
+ - **vimeo:likes**: Vimeo user likes
+ - **vimeo:review**: Review pages on vimeo
+ - **vimeo:user**
+ - **vimeo:watchlater**: Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)
+ - **Vimple**: Vimple.ru
+ - **Vine**
+ - **vine:user**
+ - **vk.com**
+ - **vk.com:user-videos**: vk.com:All of a user's videos
+ - **Vodlocker**
+ - **Vporn**
+ - **VRT**
+ - **vube**: Vube.com
+ - **VuClip**
+ - **vulture.com**
+ - **Walla**
+ - **WashingtonPost**
+ - **wat.tv**
+ - **WayOfTheMaster**
+ - **WDR**
+ - **wdr:mobile**
+ - **WDRMaus**: Sendung mit der Maus
+ - **Weibo**
+ - **Wimp**
+ - **Wistia**
+ - **WorldStarHipHop**
+ - **wrzuta.pl**
+ - **XBef**
+ - **XboxClips**
+ - **XHamster**
+ - **XMinus**
+ - **XNXX**
+ - **XTube**
+ - **XTubeUser**: XTube user profile
+ - **XVideos**
+ - **Yahoo**: Yahoo screen and movies
+ - **YesJapan**
+ - **Ynet**
+ - **YouJizz**
+ - **Youku**
+ - **YouPorn**
+ - **YourUpload**
+ - **youtube**: YouTube.com
+ - **youtube:channel**: YouTube.com channels
+ - **youtube:favorites**: YouTube.com favourite videos, ":ytfav" for short (requires authentication)
+ - **youtube:history**: Youtube watch history, ":ythistory" for short (requires authentication)
+ - **youtube:playlist**: YouTube.com playlists
+ - **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication)
+ - **youtube:search**: YouTube.com searches
+ - **youtube:search:date**: YouTube.com searches, newest videos first
+ - **youtube:search_url**: YouTube.com search URLs
+ - **youtube:show**: YouTube.com (multi-season) shows
+ - **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
+ - **youtube:toplist**: YouTube.com top lists, "yttoplist:{channel}:{list title}" (Example: "yttoplist:music:Top Tracks")
+ - **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
+ - **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
+ - **ZDF**
+ - **ZDFChannel**
+ - **zingmp3:album**: mp3.zing.vn albums
+ - **zingmp3:song**: mp3.zing.vn songs
index 9a7f0746ec4ca02ed9f197395b8e48a1fffff38a..c416f388cbfe335678269b0226cc10708c49a850 100644 (file)
@@ -82,24 +82,14 @@ class FakeYDL(YoutubeDL):
 
 def gettestcases(include_onlymatching=False):
     for ie in youtube_dl.extractor.gen_extractors():
-        t = getattr(ie, '_TEST', None)
-        if t:
-            assert not hasattr(ie, '_TESTS'), \
-                '%s has _TEST and _TESTS' % type(ie).__name__
-            tests = [t]
-        else:
-            tests = getattr(ie, '_TESTS', [])
-        for t in tests:
-            if not include_onlymatching and t.get('only_matching', False):
-                continue
-            t['name'] = type(ie).__name__[:-len('IE')]
-            yield t
+        for tc in ie.get_testcases(include_onlymatching):
+            yield tc
 
 
 md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
 
 
-def expect_info_dict(self, expected_dict, got_dict):
+def expect_info_dict(self, got_dict, expected_dict):
     for info_field, expected in expected_dict.items():
         if isinstance(expected, compat_str) and expected.startswith('re:'):
             got = got_dict.get(info_field)
@@ -120,6 +110,20 @@ def expect_info_dict(self, expected_dict, got_dict):
         else:
             if isinstance(expected, compat_str) and expected.startswith('md5:'):
                 got = 'md5:' + md5(got_dict.get(info_field))
+            elif isinstance(expected, compat_str) and expected.startswith('mincount:'):
+                got = got_dict.get(info_field)
+                self.assertTrue(
+                    isinstance(got, list),
+                    'Expected field %s to be a list, but it is of type %s' % (
+                        info_field, type(got).__name__))
+                expected_num = int(expected.partition(':')[2])
+                assertGreaterEqual(
+                    self, len(got), expected_num,
+                    'Expected %d items in field %s, but only got %d' % (
+                        expected_num, info_field, len(got)
+                    )
+                )
+                continue
             else:
                 got = got_dict.get(info_field)
             self.assertEqual(expected, got,
@@ -161,7 +165,9 @@ def assertRegexpMatches(self, text, regexp, msg=None):
     else:
         m = re.match(regexp, text)
         if not m:
-            note = 'Regexp didn\'t match: %r not found in %r' % (regexp, text)
+            note = 'Regexp didn\'t match: %r not found' % (regexp)
+            if len(text) < 1000:
+                note += ' in %r' % text
             if msg is None:
                 msg = note
             else:
index 150b23dc4efb5cfa86186b104ca38532e0539a02..240fd6632ea59fd3b8a87dee190a0484bd2f6acc 100644 (file)
Binary files a/test/swftests/ArrayAccess.swf and b/test/swftests/ArrayAccess.swf differ
index 8a194602cf79f94cc659e75629cf5735311444ce..8bdfe06c356790a1251904675a294c0e6e441af7 100644 (file)
Binary files a/test/swftests/ClassCall.swf and b/test/swftests/ClassCall.swf differ
index 01b21414228d1fe0f16041f4c0bfe80d4342ebcb..576eb30da2cc21cf849c6ac9ca5ba15de5419c60 100644 (file)
Binary files a/test/swftests/ClassConstruction.swf and b/test/swftests/ClassConstruction.swf differ
index 70c5a7c93a84c3abb271722bc308e7de022ad0a5..0d902fd30f6775762bb0b454271649a273f05690 100644 (file)
Binary files a/test/swftests/ConstArrayAccess.swf and b/test/swftests/ConstArrayAccess.swf differ
index a81677c9c0b20ae1c4077b7ca2a2a108761132ec..b8bd0cb97124c000374cba1c8da50747718dab13 100644 (file)
Binary files a/test/swftests/ConstantInt.swf and b/test/swftests/ConstantInt.swf differ
index e786bceade87bafa490f5c4e611fdc11b317dcfd..3fa3559d674476ced91adc20d6844e59c6a98dbe 100644 (file)
Binary files a/test/swftests/DictCall.swf and b/test/swftests/DictCall.swf differ
index 63ad7d434ce86e7df21a2db48cd8bc1d988d556b..33487f078548efffa633add3400f41ab60fa0a58 100644 (file)
Binary files a/test/swftests/EqualsOperator.swf and b/test/swftests/EqualsOperator.swf differ
index d003d4ffded80169e5ce448355abe9d257ebfacc..42102af2780a532ff8fbd25029cefd3904670541 100644 (file)
Binary files a/test/swftests/LocalVars.swf and b/test/swftests/LocalVars.swf differ
index ba89ef02be26e6a787326d2b7418505ac104ba3c..c3ec5137257ff18dbc5a96f53b9a8d355b687d5b 100644 (file)
Binary files a/test/swftests/MemberAssignment.swf and b/test/swftests/MemberAssignment.swf differ
index ee2487c697455a7fb44a725f5aac644fc5f22d99..a251f7a20be922af071b237b906311f985c70819 100644 (file)
Binary files a/test/swftests/NeOperator.swf and b/test/swftests/NeOperator.swf differ
index 7abbfec41b021a7b3ac489abfca5408a36757253..7fa395a5bc595a2d28dded0420278084936bcdc7 100644 (file)
Binary files a/test/swftests/PrivateCall.swf and b/test/swftests/PrivateCall.swf differ
index 89dbdd91a794339854aa34ed5bb36fa68c0f9946..09a857ecbf002a69f2cd96820a7a5b6f5937e882 100644 (file)
Binary files a/test/swftests/PrivateVoidCall.swf and b/test/swftests/PrivateVoidCall.swf differ
index 972758959b114616b37f951aaf4d9ee529964aa6..dff661c8df854e7362c80b385fadc18475a69bbf 100644 (file)
Binary files a/test/swftests/StaticAssignment.swf and b/test/swftests/StaticAssignment.swf differ
index 3b6366e0c8ae455ee06c65d86a162da3880bca1d..622c40dad581db07e9a6ecc3bc483d7a7dee6108 100644 (file)
Binary files a/test/swftests/StaticRetrieval.swf and b/test/swftests/StaticRetrieval.swf differ
index 6ade5f7ab36e2fee7ec758d9d6d612b7bf9b7b67..2784c2b65edcb61fc471fcf010f4499387f95d1b 100644 (file)
Binary files a/test/swftests/StringBasics.swf and b/test/swftests/StringBasics.swf differ
index d09b8be6501a9ad114e0c523f1d2bc7f5bea7555..e81fce18d34c11a969d872a2b8e94850a01d6f9a 100644 (file)
Binary files a/test/swftests/StringCharCodeAt.swf and b/test/swftests/StringCharCodeAt.swf differ
index 28546c6dad318c1bacb48f7e65ce6fb0b1202747..188d56ef85849598acb032b47872c7e5f561713a 100644 (file)
Binary files a/test/swftests/StringConversion.swf and b/test/swftests/StringConversion.swf differ
index 13c18ed95d4ea65111b6a5bc1406d0a5703336c2..be8d12997a1a5aba2cb62270068363f339a5eac6 100644 (file)
@@ -40,5 +40,23 @@ class TestInfoExtractor(unittest.TestCase):
         self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
         self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
 
+    def test_html_search_meta(self):
+        ie = self.ie
+        html = '''
+            <meta name="a" content="1" />
+            <meta name='b' content='2'>
+            <meta name="c" content='3'>
+            <meta name=d content='4'>
+            <meta property="e" content='5' >
+            <meta content="6" name="f">
+        '''
+
+        self.assertEqual(ie._html_search_meta('a', html), '1')
+        self.assertEqual(ie._html_search_meta('b', html), '2')
+        self.assertEqual(ie._html_search_meta('c', html), '3')
+        self.assertEqual(ie._html_search_meta('d', html), '4')
+        self.assertEqual(ie._html_search_meta('e', html), '5')
+        self.assertEqual(ie._html_search_meta('f', html), '6')
+
 if __name__ == '__main__':
     unittest.main()
index f8e4f930ebe6d5aefdbf128909b1be720ec046f3..85d87f2c31e803aff668f1d71a6bbdfba33cdcd8 100644 (file)
@@ -8,6 +8,8 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
+import copy
+
 from test.helper import FakeYDL, assertRegexpMatches
 from youtube_dl import YoutubeDL
 from youtube_dl.extractor import YoutubeIE
@@ -192,6 +194,37 @@ class TestFormatSelection(unittest.TestCase):
         downloaded = ydl.downloaded_info_dicts[0]
         self.assertEqual(downloaded['format_id'], 'vid-high')
 
+    def test_format_selection_audio_exts(self):
+        formats = [
+            {'format_id': 'mp3-64', 'ext': 'mp3', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'},
+            {'format_id': 'ogg-64', 'ext': 'ogg', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'},
+            {'format_id': 'aac-64', 'ext': 'aac', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'},
+            {'format_id': 'mp3-32', 'ext': 'mp3', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'},
+            {'format_id': 'aac-32', 'ext': 'aac', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'},
+        ]
+
+        info_dict = _make_result(formats)
+        ydl = YDL({'format': 'best'})
+        ie = YoutubeIE(ydl)
+        ie._sort_formats(info_dict['formats'])
+        ydl.process_ie_result(copy.deepcopy(info_dict))
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'aac-64')
+
+        ydl = YDL({'format': 'mp3'})
+        ie = YoutubeIE(ydl)
+        ie._sort_formats(info_dict['formats'])
+        ydl.process_ie_result(copy.deepcopy(info_dict))
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'mp3-64')
+
+        ydl = YDL({'prefer_free_formats': True})
+        ie = YoutubeIE(ydl)
+        ie._sort_formats(info_dict['formats'])
+        ydl.process_ie_result(copy.deepcopy(info_dict))
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'ogg-64')
+
     def test_format_selection_video(self):
         formats = [
             {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': '_'},
@@ -218,7 +251,7 @@ class TestFormatSelection(unittest.TestCase):
             # 3D
             '85', '84', '102', '83', '101', '82', '100',
             # Dash video
-            '138', '137', '248', '136', '247', '135', '246',
+            '137', '248', '136', '247', '135', '246',
             '245', '244', '134', '243', '133', '242', '160',
             # Dash audio
             '141', '172', '140', '171', '139',
index 5be065c437ae978d8c4ccc1a67bea87a599a77ed..6f5513faa2c5551ce3f3c9d2967a55b28adad858 100644 (file)
@@ -45,11 +45,6 @@ class TestAgeRestriction(unittest.TestCase):
             'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
             '505835.mp4', 2, old_age=25)
 
-    def test_pornotube(self):
-        self._assert_restricted(
-            'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing',
-            '1689755.flv', 13)
-
 
 if __name__ == '__main__':
     unittest.main()
index a009aa475442ae588405a99f432af6feade92836..412f3dbce8683766ba53061fb2aecee95339b829 100644 (file)
@@ -155,7 +155,7 @@ def generator(test_case):
             if is_playlist:
                 self.assertEqual(res_dict['_type'], 'playlist')
                 self.assertTrue('entries' in res_dict)
-                expect_info_dict(self, test_case.get('info_dict', {}), res_dict)
+                expect_info_dict(self, res_dict, test_case.get('info_dict', {}))
 
             if 'playlist_mincount' in test_case:
                 assertGreaterEqual(
@@ -204,7 +204,7 @@ def generator(test_case):
                 with io.open(info_json_fn, encoding='utf-8') as infof:
                     info_dict = json.load(infof)
 
-                expect_info_dict(self, tc.get('info_dict', {}), info_dict)
+                expect_info_dict(self, info_dict, tc.get('info_dict', {}))
         finally:
             try_rm_tcs_files()
             if is_playlist and res_dict is not None and res_dict.get('entries'):
index 7c4cd8218e218a6d3319747705c2893c4fbf4cd2..6336dd317ca5a77ebced2e55d3c49873b58ebda6 100644 (file)
@@ -17,6 +17,7 @@ from youtube_dl.extractor import (
     TEDIE,
     VimeoIE,
     WallaIE,
+    CeskaTelevizeIE,
 )
 
 
@@ -88,6 +89,14 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
         subtitles = self.getSubtitles()
         self.assertTrue(subtitles['it'] is not None)
 
+    def test_youtube_translated_subtitles(self):
+        # This video has a subtitles track, which can be translated
+        self.url = 'Ky9eprVWzlI'
+        self.DL.params['writeautomaticsub'] = True
+        self.DL.params['subtitleslangs'] = ['it']
+        subtitles = self.getSubtitles()
+        self.assertTrue(subtitles['it'] is not None)
+
     def test_youtube_nosubtitles(self):
         self.DL.expect_warning('video doesn\'t have subtitles')
         self.url = 'n5BB19UTcdA'
@@ -309,5 +318,32 @@ class TestWallaSubtitles(BaseTestSubtitles):
         self.assertEqual(len(subtitles), 0)
 
 
+class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
+    url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
+    IE = CeskaTelevizeIE
+
+    def test_list_subtitles(self):
+        self.DL.expect_warning('Automatic Captions not supported by this server')
+        self.DL.params['listsubtitles'] = True
+        info_dict = self.getInfoDict()
+        self.assertEqual(info_dict, None)
+
+    def test_allsubtitles(self):
+        self.DL.expect_warning('Automatic Captions not supported by this server')
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(set(subtitles.keys()), set(['cs']))
+        self.assertEqual(md5(subtitles['cs']), '9bf52d9549533c32c427e264bf0847d4')
+
+    def test_nosubtitles(self):
+        self.DL.expect_warning('video doesn\'t have subtitles')
+        self.url = 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220'
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(len(subtitles), 0)
+
+
 if __name__ == '__main__':
     unittest.main()
index 2cc431b0be5c58db1aaf9ac2de8e495090089f65..7f816698e7b2e20bc982b0eeb9638885edf19b48 100644 (file)
@@ -1,9 +1,13 @@
 from __future__ import unicode_literals
 
-import io
+# Allow direct execution
 import os
-import re
+import sys
 import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import io
+import re
 
 rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 
@@ -14,6 +18,9 @@ IGNORED_FILES = [
 ]
 
 
+from test.helper import assertRegexpMatches
+
+
 class TestUnicodeLiterals(unittest.TestCase):
     def test_all_files(self):
         for dirpath, _, filenames in os.walk(rootDir):
@@ -29,9 +36,10 @@ class TestUnicodeLiterals(unittest.TestCase):
 
                 if "'" not in code and '"' not in code:
                     continue
-                self.assertRegexpMatches(
+                assertRegexpMatches(
+                    self,
                     code,
-                    r'(?:#.*\n*)?from __future__ import (?:[a-z_]+,\s*)*unicode_literals',
+                    r'(?:(?:#.*?|\s*)\n)*from __future__ import (?:[a-z_]+,\s*)*unicode_literals',
                     'unicode_literals import  missing in %s' % fn)
 
                 m = re.search(r'(?<=\s)u[\'"](?!\)|,|$)', code)
index baa3a215657026245bf93960c53374bc3abdd61b..206760d995c98299c60458181f88b5017e65d964 100644 (file)
@@ -16,38 +16,41 @@ import json
 import xml.etree.ElementTree
 
 from youtube_dl.utils import (
+    age_restricted,
+    args_to_str,
     clean_html,
     DateRange,
+    detect_exe_version,
     encodeFilename,
+    escape_rfc3986,
+    escape_url,
     find_xpath_attr,
     fix_xml_ampersands,
-    orderedSet,
-    OnDemandPagedList,
     InAdvancePagedList,
+    intlist_to_bytes,
+    js_to_json,
+    limit_length,
+    OnDemandPagedList,
+    orderedSet,
     parse_duration,
+    parse_filesize,
+    parse_iso8601,
     read_batch_urls,
     sanitize_filename,
     shell_quote,
     smuggle_url,
     str_to_int,
+    strip_jsonp,
     struct_unpack,
     timeconvert,
     unescapeHTML,
     unified_strdate,
     unsmuggle_url,
+    uppercase_escape,
     url_basename,
     urlencode_postdata,
+    version_tuple,
     xpath_with_ns,
-    parse_iso8601,
-    strip_jsonp,
-    uppercase_escape,
-    limit_length,
-    escape_rfc3986,
-    escape_url,
-    js_to_json,
-    intlist_to_bytes,
-    args_to_str,
-    parse_filesize,
 )
 
 
@@ -76,6 +79,10 @@ class TestUtil(unittest.TestCase):
         tests = '\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0446\u0430'
         self.assertEqual(sanitize_filename(tests), tests)
 
+        self.assertEqual(
+            sanitize_filename('New World record at 0:12:34'),
+            'New World record at 0_12_34')
+
         forbidden = '"\0\\/'
         for fc in forbidden:
             for fbc in forbidden:
@@ -141,8 +148,12 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(unified_strdate('8/7/2009'), '20090708')
         self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214')
         self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')
+        self.assertEqual(unified_strdate('1968 12 10'), '19681210')
         self.assertEqual(unified_strdate('1968-12-10'), '19681210')
         self.assertEqual(unified_strdate('28/01/2014 21:00:00 +0100'), '20140128')
+        self.assertEqual(
+            unified_strdate('11/26/2014 11:30:00 AM PST', day_first=False),
+            '20141126')
 
     def test_find_xpath_attr(self):
         testxml = '''<root>
@@ -202,6 +213,8 @@ class TestUtil(unittest.TestCase):
 
     def test_parse_duration(self):
         self.assertEqual(parse_duration(None), None)
+        self.assertEqual(parse_duration(False), None)
+        self.assertEqual(parse_duration('invalid'), None)
         self.assertEqual(parse_duration('1'), 1)
         self.assertEqual(parse_duration('1337:12'), 80232)
         self.assertEqual(parse_duration('9:12:43'), 33163)
@@ -220,6 +233,9 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(parse_duration('0s'), 0)
         self.assertEqual(parse_duration('01:02:03.05'), 3723.05)
         self.assertEqual(parse_duration('T30M38S'), 1838)
+        self.assertEqual(parse_duration('5 s'), 5)
+        self.assertEqual(parse_duration('3 min'), 180)
+        self.assertEqual(parse_duration('2.5 hours'), 9000)
 
     def test_fix_xml_ampersands(self):
         self.assertEqual(
@@ -376,6 +392,30 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(parse_filesize('2 MiB'), 2097152)
         self.assertEqual(parse_filesize('5 GB'), 5000000000)
         self.assertEqual(parse_filesize('1.2Tb'), 1200000000000)
+        self.assertEqual(parse_filesize('1,24 KB'), 1240)
+
+    def test_version_tuple(self):
+        self.assertEqual(version_tuple('1'), (1,))
+        self.assertEqual(version_tuple('10.23.344'), (10, 23, 344))
+        self.assertEqual(version_tuple('10.1-6'), (10, 1, 6))  # avconv style
+
+    def test_detect_exe_version(self):
+        self.assertEqual(detect_exe_version('''ffmpeg version 1.2.1
+built on May 27 2013 08:37:26 with gcc 4.7 (Debian 4.7.3-4)
+configuration: --prefix=/usr --extra-'''), '1.2.1')
+        self.assertEqual(detect_exe_version('''ffmpeg version N-63176-g1fb4685
+built on May 15 2014 22:09:06 with gcc 4.8.2 (GCC)'''), 'N-63176-g1fb4685')
+        self.assertEqual(detect_exe_version('''X server found. dri2 connection failed!
+Trying to open render node...
+Success at /dev/dri/renderD128.
+ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
+
+    def test_age_restricted(self):
+        self.assertFalse(age_restricted(None, 10))  # unrestricted content
+        self.assertFalse(age_restricted(1, None))  # unrestricted policy
+        self.assertFalse(age_restricted(8, 10))
+        self.assertTrue(age_restricted(18, 14))
+        self.assertFalse(age_restricted(18, 18))
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/test_write_info_json.py b/test/test_write_info_json.py
deleted file mode 100644 (file)
index 0396ef2..0000000
+++ /dev/null
@@ -1,76 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-from __future__ import unicode_literals
-
-# Allow direct execution
-import os
-import sys
-import unittest
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-from test.helper import get_params
-
-
-import io
-import json
-
-import youtube_dl.YoutubeDL
-import youtube_dl.extractor
-
-
-class YoutubeDL(youtube_dl.YoutubeDL):
-    def __init__(self, *args, **kwargs):
-        super(YoutubeDL, self).__init__(*args, **kwargs)
-        self.to_stderr = self.to_screen
-
-params = get_params({
-    'writeinfojson': True,
-    'skip_download': True,
-    'writedescription': True,
-})
-
-
-TEST_ID = 'BaW_jenozKc'
-INFO_JSON_FILE = TEST_ID + '.info.json'
-DESCRIPTION_FILE = TEST_ID + '.mp4.description'
-EXPECTED_DESCRIPTION = '''test chars:  "'/\ä↭𝕐
-test URL: https://github.com/rg3/youtube-dl/issues/1892
-
-This is a test video for youtube-dl.
-
-For more information, contact phihag@phihag.de .'''
-
-
-class TestInfoJSON(unittest.TestCase):
-    def setUp(self):
-        # Clear old files
-        self.tearDown()
-
-    def test_info_json(self):
-        ie = youtube_dl.extractor.YoutubeIE()
-        ydl = YoutubeDL(params)
-        ydl.add_info_extractor(ie)
-        ydl.download([TEST_ID])
-        self.assertTrue(os.path.exists(INFO_JSON_FILE))
-        with io.open(INFO_JSON_FILE, 'r', encoding='utf-8') as jsonf:
-            jd = json.load(jsonf)
-        self.assertEqual(jd['upload_date'], '20121002')
-        self.assertEqual(jd['description'], EXPECTED_DESCRIPTION)
-        self.assertEqual(jd['id'], TEST_ID)
-        self.assertEqual(jd['extractor'], 'youtube')
-        self.assertEqual(jd['title'], '''youtube-dl test video "'/\ä↭𝕐''')
-        self.assertEqual(jd['uploader'], 'Philipp Hagemeister')
-
-        self.assertTrue(os.path.exists(DESCRIPTION_FILE))
-        with io.open(DESCRIPTION_FILE, 'r', encoding='utf-8') as descf:
-            descr = descf.read()
-        self.assertEqual(descr, EXPECTED_DESCRIPTION)
-
-    def tearDown(self):
-        if os.path.exists(INFO_JSON_FILE):
-            os.remove(INFO_JSON_FILE)
-        if os.path.exists(DESCRIPTION_FILE):
-            os.remove(DESCRIPTION_FILE)
-
-if __name__ == '__main__':
-    unittest.main()
index 49ec0137f18bc03474bde30d7c2948f257dbfa95..65d5ba3c3dfd1ca7d0b9adf52deb1a1715006cbd 100755 (executable)
Binary files a/youtube-dl and b/youtube-dl differ
index 08e2ae3dede2310e10a880875dbbfa77d77a642a..cb69deed11dee9827c1d1a21930c0b4a0335b97f 100644 (file)
@@ -4,7 +4,7 @@
 youtube\-dl \- download videos from youtube.com or other video platforms
 .SH SYNOPSIS
 .PP
-\f[B]youtube\-dl\f[] OPTIONS (#options) URL [URL...]
+\f[B]youtube\-dl\f[] [OPTIONS] URL [URL...]
 .SH DESCRIPTION
 .PP
 \f[B]youtube\-dl\f[] is a small command\-line program to download videos
@@ -33,10 +33,6 @@ redistribute it or use it however you like.
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ they\ would\ handle
 \-\-extractor\-descriptions\ \ \ \ \ \ \ \ \ Output\ descriptions\ of\ all\ supported
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ extractors
-\-\-proxy\ URL\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Use\ the\ specified\ HTTP/HTTPS\ proxy.\ Pass\ in
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ an\ empty\ string\ (\-\-proxy\ "")\ for\ direct
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ connection
-\-\-socket\-timeout\ None\ \ \ \ \ \ \ \ \ \ \ \ Time\ to\ wait\ before\ giving\ up,\ in\ seconds
 \-\-default\-search\ PREFIX\ \ \ \ \ \ \ \ \ \ Use\ this\ prefix\ for\ unqualified\ URLs.\ For
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ example\ "gvsearch2:"\ downloads\ two\ videos
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ from\ google\ videos\ for\ \ youtube\-dl\ "large
@@ -48,14 +44,30 @@ redistribute it or use it however you like.
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ this\ is\ not\ possible\ instead\ of\ searching.
 \-\-ignore\-config\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ read\ configuration\ files.\ When\ given
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ in\ the\ global\ configuration\ file\ /etc
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ /youtube\-dl.conf:\ do\ not\ read\ the\ user
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ configuration\ in\ ~/.config/youtube\-dl.conf
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (%APPDATA%/youtube\-dl/config.txt\ on
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Windows)
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ /youtube\-dl.conf:\ Do\ not\ read\ the\ user
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ configuration\ in\ ~/.config/youtube\-
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ dl/config\ (%APPDATA%/youtube\-dl/config.txt
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ on\ Windows)
 \-\-flat\-playlist\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ extract\ the\ videos\ of\ a\ playlist,
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ only\ list\ them.
 \f[]
 .fi
+.SS Network Options:
+.IP
+.nf
+\f[C]
+\-\-proxy\ URL\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Use\ the\ specified\ HTTP/HTTPS\ proxy.\ Pass\ in
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ an\ empty\ string\ (\-\-proxy\ "")\ for\ direct
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ connection
+\-\-socket\-timeout\ SECONDS\ \ \ \ \ \ \ \ \ Time\ to\ wait\ before\ giving\ up,\ in\ seconds
+\-\-source\-address\ IP\ \ \ \ \ \ \ \ \ \ \ \ \ \ Client\-side\ IP\ address\ to\ bind\ to
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (experimental)
+\-4,\ \-\-force\-ipv4\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Make\ all\ connections\ via\ IPv4
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (experimental)
+\-6,\ \-\-force\-ipv6\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Make\ all\ connections\ via\ IPv6
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (experimental)
+\f[]
+.fi
 .SS Video Selection:
 .IP
 .nf
@@ -104,6 +116,7 @@ redistribute it or use it however you like.
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ size.\ By\ default,\ the\ buffer\ size\ is
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ automatically\ resized\ from\ an\ initial\ value
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ of\ SIZE.
+\-\-playlist\-reverse\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Download\ playlist\ videos\ in\ reverse\ order
 \f[]
 .fi
 .SS Filesystem Options:
@@ -113,7 +126,6 @@ redistribute it or use it however you like.
 \-a,\ \-\-batch\-file\ FILE\ \ \ \ \ \ \ \ \ \ \ \ file\ containing\ URLs\ to\ download\ (\[aq]\-\[aq]\ for
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ stdin)
 \-\-id\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ only\ video\ ID\ in\ file\ name
-\-A,\ \-\-auto\-number\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ number\ downloaded\ files\ starting\ from\ 00000
 \-o,\ \-\-output\ TEMPLATE\ \ \ \ \ \ \ \ \ \ \ \ output\ filename\ template.\ Use\ %(title)s\ to
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ get\ the\ title,\ %(uploader)s\ for\ the
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ uploader\ name,\ %(uploader_id)s\ for\ the
@@ -147,6 +159,9 @@ redistribute it or use it however you like.
 \-\-restrict\-filenames\ \ \ \ \ \ \ \ \ \ \ \ \ Restrict\ filenames\ to\ only\ ASCII
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ characters,\ and\ avoid\ "&"\ and\ spaces\ in
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ filenames
+\-A,\ \-\-auto\-number\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ [deprecated;\ use\ \ \-o
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ "%(autonumber)s\-%(title)s.%(ext)s"\ ]\ number
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ downloaded\ files\ starting\ from\ 00000
 \-t,\ \-\-title\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ [deprecated]\ use\ title\ in\ file\ name
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (default)
 \-l,\ \-\-literal\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ [deprecated]\ alias\ of\ \-\-title
@@ -205,6 +220,8 @@ redistribute it or use it however you like.
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ for\ each\ command\-line\ argument.\ If\ the\ URL
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ refers\ to\ a\ playlist,\ dump\ the\ whole
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ playlist\ information\ in\ a\ single\ line.
+\-\-print\-json\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Be\ quiet\ and\ print\ the\ video\ information\ as
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ JSON\ (video\ is\ still\ being\ downloaded).
 \-\-newline\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ output\ progress\ bar\ as\ new\ lines
 \-\-no\-progress\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ print\ progress\ bar
 \-\-console\-title\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ display\ progress\ in\ console\ titlebar
@@ -215,6 +232,10 @@ redistribute it or use it however you like.
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ files\ in\ the\ current\ directory\ to\ debug
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ problems
 \-\-print\-traffic\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Display\ sent\ and\ read\ HTTP\ traffic
+\-C,\ \-\-call\-home\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Contact\ the\ youtube\-dl\ server\ for
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ debugging.
+\-\-no\-call\-home\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ NOT\ contact\ the\ youtube\-dl\ server\ for
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ debugging.
 \f[]
 .fi
 .SS Workarounds:
@@ -242,14 +263,15 @@ redistribute it or use it however you like.
 .nf
 \f[C]
 \-f,\ \-\-format\ FORMAT\ \ \ \ \ \ \ \ \ \ \ \ \ \ video\ format\ code,\ specify\ the\ order\ of
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ preference\ using\ slashes:\ \-f\ 22/17/18\ .\ \ \-f
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ mp4\ ,\ \-f\ m4a\ and\ \ \-f\ flv\ \ are\ also
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ supported.\ You\ can\ also\ use\ the\ special
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ names\ "best",\ "bestvideo",\ "bestaudio",
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ "worst",\ "worstvideo"\ and\ "worstaudio".\ By
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ default,\ youtube\-dl\ will\ pick\ the\ best
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ quality.\ Use\ commas\ to\ download\ multiple
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ audio\ formats,\ such\ as\ \-f
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ preference\ using\ slashes,\ as\ in\ \-f\ 22/17/18
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ .\ \ Instead\ of\ format\ codes,\ you\ can\ select
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ by\ extension\ for\ the\ extensions\ aac,\ m4a,
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ mp3,\ mp4,\ ogg,\ wav,\ webm.\ You\ can\ also\ use
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ the\ special\ names\ "best",\ "bestvideo",
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ "bestaudio",\ "worst".\ \ By\ default,\ youtube\-
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ dl\ will\ pick\ the\ best\ quality.\ Use\ commas
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ to\ download\ multiple\ audio\ formats,\ such\ as
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \-f
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 136/137/mp4/bestvideo,140/m4a/bestaudio.
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ You\ can\ merge\ the\ video\ and\ audio\ of\ two
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ formats\ into\ a\ single\ file\ using\ \-f\ <video\-
@@ -263,6 +285,10 @@ redistribute it or use it however you like.
 \-F,\ \-\-list\-formats\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ list\ all\ available\ formats
 \-\-youtube\-skip\-dash\-manifest\ \ \ \ \ Do\ not\ download\ the\ DASH\ manifest\ on
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ YouTube\ videos
+\-\-merge\-output\-format\ FORMAT\ \ \ \ \ If\ a\ merge\ is\ required\ (e.g.
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ bestvideo+bestaudio),\ output\ to\ given
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ container\ format.\ One\ of\ mkv,\ mp4,\ ogg,
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ webm,\ flv.Ignored\ if\ no\ merge\ is\ required
 \f[]
 .fi
 .SS Subtitle Options:
@@ -321,6 +347,11 @@ redistribute it or use it however you like.
 \-\-add\-metadata\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ write\ metadata\ to\ the\ video\ file
 \-\-xattrs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ write\ metadata\ to\ the\ video\ file\[aq]s\ xattrs
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (using\ dublin\ core\ and\ xdg\ standards)
+\-\-fixup\ POLICY\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (experimental)\ Automatically\ correct\ known
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ faults\ of\ the\ file.\ One\ of\ never\ (do
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ nothing),\ warn\ (only\ emit\ a\ warning),
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ detect_or_warn\ (check\ whether\ we\ can\ do
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ anything\ about\ it,\ warn\ otherwise)
 \-\-prefer\-avconv\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Prefer\ avconv\ over\ ffmpeg\ for\ running\ the
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ postprocessors\ (default)
 \-\-prefer\-ffmpeg\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Prefer\ ffmpeg\ over\ avconv\ for\ running\ the
@@ -339,7 +370,7 @@ and not copy the mtime) into \f[C]/etc/youtube\-dl.conf\f[] and/or
 \f[C]~/.config/youtube\-dl/config\f[].
 On Windows, the configuration file locations are
 \f[C]%APPDATA%\\youtube\-dl\\config.txt\f[] and
-\f[C]C:\\Users\\<Yourname>\\youtube\-dl.conf\f[].
+\f[C]C:\\Users\\<user\ name>\\youtube\-dl.conf\f[].
 .SH OUTPUT TEMPLATE
 .PP
 The \f[C]\-o\f[] option allows users to indicate a template for the
@@ -521,14 +552,33 @@ youtube URL, solving the CAPTCHA, and restart youtube\-dl.
 .PP
 Once the video is fully downloaded, use any video player, such as
 vlc (http://www.videolan.org) or mplayer (http://www.mplayerhq.hu/).
-.SS The links provided by youtube\-dl \-g are not working anymore
+.SS I extracted a video URL with \-g, but it does not play on another
+machine / in my web browser.
 .PP
-The URLs youtube\-dl outputs require the downloader to have the correct
-cookies.
+It depends a lot on the service.
+In many cases, requests for the video (to download/play it) must come
+from the same IP address and with the same cookies.
 Use the \f[C]\-\-cookies\f[] option to write the required cookies into a
 file, and advise your downloader to read cookies from that file.
 Some sites also require a common user agent to be used; use
 \f[C]\-\-dump\-user\-agent\f[] to see the one in use by youtube\-dl.
+.PP
+It may be beneficial to use IPv6; in some cases, the restrictions are
+only applied to IPv4.
+Some services (sometimes only for a subset of videos) do not restrict
+the video URL by IP address, cookie, or user\-agent, but these are the
+exception rather than the rule.
+.PP
+Please bear in mind that some URL protocols are \f[B]not\f[] supported
+by browsers out of the box, including RTMP.
+If you are using \-g, your own downloader must support these as well.
+.PP
+If you want to play the video on a machine that is not running
+youtube\-dl, you can relay the video content from the machine that runs
+youtube\-dl.
+You can use \f[C]\-o\ \-\f[] to let youtube\-dl stream a video to
+stdout, or simply allow the player to download the files written by
+youtube\-dl in turn.
 .SS ERROR: no fmt_url_map or conn information found in video info
 .PP
 youtube has switched to a new video info format in July 2011 which is
@@ -565,6 +615,51 @@ To recompile the executable, run \f[C]make\ youtube\-dl\f[].
 To run the exe you need to install first the Microsoft Visual C++ 2008
 Redistributable
 Package (http://www.microsoft.com/en-us/download/details.aspx?id=29).
+.SS On Windows, how should I set up ffmpeg and youtube\-dl? Where should
+I put the exe files?
+.PP
+If you put youtube\-dl and ffmpeg in the same directory that you\[aq]re
+running the command from, it will work, but that\[aq]s rather
+cumbersome.
+.PP
+To make a different directory work \- either for ffmpeg, or for
+youtube\-dl, or for both \- simply create the directory (say,
+\f[C]C:\\bin\f[], or \f[C]C:\\Users\\<User\ name>\\bin\f[]), put all the
+executables directly in there, and then set your PATH environment
+variable (https://www.java.com/en/download/help/path.xml) to include
+that directory.
+.PP
+From then on, after restarting your shell, you will be able to access
+both youtube\-dl and ffmpeg (and youtube\-dl will be able to find
+ffmpeg) by simply typing \f[C]youtube\-dl\f[] or \f[C]ffmpeg\f[], no
+matter what directory you\[aq]re in.
+.SS How can I detect whether a given URL is supported by youtube\-dl?
+.PP
+For one, have a look at the list of supported
+sites (docs/supportedsites).
+Note that it can sometimes happen that the site changes its URL scheme
+(say, from http://example.com/video/1234567 to http://example.com/v/1234567
+) and youtube\-dl reports a URL of a service in that list as
+unsupported.
+In that case, simply report a bug.
+.PP
+It is \f[I]not\f[] possible to detect whether a URL is supported or not.
+That\[aq]s because youtube\-dl contains a generic extractor which
+matches \f[B]all\f[] URLs.
+You may be tempted to disable, exclude, or remove the generic extractor,
+but the generic extractor not only allows users to extract videos from
+lots of websites that embed a video from another service, but may also
+be used to extract video from a service that it\[aq]s hosting itself.
+Therefore, we neither recommend nor support disabling, excluding, or
+removing the generic extractor.
+.PP
+If you want to find out whether a given URL is supported, simply call
+youtube\-dl with it.
+If you get no videos back, chances are the URL is either not referring
+to a video or unsupported.
+You can find out which by examining the output (if you run youtube\-dl
+on the console) or catching an \f[C]UnsupportedError\f[] exception if
+you run it from a Python program.
 .SH DEVELOPER INSTRUCTIONS
 .PP
 Most users do not need to build youtube\-dl and can download the
@@ -733,6 +828,46 @@ For a list of what can be done, have a look at
 youtube_dl/YoutubeDL.py (https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L69).
 For a start, if you want to intercept youtube\-dl\[aq]s output, set a
 \f[C]logger\f[] object.
+.PP
+Here\[aq]s a more complete example of a program that outputs only errors
+(and a short message after the download is finished), and
+downloads/converts the video to an mp3 file:
+.IP
+.nf
+\f[C]
+import\ youtube_dl
+
+
+class\ MyLogger(object):
+\ \ \ \ def\ debug(self,\ msg):
+\ \ \ \ \ \ \ \ pass
+
+\ \ \ \ def\ warning(self,\ msg):
+\ \ \ \ \ \ \ \ pass
+
+\ \ \ \ def\ error(self,\ msg):
+\ \ \ \ \ \ \ \ print(msg)
+
+
+def\ my_hook(d):
+\ \ \ \ if\ d[\[aq]status\[aq]]\ ==\ \[aq]finished\[aq]:
+\ \ \ \ \ \ \ \ print(\[aq]Done\ downloading,\ now\ converting\ ...\[aq])
+
+
+ydl_opts\ =\ {
+\ \ \ \ \[aq]format\[aq]:\ \[aq]bestaudio/best\[aq],
+\ \ \ \ \[aq]postprocessors\[aq]:\ [{
+\ \ \ \ \ \ \ \ \[aq]key\[aq]:\ \[aq]FFmpegExtractAudio\[aq],
+\ \ \ \ \ \ \ \ \[aq]preferredcodec\[aq]:\ \[aq]mp3\[aq],
+\ \ \ \ \ \ \ \ \[aq]preferredquality\[aq]:\ \[aq]192\[aq],
+\ \ \ \ }],
+\ \ \ \ \[aq]logger\[aq]:\ MyLogger(),
+\ \ \ \ \[aq]progress_hooks\[aq]:\ [my_hook],
+}
+with\ youtube_dl.YoutubeDL(ydl_opts)\ as\ ydl:
+\ \ \ \ ydl.download([\[aq]http://www.youtube.com/watch?v=BaW_jenozKc\[aq]])
+\f[]
+.fi
 .SH BUGS
 .PP
 Bugs and suggestions should be reported at:
@@ -740,17 +875,17 @@ Bugs and suggestions should be reported at:
 Unless you were prompted so or there is another pertinent reason (e.g.
 GitHub fails to accept the bug report), please do not send bug reports
 via personal email.
+For discussions, join us in the IRC channel #youtube\-dl on freenode.
+.PP
+\f[B]Please include the full output of youtube\-dl when run with
+\f[C]\-v\f[]\f[].
 .PP
-Please include the full output of the command when run with
-\f[C]\-\-verbose\f[].
 The output (including the first lines) contains important debugging
 information.
 Issues without the full output are often not reproducible and therefore
 do not get solved in short order, if ever.
 .PP
-For discussions, join us in the irc channel #youtube\-dl on freenode.
-.PP
-When you submit a request, please re\-read it once to avoid a couple of
+Please re\-read your issue once again to avoid a couple of common
 mistakes (you can and should use this as a checklist):
 .SS Is the description of the issue itself sufficient?
 .PP
index 9ee1b7ac12f7ef586fc15862d4e6aa4df60a22da..0bad8b653a6245b271308eeb535f2e690edeffe1 100644 (file)
@@ -4,7 +4,7 @@ __youtube_dl()
     COMPREPLY=()
     cur="${COMP_WORDS[COMP_CWORD]}"
     prev="${COMP_WORDS[COMP_CWORD-1]}"
-    opts="--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --proxy --socket-timeout --default-search --ignore-config --flat-playlist --playlist-start --playlist-end --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --no-playlist --age-limit --download-archive --include-ads --rate-limit --retries --buffer-size --no-resize-buffer --test --batch-file --id --auto-number --output --autonumber-size --restrict-filenames --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --write-thumbnail --load-info --cookies --cache-dir --no-cache-dir --rm-cache-dir --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --newline --no-progress --console-title --verbose --dump-intermediate-pages --write-pages --youtube-print-sig-code --print-traffic --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --format --all-formats --prefer-free-formats --max-quality --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --xattrs --prefer-avconv --prefer-ffmpeg --exec"
+    opts="--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --default-search --ignore-config --flat-playlist --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --playlist-start --playlist-end --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --no-playlist --age-limit --download-archive --include-ads --rate-limit --retries --buffer-size --no-resize-buffer --test --playlist-reverse --batch-file --id --output --autonumber-size --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --write-thumbnail --load-info --cookies --cache-dir --no-cache-dir --rm-cache-dir --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-intermediate-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --format --all-formats --prefer-free-formats --max-quality --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --xattrs --fixup --prefer-avconv --prefer-ffmpeg --exec"
     keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
     fileopts="-a|--batch-file|--download-archive|--cookies|--load-info"
     diropts="--cache-dir"
index 923fd60f4bc63eeacaf0e693635d412a76ca6352..76857d4103e4e329092f756787240371dc194b4e 100644 (file)
@@ -7,11 +7,14 @@ complete --command youtube-dl --long-option abort-on-error --description 'Abort
 complete --command youtube-dl --long-option dump-user-agent --description 'display the current browser identification'
 complete --command youtube-dl --long-option list-extractors --description 'List all supported extractors and the URLs they would handle'
 complete --command youtube-dl --long-option extractor-descriptions --description 'Output descriptions of all supported extractors'
-complete --command youtube-dl --long-option proxy --description 'Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection'
-complete --command youtube-dl --long-option socket-timeout --description 'Time to wait before giving up, in seconds'
 complete --command youtube-dl --long-option default-search --description 'Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.'
-complete --command youtube-dl --long-option ignore-config --description 'Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)'
+complete --command youtube-dl --long-option ignore-config --description 'Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: Do not read the user configuration in ~/.config/youtube-dl/config (%APPDATA%/youtube-dl/config.txt on Windows)'
 complete --command youtube-dl --long-option flat-playlist --description 'Do not extract the videos of a playlist, only list them.'
+complete --command youtube-dl --long-option proxy --description 'Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection'
+complete --command youtube-dl --long-option socket-timeout --description 'Time to wait before giving up, in seconds'
+complete --command youtube-dl --long-option source-address --description 'Client-side IP address to bind to (experimental)'
+complete --command youtube-dl --long-option force-ipv4 --short-option 4 --description 'Make all connections via IPv4 (experimental)'
+complete --command youtube-dl --long-option force-ipv6 --short-option 6 --description 'Make all connections via IPv6 (experimental)'
 complete --command youtube-dl --long-option playlist-start --description 'playlist video to start at (default is %default)'
 complete --command youtube-dl --long-option playlist-end --description 'playlist video to end at (default is last)'
 complete --command youtube-dl --long-option match-title --description 'download only matching titles (regex or caseless sub-string)'
@@ -33,12 +36,13 @@ complete --command youtube-dl --long-option retries --short-option R --descripti
 complete --command youtube-dl --long-option buffer-size --description 'size of download buffer (e.g. 1024 or 16K) (default is %default)'
 complete --command youtube-dl --long-option no-resize-buffer --description 'do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.'
 complete --command youtube-dl --long-option test
+complete --command youtube-dl --long-option playlist-reverse --description 'Download playlist videos in reverse order'
 complete --command youtube-dl --long-option batch-file --short-option a --description 'file containing URLs to download ('"'"'-'"'"' for stdin)' --require-parameter
 complete --command youtube-dl --long-option id --description 'use only video ID in file name'
-complete --command youtube-dl --long-option auto-number --short-option A --description 'number downloaded files starting from 00000'
 complete --command youtube-dl --long-option output --short-option o --description 'output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(format)s for the format description (like "22 - 1280x720" or "HD"), %(format_id)s for the unique id of the format (like Youtube'"'"'s itags: "137"), %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id, %(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in, %(playlist_index)s for the position in the playlist. %(height)s and %(width)s for the width and height of the video format. %(resolution)s for a textual description of the resolution of the video format. %% for a literal percent. Use - to output to stdout. Can also be used to download to a different directory, for example with -o '"'"'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s'"'"' .'
 complete --command youtube-dl --long-option autonumber-size --description 'Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given'
 complete --command youtube-dl --long-option restrict-filenames --description 'Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames'
+complete --command youtube-dl --long-option auto-number --short-option A --description '[deprecated; use  -o "%(autonumber)s-%(title)s.%(ext)s" ] number downloaded files starting from 00000'
 complete --command youtube-dl --long-option title --short-option t --description '[deprecated] use title in file name (default)'
 complete --command youtube-dl --long-option literal --short-option l --description '[deprecated] alias of --title'
 complete --command youtube-dl --long-option no-overwrites --short-option w --description 'do not overwrite files'
@@ -69,6 +73,7 @@ complete --command youtube-dl --long-option get-filename --description 'simulate
 complete --command youtube-dl --long-option get-format --description 'simulate, quiet but print output format'
 complete --command youtube-dl --long-option dump-json --short-option j --description 'simulate, quiet but print JSON information. See --output for a description of available keys.'
 complete --command youtube-dl --long-option dump-single-json --short-option J --description 'simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.'
+complete --command youtube-dl --long-option print-json --description 'Be quiet and print the video information as JSON (video is still being downloaded).'
 complete --command youtube-dl --long-option newline --description 'output progress bar as new lines'
 complete --command youtube-dl --long-option no-progress --description 'do not print progress bar'
 complete --command youtube-dl --long-option console-title --description 'display progress in console titlebar'
@@ -77,6 +82,8 @@ complete --command youtube-dl --long-option dump-intermediate-pages --descriptio
 complete --command youtube-dl --long-option write-pages --description 'Write downloaded intermediary pages to files in the current directory to debug problems'
 complete --command youtube-dl --long-option youtube-print-sig-code
 complete --command youtube-dl --long-option print-traffic --description 'Display sent and read HTTP traffic'
+complete --command youtube-dl --long-option call-home --short-option C --description 'Contact the youtube-dl server for debugging.'
+complete --command youtube-dl --long-option no-call-home --description 'Do NOT contact the youtube-dl server for debugging.'
 complete --command youtube-dl --long-option encoding --description 'Force the specified encoding (experimental)'
 complete --command youtube-dl --long-option no-check-certificate --description 'Suppress HTTPS certificate validation.'
 complete --command youtube-dl --long-option prefer-insecure --description 'Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)'
@@ -84,13 +91,14 @@ complete --command youtube-dl --long-option user-agent --description 'specify a
 complete --command youtube-dl --long-option referer --description 'specify a custom referer, use if the video access is restricted to one domain'
 complete --command youtube-dl --long-option add-header --description 'specify a custom HTTP header and its value, separated by a colon '"'"':'"'"'. You can use this option multiple times'
 complete --command youtube-dl --long-option bidi-workaround --description 'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH'
-complete --command youtube-dl --long-option format --short-option f --description 'video format code, specify the order of preference using slashes: -f 22/17/18 .  -f mp4 , -f m4a and  -f flv  are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality. Use commas to download multiple audio formats, such as -f  136/137/mp4/bestvideo,140/m4a/bestaudio. You can merge the video and audio of two formats into a single file using -f <video-format>+<audio-format> (requires ffmpeg or avconv), for example -f bestvideo+bestaudio.'
+complete --command youtube-dl --long-option format --short-option f --description 'video format code, specify the order of preference using slashes, as in -f 22/17/18 .  Instead of format codes, you can select by extension for the extensions aac, m4a, mp3, mp4, ogg, wav, webm. You can also use the special names "best", "bestvideo", "bestaudio", "worst".  By default, youtube-dl will pick the best quality. Use commas to download multiple audio formats, such as -f  136/137/mp4/bestvideo,140/m4a/bestaudio. You can merge the video and audio of two formats into a single file using -f <video-format>+<audio-format> (requires ffmpeg or avconv), for example -f bestvideo+bestaudio.'
 complete --command youtube-dl --long-option all-formats --description 'download all available video formats'
 complete --command youtube-dl --long-option prefer-free-formats --description 'prefer free video formats unless a specific one is requested'
 complete --command youtube-dl --long-option max-quality --description 'highest quality format to download'
 complete --command youtube-dl --long-option list-formats --short-option F --description 'list all available formats'
 complete --command youtube-dl --long-option youtube-include-dash-manifest
 complete --command youtube-dl --long-option youtube-skip-dash-manifest --description 'Do not download the DASH manifest on YouTube videos'
+complete --command youtube-dl --long-option merge-output-format --description 'If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv.Ignored if no merge is required'
 complete --command youtube-dl --long-option write-sub --description 'write subtitle file'
 complete --command youtube-dl --long-option write-auto-sub --description 'write automatic subtitle file (youtube only)'
 complete --command youtube-dl --long-option all-subs --description 'downloads all the available subtitles of the video'
@@ -112,6 +120,7 @@ complete --command youtube-dl --long-option embed-subs --description 'embed subt
 complete --command youtube-dl --long-option embed-thumbnail --description 'embed thumbnail in the audio as cover art'
 complete --command youtube-dl --long-option add-metadata --description 'write metadata to the video file'
 complete --command youtube-dl --long-option xattrs --description 'write metadata to the video file'"'"'s xattrs (using dublin core and xdg standards)'
+complete --command youtube-dl --long-option fixup --description '(experimental) Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn(check whether we can do anything about it, warn otherwise'
 complete --command youtube-dl --long-option prefer-avconv --description 'Prefer avconv over ffmpeg for running the postprocessors (default)'
 complete --command youtube-dl --long-option prefer-ffmpeg --description 'Prefer ffmpeg over avconv for running the postprocessors'
 complete --command youtube-dl --long-option exec --description 'Execute a command on the file after downloading, similar to find'"'"'s -exec syntax. Example: --exec '"'"'adb push {} /sdcard/Music/ && rm {}'"'"''
index af7a61a8c9b1836740a203d66edef354bcc88da1..b4cef180d3d8329f026bed7d1f10105b810b84a9 100644 (file)
@@ -19,7 +19,7 @@ __youtube_dl() {
             elif [[ ${prev} == "--recode-video" ]]; then
                 _arguments '*: :(mp4 flv ogg webm mkv)'
             else
-                _arguments '*: :(--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --proxy --socket-timeout --default-search --ignore-config --flat-playlist --playlist-start --playlist-end --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --no-playlist --age-limit --download-archive --include-ads --rate-limit --retries --buffer-size --no-resize-buffer --test --batch-file --id --auto-number --output --autonumber-size --restrict-filenames --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --write-thumbnail --load-info --cookies --cache-dir --no-cache-dir --rm-cache-dir --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --newline --no-progress --console-title --verbose --dump-intermediate-pages --write-pages --youtube-print-sig-code --print-traffic --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --format --all-formats --prefer-free-formats --max-quality --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --xattrs --prefer-avconv --prefer-ffmpeg --exec)'
+                _arguments '*: :(--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --default-search --ignore-config --flat-playlist --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --playlist-start --playlist-end --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --no-playlist --age-limit --download-archive --include-ads --rate-limit --retries --buffer-size --no-resize-buffer --test --playlist-reverse --batch-file --id --output --autonumber-size --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --write-thumbnail --load-info --cookies --cache-dir --no-cache-dir --rm-cache-dir --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-intermediate-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --format --all-formats --prefer-free-formats --max-quality --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --xattrs --fixup --prefer-avconv --prefer-ffmpeg --exec)'
             fi
         ;;
     esac
index 21c7c298a830dd8b4a1b4651419506aaa8c8a8bc..772fddd4542f6726b57878a950410c90ee1a640c 100755 (executable)
@@ -7,6 +7,7 @@ import collections
 import datetime
 import errno
 import io
+import itertools
 import json
 import locale
 import os
@@ -26,6 +27,7 @@ from .compat import (
     compat_cookiejar,
     compat_expanduser,
     compat_http_client,
+    compat_kwargs,
     compat_str,
     compat_urllib_error,
     compat_urllib_request,
@@ -56,17 +58,24 @@ from .utils import (
     takewhile_inclusive,
     UnavailableVideoError,
     url_basename,
+    version_tuple,
     write_json_file,
     write_string,
     YoutubeDLHandler,
     prepend_extension,
     args_to_str,
+    age_restricted,
 )
 from .cache import Cache
 from .extractor import get_info_extractor, gen_extractors
 from .downloader import get_suitable_downloader
 from .downloader.rtmp import rtmpdump_version
-from .postprocessor import FFmpegMergerPP, FFmpegPostProcessor
+from .postprocessor import (
+    FFmpegFixupStretchedPP,
+    FFmpegMergerPP,
+    FFmpegPostProcessor,
+    get_postprocessor,
+)
 from .version import __version__
 
 
@@ -115,7 +124,7 @@ class YoutubeDL(object):
     dump_single_json:  Force printing the info_dict of the whole playlist
                        (or video) as a single JSON line.
     simulate:          Do not download the video files.
-    format:            Video format code.
+    format:            Video format code. See options.py for more information.
     format_limit:      Highest quality format to try.
     outtmpl:           Template for output names.
     restrictfilenames: Do not allow "&" and spaces in file names
@@ -123,6 +132,7 @@ class YoutubeDL(object):
     nooverwrites:      Prevent overwriting files.
     playliststart:     Playlist item to start at.
     playlistend:       Playlist item to end at.
+    playlistreverse:   Download playlist items in reverse order.
     matchtitle:        Download only matching titles.
     rejecttitle:       Reject downloads for matching titles.
     logger:            Log messages to a logging.Logger instance.
@@ -174,6 +184,38 @@ class YoutubeDL(object):
     extract_flat:      Do not resolve URLs, return the immediate result.
                        Pass in 'in_playlist' to only show this behavior for
                        playlist items.
+    postprocessors:    A list of dictionaries, each with an entry
+                       * key:  The name of the postprocessor. See
+                               youtube_dl/postprocessor/__init__.py for a list.
+                       as well as any further keyword arguments for the
+                       postprocessor.
+    progress_hooks:    A list of functions that get called on download
+                       progress, with a dictionary with the entries
+                       * filename: The final filename
+                       * status: One of "downloading" and "finished"
+
+                       The dict may also have some of the following entries:
+
+                       * downloaded_bytes: Bytes on disk
+                       * total_bytes: Size of the whole file, None if unknown
+                       * tmpfilename: The filename we're currently writing to
+                       * eta: The estimated time in seconds, None if unknown
+                       * speed: The download speed in bytes/second, None if
+                                unknown
+
+                       Progress hooks are guaranteed to be called at least once
+                       (with status "finished") if the download is successful.
+    merge_output_format: Extension to use when merging formats.
+    fixup:             Automatically correct known faults of the file.
+                       One of:
+                       - "never": do nothing
+                       - "warn": only emit a warning
+                       - "detect_or_warn": check whether we can do anything
+                                           about it, warn otherwise
+    source_address:    (Experimental) Client-side IP address to bind to.
+    call_home:         Boolean, true iff we are allowed to contact the
+                       youtube-dl servers for debugging.
+
 
     The following parameters are not used by YoutubeDL itself, they are used by
     the FileDownloader:
@@ -254,6 +296,16 @@ class YoutubeDL(object):
             self.print_debug_header()
             self.add_default_info_extractors()
 
+        for pp_def_raw in self.params.get('postprocessors', []):
+            pp_class = get_postprocessor(pp_def_raw['key'])
+            pp_def = dict(pp_def_raw)
+            del pp_def['key']
+            pp = pp_class(self, **compat_kwargs(pp_def))
+            self.add_post_processor(pp)
+
+        for ph in self.params.get('progress_hooks', []):
+            self.add_progress_hook(ph)
+
     def warn_if_short_id(self, argv):
         # short YouTube ID starting with dash?
         idxs = [
@@ -511,13 +563,8 @@ class YoutubeDL(object):
             max_views = self.params.get('max_views')
             if max_views is not None and view_count > max_views:
                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
-        age_limit = self.params.get('age_limit')
-        if age_limit is not None:
-            actual_age_limit = info_dict.get('age_limit')
-            if actual_age_limit is None:
-                actual_age_limit = 0
-            if age_limit < actual_age_limit:
-                return 'Skipping "' + title + '" because it is age restricted'
+        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
+            return 'Skipping "%s" because it is age restricted' % title
         if self.in_download_archive(info_dict):
             return '%s has already been recorded in archive' % video_title
         return None
@@ -621,23 +668,15 @@ class YoutubeDL(object):
                 ie_result['url'], ie_key=ie_result.get('ie_key'),
                 extra_info=extra_info, download=False, process=False)
 
-            def make_result(embedded_info):
-                new_result = ie_result.copy()
-                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
-                          'entries', 'ie_key', 'duration',
-                          'subtitles', 'annotations', 'format',
-                          'thumbnail', 'thumbnails'):
-                    if f in new_result:
-                        del new_result[f]
-                    if f in embedded_info:
-                        new_result[f] = embedded_info[f]
-                return new_result
-            new_result = make_result(info)
+            force_properties = dict(
+                (k, v) for k, v in ie_result.items() if v is not None)
+            for f in ('_type', 'url'):
+                if f in force_properties:
+                    del force_properties[f]
+            new_result = info.copy()
+            new_result.update(force_properties)
 
             assert new_result.get('_type') != 'url_transparent'
-            if new_result.get('_type') == 'compat_list':
-                new_result['entries'] = [
-                    make_result(e) for e in new_result['entries']]
 
             return self.process_ie_result(
                 new_result, download=download, extra_info=extra_info)
@@ -654,24 +693,34 @@ class YoutubeDL(object):
             if playlistend == -1:
                 playlistend = None
 
-            if isinstance(ie_result['entries'], list):
-                n_all_entries = len(ie_result['entries'])
-                entries = ie_result['entries'][playliststart:playlistend]
+            ie_entries = ie_result['entries']
+            if isinstance(ie_entries, list):
+                n_all_entries = len(ie_entries)
+                entries = ie_entries[playliststart:playlistend]
                 n_entries = len(entries)
                 self.to_screen(
                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
-            else:
-                assert isinstance(ie_result['entries'], PagedList)
-                entries = ie_result['entries'].getslice(
+            elif isinstance(ie_entries, PagedList):
+                entries = ie_entries.getslice(
                     playliststart, playlistend)
                 n_entries = len(entries)
                 self.to_screen(
                     "[%s] playlist %s: Downloading %d videos" %
                     (ie_result['extractor'], playlist, n_entries))
+            else:  # iterable
+                entries = list(itertools.islice(
+                    ie_entries, playliststart, playlistend))
+                n_entries = len(entries)
+                self.to_screen(
+                    "[%s] playlist %s: Downloading %d videos" %
+                    (ie_result['extractor'], playlist, n_entries))
+
+            if self.params.get('playlistreverse', False):
+                entries = entries[::-1]
 
             for i, entry in enumerate(entries, 1):
-                self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
+                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                 extra = {
                     'n_entries': n_entries,
                     'playlist': playlist,
@@ -749,7 +798,7 @@ class YoutubeDL(object):
             if video_formats:
                 return video_formats[0]
         else:
-            extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
+            extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
             if format_spec in extensions:
                 filter_f = lambda f: f['ext'] == format_spec
             else:
@@ -787,6 +836,10 @@ class YoutubeDL(object):
             info_dict['display_id'] = info_dict['id']
 
         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
+            # Working around negative timestamps in Windows
+            # (see http://bugs.python.org/issue1646728)
+            if info_dict['timestamp'] < 0 and os.name == 'nt':
+                info_dict['timestamp'] = 0
             upload_date = datetime.datetime.utcfromtimestamp(
                 info_dict['timestamp'])
             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
@@ -868,10 +921,24 @@ class YoutubeDL(object):
                                                   'contain the video, try using '
                                                   '"-f %s+%s"' % (format_2, format_1))
                                 return
+                            output_ext = (
+                                formats_info[0]['ext']
+                                if self.params.get('merge_output_format') is None
+                                else self.params['merge_output_format'])
                             selected_format = {
                                 'requested_formats': formats_info,
                                 'format': rf,
                                 'ext': formats_info[0]['ext'],
+                                'width': formats_info[0].get('width'),
+                                'height': formats_info[0].get('height'),
+                                'resolution': formats_info[0].get('resolution'),
+                                'fps': formats_info[0].get('fps'),
+                                'vcodec': formats_info[0].get('vcodec'),
+                                'vbr': formats_info[0].get('vbr'),
+                                'stretched_ratio': formats_info[0].get('stretched_ratio'),
+                                'acodec': formats_info[1].get('acodec'),
+                                'abr': formats_info[1].get('abr'),
+                                'ext': output_ext,
                             }
                         else:
                             selected_format = None
@@ -930,8 +997,12 @@ class YoutubeDL(object):
         if self.params.get('forceid', False):
             self.to_stdout(info_dict['id'])
         if self.params.get('forceurl', False):
-            # For RTMP URLs, also include the playpath
-            self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
+            if info_dict.get('requested_formats') is not None:
+                for f in info_dict['requested_formats']:
+                    self.to_stdout(f['url'] + f.get('play_path', ''))
+            else:
+                # For RTMP URLs, also include the playpath
+                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
             self.to_stdout(info_dict['thumbnail'])
         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
@@ -967,13 +1038,13 @@ class YoutubeDL(object):
             descfn = filename + '.description'
             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                 self.to_screen('[info] Video description is already present')
+            elif info_dict.get('description') is None:
+                self.report_warning('There\'s no description to write.')
             else:
                 try:
                     self.to_screen('[info] Writing video description to: ' + descfn)
                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                         descfile.write(info_dict['description'])
-                except (KeyError, TypeError):
-                    self.report_warning('There\'s no description to write.')
                 except (OSError, IOError):
                     self.report_error('Cannot write description file ' + descfn)
                     return
@@ -1050,58 +1121,75 @@ class YoutubeDL(object):
                                             (info_dict['thumbnail'], compat_str(err)))
 
         if not self.params.get('skip_download', False):
-            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
-                success = True
-            else:
-                try:
-                    def dl(name, info):
-                        fd = get_suitable_downloader(info)(self, self.params)
-                        for ph in self._progress_hooks:
-                            fd.add_progress_hook(ph)
-                        if self.params.get('verbose'):
-                            self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
-                        return fd.download(name, info)
-                    if info_dict.get('requested_formats') is not None:
-                        downloaded = []
-                        success = True
-                        merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
-                        if not merger._executable:
-                            postprocessors = []
-                            self.report_warning('You have requested multiple '
-                                                'formats but ffmpeg or avconv are not installed.'
-                                                ' The formats won\'t be merged')
-                        else:
-                            postprocessors = [merger]
-                        for f in info_dict['requested_formats']:
-                            new_info = dict(info_dict)
-                            new_info.update(f)
-                            fname = self.prepare_filename(new_info)
-                            fname = prepend_extension(fname, 'f%s' % f['format_id'])
-                            downloaded.append(fname)
-                            partial_success = dl(fname, new_info)
-                            success = success and partial_success
-                        info_dict['__postprocessors'] = postprocessors
-                        info_dict['__files_to_merge'] = downloaded
+            try:
+                def dl(name, info):
+                    fd = get_suitable_downloader(info)(self, self.params)
+                    for ph in self._progress_hooks:
+                        fd.add_progress_hook(ph)
+                    if self.params.get('verbose'):
+                        self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
+                    return fd.download(name, info)
+                if info_dict.get('requested_formats') is not None:
+                    downloaded = []
+                    success = True
+                    merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
+                    if not merger._executable:
+                        postprocessors = []
+                        self.report_warning('You have requested multiple '
+                                            'formats but ffmpeg or avconv are not installed.'
+                                            ' The formats won\'t be merged')
                     else:
-                        # Just a single file
-                        success = dl(filename, info_dict)
-                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-                    self.report_error('unable to download video data: %s' % str(err))
-                    return
-                except (OSError, IOError) as err:
-                    raise UnavailableVideoError(err)
-                except (ContentTooShortError, ) as err:
-                    self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
-                    return
+                        postprocessors = [merger]
+                    for f in info_dict['requested_formats']:
+                        new_info = dict(info_dict)
+                        new_info.update(f)
+                        fname = self.prepare_filename(new_info)
+                        fname = prepend_extension(fname, 'f%s' % f['format_id'])
+                        downloaded.append(fname)
+                        partial_success = dl(fname, new_info)
+                        success = success and partial_success
+                    info_dict['__postprocessors'] = postprocessors
+                    info_dict['__files_to_merge'] = downloaded
+                else:
+                    # Just a single file
+                    success = dl(filename, info_dict)
+            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+                self.report_error('unable to download video data: %s' % str(err))
+                return
+            except (OSError, IOError) as err:
+                raise UnavailableVideoError(err)
+            except (ContentTooShortError, ) as err:
+                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
+                return
 
             if success:
+                # Fixup content
+                stretched_ratio = info_dict.get('stretched_ratio')
+                if stretched_ratio is not None and stretched_ratio != 1:
+                    fixup_policy = self.params.get('fixup')
+                    if fixup_policy is None:
+                        fixup_policy = 'detect_or_warn'
+                    if fixup_policy == 'warn':
+                        self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
+                            info_dict['id'], stretched_ratio))
+                    elif fixup_policy == 'detect_or_warn':
+                        stretched_pp = FFmpegFixupStretchedPP(self)
+                        if stretched_pp.available:
+                            info_dict.setdefault('__postprocessors', [])
+                            info_dict['__postprocessors'].append(stretched_pp)
+                        else:
+                            self.report_warning(
+                                '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
+                                    info_dict['id'], stretched_ratio))
+                    else:
+                        assert fixup_policy == 'ignore'
+
                 try:
                     self.post_process(filename, info_dict)
                 except (PostProcessingError) as err:
                     self.report_error('postprocessing: %s' % str(err))
                     return
-
-        self.record_download_archive(info_dict)
+                self.record_download_archive(info_dict)
 
     def download(self, url_list):
         """Download a given list of URLs."""
@@ -1144,14 +1232,15 @@ class YoutubeDL(object):
         """Run all the postprocessors on the given file."""
         info = dict(ie_info)
         info['filepath'] = filename
-        keep_video = None
         pps_chain = []
         if ie_info.get('__postprocessors') is not None:
             pps_chain.extend(ie_info['__postprocessors'])
         pps_chain.extend(self._pps)
         for pp in pps_chain:
+            keep_video = None
+            old_filename = info['filepath']
             try:
-                keep_video_wish, new_info = pp.run(info)
+                keep_video_wish, info = pp.run(info)
                 if keep_video_wish is not None:
                     if keep_video_wish:
                         keep_video = keep_video_wish
@@ -1160,12 +1249,12 @@ class YoutubeDL(object):
                         keep_video = keep_video_wish
             except PostProcessingError as e:
                 self.report_error(e.msg)
-        if keep_video is False and not self.params.get('keepvideo', False):
-            try:
-                self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
-                os.remove(encodeFilename(filename))
-            except (IOError, OSError):
-                self.report_warning('Unable to remove downloaded video file')
+            if keep_video is False and not self.params.get('keepvideo', False):
+                try:
+                    self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
+                    os.remove(encodeFilename(old_filename))
+                except (IOError, OSError):
+                    self.report_warning('Unable to remove downloaded video file')
 
     def _make_archive_id(self, info_dict):
         # Future-proof against any change in case
@@ -1285,7 +1374,9 @@ class YoutubeDL(object):
         formats = info_dict.get('formats', [info_dict])
         idlen = max(len('format code'),
                     max(len(f['format_id']) for f in formats))
-        formats_s = [line(f, idlen) for f in formats]
+        formats_s = [
+            line(f, idlen) for f in formats
+            if f.get('preference') is None or f['preference'] >= -1000]
         if len(formats) > 1:
             formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
@@ -1374,6 +1465,17 @@ class YoutubeDL(object):
                 proxy_map.update(handler.proxies)
         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
 
+        if self.params.get('call_home', False):
+            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
+            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
+            latest_version = self.urlopen(
+                'https://yt-dl.org/latest/version').read().decode('utf-8')
+            if version_tuple(latest_version) > version_tuple(__version__):
+                self.report_warning(
+                    'You are using an outdated version (newest version: %s)! '
+                    'See https://yt-dl.org/update if you need help updating.' %
+                    latest_version)
+
     def _setup_opener(self):
         timeout_val = self.params.get('socket_timeout')
         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
@@ -1404,9 +1506,8 @@ class YoutubeDL(object):
         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
 
         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
-        https_handler = make_HTTPS_handler(
-            self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
-        ydlh = YoutubeDLHandler(debuglevel=debuglevel)
+        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
+        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
         opener = compat_urllib_request.build_opener(
             https_handler, proxy_handler, cookie_processor, ydlh)
         # Delete the default user-agent header, which would otherwise apply in
index 77b3384a05fa45d6d4cada65decdb5588ade9100..ddf6260d1e9a4a11fd0140ccbc8d6a4c324df78b 100644 (file)
@@ -38,18 +38,8 @@ from .update import update_self
 from .downloader import (
     FileDownloader,
 )
-from .extractor import gen_extractors
+from .extractor import gen_extractors, list_extractors
 from .YoutubeDL import YoutubeDL
-from .postprocessor import (
-    AtomicParsleyPP,
-    FFmpegAudioFixPP,
-    FFmpegMetadataPP,
-    FFmpegVideoConvertor,
-    FFmpegExtractAudioPP,
-    FFmpegEmbedSubtitlePP,
-    XAttrMetadataPP,
-    ExecAfterDownloadPP,
-)
 
 
 def _real_main(argv=None):
@@ -105,24 +95,22 @@ def _real_main(argv=None):
     _enc = preferredencoding()
     all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
 
-    extractors = gen_extractors()
-
     if opts.list_extractors:
-        for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
+        for ie in list_extractors(opts.age_limit):
             compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
             matchedUrls = [url for url in all_urls if ie.suitable(url)]
             for mu in matchedUrls:
                 compat_print('  ' + mu)
         sys.exit(0)
     if opts.list_extractor_descriptions:
-        for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
+        for ie in list_extractors(opts.age_limit):
             if not ie._WORKING:
                 continue
             desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
             if desc is False:
                 continue
             if hasattr(ie, 'SEARCH_KEY'):
-                _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny')
+                _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow')
                 _COUNTS = ('', '5', '10', 'all')
                 desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
             compat_print(desc)
@@ -178,6 +166,7 @@ def _real_main(argv=None):
     if opts.recodevideo is not None:
         if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv']:
             parser.error('invalid video recode format specified')
+
     if opts.date is not None:
         date = DateRange.day(opts.date)
     else:
@@ -209,16 +198,54 @@ def _real_main(argv=None):
                      ' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
                      ' template'.format(outtmpl))
 
-    any_printing = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
+    any_getting = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
+    any_printing = opts.print_json
     download_archive_fn = compat_expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive
 
+    # PostProcessors
+    postprocessors = []
+    # Add the metadata pp first, the other pps will copy it
+    if opts.addmetadata:
+        postprocessors.append({'key': 'FFmpegMetadata'})
+    if opts.extractaudio:
+        postprocessors.append({
+            'key': 'FFmpegExtractAudio',
+            'preferredcodec': opts.audioformat,
+            'preferredquality': opts.audioquality,
+            'nopostoverwrites': opts.nopostoverwrites,
+        })
+    if opts.recodevideo:
+        postprocessors.append({
+            'key': 'FFmpegVideoConvertor',
+            'preferedformat': opts.recodevideo,
+        })
+    if opts.embedsubtitles:
+        postprocessors.append({
+            'key': 'FFmpegEmbedSubtitle',
+            'subtitlesformat': opts.subtitlesformat,
+        })
+    if opts.xattrs:
+        postprocessors.append({'key': 'XAttrMetadata'})
+    if opts.embedthumbnail:
+        if not opts.addmetadata:
+            postprocessors.append({'key': 'FFmpegAudioFix'})
+        postprocessors.append({'key': 'AtomicParsley'})
+    # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
+    # So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
+    if opts.exec_cmd:
+        postprocessors.append({
+            'key': 'ExecAfterDownload',
+            'verboseOutput': opts.verbose,
+            'exec_cmd': opts.exec_cmd,
+        })
+
     ydl_opts = {
         'usenetrc': opts.usenetrc,
         'username': opts.username,
         'password': opts.password,
         'twofactor': opts.twofactor,
         'videopassword': opts.videopassword,
-        'quiet': (opts.quiet or any_printing),
+        'quiet': (opts.quiet or any_getting or any_printing),
         'no_warnings': opts.no_warnings,
         'forceurl': opts.geturl,
         'forcetitle': opts.gettitle,
@@ -228,9 +255,9 @@ def _real_main(argv=None):
         'forceduration': opts.getduration,
         'forcefilename': opts.getfilename,
         'forceformat': opts.getformat,
-        'forcejson': opts.dumpjson,
+        'forcejson': opts.dumpjson or opts.print_json,
         'dump_single_json': opts.dump_single_json,
-        'simulate': opts.simulate or any_printing,
+        'simulate': opts.simulate or any_getting,
         'skip_download': opts.skip_download,
         'format': opts.format,
         'format_limit': opts.format_limit,
@@ -249,6 +276,7 @@ def _real_main(argv=None):
         'progress_with_newline': opts.progress_with_newline,
         'playliststart': opts.playliststart,
         'playlistend': opts.playlistend,
+        'playlistreverse': opts.playlist_reverse,
         'noplaylist': opts.noplaylist,
         'logtostderr': opts.outtmpl == '-',
         'consoletitle': opts.consoletitle,
@@ -296,32 +324,14 @@ def _real_main(argv=None):
         'encoding': opts.encoding,
         'exec_cmd': opts.exec_cmd,
         'extract_flat': opts.extract_flat,
+        'merge_output_format': opts.merge_output_format,
+        'postprocessors': postprocessors,
+        'fixup': opts.fixup,
+        'source_address': opts.source_address,
+        'call_home': opts.call_home,
     }
 
     with YoutubeDL(ydl_opts) as ydl:
-        # PostProcessors
-        # Add the metadata pp first, the other pps will copy it
-        if opts.addmetadata:
-            ydl.add_post_processor(FFmpegMetadataPP())
-        if opts.extractaudio:
-            ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
-        if opts.recodevideo:
-            ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
-        if opts.embedsubtitles:
-            ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
-        if opts.xattrs:
-            ydl.add_post_processor(XAttrMetadataPP())
-        if opts.embedthumbnail:
-            if not opts.addmetadata:
-                ydl.add_post_processor(FFmpegAudioFixPP())
-            ydl.add_post_processor(AtomicParsleyPP())
-
-        # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
-        # So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
-        if opts.exec_cmd:
-            ydl.add_post_processor(ExecAfterDownloadPP(
-                verboseOutput=opts.verbose, exec_cmd=opts.exec_cmd))
-
         # Update version
         if opts.update_self:
             update_self(ydl.to_screen, opts.verbose)
@@ -359,3 +369,5 @@ def main(argv=None):
         sys.exit('ERROR: fixed output name but more than one file to download')
     except KeyboardInterrupt:
         sys.exit('\nERROR: Interrupted by user')
+
+__all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors']
index 27596687d0d2c354990e6112027e054865c1c79c..4453b34fceea50861ccedec0ec177a12f424274d 100644 (file)
@@ -4,6 +4,7 @@ import getpass
 import optparse
 import os
 import re
+import socket
 import subprocess
 import sys
 
@@ -247,7 +248,7 @@ else:
                 userhome = compat_getenv('HOME')
             elif 'USERPROFILE' in os.environ:
                 userhome = compat_getenv('USERPROFILE')
-            elif not 'HOMEPATH' in os.environ:
+            elif 'HOMEPATH' not in os.environ:
                 return path
             else:
                 try:
@@ -297,7 +298,9 @@ else:
 
 # Old 2.6 and 2.7 releases require kwargs to be bytes
 try:
-    (lambda x: x)(**{'x': 0})
+    def _testfunc(x):
+        pass
+    _testfunc(**{'x': 0})
 except TypeError:
     def compat_kwargs(kwargs):
         return dict((bytes(k), v) for k, v in kwargs.items())
@@ -305,6 +308,32 @@ else:
     compat_kwargs = lambda kwargs: kwargs
 
 
+if sys.version_info < (2, 7):
+    def compat_socket_create_connection(address, timeout, source_address=None):
+        host, port = address
+        err = None
+        for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
+            af, socktype, proto, canonname, sa = res
+            sock = None
+            try:
+                sock = socket.socket(af, socktype, proto)
+                sock.settimeout(timeout)
+                if source_address:
+                    sock.bind(source_address)
+                sock.connect(sa)
+                return sock
+            except socket.error as _:
+                err = _
+                if sock is not None:
+                    sock.close()
+        if err is not None:
+            raise err
+        else:
+            raise socket.error("getaddrinfo returns an empty list")
+else:
+    compat_socket_create_connection = socket.create_connection
+
+
 # Fix https://github.com/rg3/youtube-dl/issues/4223
 # See http://bugs.python.org/issue9161 for what is broken
 def workaround_optparse_bug9161():
@@ -340,6 +369,7 @@ __all__ = [
     'compat_ord',
     'compat_parse_qs',
     'compat_print',
+    'compat_socket_create_connection',
     'compat_str',
     'compat_subprocess_get_DEVNULL',
     'compat_urllib_error',
index c0af50c59182ad560a23264738d027d4ad3c8697..de6b9311d59b3a270cc0a7dc58d05b4f0692e896 100644 (file)
@@ -5,8 +5,8 @@ import re
 import sys
 import time
 
+from ..compat import compat_str
 from ..utils import (
-    compat_str,
     encodeFilename,
     format_bytes,
     timeconvert,
@@ -80,6 +80,8 @@ class FileDownloader(object):
     def calc_eta(start, now, total, current):
         if total is None:
             return None
+        if now is None:
+            now = time.time()
         dif = now - start
         if current == 0 or dif < 0.001:  # One millisecond
             return None
@@ -146,18 +148,19 @@ class FileDownloader(object):
     def report_error(self, *args, **kargs):
         self.ydl.report_error(*args, **kargs)
 
-    def slow_down(self, start_time, byte_counter):
+    def slow_down(self, start_time, now, byte_counter):
         """Sleep if the download speed is over the rate limit."""
         rate_limit = self.params.get('ratelimit', None)
         if rate_limit is None or byte_counter == 0:
             return
-        now = time.time()
+        if now is None:
+            now = time.time()
         elapsed = now - start_time
         if elapsed <= 0.0:
             return
         speed = float(byte_counter) / elapsed
         if speed > rate_limit:
-            time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
+            time.sleep(max(float(byte_counter) / rate_limit - elapsed, 0))
 
     def temp_name(self, filename):
         """Returns a temporary filename for the given filename."""
@@ -281,8 +284,19 @@ class FileDownloader(object):
         """Download to a filename using the info from info_dict
         Return True on success and False otherwise
         """
+        nooverwrites_and_exists = (
+            self.params.get('nooverwrites', False)
+            and os.path.exists(encodeFilename(filename))
+        )
+
+        continuedl_and_exists = (
+            self.params.get('continuedl', False)
+            and os.path.isfile(encodeFilename(filename))
+            and not self.params.get('nopart', False)
+        )
+
         # Check file already present
-        if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
+        if filename != '-' and nooverwrites_and_exists or continuedl_and_exists:
             self.report_file_already_downloaded(filename)
             self._hook_progress({
                 'filename': filename,
@@ -302,19 +316,6 @@ class FileDownloader(object):
             ph(status)
 
     def add_progress_hook(self, ph):
-        """ ph gets called on download progress, with a dictionary with the entries
-        * filename: The final filename
-        * status: One of "downloading" and "finished"
-
-        It can also have some of the following entries:
-
-        * downloaded_bytes: Bytes on disks
-        * total_bytes: Total bytes, None if unknown
-        * tmpfilename: The filename we're currently writing to
-        * eta: The estimated time in seconds, None if unknown
-        * speed: The download speed in bytes/second, None if unknown
-
-        Hooks are guaranteed to be called at least once (with status "finished")
-        if the download is successful.
-        """
+        # See YoutubeDL.py (search for progress_hooks) for a description of
+        # this interface
         self._progress_hooks.append(ph)
index 7cd22c504e463ad2551692728bd3933e8bcf20ab..c460c167a2db78be7a1cdb8e81a3efe89e677ed1 100644 (file)
@@ -9,10 +9,12 @@ import xml.etree.ElementTree as etree
 
 from .common import FileDownloader
 from .http import HttpFD
+from ..compat import (
+    compat_urlparse,
+)
 from ..utils import (
     struct_pack,
     struct_unpack,
-    compat_urlparse,
     format_bytes,
     encodeFilename,
     sanitize_open,
@@ -185,24 +187,34 @@ def build_fragments_list(boot_info):
     return res
 
 
-def write_flv_header(stream, metadata):
-    """Writes the FLV header and the metadata to stream"""
+def write_unsigned_int(stream, val):
+    stream.write(struct_pack('!I', val))
+
+
+def write_unsigned_int_24(stream, val):
+    stream.write(struct_pack('!I', val)[1:])
+
+
+def write_flv_header(stream):
+    """Writes the FLV header to stream"""
     # FLV header
     stream.write(b'FLV\x01')
     stream.write(b'\x05')
     stream.write(b'\x00\x00\x00\x09')
-    # FLV File body
     stream.write(b'\x00\x00\x00\x00')
-    # FLVTAG
-    # Script data
-    stream.write(b'\x12')
-    # Size of the metadata with 3 bytes
-    stream.write(struct_pack('!L', len(metadata))[1:])
-    stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
-    stream.write(metadata)
-    # Magic numbers extracted from the output files produced by AdobeHDS.php
-    #(https://github.com/K-S-V/Scripts)
-    stream.write(b'\x00\x00\x01\x73')
+
+
+def write_metadata_tag(stream, metadata):
+    """Writes optional metadata tag to stream"""
+    SCRIPT_TAG = b'\x12'
+    FLV_TAG_HEADER_LEN = 11
+
+    if metadata:
+        stream.write(SCRIPT_TAG)
+        write_unsigned_int_24(stream, len(metadata))
+        stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
+        stream.write(metadata)
+        write_unsigned_int(stream, FLV_TAG_HEADER_LEN + len(metadata))
 
 
 def _add_ns(prop):
@@ -231,6 +243,7 @@ class F4mFD(FileDownloader):
                 'continuedl': True,
                 'quiet': True,
                 'noprogress': True,
+                'ratelimit': self.params.get('ratelimit', None),
                 'test': self.params.get('test', False),
             }
         )
@@ -253,7 +266,11 @@ class F4mFD(FileDownloader):
             bootstrap = self.ydl.urlopen(bootstrap_url).read()
         else:
             bootstrap = base64.b64decode(bootstrap_node.text)
-        metadata = base64.b64decode(media.find(_add_ns('metadata')).text)
+        metadata_node = media.find(_add_ns('metadata'))
+        if metadata_node is not None:
+            metadata = base64.b64decode(metadata_node.text)
+        else:
+            metadata = None
         boot_info = read_bootstrap_info(bootstrap)
 
         fragments_list = build_fragments_list(boot_info)
@@ -266,7 +283,8 @@ class F4mFD(FileDownloader):
 
         tmpfilename = self.temp_name(filename)
         (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
-        write_flv_header(dest_stream, metadata)
+        write_flv_header(dest_stream)
+        write_metadata_tag(dest_stream, metadata)
 
         # This dict stores the download progress, it's updated by the progress
         # hook
index 954beffd50e51db43ce203931d31ad2fbeee95dc..aa58b52abb5998ba8879e6eba3a1d974484467e2 100644 (file)
@@ -4,11 +4,13 @@ import os
 import re
 import subprocess
 
+from ..postprocessor.ffmpeg import FFmpegPostProcessor
 from .common import FileDownloader
-from ..utils import (
+from ..compat import (
     compat_urlparse,
     compat_urllib_request,
-    check_executable,
+)
+from ..utils import (
     encodeFilename,
 )
 
@@ -24,12 +26,12 @@ class HlsFD(FileDownloader):
             '-bsf:a', 'aac_adtstoasc',
             encodeFilename(tmpfilename, for_subprocess=True)]
 
-        for program in ['avconv', 'ffmpeg']:
-            if check_executable(program, ['-version']):
-                break
-        else:
+        ffpp = FFmpegPostProcessor(downloader=self)
+        program = ffpp._executable
+        if program is None:
             self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
             return False
+        ffpp.check_version()
         cmd = [program] + args
 
         retval = subprocess.call(cmd)
index 8491cee8aa2769e8465176411a92ded85b07ad13..e68f20c9f46a93ebfeca2ff47dc0843f4ab94874 100644 (file)
@@ -4,11 +4,12 @@ import os
 import time
 
 from .common import FileDownloader
-from ..utils import (
+from ..compat import (
     compat_urllib_request,
     compat_urllib_error,
+)
+from ..utils import (
     ContentTooShortError,
-
     encodeFilename,
     sanitize_open,
     format_bytes,
@@ -136,16 +137,21 @@ class HttpFD(FileDownloader):
         byte_counter = 0 + resume_len
         block_size = self.params.get('buffersize', 1024)
         start = time.time()
+
+        # measure time over whole while-loop, so slow_down() and best_block_size() work together properly
+        now = None  # needed for slow_down() in the first loop run
+        before = start  # start measuring
         while True:
+
             # Download and write
-            before = time.time()
             data_block = data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
-            after = time.time()
+            byte_counter += len(data_block)
+
+            # exit loop when download is finished
             if len(data_block) == 0:
                 break
-            byte_counter += len(data_block)
 
-            # Open file just in time
+            # Open destination file just in time
             if stream is None:
                 try:
                     (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
@@ -161,11 +167,22 @@ class HttpFD(FileDownloader):
                 self.to_stderr('\n')
                 self.report_error('unable to write data: %s' % str(err))
                 return False
+
+            # Apply rate limit
+            self.slow_down(start, now, byte_counter - resume_len)
+
+            # end measuring of one loop run
+            now = time.time()
+            after = now
+
+            # Adjust block size
             if not self.params.get('noresizebuffer', False):
                 block_size = self.best_block_size(after - before, len(data_block))
 
+            before = after
+
             # Progress message
-            speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
+            speed = self.calc_speed(start, now, byte_counter - resume_len)
             if data_len is None:
                 eta = percent = None
             else:
@@ -186,9 +203,6 @@ class HttpFD(FileDownloader):
             if is_test and byte_counter == data_len:
                 break
 
-            # Apply rate limit
-            self.slow_down(start, byte_counter - resume_len)
-
         if stream is None:
             self.to_stderr('\n')
             self.report_error('Did not get any data blocks')
index c53195da0c9471d55a61b53b1041e05ee209697e..72cef30eaf3718ad8932814a627042cc0bdff361 100644 (file)
@@ -4,8 +4,8 @@ import os
 import subprocess
 
 from .common import FileDownloader
-from ..compat import compat_subprocess_get_DEVNULL
 from ..utils import (
+    check_executable,
     encodeFilename,
 )
 
@@ -20,11 +20,7 @@ class MplayerFD(FileDownloader):
             'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',
             '-dumpstream', '-dumpfile', tmpfilename, url]
         # Check for mplayer first
-        try:
-            subprocess.call(
-                ['mplayer', '-h'],
-                stdout=compat_subprocess_get_DEVNULL(), stderr=subprocess.STDOUT)
-        except (OSError, IOError):
+        if not check_executable('mplayer', ['-h']):
             self.report_error('MMS or RTSP download detected but "%s" could not be run' % args[0])
             return False
 
index 58ae2005c014eb4aafbd68e087d5018d26d02a21..5346cb9a0ae8ab7d02f4cd91e9e4b17019baf88a 100644 (file)
@@ -7,9 +7,9 @@ import sys
 import time
 
 from .common import FileDownloader
+from ..compat import compat_str
 from ..utils import (
     check_executable,
-    compat_str,
     encodeFilename,
     format_bytes,
     get_exe_version,
@@ -185,7 +185,7 @@ class RtmpFD(FileDownloader):
             cursize = os.path.getsize(encodeFilename(tmpfilename))
             if prevsize == cursize and retval == RD_FAILED:
                 break
-             # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
+            # Some rtmp streams seem to abort after ~ 99.8%. Don't complain for those
             if prevsize == cursize and retval == RD_INCOMPLETE and cursize > 1024:
                 self.to_screen('[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
                 retval = RD_SUCCESS
index 0339d13860ab81d1a38a3dc624f2336b42e4a6a5..0902eb4374caf943e1af4e23aeb7c74a8386a794 100644 (file)
@@ -3,8 +3,11 @@ from __future__ import unicode_literals
 from .abc import ABCIE
 from .academicearth import AcademicEarthCourseIE
 from .addanime import AddAnimeIE
+from .adobetv import AdobeTVIE
 from .adultswim import AdultSwimIE
 from .aftonbladet import AftonbladetIE
+from .aljazeera import AlJazeeraIE
+from .alphaporno import AlphaPornoIE
 from .anitube import AnitubeIE
 from .anysex import AnySexIE
 from .aol import AolIE
@@ -22,13 +25,17 @@ from .arte import (
     ArteTVDDCIE,
     ArteTVEmbedIE,
 )
-from .audiomack import AudiomackIE
+from .atresplayer import AtresPlayerIE
+from .atttechchannel import ATTTechChannelIE
+from .audiomack import AudiomackIE, AudiomackAlbumIE
 from .auengine import AUEngineIE
+from .azubu import AzubuIE
 from .bambuser import BambuserIE, BambuserChannelIE
 from .bandcamp import BandcampIE, BandcampAlbumIE
 from .bbccouk import BBCCoUkIE
 from .beeg import BeegIE
 from .behindkink import BehindKinkIE
+from .bet import BetIE
 from .bild import BildIE
 from .bilibili import BiliBiliIE
 from .blinkx import BlinkxIE
@@ -49,7 +56,7 @@ from .cbsnews import CBSNewsIE
 from .ceskatelevize import CeskaTelevizeIE
 from .channel9 import Channel9IE
 from .chilloutzone import ChilloutzoneIE
-from .cinemassacre import CinemassacreIE
+from .cinchcast import CinchcastIE
 from .clipfish import ClipfishIE
 from .cliphunter import CliphunterIE
 from .clipsyndicate import ClipsyndicateIE
@@ -60,9 +67,13 @@ from .cnet import CNETIE
 from .cnn import (
     CNNIE,
     CNNBlogsIE,
+    CNNArticleIE,
 )
 from .collegehumor import CollegeHumorIE
+from .collegerama import CollegeRamaIE
 from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
+from .comcarcoff import ComCarCoffIE
+from .commonmistakes import CommonMistakesIE
 from .condenast import CondeNastIE
 from .cracked import CrackedIE
 from .criterion import CriterionIE
@@ -82,14 +93,17 @@ from .deezer import DeezerPlaylistIE
 from .dfb import DFBIE
 from .dotsub import DotsubIE
 from .dreisat import DreiSatIE
+from .drbonanza import DRBonanzaIE
 from .drtuber import DrTuberIE
 from .drtv import DRTVIE
+from .dvtv import DVTVIE
 from .dump import DumpIE
 from .defense import DefenseGouvFrIE
 from .discovery import DiscoveryIE
 from .divxstage import DivxStageIE
 from .dropbox import DropboxIE
 from .ebaumsworld import EbaumsWorldIE
+from .echomsk import EchoMskIE
 from .ehow import EHowIE
 from .eighttracks import EightTracksIE
 from .einthusan import EinthusanIE
@@ -102,6 +116,7 @@ from .elpais import ElPaisIE
 from .empflix import EMPFlixIE
 from .engadget import EngadgetIE
 from .eporner import EpornerIE
+from .eroprofile import EroProfileIE
 from .escapist import EscapistIE
 from .everyonesmixtape import EveryonesMixtapeIE
 from .exfm import ExfmIE
@@ -121,6 +136,8 @@ from .fktv import (
 from .flickr import FlickrIE
 from .folketinget import FolketingetIE
 from .fourtube import FourTubeIE
+from .foxgay import FoxgayIE
+from .foxnews import FoxNewsIE
 from .franceculture import FranceCultureIE
 from .franceinter import FranceInterIE
 from .francetv import (
@@ -144,6 +161,8 @@ from .gamestar import GameStarIE
 from .gametrailers import GametrailersIE
 from .gdcvault import GDCVaultIE
 from .generic import GenericIE
+from .giantbomb import GiantBombIE
+from .giga import GigaIE
 from .glide import GlideIE
 from .globo import GloboIE
 from .godtube import GodTubeIE
@@ -154,10 +173,13 @@ from .googlesearch import GoogleSearchIE
 from .gorillavid import GorillaVidIE
 from .goshgay import GoshgayIE
 from .grooveshark import GroovesharkIE
+from .groupon import GrouponIE
 from .hark import HarkIE
 from .heise import HeiseIE
+from .hellporno import HellPornoIE
 from .helsinki import HelsinkiIE
 from .hentaistigma import HentaiStigmaIE
+from .hitbox import HitboxIE, HitboxLiveIE
 from .hornbunny import HornBunnyIE
 from .hostingbulk import HostingBulkIE
 from .hotnewhiphop import HotNewHipHopIE
@@ -187,6 +209,7 @@ from .jove import JoveIE
 from .jukebox import JukeboxIE
 from .jpopsukitv import JpopsukiIE
 from .kankan import KankanIE
+from .karaoketv import KaraoketvIE
 from .keezmovies import KeezMoviesIE
 from .khanacademy import KhanAcademyIE
 from .kickstarter import KickStarterIE
@@ -203,6 +226,7 @@ from .livestream import (
     LivestreamOriginalIE,
     LivestreamShortenerIE,
 )
+from .lnkgo import LnkGoIE
 from .lrt import LRTIE
 from .lynda import (
     LyndaIE,
@@ -216,6 +240,7 @@ from .mdr import MDRIE
 from .metacafe import MetacafeIE
 from .metacritic import MetacriticIE
 from .mgoon import MgoonIE
+from .minhateca import MinhatecaIE
 from .ministrygrid import MinistryGridIE
 from .mit import TechTVMITIE, MITIE, OCWMITIE
 from .mitele import MiTeleIE
@@ -245,6 +270,7 @@ from .muzu import MuzuTVIE
 from .myspace import MySpaceIE, MySpaceAlbumIE
 from .myspass import MySpassIE
 from .myvideo import MyVideoIE
+from .myvidster import MyVidsterIE
 from .naver import NaverIE
 from .nba import NBAIE
 from .nbc import (
@@ -253,6 +279,8 @@ from .nbc import (
 )
 from .ndr import NDRIE
 from .ndtv import NDTVIE
+from .netzkino import NetzkinoIE
+from .nerdcubed import NerdCubedFeedIE
 from .newgrounds import NewgroundsIE
 from .newstube import NewstubeIE
 from .nfb import NFBIE
@@ -268,6 +296,7 @@ from .nowness import NownessIE
 from .nowvideo import NowVideoIE
 from .npo import (
     NPOIE,
+    NPOLiveIE,
     TegenlichtVproIE,
 )
 from .nrk import (
@@ -279,6 +308,7 @@ from .nytimes import NYTimesIE
 from .nuvid import NuvidIE
 from .oktoberfesttv import OktoberfestTVIE
 from .ooyala import OoyalaIE
+from .openfilm import OpenFilmIE
 from .orf import (
     ORFTVthekIE,
     ORFOE1IE,
@@ -302,24 +332,30 @@ from .promptfile import PromptFileIE
 from .prosiebensat1 import ProSiebenSat1IE
 from .pyvideo import PyvideoIE
 from .quickvid import QuickVidIE
+from .radiode import RadioDeIE
+from .radiobremen import RadioBremenIE
 from .radiofrance import RadioFranceIE
 from .rai import RaiIE
 from .rbmaradio import RBMARadioIE
 from .redtube import RedTubeIE
+from .restudy import RestudyIE
 from .reverbnation import ReverbNationIE
 from .ringtv import RingTVIE
 from .ro220 import Ro220IE
 from .rottentomatoes import RottenTomatoesIE
 from .roxwel import RoxwelIE
 from .rtbf import RTBFIE
+from .rte import RteIE
 from .rtlnl import RtlXlIE
 from .rtlnow import RTLnowIE
+from .rtp import RTPIE
 from .rts import RTSIE
 from .rtve import RTVEALaCartaIE, RTVELiveIE
 from .ruhd import RUHDIE
 from .rutube import (
     RutubeIE,
     RutubeChannelIE,
+    RutubeEmbedIE,
     RutubeMovieIE,
     RutubePersonIE,
 )
@@ -329,6 +365,8 @@ from .savefrom import SaveFromIE
 from .sbs import SBSIE
 from .scivee import SciVeeIE
 from .screencast import ScreencastIE
+from .screencastomatic import ScreencastOMaticIE
+from .screenwavemedia import CinemassacreIE, ScreenwaveMediaIE, TeamFourIE
 from .servingsys import ServingSysIE
 from .sexu import SexuIE
 from .sexykarma import SexyKarmaIE
@@ -388,6 +426,7 @@ from .ted import TEDIE
 from .telebruxelles import TeleBruxellesIE
 from .telecinco import TelecincoIE
 from .telemb import TeleMBIE
+from .teletask import TeleTaskIE
 from .tenplay import TenPlayIE
 from .testurl import TestURLIE
 from .tf1 import TF1IE
@@ -415,8 +454,9 @@ from .tunein import TuneInIE
 from .turbo import TurboIE
 from .tutv import TutvIE
 from .tvigle import TvigleIE
-from .tvp import TvpIE
+from .tvp import TvpIE, TvpSeriesIE
 from .tvplay import TVPlayIE
+from .twentyfourvideo import TwentyFourVideoIE
 from .twitch import TwitchIE
 from .ubu import UbuIE
 from .udemy import (
@@ -445,6 +485,7 @@ from .videott import VideoTtIE
 from .videoweed import VideoWeedIE
 from .vidme import VidmeIE
 from .vidzi import VidziIE
+from .vier import VierIE, VierVideosIE
 from .vimeo import (
     VimeoIE,
     VimeoAlbumIE,
@@ -480,6 +521,7 @@ from .wdr import (
     WDRMobileIE,
     WDRMausIE,
 )
+from .webofstories import WebOfStoriesIE
 from .weibo import WeiboIE
 from .wimp import WimpIE
 from .wistia import WistiaIE
@@ -492,10 +534,12 @@ from .xminus import XMinusIE
 from .xnxx import XNXXIE
 from .xvideos import XVideosIE
 from .xtube import XTubeUserIE, XTubeIE
+from .xxxymovies import XXXYMoviesIE
 from .yahoo import (
     YahooIE,
     YahooSearchIE,
 )
+from .yesjapan import YesJapanIE
 from .ynet import YnetIE
 from .youjizz import YouJizzIE
 from .youku import YoukuIE
@@ -513,12 +557,12 @@ from .youtube import (
     YoutubeSearchURLIE,
     YoutubeShowIE,
     YoutubeSubscriptionsIE,
-    YoutubeTopListIE,
+    YoutubeTruncatedIDIE,
     YoutubeTruncatedURLIE,
     YoutubeUserIE,
     YoutubeWatchLaterIE,
 )
-from .zdf import ZDFIE
+from .zdf import ZDFIE, ZDFChannelIE
 from .zingmp3 import (
     ZingMp3SongIE,
     ZingMp3AlbumIE,
@@ -539,6 +583,17 @@ def gen_extractors():
     return [klass() for klass in _ALL_CLASSES]
 
 
+def list_extractors(age_limit):
+    """
+    Return a list of extractors that are suitable for the given age,
+    sorted by extractor ID.
+    """
+
+    return sorted(
+        filter(lambda ie: ie.is_suitable(age_limit), gen_extractors()),
+        key=lambda ie: ie.IE_NAME.lower())
+
+
 def get_info_extractor(ie_name):
     """Returns the info extractor class with the given ie_name"""
     return globals()[ie_name + 'IE']
diff --git a/youtube_dl/extractor/adobetv.py b/youtube_dl/extractor/adobetv.py
new file mode 100644 (file)
index 0000000..28e07f8
--- /dev/null
@@ -0,0 +1,70 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    unified_strdate,
+    str_to_int,
+)
+
+
+class AdobeTVIE(InfoExtractor):
+    _VALID_URL = r'https?://tv\.adobe\.com/watch/[^/]+/(?P<id>[^/]+)'
+
+    _TEST = {
+        'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/',
+        'md5': '9bc5727bcdd55251f35ad311ca74fa1e',
+        'info_dict': {
+            'id': 'quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop',
+            'ext': 'mp4',
+            'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop',
+            'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311',
+            'thumbnail': r're:https?://.*\.jpg$',
+            'upload_date': '20110914',
+            'duration': 60,
+            'view_count': int,
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        player = self._parse_json(
+            self._search_regex(r'html5player:\s*({.+?})\s*\n', webpage, 'player'),
+            video_id)
+
+        title = player.get('title') or self._search_regex(
+            r'data-title="([^"]+)"', webpage, 'title')
+        description = self._og_search_description(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        upload_date = unified_strdate(
+            self._html_search_meta('datepublished', webpage, 'upload date'))
+
+        duration = parse_duration(
+            self._html_search_meta('duration', webpage, 'duration')
+            or self._search_regex(r'Runtime:\s*(\d{2}:\d{2}:\d{2})', webpage, 'duration', fatal=False))
+
+        view_count = str_to_int(self._search_regex(
+            r'<div class="views">\s*Views?:\s*([\d,.]+)\s*</div>',
+            webpage, 'view count', fatal=False))
+
+        formats = [{
+            'url': source['src'],
+            'format_id': source.get('quality') or source['src'].split('-')[-1].split('.')[0] or None,
+            'tbr': source.get('bitrate'),
+        } for source in player['sources']]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'upload_date': upload_date,
+            'duration': duration,
+            'view_count': view_count,
+            'formats': formats,
+        }
index 0d05cbb4b16b470aaa1a82d318c07f323ccd1bf7..502a9c25ad8fd6ab8fed0a46f2f52077f988aad9 100644 (file)
 from __future__ import unicode_literals
 
 import re
+import json
 
 from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    xpath_text,
+    float_or_none,
+)
 
 
 class AdultSwimIE(InfoExtractor):
-    _VALID_URL = r'https?://video\.adultswim\.com/(?P<path>.+?)(?:\.html)?(?:\?.*)?(?:#.*)?$'
-    _TEST = {
-        'url': 'http://video.adultswim.com/rick-and-morty/close-rick-counters-of-the-rick-kind.html?x=y#title',
+    _VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<is_playlist>playlists/)?(?P<show_path>[^/]+)/(?P<episode_path>[^/?#]+)/?'
+
+    _TESTS = [{
+        'url': 'http://adultswim.com/videos/rick-and-morty/pilot',
         'playlist': [
             {
-                'md5': '4da359ec73b58df4575cd01a610ba5dc',
-                'info_dict': {
-                    'id': '8a250ba1450996e901453d7f02ca02f5',
-                    'ext': 'flv',
-                    'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 1',
-                    'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
-                    'uploader': 'Rick and Morty',
-                    'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
-                }
-            },
-            {
-                'md5': 'ffbdf55af9331c509d95350bd0cc1819',
+                'md5': '247572debc75c7652f253c8daa51a14d',
                 'info_dict': {
-                    'id': '8a250ba1450996e901453d7f4bd102f6',
+                    'id': 'rQxZvXQ4ROaSOqq-or2Mow-0',
                     'ext': 'flv',
-                    'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 2',
-                    'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
-                    'uploader': 'Rick and Morty',
-                    'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
-                }
+                    'title': 'Rick and Morty - Pilot Part 1',
+                    'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
+                },
             },
             {
-                'md5': 'b92409635540304280b4b6c36bd14a0a',
+                'md5': '77b0e037a4b20ec6b98671c4c379f48d',
                 'info_dict': {
-                    'id': '8a250ba1450996e901453d7fa73c02f7',
+                    'id': 'rQxZvXQ4ROaSOqq-or2Mow-3',
                     'ext': 'flv',
-                    'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 3',
-                    'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
-                    'uploader': 'Rick and Morty',
-                    'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
-                }
+                    'title': 'Rick and Morty - Pilot Part 4',
+                    'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
+                },
             },
+        ],
+        'info_dict': {
+            'title': 'Rick and Morty - Pilot',
+            'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
+        }
+    }, {
+        'url': 'http://www.adultswim.com/videos/playlists/american-parenting/putting-francine-out-of-business/',
+        'playlist': [
             {
-                'md5': 'e8818891d60e47b29cd89d7b0278156d',
+                'md5': '2eb5c06d0f9a1539da3718d897f13ec5',
                 'info_dict': {
-                    'id': '8a250ba1450996e901453d7fc8ba02f8',
+                    'id': '-t8CamQlQ2aYZ49ItZCFog-0',
                     'ext': 'flv',
-                    'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 4',
-                    'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
-                    'uploader': 'Rick and Morty',
-                    'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
-                }
+                    'title': 'American Dad - Putting Francine Out of Business',
+                    'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
+                },
             }
-        ]
-    }
-
-    _video_extensions = {
-        '3500': 'flv',
-        '640': 'mp4',
-        '150': 'mp4',
-        'ipad': 'm3u8',
-        'iphone': 'm3u8'
-    }
-    _video_dimensions = {
-        '3500': (1280, 720),
-        '640': (480, 270),
-        '150': (320, 180)
-    }
+        ],
+        'info_dict': {
+            'title': 'American Dad - Putting Francine Out of Business',
+            'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
+        },
+    }]
+
+    @staticmethod
+    def find_video_info(collection, slug):
+        for video in collection.get('videos'):
+            if video.get('slug') == slug:
+                return video
+
+    @staticmethod
+    def find_collection_by_linkURL(collections, linkURL):
+        for collection in collections:
+            if collection.get('linkURL') == linkURL:
+                return collection
+
+    @staticmethod
+    def find_collection_containing_video(collections, slug):
+        for collection in collections:
+            for video in collection.get('videos'):
+                if video.get('slug') == slug:
+                    return collection, video
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_path = mobj.group('path')
-
-        webpage = self._download_webpage(url, video_path)
-        episode_id = self._html_search_regex(
-            r'<link rel="video_src" href="http://i\.adultswim\.com/adultswim/adultswimtv/tools/swf/viralplayer.swf\?id=([0-9a-f]+?)"\s*/?\s*>',
-            webpage, 'episode_id')
-        title = self._og_search_title(webpage)
-
-        index_url = 'http://asfix.adultswim.com/asfix-svc/episodeSearch/getEpisodesByIDs?networkName=AS&ids=%s' % episode_id
-        idoc = self._download_xml(index_url, title, 'Downloading episode index', 'Unable to download episode index')
-
-        episode_el = idoc.find('.//episode')
-        show_title = episode_el.attrib.get('collectionTitle')
-        episode_title = episode_el.attrib.get('title')
-        thumbnail = episode_el.attrib.get('thumbnailUrl')
-        description = episode_el.find('./description').text.strip()
+        show_path = mobj.group('show_path')
+        episode_path = mobj.group('episode_path')
+        is_playlist = True if mobj.group('is_playlist') else False
+
+        webpage = self._download_webpage(url, episode_path)
+
+        # Extract the value of `bootstrappedData` from the Javascript in the page.
+        bootstrappedDataJS = self._search_regex(r'var bootstrappedData = ({.*});', webpage, episode_path)
+
+        try:
+            bootstrappedData = json.loads(bootstrappedDataJS)
+        except ValueError as ve:
+            errmsg = '%s: Failed to parse JSON ' % episode_path
+            raise ExtractorError(errmsg, cause=ve)
+
+        # Downloading videos from a /videos/playlist/ URL needs to be handled differently.
+        # NOTE: We are only downloading one video (the current one) not the playlist
+        if is_playlist:
+            collections = bootstrappedData['playlists']['collections']
+            collection = self.find_collection_by_linkURL(collections, show_path)
+            video_info = self.find_video_info(collection, episode_path)
+
+            show_title = video_info['showTitle']
+            segment_ids = [video_info['videoPlaybackID']]
+        else:
+            collections = bootstrappedData['show']['collections']
+            collection, video_info = self.find_collection_containing_video(collections, episode_path)
+
+            show = bootstrappedData['show']
+            show_title = show['title']
+            segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
+
+        episode_id = video_info['id']
+        episode_title = video_info['title']
+        episode_description = video_info['description']
+        episode_duration = video_info.get('duration')
 
         entries = []
-        segment_els = episode_el.findall('./segments/segment')
+        for part_num, segment_id in enumerate(segment_ids):
+            segment_url = 'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=mobile' % segment_id
 
-        for part_num, segment_el in enumerate(segment_els):
-            segment_id = segment_el.attrib.get('id')
-            segment_title = '%s %s part %d' % (show_title, episode_title, part_num + 1)
-            thumbnail = segment_el.attrib.get('thumbnailUrl')
-            duration = segment_el.attrib.get('duration')
+            segment_title = '%s - %s' % (show_title, episode_title)
+            if len(segment_ids) > 1:
+                segment_title += ' Part %d' % (part_num + 1)
 
-            segment_url = 'http://asfix.adultswim.com/asfix-svc/episodeservices/getCvpPlaylist?networkName=AS&id=%s' % segment_id
             idoc = self._download_xml(
                 segment_url, segment_title,
                 'Downloading segment information', 'Unable to download segment information')
 
+            segment_duration = float_or_none(
+                xpath_text(idoc, './/trt', 'segment duration').strip())
+
             formats = []
             file_els = idoc.findall('.//files/file')
 
             for file_el in file_els:
                 bitrate = file_el.attrib.get('bitrate')
-                type = file_el.attrib.get('type')
-                width, height = self._video_dimensions.get(bitrate, (None, None))
+                ftype = file_el.attrib.get('type')
+
                 formats.append({
-                    'format_id': '%s-%s' % (bitrate, type),
-                    'url': file_el.text,
-                    'ext': self._video_extensions.get(bitrate, 'mp4'),
+                    'format_id': '%s_%s' % (bitrate, ftype),
+                    'url': file_el.text.strip(),
                     # The bitrate may not be a number (for example: 'iphone')
                     'tbr': int(bitrate) if bitrate.isdigit() else None,
-                    'height': height,
-                    'width': width
+                    'quality': 1 if ftype == 'hd' else -1
                 })
 
             self._sort_formats(formats)
@@ -127,18 +154,16 @@ class AdultSwimIE(InfoExtractor):
                 'id': segment_id,
                 'title': segment_title,
                 'formats': formats,
-                'uploader': show_title,
-                'thumbnail': thumbnail,
-                'duration': duration,
-                'description': description
+                'duration': segment_duration,
+                'description': episode_description
             })
 
         return {
             '_type': 'playlist',
             'id': episode_id,
-            'display_id': video_path,
+            'display_id': episode_path,
             'entries': entries,
-            'title': '%s %s' % (show_title, episode_title),
-            'description': description,
-            'thumbnail': thumbnail
+            'title': '%s %s' % (show_title, episode_title),
+            'description': episode_description,
+            'duration': episode_duration
         }
diff --git a/youtube_dl/extractor/aljazeera.py b/youtube_dl/extractor/aljazeera.py
new file mode 100644 (file)
index 0000000..612708e
--- /dev/null
@@ -0,0 +1,35 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class AlJazeeraIE(InfoExtractor):  # aljazeera.com programme pages; builds a 'brightcove:' URL instead of extracting media itself
+    _VALID_URL = r'http://www\.aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html'  # 'id' = last path component without '.html'
+
+    _TEST = {
+        'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
+        'info_dict': {
+            'id': '3792260579001',
+            'ext': 'mp4',
+            'title': 'The Slum - Episode 1: Deliverance',
+            'description': 'As a birth attendant advocating for family planning, Remy is on the frontline of Tondo\'s battle with overcrowding.',
+            'uploader': 'Al Jazeera English',
+        },
+        'add_ie': ['Brightcove'],
+    }
+
+    def _real_extract(self, url):
+        program_name = self._match_id(url)  # presumably the 'id' group of _VALID_URL; here it only labels the page download
+        webpage = self._download_webpage(url, program_name)
+        brightcove_id = self._search_regex(
+            r'RenderPagesVideo\(\'(.+?)\'', webpage, 'brightcove id')  # first single-quoted argument of RenderPagesVideo('...') in the page
+
+        return {
+            '_type': 'url',  # presumably re-dispatched to the extractor named by 'ie_key' - confirm against the downloader
+            'url': (
+                'brightcove:'
+                'playerKey=AQ~~%2CAAAAmtVJIFk~%2CTVGOQ5ZTwJbeMWnq5d_H4MOM57xfzApc'
+                '&%40videoPlayer={0}'.format(brightcove_id)  # adjacent literals are concatenated before .format() is applied
+            ),
+            'ie_key': 'Brightcove',
+        }
index 398e93bfb4f8472a23d2b8669e7f83f867933244..7d65b81931fb2d9b3acb5dd4ab2961a2aec52bea 100644 (file)
@@ -5,15 +5,14 @@ import re
 import json
 
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
-    compat_str,
     qualities,
-    determine_ext,
 )
 
 
 class AllocineIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?P<typ>article|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=)(?P<id>[0-9]+)(?:\.html)?'
+    _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?P<typ>article|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P<id>[0-9]+)(?:\.html)?'
 
     _TESTS = [{
         'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html',
@@ -45,6 +44,9 @@ class AllocineIE(InfoExtractor):
             'description': 'md5:71742e3a74b0d692c7fce0dd2017a4ac',
             'thumbnail': 're:http://.*\.jpg',
         },
+    }, {
+        'url': 'http://www.allocine.fr/video/video-19550147/',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -75,9 +77,7 @@ class AllocineIE(InfoExtractor):
                     'format_id': format_id,
                     'quality': quality(format_id),
                     'url': v,
-                    'ext': determine_ext(v),
                 })
-
         self._sort_formats(formats)
 
         return {
diff --git a/youtube_dl/extractor/alphaporno.py b/youtube_dl/extractor/alphaporno.py
new file mode 100644 (file)
index 0000000..c34719d
--- /dev/null
@@ -0,0 +1,77 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_iso8601,
+    parse_duration,
+    parse_filesize,
+    int_or_none,
+)
+
+
+class AlphaPornoIE(InfoExtractor):  # single-video extractor: every field is scraped from the video page itself
+    _VALID_URL = r'https?://(?:www\.)?alphaporno\.com/videos/(?P<id>[^/]+)'  # 'id' is the URL slug, used below as display_id
+    _TEST = {
+        'url': 'http://www.alphaporno.com/videos/sensual-striptease-porn-with-samantha-alexandra/',
+        'md5': 'feb6d3bba8848cd54467a87ad34bd38e',
+        'info_dict': {
+            'id': '258807',
+            'display_id': 'sensual-striptease-porn-with-samantha-alexandra',
+            'ext': 'mp4',
+            'title': 'Sensual striptease porn with Samantha Alexandra',
+            'thumbnail': 're:https?://.*\.jpg$',
+            'timestamp': 1418694611,
+            'upload_date': '20141216',
+            'duration': 387,
+            'filesize_approx': 54120000,
+            'tbr': 1145,
+            'categories': list,
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        video_id = self._search_regex(
+            r"video_id\s*:\s*'([^']+)'", webpage, 'video id', default=None)  # optional: 'id' in the result is None when the page has no video_id
+
+        video_url = self._search_regex(
+            r"video_url\s*:\s*'([^']+)'", webpage, 'video url')  # no default given, unlike video_id above
+        ext = self._html_search_meta(
+            'encodingFormat', webpage, 'ext', default='.mp4')[1:]  # [1:] drops the first character (the '.' of e.g. '.mp4')
+
+        title = self._search_regex(
+            [r'<meta content="([^"]+)" itemprop="description">',
+             r'class="title" itemprop="name">([^<]+)<'],  # NOTE(review): presumably tried in order, so the description meta wins - confirm
+            webpage, 'title')
+        thumbnail = self._html_search_meta('thumbnail', webpage, 'thumbnail')
+        timestamp = parse_iso8601(self._html_search_meta(
+            'uploadDate', webpage, 'upload date'))
+        duration = parse_duration(self._html_search_meta(
+            'duration', webpage, 'duration'))
+        filesize_approx = parse_filesize(self._html_search_meta(
+            'contentSize', webpage, 'file size'))
+        bitrate = int_or_none(self._html_search_meta(
+            'bitrate', webpage, 'bitrate'))
+        categories = self._html_search_meta(
+            'keywords', webpage, 'categories', default='').split(',')  # NOTE: ''.split(',') is [''], so a missing keywords meta gives one empty category
+
+        age_limit = self._rta_search(webpage)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'url': video_url,
+            'ext': ext,
+            'title': title,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'duration': duration,
+            'filesize_approx': filesize_approx,
+            'tbr': bitrate,
+            'categories': categories,
+            'age_limit': age_limit,
+        }
index 47f8e415777ee21bfa5e001921077f3c9aaa16af..b51eafc45928f8e6ff4ce571763593f71b715583 100644 (file)
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from .fivemin import FiveMinIE
 
 
 class AolIE(InfoExtractor):
@@ -42,31 +41,30 @@ class AolIE(InfoExtractor):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
-
         playlist_id = mobj.group('playlist_id')
-        if playlist_id and not self._downloader.params.get('noplaylist'):
-            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
+        if not playlist_id or self._downloader.params.get('noplaylist'):  # guard clause: no playlist in the URL, or the 'noplaylist' option is set
+            return self.url_result('5min:%s' % video_id)  # single video: delegate via a '5min:' URL instead of calling FiveMinIE directly
 
-            webpage = self._download_webpage(url, playlist_id)
-            title = self._html_search_regex(
-                r'<h1 class="video-title[^"]*">(.+?)</h1>', webpage, 'title')
-            playlist_html = self._search_regex(
-                r"(?s)<ul\s+class='video-related[^']*'>(.*?)</ul>", webpage,
-                'playlist HTML')
-            entries = [{
-                '_type': 'url',
-                'url': 'aol-video:%s' % m.group('id'),
-                'ie_key': 'Aol',
-            } for m in re.finditer(
-                r"<a\s+href='.*videoid=(?P<id>[0-9]+)'\s+class='video-thumb'>",
-                playlist_html)]
+        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
 
-            return {
-                '_type': 'playlist',
-                'id': playlist_id,
-                'display_id': mobj.group('playlist_display_id'),
-                'title': title,
-                'entries': entries,
-            }
+        webpage = self._download_webpage(url, playlist_id)
+        title = self._html_search_regex(
+            r'<h1 class="video-title[^"]*">(.+?)</h1>', webpage, 'title')
+        playlist_html = self._search_regex(
+            r"(?s)<ul\s+class='video-related[^']*'>(.*?)</ul>", webpage,
+            'playlist HTML')  # contents of the first <ul class='video-related...'> element
+        entries = [{
+            '_type': 'url',
+            'url': 'aol-video:%s' % m.group('id'),  # one entry per video id found in the related list
+            'ie_key': 'Aol',
+        } for m in re.finditer(
+            r"<a\s+href='.*videoid=(?P<id>[0-9]+)'\s+class='video-thumb'>",
+            playlist_html)]
 
-        return FiveMinIE._build_result(video_id)
+        return {
+            '_type': 'playlist',
+            'id': playlist_id,
+            'display_id': mobj.group('playlist_display_id'),
+            'title': title,
+            'entries': entries,
+        }
index 0c01fa1a13ffa6fbfbfe7b7fb2283d5ed4f8b70f..7cd0482c75d7157df218071a2e22ce2904d094b6 100644 (file)
@@ -4,8 +4,8 @@ import re
 import json
 
 from .common import InfoExtractor
+from ..compat import compat_urlparse
 from ..utils import (
-    compat_urlparse,
     int_or_none,
 )
 
index 34ce8429b121261784a1645c28e2a33cb76bcacb..9fc35a42b8612d828ccc3ae43c9e4f74782f5352 100644 (file)
@@ -1,42 +1,48 @@
 from __future__ import unicode_literals
 
-import json
-import re
-
 from .common import InfoExtractor
-from ..utils import (
-    unified_strdate,
-)
+from ..utils import unified_strdate
 
 
 class ArchiveOrgIE(InfoExtractor):
     IE_NAME = 'archive.org'
     IE_DESC = 'archive.org videos'
-    _VALID_URL = r'(?:https?://)?(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
-    _TEST = {
-        "url": "http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect",
-        'file': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv',
+    _VALID_URL = r'https?://(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
+    _TESTS = [{
+        'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
         'md5': '8af1d4cf447933ed3c7f4871162602db',
         'info_dict': {
-            "title": "1968 Demo - FJCC Conference Presentation Reel #1",
-            "description": "Reel 1 of 3: Also known as the \"Mother of All Demos\", Doug Engelbart's presentation at the Fall Joint Computer Conference in San Francisco, December 9, 1968 titled \"A Research Center for Augmenting Human Intellect.\" For this presentation, Doug and his team astonished the audience by not only relating their research, but demonstrating it live. This was the debut of the mouse, interactive computing, hypermedia, computer supported software engineering, video teleconferencing, etc. See also <a href=\"http://dougengelbart.org/firsts/dougs-1968-demo.html\" rel=\"nofollow\">Doug's 1968 Demo page</a> for more background, highlights, links, and the detailed paper published in this conference proceedings. Filmed on 3 reels: Reel 1 | <a href=\"http://www.archive.org/details/XD300-24_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 2</a> | <a href=\"http://www.archive.org/details/XD300-25_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 3</a>",
-            "upload_date": "19681210",
-            "uploader": "SRI International"
+            'id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect',
+            'ext': 'ogv',
+            'title': '1968 Demo - FJCC Conference Presentation Reel #1',
+            'description': 'md5:1780b464abaca9991d8968c877bb53ed',
+            'upload_date': '19681210',
+            'uploader': 'SRI International'
+        }
+    }, {
+        'url': 'https://archive.org/details/Cops1922',
+        'md5': '18f2a19e6d89af8425671da1cf3d4e04',
+        'info_dict': {
+            'id': 'Cops1922',
+            'ext': 'ogv',
+            'title': 'Buster Keaton\'s "Cops" (1922)',
+            'description': 'md5:70f72ee70882f713d4578725461ffcc3',
         }
-    }
+    }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         json_url = url + ('?' if '?' in url else '&') + 'output=json'
-        json_data = self._download_webpage(json_url, video_id)
-        data = json.loads(json_data)
+        data = self._download_json(json_url, video_id)
+
+        def get_optional(data_dict, field):
+            return data_dict['metadata'].get(field, [None])[0]
 
-        title = data['metadata']['title'][0]
-        description = data['metadata']['description'][0]
-        uploader = data['metadata']['creator'][0]
-        upload_date = unified_strdate(data['metadata']['date'][0])
+        title = get_optional(data, 'title')
+        description = get_optional(data, 'description')
+        uploader = get_optional(data, 'creator')
+        upload_date = unified_strdate(get_optional(data, 'date'))
 
         formats = [
             {
index 219631b9b0dfa690a37e09a7b3473566543370e2..929dd3cc5550beb1b2da8874763084b5146d2f33 100644 (file)
@@ -37,7 +37,7 @@ class ArteTvIE(InfoExtractor):
             config_xml_url, video_id, note='Downloading configuration')
 
         formats = [{
-            'forma_id': q.attrib['quality'],
+            'format_id': q.attrib['quality'],
             # The playpath starts at 'mp4:', if we don't manually
             # split the url, rtmpdump will incorrectly parse them
             'url': q.text.split('mp4:', 1)[0],
@@ -133,7 +133,7 @@ class ArteTVPlus7IE(InfoExtractor):
                 'width': int_or_none(f.get('width')),
                 'height': int_or_none(f.get('height')),
                 'tbr': int_or_none(f.get('bitrate')),
-                'quality': qfunc(f['quality']),
+                'quality': qfunc(f.get('quality')),
                 'source_preference': source_pref,
             }
 
diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py
new file mode 100644 (file)
index 0000000..5db1941
--- /dev/null
@@ -0,0 +1,144 @@
+from __future__ import unicode_literals
+
+import time
+import hmac
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_str,
+    compat_urllib_parse,
+    compat_urllib_request,
+)
+from ..utils import (
+    int_or_none,
+    float_or_none,
+    xpath_text,
+    ExtractorError,
+)
+
+
+class AtresPlayerIE(InfoExtractor):  # atresplayer.com episodes: optional login, HMAC-signed URL API, metadata from a separate episode XML
+    _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html'  # 'id' is the slug before the trailing '_<digits>.html'
+    _TESTS = [
+        {
+            'url': 'http://www.atresplayer.com/television/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_2014122100174.html',
+            'md5': 'efd56753cda1bb64df52a3074f62e38a',
+            'info_dict': {
+                'id': 'capitulo-10-especial-solidario-nochebuena',
+                'ext': 'mp4',
+                'title': 'Especial Solidario de Nochebuena',
+                'description': 'md5:e2d52ff12214fa937107d21064075bf1',
+                'duration': 5527.6,
+                'thumbnail': 're:^https?://.*\.jpg$',
+            },
+        },
+        {
+            'url': 'http://www.atresplayer.com/television/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_2014122400174.html',
+            'only_matching': True,
+        },
+    ]
+
+    _USER_AGENT = 'Dalvik/1.6.0 (Linux; U; Android 4.3; GT-I9300 Build/JSS15J'  # NOTE(review): the closing ')' appears to be missing - confirm whether intentional
+    _MAGIC = 'QWtMLXs414Yo+c#_+Q#K@NN)'  # HMAC key used to sign video URL requests in _real_extract
+    _TIMESTAMP_SHIFT = 30000  # added to the (seconds) timestamp before signing
+
+    _TIME_API_URL = 'http://servicios.atresplayer.com/api/admin/time.json'
+    _URL_VIDEO_TEMPLATE = 'https://servicios.atresplayer.com/api/urlVideo/{1}/{0}/{1}|{2}|{3}.json'  # {0}=platform, {1}=episode id, {2}=shifted timestamp, {3}=token
+    _PLAYER_URL_TEMPLATE = 'https://servicios.atresplayer.com/episode/getplayer.json?episodePk=%s'
+    _EPISODE_URL_TEMPLATE = 'http://www.atresplayer.com/episodexml/%s'
+
+    _LOGIN_URL = 'https://servicios.atresplayer.com/j_spring_security_check'
+
+    def _real_initialize(self):
+        self._login()
+
+    def _login(self):
+        (username, password) = self._get_login_info()
+        if username is None:  # no credentials supplied: continue without logging in
+            return
+
+        login_form = {
+            'j_username': username,
+            'j_password': password,
+        }
+
+        request = compat_urllib_request.Request(
+            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
+        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+        response = self._download_webpage(
+            request, None, 'Logging in as %s' % username)
+
+        error = self._html_search_regex(
+            r'(?s)<ul class="list_error">(.+?)</ul>', response, 'error', default=None)  # the error list is only present on a failed login
+        if error:
+            raise ExtractorError(
+                'Unable to login: %s' % error, expected=True)
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        episode_id = self._search_regex(
+            r'episode="([^"]+)"', webpage, 'episode id')
+
+        timestamp = int_or_none(self._download_webpage(
+            self._TIME_API_URL,
+            video_id, 'Downloading timestamp', fatal=False), 1000, int(time.time()))  # local-clock fallback truncated to whole seconds, matching the integer the scaled server value yields
+        timestamp_shifted = compat_str(timestamp + self._TIMESTAMP_SHIFT)
+        token = hmac.new(
+            self._MAGIC.encode('ascii'),
+            (episode_id + timestamp_shifted).encode('utf-8')
+        ).hexdigest()  # NOTE(review): no digestmod is passed; Python 3.8+ requires one explicitly
+
+        formats = []
+        for fmt in ['windows', 'android_tablet']:
+            request = compat_urllib_request.Request(
+                self._URL_VIDEO_TEMPLATE.format(fmt, episode_id, timestamp_shifted, token))
+            request.add_header('Youtubedl-user-agent', self._USER_AGENT)
+
+            fmt_json = self._download_json(
+                request, video_id, 'Downloading %s video JSON' % fmt)
+
+            result = fmt_json.get('resultDes') or 'no resultDes in response'  # a missing field used to crash on None.lower(); it is now reported as an extractor error
+            if result.lower() != 'ok':
+                raise ExtractorError(
+                    '%s returned error: %s' % (self.IE_NAME, result), expected=True)
+
+            for _, video_url in fmt_json['resultObject'].items():
+                if video_url.endswith('/Manifest'):
+                    formats.extend(self._extract_f4m_formats(video_url[:-9] + '/manifest.f4m', video_id))  # [:-9] strips the 9-character '/Manifest' suffix
+                else:
+                    formats.append({
+                        'url': video_url,
+                        'format_id': 'android',
+                        'preference': 1,
+                    })
+        self._sort_formats(formats)
+
+        player = self._download_json(
+            self._PLAYER_URL_TEMPLATE % episode_id,
+            episode_id)
+
+        path_data = player.get('pathData')  # NOTE(review): None here would be formatted into the episode XML URL as 'None'
+
+        episode = self._download_xml(
+            self._EPISODE_URL_TEMPLATE % path_data,
+            video_id, 'Downloading episode XML')
+
+        duration = float_or_none(xpath_text(
+            episode, './media/asset/info/technical/contentDuration', 'duration'))
+
+        art = episode.find('./media/asset/info/art')  # title and description are read relative to this node
+        title = xpath_text(art, './name', 'title')
+        description = xpath_text(art, './description', 'description')
+        thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/atttechchannel.py b/youtube_dl/extractor/atttechchannel.py
new file mode 100644 (file)
index 0000000..b01d35b
--- /dev/null
@@ -0,0 +1,55 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import unified_strdate
+
+
+class ATTTechChannelIE(InfoExtractor):  # techchannel.att.com video pages; yields a single RTMP stream reported as 'flv'
+    _VALID_URL = r'https?://techchannel\.att\.com/play-video\.cfm/([^/]+/)*(?P<id>.+)'  # 'id' is what remains after any leading 'segment/' parts
+    _TEST = {
+        'url': 'http://techchannel.att.com/play-video.cfm/2014/1/27/ATT-Archives-The-UNIX-System-Making-Computers-Easier-to-Use',
+        'info_dict': {
+            'id': '11316',
+            'display_id': 'ATT-Archives-The-UNIX-System-Making-Computers-Easier-to-Use',
+            'ext': 'flv',
+            'title': 'AT&T Archives : The UNIX System: Making Computers Easier to Use',
+            'description': 'A 1982 film about UNIX is the foundation for software in use around Bell Labs and AT&T.',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'upload_date': '20140127',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        video_url = self._search_regex(
+            r"url\s*:\s*'(rtmp://[^']+)'",
+            webpage, 'video URL')  # only rtmp:// URLs are accepted by the pattern
+
+        video_id = self._search_regex(
+            r'mediaid\s*=\s*(\d+)',
+            webpage, 'video id', fatal=False)  # non-fatal lookup, so 'id' in the result may be empty
+
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+        upload_date = unified_strdate(self._search_regex(
+            r'[Rr]elease\s+date:\s*(\d{1,2}/\d{1,2}/\d{4})',
+            webpage, 'upload date', fatal=False), False)  # NOTE(review): the trailing False is presumably day_first=False (month/day/year, cf. the test's 2014/1/27) - confirm
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'url': video_url,
+            'ext': 'flv',
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'upload_date': upload_date,
+        }
index eeeec768fe302a508008e095f138690477594553..8bfe502143a2cbf9c076bdabaad3e7ad5d2090b6 100644 (file)
@@ -1,11 +1,15 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import itertools
+import time
+
 from .common import InfoExtractor
 from .soundcloud import SoundcloudIE
-from ..utils import ExtractorError
-
-import time
+from ..utils import (
+    ExtractorError,
+    url_basename,
+)
 
 
 class AudiomackIE(InfoExtractor):
@@ -17,53 +21,119 @@ class AudiomackIE(InfoExtractor):
             'url': 'http://www.audiomack.com/song/roosh-williams/extraordinary',
             'info_dict':
             {
-                'id': 'roosh-williams/extraordinary',
+                'id': '310086',
                 'ext': 'mp3',
-                'title': 'Roosh Williams - Extraordinary'
+                'uploader': 'Roosh Williams',
+                'title': 'Extraordinary'
             }
         },
-        # hosted on soundcloud via audiomack
+        # audiomack wrapper around soundcloud song
         {
+            'add_ie': ['Soundcloud'],
             'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare',
-            'file': '172419696.mp3',
-            'info_dict':
-            {
+            'info_dict': {
+                'id': '172419696',
                 'ext': 'mp3',
+                'description': 'md5:1fc3272ed7a635cce5be1568c2822997',
                 'title': 'Young Thug ft Lil Wayne - Take Kare',
-                "upload_date": "20141016",
-                "description": "New track produced by London On Da Track called “Take Kare\"\n\nhttp://instagram.com/theyoungthugworld\nhttps://www.facebook.com/ThuggerThuggerCashMoney\n",
-                "uploader": "Young Thug World"
+                'uploader': 'Young Thug World',
+                'upload_date': '20141016',
             }
-        }
+        },
     ]
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        # URLs end with [uploader name]/[uploader title]
+        # this title is whatever the user types in, and is rarely
+        # the proper song title.  Real metadata is in the api response
+        album_url_tag = self._match_id(url)
 
+        # Request the extended version of the api for extra fields like artist and title
         api_response = self._download_json(
-            "http://www.audiomack.com/api/music/url/song/%s?_=%d" % (
-                video_id, time.time()),
-            video_id)
+            'http://www.audiomack.com/api/music/url/song/%s?extended=1&_=%d' % (
+                album_url_tag, time.time()),
+            album_url_tag)
 
-        if "url" not in api_response:
-            raise ExtractorError("Unable to deduce api url of song")
-        realurl = api_response["url"]
+        # API is inconsistent with errors
+        if 'url' not in api_response or not api_response['url'] or 'error' in api_response:
+            raise ExtractorError('Invalid url %s', url)
 
         # Audiomack wraps a lot of soundcloud tracks in their branded wrapper
-        # - if so, pass the work off to the soundcloud extractor
-        if SoundcloudIE.suitable(realurl):
-            return {'_type': 'url', 'url': realurl, 'ie_key': 'Soundcloud'}
-
-        webpage = self._download_webpage(url, video_id)
-        artist = self._html_search_regex(
-            r'<span class="artist">(.*?)</span>', webpage, "artist")
-        songtitle = self._html_search_regex(
-            r'<h1 class="profile-title song-title"><span class="artist">.*?</span>(.*?)</h1>',
-            webpage, "title")
-        title = artist + " - " + songtitle
+        # if so, pass the work off to the soundcloud extractor
+        if SoundcloudIE.suitable(api_response['url']):
+            return {'_type': 'url', 'url': api_response['url'], 'ie_key': 'Soundcloud'}
 
         return {
-            'id': video_id,
-            'title': title,
-            'url': realurl,
+            'id': api_response.get('id', album_url_tag),
+            'uploader': api_response.get('artist'),
+            'title': api_response.get('title'),
+            'url': api_response['url'],
+        }
+
+
+class AudiomackAlbumIE(InfoExtractor):  # album playlists, assembled by querying the per-track API until it reports no URL
+    _VALID_URL = r'https?://(?:www\.)?audiomack\.com/album/(?P<id>[\w/-]+)'  # 'id' may contain '/', e.g. '<uploader>/<album>'
+    IE_NAME = 'audiomack:album'
+    _TESTS = [
+        # Standard album playlist
+        {
+            'url': 'http://www.audiomack.com/album/flytunezcom/tha-tour-part-2-mixtape',
+            'playlist_count': 15,
+            'info_dict':
+            {
+                'id': '812251',
+                'title': 'Tha Tour: Part 2 (Official Mixtape)'
+            }
+        },
+        # Album playlist ripped from fakeshoredrive with no metadata
+        {
+            'url': 'http://www.audiomack.com/album/fakeshoredrive/ppp-pistol-p-project',
+            'playlist': [{
+                'info_dict': {
+                    'title': '9.-heaven-or-hell-chimaca-ft-zuse-prod-by-dj-fu',
+                    'id': '9.-heaven-or-hell-chimaca-ft-zuse-prod-by-dj-fu',
+                    'ext': 'mp3',
+                }
+            }],
+            'params': {
+                'playliststart': 8,
+                'playlistend': 8,
+            }
         }
+    ]
+
+    def _real_extract(self, url):
+        # URLs end with [uploader name]/[uploader title]
+        # this title is whatever the user types in, and is rarely
+        # the proper song title.  Real metadata is in the api response
+        album_url_tag = self._match_id(url)
+        result = {'_type': 'playlist', 'entries': []}  # 'id' and 'title' are filled in from track metadata inside the loop
+        # There is no one endpoint for album metadata - instead it is included/repeated in each song's metadata
+        # Therefore we don't know how many songs the album has and must infi-loop until failure
+        for track_no in itertools.count():  # 0-based index sent to the API; the progress note shows it 1-based
+            # Get song's metadata
+            api_response = self._download_json(
+                'http://www.audiomack.com/api/music/url/album/%s/%d?extended=1&_=%d'
+                % (album_url_tag, track_no, time.time()), album_url_tag,
+                note='Querying song information (%d)' % (track_no + 1))
+
+            # Total failure, only occurs when url is totally wrong
+            # Won't happen in middle of valid playlist (next case)
+            if 'url' not in api_response or 'error' in api_response:
+                raise ExtractorError('Invalid url for track %d of album url %s' % (track_no, url))
+            # URL is good but song id doesn't exist - usually means end of playlist
+            elif not api_response['url']:
+                break  # the only normal exit from the otherwise unbounded loop
+            else:
+                # Pull out the album metadata and add to result (if it exists)
+                for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]:
+                    if apikey in api_response and resultkey not in result:  # the first track carrying a field sets it; later tracks do not overwrite
+                        result[resultkey] = api_response[apikey]
+                song_id = url_basename(api_response['url']).rpartition('.')[0]  # text before the last '.' of url_basename()'s result; fallback id/title below
+                result['entries'].append({
+                    'id': api_response.get('id', song_id),
+                    'uploader': api_response.get('artist'),
+                    'title': api_response.get('title', song_id),
+                    'url': api_response['url'],
+                })
+        return result
index 1c765532a00d9274c2531277cc1ad81b75053dfd..a1b666be0a4ce1610cfad79f5393ec23a9427bc8 100644 (file)
@@ -3,10 +3,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_urllib_parse
 from ..utils import (
-    compat_urllib_parse,
     determine_ext,
     ExtractorError,
+    remove_end,
 )
 
 
@@ -27,23 +28,18 @@ class AUEngineIE(InfoExtractor):
         video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
-        title = self._html_search_regex(r'<title>(?P<title>.+?)</title>', webpage, 'title')
-        title = title.strip()
-        links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage)
-        links = map(compat_urllib_parse.unquote, links)
-
-        thumbnail = None
-        video_url = None
-        for link in links:
-            if link.endswith('.png'):
-                thumbnail = link
-            elif '/videos/' in link:
-                video_url = link
+        title = self._html_search_regex(
+            r'<title>\s*(?P<title>.+?)\s*</title>', webpage, 'title')
+        video_urls = re.findall(r'http://\w+.auengine.com/vod/.*[^\W]', webpage)
+        video_url = compat_urllib_parse.unquote(video_urls[0])
+        thumbnails = re.findall(r'http://\w+.auengine.com/thumb/.*[^\W]', webpage)
+        thumbnail = compat_urllib_parse.unquote(thumbnails[0])
+
         if not video_url:
             raise ExtractorError('Could not find video URL')
+
         ext = '.' + determine_ext(video_url)
-        if ext == title[-len(ext):]:
-            title = title[:-len(ext)]
+        title = remove_end(title, ext)
 
         return {
             'id': video_id,
diff --git a/youtube_dl/extractor/azubu.py b/youtube_dl/extractor/azubu.py
new file mode 100644 (file)
index 0000000..0961d33
--- /dev/null
@@ -0,0 +1,93 @@
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import float_or_none
+
+
+class AzubuIE(InfoExtractor):
+    """Extractor for azubu.tv video pages (``.../<channel>#!/play/<id>``)."""
+    _VALID_URL = r'https?://(?:www\.)?azubu\.tv/[^/]+#!/play/(?P<id>\d+)'
+    _TESTS = [
+        {
+            'url': 'http://www.azubu.tv/GSL#!/play/15575/2014-hot6-cup-last-big-match-ro8-day-1',
+            'md5': 'a88b42fcf844f29ad6035054bd9ecaf4',
+            'info_dict': {
+                'id': '15575',
+                'ext': 'mp4',
+                'title': '2014 HOT6 CUP LAST BIG MATCH Ro8 Day 1',
+                'description': 'md5:d06bdea27b8cc4388a90ad35b5c66c01',
+                'thumbnail': 're:^https?://.*\.jpe?g',
+                'timestamp': 1417523507.334,
+                'upload_date': '20141202',
+                'duration': 9988.7,
+                'uploader': 'GSL',
+                'uploader_id': 414310,
+                'view_count': int,
+            },
+        },
+        {
+            'url': 'http://www.azubu.tv/FnaticTV#!/play/9344/-fnatic-at-worlds-2014:-toyz---%22i-love-rekkles,-he-has-amazing-mechanics%22-',
+            'md5': 'b72a871fe1d9f70bd7673769cdb3b925',
+            'info_dict': {
+                'id': '9344',
+                'ext': 'mp4',
+                'title': 'Fnatic at Worlds 2014: Toyz - "I love Rekkles, he has amazing mechanics"',
+                'description': 'md5:4a649737b5f6c8b5c5be543e88dc62af',
+                'thumbnail': 're:^https?://.*\.jpe?g',
+                'timestamp': 1410530893.320,
+                'upload_date': '20140912',
+                'duration': 172.385,
+                'uploader': 'FnaticTV',
+                'uploader_id': 272749,
+                'view_count': int,
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        """Return the info dict for the video whose id is matched in *url*.
+
+        Downloads the site's JSON video API document for that id and builds
+        the formats list from the renditions found in its embedded
+        ``stream_params`` JSON string.
+        """
+        video_id = self._match_id(url)
+
+        data = self._download_json(
+            'http://www.azubu.tv/api/video/%s' % video_id, video_id)['data']
+
+        # Only the title is mandatory.  Everything else is optional
+        # metadata, so a key missing from the API response must yield None
+        # instead of aborting the whole extraction with a KeyError.
+        title = data['title'].strip()
+        description = data.get('description')
+        thumbnail = data.get('thumbnail')
+        view_count = data.get('view_count')
+        user = data.get('user') or {}
+        uploader = user.get('username')
+        uploader_id = user.get('id')
+
+        # stream_params is a JSON document serialized into a string field
+        stream_params = json.loads(data['stream_params'])
+
+        # Both values are scaled down by 1000 (presumably ms -> s)
+        timestamp = float_or_none(stream_params.get('creationDate'), 1000)
+        duration = float_or_none(stream_params.get('length'), 1000)
+
+        renditions = stream_params.get('renditions') or []
+        # The full-length variant, when present, is offered as one more format
+        video = stream_params.get('FLVFullLength') or stream_params.get('videoFullLength')
+        if video:
+            renditions.append(video)
+
+        # Renditions without a usable URL are skipped
+        formats = [{
+            'url': fmt['url'],
+            'width': fmt.get('frameWidth'),
+            'height': fmt.get('frameHeight'),
+            'vbr': float_or_none(fmt.get('encodingRate'), 1000),
+            'filesize': fmt.get('size'),
+            'vcodec': fmt.get('videoCodec'),
+            'container': fmt.get('videoContainer'),
+        } for fmt in renditions if fmt.get('url')]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'duration': duration,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'view_count': view_count,
+            'formats': formats,
+        }
index 1ca0b7cf2bf78717fd45a11d17e3cce7e5191b9b..98e1443ab0c3d380737f34be2c67fa760e08a221 100644 (file)
@@ -5,7 +5,7 @@ import json
 import itertools
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_request,
 )
 
index acddbc8f1d19ebc48b721b3867b98fc30af3c133..aea0263d6b681ba33e66d20cc1a247213299540f 100644 (file)
@@ -4,9 +4,11 @@ import json
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_str,
     compat_urlparse,
+)
+from ..utils import (
     ExtractorError,
 )
 
@@ -104,7 +106,7 @@ class BandcampIE(InfoExtractor):
 
 class BandcampAlbumIE(InfoExtractor):
     IE_NAME = 'Bandcamp:album'
-    _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+))'
+    _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+)|/?(?:$|[?#]))'
 
     _TESTS = [{
         'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
@@ -139,6 +141,12 @@ class BandcampAlbumIE(InfoExtractor):
             'title': 'Hierophany of the Open Grave',
         },
         'playlist_mincount': 9,
+    }, {
+        'url': 'http://dotscale.bandcamp.com',
+        'info_dict': {
+            'title': 'Loom',
+        },
+        'playlist_mincount': 7,
     }]
 
     def _real_extract(self, url):
@@ -153,7 +161,8 @@ class BandcampAlbumIE(InfoExtractor):
         entries = [
             self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
             for t_path in tracks_paths]
-        title = self._search_regex(r'album_title : "(.*?)"', webpage, 'title')
+        title = self._search_regex(
+            r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False)
         return {
             '_type': 'playlist',
             'id': playlist_id,
index beb6cfc8ae88a3c40ac2b4ee7f6b0ae2c6ddfc87..1cf48fe0dd739b328478899a83f0d8aba94e6c4a 100644 (file)
@@ -1,6 +1,5 @@
 from __future__ import unicode_literals
 
-import re
 import xml.etree.ElementTree
 
 from .subtitles import SubtitlesInfoExtractor
@@ -11,7 +10,7 @@ from ..compat import compat_HTTPError
 class BBCCoUkIE(SubtitlesInfoExtractor):
     IE_NAME = 'bbc.co.uk'
     IE_DESC = 'BBC iPlayer'
-    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:programmes|iplayer/episode)/(?P<id>[\da-z]{8})'
+    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
 
     _TESTS = [
         {
@@ -19,8 +18,8 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
             'info_dict': {
                 'id': 'b039d07m',
                 'ext': 'flv',
-                'title': 'Kaleidoscope: Leonard Cohen',
-                'description': 'md5:db4755d7a665ae72343779f7dacb402c',
+                'title': 'Kaleidoscope, Leonard Cohen',
+                'description': 'The Canadian poet and songwriter reflects on his musical career.',
                 'duration': 1740,
             },
             'params': {
@@ -72,7 +71,54 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
                 'skip_download': True,
             },
             'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
-        },
+        }, {
+            'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
+            'info_dict': {
+                'id': 'b04v209v',
+                'ext': 'flv',
+                'title': 'Pete Tong, The Essential New Tune Special',
+                'description': "Pete has a very special mix - all of 2014's Essential New Tunes!",
+                'duration': 10800,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            }
+        }, {
+            'url': 'http://www.bbc.co.uk/music/clips/p02frcc3',
+            'note': 'Audio',
+            'info_dict': {
+                'id': 'p02frcch',
+                'ext': 'flv',
+                'title': 'Pete Tong, Past, Present and Future Special, Madeon - After Hours mix',
+                'description': 'French house superstar Madeon takes us out of the club and onto the after party.',
+                'duration': 3507,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            }
+        }, {
+            'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
+            'note': 'Video',
+            'info_dict': {
+                'id': 'p025c103',
+                'ext': 'flv',
+                'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
+                'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
+                'duration': 226,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            }
+        }, {
+            'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
+            'only_matching': True,
+        }, {
+            'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
+            'only_matching': True,
+        }
     ]
 
     def _extract_asx_playlist(self, connection, programme_id):
@@ -204,13 +250,66 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
 
         return formats, subtitles
 
+    def _download_playlist(self, playlist_id):
+        """Resolve a playlist/episode id to its playable programme.
+
+        The JSON playlist endpoint is tried first.  If it answers with an
+        HTTP 404, or its document has no ``defaultAvailableVersion``, the
+        legacy XML playlist is used instead.
+
+        Returns a tuple
+        ``(programme_id, title, description, duration, formats, subtitles)``.
+
+        Raises ExtractorError (expected=True) when the legacy playlist
+        carries a ``noItems`` element, and re-raises any ExtractorError from
+        the JSON attempt that was not caused by an HTTP 404.
+        """
+        try:
+            playlist = self._download_json(
+                'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id,
+                playlist_id, 'Downloading playlist JSON')
+
+            version = playlist.get('defaultAvailableVersion')
+            if version:
+                smp_config = version['smpConfig']
+                title = smp_config['title']
+                description = smp_config['summary']
+                # Only (radio) programme items are playable; when several
+                # match, the values of the last one are returned.
+                # NOTE(review): if no item matches, programme_id, duration,
+                # formats and subtitles are never bound and the return
+                # below raises UnboundLocalError.
+                for item in smp_config['items']:
+                    kind = item['kind']
+                    if kind != 'programme' and kind != 'radioProgramme':
+                        continue
+                    programme_id = item.get('vpid')
+                    duration = int(item.get('duration'))
+                    formats, subtitles = self._download_media_selector(programme_id)
+                return programme_id, title, description, duration, formats, subtitles
+        except ExtractorError as ee:
+            # Only an HTTP 404 means "no JSON playlist, try the legacy one".
+            # The parentheses matter: ``not`` binds tighter than ``and``, so
+            # without them other HTTP errors were silently swallowed and an
+            # error without a cause crashed on ``None.code``.
+            if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
+                raise
+
+        # fallback to legacy playlist
+        playlist = self._download_xml(
+            'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id,
+            playlist_id, 'Downloading legacy playlist XML')
+
+        # A noItems element carries the reason why nothing can be played
+        no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
+        if no_items is not None:
+            reason = no_items.get('reason')
+            if reason == 'preAvailability':
+                msg = 'Episode %s is not yet available' % playlist_id
+            elif reason == 'postAvailability':
+                msg = 'Episode %s is no longer available' % playlist_id
+            elif reason == 'noMedia':
+                msg = 'Episode %s is not currently available' % playlist_id
+            else:
+                msg = 'Episode %s is not available: %s' % (playlist_id, reason)
+            raise ExtractorError(msg, expected=True)
+
+        # Same selection rule as for the JSON playlist: keep the last
+        # (radio) programme item.
+        for item in self._extract_items(playlist):
+            kind = item.get('kind')
+            if kind != 'programme' and kind != 'radioProgramme':
+                continue
+            title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
+            description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
+            programme_id = item.get('identifier')
+            duration = int(item.get('duration'))
+            formats, subtitles = self._download_media_selector(programme_id)
+
+        return programme_id, title, description, duration, formats, subtitles
+
     def _real_extract(self, url):
         group_id = self._match_id(url)
 
         webpage = self._download_webpage(url, group_id, 'Downloading video page')
 
         programme_id = self._search_regex(
-            r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False)
+            r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False, default=None)
         if programme_id:
             player = self._download_json(
                 'http://www.bbc.co.uk/iplayer/episode/%s.json' % group_id,
@@ -220,32 +319,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
             duration = player['duration']
             formats, subtitles = self._download_media_selector(programme_id)
         else:
-            playlist = self._download_xml(
-                'http://www.bbc.co.uk/iplayer/playlist/%s' % group_id,
-                group_id, 'Downloading playlist XML')
-
-            no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
-            if no_items is not None:
-                reason = no_items.get('reason')
-                if reason == 'preAvailability':
-                    msg = 'Episode %s is not yet available' % group_id
-                elif reason == 'postAvailability':
-                    msg = 'Episode %s is no longer available' % group_id
-                elif reason == 'noMedia':
-                    msg = 'Episode %s is not currently available' % group_id
-                else:
-                    msg = 'Episode %s is not available: %s' % (group_id, reason)
-                raise ExtractorError(msg, expected=True)
-
-            for item in self._extract_items(playlist):
-                kind = item.get('kind')
-                if kind != 'programme' and kind != 'radioProgramme':
-                    continue
-                title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
-                description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
-                programme_id = item.get('identifier')
-                duration = int(item.get('duration'))
-                formats, subtitles = self._download_media_selector(programme_id)
+            programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
 
         if self._downloader.params.get('listsubtitles', False):
             self._list_available_subtitles(programme_id, subtitles)
index 31fdc0dcc0614babf4ff3b48186566904cfcc57a..1bdc25812b6afb4cf133007f2d12b89fd56b353f 100644 (file)
@@ -10,15 +10,15 @@ from ..utils import url_basename
 class BehindKinkIE(InfoExtractor):
     _VALID_URL = r'http://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'
     _TEST = {
-        'url': 'http://www.behindkink.com/2014/08/14/ab1576-performers-voice-finally-heard-the-bill-is-killed/',
-        'md5': '41ad01222b8442089a55528fec43ec01',
+        'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/',
+        'md5': '507b57d8fdcd75a41a9a7bdb7989c762',
         'info_dict': {
-            'id': '36370',
+            'id': '37127',
             'ext': 'mp4',
-            'title': 'AB1576 - PERFORMERS VOICE FINALLY HEARD - THE BILL IS KILLED!',
-            'description': 'The adult industry voice was finally heard as Assembly Bill 1576 remained\xa0 in suspense today at the Senate Appropriations Hearing. AB1576 was, among other industry damaging issues, a condom mandate...',
-            'upload_date': '20140814',
-            'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/08/36370_AB1576_Win.jpg',
+            'title': 'What are you passionate about – Marley Blaze',
+            'description': 'md5:aee8e9611b4ff70186f752975d9b94b4',
+            'upload_date': '20141205',
+            'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/12/blaze-1.jpg',
             'age_limit': 18,
         }
     }
@@ -26,26 +26,19 @@ class BehindKinkIE(InfoExtractor):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         display_id = mobj.group('id')
-        year = mobj.group('year')
-        month = mobj.group('month')
-        day = mobj.group('day')
-        upload_date = year + month + day
 
         webpage = self._download_webpage(url, display_id)
 
         video_url = self._search_regex(
-            r"'file':\s*'([^']+)'",
-            webpage, 'URL base')
-
-        video_id = url_basename(video_url)
-        video_id = video_id.split('_')[0]
+            r'<source src="([^"]+)"', webpage, 'video URL')
+        video_id = url_basename(video_url).split('_')[0]
+        upload_date = mobj.group('year') + mobj.group('month') + mobj.group('day')
 
         return {
             'id': video_id,
+            'display_id': display_id,
             'url': video_url,
-            'ext': 'mp4',
             'title': self._og_search_title(webpage),
-            'display_id': display_id,
             'thumbnail': self._og_search_thumbnail(webpage),
             'description': self._og_search_description(webpage),
             'upload_date': upload_date,
diff --git a/youtube_dl/extractor/bet.py b/youtube_dl/extractor/bet.py
new file mode 100644 (file)
index 0000000..d2abd4d
--- /dev/null
@@ -0,0 +1,107 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse
+from ..utils import (
+    xpath_text,
+    xpath_with_ns,
+    int_or_none,
+    parse_iso8601,
+)
+
+
+class BetIE(InfoExtractor):
+    """Extractor for bet.com pages whose path ends in ``<display id>.html``."""
+    _VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html'
+    _TESTS = [
+        {
+            'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html',
+            'info_dict': {
+                'id': '740ab250-bb94-4a8a-8787-fe0de7c74471',
+                'display_id': 'in-bet-exclusive-obama-talks-race-and-racism',
+                'ext': 'flv',
+                'title': 'BET News Presents: A Conversation With President Obama',
+                'description': 'md5:5a88d8ae912c1b33e090290af7ec33c6',
+                'duration': 1534,
+                'timestamp': 1418075340,
+                'upload_date': '20141208',
+                'uploader': 'admin',
+                'thumbnail': 're:(?i)^https?://.*\.jpg$',
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html',
+            'info_dict': {
+                'id': 'bcd1b1df-673a-42cf-8d01-b282db608f2d',
+                'display_id': 'justice-for-ferguson-a-community-reacts',
+                'ext': 'flv',
+                'title': 'Justice for Ferguson: A Community Reacts',
+                'description': 'A BET News special.',
+                'duration': 1696,
+                'timestamp': 1416942360,
+                'upload_date': '20141125',
+                'uploader': 'admin',
+                'thumbnail': 're:(?i)^https?://.*\.jpg$',
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        }
+    ]
+
+    def _real_extract(self, url):
+        """Return the info dict for the video embedded in the page at *url*.
+
+        The page names an MRSS feed; its first ``channel/item`` supplies the
+        metadata and a ``media:content`` element whose ``url`` attribute
+        points at the SMIL document the formats are extracted from.
+        """
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        # The feed URL appears percent-encoded in either of two page
+        # variables, so it is unquoted before being requested
+        media_url = compat_urllib_parse.unquote(self._search_regex(
+            [r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"],
+            webpage, 'media URL'))
+
+        mrss = self._download_xml(media_url, display_id)
+
+        item = mrss.find('./channel/item')
+
+        NS_MAP = {
+            'dc': 'http://purl.org/dc/elements/1.1/',
+            'media': 'http://search.yahoo.com/mrss/',
+            'ka': 'http://kickapps.com/karss',
+        }
+
+        title = xpath_text(item, './title', 'title')
+        description = xpath_text(
+            item, './description', 'description', fatal=False)
+
+        video_id = xpath_text(item, './guid', 'video id', fatal=False)
+
+        timestamp = parse_iso8601(xpath_text(
+            item, xpath_with_ns('./dc:date', NS_MAP),
+            'upload date', fatal=False))
+        uploader = xpath_text(
+            item, xpath_with_ns('./dc:creator', NS_MAP),
+            'uploader', fatal=False)
+
+        media_content = item.find(
+            xpath_with_ns('./media:content', NS_MAP))
+        duration = int_or_none(media_content.get('duration'))
+        smil_url = media_content.get('url')
+
+        # The thumbnail is optional metadata: a feed without a
+        # media:thumbnail element must not abort the extraction.  The test
+        # has to be ``is not None`` because an Element without children is
+        # falsy.
+        thumbnail_el = media_content.find(
+            xpath_with_ns('./media:thumbnail', NS_MAP))
+        thumbnail = thumbnail_el.get('url') if thumbnail_el is not None else None
+
+        formats = self._extract_smil_formats(smil_url, display_id)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'uploader': uploader,
+            'duration': duration,
+            'formats': formats,
+        }
index 0d5889f5d17c17ffa75eeca1f1079efd7f9c2b8f..75d744852edc382721cee8556067f89ccb0092df 100644 (file)
@@ -5,8 +5,6 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
-    compat_parse_qs,
-    ExtractorError,
     int_or_none,
     unified_strdate,
 )
@@ -29,10 +27,9 @@ class BiliBiliIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
+
         video_code = self._search_regex(
             r'(?s)<div itemprop="video".*?>(.*?)</div>', webpage, 'video code')
 
@@ -55,45 +52,38 @@ class BiliBiliIE(InfoExtractor):
         thumbnail = self._html_search_meta(
             'thumbnailUrl', video_code, 'thumbnail', fatal=False)
 
-        player_params = compat_parse_qs(self._html_search_regex(
-            r'<iframe .*?class="player" src="https://secure\.bilibili\.(?:tv|com)/secure,([^"]+)"',
-            webpage, 'player params'))
+        cid = self._search_regex(r'cid=(\d+)', webpage, 'cid')
 
-        if 'cid' in player_params:
-            cid = player_params['cid'][0]
+        lq_doc = self._download_xml(
+            'http://interface.bilibili.com/v_cdn_play?appkey=1&cid=%s' % cid,
+            video_id,
+            note='Downloading LQ video info'
+        )
+        lq_durl = lq_doc.find('./durl')
+        formats = [{
+            'format_id': 'lq',
+            'quality': 1,
+            'url': lq_durl.find('./url').text,
+            'filesize': int_or_none(
+                lq_durl.find('./size'), get_attr='text'),
+        }]
 
-            lq_doc = self._download_xml(
-                'http://interface.bilibili.cn/v_cdn_play?cid=%s' % cid,
-                video_id,
-                note='Downloading LQ video info'
-            )
-            lq_durl = lq_doc.find('.//durl')
-            formats = [{
-                'format_id': 'lq',
-                'quality': 1,
-                'url': lq_durl.find('./url').text,
+        hq_doc = self._download_xml(
+            'http://interface.bilibili.com/playurl?appkey=1&cid=%s' % cid,
+            video_id,
+            note='Downloading HQ video info',
+            fatal=False,
+        )
+        if hq_doc is not False:
+            hq_durl = hq_doc.find('./durl')
+            formats.append({
+                'format_id': 'hq',
+                'quality': 2,
+                'ext': 'flv',
+                'url': hq_durl.find('./url').text,
                 'filesize': int_or_none(
-                    lq_durl.find('./size'), get_attr='text'),
-            }]
-
-            hq_doc = self._download_xml(
-                'http://interface.bilibili.cn/playurl?cid=%s' % cid,
-                video_id,
-                note='Downloading HQ video info',
-                fatal=False,
-            )
-            if hq_doc is not False:
-                hq_durl = hq_doc.find('.//durl')
-                formats.append({
-                    'format_id': 'hq',
-                    'quality': 2,
-                    'ext': 'flv',
-                    'url': hq_durl.find('./url').text,
-                    'filesize': int_or_none(
-                        hq_durl.find('./size'), get_attr='text'),
-                })
-        else:
-            raise ExtractorError('Unsupported player parameters: %r' % (player_params,))
+                    hq_durl.find('./size'), get_attr='text'),
+            })
 
         self._sort_formats(formats)
         return {
index da47f27bdd6702d3927f3fde72fc0ebe064df53a..14b814120be3b8215a28fc00a95f87bd22e0c062 100644 (file)
@@ -4,13 +4,17 @@ import re
 
 from .common import InfoExtractor
 from .subtitles import SubtitlesInfoExtractor
-from ..utils import (
+
+from ..compat import (
+    compat_str,
     compat_urllib_request,
-    unescapeHTML,
-    parse_iso8601,
     compat_urlparse,
+)
+from ..utils import (
     clean_html,
-    compat_str,
+    int_or_none,
+    parse_iso8601,
+    unescapeHTML,
 )
 
 
@@ -78,7 +82,25 @@ class BlipTVIE(SubtitlesInfoExtractor):
                 'uploader': 'NostalgiaCritic',
                 'uploader_id': '246467',
             }
-        }
+        },
+        {
+            # https://github.com/rg3/youtube-dl/pull/4404
+            'note': 'Audio only',
+            'url': 'http://blip.tv/hilarios-productions/weekly-manga-recap-kingdom-7119982',
+            'md5': '76c0a56f24e769ceaab21fbb6416a351',
+            'info_dict': {
+                'id': '7103299',
+                'ext': 'flv',
+                'title': 'Weekly Manga Recap: Kingdom',
+                'description': 'And then Shin breaks the enemy line, and he&apos;s all like HWAH! And then he slices a guy and it&apos;s all like FWASHING! And... it&apos;s really hard to describe the best parts of this series without breaking down into sound effects, okay?',
+                'timestamp': 1417660321,
+                'upload_date': '20141204',
+                'uploader': 'The Rollo T',
+                'uploader_id': '407429',
+                'duration': 7251,
+                'vcodec': 'none',
+            }
+        },
     ]
 
     def _real_extract(self, url):
@@ -145,11 +167,11 @@ class BlipTVIE(SubtitlesInfoExtractor):
                     'url': real_url,
                     'format_id': role,
                     'format_note': media_type,
-                    'vcodec': media_content.get(blip('vcodec')),
+                    'vcodec': media_content.get(blip('vcodec')) or 'none',
                     'acodec': media_content.get(blip('acodec')),
                     'filesize': media_content.get('filesize'),
-                    'width': int(media_content.get('width')),
-                    'height': int(media_content.get('height')),
+                    'width': int_or_none(media_content.get('width')),
+                    'height': int_or_none(media_content.get('height')),
                 })
         self._sort_formats(formats)
 
index 2c0e5eea2e0285ffce1b89340e9ae8894260f866..4bcc897c95229ea0ee509fe53443d355309a66aa 100644 (file)
@@ -14,7 +14,6 @@ class BreakIE(InfoExtractor):
     _VALID_URL = r'http://(?:www\.)?break\.com/video/(?:[^/]+/)*.+-(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056',
-        'md5': '33aa4ff477ecd124d18d7b5d23b87ce5',
         'info_dict': {
             'id': '2468056',
             'ext': 'mp4',
index 2db7f9fef4f50f4f7f95b812d5245c224dc7ed20..003152c4e6d6ec9880a54016870e41e42635f41c 100644 (file)
@@ -6,25 +6,26 @@ import json
 import xml.etree.ElementTree
 
 from .common import InfoExtractor
-from ..utils import (
-    compat_urllib_parse,
-    find_xpath_attr,
-    fix_xml_ampersands,
-    compat_urlparse,
-    compat_str,
-    compat_urllib_request,
+from ..compat import (
     compat_parse_qs,
+    compat_str,
+    compat_urllib_parse,
     compat_urllib_parse_urlparse,
-
+    compat_urllib_request,
+    compat_urlparse,
+)
+from ..utils import (
     determine_ext,
     ExtractorError,
-    unsmuggle_url,
+    find_xpath_attr,
+    fix_xml_ampersands,
     unescapeHTML,
+    unsmuggle_url,
 )
 
 
 class BrightcoveIE(InfoExtractor):
-    _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*?\?(?P<query>.*)'
+    _VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
     _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
 
     _TESTS = [
@@ -265,6 +266,7 @@ class BrightcoveIE(InfoExtractor):
                 url = rend['defaultURL']
                 if not url:
                     continue
+                ext = None
                 if rend['remote']:
                     url_comp = compat_urllib_parse_urlparse(url)
                     if url_comp.path.endswith('.m3u8'):
@@ -276,7 +278,7 @@ class BrightcoveIE(InfoExtractor):
                         # akamaihd.net, but they don't use f4m manifests
                         url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB'
                         ext = 'flv'
-                else:
+                if ext is None:
                     ext = determine_ext(url)
                 size = rend.get('size')
                 formats.append({
index a40a1bbc47b38529b7a5191bb0a6aeb96c15c532..a5d2af1749f188a086e5384b1de6a2441e624902 100644 (file)
@@ -33,7 +33,7 @@ class BuzzFeedIE(InfoExtractor):
             'skip_download': True,  # Got enough YouTube download tests
         },
         'info_dict': {
-            'description': 'Munchkin the Teddy Bear is back !',
+            'description': 're:Munchkin the Teddy Bear is back ?!',
             'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill',
         },
         'playlist': [{
@@ -42,9 +42,9 @@ class BuzzFeedIE(InfoExtractor):
                 'ext': 'mp4',
                 'upload_date': '20141124',
                 'uploader_id': 'CindysMunchkin',
-                'description': '© 2014 Munchkin the Shih Tzu\nAll rights reserved\nFacebook: http://facebook.com/MunchkintheShihTzu',
+                'description': 're:© 2014 Munchkin the Shih Tzu',
                 'uploader': 'Munchkin the Shih Tzu',
-                'title': 'Munchkin the Teddy Bear gets her exercise',
+                'title': 're:Munchkin the Teddy Bear gets her exercise',
             },
         }]
     }]
index 9873728df6f3bb1adbfddc1959aa5e7e70241f5b..11d18d74ace31a513a7c06be40baf6ce89c858b3 100644 (file)
@@ -5,6 +5,8 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
+    ExtractorError,
+    HEADRequest,
     unified_strdate,
     url_basename,
     qualities,
@@ -76,6 +78,16 @@ class CanalplusIE(InfoExtractor):
 
         preference = qualities(['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD', 'HLS', 'HDS'])
 
+        fmt_url = next(iter(media.find('VIDEOS'))).text
+        if '/geo' in fmt_url.lower():
+            response = self._request_webpage(
+                HEADRequest(fmt_url), video_id,
+                'Checking if the video is georestricted')
+            if '/blocage' in response.geturl():
+                raise ExtractorError(
+                    'The video is not available in your country',
+                    expected=True)
+
         formats = []
         for fmt in media.find('VIDEOS'):
             format_url = fmt.text
index 97feb6704075831fb8b5ef95a547428ecc57ec3f..f70e090bb5b01942713149493e48bc0e51f7f74b 100644 (file)
@@ -3,55 +3,50 @@ from __future__ import unicode_literals
 
 import re
 
-from .common import InfoExtractor
-from ..utils import (
+from .subtitles import SubtitlesInfoExtractor
+from ..compat import (
     compat_urllib_request,
     compat_urllib_parse,
     compat_urllib_parse_urlparse,
+)
+from ..utils import (
     ExtractorError,
+    float_or_none,
 )
 
 
-class CeskaTelevizeIE(InfoExtractor):
+class CeskaTelevizeIE(SubtitlesInfoExtractor):
     _VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'
 
     _TESTS = [
         {
-            'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/213512120230004-spanelska-chripka',
-            'info_dict': {
-                'id': '213512120230004',
-                'ext': 'flv',
-                'title': 'První republika: Španělská chřipka',
-                'duration': 3107.4,
-            },
-            'params': {
-                'skip_download': True,  # requires rtmpdump
-            },
-            'skip': 'Works only from Czech Republic.',
-        },
-        {
-            'url': 'http://www.ceskatelevize.cz/ivysilani/1030584952-tsatsiki-maminka-a-policajt',
+            'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
             'info_dict': {
-                'id': '20138143440',
-                'ext': 'flv',
-                'title': 'Tsatsiki, maminka a policajt',
-                'duration': 6754.1,
+                'id': '214411058091220',
+                'ext': 'mp4',
+                'title': 'Hyde Park Civilizace',
+                'description': 'Věda a současná civilizace. Interaktivní pořad - prostor pro vaše otázky a komentáře',
+                'thumbnail': 're:^https?://.*\.jpg',
+                'duration': 3350,
             },
             'params': {
-                'skip_download': True,  # requires rtmpdump
+                # m3u8 download
+                'skip_download': True,
             },
-            'skip': 'Works only from Czech Republic.',
         },
         {
             'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina',
             'info_dict': {
                 'id': '14716',
-                'ext': 'flv',
+                'ext': 'mp4',
                 'title': 'První republika: Zpěvačka z Dupárny Bobina',
-                'duration': 90,
+                'description': 'Sága mapující atmosféru první republiky od r. 1918 do r. 1945.',
+                'thumbnail': 're:^https?://.*\.jpg',
+                'duration': 88.4,
             },
             'params': {
-                'skip_download': True,  # requires rtmpdump
+                # m3u8 download
+                'skip_download': True,
             },
         },
     ]
@@ -78,8 +73,9 @@ class CeskaTelevizeIE(InfoExtractor):
             'requestSource': 'iVysilani',
         }
 
-        req = compat_urllib_request.Request('http://www.ceskatelevize.cz/ivysilani/ajax/get-playlist-url',
-                                            data=compat_urllib_parse.urlencode(data))
+        req = compat_urllib_request.Request(
+            'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
+            data=compat_urllib_parse.urlencode(data))
 
         req.add_header('Content-type', 'application/x-www-form-urlencoded')
         req.add_header('x-addr', '127.0.0.1')
@@ -88,39 +84,72 @@ class CeskaTelevizeIE(InfoExtractor):
 
         playlistpage = self._download_json(req, video_id)
 
-        req = compat_urllib_request.Request(compat_urllib_parse.unquote(playlistpage['url']))
+        playlist_url = playlistpage['url']
+        if playlist_url == 'error_region':
+            raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
+
+        req = compat_urllib_request.Request(compat_urllib_parse.unquote(playlist_url))
         req.add_header('Referer', url)
 
-        playlist = self._download_xml(req, video_id)
+        playlist = self._download_json(req, video_id)
 
+        item = playlist['playlist'][0]
         formats = []
-        for i in playlist.find('smilRoot/body'):
-            if 'AD' not in i.attrib['id']:
-                base_url = i.attrib['base']
-                parsedurl = compat_urllib_parse_urlparse(base_url)
-                duration = i.attrib['duration']
-
-                for video in i.findall('video'):
-                    if video.attrib['label'] != 'AD':
-                        format_id = video.attrib['label']
-                        play_path = video.attrib['src']
-                        vbr = int(video.attrib['system-bitrate'])
-
-                        formats.append({
-                            'format_id': format_id,
-                            'url': base_url,
-                            'vbr': vbr,
-                            'play_path': play_path,
-                            'app': parsedurl.path[1:] + '?' + parsedurl.query,
-                            'rtmp_live': True,
-                            'ext': 'flv',
-                        })
-
+        for format_id, stream_url in item['streamUrls'].items():
+            formats.extend(self._extract_m3u8_formats(stream_url, video_id, 'mp4'))
         self._sort_formats(formats)
 
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage)
+        duration = float_or_none(item.get('duration'))
+        thumbnail = item.get('previewImageUrl')
+
+        subtitles = {}
+        subs = item.get('subtitles')
+        if subs:
+            subtitles['cs'] = subs[0]['url']
+
+        if self._downloader.params.get('listsubtitles', False):
+            self._list_available_subtitles(video_id, subtitles)
+            return
+
+        subtitles = self._fix_subtitles(self.extract_subtitles(video_id, subtitles))
+
         return {
             'id': episode_id,
-            'title': self._html_search_regex(r'<title>(.+?) — iVysílání — Česká televize</title>', webpage, 'title'),
-            'duration': float(duration),
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
             'formats': formats,
+            'subtitles': subtitles,
         }
+
+    @staticmethod
+    def _fix_subtitles(subtitles):
+        """ Convert millisecond-based subtitles to SRT """
+        if subtitles is None:
+            return subtitles  # subtitles not requested
+
+        def _msectotimecode(msec):
+            """ Helper utility to convert milliseconds to timecode """
+            components = []
+            for divider in [1000, 60, 60, 100]:
+                components.append(msec % divider)
+                msec //= divider
+            return "{3:02}:{2:02}:{1:02},{0:03}".format(*components)
+
+        def _fix_subtitle(subtitle):
+            for line in subtitle.splitlines():
+                m = re.match(r"^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$", line)
+                if m:
+                    yield m.group(1)
+                    start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:])
+                    yield "{0} --> {1}".format(start, stop)
+                else:
+                    yield line
+
+        fixed_subtitles = {}
+        for k, v in subtitles.items():
+            fixed_subtitles[k] = "\r\n".join(_fix_subtitle(v))
+        return fixed_subtitles
index 2a05813f8d600540f66015a75ce4df647096d3fc..3dfc24f5ba447ea92858e89868ad3684caf3a6d2 100644 (file)
@@ -236,16 +236,17 @@ class Channel9IE(InfoExtractor):
         if contents is None:
             return contents
 
-        session_meta = {'session_code': self._extract_session_code(html),
-                        'session_day': self._extract_session_day(html),
-                        'session_room': self._extract_session_room(html),
-                        'session_speakers': self._extract_session_speakers(html),
-                        }
+        session_meta = {
+            'session_code': self._extract_session_code(html),
+            'session_day': self._extract_session_day(html),
+            'session_room': self._extract_session_room(html),
+            'session_speakers': self._extract_session_speakers(html),
+        }
 
         for content in contents:
             content.update(session_meta)
 
-        return contents
+        return self.playlist_result(contents)
 
     def _extract_list(self, content_path):
         rss = self._download_xml(self._RSS_URL % content_path, content_path, 'Downloading RSS')
diff --git a/youtube_dl/extractor/cinchcast.py b/youtube_dl/extractor/cinchcast.py
new file mode 100644 (file)
index 0000000..0c9a24b
--- /dev/null
@@ -0,0 +1,52 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    unified_strdate,
+    xpath_text,
+)
+
+
+class CinchcastIE(InfoExtractor):
+    _VALID_URL = r'https?://player\.cinchcast\.com/.*?assetId=(?P<id>[0-9]+)'
+    _TEST = {
+        # Actual test is run in generic, look for undergroundwellness
+        'url': 'http://player.cinchcast.com/?platformId=1&#038;assetType=single&#038;assetId=7141703',
+        'only_matching': True,
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        doc = self._download_xml(
+            'http://www.blogtalkradio.com/playerasset/mrss?assetType=single&assetId=%s' % video_id,
+            video_id)
+
+        item = doc.find('.//item')
+        title = xpath_text(item, './title', fatal=True)
+        date_str = xpath_text(
+            item, './{http://developer.longtailvideo.com/trac/}date')
+        upload_date = unified_strdate(date_str, day_first=False)
+        # duration is present but wrong
+        formats = []
+        formats.append({
+            'format_id': 'main',
+            'url': item.find(
+                './{http://search.yahoo.com/mrss/}content').attrib['url'],
+        })
+        backup_url = xpath_text(
+            item, './{http://developer.longtailvideo.com/trac/}backupContent')
+        if backup_url:
+            formats.append({
+                'preference': 2,  # seems to be more reliable
+                'format_id': 'backup',
+                'url': backup_url,
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'upload_date': upload_date,
+            'formats': formats,
+        }
index 710d5009b71aafe0da901771048b8c0ba68def04..3145b30514ea2a075f92077b9f87b64c9e8820a7 100644 (file)
@@ -2,12 +2,10 @@
 from __future__ import unicode_literals
 
 import json
-import re
 
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
-    int_or_none,
 )
 
 
@@ -15,23 +13,24 @@ class CNETIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/'
     _TEST = {
         'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
-        'md5': '041233212a0d06b179c87cbcca1577b8',
         'info_dict': {
             'id': '56f4ea68-bd21-4852-b08c-4de5b8354c60',
-            'ext': 'mp4',
+            'ext': 'flv',
             'title': 'Hands-on with Microsoft Windows 8.1 Update',
             'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
             'thumbnail': 're:^http://.*/flmswindows8.jpg$',
-            'uploader_id': 'sarah.mitroff@cbsinteractive.com',
+            'uploader_id': '6085384d-619e-11e3-b231-14feb5ca9861',
             'uploader': 'Sarah Mitroff',
+        },
+        'params': {
+            'skip_download': 'requires rtmpdump',
         }
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('id')
-
+        display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
+
         data_json = self._html_search_regex(
             r"<div class=\"cnetVideoPlayer\"\s+.*?data-cnet-video-options='([^']+)'",
             webpage, 'data json')
@@ -42,37 +41,31 @@ class CNETIE(InfoExtractor):
         if not vdata:
             raise ExtractorError('Cannot find video data')
 
+        mpx_account = data['config']['players']['default']['mpx_account']
+        vid = vdata['files']['rtmp']
+        tp_link = 'http://link.theplatform.com/s/%s/%s' % (mpx_account, vid)
+
         video_id = vdata['id']
         title = vdata.get('headline')
         if title is None:
             title = vdata.get('title')
         if title is None:
             raise ExtractorError('Cannot find title!')
-        description = vdata.get('dek')
         thumbnail = vdata.get('image', {}).get('path')
         author = vdata.get('author')
         if author:
             uploader = '%s %s' % (author['firstName'], author['lastName'])
-            uploader_id = author.get('email')
+            uploader_id = author.get('id')
         else:
             uploader = None
             uploader_id = None
 
-        formats = [{
-            'format_id': '%s-%s-%s' % (
-                f['type'], f['format'],
-                int_or_none(f.get('bitrate'), 1000, default='')),
-            'url': f['uri'],
-            'tbr': int_or_none(f.get('bitrate'), 1000),
-        } for f in vdata['files']['data']]
-        self._sort_formats(formats)
-
         return {
+            '_type': 'url_transparent',
+            'url': tp_link,
             'id': video_id,
             'display_id': display_id,
             'title': title,
-            'formats': formats,
-            'description': description,
             'uploader': uploader,
             'uploader_id': uploader_id,
             'thumbnail': thumbnail,
index 81142ee419d45b9df9f75bdc152ab87e1317650f..93e8d0de355d7ccb239f06aee956468d33cb43d9 100644 (file)
@@ -11,14 +11,14 @@ from ..utils import (
 
 
 class CNNIE(InfoExtractor):
-    _VALID_URL = r'''(?x)https?://((edition|www)\.)?cnn\.com/video/(data/.+?|\?)/
-        (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn(-ap)?|(?=&)))'''
+    _VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/
+        (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:cnn|hln)(?:-ap)?|(?=&)))'''
 
     _TESTS = [{
         'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
         'md5': '3e6121ea48df7e2259fe73a0628605c4',
         'info_dict': {
-            'id': 'sports_2013_06_09_nadal-1-on-1.cnn',
+            'id': 'sports/2013/06/09/nadal-1-on-1.cnn',
             'ext': 'mp4',
             'title': 'Nadal wins 8th French Open title',
             'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
@@ -35,6 +35,16 @@ class CNNIE(InfoExtractor):
             "description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"",
             "upload_date": "20130821",
         }
+    }, {
+        'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html',
+        'md5': 'f14d02ebd264df951feb2400e2c25a1b',
+        'info_dict': {
+            'id': 'living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln',
+            'ext': 'mp4',
+            'title': 'Nashville Ep. 1: Hand crafted skateboards',
+            'description': 'md5:e7223a503315c9f150acac52e76de086',
+            'upload_date': '20141222',
+        }
     }]
 
     def _real_extract(self, url):
@@ -127,3 +137,28 @@ class CNNBlogsIE(InfoExtractor):
             'url': cnn_url,
             'ie_key': CNNIE.ie_key(),
         }
+
+
+class CNNArticleIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!video/)'
+    _TEST = {
+        'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/',
+        'md5': '275b326f85d80dff7592a9820f5dc887',
+        'info_dict': {
+            'id': 'bestoftv/2014/12/21/sotu-crowley-president-obama-north-korea-not-going-to-be-intimidated.cnn',
+            'ext': 'mp4',
+            'title': 'Obama: We\'re not going to be intimidated',
+            'description': 'md5:e735586f3dc936075fa654a4d91b21f9',
+            'upload_date': '20141220',
+        },
+        'add_ie': ['CNN'],
+    }
+
+    def _real_extract(self, url):
+        webpage = self._download_webpage(url, url_basename(url))
+        cnn_url = self._html_search_regex(r"video:\s*'([^']+)'", webpage, 'cnn url')
+        return {
+            '_type': 'url',
+            'url': 'http://cnn.com/video/?/video/' + cnn_url,
+            'ie_key': CNNIE.ie_key(),
+        }
diff --git a/youtube_dl/extractor/collegerama.py b/youtube_dl/extractor/collegerama.py
new file mode 100644 (file)
index 0000000..fedd484
--- /dev/null
@@ -0,0 +1,92 @@
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_request
+from ..utils import (
+    float_or_none,
+    int_or_none,
+)
+
+
+class CollegeRamaIE(InfoExtractor):
+    _VALID_URL = r'https?://collegerama\.tudelft\.nl/Mediasite/Play/(?P<id>[\da-f]+)'
+    _TESTS = [
+        {
+            'url': 'https://collegerama.tudelft.nl/Mediasite/Play/585a43626e544bdd97aeb71a0ec907a01d',
+            'md5': '481fda1c11f67588c0d9d8fbdced4e39',
+            'info_dict': {
+                'id': '585a43626e544bdd97aeb71a0ec907a01d',
+                'ext': 'mp4',
+                'title': 'Een nieuwe wereld: waarden, bewustzijn en techniek van de mensheid 2.0.',
+                'description': '',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'duration': 7713.088,
+                'timestamp': 1413309600,
+                'upload_date': '20141014',
+            },
+        },
+        {
+            'url': 'https://collegerama.tudelft.nl/Mediasite/Play/86a9ea9f53e149079fbdb4202b521ed21d?catalog=fd32fd35-6c99-466c-89d4-cd3c431bc8a4',
+            'md5': 'ef1fdded95bdf19b12c5999949419c92',
+            'info_dict': {
+                'id': '86a9ea9f53e149079fbdb4202b521ed21d',
+                'ext': 'wmv',
+                'title': '64ste Vakantiecursus: Afvalwater',
+                'description': 'md5:7fd774865cc69d972f542b157c328305',
+                'duration': 10853,
+                'timestamp': 1326446400,
+                'upload_date': '20120113',
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        player_options_request = {
+            "getPlayerOptionsRequest": {
+                "ResourceId": video_id,
+                "QueryString": "",
+            }
+        }
+
+        request = compat_urllib_request.Request(
+            'http://collegerama.tudelft.nl/Mediasite/PlayerService/PlayerService.svc/json/GetPlayerOptions',
+            json.dumps(player_options_request))
+        request.add_header('Content-Type', 'application/json')
+
+        player_options = self._download_json(request, video_id)
+
+        presentation = player_options['d']['Presentation']
+        title = presentation['Title']
+        description = presentation.get('Description')
+        thumbnail = None
+        duration = float_or_none(presentation.get('Duration'), 1000)
+        timestamp = int_or_none(presentation.get('UnixTime'), 1000)
+
+        formats = []
+        for stream in presentation['Streams']:
+            for video in stream['VideoUrls']:
+                thumbnail_url = stream.get('ThumbnailUrl')
+                if thumbnail_url:
+                    thumbnail = 'http://collegerama.tudelft.nl' + thumbnail_url
+                format_id = video['MediaType']
+                if format_id == 'SS':
+                    continue
+                formats.append({
+                    'url': video['Location'],
+                    'format_id': format_id,
+                })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'timestamp': timestamp,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/comcarcoff.py b/youtube_dl/extractor/comcarcoff.py
new file mode 100644 (file)
index 0000000..9c25b22
--- /dev/null
@@ -0,0 +1,57 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import parse_iso8601
+
+
+class ComCarCoffIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?comediansincarsgettingcoffee\.com/(?P<id>[a-z0-9\-]*)'
+    _TESTS = [{
+        'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',
+        'info_dict': {
+            'id': 'miranda-sings-happy-thanksgiving-miranda',
+            'ext': 'mp4',
+            'upload_date': '20141127',
+            'timestamp': 1417107600,
+            'title': 'Happy Thanksgiving Miranda',
+            'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.',
+            'thumbnail': 'http://ccc.crackle.com/images/s5e4_thumb.jpg',
+        },
+        'params': {
+            'skip_download': 'requires ffmpeg',
+        }
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        if not display_id:
+            display_id = 'comediansincarsgettingcoffee.com'
+        webpage = self._download_webpage(url, display_id)
+
+        full_data = json.loads(self._search_regex(
+            r'<script type="application/json" id="videoData">(?P<json>.+?)</script>',
+            webpage, 'full data json'))
+
+        video_id = full_data['activeVideo']['video']
+        video_data = full_data['videos'][video_id]
+        thumbnails = [{
+            'url': video_data['images']['thumb'],
+        }, {
+            'url': video_data['images']['poster'],
+        }]
+        formats = self._extract_m3u8_formats(
+            video_data['mediaUrl'], video_id, ext='mp4')
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': video_data['title'],
+            'description': video_data.get('description'),
+            'timestamp': parse_iso8601(video_data.get('pubDate')),
+            'thumbnails': thumbnails,
+            'formats': formats,
+            'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))),
+        }
index 2e3ef3fdab4c25f2818f46a820702056ceb3f294..8d27af5e57348e56a924d1d633df8799343245e4 100644 (file)
@@ -3,9 +3,11 @@ from __future__ import unicode_literals
 import re
 
 from .mtv import MTVServicesInfoExtractor
-from ..utils import (
+from ..compat import (
     compat_str,
     compat_urllib_parse,
+)
+from ..utils import (
     ExtractorError,
     float_or_none,
     unified_strdate,
@@ -48,7 +50,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
                           )|
                           (?P<interview>
                               extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?)))
-                     (?:[?#].*|$)'''
+                     '''
     _TESTS = [{
         'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
         'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
@@ -81,6 +83,9 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
     }, {
         'url': 'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights',
         'only_matching': True,
+    }, {
+        'url': 'http://thedailyshow.cc.com/video-playlists/t6d9sg/the-daily-show-20038-highlights/be3cwo',
+        'only_matching': True,
     }, {
         'url': 'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food',
         'only_matching': True,
index e80a2dad0b2e12c5e9d14a486feb0b600de66823..03f3f18c83012cdced0e305fe1cc02d69a85bb7c 100644 (file)
@@ -21,6 +21,7 @@ from ..compat import (
     compat_str,
 )
 from ..utils import (
+    age_restricted,
     clean_html,
     compiled_regex_type,
     ExtractorError,
@@ -40,7 +41,7 @@ class InfoExtractor(object):
     information about the video (or videos) the URL refers to. This
     information includes the real video URL, the video title, author and
     others. The information is stored in a dictionary which is then
-    passed to the FileDownloader. The FileDownloader processes this
+    passed to the YoutubeDL. The YoutubeDL processes this
     information possibly downloading the video to the file system, among
     other possible outcomes.
 
@@ -92,6 +93,8 @@ class InfoExtractor(object):
                                  by this field, regardless of all other values.
                                  -1 for default (order by other properties),
                                  -2 or smaller for less than default.
+                                 < -1000 to hide the format (if there is
+                                    another one which is strictly better)
                     * language_preference  Is this in the correct requested
                                  language?
                                  10 if it's what the URL is about,
@@ -111,6 +114,9 @@ class InfoExtractor(object):
                                  to add to the request.
                     * http_post_data  Additional data to send with a POST
                                  request.
+                    * stretched_ratio  If given and not 1, indicates that the
+                                       video's pixels are not square.
+                                       width : height ratio as float.
     url:            Final video URL.
     ext:            Video filename extension.
     format:         The video format, defaults to ext (used for --get-format)
@@ -118,6 +124,7 @@ class InfoExtractor(object):
 
     The following fields are optional:
 
+    alt_title:      A secondary title of the video.
     display_id      An alternative identifier for the video, not necessarily
                     unique, but available before title. Typically, id is
                     something like "4234987", title "Dancing naked mole rats",
@@ -129,7 +136,7 @@ class InfoExtractor(object):
                         * "resolution" (optional, string "{width}x{height"},
                                         deprecated)
     thumbnail:      Full URL to a video thumbnail image.
-    description:    One-line video description.
+    description:    Full video description.
     uploader:       Full name of the video uploader.
     timestamp:      UNIX timestamp of the moment the video became available.
     upload_date:    Video upload date (YYYYMMDD).
@@ -143,6 +150,17 @@ class InfoExtractor(object):
     like_count:     Number of positive ratings of the video
     dislike_count:  Number of negative ratings of the video
     comment_count:  Number of comments on the video
+    comments:       A list of comments, each with one or more of the following
+                    properties (all but one of text or html optional):
+                        * "author" - human-readable name of the comment author
+                        * "author_id" - user ID of the comment author
+                        * "id" - Comment ID
+                        * "html" - Comment as HTML
+                        * "text" - Plain text of the comment
+                        * "timestamp" - UNIX timestamp of comment
+                        * "parent" - ID of the comment this one is replying to.
+                                     Set to "root" to indicate that this is a
+                                     comment to the original video.
     age_limit:      Age restriction for the video, as an integer (years)
     webpage_url:    The url to the video webpage, if given to youtube-dl it
                     should allow to get the same result again. (It will be set
@@ -158,8 +176,8 @@ class InfoExtractor(object):
 
 
     _type "playlist" indicates multiple videos.
-    There must be a key "entries", which is a list or a PagedList object, each
-    element of which is a valid dictionary under this specfication.
+    There must be a key "entries", which is a list, an iterable, or a PagedList
+    object, each element of which is a valid dictionary by this specification.
 
     Additionally, playlists can have "title" and "id" attributes with the same
     semantics as videos (see above).
@@ -174,9 +192,10 @@ class InfoExtractor(object):
     _type "url" indicates that the video must be extracted from another
     location, possibly by a different extractor. Its only required key is:
     "url" - the next URL to extract.
-
-    Additionally, it may have properties believed to be identical to the
-    resolved entity, for example "title" if the title of the referred video is
+    The key "ie_key" can be set to the class name (minus the trailing "IE",
+    e.g. "Youtube") if the extractor class is known in advance.
+    Additionally, the dictionary may have any properties of the resolved entity
+    known in advance, for example "title" if the title of the referred video is
     known ahead of time.
 
 
@@ -360,9 +379,19 @@ class InfoExtractor(object):
 
         return content
 
-    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
+    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5):
         """ Returns the data of the page as a string """
-        res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal)
+        success = False
+        try_count = 0
+        while success is False:
+            try:
+                res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal)
+                success = True
+            except compat_http_client.IncompleteRead as e:
+                try_count += 1
+                if try_count >= tries:
+                    raise e
+                self._sleep(timeout, video_id)
         if res is False:
             return res
         else:
@@ -390,6 +419,10 @@ class InfoExtractor(object):
             url_or_request, video_id, note, errnote, fatal=fatal)
         if (not fatal) and json_string is False:
             return None
+        return self._parse_json(
+            json_string, video_id, transform_source=transform_source, fatal=fatal)
+
+    def _parse_json(self, json_string, video_id, transform_source=None, fatal=True):
         if transform_source:
             json_string = transform_source(json_string)
         try:
@@ -439,7 +472,7 @@ class InfoExtractor(object):
         return video_info
 
     @staticmethod
-    def playlist_result(entries, playlist_id=None, playlist_title=None):
+    def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
         """Returns a playlist"""
         video_info = {'_type': 'playlist',
                       'entries': entries}
@@ -447,6 +480,8 @@ class InfoExtractor(object):
             video_info['id'] = playlist_id
         if playlist_title:
             video_info['title'] = playlist_title
+        if playlist_description:
+            video_info['description'] = playlist_description
         return video_info
 
     def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
@@ -581,9 +616,9 @@ class InfoExtractor(object):
         if display_name is None:
             display_name = name
         return self._html_search_regex(
-            r'''(?ix)<meta
+            r'''(?isx)<meta
                     (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
-                    [^>]+content=(["\'])(?P<content>.*?)\1''' % re.escape(name),
+                    [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(name),
             html, display_name, fatal=fatal, group='content', **kwargs)
 
     def _dc_search_uploader(self, html):
@@ -707,8 +742,14 @@ class InfoExtractor(object):
             'Unable to download f4m manifest')
 
         formats = []
+        manifest_version = '1.0'
         media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media')
+        if not media_nodes:
+            manifest_version = '2.0'
+            media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
         for i, media_el in enumerate(media_nodes):
+            if manifest_version == '2.0':
+                manifest_url = '/'.join(manifest_url.split('/')[:-1]) + '/' + media_el.attrib.get('href')
             tbr = int_or_none(media_el.attrib.get('bitrate'))
             format_id = 'f4m-%d' % (i if tbr is None else tbr)
             formats.append({
@@ -790,6 +831,49 @@ class InfoExtractor(object):
         self._sort_formats(formats)
         return formats
 
+    # TODO: improve extraction
+    def _extract_smil_formats(self, smil_url, video_id):
+        """Download a SMIL document and turn its video nodes into formats.
+
+        Only ``./body/switch/video`` elements that carry a ``src`` attribute
+        are considered. Entries whose protocol resolves to ``m3u8`` are
+        expanded via ``_extract_m3u8_formats``; entries resolving to ``rtmp``
+        become one RTMP format each. Entries with any other protocol
+        (including a detected ``http``) are currently skipped.
+
+        smil_url -- URL of the SMIL file to download
+        video_id -- identifier passed on to the download helpers
+
+        Returns the list of format dicts after ``_sort_formats`` was applied.
+        """
+        smil = self._download_xml(
+            smil_url, video_id, 'Downloading SMIL file',
+            'Unable to download SMIL file')
+
+        # find() returns None when the document has no <head><meta>; treat
+        # that like a missing "base" attribute instead of crashing. An
+        # explicit None test is needed because childless elements are falsy.
+        meta = smil.find('./head/meta')
+        base = meta.get('base') if meta is not None else None
+
+        formats = []
+        rtmp_count = 0
+        for video in smil.findall('./body/switch/video'):
+            src = video.get('src')
+            if not src:
+                continue
+            # Both attribute spellings are accepted. The second argument is
+            # presumably a scale (bit/s -> kbit/s) -- confirm in int_or_none.
+            bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
+            width = int_or_none(video.get('width'))
+            height = int_or_none(video.get('height'))
+            proto = video.get('proto')
+            if not proto:
+                # No explicit protocol: derive it from the scheme of "base"
+                if base:
+                    if base.startswith('rtmp'):
+                        proto = 'rtmp'
+                    elif base.startswith('http'):
+                        proto = 'http'
+            ext = video.get('ext')
+            if proto == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(src, video_id, ext))
+            elif proto == 'rtmp':
+                rtmp_count += 1
+                streamer = video.get('streamer') or base
+                formats.append({
+                    'url': streamer,
+                    'play_path': src,
+                    'ext': 'flv',
+                    # Fall back to a running counter when no bitrate is known
+                    'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
+                    'tbr': bitrate,
+                    'width': width,
+                    'height': height,
+                })
+        self._sort_formats(formats)
+
+        return formats
+
     def _live_title(self, name):
         """ Generate the title for a live video """
         now = datetime.datetime.now()
@@ -819,10 +903,40 @@ class InfoExtractor(object):
         return res
 
     def _set_cookie(self, domain, name, value, expire_time=None):
-        cookie = compat_cookiejar.Cookie(0, name, value, None, None, domain, None,
+        cookie = compat_cookiejar.Cookie(
+            0, name, value, None, None, domain, None,
             None, '/', True, False, expire_time, '', None, None, None)
         self._downloader.cookiejar.set_cookie(cookie)
 
+    def get_testcases(self, include_onlymatching=False):
+        """ Yield the test case dicts declared via _TEST or _TESTS.
+
+        A truthy _TEST wins and must not be combined with _TESTS (asserted
+        on first iteration). Cases flagged 'only_matching' are left out
+        unless include_onlymatching is set. Every yielded dict is tagged
+        in place with 'name': the class name minus its trailing "IE". """
+        ie_class_name = type(self).__name__
+        single_case = getattr(self, '_TEST', None)
+        if not single_case:
+            candidates = getattr(self, '_TESTS', [])
+        else:
+            assert not hasattr(self, '_TESTS'), \
+                '%s has _TEST and _TESTS' % ie_class_name
+            candidates = [single_case]
+        for case in candidates:
+            if include_onlymatching or not case.get('only_matching', False):
+                case['name'] = ie_class_name[:-len('IE')]
+                yield case
+
+    def is_suitable(self, age_limit):
+        """ Decide from the declared test cases whether this extractor is
+        generally usable under the given age limit.
+
+        Returns True as soon as one test case (or the first entry of its
+        'playlist') is not age restricted, and also when there are no test
+        cases to judge by; returns False if every test case is restricted. """
+
+        saw_testcase = False
+        for testcase in self.get_testcases(include_onlymatching=False):
+            if 'playlist' in testcase:
+                # Judge a playlist test by its first entry
+                testcase = testcase['playlist'][0]
+            content_limit = testcase.get('info_dict', {}).get('age_limit')
+            if not age_restricted(content_limit, age_limit):
+                return True
+            saw_testcase = True
+        return not saw_testcase
+
 
 class SearchInfoExtractor(InfoExtractor):
     """
diff --git a/youtube_dl/extractor/commonmistakes.py b/youtube_dl/extractor/commonmistakes.py
new file mode 100644 (file)
index 0000000..75c0690
--- /dev/null
@@ -0,0 +1,29 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class CommonMistakesIE(InfoExtractor):
+    """Catch the literal placeholder "url"/"URL" being passed as a URL.
+
+    Matching input never yields media; _real_extract always raises an
+    expected ExtractorError explaining the mistake to the user.
+    """
+    IE_DESC = False  # Do not list
+    _VALID_URL = r'''(?x)
+        (?:url|URL)
+    '''
+
+    _TESTS = [{
+        'url': 'url',
+        'only_matching': True,
+    }, {
+        'url': 'URL',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Always raise an expected ExtractorError describing the mistake."""
+        msg = (
+            'You\'ve asked youtube-dl to download the URL "%s". '
+            'That doesn\'t make any sense. '
+            'Simply remove the parameter in your command or configuration.'
+        ) % url
+        # Suggest -v only to users who are not already running verbosely
+        if not self._downloader.params.get('verbose'):
+            msg += ' Add -v to the command line to see what arguments and configuration youtube-dl got.'
+        raise ExtractorError(msg, expected=True)
index 7a7e79360423ec39e341cf651b2aef4ca762d244..3db4db4e4db816ae532060bc2386cd91a9c71a92 100644 (file)
@@ -5,12 +5,14 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
-    orderedSet,
     compat_urllib_parse_urlparse,
     compat_urlparse,
 )
+from ..utils import (
+    orderedSet,
+)
 
 
 class CondeNastIE(InfoExtractor):
index d7e2b841e10856cadf0526fe8ff6d4c280dc0dae..1680f532f80167a65c2dbdc3b5bc0bfa83f7fc66 100644 (file)
@@ -10,10 +10,12 @@ import xml.etree.ElementTree
 from hashlib import sha1
 from math import pow, sqrt, floor
 from .subtitles import SubtitlesInfoExtractor
-from ..utils import (
-    ExtractorError,
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
+)
+from ..utils import (
+    ExtractorError,
     bytes_to_intlist,
     intlist_to_bytes,
     unified_strdate,
@@ -27,10 +29,9 @@ from .common import InfoExtractor
 
 
 class CrunchyrollIE(SubtitlesInfoExtractor):
-    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
+    _TESTS = [{
         'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
-        #'md5': 'b1639fd6ddfaa43788c85f6d1dddd412',
         'info_dict': {
             'id': '645513',
             'ext': 'flv',
@@ -45,7 +46,10 @@ class CrunchyrollIE(SubtitlesInfoExtractor):
             # rtmp
             'skip_download': True,
         },
-    }
+    }, {
+        'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
+        'only_matching': True,
+    }]
 
     _FORMAT_IDS = {
         '360': ('60', '106'),
@@ -224,7 +228,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
         video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False)
 
         formats = []
-        for fmt in re.findall(r'\?p([0-9]{3,4})=1', webpage):
+        for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage):
             stream_quality, stream_format = self._FORMAT_IDS[fmt]
             video_format = fmt + 'p'
             streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/')
index 5411066846eb94b9c9295bae4f8860e07112b2d1..955119d40be3797e073b030790b3685b7ca4be15 100644 (file)
@@ -27,7 +27,6 @@ class CSpanIE(InfoExtractor):
         'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models',
         # For whatever reason, the served video alternates between
         # two different ones
-        #'md5': 'dbb0f047376d457f2ab8b3929cbb2d0c',
         'info_dict': {
             'id': '340723',
             'ext': 'mp4',
index 936c13cd60b0ec44f376818a7e17cb9ccf4d1384..cf5841a7c6e92e115d7f685d8f7ce337a51cb92a 100644 (file)
@@ -8,13 +8,15 @@ import itertools
 from .common import InfoExtractor
 from .subtitles import SubtitlesInfoExtractor
 
-from ..utils import (
-    compat_urllib_request,
+from ..compat import (
     compat_str,
+    compat_urllib_request,
+)
+from ..utils import (
+    ExtractorError,
+    int_or_none,
     orderedSet,
     str_to_int,
-    int_or_none,
-    ExtractorError,
     unescapeHTML,
 )
 
index 45d66e2e663fa376cec8f4fc7931e84006ee30b9..934da765ee700712721281a85dd955c28405001e 100644 (file)
@@ -5,7 +5,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
 )
 
@@ -38,7 +38,7 @@ class DaumIE(InfoExtractor):
         canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
         webpage = self._download_webpage(canonical_url, video_id)
         full_id = self._search_regex(
-            r'<iframe src="http://videofarm.daum.net/controller/video/viewer/Video.html\?.*?vid=(.+?)[&"]',
+            r'src=["\']http://videofarm\.daum\.net/controller/video/viewer/Video\.html\?.*?vid=(.+?)[&"\']',
             webpage, 'full id')
         query = compat_urllib_parse.urlencode({'vid': full_id})
         info = self._download_xml(
index 1d3e2ff087fe611d6b660a59963a16765e3ac53e..2122176254eeacce3241a9e517a70daf90b0b187 100644 (file)
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
     float_or_none,
     int_or_none,
@@ -61,7 +62,7 @@ class DBTVIE(InfoExtractor):
         self._sort_formats(formats)
 
         return {
-            'id': video['id'],
+            'id': compat_str(video['id']),
             'display_id': display_id,
             'title': video['title'],
             'description': clean_html(video['desc']),
index 52c2d7ddf99873779b7f3223b0acfe4563e2b5d9..d3e6675283cddcb8f6a6dfffbfbd1e1ea3da11bc 100644 (file)
@@ -1,47 +1,45 @@
 from __future__ import unicode_literals
 
-import re
-import json
-
 from .common import InfoExtractor
+from ..utils import (
+    parse_iso8601,
+    int_or_none,
+)
 
 
 class DiscoveryIE(InfoExtractor):
-    _VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
+    _VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9_\-]*)(?:\.htm)?'
     _TEST = {
         'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
-        'md5': 'e12614f9ee303a6ccef415cb0793eba2',
+        'md5': '3c69d77d9b0d82bfd5e5932a60f26504',
         'info_dict': {
-            'id': '614784',
-            'ext': 'mp4',
-            'title': 'MythBusters: Mission Impossible Outtakes',
+            'id': 'mission-impossible-outtakes',
+            'ext': 'flv',
+            'title': 'Mission Impossible Outtakes',
             'description': ('Watch Jamie Hyneman and Adam Savage practice being'
                             ' each other -- to the point of confusing Jamie\'s dog -- and '
                             'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s'
                             ' back.'),
             'duration': 156,
+            'timestamp': 1303099200,
+            'upload_date': '20110418',
         },
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        video_list_json = self._search_regex(r'var videoListJSON = ({.*?});',
-                                             webpage, 'video list', flags=re.DOTALL)
-        video_list = json.loads(video_list_json)
-        info = video_list['clips'][0]
-        formats = []
-        for f in info['mp4']:
-            formats.append(
-                {'url': f['src'], 'ext': 'mp4', 'tbr': int(f['bitrate'][:-1])})
+        info = self._parse_json(self._search_regex(
+            r'(?s)<script type="application/ld\+json">(.*?)</script>',
+            webpage, 'video info'), video_id)
 
         return {
-            'id': info['contentId'],
-            'title': video_list['name'],
-            'formats': formats,
-            'description': info['videoCaption'],
-            'thumbnail': info.get('videoStillURL') or info.get('thumbnailURL'),
-            'duration': info['duration'],
+            'id': video_id,
+            'title': info['name'],
+            'url': info['contentURL'],
+            'description': info.get('description'),
+            'thumbnail': info.get('thumbnailUrl'),
+            'timestamp': parse_iso8601(info.get('uploadDate')),
+            'duration': int_or_none(info.get('duration')),
         }
diff --git a/youtube_dl/extractor/drbonanza.py b/youtube_dl/extractor/drbonanza.py
new file mode 100644 (file)
index 0000000..7626219
--- /dev/null
@@ -0,0 +1,131 @@
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    parse_iso8601,
+)
+
+
+class DRBonanzaIE(InfoExtractor):
+    """Extractor for dr.dk/bonanza pages (video and audio assets).
+
+    The asset metadata is read from a JSON object embedded in the page;
+    when the URL carries no assetId the first playlist entry is used.
+    """
+    _VALID_URL = r'https?://(?:www\.)?dr\.dk/bonanza/(?:[^/]+/)+(?:[^/])+?(?:assetId=(?P<id>\d+))?(?:[#&]|$)'
+
+    _TESTS = [{
+        'url': 'http://www.dr.dk/bonanza/serie/portraetter/Talkshowet.htm?assetId=65517',
+        'md5': 'fe330252ddea607635cf2eb2c99a0af3',
+        'info_dict': {
+            'id': '65517',
+            'ext': 'mp4',
+            'title': 'Talkshowet - Leonard Cohen',
+            'description': 'md5:8f34194fb30cd8c8a30ad8b27b70c0ca',
+            'thumbnail': 're:^https?://.*\.(?:gif|jpg)$',
+            'timestamp': 1295537932,
+            'upload_date': '20110120',
+            'duration': 3664,
+        },
+    }, {
+        'url': 'http://www.dr.dk/bonanza/radio/serie/sport/fodbold.htm?assetId=59410',
+        'md5': '6dfe039417e76795fb783c52da3de11d',
+        'info_dict': {
+            'id': '59410',
+            'ext': 'mp3',
+            'title': 'EM fodbold 1992 Danmark - Tyskland finale Transmission',
+            'description': 'md5:501e5a195749480552e214fbbed16c4e',
+            'thumbnail': 're:^https?://.*\.(?:gif|jpg)$',
+            'timestamp': 1223274900,
+            'upload_date': '20081006',
+            'duration': 7369,
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dict for the asset referenced by (or first on) the page."""
+        url_id = self._match_id(url)
+        webpage = self._download_webpage(url, url_id)
+
+        if url_id:
+            info = json.loads(self._html_search_regex(r'({.*?%s.*})' % url_id, webpage, 'json'))
+        else:
+            # Just fetch the first video on that page
+            info = json.loads(self._html_search_regex(r'bonanzaFunctions.newPlaylist\(({.*})\)', webpage, 'json'))
+
+        asset_id = str(info['AssetId'])
+        title = info['Title'].rstrip(' \'\"-,.:;!?')
+        duration = int_or_none(info.get('Duration'), scale=1000)
+        # First published online. "FirstPublished" contains the date for original airing.
+        timestamp = parse_iso8601(
+            re.sub(r'\.\d+$', '', info['Created']))
+
+        def parse_filename_info(url):
+            # Derive format fields from file names such as
+            # ".../<digits>_<width>x<height>x<bitrate>K.<ext>" or
+            # ".../<digits>_<bitrate>K.<ext>"; unknown layouts give {}.
+            match = re.search(r'/\d+_(?P<width>\d+)x(?P<height>\d+)x(?P<bitrate>\d+)K\.(?P<ext>\w+)$', url)
+            if match:
+                return {
+                    'width': int(match.group('width')),
+                    'height': int(match.group('height')),
+                    'vbr': int(match.group('bitrate')),
+                    'ext': match.group('ext')
+                }
+            match = re.search(r'/\d+_(?P<bitrate>\d+)K\.(?P<ext>\w+)$', url)
+            if match:
+                # NOTE(review): audio files also report their bitrate as
+                # 'vbr' here -- confirm whether 'abr' was intended.
+                return {
+                    'vbr': int(match.group('bitrate')),
+                    'ext': match.group('ext')
+                }
+            return {}
+
+        video_types = ['VideoHigh', 'VideoMid', 'VideoLow']
+        preferencemap = {
+            'VideoHigh': -1,
+            'VideoMid': -2,
+            'VideoLow': -3,
+            'Audio': -4,
+        }
+
+        formats = []
+        # Stays None when the asset lists no "Thumb" file, so the result
+        # dict below can always be built.
+        thumbnail = None
+        for file_info in info['Files']:
+            file_type = file_info['Type']
+            if info['Type'] == "Video":
+                if file_type in video_types:
+                    fmt = parse_filename_info(file_info['Location'])
+                    fmt.update({
+                        'url': file_info['Location'],
+                        'format_id': file_type.replace('Video', ''),
+                        'preference': preferencemap.get(file_type, -10),
+                    })
+                    formats.append(fmt)
+                elif file_type == "Thumb":
+                    thumbnail = file_info['Location']
+            elif info['Type'] == "Audio":
+                if file_type == "Audio":
+                    fmt = parse_filename_info(file_info['Location'])
+                    fmt.update({
+                        'url': file_info['Location'],
+                        'format_id': file_type,
+                        'vcodec': 'none',
+                    })
+                    formats.append(fmt)
+                elif file_type == "Thumb":
+                    thumbnail = file_info['Location']
+
+        description = '%s\n%s\n%s\n' % (
+            info['Description'], info['Actors'], info['Colophon'])
+
+        # Rewrite the RTMP locations into their plain HTTP equivalents
+        for f in formats:
+            f['url'] = f['url'].replace('rtmp://vod-bonanza.gss.dr.dk/bonanza/', 'http://vodfiles.dr.dk/')
+            f['url'] = f['url'].replace('mp4:bonanza', 'bonanza')
+        self._sort_formats(formats)
+
+        # Slug built from the lower-cased title plus the asset id
+        display_id = re.sub(r'[^\w\d-]', '', re.sub(r' ', '-', title.lower())) + '-' + asset_id
+        display_id = re.sub(r'-+', '-', display_id)
+
+        return {
+            'id': asset_id,
+            'display_id': display_id,
+            'title': title,
+            'formats': formats,
+            'description': description,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'duration': duration,
+        }
index 93b3c9f36094724cd751cb340f9f925f2d04554c..c44adb1099bf6f0a2d08ccad7cebebef3939ddf9 100644 (file)
@@ -6,7 +6,7 @@ from ..utils import parse_iso8601
 
 
 class DRTVIE(SubtitlesInfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)+(?P<id>[\da-z-]+)(?:[/#?]|$)'
+    _VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)+(?P<id>[\da-z-]+)(?:[/#?]|$)'
 
     _TEST = {
         'url': 'http://www.dr.dk/tv/se/partiets-mand/partiets-mand-7-8',
diff --git a/youtube_dl/extractor/dvtv.py b/youtube_dl/extractor/dvtv.py
new file mode 100644 (file)
index 0000000..c1a4bc7
--- /dev/null
@@ -0,0 +1,125 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    js_to_json,
+    unescapeHTML,
+    ExtractorError,
+)
+
+
+class DVTVIE(InfoExtractor):
+    """Extractor for video.aktualne.cz (DVTV) single videos and playlists.
+
+    Metadata is taken from JavaScript object literals embedded in the page
+    and converted with js_to_json before parsing.
+    """
+    IE_NAME = 'dvtv'
+    IE_DESC = 'http://video.aktualne.cz/'
+
+    _VALID_URL = r'http://video\.aktualne\.cz/(?:[^/]+/)+r~(?P<id>[0-9a-f]{32})'
+
+    _TESTS = [{
+        'url': 'http://video.aktualne.cz/dvtv/vondra-o-ceskem-stoleti-pri-pohledu-na-havla-mi-bylo-trapne/r~e5efe9ca855511e4833a0025900fea04/',
+        'md5': '67cb83e4a955d36e1b5d31993134a0c2',
+        'info_dict': {
+            'id': 'dc0768de855511e49e4b0025900fea04',
+            'ext': 'mp4',
+            'title': 'Vondra o Českém století: Při pohledu na Havla mi bylo trapně',
+        }
+    }, {
+        'url': 'http://video.aktualne.cz/dvtv/stropnicky-policie-vrbetice-preventivne-nekontrolovala/r~82ed4322849211e4a10c0025900fea04/',
+        'md5': '6388f1941b48537dbd28791f712af8bf',
+        'info_dict': {
+            'id': '72c02230849211e49f60002590604f2e',
+            'ext': 'mp4',
+            'title': 'Stropnický: Policie Vrbětice preventivně nekontrolovala',
+        }
+    }, {
+        'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/',
+        'info_dict': {
+            'title': 'DVTV 16. 12. 2014: útok Talibanu, boj o kliniku, uprchlíci',
+            'id': '973eb3bc854e11e498be002590604f2e',
+        },
+        'playlist': [{
+            'md5': 'da7ca6be4935532241fa9520b3ad91e4',
+            'info_dict': {
+                'id': 'b0b40906854d11e4bdad0025900fea04',
+                'ext': 'mp4',
+                'title': 'Drtinová Veselovský TV 16. 12. 2014: Témata dne'
+            }
+        }, {
+            'md5': '5f7652a08b05009c1292317b449ffea2',
+            'info_dict': {
+                'id': '420ad9ec854a11e4bdad0025900fea04',
+                'ext': 'mp4',
+                'title': 'Školní masakr možná změní boj s Talibanem, říká novinářka'
+            }
+        }, {
+            'md5': '498eb9dfa97169f409126c617e2a3d64',
+            'info_dict': {
+                'id': '95d35580846a11e4b6d20025900fea04',
+                'ext': 'mp4',
+                'title': 'Boj o kliniku: Veřejný zájem, nebo právo na majetek?'
+            }
+        }, {
+            'md5': 'b8dc6b744844032dab6ba3781a7274b9',
+            'info_dict': {
+                'id': '6fe14d66853511e4833a0025900fea04',
+                'ext': 'mp4',
+                'title': 'Pánek: Odmítání syrských uprchlíků je ostudou české vlády'
+            }
+        }],
+    }, {
+        'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/',
+        'only_matching': True,
+    }]
+
+    def _parse_video_metadata(self, js, video_id):
+        """Build an info dict from one embedded JavaScript asset literal.
+
+        js -- source text of the object literal
+        video_id -- identifier handed to _parse_json
+        """
+        metadata = self._parse_json(js, video_id, transform_source=js_to_json)
+
+        formats = []
+        for video in metadata['sources']:
+            # Drops the first six characters of the type -- presumably a
+            # "video/" MIME prefix; confirm against real page data.
+            ext = video['type'][6:]
+            formats.append({
+                'url': video['file'],
+                'ext': ext,
+                'format_id': '%s-%s' % (ext, video['label']),
+                # The label is the height with an optional trailing "p"
+                'height': int(video['label'].rstrip('p')),
+                'fps': 25,
+            })
+
+        self._sort_formats(formats)
+
+        return {
+            'id': metadata['mediaid'],
+            'title': unescapeHTML(metadata['title']),
+            'thumbnail': self._proto_relative_url(metadata['image'], 'http:'),
+            'formats': formats
+        }
+
+    def _real_extract(self, url):
+        """Return a single video, else a playlist; raise if neither is found."""
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        # single video
+        item = self._search_regex(
+            r"(?s)embedData[0-9a-f]{32}\['asset'\]\s*=\s*(\{.+?\});",
+            webpage, 'video', default=None, fatal=False)
+
+        if item:
+            return self._parse_video_metadata(item, video_id)
+
+        # playlist
+        items = re.findall(
+            r"(?s)BBX\.context\.assets\['[0-9a-f]{32}'\]\.push\(({.+?})\);",
+            webpage)
+
+        if items:
+            return {
+                '_type': 'playlist',
+                'id': video_id,
+                'title': self._og_search_title(webpage),
+                'entries': [self._parse_video_metadata(i, video_id) for i in items]
+            }
+
+        raise ExtractorError('Could not find either a video or a playlist')
index 63c2549d37aa528cc79f83822c7a267d391b74cc..b6bfd2b2dedc5388ef383a3cd8853bbb0c541f68 100644 (file)
@@ -1,7 +1,5 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 
 
@@ -20,8 +18,7 @@ class EbaumsWorldIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         config = self._download_xml(
             'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id)
         video_url = config.find('file').text
diff --git a/youtube_dl/extractor/echomsk.py b/youtube_dl/extractor/echomsk.py
new file mode 100644 (file)
index 0000000..d2d9404
--- /dev/null
@@ -0,0 +1,46 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class EchoMskIE(InfoExtractor):
+    """Extractor for audio pages under echo.msk.ru/sounds/."""
+    _VALID_URL = r'http://(?:www\.)?echo\.msk\.ru/sounds/(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://www.echo.msk.ru/sounds/1464134.html',
+        'md5': '2e44b3b78daff5b458e4dbc37f191f7c',
+        'info_dict': {
+            'id': '1464134',
+            'ext': 'mp3',
+            'title': 'Особое мнение - 29 декабря 2014, 19:08',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return id, audio URL and title; the air date, when present on
+        the page, is appended to the title."""
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        audio_url = self._search_regex(
+            r'<a rel="mp3" href="([^"]+)">', webpage, 'audio URL')
+        title = self._html_search_regex(
+            r'<a href="/programs/[^"]+" target="_blank">([^<]+)</a>',
+            webpage, 'title')
+
+        # The date is optional; runs of one repeated whitespace character
+        # are squeezed down to a single occurrence before use.
+        raw_date = self._html_search_regex(
+            r'(?s)<div class="date">(.+?)</div>',
+            webpage, 'date', fatal=False, default=None)
+        squeezed_date = re.sub(r'(\s)\1+', r'\1', raw_date) if raw_date else raw_date
+        if squeezed_date:
+            title = '%s - %s' % (title, squeezed_date)
+
+        return {
+            'id': video_id,
+            'url': audio_url,
+            'title': title,
+        }
index b766e17f26a9e79d654d4b160fa8f98f5f21503f..9cb1bf301b9ae3e327e4831bdb8a7d2437b43803 100644 (file)
@@ -1,8 +1,6 @@
 from __future__ import unicode_literals
 
-import re
-
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
 )
 from .common import InfoExtractor
@@ -24,11 +22,10 @@ class EHowIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-        video_url = self._search_regex(r'(?:file|source)=(http[^\'"&]*)',
-                                       webpage, 'video URL')
+        video_url = self._search_regex(
+            r'(?:file|source)=(http[^\'"&]*)', webpage, 'video URL')
         final_url = compat_urllib_parse.unquote(video_url)
         uploader = self._html_search_meta('uploader', webpage)
         title = self._og_search_title(webpage).replace(' | eHow', '')
index f4c1e2a72bf74821afd476dee86b806c0dbb56c7..fb5dbbe2b0c7d9bd15b87426e446ce73f903a6eb 100644 (file)
@@ -6,9 +6,12 @@ import random
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_str,
 )
+from ..utils import (
+    ExtractorError,
+)
 
 
 class EightTracksIE(InfoExtractor):
@@ -112,14 +115,29 @@ class EightTracksIE(InfoExtractor):
         session = str(random.randint(0, 1000000000))
         mix_id = data['id']
         track_count = data['tracks_count']
+        duration = data['duration']
+        avg_song_duration = float(duration) / track_count
         first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
         next_url = first_url
         entries = []
+
         for i in range(track_count):
-            api_json = self._download_webpage(
-                next_url, playlist_id,
-                note='Downloading song information %d/%d' % (i + 1, track_count),
-                errnote='Failed to download song information')
+            api_json = None
+            download_tries = 0
+
+            while api_json is None:
+                try:
+                    api_json = self._download_webpage(
+                        next_url, playlist_id,
+                        note='Downloading song information %d/%d' % (i + 1, track_count),
+                        errnote='Failed to download song information')
+                except ExtractorError:
+                    if download_tries > 3:
+                        raise
+                    else:
+                        download_tries += 1
+                        self._sleep(avg_song_duration, playlist_id)
+
             api_data = json.loads(api_json)
             track_data = api_data['set']['track']
             info = {
@@ -131,6 +149,7 @@ class EightTracksIE(InfoExtractor):
                 'ext': 'm4a',
             }
             entries.append(info)
+
             next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (
                 session, mix_id, track_data['id'])
         return {
index 3e7923648992d334357bad7206d745a17313b23e..fc92ff8253734f151fa973ea4979adbe5c6063bf 100644 (file)
@@ -1,7 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
 import json
 
 from .common import InfoExtractor
@@ -12,32 +11,49 @@ from ..utils import (
 
 
 class EllenTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?ellentv\.com/videos/(?P<id>[a-z0-9_-]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)'
+    _TESTS = [{
         'url': 'http://www.ellentv.com/videos/0-7jqrsr18/',
         'md5': 'e4af06f3bf0d5f471921a18db5764642',
         'info_dict': {
             'id': '0-7jqrsr18',
             'ext': 'mp4',
             'title': 'What\'s Wrong with These Photos? A Whole Lot',
+            'description': 'md5:35f152dc66b587cf13e6d2cf4fa467f6',
             'timestamp': 1406876400,
             'upload_date': '20140801',
         }
-    }
+    }, {
+        'url': 'http://ellentube.com/videos/0-dvzmabd5/',
+        'md5': '98238118eaa2bbdf6ad7f708e3e4f4eb',
+        'info_dict': {
+            'id': '0-dvzmabd5',
+            'ext': 'mp4',
+            'title': '1 year old twin sister makes her brother laugh',
+            'description': '1 year old twin sister makes her brother laugh',
+            'timestamp': 1419542075,
+            'upload_date': '20141225',
+        }
+    }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
+        video_url = self._html_search_meta('VideoURL', webpage, 'url')
+        title = self._og_search_title(webpage, default=None) or self._search_regex(
+            r'pageName\s*=\s*"([^"]+)"', webpage, 'title')
+        description = self._html_search_meta(
+            'description', webpage, 'description') or self._og_search_description(webpage)
         timestamp = parse_iso8601(self._search_regex(
             r'<span class="publish-date"><time datetime="([^"]+)">',
             webpage, 'timestamp'))
 
         return {
             'id': video_id,
-            'title': self._og_search_title(webpage),
-            'url': self._html_search_meta('VideoURL', webpage, 'url'),
+            'url': video_url,
+            'title': title,
+            'description': description,
             'timestamp': timestamp,
         }
 
@@ -55,8 +71,7 @@ class EllenTVClipsIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        playlist_id = mobj.group('id')
+        playlist_id = self._match_id(url)
 
         webpage = self._download_webpage(url, playlist_id)
         playlist = self._extract_playlist(webpage)
index 4277202a2eea45afdcd750e3e22e651d5ac9342c..00a69e6312aede6069e062c6abff29137939daa9 100644 (file)
@@ -1,8 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import unified_strdate
 
@@ -24,9 +22,7 @@ class ElPaisIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         prefix = self._html_search_regex(
index 92ada81d24b4b542d93222d4d9ba5be877005629..4ea37ebd9f2072ea7610cfc4a8630e120fcfa81b 100644 (file)
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from .fivemin import FiveMinIE
 from ..utils import (
     url_basename,
 )
@@ -27,11 +26,10 @@ class EngadgetIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         if video_id is not None:
-            return FiveMinIE._build_result(video_id)
+            return self.url_result('5min:%s' % video_id)
         else:
             title = url_basename(url)
             webpage = self._download_webpage(url, title)
@@ -39,5 +37,5 @@ class EngadgetIE(InfoExtractor):
             return {
                 '_type': 'playlist',
                 'title': title,
-                'entries': [FiveMinIE._build_result(id) for id in ids]
+                'entries': [self.url_result('5min:%s' % vid) for vid in ids]
             }
diff --git a/youtube_dl/extractor/eroprofile.py b/youtube_dl/extractor/eroprofile.py
new file mode 100644 (file)
index 0000000..79e2fbd
--- /dev/null
@@ -0,0 +1,45 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class EroProfileIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/view/(?P<id>[^/]+)'
+    _TEST = {
+        'url': 'http://www.eroprofile.com/m/videos/view/sexy-babe-softcore',
+        'md5': 'c26f351332edf23e1ea28ce9ec9de32f',
+        'info_dict': {
+            'id': '3733775',
+            'display_id': 'sexy-babe-softcore',
+            'ext': 'm4v',
+            'title': 'sexy babe softcore',
+            'thumbnail': 're:https?://.*\.jpg',
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        video_id = self._search_regex(
+            [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
+            webpage, 'video id', default=None)
+
+        video_url = self._search_regex(
+            r'<source src="([^"]+)', webpage, 'video url')
+        title = self._html_search_regex(
+            r'Title:</th><td>([^<]+)</td>', webpage, 'title')
+        thumbnail = self._search_regex(
+            r'onclick="showVideoPlayer\(\)"><img src="([^"]+)',
+            webpage, 'thumbnail', fatal=False)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'url': video_url,
+            'title': title,
+            'thumbnail': thumbnail,
+            'age_limit': 18,
+        }
index 476fc22b93424b13255d5eec3578eb985dbfbdfd..e240cb8591ecc467c44d98742685740f4354cbda 100644 (file)
@@ -3,9 +3,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
-
+)
+from ..utils import (
     ExtractorError,
 )
 
index d237a82813ea2556175e32a882d87bd5d1831924..d872d828fcc8e10fea4770e1e56ab21cda027336 100644 (file)
@@ -3,8 +3,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_request,
+)
+from ..utils import (
     ExtractorError,
 )
 
index aacbf14141f6d5109d265b8e4dfa37883cee81ab..36ba331285b434136b8d3c10e6a8a16bef18e7b7 100644 (file)
@@ -3,16 +3,18 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse_urlparse,
     compat_urllib_request,
     compat_urllib_parse,
+)
+from ..utils import (
     str_to_int,
 )
 
 
 class ExtremeTubeIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<id>[0-9]+))(?:[/?&]|$)'
     _TESTS = [{
         'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
         'md5': '1fb9228f5e3332ec8c057d6ac36f33e0',
@@ -31,7 +33,7 @@ class ExtremeTubeIE(InfoExtractor):
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('videoid')
+        video_id = mobj.group('id')
         url = 'http://www.' + mobj.group('url')
 
         req = compat_urllib_request.Request(url)
index 2139f68aa3cb16facdc45b5fd9e014621e1c6674..1ad4e77a8a334dc0bfec62a0fb4752676e2e1435 100644 (file)
@@ -13,9 +13,10 @@ from ..compat import (
     compat_urllib_request,
 )
 from ..utils import (
-    urlencode_postdata,
     ExtractorError,
+    int_or_none,
     limit_length,
+    urlencode_postdata,
 )
 
 
@@ -36,7 +37,6 @@ class FacebookIE(InfoExtractor):
         'info_dict': {
             'id': '637842556329505',
             'ext': 'mp4',
-            'duration': 38,
             'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam',
         }
     }, {
@@ -107,9 +107,7 @@ class FacebookIE(InfoExtractor):
         self._login()
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
         webpage = self._download_webpage(url, video_id)
 
@@ -149,6 +147,6 @@ class FacebookIE(InfoExtractor):
             'id': video_id,
             'title': video_title,
             'url': video_url,
-            'duration': int(video_data['video_duration']),
-            'thumbnail': video_data['thumbnail_src'],
+            'duration': int_or_none(video_data.get('video_duration')),
+            'thumbnail': video_data.get('thumbnail_src'),
         }
index 6f5d23559b78dfb621bbf6d819612d20b96fd597..81ceace53289709b93d7c647f6627197320381ef 100644 (file)
@@ -1,19 +1,20 @@
 #! -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 
-import re
 import hashlib
 
 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
+from ..compat import (
     compat_urllib_request,
     compat_urlparse,
 )
+from ..utils import (
+    ExtractorError,
+)
 
 
 class FC2IE(InfoExtractor):
-    _VALID_URL = r'^http://video\.fc2\.com/((?P<lang>[^/]+)/)?content/(?P<id>[^/]+)'
+    _VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)?content/(?P<id>[^/]+)'
     IE_NAME = 'fc2'
     _TEST = {
         'url': 'http://video.fc2.com/en/content/20121103kUan1KHs',
@@ -26,9 +27,7 @@ class FC2IE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
         self._downloader.cookiejar.clear_session_cookies()  # must clear
 
index af439ccfeefeade46f75b693627b09ba6ed830d6..3191116d96a0df0e61081fbc85e5745c815f1f99 100644 (file)
@@ -4,11 +4,13 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
 )
+from ..utils import (
+    ExtractorError,
+)
 
 
 class FiredriveIE(InfoExtractor):
@@ -28,11 +30,8 @@ class FiredriveIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         url = 'http://firedrive.com/file/%s' % video_id
-
         webpage = self._download_webpage(url, video_id)
 
         if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
index f9c127ce67bd7edefd22e7e7953ecce57e888d15..5b24b921c13d497d09474fa405df5b164451dd80 100644 (file)
@@ -1,11 +1,11 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_str,
     compat_urllib_parse,
+)
+from ..utils import (
     ExtractorError,
 )
 
@@ -13,7 +13,7 @@ from ..utils import (
 class FiveMinIE(InfoExtractor):
     IE_NAME = '5min'
     _VALID_URL = r'''(?x)
-        (?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(.*?&)?playList=|
+        (?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=|
             5min:)
         (?P<id>\d+)
         '''
@@ -41,13 +41,8 @@ class FiveMinIE(InfoExtractor):
         },
     ]
 
-    @classmethod
-    def _build_result(cls, video_id):
-        return cls.url_result('5min:%s' % video_id, cls.ie_key())
-
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id
         embed_page = self._download_webpage(embed_url, video_id,
                                             'Downloading embed page')
index d09d1c13a70cffb725329f69368f37359d7f7a08..190d9f9adc292bfc33d2085eb9bd057ec4c95502 100644 (file)
@@ -13,7 +13,7 @@ from ..utils import (
 
 class FKTVIE(InfoExtractor):
     IE_NAME = 'fernsehkritik.tv'
-    _VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/folge-(?P<ep>[0-9]+)(?:/.*)?'
+    _VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?'
 
     _TEST = {
         'url': 'http://fernsehkritik.tv/folge-1',
@@ -26,29 +26,32 @@ class FKTVIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        episode = int(mobj.group('ep'))
+        episode = int(self._match_id(url))
 
-        server = random.randint(2, 4)
-        video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%d.jpg' % episode
-        start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%d/Start' % episode,
+        video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%s.jpg' % episode
+        start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%s/Start' % episode,
                                                episode)
         playlist = self._search_regex(r'playlist = (\[.*?\]);', start_webpage,
                                       'playlist', flags=re.DOTALL)
         files = json.loads(re.sub('{[^{}]*?}', '{}', playlist))
-        # TODO: return a single multipart video
+
         videos = []
         for i, _ in enumerate(files, 1):
             video_id = '%04d%d' % (episode, i)
-            video_url = 'http://dl%d.fernsehkritik.tv/fernsehkritik%d%s.flv' % (server, episode, '' if i == 1 else '-%d' % i)
+            video_url = 'http://fernsehkritik.tv/js/directme.php?file=%s%s.flv' % (episode, '' if i == 1 else '-%d' % i)
             videos.append({
+                'ext': 'flv',
                 'id': video_id,
                 'url': video_url,
                 'title': clean_html(get_element_by_id('eptitle', start_webpage)),
                 'description': clean_html(get_element_by_id('contentlist', start_webpage)),
                 'thumbnail': video_thumbnail
             })
-        return videos
+        return {
+            '_type': 'multi_video',
+            'entries': videos,
+            'id': 'folge-%s' % episode,
+        }
 
 
 class FKTVPosteckeIE(InfoExtractor):
index b22ce2acb5d1beae29d1b298b4ef63c8e7639e5f..b2284ab01cad03fa3152fbc0a2edb70df2ab020f 100644 (file)
@@ -3,12 +3,13 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_request,
-    unified_strdate,
-    str_to_int,
+)
+from ..utils import (
     parse_duration,
-    clean_html,
+    parse_iso8601,
+    str_to_int,
 )
 
 
@@ -26,70 +27,81 @@ class FourTubeIE(InfoExtractor):
             'uploader': 'WCP Club',
             'uploader_id': 'wcp-club',
             'upload_date': '20131031',
+            'timestamp': 1383263892,
             'duration': 583,
+            'view_count': int,
+            'like_count': int,
+            'categories': list,
         }
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-
-        video_id = mobj.group('id')
-        webpage_url = 'http://www.4tube.com/videos/' + video_id
-        webpage = self._download_webpage(webpage_url, video_id)
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
 
-        self.report_extraction(video_id)
+        title = self._html_search_meta('name', webpage)
+        timestamp = parse_iso8601(self._html_search_meta(
+            'uploadDate', webpage))
+        thumbnail = self._html_search_meta('thumbnailUrl', webpage)
+        uploader_id = self._html_search_regex(
+            r'<a class="img-avatar" href="[^"]+/channels/([^/"]+)" title="Go to [^"]+ page">',
+            webpage, 'uploader id')
+        uploader = self._html_search_regex(
+            r'<a class="img-avatar" href="[^"]+/channels/[^/"]+" title="Go to ([^"]+) page">',
+            webpage, 'uploader')
 
-        playlist_json = self._html_search_regex(r'var playerConfigPlaylist\s+=\s+([^;]+)', webpage, 'Playlist')
-        media_id = self._search_regex(r'idMedia:\s*(\d+)', playlist_json, 'Media Id')
-        sources = self._search_regex(r'sources:\s*\[([^\]]*)\]', playlist_json, 'Sources').split(',')
-        title = self._search_regex(r'title:\s*"([^"]*)', playlist_json, 'Title')
-        thumbnail_url = self._search_regex(r'image:\s*"([^"]*)', playlist_json, 'Thumbnail', fatal=False)
+        categories_html = self._search_regex(
+            r'(?s)><i class="icon icon-tag"></i>\s*Categories / Tags\s*.*?<ul class="list">(.*?)</ul>',
+            webpage, 'categories', fatal=False)
+        categories = None
+        if categories_html:
+            categories = [
+                c.strip() for c in re.findall(
+                    r'(?s)<li><a.*?>(.*?)</a>', categories_html)]
 
-        uploader_str = self._search_regex(r'<span>Uploaded by</span>(.*?)<span>', webpage, 'uploader', fatal=False)
-        mobj = re.search(r'<a href="/sites/(?P<id>[^"]+)"><strong>(?P<name>[^<]+)</strong></a>', uploader_str)
-        (uploader, uploader_id) = (mobj.group('name'), mobj.group('id')) if mobj else (clean_html(uploader_str), None)
+        view_count = str_to_int(self._search_regex(
+            r'<meta itemprop="interactionCount" content="UserPlays:([0-9,]+)">',
+            webpage, 'view count', fatal=False))
+        like_count = str_to_int(self._search_regex(
+            r'<meta itemprop="interactionCount" content="UserLikes:([0-9,]+)">',
+            webpage, 'like count', fatal=False))
+        duration = parse_duration(self._html_search_meta('duration', webpage))
 
-        upload_date = None
-        view_count = None
-        duration = None
-        description = self._html_search_meta('description', webpage, 'description')
-        if description:
-            upload_date = self._search_regex(r'Published Date: (\d{2} [a-zA-Z]{3} \d{4})', description, 'upload date',
-                                             fatal=False)
-            if upload_date:
-                upload_date = unified_strdate(upload_date)
-            view_count = self._search_regex(r'Views: ([\d,\.]+)', description, 'view count', fatal=False)
-            if view_count:
-                view_count = str_to_int(view_count)
-            duration = parse_duration(self._search_regex(r'Length: (\d+m\d+s)', description, 'duration', fatal=False))
+        params_js = self._search_regex(
+            r'\$\.ajax\(url,\ opts\);\s*\}\s*\}\)\(([0-9,\[\] ]+)\)',
+            webpage, 'initialization parameters'
+        )
+        params = self._parse_json('[%s]' % params_js, video_id)
+        media_id = params[0]
+        sources = ['%s' % p for p in params[2]]
 
-        token_url = "http://tkn.4tube.com/{0}/desktop/{1}".format(media_id, "+".join(sources))
+        token_url = 'http://tkn.4tube.com/{0}/desktop/{1}'.format(
+            media_id, '+'.join(sources))
         headers = {
             b'Content-Type': b'application/x-www-form-urlencoded',
             b'Origin': b'http://www.4tube.com',
         }
         token_req = compat_urllib_request.Request(token_url, b'{}', headers)
         tokens = self._download_json(token_req, video_id)
-
         formats = [{
             'url': tokens[format]['token'],
             'format_id': format + 'p',
             'resolution': format + 'p',
             'quality': int(format),
         } for format in sources]
-
         self._sort_formats(formats)
 
         return {
             'id': video_id,
             'title': title,
             'formats': formats,
-            'thumbnail': thumbnail_url,
+            'categories': categories,
+            'thumbnail': thumbnail,
             'uploader': uploader,
             'uploader_id': uploader_id,
-            'upload_date': upload_date,
+            'timestamp': timestamp,
+            'like_count': like_count,
             'view_count': view_count,
             'duration': duration,
             'age_limit': 18,
-            'webpage_url': webpage_url,
         }
diff --git a/youtube_dl/extractor/foxgay.py b/youtube_dl/extractor/foxgay.py
new file mode 100644 (file)
index 0000000..08b8ea3
--- /dev/null
@@ -0,0 +1,48 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class FoxgayIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P<id>\d+)\.shtml'
+    _TEST = {
+        'url': 'http://foxgay.com/videos/fuck-turkish-style-2582.shtml',
+        'md5': '80d72beab5d04e1655a56ad37afe6841',
+        'info_dict': {
+            'id': '2582',
+            'ext': 'mp4',
+            'title': 'md5:6122f7ae0fc6b21ebdf59c5e083ce25a',
+            'description': 'md5:5e51dc4405f1fd315f7927daed2ce5cf',
+            'age_limit': 18,
+            'thumbnail': 're:https?://.*\.jpg$',
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_regex(
+            r'<title>(?P<title>.*?)</title>',
+            webpage, 'title', fatal=False)
+        description = self._html_search_regex(
+            r'<div class="ico_desc"><h2>(?P<description>.*?)</h2>',
+            webpage, 'description', fatal=False)
+
+        # Find the URL for the iFrame which contains the actual video.
+        iframe = self._download_webpage(
+            self._html_search_regex(r'iframe src="(?P<frame>.*?)"', webpage, 'video frame'),
+            video_id)
+        video_url = self._html_search_regex(
+            r"v_path = '(?P<vid>http://.*?)'", iframe, 'url')
+        thumb_url = self._html_search_regex(
+            r"t_path = '(?P<thumb>http://.*?)'", iframe, 'thumbnail', fatal=False)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+            'description': description,
+            'thumbnail': thumb_url,
+            'age_limit': 18,
+        }
diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py
new file mode 100644 (file)
index 0000000..917f76b
--- /dev/null
@@ -0,0 +1,94 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_iso8601,
+    int_or_none,
+)
+
+
+class FoxNewsIE(InfoExtractor):
+    _VALID_URL = r'https?://video\.foxnews\.com/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
+    _TESTS = [
+        {
+            'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips',
+            'md5': '32aaded6ba3ef0d1c04e238d01031e5e',
+            'info_dict': {
+                'id': '3937480',
+                'ext': 'flv',
+                'title': 'Frozen in Time',
+                'description': 'Doctors baffled by 16-year-old girl that is the size of a toddler',
+                'duration': 265,
+                'timestamp': 1304411491,
+                'upload_date': '20110503',
+                'thumbnail': 're:^https?://.*\.jpg$',
+            },
+        },
+        {
+            'url': 'http://video.foxnews.com/v/3922535568001/rep-luis-gutierrez-on-if-obamas-immigration-plan-is-legal/#sp=show-clips',
+            'md5': '5846c64a1ea05ec78175421b8323e2df',
+            'info_dict': {
+                'id': '3922535568001',
+                'ext': 'mp4',
+                'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal",
+                'description': "Congressman discusses the president's executive action",
+                'duration': 292,
+                'timestamp': 1417662047,
+                'upload_date': '20141204',
+                'thumbnail': 're:^https?://.*\.jpg$',
+            },
+        },
+        {
+            'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com',
+            'only_matching': True,
+        },
+    ]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        video = self._download_json(
+            'http://video.foxnews.com/v/feed/video/%s.js?template=fox' % video_id, video_id)
+
+        item = video['channel']['item']
+        title = item['title']
+        description = item['description']
+        timestamp = parse_iso8601(item['dc-date'])
+
+        media_group = item['media-group']
+        duration = None
+        formats = []
+        for media in media_group['media-content']:
+            attributes = media['@attributes']
+            video_url = attributes['url']
+            if video_url.endswith('.f4m'):
+                formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124', video_id))
+            elif video_url.endswith('.m3u8'):
+                formats.extend(self._extract_m3u8_formats(video_url, video_id, 'flv'))
+            elif not video_url.endswith('.smil'):
+                duration = int_or_none(attributes.get('duration'))
+                formats.append({
+                    'url': video_url,
+                    'format_id': media['media-category']['@attributes']['label'],
+                    'preference': 1,
+                    'vbr': int_or_none(attributes.get('bitrate')),
+                    'filesize': int_or_none(attributes.get('fileSize'))
+                })
+        self._sort_formats(formats)
+
+        media_thumbnail = media_group['media-thumbnail']['@attributes']
+        thumbnails = [{
+            'url': media_thumbnail['url'],
+            'width': int_or_none(media_thumbnail.get('width')),
+            'height': int_or_none(media_thumbnail.get('height')),
+        }] if media_thumbnail else []
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'timestamp': timestamp,
+            'formats': formats,
+            'thumbnails': thumbnails,
+        }
index 898e0dda780df7a83f91226216b7fce4d59818c2..0c29721629a25369621072e4f451e7decdc8df0b 100644 (file)
@@ -5,7 +5,7 @@ import json
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_parse_qs,
     compat_urlparse,
 )
index e0420a48f8cacb5661b6882f797c09755de4df46..bbc760a4990cac1b6cdb731c161d61c853a72729 100644 (file)
@@ -6,13 +6,15 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
+    compat_urllib_parse_urlparse,
     compat_urlparse,
-    ExtractorError,
+)
+from ..utils import (
     clean_html,
-    parse_duration,
-    compat_urllib_parse_urlparse,
+    ExtractorError,
     int_or_none,
+    parse_duration,
 )
 
 
index 3022f539d2571f34ae87bc91dc4cf1f1e25ccdc0..a07d69841f9278b932754b603249dc9f8eea53a0 100644 (file)
@@ -6,7 +6,9 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     xpath_with_ns,
-    parse_iso8601
+    parse_iso8601,
+    float_or_none,
+    int_or_none,
 )
 
 NAMESPACE_MAP = {
@@ -21,25 +23,41 @@ RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/'
 
 class GameOneIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P<id>\d+)'
-    _TEST = {
-        'url': 'http://www.gameone.de/tv/288',
-        'md5': '136656b7fb4c9cb4a8e2d500651c499b',
-        'info_dict': {
-            'id': '288',
-            'ext': 'mp4',
-            'title': 'Game One - Folge 288',
-            'duration': 1238,
-            'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg',
-            'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1',
-            'age_limit': 16,
-            'upload_date': '20140513',
-            'timestamp': 1399980122,
+    _TESTS = [
+        {
+            'url': 'http://www.gameone.de/tv/288',
+            'md5': '136656b7fb4c9cb4a8e2d500651c499b',
+            'info_dict': {
+                'id': '288',
+                'ext': 'mp4',
+                'title': 'Game One - Folge 288',
+                'duration': 1238,
+                'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg',
+                'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1',
+                'age_limit': 16,
+                'upload_date': '20140513',
+                'timestamp': 1399980122,
+            }
+        },
+        {
+            'url': 'http://gameone.de/tv/220',
+            'md5': '5227ca74c4ae6b5f74c0510a7c48839e',
+            'info_dict': {
+                'id': '220',
+                'ext': 'mp4',
+                'upload_date': '20120918',
+                'description': 'Jet Set Radio HD, Tekken Tag Tournament 2, Source Filmmaker',
+                'timestamp': 1347971451,
+                'title': 'Game One - Folge 220',
+                'duration': 896.62,
+                'age_limit': 16,
+            }
         }
-    }
+
+    ]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
         og_video = self._og_search_video_url(webpage, secure=False)
@@ -66,13 +84,13 @@ class GameOneIE(InfoExtractor):
             video_id,
             'Downloading media:content')
         rendition_items = content.findall('.//rendition')
-        duration = int(rendition_items[0].get('duration'))
+        duration = float_or_none(rendition_items[0].get('duration'))
         formats = [
             {
                 'url': re.sub(r'.*/(r2)', RAW_MP4_URL + r'\1', r.find('./src').text),
-                'width': int(r.get('width')),
-                'height': int(r.get('height')),
-                'tbr': int(r.get('bitrate')),
+                'width': int_or_none(r.get('width')),
+                'height': int_or_none(r.get('height')),
+                'tbr': int_or_none(r.get('bitrate')),
             }
             for r in rendition_items
         ]
@@ -105,7 +123,8 @@ class GameOnePlaylistIE(InfoExtractor):
         webpage = self._download_webpage('http://www.gameone.de/tv', 'TV')
         max_id = max(map(int, re.findall(r'<a href="/tv/(\d+)"', webpage)))
         entries = [
-            self.url_result('http://www.gameone.de/tv/%d' % video_id, 'GameOne')
+            self.url_result('http://www.gameone.de/tv/%d' %
+                            video_id, 'GameOne')
             for video_id in range(max_id, 0, -1)]
 
         return {
index d570e3f6a85ca399d81328e3afedee4f98158e5f..47373e21540030d4c9a19dbfc1c5943f468fea4f 100644 (file)
@@ -4,9 +4,11 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
     compat_urlparse,
+)
+from ..utils import (
     unescapeHTML,
 )
 
index de14ae1fb1edd0600488b8f04c7b400bf310ef5a..fed968f5179ebf6159212da5ab75b024b3bc0a03 100644 (file)
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
 )
@@ -39,7 +39,8 @@ class GDCVaultIE(InfoExtractor):
                 'id': '1015301',
                 'ext': 'flv',
                 'title': 'Thexder Meets Windows 95, or Writing Great Games in the Windows 95 Environment',
-            }
+            },
+            'skip': 'Requires login',
         }
     ]
 
index 328301de396e5dd289b139808754ef20e1af652b..7a5bf939237ff45731fd3befca5ad0b7dfc0df1f 100644 (file)
@@ -23,6 +23,7 @@ from ..utils import (
     unescapeHTML,
     unified_strdate,
     unsmuggle_url,
+    UnsupportedError,
     url_basename,
 )
 from .brightcove import BrightcoveIE
@@ -130,12 +131,13 @@ class GenericIE(InfoExtractor):
         # ooyala video
         {
             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
-            'md5': '5644c6ca5d5782c1d0d350dad9bd840c',
+            'md5': '166dd577b433b4d4ebfee10b0824d8ff',
             'info_dict': {
                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
                 'ext': 'mp4',
                 'title': '2cc213299525360.mov',  # that's what we get
             },
+            'add_ie': ['Ooyala'],
         },
         # google redirect
         {
@@ -145,7 +147,7 @@ class GenericIE(InfoExtractor):
                 'ext': 'mp4',
                 'upload_date': '20130224',
                 'uploader_id': 'TheVerge',
-                'description': 'Chris Ziegler takes a look at the Alcatel OneTouch Fire and the ZTE Open; two of the first Firefox OS handsets to be officially announced.',
+                'description': 're:^Chris Ziegler takes a look at the\.*',
                 'uploader': 'The Verge',
                 'title': 'First Firefox OS phones side-by-side',
             },
@@ -180,6 +182,14 @@ class GenericIE(InfoExtractor):
                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
             },
         },
+        # BBC iPlayer embeds
+        {
+            'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
+            'info_dict': {
+                'title': 'BBC - Blogs -  Adam Curtis - BUGGER',
+            },
+            'playlist_mincount': 18,
+        },
         # RUTV embed
         {
             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
@@ -467,8 +477,17 @@ class GenericIE(InfoExtractor):
             'expected_warnings': [
                 'URL could be a direct video link, returning it as such.'
             ]
-        }
-
+        },
+        # Cinchcast embed
+        {
+            'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
+            'info_dict': {
+                'id': '7141703',
+                'ext': 'mp3',
+                'upload_date': '20141126',
+                'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
+            }
+        },
     ]
 
     def report_following_redirect(self, new_url):
@@ -689,9 +708,9 @@ class GenericIE(InfoExtractor):
             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
 
         # Helper method
-        def _playlist_from_matches(matches, getter, ie=None):
+        def _playlist_from_matches(matches, getter=None, ie=None):
             urlrs = orderedSet(
-                self.url_result(self._proto_relative_url(getter(m)), ie)
+                self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
                 for m in matches)
             return self.playlist_result(
                 urlrs, playlist_id=video_id, playlist_title=video_title)
@@ -895,6 +914,11 @@ class GenericIE(InfoExtractor):
             return _playlist_from_matches(
                 matches, getter=unescapeHTML, ie='FunnyOrDie')
 
+        # Look for BBC iPlayer embed
+        matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
+        if matches:
+            return _playlist_from_matches(matches, ie='BBCCoUk')
+
         # Look for embedded RUTV player
         rutv_url = RUTVIE._extract_url(webpage)
         if rutv_url:
@@ -902,7 +926,7 @@ class GenericIE(InfoExtractor):
 
         # Look for embedded TED player
         mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>http://embed\.ted\.com/.+?)\1', webpage)
+            r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
         if mobj is not None:
             return self.url_result(mobj.group('url'), 'TED')
 
@@ -962,6 +986,13 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             return self.url_result(mobj.group('url'), 'SBS')
 
+        # Look for embedded Cinchcast player
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
+            webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'Cinchcast')
+
         mobj = re.search(
             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
             webpage)
@@ -1041,7 +1072,7 @@ class GenericIE(InfoExtractor):
                     'url': new_url,
                 }
         if not found:
-            raise ExtractorError('Unsupported URL: %s' % url)
+            raise UnsupportedError(url)
 
         entries = []
         for video_url in found:
diff --git a/youtube_dl/extractor/giantbomb.py b/youtube_dl/extractor/giantbomb.py
new file mode 100644 (file)
index 0000000..87cd191
--- /dev/null
@@ -0,0 +1,99 @@
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    unescapeHTML,
+    qualities,
+    int_or_none,
+)
+
+
+class GiantBombIE(InfoExtractor):
+    """Extractor for video pages on giantbomb.com.
+
+    Stream URLs are read from the JSON stored in the page's ``data-video``
+    attribute; title, description and thumbnail come from the base class's
+    ``_og_search_*`` helpers.
+    """
+
+    _VALID_URL = r'https?://(?:www\.)?giantbomb\.com/videos/(?P<display_id>[^/]+)/(?P<id>\d+-\d+)'
+    _TEST = {
+        'url': 'http://www.giantbomb.com/videos/quick-look-destiny-the-dark-below/2300-9782/',
+        'md5': '57badeface303ecf6b98b812de1b9018',
+        'info_dict': {
+            'id': '2300-9782',
+            'display_id': 'quick-look-destiny-the-dark-below',
+            'ext': 'mp4',
+            'title': 'Quick Look: Destiny: The Dark Below',
+            'description': 'md5:0aa3aaf2772a41b91d44c63f30dfad24',
+            'duration': 2399,
+            'thumbnail': 're:^https?://.*\.jpg$',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Build the info dict for the video page at *url*.
+
+        Returns a ``url_result`` for the Youtube extractor instead when no
+        format could be collected but the page data carries a ``youtubeID``.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id')
+
+        webpage = self._download_webpage(url, display_id)
+
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        # The attribute value is HTML-escaped JSON.
+        video = json.loads(unescapeHTML(self._search_regex(
+            r'data-video="([^"]+)"', webpage, 'data-video')))
+
+        duration = int_or_none(video.get('lengthSeconds'))
+
+        quality = qualities([
+            'f4m_low', 'progressive_low', 'f4m_high',
+            'progressive_high', 'f4m_hd', 'progressive_hd'])
+
+        # A missing or null 'videoStreams' must count as "no streams" rather
+        # than raise KeyError/AttributeError, otherwise the YouTube fallback
+        # below could never be reached for such pages.
+        video_streams = video.get('videoStreams') or {}
+
+        formats = []
+        for format_id, video_url in video_streams.items():
+            if format_id == 'f4m_stream':
+                continue
+            if video_url.endswith('.f4m'):
+                f4m_formats = self._extract_f4m_formats(video_url + '?hdcore=3.3.1', display_id)
+                if f4m_formats:
+                    f4m_formats[0]['quality'] = quality(format_id)
+                    formats.extend(f4m_formats)
+            else:
+                formats.append({
+                    'url': video_url,
+                    'format_id': format_id,
+                    'quality': quality(format_id),
+                })
+
+        if not formats:
+            youtube_id = video.get('youtubeID')
+            if youtube_id:
+                return self.url_result(youtube_id, 'Youtube')
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/giga.py b/youtube_dl/extractor/giga.py
new file mode 100644 (file)
index 0000000..7758901
--- /dev/null
@@ -0,0 +1,101 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+
+from .common import InfoExtractor
+from ..utils import (
+    qualities,
+    compat_str,
+    parse_duration,
+    parse_iso8601,
+    str_to_int,
+)
+
+
+class GigaIE(InfoExtractor):
+    """Extractor for video pages on giga.de."""
+
+    _VALID_URL = r'https?://(?:www\.)?giga\.de/(?:[^/]+/)*(?P<id>[^/]+)'
+    _TESTS = [{
+        'url': 'http://www.giga.de/filme/anime-awesome/trailer/anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss/',
+        'md5': '6bc5535e945e724640664632055a584f',
+        'info_dict': {
+            'id': '2622086',
+            'display_id': 'anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss',
+            'ext': 'mp4',
+            'title': 'Anime Awesome: Chihiros Reise ins Zauberland – Das Beste kommt zum Schluss',
+            'description': 'md5:afdf5862241aded4718a30dff6a57baf',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 578,
+            'timestamp': 1414749706,
+            'upload_date': '20141031',
+            'uploader': 'Robin Schweiger',
+            'view_count': int,
+        },
+    }, {
+        'url': 'http://www.giga.de/games/channel/giga-top-montag/giga-topmontag-die-besten-serien-2014/',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.giga.de/extra/netzkultur/videos/giga-games-tom-mats-robin-werden-eigene-wege-gehen-eine-ankuendigung/',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.giga.de/tv/jonas-liest-spieletitel-eingedeutscht-episode-2/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dict for the giga.de page at *url*."""
+        display_id = self._match_id(url)
+        page = self._download_webpage(url, display_id)
+
+        video_id = self._search_regex(
+            [r'data-video-id="(\d+)"', r'/api/video/jwplayer/#v=(\d+)'],
+            page, 'video id')
+
+        playlist_url = (
+            'http://www.giga.de/api/syndication/video/video_id/%s/playlist.json?content=syndication/key/368b5f151da4ae05ced7fa296bdff65a/'
+            % video_id)
+        # Only the first element of the downloaded JSON is used.
+        playlist = self._download_json(playlist_url, video_id)[0]
+
+        rank = qualities(['normal', 'hd720'])
+
+        # Formats sit under the string keys "0", "1", ...; stop at the first
+        # key that is missing or holds a falsy value.
+        numbered = (
+            playlist.get(compat_str(index)) for index in itertools.count(0))
+        formats = [{
+            'url': entry['src'],
+            'format_id': '%s-%s' % (entry['quality'], entry['type'].split('/')[-1]),
+            'quality': rank(entry['quality']),
+        } for entry in itertools.takewhile(bool, numbered)]
+        self._sort_formats(formats)
+
+        info = {
+            'id': video_id,
+            'display_id': display_id,
+        }
+        info['title'] = self._html_search_meta(
+            'title', page, 'title', fatal=True)
+        info['description'] = self._html_search_meta(
+            'description', page, 'description')
+        info['thumbnail'] = self._og_search_thumbnail(page)
+
+        duration_text = self._search_regex(
+            r'(?s)(?:data-video-id="{0}"|data-video="[^"]*/api/video/jwplayer/#v={0}[^"]*")[^>]*>.+?<span class="duration">([^<]+)</span>'.format(video_id),
+            page, 'duration', fatal=False)
+        info['duration'] = parse_duration(duration_text)
+
+        upload_datetime = self._search_regex(
+            r'datetime="([^"]+)"', page, 'upload date', fatal=False)
+        info['timestamp'] = parse_iso8601(upload_datetime)
+        info['uploader'] = self._search_regex(
+            r'class="author">([^<]+)</a>', page, 'uploader', fatal=False)
+
+        view_count_text = self._search_regex(
+            r'<span class="views"><strong>([\d.]+)</strong>', page, 'view count', fatal=False)
+        info['view_count'] = str_to_int(view_count_text)
+
+        info['formats'] = formats
+        return info
index 10001d4d95f9895c5e965f88c967aa87f63ff7f2..0fb5097244a6218afca0a0908448127f74073b77 100644 (file)
@@ -1,9 +1,6 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import (
-    int_or_none,
-)
 
 
 class GoldenMoustacheIE(InfoExtractor):
@@ -17,7 +14,6 @@ class GoldenMoustacheIE(InfoExtractor):
             'title': 'Suricate - Le Poker',
             'description': 'md5:3d1f242f44f8c8cb0a106f1fd08e5dc9',
             'thumbnail': 're:^https?://.*\.jpg$',
-            'view_count': int,
         }
     }, {
         'url': 'http://www.goldenmoustache.com/le-lab-tout-effacer-mc-fly-et-carlito-55249/',
@@ -28,7 +24,6 @@ class GoldenMoustacheIE(InfoExtractor):
             'title': 'Le LAB - Tout Effacer (Mc Fly et Carlito)',
             'description': 'md5:9b7fbf11023fb2250bd4b185e3de3b2a',
             'thumbnail': 're:^https?://.*\.(?:png|jpg)$',
-            'view_count': int,
         }
     }]
 
@@ -42,9 +37,6 @@ class GoldenMoustacheIE(InfoExtractor):
             r'<title>(.*?)(?: - Golden Moustache)?</title>', webpage, 'title')
         thumbnail = self._og_search_thumbnail(webpage)
         description = self._og_search_description(webpage)
-        view_count = int_or_none(self._html_search_regex(
-            r'<strong>([0-9]+)</strong>\s*VUES</span>',
-            webpage, 'view count', fatal=False))
 
         return {
             'id': video_id,
@@ -53,5 +45,4 @@ class GoldenMoustacheIE(InfoExtractor):
             'title': title,
             'description': description,
             'thumbnail': thumbnail,
-            'view_count': view_count,
         }
index 53714f47f1a0a8cd1abb8aab0ec09cdbd283d51b..2bfb9904022c6a3830901baa2ee380b6f4f14714 100644 (file)
@@ -2,8 +2,10 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urlparse,
+)
+from ..utils import (
     determine_ext,
 )
 
index 469e1f9357eaf66ef48b3bed6c3d464c48f2b5dc..498304cb2bd9b605d44e67291a2f38bf4481a6f8 100644 (file)
@@ -4,7 +4,7 @@ import itertools
 import re
 
 from .common import SearchInfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
 )
 
index 1ac1da8569e20b84c2baf38011488d6304a090cb..ae24aff84fd85c6796c7a4374964f70629175f43 100644 (file)
@@ -4,11 +4,12 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-    determine_ext,
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
+)
+from ..utils import (
+    ExtractorError,
     int_or_none,
 )
 
@@ -106,7 +107,6 @@ class GorillaVidIE(InfoExtractor):
         formats = [{
             'format_id': 'sd',
             'url': video_url,
-            'ext': determine_ext(video_url),
             'quality': 1,
         }]
 
index 18474cbb72684ac6327957fb3385fb9de3bc4480..b116d251d5d3f30c6affc852454e7e326d14f660 100644 (file)
@@ -2,57 +2,52 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_parse_qs,
+)
 from ..utils import (
-    compat_urlparse,
-    ExtractorError,
+    parse_duration,
 )
 
 
 class GoshgayIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)www.goshgay.com/video(?P<id>\d+?)($|/)'
+    _VALID_URL = r'https?://www\.goshgay\.com/video(?P<id>\d+?)($|/)'
     _TEST = {
-        'url': 'http://www.goshgay.com/video4116282',
-        'md5': '268b9f3c3229105c57859e166dd72b03',
+        'url': 'http://www.goshgay.com/video299069/diesel_sfw_xxx_video',
+        'md5': '027fcc54459dff0feb0bc06a7aeda680',
         'info_dict': {
-            'id': '4116282',
+            'id': '299069',
             'ext': 'flv',
-            'title': 'md5:089833a4790b5e103285a07337f245bf',
-            'thumbnail': 're:http://.*\.jpg',
+            'title': 'DIESEL SFW XXX Video',
+            'thumbnail': 're:^http://.*\.jpg$',
+            'duration': 79,
             'age_limit': 18,
         }
     }
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-
         webpage = self._download_webpage(url, video_id)
-        title = self._og_search_title(webpage)
-        thumbnail = self._og_search_thumbnail(webpage)
+
+        title = self._html_search_regex(
+            r'<h2>(.*?)<', webpage, 'title')
+        duration = parse_duration(self._html_search_regex(
+            r'<span class="duration">\s*-?\s*(.*?)</span>',
+            webpage, 'duration', fatal=False))
         family_friendly = self._html_search_meta(
             'isFamilyFriendly', webpage, default='false')
-        config_url = self._search_regex(
-            r"'config'\s*:\s*'([^']+)'", webpage, 'config URL')
-
-        config = self._download_xml(
-            config_url, video_id, 'Downloading player config XML')
-
-        if config is None:
-            raise ExtractorError('Missing config XML')
-        if config.tag != 'config':
-            raise ExtractorError('Missing config attribute')
-        fns = config.findall('file')
-        if len(fns) < 1:
-            raise ExtractorError('Missing media URI')
-        video_url = fns[0].text
 
-        url_comp = compat_urlparse.urlparse(url)
-        ref = "%s://%s%s" % (url_comp[0], url_comp[1], url_comp[2])
+        flashvars = compat_parse_qs(self._html_search_regex(
+            r'<embed.+?id="flash-player-embed".+?flashvars="([^"]+)"',
+            webpage, 'flashvars'))
+        thumbnail = flashvars.get('url_bigthumb', [None])[0]
+        video_url = flashvars['flv_url'][0]
 
         return {
             'id': video_id,
             'url': video_url,
             'title': title,
             'thumbnail': thumbnail,
-            'http_referer': ref,
+            'duration': duration,
             'age_limit': 0 if family_friendly == 'true' else 18,
         }
diff --git a/youtube_dl/extractor/groupon.py b/youtube_dl/extractor/groupon.py
new file mode 100644 (file)
index 0000000..8b9e0e2
--- /dev/null
@@ -0,0 +1,59 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class GrouponIE(InfoExtractor):
+    """Extractor for Groupon deal pages; returns the deal's videos as a playlist."""
+
+    _VALID_URL = r'https?://www\.groupon\.com/deals/(?P<id>[^?#]+)'
+
+    _TEST = {
+        'url': 'https://www.groupon.com/deals/bikram-yoga-huntington-beach-2#ooid=tubGNycTo_9Uxg82uESj4i61EYX8nyuf',
+        'info_dict': {
+            'id': 'bikram-yoga-huntington-beach-2',
+            'title': '$49 for 10 Yoga Classes or One Month of Unlimited Classes at Bikram Yoga Huntington Beach ($180 Value)',
+            'description': 'Studio kept at 105 degrees and 40% humidity with anti-microbial and anti-slip Flotex flooring; certified instructors',
+        },
+        'playlist': [{
+            'info_dict': {
+                'id': 'tubGNycTo_9Uxg82uESj4i61EYX8nyuf',
+                'ext': 'mp4',
+                'title': 'Bikram Yoga Huntington Beach | Orange County',
+            },
+        }],
+        'params': {
+            'skip_download': 'HLS',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return a playlist dict with one ``ooyala:`` entry per supported deal video."""
+        playlist_id = self._match_id(url)
+        webpage = self._download_webpage(url, playlist_id)
+
+        payload_source = self._search_regex(
+            r'var\s+payload\s*=\s*(.*?);\n', webpage, 'payload')
+        payload = self._parse_json(payload_source, playlist_id)
+
+        entries = []
+        for deal_video in payload['carousel'].get('dealVideos', []):
+            provider = deal_video.get('provider')
+            if provider == 'OOYALA':
+                entries.append(
+                    self.url_result('ooyala:%s' % deal_video['media']))
+            else:
+                # Only OOYALA-provided videos are handed on; warn and move on.
+                self.report_warning(
+                    '%s: Unsupported video provider %s, skipping video' %
+                    (playlist_id, provider))
+
+        playlist_title = self._og_search_title(webpage)
+        playlist_description = self._og_search_description(webpage)
+        return {
+            '_type': 'playlist',
+            'id': playlist_id,
+            'entries': entries,
+            'title': playlist_title,
+            'description': playlist_description,
+        }
diff --git a/youtube_dl/extractor/hellporno.py b/youtube_dl/extractor/hellporno.py
new file mode 100644 (file)
index 0000000..7a1c75b
--- /dev/null
@@ -0,0 +1,79 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    js_to_json,
+    remove_end,
+)
+
+
+class HellPornoIE(InfoExtractor):
+    """Extractor for hellporno.com video pages (reads the page's ``flashvars``)."""
+
+    _VALID_URL = r'https?://(?:www\.)?hellporno\.com/videos/(?P<id>[^/]+)'
+    _TEST = {
+        'url': 'http://hellporno.com/videos/dixie-is-posing-with-naked-ass-very-erotic/',
+        'md5': '1fee339c610d2049699ef2aa699439f1',
+        'info_dict': {
+            'id': '149116',
+            'display_id': 'dixie-is-posing-with-naked-ass-very-erotic',
+            'ext': 'mp4',
+            'title': 'Dixie is posing with naked ass very erotic',
+            'thumbnail': 're:https?://.*\.jpg$',
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict built from the page's ``flashvars`` object."""
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        title = remove_end(self._html_search_regex(
+            r'<title>([^<]+)</title>', webpage, 'title'), ' - Hell Porno')
+
+        flashvars = self._parse_json(self._search_regex(
+            r'var\s+flashvars\s*=\s*({.+?});', webpage, 'flashvars'),
+            display_id, transform_source=js_to_json)
+
+        video_id = flashvars.get('video_id')
+        thumbnail = flashvars.get('preview_url')
+        # 'postfix' is expected to start with a dot (default '.mp4'); drop it.
+        ext = flashvars.get('postfix', '.mp4')[1:]
+
+        formats = []
+        for video_url_key in ['video_url', 'video_alt_url']:
+            video_url = flashvars.get(video_url_key)
+            if not video_url:
+                continue
+            video_text = flashvars.get('%s_text' % video_url_key)
+            fmt = {
+                'url': video_url,
+                'ext': ext,
+                'format_id': video_text,
+            }
+            # The label is optional; without the ``or ''`` a missing
+            # '<key>_text' entry would make re.search() raise TypeError.
+            m = re.search(r'^(?P<height>\d+)[pP]', video_text or '')
+            if m:
+                fmt['height'] = int(m.group('height'))
+            formats.append(fmt)
+        self._sort_formats(formats)
+
+        keywords = self._html_search_meta(
+            'keywords', webpage, 'categories', default='')
+        # ''.split(',') would yield [''], i.e. one bogus empty category.
+        categories = keywords.split(',') if keywords else []
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'categories': categories,
+            'age_limit': 18,
+            'formats': formats,
+        }
index 5268efa49433ca8c7cb0c2288df0705165e876a3..93107b3064ebfba513b3aa208556b5822f6cf979 100644 (file)
@@ -2,9 +2,8 @@
 
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
+from ..utils import js_to_json
 
 
 class HelsinkiIE(InfoExtractor):
@@ -24,39 +23,21 @@ class HelsinkiIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-        formats = []
-
-        mobj = re.search(r'file=((\w+):[^&]+)', webpage)
-        if mobj:
-            formats.append({
-                'ext': mobj.group(2),
-                'play_path': mobj.group(1),
-                'url': 'rtmp://flashvideo.it.helsinki.fi/vod/',
-                'player_url': 'http://video.helsinki.fi/player.swf',
-                'format_note': 'sd',
-                'quality': 0,
-            })
-
-        mobj = re.search(r'hd\.file=((\w+):[^&]+)', webpage)
-        if mobj:
-            formats.append({
-                'ext': mobj.group(2),
-                'play_path': mobj.group(1),
-                'url': 'rtmp://flashvideo.it.helsinki.fi/vod/',
-                'player_url': 'http://video.helsinki.fi/player.swf',
-                'format_note': 'hd',
-                'quality': 1,
-            })
 
+        params = self._parse_json(self._html_search_regex(
+            r'(?s)jwplayer\("player"\).setup\((\{.*?\})\);',
+            webpage, 'player code'), video_id, transform_source=js_to_json)
+        formats = [{
+            'url': s['file'],
+            'ext': 'mp4',
+        } for s in params['sources']]
         self._sort_formats(formats)
 
         return {
             'id': video_id,
             'title': self._og_search_title(webpage).replace('Video: ', ''),
             'description': self._og_search_description(webpage),
-            'thumbnail': self._og_search_thumbnail(webpage),
             'formats': formats,
         }
diff --git a/youtube_dl/extractor/hitbox.py b/youtube_dl/extractor/hitbox.py
new file mode 100644 (file)
index 0000000..84bd7c0
--- /dev/null
@@ -0,0 +1,193 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    parse_iso8601,
+    float_or_none,
+    int_or_none,
+    compat_str,
+)
+
+
+class HitboxIE(InfoExtractor):
+    """Extractor for recorded videos on hitbox.tv (``/video/<id>`` URLs)."""
+
+    IE_NAME = 'hitbox'
+    _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/video/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.hitbox.tv/video/203213',
+        'info_dict': {
+            'id': '203213',
+            'title': 'hitbox @ gamescom, Sub Button Hype extended, Giveaway - hitbox News Update with Oxy',
+            'alt_title': 'hitboxlive - Aug 9th #6',
+            'description': '',
+            'ext': 'mp4',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 215.1666,
+            'resolution': 'HD 720p',
+            'uploader': 'hitboxlive',
+            'view_count': int,
+            'timestamp': 1407576133,
+            'upload_date': '20140809',
+            'categories': ['Live Show'],
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }
+
+    def _extract_metadata(self, url, video_id):
+        """Download ``<url>/<video_id>`` and map it to common info fields.
+
+        Shared with HitboxLiveIE, which passes the live API endpoint. The
+        returned dict has no 'url'/'formats' yet; callers add those.
+        """
+        thumb_base = 'https://edge.sf.hitbox.tv'
+        metadata = self._download_json(
+            '%s/%s' % (url, video_id), video_id)
+
+        # Live streams are the default shape; recorded videos use other keys.
+        date = 'media_live_since'
+        media_type = 'livestream'
+        if metadata.get('media_type') == 'video':
+            media_type = 'video'
+            date = 'media_date_added'
+
+        video_meta = metadata.get(media_type, [])[0]
+        title = video_meta.get('media_status')
+        alt_title = video_meta.get('media_title')
+        description = clean_html(
+            video_meta.get('media_description') or
+            video_meta.get('media_description_md'))
+        duration = float_or_none(video_meta.get('media_duration'))
+        uploader = video_meta.get('media_user_name')
+        views = int_or_none(video_meta.get('media_views'))
+        timestamp = parse_iso8601(video_meta.get(date), ' ')
+
+        # Optional fields: skip absent values instead of emitting [None] or
+        # failing on ``str + None``.
+        category = video_meta.get('category_name')
+        categories = [category] if category else []
+        thumbs = []
+        for key, width, height in (
+                ('media_thumbnail', 320, 180),
+                ('media_thumbnail_large', 768, 432)):
+            thumb_path = video_meta.get(key)
+            if thumb_path:
+                thumbs.append({
+                    'url': thumb_base + thumb_path,
+                    'width': width,
+                    'height': height,
+                })
+
+        return {
+            'id': video_id,
+            'title': title,
+            'alt_title': alt_title,
+            'description': description,
+            'ext': 'mp4',
+            'thumbnails': thumbs,
+            'duration': duration,
+            'uploader': uploader,
+            'view_count': views,
+            'timestamp': timestamp,
+            'categories': categories,
+        }
+
+    def _real_extract(self, url):
+        """Return the info dict for a recorded video; the URL is marked as m3u8."""
+        video_id = self._match_id(url)
+
+        metadata = self._extract_metadata(
+            'https://www.hitbox.tv/api/media/video',
+            video_id)
+
+        player_config = self._download_json(
+            'https://www.hitbox.tv/api/player/config/video/%s' % video_id,
+            video_id)
+
+        clip = player_config.get('clip')
+        video_url = clip.get('url')
+        # The resolution label is informational only; tolerate a missing or
+        # empty 'bitrates' list rather than raising IndexError.
+        bitrates = clip.get('bitrates') or []
+        res = bitrates[0].get('label') if bitrates else None
+
+        metadata['resolution'] = res
+        metadata['url'] = video_url
+        metadata['protocol'] = 'm3u8'
+
+        return metadata
+
+
+class HitboxLiveIE(HitboxIE):
+    """Extractor for live channels on hitbox.tv; one format per CDN bitrate."""
+
+    IE_NAME = 'hitbox:live'
+    _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/(?!video)(?P<id>.+)'
+    _TEST = {
+        'url': 'http://www.hitbox.tv/dimak',
+        'info_dict': {
+            'id': 'dimak',
+            'ext': 'mp4',
+            'description': 'md5:c9f80fa4410bc588d7faa40003fc7d0e',
+            'timestamp': int,
+            'upload_date': compat_str,
+            'title': compat_str,
+            'uploader': 'Dimak',
+        },
+        'params': {
+            # live
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the live-stream info dict built from the player config's CDNs."""
+        video_id = self._match_id(url)
+
+        metadata = self._extract_metadata(
+            'https://www.hitbox.tv/api/media/live',
+            video_id)
+
+        player_config = self._download_json(
+            'https://www.hitbox.tv/api/player/config/live/%s' % video_id,
+            video_id)
+
+        formats = []
+        servers = []
+        # 'cdns' and per-CDN 'bitrates' may be absent; treat them as empty.
+        for cdn in player_config.get('cdns') or []:
+            base_url = cdn.get('netConnectionUrl')
+            # Skip CDNs without a URL and servers that were already handled.
+            if not base_url or base_url in servers:
+                continue
+            servers.append(base_url)
+            # Captures the trailing 'name.tld' part before a '/'; it is only
+            # used as format_note, so a non-matching URL must not be fatal.
+            host_mobj = re.search(r'.+\.([^\.]+\.[^\./]+)/.+', base_url)
+            host = host_mobj.group(1) if host_mobj else None
+            for stream in cdn.get('bitrates') or []:
+                label = stream.get('label')
+                if label == 'Auto':
+                    continue
+                formats.append({
+                    'url': '%s/%s' % (base_url, stream.get('url')),
+                    'ext': 'mp4',
+                    'vbr': stream.get('bitrate'),
+                    'resolution': label,
+                    'rtmp_live': True,
+                    'format_note': host,
+                    'page_url': url,
+                    'player_url': 'http://www.hitbox.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf',
+                })
+
+        self._sort_formats(formats)
+        metadata['formats'] = formats
+        metadata['is_live'] = True
+        metadata['title'] = self._live_title(metadata.get('title'))
+        return metadata
index 8e812b66976e31e43ad594dbee6344c5e34629cf..704d0285d3e1c2ce10e8f3929543c6c66b0fd58a 100644 (file)
@@ -4,9 +4,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_request,
+)
 from ..utils import (
     ExtractorError,
-    compat_urllib_request,
     int_or_none,
     urlencode_postdata,
 )
@@ -30,9 +32,7 @@ class HostingBulkIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         url = 'http://hostingbulk.com/{0:}.html'.format(video_id)
 
         # Custom request with cookie to set language to English, so our file
index fccc238840887fd70ed56b2f642c47ea6aa4f43e..e9733912132798d99be18bb935dcd3c3b190525d 100644 (file)
@@ -1,12 +1,12 @@
 from __future__ import unicode_literals
 
-import re
-import json
-import random
-import string
-
 from .common import InfoExtractor
-from ..utils import find_xpath_attr
+from ..utils import (
+    find_xpath_attr,
+    int_or_none,
+    js_to_json,
+    unescapeHTML,
+)
 
 
 class HowStuffWorksIE(InfoExtractor):
@@ -16,98 +16,74 @@ class HowStuffWorksIE(InfoExtractor):
             'url': 'http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm',
             'info_dict': {
                 'id': '450221',
-                'display_id': 'cool-jobs-iditarod-musher',
                 'ext': 'flv',
                 'title': 'Cool Jobs - Iditarod Musher',
-                'description': 'md5:82bb58438a88027b8186a1fccb365f90',
+                'description': 'Cold sleds, freezing temps and warm dog breath... an Iditarod musher\'s dream. Kasey-Dee Gardner jumps on a sled to find out what the big deal is.',
+                'display_id': 'cool-jobs-iditarod-musher',
                 'thumbnail': 're:^https?://.*\.jpg$',
+                'duration': 161,
             },
-            'params': {
-                # md5 is not consistent
-                'skip_download': True
-            }
         },
         {
             'url': 'http://adventure.howstuffworks.com/7199-survival-zone-food-and-water-in-the-savanna-video.htm',
             'info_dict': {
                 'id': '453464',
-                'display_id': 'survival-zone-food-and-water-in-the-savanna',
                 'ext': 'mp4',
                 'title': 'Survival Zone: Food and Water In the Savanna',
-                'description': 'md5:7e1c89f6411434970c15fa094170c371',
+                'description': 'Learn how to find both food and water while trekking in the African savannah. In this video from the Discovery Channel.',
+                'display_id': 'survival-zone-food-and-water-in-the-savanna',
                 'thumbnail': 're:^https?://.*\.jpg$',
             },
-            'params': {
-                # md5 is not consistent
-                'skip_download': True
-            }
         },
         {
             'url': 'http://entertainment.howstuffworks.com/arts/2706-sword-swallowing-1-by-dan-meyer-video.htm',
             'info_dict': {
                 'id': '440011',
-                'display_id': 'sword-swallowing-1-by-dan-meyer',
                 'ext': 'flv',
                 'title': 'Sword Swallowing #1 by Dan Meyer',
-                'description': 'md5:b2409e88172913e2e7d3d1159b0ef735',
+                'description': 'Video footage (1 of 3) used by permission of the owner Dan Meyer through Sword Swallowers Association International <www.swordswallow.org>',
+                'display_id': 'sword-swallowing-1-by-dan-meyer',
                 'thumbnail': 're:^https?://.*\.jpg$',
             },
-            'params': {
-                # md5 is not consistent
-                'skip_download': True
-            }
         },
     ]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('id')
+        display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
+        clip_js = self._search_regex(
+            r'(?s)var clip = ({.*?});', webpage, 'clip info')
+        clip_info = self._parse_json(
+            clip_js, display_id, transform_source=js_to_json)
 
-        content_id = self._search_regex(r'var siteSectionId="(\d+)";', webpage, 'content id')
-
-        mp4 = self._search_regex(
-            r'''(?xs)var\s+clip\s*=\s*{\s*
-                .+?\s*
-                content_id\s*:\s*%s\s*,\s*
-                .+?\s*
-                mp4\s*:\s*\[(.*?),?\]\s*
-                };\s*
-                videoData\.push\(clip\);''' % content_id,
-            webpage, 'mp4', fatal=False, default=None)
-
-        smil = self._download_xml(
-            'http://services.media.howstuffworks.com/videos/%s/smil-service.smil' % content_id,
-            content_id, 'Downloading video SMIL')
-
-        http_base = find_xpath_attr(
-            smil,
-            './{0}head/{0}meta'.format('{http://www.w3.org/2001/SMIL20/Language}'),
-            'name',
-            'httpBase').get('content')
-
-        def random_string(str_len=0):
-            return ''.join([random.choice(string.ascii_uppercase) for _ in range(str_len)])
-
-        URL_SUFFIX = '?v=2.11.3&fp=LNX 11,2,202,356&r=%s&g=%s' % (random_string(5), random_string(12))
-
+        video_id = clip_info['content_id']
         formats = []
+        m3u8_url = clip_info.get('m3u8')
+        if m3u8_url:
+            formats += self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
+        for video in clip_info.get('mp4', []):
+            formats.append({
+                'url': video['src'],
+                'format_id': video['bitrate'],
+                'vbr': int(video['bitrate'].rstrip('k')),
+            })
+
+        if not formats:
+            smil = self._download_xml(
+                'http://services.media.howstuffworks.com/videos/%s/smil-service.smil' % video_id,
+                video_id, 'Downloading video SMIL')
+
+            http_base = find_xpath_attr(
+                smil,
+                './{0}head/{0}meta'.format('{http://www.w3.org/2001/SMIL20/Language}'),
+                'name',
+                'httpBase').get('content')
+
+            URL_SUFFIX = '?v=2.11.3&fp=LNX 11,2,202,356&r=A&g=A'
 
-        if mp4:
-            for video in json.loads('[%s]' % mp4):
-                bitrate = video['bitrate']
-                fmt = {
-                    'url': video['src'].replace('http://pmd.video.howstuffworks.com', http_base) + URL_SUFFIX,
-                    'format_id': bitrate,
-                }
-                m = re.search(r'(?P<vbr>\d+)[Kk]', bitrate)
-                if m:
-                    fmt['vbr'] = int(m.group('vbr'))
-                formats.append(fmt)
-        else:
             for video in smil.findall(
-                    './/{0}body/{0}switch/{0}video'.format('{http://www.w3.org/2001/SMIL20/Language}')):
-                vbr = int(video.attrib['system-bitrate']) / 1000
+                    './{0}body/{0}switch/{0}video'.format('{http://www.w3.org/2001/SMIL20/Language}')):
+                vbr = int_or_none(video.attrib['system-bitrate'], scale=1000)
                 formats.append({
                     'url': '%s/%s%s' % (http_base, video.attrib['src'], URL_SUFFIX),
                     'format_id': '%dk' % vbr,
@@ -116,19 +92,12 @@ class HowStuffWorksIE(InfoExtractor):
 
         self._sort_formats(formats)
 
-        title = self._og_search_title(webpage)
-        TITLE_SUFFIX = ' : HowStuffWorks'
-        if title.endswith(TITLE_SUFFIX):
-            title = title[:-len(TITLE_SUFFIX)]
-
-        description = self._og_search_description(webpage)
-        thumbnail = self._og_search_thumbnail(webpage)
-
         return {
-            'id': content_id,
+            'id': '%s' % video_id,
             'display_id': display_id,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
+            'title': unescapeHTML(clip_info['clip_title']),
+            'description': unescapeHTML(clip_info.get('caption')),
+            'thumbnail': clip_info.get('video_still_url'),
+            'duration': clip_info.get('duration'),
             'formats': formats,
         }
index 4ccf6b9b8a82c3ef28c1d9d04dcc6f26ce2a8f8d..a38eae421a9199b578b3a724d205b13e6367c67a 100644 (file)
@@ -39,8 +39,9 @@ class HuffPostIE(InfoExtractor):
         data = self._download_json(api_url, video_id)['data']
 
         video_title = data['title']
-        duration = parse_duration(data['running_time'])
-        upload_date = unified_strdate(data['schedule']['starts_at'])
+        duration = parse_duration(data.get('running_time'))
+        upload_date = unified_strdate(
+            data.get('schedule', {}).get('starts_at') or data.get('segment_start_date_time'))
         description = data.get('description')
 
         thumbnails = []
@@ -59,16 +60,11 @@ class HuffPostIE(InfoExtractor):
             'ext': 'mp4',
             'url': url,
             'vcodec': 'none' if key.startswith('audio/') else None,
-        } for key, url in data['sources']['live'].items()]
-        if data.get('fivemin_id'):
-            fid = data['fivemin_id']
-            fcat = str(int(fid) // 100 + 1)
-            furl = 'http://avideos.5min.com/2/' + fcat[-3:] + '/' + fcat + '/' + fid + '.mp4'
-            formats.append({
-                'format': 'fivemin',
-                'url': furl,
-                'preference': 1,
-            })
+        } for key, url in data.get('sources', {}).get('live', {}).items()]
+
+        if not formats and data.get('fivemin_id'):
+            return self.url_result('5min:%s' % data['fivemin_id'])
+
         self._sort_formats(formats)
 
         return {
index 6d0d847c6d3461a02c6eab71b24848247e9678ab..aa0724a02353840e5f5533a1eedbc7005aa63008 100644 (file)
@@ -1,20 +1,20 @@
 from __future__ import unicode_literals
 
 import json
-import re
 import time
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
-
+)
+from ..utils import (
     ExtractorError,
 )
 
 
 class HypemIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'
+    _VALID_URL = r'http://(?:www\.)?hypem\.com/track/(?P<id>[^/]+)/'
     _TEST = {
         'url': 'http://hypem.com/track/1v6ga/BODYWORK+-+TAME',
         'md5': 'b9cc91b5af8995e9f0c1cee04c575828',
@@ -27,8 +27,7 @@ class HypemIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        track_id = mobj.group(1)
+        track_id = self._match_id(url)
 
         data = {'ax': 1, 'ts': time.time()}
         data_encoded = compat_urllib_parse.urlencode(data)
index f2c1c10f5c1dec44129ea4cd7d4cff69c7c07206..f29df36b5bf6bd7e732ad84cbfd7d3eeb412f5ff 100644 (file)
@@ -4,7 +4,7 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urlparse,
 )
 
@@ -16,7 +16,6 @@ class ImdbIE(InfoExtractor):
 
     _TEST = {
         'url': 'http://www.imdb.com/video/imdb/vi2524815897',
-        'md5': '9f34fa777ade3a6e57a054fdbcb3a068',
         'info_dict': {
             'id': '2524815897',
             'ext': 'mp4',
index e76dd222d1ee81dc0e0b2d5b1b3c28ef22e1bd83..f25f43664e262b25473557c5f11dae91e697e3f6 100644 (file)
@@ -1,10 +1,9 @@
 from __future__ import unicode_literals
 
 import base64
-import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
 )
 
@@ -24,9 +23,7 @@ class InfoQIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         video_title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
index 1e47991874ecf6afa75181f99b6bf98a8dd60916..483cc6f9e62da3bc272ba66efc540b95c17116e7 100644 (file)
@@ -3,9 +3,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urlparse,
     compat_urllib_parse,
+)
+from ..utils import (
     xpath_with_ns,
 )
 
@@ -20,7 +22,7 @@ class InternetVideoArchiveIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'SKYFALL',
             'description': 'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.',
-            'duration': 149,
+            'duration': 152,
         },
     }
 
index 4247d6391fa25f674449d9d8ac44b428c7c387e0..8529bedfc0ab283790e74144bc9d570df19dc4b3 100644 (file)
@@ -6,8 +6,10 @@ from random import random
 from math import floor
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_request,
+)
+from ..utils import (
     ExtractorError,
 )
 
index f0fba1adb7dba9c4c09717132731d98ff0e5ffd3..7a400323dc4df3807057a77b25f7401ce5e2a3b8 100644 (file)
@@ -5,8 +5,10 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_request,
+)
+from ..utils import (
     ExtractorError,
 )
 
diff --git a/youtube_dl/extractor/karaoketv.py b/youtube_dl/extractor/karaoketv.py
new file mode 100644 (file)
index 0000000..e3b43ff
--- /dev/null
@@ -0,0 +1,40 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse
+from ..utils import (
+    js_to_json,
+)
+
+
+class KaraoketvIE(InfoExtractor):
+    _VALID_URL = r'http://karaoketv\.co\.il/\?container=songs&id=(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://karaoketv.co.il/?container=songs&id=171568',
+        'info_dict': {
+            'id': '171568',
+            'ext': 'mp4',
+            'title': 'אל העולם שלך - רותם כהן - שרים קריוקי',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        page_video_url = self._og_search_video_url(webpage, video_id)
+        config_json = compat_urllib_parse.unquote_plus(self._search_regex(
+            r'config=(.*)', page_video_url, 'configuration'))
+
+        urls_info_json = self._download_json(
+            config_json, video_id, 'Downloading configuration',
+            transform_source=js_to_json)
+
+        url = urls_info_json['playlist'][0]['url']
+
+        return {
+            'id': video_id,
+            'title': self._og_search_title(webpage),
+            'url': url,
+        }
index 5d679e88d811c6ad55c9fe475267c2842a641f83..c0956ba0902be3b8fd9a9188872eb90ab9acdefa 100644 (file)
@@ -1,34 +1,39 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 
 
 class KeekIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
+    _VALID_URL = r'https?://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<id>\w+)'
     IE_NAME = 'keek'
     _TEST = {
         'url': 'https://www.keek.com/ytdl/keeks/NODfbab',
-        'file': 'NODfbab.mp4',
-        'md5': '9b0636f8c0f7614afa4ea5e4c6e57e83',
+        'md5': '09c5c109067536c1cec8bac8c21fea05',
         'info_dict': {
-            'uploader': 'ytdl',
+            'id': 'NODfbab',
+            'ext': 'mp4',
+            'uploader': 'youtube-dl project',
+            'uploader_id': 'ytdl',
             'title': 'test chars: "\'/\\\u00e4<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de .',
         },
     }
 
     def _real_extract(self, url):
-        m = re.match(self._VALID_URL, url)
-        video_id = m.group('videoID')
+        video_id = self._match_id(url)
 
         video_url = 'http://cdn.keek.com/keek/video/%s' % video_id
         thumbnail = 'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
         webpage = self._download_webpage(url, video_id)
 
-        uploader = self._html_search_regex(
-            r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>',
-            webpage, 'uploader', fatal=False)
+        raw_desc = self._html_search_meta('description', webpage)
+        if raw_desc:
+            uploader = self._html_search_regex(
+                r'Watch (.*?)\s+\(', raw_desc, 'uploader', fatal=False)
+            uploader_id = self._html_search_regex(
+                r'Watch .*?\(@(.+?)\)', raw_desc, 'uploader_id', fatal=False)
+        else:
+            uploader = None
+            uploader_id = None
 
         return {
             'id': video_id,
@@ -36,5 +41,6 @@ class KeekIE(InfoExtractor):
             'ext': 'mp4',
             'title': self._og_search_title(webpage),
             'thumbnail': thumbnail,
-            'uploader': uploader
+            'uploader': uploader,
+            'uploader_id': uploader_id,
         }
index 75b63cffb5961f33ea2d2f5ae37803dfb0fe37fc..97dcb518a3587406bc93a44c39344630cafe7119 100644 (file)
@@ -4,7 +4,7 @@ import os
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse_urlparse,
     compat_urllib_request,
     compat_urllib_parse,
@@ -15,7 +15,7 @@ from ..aes import (
 
 
 class KeezMoviesIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?keezmovies\.com/video/.+?(?P<videoid>[0-9]+)(?:[/?&]|$)'
+    _VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/.+?(?P<id>[0-9]+)(?:[/?&]|$)'
     _TEST = {
         'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
         'file': '1214711.mp4',
@@ -27,8 +27,7 @@ class KeezMoviesIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('videoid')
+        video_id = self._match_id(url)
 
         req = compat_urllib_request.Request(url)
         req.add_header('Cookie', 'age_verified=1')
index 408d00944ceb83e1551c12b5707031c961bb4f5d..08a671fa86a007d3327ef03c257f1b943bd425db 100644 (file)
@@ -22,8 +22,10 @@ class KhanAcademyIE(InfoExtractor):
             'description': 'The perfect cipher',
             'duration': 176,
             'uploader': 'Brit Cruise',
+            'uploader_id': 'khanacademy',
             'upload_date': '20120411',
-        }
+        },
+        'add_ie': ['Youtube'],
     }, {
         'url': 'https://www.khanacademy.org/math/applied-math/cryptography',
         'info_dict': {
index 41fd62009ac16100c1e6bb776028020cd12e9ff2..720bc939bfd4c3a30c9a3709968c6008e6472067 100644 (file)
@@ -10,13 +10,14 @@ from ..utils import int_or_none
 class KontrTubeIE(InfoExtractor):
     IE_NAME = 'kontrtube'
     IE_DESC = 'KontrTube.ru - Труба зовёт'
-    _VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/.+'
+    _VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/(?P<display_id>[^/]+)/'
 
     _TEST = {
         'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/',
         'md5': '975a991a4926c9a85f383a736a2e6b80',
         'info_dict': {
             'id': '2678',
+            'display_id': 'nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag',
             'ext': 'mp4',
             'title': 'Над олимпийской деревней в Сочи поднят российский флаг',
             'description': 'md5:80edc4c613d5887ae8ccf1d59432be41',
@@ -28,21 +29,28 @@ class KontrTubeIE(InfoExtractor):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
+        display_id = mobj.group('display_id')
 
-        webpage = self._download_webpage(url, video_id, 'Downloading page')
+        webpage = self._download_webpage(
+            url, display_id, 'Downloading page')
 
-        video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL')
-        thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False)
+        video_url = self._html_search_regex(
+            r"video_url\s*:\s*'(.+?)/?',", webpage, 'video URL')
+        thumbnail = self._html_search_regex(
+            r"preview_url\s*:\s*'(.+?)/?',", webpage, 'video thumbnail', fatal=False)
         title = self._html_search_regex(
             r'<title>(.+?)</title>', webpage, 'video title')
-        description = self._html_search_meta('description', webpage, 'video description')
+        description = self._html_search_meta(
+            'description', webpage, 'video description')
 
         mobj = re.search(
-            r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>', webpage)
+            r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>',
+            webpage)
         duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
 
         view_count = self._html_search_regex(
-            r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage, 'view count', fatal=False)
+            r'<div class="col_2">Просмотров: <span>(\d+)</span></div>',
+            webpage, 'view count', fatal=False)
 
         comment_count = None
         comment_str = self._html_search_regex(
@@ -56,6 +64,7 @@ class KontrTubeIE(InfoExtractor):
 
         return {
             'id': video_id,
+            'display_id': display_id,
             'url': video_url,
             'thumbnail': thumbnail,
             'title': title,
index 03c4691c6abb6794557a7d83b30fce49a2a2e4e7..5247c6f58500e301dab50ed48039df0c070b493a 100644 (file)
@@ -4,10 +4,12 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_str,
     compat_urllib_parse_urlparse,
     compat_urlparse,
+)
+from ..utils import (
     ExtractorError,
     find_xpath_attr,
     int_or_none,
diff --git a/youtube_dl/extractor/lnkgo.py b/youtube_dl/extractor/lnkgo.py
new file mode 100644 (file)
index 0000000..a8e3578
--- /dev/null
@@ -0,0 +1,124 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    js_to_json,
+    unified_strdate,
+)
+
+
+class LnkGoIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?lnkgo\.alfa\.lt/visi\-video/(?P<show>[^/]+)/ziurek\-(?P<display_id>[A-Za-z0-9\-]+)'
+    _TESTS = [{
+        'url': 'http://lnkgo.alfa.lt/visi-video/yra-kaip-yra/ziurek-yra-kaip-yra-162',
+        'info_dict': {
+            'id': '46712',
+            'ext': 'mp4',
+            'title': 'Yra kaip yra',
+            'upload_date': '20150107',
+            'description': 'md5:d82a5e36b775b7048617f263a0e3475e',
+            'age_limit': 7,
+            'duration': 3019,
+            'thumbnail': 're:^https?://.*\.jpg$'
+        },
+        'params': {
+            'skip_download': True,  # HLS download
+        },
+    }, {
+        'url': 'http://lnkgo.alfa.lt/visi-video/aktualai-pratesimas/ziurek-nerdas-taiso-kompiuteri-2',
+        'info_dict': {
+            'id': '47289',
+            'ext': 'mp4',
+            'title': 'Nėrdas: Kompiuterio Valymas',
+            'upload_date': '20150113',
+            'description': 'md5:7352d113a242a808676ff17e69db6a69',
+            'age_limit': 18,
+            'duration': 346,
+            'thumbnail': 're:^https?://.*\.jpg$'
+        },
+        'params': {
+            'skip_download': True,  # HLS download
+        },
+    }]
+    _AGE_LIMITS = {
+        'N-7': 7,
+        'N-14': 14,
+        'S': 18,
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('display_id')
+
+        webpage = self._download_webpage(
+            url, display_id, 'Downloading player webpage')
+
+        video_id = self._search_regex(
+            r'data-ep="([^"]+)"', webpage, 'video ID')
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage)
+
+        thumbnail_w = int_or_none(
+            self._og_search_property('image:width', webpage, 'thumbnail width', fatal=False))
+        thumbnail_h = int_or_none(
+            self._og_search_property('image:height', webpage, 'thumbnail height', fatal=False))
+        thumbnail = {
+            'url': self._og_search_thumbnail(webpage),
+        }
+        if thumbnail_w and thumbnail_h:
+            thumbnail.update({
+                'width': thumbnail_w,
+                'height': thumbnail_h,
+            })
+
+        upload_date = unified_strdate(self._search_regex(
+            r'class="meta-item\sair-time">.*?<strong>([^<]+)</strong>', webpage, 'upload date', fatal=False))
+        duration = int_or_none(self._search_regex(
+            r'VideoDuration = "([^"]+)"', webpage, 'duration', fatal=False))
+
+        pg_rating = self._search_regex(
+            r'pgrating="([^"]+)"', webpage, 'PG rating', fatal=False, default='')
+        age_limit = self._AGE_LIMITS.get(pg_rating.upper(), 0)
+
+        sources_js = self._search_regex(
+            r'(?s)sources:\s(\[.*?\]),', webpage, 'sources')
+        sources = self._parse_json(
+            sources_js, video_id, transform_source=js_to_json)
+
+        formats = []
+        for source in sources:
+            if source.get('provider') == 'rtmp':
+                m = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<play_path>.+)$', source['file'])
+                if not m:
+                    continue
+                formats.append({
+                    'format_id': 'rtmp',
+                    'ext': 'flv',
+                    'url': m.group('url'),
+                    'play_path': m.group('play_path'),
+                    'page_url': url,
+                })
+            elif source.get('file').endswith('.m3u8'):
+                formats.append({
+                    'format_id': 'hls',
+                    'ext': source.get('type', 'mp4'),
+                    'url': source['file'],
+                })
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'formats': formats,
+            'thumbnails': [thumbnail],
+            'duration': duration,
+            'description': description,
+            'age_limit': age_limit,
+            'upload_date': upload_date,
+        }
index d72d470aa8dbb532f80f44796354dabd3de92261..9c2fbdd96788aeb4a854777cfcb6c67dff18f0ce 100644 (file)
@@ -2,7 +2,6 @@
 from __future__ import unicode_literals
 
 import re
-import json
 
 from .common import InfoExtractor
 from ..utils import (
@@ -28,7 +27,6 @@ class LRTIE(InfoExtractor):
         'params': {
             'skip_download': True,  # HLS download
         },
-
     }
 
     def _real_extract(self, url):
@@ -44,7 +42,9 @@ class LRTIE(InfoExtractor):
 
         formats = []
         for js in re.findall(r'(?s)config:\s*(\{.*?\})', webpage):
-            data = json.loads(js_to_json(js))
+            data = self._parse_json(js, video_id, transform_source=js_to_json)
+            if 'provider' not in data:
+                continue
             if data['provider'] == 'rtmp':
                 formats.append({
                     'format_id': 'rtmp',
index 2160d6cb08ae5b71584ffdc0d76982e2a9bdf0c0..26e84970d49463068f032dcf05afbc03e485e859 100644 (file)
@@ -5,12 +5,14 @@ import json
 
 from .subtitles import SubtitlesInfoExtractor
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
+    compat_str,
     compat_urllib_parse,
     compat_urllib_request,
+)
+from ..utils import (
     ExtractorError,
     int_or_none,
-    compat_str,
 )
 
 
index 1abf6e4f85d52cbe4d257280cf26277e715dbdb1..0b85a59d1c644d7d04e573aae0bdd03ebd4f6c80 100644 (file)
@@ -1,43 +1,33 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
 )
 
 
 class MalemotionIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?malemotion\.com/video/(.+?)\.(?P<id>.+?)(#|$)'
+    _VALID_URL = r'https?://malemotion\.com/video/(.+?)\.(?P<id>.+?)(#|$)'
     _TEST = {
-        'url': 'http://malemotion.com/video/bien-dur.10ew',
-        'file': '10ew.mp4',
-        'md5': 'b3cc49f953b107e4a363cdff07d100ce',
+        'url': 'http://malemotion.com/video/bete-de-concours.ltc',
+        'md5': '3013e53a0afbde2878bc39998c33e8a5',
         'info_dict': {
-            "title": "Bien dur",
-            "age_limit": 18,
+            'id': 'ltc',
+            'ext': 'mp4',
+            'title': 'Bête de Concours',
+            'age_limit': 18,
         },
-        'skip': 'This video has been deleted.'
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group("id")
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        self.report_extraction(video_id)
-
-        # Extract video URL
-        video_url = compat_urllib_parse.unquote(
-            self._search_regex(r'<source type="video/mp4" src="(.+?)"', webpage, 'video URL'))
-
-        # Extract title
+        video_url = compat_urllib_parse.unquote(self._search_regex(
+            r'<source type="video/mp4" src="(.+?)"', webpage, 'video URL'))
         video_title = self._html_search_regex(
             r'<title>(.*?)</title', webpage, 'title')
-
-        # Extract video thumbnail
         video_thumbnail = self._search_regex(
             r'<video .+?poster="(.+?)"', webpage, 'thumbnail', fatal=False)
 
@@ -47,14 +37,12 @@ class MalemotionIE(InfoExtractor):
             'format_id': 'mp4',
             'preference': 1,
         }]
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
             'formats': formats,
-            'uploader': None,
-            'upload_date': None,
             'title': video_title,
             'thumbnail': video_thumbnail,
-            'description': None,
             'age_limit': 18,
         }
index 858c1c0c31f4c08c3068a62983781129288dc3b8..8bc333b0277e27e6fd8f3d4f11b3c9c7eabdd7d7 100644 (file)
@@ -3,10 +3,12 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_parse_qs,
     compat_urllib_parse,
     compat_urllib_request,
+)
+from ..utils import (
     determine_ext,
     ExtractorError,
     int_or_none,
diff --git a/youtube_dl/extractor/minhateca.py b/youtube_dl/extractor/minhateca.py
new file mode 100644 (file)
index 0000000..14934b7
--- /dev/null
@@ -0,0 +1,72 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_parse,
+    compat_urllib_request,
+)
+from ..utils import (
+    int_or_none,
+    parse_duration,
+    parse_filesize,
+)
+
+
+class MinhatecaIE(InfoExtractor):
+    _VALID_URL = r'https?://minhateca\.com\.br/[^?#]+,(?P<id>[0-9]+)\.'
+    _TEST = {
+        'url': 'http://minhateca.com.br/pereba/misc/youtube-dl+test+video,125848331.mp4(video)',
+        'info_dict': {
+            'id': '125848331',
+            'ext': 'mp4',
+            'title': 'youtube-dl test video',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'filesize_approx': 1530000,
+            'duration': 9,
+            'view_count': int,
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        token = self._html_search_regex(
+            r'<input name="__RequestVerificationToken".*?value="([^"]+)"',
+            webpage, 'request token')
+        token_data = [
+            ('fileId', video_id),
+            ('__RequestVerificationToken', token),
+        ]
+        req = compat_urllib_request.Request(
+            'http://minhateca.com.br/action/License/Download',
+            data=compat_urllib_parse.urlencode(token_data))
+        req.add_header('Content-Type', 'application/x-www-form-urlencoded')
+        data = self._download_json(
+            req, video_id, note='Downloading metadata')
+
+        video_url = data['redirectUrl']
+        title_str = self._html_search_regex(
+            r'<h1.*?>(.*?)</h1>', webpage, 'title')
+        title, _, ext = title_str.rpartition('.')
+        filesize_approx = parse_filesize(self._html_search_regex(
+            r'<p class="fileSize">(.*?)</p>',
+            webpage, 'file size approximation', fatal=False))
+        duration = parse_duration(self._html_search_regex(
+            r'(?s)<p class="fileLeng[ht][th]">.*?class="bold">(.*?)<',
+            webpage, 'duration', fatal=False))
+        view_count = int_or_none(self._html_search_regex(
+            r'<p class="downloadsCounter">([0-9]+)</p>',
+            webpage, 'view count', fatal=False))
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'ext': ext,
+            'filesize_approx': filesize_approx,
+            'duration': duration,
+            'view_count': view_count,
+            'thumbnail': self._og_search_thumbnail(webpage),
+        }
index 807b1dc89b608333e06c1fbab2e9d806fb7d090f..3c61a850f296c32861cdfd35095746c2cf1ef4ad 100644 (file)
@@ -5,8 +5,10 @@ import json
 
 from .common import InfoExtractor
 from .youtube import YoutubeIE
-from ..utils import (
+from ..compat import (
     compat_urlparse,
+)
+from ..utils import (
     clean_html,
     ExtractorError,
     get_element_by_id,
@@ -15,7 +17,7 @@ from ..utils import (
 
 class TechTVMITIE(InfoExtractor):
     IE_NAME = 'techtv.mit.edu'
-    _VALID_URL = r'https?://techtv\.mit\.edu/(videos|embeds)/(?P<id>\d+)'
+    _VALID_URL = r'https?://techtv\.mit\.edu/(?:videos|embeds)/(?P<id>\d+)'
 
     _TEST = {
         'url': 'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
@@ -29,8 +31,7 @@ class TechTVMITIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         raw_page = self._download_webpage(
             'http://techtv.mit.edu/videos/%s' % video_id, video_id)
         clean_page = re.compile(r'<!--.*?-->', re.S).sub('', raw_page)
@@ -104,7 +105,10 @@ class OCWMITIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'Lecture 7: Multiple Discrete Random Variables: Expectations, Conditioning, Independence',
                 'description': 'In this lecture, the professor discussed multiple random variables, expectations, and binomial distribution.',
-                #'subtitles': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/MIT6_041F11_lec07_300k.mp4.srt'
+                'upload_date': '20121109',
+                'uploader_id': 'MIT',
+                'uploader': 'MIT OpenCourseWare',
+                # 'subtitles': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/MIT6_041F11_lec07_300k.mp4.srt'
             }
         },
         {
@@ -113,8 +117,11 @@ class OCWMITIE(InfoExtractor):
                 'id': '7K1sB05pE0A',
                 'ext': 'mp4',
                 'title': 'Session 1: Introduction to Derivatives',
+                'upload_date': '20090818',
+                'uploader_id': 'MIT',
+                'uploader': 'MIT OpenCourseWare',
                 'description': 'This section contains lecture video excerpts, lecture notes, an interactive mathlet with supporting documents, and problem solving videos.',
-                #'subtitles': 'http://ocw.mit.edu//courses/mathematics/18-01sc-single-variable-calculus-fall-2010/ocw-18.01-f07-lec01_300k.SRT'
+                # 'subtitles': 'http://ocw.mit.edu//courses/mathematics/18-01sc-single-variable-calculus-fall-2010/ocw-18.01-f07-lec01_300k.SRT'
             }
         }
     ]
index 6691521e58435682a74af87559ce1d1fd9046fbf..2567583235617e52b6420419863dbc8d319c8201 100644 (file)
@@ -1,12 +1,13 @@
 from __future__ import unicode_literals
 
-import re
 import json
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
     compat_urlparse,
+)
+from ..utils import (
     get_element_by_attribute,
     parse_duration,
     strip_jsonp,
@@ -15,7 +16,7 @@ from ..utils import (
 
 class MiTeleIE(InfoExtractor):
     IE_NAME = 'mitele.es'
-    _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<episode>[^/]+)/'
+    _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
 
     _TEST = {
         'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
@@ -31,12 +32,10 @@ class MiTeleIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        episode = mobj.group('episode')
+        episode = self._match_id(url)
         webpage = self._download_webpage(url, episode)
         embed_data_json = self._search_regex(
-            r'MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data',
-            flags=re.DOTALL
+            r'(?s)MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data',
         ).replace('\'', '"')
         embed_data = json.loads(embed_data_json)
 
index bb8937c4d53d33df6b560aff7d56df80740bf1cc..07d194562e77044a8d8d87138ed32205842a1a25 100644 (file)
@@ -3,8 +3,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
+)
+from ..utils import (
     ExtractorError,
     HEADRequest,
     int_or_none,
@@ -70,7 +72,7 @@ class MixcloudIE(InfoExtractor):
             raise ExtractorError('Unable to extract track url')
 
         PREFIX = (
-            r'<div class="cloudcast-play-button-container[^"]*?"'
+            r'<span class="play-button[^"]*?"'
             r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
         title = self._html_search_regex(
             PREFIX + r'm-title="([^"]+)"', webpage, 'title')
index 2ff79b9b88590e87f1aecf9fdfd32f242bd98420..5a66302f6ec317f89c4153248565159ebd075010 100644 (file)
@@ -5,10 +5,12 @@ import json
 import re
 
 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
+)
+from ..utils import (
+    ExtractorError,
     int_or_none,
 )
 
@@ -50,7 +52,8 @@ class MoeVideoIE(InfoExtractor):
                 'height': 296,
                 'duration': 6027,
                 'filesize': 588257923,
-            }
+            },
+            'skip': 'Video has been removed',
         },
     ]
 
index d658647e6ca6d9b7675dd76ea55c58f52887374d..2cec12d35ec1797dd7612ad49c5739e87f77e6c9 100644 (file)
@@ -4,7 +4,7 @@ import os
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse_urlparse,
     compat_urllib_request,
     compat_urllib_parse,
@@ -12,7 +12,7 @@ from ..utils import (
 
 
 class MofosexIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?(?P<url>mofosex\.com/videos/(?P<videoid>[0-9]+)/.*?\.html)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<url>mofosex\.com/videos/(?P<id>[0-9]+)/.*?\.html)'
     _TEST = {
         'url': 'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',
         'md5': '1b2eb47ac33cc75d4a80e3026b613c5a',
@@ -26,7 +26,7 @@ class MofosexIE(InfoExtractor):
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('videoid')
+        video_id = mobj.group('id')
         url = 'http://www.' + mobj.group('url')
 
         req = compat_urllib_request.Request(url)
index 1c4f589cce1605b17e099d47c050097fef1ad0a9..5de719bdc41d2af56d6133a85b998c4ed85af726 100644 (file)
@@ -5,7 +5,7 @@ import os.path
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
 )
@@ -37,10 +37,9 @@ class MonikerIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         orig_webpage = self._download_webpage(url, video_id)
+
         fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage)
         data = dict(fields)
 
index 34a4bec3a0d1fb91208acce4684a4078188b847d..7603af5e2f567bdbba116c8fca26591e7f2a3b1c 100644 (file)
@@ -1,14 +1,15 @@
 from __future__ import unicode_literals
 
 import re
-import time
 
 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
+from ..compat import (
     compat_urllib_request,
     compat_urllib_parse,
 )
+from ..utils import (
+    ExtractorError,
+)
 
 
 class MooshareIE(InfoExtractor):
@@ -43,9 +44,7 @@ class MooshareIE(InfoExtractor):
     ]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         page = self._download_webpage(url, video_id, 'Downloading page')
 
         if re.search(r'>Video Not Found or Deleted<', page) is not None:
@@ -64,8 +63,7 @@ class MooshareIE(InfoExtractor):
             'http://mooshare.biz/%s' % video_id, compat_urllib_parse.urlencode(download_form))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')
 
-        self.to_screen('%s: Waiting for timeout' % video_id)
-        time.sleep(5)
+        self._sleep(5, video_id)
 
         video_page = self._download_webpage(request, video_id, 'Downloading video page')
 
index 7c0ec6a127e97dca1068db9db740954e851a8447..c1a482dba39fb98efdb28e85b681565eb58e3f9e 100644 (file)
@@ -1,63 +1,49 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import hashlib
-import json
-import re
-import time
-
 from .common import InfoExtractor
-from ..utils import (
-    compat_parse_qs,
-    compat_str,
-    int_or_none,
+from ..compat import (
+    compat_urlparse,
 )
 
 
 class MotorsportIE(InfoExtractor):
     IE_DESC = 'motorsport.com'
-    _VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/(?:$|[?#])'
+    _VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])'
     _TEST = {
         'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/',
-        'md5': '5592cb7c5005d9b2c163df5ac3dc04e4',
         'info_dict': {
-            'id': '7063',
+            'id': '2-T3WuR-KMM',
             'ext': 'mp4',
             'title': 'Red Bull Racing: 2014 Rules Explained',
-            'duration': 207,
+            'duration': 208,
             'description': 'A new clip from Red Bull sees Daniel Ricciardo and Sebastian Vettel explain the 2014 Formula One regulations – which are arguably the most complex the sport has ever seen.',
-            'uploader': 'rainiere',
-            'thumbnail': r're:^http://.*motorsport\.com/.+\.jpg$'
-        }
+            'uploader': 'mcomstaff',
+            'uploader_id': 'UC334JIYKkVnyFoNCclfZtHQ',
+            'upload_date': '20140903',
+            'thumbnail': r're:^https?://.+\.jpg$'
+        },
+        'add_ie': ['Youtube'],
+        'params': {
+            'skip_download': True,
+        },
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('id')
-
+        display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
-        flashvars_code = self._html_search_regex(
-            r'<embed id="player".*?flashvars="([^"]+)"', webpage, 'flashvars')
-        flashvars = compat_parse_qs(flashvars_code)
-        params = json.loads(flashvars['parameters'][0])
-
-        e = compat_str(int(time.time()) + 24 * 60 * 60)
-        base_video_url = params['location'] + '?e=' + e
-        s = 'h3hg713fh32'
-        h = hashlib.md5((s + base_video_url).encode('utf-8')).hexdigest()
-        video_url = base_video_url + '&h=' + h
 
-        uploader = self._html_search_regex(
-            r'(?s)<span class="label">Video by: </span>(.*?)</a>', webpage,
-            'uploader', fatal=False)
+        iframe_path = self._html_search_regex(
+            r'<iframe id="player_iframe"[^>]+src="([^"]+)"', webpage,
+            'iframe path')
+        iframe = self._download_webpage(
+            compat_urlparse.urljoin(url, iframe_path), display_id,
+            'Downloading iframe')
+        youtube_id = self._search_regex(
+            r'www.youtube.com/embed/(.{11})', iframe, 'youtube id')
 
         return {
-            'id': params['video_id'],
+            '_type': 'url_transparent',
             'display_id': display_id,
-            'title': params['title'],
-            'url': video_url,
-            'description': params.get('description'),
-            'thumbnail': params.get('main_thumb'),
-            'duration': int_or_none(params.get('duration')),
-            'uploader': uploader,
+            'url': 'https://youtube.com/watch?v=%s' % youtube_id,
         }
index 456807dd1c4487332a4e0006448074010e86117b..04e17d0551c7a46feff1822c4dc4be38d00cc520 100644 (file)
@@ -3,9 +3,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_str,
+)
 from ..utils import (
     ExtractorError,
-    compat_str,
     clean_html,
 )
 
index b482d6d4dfb09416bc46ef43ac21dff8bf2d5744..5ebc78033a4abbb98310096c279fe11459b4a791 100644 (file)
@@ -3,9 +3,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
+)
+from ..utils import (
     ExtractorError,
     find_xpath_attr,
     fix_xml_ampersands,
index e626146705cfcc961ebcffad93c889b4a4f90e62..83414a2325586d7319c06247fa037c42bb2b199a 100644 (file)
@@ -88,6 +88,7 @@ class MySpaceIE(InfoExtractor):
                 self.report_warning(
                     '%s: No downloadable song on this page' % video_id)
                 return
+
             def search_data(name):
                 return self._search_regex(
                     r'''data-%s=([\'"])(?P<data>.*?)\1''' % name,
index 51e540814be209856a9a71f891e55eeb4ba559c2..5b9b9fbcd0844897d6d63305ed00729e70c7f4fb 100644 (file)
@@ -2,9 +2,10 @@ from __future__ import unicode_literals
 import os.path
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse_urlparse,
-
+)
+from ..utils import (
     ExtractorError,
 )
 
diff --git a/youtube_dl/extractor/myvidster.py b/youtube_dl/extractor/myvidster.py
new file mode 100644 (file)
index 0000000..a94ab83
--- /dev/null
@@ -0,0 +1,29 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class MyVidsterIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?myvidster\.com/video/(?P<id>\d+)/'
+
+    _TEST = {
+        'url': 'http://www.myvidster.com/video/32059805/Hot_chemistry_with_raw_love_making',
+        'md5': '95296d0231c1363222c3441af62dc4ca',
+        'info_dict': {
+            'id': '3685814',
+            'title': 'md5:7d8427d6d02c4fbcef50fe269980c749',
+            'upload_date': '20141027',
+            'uploader_id': 'utkualp',
+            'ext': 'mp4',
+            'age_limit': 18,
+        },
+        'add_ie': ['XHamster'],
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        return self.url_result(self._html_search_regex(
+            r'rel="videolink" href="(?P<real_url>.*)">',
+            webpage, 'real video url'))
index fbe34defd868694d44f4371825322a41b39019a3..c10405f04d3cc1b3e89004029b7502112e9baa29 100644 (file)
@@ -4,8 +4,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
+)
+from ..utils import (
     ExtractorError,
     clean_html,
 )
@@ -26,9 +28,9 @@ class NaverIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(1)
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
+
         m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
                          webpage)
         if m_id is None:
index f69fe0925ee4d5d68699f09d84a568ce34f787ec..862b706bf96719aa071f1f89c73f2a4ef45a20b1 100644 (file)
@@ -1,7 +1,5 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     remove_end,
@@ -10,8 +8,8 @@ from ..utils import (
 
 
 class NBAIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
-    _TEST = {
+    _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)/?(?:/index\.html)?(?:\?.*)?$'
+    _TESTS = [{
         'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
         'md5': 'c0edcfc37607344e2ff8f13c378c88a4',
         'info_dict': {
@@ -21,12 +19,13 @@ class NBAIE(InfoExtractor):
             'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
             'duration': 181,
         },
-    }
+    }, {
+        'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
@@ -37,7 +36,7 @@ class NBAIE(InfoExtractor):
 
         description = self._og_search_description(webpage)
         duration = parse_duration(
-            self._html_search_meta('duration', webpage, 'duration', fatal=False))
+            self._html_search_meta('duration', webpage, 'duration'))
 
         return {
             'id': shortened_video_id,
index 7b5449031ebd2b7245d452c04ae50dfaf970d6ca..690c46b6a57be11edf36899b959318af5e482119 100644 (file)
@@ -4,31 +4,47 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_str,
+)
+from ..utils import (
     ExtractorError,
     find_xpath_attr,
 )
 
 
 class NBCIE(InfoExtractor):
-    _VALID_URL = r'http://www\.nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+)'
-
-    _TEST = {
-        'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188',
-        # md5 checksum is not stable
-        'info_dict': {
-            'id': 'bTmnLCvIbaaH',
-            'ext': 'flv',
-            'title': 'I Am a Firefighter',
-            'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.',
+    _VALID_URL = r'http://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
+
+    _TESTS = [
+        {
+            'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188',
+            # md5 checksum is not stable
+            'info_dict': {
+                'id': 'bTmnLCvIbaaH',
+                'ext': 'flv',
+                'title': 'I Am a Firefighter',
+                'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.',
+            },
         },
-    }
+        {
+            'url': 'http://www.nbc.com/the-tonight-show/episodes/176',
+            'info_dict': {
+                'id': 'XwU9KZkp98TH',
+                'ext': 'flv',
+                'title': 'Ricky Gervais, Steven Van Zandt, ILoveMakonnen',
+                'description': 'A brand new episode of The Tonight Show welcomes Ricky Gervais, Steven Van Zandt and ILoveMakonnen.',
+            },
+            'skip': 'Only works from US',
+        },
+    ]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-        theplatform_url = self._search_regex('class="video-player video-player-full" data-mpx-url="(.*?)"', webpage, 'theplatform url')
+        theplatform_url = self._search_regex(
+            '(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
+            webpage, 'theplatform url').replace('_no_endcard', '')
         if theplatform_url.startswith('//'):
             theplatform_url = 'http:' + theplatform_url
         return self.url_result(theplatform_url)
diff --git a/youtube_dl/extractor/nerdcubed.py b/youtube_dl/extractor/nerdcubed.py
new file mode 100644 (file)
index 0000000..efc903a
--- /dev/null
@@ -0,0 +1,35 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import datetime
+
+from .common import InfoExtractor
+
+
+class NerdCubedFeedIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?nerdcubed\.co\.uk/feed\.json'
+    _TEST = {
+        'url': 'http://www.nerdcubed.co.uk/feed.json',
+        'info_dict': {
+            'title': 'nerdcubed.co.uk feed',
+        },
+        'playlist_mincount': 1300,
+    }
+
+    def _real_extract(self, url):
+        feed = self._download_json(url, url, "Downloading NerdCubed JSON feed")
+
+        entries = [{
+            '_type': 'url',
+            'title': feed_entry['title'],
+            'uploader': feed_entry['source']['name'] if feed_entry['source'] else None,
+            'upload_date': datetime.datetime.strptime(feed_entry['date'], '%Y-%m-%d').strftime('%Y%m%d'),
+            'url': "http://www.youtube.com/watch?v=" + feed_entry['youtube_id'],
+        } for feed_entry in feed]
+
+        return {
+            '_type': 'playlist',
+            'title': 'nerdcubed.co.uk feed',
+            'id': 'nerdcubed-feed',
+            'entries': entries,
+        }
diff --git a/youtube_dl/extractor/netzkino.py b/youtube_dl/extractor/netzkino.py
new file mode 100644 (file)
index 0000000..93567d1
--- /dev/null
@@ -0,0 +1,86 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    int_or_none,
+    js_to_json,
+    parse_iso8601,
+)
+
+
+class NetzkinoIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?netzkino\.de/\#!/(?P<category>[^/]+)/(?P<id>[^/]+)'
+
+    _TEST = {
+        'url': 'http://www.netzkino.de/#!/scifikino/rakete-zum-mond',
+        'md5': '92a3f8b76f8d7220acce5377ea5d4873',
+        'info_dict': {
+            'id': 'rakete-zum-mond',
+            'ext': 'mp4',
+            'title': 'Rakete zum Mond (Endstation Mond, Destination Moon)',
+            'comments': 'mincount:3',
+            'description': 'md5:1eddeacc7e62d5a25a2d1a7290c64a28',
+            'upload_date': '20120813',
+            'thumbnail': 're:https?://.*\.jpg$',
+            'timestamp': 1344858571,
+            'age_limit': 12,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        category_id = mobj.group('category')
+        video_id = mobj.group('id')
+
+        api_url = 'http://api.netzkino.de.simplecache.net/capi-2.0a/categories/%s.json?d=www' % category_id
+        api_info = self._download_json(api_url, video_id)
+        info = next(
+            p for p in api_info['posts'] if p['slug'] == video_id)
+        custom_fields = info['custom_fields']
+
+        production_js = self._download_webpage(
+            'http://www.netzkino.de/beta/dist/production.min.js', video_id,
+            note='Downloading player code')
+        avo_js = self._search_regex(
+            r'window\.avoCore\s*=.*?urlTemplate:\s*(\{.*?"\})',
+            production_js, 'URL templates')
+        templates = self._parse_json(
+            avo_js, video_id, transform_source=js_to_json)
+
+        suffix = {
+            'hds': '.mp4/manifest.f4m',
+            'hls': '.mp4/master.m3u8',
+            'pmd': '.mp4',
+        }
+        film_fn = custom_fields['Streaming'][0]
+        formats = [{
+            'format_id': key,
+            'ext': 'mp4',
+            'url': tpl.replace('{}', film_fn) + suffix[key],
+        } for key, tpl in templates.items()]
+        self._sort_formats(formats)
+
+        comments = [{
+            'timestamp': parse_iso8601(c.get('date'), delimiter=' '),
+            'id': c['id'],
+            'author': c['name'],
+            'html': c['content'],
+            'parent': 'root' if c.get('parent', 0) == 0 else c['parent'],
+        } for c in info.get('comments', [])]
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'comments': comments,
+            'title': info['title'],
+            'age_limit': int_or_none(custom_fields.get('FSK')[0]),
+            'timestamp': parse_iso8601(info.get('date'), delimiter=' '),
+            'description': clean_html(info.get('content')),
+            'thumbnail': info.get('thumbnail'),
+            'playlist_title': api_info.get('title'),
+            'playlist_id': category_id,
+        }
index 7ce1d481d0a73218598bf24fee964ca8ec65956d..ea077254b4320fe18e59eb9b67461b13c146b873 100644 (file)
@@ -1,9 +1,7 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_request,
     compat_urllib_parse,
 )
@@ -12,7 +10,7 @@ from ..utils import (
 class NFBIE(InfoExtractor):
     IE_NAME = 'nfb'
     IE_DESC = 'National Film Board of Canada'
-    _VALID_URL = r'https?://(?:www\.)?(nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)'
 
     _TEST = {
         'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny',
@@ -32,10 +30,10 @@ class NFBIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        page = self._download_webpage('https://www.nfb.ca/film/%s' % video_id, video_id, 'Downloading film page')
+        video_id = self._match_id(url)
+        page = self._download_webpage(
+            'https://www.nfb.ca/film/%s' % video_id, video_id,
+            'Downloading film page')
 
         uploader_id = self._html_search_regex(r'<a class="director-link" href="/explore-all-directors/([^/]+)/"',
                                               page, 'director id', fatal=False)
index cc7c921c364d64ee504fa6d31265d13a96565e8d..606e2294efb716cfe755d1b9564357dbda7f9039 100644 (file)
@@ -4,9 +4,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_parse_urlparse,
+)
 from ..utils import (
     ExtractorError,
-    compat_urllib_parse_urlparse,
     int_or_none,
     remove_end,
 )
index bdcf7e23953870e61a523a11b4daaf10cfc3ab75..d3a4fc51387a8e1a1b56718ba9dafabf1d9a7db5 100644 (file)
@@ -2,11 +2,13 @@ from __future__ import unicode_literals
 
 import re
 import json
+import os
 
 from .common import InfoExtractor
 from ..compat import (
     compat_urlparse,
     compat_urllib_parse,
+    compat_urllib_parse_urlparse
 )
 from ..utils import (
     unified_strdate,
@@ -24,9 +26,12 @@ class NHLBaseInfoExtractor(InfoExtractor):
 
         initial_video_url = info['publishPoint']
         if info['formats'] == '1':
+            parsed_url = compat_urllib_parse_urlparse(initial_video_url)
+            filename, ext = os.path.splitext(parsed_url.path)
+            path = '%s_sd%s' % (filename, ext)
             data = compat_urllib_parse.urlencode({
                 'type': 'fvod',
-                'path': initial_video_url.replace('.mp4', '_sd.mp4'),
+                'path': compat_urlparse.urlunparse(parsed_url[:2] + (path,) + parsed_url[3:])
             })
             path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
             path_doc = self._download_xml(
@@ -49,7 +54,7 @@ class NHLBaseInfoExtractor(InfoExtractor):
 
 class NHLIE(NHLBaseInfoExtractor):
     IE_NAME = 'nhl.com'
-    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console(?:\?(?:.*?[?&])?)id=(?P<id>[0-9a-z-]+)'
+    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
 
     _TESTS = [{
         'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
@@ -73,6 +78,17 @@ class NHLIE(NHLBaseInfoExtractor):
             'duration': 0,
             'upload_date': '20141011',
         },
+    }, {
+        'url': 'http://video.mapleleafs.nhl.com/videocenter/console?id=58665&catid=802',
+        'md5': 'c78fc64ea01777e426cfc202b746c825',
+        'info_dict': {
+            'id': '58665',
+            'ext': 'flv',
+            'title': 'Classic Game In Six - April 22, 1979',
+            'description': 'It was the last playoff game for the Leafs in the decade, and the last time the Leafs and Habs played in the playoffs. Great game, not a great ending.',
+            'duration': 400,
+            'upload_date': '20100129'
+        },
     }, {
         'url': 'http://video.flames.nhl.com/videocenter/console?id=630616',
         'only_matching': True,
@@ -90,7 +106,7 @@ class NHLIE(NHLBaseInfoExtractor):
 class NHLVideocenterIE(NHLBaseInfoExtractor):
     IE_NAME = 'nhl.com:videocenter'
     IE_DESC = 'NHL videocenter category'
-    _VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[0-9]+)(?![&?]id=).*?)?$'
+    _VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?[^(id=)]*catid=(?P<catid>[0-9]+)(?![&?]id=).*?)?$'
     _TEST = {
         'url': 'http://video.canucks.nhl.com/videocenter/console?catid=999',
         'info_dict': {
index 1d9c1a096403e7e7b4f3835f31dbee31812594c9..4c18904169d3f69a0bf7e95fb21d98218bca7e91 100644 (file)
@@ -5,14 +5,16 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
     compat_urlparse,
-    unified_strdate,
-    parse_duration,
-    int_or_none,
+)
+from ..utils import (
     ExtractorError,
+    int_or_none,
+    parse_duration,
+    unified_strdate,
 )
 
 
index 16a02ad7939082627ffb9edd41d7bf6e62fd1f6d..7f842b5c2560211cc88280e2b97cf107af588bfe 100644 (file)
@@ -23,6 +23,9 @@ class NineGagIE(InfoExtractor):
             "ext": "mp4",
             "description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
             "title": "\"People Are Awesome 2013\" Is Absolutely Awesome",
+            'uploader_id': 'UCdEH6EjDKwtTe-sO2f0_1XA',
+            'uploader': 'CompilationChannel',
+            'upload_date': '20131110',
             "view_count": int,
             "thumbnail": "re:^https?://",
         },
@@ -35,6 +38,9 @@ class NineGagIE(InfoExtractor):
             'display_id': 'alternate-banned-opening-scene-of-gravity',
             "description": "While Gravity was a pretty awesome movie already, YouTuber Krishna Shenoi came up with a way to improve upon it, introducing a much better solution to Sandra Bullock's seemingly endless tumble in space. The ending is priceless.",
             'title': "Banned Opening Scene Of \"Gravity\" That Changes The Whole Movie",
+            'uploader': 'Krishna Shenoi',
+            'upload_date': '20140401',
+            'uploader_id': 'krishnashenoi93',
         },
     }]
 
index 7d2ff7b9a149d0284da1d52a26f4c0a85bf0f6ce..251e6da07457b7e7be6b5703b5769214ae299c3d 100644 (file)
@@ -6,13 +6,15 @@ import time
 import hashlib
 
 from .common import InfoExtractor
-from ..utils import (
-    compat_urllib_request,
+from ..compat import (
+    compat_str,
     compat_urllib_parse,
-    ExtractorError,
+    compat_urllib_request,
+)
+from ..utils import (
     clean_html,
+    ExtractorError,
     unified_strdate,
-    compat_str,
 )
 
 
index 3d35b11ac81286a359a06620d0e08e8fae18043b..c13ff0d650bcd443bf7a6b6d2444215892d72732 100644 (file)
@@ -22,7 +22,11 @@ class NormalbootsIE(InfoExtractor):
             'description': 'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for ‘Tense Battle Theme’:\xa0http://www.youtube.com/Kiamet/',
             'uploader': 'JonTron',
             'upload_date': '20140125',
-        }
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
     }
 
     def _real_extract(self, url):
index f3be8f552c3764995057acf18b74514537960d4e..f5ef856db0155dd84f10d5db4a8cef8e6c08213c 100644 (file)
@@ -4,9 +4,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_request,
+)
 from ..utils import (
     ExtractorError,
-    compat_urllib_request,
     urlencode_postdata,
     xpath_text,
     xpath_with_ns,
@@ -32,8 +34,7 @@ class NosVideoIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         fields = {
             'id': video_id,
index 38d05e46604a859247c0b155625ee41f5b556b36..04d779890af1960d65b070d0b2f80e429db21d07 100644 (file)
@@ -3,9 +3,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_urlparse,
+)
 from ..utils import (
     ExtractorError,
-    compat_urlparse
 )
 
 
index ecb38de2d3d969b96145b0b191054e2b34f1801e..dec09cdfef0087ee3400394b82750daf434ed29d 100644 (file)
@@ -7,7 +7,7 @@ class NowVideoIE(NovaMovIE):
     IE_NAME = 'nowvideo'
     IE_DESC = 'NowVideo'
 
-    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:ch|sx|eu|at|ag|co)'}
+    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:ch|sx|eu|at|ag|co|li)'}
 
     _HOST = 'www.nowvideo.ch'
 
index ce31694a506a99d1ae21460f0c458ef3100c1850..175b14583efbad65d9fbb1777d14dbb5576c1cc1 100644 (file)
@@ -1,18 +1,26 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import (
-    unified_strdate,
+    fix_xml_ampersands,
     parse_duration,
     qualities,
     strip_jsonp,
+    unified_strdate,
     url_basename,
 )
 
 
-class NPOIE(InfoExtractor):
+class NPOBaseIE(InfoExtractor):
+    def _get_token(self, video_id):
+        token_page = self._download_webpage(
+            'http://ida.omroep.nl/npoplayer/i.js',
+            video_id, note='Downloading token')
+        return self._search_regex(
+            r'npoplayer\.token = "(.+?)"', token_page, 'token')
+
+
+class NPOIE(NPOBaseIE):
     IE_NAME = 'npo.nl'
     _VALID_URL = r'https?://www\.npo\.nl/[^/]+/[^/]+/(?P<id>[^/?]+)'
 
@@ -51,12 +59,35 @@ class NPOIE(InfoExtractor):
                 'upload_date': '20130225',
                 'duration': 3000,
             },
-        }
+        },
+        {
+            'url': 'http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706',
+            'info_dict': {
+                'id': 'WO_VPRO_043706',
+                'ext': 'wmv',
+                'title': 'De nieuwe mens - Deel 1',
+                'description': 'md5:518ae51ba1293ffb80d8d8ce90b74e4b',
+                'duration': 4680,
+            },
+            'params': {
+                # mplayer mms download
+                'skip_download': True,
+            }
+        },
+        # non asf in streams
+        {
+            'url': 'http://www.npo.nl/hoe-gaat-europa-verder-na-parijs/10-01-2015/WO_NOS_762771',
+            'md5': 'b3da13de374cbe2d5332a7e910bef97f',
+            'info_dict': {
+                'id': 'WO_NOS_762771',
+                'ext': 'mp4',
+                'title': 'Hoe gaat Europa verder na Parijs?',
+            },
+        },
     ]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         return self._get_info(video_id)
 
     def _get_info(self, video_id):
@@ -66,39 +97,68 @@ class NPOIE(InfoExtractor):
             # We have to remove the javascript callback
             transform_source=strip_jsonp,
         )
-        token_page = self._download_webpage(
-            'http://ida.omroep.nl/npoplayer/i.js',
-            video_id,
-            note='Downloading token'
-        )
-        token = self._search_regex(r'npoplayer\.token = "(.+?)"', token_page, 'token')
+
+        token = self._get_token(video_id)
 
         formats = []
-        quality = qualities(['adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std'])
-        for format_id in metadata['pubopties']:
-            format_info = self._download_json(
-                'http://ida.omroep.nl/odi/?prid=%s&puboptions=%s&adaptive=yes&token=%s' % (video_id, format_id, token),
-                video_id, 'Downloading %s JSON' % format_id)
-            if format_info.get('error_code', 0) or format_info.get('errorcode', 0):
-                continue
-            streams = format_info.get('streams')
-            if streams:
-                video_info = self._download_json(
-                    streams[0] + '&type=json',
-                    video_id, 'Downloading %s stream JSON' % format_id)
-            else:
-                video_info = format_info
-            video_url = video_info.get('url')
-            if not video_url:
-                continue
-            if format_id == 'adaptive':
-                formats.extend(self._extract_m3u8_formats(video_url, video_id))
-            else:
+
+        pubopties = metadata.get('pubopties')
+        if pubopties:
+            quality = qualities(['adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std'])
+            for format_id in pubopties:
+                format_info = self._download_json(
+                    'http://ida.omroep.nl/odi/?prid=%s&puboptions=%s&adaptive=yes&token=%s'
+                    % (video_id, format_id, token),
+                    video_id, 'Downloading %s JSON' % format_id)
+                if format_info.get('error_code', 0) or format_info.get('errorcode', 0):
+                    continue
+                streams = format_info.get('streams')
+                if streams:
+                    video_info = self._download_json(
+                        streams[0] + '&type=json',
+                        video_id, 'Downloading %s stream JSON' % format_id)
+                else:
+                    video_info = format_info
+                video_url = video_info.get('url')
+                if not video_url:
+                    continue
+                if format_id == 'adaptive':
+                    formats.extend(self._extract_m3u8_formats(video_url, video_id))
+                else:
+                    formats.append({
+                        'url': video_url,
+                        'format_id': format_id,
+                        'quality': quality(format_id),
+                    })
+
+        streams = metadata.get('streams')
+        if streams:
+            for i, stream in enumerate(streams):
+                stream_url = stream.get('url')
+                if not stream_url:
+                    continue
+                if '.asf' not in stream_url:
+                    formats.append({
+                        'url': stream_url,
+                        'quality': stream.get('kwaliteit'),
+                    })
+                    continue
+                asx = self._download_xml(
+                    stream_url, video_id,
+                    'Downloading stream %d ASX playlist' % i,
+                    transform_source=fix_xml_ampersands)
+                ref = asx.find('./ENTRY/Ref')
+                if ref is None:
+                    continue
+                video_url = ref.get('href')
+                if not video_url:
+                    continue
                 formats.append({
                     'url': video_url,
-                    'format_id': format_id,
-                    'quality': quality(format_id),
+                    'ext': stream.get('formaat', 'asf'),
+                    'quality': stream.get('kwaliteit'),
                 })
+
         self._sort_formats(formats)
 
         return {
@@ -112,6 +172,83 @@ class NPOIE(InfoExtractor):
         }
 
 
+class NPOLiveIE(NPOBaseIE):
+    IE_NAME = 'npo.nl:live'
+    _VALID_URL = r'https?://www\.npo\.nl/live/(?P<id>.+)'
+
+    _TEST = {
+        'url': 'http://www.npo.nl/live/npo-1',
+        'info_dict': {
+            'id': 'LI_NEDERLAND1_136692',
+            'display_id': 'npo-1',
+            'ext': 'mp4',
+            'title': 're:^Nederland 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'description': 'Livestream',
+            'is_live': True,
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        live_id = self._search_regex(
+            r'data-prid="([^"]+)"', webpage, 'live id')
+
+        metadata = self._download_json(
+            'http://e.omroep.nl/metadata/%s' % live_id,
+            display_id, transform_source=strip_jsonp)
+
+        token = self._get_token(display_id)
+
+        formats = []
+
+        streams = metadata.get('streams')
+        if streams:
+            for stream in streams:
+                stream_type = stream.get('type').lower()
+                if stream_type == 'ss':
+                    continue
+                stream_info = self._download_json(
+                    'http://ida.omroep.nl/aapi/?stream=%s&token=%s&type=jsonp'
+                    % (stream.get('url'), token),
+                    display_id, 'Downloading %s JSON' % stream_type)
+                if stream_info.get('error_code', 0) or stream_info.get('errorcode', 0):
+                    continue
+                stream_url = self._download_json(
+                    stream_info['stream'], display_id,
+                    'Downloading %s URL' % stream_type,
+                    transform_source=strip_jsonp)
+                if stream_type == 'hds':
+                    f4m_formats = self._extract_f4m_formats(stream_url, display_id)
+                    # f4m downloader downloads only piece of live stream
+                    for f4m_format in f4m_formats:
+                        f4m_format['preference'] = -1
+                    formats.extend(f4m_formats)
+                elif stream_type == 'hls':
+                    formats.extend(self._extract_m3u8_formats(stream_url, display_id, 'mp4'))
+                else:
+                    formats.append({
+                        'url': stream_url,
+                    })
+
+        self._sort_formats(formats)
+
+        return {
+            'id': live_id,
+            'display_id': display_id,
+            'title': self._live_title(metadata['titel']),
+            'description': metadata['info'],
+            'thumbnail': metadata.get('images', [{'url': None}])[-1]['url'],
+            'formats': formats,
+            'is_live': True,
+        }
+
+
 class TegenlichtVproIE(NPOIE):
     IE_NAME = 'tegenlicht.vpro.nl'
     _VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?'
index 96f0ae1ebde53402a3e651dcd86c1397d141c542..f6de260222c678e2233b668d4b557e22e51d224c 100644 (file)
@@ -7,8 +7,10 @@ from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
     float_or_none,
+    parse_duration,
     unified_strdate,
 )
+from .subtitles import SubtitlesInfoExtractor
 
 
 class NRKIE(InfoExtractor):
@@ -71,13 +73,13 @@ class NRKIE(InfoExtractor):
         }
 
 
-class NRKTVIE(InfoExtractor):
-    _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})'
+class NRKTVIE(SubtitlesInfoExtractor):
+    _VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
 
     _TESTS = [
         {
             'url': 'http://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
-            'md5': '7b96112fbae1faf09a6f9ae1aff6cb84',
+            'md5': 'adf2c5454fa2bf032f47a9f8fb351342',
             'info_dict': {
                 'id': 'MUHH48000314',
                 'ext': 'flv',
@@ -85,11 +87,11 @@ class NRKTVIE(InfoExtractor):
                 'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
                 'upload_date': '20140523',
                 'duration': 1741.52,
-            }
+            },
         },
         {
             'url': 'http://tv.nrk.no/program/mdfp15000514',
-            'md5': 'af01795a31f1cf7265c8657534d8077b',
+            'md5': '383650ece2b25ecec996ad7b5bb2a384',
             'info_dict': {
                 'id': 'mdfp15000514',
                 'ext': 'flv',
@@ -97,42 +99,155 @@ class NRKTVIE(InfoExtractor):
                 'description': 'md5:654c12511f035aed1e42bdf5db3b206a',
                 'upload_date': '20140524',
                 'duration': 4605.0,
-            }
+            },
+        },
+        {
+            # single playlist video
+            'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
+            'md5': 'adbd1dbd813edaf532b0a253780719c2',
+            'info_dict': {
+                'id': 'MSPO40010515-part2',
+                'ext': 'flv',
+                'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
+                'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
+                'upload_date': '20150106',
+            },
+            'skip': 'Only works from Norway',
         },
+        {
+            'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
+            'playlist': [
+                {
+                    'md5': '9480285eff92d64f06e02a5367970a7a',
+                    'info_dict': {
+                        'id': 'MSPO40010515-part1',
+                        'ext': 'flv',
+                        'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)',
+                        'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
+                        'upload_date': '20150106',
+                    },
+                },
+                {
+                    'md5': 'adbd1dbd813edaf532b0a253780719c2',
+                    'info_dict': {
+                        'id': 'MSPO40010515-part2',
+                        'ext': 'flv',
+                        'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
+                        'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
+                        'upload_date': '20150106',
+                    },
+                },
+            ],
+            'info_dict': {
+                'id': 'MSPO40010515',
+                'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn',
+                'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
+                'upload_date': '20150106',
+                'duration': 6947.5199999999995,
+            },
+            'skip': 'Only works from Norway',
+        }
     ]
 
+    def _seconds2str(self, s):
+        return '%02d:%02d:%02d.%03d' % (s / 3600, (s % 3600) / 60, s % 60, (s % 1) * 1000)
+
+    def _debug_print(self, txt):
+        if self._downloader.params.get('verbose', False):
+            self.to_screen('[debug] %s' % txt)
+
+    def _extract_captions(self, subtitlesurl, video_id, baseurl):
+        url = "%s%s" % (baseurl, subtitlesurl)
+        self._debug_print('%s: Subtitle url: %s' % (video_id, url))
+        captions = self._download_xml(url, video_id, 'Downloading subtitles')
+        lang = captions.get('lang', 'no')
+        ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/ns/ttml}'))
+        srt = ''
+        for pos, p in enumerate(ps):
+            begin = parse_duration(p.get('begin'))
+            duration = parse_duration(p.get('dur'))
+            starttime = self._seconds2str(begin)
+            endtime = self._seconds2str(begin + duration)
+            text = '\n'.join(p.itertext())
+            srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), starttime, endtime, text)
+        return {lang: srt}
+
+    def _extract_f4m(self, manifest_url, video_id):
+        return self._extract_f4m_formats(manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id)
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
+        part_id = mobj.group('part_id')
+        baseurl = mobj.group('baseurl')
 
-        page = self._download_webpage(url, video_id)
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_meta(
+            'title', webpage, 'title')
+        description = self._html_search_meta(
+            'description', webpage, 'description')
 
-        title = self._html_search_meta('title', page, 'title')
-        description = self._html_search_meta('description', page, 'description')
-        thumbnail = self._html_search_regex(r'data-posterimage="([^"]+)"', page, 'thumbnail', fatal=False)
-        upload_date = unified_strdate(self._html_search_meta('rightsfrom', page, 'upload date', fatal=False))
-        duration = float_or_none(
-            self._html_search_regex(r'data-duration="([^"]+)"', page, 'duration', fatal=False))
+        thumbnail = self._html_search_regex(
+            r'data-posterimage="([^"]+)"',
+            webpage, 'thumbnail', fatal=False)
+        upload_date = unified_strdate(self._html_search_meta(
+            'rightsfrom', webpage, 'upload date', fatal=False))
+        duration = float_or_none(self._html_search_regex(
+            r'data-duration="([^"]+)"',
+            webpage, 'duration', fatal=False))
+
+        # playlist
+        parts = re.findall(
+            r'<a href="#del=(\d+)"[^>]+data-argument="([^"]+)">([^<]+)</a>', webpage)
+        if parts:
+            entries = []
+            for current_part_id, stream_url, part_title in parts:
+                if part_id and current_part_id != part_id:
+                    continue
+                video_part_id = '%s-part%s' % (video_id, current_part_id)
+                formats = self._extract_f4m(stream_url, video_part_id)
+                entries.append({
+                    'id': video_part_id,
+                    'title': part_title,
+                    'description': description,
+                    'thumbnail': thumbnail,
+                    'upload_date': upload_date,
+                    'formats': formats,
+                })
+            if part_id:
+                if entries:
+                    return entries[0]
+            else:
+                playlist = self.playlist_result(entries, video_id, title, description)
+                playlist.update({
+                    'thumbnail': thumbnail,
+                    'upload_date': upload_date,
+                    'duration': duration,
+                })
+                return playlist
 
         formats = []
 
-        f4m_url = re.search(r'data-media="([^"]+)"', page)
+        f4m_url = re.search(r'data-media="([^"]+)"', webpage)
         if f4m_url:
-            formats.append({
-                'url': f4m_url.group(1) + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124',
-                'format_id': 'f4m',
-                'ext': 'flv',
-            })
+            formats.extend(self._extract_f4m(f4m_url.group(1), video_id))
 
-        m3u8_url = re.search(r'data-hls-media="([^"]+)"', page)
+        m3u8_url = re.search(r'data-hls-media="([^"]+)"', webpage)
         if m3u8_url:
-            formats.append({
-                'url': m3u8_url.group(1),
-                'format_id': 'm3u8',
-            })
-
+            formats.extend(self._extract_m3u8_formats(m3u8_url.group(1), video_id, 'mp4'))
         self._sort_formats(formats)
 
+        subtitles_url = self._html_search_regex(
+            r'data-subtitlesurl[ ]*=[ ]*"([^"]+)"',
+            webpage, 'subtitle URL', default=None)
+        subtitles = None
+        if subtitles_url:
+            subtitles = self._extract_captions(subtitles_url, video_id, baseurl)
+        if self._downloader.params.get('listsubtitles', False):
+            self._list_available_subtitles(video_id, subtitles)
+            return
+
         return {
             'id': video_id,
             'title': title,
@@ -141,4 +256,5 @@ class NRKTVIE(InfoExtractor):
             'upload_date': upload_date,
             'duration': duration,
             'formats': formats,
+            'subtitles': subtitles,
         }
index 13c8d79cd8ac6346dbe4e9810bc8bf0b20825dcc..ee740cd9c0fe71a48b79aee00c40ea610e81ea99 100644 (file)
@@ -130,7 +130,7 @@ class NTVIE(InfoExtractor):
                 'rtmp_conn': 'B:1',
                 'player_url': 'http://www.ntv.ru/swf/vps1.swf?update=20131128',
                 'page_url': 'http://www.ntv.ru',
-                'flash_ver': 'LNX 11,2,202,341',
+                'flash_version': 'LNX 11,2,202,341',
                 'rtmp_live': True,
                 'ext': 'flv',
                 'filesize': int(size.text),
index 449c8a6a3e86c410daaafbf80878161693242e27..57928f2aedcc0acfa5ba71d6e9f0a62af9d67b71 100644 (file)
@@ -3,15 +3,17 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_request,
+)
 from ..utils import (
     parse_duration,
     unified_strdate,
-    compat_urllib_request,
 )
 
 
 class NuvidIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www|m)\.nuvid\.com/video/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www|m)\.nuvid\.com/video/(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://m.nuvid.com/video/1310741/',
         'md5': 'eab207b7ac4fccfb4e23c86201f11277',
@@ -26,8 +28,7 @@ class NuvidIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         formats = []
 
index f17a528583bf7296906431bdc1a0e3a3cbd6c71d..d5b05c18febb580a448263b4f7b2876ef3234957 100644 (file)
@@ -16,7 +16,6 @@ class OoyalaIE(InfoExtractor):
         {
             # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
             'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
-            'md5': '3f5cceb3a7bf461d6c29dc466cf8033c',
             'info_dict': {
                 'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
                 'ext': 'mp4',
@@ -26,7 +25,6 @@ class OoyalaIE(InfoExtractor):
         }, {
             # Only available for ipad
             'url': 'http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
-            'md5': '4b9754921fddb68106e48c142e2a01e6',
             'info_dict': {
                 'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
                 'ext': 'mp4',
diff --git a/youtube_dl/extractor/openfilm.py b/youtube_dl/extractor/openfilm.py
new file mode 100644 (file)
index 0000000..2249657
--- /dev/null
@@ -0,0 +1,70 @@
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_iso8601,
+    compat_urllib_parse,
+    parse_age_limit,
+    int_or_none,
+)
+
+
+class OpenFilmIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)openfilm\.com/videos/(?P<id>.+)'
+    _TEST = {
+        'url': 'http://www.openfilm.com/videos/human-resources-remastered',
+        'md5': '42bcd88c2f3ec13b65edf0f8ad1cac37',
+        'info_dict': {
+            'id': '32736',
+            'display_id': 'human-resources-remastered',
+            'ext': 'mp4',
+            'title': 'Human Resources (Remastered)',
+            'description': 'Social Engineering in the 20th Century.',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 7164,
+            'timestamp': 1334756988,
+            'upload_date': '20120418',
+            'uploader_id': '41117',
+            'view_count': int,
+            'age_limit': 0,
+        },
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        player = compat_urllib_parse.unquote_plus(
+            self._og_search_video_url(webpage))
+
+        video = json.loads(self._search_regex(
+            r'\bp=({.+?})(?:&|$)', player, 'video JSON'))
+
+        video_url = '%s1.mp4' % video['location']
+        video_id = video.get('video_id')
+        display_id = video.get('alias') or display_id
+        title = video.get('title')
+        description = video.get('description')
+        thumbnail = video.get('main_thumb')
+        duration = int_or_none(video.get('duration'))
+        timestamp = parse_iso8601(video.get('dt_published'), ' ')
+        uploader_id = video.get('user_id')
+        view_count = int_or_none(video.get('views_count'))
+        age_limit = parse_age_limit(video.get('age_limit'))
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'timestamp': timestamp,
+            'uploader_id': uploader_id,
+            'view_count': view_count,
+            'age_limit': age_limit,
+        }
index 572a234ad8c2514e5704d936fb98a19035662f40..4e293392b3d39b46ad1612d884068a2dbfaeef23 100644 (file)
@@ -17,24 +17,39 @@ from ..utils import (
 class ORFTVthekIE(InfoExtractor):
     IE_NAME = 'orf:tvthek'
     IE_DESC = 'ORF TVthek'
-    _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P<id>\d+)'
-
-    _TEST = {
-        'url': 'http://tvthek.orf.at/program/matinee-Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7317210/Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7319746/Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7319747',
-        'file': '7319747.mp4',
-        'md5': 'bd803c5d8c32d3c64a0ea4b4eeddf375',
-        'info_dict': {
-            'title': 'Was Sie schon immer über Klassik wissen wollten',
-            'description': 'md5:0ddf0d5f0060bd53f744edaa5c2e04a4',
-            'duration': 3508,
-            'upload_date': '20140105',
-        },
-        'skip': 'Blocked outside of Austria',
-    }
+    _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics?/.+?|program/[^/]+)/(?P<id>\d+)'
+
+    _TESTS = [{
+        'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389',
+        'playlist': [{
+            'md5': '2942210346ed779588f428a92db88712',
+            'info_dict': {
+                'id': '8896777',
+                'ext': 'mp4',
+                'title': 'Aufgetischt: Mit der Steirischen Tafelrunde',
+                'description': 'md5:c1272f0245537812d4e36419c207b67d',
+                'duration': 2668,
+                'upload_date': '20141208',
+            },
+        }],
+        'skip': 'Blocked outside of Austria / Germany',
+    }, {
+        'url': 'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256',
+        'playlist': [{
+            'md5': '68f543909aea49d621dfc7703a11cfaf',
+            'info_dict': {
+                'id': '7982259',
+                'ext': 'mp4',
+                'title': 'Best of Ingrid Thurnher',
+                'upload_date': '20140527',
+                'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".',
+            }
+        }],
+        '_skip': 'Blocked outside of Austria / Germany',
+    }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        playlist_id = mobj.group('id')
+        playlist_id = self._match_id(url)
         webpage = self._download_webpage(url, playlist_id)
 
         data_json = self._search_regex(
@@ -43,7 +58,9 @@ class ORFTVthekIE(InfoExtractor):
 
         def get_segments(all_data):
             for data in all_data:
-                if data['name'] == 'Tracker::EPISODE_DETAIL_PAGE_OVER_PROGRAM':
+                if data['name'] in (
+                        'Tracker::EPISODE_DETAIL_PAGE_OVER_PROGRAM',
+                        'Tracker::EPISODE_DETAIL_PAGE_OVER_TOPIC'):
                     return data['values']['segments']
 
         sdata = get_segments(all_data)
@@ -111,18 +128,19 @@ class ORFTVthekIE(InfoExtractor):
         }
 
 
-# Audios on ORF radio are only available for 7 days, so we can't add tests.
-
-
 class ORFOE1IE(InfoExtractor):
     IE_NAME = 'orf:oe1'
     IE_DESC = 'Radio Österreich 1'
-    _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)'
+    _VALID_URL = r'http://oe1\.orf\.at/(?:programm/|konsole.*?#\?track_id=)(?P<id>[0-9]+)'
 
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        show_id = mobj.group('id')
+    # Audios on ORF radio are only available for 7 days, so we can't add tests.
+    _TEST = {
+        'url': 'http://oe1.orf.at/konsole?show=on_demand#?track_id=394211',
+        'only_matching': True,
+    }
 
+    def _real_extract(self, url):
+        show_id = self._match_id(url)
         data = self._download_json(
             'http://oe1.orf.at/programm/%s/konsole' % show_id,
             show_id
@@ -145,7 +163,7 @@ class ORFOE1IE(InfoExtractor):
 
 
 class ORFFM4IE(InfoExtractor):
-    IE_DESC = 'orf:fm4'
+    IE_NAME = 'orf:fm4'
     IE_DESC = 'radio FM4'
     _VALID_URL = r'http://fm4\.orf\.at/7tage/?#(?P<date>[0-9]+)/(?P<show>\w+)'
 
index 6118ed5c2021492ee91e22dccd642d564918604c..afce732e141a1ae6cec78cc28ed4376fa174ab1f 100644 (file)
@@ -4,6 +4,7 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
+    ExtractorError,
     unified_strdate,
     US_RATINGS,
 )
@@ -151,6 +152,19 @@ class PBSIE(InfoExtractor):
         info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
         info = self._download_json(info_url, display_id)
 
+        redirect_url = info['alternate_encoding']['url']
+        redirect_info = self._download_json(
+            redirect_url + '?format=json', display_id,
+            'Downloading video url info')
+        if redirect_info['status'] == 'error':
+            if redirect_info['http_code'] == 403:
+                message = (
+                    'The video is not available in your region due to '
+                    'right restrictions')
+            else:
+                message = redirect_info['message']
+            raise ExtractorError(message, expected=True)
+
         rating_str = info.get('rating')
         if rating_str is not None:
             rating_str = rating_str.rpartition('-')[2]
@@ -160,7 +174,7 @@ class PBSIE(InfoExtractor):
             'id': video_id,
             'display_id': display_id,
             'title': info['title'],
-            'url': info['alternate_encoding']['url'],
+            'url': redirect_info['url'],
             'ext': 'mp4',
             'description': info['program'].get('description'),
             'thumbnail': info.get('image_url'),
index b4389e0b6feaf0726a4805bec674b77cd38e295b..c66db3cdc84e55a6a3a904ddf3ff7c09aaac9573 100644 (file)
@@ -4,16 +4,17 @@ import json
 import re
 
 from .common import InfoExtractor
-from ..utils import compat_urllib_parse
+from ..compat import compat_urllib_parse
 
 
 class PhotobucketIE(InfoExtractor):
     _VALID_URL = r'http://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'
     _TEST = {
         'url': 'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0',
-        'file': 'zpsc0c3b9fa.mp4',
         'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99',
         'info_dict': {
+            'id': 'zpsc0c3b9fa',
+            'ext': 'mp4',
             'timestamp': 1367669341,
             'upload_date': '20130504',
             'uploader': 'rachaneronas',
index 17880471d9d160f6d3315ca9c6eadeada8ce91a7..45716c75d9505c5fcb7e8c6d73ec4feaef298aee 100644 (file)
@@ -5,11 +5,13 @@ import re
 import os.path
 
 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
 )
+from ..utils import (
+    ExtractorError,
+)
 
 
 class PlayedIE(InfoExtractor):
@@ -24,11 +26,11 @@ class PlayedIE(InfoExtractor):
             'ext': 'flv',
             'title': 'youtube-dl_test_video.mp4',
         },
+        'skip': 'Removed for copyright infringement.',  # oh wow
     }
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-
         orig_webpage = self._download_webpage(url, video_id)
 
         m_error = re.search(
index ebc0468042a22c2bccdfb5b7e45861c0bc45f61c..9576aed0e6668189c1959df3166b1e550facc7b0 100644 (file)
@@ -4,9 +4,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
+)
+from ..utils import (
     ExtractorError,
     float_or_none,
     int_or_none,
index cd3905acb0fcd0ef9fe08beec2bda41fd2f94f70..c3e667e9e72ea0aaf6e5db731f630816e6a2861d 100644 (file)
@@ -3,31 +3,31 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_parse,
+)
 from ..utils import (
-    ExtractorError,
     clean_html,
-    compat_urllib_parse,
+    ExtractorError,
 )
 
 
 class PlayvidIE(InfoExtractor):
-    _VALID_URL = r'^https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)'
+    _VALID_URL = r'https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)'
     _TEST = {
-        'url': 'http://www.playvid.com/watch/agbDDi7WZTV',
-        'md5': '44930f8afa616efdf9482daf4fe53e1e',
+        'url': 'http://www.playvid.com/watch/RnmBNgtrrJu',
+        'md5': 'ffa2f6b2119af359f544388d8c01eb6c',
         'info_dict': {
-            'id': 'agbDDi7WZTV',
+            'id': 'RnmBNgtrrJu',
             'ext': 'mp4',
-            'title': 'Michelle Lewin in Miami Beach',
-            'duration': 240,
+            'title': 'md5:9256d01c6317e3f703848b5906880dc8',
+            'duration': 82,
             'age_limit': 18,
         }
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         m_error = re.search(
index bac484c67dbb01bbafa319c117b7c6d152b7dd5d..954dfccb75954d50a9a46bc14bdb1d0dcbd5588c 100644 (file)
@@ -8,7 +8,6 @@ from ..utils import (
     int_or_none,
     js_to_json,
     qualities,
-    determine_ext,
 )
 
 
@@ -45,13 +44,18 @@ class PornHdIE(InfoExtractor):
         thumbnail = self._search_regex(
             r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
 
-        quality = qualities(['SD', 'HD'])
-        formats = [{
-            'url': source['file'],
-            'format_id': '%s-%s' % (source['label'], determine_ext(source['file'])),
-            'quality': quality(source['label']),
-        } for source in json.loads(js_to_json(self._search_regex(
-            r"(?s)'sources'\s*:\s*(\[.+?\])", webpage, 'sources')))]
+        quality = qualities(['sd', 'hd'])
+        sources = json.loads(js_to_json(self._search_regex(
+            r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}\);", webpage, 'sources')))
+        formats = []
+        for container, s in sources.items():
+            for qname, video_url in s.items():
+                formats.append({
+                    'url': video_url,
+                    'container': container,
+                    'format_id': '%s-%s' % (container, qname),
+                    'quality': quality(qname),
+                })
         self._sort_formats(formats)
 
         return {
index 2ca15b717ec5dd9a36b6aa2bfc7e9019148c0493..634142d0d27300eb82ea2f460fd2163a20208709 100644 (file)
@@ -4,10 +4,12 @@ import os
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
+    compat_urllib_parse,
     compat_urllib_parse_urlparse,
     compat_urllib_request,
-    compat_urllib_parse,
+)
+from ..utils import (
     str_to_int,
 )
 from ..aes import (
@@ -16,7 +18,7 @@ from ..aes import (
 
 
 class PornHubIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?pornhub\.com/view_video\.php\?viewkey=(?P<id>[0-9a-f]+)'
+    _VALID_URL = r'https?://(?:www\.)?pornhub\.com/view_video\.php\?viewkey=(?P<id>[0-9a-f]+)'
     _TEST = {
         'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
         'md5': '882f488fa1f0026f023f33576004a2ed',
index 5253aa3d30062ec7937c5e5f48d85998923c6e8f..34735c51e19c7dbbb1c07f2fc4a203df4dda70a9 100644 (file)
@@ -1,56 +1,106 @@
 from __future__ import unicode_literals
 
-import re
+import json
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_request,
+)
 from ..utils import (
-    compat_urllib_parse,
-
-    unified_strdate,
+    int_or_none,
 )
 
 
 class PornotubeIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:\w+\.)?pornotube\.com(/c/(?P<channel>[0-9]+))?(/m/(?P<videoid>[0-9]+))(/(?P<title>.+))$'
+    """Extractor for pornotube.com clips, served through the AEBN API."""
+
+    _VALID_URL = r'https?://(?:\w+\.)?pornotube\.com/(?:[^?#]*?)/video/(?P<id>[0-9]+)'
     _TEST = {
-        'url': 'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing',
-        'md5': '374dd6dcedd24234453b295209aa69b6',
+        'url': 'http://www.pornotube.com/orientation/straight/video/4964/title/weird-hot-and-wet-science',
+        'md5': '60fc5a4f0d93a97968fc7999d98260c9',
         'info_dict': {
-            'id': '1689755',
-            'ext': 'flv',
-            'upload_date': '20090708',
-            'title': 'Marilyn-Monroe-Bathing',
-            'age_limit': 18
+            'id': '4964',
+            'ext': 'mp4',
+            'upload_date': '20141203',
+            'title': 'Weird Hot and Wet Science',
+            'description': 'md5:a8304bef7ef06cb4ab476ca6029b01b0',
+            'categories': ['Adult Humor', 'Blondes'],
+            'uploader': 'Alpha Blue Archives',
+            'thumbnail': 're:^https?://.*\\.jpg$',
+            'timestamp': 1417582800,
+            'age_limit': 18,
         }
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        """Return the info dict for the clip at ``url``.
+
+        Obtains an API token with the key published in the site's JS
+        config, then queries the delivery endpoint for the media URL and
+        the content endpoint for the metadata.
+        """
+        video_id = self._match_id(url)
 
-        video_id = mobj.group('videoid')
-        video_title = mobj.group('title')
+        # Fetch origin token
+        js_config = self._download_webpage(
+            'http://www.pornotube.com/assets/src/app/config.js', video_id,
+            note='Download JS config')
+        originAuthenticationSpaceKey = self._search_regex(
+            r"constant\('originAuthenticationSpaceKey',\s*'([^']+)'",
+            js_config, 'originAuthenticationSpaceKey')
+
+        # Fetch actual token
+        token_req_data = {
+            'authenticationSpaceKey': originAuthenticationSpaceKey,
+            'credentials': 'Clip Application',
+        }
+        token_req = compat_urllib_request.Request(
+            'https://api.aebn.net/auth/v1/token/primal',
+            data=json.dumps(token_req_data).encode('utf-8'))
+        token_req.add_header('Content-Type', 'application/json')
+        token_req.add_header('Origin', 'http://www.pornotube.com')
+        token_answer = self._download_json(
+            token_req, video_id, note='Requesting primal token')
+        token = token_answer['tokenKey']
 
-        # Get webpage content
-        webpage = self._download_webpage(url, video_id)
+        # Get video URL
+        delivery_req = compat_urllib_request.Request(
+            'https://api.aebn.net/delivery/v1/clips/%s/MP4' % video_id)
+        delivery_req.add_header('Authorization', token)
+        delivery_info = self._download_json(
+            delivery_req, video_id, note='Downloading delivery information')
+        video_url = delivery_info['mediaUrl']
 
-        # Get the video URL
-        VIDEO_URL_RE = r'url: "(?P<url>http://video[0-9].pornotube.com/.+\.flv)",'
-        video_url = self._search_regex(VIDEO_URL_RE, webpage, 'video url')
-        video_url = compat_urllib_parse.unquote(video_url)
+        # Get additional info (title etc.)
+        info_req = compat_urllib_request.Request(
+            'https://api.aebn.net/content/v1/clips/%s?expand='
+            'title,description,primaryImageNumber,startSecond,endSecond,'
+            'movie.title,movie.MovieId,movie.boxCoverFront,movie.stars,'
+            'movie.studios,stars.name,studios.name,categories.name,'
+            'clipActive,movieActive,publishDate,orientations' % video_id)
+        info_req.add_header('Authorization', token)
+        info = self._download_json(
+            info_req, video_id, note='Downloading metadata')
 
-        # Get the uploaded date
-        VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
-        upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, 'upload date', fatal=False)
-        if upload_date:
-            upload_date = unified_strdate(upload_date)
-        age_limit = self._rta_search(webpage)
+        timestamp = int_or_none(info.get('publishDate'), scale=1000)
+        # "studios" may be missing, null or an empty list; fall back to a
+        # single empty entry so the uploader simply comes out as None.
+        studios = info.get('studios') or [{}]
+        uploader = studios[0].get('name')
+        movie_id = info['movie']['movieId']
+        thumbnail = 'http://pic.aebn.net/dis/t/%s/%s_%08d.jpg' % (
+            movie_id, movie_id, info['primaryImageNumber'])
+        # Likewise tolerate a missing or null category list.
+        categories = [c['name'] for c in info.get('categories') or []]
 
         return {
             'id': video_id,
             'url': video_url,
-            'upload_date': upload_date,
-            'title': video_title,
-            'ext': 'flv',
-            'format': 'flv',
-            'age_limit': age_limit,
+            'title': info['title'],
+            'description': info.get('description'),
+            'timestamp': timestamp,
+            'uploader': uploader,
+            'thumbnail': thumbnail,
+            'categories': categories,
+            'age_limit': 18,
         }
index 7fcde086c0b234f7020cfa9811425d22355808a3..f536e6e6cdfb3d71e21c98614e2baf117387493b 100644 (file)
@@ -4,12 +4,14 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-    determine_ext,
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
 )
+from ..utils import (
+    determine_ext,
+    ExtractorError,
+)
 
 
 class PromptFileIE(InfoExtractor):
index 32d747ede0188a7347637aa1ee8075161ec5c1f8..385681d06e3dda356193d9f89c7ccbdd4cbde453 100644 (file)
@@ -5,8 +5,10 @@ import re
 
 from hashlib import sha1
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
+)
+from ..utils import (
     unified_strdate,
 )
 
@@ -85,7 +87,7 @@ class ProSiebenSat1IE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'Im Interview: Kai Wiesinger',
                 'description': 'md5:e4e5370652ec63b95023e914190b4eb9',
-                'upload_date': '20140225',
+                'upload_date': '20140203',
                 'duration': 522.56,
             },
             'params': {
@@ -100,7 +102,7 @@ class ProSiebenSat1IE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'Jagd auf Fertigkost im Elsthal - Teil 2',
                 'description': 'md5:2669cde3febe9bce13904f701e774eb6',
-                'upload_date': '20140225',
+                'upload_date': '20141014',
                 'duration': 2410.44,
             },
             'params': {
@@ -152,12 +154,22 @@ class ProSiebenSat1IE(InfoExtractor):
                 'skip_download': True,
             },
         },
+        {
+            'url': 'http://www.prosieben.de/tv/joko-gegen-klaas/videos/playlists/episode-8-ganze-folge-playlist',
+            'info_dict': {
+                'id': '439664',
+                'title': 'Episode 8 - Ganze Folge - Playlist',
+                'description': 'md5:63b8963e71f481782aeea877658dec84',
+            },
+            'playlist_count': 2,
+        },
     ]
 
     _CLIPID_REGEXES = [
         r'"clip_id"\s*:\s+"(\d+)"',
         r'clipid: "(\d+)"',
         r'clip[iI]d=(\d+)',
+        r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
     ]
     _TITLE_REGEXES = [
         r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',
@@ -178,11 +190,19 @@ class ProSiebenSat1IE(InfoExtractor):
         r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>',
         r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>',
     ]
+    _PAGE_TYPE_REGEXES = [
+        r'<meta name="page_type" content="([^"]+)">',
+        r"'itemType'\s*:\s*'([^']*)'",
+    ]
+    _PLAYLIST_ID_REGEXES = [
+        r'content[iI]d=(\d+)',
+        r"'itemId'\s*:\s*'([^']*)'",
+    ]
+    _PLAYLIST_CLIP_REGEXES = [
+        r'(?s)data-qvt=.+?<a href="([^"]+)"',
+    ]
 
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-
+    def _extract_clip(self, url, webpage):
         clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id')
 
         access_token = 'testclient'
@@ -281,3 +301,47 @@ class ProSiebenSat1IE(InfoExtractor):
             'duration': duration,
             'formats': formats,
         }
+
+    def _extract_playlist(self, url, webpage):
+        """Return a playlist result for the playlist page ``webpage``.
+
+        Every relative clip link found on the page is resolved against
+        the scheme and host of ``url`` and delegated back to this
+        extractor.
+
+        NOTE(review): falls through and returns None when none of the
+        clip regexes matches anything.
+        """
+        playlist_id = self._html_search_regex(
+            self._PLAYLIST_ID_REGEXES, webpage, 'playlist id')
+        for regex in self._PLAYLIST_CLIP_REGEXES:
+            playlist_clips = re.findall(regex, webpage)
+            if not playlist_clips:
+                continue
+            title = self._html_search_regex(
+                self._TITLE_REGEXES, webpage, 'title')
+            description = self._html_search_regex(
+                self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False)
+            # "scheme://host" of the page, computed once for all clips.
+            base_url = re.match('(.+?//.+?)/', url).group(1)
+            entries = [
+                self.url_result(base_url + clip_path, 'ProSiebenSat1')
+                for clip_path in playlist_clips]
+            return self.playlist_result(entries, playlist_id, title, description)
+
+    def _real_extract(self, url):
+        """Download the page at ``url`` and dispatch on its page type.
+
+        Pages without a recognisable type marker are treated as clips.
+        NOTE(review): any type other than "clip" or "playlist" makes
+        this return None.
+        """
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        page_type = self._search_regex(
+            self._PAGE_TYPE_REGEXES, webpage,
+            'page type', default='clip').lower()
+        if page_type == 'clip':
+            return self._extract_clip(url, webpage)
+        elif page_type == 'playlist':
+            return self._extract_playlist(url, webpage)
index 3bc78060de3b0b4e9051c4ab07e74dc7d7ca5c9c..af7d76cf47e575277de3ffe480fdb8e1eb48b43c 100644 (file)
@@ -3,8 +3,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urlparse,
+)
+from ..utils import (
     determine_ext,
     int_or_none,
 )
diff --git a/youtube_dl/extractor/radiobremen.py b/youtube_dl/extractor/radiobremen.py
new file mode 100644 (file)
index 0000000..0d70631
--- /dev/null
@@ -0,0 +1,83 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    parse_duration,
+)
+
+
+class RadioBremenIE(InfoExtractor):
+    """Extractor for single clips of the Radio Bremen Mediathek."""
+
+    # The scheme must be "https?": the earlier "http?" only made the "p"
+    # optional and therefore rejected every https:// URL.
+    _VALID_URL = r'https?://(?:www\.)?radiobremen\.de/mediathek/(?:index\.html)?\?id=(?P<id>[0-9]+)'
+    IE_NAME = 'radiobremen'
+
+    _TEST = {
+        'url': 'http://www.radiobremen.de/mediathek/index.html?id=114720',
+        'info_dict': {
+            'id': '114720',
+            'ext': 'mp4',
+            'duration': 1685,
+            'width': 512,
+            'title': 'buten un binnen vom 22. Dezember',
+            'thumbnail': r're:https?://.*\.jpg$',
+            'description': 'Unter anderem mit diesen Themen: 45 Flüchtlinge sind in Worpswede angekommen +++ Freies Internet für alle: Bremer arbeiten an einem flächendeckenden W-Lan-Netzwerk +++ Aktivisten kämpfen für das Unibad +++ So war das Wetter 2014 +++',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Build the info dict for the Mediathek clip at ``url``.
+
+        Title, description and duration are scraped from the metadata
+        endpoint; the media URL is assembled from the arguments of the
+        ``ardformatplayerclassic(...)`` call embedded in the clip page.
+
+        Raises ExtractorError if that player call cannot be found.
+        """
+        video_id = self._match_id(url)
+
+        meta_url = "http://www.radiobremen.de/apps/php/mediathek/metadaten.php?id=%s" % video_id
+        meta_doc = self._download_webpage(
+            meta_url, video_id, 'Downloading metadata')
+        title = self._html_search_regex(
+            r"<h1.*>(?P<title>.+)</h1>", meta_doc, "title")
+        description = self._html_search_regex(
+            r"<p>(?P<description>.*)</p>", meta_doc, "description", fatal=False)
+        duration = parse_duration(self._html_search_regex(
+            r"L&auml;nge:</td>\s+<td>(?P<duration>[0-9]+:[0-9]+)</td>",
+            meta_doc, "duration", fatal=False))
+
+        page_doc = self._download_webpage(
+            url, video_id, 'Downloading video information')
+        mobj = re.search(
+            r"ardformatplayerclassic\(\'playerbereich\',\'(?P<width>[0-9]+)\',\'.*\',\'(?P<video_id>[0-9]+)\',\'(?P<secret>[0-9]+)\',\'(?P<thumbnail>.+)\',\'\'\)",
+            page_doc)
+        if mobj is None:
+            # Without the player call there is no secret/width to build the
+            # media URL from; report that instead of failing on None.group().
+            raise ExtractorError('Unable to extract player parameters')
+        width = mobj.group('width')
+        video_url = (
+            "http://dl-ondemand.radiobremen.de/mediabase/%s/%s_%s_%s.mp4" %
+            (video_id, video_id, mobj.group('secret'), width))
+
+        formats = [{
+            'url': video_url,
+            'ext': 'mp4',
+            'width': int(width),
+        }]
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'formats': formats,
+            'thumbnail': mobj.group('thumbnail'),
+        }
diff --git a/youtube_dl/extractor/radiode.py b/youtube_dl/extractor/radiode.py
new file mode 100644 (file)
index 0000000..f95bc94
--- /dev/null
@@ -0,0 +1,66 @@
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+
+
+class RadioDeIE(InfoExtractor):
+    """Extractor for live station streams hosted on radio.de sites."""
+
+    IE_NAME = 'radio.de'
+    _VALID_URL = r'https?://(?P<id>.+?)\.(?:radio\.(?:de|at|fr|pt|es|pl|it)|rad\.io)'
+    _TEST = {
+        'url': 'http://ndr2.radio.de/',
+        'md5': '3b4cdd011bc59174596b6145cda474a4',
+        'info_dict': {
+            'id': 'ndr2',
+            'ext': 'mp3',
+            'title': 're:^NDR 2 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'description': 'md5:591c49c702db1a33751625ebfb67f273',
+            # Raw string: "\." is a regex escape, not a string escape.
+            'thumbnail': r're:^https?://.*\.png',
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return a live-stream info dict for the station page at ``url``.
+
+        The station data is the JSON object returned by the page's inline
+        ``_getBroadcast`` function; every entry of its ``streamUrls`` list
+        becomes one format.
+        """
+        radio_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, radio_id)
+
+        broadcast = json.loads(self._search_regex(
+            r'_getBroadcast\s*=\s*function\(\s*\)\s*{\s*return\s+({.+?})\s*;\s*}',
+            webpage, 'broadcast'))
+
+        title = self._live_title(broadcast['name'])
+        description = broadcast.get('description') or broadcast.get('shortDescription')
+        thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl')
+
+        formats = [{
+            'url': stream['streamUrl'],
+            'ext': stream['streamContentFormat'].lower(),
+            'acodec': stream['streamContentFormat'],
+            # Bitrate and sample rate are optional metadata; a stream
+            # lacking them should not abort the whole extraction.
+            'abr': stream.get('bitRate'),
+            'asr': stream.get('sampleRate'),
+        } for stream in broadcast['streamUrls']]
+        self._sort_formats(formats)
+
+        return {
+            'id': radio_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'is_live': True,
+            'formats': formats,
+        }
index 2d39ecfe4faa537cb86f156df930dc2b19241a0b..aa26b7e0bb0f4f0a489ad4cfdef330c704747680 100644 (file)
@@ -3,10 +3,12 @@ from __future__ import unicode_literals
 import re
 
 from .subtitles import SubtitlesInfoExtractor
+from ..compat import (
+    compat_urllib_parse,
+)
 from ..utils import (
     parse_duration,
     unified_strdate,
-    compat_urllib_parse,
 )
 
 
diff --git a/youtube_dl/extractor/restudy.py b/youtube_dl/extractor/restudy.py
new file mode 100644 (file)
index 0000000..b17c2bf
--- /dev/null
@@ -0,0 +1,51 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class RestudyIE(InfoExtractor):
+    """Extractor for restudy.dk video pages."""
+
+    _VALID_URL = r'https?://(?:www\.)?restudy\.dk/video/play/id/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'https://www.restudy.dk/video/play/id/1637',
+        'info_dict': {
+            'id': '1637',
+            'ext': 'flv',
+            'title': 'Leiden-frosteffekt',
+            'description': 'Denne video er et eksperiment med flydende kvælstof.',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the restudy.dk video at ``url``.
+
+        Title and description come from the page's OpenGraph tags; the
+        formats come from the per-video SMIL document.
+        """
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._og_search_title(webpage).strip()
+        # NOTE(review): the description lookup is presumably non-fatal and
+        # may yield None (confirm in common.py); only strip a real string.
+        description = self._og_search_description(webpage)
+        if description:
+            description = description.strip()
+
+        formats = self._extract_smil_formats(
+            'https://www.restudy.dk/awsmedia/SmilDirectory/video_%s.xml' % video_id,
+            video_id)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/rte.py b/youtube_dl/extractor/rte.py
new file mode 100644 (file)
index 0000000..04158b9
--- /dev/null
@@ -0,0 +1,75 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+from ..utils import (
+    float_or_none,
+)
+
+
+class RteIE(InfoExtractor):
+    """Extractor for shows on the RTE player (rte.ie/player)."""
+
+    # "https?" rather than the earlier "http?", which only made the "p"
+    # optional and never matched https:// URLs.
+    _VALID_URL = r'https?://(?:www\.)?rte\.ie/player/[^/]{2,3}/show/(?P<id>[0-9]+)/'
+    _TEST = {
+        'url': 'http://www.rte.ie/player/de/show/10363114/',
+        'info_dict': {
+            'id': '10363114',
+            'ext': 'mp4',
+            'title': 'One News',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'description': 'The One O\'Clock News followed by Weather.',
+            'duration': 436.844,
+        },
+        'params': {
+            'skip_download': 'f4m fails with --test atm'
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the RTE player show at ``url``.
+
+        Metadata is read from the page's meta tags; the formats come from
+        the f4m manifest referenced by the show's JSON feed.
+        """
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._og_search_title(webpage)
+        description = self._html_search_meta('description', webpage, 'description')
+        duration = float_or_none(self._html_search_meta(
+            'duration', webpage, 'duration', fatal=False), 1000)
+
+        thumbnail_id = self._search_regex(
+            r'<meta name="thumbnail" content="uri:irus:(.*?)" />', webpage, 'thumbnail')
+        thumbnail = 'http://img.rasset.ie/' + thumbnail_id + '.jpg'
+
+        feeds_url = self._html_search_meta("feeds-prefix", webpage, 'feeds url') + video_id
+        feed = self._download_json(feeds_url, video_id)
+
+        # The manifest URL is the media group's server prefix plus its
+        # relative path.
+        media_group = feed['shows'][0]['media:group'][0]
+        f4m_url = media_group['rte:server'] + media_group['url']
+        f4m_formats = self._extract_f4m_formats(f4m_url, video_id)
+        # Re-label every f4m format as mp4; the dimensions are optional, so
+        # read them with .get() rather than failing on a missing key.
+        f4m_formats = [{
+            'format_id': f['format_id'],
+            'url': f['url'],
+            'ext': 'mp4',
+            'width': f.get('width'),
+            'height': f.get('height'),
+        } for f in f4m_formats]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': f4m_formats,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+        }
index d029b0ec525fc2e1186aba7e1c3dadf9bed981b0..a3ca79f2ccfd2e00c09a4f9b2a9503fa85669b65 100644 (file)
@@ -8,7 +8,7 @@ from ..utils import parse_duration
 
 class RtlXlIE(InfoExtractor):
     IE_NAME = 'rtlxl.nl'
-    _VALID_URL = r'https?://www\.rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)'
+    _VALID_URL = r'https?://(www\.)?rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)'
 
     _TEST = {
         'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677',
diff --git a/youtube_dl/extractor/rtp.py b/youtube_dl/extractor/rtp.py
new file mode 100644 (file)
index 0000000..7736cab
--- /dev/null
@@ -0,0 +1,71 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import js_to_json
+
+
+class RTPIE(InfoExtractor):
+    """Extractor for programme episodes on RTP Play (rtp.pt/play)."""
+
+    _VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)/?'
+    _TESTS = [{
+        'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas',
+        'info_dict': {
+            'id': 'e174042',
+            'ext': 'mp3',
+            'title': 'Paixões Cruzadas',
+            'description': 'As paixões musicais de António Cartaxo e António Macedo',
+            'thumbnail': r're:^https?://.*\.jpg',
+        },
+        'params': {
+            'skip_download': True,  # RTMP download
+        },
+    }, {
+        'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dict for the RTP Play episode at ``url``.
+
+        The stream is described by the object literal passed to
+        ``RTPPLAY.player.newPlayer(...)`` in the page; it is converted to
+        JSON and turned into a single RTMP format.
+        """
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+        title = self._html_search_meta(
+            'twitter:title', webpage, display_name='title', fatal=True)
+        description = self._html_search_meta('description', webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        player_config = self._search_regex(
+            r'(?s)RTPPLAY\.player\.newPlayer\(\s*(\{.*?\})\s*\)', webpage, 'player config')
+        config = json.loads(js_to_json(player_config))
+
+        # "file" is "<path>.<ext>"; the play path below is "<ext>:<path>".
+        path, ext = config['file'].rsplit('.', 1)
+        # Audio-only items are flagged by advertising no video codec.
+        vcodec = 'none' if config.get('type') == 'audio' else None
+        formats = [{
+            'app': config.get('application'),
+            'play_path': '{ext:s}:{path:s}'.format(ext=ext, path=path),
+            'page_url': url,
+            'url': 'rtmp://{streamer:s}/{application:s}'.format(**config),
+            'rtmp_live': config.get('live', False),
+            'ext': ext,
+            'vcodec': vcodec,
+            'player_url': 'http://programas.rtp.pt/play/player.swf?v3',
+        }]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'description': description,
+            'thumbnail': thumbnail,
+        }
index e8199b11446e503897500e0e423b3cbd5d52b441..5e84c109802e34ce8f57496ee3b7e2cd409c0788 100644 (file)
@@ -4,18 +4,20 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_str,
+)
 from ..utils import (
     int_or_none,
     parse_duration,
     parse_iso8601,
     unescapeHTML,
-    compat_str,
 )
 
 
 class RTSIE(InfoExtractor):
     IE_DESC = 'RTS.ch'
-    _VALID_URL = r'^https?://(?:www\.)?rts\.ch/(?:[^/]+/){2,}(?P<id>[0-9]+)-.*?\.html'
+    _VALID_URL = r'https?://(?:www\.)?rts\.ch/(?:(?:[^/]+/){2,}(?P<id>[0-9]+)-(?P<display_id>.+?)\.html|play/tv/[^/]+/video/(?P<display_id_new>.+?)\?id=(?P<id_new>[0-9]+))'
 
     _TESTS = [
         {
@@ -23,6 +25,7 @@ class RTSIE(InfoExtractor):
             'md5': '753b877968ad8afaeddccc374d4256a5',
             'info_dict': {
                 'id': '3449373',
+                'display_id': 'les-enfants-terribles',
                 'ext': 'mp4',
                 'duration': 1488,
                 'title': 'Les Enfants Terribles',
@@ -30,7 +33,8 @@ class RTSIE(InfoExtractor):
                 'uploader': 'Divers',
                 'upload_date': '19680921',
                 'timestamp': -40280400,
-                'thumbnail': 're:^https?://.*\.image'
+                'thumbnail': 're:^https?://.*\.image',
+                'view_count': int,
             },
         },
         {
@@ -38,6 +42,7 @@ class RTSIE(InfoExtractor):
             'md5': 'c148457a27bdc9e5b1ffe081a7a8337b',
             'info_dict': {
                 'id': '5624067',
+                'display_id': 'entre-ciel-et-mer',
                 'ext': 'mp4',
                 'duration': 3720,
                 'title': 'Les yeux dans les cieux - Mon homard au Canada',
@@ -45,7 +50,8 @@ class RTSIE(InfoExtractor):
                 'uploader': 'Passe-moi les jumelles',
                 'upload_date': '20140404',
                 'timestamp': 1396635300,
-                'thumbnail': 're:^https?://.*\.image'
+                'thumbnail': 're:^https?://.*\.image',
+                'view_count': int,
             },
         },
         {
@@ -53,6 +59,7 @@ class RTSIE(InfoExtractor):
             'md5': 'b4326fecd3eb64a458ba73c73e91299d',
             'info_dict': {
                 'id': '5745975',
+                'display_id': '1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski',
                 'ext': 'mp4',
                 'duration': 48,
                 'title': '1/2, Kloten - Fribourg (5-2): second but pour Gottéron par Kwiatowski',
@@ -60,7 +67,8 @@ class RTSIE(InfoExtractor):
                 'uploader': 'Hockey',
                 'upload_date': '20140403',
                 'timestamp': 1396556882,
-                'thumbnail': 're:^https?://.*\.image'
+                'thumbnail': 're:^https?://.*\.image',
+                'view_count': int,
             },
             'skip': 'Blocked outside Switzerland',
         },
@@ -69,6 +77,7 @@ class RTSIE(InfoExtractor):
             'md5': '9bb06503773c07ce83d3cbd793cebb91',
             'info_dict': {
                 'id': '5745356',
+                'display_id': 'londres-cachee-par-un-epais-smog',
                 'ext': 'mp4',
                 'duration': 33,
                 'title': 'Londres cachée par un épais smog',
@@ -76,7 +85,8 @@ class RTSIE(InfoExtractor):
                 'uploader': 'Le Journal en continu',
                 'upload_date': '20140403',
                 'timestamp': 1396537322,
-                'thumbnail': 're:^https?://.*\.image'
+                'thumbnail': 're:^https?://.*\.image',
+                'view_count': int,
             },
         },
         {
@@ -84,6 +94,7 @@ class RTSIE(InfoExtractor):
             'md5': 'dd8ef6a22dff163d063e2a52bc8adcae',
             'info_dict': {
                 'id': '5706148',
+                'display_id': 'urban-hippie-de-damien-krisl-03-04-2014',
                 'ext': 'mp3',
                 'duration': 123,
                 'title': '"Urban Hippie", de Damien Krisl',
@@ -92,22 +103,44 @@ class RTSIE(InfoExtractor):
                 'timestamp': 1396551600,
             },
         },
+        {
+            'url': 'http://www.rts.ch/play/tv/-/video/le-19h30?id=6348260',
+            'md5': '968777c8779e5aa2434be96c54e19743',
+            'info_dict': {
+                'id': '6348260',
+                'display_id': 'le-19h30',
+                'ext': 'mp4',
+                'duration': 1796,
+                'title': 'Le 19h30',
+                'description': '',
+                'uploader': 'Le 19h30',
+                'upload_date': '20141201',
+                'timestamp': 1417458600,
+                'thumbnail': 're:^https?://.*\.image',
+                'view_count': int,
+            },
+        },
+        {
+            'url': 'http://www.rts.ch/play/tv/le-19h30/video/le-chantier-du-nouveau-parlement-vaudois-a-permis-une-trouvaille-historique?id=6348280',
+            'only_matching': True,
+        }
     ]
 
     def _real_extract(self, url):
         m = re.match(self._VALID_URL, url)
-        video_id = m.group('id')
+        video_id = m.group('id') or m.group('id_new')
+        display_id = m.group('display_id') or m.group('display_id_new')
 
         def download_json(internal_id):
             return self._download_json(
                 'http://www.rts.ch/a/%s.html?f=json/article' % internal_id,
-                video_id)
+                display_id)
 
         all_info = download_json(video_id)
 
         # video_id extracted out of URL is not always a real id
         if 'video' not in all_info and 'audio' not in all_info:
-            page = self._download_webpage(url, video_id)
+            page = self._download_webpage(url, display_id)
             internal_id = self._html_search_regex(
                 r'<(?:video|audio) data-id="([0-9]+)"', page,
                 'internal video id')
@@ -143,6 +176,7 @@ class RTSIE(InfoExtractor):
 
         return {
             'id': video_id,
+            'display_id': display_id,
             'formats': formats,
             'title': info['title'],
             'description': info.get('intro'),
index 0c8790da28c4b06cfbc941bdff7ad4e64b47ac74..5b1c3577a02bb541d912faa6958533086534af01 100644 (file)
@@ -5,10 +5,12 @@ import re
 import itertools
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_str,
-    unified_strdate,
+)
+from ..utils import (
     ExtractorError,
+    unified_strdate,
 )
 
 
@@ -36,9 +38,7 @@ class RutubeIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         video = self._download_json(
             'http://rutube.ru/api/video/%s/?format=json' % video_id,
             video_id, 'Downloading video JSON')
@@ -53,6 +53,7 @@ class RutubeIE(InfoExtractor):
         m3u8_url = options['video_balancer'].get('m3u8')
         if m3u8_url is None:
             raise ExtractorError('Couldn\'t find m3u8 manifest url')
+        formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
 
         return {
             'id': video['id'],
@@ -60,8 +61,7 @@ class RutubeIE(InfoExtractor):
             'description': video['description'],
             'duration': video['duration'],
             'view_count': video['hits'],
-            'url': m3u8_url,
-            'ext': 'mp4',
+            'formats': formats,
             'thumbnail': video['thumbnail_url'],
             'uploader': author.get('name'),
             'uploader_id': compat_str(author['id']) if author else None,
@@ -70,6 +70,37 @@ class RutubeIE(InfoExtractor):
         }
 
 
+class RutubeEmbedIE(InfoExtractor):
+    IE_NAME = 'rutube:embed'
+    IE_DESC = 'Rutube embedded videos'
+    _VALID_URL = 'https?://rutube\.ru/video/embed/(?P<id>[0-9]+)'
+
+    _TEST = {
+        'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
+        'info_dict': {
+            'id': 'a10e53b86e8f349080f718582ce4c661',
+            'ext': 'mp4',
+            'upload_date': '20131223',
+            'uploader_id': '297833',
+            'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89',
+            'uploader': 'subziro89 ILya',
+            'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89',
+        },
+        'params': {
+            'skip_download': 'Requires ffmpeg',
+        },
+    }
+
+    def _real_extract(self, url):
+        embed_id = self._match_id(url)
+        webpage = self._download_webpage(url, embed_id)
+
+        canonical_url = self._html_search_regex(
+            r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage,
+            'Canonical URL')
+        return self.url_result(canonical_url, 'Rutube')
+
+
 class RutubeChannelIE(InfoExtractor):
     IE_NAME = 'rutube:channel'
     IE_DESC = 'Rutube channels'
@@ -114,8 +145,7 @@ class RutubeMovieIE(RutubeChannelIE):
     _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json'
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        movie_id = mobj.group('id')
+        movie_id = self._match_id(url)
         movie = self._download_json(
             self._MOVIE_TEMPLATE % movie_id, movie_id,
             'Downloading movie JSON')
index c145f6fc72f1b9eed8a5089dce48dfdd5f106a79..dfd897ba3a3f0a7297164fb315e4543bb597d678 100644 (file)
@@ -1,14 +1,14 @@
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
+from ..compat import (
     compat_parse_qs,
     compat_urllib_request,
 )
+from ..utils import (
+    ExtractorError,
+)
 
 
 class ScreencastIE(InfoExtractor):
@@ -57,8 +57,7 @@ class ScreencastIE(InfoExtractor):
     ]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         video_url = self._html_search_regex(
diff --git a/youtube_dl/extractor/screencastomatic.py b/youtube_dl/extractor/screencastomatic.py
new file mode 100644 (file)
index 0000000..0533742
--- /dev/null
@@ -0,0 +1,49 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+    ExtractorError,
+    js_to_json,
+)
+
+
+class ScreencastOMaticIE(InfoExtractor):
+    _VALID_URL = r'https?://screencast-o-matic\.com/watch/(?P<id>[0-9a-zA-Z]+)'
+    _TEST = {
+        'url': 'http://screencast-o-matic.com/watch/c2lD3BeOPl',
+        'md5': '483583cb80d92588f15ccbedd90f0c18',
+        'info_dict': {
+            'id': 'c2lD3BeOPl',
+            'ext': 'mp4',
+            'title': 'Welcome to 3-4 Philosophy @ DECV!',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'description': 'as the title says! also: some general info re 1) VCE philosophy and 2) distance learning.',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        setup_js = self._search_regex(
+            r"(?s)jwplayer\('mp4Player'\).setup\((\{.*?\})\);",
+            webpage, 'setup code')
+        data = self._parse_json(setup_js, video_id, transform_source=js_to_json)
+        try:
+            video_data = next(
+                m for m in data['modes'] if m.get('type') == 'html5')
+        except StopIteration:
+            raise ExtractorError('Could not find any video entries!')
+        video_url = compat_urlparse.urljoin(url, video_data['config']['file'])
+        thumbnail = data.get('image')
+
+        return {
+            'id': video_id,
+            'title': self._og_search_title(webpage),
+            'description': self._og_search_description(webpage),
+            'url': video_url,
+            'ext': 'mp4',
+            'thumbnail': thumbnail,
+        }
similarity index 50%
rename from youtube_dl/extractor/cinemassacre.py
rename to youtube_dl/extractor/screenwavemedia.py
index b7fa73c3bfc8f8c290d899662ce0cb102ac86670..6c9fdb7c1aceb35efc166c9207fd503603040b9b 100644 (file)
@@ -5,61 +5,27 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
-    ExtractorError,
     int_or_none,
+    unified_strdate,
 )
 
 
-class CinemassacreIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
-    _TESTS = [
-        {
-            'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
-            'md5': 'fde81fbafaee331785f58cd6c0d46190',
-            'info_dict': {
-                'id': '19911',
-                'ext': 'mp4',
-                'upload_date': '20121110',
-                'title': '“Angry Video Game Nerd: The Movie” – Trailer',
-                'description': 'md5:fb87405fcb42a331742a0dce2708560b',
-            },
-        },
-        {
-            'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
-            'md5': 'd72f10cd39eac4215048f62ab477a511',
-            'info_dict': {
-                'id': '521be8ef82b16',
-                'ext': 'mp4',
-                'upload_date': '20131002',
-                'title': 'The Mummy’s Hand (1940)',
-            },
-        }
-    ]
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('display_id')
-
-        webpage = self._download_webpage(url, display_id)
-        video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
-        mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<full_video_id>(?:Cinemassacre-)?(?P<video_id>.+?)))"', webpage)
-        if not mobj:
-            raise ExtractorError('Can\'t extract embed url and video id')
-        playerdata_url = mobj.group('embed_url')
-        video_id = mobj.group('video_id')
-        full_video_id = mobj.group('full_video_id')
+class ScreenwaveMediaIE(InfoExtractor):
+    _VALID_URL = r'http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<id>.+)'
 
-        video_title = self._html_search_regex(
-            r'<title>(?P<title>.+?)\|', webpage, 'title')
-        video_description = self._html_search_regex(
-            r'<div class="entry-content">(?P<description>.+?)</div>',
-            webpage, 'description', flags=re.DOTALL, fatal=False)
-        video_thumbnail = self._og_search_thumbnail(webpage)
+    _TESTS = [{
+        'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911',
+        'only_matching': True,
+    }]
 
-        playerdata = self._download_webpage(playerdata_url, video_id, 'Downloading player webpage')
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        playerdata = self._download_webpage(url, video_id, 'Downloading player webpage')
 
+        vidtitle = self._search_regex(
+            r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/')
         vidurl = self._search_regex(
-            r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/')
+            r'\'vidurl\'\s*:\s*"([^"]+)"', playerdata, 'vidurl').replace('\\/', '/')
 
         videolist_url = None
 
@@ -67,7 +33,7 @@ class CinemassacreIE(InfoExtractor):
         if mobj:
             videoserver = mobj.group('videoserver')
             mobj = re.search(r'\'vidid\'\s*:\s*"(?P<vidid>[^\']+)"', playerdata)
-            vidid = mobj.group('vidid') if mobj else full_video_id
+            vidid = mobj.group('vidid') if mobj else video_id
             videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid)
         else:
             mobj = re.search(r"file\s*:\s*'(?P<smil>http.+?/jwplayer\.smil)'", playerdata)
@@ -85,34 +51,128 @@ class CinemassacreIE(InfoExtractor):
                 file_ = src.partition(':')[-1]
                 width = int_or_none(video.get('width'))
                 height = int_or_none(video.get('height'))
-                bitrate = int_or_none(video.get('system-bitrate'))
+                bitrate = int_or_none(video.get('system-bitrate'), scale=1000)
                 format = {
                     'url': baseurl + file_,
                     'format_id': src.rpartition('.')[0].rpartition('_')[-1],
                 }
                 if width or height:
                     format.update({
-                        'tbr': bitrate // 1000 if bitrate else None,
+                        'tbr': bitrate,
                         'width': width,
                         'height': height,
                     })
                 else:
                     format.update({
-                        'abr': bitrate // 1000 if bitrate else None,
+                        'abr': bitrate,
                         'vcodec': 'none',
                     })
                 formats.append(format)
-            self._sort_formats(formats)
         else:
             formats = [{
                 'url': vidurl,
             }]
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
-            'title': video_title,
+            'title': vidtitle,
             'formats': formats,
+        }
+
+
+class CinemassacreIE(InfoExtractor):
+    _VALID_URL = 'https?://(?:www\.)?cinemassacre\.com/(?P<date_y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
+    _TESTS = [
+        {
+            'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
+            'md5': 'fde81fbafaee331785f58cd6c0d46190',
+            'info_dict': {
+                'id': 'Cinemassacre-19911',
+                'ext': 'mp4',
+                'upload_date': '20121110',
+                'title': '“Angry Video Game Nerd: The Movie” – Trailer',
+                'description': 'md5:fb87405fcb42a331742a0dce2708560b',
+            },
+        },
+        {
+            'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
+            'md5': 'd72f10cd39eac4215048f62ab477a511',
+            'info_dict': {
+                'id': 'Cinemassacre-521be8ef82b16',
+                'ext': 'mp4',
+                'upload_date': '20131002',
+                'title': 'The Mummy’s Hand (1940)',
+            },
+        }
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('display_id')
+        video_date = mobj.group('date_y') + mobj.group('date_m') + mobj.group('date_d')
+
+        webpage = self._download_webpage(url, display_id)
+
+        playerdata_url = self._search_regex(
+            r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
+            webpage, 'player data URL')
+        video_title = self._html_search_regex(
+            r'<title>(?P<title>.+?)\|', webpage, 'title')
+        video_description = self._html_search_regex(
+            r'<div class="entry-content">(?P<description>.+?)</div>',
+            webpage, 'description', flags=re.DOTALL, fatal=False)
+        video_thumbnail = self._og_search_thumbnail(webpage)
+
+        return {
+            '_type': 'url_transparent',
+            'display_id': display_id,
+            'title': video_title,
+            'description': video_description,
+            'upload_date': video_date,
+            'thumbnail': video_thumbnail,
+            'url': playerdata_url,
+        }
+
+
+class TeamFourIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/video/(?P<id>[a-z0-9\-]+)/?'
+    _TEST = {
+        'url': 'http://teamfourstar.com/video/a-moment-with-tfs-episode-4/',
+        'info_dict': {
+            'id': 'TeamFourStar-5292a02f20bfa',
+            'ext': 'mp4',
+            'upload_date': '20130401',
+            'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar',
+            'title': 'A Moment With TFS Episode 4',
+        }
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        playerdata_url = self._search_regex(
+            r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
+            webpage, 'player data URL')
+
+        video_title = self._html_search_regex(
+            r'<div class="heroheadingtitle">(?P<title>.+?)</div>',
+            webpage, 'title')
+        video_date = unified_strdate(self._html_search_regex(
+            r'<div class="heroheadingdate">(?P<date>.+?)</div>',
+            webpage, 'date', fatal=False))
+        video_description = self._html_search_regex(
+            r'(?s)<div class="postcontent">(?P<description>.+?)</div>',
+            webpage, 'description', fatal=False)
+        video_thumbnail = self._og_search_thumbnail(webpage)
+
+        return {
+            '_type': 'url_transparent',
+            'display_id': display_id,
+            'title': video_title,
             'description': video_description,
             'upload_date': video_date,
             'thumbnail': video_thumbnail,
+            'url': playerdata_url,
         }
index c833fc8ee817bce2ce09fce724f67633a54ba459..6446d26dc416703da688386a578f904d24b102a4 100644 (file)
@@ -24,7 +24,7 @@ class SexyKarmaIE(InfoExtractor):
             'title': 'Taking a quick pee.',
             'thumbnail': 're:^https?://.*\.jpg$',
             'uploader': 'wildginger7',
-            'upload_date': '20141007',
+            'upload_date': '20141008',
             'duration': 22,
             'view_count': int,
             'comment_count': int,
@@ -45,6 +45,7 @@ class SexyKarmaIE(InfoExtractor):
             'view_count': int,
             'comment_count': int,
             'categories': list,
+            'age_limit': 18,
         }
     }, {
         'url': 'http://www.watchindianporn.net/video/desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number-dW2mtctxJfs.html',
@@ -61,6 +62,7 @@ class SexyKarmaIE(InfoExtractor):
             'view_count': int,
             'comment_count': int,
             'categories': list,
+            'age_limit': 18,
         }
     }]
 
@@ -114,4 +116,5 @@ class SexyKarmaIE(InfoExtractor):
             'view_count': view_count,
             'comment_count': comment_count,
             'categories': categories,
+            'age_limit': 18,
         }
index fdc31603a709676713f7ba87325f4b2ba6b47f3e..26ced716e8a875f1c4c5c9527b856475dce83f9e 100644 (file)
@@ -4,10 +4,12 @@ import re
 import base64
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_parse,
+    compat_urllib_request,
+)
 from ..utils import (
     ExtractorError,
-    compat_urllib_request,
-    compat_urllib_parse,
     int_or_none,
 )
 
@@ -26,26 +28,30 @@ class SharedIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        page = self._download_webpage(url, video_id)
-
-        if re.search(r'>File does not exist<', page) is not None:
-            raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
 
-        download_form = dict(re.findall(r'<input type="hidden" name="([^"]+)" value="([^"]*)"', page))
+        if '>File does not exist<' in webpage:
+            raise ExtractorError(
+                'Video %s does not exist' % video_id, expected=True)
 
-        request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(download_form))
+        download_form = dict(re.findall(
+            r'<input type="hidden" name="([^"]+)" value="([^"]*)"', webpage))
+        request = compat_urllib_request.Request(
+            url, compat_urllib_parse.urlencode(download_form))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')
 
-        video_page = self._download_webpage(request, video_id, 'Downloading video page')
+        video_page = self._download_webpage(
+            request, video_id, 'Downloading video page')
 
-        video_url = self._html_search_regex(r'data-url="([^"]+)"', video_page, 'video URL')
-        title = base64.b64decode(self._html_search_meta('full:title', page, 'title')).decode('utf-8')
-        filesize = int_or_none(self._html_search_meta('full:size', page, 'file size', fatal=False))
+        video_url = self._html_search_regex(
+            r'data-url="([^"]+)"', video_page, 'video URL')
+        title = base64.b64decode(self._html_search_meta(
+            'full:title', webpage, 'title')).decode('utf-8')
+        filesize = int_or_none(self._html_search_meta(
+            'full:size', webpage, 'file size', fatal=False))
         thumbnail = self._html_search_regex(
-            r'data-poster="([^"]+)"', video_page, 'thumbnail', fatal=False, default=None)
+            r'data-poster="([^"]+)"', video_page, 'thumbnail', default=None)
 
         return {
             'id': video_id,
index 7531e8325bf88e3d89958dca1107334c41b78c6c..ac3e3adf22ad194a8af3e833ae4d8acf7484e8b4 100644 (file)
@@ -4,9 +4,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
+)
+from ..utils import (
     parse_duration,
 )
 
index 5eadbb7eaea263b8a37307fbdcc01c3e54c5eaa2..a63d126d4560dda83133fa6280116ca517e71bdc 100644 (file)
@@ -4,7 +4,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_request,
     compat_urllib_parse,
 )
index 5864b9936cca2e4d0ba3a0fa217884c21f897ed7..9f79ff5c1b66d2bf37369a6009a914043493b407 100644 (file)
@@ -4,8 +4,10 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urlparse,
+)
+from ..utils import (
     ExtractorError,
 )
 
@@ -28,7 +30,7 @@ class SlideshareIE(InfoExtractor):
         page_title = mobj.group('title')
         webpage = self._download_webpage(url, page_title)
         slideshare_obj = self._search_regex(
-            r'var slideshare_object =  ({.*?}); var user_info =',
+            r'var\s+slideshare_object\s*=\s*({.*?});\s*var\s+user_info\s*=',
             webpage, 'slideshare object')
         info = json.loads(slideshare_obj)
         if info['slideshow']['type'] != 'video':
@@ -39,7 +41,7 @@ class SlideshareIE(InfoExtractor):
         ext = info['jsplayer']['video_extension']
         video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
         description = self._html_search_regex(
-            r'<p\s+(?:style="[^"]*"\s+)?class=".*?description.*?"[^>]*>(.*?)</p>', webpage,
+            r'(?s)<p[^>]+itemprop="description"[^>]*>(.+?)</p>', webpage,
             'description', fatal=False)
 
         return {
index 0751efc6111c96ca1c089c66183429f9bde6147c..26f361c93990b6b92ff31d2447b70f7e08263d00 100644 (file)
@@ -7,9 +7,11 @@ import hashlib
 import uuid
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
+)
+from ..utils import (
     ExtractorError,
     int_or_none,
     unified_strdate,
@@ -67,6 +69,7 @@ class SmotriIE(InfoExtractor):
             'params': {
                 'videopassword': 'qwerty',
             },
+            'skip': 'Video is not approved by moderator',
         },
         # age limit + video-password
         {
@@ -84,7 +87,22 @@ class SmotriIE(InfoExtractor):
             },
             'params': {
                 'videopassword': '333'
-            }
+            },
+            'skip': 'Video is not approved by moderator',
+        },
+        # not approved by moderator, but available
+        {
+            'url': 'http://smotri.com/video/view/?id=v28888533b73',
+            'md5': 'f44bc7adac90af518ef1ecf04893bb34',
+            'info_dict': {
+                'id': 'v28888533b73',
+                'ext': 'mp4',
+                'title': 'Russian Spies Killed By ISIL Child Soldier',
+                'uploader': 'Mopeder',
+                'uploader_id': 'mopeder',
+                'duration': 71,
+                'thumbnail': 'http://frame9.loadup.ru/d7/32/2888853.2.3.jpg',
+            },
         },
         # swf player
         {
@@ -142,13 +160,16 @@ class SmotriIE(InfoExtractor):
 
         video = self._download_json(request, video_id, 'Downloading video JSON')
 
-        if video.get('_moderate_no') or not video.get('moderated'):
-            raise ExtractorError('Video %s has not been approved by moderator' % video_id, expected=True)
+        video_url = video.get('_vidURL') or video.get('_vidURL_mp4')
 
-        if video.get('error'):
-            raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+        if not video_url:
+            if video.get('_moderate_no') or not video.get('moderated'):
+                raise ExtractorError(
+                    'Video %s has not been approved by moderator' % video_id, expected=True)
+
+            if video.get('error'):
+                raise ExtractorError('Video %s does not exist' % video_id, expected=True)
 
-        video_url = video.get('_vidURL') or video.get('_vidURL_mp4')
         title = video['title']
         thumbnail = video['_imgURL']
         upload_date = unified_strdate(video['added'])
@@ -274,15 +295,18 @@ class SmotriBroadcastIE(InfoExtractor):
         broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page')
 
         if re.search('>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None:
-            raise ExtractorError('Broadcast %s does not exist' % broadcast_id, expected=True)
+            raise ExtractorError(
+                'Broadcast %s does not exist' % broadcast_id, expected=True)
 
         # Adult content
         if re.search('EroConfirmText">', broadcast_page) is not None:
 
             (username, password) = self._get_login_info()
             if username is None:
-                raise ExtractorError('Erotic broadcasts allowed only for registered users, '
-                                     'use --username and --password options to provide account credentials.', expected=True)
+                raise ExtractorError(
+                    'Erotic broadcasts allowed only for registered users, '
+                    'use --username and --password options to provide account credentials.',
+                    expected=True)
 
             login_form = {
                 'login-hint53': '1',
@@ -291,9 +315,11 @@ class SmotriBroadcastIE(InfoExtractor):
                 'password': password,
             }
 
-            request = compat_urllib_request.Request(broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form))
+            request = compat_urllib_request.Request(
+                broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form))
             request.add_header('Content-Type', 'application/x-www-form-urlencoded')
-            broadcast_page = self._download_webpage(request, broadcast_id, 'Logging in and confirming age')
+            broadcast_page = self._download_webpage(
+                request, broadcast_id, 'Logging in and confirming age')
 
             if re.search('>Неверный логин или пароль<', broadcast_page) is not None:
                 raise ExtractorError('Unable to log in: bad username or password', expected=True)
@@ -303,7 +329,7 @@ class SmotriBroadcastIE(InfoExtractor):
             adult_content = False
 
         ticket = self._html_search_regex(
-            'window\.broadcast_control\.addFlashVar\\(\'file\', \'([^\']+)\'\\);',
+            r"window\.broadcast_control\.addFlashVar\('file'\s*,\s*'([^']+)'\)",
             broadcast_page, 'broadcast ticket')
 
         url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket
@@ -312,26 +338,31 @@ class SmotriBroadcastIE(InfoExtractor):
         if broadcast_password:
             url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest()
 
-        broadcast_json_page = self._download_webpage(url, broadcast_id, 'Downloading broadcast JSON')
+        broadcast_json_page = self._download_webpage(
+            url, broadcast_id, 'Downloading broadcast JSON')
 
         try:
             broadcast_json = json.loads(broadcast_json_page)
 
             protected_broadcast = broadcast_json['_pass_protected'] == 1
             if protected_broadcast and not broadcast_password:
-                raise ExtractorError('This broadcast is protected by a password, use the --video-password option', expected=True)
+                raise ExtractorError(
+                    'This broadcast is protected by a password, use the --video-password option',
+                    expected=True)
 
             broadcast_offline = broadcast_json['is_play'] == 0
             if broadcast_offline:
                 raise ExtractorError('Broadcast %s is offline' % broadcast_id, expected=True)
 
             rtmp_url = broadcast_json['_server']
-            if not rtmp_url.startswith('rtmp://'):
+            mobj = re.search(r'^rtmp://[^/]+/(?P<app>.+)/?$', rtmp_url)
+            if not mobj:
                 raise ExtractorError('Unexpected broadcast rtmp URL')
 
             broadcast_playpath = broadcast_json['_streamName']
+            broadcast_app = '%s/%s' % (mobj.group('app'), broadcast_json['_vidURL'])
             broadcast_thumbnail = broadcast_json['_imgURL']
-            broadcast_title = broadcast_json['title']
+            broadcast_title = self._live_title(broadcast_json['title'])
             broadcast_description = broadcast_json['description']
             broadcaster_nick = broadcast_json['nick']
             broadcaster_login = broadcast_json['login']
@@ -352,6 +383,9 @@ class SmotriBroadcastIE(InfoExtractor):
             'age_limit': 18 if adult_content else 0,
             'ext': 'flv',
             'play_path': broadcast_playpath,
+            'player_url': 'http://pics.smotri.com/broadcast_play.swf',
+            'app': broadcast_app,
             'rtmp_live': True,
-            'rtmp_conn': rtmp_conn
+            'rtmp_conn': rtmp_conn,
+            'is_live': True,
         }
index c663e56d42ed02645313637cd7866a9071d10ae7..7d3c0e93783afeac3d8e939e0cf317177df4ca9f 100644 (file)
@@ -1,13 +1,16 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-from ..utils import (
-    ExtractorError,
+import re
+
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
+)
+from ..utils import (
     determine_ext,
+    ExtractorError,
 )
-import re
 
 from .common import InfoExtractor
 
@@ -27,9 +30,7 @@ class SockshareIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         url = 'http://sockshare.com/file/%s' % video_id
         webpage = self._download_webpage(url, video_id)
 
index 07f514a46246657206ae3fd31e19b1d2932e6f15..c04791997f3672cdb643870086c7ed7f52db54c1 100644 (file)
@@ -1,11 +1,10 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 
-import json
 import re
 
 from .common import InfoExtractor
-from ..utils import ExtractorError
+from .common import compat_str
 
 
 class SohuIE(InfoExtractor):
@@ -29,60 +28,73 @@ class SohuIE(InfoExtractor):
                 base_data_url = 'http://my.tv.sohu.com/play/videonew.do?vid='
             else:
                 base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
-            data_url = base_data_url + str(vid_id)
-            data_json = self._download_webpage(
-                data_url, video_id,
-                note='Downloading JSON data for ' + str(vid_id))
-            return json.loads(data_json)
+
+            return self._download_json(
+                base_data_url + vid_id, video_id,
+                'Downloading JSON data for %s' % vid_id)
 
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
         mytv = mobj.group('mytv') is not None
 
         webpage = self._download_webpage(url, video_id)
-        raw_title = self._html_search_regex(r'(?s)<title>(.+?)</title>',
-                                            webpage, 'video title')
+        raw_title = self._html_search_regex(
+            r'(?s)<title>(.+?)</title>',
+            webpage, 'video title')
         title = raw_title.partition('-')[0].strip()
 
-        vid = self._html_search_regex(r'var vid ?= ?["\'](\d+)["\']', webpage,
-                                      'video path')
-        data = _fetch_data(vid, mytv)
-
-        QUALITIES = ('ori', 'super', 'high', 'nor')
-        vid_ids = [data['data'][q + 'Vid']
-                   for q in QUALITIES
-                   if data['data'][q + 'Vid'] != 0]
-        if not vid_ids:
-            raise ExtractorError('No formats available for this video')
+        vid = self._html_search_regex(
+            r'var vid ?= ?["\'](\d+)["\']',
+            webpage, 'video path')
+        vid_data = _fetch_data(vid, mytv)
 
-        # For now, we just pick the highest available quality
-        vid_id = vid_ids[-1]
+        formats_json = {}
+        for format_id in ('nor', 'high', 'super', 'ori', 'h2644k', 'h2654k'):
+            vid_id = vid_data['data'].get('%sVid' % format_id)
+            if not vid_id:
+                continue
+            vid_id = compat_str(vid_id)
+            formats_json[format_id] = vid_data if vid == vid_id else _fetch_data(vid_id, mytv)
 
-        format_data = data if vid == vid_id else _fetch_data(vid_id, mytv)
-        part_count = format_data['data']['totalBlocks']
-        allot = format_data['allot']
-        prot = format_data['prot']
-        clipsURL = format_data['data']['clipsURL']
-        su = format_data['data']['su']
+        part_count = vid_data['data']['totalBlocks']
 
         playlist = []
         for i in range(part_count):
-            part_url = ('http://%s/?prot=%s&file=%s&new=%s' %
-                        (allot, prot, clipsURL[i], su[i]))
-            part_str = self._download_webpage(
-                part_url, video_id,
-                note='Downloading part %d of %d' % (i + 1, part_count))
-
-            part_info = part_str.split('|')
-            video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])
-
-            video_info = {
-                'id': '%s_part%02d' % (video_id, i + 1),
+            formats = []
+            for format_id, format_data in formats_json.items():
+                allot = format_data['allot']
+                prot = format_data['prot']
+
+                data = format_data['data']
+                clips_url = data['clipsURL']
+                su = data['su']
+
+                part_str = self._download_webpage(
+                    'http://%s/?prot=%s&file=%s&new=%s' %
+                    (allot, prot, clips_url[i], su[i]),
+                    video_id,
+                    'Downloading %s video URL part %d of %d'
+                    % (format_id, i + 1, part_count))
+
+                part_info = part_str.split('|')
+                video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])
+
+                formats.append({
+                    'url': video_url,
+                    'format_id': format_id,
+                    'filesize': data['clipsBytes'][i],
+                    'width': data['width'],
+                    'height': data['height'],
+                    'fps': data['fps'],
+                })
+            self._sort_formats(formats)
+
+            playlist.append({
+                'id': '%s_part%d' % (video_id, i + 1),
                 'title': title,
-                'url': video_url,
-                'ext': 'mp4',
-            }
-            playlist.append(video_info)
+                'duration': vid_data['data']['clipsDuration'][i],
+                'formats': formats,
+            })
 
         if len(playlist) == 1:
             info = playlist[0]
diff --git a/youtube_dl/extractor/soulanime.py b/youtube_dl/extractor/soulanime.py
new file mode 100644 (file)
index 0000000..feef33e
--- /dev/null
@@ -0,0 +1,80 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    HEADRequest,
+    urlhandle_detect_ext,
+)
+
+
+class SoulAnimeWatchingIE(InfoExtractor):
+    IE_NAME = "soulanime:watching"
+    IE_DESC = "SoulAnime video"
+    _TEST = {
+        'url': 'http://www.soul-anime.net/watching/seirei-tsukai-no-blade-dance-episode-9/',
+        'md5': '05fae04abf72298098b528e98abf4298',
+        'info_dict': {
+            'id': 'seirei-tsukai-no-blade-dance-episode-9',
+            'ext': 'mp4',
+            'title': 'seirei-tsukai-no-blade-dance-episode-9',
+            'description': 'seirei-tsukai-no-blade-dance-episode-9'
+        }
+    }
+    _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/watch[^/]*/(?P<id>[^/]+)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        domain = mobj.group('domain')
+
+        page = self._download_webpage(url, video_id)
+
+        video_url_encoded = self._html_search_regex(
+            r'<div id="download">[^<]*<a href="(?P<url>[^"]+)"', page, 'url')
+        video_url = "http://www.soul-anime." + domain + video_url_encoded
+
+        ext_req = HEADRequest(video_url)
+        ext_handle = self._request_webpage(
+            ext_req, video_id, note='Determining extension')
+        ext = urlhandle_detect_ext(ext_handle)
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'ext': ext,
+            'title': video_id,
+            'description': video_id
+        }
+
+
+class SoulAnimeSeriesIE(InfoExtractor):
+    IE_NAME = "soulanime:series"
+    IE_DESC = "SoulAnime Series"
+
+    _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/anime./(?P<id>[^/]+)'
+
+    _EPISODE_REGEX = r'<option value="(/watch[^/]*/[^"]+)">[^<]*</option>'
+
+    _TEST = {
+        'url': 'http://www.soul-anime.net/anime1/black-rock-shooter-tv/',
+        'info_dict': {
+            'id': 'black-rock-shooter-tv'
+        },
+        'playlist_count': 8
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        series_id = mobj.group('id')
+        domain = mobj.group('domain')
+
+        pattern = re.compile(self._EPISODE_REGEX)
+
+        page = self._download_webpage(url, series_id, "Downloading series page")
+        mobj = pattern.findall(page)
+
+        entries = [self.url_result("http://www.soul-anime." + domain + obj) for obj in mobj]
+
+        return self.playlist_result(entries, series_id)
index 3c1d058db9cf546bb94a299d9f1badd79fab12d3..5d60c4939588ad543840b501ef0e552ad0b1e673 100644 (file)
@@ -5,11 +5,12 @@ import re
 import itertools
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_str,
     compat_urlparse,
     compat_urllib_parse,
-
+)
+from ..utils import (
     ExtractorError,
     int_or_none,
     unified_strdate,
@@ -32,7 +33,7 @@ class SoundcloudIE(InfoExtractor):
                             (?P<title>[\w\d-]+)/?
                             (?P<token>[^?]+?)?(?:[?].*)?$)
                        |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
-                          (?:/?\?secret_token=(?P<secret_token>[^&]+?))?$)
+                          (?:/?\?secret_token=(?P<secret_token>[^&]+))?)
                        |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
                     )
                     '''
index 94602e89e56549243ed38ecb107ef842cd8ebd46..b936202f6f3005fe9ae085724566d709c6a484cc 100644 (file)
@@ -3,12 +3,14 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
+    compat_urllib_parse,
     compat_urllib_parse_urlparse,
     compat_urllib_request,
-    compat_urllib_parse,
-    unified_strdate,
+)
+from ..utils import (
     str_to_int,
+    unified_strdate,
 )
 from ..aes import aes_decrypt_text
 
index 1e55a9ffb5748b70969de11886c13720ff936be7..f345883c767438a91412e0619a993a70e3a21a92 100644 (file)
@@ -4,7 +4,14 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_urlparse
+from ..compat import (
+    compat_urlparse,
+    compat_HTTPError,
+)
+from ..utils import (
+    HEADRequest,
+    ExtractorError,
+)
 from .spiegeltv import SpiegeltvIE
 
 
@@ -60,21 +67,31 @@ class SpiegelIE(InfoExtractor):
         xml_url = base_url + video_id + '.xml'
         idoc = self._download_xml(xml_url, video_id)
 
-        formats = [
-            {
-                'format_id': n.tag.rpartition('type')[2],
-                'url': base_url + n.find('./filename').text,
-                'width': int(n.find('./width').text),
-                'height': int(n.find('./height').text),
-                'abr': int(n.find('./audiobitrate').text),
-                'vbr': int(n.find('./videobitrate').text),
-                'vcodec': n.find('./codec').text,
-                'acodec': 'MP4A',
-            }
-            for n in list(idoc)
-            # Blacklist type 6, it's extremely LQ and not available on the same server
-            if n.tag.startswith('type') and n.tag != 'type6'
-        ]
+        formats = []
+        for n in list(idoc):
+            if n.tag.startswith('type') and n.tag != 'type6':
+                format_id = n.tag.rpartition('type')[2]
+                video_url = base_url + n.find('./filename').text
+                # Test video URLs beforehand as some of them are invalid
+                try:
+                    self._request_webpage(
+                        HEADRequest(video_url), video_id,
+                        'Checking %s video URL' % format_id)
+                except ExtractorError as e:
+                    if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
+                        self.report_warning(
+                            '%s video URL is invalid, skipping' % format_id, video_id)
+                        continue
+                formats.append({
+                    'format_id': format_id,
+                    'url': video_url,
+                    'width': int(n.find('./width').text),
+                    'height': int(n.find('./height').text),
+                    'abr': int(n.find('./audiobitrate').text),
+                    'vbr': int(n.find('./videobitrate').text),
+                    'vcodec': n.find('./codec').text,
+                    'acodec': 'MP4A',
+                })
         duration = float(idoc[0].findall('./duration')[0].text)
 
         self._sort_formats(formats)
index 057ef5251dc6855c32a8df5abed6917556d190e9..1a57aebf16c4439174844b3e21bb42bf4331d02a 100644 (file)
@@ -4,8 +4,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_request,
+)
+from ..utils import (
     parse_iso8601,
 )
 
@@ -58,9 +60,10 @@ class SportDeutschlandIE(InfoExtractor):
 
         categories = list(data.get('section', {}).get('tags', {}).values())
         asset = data['asset']
+        assets_info = self._download_json(asset['url'], video_id)
 
         formats = []
-        smil_url = asset['video']
+        smil_url = assets_info['video']
         if '.smil' in smil_url:
             m3u8_url = smil_url.replace('.smil', '.m3u8')
             formats.extend(
index c1178f26de0b961ad68eb6d1ddb89550746f4dd7..d4e1340158da92de09776b39a4dfebe8e38aeddf 100644 (file)
@@ -2,10 +2,9 @@
 from __future__ import unicode_literals
 
 import re
-import time
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
 )
@@ -40,8 +39,7 @@ class StreamcloudIE(InfoExtractor):
             ''', orig_webpage)
         post = compat_urllib_parse.urlencode(fields)
 
-        self.to_screen('%s: Waiting for timeout' % video_id)
-        time.sleep(12)
+        self._sleep(12, video_id)
         headers = {
             b'Content-Type': b'application/x-www-form-urlencoded',
         }
index 73efe95420ff7b83412864de02d8d5601690b537..c3ceb5f76d450001affda86e79466607b677e8f5 100644 (file)
@@ -1,18 +1,14 @@
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 
-import re
-import json
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
-    compat_str,
 )
 
 
 class StreamCZIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?stream\.cz/.+/(?P<videoid>.+)'
+    _VALID_URL = r'https?://(?:www\.)?stream\.cz/.+/(?P<id>[0-9]+)'
 
     _TESTS = [{
         'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti',
@@ -21,61 +17,63 @@ class StreamCZIE(InfoExtractor):
             'id': '765767',
             'ext': 'mp4',
             'title': 'Peklo na talíři: Éčka pro děti',
-            'description': 'md5:49ace0df986e95e331d0fe239d421519',
-            'thumbnail': 'http://im.stream.cz/episode/52961d7e19d423f8f06f0100',
+            'description': 'Taška s grónskou pomazánkou a další pekelnosti ZDE',
+            'thumbnail': 're:^http://im.stream.cz/episode/52961d7e19d423f8f06f0100',
             'duration': 256,
         },
     }, {
         'url': 'http://www.stream.cz/blanik/10002447-tri-roky-pro-mazanka',
-        'md5': '246272e753e26bbace7fcd9deca0650c',
+        'md5': 'e54a254fb8b871968fd8403255f28589',
         'info_dict': {
             'id': '10002447',
             'ext': 'mp4',
             'title': 'Kancelář Blaník: Tři roky pro Mazánka',
-            'description': 'md5:9177695a8b756a0a8ab160de4043b392',
-            'thumbnail': 'http://im.stream.cz/episode/537f838c50c11f8d21320000',
+            'description': 'md5:3862a00ba7bf0b3e44806b544032c859',
+            'thumbnail': 're:^http://im.stream.cz/episode/537f838c50c11f8d21320000',
             'duration': 368,
         },
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('videoid')
-
-        webpage = self._download_webpage(url, video_id)
-
-        data = self._html_search_regex(r'Stream\.Data\.Episode\((.+?)\);', webpage, 'stream data')
-
-        jsonData = json.loads(data)
+        video_id = self._match_id(url)
+        data = self._download_json(
+            'http://www.stream.cz/API/episode/%s' % video_id, video_id)
 
         formats = []
-        for video in jsonData['instances']:
-            for video_format in video['instances']:
-                format_id = video_format['quality']
-
-                if format_id == '240p':
-                    quality = 0
-                elif format_id == '360p':
-                    quality = 1
-                elif format_id == '480p':
-                    quality = 2
-                elif format_id == '720p':
-                    quality = 3
-
+        for quality, video in enumerate(data['video_qualities']):
+            for f in video['formats']:
+                typ = f['type'].partition('/')[2]
+                qlabel = video.get('quality_label')
                 formats.append({
-                    'format_id': '%s-%s' % (video_format['type'].split('/')[1], format_id),
-                    'url': video_format['source'],
+                    'format_note': '%s-%s' % (qlabel, typ) if qlabel else typ,
+                    'format_id': '%s-%s' % (typ, f['quality']),
+                    'url': f['source'],
+                    'height': int_or_none(f['quality'].rstrip('p')),
                     'quality': quality,
                 })
-
         self._sort_formats(formats)
 
+        image = data.get('image')
+        if image:
+            thumbnail = self._proto_relative_url(
+                image.replace('{width}', '1240').replace('{height}', '697'),
+                scheme='http:',
+            )
+        else:
+            thumbnail = None
+
+        stream = data.get('_embedded', {}).get('stream:show', {}).get('name')
+        if stream:
+            title = '%s: %s' % (stream, data['name'])
+        else:
+            title = data['name']
+
         return {
-            'id': compat_str(jsonData['episode_id']),
-            'title': self._og_search_title(webpage),
-            'thumbnail': jsonData['episode_image_original_url'].replace('//', 'http://'),
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
             'formats': formats,
-            'description': self._og_search_description(webpage),
-            'duration': int_or_none(jsonData['duration']),
-            'view_count': int_or_none(jsonData['stats_total']),
+            'description': data.get('web_site_text'),
+            'duration': int_or_none(data.get('duration')),
+            'view_count': int_or_none(data.get('views')),
         }
index 263f09b4645fa8b6255f1216e99cab27afce2bee..8a333f1d24d6be3bd5160d843c3cd6451ef83178 100644 (file)
@@ -28,23 +28,27 @@ class SunPornoIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
 
-        title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
-        description = self._html_search_meta('description', webpage, 'description')
+        title = self._html_search_regex(
+            r'<title>([^<]+)</title>', webpage, 'title')
+        description = self._html_search_meta(
+            'description', webpage, 'description')
         thumbnail = self._html_search_regex(
             r'poster="([^"]+)"', webpage, 'thumbnail', fatal=False)
 
         duration = parse_duration(self._search_regex(
-            r'Duration:\s*(\d+:\d+)\s*<', webpage, 'duration', fatal=False))
+            r'itemprop="duration">\s*(\d+:\d+)\s*<',
+            webpage, 'duration', fatal=False))
 
         view_count = int_or_none(self._html_search_regex(
-            r'class="views">\s*(\d+)\s*<', webpage, 'view count', fatal=False))
+            r'class="views">\s*(\d+)\s*<',
+            webpage, 'view count', fatal=False))
         comment_count = int_or_none(self._html_search_regex(
-            r'(\d+)</b> Comments?', webpage, 'comment count', fatal=False))
+            r'(\d+)</b> Comments?',
+            webpage, 'comment count', fatal=False))
 
         formats = []
         quality = qualities(['mp4', 'flv'])
index b870474515ba61ee33641c86554d53d68a6bf46d..bfe07b02417a2a44f23a09c10c25d48ec18b5535 100644 (file)
@@ -4,10 +4,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..utils import parse_filesize
 
 
 class TagesschauIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/video/video(?P<id>-?[0-9]+)\.html'
+    _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?:sendung/ts|video/video)(?P<id>-?[0-9]+)\.html'
 
     _TESTS = [{
         'url': 'http://www.tagesschau.de/multimedia/video/video1399128.html',
@@ -19,6 +20,16 @@ class TagesschauIE(InfoExtractor):
             'description': 'md5:69da3c61275b426426d711bde96463ab',
             'thumbnail': 're:^http:.*\.jpg$',
         },
+    }, {
+        'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html',
+        'md5': '3c54c1f6243d279b706bde660ceec633',
+        'info_dict': {
+            'id': '5727',
+            'ext': 'mp4',
+            'description': 'md5:695c01bfd98b7e313c501386327aea59',
+            'title': 'Sendung: tagesschau \t04.12.2014 20:00 Uhr',
+            'thumbnail': 're:^http:.*\.jpg$',
+        }
     }]
 
     _FORMATS = {
@@ -28,42 +39,82 @@ class TagesschauIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        if video_id.startswith('-'):
-            display_id = video_id.strip('-')
-        else:
-            display_id = video_id
-
+        video_id = self._match_id(url)
+        display_id = video_id.lstrip('-')
         webpage = self._download_webpage(url, display_id)
 
-        playerpage = self._download_webpage(
-            'http://www.tagesschau.de/multimedia/video/video%s~player_autoplay-true.html' % video_id,
-            display_id, 'Downloading player page')
-
-        medias = re.findall(
-            r'"(http://media.+?)", type:"video/(.+?)", quality:"(.+?)"',
-            playerpage)
+        player_url = self._html_search_meta(
+            'twitter:player', webpage, 'player URL', default=None)
+        if player_url:
+            playerpage = self._download_webpage(
+                player_url, display_id, 'Downloading player page')
 
-        formats = []
-        for url, ext, res in medias:
-            f = {
-                'format_id': res + '_' + ext,
-                'url': url,
-                'ext': ext,
-            }
-            f.update(self._FORMATS.get(res, {}))
-            formats.append(f)
+            medias = re.findall(
+                r'"(http://media.+?)", type:"video/(.+?)", quality:"(.+?)"',
+                playerpage)
+            formats = []
+            for url, ext, res in medias:
+                f = {
+                    'format_id': res + '_' + ext,
+                    'url': url,
+                    'ext': ext,
+                }
+                f.update(self._FORMATS.get(res, {}))
+                formats.append(f)
+            thumbnail_fn = re.findall(r'"(/multimedia/.+?\.jpg)"', playerpage)[-1]
+            title = self._og_search_title(webpage).strip()
+            description = self._og_search_description(webpage).strip()
+        else:
+            download_text = self._search_regex(
+                r'(?s)<p>Wir bieten dieses Video in folgenden Formaten zum Download an:</p>\s*<div class="controls">(.*?)</div>\s*<p>',
+                webpage, 'download links')
+            links = re.finditer(
+                r'<div class="button" title="(?P<title>[^"]*)"><a href="(?P<url>[^"]+)">(?P<name>.+?)</a></div>',
+                download_text)
+            formats = []
+            for l in links:
+                format_id = self._search_regex(
+                    r'.*/[^/.]+\.([^/]+)\.[^/.]+', l.group('url'), 'format ID')
+                format = {
+                    'format_id': format_id,
+                    'url': l.group('url'),
+                    'format_name': l.group('name'),
+                }
+                m = re.match(
+                    r'''(?x)
+                        Video:\s*(?P<vcodec>[a-zA-Z0-9/._-]+)\s*&\#10;
+                        (?P<width>[0-9]+)x(?P<height>[0-9]+)px&\#10;
+                        (?P<vbr>[0-9]+)kbps&\#10;
+                        Audio:\s*(?P<abr>[0-9]+)kbps,\s*(?P<audio_desc>[A-Za-z\.0-9]+)&\#10;
+                        Gr&ouml;&szlig;e:\s*(?P<filesize_approx>[0-9.,]+\s+[a-zA-Z]*B)''',
+                    l.group('title'))
+                if m:
+                    format.update({
+                        'format_note': m.group('audio_desc'),
+                        'vcodec': m.group('vcodec'),
+                        'width': int(m.group('width')),
+                        'height': int(m.group('height')),
+                        'abr': int(m.group('abr')),
+                        'vbr': int(m.group('vbr')),
+                        'filesize_approx': parse_filesize(m.group('filesize_approx')),
+                    })
+                formats.append(format)
+            thumbnail_fn = self._search_regex(
+                r'(?s)<img alt="Sendungsbild".*?src="([^"]+)"',
+                webpage, 'thumbnail', fatal=False)
+            description = self._html_search_regex(
+                r'(?s)<p class="teasertext">(.*?)</p>',
+                webpage, 'description', fatal=False)
+            title = self._html_search_regex(
+                r'<span class="headline".*?>(.*?)</span>', webpage, 'title')
 
         self._sort_formats(formats)
-
-        thumbnail = re.findall(r'"(/multimedia/.+?\.jpg)"', playerpage)[-1]
+        thumbnail = 'http://www.tagesschau.de' + thumbnail_fn
 
         return {
             'id': display_id,
-            'title': self._og_search_title(webpage).strip(),
-            'thumbnail': 'http://www.tagesschau.de' + thumbnail,
+            'title': title,
+            'thumbnail': thumbnail,
             'formats': formats,
-            'description': self._og_search_description(webpage).strip(),
+            'description': description,
         }
index 283e11350b212db0c857f1ccdb8982519a78cfbb..f1f43d0a7113cbf40e5dfd3ffb71af5e900fab78 100644 (file)
@@ -4,10 +4,12 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_request,
+)
 from ..utils import (
-    ExtractorError,
     clean_html,
-    compat_urllib_request,
+    ExtractorError,
     float_or_none,
     parse_iso8601,
 )
index 6c3445d792206395b7a36d016b8a42ad255ea9cc..82675431f863fded8768241e2ad21c4874f8525d 100644 (file)
@@ -57,9 +57,7 @@ class TeacherTubeIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         title = self._html_search_meta('title', webpage, 'title', fatal=True)
index f8a87afdaf4d27c59b4b29491569b243331b2322..10b3b706a9c82ef8398d408a948e72b6c52b31c3 100644 (file)
@@ -5,7 +5,7 @@ import re
 
 from .subtitles import SubtitlesInfoExtractor
 
-from ..utils import (
+from ..compat import (
     compat_str,
 )
 
@@ -13,7 +13,7 @@ from ..utils import (
 class TEDIE(SubtitlesInfoExtractor):
     _VALID_URL = r'''(?x)
         (?P<proto>https?://)
-        (?P<type>www|embed)(?P<urlmain>\.ted\.com/
+        (?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/
         (
             (?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
             |
@@ -98,7 +98,7 @@ class TEDIE(SubtitlesInfoExtractor):
 
     def _real_extract(self, url):
         m = re.match(self._VALID_URL, url, re.VERBOSE)
-        if m.group('type') == 'embed':
+        if m.group('type').startswith('embed'):
             desktop_url = m.group('proto') + 'www' + m.group('urlmain')
             return self.url_result(desktop_url, 'TED')
         name = m.group('name')
@@ -199,8 +199,9 @@ class TEDIE(SubtitlesInfoExtractor):
         webpage = self._download_webpage(url, name)
 
         config_json = self._html_search_regex(
-            r"data-config='([^']+)", webpage, 'config')
-        config = json.loads(config_json)
+            r'"pages\.jwplayer"\s*,\s*({.+?})\s*\)\s*</script>',
+            webpage, 'config')
+        config = json.loads(config_json)['config']
         video_url = config['video']['url']
         thumbnail = config.get('image', {}).get('url')
 
index 2a2fff5e18e4219f8db404bb0803778055570dfc..be3f72df7c11043346b015528ae905913a3d05df 100644 (file)
@@ -6,7 +6,7 @@ from .mitele import MiTeleIE
 
 class TelecincoIE(MiTeleIE):
     IE_NAME = 'telecinco.es'
-    _VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/[^/]+/(?P<episode>.*?)\.html'
+    _VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/[^/]+/(?P<id>.*?)\.html'
 
     _TEST = {
         'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
diff --git a/youtube_dl/extractor/teletask.py b/youtube_dl/extractor/teletask.py
new file mode 100644 (file)
index 0000000..e541451
--- /dev/null
@@ -0,0 +1,53 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import unified_strdate
+
+
+class TeleTaskIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?tele-task\.de/archive/video/html5/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.tele-task.de/archive/video/html5/26168/',
+        'info_dict': {
+            'title': 'Duplicate Detection',
+        },
+        'playlist': [{
+            'md5': '290ef69fb2792e481169c3958dbfbd57',
+            'info_dict': {
+                'id': '26168-speaker',
+                'ext': 'mp4',
+                'title': 'Duplicate Detection',
+                'upload_date': '20141218',
+            }
+        }, {
+            'md5': 'e1e7218c5f0e4790015a437fcf6c71b4',
+            'info_dict': {
+                'id': '26168-slides',
+                'ext': 'mp4',
+                'title': 'Duplicate Detection',
+                'upload_date': '20141218',
+            }
+        }]
+    }
+
+    def _real_extract(self, url):
+        lecture_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, lecture_id)
+
+        title = self._html_search_regex(
+            r'itemprop="name">([^<]+)</a>', webpage, 'title')
+        upload_date = unified_strdate(self._html_search_regex(
+            r'Date:</td><td>([^<]+)</td>', webpage, 'date', fatal=False))
+
+        entries = [{
+            'id': '%s-%s' % (lecture_id, format_id),
+            'url': video_url,
+            'title': title,
+            'upload_date': upload_date,
+        } for format_id, video_url in re.findall(
+            r'<video class="([^"]+)"[^>]*>\s*<source src="([^"]+)"', webpage)]
+
+        return self.playlist_result(entries, lecture_id, title)
index 81ba169fbec68c9bd7fea395c8bb135d73b3e828..466155ef800fbc540292eb343bc9092ab9da6416 100644 (file)
@@ -8,7 +8,6 @@ class TenPlayIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?ten(play)?\.com\.au/.+'
     _TEST = {
         'url': 'http://tenplay.com.au/ten-insider/extra/season-2013/tenplay-tv-your-way',
-        #'md5': 'd68703d9f73dc8fccf3320ab34202590',
         'info_dict': {
             'id': '2695695426001',
             'ext': 'flv',
index 6e61cc9e2ecf621b19fe13924456970b091bce1c..025d0877cb928bb433aff9f6eff19a29d253e006 100644 (file)
@@ -1,15 +1,13 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 
 
 class TF1IE(InfoExtractor):
     """TF1 uses the wat.tv player."""
-    _VALID_URL = r'http://videos\.tf1\.fr/.*-(?P<id>.*?)\.html'
-    _TEST = {
+    _VALID_URL = r'http://(?:videos\.tf1|www\.tfou)\.fr/.*?-(?P<id>\d+)(?:-\d+)?\.html'
+    _TESTS = {
         'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
         'info_dict': {
             'id': '10635995',
@@ -21,14 +19,26 @@ class TF1IE(InfoExtractor):
             # Sometimes wat serves the whole file with the --test option
             'skip_download': True,
         },
+    }, {
+        'url': 'http://www.tfou.fr/chuggington/videos/le-grand-mysterioso-chuggington-7085291-739.html',
+        'info_dict': {
+            'id': '12043945',
+            'ext': 'mp4',
+            'title': 'Le grand Mystérioso - Chuggington',
+            'description': 'Le grand Mystérioso - Emery rêve qu\'un article lui soit consacré dans le journal.',
+            'upload_date': '20150103',
+        },
+        'params': {
+            # Sometimes wat serves the whole file with the --test option
+            'skip_download': True,
+        },
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
         embed_url = self._html_search_regex(
-            r'"(https://www.wat.tv/embedframe/.*?)"', webpage, 'embed url')
+            r'["\'](https?://www.wat.tv/embedframe/.*?)["\']', webpage, 'embed url')
         embed_page = self._download_webpage(embed_url, video_id,
                                             'Downloading embed player page')
         wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')
index e2653d62dc8c288ce8e58e5bfda52793aef7cfaf..110ed976de3d1a3a31c8c9a88cd976482f7d78ca 100644 (file)
@@ -3,9 +3,11 @@ from __future__ import unicode_literals
 import re
 import json
 
-from .common import InfoExtractor
-from ..utils import (
+from .subtitles import SubtitlesInfoExtractor
+from ..compat import (
     compat_str,
+)
+from ..utils import (
     determine_ext,
     ExtractorError,
     xpath_with_ns,
@@ -14,7 +16,7 @@ from ..utils import (
 _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
 
 
-class ThePlatformIE(InfoExtractor):
+class ThePlatformIE(SubtitlesInfoExtractor):
     _VALID_URL = r'''(?x)
         (?:https?://(?:link|player)\.theplatform\.com/[sp]/[^/]+/
            (?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
@@ -64,6 +66,20 @@ class ThePlatformIE(InfoExtractor):
         info_json = self._download_webpage(info_url, video_id)
         info = json.loads(info_json)
 
+        subtitles = {}
+        captions = info.get('captions')
+        if isinstance(captions, list):
+            for caption in captions:
+                lang, src = caption.get('lang'), caption.get('src')
+                if lang and src:
+                    subtitles[lang] = src
+
+        if self._downloader.params.get('listsubtitles', False):
+            self._list_available_subtitles(video_id, subtitles)
+            return
+
+        subtitles = self.extract_subtitles(video_id, subtitles)
+
         head = meta.find(_x('smil:head'))
         body = meta.find(_x('smil:body'))
 
@@ -115,6 +131,7 @@ class ThePlatformIE(InfoExtractor):
         return {
             'id': video_id,
             'title': info['title'],
+            'subtitles': subtitles,
             'formats': formats,
             'description': info['description'],
             'thumbnail': info['defaultThumbnailUrl'],
index 66d159e99f6b15c01d53017b24b0a70b57470bd3..9f9e388c50948d658d1022f8514122643b623a03 100644 (file)
@@ -5,7 +5,7 @@ import re
 from .common import InfoExtractor
 from .brightcove import BrightcoveIE
 from .discovery import DiscoveryIE
-from ..utils import compat_urlparse
+from ..compat import compat_urlparse
 
 
 class TlcIE(DiscoveryIE):
index 827aa08a455e05cb6f8a17f7dd11d91b5ad3be35..c5c6fdc51b19fce90d45298858ce345bc901307e 100644 (file)
@@ -15,7 +15,7 @@ class TMZIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Kim Kardashian\'s Boobs Unlock a Mystery!',
             'description': 'Did Kim Kardasain try to one-up Khloe by one-upping Kylie???  Or is she just showing off her amazing boobs?',
-            'thumbnail': 'http://cdnbakmi.kaltura.com/p/591531/sp/59153100/thumbnail/entry_id/0_okj015ty/version/100002/acv/182/width/640',
+            'thumbnail': r're:http://cdnbakmi\.kaltura\.com/.*thumbnail.*',
         }
     }
 
index 0ecd695f85e6a40929501fb6adc40bace8784d42..d48cbbf140054e639f7191acfa0909972ef3ab76 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 class TNAFlixIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/(?P<cat_id>[\w-]+)/(?P<display_id>[\w-]+)/video(?P<id>\d+)'
 
-    _TITLE_REGEX = None
+    _TITLE_REGEX = r'<title>(.+?) - TNAFlix Porn Videos</title>'
     _DESCRIPTION_REGEX = r'<h3 itemprop="description">([^<]+)</h3>'
     _CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'
 
@@ -49,8 +49,8 @@ class TNAFlixIE(InfoExtractor):
         if duration:
             duration = parse_duration(duration[1:])
 
-        cfg_url = self._html_search_regex(
-            self._CONFIG_REGEX, webpage, 'flashvars.config')
+        cfg_url = self._proto_relative_url(self._html_search_regex(
+            self._CONFIG_REGEX, webpage, 'flashvars.config'), 'http:')
 
         cfg_xml = self._download_xml(
             cfg_url, display_id, note='Downloading metadata',
index 64a1e903022a78fa3a2b15eeff5eed20afce568d..d73ad3762a1b455cfd4bc384c27e2dd85e776dde 100644 (file)
@@ -4,9 +4,11 @@ import json
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse_urlparse,
     compat_urllib_request,
+)
+from ..utils import (
     int_or_none,
     str_to_int,
 )
index 161e47624b383dfe76ea1e2ea6ec73395a97db53..c89de5ba4a46bb261987d8dbee5f55b3d05492da 100644 (file)
@@ -9,7 +9,7 @@ from .common import InfoExtractor
 
 
 class TudouIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs|albumplay)/(?:view|(.+?))/(?:([^/]+)|([^/]+))(?:\.html)?'
+    _VALID_URL = r'https?://(?:www\.)?tudou\.com/(?:listplay|programs(?:/view)?|albumplay)/.*?/(?P<id>[^/?#]+?)(?:\.html)?/?(?:$|[?#])'
     _TESTS = [{
         'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
         'md5': '140a49ed444bd22f93330985d8475fcb',
@@ -27,13 +27,6 @@ class TudouIE(InfoExtractor):
             'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012',
             'thumbnail': 're:^https?://.*\.jpg$',
         }
-    }, {
-        'url': 'http://www.tudou.com/albumplay/TenTw_JgiPM/PzsAs5usU9A.html',
-        'info_dict': {
-            'title': 'todo.mp4',
-        },
-        'add_ie': ['Youku'],
-        'skip': 'Only works from China'
     }]
 
     def _url_for_id(self, id, quality=None):
@@ -45,8 +38,7 @@ class TudouIE(InfoExtractor):
         return final_url
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(2)
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         m = re.search(r'vcode:\s*[\'"](.+?)[\'"]', webpage)
@@ -87,4 +79,9 @@ class TudouIE(InfoExtractor):
             }
             result.append(part_info)
 
-        return result
+        return {
+            '_type': 'multi_video',
+            'entries': result,
+            'id': video_id,
+            'title': title,
+        }
index 4ce5aeeba242b94b78d71e3c9d033aa318b588fb..b6b1f2568f23a6ea9fe8e12c86deb6b30d44a809 100644 (file)
@@ -24,7 +24,7 @@ class TuneInIE(InfoExtractor):
     _INFO_DICT = {
         'id': '34682',
         'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2',
-        'ext': 'AAC',
+        'ext': 'aac',
         'thumbnail': 're:^https?://.*\.png$',
         'location': 'Tacoma, WA',
     }
@@ -78,14 +78,21 @@ class TuneInIE(InfoExtractor):
         for stream in streams:
             if stream.get('Type') == 'Live':
                 is_live = True
+            reliability = stream.get('Reliability')
+            format_note = (
+                'Reliability: %d%%' % reliability
+                if reliability is not None else None)
             formats.append({
+                'preference': (
+                    0 if reliability is None or reliability > 90
+                    else 1),
                 'abr': stream.get('Bandwidth'),
-                'ext': stream.get('MediaType'),
+                'ext': stream.get('MediaType').lower(),
                 'acodec': stream.get('MediaType'),
                 'vcodec': 'none',
                 'url': stream.get('Url'),
-                # Sometimes streams with the highest quality do not exist
-                'preference': stream.get('Reliability'),
+                'source_preference': reliability,
+                'format_note': format_note,
             })
         self._sort_formats(formats)
 
index d516b6427bd271fa8f7e1129cdbbcd9dda692ae1..4de0aac523313eced334aab38a9a20c7bf08dfc7 100644 (file)
@@ -1,10 +1,9 @@
 from __future__ import unicode_literals
 
 import base64
-import re
 
 from .common import InfoExtractor
-from ..utils import compat_parse_qs
+from ..compat import compat_parse_qs
 
 
 class TutvIE(InfoExtractor):
@@ -20,10 +19,9 @@ class TutvIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
+
         internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, 'internal video ID')
 
         data_content = self._download_webpage(
index d81d1d1a67cef49d4f612f08bfb5b7b7002b51fd..ba65996dc01646e019cfd5820aa36c1934365d9b 100644 (file)
@@ -1,32 +1,30 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     float_or_none,
-    str_to_int,
+    parse_age_limit,
 )
 
 
 class TvigleIE(InfoExtractor):
     IE_NAME = 'tvigle'
     IE_DESC = 'Интернет-телевидение Tvigle.ru'
-    _VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<display_id>[^/]+)/$'
+    _VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<id>[^/]+)/$'
 
     _TESTS = [
         {
-            'url': 'http://www.tvigle.ru/video/brat/',
-            'md5': 'ff4344a4894b0524441fb6f8218dc716',
+            'url': 'http://www.tvigle.ru/video/sokrat/',
+            'md5': '36514aed3657d4f70b4b2cef8eb520cd',
             'info_dict': {
-                'id': '5118490',
-                'display_id': 'brat',
-                'ext': 'mp4',
-                'title': 'Брат',
-                'description': 'md5:d16ac7c0b47052ea51fddb92c4e413eb',
-                'duration': 5722.6,
-                'age_limit': 16,
+                'id': '1848932',
+                'display_id': 'sokrat',
+                'ext': 'flv',
+                'title': 'Сократ',
+                'description': 'md5:a05bd01be310074d5833efc6743be95e',
+                'duration': 6586,
+                'age_limit': 0,
             },
         },
         {
@@ -44,8 +42,7 @@ class TvigleIE(InfoExtractor):
     ]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('display_id')
+        display_id = self._match_id(url)
 
         webpage = self._download_webpage(url, display_id)
 
@@ -60,8 +57,8 @@ class TvigleIE(InfoExtractor):
         title = item['title']
         description = item['description']
         thumbnail = item['thumbnail']
-        duration = float_or_none(item['durationMilliseconds'], 1000)
-        age_limit = str_to_int(item['ageRestrictions'])
+        duration = float_or_none(item.get('durationMilliseconds'), 1000)
+        age_limit = parse_age_limit(item.get('ageRestrictions'))
 
         formats = []
         for vcodec, fmts in item['videos'].items():
index a645800057fc6dc88850885cba7737243c95574e..cc26f417a0c208efd946aa91c72086471d69ccaa 100644 (file)
+# -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 
 
 class TvpIE(InfoExtractor):
     IE_NAME = 'tvp.pl'
-    _VALID_URL = r'https?://www\.tvp\.pl/.*?wideo/(?P<date>\d+)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:vod|www)\.tvp\.pl/.*/(?P<id>\d+)$'
 
-    _TEST = {
-        'url': 'http://www.tvp.pl/warszawa/magazyny/campusnews/wideo/31102013/12878238',
-        'md5': '148408967a6a468953c0a75cbdaf0d7a',
+    _TESTS = [{
+        'url': 'http://vod.tvp.pl/filmy-fabularne/filmy-za-darmo/ogniem-i-mieczem/wideo/odc-2/4278035',
         'info_dict': {
-            'id': '12878238',
+            'id': '4278035',
             'ext': 'wmv',
-            'title': '31.10.2013 - Odcinek 2',
-            'description': '31.10.2013 - Odcinek 2',
+            'title': 'Ogniem i mieczem, odc. 2',
+            'description': 'Bohun dowiaduje się o złamaniu przez kniahinię danego mu słowa i wyrusza do Rozłogów. Helenie w ostatniej chwili udaje się uciec dzięki pomocy Zagłoby.',
+        },
+    }, {
+        'url': 'http://vod.tvp.pl/seriale/obyczajowe/czas-honoru/sezon-1-1-13/i-seria-odc-13/194536',
+        'info_dict': {
+            'id': '194536',
+            'ext': 'mp4',
+            'title': 'Czas honoru, I seria – odc. 13',
+            #  'description': 'WŁADEK\nCzesław prosi Marię o dostarczenie Władkowi zarazki tyfusu. Jeśli zachoruje zostanie przewieziony do szpitala skąd łatwiej będzie go odbić. Czy matka zdecyduje się zarazić syna? Karol odwiedza Wandę przyznaje się, że ją oszukiwał, ale ostrzega też, że grozi jej aresztowanie i nalega, żeby wyjechała z Warszawy. Czy dziewczyna zdecyduje się znów oddalić od ukochanego? Rozpoczyna się akcja odbicia Władka.',
+        },
+    }, {
+        'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176',
+        'info_dict': {
+            'id': '17916176',
+            'ext': 'mp4',
+            'title': 'TVP Gorzów pokaże filmy studentów z podroży dookoła świata',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': 'true',
+        },
+    }, {
+        'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272',
+        'info_dict': {
+            'id': '17834272',
+            'ext': 'mp4',
+            'title': 'Na sygnale, odc. 39',
+            'description': 'Ekipa Wiktora ratuje młodą matkę, która spadła ze schodów trzymając na rękach noworodka. Okazuje się, że dziewczyna jest surogatką, a biologiczni rodzice dziecka próbują zmusić ją do oddania synka…',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': 'true',
         },
-        'skip': 'Download has to use same server IP as extraction. Therefore, a good (load-balancing) DNS resolver will make the download fail.'
-    }
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-        json_url = 'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s' % video_id
-        params = self._download_json(
-            json_url, video_id, "Downloading video metadata")
-        video_url = params['video_url']
+        webpage = self._download_webpage(
+            'http://www.tvp.pl/sess/tvplayer.php?object_id=%s' % video_id, video_id)
+
+        title = self._og_search_title(webpage)
+        series = self._search_regex(
+            r'{name:\s*([\'"])SeriesTitle\1,\s*value:\s*\1(?P<series>.*?)\1},',
+            webpage, 'series', group='series', default=None)
+        if series is not None and series not in title:
+            title = '%s, %s' % (series, title)
+        description = self._og_search_description(webpage, default=None)
+
+        video_url = self._search_regex(
+            r'0:{src:([\'"])(?P<url>.*?)\1', webpage, 'formats', group='url', default=None)
+        if video_url is None:
+            video_url = self._download_json(
+                'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s' % video_id,
+                video_id)['video_url']
+
+        ext = video_url.rsplit('.', 1)[-1]
+        if ext != 'ism/manifest':
+            if '/' in ext:
+                ext = 'mp4'
+            formats = [{
+                'format_id': 'direct',
+                'url': video_url,
+                'ext': ext,
+            }]
+        else:
+            m3u8_url = re.sub('([^/]*)\.ism/manifest', r'\1.ism/\1.m3u8', video_url)
+            formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
+
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
-            'title': self._og_search_title(webpage),
-            'ext': 'wmv',
-            'url': video_url,
-            'description': self._og_search_description(webpage),
+            'title': title,
             'thumbnail': self._og_search_thumbnail(webpage),
+            'description': description,
+            'formats': formats,
+        }
+
+
+class TvpSeriesIE(InfoExtractor):
+    IE_NAME = 'tvp.pl:Series'
+    _VALID_URL = r'https?://vod\.tvp\.pl/(?:[^/]+/){2}(?P<id>[^/]+)/?$'
+
+    _TESTS = [{
+        'url': 'http://vod.tvp.pl/filmy-fabularne/filmy-za-darmo/ogniem-i-mieczem',
+        'info_dict': {
+            'title': 'Ogniem i mieczem',
+            'id': '4278026',
+        },
+        'playlist_count': 4,
+    }, {
+        'url': 'http://vod.tvp.pl/audycje/podroze/boso-przez-swiat',
+        'info_dict': {
+            'title': 'Boso przez świat',
+            'id': '9329207',
+        },
+        'playlist_count': 86,
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id, tries=5)
+
+        title = self._html_search_regex(
+            r'(?s) id=[\'"]path[\'"]>(?:.*? / ){2}(.*?)</span>', webpage, 'series')
+        playlist_id = self._search_regex(r'nodeId:\s*(\d+)', webpage, 'playlist id')
+        playlist = self._download_webpage(
+            'http://vod.tvp.pl/vod/seriesAjax?type=series&nodeId=%s&recommend'
+            'edId=0&sort=&page=0&pageSize=10000' % playlist_id, display_id, tries=5,
+            note='Downloading playlist')
+
+        videos_paths = re.findall(
+            '(?s)class="shortTitle">.*?href="(/[^"]+)', playlist)
+        entries = [
+            self.url_result('http://vod.tvp.pl%s' % v_path, ie=TvpIE.ie_key())
+            for v_path in videos_paths]
+
+        return {
+            '_type': 'playlist',
+            'id': playlist_id,
+            'display_id': display_id,
+            'title': title,
+            'entries': entries,
         }
index eb94737546fb725e992249d356588417d90ad81a..9a53a3c74143d72a14842ea70ce4063a8d28a30c 100644 (file)
@@ -6,7 +6,6 @@ import re
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
-    ExtractorError,
     parse_iso8601,
     qualities,
 )
@@ -182,8 +181,8 @@ class TVPlayIE(InfoExtractor):
             'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON')
 
         if video['is_geo_blocked']:
-            raise ExtractorError(
-                'This content is not available in your country due to copyright reasons', expected=True)
+            self.report_warning(
+                'This content might not be available in your country due to copyright reasons')
 
         streams = self._download_json(
             'http://playapi.mtgx.tv/v1/videos/stream/%s' % video_id, video_id, 'Downloading streams JSON')
diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py
new file mode 100644 (file)
index 0000000..67e8bfe
--- /dev/null
@@ -0,0 +1,109 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_iso8601,
+    int_or_none,
+)
+
+
+class TwentyFourVideoIE(InfoExtractor):
+    IE_NAME = '24video'
+    _VALID_URL = r'https?://(?:www\.)?24video\.net/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
+
+    _TESTS = [
+        {
+            'url': 'http://www.24video.net/video/view/1044982',
+            'md5': '48dd7646775690a80447a8dca6a2df76',
+            'info_dict': {
+                'id': '1044982',
+                'ext': 'mp4',
+                'title': 'Эротика каменного века',
+                'description': 'Как смотрели порно в каменном веке.',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'uploader': 'SUPERTELO',
+                'duration': 31,
+                'timestamp': 1275937857,
+                'upload_date': '20100607',
+                'age_limit': 18,
+                'like_count': int,
+                'dislike_count': int,
+            },
+        },
+        {
+            'url': 'http://www.24video.net/player/new24_play.swf?id=1044982',
+            'only_matching': True,
+        }
+    ]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(
+            'http://www.24video.net/video/view/%s' % video_id, video_id)
+
+        title = self._og_search_title(webpage)
+        description = self._html_search_regex(
+            r'<span itemprop="description">([^<]+)</span>', webpage, 'description', fatal=False)
+        thumbnail = self._og_search_thumbnail(webpage)
+        duration = int_or_none(self._og_search_property(
+            'duration', webpage, 'duration', fatal=False))
+        timestamp = parse_iso8601(self._search_regex(
+            r'<time id="video-timeago" datetime="([^"]+)" itemprop="uploadDate">',
+            webpage, 'upload date'))
+
+        uploader = self._html_search_regex(
+            r'Загрузил\s*<a href="/jsecUser/movies/[^"]+" class="link">([^<]+)</a>',
+            webpage, 'uploader', fatal=False)
+
+        view_count = int_or_none(self._html_search_regex(
+            r'<span class="video-views">(\d+) просмотр',
+            webpage, 'view count', fatal=False))
+        comment_count = int_or_none(self._html_search_regex(
+            r'<div class="comments-title" id="comments-count">(\d+) комментари',
+            webpage, 'comment count', fatal=False))
+
+        formats = []
+
+        pc_video = self._download_xml(
+            'http://www.24video.net/video/xml/%s?mode=play' % video_id,
+            video_id, 'Downloading PC video URL').find('.//video')
+
+        formats.append({
+            'url': pc_video.attrib['url'],
+            'format_id': 'pc',
+            'quality': 1,
+        })
+
+        like_count = int_or_none(pc_video.get('ratingPlus'))
+        dislike_count = int_or_none(pc_video.get('ratingMinus'))
+        age_limit = 18 if pc_video.get('adult') == 'true' else 0
+
+        mobile_video = self._download_xml(
+            'http://www.24video.net/video/xml/%s' % video_id,
+            video_id, 'Downloading mobile video URL').find('.//video')
+
+        formats.append({
+            'url': mobile_video.attrib['url'],
+            'format_id': 'mobile',
+            'quality': 0,
+        })
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'duration': duration,
+            'timestamp': timestamp,
+            'view_count': view_count,
+            'comment_count': comment_count,
+            'like_count': like_count,
+            'dislike_count': dislike_count,
+            'age_limit': age_limit,
+            'formats': formats,
+        }
index 36aa1ad6ec578859d90c947dee9c39213dcfda59..b11a1d5610d0dffe0d98df7d7b05d4228552dfb7 100644 (file)
@@ -1,9 +1,14 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 import itertools
 import re
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_parse,
+    compat_urllib_request,
+)
 from ..utils import (
     ExtractorError,
     parse_iso8601,
@@ -17,6 +22,7 @@ class TwitchIE(InfoExtractor):
     _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?twitch\.tv/
         (?:
             (?P<channelid>[^/]+)|
+            (?:(?:[^/]+)/v/(?P<vodid>[^/]+))|
             (?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
             (?:(?:[^/]+)/c/(?P<chapterid>[^/]+))
         )
@@ -24,6 +30,7 @@ class TwitchIE(InfoExtractor):
         """
     _PAGE_LIMIT = 100
     _API_BASE = 'https://api.twitch.tv'
+    _LOGIN_URL = 'https://secure.twitch.tv/user/login'
     _TESTS = [{
         'url': 'http://www.twitch.tv/riotgames/b/577357806',
         'info_dict': {
@@ -64,11 +71,24 @@ class TwitchIE(InfoExtractor):
     def _extract_media(self, item, item_id):
         ITEMS = {
             'a': 'video',
+            'v': 'vod',
             'c': 'chapter',
         }
         info = self._extract_info(self._download_json(
             '%s/kraken/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
             'Downloading %s info JSON' % ITEMS[item]))
+
+        if item == 'v':
+            access_token = self._download_json(
+                '%s/api/vods/%s/access_token' % (self._API_BASE, item_id), item_id,
+                'Downloading %s access token' % ITEMS[item])
+            formats = self._extract_m3u8_formats(
+                'http://usher.twitch.tv/vod/%s?nauth=%s&nauthsig=%s'
+                % (item_id, access_token['token'], access_token['sig']),
+                item_id, 'mp4')
+            info['formats'] = formats
+            return info
+
         response = self._download_json(
             '%s/api/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
             'Downloading %s playlist JSON' % ITEMS[item])
@@ -109,6 +129,44 @@ class TwitchIE(InfoExtractor):
             'view_count': info['views'],
         }
 
+    def _real_initialize(self):
+        self._login()
+
+    def _login(self):
+        (username, password) = self._get_login_info()
+        if username is None:
+            return
+
+        login_page = self._download_webpage(
+            self._LOGIN_URL, None, 'Downloading login page')
+
+        authenticity_token = self._search_regex(
+            r'<input name="authenticity_token" type="hidden" value="([^"]+)"',
+            login_page, 'authenticity token')
+
+        login_form = {
+            'utf8': '✓'.encode('utf-8'),
+            'authenticity_token': authenticity_token,
+            'redirect_on_login': '',
+            'embed_form': 'false',
+            'mp_source_action': '',
+            'follow': '',
+            'user[login]': username,
+            'user[password]': password,
+        }
+
+        request = compat_urllib_request.Request(
+            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
+        request.add_header('Referer', self._LOGIN_URL)
+        response = self._download_webpage(
+            request, None, 'Logging in as %s' % username)
+
+        m = re.search(
+            r"id=([\"'])login_error_message\1[^>]*>(?P<msg>[^<]+)", response)
+        if m:
+            raise ExtractorError(
+                'Unable to login: %s' % m.group('msg').strip(), expected=True)
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         if mobj.group('chapterid'):
@@ -165,6 +223,8 @@ class TwitchIE(InfoExtractor):
             """
         elif mobj.group('videoid'):
             return self._extract_media('a', mobj.group('videoid'))
+        elif mobj.group('vodid'):
+            return self._extract_media('v', mobj.group('vodid'))
         elif mobj.group('channelid'):
             channel_id = mobj.group('channelid')
             info = self._download_json(
index 0e4d386a8ba32387f6f9025e633efb4c1ee59700..4667ed83b71f4aec5f081741834e2c9cca010e82 100644 (file)
@@ -3,9 +3,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
+)
+from ..utils import (
     ExtractorError,
 )
 
@@ -97,11 +99,8 @@ class UdemyIE(InfoExtractor):
         if 'returnUrl' not in response:
             raise ExtractorError('Unable to log in')
 
-
-
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        lecture_id = mobj.group('id')
+        lecture_id = self._match_id(url)
 
         lecture = self._download_json(
             'https://www.udemy.com/api-1.1/lectures/%s' % lecture_id,
index 5d06fcc9e65690ae7f9a474e274e7f8c14ce4a73..8872cfcb2795ab0bfb9db1ad5418eb61dd0dffc6 100644 (file)
@@ -1,11 +1,11 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
+)
+from ..utils import (
     unified_strdate,
 )
 
@@ -18,11 +18,10 @@ class UrortIE(InfoExtractor):
         'url': 'https://urort.p3.no/#!/Band/Gerilja',
         'md5': '5ed31a924be8a05e47812678a86e127b',
         'info_dict': {
-            'id': '33124-4',
+            'id': '33124-24',
             'ext': 'mp3',
             'title': 'The Bomb',
             'thumbnail': 're:^https?://.+\.jpg',
-            'like_count': int,
             'uploader': 'Gerilja',
             'uploader_id': 'Gerilja',
             'upload_date': '20100323',
@@ -33,25 +32,31 @@ class UrortIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        playlist_id = mobj.group('id')
+        playlist_id = self._match_id(url)
 
         fstr = compat_urllib_parse.quote("InternalBandUrl eq '%s'" % playlist_id)
-        json_url = 'http://urort.p3.no/breeze/urort/TrackDtos?$filter=' + fstr
+        json_url = 'http://urort.p3.no/breeze/urort/TrackDTOViews?$filter=%s&$orderby=Released%%20desc&$expand=Tags%%2CFiles' % fstr
         songs = self._download_json(json_url, playlist_id)
-        print(songs[0])
-
-        entries = [{
-            'id': '%d-%s' % (s['BandId'], s['$id']),
-            'title': s['Title'],
-            'url': s['TrackUrl'],
-            'ext': 'mp3',
-            'uploader_id': playlist_id,
-            'uploader': s.get('BandName', playlist_id),
-            'like_count': s.get('LikeCount'),
-            'thumbnail': 'http://urort.p3.no/cloud/images/%s' % s['Image'],
-            'upload_date': unified_strdate(s.get('Released')),
-        } for s in songs]
+        entries = []
+        for s in songs:
+            formats = [{
+                'tbr': f.get('Quality'),
+                'ext': f['FileType'],
+                'format_id': '%s-%s' % (f['FileType'], f.get('Quality', '')),
+                'url': 'http://p3urort.blob.core.windows.net/tracks/%s' % f['FileRef'],
+                'preference': 3 if f['FileType'] == 'mp3' else 2,
+            } for f in s['Files']]
+            self._sort_formats(formats)
+            e = {
+                'id': '%d-%s' % (s['BandId'], s['$id']),
+                'title': s['Title'],
+                'uploader_id': playlist_id,
+                'uploader': s.get('BandName', playlist_id),
+                'thumbnail': 'http://urort.p3.no/cloud/images/%s' % s['Image'],
+                'upload_date': unified_strdate(s.get('Released')),
+                'formats': formats,
+            }
+            entries.append(e)
 
         return {
             '_type': 'playlist',
index 53dc3a496ff65edf044137540080d9190ad8d72b..68d03b99905cce848eb38fde8b6d8e643c548105 100644 (file)
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urlparse,
 )
 
index 455b6d9da62f221cf0854655f707d5963546840f..dd026748dcbb536f9f49181b0d211bf0a9157777 100644 (file)
@@ -1,19 +1,18 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
-
+)
+from ..utils import (
     ExtractorError,
 )
 
 
 class Vbox7IE(InfoExtractor):
-    _VALID_URL = r'http://(www\.)?vbox7\.com/play:(?P<id>[^/]+)'
+    _VALID_URL = r'http://(?:www\.)?vbox7\.com/play:(?P<id>[^/]+)'
     _TEST = {
         'url': 'http://vbox7.com/play:249bb972c2',
         'md5': '99f65c0c9ef9b682b97313e052734c3f',
@@ -25,8 +24,7 @@ class Vbox7IE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         redirect_page, urlh = self._download_webpage_handle(url, video_id)
         new_location = self._search_regex(r'window\.location = \'(.*)\';',
index 94647d1c8c88a18cfb6abcba2ec5ed8e71e9c4b6..96353f5250783be95fd4bf308190309baae70187 100644 (file)
@@ -4,10 +4,13 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urlparse,
-    get_element_by_id,
+)
+from ..utils import (
+    ExtractorError,
     clean_html,
+    get_element_by_id,
 )
 
 
@@ -15,24 +18,27 @@ class VeeHDIE(InfoExtractor):
     _VALID_URL = r'https?://veehd\.com/video/(?P<id>\d+)'
 
     _TEST = {
-        'url': 'http://veehd.com/video/4686958',
+        'url': 'http://veehd.com/video/4639434_Solar-Sinter',
         'info_dict': {
-            'id': '4686958',
+            'id': '4639434',
             'ext': 'mp4',
-            'title': 'Time Lapse View from Space ( ISS)',
-            'uploader_id': 'spotted',
-            'description': 'md5:f0094c4cf3a72e22bc4e4239ef767ad7',
+            'title': 'Solar Sinter',
+            'uploader_id': 'VideoEyes',
+            'description': 'md5:46a840e8692ddbaffb5f81d9885cb457',
         },
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         # VeeHD seems to send garbage on the first request.
         # See https://github.com/rg3/youtube-dl/issues/2102
         self._download_webpage(url, video_id, 'Requesting webpage')
         webpage = self._download_webpage(url, video_id)
+
+        if 'This video has been removed<' in webpage:
+            raise ExtractorError('Video %s has been removed' % video_id, expected=True)
+
         player_path = self._search_regex(
             r'\$\("#playeriframe"\).attr\({src : "(.+?)"',
             webpage, 'player path')
@@ -41,18 +47,35 @@ class VeeHDIE(InfoExtractor):
         self._download_webpage(player_url, video_id, 'Requesting player page')
         player_page = self._download_webpage(
             player_url, video_id, 'Downloading player page')
+
         config_json = self._search_regex(
-            r'value=\'config=({.+?})\'', player_page, 'config json')
-        config = json.loads(config_json)
+            r'value=\'config=({.+?})\'', player_page, 'config json', default=None)
+
+        if config_json:
+            config = json.loads(config_json)
+            video_url = compat_urlparse.unquote(config['clip']['url'])
+        else:
+            iframe_src = self._search_regex(
+                r'<iframe[^>]+src="/?([^"]+)"', player_page, 'iframe url')
+            iframe_url = 'http://veehd.com/%s' % iframe_src
+
+            self._download_webpage(iframe_url, video_id, 'Requesting iframe page')
+            iframe_page = self._download_webpage(
+                iframe_url, video_id, 'Downloading iframe page')
+
+            video_url = self._search_regex(
+                r"file\s*:\s*'([^']+)'", iframe_page, 'video url')
 
-        video_url = compat_urlparse.unquote(config['clip']['url'])
         title = clean_html(get_element_by_id('videoName', webpage).rpartition('|')[0])
-        uploader_id = self._html_search_regex(r'<a href="/profile/\d+">(.+?)</a>',
-                                              webpage, 'uploader')
-        thumbnail = self._search_regex(r'<img id="veehdpreview" src="(.+?)"',
-                                       webpage, 'thumbnail')
-        description = self._html_search_regex(r'<td class="infodropdown".*?<div>(.*?)<ul',
-                                              webpage, 'description', flags=re.DOTALL)
+        uploader_id = self._html_search_regex(
+            r'<a href="/profile/\d+">(.+?)</a>',
+            webpage, 'uploader')
+        thumbnail = self._search_regex(
+            r'<img id="veehdpreview" src="(.+?)"',
+            webpage, 'thumbnail')
+        description = self._html_search_regex(
+            r'<td class="infodropdown".*?<div>(.*?)<ul',
+            webpage, 'description', flags=re.DOTALL)
 
         return {
             '_type': 'video',
index a7953a7e7c5d33b154435cd7b4afa354994f4bf5..01e258e32218c227c5de3caf60588baab56e9045 100644 (file)
@@ -4,8 +4,10 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_request,
+)
+from ..utils import (
     int_or_none,
     ExtractorError,
 )
index c912c3cbe7ae42b221816b50ec6a97139cb13d55..43f6b029da8ff5df7fe808c11a85f8a8120f8ca5 100644 (file)
@@ -4,8 +4,10 @@ import re
 import xml.etree.ElementTree
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_request,
+)
+from ..utils import (
     ExtractorError,
 )
 
index 70578a4cc6866524e2c52d574976579a8ad238e5..2f111bf7ee042de1fce790a3f0d0f13be7f1feff 100644 (file)
@@ -17,7 +17,7 @@ class VGTVIE(InfoExtractor):
             'info_dict': {
                 'id': '84196',
                 'ext': 'mp4',
-                'title': 'Hevnen er søt episode 1:10 - Abu',
+                'title': 'Hevnen er søt: Episode 10 - Abu',
                 'description': 'md5:e25e4badb5f544b04341e14abdc72234',
                 'thumbnail': 're:^https?://.*\.jpg',
                 'duration': 648.000,
@@ -35,7 +35,7 @@ class VGTVIE(InfoExtractor):
                 'title': 'OPPTAK: VGTV følger EM-kvalifiseringen',
                 'description': 'md5:3772d9c0dc2dff92a886b60039a7d4d3',
                 'thumbnail': 're:^https?://.*\.jpg',
-                'duration': 9056.000,
+                'duration': 9103.0,
                 'timestamp': 1410113864,
                 'upload_date': '20140907',
                 'view_count': int,
index ac6c255376442d132948eb5f54e0517bca5a66f4..0ffc7ff7dc9185a3a3ec5c0fd14d302872662dda 100644 (file)
@@ -1,10 +1,8 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
+from ..compat import compat_urlparse
 from .internetvideoarchive import InternetVideoArchiveIE
-from ..utils import compat_urlparse
 
 
 class VideoDetectiveIE(InfoExtractor):
@@ -17,13 +15,12 @@ class VideoDetectiveIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'KICK-ASS 2',
             'description': 'md5:65ba37ad619165afac7d432eaded6013',
-            'duration': 135,
+            'duration': 138,
         },
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
         og_video = self._og_search_video_url(webpage)
         query = compat_urlparse.urlparse(og_video).query
index 29c4e0101ec21eb59c22de9739a516b9f96c0e0f..9fc64d172e63ecb15469efc2a2085d8bccc06e53 100644 (file)
@@ -1,11 +1,12 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
+    compat_urllib_request,
+)
+from ..utils import (
     remove_start,
 )
 
@@ -16,22 +17,23 @@ class VideoMegaIE(InfoExtractor):
         (?:iframe\.php)?\?ref=(?P<id>[A-Za-z0-9]+)
         '''
     _TEST = {
-        'url': 'http://videomega.tv/?ref=GKeGPVedBe',
-        'md5': '240fb5bcf9199961f48eb17839b084d6',
+        'url': 'http://videomega.tv/?ref=QR0HCUHI1661IHUCH0RQ',
+        'md5': 'bf5c2f95c4c917536e80936af7bc51e1',
         'info_dict': {
-            'id': 'GKeGPVedBe',
+            'id': 'QR0HCUHI1661IHUCH0RQ',
             'ext': 'mp4',
-            'title': 'XXL - All Sports United',
+            'title': 'Big Buck Bunny',
             'thumbnail': 're:^https?://.*\.jpg$',
         }
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
-        url = 'http://videomega.tv/iframe.php?ref={0:}'.format(video_id)
-        webpage = self._download_webpage(url, video_id)
+        iframe_url = 'http://videomega.tv/iframe.php?ref={0:}'.format(video_id)
+        req = compat_urllib_request.Request(iframe_url)
+        req.add_header('Referer', url)
+        webpage = self._download_webpage(req, video_id)
 
         escaped_data = self._search_regex(
             r'unescape\("([^"]+)"\)', webpage, 'escaped data')
@@ -39,13 +41,13 @@ class VideoMegaIE(InfoExtractor):
 
         thumbnail = self._search_regex(
             r'image:\s*"([^"]+)"', playlist, 'thumbnail', fatal=False)
-        url = self._search_regex(r'file:\s*"([^"]+)"', playlist, 'URL')
+        video_url = self._search_regex(r'file:\s*"([^"]+)"', playlist, 'URL')
         title = remove_start(self._html_search_regex(
             r'<title>(.*?)</title>', webpage, 'title'), 'VideoMega.tv - ')
 
         formats = [{
             'format_id': 'sd',
-            'url': url,
+            'url': video_url,
         }]
         self._sort_formats(formats)
 
@@ -54,4 +56,5 @@ class VideoMegaIE(InfoExtractor):
             'title': title,
             'formats': formats,
             'thumbnail': thumbnail,
+            'http_referer': iframe_url,
         }
diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py
new file mode 100644 (file)
index 0000000..619039e
--- /dev/null
@@ -0,0 +1,118 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class VierIE(InfoExtractor):
+    IE_NAME = 'vier'
+    _VALID_URL = r'https?://(?:www\.)?vier\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))'
+    _TESTS = [{
+        'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129',
+        'info_dict': {
+            'id': '16129',
+            'display_id': 'het-wordt-warm-de-moestuin',
+            'ext': 'mp4',
+            'title': 'Het wordt warm in De Moestuin',
+            'description': 'De vele uren werk eisen hun tol. Wim droomt van assistentie...',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://www.vier.be/planb/videos/mieren-herders-van-de-bladluizen',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.vier.be/video/v3/embed/16129',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        embed_id = mobj.group('embed_id')
+        display_id = mobj.group('display_id') or embed_id
+
+        webpage = self._download_webpage(url, display_id)
+
+        video_id = self._search_regex(
+            r'"nid"\s*:\s*"(\d+)"', webpage, 'video id')
+        application = self._search_regex(
+            r'"application"\s*:\s*"([^"]+)"', webpage, 'application', default='vier_vod')
+        filename = self._search_regex(
+            r'"filename"\s*:\s*"([^"]+)"', webpage, 'filename')
+
+        playlist_url = 'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8' % (application, filename)
+        formats = self._extract_m3u8_formats(playlist_url, display_id, 'mp4')
+
+        title = self._og_search_title(webpage, default=display_id)
+        description = self._og_search_description(webpage, default=None)
+        thumbnail = self._og_search_thumbnail(webpage, default=None)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
+
+
+class VierVideosIE(InfoExtractor):
+    IE_NAME = 'vier:videos'
+    _VALID_URL = r'https?://(?:www\.)?vier\.be/(?P<program>[^/]+)/videos(?:\?.*\bpage=(?P<page>\d+)|$)'
+    _TESTS = [{
+        'url': 'http://www.vier.be/demoestuin/videos',
+        'info_dict': {
+            'id': 'demoestuin',
+        },
+        'playlist_mincount': 153,
+    }, {
+        'url': 'http://www.vier.be/demoestuin/videos?page=6',
+        'info_dict': {
+            'id': 'demoestuin-page6',
+        },
+        'playlist_mincount': 20,
+    }, {
+        'url': 'http://www.vier.be/demoestuin/videos?page=7',
+        'info_dict': {
+            'id': 'demoestuin-page7',
+        },
+        'playlist_mincount': 13,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        program = mobj.group('program')
+
+        webpage = self._download_webpage(url, program)
+
+        page_id = mobj.group('page')
+        if page_id:
+            page_id = int(page_id)
+            start_page = page_id
+            last_page = start_page + 1
+            playlist_id = '%s-page%d' % (program, page_id)
+        else:
+            start_page = 0
+            last_page = int(self._search_regex(
+                r'videos\?page=(\d+)">laatste</a>',
+                webpage, 'last page', default=0)) + 1
+            playlist_id = program
+
+        entries = []
+        for current_page_id in range(start_page, last_page):
+            current_page = self._download_webpage(
+                'http://www.vier.be/%s/videos?page=%d' % (program, current_page_id),
+                program,
+                'Downloading page %d' % (current_page_id + 1)) if current_page_id != page_id else webpage
+            page_entries = [
+                self.url_result('http://www.vier.be' + video_url, 'Vier')
+                for video_url in re.findall(
+                    r'<h3><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page)]
+            entries.extend(page_entries)
+
+        return self.playlist_result(entries, playlist_id)
index 15f31529822bcba124cfb12bcb9e56566b3bfba7..944901e1482a666ae90cc5e1c0f86e325ec2aecc 100644 (file)
@@ -17,7 +17,6 @@ class VikiIE(SubtitlesInfoExtractor):
     _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
     _TEST = {
         'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
-        'md5': 'a21454021c2646f5433514177e2caa5f',
         'info_dict': {
             'id': '1023585v',
             'ext': 'mp4',
@@ -31,8 +30,7 @@ class VikiIE(SubtitlesInfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(1)
+        video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
         title = self._og_search_title(webpage)
index 33d370e1ceb1f0d6db418c8575830ac60d4294a4..ee3d86117e625cca66303aeeee229f1a091b4602 100644 (file)
@@ -14,28 +14,17 @@ class VimpleIE(InfoExtractor):
     IE_DESC = 'Vimple.ru'
     _VALID_URL = r'https?://(player.vimple.ru/iframe|vimple.ru)/(?P<id>[a-f0-9]{10,})'
     _TESTS = [
-        # Quality: Large, from iframe
         {
-            'url': 'http://player.vimple.ru/iframe/b132bdfd71b546d3972f9ab9a25f201c',
+            'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf',
+            'md5': '2e750a330ed211d3fd41821c6ad9a279',
             'info_dict': {
-                'id': 'b132bdfd71b546d3972f9ab9a25f201c',
-                'title': 'great-escape-minecraft.flv',
+                'id': 'c0f6b1687dcd4000a97ebe70068039cf',
                 'ext': 'mp4',
-                'duration': 352,
-                'webpage_url': 'http://vimple.ru/b132bdfd71b546d3972f9ab9a25f201c',
+                'title': 'Sunset',
+                'duration': 20,
+                'thumbnail': 're:https?://.*?\.jpg',
             },
         },
-        # Quality: Medium, from mainpage
-        {
-            'url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd',
-            'info_dict': {
-                'id': 'a15950562888453b8e6f9572dc8600cd',
-                'title': 'DB 01',
-                'ext': 'flv',
-                'duration': 1484,
-                'webpage_url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd',
-            }
-        },
     ]
 
     def _real_extract(self, url):
index 42995226e584b0ce4e0d207b6b702157ec6f4030..0b58fe0fe0b5188e9c9865e56ce064e94dbc45e5 100644 (file)
@@ -17,6 +17,7 @@ class VineIE(InfoExtractor):
             'id': 'b9KOOWX7HUx',
             'ext': 'mp4',
             'title': 'Chicken.',
+            'alt_title': 'Vine by Jack Dorsey',
             'description': 'Chicken.',
             'upload_date': '20130519',
             'uploader': 'Jack Dorsey',
@@ -25,30 +26,26 @@ class VineIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id)
 
         data = json.loads(self._html_search_regex(
             r'window\.POST_DATA = { %s: ({.+?}) }' % video_id, webpage, 'vine data'))
 
-        formats = [
-            {
-                'url': data['videoLowURL'],
-                'ext': 'mp4',
-                'format_id': 'low',
-            },
-            {
-                'url': data['videoUrl'],
-                'ext': 'mp4',
-                'format_id': 'standard',
-            }
-        ]
+        formats = [{
+            'url': data['videoLowURL'],
+            'ext': 'mp4',
+            'format_id': 'low',
+        }, {
+            'url': data['videoUrl'],
+            'ext': 'mp4',
+            'format_id': 'standard',
+        }]
 
         return {
             'id': video_id,
             'title': self._og_search_title(webpage),
+            'alt_title': self._og_search_description(webpage),
             'description': data['description'],
             'thumbnail': data['thumbnailUrl'],
             'upload_date': unified_strdate(data['created']),
@@ -63,29 +60,36 @@ class VineIE(InfoExtractor):
 
 class VineUserIE(InfoExtractor):
     IE_NAME = 'vine:user'
-    _VALID_URL = r'(?:https?://)?vine\.co/(?P<user>[^/]+)/?(\?.*)?$'
+    _VALID_URL = r'(?:https?://)?vine\.co/(?P<u>u/)?(?P<user>[^/]+)/?(\?.*)?$'
     _VINE_BASE_URL = "https://vine.co/"
-    _TEST = {
-        'url': 'https://vine.co/Visa',
-        'info_dict': {
-            'id': 'Visa',
+    _TESTS = [
+        {
+            'url': 'https://vine.co/Visa',
+            'info_dict': {
+                'id': 'Visa',
+            },
+            'playlist_mincount': 46,
         },
-        'playlist_mincount': 46,
-    }
+        {
+            'url': 'https://vine.co/u/941705360593584128',
+            'only_matching': True,
+        },
+    ]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         user = mobj.group('user')
+        u = mobj.group('u')
 
-        profile_url = "%sapi/users/profiles/vanity/%s" % (
-            self._VINE_BASE_URL, user)
+        profile_url = "%sapi/users/profiles/%s%s" % (
+            self._VINE_BASE_URL, 'vanity/' if not u else '', user)
         profile_data = self._download_json(
             profile_url, user, note='Downloading user profile data')
 
         user_id = profile_data['data']['userId']
         timeline_data = []
         for pagenum in itertools.count(1):
-            timeline_url = "%sapi/timelines/users/%s?page=%s" % (
+            timeline_url = "%sapi/timelines/users/%s?page=%s&size=100" % (
                 self._VINE_BASE_URL, user_id, pagenum)
             timeline_page = self._download_json(
                 timeline_url, user, note='Downloading page %d' % pagenum)
index ca6b0d5b3369c53b7e06715ea5a56fd338d71e41..81e02a6244d83327b05c6b76c490391b97b15f92 100644 (file)
@@ -5,14 +5,17 @@ import re
 import json
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_str,
+    compat_urllib_parse,
+    compat_urllib_request,
+)
 from ..utils import (
     ExtractorError,
-    compat_urllib_request,
-    compat_urllib_parse,
-    compat_str,
+    orderedSet,
     unescapeHTML,
     unified_strdate,
-    orderedSet)
+)
 
 
 class VKIE(InfoExtractor):
@@ -161,6 +164,14 @@ class VKIE(InfoExtractor):
             self.to_screen('Youtube video detected')
             return self.url_result(m_yt.group(1), 'Youtube')
 
+        m_rutube = re.search(
+            r'\ssrc="((?:https?:)?//rutube\.ru\\?/video\\?/embed(?:.*?))\\?"', info_page)
+        if m_rutube is not None:
+            self.to_screen('rutube video detected')
+            rutube_url = self._proto_relative_url(
+                m_rutube.group(1).replace('\\', ''))
+            return self.url_result(rutube_url)
+
         m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.*?});', info_page)
         if m_opts:
             m_opts_url = re.search(r"url\s*:\s*'([^']+)", m_opts.group(1))
index affef650726d716b7e80aaab5c66dab3bc3ddc28..1c0966a793511a2ec3a9d147bd75ff22e8fb7209 100644 (file)
@@ -2,8 +2,9 @@
 from __future__ import unicode_literals
 
 import re
+
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
 )
@@ -24,8 +25,7 @@ class VodlockerIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         fields = dict(re.findall(r'''(?x)<input\s+
index 1b2f731e932a63fbc7722251c0b4e57f0963c34c..405cb9db49f41a144a4c842d8f99aeb1c2023da9 100644 (file)
@@ -3,9 +3,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_str,
+)
 from ..utils import (
     int_or_none,
-    compat_str,
     ExtractorError,
 )
 
index ec3c010ad7e151bfc304315cdc5fd32bc21e8f43..c3fde53f5ef06a56b54e94b20b72a7e98c1992a5 100644 (file)
@@ -3,8 +3,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse_urlparse,
+)
+from ..utils import (
     ExtractorError,
     parse_duration,
     qualities,
@@ -25,10 +27,9 @@ class VuClipIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
+
         ad_m = re.search(
             r'''value="No.*?" onClick="location.href='([^"']+)'"''', webpage)
         if ad_m:
index 88bbbb21967c6807c536ecc5b06d8d8f41095219..c17bebd6e919673d9011de3ac37dfff2929b2cc8 100644 (file)
@@ -10,14 +10,14 @@ from ..utils import (
 
 
 class WashingtonPostIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
+    _VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
     _TEST = {
         'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
         'info_dict': {
             'title': 'Sinkhole of bureaucracy',
         },
         'playlist': [{
-            'md5': 'c3f4b4922ffa259243f68e928db2db8c',
+            'md5': '79132cc09ec5309fa590ae46e4cc31bc',
             'info_dict': {
                 'id': 'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f',
                 'ext': 'mp4',
@@ -29,7 +29,7 @@ class WashingtonPostIE(InfoExtractor):
                 'upload_date': '20140322',
             },
         }, {
-            'md5': 'f645a07652c2950cd9134bb852c5f5eb',
+            'md5': 'e1d5734c06865cc504ad99dc2de0d443',
             'info_dict': {
                 'id': '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f',
                 'ext': 'mp4',
@@ -44,10 +44,9 @@ class WashingtonPostIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        page_id = mobj.group('id')
-
+        page_id = self._match_id(url)
         webpage = self._download_webpage(url, page_id)
+
         title = self._og_search_title(webpage)
         uuids = re.findall(r'data-video-uuid="([^"]+)"', webpage)
         entries = []
index 93a6e64542c71be1f05dac1e351d5c57ecd28ff7..45466e31b7445f8dd8da742308dcc69f2ff1152f 100644 (file)
@@ -1,12 +1,15 @@
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 
+import itertools
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_parse_qs,
     compat_urlparse,
+)
+from ..utils import (
     determine_ext,
     unified_strdate,
 )
@@ -65,6 +68,10 @@ class WDRIE(InfoExtractor):
                 'upload_date': '20140717',
             },
         },
+        {
+            'url': 'http://www1.wdr.de/mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100.html',
+            'playlist_mincount': 146,
+        }
     ]
 
     def _real_extract(self, url):
@@ -79,6 +86,27 @@ class WDRIE(InfoExtractor):
                 self.url_result(page_url + href, 'WDR')
                 for href in re.findall(r'<a href="/?(.+?%s\.html)" rel="nofollow"' % self._PLAYER_REGEX, webpage)
             ]
+
+            if entries:  # Playlist page
+                return self.playlist_result(entries, page_id)
+
+            # Overview page
+            entries = []
+            for page_num in itertools.count(2):
+                hrefs = re.findall(
+                    r'<li class="mediathekvideo"\s*>\s*<img[^>]*>\s*<a href="(/mediathek/video/[^"]+)"',
+                    webpage)
+                entries.extend(
+                    self.url_result(page_url + href, 'WDR')
+                    for href in hrefs)
+                next_url_m = re.search(
+                    r'<li class="nextToLast">\s*<a href="([^"]+)"', webpage)
+                if not next_url_m:
+                    break
+                next_url = page_url + next_url_m.group(1)
+                webpage = self._download_webpage(
+                    next_url, page_id,
+                    note='Downloading playlist page %d' % page_num)
             return self.playlist_result(entries, page_id)
 
         flashvars = compat_parse_qs(
@@ -141,7 +169,6 @@ class WDRMobileIE(InfoExtractor):
             'title': mobj.group('title'),
             'age_limit': int(mobj.group('age_limit')),
             'url': url,
-            'ext': determine_ext(url),
             'user_agent': 'mobile',
         }
 
@@ -171,8 +198,7 @@ class WDRMausIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
         param_code = self._html_search_regex(
@@ -223,5 +249,3 @@ class WDRMausIE(InfoExtractor):
             'thumbnail': thumbnail,
             'upload_date': upload_date,
         }
-
-# TODO test _1
diff --git a/youtube_dl/extractor/webofstories.py b/youtube_dl/extractor/webofstories.py
new file mode 100644 (file)
index 0000000..396cf4e
--- /dev/null
@@ -0,0 +1,102 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class WebOfStoriesIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?webofstories\.com/play/(?:[^/]+/)?(?P<id>[0-9]+)'
+    _VIDEO_DOMAIN = 'http://eu-mobile.webofstories.com/'
+    _GREAT_LIFE_STREAMER = 'rtmp://eu-cdn1.webofstories.com/cfx/st/'
+    _USER_STREAMER = 'rtmp://eu-users.webofstories.com/cfx/st/'
+    _TESTS = [
+        {
+            'url': 'http://www.webofstories.com/play/hans.bethe/71',
+            'md5': '373e4dd915f60cfe3116322642ddf364',
+            'info_dict': {
+                'id': '4536',
+                'ext': 'mp4',
+                'title': 'The temperature of the sun',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'description': 'Hans Bethe talks about calculating the temperature of the sun',
+                'duration': 238,
+            }
+        },
+        {
+            'url': 'http://www.webofstories.com/play/55908',
+            'md5': '2985a698e1fe3211022422c4b5ed962c',
+            'info_dict': {
+                'id': '55908',
+                'ext': 'mp4',
+                'title': 'The story of Gemmata obscuriglobus',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'description': 'Planctomycete talks about The story of Gemmata obscuriglobus',
+                'duration': 169,
+            }
+        },
+    ]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+        title = self._og_search_title(webpage)
+        description = self._html_search_meta('description', webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        story_filename = self._search_regex(
+            r'\.storyFileName\("([^"]+)"\)', webpage, 'story filename')
+        speaker_id = self._search_regex(
+            r'\.speakerId\("([^"]+)"\)', webpage, 'speaker ID')
+        story_id = self._search_regex(
+            r'\.storyId\((\d+)\)', webpage, 'story ID')
+        speaker_type = self._search_regex(
+            r'\.speakerType\("([^"]+)"\)', webpage, 'speaker type')
+        great_life = self._search_regex(
+            r'isGreatLifeStory\s*=\s*(true|false)', webpage, 'great life story')
+        is_great_life_series = great_life == 'true'
+        duration = int_or_none(self._search_regex(
+            r'\.duration\((\d+)\)', webpage, 'duration', fatal=False))
+
+        # URL building, see: http://www.webofstories.com/scripts/player.js
+        ms_prefix = ''
+        if speaker_type.lower() == 'ms':
+            ms_prefix = 'mini_sites/'
+
+        if is_great_life_series:
+            mp4_url = '{0:}lives/{1:}/{2:}.mp4'.format(
+                self._VIDEO_DOMAIN, speaker_id, story_filename)
+            rtmp_ext = 'flv'
+            streamer = self._GREAT_LIFE_STREAMER
+            play_path = 'stories/{0:}/{1:}'.format(
+                speaker_id, story_filename)
+        else:
+            mp4_url = '{0:}{1:}{2:}/{3:}.mp4'.format(
+                self._VIDEO_DOMAIN, ms_prefix, speaker_id, story_filename)
+            rtmp_ext = 'mp4'
+            streamer = self._USER_STREAMER
+            play_path = 'mp4:{0:}{1:}/{2}.mp4'.format(
+                ms_prefix, speaker_id, story_filename)
+
+        formats = [{
+            'format_id': 'mp4_sd',
+            'url': mp4_url,
+        }, {
+            'format_id': 'rtmp_sd',
+            'page_url': url,
+            'url': streamer,
+            'ext': rtmp_ext,
+            'play_path': play_path,
+        }]
+
+        self._sort_formats(formats)
+
+        return {
+            'id': story_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': thumbnail,
+            'description': description,
+            'duration': duration,
+        }
index 748443f811f184d4276d4628cd13ed1e2bf92d9c..13a079151c9c879561e3e538c49f3122f85b349b 100644 (file)
@@ -1,9 +1,8 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..utils import ExtractorError, compat_urllib_request
+from ..compat import compat_urllib_request
+from ..utils import ExtractorError
 
 
 class WistiaIE(InfoExtractor):
@@ -22,8 +21,7 @@ class WistiaIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         request = compat_urllib_request.Request(self._API_URL.format(video_id))
         request.add_header('Referer', url)  # Some videos require this.
index 1b4e883652667f2c2109d34014154c71fd443196..80c48c37d32c0849e689d626811ee34c5b414ee0 100644 (file)
@@ -1,9 +1,7 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
 )
 
@@ -23,10 +21,9 @@ class XBefIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        m = re.match(self._VALID_URL, url)
-        video_id = m.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
+
         title = self._html_search_regex(
             r'<h1[^>]*>(.*?)</h1>', webpage, 'title')
 
index a9aa72e73cc67fc380a38a56c62b967e7db01e08..236ff403bd08f941a2eb023cd41c3bb21c49d4c3 100644 (file)
@@ -1,46 +1,42 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import (
-    parse_iso8601,
-    float_or_none,
     int_or_none,
+    parse_filesize,
+    unified_strdate,
 )
 
 
 class XboxClipsIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?xboxclips\.com/video\.php\?.*vid=(?P<id>[\w-]{36})'
+    _VALID_URL = r'https?://(?:www\.)?xboxclips\.com/(?:video\.php\?.*vid=|[^/]+/)(?P<id>[\w-]{36})'
     _TEST = {
         'url': 'https://xboxclips.com/video.php?uid=2533274823424419&gamertag=Iabdulelah&vid=074a69a9-5faf-46aa-b93b-9909c1720325',
         'md5': 'fbe1ec805e920aeb8eced3c3e657df5d',
         'info_dict': {
             'id': '074a69a9-5faf-46aa-b93b-9909c1720325',
             'ext': 'mp4',
-            'title': 'Iabdulelah playing Upload Studio',
-            'filesize_approx': 28101836.8,
-            'timestamp': 1407388500,
+            'title': 'Iabdulelah playing Titanfall',
+            'filesize_approx': 26800000,
             'upload_date': '20140807',
             'duration': 56,
         }
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
 
         video_url = self._html_search_regex(
-            r'>Link: <a href="([^"]+)">', webpage, 'video URL')
+            r'>(?:Link|Download): <a[^>]+href="([^"]+)"', webpage, 'video URL')
         title = self._html_search_regex(
             r'<title>XboxClips \| ([^<]+)</title>', webpage, 'title')
-        timestamp = parse_iso8601(self._html_search_regex(
+        upload_date = unified_strdate(self._html_search_regex(
             r'>Recorded: ([^<]+)<', webpage, 'upload date', fatal=False))
-        filesize = float_or_none(self._html_search_regex(
-            r'>Size: ([\d\.]+)MB<', webpage, 'file size', fatal=False), invscale=1024 * 1024)
+        filesize = parse_filesize(self._html_search_regex(
+            r'>Size: ([^<]+)<', webpage, 'file size', fatal=False))
         duration = int_or_none(self._html_search_regex(
             r'>Duration: (\d+) Seconds<', webpage, 'duration', fatal=False))
         view_count = int_or_none(self._html_search_regex(
@@ -50,7 +46,7 @@ class XboxClipsIE(InfoExtractor):
             'id': video_id,
             'url': video_url,
             'title': title,
-            'timestamp': timestamp,
+            'upload_date': upload_date,
             'filesize_approx': filesize,
             'duration': duration,
             'view_count': view_count,
index 6b37bcbc959a8e8b83fee052da18728ca9a9c298..4527567f8fc26c45b091aa868dc77b159576b56c 100644 (file)
@@ -14,7 +14,7 @@ from ..utils import (
 
 class XHamsterIE(InfoExtractor):
     """Information Extractor for xHamster"""
-    _VALID_URL = r'http://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
+    _VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
     _TESTS = [
         {
             'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
@@ -39,7 +39,11 @@ class XHamsterIE(InfoExtractor):
                 'duration': 200,
                 'age_limit': 18,
             }
-        }
+        },
+        {
+            'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html',
+            'only_matching': True,
+        },
     ]
 
     def _real_extract(self, url):
@@ -57,7 +61,8 @@ class XHamsterIE(InfoExtractor):
 
         video_id = mobj.group('id')
         seo = mobj.group('seo')
-        mrss_url = 'http://xhamster.com/movies/%s/%s.html' % (video_id, seo)
+        proto = mobj.group('proto')
+        mrss_url = '%s://xhamster.com/movies/%s/%s.html' % (proto, video_id, seo)
         webpage = self._download_webpage(mrss_url, video_id)
 
         title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>', webpage, 'title')
index f7e2e8ac9594ef45a1d329c6359e447796b70f4b..8c6241aedf7249343a725ab705968d0af963294a 100644 (file)
@@ -1,6 +1,8 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..compat import (
     compat_chr,
@@ -25,6 +27,7 @@ class XMinusIE(InfoExtractor):
             'tbr': 320,
             'filesize_approx': 5900000,
             'view_count': int,
+            'description': 'md5:03238c5b663810bc79cf42ef3c03e371',
         }
     }
 
@@ -48,6 +51,11 @@ class XMinusIE(InfoExtractor):
         view_count = int_or_none(self._html_search_regex(
             r'<div class="quality.*?► ([0-9]+)',
             webpage, 'view count', fatal=False))
+        description = self._html_search_regex(
+            r'(?s)<div id="song_texts">(.*?)</div><br',
+            webpage, 'song lyrics', fatal=False)
+        if description:
+            description = re.sub(' *\r *', '\n', description)
 
         enc_token = self._html_search_regex(
             r'minus_track\.tkn="(.+?)"', webpage, 'enc_token')
@@ -64,4 +72,5 @@ class XMinusIE(InfoExtractor):
             'filesize_approx': filesize_approx,
             'tbr': tbr,
             'view_count': view_count,
+            'description': description,
         }
index 53ed7ef5a6ea95826d0324bac53a71bea4913fa4..79ed6c744242bf132afd033ae35949cc1e2263b5 100644 (file)
@@ -1,10 +1,8 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
 )
 
@@ -23,10 +21,7 @@ class XNXXIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        # Get webpage content
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         video_url = self._search_regex(r'flv_url=(.*?)&amp;',
index 38448e7c0fbfe3641cc364a2707a97910ab16cf8..e8490b028e53080b8e685be13577a05603a4af9e 100644 (file)
@@ -1,18 +1,20 @@
 from __future__ import unicode_literals
 
 import re
-import json
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_request,
+    compat_urllib_parse,
+)
+from ..utils import (
     parse_duration,
     str_to_int,
 )
 
 
 class XTubeIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<videoid>[^/?&]+))'
+    _VALID_URL = r'https?://(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<id>[^/?&#]+))'
     _TEST = {
         'url': 'http://www.xtube.com/watch.php?v=kVTUy_G222_',
         'md5': '092fbdd3cbe292c920ef6fc6a8a9cdab',
@@ -28,41 +30,49 @@ class XTubeIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('videoid')
-        url = 'http://www.' + mobj.group('url')
+        video_id = self._match_id(url)
 
         req = compat_urllib_request.Request(url)
         req.add_header('Cookie', 'age_verified=1')
         webpage = self._download_webpage(req, video_id)
 
-        video_title = self._html_search_regex(r'<p class="title">([^<]+)', webpage, 'title')
+        video_title = self._html_search_regex(
+            r'<p class="title">([^<]+)', webpage, 'title')
         video_uploader = self._html_search_regex(
-            r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, 'uploader', fatal=False)
+            [r"var\s+contentOwnerId\s*=\s*'([^']+)",
+             r'By:\s*<a href="/community/profile\.php\?user=([^"]+)'],
+            webpage, 'uploader', fatal=False)
         video_description = self._html_search_regex(
-            r'<p class="fieldsDesc">([^<]+)', webpage, 'description', fatal=False)
+            r'<p class="fieldsDesc">([^<]+)',
+            webpage, 'description', fatal=False)
         duration = parse_duration(self._html_search_regex(
-            r'<span class="bold">Runtime:</span> ([^<]+)</p>', webpage, 'duration', fatal=False))
-        view_count = self._html_search_regex(
-            r'<span class="bold">Views:</span> ([\d,\.]+)</p>', webpage, 'view count', fatal=False)
-        if view_count:
-            view_count = str_to_int(view_count)
-        comment_count = self._html_search_regex(
-            r'<div id="commentBar">([\d,\.]+) Comments</div>', webpage, 'comment count', fatal=False)
-        if comment_count:
-            comment_count = str_to_int(comment_count)
-
-        player_quality_option = json.loads(self._html_search_regex(
-            r'playerQualityOption = ({.+?});', webpage, 'player quality option'))
-
-        QUALITIES = ['3gp', 'mp4_normal', 'mp4_high', 'flv', 'mp4_ultra', 'mp4_720', 'mp4_1080']
-        formats = [
-            {
-                'url': furl,
+            r'<span class="bold">Runtime:</span> ([^<]+)</p>',
+            webpage, 'duration', fatal=False))
+        view_count = str_to_int(self._html_search_regex(
+            r'<span class="bold">Views:</span> ([\d,\.]+)</p>',
+            webpage, 'view count', fatal=False))
+        comment_count = str_to_int(self._html_search_regex(
+            r'<div id="commentBar">([\d,\.]+) Comments</div>',
+            webpage, 'comment count', fatal=False))
+
+        formats = []
+        for format_id, video_url in re.findall(
+                r'flashvars\.quality_(.+?)\s*=\s*"([^"]+)"', webpage):
+            fmt = {
+                'url': compat_urllib_parse.unquote(video_url),
                 'format_id': format_id,
-                'preference': QUALITIES.index(format_id) if format_id in QUALITIES else -1,
-            } for format_id, furl in player_quality_option.items()
-        ]
+            }
+            m = re.search(r'^(?P<height>\d+)[pP]', format_id)
+            if m:
+                fmt['height'] = int(m.group('height'))
+            formats.append(fmt)
+
+        if not formats:
+            video_url = compat_urllib_parse.unquote(self._search_regex(
+                r'flashvars\.video_url\s*=\s*"([^"]+)"',
+                webpage, 'video URL'))
+            formats.append({'url': video_url})
+
         self._sort_formats(formats)
 
         return {
@@ -85,6 +95,7 @@ class XTubeUserIE(InfoExtractor):
         'url': 'http://www.xtube.com/community/profile.php?user=greenshowers',
         'info_dict': {
             'id': 'greenshowers',
+            'age_limit': 18,
         },
         'playlist_mincount': 155,
     }
@@ -114,6 +125,7 @@ class XTubeUserIE(InfoExtractor):
         return {
             '_type': 'playlist',
             'id': username,
+            'age_limit': 18,
             'entries': [{
                 '_type': 'url',
                 'url': eurl,
index 7e00448246beb9ab9b7c25f33b05e6f4f1bb8283..2a45dc574263f7e651020e591fcc40bdf987367d 100644 (file)
@@ -3,15 +3,17 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
-    ExtractorError,
+)
+from ..utils import (
     clean_html,
+    ExtractorError,
 )
 
 
 class XVideosIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)'
+    _VALID_URL = r'https?://(?:www\.)?xvideos\.com/video(?P<id>[0-9]+)(?:.*)'
     _TEST = {
         'url': 'http://www.xvideos.com/video4588838/biker_takes_his_girl',
         'md5': '4b46ae6ea5e6e9086e714d883313c0c9',
@@ -24,37 +26,25 @@ class XVideosIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(1)
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        self.report_extraction(video_id)
-
         mobj = re.search(r'<h1 class="inlineError">(.+?)</h1>', webpage)
         if mobj:
             raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True)
 
-        # Extract video URL
         video_url = compat_urllib_parse.unquote(
             self._search_regex(r'flv_url=(.+?)&', webpage, 'video URL'))
-
-        # Extract title
         video_title = self._html_search_regex(
             r'<title>(.*?)\s+-\s+XVID', webpage, 'title')
-
-        # Extract video thumbnail
         video_thumbnail = self._search_regex(
             r'url_bigthumb=(.+?)&amp', webpage, 'thumbnail', fatal=False)
 
         return {
             'id': video_id,
             'url': video_url,
-            'uploader': None,
-            'upload_date': None,
             'title': video_title,
             'ext': 'flv',
             'thumbnail': video_thumbnail,
-            'description': None,
             'age_limit': 18,
         }
diff --git a/youtube_dl/extractor/xxxymovies.py b/youtube_dl/extractor/xxxymovies.py
new file mode 100644 (file)
index 0000000..5c8f17e
--- /dev/null
@@ -0,0 +1,81 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    int_or_none,
+)
+
+
+class XXXYMoviesIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?xxxymovies\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)'
+    _TEST = {
+        'url': 'http://xxxymovies.com/videos/138669/ecstatic-orgasm-sofcore/',
+        'md5': '810b1bdbbffff89dd13bdb369fe7be4b',
+        'info_dict': {
+            'id': '138669',
+            'display_id': 'ecstatic-orgasm-sofcore',
+            'ext': 'mp4',
+            'title': 'Ecstatic Orgasm Sofcore',
+            'duration': 931,
+            'categories': list,
+            'view_count': int,
+            'like_count': int,
+            'dislike_count': int,
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id')
+
+        webpage = self._download_webpage(url, display_id)
+
+        video_url = self._search_regex(
+            r"video_url\s*:\s*'([^']+)'", webpage, 'video URL')
+
+        title = self._html_search_regex(
+            [r'<div class="block_header">\s*<h1>([^<]+)</h1>',
+             r'<title>(.*?)\s*-\s*XXXYMovies\.com</title>'],
+            webpage, 'title')
+
+        thumbnail = self._search_regex(
+            r"preview_url\s*:\s*'([^']+)'",
+            webpage, 'thumbnail', fatal=False)
+
+        categories = self._html_search_meta(
+            'keywords', webpage, 'categories', default='').split(',')
+
+        duration = parse_duration(self._search_regex(
+            r'<span>Duration:</span>\s*(\d+:\d+)',
+            webpage, 'duration', fatal=False))
+
+        view_count = int_or_none(self._html_search_regex(
+            r'<div class="video_views">\s*(\d+)',
+            webpage, 'view count', fatal=False))
+        like_count = int_or_none(self._search_regex(
+            r'>\s*Likes? <b>\((\d+)\)',
+            webpage, 'like count', fatal=False))
+        dislike_count = int_or_none(self._search_regex(
+            r'>\s*Dislike <b>\((\d+)\)</b>',
+            webpage, 'dislike count', fatal=False))
+
+        age_limit = self._rta_search(webpage)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'url': video_url,
+            'title': title,
+            'thumbnail': thumbnail,
+            'categories': categories,
+            'duration': duration,
+            'view_count': view_count,
+            'like_count': like_count,
+            'dislike_count': dislike_count,
+            'age_limit': age_limit,
+        }
index 0fdb122436d9b7e158a706cd1048630e252d5666..f8e7041a08d042ac44c13338439b5568bf4caac6 100644 (file)
@@ -6,11 +6,14 @@ import json
 import re
 
 from .common import InfoExtractor, SearchInfoExtractor
-from ..utils import (
-    ExtractorError,
+from ..compat import (
     compat_urllib_parse,
     compat_urlparse,
+)
+from ..utils import (
     clean_html,
+    unescapeHTML,
+    ExtractorError,
     int_or_none,
 )
 
@@ -53,14 +56,14 @@ class YahooIE(InfoExtractor):
             }
         },
         {
-            'url': 'https://tw.screen.yahoo.com/taipei-opinion-poll/選情站報-街頭民調-台北市篇-102823042.html',
-            'md5': '92a7fdd8a08783c68a174d7aa067dde8',
+            'url': 'https://tw.screen.yahoo.com/election-2014-askmayor/敢問市長-黃秀霜批賴清德-非常高傲-033009720.html',
+            'md5': '3a09cf59349cfaddae1797acc3c087fc',
             'info_dict': {
-                'id': '7a23b569-7bea-36cb-85b9-bd5301a0a1fb',
+                'id': 'cac903b3-fcf4-3c14-b632-643ab541712f',
                 'ext': 'mp4',
-                'title': '選情站報 街頭民調 台北市篇',
-                'description': '選情站報 街頭民調 台北市篇',
-                'duration': 429,
+                'title': '敢問市長/黃秀霜批賴清德「非常高傲」',
+                'description': '直言台南沒捷運 交通居五都之末',
+                'duration': 396,
             }
         },
         {
@@ -85,14 +88,14 @@ class YahooIE(InfoExtractor):
                 'duration': 121,
             }
         }, {
-            'url': 'https://ca.finance.yahoo.com/news/20-most-valuable-brands-world-112600775.html',
-            'md5': '3e401e4eed6325aa29d9b96125fd5b4f',
+            'url': 'https://ca.finance.yahoo.com/news/hackers-sony-more-trouble-well-154609075.html',
+            'md5': '226a895aae7e21b0129e2a2006fe9690',
             'info_dict': {
-                'id': 'c1b4c09c-8ed8-3b65-8b05-169c55358a83',
+                'id': 'e624c4bc-3389-34de-9dfc-025f74943409',
                 'ext': 'mp4',
-                'title': "Apple Is The World's Most Valuable Brand",
-                'description': 'md5:73eabc1a11c6f59752593b2ceefa1262',
-                'duration': 21,
+                'title': '\'The Interview\' TV Spot: War',
+                'description': 'The Interview',
+                'duration': 30,
             }
         }, {
             'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
@@ -114,6 +117,16 @@ class YahooIE(InfoExtractor):
                 'description': 'md5:1428185051cfd1949807ad4ff6d3686a',
                 'duration': 201,
             }
+        }, {
+            'url': 'https://www.yahoo.com/movies/v/true-story-trailer-173000497.html',
+            'md5': '989396ae73d20c6f057746fb226aa215',
+            'info_dict': {
+                'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1',
+                'ext': 'mp4',
+                'title': '\'True Story\' Trailer',
+                'description': 'True Story',
+                'duration': 150,
+            },
         }, {
             'url': 'https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html',
             'only_matching': True,
@@ -123,6 +136,7 @@ class YahooIE(InfoExtractor):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         display_id = mobj.group('display_id')
+        page_id = mobj.group('id')
         url = mobj.group('url')
         host = mobj.group('host')
         webpage = self._download_webpage(url, display_id)
@@ -147,6 +161,7 @@ class YahooIE(InfoExtractor):
                 r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"',
                 r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"',
                 r'"first_videoid"\s*:\s*"([^"]+)"',
+                r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id),
             ]
             video_id = self._search_regex(CONTENT_ID_REGEXES, webpage, 'content ID')
         else:
@@ -161,17 +176,15 @@ class YahooIE(InfoExtractor):
         region = self._search_regex(
             r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"',
             webpage, 'region', fatal=False, default='US')
-        query = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"'
-                 ' AND plrs="86Gj0vCaSzV_Iuf6hNylf2" AND region="%s"'
-                 ' AND protocol="http"' % (video_id, region))
         data = compat_urllib_parse.urlencode({
-            'q': query,
-            'env': 'prod',
-            'format': 'json',
+            'protocol': 'http',
+            'region': region,
         })
+        query_url = (
+            'https://video.media.yql.yahoo.com/v1/video/sapi/streams/'
+            '{id}?{data}'.format(id=video_id, data=data))
         query_result = self._download_json(
-            'http://video.query.yahoo.com/v1/public/yql?' + data,
-            display_id, 'Downloading video info')
+            query_url, display_id, 'Downloading video info')
 
         info = query_result['query']['results']['mediaObj'][0]
         meta = info.get('meta')
@@ -209,7 +222,7 @@ class YahooIE(InfoExtractor):
         return {
             'id': video_id,
             'display_id': display_id,
-            'title': meta['title'],
+            'title': unescapeHTML(meta['title']),
             'formats': formats,
             'description': clean_html(meta['description']),
             'thumbnail': meta['thumbnail'] if meta.get('thumbnail') else self._og_search_thumbnail(webpage),
diff --git a/youtube_dl/extractor/yesjapan.py b/youtube_dl/extractor/yesjapan.py
new file mode 100644 (file)
index 0000000..112a6c0
--- /dev/null
@@ -0,0 +1,62 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    HEADRequest,
+    get_element_by_attribute,
+    parse_iso8601,
+)
+
+
+class YesJapanIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?yesjapan\.com/video/(?P<slug>[A-Za-z0-9\-]*)_(?P<id>[A-Za-z0-9]+)\.html'
+    _TEST = {
+        'url': 'http://www.yesjapan.com/video/japanese-in-5-20-wa-and-ga-particle-usages_726497834.html',
+        'md5': 'f0be416314e5be21a12b499b330c21cf',
+        'info_dict': {
+            'id': '726497834',
+            'title': 'Japanese in 5! #20 - WA And GA Particle Usages',
+            'description': 'This should clear up some issues most students of Japanese encounter with WA and GA....',
+            'ext': 'mp4',
+            'timestamp': 1416391590,
+            'upload_date': '20141119',
+            'thumbnail': 're:^https?://.*\.jpg$',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+        title = self._og_search_title(webpage)
+        video_url = self._og_search_video_url(webpage)
+        description = self._og_search_description(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        timestamp = None
+        submit_info = get_element_by_attribute('class', 'pm-submit-data', webpage)
+        if submit_info:
+            timestamp = parse_iso8601(self._search_regex(
+                r'datetime="([^"]+)"', submit_info, 'upload date', fatal=False, default=None))
+
+        # attempt to resolve the final URL in order to get a proper extension
+        redirect_req = HEADRequest(video_url)
+        req = self._request_webpage(
+            redirect_req, video_id, note='Resolving final URL', errnote='Could not resolve final URL', fatal=False)
+        if req:
+            video_url = req.geturl()
+
+        formats = [{
+            'format_id': 'sd',
+            'url': video_url,
+        }]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'description': description,
+            'timestamp': timestamp,
+            'thumbnail': thumbnail,
+        }
index 7b621a9e32b3cc4521a2988f00e32252cb17bd65..894678a23dac9d1b03e07f0cd9b2eecc7e690e18 100644 (file)
@@ -5,7 +5,7 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..utils import compat_urllib_parse
+from ..compat import compat_urllib_parse
 
 
 class YnetIE(InfoExtractor):
index 8123928be982dbaccb98638ea0b915007ebc7066..107c9ac36e4f4f48bd768567e4399af15fd07743 100644 (file)
@@ -6,10 +6,11 @@ import re
 import sys
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse_urlparse,
     compat_urllib_request,
-
+)
+from ..utils import (
     ExtractorError,
     unescapeHTML,
     unified_strdate,
@@ -45,7 +46,9 @@ class YouPornIE(InfoExtractor):
         age_limit = self._rta_search(webpage)
 
         # Get JSON parameters
-        json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, 'JSON parameters')
+        json_params = self._search_regex(
+            r'var currentVideo = new Video\((.*)\)[,;]',
+            webpage, 'JSON parameters')
         try:
             params = json.loads(json_params)
         except:
index 2642ecfffbadeff3c7c184d52bf119328d5bbf60..2a1f8be0a7e6d6946b5e1801a4b3cd6dda338efe 100644 (file)
@@ -14,23 +14,24 @@ from .common import InfoExtractor, SearchInfoExtractor
 from .subtitles import SubtitlesInfoExtractor
 from ..jsinterp import JSInterpreter
 from ..swfinterp import SWFInterpreter
-from ..utils import (
+from ..compat import (
     compat_chr,
     compat_parse_qs,
     compat_urllib_parse,
     compat_urllib_request,
     compat_urlparse,
     compat_str,
-
+)
+from ..utils import (
     clean_html,
-    get_element_by_id,
-    get_element_by_attribute,
     ExtractorError,
+    get_element_by_attribute,
+    get_element_by_id,
     int_or_none,
     OnDemandPagedList,
+    orderedSet,
     unescapeHTML,
     unified_strdate,
-    orderedSet,
     uppercase_escape,
 )
 
@@ -44,9 +45,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
     _LOGIN_REQUIRED = False
 
     def _set_language(self):
-        self._set_cookie('.youtube.com', 'PREF', 'f1=50000000&hl=en',
+        self._set_cookie(
+            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
             # YouTube sets the expire time to about two months
-            expire_time=time.time() + 60*24*3600)
+            expire_time=time.time() + 2 * 30 * 24 * 3600)
 
     def _login(self):
         """
@@ -254,7 +256,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
-        '138': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},  # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
@@ -262,9 +264,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},
 
         # Dash mp4 audio
-        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
-        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 128, 'preference': -50},
-        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50},
+        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50},
+        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50},
+        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50},
 
         # Dash webm
         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
@@ -285,7 +287,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
+        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'VP9'},
+        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
 
         # Dash webm audio
         '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
@@ -390,6 +394,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 'format': '141',
             },
         },
+        # JS player signature function name containing $
+        {
+            'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
+            'info_dict': {
+                'id': 'nfWlot6h_JM',
+                'ext': 'm4a',
+                'title': 'Taylor Swift - Shake It Off',
+                'description': 'md5:2acfda1b285bdd478ccec22f9918199d',
+                'uploader': 'TaylorSwiftVEVO',
+                'uploader_id': 'TaylorSwiftVEVO',
+                'upload_date': '20140818',
+            },
+            'params': {
+                'youtube_include_dash_manifest': True,
+                'format': '141',
+            },
+        },
         # Controversy video
         {
             'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
@@ -410,12 +431,71 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 'id': 'HtVdAasjOgU',
                 'ext': 'mp4',
                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
-                'description': 'md5:eca57043abae25130f58f655ad9a7771',
+                'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
                 'uploader': 'The Witcher',
                 'uploader_id': 'WitcherGame',
                 'upload_date': '20140605',
             },
         },
+        # Age-gate video with encrypted signature
+        {
+            'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU',
+            'info_dict': {
+                'id': '6kLq3WMV1nU',
+                'ext': 'mp4',
+                'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
+                'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
+                'uploader': 'LloydVEVO',
+                'uploader_id': 'LloydVEVO',
+                'upload_date': '20110629',
+            },
+        },
+        # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
+        {
+            'url': '__2ABJjxzNo',
+            'info_dict': {
+                'id': '__2ABJjxzNo',
+                'ext': 'mp4',
+                'upload_date': '20100430',
+                'uploader_id': 'deadmau5',
+                'description': 'md5:12c56784b8032162bb936a5f76d55360',
+                'uploader': 'deadmau5',
+                'title': 'Deadmau5 - Some Chords (HD)',
+            },
+            'expected_warnings': [
+                'DASH manifest missing',
+            ]
+        },
+        # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
+        {
+            'url': 'lqQg6PlCWgI',
+            'info_dict': {
+                'id': 'lqQg6PlCWgI',
+                'ext': 'mp4',
+                'upload_date': '20120731',
+                'uploader_id': 'olympic',
+                'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
+                'uploader': 'Olympics',
+                'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
+            },
+            'params': {
+                'skip_download': 'requires avconv',
+            }
+        },
+        # Non-square pixels
+        {
+            'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
+            'info_dict': {
+                'id': '_b-2C3KPAM0',
+                'ext': 'mp4',
+                'stretched_ratio': 16 / 9.,
+                'upload_date': '20110310',
+                'uploader_id': 'AllenMeow',
+                'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
+                'uploader': '孫艾倫',
+                'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
+            },
+        }
     ]
 
     def __init__(self, *args, **kwargs):
@@ -444,7 +524,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
     def _extract_signature_function(self, video_id, player_url, example_sig):
         id_m = re.match(
-            r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
+            r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
             player_url)
         if not id_m:
             raise ExtractorError('Cannot identify player %r' % player_url)
@@ -493,8 +573,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 return 's[%s%s%s]' % (starts, ends, steps)
 
             step = None
-            start = '(Never used)'  # Quelch pyflakes warnings - start will be
-                                    # set as soon as step is set
+            # Quelch pyflakes warnings - start will be set when step is set
+            start = '(Never used)'
             for i, prev in zip(idxs[1:], idxs[:-1]):
                 if step is not None:
                     if i - prev == step:
@@ -525,7 +605,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
     def _parse_sig_js(self, jscode):
         funcname = self._search_regex(
-            r'\.sig\|\|([a-zA-Z0-9]+)\(', jscode,
+            r'\.sig\|\|([a-zA-Z0-9$]+)\(', jscode,
             'Initial JS player signature function name')
 
         jsi = JSInterpreter(jscode)
@@ -565,24 +645,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
     def _get_available_subtitles(self, video_id, webpage):
         try:
-            sub_list = self._download_webpage(
+            subs_doc = self._download_xml(
                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
                 video_id, note=False)
         except ExtractorError as err:
             self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
             return {}
-        lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
 
         sub_lang_list = {}
-        for l in lang_list:
-            lang = l[1]
+        for track in subs_doc.findall('track'):
+            lang = track.attrib['lang_code']
             if lang in sub_lang_list:
                 continue
             params = compat_urllib_parse.urlencode({
                 'lang': lang,
                 'v': video_id,
                 'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
-                'name': unescapeHTML(l[0]).encode('utf-8'),
+                'name': track.attrib['name'].encode('utf-8'),
             })
             url = 'https://www.youtube.com/api/timedtext?' + params
             sub_lang_list[lang] = url
@@ -615,10 +694,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             list_url = caption_url + '&' + list_params
             caption_list = self._download_xml(list_url, video_id)
             original_lang_node = caption_list.find('track')
-            if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr':
+            if original_lang_node is None:
                 self._downloader.report_warning('Video doesn\'t have automatic captions')
                 return {}
             original_lang = original_lang_node.attrib['lang_code']
+            caption_kind = original_lang_node.attrib.get('kind', '')
 
             sub_lang_list = {}
             for lang_node in caption_list.findall('target'):
@@ -628,7 +708,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                     'tlang': sub_lang,
                     'fmt': sub_format,
                     'ts': timestamp,
-                    'kind': 'asr',
+                    'kind': caption_kind,
                 })
                 sub_lang_list[sub_lang] = caption_url + '&' + params
             return sub_lang_list
@@ -665,6 +745,47 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
         return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
 
+    def _parse_dash_manifest(
+            self, video_id, dash_manifest_url, player_url, age_gate):
+        def decrypt_sig(mobj):  # re.sub callback for one "/s/<sig>" URL segment
+            s = mobj.group(1)
+            dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
+            return '/signature/%s' % dec_s
+        dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
+        dash_doc = self._download_xml(
+            dash_manifest_url, video_id,
+            note='Downloading DASH manifest',
+            errnote='Could not download DASH manifest')
+        # Build one format dict per <Representation> that carries a <BaseURL>
+        formats = []
+        for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
+            url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
+            if url_el is None:
+                continue
+            format_id = r.attrib['id']  # KeyError here is caught by the caller
+            video_url = url_el.text
+            filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
+            f = {
+                'format_id': format_id,
+                'url': video_url,
+                'width': int_or_none(r.attrib.get('width')),
+                'height': int_or_none(r.attrib.get('height')),
+                'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),  # presumably bit/s scaled to kbit/s - TODO confirm
+                'asr': int_or_none(r.attrib.get('audioSamplingRate')),
+                'filesize': filesize,
+                'fps': int_or_none(r.attrib.get('frameRate')),
+            }
+            try:
+                existing_format = next(
+                    fo for fo in formats
+                    if fo['format_id'] == format_id)
+            except StopIteration:  # first Representation seen with this id
+                f.update(self._formats.get(format_id, {}).items())  # NOTE(review): static table overwrites manifest values
+                formats.append(f)
+            else:  # duplicate id: merge into the entry already collected
+                existing_format.update(f)
+        return formats
+
     def _real_extract(self, url):
         proto = (
             'http' if self._downloader.params.get('prefer_insecure', False)
@@ -692,11 +813,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             age_gate = True
             # We simulate the access to the video from www.youtube.com/v/{video_id}
             # this can be viewed without login into Youtube
+            url = proto + '://www.youtube.com/embed/%s' % video_id
+            embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
             data = compat_urllib_parse.urlencode({
                 'video_id': video_id,
                 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
                 'sts': self._search_regex(
-                    r'"sts"\s*:\s*(\d+)', video_webpage, 'sts', default=''),
+                    r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
             })
             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
             video_info_webpage = self._download_webpage(
@@ -722,9 +845,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 # We fallback to the get_video_info pages (used by the embed page)
                 self.report_video_info_webpage_download(video_id)
                 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
-                    video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
-                        % (video_id, el_type))
-                    video_info_webpage = self._download_webpage(video_info_url,
+                    video_info_url = (
+                        '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
+                        % (proto, video_id, el_type))
+                    video_info_webpage = self._download_webpage(
+                        video_info_url,
                         video_id, note=False,
                         errnote='unable to download video info webpage')
                     video_info = compat_parse_qs(video_info_webpage)
@@ -797,7 +922,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
         m_cat_container = self._search_regex(
             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
-            video_webpage, 'categories', fatal=False)
+            video_webpage, 'categories', default=None)
         if m_cat_container:
             category = self._html_search_regex(
                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
@@ -875,7 +1000,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 'url': video_info['conn'][0],
                 'player_url': player_url,
             }]
-        elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
+        elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
             if 'rtmpe%3Dyes' in encoded_url_map:
                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
@@ -892,11 +1017,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 elif 's' in url_data:
                     encrypted_sig = url_data['s'][0]
 
-                    if not age_gate:
-                        jsplayer_url_json = self._search_regex(
-                            r'"assets":.+?"js":\s*("[^"]+")',
-                            video_webpage, 'JS player URL')
-                        player_url = json.loads(jsplayer_url_json)
+                    jsplayer_url_json = self._search_regex(
+                        r'"assets":.+?"js":\s*("[^"]+")',
+                        embed_webpage if age_gate else video_webpage, 'JS player URL')
+                    player_url = json.loads(jsplayer_url_json)
                     if player_url is None:
                         player_url_json = self._search_regex(
                             r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
@@ -940,50 +1064,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
         # Look for the DASH manifest
         if self._downloader.params.get('youtube_include_dash_manifest', True):
-            try:
-                # The DASH manifest used needs to be the one from the original video_webpage.
-                # The one found in get_video_info seems to be using different signatures.
-                # However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage.
-                # Luckily, it seems, this case uses some kind of default signature (len == 86), so the
-                # combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here.
-                dash_manifest_url = video_info.get('dashmpd')[0]
-
-                def decrypt_sig(mobj):
-                    s = mobj.group(1)
-                    dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
-                    return '/signature/%s' % dec_s
-                dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
-                dash_doc = self._download_xml(
-                    dash_manifest_url, video_id,
-                    note='Downloading DASH manifest',
-                    errnote='Could not download DASH manifest')
-                for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
-                    url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
-                    if url_el is None:
-                        continue
-                    format_id = r.attrib['id']
-                    video_url = url_el.text
-                    filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
-                    f = {
-                        'format_id': format_id,
-                        'url': video_url,
-                        'width': int_or_none(r.attrib.get('width')),
-                        'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
-                        'asr': int_or_none(r.attrib.get('audioSamplingRate')),
-                        'filesize': filesize,
-                    }
-                    try:
-                        existing_format = next(
-                            fo for fo in formats
-                            if fo['format_id'] == format_id)
-                    except StopIteration:
-                        f.update(self._formats.get(format_id, {}))
-                        formats.append(f)
-                    else:
-                        existing_format.update(f)
-
-            except (ExtractorError, KeyError) as e:
-                self.report_warning('Skipping DASH manifest: %r' % e, video_id)
+            dash_mpd = video_info.get('dashmpd')
+            if dash_mpd:
+                dash_manifest_url = dash_mpd[0]
+                try:
+                    dash_formats = self._parse_dash_manifest(
+                        video_id, dash_manifest_url, player_url, age_gate)
+                except (ExtractorError, KeyError) as e:
+                    self.report_warning(
+                        'Skipping DASH manifest: %r' % e, video_id)
+                else:
+                    # Hide the formats we found through non-DASH
+                    dash_keys = set(df['format_id'] for df in dash_formats)
+                    for f in formats:
+                        if f['format_id'] in dash_keys:
+                            f['format_id'] = 'nondash-%s' % f['format_id']
+                            f['preference'] = f.get('preference', 0) - 10000
+                    formats.extend(dash_formats)
+
+        # Check for malformed aspect ratio
+        stretched_m = re.search(
+            r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
+            video_webpage)
+        if stretched_m:
+            ratio = float(stretched_m.group('w')) / float(stretched_m.group('h'))
+            for f in formats:
+                if f.get('vcodec') != 'none':
+                    f['stretched_ratio'] = ratio
 
         self._sort_formats(formats)
 
@@ -1029,7 +1136,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                         ((?:PL|LL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
                      )"""
     _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
-    _MORE_PAGES_INDICATOR = r'data-link-type="next"'
     _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
     IE_NAME = 'youtube:playlist'
     _TESTS = [{
@@ -1086,6 +1192,13 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         'info_dict': {
             'title': 'JODA7',
         }
+    }, {
+        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
+        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
+        'info_dict': {
+            'title': 'Uploads from Interstellar Movie',
+        },
+        'playlist_mincount': 21,  # was misspelled 'playlist_mincout', so never checked
     }]
 
     def _real_initialize(self):
@@ -1136,9 +1249,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         if playlist_id.startswith('RD'):
             # Mixes require a custom extraction process
             return self._extract_mix(playlist_id)
-        if playlist_id.startswith('TL'):
-            raise ExtractorError('For downloading YouTube.com top lists, use '
-                                 'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
 
         url = self._TEMPLATE_URL % playlist_id
         page = self._download_webpage(url, playlist_id)
@@ -1170,6 +1280,10 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                 'Downloading page #%s' % page_num,
                 transform_source=uppercase_escape)
             content_html = more['content_html']
+            if not content_html.strip():
+                # Some webpages show a "Load more" button but they don't
+                # have more videos
+                break
             more_widget_html = more['load_more_widget_html']
 
         playlist_title = self._html_search_regex(
@@ -1180,54 +1294,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         return self.playlist_result(url_results, playlist_id, playlist_title)
 
 
-class YoutubeTopListIE(YoutubePlaylistIE):
-    IE_NAME = 'youtube:toplist'
-    IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"'
-               ' (Example: "yttoplist:music:Top Tracks")')
-    _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
-    _TESTS = [{
-        'url': 'yttoplist:music:Trending',
-        'playlist_mincount': 5,
-        'skip': 'Only works for logged-in users',
-    }]
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        channel = mobj.group('chann')
-        title = mobj.group('title')
-        query = compat_urllib_parse.urlencode({'title': title})
-        channel_page = self._download_webpage(
-            'https://www.youtube.com/%s' % channel, title)
-        link = self._html_search_regex(
-            r'''(?x)
-                <a\s+href="([^"]+)".*?>\s*
-                <span\s+class="branded-page-module-title-text">\s*
-                <span[^>]*>.*?%s.*?</span>''' % re.escape(query),
-            channel_page, 'list')
-        url = compat_urlparse.urljoin('https://www.youtube.com/', link)
-
-        video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
-        ids = []
-        # sometimes the webpage doesn't contain the videos
-        # retry until we get them
-        for i in itertools.count(0):
-            msg = 'Downloading Youtube mix'
-            if i > 0:
-                msg += ', retry #%d' % i
-
-            webpage = self._download_webpage(url, title, msg)
-            ids = orderedSet(re.findall(video_re, webpage))
-            if ids:
-                break
-        url_results = self._ids_to_results(ids)
-        return self.playlist_result(url_results, playlist_title=title)
-
-
 class YoutubeChannelIE(InfoExtractor):
     IE_DESC = 'YouTube.com channels'
-    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
-    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
-    _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
+    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
     IE_NAME = 'youtube:channel'
     _TESTS = [{
         'note': 'paginated channel',
@@ -1243,13 +1312,8 @@ class YoutubeChannelIE(InfoExtractor):
         return ids_in_page
 
     def _real_extract(self, url):
-        # Extract channel id
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError('Invalid URL: %s' % url)
+        channel_id = self._match_id(url)
 
-        # Download channel page
-        channel_id = mobj.group(1)
         video_ids = []
         url = 'https://www.youtube.com/channel/%s/videos' % channel_id
         channel_page = self._download_webpage(url, channel_id)
@@ -1263,30 +1327,39 @@ class YoutubeChannelIE(InfoExtractor):
             # The videos are contained in a single page
             # the ajax pages can't be used, they are empty
             video_ids = self.extract_videos_from_page(channel_page)
-        else:
-            # Download all channel pages using the json-based channel_ajax query
+            entries = [
+                self.url_result(video_id, 'Youtube', video_id=video_id)
+                for video_id in video_ids]
+            return self.playlist_result(entries, channel_id)
+
+        def _entries():
+            more_widget_html = content_html = channel_page
             for pagenum in itertools.count(1):
-                url = self._MORE_PAGES_URL % (pagenum, channel_id)
-                page = self._download_json(
-                    url, channel_id, note='Downloading page #%s' % pagenum,
-                    transform_source=uppercase_escape)
 
-                ids_in_page = self.extract_videos_from_page(page['content_html'])
-                video_ids.extend(ids_in_page)
+                ids_in_page = self.extract_videos_from_page(content_html)
+                for video_id in ids_in_page:
+                    yield self.url_result(
+                        video_id, 'Youtube', video_id=video_id)
 
-                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
+                mobj = re.search(
+                    r'data-uix-load-more-href="/?(?P<more>[^"]+)"',
+                    more_widget_html)
+                if not mobj:
                     break
 
-        self._downloader.to_screen('[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
+                more = self._download_json(
+                    'https://youtube.com/%s' % mobj.group('more'), channel_id,
+                    'Downloading page #%s' % (pagenum + 1),
+                    transform_source=uppercase_escape)
+                content_html = more['content_html']
+                more_widget_html = more['load_more_widget_html']
 
-        url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
-                       for video_id in video_ids]
-        return self.playlist_result(url_entries, channel_id)
+        return self.playlist_result(_entries(), channel_id)
 
 
 class YoutubeUserIE(InfoExtractor):
     IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
-    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
+    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
     _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
     _GDATA_PAGE_SIZE = 50
     _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
@@ -1314,12 +1387,7 @@ class YoutubeUserIE(InfoExtractor):
             return super(YoutubeUserIE, cls).suitable(url)
 
     def _real_extract(self, url):
-        # Extract username
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError('Invalid URL: %s' % url)
-
-        username = mobj.group(1)
+        username = self._match_id(url)
 
         # Download video ids using YouTube Data API. Result size per
         # query is limited (currently to 50 videos) so we need to query
@@ -1516,9 +1584,11 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
         feed_entries = []
         paging = 0
         for i in itertools.count(1):
-            info = self._download_json(self._FEED_TEMPLATE % paging,
-                                       '%s feed' % self._FEED_NAME,
-                                       'Downloading page %s' % i)
+            info = self._download_json(
+                self._FEED_TEMPLATE % paging,
+                '%s feed' % self._FEED_NAME,
+                'Downloading page %s' % i,
+                transform_source=uppercase_escape)
             feed_html = info.get('feed_html') or info.get('content_html')
             load_more_widget_html = info.get('load_more_widget_html') or feed_html
             m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
@@ -1635,3 +1705,20 @@ class YoutubeTruncatedURLIE(InfoExtractor):
             '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
             ' or simply  youtube-dl BaW_jenozKc  .',
             expected=True)
+
+
+class YoutubeTruncatedIDIE(InfoExtractor):
+    """Flag watch URLs, with or without "www.", whose ID has only 1-10 characters."""
+    IE_NAME = 'youtube:truncated_id'
+    IE_DESC = False  # Do not list
+    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
+    _TESTS = [{
+        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        raise ExtractorError(
+            'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
+            expected=True)
index 9ff00e26c4235e0eaace73b219b945ea17cee175..98f15177bd6665bd1c6b96a071d59d4b67e5d918 100644 (file)
@@ -1,12 +1,14 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import functools
 import re
 
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
     unified_strdate,
+    OnDemandPagedList,
 )
 
 
@@ -87,7 +89,7 @@ def extract_from_xml_url(ie, video_id, xml_url):
 
 
 class ZDFIE(InfoExtractor):
-    _VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'
+    _VALID_URL = r'(?:zdf:|zdf:video:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/(.*beitrag/(?:video/)?))(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'
 
     _TEST = {
         'url': 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt',
@@ -106,6 +108,52 @@ class ZDFIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-
         xml_url = 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
         return extract_from_xml_url(self, video_id, xml_url)
+
+
+class ZDFChannelIE(InfoExtractor):
+    _VALID_URL = r'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/)(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.zdf.de/ZDFmediathek#/kanaluebersicht/1586442/sendung/Titanic',
+        'info_dict': {
+            'id': '1586442',
+        },
+        'playlist_count': 3,
+    }
+    _PAGE_SIZE = 50  # sent as maxLength and used to derive the request offset
+
+    def _fetch_page(self, channel_id, page):
+        offset = page * self._PAGE_SIZE
+        xml_url = (
+            'http://www.zdf.de/ZDFmediathek/xmlservice/web/aktuellste?ak=web&offset=%d&maxLength=%d&id=%s'
+            % (offset, self._PAGE_SIZE, channel_id))
+        doc = self._download_xml(
+            xml_url, channel_id,
+            note='Downloading channel info',
+            errnote='Failed to download channel info')
+        # Channel title/description are copied onto every entry yielded below
+        title = doc.find('.//information/title').text
+        description = doc.find('.//information/detail').text
+        for asset in doc.findall('.//teasers/teaser'):
+            a_type = asset.find('./type').text
+            a_id = asset.find('./details/assetId').text
+            if a_type not in ('video', 'topic'):  # other teaser types are skipped
+                continue
+            yield {
+                '_type': 'url',
+                'playlist_title': title,
+                'playlist_description': description,
+                'url': 'zdf:%s:%s' % (a_type, a_id),  # zdf:video:<id> or zdf:topic:<id>
+            }
+
+    def _real_extract(self, url):
+        channel_id = self._match_id(url)
+        entries = OnDemandPagedList(  # page fetcher is _fetch_page bound to this channel
+            functools.partial(self._fetch_page, channel_id), self._PAGE_SIZE)
+
+        return {
+            '_type': 'playlist',
+            'id': channel_id,
+            'entries': entries,
+        }
index 2e8c715084616a565472fb8700a8d995cb8c6d04..a30974efd382511b652397c475371b368f86f85b 100644 (file)
@@ -109,7 +109,7 @@ def parseOpts(overrideArguments=None):
     kw = {
         'version': __version__,
         'formatter': fmt,
-        'usage': '%prog [options] url [url...]',
+        'usage': '%prog [OPTIONS] URL [URL...]',
         'conflict_handler': 'resolve',
     }
 
@@ -148,14 +148,6 @@ def parseOpts(overrideArguments=None):
         '--extractor-descriptions',
         action='store_true', dest='list_extractor_descriptions', default=False,
         help='Output descriptions of all supported extractors')
-    general.add_option(
-        '--proxy', dest='proxy',
-        default=None, metavar='URL',
-        help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
-    general.add_option(
-        '--socket-timeout',
-        dest='socket_timeout', type=float, default=None,
-        help='Time to wait before giving up, in seconds')
     general.add_option(
         '--default-search',
         dest='default_search', metavar='PREFIX',
@@ -163,13 +155,41 @@ def parseOpts(overrideArguments=None):
     general.add_option(
         '--ignore-config',
         action='store_true',
-        help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
+        help='Do not read configuration files. '
+        'When given in the global configuration file /etc/youtube-dl.conf: '
+        'Do not read the user configuration in ~/.config/youtube-dl/config '
+        '(%APPDATA%/youtube-dl/config.txt on Windows)')
     general.add_option(
         '--flat-playlist',
         action='store_const', dest='extract_flat', const='in_playlist',
         default=False,
         help='Do not extract the videos of a playlist, only list them.')
 
+    network = optparse.OptionGroup(parser, 'Network Options')
+    network.add_option(
+        '--proxy', dest='proxy',
+        default=None, metavar='URL',
+        help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
+    network.add_option(
+        '--socket-timeout',
+        dest='socket_timeout', type=float, default=None, metavar='SECONDS',
+        help='Time to wait before giving up, in seconds')
+    network.add_option(
+        '--source-address',
+        metavar='IP', dest='source_address', default=None,
+        help='Client-side IP address to bind to (experimental)',
+    )
+    network.add_option(
+        '-4', '--force-ipv4',
+        action='store_const', const='0.0.0.0', dest='source_address',
+        help='Make all connections via IPv4 (experimental)',
+    )
+    network.add_option(
+        '-6', '--force-ipv6',
+        action='store_const', const='::', dest='source_address',
+        help='Make all connections via IPv6 (experimental)',
+    )
+
     selection = optparse.OptionGroup(parser, 'Video Selection')
     selection.add_option(
         '--playlist-start',
@@ -264,10 +284,12 @@ def parseOpts(overrideArguments=None):
         action='store', dest='format', metavar='FORMAT', default=None,
         help=(
             'video format code, specify the order of preference using'
-            ' slashes: -f 22/17/18 .  -f mp4 , -f m4a and  -f flv  are also'
-            ' supported. You can also use the special names "best",'
-            ' "bestvideo", "bestaudio", "worst", "worstvideo" and'
-            ' "worstaudio". By default, youtube-dl will pick the best quality.'
+            ' slashes, as in -f 22/17/18 . '
+            ' Instead of format codes, you can select by extension for the '
+            'extensions aac, m4a, mp3, mp4, ogg, wav, webm. '
+            'You can also use the special names "best",'
+            ' "bestvideo", "bestaudio", "worst". '
+            ' By default, youtube-dl will pick the best quality.'
             ' Use commas to download multiple audio formats, such as'
             ' -f  136/137/mp4/bestvideo,140/m4a/bestaudio.'
             ' You can merge the video and audio of two formats into a single'
@@ -297,6 +319,12 @@ def parseOpts(overrideArguments=None):
         '--youtube-skip-dash-manifest',
         action='store_false', dest='youtube_include_dash_manifest',
         help='Do not download the DASH manifest on YouTube videos')
+    video_format.add_option(
+        '--merge-output-format',
+        action='store', dest='merge_output_format', metavar='FORMAT', default=None,
+        help=(
+            'If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv.'
+            'Ignored if no merge is required'))
 
     subtitles = optparse.OptionGroup(parser, 'Subtitle Options')
     subtitles.add_option(
@@ -346,6 +374,10 @@ def parseOpts(overrideArguments=None):
         '--test',
         action='store_true', dest='test', default=False,
         help=optparse.SUPPRESS_HELP)
+    downloader.add_option(
+        '--playlist-reverse',
+        action='store_true',
+        help='Download playlist videos in reverse order')
 
     workarounds = optparse.OptionGroup(parser, 'Workarounds')
     workarounds.add_option(
@@ -436,6 +468,11 @@ def parseOpts(overrideArguments=None):
         '-J', '--dump-single-json',
         action='store_true', dest='dump_single_json', default=False,
         help='simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.')
+    verbosity.add_option(
+        '--print-json',
+        action='store_true', dest='print_json', default=False,
+        help='Be quiet and print the video information as JSON (video is still being downloaded).',
+    )
     verbosity.add_option(
         '--newline',
         action='store_true', dest='progress_with_newline', default=False,
@@ -468,6 +505,14 @@ def parseOpts(overrideArguments=None):
         '--print-traffic',
         dest='debug_printtraffic', action='store_true', default=False,
         help='Display sent and read HTTP traffic')
+    verbosity.add_option(
+        '-C', '--call-home',
+        dest='call_home', action='store_true', default=False,
+        help='Contact the youtube-dl server for debugging.')
+    verbosity.add_option(
+        '--no-call-home',
+        dest='call_home', action='store_false', default=False,
+        help='Do NOT contact the youtube-dl server for debugging.')
 
     filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
     filesystem.add_option(
@@ -477,10 +522,6 @@ def parseOpts(overrideArguments=None):
     filesystem.add_option(
         '--id', default=False,
         action='store_true', dest='useid', help='use only video ID in file name')
-    filesystem.add_option(
-        '-A', '--auto-number',
-        action='store_true', dest='autonumber', default=False,
-        help='number downloaded files starting from 00000')
     filesystem.add_option(
         '-o', '--output',
         dest='outtmpl', metavar='TEMPLATE',
@@ -508,6 +549,10 @@ def parseOpts(overrideArguments=None):
         '--restrict-filenames',
         action='store_true', dest='restrictfilenames', default=False,
         help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames')
+    filesystem.add_option(
+        '-A', '--auto-number',
+        action='store_true', dest='autonumber', default=False,
+        help='[deprecated; use  -o "%(autonumber)s-%(title)s.%(ext)s" ] number downloaded files starting from 00000')
     filesystem.add_option(
         '-t', '--title',
         action='store_true', dest='usetitle', default=False,
@@ -611,6 +656,13 @@ def parseOpts(overrideArguments=None):
         '--xattrs',
         action='store_true', dest='xattrs', default=False,
         help='write metadata to the video file\'s xattrs (using dublin core and xdg standards)')
+    postproc.add_option(
+        '--fixup',
+        metavar='POLICY', dest='fixup', default='detect_or_warn',
+        help='(experimental) Automatically correct known faults of the file. '
+             'One of never (do nothing), warn (only emit a warning), '
+             'detect_or_warn(check whether we can do anything about it, warn '
+             'otherwise')
     postproc.add_option(
         '--prefer-avconv',
         action='store_false', dest='prefer_ffmpeg',
@@ -625,6 +677,7 @@ def parseOpts(overrideArguments=None):
         help='Execute a command on the file after downloading, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'')
 
     parser.add_option_group(general)
+    parser.add_option_group(network)
     parser.add_option_group(selection)
     parser.add_option_group(downloader)
     parser.add_option_group(filesystem)
index fb367ebe4474063a279fcd096b21461d11deafe8..f8507951cea5b7b9cd4f9cf75b7024910c82a095 100644 (file)
@@ -6,22 +6,29 @@ from .ffmpeg import (
     FFmpegAudioFixPP,
     FFmpegEmbedSubtitlePP,
     FFmpegExtractAudioPP,
+    FFmpegFixupStretchedPP,
     FFmpegMergerPP,
     FFmpegMetadataPP,
-    FFmpegVideoConvertor,
+    FFmpegVideoConvertorPP,
 )
 from .xattrpp import XAttrMetadataPP
 from .execafterdownload import ExecAfterDownloadPP
 
+
+def get_postprocessor(key):
+    return globals()[key + 'PP']
+
+
 __all__ = [
     'AtomicParsleyPP',
     'ExecAfterDownloadPP',
     'FFmpegAudioFixPP',
     'FFmpegEmbedSubtitlePP',
     'FFmpegExtractAudioPP',
+    'FFmpegFixupStretchedPP',
     'FFmpegMergerPP',
     'FFmpegMetadataPP',
     'FFmpegPostProcessor',
-    'FFmpegVideoConvertor',
+    'FFmpegVideoConvertorPP',
     'XAttrMetadataPP',
 ]
index 09db43611a7c288e77c2cacaf96f266c541b2bbc..75c0f7bbe86ef8e19f41fd61e1bbd58678474d8a 100644 (file)
@@ -14,7 +14,7 @@ class ExecAfterDownloadPP(PostProcessor):
 
     def run(self, information):
         cmd = self.exec_cmd
-        if not '{}' in cmd:
+        if '{}' not in cmd:
             cmd += ' {}'
 
         cmd = cmd.replace('{}', shlex_quote(information['filepath']))
index 9303b8378b8065d84ed2d064ab76aacb463ef6bf..5b0ff32b14747d91b1c5c7223fa4efaae7aac574 100644 (file)
@@ -37,11 +37,11 @@ class FFmpegPostProcessor(PostProcessor):
         if not self._executable:
             raise FFmpegPostProcessorError('ffmpeg or avconv not found. Please install one.')
 
-        REQUIRED_VERSION = '1.0'
+        required_version = '10-0' if self._uses_avconv() else '1.0'
         if is_outdated_version(
-                self._versions[self._executable], REQUIRED_VERSION):
+                self._versions[self._executable], required_version):
             warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
-                self._executable, self._executable, REQUIRED_VERSION)
+                self._executable, self._executable, required_version)
             if self._downloader:
                 self._downloader.report_warning(warning)
 
@@ -50,6 +50,10 @@ class FFmpegPostProcessor(PostProcessor):
         programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
         return dict((p, get_exe_version(p, args=['-version'])) for p in programs)
 
+    @property
+    def available(self):
+        return self._executable is not None
+
     @property
     def _executable(self):
         if self._downloader.params.get('prefer_ffmpeg', False):
@@ -78,11 +82,15 @@ class FFmpegPostProcessor(PostProcessor):
     def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
         self.check_version()
 
+        oldest_mtime = min(
+            os.stat(encodeFilename(path)).st_mtime for path in input_paths)
+
         files_cmd = []
         for path in input_paths:
-            files_cmd.extend(['-i', encodeFilename(path, True)])
-        cmd = ([self._executable, '-y'] + files_cmd
-               + [encodeArgument(o) for o in opts] +
+            files_cmd.extend([encodeArgument('-i'), encodeFilename(path, True)])
+        cmd = ([encodeFilename(self._executable, True), encodeArgument('-y')] +
+               files_cmd +
+               [encodeArgument(o) for o in opts] +
                [encodeFilename(self._ffmpeg_filename_argument(out_path), True)])
 
         if self._downloader.params.get('verbose', False):
@@ -93,6 +101,7 @@ class FFmpegPostProcessor(PostProcessor):
             stderr = stderr.decode('utf-8', 'replace')
             msg = stderr.strip().split('\n')[-1]
             raise FFmpegPostProcessorError(msg)
+        os.utime(encodeFilename(out_path), (oldest_mtime, oldest_mtime))
         if self._deletetempfiles:
             for ipath in input_paths:
                 os.remove(ipath)
@@ -122,8 +131,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
             raise PostProcessingError('ffprobe or avprobe not found. Please install one.')
         try:
             cmd = [
-                self._probe_executable,
-                '-show_streams',
+                encodeFilename(self._probe_executable, True),
+                encodeArgument('-show_streams'),
                 encodeFilename(self._ffmpeg_filename_argument(path), True)]
             handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
             output = handle.communicate()[0]
@@ -236,9 +245,9 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
         return self._nopostoverwrites, information
 
 
-class FFmpegVideoConvertor(FFmpegPostProcessor):
+class FFmpegVideoConvertorPP(FFmpegPostProcessor):
     def __init__(self, downloader=None, preferedformat=None):
-        super(FFmpegVideoConvertor, self).__init__(downloader)
+        super(FFmpegVideoConvertorPP, self).__init__(downloader)
         self._preferedformat = preferedformat
 
     def run(self, information):
@@ -466,15 +475,21 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
         filename = information['filepath']
         input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs]
 
-        opts = ['-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy']
+        opts = [
+            '-map', '0',
+            '-c', 'copy',
+            # Don't copy the existing subtitles, we may be running the
+            # postprocessor a second time
+            '-map', '-0:s',
+            '-c:s', 'mov_text',
+        ]
         for (i, lang) in enumerate(sub_langs):
-            opts.extend(['-map', '%d:0' % (i + 1), '-c:s:%d' % i, 'mov_text'])
+            opts.extend(['-map', '%d:0' % (i + 1)])
             lang_code = self._conver_lang_code(lang)
             if lang_code is not None:
                 opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
-        opts.extend(['-f', 'mp4'])
 
-        temp_filename = filename + '.temp'
+        temp_filename = prepend_extension(filename, 'temp')
         self._downloader.to_screen('[ffmpeg] Embedding subtitles in \'%s\'' % filename)
         self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
         os.remove(encodeFilename(filename))
@@ -520,7 +535,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
 class FFmpegMergerPP(FFmpegPostProcessor):
     def run(self, info):
         filename = info['filepath']
-        args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0', '-shortest']
+        args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0']
         self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % filename)
         self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args)
         return True, info
@@ -539,3 +554,22 @@ class FFmpegAudioFixPP(FFmpegPostProcessor):
         os.rename(encodeFilename(temp_filename), encodeFilename(filename))
 
         return True, info
+
+
+class FFmpegFixupStretchedPP(FFmpegPostProcessor):
+    def run(self, info):
+        stretched_ratio = info.get('stretched_ratio')
+        if stretched_ratio is None or stretched_ratio == 1:
+            return
+
+        filename = info['filepath']
+        temp_filename = prepend_extension(filename, 'temp')
+
+        options = ['-c', 'copy', '-aspect', '%f' % stretched_ratio]
+        self._downloader.to_screen('[ffmpeg] Fixing aspect ratio in "%s"' % filename)
+        self.run_ffmpeg(filename, temp_filename, options)
+
+        os.remove(encodeFilename(filename))
+        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+
+        return True, info
index 2bd264b306f8dc8f53bf6e0b9dec97ecd6e85cb4..e60505ace8b8451666f2aeebea3277bc58cb6297 100644 (file)
@@ -4,8 +4,8 @@ import collections
 import io
 import zlib
 
+from .compat import compat_str
 from .utils import (
-    compat_str,
     ExtractorError,
     struct_unpack,
 )
index 4c07a558e7ad2f2db422ed2a0124df49efc6b09c..d8be4049f5dce0fdd9a61f2aff3c4284d494e598 100644 (file)
@@ -13,6 +13,7 @@ from .compat import (
     compat_str,
     compat_urllib_request,
 )
+from .utils import make_HTTPS_handler
 from .version import __version__
 
 
@@ -58,9 +59,12 @@ def update_self(to_screen, verbose):
         to_screen('It looks like you installed youtube-dl with a package manager, pip, setup.py or a tarball. Please use that to update.')
         return
 
+    https_handler = make_HTTPS_handler({})
+    opener = compat_urllib_request.build_opener(https_handler)
+
     # Check if there is a new version
     try:
-        newversion = compat_urllib_request.urlopen(VERSION_URL).read().decode('utf-8').strip()
+        newversion = opener.open(VERSION_URL).read().decode('utf-8').strip()
     except:
         if verbose:
             to_screen(compat_str(traceback.format_exc()))
@@ -72,14 +76,14 @@ def update_self(to_screen, verbose):
 
     # Download and check versions info
     try:
-        versions_info = compat_urllib_request.urlopen(JSON_URL).read().decode('utf-8')
+        versions_info = opener.open(JSON_URL).read().decode('utf-8')
         versions_info = json.loads(versions_info)
     except:
         if verbose:
             to_screen(compat_str(traceback.format_exc()))
         to_screen('ERROR: can\'t obtain versions info. Please try again later.')
         return
-    if not 'signature' in versions_info:
+    if 'signature' not in versions_info:
         to_screen('ERROR: the versions file is not signed or corrupted. Aborting.')
         return
     signature = versions_info['signature']
@@ -120,7 +124,7 @@ def update_self(to_screen, verbose):
             return
 
         try:
-            urlh = compat_urllib_request.urlopen(version['exe'][0])
+            urlh = opener.open(version['exe'][0])
             newcontent = urlh.read()
             urlh.close()
         except (IOError, OSError):
@@ -166,7 +170,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
     # Zip unix package
     elif isinstance(globals().get('__loader__'), zipimporter):
         try:
-            urlh = compat_urllib_request.urlopen(version['bin'][0])
+            urlh = opener.open(version['bin'][0])
             newcontent = urlh.read()
             urlh.close()
         except (IOError, OSError):
index 4d3cbac74aaebdbe0b314690b7dea07e2e2371e2..7832ed87f022d5c4891cecb43822ebebd30a6d0b 100644 (file)
@@ -10,6 +10,7 @@ import ctypes
 import datetime
 import email.utils
 import errno
+import functools
 import gzip
 import itertools
 import io
@@ -34,7 +35,9 @@ from .compat import (
     compat_chr,
     compat_getenv,
     compat_html_entities,
+    compat_http_client,
     compat_parse_qs,
+    compat_socket_create_connection,
     compat_str,
     compat_urllib_error,
     compat_urllib_parse,
@@ -166,7 +169,7 @@ def xpath_text(node, xpath, name=None, fatal=False):
         xpath = xpath.encode('ascii')
 
     n = node.find(xpath)
-    if n is None:
+    if n is None or n.text is None:
         if fatal:
             name = xpath if name is None else name
             raise ExtractorError('Could not find XML element %s' % name)
@@ -205,6 +208,10 @@ def get_element_by_attribute(attribute, value, html):
 
 def clean_html(html):
     """Clean an HTML snippet into a readable string"""
+
+    if html is None:  # Convenience for sanitizing descriptions etc.
+        return html
+
     # Newline vs <br />
     html = html.replace('\n', ' ')
     html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
@@ -280,6 +287,8 @@ def sanitize_filename(s, restricted=False, is_id=False):
             return '_'
         return char
 
+    # Handle timestamps
+    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
     result = ''.join(map(replace_insane, s))
     if not is_id:
         while '__' in result:
@@ -363,7 +372,7 @@ def encodeArgument(s):
     if not isinstance(s, compat_str):
         # Legacy code that uses byte strings
         # Uncomment the following line after fixing all post processors
-        #assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
+        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
         s = s.decode('ascii')
     return encodeFilename(s, True)
 
@@ -387,7 +396,20 @@ def formatSeconds(secs):
         return '%d' % secs
 
 
-def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
+def make_HTTPS_handler(params, **kwargs):
+    opts_no_check_certificate = params.get('nocheckcertificate', False)
+    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
+        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
+        if opts_no_check_certificate:
+            context.check_hostname = False
+            context.verify_mode = ssl.CERT_NONE
+        try:
+            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
+        except TypeError:
+            # Python 2.7.8
+            # (create_default_context present but HTTPSHandler has no context=)
+            pass
+
     if sys.version_info < (3, 2):
         import httplib
 
@@ -405,27 +427,14 @@ def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
                 except ssl.SSLError:
                     self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
 
-        class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
-            def https_open(self, req):
-                return self.do_open(HTTPSConnectionV3, req)
-        return HTTPSHandlerV3(**kwargs)
-    elif hasattr(ssl, 'create_default_context'):  # Python >= 3.4
-        context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
-        context.options &= ~ssl.OP_NO_SSLv3  # Allow older, not-as-secure SSLv3
-        if opts_no_check_certificate:
-            context.verify_mode = ssl.CERT_NONE
-        return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
+        return YoutubeDLHTTPSHandler(params, https_conn_class=HTTPSConnectionV3, **kwargs)
     else:  # Python < 3.4
         context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
         context.verify_mode = (ssl.CERT_NONE
                                if opts_no_check_certificate
                                else ssl.CERT_REQUIRED)
         context.set_default_verify_paths()
-        try:
-            context.load_default_certs()
-        except AttributeError:
-            pass  # Python < 3.4
-        return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
+        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
 
 
 class ExtractorError(Exception):
@@ -463,6 +472,13 @@ class ExtractorError(Exception):
         return ''.join(traceback.format_tb(self.traceback))
 
 
+class UnsupportedError(ExtractorError):
+    def __init__(self, url):
+        super(UnsupportedError, self).__init__(
+            'Unsupported URL: %s' % url, expected=True)
+        self.url = url
+
+
 class RegexNotFoundError(ExtractorError):
     """Error when a regex didn't match"""
     pass
@@ -532,6 +548,26 @@ class ContentTooShortError(Exception):
         self.expected = expected
 
 
+def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
+    hc = http_class(*args, **kwargs)
+    source_address = ydl_handler._params.get('source_address')
+    if source_address is not None:
+        sa = (source_address, 0)
+        if hasattr(hc, 'source_address'):  # Python 2.7+
+            hc.source_address = sa
+        else:  # Python 2.6
+            def _hc_connect(self, *args, **kwargs):
+                sock = compat_socket_create_connection(
+                    (self.host, self.port), self.timeout, sa)
+                if is_https:
+                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file)
+                else:
+                    self.sock = sock
+            hc.connect = functools.partial(_hc_connect, hc)
+
+    return hc
+
+
 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
     """Handler for HTTP requests and responses.
 
@@ -550,6 +586,15 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
     public domain.
     """
 
+    def __init__(self, params, *args, **kwargs):
+        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
+        self._params = params
+
+    def http_open(self, req):
+        return self.do_open(functools.partial(
+            _create_http_connection, self, compat_http_client.HTTPConnection, False),
+            req)
+
     @staticmethod
     def deflate(data):
         try:
@@ -619,6 +664,18 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
     https_response = http_response
 
 
+class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
+    def __init__(self, params, https_conn_class=None, *args, **kwargs):
+        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
+        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
+        self._params = params
+
+    def https_open(self, req):
+        return self.do_open(functools.partial(
+            _create_http_connection, self, self._https_conn_class, True),
+            req)
+
+
 def parse_iso8601(date_str, delimiter='T'):
     """ Return a UNIX timestamp from the given date """
 
@@ -644,17 +701,19 @@ def parse_iso8601(date_str, delimiter='T'):
     return calendar.timegm(dt.timetuple())
 
 
-def unified_strdate(date_str):
+def unified_strdate(date_str, day_first=True):
     """Return a string with the date in the format YYYYMMDD"""
 
     if date_str is None:
         return None
-
     upload_date = None
     # Replace commas
     date_str = date_str.replace(',', ' ')
     # %z (UTC offset) is only supported in python>=3.2
     date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
+    # Remove AM/PM + timezone
+    date_str = re.sub(r'(?i)\s*(?:AM|PM)\s+[A-Z]+', '', date_str)
+
     format_expressions = [
         '%d %B %Y',
         '%d %b %Y',
@@ -663,13 +722,10 @@ def unified_strdate(date_str):
         '%b %dst %Y %I:%M%p',
         '%b %dnd %Y %I:%M%p',
         '%b %dth %Y %I:%M%p',
+        '%Y %m %d',
         '%Y-%m-%d',
         '%Y/%m/%d',
-        '%d.%m.%Y',
-        '%d/%m/%Y',
-        '%d/%m/%y',
         '%Y/%m/%d %H:%M:%S',
-        '%d/%m/%Y %H:%M:%S',
         '%Y-%m-%d %H:%M:%S',
         '%Y-%m-%d %H:%M:%S.%f',
         '%d.%m.%Y %H:%M',
@@ -681,6 +737,20 @@ def unified_strdate(date_str):
         '%Y-%m-%dT%H:%M:%S.%f',
         '%Y-%m-%dT%H:%M',
     ]
+    if day_first:
+        format_expressions.extend([
+            '%d.%m.%Y',
+            '%d/%m/%Y',
+            '%d/%m/%y',
+            '%d/%m/%Y %H:%M:%S',
+        ])
+    else:
+        format_expressions.extend([
+            '%m.%d.%Y',
+            '%m/%d/%Y',
+            '%m/%d/%y',
+            '%m/%d/%Y %H:%M:%S',
+        ])
     for expression in format_expressions:
         try:
             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
@@ -712,8 +782,10 @@ def date_from_str(date_str):
     Return a datetime object from a string in the format YYYYMMDD or
     (now|today)[+-][0-9](day|week|month|year)(s)?"""
     today = datetime.date.today()
-    if date_str == 'now'or date_str == 'today':
+    if date_str in ('now', 'today'):
         return today
+    if date_str == 'yesterday':
+        return today - datetime.timedelta(days=1)
     match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
     if match is not None:
         sign = match.group('sign')
@@ -808,22 +880,22 @@ def _windows_write_string(s, out):
 
     GetStdHandle = ctypes.WINFUNCTYPE(
         ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
-        ("GetStdHandle", ctypes.windll.kernel32))
+        (b"GetStdHandle", ctypes.windll.kernel32))
     h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
 
     WriteConsoleW = ctypes.WINFUNCTYPE(
         ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
         ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
-        ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
+        ctypes.wintypes.LPVOID)((b"WriteConsoleW", ctypes.windll.kernel32))
     written = ctypes.wintypes.DWORD(0)
 
-    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
+    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)((b"GetFileType", ctypes.windll.kernel32))
     FILE_TYPE_CHAR = 0x0002
     FILE_TYPE_REMOTE = 0x8000
     GetConsoleMode = ctypes.WINFUNCTYPE(
         ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
         ctypes.POINTER(ctypes.wintypes.DWORD))(
-        ("GetConsoleMode", ctypes.windll.kernel32))
+        (b"GetConsoleMode", ctypes.windll.kernel32))
     INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
 
     def not_a_console(handle):
@@ -1024,7 +1096,7 @@ def smuggle_url(url, data):
 
 
 def unsmuggle_url(smug_url, default=None):
-    if not '#__youtubedl_smuggle' in smug_url:
+    if '#__youtubedl_smuggle' not in smug_url:
         return smug_url, default
     url, _, sdata = smug_url.rpartition('#')
     jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
@@ -1090,11 +1162,14 @@ def parse_filesize(s):
     }
 
     units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE)
-    m = re.match(r'(?P<num>[0-9]+(?:\.[0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
+    m = re.match(
+        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
     if not m:
         return None
 
-    return int(float(m.group('num')) * _UNIT_TABLE[m.group('unit')])
+    num_str = m.group('num').replace(',', '.')
+    mult = _UNIT_TABLE[m.group('unit')]
+    return int(float(num_str) * mult)
 
 
 def get_term_width():
@@ -1196,25 +1271,36 @@ def float_or_none(v, scale=1, invscale=1, default=None):
 
 
 def parse_duration(s):
-    if s is None:
+    if not isinstance(s, basestring if sys.version_info < (3, 0) else compat_str):
         return None
 
     s = s.strip()
 
     m = re.match(
-        r'''(?ix)T?
+        r'''(?ix)(?:P?T)?
+        (?:
+            (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
+            (?P<only_hours>[0-9.]+)\s*(?:hours?)|
+
             (?:
                 (?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?
                 (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
             )?
-            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$''', s)
+            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
+        )$''', s)
     if not m:
         return None
-    res = int(m.group('secs'))
+    res = 0
+    if m.group('only_mins'):
+        return float_or_none(m.group('only_mins'), invscale=60)
+    if m.group('only_hours'):
+        return float_or_none(m.group('only_hours'), invscale=60 * 60)
+    if m.group('secs'):
+        res += int(m.group('secs'))
     if m.group('mins'):
         res += int(m.group('mins')) * 60
-        if m.group('hours'):
-            res += int(m.group('hours')) * 60 * 60
+    if m.group('hours'):
+        res += int(m.group('hours')) * 60 * 60
     if m.group('ms'):
         res += float(m.group('ms'))
     return res
@@ -1236,18 +1322,25 @@ def check_executable(exe, args=[]):
 
 
 def get_exe_version(exe, args=['--version'],
-                    version_re=r'version\s+([0-9._-a-zA-Z]+)',
-                    unrecognized='present'):
+                    version_re=None, unrecognized='present'):
     """ Returns the version of the specified executable,
     or False if the executable is not present """
     try:
-        out, err = subprocess.Popen(
+        out, _ = subprocess.Popen(
             [exe] + args,
             stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
     except OSError:
         return False
-    firstline = out.partition(b'\n')[0].decode('ascii', 'ignore')
-    m = re.search(version_re, firstline)
+    if isinstance(out, bytes):  # Python 2.x
+        out = out.decode('ascii', 'ignore')
+    return detect_exe_version(out, version_re, unrecognized)
+
+
+def detect_exe_version(output, version_re=None, unrecognized='present'):
+    assert isinstance(output, compat_str)
+    if version_re is None:
+        version_re = r'version\s+([-0-9._a-zA-Z]+)'
+    m = re.search(version_re, output)
     if m:
         return m.group(1)
     else:
@@ -1488,7 +1581,7 @@ def limit_length(s, length):
 
 
 def version_tuple(v):
-    return [int(e) for e in v.split('.')]
+    return tuple(int(e) for e in re.split(r'[-.]', v))
 
 
 def is_outdated_version(version, limit, assume_new=True):
@@ -1510,3 +1603,23 @@ def ytdl_is_updateable():
 def args_to_str(args):
     # Get a short string representation for a subprocess command
     return ' '.join(shlex_quote(a) for a in args)
+
+
+def urlhandle_detect_ext(url_handle):
+    try:
+        url_handle.headers
+        getheader = lambda h: url_handle.headers[h]
+    except AttributeError:  # Python < 3
+        getheader = url_handle.info().getheader
+
+    return getheader('Content-Type').split("/")[1]
+
+
+def age_restricted(content_limit, age_limit):
+    """ Returns True iff the content should be blocked """
+
+    if age_limit is None:  # No limit set
+        return False
+    if content_limit is None:
+        return False  # Content available for everyone
+    return age_limit < content_limit
index 847a47c458c011237aafd82fb70a8b6225d25ceb..63a79a7ee7122c1886442d32c635ad66f0fd663a 100644 (file)
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2014.12.01'
+__version__ = '2015.01.16'