Raphaël G. Git Repositories - youtubedl/commitdiff
Imported Upstream version 2015.02.06
author: Rogério Brito <rbrito@ime.usp.br>
Sun, 8 Feb 2015 05:42:39 +0000 (03:42 -0200)
committer: Rogério Brito <rbrito@ime.usp.br>
Sun, 8 Feb 2015 05:42:39 +0000 (03:42 -0200)
140 files changed:
Makefile
README.md
README.txt
devscripts/release.sh
docs/supportedsites.md
test/helper.py
test/swftests/ArrayAccess.swf
test/swftests/ClassCall.swf
test/swftests/ClassConstruction.swf
test/swftests/ConstArrayAccess.swf
test/swftests/ConstantInt.swf
test/swftests/DictCall.swf
test/swftests/EqualsOperator.swf
test/swftests/LocalVars.swf
test/swftests/MemberAssignment.swf
test/swftests/NeOperator.swf
test/swftests/PrivateCall.swf
test/swftests/PrivateVoidCall.swf
test/swftests/StaticAssignment.swf
test/swftests/StaticRetrieval.swf
test/swftests/StringBasics.swf
test/swftests/StringCharCodeAt.swf
test/swftests/StringConversion.swf
test/test_YoutubeDL.py
test/test_all_urls.py
test/test_download.py
test/test_http.py [new file with mode: 0644]
test/test_jsinterp.py [new file with mode: 0644]
test/test_utils.py
test/testcert.pem [new file with mode: 0644]
test/video-vid.mp4 [new file with mode: 0644]
youtube-dl
youtube-dl.1
youtube-dl.bash-completion
youtube-dl.fish
youtube-dl.zsh
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/compat.py
youtube_dl/downloader/__init__.py
youtube_dl/downloader/common.py
youtube_dl/downloader/external.py [new file with mode: 0644]
youtube_dl/downloader/f4m.py
youtube_dl/downloader/hls.py
youtube_dl/downloader/http.py
youtube_dl/downloader/rtmp.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/abc7news.py [new file with mode: 0644]
youtube_dl/extractor/aftonbladet.py
youtube_dl/extractor/appletrailers.py
youtube_dl/extractor/ard.py
youtube_dl/extractor/atresplayer.py
youtube_dl/extractor/audiomack.py
youtube_dl/extractor/auengine.py [deleted file]
youtube_dl/extractor/bbccouk.py
youtube_dl/extractor/bliptv.py
youtube_dl/extractor/brightcove.py
youtube_dl/extractor/cinchcast.py
youtube_dl/extractor/cliphunter.py
youtube_dl/extractor/cnn.py
youtube_dl/extractor/comedycentral.py
youtube_dl/extractor/common.py
youtube_dl/extractor/ctsnews.py [new file with mode: 0644]
youtube_dl/extractor/dctp.py [new file with mode: 0644]
youtube_dl/extractor/defense.py
youtube_dl/extractor/drtv.py
youtube_dl/extractor/fc2.py
youtube_dl/extractor/folketinget.py
youtube_dl/extractor/franceculture.py
youtube_dl/extractor/francetv.py
youtube_dl/extractor/gamestar.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/globo.py
youtube_dl/extractor/grooveshark.py
youtube_dl/extractor/hearthisat.py [new file with mode: 0644]
youtube_dl/extractor/historicfilms.py [new file with mode: 0644]
youtube_dl/extractor/ivi.py
youtube_dl/extractor/kankan.py
youtube_dl/extractor/keezmovies.py
youtube_dl/extractor/krasview.py
youtube_dl/extractor/la7.py
youtube_dl/extractor/liveleak.py
youtube_dl/extractor/lnkgo.py
youtube_dl/extractor/lynda.py
youtube_dl/extractor/macgamestore.py
youtube_dl/extractor/mixcloud.py
youtube_dl/extractor/mpora.py
youtube_dl/extractor/mtv.py
youtube_dl/extractor/nbc.py
youtube_dl/extractor/ndtv.py
youtube_dl/extractor/nerdcubed.py
youtube_dl/extractor/nerdist.py [new file with mode: 0644]
youtube_dl/extractor/nextmedia.py [new file with mode: 0644]
youtube_dl/extractor/nfl.py
youtube_dl/extractor/nhl.py
youtube_dl/extractor/normalboots.py
youtube_dl/extractor/npo.py
youtube_dl/extractor/ntvde.py [new file with mode: 0644]
youtube_dl/extractor/ntvru.py [moved from youtube_dl/extractor/ntv.py with 97% similarity]
youtube_dl/extractor/pornhub.py
youtube_dl/extractor/ringtv.py
youtube_dl/extractor/rottentomatoes.py
youtube_dl/extractor/rtl2.py [new file with mode: 0644]
youtube_dl/extractor/rtp.py
youtube_dl/extractor/rts.py
youtube_dl/extractor/rtve.py
youtube_dl/extractor/rutv.py
youtube_dl/extractor/servingsys.py
youtube_dl/extractor/sina.py
youtube_dl/extractor/smotri.py
youtube_dl/extractor/soundcloud.py
youtube_dl/extractor/spiegel.py
youtube_dl/extractor/spike.py
youtube_dl/extractor/srmediathek.py
youtube_dl/extractor/streetvoice.py [new file with mode: 0644]
youtube_dl/extractor/teamcoco.py
youtube_dl/extractor/teletask.py
youtube_dl/extractor/testtube.py [new file with mode: 0644]
youtube_dl/extractor/tinypic.py
youtube_dl/extractor/toutv.py
youtube_dl/extractor/tvp.py
youtube_dl/extractor/tweakers.py [new file with mode: 0644]
youtube_dl/extractor/twitch.py
youtube_dl/extractor/ubu.py
youtube_dl/extractor/vevo.py
youtube_dl/extractor/viddler.py
youtube_dl/extractor/videomega.py
youtube_dl/extractor/videott.py
youtube_dl/extractor/vimeo.py
youtube_dl/extractor/washingtonpost.py
youtube_dl/extractor/wdr.py
youtube_dl/extractor/wsj.py [new file with mode: 0644]
youtube_dl/extractor/xuite.py [new file with mode: 0644]
youtube_dl/extractor/youtube.py
youtube_dl/jsinterp.py
youtube_dl/options.py
youtube_dl/postprocessor/__init__.py
youtube_dl/postprocessor/ffmpeg.py
youtube_dl/utils.py
youtube_dl/version.py

index 5780798793cf2915807d28791a8cb59d40e9dcf2..0636fc4cbe108667d0ecb85aed68018e4e6803ee 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,10 +1,7 @@
 all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
 
 clean:
-       rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json CONTRIBUTING.md.tmp
-
-cleanall: clean
-       rm -f youtube-dl youtube-dl.exe
+       rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
 
 PREFIX ?= /usr/local
 BINDIR ?= $(PREFIX)/bin
index 078e9df828393989f931e53b587503e6547c37b3..06dea400dd221cfca75b151a2d1a2f8fd214122e 100644 (file)
--- a/README.md
+++ b/README.md
@@ -93,6 +93,14 @@ which means you can modify it, redistribute it or use it however you like.
 ## Video Selection:
     --playlist-start NUMBER          playlist video to start at (default is 1)
     --playlist-end NUMBER            playlist video to end at (default is last)
+    --playlist-items ITEM_SPEC       playlist video items to download. Specify
+                                     indices of the videos in the playlist
+                                     separated by commas like: "--playlist-items
+                                     1,2,5,8" if you want to download videos
+                                     indexed 1, 2, 5, 8 in the playlist. You can
+                                     specify range: "--playlist-items
+                                     1-3,7,10-13", it will download the videos
+                                     at index 1, 2, 3, 7, 10, 11, 12 and 13.
     --match-title REGEX              download only matching titles (regex or
                                      caseless sub-string)
     --reject-title REGEX             skip download for matching titles (regex or
@@ -124,7 +132,8 @@ which means you can modify it, redistribute it or use it however you like.
 ## Download Options:
     -r, --rate-limit LIMIT           maximum download rate in bytes per second
                                      (e.g. 50K or 4.2M)
-    -R, --retries RETRIES            number of retries (default is 10)
+    -R, --retries RETRIES            number of retries (default is 10), or
+                                     "infinite".
     --buffer-size SIZE               size of download buffer (e.g. 1024 or 16K)
                                      (default is 1024)
     --no-resize-buffer               do not automatically adjust the buffer
@@ -132,6 +141,11 @@ which means you can modify it, redistribute it or use it however you like.
                                      automatically resized from an initial value
                                      of SIZE.
     --playlist-reverse               Download playlist videos in reverse order
+    --xattr-set-filesize             (experimental) set file xattribute
+                                     ytdl.filesize with expected filesize
+    --external-downloader COMMAND    (experimental) Use the specified external
+                                     downloader. Currently supports
+                                     aria2c,curl,wget
 
 ## Filesystem Options:
     -a, --batch-file FILE            file containing URLs to download ('-' for
@@ -191,7 +205,6 @@ which means you can modify it, redistribute it or use it however you like.
     --write-info-json                write video metadata to a .info.json file
     --write-annotations              write video annotations to a .annotation
                                      file
-    --write-thumbnail                write thumbnail image to disk
     --load-info FILE                 json file containing the video information
                                      (created with the "--write-json" option)
     --cookies FILE                   file to read cookies from and dump cookie
@@ -206,6 +219,12 @@ which means you can modify it, redistribute it or use it however you like.
     --no-cache-dir                   Disable filesystem caching
     --rm-cache-dir                   Delete all filesystem cache files
 
+## Thumbnail images:
+    --write-thumbnail                write thumbnail image to disk
+    --write-all-thumbnails           write all thumbnail image formats to disk
+    --list-thumbnails                Simulate and list all available thumbnail
+                                     formats
+
 ## Verbosity / Simulation Options:
     -q, --quiet                      activates quiet mode
     --no-warnings                    Ignore warnings
@@ -259,6 +278,8 @@ which means you can modify it, redistribute it or use it however you like.
     --bidi-workaround                Work around terminals that lack
                                      bidirectional text support. Requires bidiv
                                      or fribidi executable in PATH
+    --sleep-interval SECONDS         Number of seconds to sleep before each
+                                     download.
 
 ## Video Format Options:
     -f, --format FORMAT              video format code, specify the order of
@@ -267,10 +288,22 @@ which means you can modify it, redistribute it or use it however you like.
                                      by extension for the extensions aac, m4a,
                                      mp3, mp4, ogg, wav, webm. You can also use
                                      the special names "best", "bestvideo",
-                                     "bestaudio", "worst".  By default, youtube-
-                                     dl will pick the best quality. Use commas
-                                     to download multiple audio formats, such as
-                                     -f
+                                     "bestaudio", "worst".  You can filter the
+                                     video results by putting a condition in
+                                     brackets, as in -f "best[height=720]" (or
+                                     -f "[filesize>10M]").  This works for
+                                     filesize, height, width, tbr, abr, vbr, and
+                                     fps and the comparisons <, <=, >, >=, =, !=
+                                     . Formats for which the value is not known
+                                     are excluded unless you put a question mark
+                                     (?) after the operator. You can combine
+                                     format filters, so  -f "[height <=?
+                                     720][tbr>500]" selects up to 720p videos
+                                     (or videos where the height is not known)
+                                     with a bitrate of at least 500 KBit/s. By
+                                     default, youtube-dl will pick the best
+                                     quality. Use commas to download multiple
+                                     audio formats, such as -f
                                      136/137/mp4/bestvideo,140/m4a/bestaudio.
                                      You can merge the video and audio of two
                                      formats into a single file using -f <video-
@@ -304,7 +337,8 @@ which means you can modify it, redistribute it or use it however you like.
 
 ## Authentication Options:
     -u, --username USERNAME          login with this account ID
-    -p, --password PASSWORD          account password
+    -p, --password PASSWORD          account password. If this option is left
+                                     out, youtube-dl will ask interactively.
     -2, --twofactor TWOFACTOR        two-factor auth code
     -n, --netrc                      use .netrc authentication data
     --video-password PASSWORD        video password (vimeo, smotri)
@@ -334,11 +368,11 @@ which means you can modify it, redistribute it or use it however you like.
     --add-metadata                   write metadata to the video file
     --xattrs                         write metadata to the video file's xattrs
                                      (using dublin core and xdg standards)
-    --fixup POLICY                   (experimental) Automatically correct known
-                                     faults of the file. One of never (do
-                                     nothing), warn (only emit a warning),
-                                     detect_or_warn(check whether we can do
-                                     anything about it, warn otherwise
+    --fixup POLICY                   Automatically correct known faults of the
+                                     file. One of never (do nothing), warn (only
+                                     emit a warning), detect_or_warn(the
+                                     default; fix file if we can, warn
+                                     otherwise)
     --prefer-avconv                  Prefer avconv over ffmpeg for running the
                                      postprocessors (default)
     --prefer-ffmpeg                  Prefer ffmpeg over avconv for running the
@@ -487,9 +521,28 @@ To make a different directory work - either for ffmpeg, or for youtube-dl, or fo
 
 From then on, after restarting your shell, you will be able to access both youtube-dl and ffmpeg (and youtube-dl will be able to find ffmpeg) by simply typing `youtube-dl` or `ffmpeg`, no matter what directory you're in.
 
+### How do I put downloads into a specific folder?
+
+Use the `-o` to specify an [output template](#output-template), for example `-o "/home/user/videos/%(title)s-%(id)s.%(ext)s"`. If you want this for all of your downloads, put the option into your [configuration file](#configuration).
+
+### How do I download a video starting with a `-` ?
+
+Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the options with `--`:
+
+    youtube-dl -- -wNyEUrxzFU
+    youtube-dl "http://www.youtube.com/watch?v=-wNyEUrxzFU"
+
+### Can you add support for this anime video site, or site which shows current movies for free?
+
+As a matter of policy (as well as legality), youtube-dl does not include support for services that specialize in infringing copyright. As a rule of thumb, if you cannot easily find a video that the service is quite obviously allowed to distribute (i.e. that has been uploaded by the creator, the creator's distributor, or is published under a free license), the service is probably unfit for inclusion to youtube-dl.
+
+A note on the service that they don't host the infringing content, but just link to those who do, is evidence that the service should **not** be included into youtube-dl. The same goes for any DMCA note when the whole front page of the service is filled with videos they are not allowed to distribute. A "fair use" note is equally unconvincing if the service shows copyright-protected videos in full without authorization.
+
+Support requests for services that **do** purchase the rights to distribute their content are perfectly fine though. If in doubt, you can simply include a source that mentions the legitimate purchase of content.
+
 ### How can I detect whether a given URL is supported by youtube-dl?
 
-For one, have a look at the [list of supported sites](docs/supportedsites). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/v/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
+For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
 
 It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
 
@@ -567,7 +620,7 @@ If you want to add support for a new site, you can follow this quick list (assum
 5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want.
-8. If you can, check the code with [pyflakes](https://pypi.python.org/pypi/pyflakes) (a good idea) and [pep8](https://pypi.python.org/pypi/pep8) (optional, ignore E501).
+8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8).
 9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
 
         $ git add youtube_dl/extractor/__init__.py
@@ -683,7 +736,7 @@ In particular, every site support request issue should only pertain to services
 
 ###  Is anyone going to need the feature?
 
-Only post features that you (or an incapicated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
+Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
 
 ###  Is your question about youtube-dl?
 
index 5e2e5f75822e4fabc02b9cca3a16e0e80f2672a4..2cf50d26839a79f31cec79da8eef2fa51ff89a26 100644 (file)
@@ -107,6 +107,14 @@ Video Selection:
 
     --playlist-start NUMBER          playlist video to start at (default is 1)
     --playlist-end NUMBER            playlist video to end at (default is last)
+    --playlist-items ITEM_SPEC       playlist video items to download. Specify
+                                     indices of the videos in the playlist
+                                     separated by commas like: "--playlist-items
+                                     1,2,5,8" if you want to download videos
+                                     indexed 1, 2, 5, 8 in the playlist. You can
+                                     specify range: "--playlist-items
+                                     1-3,7,10-13", it will download the videos
+                                     at index 1, 2, 3, 7, 10, 11, 12 and 13.
     --match-title REGEX              download only matching titles (regex or
                                      caseless sub-string)
     --reject-title REGEX             skip download for matching titles (regex or
@@ -140,7 +148,8 @@ Download Options:
 
     -r, --rate-limit LIMIT           maximum download rate in bytes per second
                                      (e.g. 50K or 4.2M)
-    -R, --retries RETRIES            number of retries (default is 10)
+    -R, --retries RETRIES            number of retries (default is 10), or
+                                     "infinite".
     --buffer-size SIZE               size of download buffer (e.g. 1024 or 16K)
                                      (default is 1024)
     --no-resize-buffer               do not automatically adjust the buffer
@@ -148,6 +157,11 @@ Download Options:
                                      automatically resized from an initial value
                                      of SIZE.
     --playlist-reverse               Download playlist videos in reverse order
+    --xattr-set-filesize             (experimental) set file xattribute
+                                     ytdl.filesize with expected filesize
+    --external-downloader COMMAND    (experimental) Use the specified external
+                                     downloader. Currently supports
+                                     aria2c,curl,wget
 
 Filesystem Options:
 -------------------
@@ -209,7 +223,6 @@ Filesystem Options:
     --write-info-json                write video metadata to a .info.json file
     --write-annotations              write video annotations to a .annotation
                                      file
-    --write-thumbnail                write thumbnail image to disk
     --load-info FILE                 json file containing the video information
                                      (created with the "--write-json" option)
     --cookies FILE                   file to read cookies from and dump cookie
@@ -224,6 +237,14 @@ Filesystem Options:
     --no-cache-dir                   Disable filesystem caching
     --rm-cache-dir                   Delete all filesystem cache files
 
+Thumbnail images:
+-----------------
+
+    --write-thumbnail                write thumbnail image to disk
+    --write-all-thumbnails           write all thumbnail image formats to disk
+    --list-thumbnails                Simulate and list all available thumbnail
+                                     formats
+
 Verbosity / Simulation Options:
 -------------------------------
 
@@ -281,6 +302,8 @@ Workarounds:
     --bidi-workaround                Work around terminals that lack
                                      bidirectional text support. Requires bidiv
                                      or fribidi executable in PATH
+    --sleep-interval SECONDS         Number of seconds to sleep before each
+                                     download.
 
 Video Format Options:
 ---------------------
@@ -291,10 +314,22 @@ Video Format Options:
                                      by extension for the extensions aac, m4a,
                                      mp3, mp4, ogg, wav, webm. You can also use
                                      the special names "best", "bestvideo",
-                                     "bestaudio", "worst".  By default, youtube-
-                                     dl will pick the best quality. Use commas
-                                     to download multiple audio formats, such as
-                                     -f
+                                     "bestaudio", "worst".  You can filter the
+                                     video results by putting a condition in
+                                     brackets, as in -f "best[height=720]" (or
+                                     -f "[filesize>10M]").  This works for
+                                     filesize, height, width, tbr, abr, vbr, and
+                                     fps and the comparisons <, <=, >, >=, =, !=
+                                     . Formats for which the value is not known
+                                     are excluded unless you put a question mark
+                                     (?) after the operator. You can combine
+                                     format filters, so  -f "[height <=?
+                                     720][tbr>500]" selects up to 720p videos
+                                     (or videos where the height is not known)
+                                     with a bitrate of at least 500 KBit/s. By
+                                     default, youtube-dl will pick the best
+                                     quality. Use commas to download multiple
+                                     audio formats, such as -f
                                      136/137/mp4/bestvideo,140/m4a/bestaudio.
                                      You can merge the video and audio of two
                                      formats into a single file using -f <video-
@@ -332,7 +367,8 @@ Authentication Options:
 -----------------------
 
     -u, --username USERNAME          login with this account ID
-    -p, --password PASSWORD          account password
+    -p, --password PASSWORD          account password. If this option is left
+                                     out, youtube-dl will ask interactively.
     -2, --twofactor TWOFACTOR        two-factor auth code
     -n, --netrc                      use .netrc authentication data
     --video-password PASSWORD        video password (vimeo, smotri)
@@ -364,11 +400,11 @@ Post-processing Options:
     --add-metadata                   write metadata to the video file
     --xattrs                         write metadata to the video file's xattrs
                                      (using dublin core and xdg standards)
-    --fixup POLICY                   (experimental) Automatically correct known
-                                     faults of the file. One of never (do
-                                     nothing), warn (only emit a warning),
-                                     detect_or_warn(check whether we can do
-                                     anything about it, warn otherwise
+    --fixup POLICY                   Automatically correct known faults of the
+                                     file. One of never (do nothing), warn (only
+                                     emit a warning), detect_or_warn(the
+                                     default; fix file if we can, warn
+                                     otherwise)
     --prefer-avconv                  Prefer avconv over ffmpeg for running the
                                      postprocessors (default)
     --prefer-ffmpeg                  Prefer ffmpeg over avconv for running the
@@ -609,6 +645,40 @@ both youtube-dl and ffmpeg (and youtube-dl will be able to find ffmpeg)
 by simply typing youtube-dl or ffmpeg, no matter what directory you're
 in.
 
+How do I put downloads into a specific folder?
+
+Use the -o to specify an output template, for example
+-o "/home/user/videos/%(title)s-%(id)s.%(ext)s". If you want this for
+all of your downloads, put the option into your configuration file.
+
+How do I download a video starting with a - ?
+
+Either prepend http://www.youtube.com/watch?v= or separate the ID from
+the options with --:
+
+    youtube-dl -- -wNyEUrxzFU
+    youtube-dl "http://www.youtube.com/watch?v=-wNyEUrxzFU"
+
+Can you add support for this anime video site, or site which shows current movies for free?
+
+As a matter of policy (as well as legality), youtube-dl does not include
+support for services that specialize in infringing copyright. As a rule
+of thumb, if you cannot easily find a video that the service is quite
+obviously allowed to distribute (i.e. that has been uploaded by the
+creator, the creator's distributor, or is published under a free
+license), the service is probably unfit for inclusion to youtube-dl.
+
+A note on the service that they don't host the infringing content, but
+just link to those who do, is evidence that the service should not be
+included into youtube-dl. The same goes for any DMCA note when the whole
+front page of the service is filled with videos they are not allowed to
+distribute. A "fair use" note is equally unconvincing if the service
+shows copyright-protected videos in full without authorization.
+
+Support requests for services that do purchase the rights to distribute
+their content are perfectly fine though. If in doubt, you can simply
+include a source that mentions the legitimate purchase of content.
+
 How can I detect whether a given URL is supported by youtube-dl?
 
 For one, have a look at the list of supported sites. Note that it can
@@ -723,8 +793,7 @@ list (assuming your service is called yourextractor):
 7.  Have a look at youtube_dl/extractor/common.py for possible
     helper methods and a detailed description of what your extractor
     should return. Add tests and code for as many as you want.
-8.  If you can, check the code with pyflakes (a good idea) and pep8
-    (optional, ignore E501).
+8.  If you can, check the code with flake8.
 9.  When the tests pass, add the new files and commit them and push the
     result, like this:
 
@@ -912,10 +981,10 @@ video service.
 
 Is anyone going to need the feature?
 
-Only post features that you (or an incapicated friend you can personally
-talk to) require. Do not post features because they seem like a good
-idea. If they are really useful, they will be requested by someone who
-requires them.
+Only post features that you (or an incapacitated friend you can
+personally talk to) require. Do not post features because they seem like
+a good idea. If they are really useful, they will be requested by
+someone who requires them.
 
 Is your question about youtube-dl?
 
index 691517ceb9b34394115ed4e54521bad1d4f3b54b..61806961c63798dfab081ceae5d76f9afc0ff773 100755 (executable)
@@ -35,7 +35,7 @@ if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in youtube_dl: $us
 if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi
 
 /bin/echo -e "\n### First of all, testing..."
-make cleanall
+make clean
 if $skip_tests ; then
     echo 'SKIPPING TESTS'
 else
@@ -45,9 +45,9 @@ fi
 /bin/echo -e "\n### Changing version in version.py..."
 sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
 
-/bin/echo -e "\n### Committing README.md and youtube_dl/version.py..."
-make README.md
-git add README.md youtube_dl/version.py
+/bin/echo -e "\n### Committing documentation and youtube_dl/version.py..."
+make README.md CONTRIBUTING.md supportedsites
+git add README.md CONTRIBUTING.md docs/supportedsites.md youtube_dl/version.py
 git commit -m "release $version"
 
 /bin/echo -e "\n### Now tagging, signing and pushing..."
index dbbf4a797b6ba169f8ae031ab7c9e438390101d8..2d8f9c316b83780f56d6a99d59a457610cfe05ea 100644 (file)
@@ -9,6 +9,7 @@
  - **8tracks**
  - **9gag**
  - **abc.net.au**
+ - **Abc7News**
  - **AcademicEarth:Course**
  - **AddAnime**
  - **AdobeTV**
  - **Aftonbladet**
  - **AlJazeera**
  - **Allocine**
+ - **AlphaPorno**
  - **anitube.se**
  - **AnySex**
  - **Aparat**
+ - **AppleDailyAnimationNews**
+ - **AppleDailyRealtimeNews**
  - **AppleTrailers**
  - **archive.org**: archive.org videos
  - **ARD**
  - **arte.tv:ddc**
  - **arte.tv:embed**
  - **arte.tv:future**
+ - **AtresPlayer**
+ - **ATTTechChannel**
  - **audiomack**
- - **AUEngine**
+ - **audiomack:album**
  - **Azubu**
  - **bambuser**
  - **bambuser:channel**
  - **cmt.com**
  - **CNET**
  - **CNN**
+ - **CNNArticle**
  - **CNNBlogs**
  - **CollegeHumor**
+ - **CollegeRama**
  - **ComCarCoff**
  - **ComedyCentral**
  - **ComedyCentralShows**: The Daily Show / The Colbert Report
  - **Crunchyroll**
  - **crunchyroll:playlist**
  - **CSpan**: C-SPAN
+ - **CtsNews**
  - **culturebox.francetvinfo.fr**
  - **dailymotion**
  - **dailymotion:playlist**
  - **dailymotion:user**
  - **daum.net**
  - **DBTV**
+ - **DctpTv**
  - **DeezerPlaylist**
  - **defense.gouv.fr**
  - **Discovery**
  - **divxstage**: DivxStage
  - **Dotsub**
+ - **DRBonanza**
  - **Dropbox**
  - **DrTuber**
  - **DRTV**
  - **Dump**
  - **dvtv**: http://video.aktualne.cz/
  - **EbaumsWorld**
+ - **EchoMsk**
  - **eHow**
  - **Einthusan**
  - **eitb.tv**
  - **EMPFlix**
  - **Engadget**
  - **Eporner**
+ - **EroProfile**
  - **Escapist**
  - **EveryonesMixtape**
  - **exfm**: ex.fm
  - **GDCVault**
  - **generic**: Generic downloader that works on some sites
  - **GiantBomb**
+ - **Giga**
  - **Glide**: Glide mobile video messages (glide.me)
  - **Globo**
  - **GodTube**
  - **Grooveshark**
  - **Groupon**
  - **Hark**
+ - **HearThisAt**
  - **Heise**
+ - **HellPorno**
  - **Helsinki**: helsinki.fi
  - **HentaiStigma**
+ - **HistoricFilms**
+ - **hitbox**
+ - **hitbox:live**
  - **HornBunny**
  - **HostingBulk**
  - **HotNewHipHop**
  - **jpopsuki.tv**
  - **Jukebox**
  - **Kankan**
+ - **Karaoketv**
  - **keek**
  - **KeezMovies**
  - **KhanAcademy**
  - **LiveLeak**
  - **livestream**
  - **livestream:original**
+ - **LnkGo**
  - **lrt.lt**
  - **lynda**: lynda.com videos
  - **lynda:course**: lynda.com online courses
  - **MySpass**
  - **myvideo**
  - **MyVidster**
+ - **n-tv.de**
  - **Naver**
  - **NBA**
  - **NBC**
  - **ndr**: NDR.de - Mediathek
  - **NDTV**
  - **NerdCubedFeed**
+ - **Nerdist**
+ - **Netzkino**
  - **Newgrounds**
  - **Newstube**
+ - **NextMedia**
+ - **NextMediaActionNews**
  - **nfb**: National Film Board of Canada
  - **nfl.com**
  - **nhl.com**
+ - **nhl.com:news**: NHL news
  - **nhl.com:videocenter**: NHL videocenter category
  - **niconico**: ニコニコ動画
  - **NiconicoPlaylist**
  - **Nowness**
  - **nowvideo**: NowVideo
  - **npo.nl**
+ - **npo.nl:live**
  - **NRK**
  - **NRKTV**
- - **NTV**
+ - **ntv.ru**
  - **Nuvid**
  - **NYTimes**
  - **ocw.mit.edu**
  - **OktoberfestTV**
  - **on.aol.com**
  - **Ooyala**
+ - **OpenFilm**
+ - **orf:fm4**: radio FM4
  - **orf:oe1**: Radio Österreich 1
  - **orf:tvthek**: ORF TVthek
- - **ORFFM4**: radio FM4
  - **parliamentlive.tv**: UK parliament videos
  - **Patreon**
  - **PBS**
  - **Pyvideo**
  - **QuickVid**
  - **radio.de**
+ - **radiobremen**
  - **radiofrance**
  - **Rai**
  - **RBMARadio**
  - **RottenTomatoes**
  - **Roxwel**
  - **RTBF**
+ - **Rte**
+ - **RTL2**
  - **RTLnow**
  - **rtlxl.nl**
  - **RTP**
  - **RUHD**
  - **rutube**: Rutube videos
  - **rutube:channel**: Rutube channels
+ - **rutube:embed**: Rutube embedded videos
  - **rutube:movie**: Rutube movies
  - **rutube:person**: Rutube person videos
  - **RUTV**: RUTV.RU
  - **Sport5**
  - **SportBox**
  - **SportDeutschland**
- - **SRMediathek**: Süddeutscher Rundfunk
+ - **SRMediathek**: Saarländischer Rundfunk
  - **stanfordoc**: Stanford Open ClassRoom
  - **Steam**
  - **streamcloud.eu**
  - **StreamCZ**
+ - **StreetVoice**
  - **SunPorno**
  - **SWRMediathek**
  - **Syfy**
  - **TeleBruxelles**
  - **telecinco.es**
  - **TeleMB**
+ - **TeleTask**
  - **TenPlay**
+ - **TestTube**
  - **TF1**
  - **TheOnion**
  - **ThePlatform**
  - **tv.dfb.de**
  - **tvigle**: Интернет-телевидение Tvigle.ru
  - **tvp.pl**
+ - **tvp.pl:Series**
  - **TVPlay**: TV3Play and related services
- - **Twitch**
+ - **Tweakers**
+ - **twitch:bookmarks**
+ - **twitch:chapter**
+ - **twitch:past_broadcasts**
+ - **twitch:profile**
+ - **twitch:stream**
+ - **twitch:video**
+ - **twitch:vod**
  - **Ubu**
  - **udemy**
  - **udemy:course**
  - **videoweed**: VideoWeed
  - **Vidme**
  - **Vidzi**
+ - **vier**
+ - **vier:videos**
  - **viki**
  - **vimeo**
  - **vimeo:album**
  - **WDR**
  - **wdr:mobile**
  - **WDRMaus**: Sendung mit der Maus
+ - **WebOfStories**
  - **Weibo**
  - **Wimp**
  - **Wistia**
  - **WorldStarHipHop**
  - **wrzuta.pl**
+ - **WSJ**: Wall Street Journal
  - **XBef**
  - **XboxClips**
  - **XHamster**
  - **XNXX**
  - **XTube**
  - **XTubeUser**: XTube user profile
+ - **Xuite**
  - **XVideos**
+ - **XXXYMovies**
  - **Yahoo**: Yahoo screen and movies
  - **YesJapan**
  - **Ynet**
  - **youtube:search_url**: YouTube.com search URLs
  - **youtube:show**: YouTube.com (multi-season) shows
  - **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
- - **youtube:toplist**: YouTube.com top lists, "yttoplist:{channel}:{list title}" (Example: "yttoplist:music:Top Tracks")
  - **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
  - **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
  - **ZDF**
index c416f388cbfe335678269b0226cc10708c49a850..651ef99b983973dab1f17ab8619849af2b9fe9b1 100644 (file)
@@ -103,6 +103,16 @@ def expect_info_dict(self, got_dict, expected_dict):
             self.assertTrue(
                 match_rex.match(got),
                 'field %s (value: %r) should match %r' % (info_field, got, match_str))
+        elif isinstance(expected, compat_str) and expected.startswith('startswith:'):
+            got = got_dict.get(info_field)
+            start_str = expected[len('startswith:'):]
+            self.assertTrue(
+                isinstance(got, compat_str),
+                'Expected a %s object, but got %s for field %s' % (
+                    compat_str.__name__, type(got).__name__, info_field))
+            self.assertTrue(
+                got.startswith(start_str),
+                'field %s (value: %r) should start with %r' % (info_field, got, start_str))
         elif isinstance(expected, type):
             got = got_dict.get(info_field)
             self.assertTrue(isinstance(got, expected),
@@ -140,7 +150,7 @@ def expect_info_dict(self, got_dict, expected_dict):
     # Are checkable fields missing from the test case definition?
     test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
                           for key, value in got_dict.items()
-                          if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
+                          if value and key in ('id', 'title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
     missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
     if missing_keys:
         def _repr(v):
@@ -148,9 +158,15 @@ def expect_info_dict(self, got_dict, expected_dict):
                 return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n')
             else:
                 return repr(v)
-        info_dict_str = ''.join(
-            '    %s: %s,\n' % (_repr(k), _repr(v))
-            for k, v in test_info_dict.items())
+        info_dict_str = ''
+        if len(missing_keys) != len(expected_dict):
+            info_dict_str += ''.join(
+                '    %s: %s,\n' % (_repr(k), _repr(v))
+                for k, v in test_info_dict.items() if k not in missing_keys)
+            info_dict_str += '\n'
+        info_dict_str += ''.join(
+            '    %s: %s,\n' % (_repr(k), _repr(test_info_dict[k]))
+            for k in missing_keys)
         write_string(
             '\n\'info_dict\': {\n' + info_dict_str + '}\n', out=sys.stderr)
         self.assertFalse(
index 240fd6632ea59fd3b8a87dee190a0484bd2f6acc..0158cae1fd5ac9ff82eafaf8398c9ef313a958e8 100644 (file)
Binary files a/test/swftests/ArrayAccess.swf and b/test/swftests/ArrayAccess.swf differ
index 8bdfe06c356790a1251904675a294c0e6e441af7..b24fadb1294d3dfed94ddfcb00e7e32c5f326790 100644 (file)
Binary files a/test/swftests/ClassCall.swf and b/test/swftests/ClassCall.swf differ
index 576eb30da2cc21cf849c6ac9ca5ba15de5419c60..ea74c2df1e779f6ac1d9e7ba997500035668ea2d 100644 (file)
Binary files a/test/swftests/ClassConstruction.swf and b/test/swftests/ClassConstruction.swf differ
index 0d902fd30f6775762bb0b454271649a273f05690..1acf40a0fdbd69259ec88aaed151f2fdf513659f 100644 (file)
Binary files a/test/swftests/ConstArrayAccess.swf and b/test/swftests/ConstArrayAccess.swf differ
index b8bd0cb97124c000374cba1c8da50747718dab13..8c0359db672abbb7a5ddb1f879d47065a511bde3 100644 (file)
Binary files a/test/swftests/ConstantInt.swf and b/test/swftests/ConstantInt.swf differ
index 3fa3559d674476ced91adc20d6844e59c6a98dbe..be3e096648a3820da47bf308e4c7ef214412e186 100644 (file)
Binary files a/test/swftests/DictCall.swf and b/test/swftests/DictCall.swf differ
index 33487f078548efffa633add3400f41ab60fa0a58..f99ab27bdc95004a8a4e12078b42ba35d17390f4 100644 (file)
Binary files a/test/swftests/EqualsOperator.swf and b/test/swftests/EqualsOperator.swf differ
index 42102af2780a532ff8fbd25029cefd3904670541..3216aee070d49ec43db239152ccf069b0ce821d1 100644 (file)
Binary files a/test/swftests/LocalVars.swf and b/test/swftests/LocalVars.swf differ
index c3ec5137257ff18dbc5a96f53b9a8d355b687d5b..5236bbb7cf902926601dd9bb60f0159e3dbf28fb 100644 (file)
Binary files a/test/swftests/MemberAssignment.swf and b/test/swftests/MemberAssignment.swf differ
index a251f7a20be922af071b237b906311f985c70819..9cbc3e3d7147661e662836d143336970f39be922 100644 (file)
Binary files a/test/swftests/NeOperator.swf and b/test/swftests/NeOperator.swf differ
index 7fa395a5bc595a2d28dded0420278084936bcdc7..c2bd9c04b854332121dc9ba14aad904611b9d247 100644 (file)
Binary files a/test/swftests/PrivateCall.swf and b/test/swftests/PrivateCall.swf differ
index 09a857ecbf002a69f2cd96820a7a5b6f5937e882..0b1a638d8e3ce8449365c1ee4a012ae588ced8c3 100644 (file)
Binary files a/test/swftests/PrivateVoidCall.swf and b/test/swftests/PrivateVoidCall.swf differ
index dff661c8df854e7362c80b385fadc18475a69bbf..0a74f9e568e80b0e225b0378a446b93618e04294 100644 (file)
Binary files a/test/swftests/StaticAssignment.swf and b/test/swftests/StaticAssignment.swf differ
index 622c40dad581db07e9a6ecc3bc483d7a7dee6108..5f0014b72f6434606310234eb25a716efa54a52a 100644 (file)
Binary files a/test/swftests/StaticRetrieval.swf and b/test/swftests/StaticRetrieval.swf differ
index 2784c2b65edcb61fc471fcf010f4499387f95d1b..f4839316ff1cb9ac823a902acd129f6f1acdd158 100644 (file)
Binary files a/test/swftests/StringBasics.swf and b/test/swftests/StringBasics.swf differ
index e81fce18d34c11a969d872a2b8e94850a01d6f9a..7e5b5bcf9cceccac26c0cbbea650d2f308a707c7 100644 (file)
Binary files a/test/swftests/StringCharCodeAt.swf and b/test/swftests/StringCharCodeAt.swf differ
index 188d56ef85849598acb032b47872c7e5f561713a..3f584927257873385ec941905ec6cb89a7847310 100644 (file)
Binary files a/test/swftests/StringConversion.swf and b/test/swftests/StringConversion.swf differ
index 85d87f2c31e803aff668f1d71a6bbdfba33cdcd8..678b9f7d15ee66892ef010552753f0a9577b67a7 100644 (file)
@@ -281,6 +281,61 @@ class TestFormatSelection(unittest.TestCase):
             downloaded = ydl.downloaded_info_dicts[0]
             self.assertEqual(downloaded['format_id'], f1id)
 
+    def test_format_filtering(self):
+        formats = [
+            {'format_id': 'A', 'filesize': 500, 'width': 1000},
+            {'format_id': 'B', 'filesize': 1000, 'width': 500},
+            {'format_id': 'C', 'filesize': 1000, 'width': 400},
+            {'format_id': 'D', 'filesize': 2000, 'width': 600},
+            {'format_id': 'E', 'filesize': 3000},
+            {'format_id': 'F'},
+            {'format_id': 'G', 'filesize': 1000000},
+        ]
+        for f in formats:
+            f['url'] = 'http://_/'
+            f['ext'] = 'unknown'
+        info_dict = _make_result(formats)
+
+        ydl = YDL({'format': 'best[filesize<3000]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'D')
+
+        ydl = YDL({'format': 'best[filesize<=3000]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'E')
+
+        ydl = YDL({'format': 'best[filesize <= ? 3000]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'F')
+
+        ydl = YDL({'format': 'best [filesize = 1000] [width>450]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'B')
+
+        ydl = YDL({'format': 'best [filesize = 1000] [width!=450]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'C')
+
+        ydl = YDL({'format': '[filesize>?1]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'G')
+
+        ydl = YDL({'format': '[filesize<1M]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'E')
+
+        ydl = YDL({'format': '[filesize<1MiB]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'G')
+
     def test_add_extra_info(self):
         test_dict = {
             'extractor': 'Foo',
index bd4fe17bf2c0f37b4f9ac2b291a8f7d664f74534..e66264b4b16147cae6e41d329bf07dcc31ff83e4 100644 (file)
@@ -14,7 +14,6 @@ from test.helper import gettestcases
 from youtube_dl.extractor import (
     FacebookIE,
     gen_extractors,
-    TwitchIE,
     YoutubeIE,
 )
 
@@ -72,18 +71,6 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
         self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
 
-    def test_twitch_channelid_matching(self):
-        self.assertTrue(TwitchIE.suitable('twitch.tv/vanillatv'))
-        self.assertTrue(TwitchIE.suitable('www.twitch.tv/vanillatv'))
-        self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/vanillatv'))
-        self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/vanillatv/'))
-
-    def test_twitch_videoid_matching(self):
-        self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/vanillatv/b/328087483'))
-
-    def test_twitch_chapterid_matching(self):
-        self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/tsm_theoddone/c/2349361'))
-
     def test_youtube_extract(self):
         assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
         assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
@@ -115,8 +102,6 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertMatch(':ythistory', ['youtube:history'])
         self.assertMatch(':thedailyshow', ['ComedyCentralShows'])
         self.assertMatch(':tds', ['ComedyCentralShows'])
-        self.assertMatch(':colbertreport', ['ComedyCentralShows'])
-        self.assertMatch(':cr', ['ComedyCentralShows'])
 
     def test_vimeo_matching(self):
         self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel'])
index 412f3dbce8683766ba53061fb2aecee95339b829..6a149ae4f707e1dc048890b72a4903ccb8a5f785 100644 (file)
@@ -89,7 +89,7 @@ def generator(test_case):
 
         for tc in test_cases:
             info_dict = tc.get('info_dict', {})
-            if not tc.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
+            if not (info_dict.get('id') and info_dict.get('ext')):
                 raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?')
 
         if 'skip' in test_case:
@@ -116,7 +116,7 @@ def generator(test_case):
         expect_warnings(ydl, test_case.get('expected_warnings', []))
 
         def get_tc_filename(tc):
-            return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))
+            return ydl.prepare_filename(tc.get('info_dict', {}))
 
         res_dict = None
 
diff --git a/test/test_http.py b/test/test_http.py
new file mode 100644 (file)
index 0000000..bd4d46f
--- /dev/null
@@ -0,0 +1,72 @@
+#!/usr/bin/env python
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl import YoutubeDL
+from youtube_dl.compat import compat_http_server
+import ssl
+import threading
+
+TEST_DIR = os.path.dirname(os.path.abspath(__file__))
+
+
+class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+    def log_message(self, format, *args):
+        pass
+
+    def do_GET(self):
+        if self.path == '/video.html':
+            self.send_response(200)
+            self.send_header('Content-Type', 'text/html; charset=utf-8')
+            self.end_headers()
+            self.wfile.write(b'<html><video src="/vid.mp4" /></html>')
+        elif self.path == '/vid.mp4':
+            self.send_response(200)
+            self.send_header('Content-Type', 'video/mp4')
+            self.end_headers()
+            self.wfile.write(b'\x00\x00\x00\x00\x20\x66\x74[video]')
+        else:
+            assert False
+
+
+class FakeLogger(object):
+    def debug(self, msg):
+        pass
+
+    def warning(self, msg):
+        pass
+
+    def error(self, msg):
+        pass
+
+
+class TestHTTP(unittest.TestCase):
+    def setUp(self):
+        certfn = os.path.join(TEST_DIR, 'testcert.pem')
+        self.httpd = compat_http_server.HTTPServer(
+            ('localhost', 0), HTTPTestRequestHandler)
+        self.httpd.socket = ssl.wrap_socket(
+            self.httpd.socket, certfile=certfn, server_side=True)
+        self.port = self.httpd.socket.getsockname()[1]
+        self.server_thread = threading.Thread(target=self.httpd.serve_forever)
+        self.server_thread.daemon = True
+        self.server_thread.start()
+
+    def test_nocheckcertificate(self):
+        if sys.version_info >= (2, 7, 9):  # No certificate checking anyways
+            ydl = YoutubeDL({'logger': FakeLogger()})
+            self.assertRaises(
+                Exception,
+                ydl.extract_info, 'https://localhost:%d/video.html' % self.port)
+
+        ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True})
+        r = ydl.extract_info('https://localhost:%d/video.html' % self.port)
+        self.assertEqual(r['url'], 'https://localhost:%d/vid.mp4' % self.port)
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
new file mode 100644 (file)
index 0000000..b91b8c4
--- /dev/null
@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.jsinterp import JSInterpreter
+
+
+class TestJSInterpreter(unittest.TestCase):
+    def test_basic(self):
+        jsi = JSInterpreter('function x(){;}')
+        self.assertEqual(jsi.call_function('x'), None)
+
+        jsi = JSInterpreter('function x3(){return 42;}')
+        self.assertEqual(jsi.call_function('x3'), 42)
+
+    def test_calc(self):
+        jsi = JSInterpreter('function x4(a){return 2*a+1;}')
+        self.assertEqual(jsi.call_function('x4', 3), 7)
+
+    def test_empty_return(self):
+        jsi = JSInterpreter('function f(){return; y()}')
+        self.assertEqual(jsi.call_function('f'), None)
+
+    def test_morespace(self):
+        jsi = JSInterpreter('function x (a) { return 2 * a + 1 ; }')
+        self.assertEqual(jsi.call_function('x', 3), 7)
+
+        jsi = JSInterpreter('function f () { x =  2  ; return x; }')
+        self.assertEqual(jsi.call_function('f'), 2)
+
+    def test_strange_chars(self):
+        jsi = JSInterpreter('function $_xY1 ($_axY1) { var $_axY2 = $_axY1 + 1; return $_axY2; }')
+        self.assertEqual(jsi.call_function('$_xY1', 20), 21)
+
+    def test_operators(self):
+        jsi = JSInterpreter('function f(){return 1 << 5;}')
+        self.assertEqual(jsi.call_function('f'), 32)
+
+        jsi = JSInterpreter('function f(){return 19 & 21;}')
+        self.assertEqual(jsi.call_function('f'), 17)
+
+        jsi = JSInterpreter('function f(){return 11 >> 2;}')
+        self.assertEqual(jsi.call_function('f'), 2)
+
+    def test_array_access(self):
+        jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2] = 7; return x;}')
+        self.assertEqual(jsi.call_function('f'), [5, 2, 7])
+
+    def test_parens(self):
+        jsi = JSInterpreter('function f(){return (1) + (2) * ((( (( (((((3)))))) )) ));}')
+        self.assertEqual(jsi.call_function('f'), 7)
+
+        jsi = JSInterpreter('function f(){return (1 + 2) * 3;}')
+        self.assertEqual(jsi.call_function('f'), 9)
+
+    def test_assignments(self):
+        jsi = JSInterpreter('function f(){var x = 20; x = 30 + 1; return x;}')
+        self.assertEqual(jsi.call_function('f'), 31)
+
+        jsi = JSInterpreter('function f(){var x = 20; x += 30 + 1; return x;}')
+        self.assertEqual(jsi.call_function('f'), 51)
+
+        jsi = JSInterpreter('function f(){var x = 20; x -= 30 + 1; return x;}')
+        self.assertEqual(jsi.call_function('f'), -11)
+
+    def test_comments(self):
+        jsi = JSInterpreter('''
+        function x() {
+            var x = /* 1 + */ 2;
+            var y = /* 30
+            * 40 */ 50;
+            return x + y;
+        }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 52)
+
+    def test_precedence(self):
+        jsi = JSInterpreter('''
+        function x() {
+            var a = [10, 20, 30, 40, 50];
+            var b = 6;
+            a[0]=a[b%a.length];
+            return a;
+        }''')
+        self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50])
+
+
+if __name__ == '__main__':
+    unittest.main()
index 206760d995c98299c60458181f88b5017e65d964..80c765bc496f0ce51e3deda0b39689f8136b597a 100644 (file)
@@ -28,6 +28,7 @@ from youtube_dl.utils import (
     fix_xml_ampersands,
     InAdvancePagedList,
     intlist_to_bytes,
+    is_html,
     js_to_json,
     limit_length,
     OnDemandPagedList,
@@ -51,6 +52,7 @@ from youtube_dl.utils import (
     urlencode_postdata,
     version_tuple,
     xpath_with_ns,
+    render_table,
 )
 
 
@@ -154,6 +156,9 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(
             unified_strdate('11/26/2014 11:30:00 AM PST', day_first=False),
             '20141126')
+        self.assertEqual(
+            unified_strdate('2/2/2015 6:47:40 PM', day_first=False),
+            '20150202')
 
     def test_find_xpath_attr(self):
         testxml = '''<root>
@@ -236,6 +241,8 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(parse_duration('5 s'), 5)
         self.assertEqual(parse_duration('3 min'), 180)
         self.assertEqual(parse_duration('2.5 hours'), 9000)
+        self.assertEqual(parse_duration('02:03:04'), 7384)
+        self.assertEqual(parse_duration('01:02:03:04'), 93784)
 
     def test_fix_xml_ampersands(self):
         self.assertEqual(
@@ -369,6 +376,16 @@ class TestUtil(unittest.TestCase):
         on = js_to_json('{"abc": true}')
         self.assertEqual(json.loads(on), {'abc': True})
 
+        # Ignore JavaScript code as well
+        on = js_to_json('''{
+            "x": 1,
+            y: "a",
+            z: some.code
+        }''')
+        d = json.loads(on)
+        self.assertEqual(d['x'], 1)
+        self.assertEqual(d['y'], 'a')
+
     def test_clean_html(self):
         self.assertEqual(clean_html('a:\nb'), 'a: b')
         self.assertEqual(clean_html('a:\n   "b"'), 'a:    "b"')
@@ -417,5 +434,31 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
         self.assertTrue(age_restricted(18, 14))
         self.assertFalse(age_restricted(18, 18))
 
+    def test_is_html(self):
+        self.assertFalse(is_html(b'\x49\x44\x43<html'))
+        self.assertTrue(is_html(b'<!DOCTYPE foo>\xaaa'))
+        self.assertTrue(is_html(  # UTF-8 with BOM
+            b'\xef\xbb\xbf<!DOCTYPE foo>\xaaa'))
+        self.assertTrue(is_html(  # UTF-16-LE
+            b'\xff\xfe<\x00h\x00t\x00m\x00l\x00>\x00\xe4\x00'
+        ))
+        self.assertTrue(is_html(  # UTF-16-BE
+            b'\xfe\xff\x00<\x00h\x00t\x00m\x00l\x00>\x00\xe4'
+        ))
+        self.assertTrue(is_html(  # UTF-32-BE
+            b'\x00\x00\xFE\xFF\x00\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4'))
+        self.assertTrue(is_html(  # UTF-32-LE
+            b'\xFF\xFE\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4\x00\x00\x00'))
+
+    def test_render_table(self):
+        self.assertEqual(
+            render_table(
+                ['a', 'bcd'],
+                [[123, 4], [9999, 51]]),
+            'a    bcd\n'
+            '123  4\n'
+            '9999 51')
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/testcert.pem b/test/testcert.pem
new file mode 100644 (file)
index 0000000..b3e0f00
--- /dev/null
@@ -0,0 +1,52 @@
+-----BEGIN PRIVATE KEY-----
+MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDMF0bAzaHAdIyB
+HRmnIp4vv40lGqEePmWqicCl0QZ0wsb5dNysSxSa7330M2QeQopGfdaUYF1uTcNp
+Qx6ECgBSfg+RrOBI7r/u4F+sKX8MUXVaf/5QoBUrGNGSn/pp7HMGOuQqO6BVg4+h
+A1ySSwUG8mZItLRry1ISyErmW8b9xlqfd97uLME/5tX+sMelRFjUbAx8A4CK58Ev
+mMguHVTlXzx5RMdYcf1VScYcjlV/qA45uzP8zwI5aigfcmUD+tbGuQRhKxUhmw0J
+aobtOR6+JSOAULW5gYa/egE4dWLwbyM6b6eFbdnjlQzEA1EW7ChMPAW/Mo83KyiP
+tKMCSQulAgMBAAECggEALCfBDAexPjU5DNoh6bIorUXxIJzxTNzNHCdvgbCGiA54
+BBKPh8s6qwazpnjT6WQWDIg/O5zZufqjE4wM9x4+0Zoqfib742ucJO9wY4way6x4
+Clt0xzbLPabB+MoZ4H7ip+9n2+dImhe7pGdYyOHoNYeOL57BBi1YFW42Hj6u/8pd
+63YCXisto3Rz1YvRQVjwsrS+cRKZlzAFQRviL30jav7Wh1aWEfcXxjj4zhm8pJdk
+ITGtq6howz57M0NtX6hZnfe8ywzTnDFIGKIMA2cYHuYJcBh9bc4tCGubTvTKK9UE
+8fM+f6UbfGqfpKCq1mcgs0XMoFDSzKS9+mSJn0+5JQKBgQD+OCKaeH3Yzw5zGnlw
+XuQfMJGNcgNr+ImjmvzUAC2fAZUJLAcQueE5kzMv5Fmd+EFE2CEX1Vit3tg0SXvA
+G+bq609doILHMA03JHnV1npO/YNIhG3AAtJlKYGxQNfWH9mflYj9mEui8ZFxG52o
+zWhHYuifOjjZszUR+/eio6NPzwKBgQDNhUBTrT8LIX4SE/EFUiTlYmWIvOMgXYvN
+8Cm3IRNQ/yyphZaXEU0eJzfX5uCDfSVOgd6YM/2pRah+t+1Hvey4H8e0GVTu5wMP
+gkkqwKPGIR1YOmlw6ippqwvoJD7LuYrm6Q4D6e1PvkjwCq6lEndrOPmPrrXNd0JJ
+XO60y3U2SwKBgQDLkyZarryQXxcCI6Q10Tc6pskYDMIit095PUbTeiUOXNT9GE28
+Hi32ziLCakk9kCysNasii81MxtQ54tJ/f5iGbNMMddnkKl2a19Hc5LjjAm4cJzg/
+98KGEhvyVqvAo5bBDZ06/rcrD+lZOzUglQS5jcIcqCIYa0LHWQ/wJLxFzwKBgFcZ
+1SRhdSmDfUmuF+S4ZpistflYjC3IV5rk4NkS9HvMWaJS0nqdw4A3AMzItXgkjq4S
+DkOVLTkTI5Do5HAWRv/VwC5M2hkR4NMu1VGAKSisGiKtRsirBWSZMEenLNHshbjN
+Jrpz5rZ4H7NT46ZkCCZyFBpX4gb9NyOedjA7Via3AoGARF8RxbYjnEGGFuhnbrJB
+FTPR0vaL4faY3lOgRZ8jOG9V2c9Hzi/y8a8TU4C11jnJSDqYCXBTd5XN28npYxtD
+pjRsCwy6ze+yvYXPO7C978eMG3YRyj366NXUxnXN59ibwe/lxi2OD9z8J1LEdF6z
+VJua1Wn8HKxnXMI61DhTCSo=
+-----END PRIVATE KEY-----
+-----BEGIN CERTIFICATE-----
+MIIEEzCCAvugAwIBAgIJAK1haYi6gmSKMA0GCSqGSIb3DQEBCwUAMIGeMQswCQYD
+VQQGEwJERTEMMAoGA1UECAwDTlJXMRQwEgYDVQQHDAtEdWVzc2VsZG9yZjEbMBkG
+A1UECgwSeW91dHViZS1kbCBwcm9qZWN0MRkwFwYDVQQLDBB5b3V0dWJlLWRsIHRl
+c3RzMRIwEAYDVQQDDAlsb2NhbGhvc3QxHzAdBgkqhkiG9w0BCQEWEHBoaWhhZ0Bw
+aGloYWcuZGUwIBcNMTUwMTMwMDExNTA4WhgPMjExNTAxMDYwMTE1MDhaMIGeMQsw
+CQYDVQQGEwJERTEMMAoGA1UECAwDTlJXMRQwEgYDVQQHDAtEdWVzc2VsZG9yZjEb
+MBkGA1UECgwSeW91dHViZS1kbCBwcm9qZWN0MRkwFwYDVQQLDBB5b3V0dWJlLWRs
+IHRlc3RzMRIwEAYDVQQDDAlsb2NhbGhvc3QxHzAdBgkqhkiG9w0BCQEWEHBoaWhh
+Z0BwaGloYWcuZGUwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDMF0bA
+zaHAdIyBHRmnIp4vv40lGqEePmWqicCl0QZ0wsb5dNysSxSa7330M2QeQopGfdaU
+YF1uTcNpQx6ECgBSfg+RrOBI7r/u4F+sKX8MUXVaf/5QoBUrGNGSn/pp7HMGOuQq
+O6BVg4+hA1ySSwUG8mZItLRry1ISyErmW8b9xlqfd97uLME/5tX+sMelRFjUbAx8
+A4CK58EvmMguHVTlXzx5RMdYcf1VScYcjlV/qA45uzP8zwI5aigfcmUD+tbGuQRh
+KxUhmw0JaobtOR6+JSOAULW5gYa/egE4dWLwbyM6b6eFbdnjlQzEA1EW7ChMPAW/
+Mo83KyiPtKMCSQulAgMBAAGjUDBOMB0GA1UdDgQWBBTBUZoqhQkzHQ6xNgZfFxOd
+ZEVt8TAfBgNVHSMEGDAWgBTBUZoqhQkzHQ6xNgZfFxOdZEVt8TAMBgNVHRMEBTAD
+AQH/MA0GCSqGSIb3DQEBCwUAA4IBAQCUOCl3T/J9B08Z+ijfOJAtkbUaEHuVZb4x
+5EpZSy2ZbkLvtsftMFieHVNXn9dDswQc5qjYStCC4o60LKw4M6Y63FRsAZ/DNaqb
+PY3jyCyuugZ8/sNf50vHYkAcF7SQYqOQFQX4TQsNUk2xMJIt7H0ErQFmkf/u3dg6
+cy89zkT462IwxzSG7NNhIlRkL9o5qg+Y1mF9eZA1B0rcL6hO24PPTHOd90HDChBu
+SZ6XMi/LzYQSTf0Vg2R+uMIVlzSlkdcZ6sqVnnqeLL8dFyIa4e9sj/D4ZCYP8Mqe
+Z73H5/NNhmwCHRqVUTgm307xblQaWGhwAiDkaRvRW2aJQ0qGEdZK
+-----END CERTIFICATE-----
diff --git a/test/video-vid.mp4 b/test/video-vid.mp4
new file mode 100644 (file)
index 0000000..825eaa4
Binary files /dev/null and b/test/video-vid.mp4 differ
index 65d5ba3c3dfd1ca7d0b9adf52deb1a1715006cbd..14d9cce52b139f9d6c63a45e1235d9b768b76588 100755 (executable)
Binary files a/youtube-dl and b/youtube-dl differ
index cb69deed11dee9827c1d1a21930c0b4a0335b97f..bddd9911fa29cc02c8616a16f10f465d235ea028 100644 (file)
@@ -74,6 +74,14 @@ redistribute it or use it however you like.
 \f[C]
 \-\-playlist\-start\ NUMBER\ \ \ \ \ \ \ \ \ \ playlist\ video\ to\ start\ at\ (default\ is\ 1)
 \-\-playlist\-end\ NUMBER\ \ \ \ \ \ \ \ \ \ \ \ playlist\ video\ to\ end\ at\ (default\ is\ last)
+\-\-playlist\-items\ ITEM_SPEC\ \ \ \ \ \ \ playlist\ video\ items\ to\ download.\ Specify
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ indices\ of\ the\ videos\ in\ the\ playlist
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ separated\ by\ commas\ like:\ "\-\-playlist\-items
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 1,2,5,8"\ if\ you\ want\ to\ download\ videos
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ indexed\ 1,\ 2,\ 5,\ 8\ in\ the\ playlist.\ You\ can
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ specify\ range:\ "\-\-playlist\-items
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 1\-3,7,10\-13",\ it\ will\ download\ the\ videos
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ at\ index\ 1,\ 2,\ 3,\ 7,\ 10,\ 11,\ 12\ and\ 13.
 \-\-match\-title\ REGEX\ \ \ \ \ \ \ \ \ \ \ \ \ \ download\ only\ matching\ titles\ (regex\ or
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ caseless\ sub\-string)
 \-\-reject\-title\ REGEX\ \ \ \ \ \ \ \ \ \ \ \ \ skip\ download\ for\ matching\ titles\ (regex\ or
@@ -109,7 +117,8 @@ redistribute it or use it however you like.
 \f[C]
 \-r,\ \-\-rate\-limit\ LIMIT\ \ \ \ \ \ \ \ \ \ \ maximum\ download\ rate\ in\ bytes\ per\ second
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (e.g.\ 50K\ or\ 4.2M)
-\-R,\ \-\-retries\ RETRIES\ \ \ \ \ \ \ \ \ \ \ \ number\ of\ retries\ (default\ is\ 10)
+\-R,\ \-\-retries\ RETRIES\ \ \ \ \ \ \ \ \ \ \ \ number\ of\ retries\ (default\ is\ 10),\ or
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ "infinite".
 \-\-buffer\-size\ SIZE\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ size\ of\ download\ buffer\ (e.g.\ 1024\ or\ 16K)
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (default\ is\ 1024)
 \-\-no\-resize\-buffer\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ automatically\ adjust\ the\ buffer
@@ -117,6 +126,11 @@ redistribute it or use it however you like.
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ automatically\ resized\ from\ an\ initial\ value
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ of\ SIZE.
 \-\-playlist\-reverse\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Download\ playlist\ videos\ in\ reverse\ order
+\-\-xattr\-set\-filesize\ \ \ \ \ \ \ \ \ \ \ \ \ (experimental)\ set\ file\ xattribute
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ ytdl.filesize\ with\ expected\ filesize
+\-\-external\-downloader\ COMMAND\ \ \ \ (experimental)\ Use\ the\ specified\ external
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ downloader.\ Currently\ supports
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ aria2c,curl,wget
 \f[]
 .fi
 .SS Filesystem Options:
@@ -180,7 +194,6 @@ redistribute it or use it however you like.
 \-\-write\-info\-json\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ write\ video\ metadata\ to\ a\ .info.json\ file
 \-\-write\-annotations\ \ \ \ \ \ \ \ \ \ \ \ \ \ write\ video\ annotations\ to\ a\ .annotation
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ file
-\-\-write\-thumbnail\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ write\ thumbnail\ image\ to\ disk
 \-\-load\-info\ FILE\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ json\ file\ containing\ the\ video\ information
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (created\ with\ the\ "\-\-write\-info\-json"\ option)
 \-\-cookies\ FILE\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ file\ to\ read\ cookies\ from\ and\ dump\ cookie
@@ -196,6 +209,16 @@ redistribute it or use it however you like.
 \-\-rm\-cache\-dir\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Delete\ all\ filesystem\ cache\ files
 \f[]
 .fi
+.SS Thumbnail images:
+.IP
+.nf
+\f[C]
+\-\-write\-thumbnail\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ write\ thumbnail\ image\ to\ disk
+\-\-write\-all\-thumbnails\ \ \ \ \ \ \ \ \ \ \ write\ all\ thumbnail\ image\ formats\ to\ disk
+\-\-list\-thumbnails\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Simulate\ and\ list\ all\ available\ thumbnail
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ formats
+\f[]
+.fi
 .SS Verbosity / Simulation Options:
 .IP
 .nf
@@ -256,6 +279,8 @@ redistribute it or use it however you like.
 \-\-bidi\-workaround\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Work\ around\ terminals\ that\ lack
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ bidirectional\ text\ support.\ Requires\ bidiv
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ or\ fribidi\ executable\ in\ PATH
+\-\-sleep\-interval\ SECONDS\ \ \ \ \ \ \ \ \ Number\ of\ seconds\ to\ sleep\ before\ each
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ download.
 \f[]
 .fi
 .SS Video Format Options:
@@ -268,10 +293,22 @@ redistribute it or use it however you like.
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ by\ extension\ for\ the\ extensions\ aac,\ m4a,
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ mp3,\ mp4,\ ogg,\ wav,\ webm.\ You\ can\ also\ use
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ the\ special\ names\ "best",\ "bestvideo",
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ "bestaudio",\ "worst".\ \ By\ default,\ youtube\-
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ dl\ will\ pick\ the\ best\ quality.\ Use\ commas
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ to\ download\ multiple\ audio\ formats,\ such\ as
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \-f
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ "bestaudio",\ "worst".\ \ You\ can\ filter\ the
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ video\ results\ by\ putting\ a\ condition\ in
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ brackets,\ as\ in\ \-f\ "best[height=720]"\ (or
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \-f\ "[filesize>10M]").\ \ This\ works\ for
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ filesize,\ height,\ width,\ tbr,\ abr,\ vbr,\ and
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ fps\ and\ the\ comparisons\ <,\ <=,\ >,\ >=,\ =,\ !=
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ .\ Formats\ for\ which\ the\ value\ is\ not\ known
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ are\ excluded\ unless\ you\ put\ a\ question\ mark
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (?)\ after\ the\ operator.\ You\ can\ combine
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ format\ filters,\ so\ \ \-f\ "[height\ <=?
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 720][tbr>500]"\ selects\ up\ to\ 720p\ videos
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (or\ videos\ where\ the\ height\ is\ not\ known)
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ with\ a\ bitrate\ of\ at\ least\ 500\ KBit/s.\ By
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ default,\ youtube\-dl\ will\ pick\ the\ best
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ quality.\ Use\ commas\ to\ download\ multiple
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ audio\ formats,\ such\ as\ \-f
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 136/137/mp4/bestvideo,140/m4a/bestaudio.
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ You\ can\ merge\ the\ video\ and\ audio\ of\ two
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ formats\ into\ a\ single\ file\ using\ \-f\ <video\-
@@ -313,7 +350,8 @@ redistribute it or use it however you like.
 .nf
 \f[C]
 \-u,\ \-\-username\ USERNAME\ \ \ \ \ \ \ \ \ \ login\ with\ this\ account\ ID
-\-p,\ \-\-password\ PASSWORD\ \ \ \ \ \ \ \ \ \ account\ password
+\-p,\ \-\-password\ PASSWORD\ \ \ \ \ \ \ \ \ \ account\ password.\ If\ this\ option\ is\ left
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ out,\ youtube\-dl\ will\ ask\ interactively.
 \-2,\ \-\-twofactor\ TWOFACTOR\ \ \ \ \ \ \ \ two\-factor\ auth\ code
 \-n,\ \-\-netrc\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ .netrc\ authentication\ data
 \-\-video\-password\ PASSWORD\ \ \ \ \ \ \ \ video\ password\ (vimeo,\ smotri)
@@ -347,11 +385,11 @@ redistribute it or use it however you like.
 \-\-add\-metadata\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ write\ metadata\ to\ the\ video\ file
 \-\-xattrs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ write\ metadata\ to\ the\ video\ file\[aq]s\ xattrs
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (using\ dublin\ core\ and\ xdg\ standards)
-\-\-fixup\ POLICY\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (experimental)\ Automatically\ correct\ known
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ faults\ of\ the\ file.\ One\ of\ never\ (do
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ nothing),\ warn\ (only\ emit\ a\ warning),
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ detect_or_warn(check\ whether\ we\ can\ do
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ anything\ about\ it,\ warn\ otherwise
+\-\-fixup\ POLICY\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Automatically\ correct\ known\ faults\ of\ the
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ file.\ One\ of\ never\ (do\ nothing),\ warn\ (only
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ emit\ a\ warning),\ detect_or_warn\ (the
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ default;\ fix\ file\ if\ we\ can,\ warn
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ otherwise)
 \-\-prefer\-avconv\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Prefer\ avconv\ over\ ffmpeg\ for\ running\ the
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ postprocessors\ (default)
 \-\-prefer\-ffmpeg\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Prefer\ ffmpeg\ over\ avconv\ for\ running\ the
@@ -633,10 +671,50 @@ From then on, after restarting your shell, you will be able to access
 both youtube\-dl and ffmpeg (and youtube\-dl will be able to find
 ffmpeg) by simply typing \f[C]youtube\-dl\f[] or \f[C]ffmpeg\f[], no
 matter what directory you\[aq]re in.
+.SS How do I put downloads into a specific folder?
+.PP
+Use the \f[C]\-o\f[] option to specify an output template (#output-template),
+for example \f[C]\-o\ "/home/user/videos/%(title)s\-%(id)s.%(ext)s"\f[].
+If you want this for all of your downloads, put the option into your
+configuration file (#configuration).
+.SS How do I download a video starting with a \f[C]\-\f[] ?
+.PP
+Either prepend \f[C]http://www.youtube.com/watch?v=\f[] or separate the
+ID from the options with \f[C]\-\-\f[]:
+.IP
+.nf
+\f[C]
+youtube\-dl\ \-\-\ \-wNyEUrxzFU
+youtube\-dl\ "http://www.youtube.com/watch?v=\-wNyEUrxzFU"
+\f[]
+.fi
+.SS Can you add support for this anime video site, or site which shows
+current movies for free?
+.PP
+As a matter of policy (as well as legality), youtube\-dl does not
+include support for services that specialize in infringing copyright.
+As a rule of thumb, if you cannot easily find a video that the service
+is quite obviously allowed to distribute (i.e.
+that has been uploaded by the creator, the creator\[aq]s distributor, or
+is published under a free license), the service is probably unfit for
+inclusion in youtube\-dl.
+.PP
+A note on the service that they don\[aq]t host the infringing content,
+but just link to those who do, is evidence that the service should
+\f[B]not\f[] be included in youtube\-dl.
+The same goes for any DMCA note when the whole front page of the service
+is filled with videos they are not allowed to distribute.
+A "fair use" note is equally unconvincing if the service shows
+copyright\-protected videos in full without authorization.
+.PP
+Support requests for services that \f[B]do\f[] purchase the rights to
+distribute their content are perfectly fine though.
+If in doubt, you can simply include a source that mentions the
+legitimate purchase of content.
 .SS How can I detect whether a given URL is supported by youtube\-dl?
 .PP
 For one, have a look at the list of supported
-sites (docs/supportedsites).
+sites (docs/supportedsites.md).
 Note that it can sometimes happen that the site changes its URL scheme
 (say, from http://example.com/video/1234567 to http://example.com/v/1234567
 ) and youtube\-dl reports an URL of a service in that list as
@@ -780,8 +858,7 @@ return (https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/commo
 Add tests and code for as many as you want.
 .IP " 8." 4
 If you can, check the code with
-pyflakes (https://pypi.python.org/pypi/pyflakes) (a good idea) and
-pep8 (https://pypi.python.org/pypi/pep8) (optional, ignore E501).
+flake8 (https://pypi.python.org/pypi/flake8).
 .IP " 9." 4
 When the tests pass, add (http://git-scm.com/docs/git-add) the new files
 and commit (http://git-scm.com/docs/git-commit) them and
@@ -995,8 +1072,8 @@ Do not post reports of a network error alongside the request for a new
 video service.
 .SS Is anyone going to need the feature?
 .PP
-Only post features that you (or an incapicated friend you can personally
-talk to) require.
+Only post features that you (or an incapacitated friend you can
+personally talk to) require.
 Do not post features because they seem like a good idea.
 If they are really useful, they will be requested by someone who
 requires them.
index 0bad8b653a6245b271308eeb535f2e690edeffe1..6be85ed8260728ed56f9beb8e61e3b7ff2cf1bb2 100644 (file)
@@ -4,7 +4,7 @@ __youtube_dl()
     COMPREPLY=()
     cur="${COMP_WORDS[COMP_CWORD]}"
     prev="${COMP_WORDS[COMP_CWORD-1]}"
-    opts="--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --default-search --ignore-config --flat-playlist --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --playlist-start --playlist-end --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --no-playlist --age-limit --download-archive --include-ads --rate-limit --retries --buffer-size --no-resize-buffer --test --playlist-reverse --batch-file --id --output --autonumber-size --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --write-thumbnail --load-info --cookies --cache-dir --no-cache-dir --rm-cache-dir --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-intermediate-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --format --all-formats --prefer-free-formats --max-quality --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --xattrs --fixup --prefer-avconv --prefer-ffmpeg --exec"
+    opts="--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --default-search --ignore-config --flat-playlist --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --no-playlist --age-limit --download-archive --include-ads --rate-limit --retries --buffer-size --no-resize-buffer --test --playlist-reverse --xattr-set-filesize --external-downloader --batch-file --id --output --autonumber-size --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-intermediate-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --format --all-formats --prefer-free-formats --max-quality --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --xattrs --fixup --prefer-avconv --prefer-ffmpeg --exec"
     keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
     fileopts="-a|--batch-file|--download-archive|--cookies|--load-info"
     diropts="--cache-dir"
index 76857d4103e4e329092f756787240371dc194b4e..5077f5e9f35f18be2989a5ba189c528eb7afdd5c 100644 (file)
@@ -17,6 +17,7 @@ complete --command youtube-dl --long-option force-ipv4 --short-option 4 --descri
 complete --command youtube-dl --long-option force-ipv6 --short-option 6 --description 'Make all connections via IPv6 (experimental)'
 complete --command youtube-dl --long-option playlist-start --description 'playlist video to start at (default is %default)'
 complete --command youtube-dl --long-option playlist-end --description 'playlist video to end at (default is last)'
+complete --command youtube-dl --long-option playlist-items --description 'playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.'
 complete --command youtube-dl --long-option match-title --description 'download only matching titles (regex or caseless sub-string)'
 complete --command youtube-dl --long-option reject-title --description 'skip download for matching titles (regex or caseless sub-string)'
 complete --command youtube-dl --long-option max-downloads --description 'Abort after downloading NUMBER files'
@@ -32,11 +33,13 @@ complete --command youtube-dl --long-option age-limit --description 'download on
 complete --command youtube-dl --long-option download-archive --description 'Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.' --require-parameter
 complete --command youtube-dl --long-option include-ads --description 'Download advertisements as well (experimental)'
 complete --command youtube-dl --long-option rate-limit --short-option r --description 'maximum download rate in bytes per second (e.g. 50K or 4.2M)'
-complete --command youtube-dl --long-option retries --short-option R --description 'number of retries (default is %default)'
+complete --command youtube-dl --long-option retries --short-option R --description 'number of retries (default is %default), or "infinite".'
 complete --command youtube-dl --long-option buffer-size --description 'size of download buffer (e.g. 1024 or 16K) (default is %default)'
 complete --command youtube-dl --long-option no-resize-buffer --description 'do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.'
 complete --command youtube-dl --long-option test
 complete --command youtube-dl --long-option playlist-reverse --description 'Download playlist videos in reverse order'
+complete --command youtube-dl --long-option xattr-set-filesize --description '(experimental) set file xattribute ytdl.filesize with expected filesize'
+complete --command youtube-dl --long-option external-downloader --description '(experimental) Use the specified external downloader. Currently supports aria2c,curl,wget'
 complete --command youtube-dl --long-option batch-file --short-option a --description 'file containing URLs to download ('"'"'-'"'"' for stdin)' --require-parameter
 complete --command youtube-dl --long-option id --description 'use only video ID in file name'
 complete --command youtube-dl --long-option output --short-option o --description 'output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(format)s for the format description (like "22 - 1280x720" or "HD"), %(format_id)s for the unique id of the format (like Youtube'"'"'s itags: "137"), %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id, %(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in, %(playlist_index)s for the position in the playlist. %(height)s and %(width)s for the width and height of the video format. %(resolution)s for a textual description of the resolution of the video format. %% for a literal percent. Use - to output to stdout. Can also be used to download to a different directory, for example with -o '"'"'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s'"'"' .'
@@ -53,12 +56,14 @@ complete --command youtube-dl --long-option no-mtime --description 'do not use t
 complete --command youtube-dl --long-option write-description --description 'write video description to a .description file'
 complete --command youtube-dl --long-option write-info-json --description 'write video metadata to a .info.json file'
 complete --command youtube-dl --long-option write-annotations --description 'write video annotations to a .annotation file'
-complete --command youtube-dl --long-option write-thumbnail --description 'write thumbnail image to disk'
 complete --command youtube-dl --long-option load-info --description 'json file containing the video information (created with the "--write-info-json" option)' --require-parameter
 complete --command youtube-dl --long-option cookies --description 'file to read cookies from and dump cookie jar in' --require-parameter
 complete --command youtube-dl --long-option cache-dir --description 'Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.'
 complete --command youtube-dl --long-option no-cache-dir --description 'Disable filesystem caching'
 complete --command youtube-dl --long-option rm-cache-dir --description 'Delete all filesystem cache files'
+complete --command youtube-dl --long-option write-thumbnail --description 'write thumbnail image to disk'
+complete --command youtube-dl --long-option write-all-thumbnails --description 'write all thumbnail image formats to disk'
+complete --command youtube-dl --long-option list-thumbnails --description 'Simulate and list all available thumbnail formats'
 complete --command youtube-dl --long-option quiet --short-option q --description 'activates quiet mode'
 complete --command youtube-dl --long-option no-warnings --description 'Ignore warnings'
 complete --command youtube-dl --long-option simulate --short-option s --description 'do not download the video and do not write anything to disk'
@@ -91,7 +96,8 @@ complete --command youtube-dl --long-option user-agent --description 'specify a
 complete --command youtube-dl --long-option referer --description 'specify a custom referer, use if the video access is restricted to one domain'
 complete --command youtube-dl --long-option add-header --description 'specify a custom HTTP header and its value, separated by a colon '"'"':'"'"'. You can use this option multiple times'
 complete --command youtube-dl --long-option bidi-workaround --description 'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH'
-complete --command youtube-dl --long-option format --short-option f --description 'video format code, specify the order of preference using slashes, as in -f 22/17/18 .  Instead of format codes, you can select by extension for the extensions aac, m4a, mp3, mp4, ogg, wav, webm. You can also use the special names "best", "bestvideo", "bestaudio", "worst".  By default, youtube-dl will pick the best quality. Use commas to download multiple audio formats, such as -f  136/137/mp4/bestvideo,140/m4a/bestaudio. You can merge the video and audio of two formats into a single file using -f <video-format>+<audio-format> (requires ffmpeg or avconv), for example -f bestvideo+bestaudio.'
+complete --command youtube-dl --long-option sleep-interval --description 'Number of seconds to sleep before each download.'
+complete --command youtube-dl --long-option format --short-option f --description 'video format code, specify the order of preference using slashes, as in -f 22/17/18 .  Instead of format codes, you can select by extension for the extensions aac, m4a, mp3, mp4, ogg, wav, webm. You can also use the special names "best", "bestvideo", "bestaudio", "worst".  You can filter the video results by putting a condition in brackets, as in -f "best[height=720]" (or -f "[filesize>10M]").  This works for filesize, height, width, tbr, abr, vbr, and fps and the comparisons <, <=, >, >=, =, != . Formats for which the value is not known are excluded unless you put a question mark (?) after the operator. You can combine format filters, so  -f "[height <=? 720][tbr>500]" selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. By default, youtube-dl will pick the best quality. Use commas to download multiple audio formats, such as -f  136/137/mp4/bestvideo,140/m4a/bestaudio. You can merge the video and audio of two formats into a single file using -f <video-format>+<audio-format> (requires ffmpeg or avconv), for example -f bestvideo+bestaudio.'
 complete --command youtube-dl --long-option all-formats --description 'download all available video formats'
 complete --command youtube-dl --long-option prefer-free-formats --description 'prefer free video formats unless a specific one is requested'
 complete --command youtube-dl --long-option max-quality --description 'highest quality format to download'
@@ -106,7 +112,7 @@ complete --command youtube-dl --long-option list-subs --description 'lists all a
 complete --command youtube-dl --long-option sub-format --description 'subtitle format (default=srt) ([sbv/vtt] youtube only)'
 complete --command youtube-dl --long-option sub-lang --description 'languages of the subtitles to download (optional) separated by commas, use IETF language tags like '"'"'en,pt'"'"''
 complete --command youtube-dl --long-option username --short-option u --description 'login with this account ID'
-complete --command youtube-dl --long-option password --short-option p --description 'account password'
+complete --command youtube-dl --long-option password --short-option p --description 'account password. If this option is left out, youtube-dl will ask interactively.'
 complete --command youtube-dl --long-option twofactor --short-option 2 --description 'two-factor auth code'
 complete --command youtube-dl --long-option netrc --short-option n --description 'use .netrc authentication data'
 complete --command youtube-dl --long-option video-password --description 'video password (vimeo, smotri)'
@@ -120,7 +126,7 @@ complete --command youtube-dl --long-option embed-subs --description 'embed subt
 complete --command youtube-dl --long-option embed-thumbnail --description 'embed thumbnail in the audio as cover art'
 complete --command youtube-dl --long-option add-metadata --description 'write metadata to the video file'
 complete --command youtube-dl --long-option xattrs --description 'write metadata to the video file'"'"'s xattrs (using dublin core and xdg standards)'
-complete --command youtube-dl --long-option fixup --description '(experimental) Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn(check whether we can do anything about it, warn otherwise'
+complete --command youtube-dl --long-option fixup --description 'Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn (the default; fix file if we can, warn otherwise)'
 complete --command youtube-dl --long-option prefer-avconv --description 'Prefer avconv over ffmpeg for running the postprocessors (default)'
 complete --command youtube-dl --long-option prefer-ffmpeg --description 'Prefer ffmpeg over avconv for running the postprocessors'
 complete --command youtube-dl --long-option exec --description 'Execute a command on the file after downloading, similar to find'"'"'s -exec syntax. Example: --exec '"'"'adb push {} /sdcard/Music/ && rm {}'"'"''
index b4cef180d3d8329f026bed7d1f10105b810b84a9..71d142377fc694ed1bb3258cc1aaab31f07da7ce 100644 (file)
@@ -19,7 +19,7 @@ __youtube_dl() {
             elif [[ ${prev} == "--recode-video" ]]; then
                 _arguments '*: :(mp4 flv ogg webm mkv)'
             else
-                _arguments '*: :(--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --default-search --ignore-config --flat-playlist --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --playlist-start --playlist-end --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --no-playlist --age-limit --download-archive --include-ads --rate-limit --retries --buffer-size --no-resize-buffer --test --playlist-reverse --batch-file --id --output --autonumber-size --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --write-thumbnail --load-info --cookies --cache-dir --no-cache-dir --rm-cache-dir --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-intermediate-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --format --all-formats --prefer-free-formats --max-quality --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --xattrs --fixup --prefer-avconv --prefer-ffmpeg --exec)'
+                _arguments '*: :(--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --default-search --ignore-config --flat-playlist --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --no-playlist --age-limit --download-archive --include-ads --rate-limit --retries --buffer-size --no-resize-buffer --test --playlist-reverse --xattr-set-filesize --external-downloader --batch-file --id --output --autonumber-size --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-intermediate-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --format --all-formats --prefer-free-formats --max-quality --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --xattrs --fixup --prefer-avconv --prefer-ffmpeg --exec)'
             fi
         ;;
     esac
index 772fddd4542f6726b57878a950410c90ee1a640c..9605f8f99ac04bf72032b1cc8a64635bb6c2a8b8 100755 (executable)
@@ -10,6 +10,7 @@ import io
 import itertools
 import json
 import locale
+import operator
 import os
 import platform
 import re
@@ -24,6 +25,7 @@ if os.name == 'nt':
     import ctypes
 
 from .compat import (
+    compat_basestring,
     compat_cookiejar,
     compat_expanduser,
     compat_http_client,
@@ -49,11 +51,14 @@ from .utils import (
     make_HTTPS_handler,
     MaxDownloadsReached,
     PagedList,
+    parse_filesize,
     PostProcessingError,
     platform_name,
     preferredencoding,
+    render_table,
     SameFileError,
     sanitize_filename,
+    std_headers,
     subtitles_filename,
     takewhile_inclusive,
     UnavailableVideoError,
@@ -71,6 +76,7 @@ from .extractor import get_info_extractor, gen_extractors
 from .downloader import get_suitable_downloader
 from .downloader.rtmp import rtmpdump_version
 from .postprocessor import (
+    FFmpegFixupM4aPP,
     FFmpegFixupStretchedPP,
     FFmpegMergerPP,
     FFmpegPostProcessor,
@@ -132,6 +138,7 @@ class YoutubeDL(object):
     nooverwrites:      Prevent overwriting files.
     playliststart:     Playlist item to start at.
     playlistend:       Playlist item to end at.
+    playlist_items:    Specific indices of playlist to download.
     playlistreverse:   Download playlist items in reverse order.
     matchtitle:        Download only matching titles.
     rejecttitle:       Reject downloads for matching titles.
@@ -141,6 +148,7 @@ class YoutubeDL(object):
     writeinfojson:     Write the video description to a .info.json file
     writeannotations:  Write the video annotations to a .annotations.xml file
     writethumbnail:    Write the thumbnail image to a file
+    write_all_thumbnails:  Write all thumbnail formats to files
     writesubtitles:    Write the video subtitles to a file
     writeautomaticsub: Write the automatic subtitles to a file
     allsubtitles:      Downloads all the subtitles of the video
@@ -191,11 +199,12 @@ class YoutubeDL(object):
                        postprocessor.
     progress_hooks:    A list of functions that get called on download
                        progress, with a dictionary with the entries
-                       * filename: The final filename
-                       * status: One of "downloading" and "finished"
-
-                       The dict may also have some of the following entries:
+                       * status: One of "downloading" and "finished".
+                                 Check this first and ignore unknown values.
 
+                       If status is one of "downloading" or "finished", the
+                       following properties may also be present:
+                       * filename: The final filename (always present)
                        * downloaded_bytes: Bytes on disk
                        * total_bytes: Size of the whole file, None if unknown
                        * tmpfilename: The filename we're currently writing to
@@ -211,16 +220,21 @@ class YoutubeDL(object):
                        - "never": do nothing
                        - "warn": only emit a warning
                        - "detect_or_warn": check whether we can do anything
-                                           about it, warn otherwise
+                                           about it, warn otherwise (default)
     source_address:    (Experimental) Client-side IP address to bind to.
     call_home:         Boolean, true iff we are allowed to contact the
                        youtube-dl servers for debugging.
+    sleep_interval:    Number of seconds to sleep before each download.
+    external_downloader:  Executable of the external downloader to call.
+    listformats:       Print an overview of available video formats and exit.
+    list_thumbnails:   Print a table of all thumbnails and exit.
 
 
     The following parameters are not used by YoutubeDL itself, they are used by
     the FileDownloader:
     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
-    noresizebuffer, retries, continuedl, noprogress, consoletitle
+    noresizebuffer, retries, continuedl, noprogress, consoletitle,
+    xattr_set_filesize.
 
     The following options are used by the post processors:
     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
@@ -530,6 +544,11 @@ class YoutubeDL(object):
             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
             tmpl = compat_expanduser(outtmpl)
             filename = tmpl % template_dict
+            # Temporary fix for #4787
+            # 'Treat' all problem characters by passing filename through preferredencoding
+            # to workaround encoding issues with subprocess on python2 @ Windows
+            if sys.version_info < (3, 0) and sys.platform == 'win32':
+                filename = encodeFilename(filename, True).decode(preferredencoding())
             return filename
         except ValueError as err:
             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
@@ -693,24 +712,51 @@ class YoutubeDL(object):
             if playlistend == -1:
                 playlistend = None
 
+            playlistitems_str = self.params.get('playlist_items', None)
+            playlistitems = None
+            if playlistitems_str is not None:
+                def iter_playlistitems(format):
+                    # Expand a spec such as '1-3,7' into 1, 2, 3, 7: every
+                    # comma-separated piece is either an inclusive
+                    # 'start-end' range or a single index.
+                    for piece in format.split(','):
+                        if '-' not in piece:
+                            yield int(piece)
+                            continue
+                        first, last = piece.split('-')
+                        for index in range(int(first), int(last) + 1):
+                            yield index
+                playlistitems = iter_playlistitems(playlistitems_str)
+
             ie_entries = ie_result['entries']
             if isinstance(ie_entries, list):
                 n_all_entries = len(ie_entries)
-                entries = ie_entries[playliststart:playlistend]
+                if playlistitems:
+                    entries = [ie_entries[i - 1] for i in playlistitems]
+                else:
+                    entries = ie_entries[playliststart:playlistend]
                 n_entries = len(entries)
                 self.to_screen(
                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
             elif isinstance(ie_entries, PagedList):
-                entries = ie_entries.getslice(
-                    playliststart, playlistend)
+                if playlistitems:
+                    entries = []
+                    for item in playlistitems:
+                        entries.extend(ie_entries.getslice(
+                            item - 1, item
+                        ))
+                else:
+                    entries = ie_entries.getslice(
+                        playliststart, playlistend)
                 n_entries = len(entries)
                 self.to_screen(
                     "[%s] playlist %s: Downloading %d videos" %
                     (ie_result['extractor'], playlist, n_entries))
             else:  # iterable
-                entries = list(itertools.islice(
-                    ie_entries, playliststart, playlistend))
+                if playlistitems:
+                    entry_list = list(ie_entries)
+                    entries = [entry_list[i - 1] for i in playlistitems]
+                else:
+                    entries = list(itertools.islice(
+                        ie_entries, playliststart, playlistend))
                 n_entries = len(entries)
                 self.to_screen(
                     "[%s] playlist %s: Downloading %d videos" %
@@ -768,7 +814,59 @@ class YoutubeDL(object):
         else:
             raise Exception('Invalid result type: %s' % result_type)
 
+    def _apply_format_filter(self, format_spec, available_formats):
+        " Returns a tuple of the remaining format_spec and filtered formats "
+        # Handles exactly one trailing "[key op value]" filter: the filter is
+        # stripped from format_spec and only the formats satisfying it are
+        # kept.  Raises ValueError when format_spec does not end in a valid
+        # filter or when the filter value cannot be parsed.
+
+        OPERATORS = {
+            '<': operator.lt,
+            '<=': operator.le,
+            '>': operator.gt,
+            '>=': operator.ge,
+            '=': operator.eq,
+            '!=': operator.ne,
+        }
+        # The pattern is anchored with $, so only the last bracketed filter of
+        # format_spec is matched by this call.
+        operator_rex = re.compile(r'''(?x)\s*\[
+            (?P<key>width|height|tbr|abr|vbr|filesize|fps)
+            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
+            \]$
+            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
+        m = operator_rex.search(format_spec)
+        if not m:
+            raise ValueError('Invalid format specification %r' % format_spec)
+
+        # Plain integers are used as-is; anything else is handed to
+        # parse_filesize, first verbatim and then with a 'B' appended.
+        try:
+            comparison_value = int(m.group('value'))
+        except ValueError:
+            comparison_value = parse_filesize(m.group('value'))
+            if comparison_value is None:
+                comparison_value = parse_filesize(m.group('value') + 'B')
+            if comparison_value is None:
+                raise ValueError(
+                    'Invalid value %r in format specification %r' % (
+                        m.group('value'), format_spec))
+        op = OPERATORS[m.group('op')]
+
+        def _filter(f):
+            # A format that lacks the key passes only when the filter carried
+            # the optional '?' marker (the none_inclusive group matched).
+            actual_value = f.get(m.group('key'))
+            if actual_value is None:
+                return m.group('none_inclusive')
+            return op(actual_value, comparison_value)
+        new_formats = [f for f in available_formats if _filter(f)]
+
+        # Cut the matched filter text off the end; a spec that consisted only
+        # of filters falls back to 'best'.
+        new_format_spec = format_spec[:-len(m.group(0))]
+        if not new_format_spec:
+            new_format_spec = 'best'
+
+        return (new_format_spec, new_formats)
+
     def select_format(self, format_spec, available_formats):
+        while format_spec.endswith(']'):
+            format_spec, available_formats = self._apply_format_filter(
+                format_spec, available_formats)
+        if not available_formats:
+            return None
+
         if format_spec == 'best' or format_spec is None:
             return available_formats[-1]
         elif format_spec == 'worst':
@@ -808,6 +906,42 @@ class YoutubeDL(object):
                 return matches[-1]
         return None
 
+    def _calc_headers(self, info_dict):
+        """Build the HTTP headers to use for info_dict.
+
+        Starts from a copy of std_headers, overlays info_dict's own
+        'http_headers' (if any) and finally sets 'Cookie' when
+        _calc_cookies() yields a value.
+        """
+        headers = std_headers.copy()
+
+        extra_headers = info_dict.get('http_headers')
+        if extra_headers:
+            headers.update(extra_headers)
+
+        cookie_header = self._calc_cookies(info_dict)
+        if cookie_header:
+            headers['Cookie'] = cookie_header
+
+        return headers
+
+    def _calc_cookies(self, info_dict):
+        # Returns the 'Cookie' header value that self.cookiejar adds for
+        # info_dict['url'], or None when it adds none.
+
+        # Minimal stand-in for a request object: it only records the headers
+        # that get added to it.  NOTE(review): the method set is presumably
+        # what cookiejar.add_cookie_header() calls on a request -- confirm
+        # against the cookiejar documentation before changing it.
+        class _PseudoRequest(object):
+            def __init__(self, url):
+                self.url = url
+                self.headers = {}
+                self.unverifiable = False
+
+            def add_unredirected_header(self, k, v):
+                self.headers[k] = v
+
+            def get_full_url(self):
+                return self.url
+
+            def is_unverifiable(self):
+                return self.unverifiable
+
+            def has_header(self, h):
+                return h in self.headers
+
+        pr = _PseudoRequest(info_dict['url'])
+        # The cookie jar writes its header into pr.headers via
+        # add_unredirected_header(); no network request is made here.
+        self.cookiejar.add_cookie_header(pr)
+        return pr.headers.get('Cookie')
+
     def process_video_result(self, info_dict, download=True):
         assert info_dict.get('_type', 'video') == 'video'
 
@@ -822,12 +956,19 @@ class YoutubeDL(object):
             info_dict['playlist_index'] = None
 
         thumbnails = info_dict.get('thumbnails')
+        if thumbnails is None:
+            thumbnail = info_dict.get('thumbnail')
+            if thumbnail:
+                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
         if thumbnails:
             thumbnails.sort(key=lambda t: (
-                t.get('width'), t.get('height'), t.get('url')))
-            for t in thumbnails:
+                t.get('preference'), t.get('width'), t.get('height'),
+                t.get('id'), t.get('url')))
+            for i, t in enumerate(thumbnails):
                 if 'width' in t and 'height' in t:
                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
+                if t.get('id') is None:
+                    t['id'] = '%d' % i
 
         if thumbnails and 'thumbnail' not in info_dict:
             info_dict['thumbnail'] = thumbnails[-1]['url']
@@ -876,6 +1017,11 @@ class YoutubeDL(object):
             # Automatically determine file extension if missing
             if 'ext' not in format:
                 format['ext'] = determine_ext(format['url']).lower()
+            # Add HTTP headers, so that external programs can use them from the
+            # json output
+            full_format_info = info_dict.copy()
+            full_format_info.update(format)
+            format['http_headers'] = self._calc_headers(full_format_info)
 
         format_limit = self.params.get('format_limit', None)
         if format_limit:
@@ -891,9 +1037,12 @@ class YoutubeDL(object):
             # element in the 'formats' field in info_dict is info_dict itself,
             # wich can't be exported to json
             info_dict['formats'] = formats
-        if self.params.get('listformats', None):
+        if self.params.get('listformats'):
             self.list_formats(info_dict)
             return
+        if self.params.get('list_thumbnails'):
+            self.list_thumbnails(info_dict)
+            return
 
         req_format = self.params.get('format')
         if req_format is None:
@@ -927,8 +1076,10 @@ class YoutubeDL(object):
                                 else self.params['merge_output_format'])
                             selected_format = {
                                 'requested_formats': formats_info,
-                                'format': rf,
-                                'ext': formats_info[0]['ext'],
+                                'format': '%s+%s' % (formats_info[0].get('format'),
+                                                     formats_info[1].get('format')),
+                                'format_id': '%s+%s' % (formats_info[0].get('format_id'),
+                                                        formats_info[1].get('format_id')),
                                 'width': formats_info[0].get('width'),
                                 'height': formats_info[0].get('height'),
                                 'resolution': formats_info[0].get('resolution'),
@@ -989,7 +1140,7 @@ class YoutubeDL(object):
 
         self._num_downloads += 1
 
-        filename = self.prepare_filename(info_dict)
+        info_dict['_filename'] = filename = self.prepare_filename(info_dict)
 
         # Forced printings
         if self.params.get('forcetitle', False):
@@ -1014,10 +1165,7 @@ class YoutubeDL(object):
         if self.params.get('forceformat', False):
             self.to_stdout(info_dict['format'])
         if self.params.get('forcejson', False):
-            info_dict['_filename'] = filename
             self.to_stdout(json.dumps(info_dict))
-        if self.params.get('dump_single_json', False):
-            info_dict['_filename'] = filename
 
         # Do nothing else if in simulate mode
         if self.params.get('simulate', False):
@@ -1100,35 +1248,18 @@ class YoutubeDL(object):
                     self.report_error('Cannot write metadata to JSON file ' + infofn)
                     return
 
-        if self.params.get('writethumbnail', False):
-            if info_dict.get('thumbnail') is not None:
-                thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
-                thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
-                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
-                    self.to_screen('[%s] %s: Thumbnail is already present' %
-                                   (info_dict['extractor'], info_dict['id']))
-                else:
-                    self.to_screen('[%s] %s: Downloading thumbnail ...' %
-                                   (info_dict['extractor'], info_dict['id']))
-                    try:
-                        uf = self.urlopen(info_dict['thumbnail'])
-                        with open(thumb_filename, 'wb') as thumbf:
-                            shutil.copyfileobj(uf, thumbf)
-                        self.to_screen('[%s] %s: Writing thumbnail to: %s' %
-                                       (info_dict['extractor'], info_dict['id'], thumb_filename))
-                    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-                        self.report_warning('Unable to download thumbnail "%s": %s' %
-                                            (info_dict['thumbnail'], compat_str(err)))
+        self._write_thumbnails(info_dict, filename)
 
         if not self.params.get('skip_download', False):
             try:
                 def dl(name, info):
-                    fd = get_suitable_downloader(info)(self, self.params)
+                    fd = get_suitable_downloader(info, self.params)(self, self.params)
                     for ph in self._progress_hooks:
                         fd.add_progress_hook(ph)
                     if self.params.get('verbose'):
                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                     return fd.download(name, info)
+
                 if info_dict.get('requested_formats') is not None:
                     downloaded = []
                     success = True
@@ -1164,11 +1295,12 @@ class YoutubeDL(object):
 
             if success:
                 # Fixup content
+                fixup_policy = self.params.get('fixup')
+                if fixup_policy is None:
+                    fixup_policy = 'detect_or_warn'
+
                 stretched_ratio = info_dict.get('stretched_ratio')
                 if stretched_ratio is not None and stretched_ratio != 1:
-                    fixup_policy = self.params.get('fixup')
-                    if fixup_policy is None:
-                        fixup_policy = 'detect_or_warn'
                     if fixup_policy == 'warn':
                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                             info_dict['id'], stretched_ratio))
@@ -1182,7 +1314,23 @@ class YoutubeDL(object):
                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
                                     info_dict['id'], stretched_ratio))
                     else:
-                        assert fixup_policy == 'ignore'
+                        assert fixup_policy in ('ignore', 'never')
+
+                if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
+                    if fixup_policy == 'warn':
+                        self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
+                            info_dict['id']))
+                    elif fixup_policy == 'detect_or_warn':
+                        fixup_pp = FFmpegFixupM4aPP(self)
+                        if fixup_pp.available:
+                            info_dict.setdefault('__postprocessors', [])
+                            info_dict['__postprocessors'].append(fixup_pp)
+                        else:
+                            self.report_warning(
+                                '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
+                                    info_dict['id']))
+                    else:
+                        assert fixup_policy in ('ignore', 'never')
 
                 try:
                     self.post_process(filename, info_dict)
@@ -1384,8 +1532,26 @@ class YoutubeDL(object):
         header_line = line({
             'format_id': 'format code', 'ext': 'extension',
             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
-        self.to_screen('[info] Available formats for %s:\n%s\n%s' %
-                       (info_dict['id'], header_line, '\n'.join(formats_s)))
+        self.to_screen(
+            '[info] Available formats for %s:\n%s\n%s' %
+            (info_dict['id'], header_line, '\n'.join(formats_s)))
+
+    def list_thumbnails(self, info_dict):
+        """Print a table (ID, width, height, URL) of the video's thumbnails.
+
+        When 'thumbnails' is missing or empty, the single 'thumbnail' URL is
+        listed with id '0'; if that is absent too, a "no thumbnails" notice
+        is printed instead.
+        """
+        thumbnails = info_dict.get('thumbnails')
+        if not thumbnails:
+            tn_url = info_dict.get('thumbnail')
+            if not tn_url:
+                self.to_screen(
+                    '[info] No thumbnails present for %s' % info_dict['id'])
+                return
+            thumbnails = [{'id': '0', 'url': tn_url}]
+
+        self.to_screen(
+            '[info] Thumbnails for %s:' % info_dict['id'])
+
+        header_row = ['ID', 'width', 'height', 'URL']
+        rows = []
+        for t in thumbnails:
+            # Missing dimensions are shown as 'unknown'.
+            rows.append([
+                t['id'],
+                t.get('width', 'unknown'),
+                t.get('height', 'unknown'),
+                t['url'],
+            ])
+        self.to_screen(render_table(header_row, rows))
 
     def urlopen(self, req):
         """ Start an HTTP download """
@@ -1396,7 +1562,7 @@ class YoutubeDL(object):
         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
         # To work around aforementioned issue we will replace request's original URL with
         # percent-encoded one
-        req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
+        req_is_string = isinstance(req, compat_basestring)
         url = req if req_is_string else req.get_full_url()
         url_escaped = escape_url(url)
 
@@ -1531,3 +1697,39 @@ class YoutubeDL(object):
         if encoding is None:
             encoding = preferredencoding()
         return encoding
+
+    def _write_thumbnails(self, info_dict, filename):
+        """Download thumbnail image(s) next to the video file.
+
+        With the 'writethumbnail' param only the last entry of
+        info_dict['thumbnails'] is written; with 'write_all_thumbnails' every
+        entry is written.  When more than one thumbnail is written, each
+        file name gets an '_<id>' suffix.  Nothing happens when neither param
+        is set or when there are no thumbnails.  Download failures are
+        reported as warnings and do not abort the remaining thumbnails.
+
+        filename is the video's output file name; its extension is replaced
+        by the thumbnail's extension ('jpg' when it cannot be determined).
+        """
+        if self.params.get('writethumbnail', False):
+            thumbnails = info_dict.get('thumbnails')
+            if thumbnails:
+                thumbnails = [thumbnails[-1]]
+        elif self.params.get('write_all_thumbnails', False):
+            thumbnails = info_dict.get('thumbnails')
+        else:
+            return
+
+        if not thumbnails:
+            # No thumbnails present, so return immediately
+            return
+
+        multiple = len(thumbnails) > 1
+        for t in thumbnails:
+            thumb_ext = determine_ext(t['url'], 'jpg')
+            suffix = '_%s' % t['id'] if multiple else ''
+            thumb_display_id = '%s ' % t['id'] if multiple else ''
+            thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
+            # Use the same encoded name for the existence check and for
+            # open(), so both always refer to the same file on disk.
+            encoded_thumb_filename = encodeFilename(thumb_filename)
+
+            if self.params.get('nooverwrites', False) and os.path.exists(encoded_thumb_filename):
+                self.to_screen('[%s] %s: Thumbnail %sis already present' %
+                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
+            else:
+                self.to_screen('[%s] %s: Downloading thumbnail %s...' %
+                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
+                try:
+                    uf = self.urlopen(t['url'])
+                    try:
+                        with open(encoded_thumb_filename, 'wb') as thumbf:
+                            shutil.copyfileobj(uf, thumbf)
+                    finally:
+                        # Release the HTTP response even if writing fails.
+                        uf.close()
+                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
+                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
+                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+                    self.report_warning('Unable to download thumbnail "%s": %s' %
+                                        (t['url'], compat_str(err)))
index ddf6260d1e9a4a11fd0140ccbc8d6a4c324df78b..e90679ff974e6464885284733edf96ab4aca09b9 100644 (file)
@@ -143,10 +143,13 @@ def _real_main(argv=None):
             parser.error('invalid max_filesize specified')
         opts.max_filesize = numeric_limit
     if opts.retries is not None:
-        try:
-            opts.retries = int(opts.retries)
-        except (TypeError, ValueError):
-            parser.error('invalid retry count specified')
+        if opts.retries in ('inf', 'infinite'):
+            opts_retries = float('inf')
+        else:
+            try:
+                opts_retries = int(opts.retries)
+            except (TypeError, ValueError):
+                parser.error('invalid retry count specified')
     if opts.buffersize is not None:
         numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
         if numeric_buffersize is None:
@@ -238,6 +241,12 @@ def _real_main(argv=None):
             'verboseOutput': opts.verbose,
             'exec_cmd': opts.exec_cmd,
         })
+    if opts.xattr_set_filesize:
+        try:
+            import xattr
+            xattr  # Confuse flake8
+        except ImportError:
+            parser.error('setting filesize xattr requested but python-xattr is not available')
 
     ydl_opts = {
         'usenetrc': opts.usenetrc,
@@ -268,7 +277,7 @@ def _real_main(argv=None):
         'ignoreerrors': opts.ignoreerrors,
         'ratelimit': opts.ratelimit,
         'nooverwrites': opts.nooverwrites,
-        'retries': opts.retries,
+        'retries': opts_retries,
         'buffersize': opts.buffersize,
         'noresizebuffer': opts.noresizebuffer,
         'continuedl': opts.continue_dl,
@@ -286,6 +295,7 @@ def _real_main(argv=None):
         'writeannotations': opts.writeannotations,
         'writeinfojson': opts.writeinfojson,
         'writethumbnail': opts.writethumbnail,
+        'write_all_thumbnails': opts.write_all_thumbnails,
         'writesubtitles': opts.writesubtitles,
         'writeautomaticsub': opts.writeautomaticsub,
         'allsubtitles': opts.allsubtitles,
@@ -329,6 +339,11 @@ def _real_main(argv=None):
         'fixup': opts.fixup,
         'source_address': opts.source_address,
         'call_home': opts.call_home,
+        'sleep_interval': opts.sleep_interval,
+        'external_downloader': opts.external_downloader,
+        'list_thumbnails': opts.list_thumbnails,
+        'playlist_items': opts.playlist_items,
+        'xattr_set_filesize': opts.xattr_set_filesize,
     }
 
     with YoutubeDL(ydl_opts) as ydl:
@@ -346,7 +361,9 @@ def _real_main(argv=None):
                 sys.exit()
 
             ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
-            parser.error('you must provide at least one URL')
+            parser.error(
+                'You must provide at least one URL.\n'
+                'Type youtube-dl --help to see a list of all options.')
 
         try:
             if opts.load_info_filename is not None:
index 4453b34fceea50861ccedec0ec177a12f424274d..e989cdbbd180abf4543726e86d088cd45225bfca 100644 (file)
@@ -71,6 +71,11 @@ try:
 except ImportError:
     compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
 
+try:
+    import http.server as compat_http_server
+except ImportError:
+    import BaseHTTPServer as compat_http_server
+
 try:
     from urllib.parse import unquote as compat_urllib_parse_unquote
 except ImportError:
@@ -109,6 +114,26 @@ except ImportError:
             string += pct_sequence.decode(encoding, errors)
         return string
 
+try:
+    compat_str = unicode  # Python 2
+except NameError:
+    compat_str = str
+
+try:
+    compat_basestring = basestring  # Python 2
+except NameError:
+    compat_basestring = str
+
+try:
+    compat_chr = unichr  # Python 2
+except NameError:
+    compat_chr = chr
+
+try:
+    from xml.etree.ElementTree import ParseError as compat_xml_parse_error
+except ImportError:  # Python 2.6
+    from xml.parsers.expat import ExpatError as compat_xml_parse_error
+
 
 try:
     from urllib.parse import parse_qs as compat_parse_qs
@@ -118,7 +143,7 @@ except ImportError:  # Python 2
 
     def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                    encoding='utf-8', errors='replace'):
-        qs, _coerce_result = qs, unicode
+        qs, _coerce_result = qs, compat_str
         pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
         r = []
         for name_value in pairs:
@@ -157,21 +182,6 @@ except ImportError:  # Python 2
                 parsed_result[name] = [value]
         return parsed_result
 
-try:
-    compat_str = unicode  # Python 2
-except NameError:
-    compat_str = str
-
-try:
-    compat_chr = unichr  # Python 2
-except NameError:
-    compat_chr = chr
-
-try:
-    from xml.etree.ElementTree import ParseError as compat_xml_parse_error
-except ImportError:  # Python 2.6
-    from xml.parsers.expat import ExpatError as compat_xml_parse_error
-
 try:
     from shlex import quote as shlex_quote
 except ImportError:  # Python < 3.3
@@ -357,6 +367,7 @@ def workaround_optparse_bug9161():
 
 __all__ = [
     'compat_HTTPError',
+    'compat_basestring',
     'compat_chr',
     'compat_cookiejar',
     'compat_expanduser',
@@ -365,6 +376,7 @@ __all__ = [
     'compat_html_entities',
     'compat_html_parser',
     'compat_http_client',
+    'compat_http_server',
     'compat_kwargs',
     'compat_ord',
     'compat_parse_qs',
index 31e28df58e828f8c5434390ff18a63358215924a..eff1122c5c09eff494ad34af835b06e33c9e4751 100644 (file)
@@ -1,35 +1,41 @@
 from __future__ import unicode_literals
 
 from .common import FileDownloader
+from .external import get_external_downloader
+from .f4m import F4mFD
 from .hls import HlsFD
 from .hls import NativeHlsFD
 from .http import HttpFD
 from .mplayer import MplayerFD
 from .rtmp import RtmpFD
-from .f4m import F4mFD
 
 from ..utils import (
-    determine_ext,
+    determine_protocol,
 )
 
+PROTOCOL_MAP = {
+    'rtmp': RtmpFD,
+    'm3u8_native': NativeHlsFD,
+    'm3u8': HlsFD,
+    'mms': MplayerFD,
+    'rtsp': MplayerFD,
+    'f4m': F4mFD,
+}
+
 
-def get_suitable_downloader(info_dict):
+def get_suitable_downloader(info_dict, params={}):
     """Get the downloader class that can handle the info dict."""
+    # Selection order: the external downloader named in
+    # params['external_downloader'] wins when it reports support for this
+    # info dict; otherwise the protocol is looked up in PROTOCOL_MAP, with
+    # HttpFD as the fallback.
+    # Side effect: the determined protocol is stored in info_dict['protocol'].
+    # params is only read here, never mutated, so the shared {} default is
+    # not modified between calls.
-    url = info_dict['url']
-    protocol = info_dict.get('protocol')
-
-    if url.startswith('rtmp'):
-        return RtmpFD
-    if protocol == 'm3u8_native':
-        return NativeHlsFD
-    if (protocol == 'm3u8') or (protocol is None and determine_ext(url) == 'm3u8'):
-        return HlsFD
-    if url.startswith('mms') or url.startswith('rtsp'):
-        return MplayerFD
-    if determine_ext(url) == 'f4m':
-        return F4mFD
-    else:
-        return HttpFD
+    protocol = determine_protocol(info_dict)
+    info_dict['protocol'] = protocol
+
+    external_downloader = params.get('external_downloader')
+    if external_downloader is not None:
+        ed = get_external_downloader(external_downloader)
+        if ed.supports(info_dict):
+            return ed
+
+    return PROTOCOL_MAP.get(protocol, HttpFD)
+
 
 __all__ = [
     'get_suitable_downloader',
index de6b9311d59b3a270cc0a7dc58d05b4f0692e896..7bb3a948d2ebd0eaca46ca72f72dfaa2e7ffd1bd 100644 (file)
@@ -25,21 +25,23 @@ class FileDownloader(object):
 
     Available options:
 
-    verbose:           Print additional info to stdout.
-    quiet:             Do not print messages to stdout.
-    ratelimit:         Download speed limit, in bytes/sec.
-    retries:           Number of times to retry for HTTP error 5xx
-    buffersize:        Size of download buffer in bytes.
-    noresizebuffer:    Do not automatically resize the download buffer.
-    continuedl:        Try to continue downloads if possible.
-    noprogress:        Do not print the progress bar.
-    logtostderr:       Log messages to stderr instead of stdout.
-    consoletitle:      Display progress in console window's titlebar.
-    nopart:            Do not use temporary .part files.
-    updatetime:        Use the Last-modified header to set output file timestamps.
-    test:              Download only first bytes to test the downloader.
-    min_filesize:      Skip files smaller than this size
-    max_filesize:      Skip files larger than this size
+    verbose:            Print additional info to stdout.
+    quiet:              Do not print messages to stdout.
+    ratelimit:          Download speed limit, in bytes/sec.
+    retries:            Number of times to retry for HTTP error 5xx
+    buffersize:         Size of download buffer in bytes.
+    noresizebuffer:     Do not automatically resize the download buffer.
+    continuedl:         Try to continue downloads if possible.
+    noprogress:         Do not print the progress bar.
+    logtostderr:        Log messages to stderr instead of stdout.
+    consoletitle:       Display progress in console window's titlebar.
+    nopart:             Do not use temporary .part files.
+    updatetime:         Use the Last-modified header to set output file timestamps.
+    test:               Download only first bytes to test the downloader.
+    min_filesize:       Skip files smaller than this size
+    max_filesize:       Skip files larger than this size
+    xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
+                        (experimental)
 
     Subclasses of this one must re-define the real_download method.
     """
@@ -284,6 +286,7 @@ class FileDownloader(object):
         """Download to a filename using the info from info_dict
         Return True on success and False otherwise
         """
+
         nooverwrites_and_exists = (
             self.params.get('nooverwrites', False)
             and os.path.exists(encodeFilename(filename))
@@ -305,6 +308,11 @@ class FileDownloader(object):
             })
             return True
 
+        sleep_interval = self.params.get('sleep_interval')
+        if sleep_interval:
+            self.to_screen('[download] Sleeping %s seconds...' % sleep_interval)
+            time.sleep(sleep_interval)
+
         return self.real_download(filename, info_dict)
 
     def real_download(self, filename, info_dict):
@@ -319,3 +327,24 @@ class FileDownloader(object):
         # See YoutubeDl.py (search for progress_hooks) for a description of
         # this interface
         self._progress_hooks.append(ph)
+
+    def _debug_cmd(self, args, subprocess_encoding, exe=None):
+        if not self.params.get('verbose', False):
+            return
+
+        if exe is None:
+            exe = os.path.basename(args[0])
+
+        if subprocess_encoding:
+            str_args = [
+                a.decode(subprocess_encoding) if isinstance(a, bytes) else a
+                for a in args]
+        else:
+            str_args = args
+        try:
+            import pipes
+            shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
+        except ImportError:
+            shell_quote = repr
+        self.to_screen('[debug] %s command line: %s' % (
+            exe, shell_quote(str_args)))
diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py
new file mode 100644 (file)
index 0000000..ff031d2
--- /dev/null
@@ -0,0 +1,126 @@
+from __future__ import unicode_literals
+
+import os.path
+import subprocess
+import sys
+
+from .common import FileDownloader
+from ..utils import (
+    encodeFilename,
+)
+
+
+class ExternalFD(FileDownloader):
+    def real_download(self, filename, info_dict):
+        self.report_destination(filename)
+        tmpfilename = self.temp_name(filename)
+
+        retval = self._call_downloader(tmpfilename, info_dict)
+        if retval == 0:
+            fsize = os.path.getsize(encodeFilename(tmpfilename))
+            self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
+            self.try_rename(tmpfilename, filename)
+            self._hook_progress({
+                'downloaded_bytes': fsize,
+                'total_bytes': fsize,
+                'filename': filename,
+                'status': 'finished',
+            })
+            return True
+        else:
+            self.to_stderr('\n')
+            self.report_error('%s exited with code %d' % (
+                self.get_basename(), retval))
+            return False
+
+    @classmethod
+    def get_basename(cls):
+        return cls.__name__[:-2].lower()
+
+    @property
+    def exe(self):
+        return self.params.get('external_downloader')
+
+    @classmethod
+    def supports(cls, info_dict):
+        return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')
+
+    def _source_address(self, command_option):
+        source_address = self.params.get('source_address')
+        if source_address is None:
+            return []
+        return [command_option, source_address]
+
+    def _call_downloader(self, tmpfilename, info_dict):
+        """ Either override this or implement _make_cmd """
+        cmd = self._make_cmd(tmpfilename, info_dict)
+
+        if sys.platform == 'win32' and sys.version_info < (3, 0):
+            # Windows subprocess module does not actually support Unicode
+            # on Python 2.x
+            # See http://stackoverflow.com/a/9951851/35070
+            subprocess_encoding = sys.getfilesystemencoding()
+            cmd = [a.encode(subprocess_encoding, 'ignore') for a in cmd]
+        else:
+            subprocess_encoding = None
+        self._debug_cmd(cmd, subprocess_encoding)
+
+        p = subprocess.Popen(
+            cmd, stderr=subprocess.PIPE)
+        _, stderr = p.communicate()
+        if p.returncode != 0:
+            self.to_stderr(stderr)
+        return p.returncode
+
+
+class CurlFD(ExternalFD):
+    def _make_cmd(self, tmpfilename, info_dict):
+        cmd = [self.exe, '-o', tmpfilename]
+        for key, val in info_dict['http_headers'].items():
+            cmd += ['--header', '%s: %s' % (key, val)]
+        cmd += self._source_address('--interface')
+        cmd += ['--', info_dict['url']]
+        return cmd
+
+
+class WgetFD(ExternalFD):
+    def _make_cmd(self, tmpfilename, info_dict):
+        cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
+        for key, val in info_dict['http_headers'].items():
+            cmd += ['--header', '%s: %s' % (key, val)]
+        cmd += self._source_address('--bind-address')
+        cmd += ['--', info_dict['url']]
+        return cmd
+
+
+class Aria2cFD(ExternalFD):
+    def _make_cmd(self, tmpfilename, info_dict):
+        cmd = [
+            self.exe, '-c',
+            '--min-split-size', '1M', '--max-connection-per-server', '4']
+        dn = os.path.dirname(tmpfilename)
+        if dn:
+            cmd += ['--dir', dn]
+        cmd += ['--out', os.path.basename(tmpfilename)]
+        for key, val in info_dict['http_headers'].items():
+            cmd += ['--header', '%s: %s' % (key, val)]
+        cmd += self._source_address('--interface')
+        cmd += ['--', info_dict['url']]
+        return cmd
+
+_BY_NAME = dict(
+    (klass.get_basename(), klass)
+    for name, klass in globals().items()
+    if name.endswith('FD') and name != 'ExternalFD'
+)
+
+
+def list_external_downloaders():
+    return sorted(_BY_NAME.keys())
+
+
+def get_external_downloader(external_downloader):
+    """ Given the name of the executable, return the downloader class
+        registered under its basename (KeyError if unsupported). """
+    bn = os.path.basename(external_downloader)
+    return _BY_NAME[bn]
index c460c167a2db78be7a1cdb8e81a3efe89e677ed1..0e7a1c20075499e58b977da4154ce287b144f958 100644 (file)
@@ -177,13 +177,12 @@ def build_fragments_list(boot_info):
     """ Return a list of (segment, fragment) for each fragment in the video """
     res = []
     segment_run_table = boot_info['segments'][0]
-    # I've only found videos with one segment
-    segment_run_entry = segment_run_table['segment_run'][0]
-    n_frags = segment_run_entry[1]
     fragment_run_entry_table = boot_info['fragments'][0]['fragments']
     first_frag_number = fragment_run_entry_table[0]['first']
-    for (i, frag_number) in zip(range(1, n_frags + 1), itertools.count(first_frag_number)):
-        res.append((1, frag_number))
+    fragments_counter = itertools.count(first_frag_number)
+    for segment, fragments_count in segment_run_table['segment_run']:
+        for _ in range(fragments_count):
+            res.append((segment, next(fragments_counter)))
     return res
 
 
@@ -231,6 +230,23 @@ class F4mFD(FileDownloader):
     A downloader for f4m manifests or AdobeHDS.
     """
 
+    def _get_unencrypted_media(self, doc):
+        media = doc.findall(_add_ns('media'))
+        if not media:
+            self.report_error('No media found')
+        for e in (doc.findall(_add_ns('drmAdditionalHeader')) +
+                  doc.findall(_add_ns('drmAdditionalHeaderSet'))):
+            # If id attribute is missing it's valid for all media nodes
+            # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute
+            if 'id' not in e.attrib:
+                self.report_error('Missing ID in f4m DRM')
+        media = list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and
+                                      'drmAdditionalHeaderSetId' not in e.attrib,
+                            media))
+        if not media:
+            self.report_error('Unsupported DRM')
+        return media
+
     def real_download(self, filename, info_dict):
         man_url = info_dict['url']
         requested_bitrate = info_dict.get('tbr')
@@ -249,7 +265,8 @@ class F4mFD(FileDownloader):
         )
 
         doc = etree.fromstring(manifest)
-        formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
+        formats = [(int(f.attrib.get('bitrate', -1)), f)
+                   for f in self._get_unencrypted_media(doc)]
         if requested_bitrate is None:
             # get the best format
             formats = sorted(formats, key=lambda f: f[0])
index aa58b52abb5998ba8879e6eba3a1d974484467e2..e527ee425365a096b50f541b1c75c82dcb9013fb 100644 (file)
@@ -11,6 +11,7 @@ from ..compat import (
     compat_urllib_request,
 )
 from ..utils import (
+    encodeArgument,
     encodeFilename,
 )
 
@@ -21,23 +22,22 @@ class HlsFD(FileDownloader):
         self.report_destination(filename)
         tmpfilename = self.temp_name(filename)
 
-        args = [
-            '-y', '-i', url, '-f', 'mp4', '-c', 'copy',
-            '-bsf:a', 'aac_adtstoasc',
-            encodeFilename(tmpfilename, for_subprocess=True)]
-
         ffpp = FFmpegPostProcessor(downloader=self)
         program = ffpp._executable
         if program is None:
             self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
             return False
         ffpp.check_version()
-        cmd = [program] + args
 
-        retval = subprocess.call(cmd)
+        args = [
+            encodeArgument(opt)
+            for opt in (program, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')]
+        args.append(encodeFilename(tmpfilename, True))
+
+        retval = subprocess.call(args)
         if retval == 0:
             fsize = os.path.getsize(encodeFilename(tmpfilename))
-            self.to_screen('\r[%s] %s bytes' % (cmd[0], fsize))
+            self.to_screen('\r[%s] %s bytes' % (args[0], fsize))
             self.try_rename(tmpfilename, filename)
             self._hook_progress({
                 'downloaded_bytes': fsize,
index e68f20c9f46a93ebfeca2ff47dc0843f4ab94874..49170cf9d47634602efe7832b235e4a751e25817 100644 (file)
@@ -3,6 +3,9 @@ from __future__ import unicode_literals
 import os
 import time
 
+from socket import error as SocketError
+import errno
+
 from .common import FileDownloader
 from ..compat import (
     compat_urllib_request,
@@ -24,10 +27,6 @@ class HttpFD(FileDownloader):
 
         # Do not include the Accept-Encoding header
         headers = {'Youtubedl-no-compression': 'True'}
-        if 'user_agent' in info_dict:
-            headers['Youtubedl-user-agent'] = info_dict['user_agent']
-        if 'http_referer' in info_dict:
-            headers['Referer'] = info_dict['http_referer']
         add_headers = info_dict.get('http_headers')
         if add_headers:
             headers.update(add_headers)
@@ -103,6 +102,11 @@ class HttpFD(FileDownloader):
                             resume_len = 0
                             open_mode = 'wb'
                             break
+            except SocketError as e:
+                if e.errno != errno.ECONNRESET:
+                    # Connection reset is no problem, just retry
+                    raise
+
             # Retry
             count += 1
             if count <= retries:
@@ -161,6 +165,14 @@ class HttpFD(FileDownloader):
                 except (OSError, IOError) as err:
                     self.report_error('unable to open for writing: %s' % str(err))
                     return False
+
+                if self.params.get('xattr_set_filesize', False) and data_len is not None:
+                    try:
+                        import xattr
+                        xattr.setxattr(tmpfilename, 'user.ytdl.filesize', str(data_len))
+                    except(OSError, IOError, ImportError) as err:
+                        self.report_error('unable to set filesize xattr: %s' % str(err))
+
             try:
                 stream.write(data_block)
             except (IOError, OSError) as err:
index 5346cb9a0ae8ab7d02f4cd91e9e4b17019baf88a..f7eeb6f43f09670e8ecb6cba1791d49d09ecbf15 100644 (file)
@@ -104,6 +104,9 @@ class RtmpFD(FileDownloader):
         live = info_dict.get('rtmp_live', False)
         conn = info_dict.get('rtmp_conn', None)
         protocol = info_dict.get('rtmp_protocol', None)
+        real_time = info_dict.get('rtmp_real_time', False)
+        no_resume = info_dict.get('no_resume', False)
+        continue_dl = info_dict.get('continuedl', False)
 
         self.report_destination(filename)
         tmpfilename = self.temp_name(filename)
@@ -141,7 +144,14 @@ class RtmpFD(FileDownloader):
             basic_args += ['--conn', conn]
         if protocol is not None:
             basic_args += ['--protocol', protocol]
-        args = basic_args + [[], ['--resume', '--skip', '1']][not live and self.params.get('continuedl', False)]
+        if real_time:
+            basic_args += ['--realtime']
+
+        args = basic_args
+        if not no_resume and continue_dl and not live:
+            args += ['--resume']
+        if not live and continue_dl:
+            args += ['--skip', '1']
 
         if sys.platform == 'win32' and sys.version_info < (3, 0):
             # Windows subprocess module does not actually support Unicode
@@ -152,19 +162,7 @@ class RtmpFD(FileDownloader):
         else:
             subprocess_encoding = None
 
-        if self.params.get('verbose', False):
-            if subprocess_encoding:
-                str_args = [
-                    a.decode(subprocess_encoding) if isinstance(a, bytes) else a
-                    for a in args]
-            else:
-                str_args = args
-            try:
-                import pipes
-                shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
-            except ImportError:
-                shell_quote = repr
-            self.to_screen('[debug] rtmpdump command line: ' + shell_quote(str_args))
+        self._debug_cmd(args, subprocess_encoding, exe='rtmpdump')
 
         RD_SUCCESS = 0
         RD_FAILED = 1
index 0902eb4374caf943e1af4e23aeb7c74a8386a794..047f7002a3e8dafaac1c4368ddbcc94f6550bc94 100644 (file)
@@ -1,6 +1,7 @@
 from __future__ import unicode_literals
 
 from .abc import ABCIE
+from .abc7news import Abc7NewsIE
 from .academicearth import AcademicEarthCourseIE
 from .addanime import AddAnimeIE
 from .adobetv import AdobeTVIE
@@ -28,7 +29,6 @@ from .arte import (
 from .atresplayer import AtresPlayerIE
 from .atttechchannel import ATTTechChannelIE
 from .audiomack import AudiomackIE, AudiomackAlbumIE
-from .auengine import AUEngineIE
 from .azubu import AzubuIE
 from .bambuser import BambuserIE, BambuserChannelIE
 from .bandcamp import BandcampIE, BandcampAlbumIE
@@ -82,6 +82,7 @@ from .crunchyroll import (
     CrunchyrollShowPlaylistIE
 )
 from .cspan import CSpanIE
+from .ctsnews import CtsNewsIE
 from .dailymotion import (
     DailymotionIE,
     DailymotionPlaylistIE,
@@ -89,6 +90,7 @@ from .dailymotion import (
 )
 from .daum import DaumIE
 from .dbtv import DBTVIE
+from .dctp import DctpTvIE
 from .deezer import DeezerPlaylistIE
 from .dfb import DFBIE
 from .dotsub import DotsubIE
@@ -175,10 +177,12 @@ from .goshgay import GoshgayIE
 from .grooveshark import GroovesharkIE
 from .groupon import GrouponIE
 from .hark import HarkIE
+from .hearthisat import HearThisAtIE
 from .heise import HeiseIE
 from .hellporno import HellPornoIE
 from .helsinki import HelsinkiIE
 from .hentaistigma import HentaiStigmaIE
+from .historicfilms import HistoricFilmsIE
 from .hitbox import HitboxIE, HitboxLiveIE
 from .hornbunny import HornBunnyIE
 from .hostingbulk import HostingBulkIE
@@ -281,11 +285,22 @@ from .ndr import NDRIE
 from .ndtv import NDTVIE
 from .netzkino import NetzkinoIE
 from .nerdcubed import NerdCubedFeedIE
+from .nerdist import NerdistIE
 from .newgrounds import NewgroundsIE
 from .newstube import NewstubeIE
+from .nextmedia import (
+    NextMediaIE,
+    NextMediaActionNewsIE,
+    AppleDailyRealtimeNewsIE,
+    AppleDailyAnimationNewsIE
+)
 from .nfb import NFBIE
 from .nfl import NFLIE
-from .nhl import NHLIE, NHLVideocenterIE
+from .nhl import (
+    NHLIE,
+    NHLNewsIE,
+    NHLVideocenterIE,
+)
 from .niconico import NiconicoIE, NiconicoPlaylistIE
 from .ninegag import NineGagIE
 from .noco import NocoIE
@@ -303,7 +318,8 @@ from .nrk import (
     NRKIE,
     NRKTVIE,
 )
-from .ntv import NTVIE
+from .ntvde import NTVDeIE
+from .ntvru import NTVRuIE
 from .nytimes import NYTimesIE
 from .nuvid import NuvidIE
 from .oktoberfesttv import OktoberfestTVIE
@@ -348,6 +364,7 @@ from .rtbf import RTBFIE
 from .rte import RteIE
 from .rtlnl import RtlXlIE
 from .rtlnow import RTLnowIE
+from .rtl2 import RTL2IE
 from .rtp import RTPIE
 from .rts import RTSIE
 from .rtve import RTVEALaCartaIE, RTVELiveIE
@@ -408,6 +425,7 @@ from .stanfordoc import StanfordOpenClassroomIE
 from .steam import SteamIE
 from .streamcloud import StreamcloudIE
 from .streamcz import StreamCZIE
+from .streetvoice import StreetVoiceIE
 from .sunporno import SunPornoIE
 from .swrmediathek import SWRMediathekIE
 from .syfy import SyfyIE
@@ -429,6 +447,7 @@ from .telemb import TeleMBIE
 from .teletask import TeleTaskIE
 from .tenplay import TenPlayIE
 from .testurl import TestURLIE
+from .testtube import TestTubeIE
 from .tf1 import TF1IE
 from .theonion import TheOnionIE
 from .theplatform import ThePlatformIE
@@ -456,8 +475,17 @@ from .tutv import TutvIE
 from .tvigle import TvigleIE
 from .tvp import TvpIE, TvpSeriesIE
 from .tvplay import TVPlayIE
+from .tweakers import TweakersIE
 from .twentyfourvideo import TwentyFourVideoIE
-from .twitch import TwitchIE
+from .twitch import (
+    TwitchVideoIE,
+    TwitchChapterIE,
+    TwitchVodIE,
+    TwitchProfileIE,
+    TwitchPastBroadcastsIE,
+    TwitchBookmarksIE,
+    TwitchStreamIE,
+)
 from .ubu import UbuIE
 from .udemy import (
     UdemyIE,
@@ -527,6 +555,7 @@ from .wimp import WimpIE
 from .wistia import WistiaIE
 from .worldstarhiphop import WorldStarHipHopIE
 from .wrzuta import WrzutaIE
+from .wsj import WSJIE
 from .xbef import XBefIE
 from .xboxclips import XboxClipsIE
 from .xhamster import XHamsterIE
@@ -534,6 +563,7 @@ from .xminus import XMinusIE
 from .xnxx import XNXXIE
 from .xvideos import XVideosIE
 from .xtube import XTubeUserIE, XTubeIE
+from .xuite import XuiteIE
 from .xxxymovies import XXXYMoviesIE
 from .yahoo import (
     YahooIE,
diff --git a/youtube_dl/extractor/abc7news.py b/youtube_dl/extractor/abc7news.py
new file mode 100644 (file)
index 0000000..c04949c
--- /dev/null
@@ -0,0 +1,68 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import parse_iso8601
+
+
+class Abc7NewsIE(InfoExtractor):
+    _VALID_URL = r'https?://abc7news\.com(?:/[^/]+/(?P<display_id>[^/]+))?/(?P<id>\d+)'
+    _TESTS = [
+        {
+            'url': 'http://abc7news.com/entertainment/east-bay-museum-celebrates-vintage-synthesizers/472581/',
+            'info_dict': {
+                'id': '472581',
+                'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
+                'ext': 'mp4',
+                'title': 'East Bay museum celebrates history of synthesized music',
+                'description': 'md5:a4f10fb2f2a02565c1749d4adbab4b10',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'timestamp': 1421123075,
+                'upload_date': '20150113',
+                'uploader': 'Jonathan Bloom',
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://abc7news.com/472581',
+            'only_matching': True,
+        },
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id') or video_id
+
+        webpage = self._download_webpage(url, display_id)
+
+        m3u8 = self._html_search_meta(
+            'contentURL', webpage, 'm3u8 url', fatal=True)
+
+        formats = self._extract_m3u8_formats(m3u8, display_id, 'mp4')
+        self._sort_formats(formats)
+
+        title = self._og_search_title(webpage).strip()
+        description = self._og_search_description(webpage).strip()
+        thumbnail = self._og_search_thumbnail(webpage)
+        timestamp = parse_iso8601(self._search_regex(
+            r'<div class="meta">\s*<time class="timeago" datetime="([^"]+)">',
+            webpage, 'upload date', fatal=False))
+        uploader = self._search_regex(
+            r'rel="author">([^<]+)</a>',
+            webpage, 'uploader', default=None)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'uploader': uploader,
+            'formats': formats,
+        }
index cfc7370ae43da592eaca49245200bd922d75a019..8442019eac3eaa0a373140d494ffa2ca420f4606 100644 (file)
@@ -1,8 +1,6 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 
 
@@ -21,9 +19,7 @@ class AftonbladetIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.search(self._VALID_URL, url)
-
-        video_id = mobj.group('video_id')
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         # find internal video meta data
index 7cd0482c75d7157df218071a2e22ce2904d094b6..287f71e076e91a44ea331c995410fbe8b40d178d 100644 (file)
@@ -122,14 +122,15 @@ class AppleTrailersIE(InfoExtractor):
             playlist.append({
                 '_type': 'video',
                 'id': video_id,
-                'title': title,
                 'formats': formats,
                 'title': title,
                 'duration': duration,
                 'thumbnail': thumbnail,
                 'upload_date': upload_date,
                 'uploader_id': uploader_id,
-                'user_agent': 'QuickTime compatible (youtube-dl)',
+                'http_headers': {
+                    'User-Agent': 'QuickTime compatible (youtube-dl)',
+                },
             })
 
         return {
index 967bd865c53229e7ff38997ea9a7f4a6ab19f92d..783b53e23035a7bd3f3feac628ff2de8daefbea5 100644 (file)
@@ -23,13 +23,7 @@ class ARDMediathekIE(InfoExtractor):
 
     _TESTS = [{
         'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
-        'file': '22429276.mp4',
-        'md5': '469751912f1de0816a9fc9df8336476c',
-        'info_dict': {
-            'title': 'Vertrauen ist gut, Spionieren ist besser - Geht so deutsch-amerikanische Freundschaft?',
-            'description': 'Das Erste Mediathek [ARD]: Vertrauen ist gut, Spionieren ist besser - Geht so deutsch-amerikanische Freundschaft?, Anne Will, Über die Spionage-Affäre diskutieren Clemens Binninger, Katrin Göring-Eckardt, Georg Mascolo, Andrew B. Denison und Constanze Kurz.. Das Video zur Sendung Anne Will am Mittwoch, 16.07.2014',
-        },
-        'skip': 'Blocked outside of Germany',
+        'only_matching': True,
     }, {
         'url': 'http://www.ardmediathek.de/tv/Tatort/Das-Wunder-von-Wolbeck-Video-tgl-ab-20/Das-Erste/Video?documentId=22490580&bcastId=602916',
         'info_dict': {
index 5db1941b339a0e6e9bde01ec28e337478f92ce57..f016368fa8d0890de874a774b2a4a18db60a01c6 100644 (file)
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 import time
 import hmac
 
-from .common import InfoExtractor
+from .subtitles import SubtitlesInfoExtractor
 from ..compat import (
     compat_str,
     compat_urllib_parse,
@@ -17,7 +17,7 @@ from ..utils import (
 )
 
 
-class AtresPlayerIE(InfoExtractor):
+class AtresPlayerIE(SubtitlesInfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html'
     _TESTS = [
         {
@@ -95,7 +95,7 @@ class AtresPlayerIE(InfoExtractor):
         for fmt in ['windows', 'android_tablet']:
             request = compat_urllib_request.Request(
                 self._URL_VIDEO_TEMPLATE.format(fmt, episode_id, timestamp_shifted, token))
-            request.add_header('Youtubedl-user-agent', self._USER_AGENT)
+            request.add_header('User-Agent', self._USER_AGENT)
 
             fmt_json = self._download_json(
                 request, video_id, 'Downloading %s video JSON' % fmt)
@@ -105,13 +105,22 @@ class AtresPlayerIE(InfoExtractor):
                 raise ExtractorError(
                     '%s returned error: %s' % (self.IE_NAME, result), expected=True)
 
-            for _, video_url in fmt_json['resultObject'].items():
+            for format_id, video_url in fmt_json['resultObject'].items():
+                if format_id == 'token' or not video_url.startswith('http'):
+                    continue
                 if video_url.endswith('/Manifest'):
-                    formats.extend(self._extract_f4m_formats(video_url[:-9] + '/manifest.f4m', video_id))
+                    if 'geodeswowsmpra3player' in video_url:
+                        f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
+                        f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
+                        # these videos are protected by DRM; the f4m downloader doesn't support them
+                        continue
+                    else:
+                        f4m_url = video_url[:-9] + '/manifest.f4m'
+                    formats.extend(self._extract_f4m_formats(f4m_url, video_id))
                 else:
                     formats.append({
                         'url': video_url,
-                        'format_id': 'android',
+                        'format_id': 'android-%s' % format_id,
                         'preference': 1,
                     })
         self._sort_formats(formats)
@@ -134,6 +143,15 @@ class AtresPlayerIE(InfoExtractor):
         description = xpath_text(art, './description', 'description')
         thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail')
 
+        subtitles = {}
+        subtitle = xpath_text(episode, './media/asset/files/subtitle', 'subtitle')
+        if subtitle:
+            subtitles['es'] = subtitle
+
+        if self._downloader.params.get('listsubtitles', False):
+            self._list_available_subtitles(video_id, subtitles)
+            return
+
         return {
             'id': video_id,
             'title': title,
@@ -141,4 +159,5 @@ class AtresPlayerIE(InfoExtractor):
             'thumbnail': thumbnail,
             'duration': duration,
             'formats': formats,
+            'subtitles': self.extract_subtitles(video_id, subtitles),
         }
index 8bfe502143a2cbf9c076bdabaad3e7ad5d2090b6..693ba22c6dde0dd1760531108353ab48b1ed0fa1 100644 (file)
@@ -88,16 +88,21 @@ class AudiomackAlbumIE(InfoExtractor):
         # Album playlist ripped from fakeshoredrive with no metadata
         {
             'url': 'http://www.audiomack.com/album/fakeshoredrive/ppp-pistol-p-project',
+            'info_dict': {
+                'title': 'PPP (Pistol P Project)',
+                'id': '837572',
+            },
             'playlist': [{
                 'info_dict': {
-                    'title': '9.-heaven-or-hell-chimaca-ft-zuse-prod-by-dj-fu',
-                    'id': '9.-heaven-or-hell-chimaca-ft-zuse-prod-by-dj-fu',
+                    'title': 'PPP (Pistol P Project) - 9. Heaven or Hell (CHIMACA) ft Zuse (prod by DJ FU)',
+                    'id': '837577',
                     'ext': 'mp3',
+                    'uploader': 'Lil Herb a.k.a. G Herbo',
                 }
             }],
             'params': {
-                'playliststart': 8,
-                'playlistend': 8,
+                'playliststart': 9,
+                'playlistend': 9,
             }
         }
     ]
diff --git a/youtube_dl/extractor/auengine.py b/youtube_dl/extractor/auengine.py
deleted file mode 100644 (file)
index a1b666b..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_urllib_parse
-from ..utils import (
-    determine_ext,
-    ExtractorError,
-    remove_end,
-)
-
-
-class AUEngineIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?auengine\.com/embed\.php\?.*?file=(?P<id>[^&]+).*?'
-
-    _TEST = {
-        'url': 'http://auengine.com/embed.php?file=lfvlytY6&w=650&h=370',
-        'md5': '48972bdbcf1a3a2f5533e62425b41d4f',
-        'info_dict': {
-            'id': 'lfvlytY6',
-            'ext': 'mp4',
-            'title': '[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]'
-        }
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, video_id)
-        title = self._html_search_regex(
-            r'<title>\s*(?P<title>.+?)\s*</title>', webpage, 'title')
-        video_urls = re.findall(r'http://\w+.auengine.com/vod/.*[^\W]', webpage)
-        video_url = compat_urllib_parse.unquote(video_urls[0])
-        thumbnails = re.findall(r'http://\w+.auengine.com/thumb/.*[^\W]', webpage)
-        thumbnail = compat_urllib_parse.unquote(thumbnails[0])
-
-        if not video_url:
-            raise ExtractorError('Could not find video URL')
-
-        ext = '.' + determine_ext(video_url)
-        title = remove_end(title, ext)
-
-        return {
-            'id': video_id,
-            'url': video_url,
-            'title': title,
-            'thumbnail': thumbnail,
-            'http_referer': 'http://www.auengine.com/flowplayer/flowplayer.commercial-3.2.14.swf',
-        }
index 1cf48fe0dd739b328478899a83f0d8aba94e6c4a..126c8824cccedbca287ac3ebfc92d1a5e2d93b57 100644 (file)
@@ -10,7 +10,7 @@ from ..compat import compat_HTTPError
 class BBCCoUkIE(SubtitlesInfoExtractor):
     IE_NAME = 'bbc.co.uk'
     IE_DESC = 'BBC iPlayer'
-    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
+    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
 
     _TESTS = [
         {
@@ -118,6 +118,9 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
         }, {
             'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
             'only_matching': True,
+        }, {
+            'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo',
+            'only_matching': True,
         }
     ]
 
index 14b814120be3b8215a28fc00a95f87bd22e0c062..436cc515563a07d853ced1b8373461a752ed6038 100644 (file)
@@ -199,7 +199,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
         # For some weird reason, blip.tv serves a video instead of subtitles
         # when we request with a common UA
         req = compat_urllib_request.Request(url)
-        req.add_header('Youtubedl-user-agent', 'youtube-dl')
+        req.add_header('User-Agent', 'youtube-dl')
         return self._download_webpage(req, None, note=False)
 
 
index 003152c4e6d6ec9880a54016870e41e42635f41c..ea0969d4d259a99653bebbcabcebb0e1f87719f3 100644 (file)
@@ -108,7 +108,7 @@ class BrightcoveIE(InfoExtractor):
         """
 
         # Fix up some stupid HTML, see https://github.com/rg3/youtube-dl/issues/1553
-        object_str = re.sub(r'(<param name="[^"]+" value="[^"]+")>',
+        object_str = re.sub(r'(<param(?:\s+[a-zA-Z0-9_]+="[^"]*")*)>',
                             lambda m: m.group(1) + '/>', object_str)
         # Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608
         object_str = object_str.replace('<--', '<!--')
index 0c9a24befddcc1ce4b69f35b2d0b245723a60484..562c9bbbb4d02305d22a7ad5bc22ef9056d25258 100644 (file)
@@ -28,12 +28,10 @@ class CinchcastIE(InfoExtractor):
             item, './{http://developer.longtailvideo.com/trac/}date')
         upload_date = unified_strdate(date_str, day_first=False)
         # duration is present but wrong
-        formats = []
-        formats.append({
+        formats = [{
             'format_id': 'main',
-            'url': item.find(
-                './{http://search.yahoo.com/mrss/}content').attrib['url'],
-        })
+            'url': item.find('./{http://search.yahoo.com/mrss/}content').attrib['url'],
+        }]
         backup_url = xpath_text(
             item, './{http://developer.longtailvideo.com/trac/}backupContent')
         if backup_url:
index 2edab90a33d553225b8c790b8d391f0e40b55cf8..d46592cc5c8c71d30fda96c1b25c6f4a9c55ad75 100644 (file)
@@ -1,9 +1,7 @@
 from __future__ import unicode_literals
 
-import json
-import re
-
 from .common import InfoExtractor
+from ..utils import determine_ext
 
 
 _translation_table = {
@@ -27,10 +25,10 @@ class CliphunterIE(InfoExtractor):
     '''
     _TEST = {
         'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo',
-        'md5': 'a2ba71eebf523859fe527a61018f723e',
+        'md5': 'b7c9bbd4eb3a226ab91093714dcaa480',
         'info_dict': {
             'id': '1012420',
-            'ext': 'mp4',
+            'ext': 'flv',
             'title': 'Fun Jynx Maze solo',
             'thumbnail': 're:^https?://.*\.jpg$',
             'age_limit': 18,
@@ -44,39 +42,31 @@ class CliphunterIE(InfoExtractor):
         video_title = self._search_regex(
             r'mediaTitle = "([^"]+)"', webpage, 'title')
 
-        pl_fiji = self._search_regex(
-            r'pl_fiji = \'([^\']+)\'', webpage, 'video data')
-        pl_c_qual = self._search_regex(
-            r'pl_c_qual = "(.)"', webpage, 'video quality')
-        video_url = _decode(pl_fiji)
-        formats = [{
-            'url': video_url,
-            'format_id': 'default-%s' % pl_c_qual,
-        }]
-
-        qualities_json = self._search_regex(
-            r'var pl_qualities\s*=\s*(.*?);\n', webpage, 'quality info')
-        qualities_data = json.loads(qualities_json)
-
-        for i, t in enumerate(
-                re.findall(r"pl_fiji_([a-z0-9]+)\s*=\s*'([^']+')", webpage)):
-            quality_id, crypted_url = t
-            video_url = _decode(crypted_url)
+        fmts = {}
+        for fmt in ('mp4', 'flv'):
+            fmt_list = self._parse_json(self._search_regex(
+                r'var %sjson\s*=\s*(\[.*?\]);' % fmt, webpage, '%s formats' % fmt), video_id)
+            for f in fmt_list:
+                fmts[f['fname']] = _decode(f['sUrl'])
+
+        qualities = self._parse_json(self._search_regex(
+            r'var player_btns\s*=\s*(.*?);\n', webpage, 'quality info'), video_id)
+
+        formats = []
+        for fname, url in fmts.items():
             f = {
-                'format_id': quality_id,
-                'url': video_url,
-                'quality': i,
+                'url': url,
             }
-            if quality_id in qualities_data:
-                qd = qualities_data[quality_id]
-                m = re.match(
-                    r'''(?x)<b>(?P<width>[0-9]+)x(?P<height>[0-9]+)<\\/b>
-                        \s*\(\s*(?P<tbr>[0-9]+)\s*kb\\/s''', qd)
-                if m:
-                    f['width'] = int(m.group('width'))
-                    f['height'] = int(m.group('height'))
-                    f['tbr'] = int(m.group('tbr'))
+            if fname in qualities:
+                qual = qualities[fname]
+                f.update({
+                    'format_id': '%s_%sp' % (determine_ext(url), qual['h']),
+                    'width': qual['w'],
+                    'height': qual['h'],
+                    'tbr': qual['br'],
+                })
             formats.append(f)
+
         self._sort_formats(formats)
 
         thumbnail = self._search_regex(
index 93e8d0de355d7ccb239f06aee956468d33cb43d9..90ea074387ef6afe4aaa87a41c13ec6cf5a1aa7b 100644 (file)
@@ -51,7 +51,7 @@ class CNNIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         path = mobj.group('path')
         page_title = mobj.group('title')
-        info_url = 'http://cnn.com/video/data/3.0/%s/index.xml' % path
+        info_url = 'http://edition.cnn.com/video/data/3.0/%s/index.xml' % path
         info = self._download_xml(info_url, page_title)
 
         formats = []
@@ -143,13 +143,13 @@ class CNNArticleIE(InfoExtractor):
     _VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!video/)'
     _TEST = {
         'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/',
-        'md5': '275b326f85d80dff7592a9820f5dc887',
+        'md5': '689034c2a3d9c6dc4aa72d65a81efd01',
         'info_dict': {
-            'id': 'bestoftv/2014/12/21/sotu-crowley-president-obama-north-korea-not-going-to-be-intimidated.cnn',
+            'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn',
             'ext': 'mp4',
-            'title': 'Obama: We\'re not going to be intimidated',
-            'description': 'md5:e735586f3dc936075fa654a4d91b21f9',
-            'upload_date': '20141220',
+            'title': 'Obama: Cyberattack not an act of war',
+            'description': 'md5:51ce6750450603795cad0cdfbd7d05c5',
+            'upload_date': '20141221',
         },
         'add_ie': ['CNN'],
     }
index 8d27af5e57348e56a924d1d633df8799343245e4..b2453898199ed72da7df13fa90714572fad22133 100644 (file)
@@ -34,12 +34,12 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
 
 class ComedyCentralShowsIE(MTVServicesInfoExtractor):
     IE_DESC = 'The Daily Show / The Colbert Report'
-    # urls can be abbreviations like :thedailyshow or :colbert
+    # urls can be abbreviations like :thedailyshow
     # urls for episodes like:
     # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
     #                     or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news
     #                     or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
-    _VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
+    _VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow)
                       |https?://(:www\.)?
                           (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
                          ((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
@@ -49,7 +49,9 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
                               |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
                           )|
                           (?P<interview>
-                              extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?)))
+                              extended-interviews/(?P<interID>[0-9a-z]+)/
+                              (?:playlist_tds_extended_)?(?P<interview_title>[^/?#]*?)
+                              (?:/[^/?#]?|[?#]|$))))
                      '''
     _TESTS = [{
         'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
@@ -62,6 +64,38 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
             'uploader': 'thedailyshow',
             'title': 'thedailyshow kristen-stewart part 1',
         }
+    }, {
+        'url': 'http://thedailyshow.cc.com/extended-interviews/b6364d/sarah-chayes-extended-interview',
+        'info_dict': {
+            'id': 'sarah-chayes-extended-interview',
+            'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
+            'title': 'thedailyshow Sarah Chayes Extended Interview',
+        },
+        'playlist': [
+            {
+                'info_dict': {
+                    'id': '0baad492-cbec-4ec1-9e50-ad91c291127f',
+                    'ext': 'mp4',
+                    'upload_date': '20150129',
+                    'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
+                    'uploader': 'thedailyshow',
+                    'title': 'thedailyshow sarah-chayes-extended-interview part 1',
+                },
+            },
+            {
+                'info_dict': {
+                    'id': '1e4fb91b-8ce7-4277-bd7c-98c9f1bbd283',
+                    'ext': 'mp4',
+                    'upload_date': '20150129',
+                    'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
+                    'uploader': 'thedailyshow',
+                    'title': 'thedailyshow sarah-chayes-extended-interview part 2',
+                },
+            },
+        ],
+        'params': {
+            'skip_download': True,
+        },
     }, {
         'url': 'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview',
         'only_matching': True,
@@ -230,6 +264,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
 
         return {
             '_type': 'playlist',
+            'id': epTitle,
             'entries': entries,
             'title': show_name + ' ' + title,
             'description': description,
index 03f3f18c83012cdced0e305fe1cc02d69a85bb7c..df1a4417bb99fe6cf415b293ce068787d1568dc8 100644 (file)
@@ -14,6 +14,7 @@ import xml.etree.ElementTree
 
 from ..compat import (
     compat_cookiejar,
+    compat_HTTPError,
     compat_http_client,
     compat_urllib_error,
     compat_urllib_parse_urlparse,
@@ -26,6 +27,7 @@ from ..utils import (
     compiled_regex_type,
     ExtractorError,
     float_or_none,
+    HEADRequest,
     int_or_none,
     RegexNotFoundError,
     sanitize_filename,
@@ -87,7 +89,8 @@ class InfoExtractor(object):
                     * player_url SWF Player URL (used for rtmpdump).
                     * protocol   The protocol that will be used for the actual
                                  download, lower-case.
-                                 "http", "https", "rtsp", "rtmp", "m3u8" or so.
+                                 "http", "https", "rtsp", "rtmp", "rtmpe",
+                                 "m3u8", or "m3u8_native".
                     * preference Order number of this format. If this field is
                                  present and not None, the formats get sorted
                                  by this field, regardless of all other values.
@@ -108,15 +111,17 @@ class InfoExtractor(object):
                                   (quality takes higher priority)
                                  -1 for default (order by other properties),
                                  -2 or smaller for less than default.
-                    * http_referer  HTTP Referer header value to set.
                     * http_method  HTTP method to use for the download.
                     * http_headers  A dictionary of additional HTTP headers
                                  to add to the request.
                     * http_post_data  Additional data to send with a POST
                                  request.
                     * stretched_ratio  If given and not 1, indicates that the
-                                       video's pixels are not square.
-                                       width : height ratio as float.
+                                 video's pixels are not square.
+                                 width : height ratio as float.
+                    * no_resume  The server does not support resuming the
+                                 (HTTP or RTMP) download. Boolean.
+
     url:            Final video URL.
     ext:            Video filename extension.
     format:         The video format, defaults to ext (used for --get-format)
@@ -130,7 +135,9 @@ class InfoExtractor(object):
                     something like "4234987", title "Dancing naked mole rats",
                     and display_id "dancing-naked-mole-rats"
     thumbnails:     A list of dictionaries, with the following entries:
+                        * "id" (optional, string) - Thumbnail format ID
                         * "url"
+                        * "preference" (optional, int) - quality of the image
                         * "width" (optional, int)
                         * "height" (optional, int)
                         * "resolution" (optional, string "{width}x{height"},
@@ -138,6 +145,7 @@ class InfoExtractor(object):
     thumbnail:      Full URL to a video thumbnail image.
     description:    Full video description.
     uploader:       Full name of the video uploader.
+    creator:        The main artist who created the video.
     timestamp:      UNIX timestamp of the moment the video became available.
     upload_date:    Video upload date (YYYYMMDD).
                     If not explicitly set, calculated from timestamp.
@@ -697,11 +705,11 @@ class InfoExtractor(object):
                 preference,
                 f.get('language_preference') if f.get('language_preference') is not None else -1,
                 f.get('quality') if f.get('quality') is not None else -1,
-                f.get('height') if f.get('height') is not None else -1,
-                f.get('width') if f.get('width') is not None else -1,
-                ext_preference,
                 f.get('tbr') if f.get('tbr') is not None else -1,
                 f.get('vbr') if f.get('vbr') is not None else -1,
+                ext_preference,
+                f.get('height') if f.get('height') is not None else -1,
+                f.get('width') if f.get('width') is not None else -1,
                 f.get('abr') if f.get('abr') is not None else -1,
                 audio_ext_preference,
                 f.get('fps') if f.get('fps') is not None else -1,
@@ -712,6 +720,27 @@ class InfoExtractor(object):
             )
         formats.sort(key=_formats_key)
 
+    def _check_formats(self, formats, video_id):
+        if formats:
+            formats[:] = filter(
+                lambda f: self._is_valid_url(
+                    f['url'], video_id,
+                    item='%s video format' % f.get('format_id') if f.get('format_id') else 'video'),
+                formats)
+
+    def _is_valid_url(self, url, video_id, item='video'):
+        try:
+            self._request_webpage(
+                HEADRequest(url), video_id,
+                'Checking %s URL' % item)
+            return True
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError):
+                self.report_warning(
+                    '%s URL is invalid, skipping' % item, video_id)
+                return False
+            raise
+
     def http_scheme(self):
         """ Either "http:" or "https:", depending on the user's preferences """
         return (
@@ -736,7 +765,7 @@ class InfoExtractor(object):
         self.to_screen(msg)
         time.sleep(timeout)
 
-    def _extract_f4m_formats(self, manifest_url, video_id):
+    def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None):
         manifest = self._download_xml(
             manifest_url, video_id, 'Downloading f4m manifest',
             'Unable to download f4m manifest')
@@ -749,26 +778,28 @@ class InfoExtractor(object):
             media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
         for i, media_el in enumerate(media_nodes):
             if manifest_version == '2.0':
-                manifest_url = '/'.join(manifest_url.split('/')[:-1]) + '/' + media_el.attrib.get('href')
+                manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/'
+                                + (media_el.attrib.get('href') or media_el.attrib.get('url')))
             tbr = int_or_none(media_el.attrib.get('bitrate'))
-            format_id = 'f4m-%d' % (i if tbr is None else tbr)
             formats.append({
-                'format_id': format_id,
+                'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])),
                 'url': manifest_url,
                 'ext': 'flv',
                 'tbr': tbr,
                 'width': int_or_none(media_el.attrib.get('width')),
                 'height': int_or_none(media_el.attrib.get('height')),
+                'preference': preference,
             })
         self._sort_formats(formats)
 
         return formats
 
     def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
-                              entry_protocol='m3u8', preference=None):
+                              entry_protocol='m3u8', preference=None,
+                              m3u8_id=None):
 
         formats = [{
-            'format_id': 'm3u8-meta',
+            'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-meta'])),
             'url': m3u8_url,
             'ext': ext,
             'protocol': 'm3u8',
@@ -804,9 +835,8 @@ class InfoExtractor(object):
                     formats.append({'url': format_url(line)})
                     continue
                 tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
-
                 f = {
-                    'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)),
+                    'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-%d' % (tbr if tbr else len(formats))])),
                     'url': format_url(line.strip()),
                     'tbr': tbr,
                     'ext': ext,
@@ -832,10 +862,13 @@ class InfoExtractor(object):
         return formats
 
     # TODO: improve extraction
-    def _extract_smil_formats(self, smil_url, video_id):
+    def _extract_smil_formats(self, smil_url, video_id, fatal=True):
         smil = self._download_xml(
             smil_url, video_id, 'Downloading SMIL file',
-            'Unable to download SMIL file')
+            'Unable to download SMIL file', fatal=fatal)
+        if smil is False:
+            assert not fatal
+            return []
 
         base = smil.find('./head/meta').get('base')
 
diff --git a/youtube_dl/extractor/ctsnews.py b/youtube_dl/extractor/ctsnews.py
new file mode 100644 (file)
index 0000000..0226f80
--- /dev/null
@@ -0,0 +1,93 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import parse_iso8601, ExtractorError
+
+
+class CtsNewsIE(InfoExtractor):
+    # https connection failed (Connection reset)
+    _VALID_URL = r'http://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html'
+    _TESTS = [{
+        'url': 'http://news.cts.com.tw/cts/international/201501/201501291578109.html',
+        'md5': 'a9875cb790252b08431186d741beaabe',
+        'info_dict': {
+            'id': '201501291578109',
+            'ext': 'mp4',
+            'title': '以色列.真主黨交火 3人死亡',
+            'description': 'md5:95e9b295c898b7ff294f09d450178d7d',
+            'timestamp': 1422528540,
+            'upload_date': '20150129',
+        }
+    }, {
+        # News count does not appear on the page but is still available in the database
+        'url': 'http://news.cts.com.tw/cts/international/201309/201309031304098.html',
+        'md5': '3aee7e0df7cdff94e43581f54c22619e',
+        'info_dict': {
+            'id': '201309031304098',
+            'ext': 'mp4',
+            'title': '韓國31歲童顏男 貌如十多歲小孩',
+            'description': 'md5:f183feeba3752b683827aab71adad584',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'timestamp': 1378205880,
+            'upload_date': '20130903',
+        }
+    }, {
+        # With Youtube embedded video
+        'url': 'http://news.cts.com.tw/cts/money/201501/201501291578003.html',
+        'md5': '1d842c771dc94c8c3bca5af2cc1db9c5',
+        'add_ie': ['Youtube'],
+        'info_dict': {
+            'id': 'OVbfO7d0_hQ',
+            'ext': 'mp4',
+            'title': 'iPhone6熱銷 蘋果財報亮眼',
+            'description': 'md5:f395d4f485487bb0f992ed2c4b07aa7d',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'upload_date': '20150128',
+            'uploader_id': 'TBSCTS',
+            'uploader': '中華電視公司',
+        }
+    }]
+
+    def _real_extract(self, url):
+        news_id = self._match_id(url)
+        page = self._download_webpage(url, news_id)
+
+        if self._search_regex(r'(CTSPlayer2)', page, 'CTSPlayer2 identifier', default=None):
+            feed_url = self._html_search_regex(
+                r'(http://news\.cts\.com\.tw/action/mp4feed\.php\?news_id=\d+)',
+                page, 'feed url')
+            video_url = self._download_webpage(
+                feed_url, news_id, note='Fetching feed')
+        else:
+            self.to_screen('Not CTSPlayer video, trying Youtube...')
+            youtube_url = self._search_regex(
+                r'src="(//www\.youtube\.com/embed/[^"]+)"', page, 'youtube url',
+                default=None)
+            if not youtube_url:
+                raise ExtractorError('The news includes no videos!', expected=True)
+
+            return {
+                '_type': 'url',
+                'url': youtube_url,
+                'ie_key': 'Youtube',
+            }
+
+        description = self._html_search_meta('description', page)
+        title = self._html_search_meta('title', page)
+        thumbnail = self._html_search_meta('image', page)
+
+        datetime_str = self._html_search_regex(
+            r'(\d{4}/\d{2}/\d{2} \d{2}:\d{2})', page, 'date and time')
+        # Transform into ISO 8601 format with timezone info
+        datetime_str = datetime_str.replace('/', '-') + ':00+0800'
+        timestamp = parse_iso8601(datetime_str, delimiter=' ')
+
+        return {
+            'id': news_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+        }
diff --git a/youtube_dl/extractor/dctp.py b/youtube_dl/extractor/dctp.py
new file mode 100644 (file)
index 0000000..6ed3543
--- /dev/null
@@ -0,0 +1,57 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+
+
+class DctpTvIE(InfoExtractor):
+    _VALID_URL = r'http://www.dctp.tv/(#/)?filme/(?P<id>.+?)/$'
+    _TEST = {
+        'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
+        'info_dict': {
+            'id': '1324',
+            'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
+            'ext': 'flv',
+            'title': 'Videoinstallation für eine Kaufhausfassade'
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        base_url = 'http://dctp-ivms2-restapi.s3.amazonaws.com/'
+        version_json = self._download_json(
+            base_url + 'version.json',
+            video_id, note='Determining file version')
+        version = version_json['version_name']
+        info_json = self._download_json(
+            '{0}{1}/restapi/slugs/{2}.json'.format(base_url, version, video_id),
+            video_id, note='Fetching object ID')
+        object_id = compat_str(info_json['object_id'])
+        meta_json = self._download_json(
+            '{0}{1}/restapi/media/{2}.json'.format(base_url, version, object_id),
+            video_id, note='Downloading metadata')
+        uuid = meta_json['uuid']
+        title = meta_json['title']
+        wide = meta_json['is_wide']
+        if wide:
+            ratio = '16x9'
+        else:
+            ratio = '4x3'
+        play_path = 'mp4:{0}_dctp_0500_{1}.m4v'.format(uuid, ratio)
+
+        servers_json = self._download_json(
+            'http://www.dctp.tv/streaming_servers/',
+            video_id, note='Downloading server list')
+        url = servers_json[0]['endpoint']
+
+        return {
+            'id': object_id,
+            'title': title,
+            'format': 'rtmp',
+            'url': url,
+            'play_path': play_path,
+            'rtmp_real_time': True,
+            'ext': 'flv',
+            'display_id': video_id
+        }
index 5e50c63d9aca7d2642239ccf32a5cedd91b05174..2b90bf4fc2fcba04fe7e164602196586713d4225 100644 (file)
@@ -1,40 +1,38 @@
 from __future__ import unicode_literals
 
-import re
-import json
-
 from .common import InfoExtractor
 
 
 class DefenseGouvFrIE(InfoExtractor):
     IE_NAME = 'defense.gouv.fr'
-    _VALID_URL = (r'http://.*?\.defense\.gouv\.fr/layout/set/'
-                  r'ligthboxvideo/base-de-medias/webtv/(.*)')
+    _VALID_URL = r'http://.*?\.defense\.gouv\.fr/layout/set/ligthboxvideo/base-de-medias/webtv/(?P<id>[^/?#]*)'
 
     _TEST = {
         'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1',
-        'file': '11213.mp4',
         'md5': '75bba6124da7e63d2d60b5244ec9430c',
-        "info_dict": {
-            "title": "attaque-chimique-syrienne-du-21-aout-2013-1"
+        'info_dict': {
+            'id': '11213',
+            'ext': 'mp4',
+            'title': 'attaque-chimique-syrienne-du-21-aout-2013-1'
         }
     }
 
     def _real_extract(self, url):
-        title = re.match(self._VALID_URL, url).group(1)
+        title = self._match_id(url)
         webpage = self._download_webpage(url, title)
+
         video_id = self._search_regex(
             r"flashvars.pvg_id=\"(\d+)\";",
             webpage, 'ID')
 
         json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
                     + video_id)
-        info = self._download_webpage(json_url, title,
-                                      'Downloading JSON config')
-        video_url = json.loads(info)['renditions'][0]['url']
-
-        return {'id': video_id,
-                'ext': 'mp4',
-                'url': video_url,
-                'title': title,
-                }
+        info = self._download_json(json_url, title, 'Downloading JSON config')
+        video_url = info['renditions'][0]['url']
+
+        return {
+            'id': video_id,
+            'ext': 'mp4',
+            'url': video_url,
+            'title': title,
+        }
index c44adb1099bf6f0a2d08ccad7cebebef3939ddf9..d5df18d7c971c18f01c51128c75fbe4ee09ea070 100644 (file)
@@ -6,7 +6,7 @@ from ..utils import parse_iso8601
 
 
 class DRTVIE(SubtitlesInfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)+(?P<id>[\da-z-]+)(?:[/#?]|$)'
+    _VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
 
     _TEST = {
         'url': 'http://www.dr.dk/tv/se/partiets-mand/partiets-mand-7-8',
@@ -25,9 +25,15 @@ class DRTVIE(SubtitlesInfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        programcard = self._download_json(
-            'http://www.dr.dk/mu/programcard/expanded/%s' % video_id, video_id, 'Downloading video JSON')
+        webpage = self._download_webpage(url, video_id)
+
+        video_id = self._search_regex(
+            r'data-(?:material-identifier|episode-slug)="([^"]+)"',
+            webpage, 'video id')
 
+        programcard = self._download_json(
+            'http://www.dr.dk/mu/programcard/expanded/%s' % video_id,
+            video_id, 'Downloading video JSON')
         data = programcard['Data'][0]
 
         title = data['Title']
@@ -48,14 +54,20 @@ class DRTVIE(SubtitlesInfoExtractor):
             elif asset['Kind'] == 'VideoResource':
                 duration = asset['DurationInMilliseconds'] / 1000.0
                 restricted_to_denmark = asset['RestrictedToDenmark']
+                spoken_subtitles = asset['Target'] == 'SpokenSubtitles'
                 for link in asset['Links']:
                     target = link['Target']
                     uri = link['Uri']
+                    format_id = target
+                    preference = -1 if target == 'HDS' else -2
+                    if spoken_subtitles:
+                        preference -= 2
+                        format_id += '-spoken-subtitles'
                     formats.append({
                         'url': uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43' if target == 'HDS' else uri,
-                        'format_id': target,
+                        'format_id': format_id,
                         'ext': link['FileFormat'],
-                        'preference': -1 if target == 'HDS' else -2,
+                        'preference': preference,
                     })
                 subtitles_list = asset.get('SubtitlesList')
                 if isinstance(subtitles_list, list):
index 81ceace53289709b93d7c647f6627197320381ef..1ccc1a9642bb09ed84bdd2747c665520ab3c98c4 100644 (file)
@@ -5,6 +5,7 @@ import hashlib
 
 from .common import InfoExtractor
 from ..compat import (
+    compat_urllib_parse,
     compat_urllib_request,
     compat_urlparse,
 )
@@ -16,7 +17,8 @@ from ..utils import (
 class FC2IE(InfoExtractor):
     _VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)?content/(?P<id>[^/]+)'
     IE_NAME = 'fc2'
-    _TEST = {
+    _NETRC_MACHINE = 'fc2'
+    _TESTS = [{
         'url': 'http://video.fc2.com/en/content/20121103kUan1KHs',
         'md5': 'a6ebe8ebe0396518689d963774a54eb7',
         'info_dict': {
@@ -24,12 +26,57 @@ class FC2IE(InfoExtractor):
             'ext': 'flv',
             'title': 'Boxing again with Puff',
         },
-    }
+    }, {
+        'url': 'http://video.fc2.com/en/content/20150125cEva0hDn/',
+        'info_dict': {
+            'id': '20150125cEva0hDn',
+            'ext': 'mp4',
+        },
+        'params': {
+            'username': 'ytdl@yt-dl.org',
+            'password': '(snip)',
+            'skip': 'requires actual password'
+        }
+    }]
+
+    def _login(self):
+        (username, password) = self._get_login_info()
+        if username is None or password is None:
+            return False
+
+        # Log in
+        login_form_strs = {
+            'email': username,
+            'password': password,
+            'done': 'video',
+            'Submit': ' Login ',
+        }
+
+        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
+        # chokes on unicode
+        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
+        login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
+        request = compat_urllib_request.Request(
+            'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data)
+
+        login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in')
+        if 'mode=redirect&login=done' not in login_results:
+            self.report_warning('unable to log in: bad username or password')
+            return False
+
+        # requesting the post-login redirect page is also needed
+        login_redir = compat_urllib_request.Request('http://id.fc2.com/?mode=redirect&login=done')
+        self._download_webpage(
+            login_redir, None, note='Login redirect', errnote='Login redirect failed')
+
+        return True
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
+        self._login()
         webpage = self._download_webpage(url, video_id)
         self._downloader.cookiejar.clear_session_cookies()  # must clear
+        self._login()
 
         title = self._og_search_title(webpage)
         thumbnail = self._og_search_thumbnail(webpage)
@@ -46,7 +93,12 @@ class FC2IE(InfoExtractor):
         info = compat_urlparse.parse_qs(info_webpage)
 
         if 'err_code' in info:
-            raise ExtractorError('Error code: %s' % info['err_code'][0])
+            # most of the time we can still download video even if err_code is 403 or 602
+            self.report_warning(
+                'Error code was: %s... but still trying' % info['err_code'][0])
+
+        if 'filepath' not in info:
+            raise ExtractorError('Cannot download file. Are you logged in?')
 
         video_url = info['filepath'][0] + '?mid=' + info['mid'][0]
         title_info = info.get('title')
index 68e2db94385bc7f51d7f27a68d61d5e4289ba0c9..0fb29de75228f0133c0b8d54a015fbd5d90954c1 100644 (file)
@@ -16,6 +16,7 @@ class FolketingetIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?ft\.dk/webtv/video/[^?#]*?\.(?P<id>[0-9]+)\.aspx'
     _TEST = {
         'url': 'http://www.ft.dk/webtv/video/20141/eru/td.1165642.aspx?as=1#player',
+        'md5': '6269e8626fa1a891bf5369b386ae996a',
         'info_dict': {
             'id': '1165642',
             'ext': 'mp4',
@@ -29,9 +30,6 @@ class FolketingetIE(InfoExtractor):
             'upload_date': '20141120',
             'duration': 3960,
         },
-        'params': {
-            'skip_download': 'rtmpdump required',
-        }
     }
 
     def _real_extract(self, url):
index 0c29721629a25369621072e4f451e7decdc8df0b..1e83a4e7e1eadfb322e7e691848fae462711596c 100644 (file)
@@ -1,77 +1,69 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import json
-import re
-
 from .common import InfoExtractor
 from ..compat import (
-    compat_parse_qs,
     compat_urlparse,
 )
+from ..utils import (
+    determine_ext,
+    int_or_none,
+)
 
 
 class FranceCultureIE(InfoExtractor):
-    _VALID_URL = r'(?P<baseurl>http://(?:www\.)?franceculture\.fr/)player/reecouter\?play=(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?franceculture\.fr/player/reecouter\?play=(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://www.franceculture.fr/player/reecouter?play=4795174',
         'info_dict': {
             'id': '4795174',
             'ext': 'mp3',
             'title': 'Rendez-vous au pays des geeks',
+            'alt_title': 'Carnet nomade | 13-14',
             'vcodec': 'none',
-            'uploader': 'Colette Fellous',
             'upload_date': '20140301',
-            'duration': 3601,
             'thumbnail': r're:^http://www\.franceculture\.fr/.*/images/player/Carnet-nomade\.jpg$',
-            'description': 'Avec :Jean-Baptiste Péretié pour son documentaire sur Arte "La revanche des « geeks », une enquête menée aux Etats-Unis dans la S ...',
+            'description': 'startswith:Avec :Jean-Baptiste Péretié pour son documentaire sur Arte "La revanche des « geeks », une enquête menée aux Etats',
+            'timestamp': 1393700400,
         }
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        baseurl = mobj.group('baseurl')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-        params_code = self._search_regex(
-            r"<param name='movie' value='/sites/all/modules/rf/rf_player/swf/loader.swf\?([^']+)' />",
-            webpage, 'parameter code')
-        params = compat_parse_qs(params_code)
-        video_url = compat_urlparse.urljoin(baseurl, params['urlAOD'][0])
+
+        video_path = self._search_regex(
+            r'<a id="player".*?href="([^"]+)"', webpage, 'video path')
+        video_url = compat_urlparse.urljoin(url, video_path)
+        timestamp = int_or_none(self._search_regex(
+            r'<a id="player".*?data-date="([0-9]+)"',
+            webpage, 'upload date', fatal=False))
+        thumbnail = self._search_regex(
+            r'<a id="player".*?>\s+<img src="([^"]+)"',
+            webpage, 'thumbnail', fatal=False)
 
         title = self._html_search_regex(
-            r'<h1 class="title[^"]+">(.+?)</h1>', webpage, 'title')
-        uploader = self._html_search_regex(
-            r'(?s)<div id="emission".*?<span class="author">(.*?)</span>',
-            webpage, 'uploader', fatal=False)
-        thumbnail_part = self._html_search_regex(
-            r'(?s)<div id="emission".*?<img src="([^"]+)"', webpage,
-            'thumbnail', fatal=False)
-        if thumbnail_part is None:
-            thumbnail = None
-        else:
-            thumbnail = compat_urlparse.urljoin(baseurl, thumbnail_part)
+            r'<span class="title-diffusion">(.*?)</span>', webpage, 'title')
+        alt_title = self._html_search_regex(
+            r'<span class="title">(.*?)</span>',
+            webpage, 'alt_title', fatal=False)
         description = self._html_search_regex(
-            r'(?s)<p class="desc">(.*?)</p>', webpage, 'description')
+            r'<span class="description">(.*?)</span>',
+            webpage, 'description', fatal=False)
 
-        info = json.loads(params['infoData'][0])[0]
-        duration = info.get('media_length')
-        upload_date_candidate = info.get('media_section5')
-        upload_date = (
-            upload_date_candidate
-            if (upload_date_candidate is not None and
-                re.match(r'[0-9]{8}$', upload_date_candidate))
-            else None)
+        uploader = self._html_search_regex(
+            r'(?s)<div id="emission".*?<span class="author">(.*?)</span>',
+            webpage, 'uploader', default=None)
+        vcodec = 'none' if determine_ext(video_url.lower()) == 'mp3' else None
 
         return {
             'id': video_id,
             'url': video_url,
-            'vcodec': 'none' if video_url.lower().endswith('.mp3') else None,
-            'duration': duration,
+            'vcodec': vcodec,
             'uploader': uploader,
-            'upload_date': upload_date,
+            'timestamp': timestamp,
             'title': title,
+            'alt_title': alt_title,
             'thumbnail': thumbnail,
             'description': description,
         }
index bbc760a4990cac1b6cdb731c161d61c853a72729..170d6807529ac9b121187786cf9329b3b3525dc3 100644 (file)
@@ -230,12 +230,13 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
 
 class GenerationQuoiIE(InfoExtractor):
     IE_NAME = 'france2.fr:generation-quoi'
-    _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<name>.*)(\?|$)'
+    _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<id>[^/?#]+)'
 
     _TEST = {
         'url': 'http://generation-quoi.france2.fr/portrait/garde-a-vous',
-        'file': 'k7FJX8VBcvvLmX4wA5Q.mp4',
         'info_dict': {
+            'id': 'k7FJX8VBcvvLmX4wA5Q',
+            'ext': 'mp4',
             'title': 'Génération Quoi - Garde à Vous',
             'uploader': 'Génération Quoi',
         },
@@ -243,14 +244,12 @@ class GenerationQuoiIE(InfoExtractor):
             # It uses Dailymotion
             'skip_download': True,
         },
-        'skip': 'Only available from France',
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        name = mobj.group('name')
-        info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % name)
-        info_json = self._download_webpage(info_url, name)
+        display_id = self._match_id(url)
+        info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % display_id)
+        info_json = self._download_webpage(info_url, display_id)
         info = json.loads(info_json)
         return self.url_result('http://www.dailymotion.com/video/%s' % info['id'],
                                ie='Dailymotion')
index 50f8fc7e77c83ad63a05e9a312a9ff5b3afd8983..7591a151ea352ea4d3c3b066d68bffb1141c513f 100644 (file)
@@ -1,8 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
@@ -29,9 +27,7 @@ class GameStarIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         og_title = self._og_search_title(webpage)
index 7a5bf939237ff45731fd3befca5ad0b7dfc0df1f..fbbc79a574ca03f1e483738a726f2fde0bf6b21d 100644 (file)
@@ -17,6 +17,7 @@ from ..utils import (
     ExtractorError,
     float_or_none,
     HEADRequest,
+    is_html,
     orderedSet,
     parse_xml,
     smuggle_url,
@@ -139,6 +140,19 @@ class GenericIE(InfoExtractor):
             },
             'add_ie': ['Ooyala'],
         },
+        # multiple ooyala embeds on SBN network websites
+        {
+            'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
+            'info_dict': {
+                'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
+                'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
+            },
+            'playlist_mincount': 3,
+            'params': {
+                'skip_download': True,
+            },
+            'add_ie': ['Ooyala'],
+        },
         # google redirect
         {
             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
@@ -361,7 +375,7 @@ class GenericIE(InfoExtractor):
             'info_dict': {
                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
                 'title': 'Zero Punctuation',
-                'description': 're:'
+                'description': 're:.*groundbreaking video review series.*'
             },
             'playlist_mincount': 11,
         },
@@ -488,6 +502,29 @@ class GenericIE(InfoExtractor):
                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
             }
         },
+        # Cinerama player
+        {
+            'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
+            'info_dict': {
+                'id': '730m_DandD_1901_512k',
+                'ext': 'mp4',
+                'uploader': 'www.abc.net.au',
+                'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
+            }
+        },
+        # embedded viddler video
+        {
+            'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
+            'info_dict': {
+                'id': '4d03aad9',
+                'ext': 'mp4',
+                'uploader': 'deadspin',
+                'title': 'WALL-TO-GORTAT',
+                'timestamp': 1422285291,
+                'upload_date': '20150126',
+            },
+            'add_ie': ['Viddler'],
+        }
     ]
 
     def report_following_redirect(self, new_url):
@@ -647,7 +684,7 @@ class GenericIE(InfoExtractor):
         # Maybe it's a direct link to a video?
         # Be careful not to download the whole thing!
         first_bytes = full_response.read(512)
-        if not re.match(r'^\s*<', first_bytes.decode('utf-8', 'replace')):
+        if not is_html(first_bytes):
             self._downloader.report_warning(
                 'URL could be a direct video link, returning it as such.')
             upload_date = unified_strdate(
@@ -849,12 +886,28 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             return self.url_result(mobj.group('url'))
 
+        # Look for embedded Viddler player
+        mobj = re.search(
+            r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
+            webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'))
+
         # Look for Ooyala videos
-        mobj = (re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
-                re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage))
+        mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
+                re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
+                re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage))
         if mobj is not None:
             return OoyalaIE._build_url_result(mobj.group('ec'))
 
+        # Look for multiple Ooyala embeds on SBN network websites
+        mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
+        if mobj is not None:
+            embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
+            if embeds:
+                return _playlist_from_matches(
+                    embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
+
         # Look for Aparat videos
         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
         if mobj is not None:
@@ -1042,9 +1095,13 @@ class GenericIE(InfoExtractor):
             found = filter_video(re.findall(r'''(?xs)
                 flowplayer\("[^"]+",\s*
                     \{[^}]+?\}\s*,
-                    \s*{[^}]+? ["']?clip["']?\s*:\s*\{\s*
+                    \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
                         ["']?url["']?\s*:\s*["']([^"']+)["']
             ''', webpage))
+        if not found:
+            # Cinerama player
+            found = re.findall(
+                r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
         if not found:
             # Try to find twitter cards info
             found = filter_video(re.findall(
index 6949a57c70dd9b378c4879dad8afd4f3b18e558a..29638a1948ff1230403f313f1c7725ab69224434 100644 (file)
@@ -70,6 +70,19 @@ class GloboIE(InfoExtractor):
                 'like_count': int,
             }
         },
+        {
+            'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/',
+            'md5': 'c1defca721ce25b2354e927d3e4b3dec',
+            'info_dict': {
+                'id': '3928201',
+                'ext': 'mp4',
+                'title': 'Ator e diretor argentino, Ricado Darín fala sobre utopias e suas perdas',
+                'duration': 1472.906,
+                'uploader': 'Canal Brasil',
+                'uploader_id': 705,
+                'like_count': int,
+            }
+        },
     ]
 
     class MD5():
@@ -381,11 +394,16 @@ class GloboIE(InfoExtractor):
             signed_md5 = self.MD5.b64_md5(received_md5 + compat_str(sign_time) + padding)
             signed_hash = hash_code + compat_str(received_time) + received_random + compat_str(sign_time) + padding + signed_md5
 
-            formats.append({
-                'url': '%s?h=%s&k=%s' % (resource['url'], signed_hash, 'flash'),
-                'format_id': resource_id,
-                'height': resource['height']
-            })
+            resource_url = resource['url']
+            signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash')
+            if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'):
+                formats.extend(self._extract_m3u8_formats(signed_url, resource_id, 'mp4'))
+            else:
+                formats.append({
+                    'url': signed_url,
+                    'format_id': resource_id,
+                    'height': resource.get('height'),
+                })
 
         self._sort_formats(formats)
 
index fff74a70a891fc163ff488408e4df348564b8a29..848d17beb4d3559cb818f14795655b12912e0f11 100644 (file)
@@ -83,7 +83,7 @@ class GroovesharkIE(InfoExtractor):
         return compat_urlparse.urlunparse((uri.scheme, uri.netloc, obj['attrs']['data'], None, None, None))
 
     def _transform_bootstrap(self, js):
-        return re.split('(?m)^\s*try\s*{', js)[0] \
+        return re.split('(?m)^\s*try\s*\{', js)[0] \
                  .split(' = ', 1)[1].strip().rstrip(';')
 
     def _transform_meta(self, js):
diff --git a/youtube_dl/extractor/hearthisat.py b/youtube_dl/extractor/hearthisat.py
new file mode 100644 (file)
index 0000000..a19b31a
--- /dev/null
@@ -0,0 +1,117 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_request,
+    compat_urlparse,
+)
+from ..utils import (
+    HEADRequest,
+    str_to_int,
+    urlencode_postdata,
+    urlhandle_detect_ext,
+)
+
+
+class HearThisAtIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?hearthis\.at/(?P<artist>[^/]+)/(?P<title>[A-Za-z0-9\-]+)/?$'
+    _PLAYLIST_URL = 'https://hearthis.at/playlist.php'
+    _TEST = {
+        'url': 'https://hearthis.at/moofi/dr-kreep',
+        'md5': 'ab6ec33c8fed6556029337c7885eb4e0',
+        'info_dict': {
+            'id': '150939',
+            'ext': 'wav',
+            'title': 'Moofi - Dr. Kreep',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'timestamp': 1421564134,
+            'description': 'Creepy Patch. Mutable Instruments Braids Vowel + Formant Mode.',
+            'upload_date': '20150118',
+            'comment_count': int,
+            'view_count': int,
+            'like_count': int,
+            'duration': 71,
+            'categories': ['Experimental'],
+        }
+    }
+
+    def _real_extract(self, url):
+        m = re.match(self._VALID_URL, url)
+        display_id = '{artist:s} - {title:s}'.format(**m.groupdict())
+
+        webpage = self._download_webpage(url, display_id)
+        track_id = self._search_regex(
+            r'intTrackId\s*=\s*(\d+)', webpage, 'track ID')
+
+        payload = urlencode_postdata({'tracks[]': track_id})
+        req = compat_urllib_request.Request(self._PLAYLIST_URL, payload)
+        req.add_header('Content-type', 'application/x-www-form-urlencoded')
+
+        track = self._download_json(req, track_id, 'Downloading playlist')[0]
+        title = '{artist:s} - {title:s}'.format(**track)
+
+        categories = None
+        if track.get('category'):
+            categories = [track['category']]
+
+        description = self._og_search_description(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        meta_span = r'<span[^>]+class="%s".*?</i>([^<]+)</span>'
+        view_count = str_to_int(self._search_regex(
+            meta_span % 'plays_count', webpage, 'view count', fatal=False))
+        like_count = str_to_int(self._search_regex(
+            meta_span % 'likes_count', webpage, 'like count', fatal=False))
+        comment_count = str_to_int(self._search_regex(
+            meta_span % 'comment_count', webpage, 'comment count', fatal=False))
+        duration = str_to_int(self._search_regex(
+            r'data-length="(\d+)', webpage, 'duration', fatal=False))
+        timestamp = str_to_int(self._search_regex(
+            r'<span[^>]+class="calctime"[^>]+data-time="(\d+)', webpage, 'timestamp', fatal=False))
+
+        formats = []
+        mp3_url = self._search_regex(
+            r'(?s)<a class="player-link"\s+(?:[a-zA-Z0-9_:-]+="[^"]+"\s+)*?data-mp3="([^"]+)"',
+            webpage, 'mp3 URL', fatal=False)
+        if mp3_url:
+            formats.append({
+                'format_id': 'mp3',
+                'vcodec': 'none',
+                'acodec': 'mp3',
+                'url': mp3_url,
+            })
+        download_path = self._search_regex(
+            r'<a class="[^"]*download_fct[^"]*"\s+href="([^"]+)"',
+            webpage, 'download URL', default=None)
+        if download_path:
+            download_url = compat_urlparse.urljoin(url, download_path)
+            ext_req = HEADRequest(download_url)
+            ext_handle = self._request_webpage(
+                ext_req, display_id, note='Determining extension')
+            ext = urlhandle_detect_ext(ext_handle)
+            formats.append({
+                'format_id': 'download',
+                'vcodec': 'none',
+                'ext': ext,
+                'url': download_url,
+                'preference': 2,  # Usually better quality
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': track_id,
+            'display_id': display_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': thumbnail,
+            'description': description,
+            'duration': duration,
+            'timestamp': timestamp,
+            'view_count': view_count,
+            'comment_count': comment_count,
+            'like_count': like_count,
+            'categories': categories,
+        }
diff --git a/youtube_dl/extractor/historicfilms.py b/youtube_dl/extractor/historicfilms.py
new file mode 100644 (file)
index 0000000..40afbe5
--- /dev/null
@@ -0,0 +1,46 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import parse_duration
+
+
+class HistoricFilmsIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?historicfilms\.com/(?:tapes/|play)(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://www.historicfilms.com/tapes/4728',
+        'md5': 'd4a437aec45d8d796a38a215db064e9a',
+        'info_dict': {
+            'id': '4728',
+            'ext': 'mov',
+            'title': 'Historic Films: GP-7',
+            'description': 'md5:1a86a0f3ac54024e419aba97210d959a',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 2096,
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        tape_id = self._search_regex(
+            r'class="tapeId">([^<]+)<', webpage, 'tape id')
+
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage)
+        thumbnail = self._html_search_meta(
+            'thumbnailUrl', webpage, 'thumbnails') or self._og_search_thumbnail(webpage)
+        duration = parse_duration(self._html_search_meta(
+            'duration', webpage, 'duration'))
+
+        video_url = 'http://www.historicfilms.com/video/%s_%s_web.mov' % (tape_id, video_id)
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+        }
index 7a400323dc4df3807057a77b25f7401ce5e2a3b8..e825944443392153d8a0d456f5ad5dc80b2c9f77 100644 (file)
@@ -16,7 +16,7 @@ from ..utils import (
 class IviIE(InfoExtractor):
     IE_DESC = 'ivi.ru'
     IE_NAME = 'ivi'
-    _VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<videoid>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
 
     _TESTS = [
         # Single movie
@@ -63,29 +63,34 @@ class IviIE(InfoExtractor):
         return int(m.group('commentcount')) if m is not None else 0
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('videoid')
+        video_id = self._match_id(url)
 
         api_url = 'http://api.digitalaccess.ru/api/json/'
 
-        data = {'method': 'da.content.get',
-                'params': [video_id, {'site': 's183',
-                                      'referrer': 'http://www.ivi.ru/watch/%s' % video_id,
-                                      'contentid': video_id
-                                      }
-                           ]
+        data = {
+            'method': 'da.content.get',
+            'params': [
+                video_id, {
+                    'site': 's183',
+                    'referrer': 'http://www.ivi.ru/watch/%s' % video_id,
+                    'contentid': video_id
                 }
+            ]
+        }
 
         request = compat_urllib_request.Request(api_url, json.dumps(data))
 
-        video_json_page = self._download_webpage(request, video_id, 'Downloading video JSON')
+        video_json_page = self._download_webpage(
+            request, video_id, 'Downloading video JSON')
         video_json = json.loads(video_json_page)
 
         if 'error' in video_json:
             error = video_json['error']
             if error['origin'] == 'NoRedisValidData':
                 raise ExtractorError('Video %s does not exist' % video_id, expected=True)
-            raise ExtractorError('Unable to download video %s: %s' % (video_id, error['message']), expected=True)
+            raise ExtractorError(
+                'Unable to download video %s: %s' % (video_id, error['message']),
+                expected=True)
 
         result = video_json['result']
 
index dbfe4cc03fd8c569ed2f05d5ae9c86c36bb9e278..364dc878ee23b98413a7f2c6735124d50d4f487b 100644 (file)
@@ -13,17 +13,17 @@ class KankanIE(InfoExtractor):
 
     _TEST = {
         'url': 'http://yinyue.kankan.com/vod/48/48863.shtml',
-        'file': '48863.flv',
         'md5': '29aca1e47ae68fc28804aca89f29507e',
         'info_dict': {
+            'id': '48863',
+            'ext': 'flv',
             'title': 'Ready To Go',
         },
         'skip': 'Only available from China',
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, 'video title')
index 97dcb518a3587406bc93a44c39344630cafe7119..82eddec511850ade9b4786636027597baf75dd29 100644 (file)
@@ -7,10 +7,6 @@ from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse_urlparse,
     compat_urllib_request,
-    compat_urllib_parse,
-)
-from ..aes import (
-    aes_decrypt_text
 )
 
 
@@ -18,9 +14,10 @@ class KeezMoviesIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/.+?(?P<id>[0-9]+)(?:[/?&]|$)'
     _TEST = {
         'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
-        'file': '1214711.mp4',
         'md5': '6e297b7e789329923fcf83abb67c9289',
         'info_dict': {
+            'id': '1214711',
+            'ext': 'mp4',
             'title': 'Petite Asian Lady Mai Playing In Bathtub',
             'age_limit': 18,
         }
@@ -39,11 +36,10 @@ class KeezMoviesIE(InfoExtractor):
             embedded_url = mobj.group(1)
             return self.url_result(embedded_url)
 
-        video_title = self._html_search_regex(r'<h1 [^>]*>([^<]+)', webpage, 'title')
-        video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&amp;', webpage, 'video_url'))
-        if 'encrypted=true' in webpage:
-            password = self._html_search_regex(r'video_title=(.+?)&amp;', webpage, 'password')
-            video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
+        video_title = self._html_search_regex(
+            r'<h1 [^>]*>([^<]+)', webpage, 'title')
+        video_url = self._html_search_regex(
+            r'(?s)html5VideoPlayer = .*?src="([^"]+)"', webpage, 'video URL')
         path = compat_urllib_parse_urlparse(video_url).path
         extension = os.path.splitext(path)[1][1:]
         format = path.split('/')[4].split('_')[:2]
index 6f3d2345b6f976ff9b380a04014f20df18483e6e..e46954b47449b11be795c17478e168a6a57af0fd 100644 (file)
@@ -2,18 +2,17 @@
 from __future__ import unicode_literals
 
 import json
-import re
 
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
-    unescapeHTML,
+    js_to_json,
 )
 
 
 class KrasViewIE(InfoExtractor):
     IE_DESC = 'Красвью'
-    _VALID_URL = r'https?://krasview\.ru/video/(?P<id>\d+)'
+    _VALID_URL = r'https?://krasview\.ru/(?:video|embed)/(?P<id>\d+)'
 
     _TEST = {
         'url': 'http://krasview.ru/video/512228',
@@ -29,20 +28,18 @@ class KrasViewIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
 
-        flashvars = json.loads(self._search_regex(
-            r'flashvars\s*:\s*({.+?})\s*}\);', webpage, 'flashvars'))
+        flashvars = json.loads(js_to_json(self._search_regex(
+            r'video_Init\(({.+?})', webpage, 'flashvars')))
 
         video_url = flashvars['url']
-        title = unescapeHTML(flashvars['title'])
-        description = unescapeHTML(flashvars.get('subtitle') or self._og_search_description(webpage, default=None))
-        thumbnail = flashvars['image']
-        duration = int(flashvars['duration'])
-        filesize = int(flashvars['size'])
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage, default=None)
+        thumbnail = flashvars.get('image') or self._og_search_thumbnail(webpage)
+        duration = int_or_none(flashvars.get('duration'))
         width = int_or_none(self._og_search_property('video:width', webpage, 'video width'))
         height = int_or_none(self._og_search_property('video:height', webpage, 'video height'))
 
@@ -53,7 +50,6 @@ class KrasViewIE(InfoExtractor):
             'description': description,
             'thumbnail': thumbnail,
             'duration': duration,
-            'filesize': filesize,
             'width': width,
             'height': height,
         }
index db2028e9f5148d37f69f0d7c4c41fbbb77d88928..b08f6e3c9548de02217e43bebbf20b5f2ab871e8 100644 (file)
@@ -1,7 +1,5 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     parse_duration,
@@ -20,9 +18,10 @@ class LA7IE(InfoExtractor):
 
     _TEST = {
         'url': 'http://www.la7.tv/richplayer/?assetid=50355319',
-        'file': '50355319.mp4',
         'md5': 'ec7d1f0224d20ba293ab56cf2259651f',
         'info_dict': {
+            'id': '50355319',
+            'ext': 'mp4',
             'title': 'IL DIVO',
             'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti  e Flavio Bucci',
             'duration': 6254,
@@ -31,9 +30,7 @@ class LA7IE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         xml_url = 'http://www.la7.tv/repliche/content/index.php?contentId=%s' % video_id
         doc = self._download_xml(xml_url, video_id)
 
index b04be1e8cfda94addca26a1d1e3731ce61519dc1..35822067f908f0567e8dcb8c9c8265df4d3421c2 100644 (file)
@@ -8,20 +8,20 @@ from ..utils import int_or_none
 
 
 class LiveLeakIE(InfoExtractor):
-    _VALID_URL = r'^(?:http://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
+    _VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<id>[\w_]+)(?:.*)'
     _TESTS = [{
         'url': 'http://www.liveleak.com/view?i=757_1364311680',
-        'md5': '0813c2430bea7a46bf13acf3406992f4',
+        'md5': '50f79e05ba149149c1b4ea961223d5b3',
         'info_dict': {
             'id': '757_1364311680',
-            'ext': 'mp4',
+            'ext': 'flv',
             'description': 'extremely bad day for this guy..!',
             'uploader': 'ljfriel2',
             'title': 'Most unlucky car accident'
         }
     }, {
         'url': 'http://www.liveleak.com/view?i=f93_1390833151',
-        'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
+        'md5': 'b13a29626183c9d33944e6a04f41aafc',
         'info_dict': {
             'id': 'f93_1390833151',
             'ext': 'mp4',
@@ -43,8 +43,7 @@ class LiveLeakIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('video_id')
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
@@ -81,9 +80,19 @@ class LiveLeakIE(InfoExtractor):
         sources = json.loads(sources_json)
 
         formats = [{
+            'format_id': '%s' % i,
             'format_note': s.get('label'),
             'url': s['file'],
-        } for s in sources]
+        } for i, s in enumerate(sources)]
+        for i, s in enumerate(sources):
+            orig_url = s['file'].replace('.h264_base.mp4', '')
+            if s['file'] != orig_url:
+                formats.append({
+                    'format_id': 'original-%s' % i,
+                    'format_note': s.get('label'),
+                    'url': orig_url,
+                    'preference': 1,
+                })
         self._sort_formats(formats)
 
         return {
index a8e357859d24beb3724a81652b7a664bfe43ea43..fd23b0b43fa91af1e828c9038f83ac228b93aa94 100644 (file)
@@ -6,13 +6,12 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
-    js_to_json,
     unified_strdate,
 )
 
 
 class LnkGoIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?lnkgo\.alfa\.lt/visi\-video/(?P<show>[^/]+)/ziurek\-(?P<display_id>[A-Za-z0-9\-]+)'
+    _VALID_URL = r'https?://(?:www\.)?lnkgo\.alfa\.lt/visi-video/(?P<show>[^/]+)/ziurek-(?P<id>[A-Za-z0-9-]+)'
     _TESTS = [{
         'url': 'http://lnkgo.alfa.lt/visi-video/yra-kaip-yra/ziurek-yra-kaip-yra-162',
         'info_dict': {
@@ -51,8 +50,7 @@ class LnkGoIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('display_id')
+        display_id = self._match_id(url)
 
         webpage = self._download_webpage(
             url, display_id, 'Downloading player webpage')
@@ -61,6 +59,8 @@ class LnkGoIE(InfoExtractor):
             r'data-ep="([^"]+)"', webpage, 'video ID')
         title = self._og_search_title(webpage)
         description = self._og_search_description(webpage)
+        upload_date = unified_strdate(self._search_regex(
+            r'class="[^"]*meta-item[^"]*air-time[^"]*">.*?<strong>([^<]+)</strong>', webpage, 'upload date', fatal=False))
 
         thumbnail_w = int_or_none(
             self._og_search_property('image:width', webpage, 'thumbnail width', fatal=False))
@@ -75,39 +75,28 @@ class LnkGoIE(InfoExtractor):
                 'height': thumbnail_h,
             })
 
-        upload_date = unified_strdate(self._search_regex(
-            r'class="meta-item\sair-time">.*?<strong>([^<]+)</strong>', webpage, 'upload date', fatal=False))
-        duration = int_or_none(self._search_regex(
-            r'VideoDuration = "([^"]+)"', webpage, 'duration', fatal=False))
+        config = self._parse_json(self._search_regex(
+            r'episodePlayer\((\{.*?\}),\s*\{', webpage, 'sources'), video_id)
 
-        pg_rating = self._search_regex(
-            r'pgrating="([^"]+)"', webpage, 'PG rating', fatal=False, default='')
-        age_limit = self._AGE_LIMITS.get(pg_rating.upper(), 0)
+        if config.get('pGeo'):
+            self.report_warning(
+                'This content might not be available in your country due to copyright reasons')
 
-        sources_js = self._search_regex(
-            r'(?s)sources:\s(\[.*?\]),', webpage, 'sources')
-        sources = self._parse_json(
-            sources_js, video_id, transform_source=js_to_json)
+        formats = [{
+            'format_id': 'hls',
+            'ext': 'mp4',
+            'url': config['EpisodeVideoLink_HLS'],
+        }]
 
-        formats = []
-        for source in sources:
-            if source.get('provider') == 'rtmp':
-                m = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<play_path>.+)$', source['file'])
-                if not m:
-                    continue
-                formats.append({
-                    'format_id': 'rtmp',
-                    'ext': 'flv',
-                    'url': m.group('url'),
-                    'play_path': m.group('play_path'),
-                    'page_url': url,
-                })
-            elif source.get('file').endswith('.m3u8'):
-                formats.append({
-                    'format_id': 'hls',
-                    'ext': source.get('type', 'mp4'),
-                    'url': source['file'],
-                })
+        m = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<play_path>.+)$', config['EpisodeVideoLink'])
+        if m:
+            formats.append({
+                'format_id': 'rtmp',
+                'ext': 'flv',
+                'url': m.group('url'),
+                'play_path': m.group('play_path'),
+                'page_url': url,
+            })
 
         self._sort_formats(formats)
 
@@ -117,8 +106,8 @@ class LnkGoIE(InfoExtractor):
             'title': title,
             'formats': formats,
             'thumbnails': [thumbnail],
-            'duration': duration,
+            'duration': int_or_none(config.get('VideoTime')),
             'description': description,
-            'age_limit': age_limit,
+            'age_limit': self._AGE_LIMITS.get(config.get('PGRating'), 0),
             'upload_date': upload_date,
         }
index 26e84970d49463068f032dcf05afbc03e485e859..762cefa34ec35aa172102a5bbe9f78c129bdef92 100644 (file)
@@ -85,6 +85,7 @@ class LyndaIE(SubtitlesInfoExtractor):
                 } for format_id, video_url in prioritized_streams['0'].items()
             ])
 
+        self._check_formats(formats, video_id)
         self._sort_formats(formats)
 
         if self._downloader.params.get('listsubtitles', False):
index b818cf50c85c79865b5afc09090d6261e81d08c6..3cd4a3a192ce3f6b611f6b3f4f3d928b75c9bba0 100644 (file)
@@ -1,7 +1,5 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import ExtractorError
 
@@ -13,21 +11,22 @@ class MacGameStoreIE(InfoExtractor):
 
     _TEST = {
         'url': 'http://www.macgamestore.com/mediaviewer.php?trailer=2450',
-        'file': '2450.m4v',
         'md5': '8649b8ea684b6666b4c5be736ecddc61',
         'info_dict': {
+            'id': '2450',
+            'ext': 'm4v',
             'title': 'Crow',
         }
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        webpage = self._download_webpage(url, video_id, 'Downloading trailer page')
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(
+            url, video_id, 'Downloading trailer page')
 
-        if re.search(r'>Missing Media<', webpage) is not None:
-            raise ExtractorError('Trailer %s does not exist' % video_id, expected=True)
+        if '>Missing Media<' in webpage:
+            raise ExtractorError(
+                'Trailer %s does not exist' % video_id, expected=True)
 
         video_title = self._html_search_regex(
             r'<title>MacGameStore: (.*?) Trailer</title>', webpage, 'title')
index 07d194562e77044a8d8d87138ed32205842a1a25..1831c6749401405c5a39ca60f4347df03bfe3631 100644 (file)
@@ -9,7 +9,7 @@ from ..compat import (
 from ..utils import (
     ExtractorError,
     HEADRequest,
-    int_or_none,
+    str_to_int,
     parse_iso8601,
 )
 
@@ -18,7 +18,7 @@ class MixcloudIE(InfoExtractor):
     _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/([^/]+)'
     IE_NAME = 'mixcloud'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
         'info_dict': {
             'id': 'dholbach-cryptkeeper',
@@ -33,7 +33,20 @@ class MixcloudIE(InfoExtractor):
             'view_count': int,
             'like_count': int,
         },
-    }
+    }, {
+        'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
+        'info_dict': {
+            'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat',
+            'ext': 'm4a',
+            'title': 'Electric Relaxation vol. 3',
+            'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
+            'uploader': 'Daniel Drumz',
+            'uploader_id': 'gillespeterson',
+            'thumbnail': 're:https?://.*\.jpg',
+            'view_count': int,
+            'like_count': int,
+        },
+    }]
 
     def _get_url(self, track_id, template_url):
         server_count = 30
@@ -60,7 +73,7 @@ class MixcloudIE(InfoExtractor):
         webpage = self._download_webpage(url, track_id)
 
         preview_url = self._search_regex(
-            r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url')
+            r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url')
         song_url = preview_url.replace('/previews/', '/c/originals/')
         template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
         final_song_url = self._get_url(track_id, template_url)
@@ -85,15 +98,17 @@ class MixcloudIE(InfoExtractor):
         uploader_id = self._search_regex(
             r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
         description = self._og_search_description(webpage)
-        like_count = int_or_none(self._search_regex(
-            r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"',
+        like_count = str_to_int(self._search_regex(
+            [r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"',
+             r'/favorites/?">([0-9]+)<'],
             webpage, 'like count', fatal=False))
-        view_count = int_or_none(self._search_regex(
-            r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
+        view_count = str_to_int(self._search_regex(
+            [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
+             r'/listeners/?">([0-9,.]+)</a>'],
             webpage, 'play count', fatal=False))
         timestamp = parse_iso8601(self._search_regex(
             r'<time itemprop="dateCreated" datetime="([^"]+)">',
-            webpage, 'upload date'))
+            webpage, 'upload date', default=None))
 
         return {
             'id': track_id,
index 88c9501cd4e34492003a1fe67923d1d84a9e2d2b..6db3c67a5a471d9cd850ad3bd828a9e2478c00e3 100644 (file)
@@ -1,21 +1,19 @@
 from __future__ import unicode_literals
 
-import json
-import re
-
 from .common import InfoExtractor
 from ..utils import int_or_none
 
 
 class MporaIE(InfoExtractor):
-    _VALID_URL = r'^https?://(www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)'
+    _VALID_URL = r'https?://(www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)'
     IE_NAME = 'MPORA'
 
     _TEST = {
         'url': 'http://mpora.de/videos/AAdo8okx4wiz/embed?locale=de',
-        'file': 'AAdo8okx4wiz.mp4',
         'md5': 'a7a228473eedd3be741397cf452932eb',
         'info_dict': {
+            'id': 'AAdo8okx4wiz',
+            'ext': 'mp4',
             'title': 'Katy Curd -  Winter in the Forest',
             'duration': 416,
             'uploader': 'Peter Newman Media',
@@ -23,14 +21,12 @@ class MporaIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        m = re.match(self._VALID_URL, url)
-        video_id = m.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
+
         data_json = self._search_regex(
             r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json')
-
-        data = json.loads(data_json)
+        data = self._parse_json(data_json, video_id)
 
         uploader = data['info_overlay'].get('username')
         duration = data['video']['duration'] // 1000
index 5ebc78033a4abbb98310096c279fe11459b4a791..bc7f49ebbac86cda7aa1bb711076b783e24bfea8 100644 (file)
@@ -2,10 +2,11 @@ from __future__ import unicode_literals
 
 import re
 
-from .common import InfoExtractor
+from .subtitles import SubtitlesInfoExtractor
 from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
+    compat_str,
 )
 from ..utils import (
     ExtractorError,
@@ -22,7 +23,7 @@ def _media_xml_tag(tag):
     return '{http://search.yahoo.com/mrss/}%s' % tag
 
 
-class MTVServicesInfoExtractor(InfoExtractor):
+class MTVServicesInfoExtractor(SubtitlesInfoExtractor):
     _MOBILE_TEMPLATE = None
 
     @staticmethod
@@ -53,7 +54,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
         webpage_url = self._MOBILE_TEMPLATE % mtvn_id
         req = compat_urllib_request.Request(webpage_url)
         # Otherwise we get a webpage that would execute some javascript
-        req.add_header('Youtubedl-user-agent', 'curl/7')
+        req.add_header('User-Agent', 'curl/7')
         webpage = self._download_webpage(req, mtvn_id,
                                          'Downloading mobile page')
         metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url'))
@@ -78,17 +79,42 @@ class MTVServicesInfoExtractor(InfoExtractor):
             try:
                 _, _, ext = rendition.attrib['type'].partition('/')
                 rtmp_video_url = rendition.find('./src').text
-                formats.append({'ext': ext,
-                                'url': self._transform_rtmp_url(rtmp_video_url),
-                                'format_id': rendition.get('bitrate'),
-                                'width': int(rendition.get('width')),
-                                'height': int(rendition.get('height')),
-                                })
+                if rtmp_video_url.endswith('siteunavail.png'):
+                    continue
+                formats.append({
+                    'ext': ext,
+                    'url': self._transform_rtmp_url(rtmp_video_url),
+                    'format_id': rendition.get('bitrate'),
+                    'width': int(rendition.get('width')),
+                    'height': int(rendition.get('height')),
+                })
             except (KeyError, TypeError):
                 raise ExtractorError('Invalid rendition field.')
         self._sort_formats(formats)
         return formats
 
+    def _extract_subtitles(self, mdoc, mtvn_id):
+        subtitles = {}
+        FORMATS = {
+            'scc': 'cea-608',
+            'eia-608': 'cea-608',
+            'xml': 'ttml',
+        }
+        subtitles_format = FORMATS.get(
+            self._downloader.params.get('subtitlesformat'), 'ttml')
+        for transcript in mdoc.findall('.//transcript'):
+            if transcript.get('kind') != 'captions':
+                continue
+            lang = transcript.get('srclang')
+            for typographic in transcript.findall('./typographic'):
+                captions_format = typographic.get('format')
+                if captions_format == subtitles_format:
+                    subtitles[lang] = compat_str(typographic.get('src'))
+                    break
+        if self._downloader.params.get('listsubtitles', False):
+            self._list_available_subtitles(mtvn_id, subtitles)
+        return self.extract_subtitles(mtvn_id, subtitles)
+
     def _get_video_info(self, itemdoc):
         uri = itemdoc.find('guid').text
         video_id = self._id_from_uri(uri)
@@ -135,6 +161,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
         return {
             'title': title,
             'formats': self._extract_video_formats(mediagen_doc, mtvn_id),
+            'subtitles': self._extract_subtitles(mediagen_doc, mtvn_id),
             'id': video_id,
             'thumbnail': self._get_thumbnail_url(uri, itemdoc),
             'description': description,
@@ -167,7 +194,11 @@ class MTVServicesInfoExtractor(InfoExtractor):
             mgid = self._search_regex(
                 [r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
                 webpage, 'mgid')
-        return self._get_videos_info(mgid)
+
+        videos_info = self._get_videos_info(mgid)
+        if self._downloader.params.get('listsubtitles', False):
+            return
+        return videos_info
 
 
 class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
@@ -212,25 +243,14 @@ class MTVIE(MTVServicesInfoExtractor):
     _TESTS = [
         {
             'url': 'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
-            'file': '853555.mp4',
             'md5': '850f3f143316b1e71fa56a4edfd6e0f8',
             'info_dict': {
+                'id': '853555',
+                'ext': 'mp4',
                 'title': 'Taylor Swift - "Ours (VH1 Storytellers)"',
                 'description': 'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
             },
         },
-        {
-            'add_ie': ['Vevo'],
-            'url': 'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
-            'file': 'USCJY1331283.mp4',
-            'md5': '73b4e7fcadd88929292fe52c3ced8caf',
-            'info_dict': {
-                'title': 'Everything Has Changed',
-                'upload_date': '20130606',
-                'uploader': 'Taylor Swift',
-            },
-            'skip': 'VEVO is only available in some countries',
-        },
     ]
 
     def _get_thumbnail_url(self, uri, itemdoc):
@@ -244,8 +264,8 @@ class MTVIE(MTVServicesInfoExtractor):
             webpage = self._download_webpage(url, video_id)
 
             # Some videos come from Vevo.com
-            m_vevo = re.search(r'isVevoVideo = true;.*?vevoVideoId = "(.*?)";',
-                               webpage, re.DOTALL)
+            m_vevo = re.search(
+                r'(?s)isVevoVideo = true;.*?vevoVideoId = "(.*?)";', webpage)
             if m_vevo:
                 vevo_id = m_vevo.group(1)
                 self.to_screen('Vevo video detected: %s' % vevo_id)
index 690c46b6a57be11edf36899b959318af5e482119..f840f65321997078859ab5f74682969ec4499359 100644 (file)
@@ -6,6 +6,7 @@ import json
 from .common import InfoExtractor
 from ..compat import (
     compat_str,
+    compat_HTTPError,
 )
 from ..utils import (
     ExtractorError,
@@ -78,6 +79,16 @@ class NBCNewsIE(InfoExtractor):
             },
             'add_ie': ['ThePlatform'],
         },
+        {
+            'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156',
+            'md5': 'fdbf39ab73a72df5896b6234ff98518a',
+            'info_dict': {
+                'id': 'Wjf9EDR3A_60',
+                'ext': 'mp4',
+                'title': 'FULL EPISODE: Family Business',
+                'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
+            },
+        },
     ]
 
     def _real_extract(self, url):
@@ -115,10 +126,19 @@ class NBCNewsIE(InfoExtractor):
                 if not base_url:
                     continue
                 playlist_url = base_url + '?form=MPXNBCNewsAPI'
-                all_videos = self._download_json(playlist_url, title)['videos']
 
                 try:
-                    info = next(v for v in all_videos if v['mpxId'] == mpxid)
+                    all_videos = self._download_json(playlist_url, title)
+                except ExtractorError as ee:
+                    if isinstance(ee.cause, compat_HTTPError):
+                        continue
+                    raise
+
+                if not all_videos or 'videos' not in all_videos:
+                    continue
+
+                try:
+                    info = next(v for v in all_videos['videos'] if v['mpxId'] == mpxid)
                     break
                 except StopIteration:
                     continue
index 95e7d63aade1edbc6a0c300bcc18168d707f8716..2a1ca80df797f0abe63cc6327c5e283965865f70 100644 (file)
@@ -27,9 +27,7 @@ class NDTVIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         filename = self._search_regex(
index efc903afa93465473621ef89ccc81142320b90ee..dff78e4862390e4e6468a34d804001d2156221a7 100644 (file)
@@ -11,6 +11,7 @@ class NerdCubedFeedIE(InfoExtractor):
     _TEST = {
         'url': 'http://www.nerdcubed.co.uk/feed.json',
         'info_dict': {
+            'id': 'nerdcubed-feed',
             'title': 'nerdcubed.co.uk feed',
         },
         'playlist_mincount': 1300,
diff --git a/youtube_dl/extractor/nerdist.py b/youtube_dl/extractor/nerdist.py
new file mode 100644 (file)
index 0000000..c6dc34b
--- /dev/null
@@ -0,0 +1,80 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+from ..utils import (
+    determine_ext,
+    parse_iso8601,
+    xpath_text,
+)
+
+
+class NerdistIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?nerdist\.com/vepisode/(?P<id>[^/?#]+)'
+    _TEST = {
+        'url': 'http://www.nerdist.com/vepisode/exclusive-which-dc-characters-w',
+        'md5': '3698ed582931b90d9e81e02e26e89f23',
+        'info_dict': {
+            'display_id': 'exclusive-which-dc-characters-w',
+            'id': 'RPHpvJyr',
+            'ext': 'mp4',
+            'title': 'Your TEEN TITANS Revealed! Who\'s on the show?',
+            'thumbnail': 're:^https?://.*/thumbs/.*\.jpg$',
+            'description': 'Exclusive: Find out which DC Comics superheroes will star in TEEN TITANS Live-Action TV Show on Nerdist News with Jessica Chobot!',
+            'uploader': 'Eric Diaz',
+            'upload_date': '20150202',
+            'timestamp': 1422892808,
+        }
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        video_id = self._search_regex(
+            r'''(?x)<script\s+(?:type="text/javascript"\s+)?
+                src="https?://content\.nerdist\.com/players/([a-zA-Z0-9_]+)-''',
+            webpage, 'video ID')
+        timestamp = parse_iso8601(self._html_search_meta(
+            'shareaholic:article_published_time', webpage, 'upload date'))
+        uploader = self._html_search_meta(
+            'shareaholic:article_author_name', webpage, 'article author')
+
+        doc = self._download_xml(
+            'http://content.nerdist.com/jw6/%s.xml' % video_id, video_id)
+        video_info = doc.find('.//item')
+        title = xpath_text(video_info, './title', fatal=True)
+        description = xpath_text(video_info, './description')
+        thumbnail = xpath_text(
+            video_info, './{http://rss.jwpcdn.com/}image', 'thumbnail')
+
+        formats = []
+        for source in video_info.findall('./{http://rss.jwpcdn.com/}source'):
+            vurl = source.attrib['file']
+            ext = determine_ext(vurl)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    vurl, video_id, entry_protocol='m3u8_native', ext='mp4',
+                    preference=0))
+            elif ext == 'smil':
+                formats.extend(self._extract_smil_formats(
+                    vurl, video_id, fatal=False
+                ))
+            else:
+                formats.append({
+                    'format_id': ext,
+                    'url': vurl,
+                })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'formats': formats,
+            'uploader': uploader,
+        }
diff --git a/youtube_dl/extractor/nextmedia.py b/youtube_dl/extractor/nextmedia.py
new file mode 100644 (file)
index 0000000..02dba4e
--- /dev/null
@@ -0,0 +1,163 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import parse_iso8601
+
+
+class NextMediaIE(InfoExtractor):
+    _VALID_URL = r'http://hk.apple.nextmedia.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://hk.apple.nextmedia.com/realtime/news/20141108/53109199',
+        'md5': 'dff9fad7009311c421176d1ac90bfe4f',
+        'info_dict': {
+            'id': '53109199',
+            'ext': 'mp4',
+            'title': '【佔領金鐘】50外國領事議員撐場 讚學生勇敢香港有希望',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'description': 'md5:28222b9912b6665a21011b034c70fcc7',
+            'timestamp': 1415456273,
+            'upload_date': '20141108',
+        }
+    }]
+
+    _URL_PATTERN = r'\{ url: \'(.+)\' \}'
+
+    def _real_extract(self, url):
+        news_id = self._match_id(url)
+        page = self._download_webpage(url, news_id)
+        return self._extract_from_nextmedia_page(news_id, url, page)
+
+    def _extract_from_nextmedia_page(self, news_id, url, page):
+        title = self._fetch_title(page)
+        video_url = self._search_regex(self._URL_PATTERN, page, 'video url')
+
+        attrs = {
+            'id': news_id,
+            'title': title,
+            'url': video_url,  # ext can be inferred from url
+            'thumbnail': self._fetch_thumbnail(page),
+            'description': self._fetch_description(page),
+        }
+
+        timestamp = self._fetch_timestamp(page)
+        if timestamp:
+            attrs['timestamp'] = timestamp
+        else:
+            attrs['upload_date'] = self._fetch_upload_date(url)
+
+        return attrs
+
+    def _fetch_title(self, page):
+        return self._og_search_title(page)
+
+    def _fetch_thumbnail(self, page):
+        return self._og_search_thumbnail(page)
+
+    def _fetch_timestamp(self, page):
+        dateCreated = self._search_regex('"dateCreated":"([^"]+)"', page, 'created time')
+        return parse_iso8601(dateCreated)
+
+    def _fetch_upload_date(self, url):
+        return self._search_regex(self._VALID_URL, url, 'upload date', group='date')
+
+    def _fetch_description(self, page):
+        return self._og_search_property('description', page)
+
+
+class NextMediaActionNewsIE(NextMediaIE):
+    _VALID_URL = r'http://hk.dv.nextmedia.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+'
+    _TESTS = [{
+        'url': 'http://hk.dv.nextmedia.com/actionnews/hit/20150121/19009428/20061460',
+        'md5': '05fce8ffeed7a5e00665d4b7cf0f9201',
+        'info_dict': {
+            'id': '19009428',
+            'ext': 'mp4',
+            'title': '【壹週刊】細10年男友偷食 50歲邵美琪再失戀',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'description': 'md5:cd802fad1f40fd9ea178c1e2af02d659',
+            'timestamp': 1421791200,
+            'upload_date': '20150120',
+        }
+    }]
+
+    def _real_extract(self, url):
+        news_id = self._match_id(url)
+        actionnews_page = self._download_webpage(url, news_id)
+        article_url = self._og_search_url(actionnews_page)
+        article_page = self._download_webpage(article_url, news_id)
+        return self._extract_from_nextmedia_page(news_id, url, article_page)
+
+
+class AppleDailyRealtimeNewsIE(NextMediaIE):
+    _VALID_URL = r'http://(www|ent).appledaily.com.tw/(realtimenews|enews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
+    _TESTS = [{
+        'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694',
+        'md5': 'a843ab23d150977cc55ef94f1e2c1e4d',
+        'info_dict': {
+            'id': '36354694',
+            'ext': 'mp4',
+            'title': '周亭羽走過摩鐵陰霾2男陪吃 九把刀孤寒看醫生',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'description': 'md5:b23787119933404ce515c6356a8c355c',
+            'upload_date': '20150128',
+        }
+    }, {
+        'url': 'http://www.appledaily.com.tw/realtimenews/article/strange/20150128/550549/%E4%B8%8D%E6%BB%BF%E8%A2%AB%E8%B8%A9%E8%85%B3%E3%80%80%E5%B1%B1%E6%9D%B1%E5%85%A9%E5%A4%A7%E5%AA%BD%E4%B8%80%E8%B7%AF%E6%89%93%E4%B8%8B%E8%BB%8A',
+        'md5': '86b4e9132d158279c7883822d94ccc49',
+        'info_dict': {
+            'id': '550549',
+            'ext': 'mp4',
+            'title': '不滿被踩腳 山東兩大媽一路打下車',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'description': 'md5:2648aaf6fc4f401f6de35a91d111aa1d',
+            'upload_date': '20150128',
+        }
+    }]
+
+    _URL_PATTERN = r'\{url: \'(.+)\'\}'
+
+    def _fetch_title(self, page):
+        return self._html_search_regex(r'<h1 id="h1">([^<>]+)</h1>', page, 'news title')
+
+    def _fetch_thumbnail(self, page):
+        return self._html_search_regex(r"setInitialImage\(\'([^']+)'\)", page, 'video thumbnail', fatal=False)
+
+    def _fetch_timestamp(self, page):
+        return None
+
+
+class AppleDailyAnimationNewsIE(AppleDailyRealtimeNewsIE):
+    _VALID_URL = 'http://www.appledaily.com.tw/animation/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
+    _TESTS = [{
+        'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003671',
+        'md5': '03df296d95dedc2d5886debbb80cb43f',
+        'info_dict': {
+            'id': '5003671',
+            'ext': 'mp4',
+            'title': '20正妹熱舞 《刀龍傳說Online》火辣上市',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'description': 'md5:23c0aac567dc08c9c16a3161a2c2e3cd',
+            'upload_date': '20150128',
+        }
+    }, {
+        # No thumbnail
+        'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003673/',
+        'md5': 'b06182cd386ea7bc6115ec7ff0f72aeb',
+        'info_dict': {
+            'id': '5003673',
+            'ext': 'mp4',
+            'title': '半夜尿尿 好像會看到___',
+            'description': 'md5:61d2da7fe117fede148706cdb85ac066',
+            'upload_date': '20150128',
+        },
+        'expected_warnings': [
+            'video thumbnail',
+        ]
+    }]
+
+    def _fetch_title(self, page):
+        return self._html_search_meta('description', page, 'news title')
+
+    def _fetch_description(self, page):
+        return self._html_search_meta('description', page, 'news description')
index 606e2294efb716cfe755d1b9564357dbda7f9039..2684dd250aa65e22903612f4a1780fc8f701296a 100644 (file)
@@ -46,7 +46,18 @@ class NFLIE(InfoExtractor):
                 'timestamp': 1388354455,
                 'thumbnail': 're:^https?://.*\.jpg$',
             }
-        }
+        },
+        {
+            'url': 'http://www.nfl.com/news/story/0ap3000000467586/article/patriots-seahawks-involved-in-lategame-skirmish',
+            'info_dict': {
+                'id': '0ap3000000467607',
+                'ext': 'mp4',
+                'title': 'Frustrations flare on the field',
+                'description': 'Emotions ran high at the end of the Super Bowl on both sides of the ball after a dramatic finish.',
+                'timestamp': 1422850320,
+                'upload_date': '20150202',
+            },
+        },
     ]
 
     @staticmethod
@@ -80,7 +91,11 @@ class NFLIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         config_url = NFLIE.prepend_host(host, self._search_regex(
-            r'(?:config|configURL)\s*:\s*"([^"]+)"', webpage, 'config URL'))
+            r'(?:config|configURL)\s*:\s*"([^"]+)"', webpage, 'config URL',
+            default='static/content/static/config/video/config.json'))
+        # For articles, the id in the url is not the video id
+        video_id = self._search_regex(
+            r'contentId\s*:\s*"([^"]+)"', webpage, 'video id', default=video_id)
         config = self._download_json(config_url, video_id,
                                      note='Downloading player config')
         url_template = NFLIE.prepend_host(
index d3a4fc51387a8e1a1b56718ba9dafabf1d9a7db5..40746599880469f5c79110020f39d31f2a8cbff6 100644 (file)
@@ -20,6 +20,12 @@ class NHLBaseInfoExtractor(InfoExtractor):
     def _fix_json(json_string):
         return json_string.replace('\\\'', '\'')
 
+    def _real_extract_video(self, video_id):
+        # Fetch the playlist JSON for this id and extract its first entry.
+        playlist_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id
+        playlist = self._download_json(playlist_url, video_id, transform_source=self._fix_json)
+        return self._extract_video(playlist[0])
+
     def _extract_video(self, info):
         video_id = info['id']
         self.report_extraction(video_id)
@@ -54,7 +60,7 @@ class NHLBaseInfoExtractor(InfoExtractor):
 
 class NHLIE(NHLBaseInfoExtractor):
     IE_NAME = 'nhl.com'
-    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
+    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console)?(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
 
     _TESTS = [{
         'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
@@ -92,15 +98,41 @@ class NHLIE(NHLBaseInfoExtractor):
     }, {
         'url': 'http://video.flames.nhl.com/videocenter/console?id=630616',
         'only_matching': True,
+    }, {
+        'url': 'http://video.nhl.com/videocenter/?id=736722',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id
-        data = self._download_json(
-            json_url, video_id, transform_source=self._fix_json)
-        return self._extract_video(data[0])
+        video_id = self._match_id(url)
+        return self._real_extract_video(video_id)
+
+
+class NHLNewsIE(NHLBaseInfoExtractor):
+    IE_NAME = 'nhl.com:news'
+    IE_DESC = 'NHL news'
+    _VALID_URL = r'https?://(?:www\.)?nhl\.com/ice/news\.html?(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
+
+    _TEST = {
+        'url': 'http://www.nhl.com/ice/news.htm?id=750727',
+        'md5': '4b3d1262e177687a3009937bd9ec0be8',
+        'info_dict': {
+            'id': '736722',
+            'ext': 'mp4',
+            'title': 'Cal Clutterbuck has been fined $2,000',
+            'description': 'md5:45fe547d30edab88b23e0dd0ab1ed9e6',
+            'duration': 37,
+            'upload_date': '20150128',
+        },
+    }
+
+    def _real_extract(self, url):
+        # The id in the URL only locates the news page; the video id is read from that page.
+        article_id = self._match_id(url)
+        page = self._download_webpage(url, article_id)
+        clip_id = self._search_regex(
+            [r'pVid(\d+)', r"nlid\s*:\s*'(\d+)'"], page, 'video id')
+        return self._real_extract_video(clip_id)
 
 
 class NHLVideocenterIE(NHLBaseInfoExtractor):
index c13ff0d650bcd443bf7a6b6d2444215892d72732..5952d136f7b3efd3e9f91843ba12de6a13d989ba 100644 (file)
@@ -1,8 +1,6 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 
 from ..utils import (
@@ -11,7 +9,7 @@ from ..utils import (
 
 
 class NormalbootsIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$'
+    _VALID_URL = r'http://(?:www\.)?normalboots\.com/video/(?P<id>[0-9a-z-]*)/?$'
     _TEST = {
         'url': 'http://normalboots.com/video/home-alone-games-jontron/',
         'md5': '8bf6de238915dd501105b44ef5f1e0f6',
@@ -30,19 +28,22 @@ class NormalbootsIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('videoid')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-        video_uploader = self._html_search_regex(r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
-                                                 webpage, 'uploader')
-        raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
-                                                  webpage, 'date')
-        video_upload_date = unified_strdate(raw_upload_date)
 
-        player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url')
+        video_uploader = self._html_search_regex(
+            r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
+            webpage, 'uploader', fatal=False)
+        video_upload_date = unified_strdate(self._html_search_regex(
+            r'<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
+            webpage, 'date', fatal=False))
+
+        player_url = self._html_search_regex(
+            r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"',
+            webpage, 'player url')
         player_page = self._download_webpage(player_url, video_id)
-        video_url = self._html_search_regex(r"file:\s'(?P<file>[^']+\.mp4)'", player_page, 'file')
+        video_url = self._html_search_regex(
+            r"file:\s'(?P<file>[^']+\.mp4)'", player_page, 'file')
 
         return {
             'id': video_id,
index 175b14583efbad65d9fbb1777d14dbb5576c1cc1..54be06a4edc375f736cc038961eac67e73eecf85 100644 (file)
@@ -1,6 +1,6 @@
 from __future__ import unicode_literals
 
-from .common import InfoExtractor
+from .subtitles import SubtitlesInfoExtractor
 from ..utils import (
     fix_xml_ampersands,
     parse_duration,
@@ -11,7 +11,7 @@ from ..utils import (
 )
 
 
-class NPOBaseIE(InfoExtractor):
+class NPOBaseIE(SubtitlesInfoExtractor):
     def _get_token(self, video_id):
         token_page = self._download_webpage(
             'http://ida.omroep.nl/npoplayer/i.js',
@@ -161,6 +161,16 @@ class NPOIE(NPOBaseIE):
 
         self._sort_formats(formats)
 
+        subtitles = {}
+        if metadata.get('tt888') == 'ja':
+            subtitles['nl'] = 'http://e.omroep.nl/tt888/%s' % video_id
+
+        if self._downloader.params.get('listsubtitles', False):
+            self._list_available_subtitles(video_id, subtitles)
+            return
+
+        subtitles = self.extract_subtitles(video_id, subtitles)
+
         return {
             'id': video_id,
             'title': metadata['titel'],
@@ -169,6 +179,7 @@ class NPOIE(NPOBaseIE):
             'upload_date': unified_strdate(metadata.get('gidsdatum')),
             'duration': parse_duration(metadata.get('tijdsduur')),
             'formats': formats,
+            'subtitles': subtitles,
         }
 
 
diff --git a/youtube_dl/extractor/ntvde.py b/youtube_dl/extractor/ntvde.py
new file mode 100644 (file)
index 0000000..d2cfe09
--- /dev/null
@@ -0,0 +1,68 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    js_to_json,
+    parse_duration,
+)
+
+
+class NTVDeIE(InfoExtractor):
+    IE_NAME = 'n-tv.de'
+    _VALID_URL = r'https?://(?:www\.)?n-tv\.de/mediathek/videos/[^/?#]+/[^/?#]+-article(?P<id>.+)\.html'
+
+    _TESTS = [{
+        'url': 'http://www.n-tv.de/mediathek/videos/panorama/Schnee-und-Glaette-fuehren-zu-zahlreichen-Unfaellen-und-Staus-article14438086.html',
+        'md5': '6ef2514d4b1e8e03ca24b49e2f167153',
+        'info_dict': {
+            'id': '14438086',
+            'ext': 'mp4',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'title': 'Schnee und Glätte führen zu zahlreichen Unfällen und Staus',
+            'alt_title': 'Winterchaos auf deutschen Straßen',
+            'description': 'Schnee und Glätte sorgen deutschlandweit für einen chaotischen Start in die Woche: Auf den Straßen kommt es zu kilometerlangen Staus und Dutzenden Glätteunfällen. In Düsseldorf und München wirbelt der Schnee zudem den Flugplan durcheinander. Dutzende Flüge landen zu spät, einige fallen ganz aus.',
+            'duration': 4020,
+            'timestamp': 1422892797,
+            'upload_date': '20150202',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        info = self._parse_json(self._search_regex(
+            r'(?s)ntv\.pageInfo\.article =\s(\{.*?\});', webpage, 'info'),
+            video_id, transform_source=js_to_json)
+        timestamp = int_or_none(info.get('publishedDateAsUnixTimeStamp'))
+        vdata = self._parse_json(self._search_regex(
+            r'(?s)\$\(\s*"\#player"\s*\)\s*\.data\(\s*"player",\s*(\{.*?\})\);',
+            webpage, 'player data'),
+            video_id, transform_source=js_to_json)
+        duration = parse_duration(vdata.get('duration'))
+        formats = [{
+            'format_id': 'flash',
+            'url': 'rtmp://fms.n-tv.de/' + vdata['video'],
+        }, {
+            'format_id': 'mobile',
+            'url': 'http://video.n-tv.de' + vdata['videoMp4'],
+            'tbr': 400,  # estimation
+        }]
+        m3u8_url = 'http://video.n-tv.de' + vdata['videoM3u8']
+        formats.extend(self._extract_m3u8_formats(
+            m3u8_url, video_id, ext='mp4',
+            entry_protocol='m3u8_native', preference=0))
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': info['headline'],
+            'description': info.get('intro'),
+            'alt_title': info.get('kicker'),
+            'timestamp': timestamp,
+            'thumbnail': vdata.get('html5VideoPoster'),
+            'duration': duration,
+            'formats': formats,
+        }
similarity index 97%
rename from youtube_dl/extractor/ntv.py
rename to youtube_dl/extractor/ntvru.py
index ee740cd9c0fe71a48b79aee00c40ea610e81ea99..0ab8d510011737775c397d8298c9198a6a5035ce 100644 (file)
@@ -1,15 +1,14 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     unescapeHTML
 )
 
 
-class NTVIE(InfoExtractor):
+class NTVRuIE(InfoExtractor):
+    IE_NAME = 'ntv.ru'
     _VALID_URL = r'http://(?:www\.)?ntv\.ru/(?P<id>.+)'
 
     _TESTS = [
@@ -92,9 +91,7 @@ class NTVIE(InfoExtractor):
     ]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         page = self._download_webpage(url, video_id)
 
         video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, page, 'video id')
index 634142d0d27300eb82ea2f460fd2163a20208709..fb2032832e4757e328d016ab289e892721d73af2 100644 (file)
@@ -10,6 +10,7 @@ from ..compat import (
     compat_urllib_request,
 )
 from ..utils import (
+    ExtractorError,
     str_to_int,
 )
 from ..aes import (
@@ -44,6 +45,15 @@ class PornHubIE(InfoExtractor):
         req.add_header('Cookie', 'age_verified=1')
         webpage = self._download_webpage(req, video_id)
 
+        error_msg = self._html_search_regex(
+            r'(?s)<div class="userMessageSection[^"]*".*?>(.*?)</div>',
+            webpage, 'error message', default=None)
+        if error_msg:
+            error_msg = re.sub(r'\s+', ' ', error_msg)
+            raise ExtractorError(
+                'PornHub said: %s' % error_msg,
+                expected=True, video_id=video_id)
+
         video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
         video_uploader = self._html_search_regex(
             r'(?s)From:&nbsp;.+?<(?:a href="/users/|a href="/channels/|<span class="username)[^>]+>(.+?)<',
index 59dc137cc225889feb9428dd70f42a91451a951d..efa4afeb6a6615a4fa1e90781f27d3dd65083810 100644 (file)
@@ -6,12 +6,13 @@ from .common import InfoExtractor
 
 
 class RingTVIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)'
+    _VALID_URL = r'http://(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)'
     _TEST = {
         "url": "http://ringtv.craveonline.com/news/310833-luis-collazo-says-victor-ortiz-better-not-quit-on-jan-30",
-        "file": "857645.mp4",
         "md5": "d25945f5df41cdca2d2587165ac28720",
         "info_dict": {
+            'id': '857645',
+            'ext': 'mp4',
             "title": 'Video: Luis Collazo says Victor Ortiz "better not quit on Jan. 30" - Ring TV',
             "description": 'Luis Collazo is excited about his Jan. 30 showdown with fellow former welterweight titleholder Victor Ortiz at Barclays Center in his hometown of Brooklyn. The SuperBowl week fight headlines a Golden Boy Live! card on Fox Sports 1.',
         }
index c1500b82feb83c419bdb3141d6fc3a5bcd90d90d..e8bb20a0803700937875355d2f854d1de88cea1a 100644 (file)
@@ -10,8 +10,9 @@ class RottenTomatoesIE(VideoDetectiveIE):
 
     _TEST = {
         'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/',
-        'file': '613340.mp4',
         'info_dict': {
+            'id': '613340',
+            'ext': 'mp4',
             'title': 'TOY STORY 3',
             'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.',
         },
diff --git a/youtube_dl/extractor/rtl2.py b/youtube_dl/extractor/rtl2.py
new file mode 100644 (file)
index 0000000..72cd804
--- /dev/null
@@ -0,0 +1,72 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class RTL2IE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?rtl2\.de/[^?#]*?/(?P<id>[^?#/]*?)(?:$|/(?:$|[?#]))'
+    _TESTS = [{
+        'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0',
+        'md5': 'bfcc179030535b08dc2b36b469b5adc7',
+        'info_dict': {
+            'id': 'folge-203-0',
+            'ext': 'f4v',
+            'title': 'GRIP sucht den Sommerkönig',
+            'description': 'Matthias, Det und Helge treten gegeneinander an.'
+        },
+    }, {
+        'url': 'http://www.rtl2.de/sendung/koeln-50667/video/5512-anna/21040-anna-erwischt-alex/',
+        'md5': 'ffcd517d2805b57ce11a58a2980c2b02',
+        'info_dict': {
+            'id': '21040-anna-erwischt-alex',
+            'ext': 'mp4',
+            'title': 'Anna erwischt Alex!',
+            'description': 'Anna ist Alex\' Tochter bei Köln 50667.'
+        },
+    }]
+
+    def _real_extract(self, url):
+        # Some rtl2 URLs lack the trailing slash; append it before matching.
+        if not url.endswith('/'):
+            url += '/'
+
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        vico_id = self._html_search_regex(
+            r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id')
+        vivi_id = self._html_search_regex(
+            r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id')
+        info_url = 'http://www.rtl2.de/video/php/get_video.php?vico_id=' + vico_id + '&vivi_id=' + vivi_id
+
+        # get_video.php is parsed as JSON below; request it only once.
+        info = self._download_json(info_url, video_id)
+        video_info = info['video']
+        title = video_info['titel']
+        description = video_info.get('beschreibung')
+        thumbnail = video_info.get('image')
+
+        download_url = video_info['streamurl']
+        download_url = download_url.replace('\\', '')
+        stream_url = 'mp4:' + self._html_search_regex(r'ondemand/(.*)', download_url, 'stream URL')
+        rtmp_conn = ["S:connect", "O:1", "NS:pageUrl:" + url, "NB:fpad:0", "NN:videoFunction:1", "O:0"]
+
+        formats = [{
+            'url': download_url,
+            'play_path': stream_url,
+            'player_url': 'http://www.rtl2.de/flashplayer/vipo_player.swf',
+            'page_url': url,
+            'flash_version': 'LNX 11,2,202,429',
+            'rtmp_conn': rtmp_conn,
+            'no_resume': True,
+        }]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'description': description,
+            'formats': formats,
+        }
index 7736cabbac6b855be739b8c7521a073ea630f32f..4511cba4964ae83f1c3da5e648a0a4855c5a1144 100644 (file)
@@ -49,6 +49,7 @@ class RTPIE(InfoExtractor):
             'ext': ext,
             'vcodec': config.get('type') == 'audio' and 'none' or None,
             'player_url': 'http://programas.rtp.pt/play/player.swf?v3',
+            'rtmp_real_time': True,
         }]
 
         return {
index 5e84c109802e34ce8f57496ee3b7e2cd409c0788..d0981115da9c64f1addf3108b8a9e6acf0c6508e 100644 (file)
@@ -6,12 +6,14 @@ import re
 from .common import InfoExtractor
 from ..compat import (
     compat_str,
+    compat_urllib_parse_urlparse,
 )
 from ..utils import (
     int_or_none,
     parse_duration,
     parse_iso8601,
     unescapeHTML,
+    xpath_text,
 )
 
 
@@ -159,11 +161,27 @@ class RTSIE(InfoExtractor):
             return int_or_none(self._search_regex(
                 r'-([0-9]+)k\.', url, 'bitrate', default=None))
 
-        formats = [{
-            'format_id': fid,
-            'url': furl,
-            'tbr': extract_bitrate(furl),
-        } for fid, furl in info['streams'].items()]
+        formats = []
+        for format_id, format_url in info['streams'].items():
+            if format_url.endswith('.f4m'):
+                token = self._download_xml(
+                    'http://tp.srgssr.ch/token/akahd.xml?stream=%s/*' % compat_urllib_parse_urlparse(format_url).path,
+                    video_id, 'Downloading %s token' % format_id)
+                auth_params = xpath_text(token, './/authparams', 'auth params')
+                if not auth_params:
+                    continue
+                formats.extend(self._extract_f4m_formats(
+                    '%s?%s&hdcore=3.4.0&plugin=aasp-3.4.0.132.66' % (format_url, auth_params),
+                    video_id, f4m_id=format_id))
+            elif format_url.endswith('.m3u8'):
+                formats.extend(self._extract_m3u8_formats(
+                    format_url, video_id, 'mp4', m3u8_id=format_id))
+            else:
+                formats.append({
+                    'format_id': format_id,
+                    'url': format_url,
+                    'tbr': extract_bitrate(format_url),
+                })
 
         if 'media' in info:
             formats.extend([{
index 0ce22d60c7fa995980e3f70159583f156672d76b..3469d9578f5317222a404ce8f5918dd133d6f381 100644 (file)
@@ -57,7 +57,7 @@ def _decrypt_url(png):
 class RTVEALaCartaIE(InfoExtractor):
     IE_NAME = 'rtve.es:alacarta'
     IE_DESC = 'RTVE a la carta'
-    _VALID_URL = r'http://www\.rtve\.es/alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'
+    _VALID_URL = r'http://www\.rtve\.es/(m/)?alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
@@ -74,7 +74,11 @@ class RTVEALaCartaIE(InfoExtractor):
             'id': '1694255',
             'ext': 'flv',
             'title': 'TODO',
-        }
+        },
+        'skip': 'The f4m manifest can\'t be used yet',
+    }, {
+        'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -86,6 +90,18 @@ class RTVEALaCartaIE(InfoExtractor):
         png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id
         png = self._download_webpage(png_url, video_id, 'Downloading url information')
         video_url = _decrypt_url(png)
+        if not video_url.endswith('.f4m'):
+            auth_url = video_url.replace(
+                'resources/', 'auth/resources/'
+            ).replace('.net.rtve', '.multimedia.cdn.rtve')
+            video_path = self._download_webpage(
+                auth_url, video_id, 'Getting video url')
+            # Use mvod.akcdn instead of flash.akamaihd.multimedia.cdn to get
+            # the right Content-Length header and the mp4 format
+            video_url = (
+                'http://mvod.akcdn.rtve.es/{0}&v=2.6.8'
+                '&fp=MAC%2016,0,0,296&r=MRUGG&g=OEOJWFXNFGCP'.format(video_path)
+            )
 
         return {
             'id': video_id,
index a73e6f331fc02a8977863a412227681b3838b91a..ef766237bf318d40da067a6a820a725fbe0da286 100644 (file)
@@ -162,10 +162,8 @@ class RUTVIE(InfoExtractor):
                         'vbr': int(quality),
                     }
                 elif transport == 'm3u8':
-                    fmt = {
-                        'url': url,
-                        'ext': 'mp4',
-                    }
+                    formats.extend(self._extract_m3u8_formats(url, video_id, 'mp4'))
+                    continue
                 else:
                     fmt = {
                         'url': url
index 16dc3736b48bfb15a94b98713beef4757446b642..c013d678f70f36f8589007cf1b6cc14d036cde21 100644 (file)
@@ -1,7 +1,5 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
@@ -13,10 +11,15 @@ class ServingSysIE(InfoExtractor):
 
     _TEST = {
         'url': 'http://bs.serving-sys.com/BurstingPipe/adServer.bs?cn=is&c=23&pl=VAST&pli=5349193&PluID=0&pos=7135&ord=[timestamp]&cim=1?',
+        'info_dict': {
+            'id': '5349193',
+            'title': 'AdAPPter_Hyundai_demo',
+        },
         'playlist': [{
-            'file': '29955898.flv',
             'md5': 'baed851342df6846eb8677a60a011a0f',
             'info_dict': {
+                'id': '29955898',
+                'ext': 'flv',
                 'title': 'AdAPPter_Hyundai_demo (1)',
                 'duration': 74,
                 'tbr': 1378,
@@ -24,9 +27,10 @@ class ServingSysIE(InfoExtractor):
                 'height': 400,
             },
         }, {
-            'file': '29907998.flv',
             'md5': '979b4da2655c4bc2d81aeb915a8c5014',
             'info_dict': {
+                'id': '29907998',
+                'ext': 'flv',
                 'title': 'AdAPPter_Hyundai_demo (2)',
                 'duration': 34,
                 'width': 854,
@@ -37,14 +41,13 @@ class ServingSysIE(InfoExtractor):
         'params': {
             'playlistend': 2,
         },
-        'skip': 'Blocked in the US [sic]',
+        '_skip': 'Blocked in the US [sic]',
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        pl_id = mobj.group('id')
-
+        pl_id = self._match_id(url)
         vast_doc = self._download_xml(url, pl_id)
+
         title = vast_doc.find('.//AdTitle').text
         media = vast_doc.find('.//MediaFile').text
         info_url = self._search_regex(r'&adData=([^&]+)&', media, 'info URL')
index a63d126d4560dda83133fa6280116ca517e71bdc..0891a441f85f42b75d91f1d267fabdd1b5e952ce 100644 (file)
@@ -11,7 +11,7 @@ from ..compat import (
 
 
 class SinaIE(InfoExtractor):
-    _VALID_URL = r'''https?://(.*?\.)?video\.sina\.com\.cn/
+    _VALID_URL = r'''(?x)https?://(.*?\.)?video\.sina\.com\.cn/
                         (
                             (.+?/(((?P<pseudo_id>\d+).html)|(.*?(\#|(vid=)|b/)(?P<id>\d+?)($|&|\-))))
                             |
@@ -23,9 +23,10 @@ class SinaIE(InfoExtractor):
     _TESTS = [
         {
             'url': 'http://video.sina.com.cn/news/vlist/zt/chczlj2013/?opsubject_id=top12#110028898',
-            'file': '110028898.flv',
             'md5': 'd65dd22ddcf44e38ce2bf58a10c3e71f',
             'info_dict': {
+                'id': '110028898',
+                'ext': 'flv',
                 'title': '《中国新闻》 朝鲜要求巴拿马立即释放被扣船员',
             }
         },
@@ -39,10 +40,6 @@ class SinaIE(InfoExtractor):
         },
     ]
 
-    @classmethod
-    def suitable(cls, url):
-        return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None
-
     def _extract_video(self, video_id):
         data = compat_urllib_parse.urlencode({'vid': video_id})
         url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data,
@@ -59,7 +56,7 @@ class SinaIE(InfoExtractor):
                 }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
+        mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
         if mobj.group('token') is not None:
             # The video id is in the redirected url
index 26f361c93990b6b92ff31d2447b70f7e08263d00..24746a09a0c2183e8a0bd8e239cb59291b41f19a 100644 (file)
@@ -102,12 +102,13 @@ class SmotriIE(InfoExtractor):
                 'uploader_id': 'mopeder',
                 'duration': 71,
                 'thumbnail': 'http://frame9.loadup.ru/d7/32/2888853.2.3.jpg',
+                'upload_date': '20150114',
             },
         },
         # swf player
         {
             'url': 'http://pics.smotri.com/scrubber_custom8.swf?file=v9188090500',
-            'md5': '4d47034979d9390d14acdf59c4935bc2',
+            'md5': '31099eeb4bc906712c5f40092045108d',
             'info_dict': {
                 'id': 'v9188090500',
                 'ext': 'mp4',
@@ -138,9 +139,6 @@ class SmotriIE(InfoExtractor):
     def _search_meta(self, name, html, display_name=None):
         if display_name is None:
             display_name = name
-        return self._html_search_regex(
-            r'<meta itemprop="%s" content="([^"]+)" />' % re.escape(name),
-            html, display_name, fatal=False)
         return self._html_search_meta(name, html, display_name)
 
     def _real_extract(self, url):
index 5d60c4939588ad543840b501ef0e552ad0b1e673..c5284fa673b7eda4f74191fba6a788df39939a51 100644 (file)
@@ -246,6 +246,7 @@ class SoundcloudSetIE(SoundcloudIE):
     _TESTS = [{
         'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
         'info_dict': {
+            'id': '2284613',
             'title': 'The Royal Concept EP',
         },
         'playlist_mincount': 6,
@@ -279,7 +280,7 @@ class SoundcloudSetIE(SoundcloudIE):
         return {
             '_type': 'playlist',
             'entries': [self._extract_info_dict(track, secret_token=token) for track in info['tracks']],
-            'id': info['id'],
+            'id': '%s' % info['id'],
             'title': info['title'],
         }
 
index f345883c767438a91412e0619a993a70e3a21a92..b868241d50a23cbfa7289f6d2affb46e224696e3 100644 (file)
@@ -4,14 +4,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_urlparse,
-    compat_HTTPError,
-)
-from ..utils import (
-    HEADRequest,
-    ExtractorError,
-)
+from ..compat import compat_urlparse
 from .spiegeltv import SpiegeltvIE
 
 
@@ -72,16 +65,6 @@ class SpiegelIE(InfoExtractor):
             if n.tag.startswith('type') and n.tag != 'type6':
                 format_id = n.tag.rpartition('type')[2]
                 video_url = base_url + n.find('./filename').text
-                # Test video URLs beforehand as some of them are invalid
-                try:
-                    self._request_webpage(
-                        HEADRequest(video_url), video_id,
-                        'Checking %s video URL' % format_id)
-                except ExtractorError as e:
-                    if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
-                        self.report_warning(
-                            '%s video URL is invalid, skipping' % format_id, video_id)
-                        continue
                 formats.append({
                     'format_id': format_id,
                     'url': video_url,
@@ -94,6 +77,7 @@ class SpiegelIE(InfoExtractor):
                 })
         duration = float(idoc[0].findall('./duration')[0].text)
 
+        self._check_formats(formats, video_id)
         self._sort_formats(formats)
 
         return {
index a3adf54e3097a5f91a5617c24f286cae5e374e24..e529bb55ccccb1beefdf12d2df1ea689dd0d6f2e 100644 (file)
@@ -1,14 +1,12 @@
 from __future__ import unicode_literals
 
-import re
-
 from .mtv import MTVServicesInfoExtractor
 
 
 class SpikeIE(MTVServicesInfoExtractor):
     _VALID_URL = r'''(?x)https?://
-        (www\.spike\.com/(video-clips|episodes)/.+|
-         m\.spike\.com/videos/video.rbml\?id=(?P<mobile_id>[^&]+))
+        (?:www\.spike\.com/(?:video-clips|(?:full-)?episodes)/.+|
+         m\.spike\.com/videos/video\.rbml\?id=(?P<id>[^&]+))
         '''
     _TEST = {
         'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle',
@@ -25,8 +23,7 @@ class SpikeIE(MTVServicesInfoExtractor):
     _MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s'
 
     def _real_extract(self, url):
-        mobj = re.search(self._VALID_URL, url)
-        mobile_id = mobj.group('mobile_id')
-        if mobile_id is not None:
+        mobile_id = self._match_id(url)
+        if mobile_id:
             url = 'http://www.spike.com/video-clips/%s' % mobile_id
         return super(SpikeIE, self)._real_extract(url)
index 666a7dcc8a1e7dd425fd1d201ef1b5c1802c6a16..5d583c720bff22bee0b5af55699a378e3a5267ea 100644 (file)
@@ -8,7 +8,7 @@ from ..utils import js_to_json
 
 
 class SRMediathekIE(InfoExtractor):
-    IE_DESC = 'Süddeutscher Rundfunk'
+    IE_DESC = 'Saarländischer Rundfunk'
     _VALID_URL = r'https?://sr-mediathek\.sr-online\.de/index\.php\?.*?&id=(?P<id>[0-9]+)'
 
     _TEST = {
diff --git a/youtube_dl/extractor/streetvoice.py b/youtube_dl/extractor/streetvoice.py
new file mode 100644 (file)
index 0000000..6a57fa6
--- /dev/null
@@ -0,0 +1,51 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import unified_strdate
+
+
+class StreetVoiceIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:.+?\.)?streetvoice\.com/[^/]+/songs/(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'http://streetvoice.com/skippylu/songs/94440/',
+        'md5': '15974627fc01a29e492c98593c2fd472',
+        'info_dict': {
+            'id': '94440',
+            'ext': 'mp3',
+            'filesize': 4167053,
+            'title': '輸',
+            'description': 'Crispy脆樂團 - 輸',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 260,
+            'upload_date': '20091018',
+            'uploader': 'Crispy脆樂團',
+            'uploader_id': '627810',
+        }
+    }, {
+        'url': 'http://tw.streetvoice.com/skippylu/songs/94440/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        song_id = self._match_id(url)
+
+        song = self._download_json(
+            'http://streetvoice.com/music/api/song/%s' % song_id, song_id)
+
+        title = song['name']
+        author = song['musician']['name']
+
+        return {
+            'id': song_id,
+            'url': song['file'],
+            'filesize': song.get('size'),
+            'title': title,
+            'description': '%s - %s' % (author, title),
+            'thumbnail': self._proto_relative_url(song.get('image'), 'http:'),
+            'duration': song.get('length'),
+            'upload_date': unified_strdate(song.get('created_at')),
+            'uploader': author,
+            'uploader_id': compat_str(song['musician']['id']),
+        }
index 5fa67eb8d4441d62c1591289551171cdbcbcf45b..18a8237197ca4f017252fa181b08bfacf67c44b2 100644 (file)
@@ -10,17 +10,19 @@ class TeamcocoIE(InfoExtractor):
     _TESTS = [
         {
             'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
-            'file': '80187.mp4',
             'md5': '3f7746aa0dc86de18df7539903d399ea',
             'info_dict': {
+                'id': '80187',
+                'ext': 'mp4',
                 'title': 'Conan Becomes A Mary Kay Beauty Consultant',
                 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'
             }
         }, {
             'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
-            'file': '19705.mp4',
             'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
             'info_dict': {
+                'id': '19705',
+                'ext': 'mp4',
                 "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
                 "title": "Louis C.K. Interview Pt. 1 11/3/11"
             }
@@ -36,7 +38,7 @@ class TeamcocoIE(InfoExtractor):
         video_id = mobj.group("video_id")
         if not video_id:
             video_id = self._html_search_regex(
-                r'data-node-id="(\d+?)"',
+                r'<div\s+class="player".*?data-id="(\d+?)"',
                 webpage, 'video id')
 
         data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
index e54145105f45d6b15345b47f101fe7f804ec174d..b9e2ef8cab9a0e3a63bf5a46baf2b70a39afa1cb 100644 (file)
@@ -11,6 +11,7 @@ class TeleTaskIE(InfoExtractor):
     _TEST = {
         'url': 'http://www.tele-task.de/archive/video/html5/26168/',
         'info_dict': {
+            'id': '26168',
             'title': 'Duplicate Detection',
         },
         'playlist': [{
@@ -34,7 +35,6 @@ class TeleTaskIE(InfoExtractor):
 
     def _real_extract(self, url):
         lecture_id = self._match_id(url)
-
         webpage = self._download_webpage(url, lecture_id)
 
         title = self._html_search_regex(
diff --git a/youtube_dl/extractor/testtube.py b/youtube_dl/extractor/testtube.py
new file mode 100644 (file)
index 0000000..6a7b5e4
--- /dev/null
@@ -0,0 +1,72 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    qualities,
+)
+
+
+class TestTubeIE(InfoExtractor):
+    _VALID_URL = r'https?://testtube\.com/[^/?#]+/(?P<id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://testtube.com/dnews/5-weird-ways-plants-can-eat-animals?utm_source=FB&utm_medium=DNews&utm_campaign=DNewsSocial',
+        'info_dict': {
+            'id': '60163',
+            'display_id': '5-weird-ways-plants-can-eat-animals',
+            'duration': 275,
+            'ext': 'mp4',
+            'title': '5 Weird Ways Plants Can Eat Animals',
+            'description': 'Why have some plants evolved to eat meat?',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'uploader': 'DNews',
+            'uploader_id': 'dnews',
+        },
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+        video_id = self._search_regex(
+            r"player\.loadRevision3Item\('video_id',\s*([0-9]+)\);",
+            webpage, 'video ID')
+
+        all_info = self._download_json(
+            'https://testtube.com/api/getPlaylist.json?api_key=ba9c741bce1b9d8e3defcc22193f3651b8867e62&codecs=h264,vp8,theora&video_id=%s' % video_id,
+            video_id)
+        info = all_info['items'][0]
+
+        formats = []
+        for vcodec, fdatas in info['media'].items():
+            for name, fdata in fdatas.items():
+                formats.append({
+                    'format_id': '%s-%s' % (vcodec, name),
+                    'url': fdata['url'],
+                    'vcodec': vcodec,
+                    'tbr': fdata.get('bitrate'),
+                })
+        self._sort_formats(formats)
+
+        duration = int_or_none(info.get('duration'))
+        images = info.get('images')
+        thumbnails = None
+        preference = qualities(['mini', 'small', 'medium', 'large'])
+        if images:
+            thumbnails = [{
+                'id': thumbnail_id,
+                'url': img_url,
+                'preference': preference(thumbnail_id)
+            } for thumbnail_id, img_url in images.items()]
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': info['title'],
+            'description': info.get('summary'),
+            'thumbnails': thumbnails,
+            'uploader': info.get('show', {}).get('name'),
+            'uploader_id': info.get('show', {}).get('slug'),
+            'duration': duration,
+            'formats': formats,
+        }
index 4fe89dbe516f8e25eb1f84239bc9cbc9f26bd648..e036b8cdf1e6ca6ad4277a4c3d22e79361322703 100644 (file)
@@ -9,17 +9,23 @@ from ..utils import ExtractorError
 class TinyPicIE(InfoExtractor):
     IE_NAME = 'tinypic'
     IE_DESC = 'tinypic.com videos'
-    _VALID_URL = r'http://tinypic\.com/player\.php\?v=(?P<id>[^&]+)&s=\d+'
-
-    _TEST = {
-        'url': 'http://tinypic.com/player.php?v=6xw7tc%3E&s=5#.UtqZmbRFCM8',
-        'md5': '609b74432465364e72727ebc6203f044',
-        'info_dict': {
-            'id': '6xw7tc',
-            'ext': 'flv',
-            'title': 'shadow phenomenon weird',
+    _VALID_URL = r'http://(?:.+?\.)?tinypic\.com/player\.php\?v=(?P<id>[^&]+)&s=\d+'
+
+    _TESTS = [
+        {
+            'url': 'http://tinypic.com/player.php?v=6xw7tc%3E&s=5#.UtqZmbRFCM8',
+            'md5': '609b74432465364e72727ebc6203f044',
+            'info_dict': {
+                'id': '6xw7tc',
+                'ext': 'flv',
+                'title': 'shadow phenomenon weird',
+            },
+        },
+        {
+            'url': 'http://de.tinypic.com/player.php?v=dy90yh&s=8',
+            'only_matching': True,
         }
-    }
+    ]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
index 2837f9c8e5fcf9624acc31c156f775aad32454dd..4797d1310aaeec2664d822c052f26be5ea5210af 100644 (file)
@@ -16,8 +16,9 @@ class TouTvIE(InfoExtractor):
 
     _TEST = {
         'url': 'http://www.tou.tv/30-vies/S04E41',
-        'file': '30-vies_S04E41.mp4',
         'info_dict': {
+            'id': '30-vies_S04E41',
+            'ext': 'mp4',
             'title': '30 vies Saison 4 / Épisode 41',
             'description': 'md5:da363002db82ccbe4dafeb9cab039b09',
             'age_limit': 8,
index cc26f417a0c208efd946aa91c72086471d69ccaa..f57d609d43eecb13f3bb43ecc042107b5cad50bd 100644 (file)
@@ -12,61 +12,59 @@ class TvpIE(InfoExtractor):
 
     _TESTS = [{
         'url': 'http://vod.tvp.pl/filmy-fabularne/filmy-za-darmo/ogniem-i-mieczem/wideo/odc-2/4278035',
+        'md5': 'cdd98303338b8a7f7abab5cd14092bf2',
         'info_dict': {
             'id': '4278035',
             'ext': 'wmv',
             'title': 'Ogniem i mieczem, odc. 2',
-            'description': 'Bohun dowiaduje się o złamaniu przez kniahinię danego mu słowa i wyrusza do Rozłogów. Helenie w ostatniej chwili udaje się uciec dzięki pomocy Zagłoby.',
         },
     }, {
         'url': 'http://vod.tvp.pl/seriale/obyczajowe/czas-honoru/sezon-1-1-13/i-seria-odc-13/194536',
+        'md5': '8aa518c15e5cc32dfe8db400dc921fbb',
         'info_dict': {
             'id': '194536',
             'ext': 'mp4',
             'title': 'Czas honoru, I seria – odc. 13',
-            #  'description': 'WŁADEK\nCzesław prosi Marię o dostarczenie Władkowi zarazki tyfusu. Jeśli zachoruje zostanie przewieziony do szpitala skąd łatwiej będzie go odbić. Czy matka zdecyduje się zarazić syna? Karol odwiedza Wandę przyznaje się, że ją oszukiwał, ale ostrzega też, że grozi jej aresztowanie i nalega, żeby wyjechała z Warszawy. Czy dziewczyna zdecyduje się znów oddalić od ukochanego? Rozpoczyna się akcja odbicia Władka.',
         },
     }, {
         'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176',
+        'md5': 'c3b15ed1af288131115ff17a17c19dda',
         'info_dict': {
             'id': '17916176',
             'ext': 'mp4',
             'title': 'TVP Gorzów pokaże filmy studentów z podroży dookoła świata',
         },
-        'params': {
-            # m3u8 download
-            'skip_download': 'true',
-        },
     }, {
         'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272',
+        'md5': 'c3b15ed1af288131115ff17a17c19dda',
         'info_dict': {
             'id': '17834272',
             'ext': 'mp4',
             'title': 'Na sygnale, odc. 39',
-            'description': 'Ekipa Wiktora ratuje młodą matkę, która spadła ze schodów trzymając na rękach noworodka. Okazuje się, że dziewczyna jest surogatką, a biologiczni rodzice dziecka próbują zmusić ją do oddania synka…',
-        },
-        'params': {
-            # m3u8 download
-            'skip_download': 'true',
         },
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
+
         webpage = self._download_webpage(
             'http://www.tvp.pl/sess/tvplayer.php?object_id=%s' % video_id, video_id)
 
-        title = self._og_search_title(webpage)
-        series = self._search_regex(
-            r'{name:\s*([\'"])SeriesTitle\1,\s*value:\s*\1(?P<series>.*?)\1},',
+        title = self._search_regex(
+            r'name\s*:\s*([\'"])Title\1\s*,\s*value\s*:\s*\1(?P<title>.+?)\1',
+            webpage, 'title', group='title')
+        series_title = self._search_regex(
+            r'name\s*:\s*([\'"])SeriesTitle\1\s*,\s*value\s*:\s*\1(?P<series>.+?)\1',
             webpage, 'series', group='series', default=None)
-        if series is not None and series not in title:
-            title = '%s, %s' % (series, title)
-        description = self._og_search_description(webpage, default=None)
+        if series_title:
+            title = '%s, %s' % (series_title, title)
+
+        thumbnail = self._search_regex(
+            r"poster\s*:\s*'([^']+)'", webpage, 'thumbnail', default=None)
 
         video_url = self._search_regex(
             r'0:{src:([\'"])(?P<url>.*?)\1', webpage, 'formats', group='url', default=None)
-        if video_url is None:
+        if not video_url:
             video_url = self._download_json(
                 'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s' % video_id,
                 video_id)['video_url']
@@ -89,8 +87,7 @@ class TvpIE(InfoExtractor):
         return {
             'id': video_id,
             'title': title,
-            'thumbnail': self._og_search_thumbnail(webpage),
-            'description': description,
+            'thumbnail': thumbnail,
             'formats': formats,
         }
 
diff --git a/youtube_dl/extractor/tweakers.py b/youtube_dl/extractor/tweakers.py
new file mode 100644 (file)
index 0000000..c80ec15
--- /dev/null
@@ -0,0 +1,65 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    xpath_text,
+    xpath_with_ns,
+    int_or_none,
+    float_or_none,
+)
+
+
+class TweakersIE(InfoExtractor):
+    _VALID_URL = r'https?://tweakers\.net/video/(?P<id>\d+)'
+    _TEST = {
+        'url': 'https://tweakers.net/video/9926/new-nintendo-3ds-xl-op-alle-fronten-beter.html',
+        'md5': '1b5afa817403bb5baa08359dca31e6df',
+        'info_dict': {
+            'id': '9926',
+            'ext': 'mp4',
+            'title': 'New Nintendo 3DS XL - Op alle fronten beter',
+            'description': 'md5:f97324cc71e86e11c853f0763820e3ba',
+            'thumbnail': 're:^https?://.*\.jpe?g$',
+            'duration': 386,
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        playlist = self._download_xml(
+            'https://tweakers.net/video/s1playlist/%s/playlist.xspf' % video_id,
+            video_id)
+
+        NS_MAP = {
+            'xspf': 'http://xspf.org/ns/0/',
+            's1': 'http://static.streamone.nl/player/ns/0',
+        }
+
+        track = playlist.find(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP))
+
+        title = xpath_text(
+            track, xpath_with_ns('./xspf:title', NS_MAP), 'title')
+        description = xpath_text(
+            track, xpath_with_ns('./xspf:annotation', NS_MAP), 'description')
+        thumbnail = xpath_text(
+            track, xpath_with_ns('./xspf:image', NS_MAP), 'thumbnail')
+        duration = float_or_none(
+            xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'),
+            1000)
+
+        formats = [{
+            'url': location.text,
+            'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
+            'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
+            'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
+        } for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'formats': formats,
+        }
index b11a1d5610d0dffe0d98df7d7b05d4228552dfb7..87290d002e44850e6b3584a97ff2a3e1be7c1a0f 100644 (file)
@@ -3,9 +3,11 @@ from __future__ import unicode_literals
 
 import itertools
 import re
+import random
 
 from .common import InfoExtractor
 from ..compat import (
+    compat_str,
     compat_urllib_parse,
     compat_urllib_request,
 )
@@ -15,44 +17,12 @@ from ..utils import (
 )
 
 
-class TwitchIE(InfoExtractor):
-    # TODO: One broadcast may be split into multiple videos. The key
-    # 'broadcast_id' is the same for all parts, and 'broadcast_part'
-    # starts at 1 and increases. Can we treat all parts as one video?
-    _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?twitch\.tv/
-        (?:
-            (?P<channelid>[^/]+)|
-            (?:(?:[^/]+)/v/(?P<vodid>[^/]+))|
-            (?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
-            (?:(?:[^/]+)/c/(?P<chapterid>[^/]+))
-        )
-        /?(?:\#.*)?$
-        """
-    _PAGE_LIMIT = 100
+class TwitchBaseIE(InfoExtractor):
+    _VALID_URL_BASE = r'https?://(?:www\.)?twitch\.tv'
+
     _API_BASE = 'https://api.twitch.tv'
+    _USHER_BASE = 'http://usher.twitch.tv'
     _LOGIN_URL = 'https://secure.twitch.tv/user/login'
-    _TESTS = [{
-        'url': 'http://www.twitch.tv/riotgames/b/577357806',
-        'info_dict': {
-            'id': 'a577357806',
-            'title': 'Worlds Semifinals - Star Horn Royal Club vs. OMG',
-        },
-        'playlist_mincount': 12,
-    }, {
-        'url': 'http://www.twitch.tv/acracingleague/c/5285812',
-        'info_dict': {
-            'id': 'c5285812',
-            'title': 'ACRL Off Season - Sports Cars @ Nordschleife',
-        },
-        'playlist_mincount': 3,
-    }, {
-        'url': 'http://www.twitch.tv/vanillatv',
-        'info_dict': {
-            'id': 'vanillatv',
-            'title': 'VanillaTV',
-        },
-        'playlist_mincount': 412,
-    }]
 
     def _handle_error(self, response):
         if not isinstance(response, dict):
@@ -64,34 +34,60 @@ class TwitchIE(InfoExtractor):
                 expected=True)
 
     def _download_json(self, url, video_id, note='Downloading JSON metadata'):
-        response = super(TwitchIE, self)._download_json(url, video_id, note)
+        response = super(TwitchBaseIE, self)._download_json(url, video_id, note)
         self._handle_error(response)
         return response
 
-    def _extract_media(self, item, item_id):
-        ITEMS = {
-            'a': 'video',
-            'v': 'vod',
-            'c': 'chapter',
+    def _real_initialize(self):
+        self._login()
+
+    def _login(self):
+        (username, password) = self._get_login_info()
+        if username is None:
+            return
+
+        login_page = self._download_webpage(
+            self._LOGIN_URL, None, 'Downloading login page')
+
+        authenticity_token = self._search_regex(
+            r'<input name="authenticity_token" type="hidden" value="([^"]+)"',
+            login_page, 'authenticity token')
+
+        login_form = {
+            'utf8': '✓'.encode('utf-8'),
+            'authenticity_token': authenticity_token,
+            'redirect_on_login': '',
+            'embed_form': 'false',
+            'mp_source_action': '',
+            'follow': '',
+            'user[login]': username,
+            'user[password]': password,
         }
-        info = self._extract_info(self._download_json(
+
+        request = compat_urllib_request.Request(
+            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
+        request.add_header('Referer', self._LOGIN_URL)
+        response = self._download_webpage(
+            request, None, 'Logging in as %s' % username)
+
+        m = re.search(
+            r"id=([\"'])login_error_message\1[^>]*>(?P<msg>[^<]+)", response)
+        if m:
+            raise ExtractorError(
+                'Unable to login: %s' % m.group('msg').strip(), expected=True)
+
+
+class TwitchItemBaseIE(TwitchBaseIE):
+    def _download_info(self, item, item_id):
+        return self._extract_info(self._download_json(
             '%s/kraken/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
-            'Downloading %s info JSON' % ITEMS[item]))
-
-        if item == 'v':
-            access_token = self._download_json(
-                '%s/api/vods/%s/access_token' % (self._API_BASE, item_id), item_id,
-                'Downloading %s access token' % ITEMS[item])
-            formats = self._extract_m3u8_formats(
-                'http://usher.twitch.tv/vod/%s?nauth=%s&nauthsig=%s'
-                % (item_id, access_token['token'], access_token['sig']),
-                item_id, 'mp4')
-            info['formats'] = formats
-            return info
+            'Downloading %s info JSON' % self._ITEM_TYPE))
 
+    def _extract_media(self, item_id):
+        info = self._download_info(self._ITEM_SHORTCUT, item_id)
         response = self._download_json(
-            '%s/api/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
-            'Downloading %s playlist JSON' % ITEMS[item])
+            '%s/api/videos/%s%s' % (self._API_BASE, self._ITEM_SHORTCUT, item_id), item_id,
+            'Downloading %s playlist JSON' % self._ITEM_TYPE)
         entries = []
         chunks = response['chunks']
         qualities = list(chunks.keys())
@@ -129,119 +125,258 @@ class TwitchIE(InfoExtractor):
             'view_count': info['views'],
         }
 
-    def _real_initialize(self):
-        self._login()
+    def _real_extract(self, url):
+        return self._extract_media(self._match_id(url))
 
-    def _login(self):
-        (username, password) = self._get_login_info()
-        if username is None:
-            return
 
-        login_page = self._download_webpage(
-            self._LOGIN_URL, None, 'Downloading login page')
+class TwitchVideoIE(TwitchItemBaseIE):
+    IE_NAME = 'twitch:video'
+    _VALID_URL = r'%s/[^/]+/b/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
+    _ITEM_TYPE = 'video'
+    _ITEM_SHORTCUT = 'a'
 
-        authenticity_token = self._search_regex(
-            r'<input name="authenticity_token" type="hidden" value="([^"]+)"',
-            login_page, 'authenticity token')
+    _TEST = {
+        'url': 'http://www.twitch.tv/riotgames/b/577357806',
+        'info_dict': {
+            'id': 'a577357806',
+            'title': 'Worlds Semifinals - Star Horn Royal Club vs. OMG',
+        },
+        'playlist_mincount': 12,
+    }
 
-        login_form = {
-            'utf8': '✓'.encode('utf-8'),
-            'authenticity_token': authenticity_token,
-            'redirect_on_login': '',
-            'embed_form': 'false',
-            'mp_source_action': '',
-            'follow': '',
-            'user[login]': username,
-            'user[password]': password,
-        }
 
-        request = compat_urllib_request.Request(
-            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
-        request.add_header('Referer', self._LOGIN_URL)
-        response = self._download_webpage(
-            request, None, 'Logging in as %s' % username)
+class TwitchChapterIE(TwitchItemBaseIE):
+    IE_NAME = 'twitch:chapter'
+    _VALID_URL = r'%s/[^/]+/c/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
+    _ITEM_TYPE = 'chapter'
+    _ITEM_SHORTCUT = 'c'
 
-        m = re.search(
-            r"id=([\"'])login_error_message\1[^>]*>(?P<msg>[^<]+)", response)
-        if m:
-            raise ExtractorError(
-                'Unable to login: %s' % m.group('msg').strip(), expected=True)
+    _TESTS = [{
+        'url': 'http://www.twitch.tv/acracingleague/c/5285812',
+        'info_dict': {
+            'id': 'c5285812',
+            'title': 'ACRL Off Season - Sports Cars @ Nordschleife',
+        },
+        'playlist_mincount': 3,
+    }, {
+        'url': 'http://www.twitch.tv/tsm_theoddone/c/2349361',
+        'only_matching': True,
+    }]
+
+
+class TwitchVodIE(TwitchItemBaseIE):
+    IE_NAME = 'twitch:vod'
+    _VALID_URL = r'%s/[^/]+/v/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
+    _ITEM_TYPE = 'vod'
+    _ITEM_SHORTCUT = 'v'
+
+    _TEST = {
+        'url': 'http://www.twitch.tv/ksptv/v/3622000',
+        'info_dict': {
+            'id': 'v3622000',
+            'ext': 'mp4',
+            'title': '''KSPTV: Squadcast: "Everyone's on vacation so here's Dahud" Edition!''',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 6951,
+            'timestamp': 1419028564,
+            'upload_date': '20141219',
+            'uploader': 'KSPTV',
+            'uploader_id': 'ksptv',
+            'view_count': int,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        item_id = self._match_id(url)
+        info = self._download_info(self._ITEM_SHORTCUT, item_id)
+        access_token = self._download_json(
+            '%s/api/vods/%s/access_token' % (self._API_BASE, item_id), item_id,
+            'Downloading %s access token' % self._ITEM_TYPE)
+        formats = self._extract_m3u8_formats(
+            '%s/vod/%s?nauth=%s&nauthsig=%s'
+            % (self._USHER_BASE, item_id, access_token['token'], access_token['sig']),
+            item_id, 'mp4')
+        info['formats'] = formats
+        return info
+
+
+class TwitchPlaylistBaseIE(TwitchBaseIE):
+    _PLAYLIST_URL = '%s/kraken/channels/%%s/videos/?offset=%%d&limit=%%d' % TwitchBaseIE._API_BASE
+    _PAGE_LIMIT = 100
+
+    def _extract_playlist(self, channel_id):
+        info = self._download_json(
+            '%s/kraken/channels/%s' % (self._API_BASE, channel_id),
+            channel_id, 'Downloading channel info JSON')
+        channel_name = info.get('display_name') or info.get('name')
+        entries = []
+        offset = 0
+        limit = self._PAGE_LIMIT
+        for counter in itertools.count(1):
+            response = self._download_json(
+                self._PLAYLIST_URL % (channel_id, offset, limit),
+                channel_id, 'Downloading %s videos JSON page %d' % (self._PLAYLIST_TYPE, counter))
+            page_entries = self._extract_playlist_page(response)
+            if not page_entries:
+                break
+            entries.extend(page_entries)
+            offset += limit
+        return self.playlist_result(
+            [self.url_result(entry) for entry in set(entries)],
+            channel_id, channel_name)
+
+    def _extract_playlist_page(self, response):
+        videos = response.get('videos')
+        return [video['url'] for video in videos] if videos else []
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        if mobj.group('chapterid'):
-            return self._extract_media('c', mobj.group('chapterid'))
+        return self._extract_playlist(self._match_id(url))
 
-            """
-            webpage = self._download_webpage(url, chapter_id)
-            m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
+
+class TwitchProfileIE(TwitchPlaylistBaseIE):
+    IE_NAME = 'twitch:profile'
+    _VALID_URL = r'%s/(?P<id>[^/]+)/profile/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
+    _PLAYLIST_TYPE = 'profile'
+
+    _TEST = {
+        'url': 'http://www.twitch.tv/vanillatv/profile',
+        'info_dict': {
+            'id': 'vanillatv',
+            'title': 'VanillaTV',
+        },
+        'playlist_mincount': 412,
+    }
+
+
+class TwitchPastBroadcastsIE(TwitchPlaylistBaseIE):
+    IE_NAME = 'twitch:past_broadcasts'
+    _VALID_URL = r'%s/(?P<id>[^/]+)/profile/past_broadcasts/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
+    _PLAYLIST_URL = TwitchPlaylistBaseIE._PLAYLIST_URL + '&broadcasts=true'
+    _PLAYLIST_TYPE = 'past broadcasts'
+
+    _TEST = {
+        'url': 'http://www.twitch.tv/spamfish/profile/past_broadcasts',
+        'info_dict': {
+            'id': 'spamfish',
+            'title': 'Spamfish',
+        },
+        'playlist_mincount': 54,
+    }
+
+
+class TwitchBookmarksIE(TwitchPlaylistBaseIE):
+    IE_NAME = 'twitch:bookmarks'
+    _VALID_URL = r'%s/(?P<id>[^/]+)/profile/bookmarks/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
+    _PLAYLIST_URL = '%s/api/bookmark/?user=%%s&offset=%%d&limit=%%d' % TwitchBaseIE._API_BASE
+    _PLAYLIST_TYPE = 'bookmarks'
+
+    _TEST = {
+        'url': 'http://www.twitch.tv/ognos/profile/bookmarks',
+        'info_dict': {
+            'id': 'ognos',
+            'title': 'Ognos',
+        },
+        'playlist_mincount': 3,
+    }
+
+    def _extract_playlist_page(self, response):
+        entries = []
+        for bookmark in response.get('bookmarks', []):
+            video = bookmark.get('video')
+            if not video:
+                continue
+            entries.append(video['url'])
+        return entries
+
+
+class TwitchStreamIE(TwitchBaseIE):
+    IE_NAME = 'twitch:stream'
+    _VALID_URL = r'%s/(?P<id>[^/]+)/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
+
+    _TEST = {
+        'url': 'http://www.twitch.tv/shroomztv',
+        'info_dict': {
+            'id': '12772022048',
+            'display_id': 'shroomztv',
+            'ext': 'mp4',
+            'title': 're:^ShroomzTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'description': 'H1Z1 - lonewolfing with ShroomzTV | A3 Battle Royale later - @ShroomzTV',
+            'is_live': True,
+            'timestamp': 1421928037,
+            'upload_date': '20150122',
+            'uploader': 'ShroomzTV',
+            'uploader_id': 'shroomztv',
+            'view_count': int,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        channel_id = self._match_id(url)
+
+        stream = self._download_json(
+            '%s/kraken/streams/%s' % (self._API_BASE, channel_id), channel_id,
+            'Downloading stream JSON').get('stream')
+
+        # Fallback on profile extraction if stream is offline
+        if not stream:
+            return self.url_result(
+                'http://www.twitch.tv/%s/profile' % channel_id,
+                'TwitchProfile', channel_id)
+
+        access_token = self._download_json(
+            '%s/api/channels/%s/access_token' % (self._API_BASE, channel_id), channel_id,
+            'Downloading channel access token')
+
+        query = {
+            'allow_source': 'true',
+            'p': random.randint(1000000, 10000000),
+            'player': 'twitchweb',
+            'segment_preference': '4',
+            'sig': access_token['sig'],
+            'token': access_token['token'],
+        }
+
+        formats = self._extract_m3u8_formats(
+            '%s/api/channel/hls/%s.m3u8?%s'
+            % (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query).encode('utf-8')),
+            channel_id, 'mp4')
+
+        view_count = stream.get('viewers')
+        timestamp = parse_iso8601(stream.get('created_at'))
+
+        channel = stream['channel']
+        title = self._live_title(channel.get('display_name') or channel.get('name'))
+        description = channel.get('status')
+
+        thumbnails = []
+        for thumbnail_key, thumbnail_url in stream['preview'].items():
+            m = re.search(r'(?P<width>\d+)x(?P<height>\d+)\.jpg$', thumbnail_key)
             if not m:
-                raise ExtractorError('Cannot find archive of a chapter')
-            archive_id = m.group(1)
-
-            api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
-            doc = self._download_xml(
-                api, chapter_id,
-                note='Downloading chapter information',
-                errnote='Chapter information download failed')
-            for a in doc.findall('.//archive'):
-                if archive_id == a.find('./id').text:
-                    break
-            else:
-                raise ExtractorError('Could not find chapter in chapter information')
-
-            video_url = a.find('./video_file_url').text
-            video_ext = video_url.rpartition('.')[2] or 'flv'
-
-            chapter_api_url = 'https://api.twitch.tv/kraken/videos/c' + chapter_id
-            chapter_info = self._download_json(
-                chapter_api_url, 'c' + chapter_id,
-                note='Downloading chapter metadata',
-                errnote='Download of chapter metadata failed')
-
-            bracket_start = int(doc.find('.//bracket_start').text)
-            bracket_end = int(doc.find('.//bracket_end').text)
-
-            # TODO determine start (and probably fix up file)
-            #  youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
-            #video_url += '?start=' + TODO:start_timestamp
-            # bracket_start is 13290, but we want 51670615
-            self._downloader.report_warning('Chapter detected, but we can just download the whole file. '
-                                            'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))
-
-            info = {
-                'id': 'c' + chapter_id,
-                'url': video_url,
-                'ext': video_ext,
-                'title': chapter_info['title'],
-                'thumbnail': chapter_info['preview'],
-                'description': chapter_info['description'],
-                'uploader': chapter_info['channel']['display_name'],
-                'uploader_id': chapter_info['channel']['name'],
-            }
-            return info
-            """
-        elif mobj.group('videoid'):
-            return self._extract_media('a', mobj.group('videoid'))
-        elif mobj.group('vodid'):
-            return self._extract_media('v', mobj.group('vodid'))
-        elif mobj.group('channelid'):
-            channel_id = mobj.group('channelid')
-            info = self._download_json(
-                '%s/kraken/channels/%s' % (self._API_BASE, channel_id),
-                channel_id, 'Downloading channel info JSON')
-            channel_name = info.get('display_name') or info.get('name')
-            entries = []
-            offset = 0
-            limit = self._PAGE_LIMIT
-            for counter in itertools.count(1):
-                response = self._download_json(
-                    '%s/kraken/channels/%s/videos/?offset=%d&limit=%d'
-                    % (self._API_BASE, channel_id, offset, limit),
-                    channel_id, 'Downloading channel videos JSON page %d' % counter)
-                videos = response['videos']
-                if not videos:
-                    break
-                entries.extend([self.url_result(video['url'], 'Twitch') for video in videos])
-                offset += limit
-            return self.playlist_result(entries, channel_id, channel_name)
+                continue
+            thumbnails.append({
+                'url': thumbnail_url,
+                'width': int(m.group('width')),
+                'height': int(m.group('height')),
+            })
+
+        return {
+            'id': compat_str(stream['_id']),
+            'display_id': channel_id,
+            'title': title,
+            'description': description,
+            'thumbnails': thumbnails,
+            'uploader': channel.get('display_name'),
+            'uploader_id': channel.get('name'),
+            'timestamp': timestamp,
+            'view_count': view_count,
+            'formats': formats,
+            'is_live': True,
+        }
index 0182d67ec2db7fee5aa0b6f7f5457f13cca0c305..d5023775857a8bd27ee0f19cffd8176a477be4fd 100644 (file)
@@ -3,50 +3,51 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+    int_or_none,
+    qualities,
+)
 
 
 class UbuIE(InfoExtractor):
     _VALID_URL = r'http://(?:www\.)?ubu\.com/film/(?P<id>[\da-z_-]+)\.html'
     _TEST = {
         'url': 'http://ubu.com/film/her_noise.html',
-        'md5': '8edd46ee8aa6b265fb5ed6cf05c36bc9',
+        'md5': '138d5652618bf0f03878978db9bef1ee',
         'info_dict': {
             'id': 'her_noise',
-            'ext': 'mp4',
+            'ext': 'm4v',
             'title': 'Her Noise - The Making Of (2007)',
             'duration': 3600,
         },
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         title = self._html_search_regex(
             r'<title>.+?Film &amp; Video: ([^<]+)</title>', webpage, 'title')
 
         duration = int_or_none(self._html_search_regex(
-            r'Duration: (\d+) minutes', webpage, 'duration', fatal=False, default=None))
-        if duration:
-            duration *= 60
+            r'Duration: (\d+) minutes', webpage, 'duration', fatal=False),
+            invscale=60)
 
         formats = []
-
         FORMAT_REGEXES = [
-            ['sq', r"'flashvars'\s*,\s*'file=([^']+)'"],
-            ['hq', r'href="(http://ubumexico\.centro\.org\.mx/video/[^"]+)"']
+            ('sq', r"'flashvars'\s*,\s*'file=([^']+)'"),
+            ('hq', r'href="(http://ubumexico\.centro\.org\.mx/video/[^"]+)"'),
         ]
-
+        preference = qualities([fid for fid, _ in FORMAT_REGEXES])
         for format_id, format_regex in FORMAT_REGEXES:
             m = re.search(format_regex, webpage)
             if m:
                 formats.append({
                     'url': m.group(1),
                     'format_id': format_id,
+                    'preference': preference(format_id),
                 })
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
index 43f6b029da8ff5df7fe808c11a85f8a8120f8ca5..c17094f8193f7678cc3d0a912c3d970f38e6bf7c 100644 (file)
@@ -9,6 +9,7 @@ from ..compat import (
 )
 from ..utils import (
     ExtractorError,
+    int_or_none,
 )
 
 
@@ -192,9 +193,29 @@ class VevoIE(InfoExtractor):
         # Download via HLS API
         formats.extend(self._download_api_formats(video_id))
 
+        # Download SMIL
+        smil_blocks = sorted((
+            f for f in video_info['videoVersions']
+            if f['sourceType'] == 13),
+            key=lambda f: f['version'])
+        smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
+            self._SMIL_BASE_URL, video_id, video_id.lower())
+        if smil_blocks:
+            smil_url_m = self._search_regex(
+                r'url="([^"]+)"', smil_blocks[-1]['data'], 'SMIL URL',
+                default=None)
+            if smil_url_m is not None:
+                smil_url = smil_url_m
+        if smil_url:
+            smil_xml = self._download_webpage(
+                smil_url, video_id, 'Downloading SMIL info', fatal=False)
+            if smil_xml:
+                formats.extend(self._formats_from_smil(smil_xml))
+
         self._sort_formats(formats)
-        timestamp_ms = int(self._search_regex(
-            r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date'))
+        timestamp_ms = int_or_none(self._search_regex(
+            r'/Date\((\d+)\)/',
+            video_info['launchDate'], 'launch date', fatal=False))
 
         return {
             'id': video_id,
index 0faa729c60f916d69b885cfc76580104b226f84b..8516a2940cb38c7e030e504e6a29f88fcd3946a1 100644 (file)
@@ -5,27 +5,58 @@ from ..utils import (
     float_or_none,
     int_or_none,
 )
+from ..compat import (
+    compat_urllib_request
+)
 
 
 class ViddlerIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
-    _TEST = {
-        "url": "http://www.viddler.com/v/43903784",
+    _TESTS = [{
+        'url': 'http://www.viddler.com/v/43903784',
         'md5': 'ae43ad7cb59431ce043f0ff7fa13cbf4',
         'info_dict': {
             'id': '43903784',
             'ext': 'mp4',
-            "title": "Video Made Easy",
-            'description': 'You don\'t need to be a professional to make high-quality video content. Viddler provides some quick and easy tips on how to produce great video content with limited resources. ',
-            "uploader": "viddler",
+            'title': 'Video Made Easy',
+            'description': 'md5:6a697ebd844ff3093bd2e82c37b409cd',
+            'uploader': 'viddler',
             'timestamp': 1335371429,
             'upload_date': '20120425',
-            "duration": 100.89,
+            'duration': 100.89,
             'thumbnail': 're:^https?://.*\.jpg$',
             'view_count': int,
+            'comment_count': int,
             'categories': ['video content', 'high quality video', 'video made easy', 'how to produce video with limited resources', 'viddler'],
         }
-    }
+    }, {
+        'url': 'http://www.viddler.com/v/4d03aad9/',
+        'md5': 'faa71fbf70c0bee7ab93076fd007f4b0',
+        'info_dict': {
+            'id': '4d03aad9',
+            'ext': 'mp4',
+            'title': 'WALL-TO-GORTAT',
+            'upload_date': '20150126',
+            'uploader': 'deadspin',
+            'timestamp': 1422285291,
+            'view_count': int,
+            'comment_count': int,
+        }
+    }, {
+        'url': 'http://www.viddler.com/player/221ebbbd/0/',
+        'md5': '0defa2bd0ea613d14a6e9bd1db6be326',
+        'info_dict': {
+            'id': '221ebbbd',
+            'ext': 'mp4',
+            'title': 'LETeens-Grammar-snack-third-conditional',
+            'description': ' ',
+            'upload_date': '20140929',
+            'uploader': 'BCLETeens',
+            'timestamp': 1411997190,
+            'view_count': int,
+            'comment_count': int,
+        }
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -33,14 +64,17 @@ class ViddlerIE(InfoExtractor):
         json_url = (
             'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?video_id=%s&key=v0vhrt7bg2xq1vyxhkct' %
             video_id)
-        data = self._download_json(json_url, video_id)['video']
+        headers = {'Referer': 'http://static.cdn-ec.viddler.com/js/arpeggio/v2/embed.html'}
+        request = compat_urllib_request.Request(json_url, None, headers)
+        data = self._download_json(request, video_id)['video']
 
         formats = []
         for filed in data['files']:
             if filed.get('status', 'ready') != 'ready':
                 continue
+            format_id = filed.get('profile_id') or filed['profile_name']
             f = {
-                'format_id': filed['profile_id'],
+                'format_id': format_id,
                 'format_note': filed['profile_name'],
                 'url': self._proto_relative_url(filed['url']),
                 'width': int_or_none(filed.get('width')),
@@ -53,16 +87,15 @@ class ViddlerIE(InfoExtractor):
 
             if filed.get('cdn_url'):
                 f = f.copy()
-                f['url'] = self._proto_relative_url(filed['cdn_url'])
-                f['format_id'] = filed['profile_id'] + '-cdn'
+                f['url'] = self._proto_relative_url(filed['cdn_url'], 'http:')
+                f['format_id'] = format_id + '-cdn'
                 f['source_preference'] = 1
                 formats.append(f)
 
             if filed.get('html5_video_source'):
                 f = f.copy()
-                f['url'] = self._proto_relative_url(
-                    filed['html5_video_source'])
-                f['format_id'] = filed['profile_id'] + '-html5'
+                f['url'] = self._proto_relative_url(filed['html5_video_source'])
+                f['format_id'] = format_id + '-html5'
                 f['source_preference'] = 0
                 formats.append(f)
         self._sort_formats(formats)
@@ -71,7 +104,6 @@ class ViddlerIE(InfoExtractor):
             t.get('text') for t in data.get('tags', []) if 'text' in t]
 
         return {
-            '_type': 'video',
             'id': video_id,
             'title': data['title'],
             'formats': formats,
@@ -81,5 +113,6 @@ class ViddlerIE(InfoExtractor):
             'uploader': data.get('author'),
             'duration': float_or_none(data.get('length')),
             'view_count': int_or_none(data.get('view_count')),
+            'comment_count': int_or_none(data.get('comment_count')),
             'categories': categories,
         }
index 9fc64d172e63ecb15469efc2a2085d8bccc06e53..27303031620a8c126797bcdd6207d2f2355c74be 100644 (file)
@@ -1,12 +1,15 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
 )
 from ..utils import (
+    ExtractorError,
     remove_start,
 )
 
@@ -35,8 +38,11 @@ class VideoMegaIE(InfoExtractor):
         req.add_header('Referer', url)
         webpage = self._download_webpage(req, video_id)
 
-        escaped_data = self._search_regex(
-            r'unescape\("([^"]+)"\)', webpage, 'escaped data')
+        try:
+            escaped_data = re.findall(r'unescape\("([^"]+)"\)', webpage)[-1]
+        except IndexError:
+            raise ExtractorError('Unable to extract escaped data')
+
         playlist = compat_urllib_parse.unquote(escaped_data)
 
         thumbnail = self._search_regex(
@@ -56,5 +62,7 @@ class VideoMegaIE(InfoExtractor):
             'title': title,
             'formats': formats,
             'thumbnail': thumbnail,
-            'http_referer': iframe_url,
+            'http_headers': {
+                'Referer': iframe_url,
+            },
         }
index 1f938838cc9247e9e80c8ce32911cb40954c1830..ececc7ee0118932716ca9bdb06779cf94e6dc0ec 100644 (file)
@@ -13,9 +13,9 @@ from ..utils import (
 class VideoTtIE(InfoExtractor):
     ID_NAME = 'video.tt'
     IE_DESC = 'video.tt - Your True Tube'
-    _VALID_URL = r'http://(?:www\.)?video\.tt/(?:video/|watch_video\.php\?v=)(?P<id>[\da-zA-Z]{9})'
+    _VALID_URL = r'http://(?:www\.)?video\.tt/(?:(?:video|embed)/|watch_video\.php\?v=)(?P<id>[\da-zA-Z]{9})'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.video.tt/watch_video.php?v=amd5YujV8',
         'md5': 'b13aa9e2f267effb5d1094443dff65ba',
         'info_dict': {
@@ -26,7 +26,10 @@ class VideoTtIE(InfoExtractor):
             'upload_date': '20130827',
             'uploader': 'joseph313',
         }
-    }
+    }, {
+        'url': 'http://video.tt/embed/amd5YujV8',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
index 06b0bed41e68401a8667cbabdca0d9796ea8ca3d..1bb47351435bd48832671b84038b6c4a749cdfbc 100644 (file)
@@ -501,9 +501,10 @@ class VimeoReviewIE(InfoExtractor):
     _VALID_URL = r'https?://vimeo\.com/[^/]+/review/(?P<id>[^/]+)'
     _TESTS = [{
         'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
-        'file': '75524534.mp4',
         'md5': 'c507a72f780cacc12b2248bb4006d253',
         'info_dict': {
+            'id': '75524534',
+            'ext': 'mp4',
             'title': "DICK HARDWICK 'Comedian'",
             'uploader': 'Richard Hardwick',
         }
index c17bebd6e919673d9011de3ac37dfff2929b2cc8..72eb010f8d1e480e37f133545ad6f8e1a64fef3f 100644 (file)
@@ -1,3 +1,4 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -11,9 +12,10 @@ from ..utils import (
 
 class WashingtonPostIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
         'info_dict': {
+            'id': 'sinkhole-of-bureaucracy',
             'title': 'Sinkhole of bureaucracy',
         },
         'playlist': [{
@@ -40,15 +42,38 @@ class WashingtonPostIE(InfoExtractor):
                 'upload_date': '20140322',
                 'uploader': 'The Washington Post',
             },
+        }],
+    }, {
+        'url': 'http://www.washingtonpost.com/blogs/wonkblog/wp/2014/12/31/one-airline-figured-out-how-to-make-sure-its-airplanes-never-disappear/',
+        'info_dict': {
+            'id': 'one-airline-figured-out-how-to-make-sure-its-airplanes-never-disappear',
+            'title': 'One airline figured out how to make sure its airplanes never disappear',
+        },
+        'playlist': [{
+            'md5': 'a7c1b5634ba5e57a6a82cdffa5b1e0d0',
+            'info_dict': {
+                'id': '0e4bb54c-9065-11e4-a66f-0ca5037a597d',
+                'ext': 'mp4',
+                'description': 'Washington Post transportation reporter Ashley Halsey III explains why a plane\'s black box needs to be recovered from a crash site instead of having its information streamed in real time throughout the flight.',
+                'upload_date': '20141230',
+                'uploader': 'The Washington Post',
+                'timestamp': 1419974765,
+                'title': 'Why black boxes don’t transmit data in real time',
+            }
         }]
-    }
+    }]
 
     def _real_extract(self, url):
         page_id = self._match_id(url)
         webpage = self._download_webpage(url, page_id)
 
         title = self._og_search_title(webpage)
-        uuids = re.findall(r'data-video-uuid="([^"]+)"', webpage)
+
+        uuids = re.findall(r'''(?x)
+            (?:
+                <div\s+class="posttv-video-embed[^>]*?data-uuid=|
+                data-video-uuid=
+            )"([^"]+)"''', webpage)
         entries = []
         for i, uuid in enumerate(uuids, start=1):
             vinfo_all = self._download_json(
@@ -75,10 +100,11 @@ class WashingtonPostIE(InfoExtractor):
                 'filesize': s.get('fileSize'),
                 'url': s.get('url'),
                 'ext': 'mp4',
+                'preference': -100 if s.get('type') == 'smil' else None,
                 'protocol': {
                     'MP4': 'http',
                     'F4F': 'f4m',
-                }.get(s.get('type'))
+                }.get(s.get('type')),
             } for s in vinfo.get('streams', [])]
             source_media_url = vinfo.get('sourceMediaURL')
             if source_media_url:
index 45466e31b7445f8dd8da742308dcc69f2ff1152f..c9048850061e1ecae4380557503a6b3927d2220c 100644 (file)
@@ -71,6 +71,9 @@ class WDRIE(InfoExtractor):
         {
             'url': 'http://www1.wdr.de/mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100.html',
             'playlist_mincount': 146,
+            'info_dict': {
+                'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100',
+            }
         }
     ]
 
@@ -169,7 +172,9 @@ class WDRMobileIE(InfoExtractor):
             'title': mobj.group('title'),
             'age_limit': int(mobj.group('age_limit')),
             'url': url,
-            'user_agent': 'mobile',
+            'http_headers': {
+                'User-Agent': 'mobile',
+            },
         }
 
 
diff --git a/youtube_dl/extractor/wsj.py b/youtube_dl/extractor/wsj.py
new file mode 100644 (file)
index 0000000..cbe3dc7
--- /dev/null
@@ -0,0 +1,89 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    unified_strdate,
+)
+
+
+class WSJIE(InfoExtractor):
+    _VALID_URL = r'https?://video-api\.wsj\.com/api-video/player/iframe\.html\?guid=(?P<id>[a-zA-Z0-9-]+)'
+    IE_DESC = 'Wall Street Journal'
+    _TEST = {
+        'url': 'http://video-api.wsj.com/api-video/player/iframe.html?guid=1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
+        'md5': '9747d7a6ebc2f4df64b981e1dde9efa9',
+        'info_dict': {
+            'id': '1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
+            'ext': 'mp4',
+            'upload_date': '20150202',
+            'uploader_id': 'bbright',
+            'creator': 'bbright',
+            'categories': list,  # a long list
+            'duration': 90,
+            'title': 'Bills Coach Rex Ryan Updates His Old Jets Tattoo',
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        bitrates = [128, 174, 264, 320, 464, 664, 1264]
+        api_url = (
+            'http://video-api.wsj.com/api-video/find_all_videos.asp?'
+            'type=guid&count=1&query=%s&'
+            'fields=hls,adZone,thumbnailList,guid,state,secondsUntilStartTime,'
+            'author,description,name,linkURL,videoStillURL,duration,videoURL,'
+            'adCategory,catastrophic,linkShortURL,doctypeID,youtubeID,'
+            'titletag,rssURL,wsj-section,wsj-subsection,allthingsd-section,'
+            'allthingsd-subsection,sm-section,sm-subsection,provider,'
+            'formattedCreationDate,keywords,keywordsOmniture,column,editor,'
+            'emailURL,emailPartnerID,showName,omnitureProgramName,'
+            'omnitureVideoFormat,linkRelativeURL,touchCastID,'
+            'omniturePublishDate,%s') % (
+                video_id, ','.join('video%dkMP4Url' % br for br in bitrates))
+        info = self._download_json(api_url, video_id)['items'][0]
+
+        # Thumbnails are conveniently in the correct format already
+        thumbnails = info.get('thumbnailList')
+        creator = info.get('author')
+        uploader_id = info.get('editor')
+        categories = info.get('keywords')
+        duration = int_or_none(info.get('duration'))
+        upload_date = unified_strdate(
+            info.get('formattedCreationDate'), day_first=False)
+        title = info.get('name', info.get('titletag'))
+
+        formats = [{
+            'format_id': 'f4m',
+            'format_note': 'f4m (meta URL)',
+            'url': info['videoURL'],
+        }]
+        if info.get('hls'):
+            formats.extend(self._extract_m3u8_formats(
+                info['hls'], video_id, ext='mp4',
+                preference=0, entry_protocol='m3u8_native'))
+        for br in bitrates:
+            field = 'video%dkMP4Url' % br
+            if info.get(field):
+                formats.append({
+                    'format_id': 'mp4-%d' % br,
+                    'container': 'mp4',
+                    'tbr': br,
+                    'url': info[field],
+                })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'thumbnails': thumbnails,
+            'creator': creator,
+            'uploader_id': uploader_id,
+            'duration': duration,
+            'upload_date': upload_date,
+            'title': title,
+            'formats': formats,
+            'categories': categories,
+        }
diff --git a/youtube_dl/extractor/xuite.py b/youtube_dl/extractor/xuite.py
new file mode 100644 (file)
index 0000000..4971965
--- /dev/null
@@ -0,0 +1,142 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import base64
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+from ..utils import (
+    ExtractorError,
+    parse_iso8601,
+    parse_duration,
+)
+
+
+class XuiteIE(InfoExtractor):
+    _REGEX_BASE64 = r'(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?'
+    _VALID_URL = r'https?://vlog\.xuite\.net/(?:play|embed)/(?P<id>%s)' % _REGEX_BASE64
+    _TESTS = [{
+        # Audio
+        'url': 'http://vlog.xuite.net/play/RGkzc1ZULTM4NjA5MTQuZmx2',
+        'md5': '63a42c705772aa53fd4c1a0027f86adf',
+        'info_dict': {
+            'id': '3860914',
+            'ext': 'mp3',
+            'title': '孤單南半球-歐德陽',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 247.246,
+            'timestamp': 1314932940,
+            'upload_date': '20110902',
+            'uploader': '阿能',
+            'uploader_id': '15973816',
+            'categories': ['個人短片'],
+        },
+    }, {
+        # Video with only one format
+        'url': 'http://vlog.xuite.net/play/TkRZNjhULTM0NDE2MjkuZmx2',
+        'md5': 'c45737fc8ac5dc8ac2f92ecbcecf505e',
+        'info_dict': {
+            'id': '3441629',
+            'ext': 'mp4',
+            'title': '孫燕姿 - 眼淚成詩',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 217.399,
+            'timestamp': 1299383640,
+            'upload_date': '20110306',
+            'uploader': 'Valen',
+            'uploader_id': '10400126',
+            'categories': ['影視娛樂'],
+        },
+    }, {
+        # Video with two formats
+        'url': 'http://vlog.xuite.net/play/bWo1N1pLLTIxMzAxMTcwLmZsdg==',
+        'md5': '1166e0f461efe55b62e26a2d2a68e6de',
+        'info_dict': {
+            'id': '21301170',
+            'ext': 'mp4',
+            'title': '暗殺教室 02',
+            'description': '字幕:【極影字幕社】',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 1384.907,
+            'timestamp': 1421481240,
+            'upload_date': '20150117',
+            'uploader': '我只是想認真點',
+            'uploader_id': '242127761',
+            'categories': ['電玩動漫'],
+        },
+    }, {
+        'url': 'http://vlog.xuite.net/play/S1dDUjdyLTMyOTc3NjcuZmx2/%E5%AD%AB%E7%87%95%E5%A7%BF-%E7%9C%BC%E6%B7%9A%E6%88%90%E8%A9%A9',
+        'only_matching': True,
+    }]
+
+    def _extract_flv_config(self, media_id):
+        base64_media_id = base64.b64encode(media_id.encode('utf-8')).decode('utf-8')
+        flv_config = self._download_xml(
+            'http://vlog.xuite.net/flash/player?media=%s' % base64_media_id,
+            'flv config')
+        prop_dict = {}
+        for prop in flv_config.findall('./property'):
+            prop_id = base64.b64decode(prop.attrib['id']).decode('utf-8')
+            # CDATA may be empty in flv config
+            if not prop.text:
+                continue
+            encoded_content = base64.b64decode(prop.text).decode('utf-8')
+            prop_dict[prop_id] = compat_urllib_parse_unquote(encoded_content)
+        return prop_dict
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        error_msg = self._search_regex(
+            r'<div id="error-message-content">([^<]+)',
+            webpage, 'error message', default=None)
+        if error_msg:
+            raise ExtractorError(
+                '%s returned error: %s' % (self.IE_NAME, error_msg),
+                expected=True)
+
+        video_id = self._html_search_regex(
+            r'data-mediaid="(\d+)"', webpage, 'media id')
+        flv_config = self._extract_flv_config(video_id)
+
+        FORMATS = {
+            'audio': 'mp3',
+            'video': 'mp4',
+        }
+
+        formats = []
+        for format_tag in ('src', 'hq_src'):
+            video_url = flv_config.get(format_tag)
+            if not video_url:
+                continue
+            format_id = self._search_regex(
+                r'\bq=(.+?)\b', video_url, 'format id', default=format_tag)
+            formats.append({
+                'url': video_url,
+                'ext': FORMATS.get(flv_config['type'], 'mp4'),
+                'format_id': format_id,
+                'height': int(format_id) if format_id.isnumeric() else None,
+            })
+        self._sort_formats(formats)
+
+        timestamp = flv_config.get('publish_datetime')
+        if timestamp:
+            timestamp = parse_iso8601(timestamp + ' +0800', ' ')
+
+        category = flv_config.get('category')
+        categories = [category] if category else []
+
+        return {
+            'id': video_id,
+            'title': flv_config['title'],
+            'description': flv_config.get('description'),
+            'thumbnail': flv_config.get('thumb'),
+            'timestamp': timestamp,
+            'uploader': flv_config.get('author_name'),
+            'uploader_id': flv_config.get('author_id'),
+            'duration': parse_duration(flv_config.get('duration')),
+            'categories': categories,
+            'formats': formats,
+        }
index 2a1f8be0a7e6d6946b5e1801a4b3cd6dda338efe..e4b26b84fe5cf65dfdcedc5d9fd9bf2b67e17f35 100644 (file)
@@ -264,9 +264,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},
 
         # Dash mp4 audio
-        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50},
-        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50},
-        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50},
+        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
+        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
+        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
 
         # Dash webm
         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
@@ -809,6 +809,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             player_url = None
 
         # Get video info
+        embed_webpage = None
         if re.search(r'player-age-gate-content">', video_webpage) is not None:
             age_gate = True
             # We simulate the access to the video from www.youtube.com/v/{video_id}
@@ -1016,10 +1017,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                     url += '&signature=' + url_data['sig'][0]
                 elif 's' in url_data:
                     encrypted_sig = url_data['s'][0]
+                    ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
 
                     jsplayer_url_json = self._search_regex(
-                        r'"assets":.+?"js":\s*("[^"]+")',
-                        embed_webpage if age_gate else video_webpage, 'JS player URL')
+                        ASSETS_RE,
+                        embed_webpage if age_gate else video_webpage,
+                        'JS player URL (1)', default=None)
+                    if not jsplayer_url_json and not age_gate:
+                        # We need the embed website after all
+                        if embed_webpage is None:
+                            embed_url = proto + '://www.youtube.com/embed/%s' % video_id
+                            embed_webpage = self._download_webpage(
+                                embed_url, video_id, 'Downloading embed webpage')
+                        jsplayer_url_json = self._search_regex(
+                            ASSETS_RE, embed_webpage, 'JS player URL')
+
                     player_url = json.loads(jsplayer_url_json)
                     if player_url is None:
                         player_url_json = self._search_regex(
@@ -1148,6 +1160,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
     }, {
         'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
         'info_dict': {
+            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
             'title': 'YDL_Empty_List',
         },
         'playlist_count': 0,
@@ -1156,6 +1169,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
         'info_dict': {
             'title': '29C3: Not my department',
+            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
         },
         'playlist_count': 95,
     }, {
@@ -1163,6 +1177,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         'url': 'PLBB231211A4F62143',
         'info_dict': {
             'title': '[OLD]Team Fortress 2 (Class-based LP)',
+            'id': 'PLBB231211A4F62143',
         },
         'playlist_mincount': 26,
     }, {
@@ -1170,12 +1185,14 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
         'info_dict': {
             'title': 'Uploads from Cauchemar',
+            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
         },
         'playlist_mincount': 799,
     }, {
         'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
         'info_dict': {
             'title': 'YDL_safe_search',
+            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
         },
         'playlist_count': 2,
     }, {
@@ -1184,6 +1201,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         'playlist_count': 4,
         'info_dict': {
             'title': 'JODA15',
+            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
         }
     }, {
         'note': 'Embedded SWF player',
@@ -1191,12 +1209,14 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         'playlist_count': 4,
         'info_dict': {
             'title': 'JODA7',
+            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
         }
     }, {
         'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
         'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
         'info_dict': {
-                'title': 'Uploads from Interstellar Movie',
+            'title': 'Uploads from Interstellar Movie',
+            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
         },
         'playlist_mincout': 21,
     }]
@@ -1302,6 +1322,9 @@ class YoutubeChannelIE(InfoExtractor):
         'note': 'paginated channel',
         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
         'playlist_mincount': 91,
+        'info_dict': {
+            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+        }
     }]
 
     def extract_videos_from_page(self, page):
@@ -1682,11 +1705,18 @@ class YoutubeTruncatedURLIE(InfoExtractor):
     IE_NAME = 'youtube:truncated_url'
     IE_DESC = False  # Do not list
     _VALID_URL = r'''(?x)
-        (?:https?://)?[^/]+/watch\?(?:
+        (?:https?://)?
+        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
+        (?:watch\?(?:
             feature=[a-z_]+|
-            annotation_id=annotation_[^&]+
-        )?$|
-        (?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$
+            annotation_id=annotation_[^&]+|
+            x-yt-cl=[0-9]+|
+            hl=[^&]*|
+        )?
+        |
+            attribution_link\?a=[^&]+
+        )
+        $
     '''
 
     _TESTS = [{
@@ -1695,6 +1725,15 @@ class YoutubeTruncatedURLIE(InfoExtractor):
     }, {
         'url': 'http://www.youtube.com/watch?',
         'only_matching': True,
+    }, {
+        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.youtube.com/watch?feature=foo',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.youtube.com/watch?hl=en-GB',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -1710,7 +1749,7 @@ class YoutubeTruncatedURLIE(InfoExtractor):
 class YoutubeTruncatedIDIE(InfoExtractor):
     IE_NAME = 'youtube:truncated_id'
     IE_DESC = False  # Do not list
-    _VALID_URL = r'https?://(?:www\.)youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
+    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
 
     _TESTS = [{
         'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
index b4617fbad0fc40323a129ce1218f9f97590c89bb..453e2732cc4faa453a98b153356c2188feef1d35 100644 (file)
 from __future__ import unicode_literals
 
 import json
+import operator
 import re
 
 from .utils import (
     ExtractorError,
 )
 
+_OPERATORS = [
+    ('|', operator.or_),
+    ('^', operator.xor),
+    ('&', operator.and_),
+    ('>>', operator.rshift),
+    ('<<', operator.lshift),
+    ('-', operator.sub),
+    ('+', operator.add),
+    ('%', operator.mod),
+    ('/', operator.truediv),
+    ('*', operator.mul),
+]
+_ASSIGN_OPERATORS = [(op + '=', opfunc) for op, opfunc in _OPERATORS]
+_ASSIGN_OPERATORS.append(('=', lambda cur, right: right))
+
+_NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*'
+
 
 class JSInterpreter(object):
-    def __init__(self, code):
-        self.code = code
+    def __init__(self, code, objects=None):
+        if objects is None:
+            objects = {}
+        self.code = self._remove_comments(code)
         self._functions = {}
-        self._objects = {}
+        self._objects = objects
+
+    def _remove_comments(self, code):
+        return re.sub(r'(?s)/\*.*?\*/', '', code)
 
-    def interpret_statement(self, stmt, local_vars, allow_recursion=20):
+    def interpret_statement(self, stmt, local_vars, allow_recursion=100):
         if allow_recursion < 0:
             raise ExtractorError('Recursion limit reached')
 
-        if stmt.startswith('var '):
-            stmt = stmt[len('var '):]
-        ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
-                         r'=(?P<expr>.*)$', stmt)
-        if ass_m:
-            if ass_m.groupdict().get('index'):
-                def assign(val):
-                    lvar = local_vars[ass_m.group('out')]
-                    idx = self.interpret_expression(
-                        ass_m.group('index'), local_vars, allow_recursion)
-                    assert isinstance(idx, int)
-                    lvar[idx] = val
-                    return val
-                expr = ass_m.group('expr')
-            else:
-                def assign(val):
-                    local_vars[ass_m.group('out')] = val
-                    return val
-                expr = ass_m.group('expr')
-        elif stmt.startswith('return '):
-            assign = lambda v: v
-            expr = stmt[len('return '):]
+        should_abort = False
+        stmt = stmt.lstrip()
+        stmt_m = re.match(r'var\s', stmt)
+        if stmt_m:
+            expr = stmt[len(stmt_m.group(0)):]
         else:
-            # Try interpreting it as an expression
-            expr = stmt
-            assign = lambda v: v
+            return_m = re.match(r'return(?:\s+|$)', stmt)
+            if return_m:
+                expr = stmt[len(return_m.group(0)):]
+                should_abort = True
+            else:
+                # Try interpreting it as an expression
+                expr = stmt
 
         v = self.interpret_expression(expr, local_vars, allow_recursion)
-        return assign(v)
+        return v, should_abort
 
     def interpret_expression(self, expr, local_vars, allow_recursion):
+        expr = expr.strip()
+
+        if expr == '':  # Empty expression
+            return None
+
+        if expr.startswith('('):
+            parens_count = 0
+            for m in re.finditer(r'[()]', expr):
+                if m.group(0) == '(':
+                    parens_count += 1
+                else:
+                    parens_count -= 1
+                    if parens_count == 0:
+                        sub_expr = expr[1:m.start()]
+                        sub_result = self.interpret_expression(
+                            sub_expr, local_vars, allow_recursion)
+                        remaining_expr = expr[m.end():].strip()
+                        if not remaining_expr:
+                            return sub_result
+                        else:
+                            expr = json.dumps(sub_result) + remaining_expr
+                        break
+            else:
+                raise ExtractorError('Premature end of parens in %r' % expr)
+
+        for op, opfunc in _ASSIGN_OPERATORS:
+            m = re.match(r'''(?x)
+                (?P<out>%s)(?:\[(?P<index>[^\]]+?)\])?
+                \s*%s
+                (?P<expr>.*)$''' % (_NAME_RE, re.escape(op)), expr)
+            if not m:
+                continue
+            right_val = self.interpret_expression(
+                m.group('expr'), local_vars, allow_recursion - 1)
+
+            if m.groupdict().get('index'):
+                lvar = local_vars[m.group('out')]
+                idx = self.interpret_expression(
+                    m.group('index'), local_vars, allow_recursion)
+                assert isinstance(idx, int)
+                cur = lvar[idx]
+                val = opfunc(cur, right_val)
+                lvar[idx] = val
+                return val
+            else:
+                cur = local_vars.get(m.group('out'))
+                val = opfunc(cur, right_val)
+                local_vars[m.group('out')] = val
+                return val
+
         if expr.isdigit():
             return int(expr)
 
-        if expr.isalpha():
-            return local_vars[expr]
+        var_m = re.match(
+            r'(?!if|return|true|false)(?P<name>%s)$' % _NAME_RE,
+            expr)
+        if var_m:
+            return local_vars[var_m.group('name')]
 
         try:
             return json.loads(expr)
@@ -61,7 +124,7 @@ class JSInterpreter(object):
             pass
 
         m = re.match(
-            r'^(?P<var>[$a-zA-Z0-9_]+)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$',
+            r'(?P<var>%s)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$' % _NAME_RE,
             expr)
         if m:
             variable = m.group('var')
@@ -114,23 +177,31 @@ class JSInterpreter(object):
             return obj[member](argvals)
 
         m = re.match(
-            r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
+            r'(?P<in>%s)\[(?P<idx>.+)\]$' % _NAME_RE, expr)
         if m:
             val = local_vars[m.group('in')]
             idx = self.interpret_expression(
                 m.group('idx'), local_vars, allow_recursion - 1)
             return val[idx]
 
-        m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
-        if m:
-            a = self.interpret_expression(
-                m.group('a'), local_vars, allow_recursion)
-            b = self.interpret_expression(
-                m.group('b'), local_vars, allow_recursion)
-            return a % b
+        for op, opfunc in _OPERATORS:
+            m = re.match(r'(?P<x>.+?)%s(?P<y>.+)' % re.escape(op), expr)
+            if not m:
+                continue
+            x, abort = self.interpret_statement(
+                m.group('x'), local_vars, allow_recursion - 1)
+            if abort:
+                raise ExtractorError(
+                    'Premature left-side return of %s in %r' % (op, expr))
+            y, abort = self.interpret_statement(
+                m.group('y'), local_vars, allow_recursion - 1)
+            if abort:
+                raise ExtractorError(
+                    'Premature right-side return of %s in %r' % (op, expr))
+            return opfunc(x, y)
 
         m = re.match(
-            r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
+            r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]+)\)$' % _NAME_RE, expr)
         if m:
             fname = m.group('func')
             argvals = tuple([
@@ -139,6 +210,7 @@ class JSInterpreter(object):
             if fname not in self._functions:
                 self._functions[fname] = self.extract_function(fname)
             return self._functions[fname](argvals)
+
         raise ExtractorError('Unsupported JS expression %r' % expr)
 
     def extract_object(self, objname):
@@ -162,9 +234,11 @@ class JSInterpreter(object):
 
     def extract_function(self, funcname):
         func_m = re.search(
-            (r'(?:function %s|[{;]%s\s*=\s*function)' % (
-                re.escape(funcname), re.escape(funcname))) +
-            r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
+            r'''(?x)
+                (?:function\s+%s|[{;]%s\s*=\s*function)\s*
+                \((?P<args>[^)]*)\)\s*
+                \{(?P<code>[^}]+)\}''' % (
+                re.escape(funcname), re.escape(funcname)),
             self.code)
         if func_m is None:
             raise ExtractorError('Could not find JS function %r' % funcname)
@@ -172,10 +246,16 @@ class JSInterpreter(object):
 
         return self.build_function(argnames, func_m.group('code'))
 
+    def call_function(self, funcname, *args):
+        f = self.extract_function(funcname)
+        return f(args)
+
     def build_function(self, argnames, code):
         def resf(args):
             local_vars = dict(zip(argnames, args))
             for stmt in code.split(';'):
-                res = self.interpret_statement(stmt, local_vars)
+                res, abort = self.interpret_statement(stmt, local_vars)
+                if abort:
+                    break
             return res
         return resf
index a30974efd382511b652397c475371b368f86f85b..d5e5759649a5572432b538333518c76c92ba98d7 100644 (file)
@@ -5,6 +5,7 @@ import optparse
 import shlex
 import sys
 
+from .downloader.external import list_external_downloaders
 from .compat import (
     compat_expanduser,
     compat_getenv,
@@ -199,6 +200,10 @@ def parseOpts(overrideArguments=None):
         '--playlist-end',
         dest='playlistend', metavar='NUMBER', default=None, type=int,
         help='playlist video to end at (default is last)')
+    selection.add_option(
+        '--playlist-items',
+        dest='playlist_items', metavar='ITEM_SPEC', default=None,
+        help='playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.')
     selection.add_option(
         '--match-title',
         dest='matchtitle', metavar='REGEX',
@@ -264,7 +269,7 @@ def parseOpts(overrideArguments=None):
     authentication.add_option(
         '-p', '--password',
         dest='password', metavar='PASSWORD',
-        help='account password')
+        help='account password. If this option is left out, youtube-dl will ask interactively.')
     authentication.add_option(
         '-2', '--twofactor',
         dest='twofactor', metavar='TWOFACTOR',
@@ -289,6 +294,17 @@ def parseOpts(overrideArguments=None):
             'extensions aac, m4a, mp3, mp4, ogg, wav, webm. '
             'You can also use the special names "best",'
             ' "bestvideo", "bestaudio", "worst". '
+            ' You can filter the video results by putting a condition in'
+            ' brackets, as in -f "best[height=720]"'
+            ' (or -f "[filesize>10M]"). '
+            ' This works for filesize, height, width, tbr, abr, vbr, and fps'
+            ' and the comparisons <, <=, >, >=, =, != .'
+            ' Formats for which the value is not known are excluded unless you'
+            ' put a question mark (?) after the operator.'
+            ' You can combine format filters, so  '
+            '-f "[height <=? 720][tbr>500]" '
+            'selects up to 720p videos (or videos where the height is not '
+            'known) with a bitrate of at least 500 KBit/s.'
             ' By default, youtube-dl will pick the best quality.'
             ' Use commas to download multiple audio formats, such as'
             ' -f  136/137/mp4/bestvideo,140/m4a/bestaudio.'
@@ -361,7 +377,7 @@ def parseOpts(overrideArguments=None):
     downloader.add_option(
         '-R', '--retries',
         dest='retries', metavar='RETRIES', default=10,
-        help='number of retries (default is %default)')
+        help='number of retries (default is %default), or "infinite".')
     downloader.add_option(
         '--buffer-size',
         dest='buffersize', metavar='SIZE', default='1024',
@@ -378,6 +394,15 @@ def parseOpts(overrideArguments=None):
         '--playlist-reverse',
         action='store_true',
         help='Download playlist videos in reverse order')
+    downloader.add_option(
+        '--xattr-set-filesize',
+        dest='xattr_set_filesize', action='store_true',
+        help='(experimental) set file xattribute ytdl.filesize with expected filesize')
+    downloader.add_option(
+        '--external-downloader',
+        dest='external_downloader', metavar='COMMAND',
+        help='(experimental) Use the specified external downloader. '
+             'Currently supports %s' % ','.join(list_external_downloaders()))
 
     workarounds = optparse.OptionGroup(parser, 'Workarounds')
     workarounds.add_option(
@@ -410,6 +435,10 @@ def parseOpts(overrideArguments=None):
         '--bidi-workaround',
         dest='bidi_workaround', action='store_true',
         help='Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
+    workarounds.add_option(
+        '--sleep-interval', metavar='SECONDS',
+        dest='sleep_interval', type=float,
+        help='Number of seconds to sleep before each download.')
 
     verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
     verbosity.add_option(
@@ -593,10 +622,6 @@ def parseOpts(overrideArguments=None):
         '--write-annotations',
         action='store_true', dest='writeannotations', default=False,
         help='write video annotations to a .annotation file')
-    filesystem.add_option(
-        '--write-thumbnail',
-        action='store_true', dest='writethumbnail', default=False,
-        help='write thumbnail image to disk')
     filesystem.add_option(
         '--load-info',
         dest='load_info_filename', metavar='FILE',
@@ -616,6 +641,20 @@ def parseOpts(overrideArguments=None):
         action='store_true', dest='rm_cachedir',
         help='Delete all filesystem cache files')
 
+    thumbnail = optparse.OptionGroup(parser, 'Thumbnail images')
+    thumbnail.add_option(
+        '--write-thumbnail',
+        action='store_true', dest='writethumbnail', default=False,
+        help='write thumbnail image to disk')
+    thumbnail.add_option(
+        '--write-all-thumbnails',
+        action='store_true', dest='write_all_thumbnails', default=False,
+        help='write all thumbnail image formats to disk')
+    thumbnail.add_option(
+        '--list-thumbnails',
+        action='store_true', dest='list_thumbnails', default=False,
+        help='Simulate and list all available thumbnail formats')
+
     postproc = optparse.OptionGroup(parser, 'Post-processing Options')
     postproc.add_option(
         '-x', '--extract-audio',
@@ -659,10 +698,9 @@ def parseOpts(overrideArguments=None):
     postproc.add_option(
         '--fixup',
         metavar='POLICY', dest='fixup', default='detect_or_warn',
-        help='(experimental) Automatically correct known faults of the file. '
+        help='Automatically correct known faults of the file. '
              'One of never (do nothing), warn (only emit a warning), '
-             'detect_or_warn(check whether we can do anything about it, warn '
-             'otherwise')
+             'detect_or_warn(the default; fix file if we can, warn otherwise)')
     postproc.add_option(
         '--prefer-avconv',
         action='store_false', dest='prefer_ffmpeg',
@@ -681,6 +719,7 @@ def parseOpts(overrideArguments=None):
     parser.add_option_group(selection)
     parser.add_option_group(downloader)
     parser.add_option_group(filesystem)
+    parser.add_option_group(thumbnail)
     parser.add_option_group(verbosity)
     parser.add_option_group(workarounds)
     parser.add_option_group(video_format)
index f8507951cea5b7b9cd4f9cf75b7024910c82a095..0ffbca258587651fb5a4aaba9cede2c3f87d9fc5 100644 (file)
@@ -7,6 +7,7 @@ from .ffmpeg import (
     FFmpegEmbedSubtitlePP,
     FFmpegExtractAudioPP,
     FFmpegFixupStretchedPP,
+    FFmpegFixupM4aPP,
     FFmpegMergerPP,
     FFmpegMetadataPP,
     FFmpegVideoConvertorPP,
@@ -25,6 +26,7 @@ __all__ = [
     'FFmpegAudioFixPP',
     'FFmpegEmbedSubtitlePP',
     'FFmpegExtractAudioPP',
+    'FFmpegFixupM4aPP',
     'FFmpegFixupStretchedPP',
     'FFmpegMergerPP',
     'FFmpegMetadataPP',
index 5b0ff32b14747d91b1c5c7223fa4efaae7aac574..4a4422c5a6132cd9a3dfe3116fe554ac2a374ca5 100644 (file)
@@ -509,6 +509,11 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
             metadata['artist'] = info['uploader']
         elif info.get('uploader_id') is not None:
             metadata['artist'] = info['uploader_id']
+        if info.get('description') is not None:
+            metadata['description'] = info['description']
+            metadata['comment'] = info['description']
+        if info.get('webpage_url') is not None:
+            metadata['purl'] = info['webpage_url']
 
         if not metadata:
             self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add')
@@ -560,7 +565,7 @@ class FFmpegFixupStretchedPP(FFmpegPostProcessor):
     def run(self, info):
         stretched_ratio = info.get('stretched_ratio')
         if stretched_ratio is None or stretched_ratio == 1:
-            return
+            return True, info
 
         filename = info['filepath']
         temp_filename = prepend_extension(filename, 'temp')
@@ -573,3 +578,21 @@ class FFmpegFixupStretchedPP(FFmpegPostProcessor):
         os.rename(encodeFilename(temp_filename), encodeFilename(filename))
 
         return True, info
+
+
+class FFmpegFixupM4aPP(FFmpegPostProcessor):
+    def run(self, info):
+        if info.get('container') != 'm4a_dash':
+            return True, info
+
+        filename = info['filepath']
+        temp_filename = prepend_extension(filename, 'temp')
+
+        options = ['-c', 'copy', '-f', 'mp4']
+        self._downloader.to_screen('[ffmpeg] Correcting container in "%s"' % filename)
+        self.run_ffmpeg(filename, temp_filename, options)
+
+        os.remove(encodeFilename(filename))
+        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+
+        return True, info
index 7832ed87f022d5c4891cecb43822ebebd30a6d0b..8f5463f1c9a1e1a2660867abdc0f1f62e9147032 100644 (file)
@@ -32,6 +32,7 @@ import xml.etree.ElementTree
 import zlib
 
 from .compat import (
+    compat_basestring,
     compat_chr,
     compat_getenv,
     compat_html_entities,
@@ -140,7 +141,7 @@ else:
     def find_xpath_attr(node, xpath, key, val):
         # Here comes the crazy part: In 2.6, if the xpath is a unicode,
         # .//node does not match if a node is a direct child of . !
-        if isinstance(xpath, unicode):
+        if isinstance(xpath, compat_str):
             xpath = xpath.encode('ascii')
 
         for f in node.findall(xpath):
@@ -411,25 +412,9 @@ def make_HTTPS_handler(params, **kwargs):
             pass
 
     if sys.version_info < (3, 2):
-        import httplib
-
-        class HTTPSConnectionV3(httplib.HTTPSConnection):
-            def __init__(self, *args, **kwargs):
-                httplib.HTTPSConnection.__init__(self, *args, **kwargs)
-
-            def connect(self):
-                sock = socket.create_connection((self.host, self.port), self.timeout)
-                if getattr(self, '_tunnel_host', False):
-                    self.sock = sock
-                    self._tunnel()
-                try:
-                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_TLSv1)
-                except ssl.SSLError:
-                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
-
-        return YoutubeDLHTTPSHandler(params, https_conn_class=HTTPSConnectionV3, **kwargs)
+        return YoutubeDLHTTPSHandler(params, **kwargs)
     else:  # Python < 3.4
-        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
+        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
         context.verify_mode = (ssl.CERT_NONE
                                if opts_no_check_certificate
                                else ssl.CERT_REQUIRED)
@@ -560,7 +545,9 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
                 sock = compat_socket_create_connection(
                     (self.host, self.port), self.timeout, sa)
                 if is_https:
-                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file)
+                    self.sock = ssl.wrap_socket(
+                        sock, self.key_file, self.cert_file,
+                        ssl_version=ssl.PROTOCOL_TLSv1)
                 else:
                     self.sock = sock
             hc.connect = functools.partial(_hc_connect, hc)
@@ -612,17 +599,14 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
 
     def http_request(self, req):
         for h, v in std_headers.items():
-            if h not in req.headers:
+            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
+            # The dict keys are capitalized because of this bug by urllib
+            if h.capitalize() not in req.headers:
                 req.add_header(h, v)
         if 'Youtubedl-no-compression' in req.headers:
             if 'Accept-encoding' in req.headers:
                 del req.headers['Accept-encoding']
             del req.headers['Youtubedl-no-compression']
-        if 'Youtubedl-user-agent' in req.headers:
-            if 'User-agent' in req.headers:
-                del req.headers['User-agent']
-            req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
-            del req.headers['Youtubedl-user-agent']
 
         if sys.version_info < (2, 7) and '#' in req.get_full_url():
             # Python 2.6 is brain-dead when it comes to fragments
@@ -671,9 +655,14 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
         self._params = params
 
     def https_open(self, req):
+        kwargs = {}
+        if hasattr(self, '_context'):  # python > 2.6
+            kwargs['context'] = self._context
+        if hasattr(self, '_check_hostname'):  # python 3.x
+            kwargs['check_hostname'] = self._check_hostname
         return self.do_open(functools.partial(
             _create_http_connection, self, self._https_conn_class, True),
-            req)
+            req, **kwargs)
 
 
 def parse_iso8601(date_str, delimiter='T'):
@@ -712,7 +701,7 @@ def unified_strdate(date_str, day_first=True):
     # %z (UTC offset) is only supported in python>=3.2
     date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
     # Remove AM/PM + timezone
-    date_str = re.sub(r'(?i)\s*(?:AM|PM)\s+[A-Z]+', '', date_str)
+    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
 
     format_expressions = [
         '%d %B %Y',
@@ -875,6 +864,9 @@ def _windows_write_string(s, out):
     except AttributeError:
         # If the output stream doesn't have a fileno, it's virtual
         return False
+    except io.UnsupportedOperation:
+        # Some strange Windows pseudo files?
+        return False
     if fileno not in WIN_OUTPUT_IDS:
         return False
 
@@ -1271,7 +1263,7 @@ def float_or_none(v, scale=1, invscale=1, default=None):
 
 
 def parse_duration(s):
-    if not isinstance(s, basestring if sys.version_info < (3, 0) else compat_str):
+    if not isinstance(s, compat_basestring):
         return None
 
     s = s.strip()
@@ -1283,7 +1275,10 @@ def parse_duration(s):
             (?P<only_hours>[0-9.]+)\s*(?:hours?)|
 
             (?:
-                (?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?
+                (?:
+                    (?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
+                    (?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*
+                )?
                 (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
             )?
             (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
@@ -1301,6 +1296,8 @@ def parse_duration(s):
         res += int(m.group('mins')) * 60
     if m.group('hours'):
         res += int(m.group('hours')) * 60 * 60
+    if m.group('days'):
+        res += int(m.group('days')) * 24 * 60 * 60
     if m.group('ms'):
         res += float(m.group('ms'))
     return res
@@ -1435,7 +1432,7 @@ def uppercase_escape(s):
 
 def escape_rfc3986(s):
     """Escape non-ASCII characters as suggested by RFC 3986"""
-    if sys.version_info < (3, 0) and isinstance(s, unicode):
+    if sys.version_info < (3, 0) and isinstance(s, compat_str):
         s = s.encode('utf-8')
     return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
 
@@ -1551,7 +1548,7 @@ def js_to_json(code):
     res = re.sub(r'''(?x)
         "(?:[^"\\]*(?:\\\\|\\")?)*"|
         '(?:[^'\\]*(?:\\\\|\\')?)*'|
-        [a-zA-Z_][a-zA-Z_0-9]*
+        [a-zA-Z_][.a-zA-Z_0-9]*
         ''', fix_kv, code)
     res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
     return res
@@ -1612,6 +1609,14 @@ def urlhandle_detect_ext(url_handle):
     except AttributeError:  # Python < 3
         getheader = url_handle.info().getheader
 
+    cd = getheader('Content-Disposition')
+    if cd:
+        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
+        if m:
+            e = determine_ext(m.group('filename'), default_ext=None)
+            if e:
+                return e
+
     return getheader('Content-Type').split("/")[1]
 
 
@@ -1623,3 +1628,53 @@ def age_restricted(content_limit, age_limit):
     if content_limit is None:
         return False  # Content available for everyone
     return age_limit < content_limit
+
+
+def is_html(first_bytes):
+    """ Detect whether a file contains HTML by examining its first bytes. """
+
+    BOMS = [
+        (b'\xef\xbb\xbf', 'utf-8'),
+        (b'\x00\x00\xfe\xff', 'utf-32-be'),
+        (b'\xff\xfe\x00\x00', 'utf-32-le'),
+        (b'\xff\xfe', 'utf-16-le'),
+        (b'\xfe\xff', 'utf-16-be'),
+    ]
+    for bom, enc in BOMS:
+        if first_bytes.startswith(bom):
+            s = first_bytes[len(bom):].decode(enc, 'replace')
+            break
+    else:
+        s = first_bytes.decode('utf-8', 'replace')
+
+    return re.match(r'^\s*<', s)
+
+
+def determine_protocol(info_dict):
+    protocol = info_dict.get('protocol')
+    if protocol is not None:
+        return protocol
+
+    url = info_dict['url']
+    if url.startswith('rtmp'):
+        return 'rtmp'
+    elif url.startswith('mms'):
+        return 'mms'
+    elif url.startswith('rtsp'):
+        return 'rtsp'
+
+    ext = determine_ext(url)
+    if ext == 'm3u8':
+        return 'm3u8'
+    elif ext == 'f4m':
+        return 'f4m'
+
+    return compat_urllib_parse_urlparse(url).scheme
+
+
+def render_table(header_row, data):
+    """ Render a list of rows, each as a list of values """
+    table = [header_row] + data
+    max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
+    format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
+    return '\n'.join(format_str % tuple(row) for row in table)
index 63a79a7ee7122c1886442d32c635ad66f0fd663a..1091ae61bbf7599ef59390978f893bc2cdc1f28f 100644 (file)
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2015.01.16'
+__version__ = '2015.02.06'