Raphaël G. Git Repositories - youtubedl/commitdiff
Imported Upstream version 2015.05.15
author Rogério Brito <rbrito@ime.usp.br>
Mon, 18 May 2015 10:48:41 +0000 (07:48 -0300)
committer Rogério Brito <rbrito@ime.usp.br>
Mon, 18 May 2015 10:48:41 +0000 (07:48 -0300)
206 files changed:
Makefile
README.md
README.txt
devscripts/check-porn.py
devscripts/generate_aes_testdata.py [new file with mode: 0644]
docs/supportedsites.md
test/helper.py
test/parameters.json
test/test_YoutubeDL.py
test/test_aes.py [new file with mode: 0644]
test/test_all_urls.py
test/test_download.py
test/test_execution.py
test/test_http.py
test/test_netrc.py [new file with mode: 0644]
test/test_postprocessors.py [new file with mode: 0644]
test/test_subtitles.py
test/test_unicode_literals.py
test/test_utils.py
youtube-dl
youtube-dl.1
youtube-dl.bash-completion
youtube-dl.fish
youtube-dl.zsh
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/compat.py
youtube_dl/downloader/__init__.py
youtube_dl/downloader/common.py
youtube_dl/downloader/external.py
youtube_dl/downloader/f4m.py
youtube_dl/downloader/http.py
youtube_dl/downloader/rtmp.py
youtube_dl/downloader/rtsp.py [moved from youtube_dl/downloader/mplayer.py with 64% similarity]
youtube_dl/extractor/__init__.py
youtube_dl/extractor/addanime.py
youtube_dl/extractor/adultswim.py
youtube_dl/extractor/aftenposten.py
youtube_dl/extractor/aftonbladet.py
youtube_dl/extractor/archiveorg.py
youtube_dl/extractor/ard.py
youtube_dl/extractor/arte.py
youtube_dl/extractor/atresplayer.py
youtube_dl/extractor/baidu.py [new file with mode: 0644]
youtube_dl/extractor/bambuser.py
youtube_dl/extractor/bandcamp.py
youtube_dl/extractor/bbccouk.py
youtube_dl/extractor/beatportpro.py [new file with mode: 0644]
youtube_dl/extractor/bet.py
youtube_dl/extractor/bild.py
youtube_dl/extractor/bilibili.py
youtube_dl/extractor/bliptv.py
youtube_dl/extractor/bloomberg.py
youtube_dl/extractor/br.py
youtube_dl/extractor/breakcom.py
youtube_dl/extractor/brightcove.py
youtube_dl/extractor/byutv.py
youtube_dl/extractor/canalplus.py
youtube_dl/extractor/cinemassacre.py [new file with mode: 0644]
youtube_dl/extractor/cloudy.py
youtube_dl/extractor/cnn.py
youtube_dl/extractor/comedycentral.py
youtube_dl/extractor/common.py
youtube_dl/extractor/cracked.py
youtube_dl/extractor/crooksandliars.py [new file with mode: 0644]
youtube_dl/extractor/crunchyroll.py
youtube_dl/extractor/cspan.py
youtube_dl/extractor/dailymotion.py
youtube_dl/extractor/dhm.py [new file with mode: 0644]
youtube_dl/extractor/dotsub.py
youtube_dl/extractor/douyutv.py [new file with mode: 0644]
youtube_dl/extractor/dreisat.py
youtube_dl/extractor/drtv.py
youtube_dl/extractor/dump.py
youtube_dl/extractor/dumpert.py [new file with mode: 0644]
youtube_dl/extractor/eagleplatform.py [new file with mode: 0644]
youtube_dl/extractor/eighttracks.py
youtube_dl/extractor/ellentv.py
youtube_dl/extractor/eroprofile.py
youtube_dl/extractor/escapist.py
youtube_dl/extractor/extremetube.py
youtube_dl/extractor/facebook.py
youtube_dl/extractor/flickr.py
youtube_dl/extractor/footyroom.py [new file with mode: 0644]
youtube_dl/extractor/foxsports.py [new file with mode: 0644]
youtube_dl/extractor/francetv.py
youtube_dl/extractor/funnyordie.py
youtube_dl/extractor/gamersyde.py [new file with mode: 0644]
youtube_dl/extractor/gazeta.py [new file with mode: 0644]
youtube_dl/extractor/gdcvault.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/gfycat.py [new file with mode: 0644]
youtube_dl/extractor/giga.py
youtube_dl/extractor/globo.py
youtube_dl/extractor/gorillavid.py
youtube_dl/extractor/grooveshark.py [deleted file]
youtube_dl/extractor/historicfilms.py
youtube_dl/extractor/hitbox.py
youtube_dl/extractor/iconosquare.py
youtube_dl/extractor/ign.py
youtube_dl/extractor/instagram.py
youtube_dl/extractor/jeuxvideo.py
youtube_dl/extractor/kanalplay.py [new file with mode: 0644]
youtube_dl/extractor/krasview.py
youtube_dl/extractor/letv.py
youtube_dl/extractor/libsyn.py [new file with mode: 0644]
youtube_dl/extractor/lifenews.py
youtube_dl/extractor/livestream.py
youtube_dl/extractor/lrt.py
youtube_dl/extractor/lynda.py
youtube_dl/extractor/megavideoz.py [new file with mode: 0644]
youtube_dl/extractor/miomio.py [new file with mode: 0644]
youtube_dl/extractor/mixcloud.py
youtube_dl/extractor/mlb.py
youtube_dl/extractor/moniker.py
youtube_dl/extractor/mtv.py
youtube_dl/extractor/nbc.py
youtube_dl/extractor/ndr.py
youtube_dl/extractor/netzkino.py
youtube_dl/extractor/nhl.py
youtube_dl/extractor/niconico.py
youtube_dl/extractor/noco.py
youtube_dl/extractor/npo.py
youtube_dl/extractor/nrk.py
youtube_dl/extractor/nytimes.py
youtube_dl/extractor/odnoklassniki.py
youtube_dl/extractor/ooyala.py
youtube_dl/extractor/orf.py
youtube_dl/extractor/pbs.py
youtube_dl/extractor/philharmoniedeparis.py [new file with mode: 0644]
youtube_dl/extractor/phoenix.py
youtube_dl/extractor/pladform.py [new file with mode: 0644]
youtube_dl/extractor/playfm.py
youtube_dl/extractor/playwire.py [new file with mode: 0644]
youtube_dl/extractor/pornhub.py
youtube_dl/extractor/pornovoisines.py [new file with mode: 0644]
youtube_dl/extractor/primesharetv.py [new file with mode: 0644]
youtube_dl/extractor/prosiebensat1.py
youtube_dl/extractor/qqmusic.py [new file with mode: 0644]
youtube_dl/extractor/radiojavan.py [new file with mode: 0644]
youtube_dl/extractor/rai.py
youtube_dl/extractor/redtube.py
youtube_dl/extractor/rtve.py
youtube_dl/extractor/rutv.py
youtube_dl/extractor/safari.py [new file with mode: 0644]
youtube_dl/extractor/screenwavemedia.py
youtube_dl/extractor/senateisvp.py [new file with mode: 0644]
youtube_dl/extractor/slideshare.py
youtube_dl/extractor/sohu.py
youtube_dl/extractor/soundcloud.py
youtube_dl/extractor/southpark.py
youtube_dl/extractor/spankbang.py [new file with mode: 0644]
youtube_dl/extractor/spike.py
youtube_dl/extractor/srf.py [new file with mode: 0644]
youtube_dl/extractor/ssa.py [new file with mode: 0644]
youtube_dl/extractor/svt.py [moved from youtube_dl/extractor/svtplay.py with 66% similarity]
youtube_dl/extractor/teamcoco.py
youtube_dl/extractor/ted.py
youtube_dl/extractor/testtube.py
youtube_dl/extractor/theplatform.py
youtube_dl/extractor/tmz.py
youtube_dl/extractor/tumblr.py
youtube_dl/extractor/tvplay.py
youtube_dl/extractor/twentytwotracks.py [new file with mode: 0644]
youtube_dl/extractor/twitch.py
youtube_dl/extractor/udn.py [new file with mode: 0644]
youtube_dl/extractor/ultimedia.py [new file with mode: 0644]
youtube_dl/extractor/ustream.py
youtube_dl/extractor/varzesh3.py [new file with mode: 0644]
youtube_dl/extractor/veehd.py
youtube_dl/extractor/vessel.py [new file with mode: 0644]
youtube_dl/extractor/vgtv.py
youtube_dl/extractor/vice.py
youtube_dl/extractor/videomega.py
youtube_dl/extractor/vidme.py
youtube_dl/extractor/viewster.py [new file with mode: 0644]
youtube_dl/extractor/viki.py
youtube_dl/extractor/vimeo.py
youtube_dl/extractor/vimple.py
youtube_dl/extractor/vine.py
youtube_dl/extractor/vk.py
youtube_dl/extractor/voicerepublic.py [new file with mode: 0644]
youtube_dl/extractor/vporn.py
youtube_dl/extractor/wat.py
youtube_dl/extractor/worldstarhiphop.py
youtube_dl/extractor/xstream.py [new file with mode: 0644]
youtube_dl/extractor/xuite.py
youtube_dl/extractor/yahoo.py
youtube_dl/extractor/yam.py
youtube_dl/extractor/yandexmusic.py [new file with mode: 0644]
youtube_dl/extractor/youporn.py
youtube_dl/extractor/yourupload.py
youtube_dl/extractor/youtube.py
youtube_dl/extractor/zingmp3.py
youtube_dl/options.py
youtube_dl/postprocessor/__init__.py
youtube_dl/postprocessor/atomicparsley.py [deleted file]
youtube_dl/postprocessor/common.py
youtube_dl/postprocessor/embedthumbnail.py [new file with mode: 0644]
youtube_dl/postprocessor/execafterdownload.py
youtube_dl/postprocessor/ffmpeg.py
youtube_dl/postprocessor/metadatafromtitle.py [new file with mode: 0644]
youtube_dl/postprocessor/xattrpp.py
youtube_dl/update.py
youtube_dl/utils.py
youtube_dl/version.py

index c6c76274f995a85185290d35868b974c13240aa2..fdb1abb60cacfe49295a7438e3d0f4f51c248359 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bas
 
 clean:
        rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
-       find -name "*.pyc" -delete
+       find -name "*.pyc" -delete
 
 PREFIX ?= /usr/local
 BINDIR ?= $(PREFIX)/bin
index 04f664cd351cd46adc68e559339e4436669ab67f..3d9436456c4bd29d11bf160efa27769b05edde2d 100644 (file)
--- a/README.md
+++ b/README.md
@@ -5,6 +5,7 @@ youtube-dl - download videos from youtube.com or other video platforms
 - [OPTIONS](#options)
 - [CONFIGURATION](#configuration)
 - [OUTPUT TEMPLATE](#output-template)
+- [FORMAT SELECTION](#format-selection)
 - [VIDEO SELECTION](#video-selection)
 - [FAQ](#faq)
 - [DEVELOPER INSTRUCTIONS](#developer-instructions)
@@ -45,371 +46,191 @@ which means you can modify it, redistribute it or use it however you like.
     youtube-dl [OPTIONS] URL [URL...]
 
 # OPTIONS
-    -h, --help                       print this help text and exit
-    --version                        print program version and exit
-    -U, --update                     update this program to latest version. Make
-                                     sure that you have sufficient permissions
-                                     (run with sudo if needed)
-    -i, --ignore-errors              continue on download errors, for example to
-                                     skip unavailable videos in a playlist
-    --abort-on-error                 Abort downloading of further videos (in the
-                                     playlist or the command line) if an error
-                                     occurs
-    --dump-user-agent                display the current browser identification
-    --list-extractors                List all supported extractors and the URLs
-                                     they would handle
-    --extractor-descriptions         Output descriptions of all supported
-                                     extractors
-    --default-search PREFIX          Use this prefix for unqualified URLs. For
-                                     example "gvsearch2:" downloads two videos
-                                     from google videos for  youtube-dl "large
-                                     apple". Use the value "auto" to let
-                                     youtube-dl guess ("auto_warning" to emit a
-                                     warning when guessing). "error" just throws
-                                     an error. The default value "fixup_error"
-                                     repairs broken URLs, but emits an error if
-                                     this is not possible instead of searching.
-    --ignore-config                  Do not read configuration files. When given
-                                     in the global configuration file /etc
-                                     /youtube-dl.conf: Do not read the user
-                                     configuration in ~/.config/youtube-
-                                     dl/config (%APPDATA%/youtube-dl/config.txt
-                                     on Windows)
-    --flat-playlist                  Do not extract the videos of a playlist,
-                                     only list them.
-    --no-color                       Do not emit color codes in output.
+    -h, --help                       Print this help text and exit
+    --version                        Print program version and exit
+    -U, --update                     Update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)
+    -i, --ignore-errors              Continue on download errors, for example to skip unavailable videos in a playlist
+    --abort-on-error                 Abort downloading of further videos (in the playlist or the command line) if an error occurs
+    --dump-user-agent                Display the current browser identification
+    --list-extractors                List all supported extractors and the URLs they would handle
+    --extractor-descriptions         Output descriptions of all supported extractors
+    --default-search PREFIX          Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple".
+                                     Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The
+                                     default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.
+    --ignore-config                  Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: Do not read the user configuration
+                                     in ~/.config/youtube-dl/config (%APPDATA%/youtube-dl/config.txt on Windows)
+    --flat-playlist                  Do not extract the videos of a playlist, only list them.
+    --no-color                       Do not emit color codes in output
 
 ## Network Options:
-    --proxy URL                      Use the specified HTTP/HTTPS proxy. Pass in
-                                     an empty string (--proxy "") for direct
-                                     connection
+    --proxy URL                      Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection
     --socket-timeout SECONDS         Time to wait before giving up, in seconds
-    --source-address IP              Client-side IP address to bind to
-                                     (experimental)
-    -4, --force-ipv4                 Make all connections via IPv4
-                                     (experimental)
-    -6, --force-ipv6                 Make all connections via IPv6
-                                     (experimental)
+    --source-address IP              Client-side IP address to bind to (experimental)
+    -4, --force-ipv4                 Make all connections via IPv4 (experimental)
+    -6, --force-ipv6                 Make all connections via IPv6 (experimental)
+    --cn-verification-proxy URL      Use this proxy to verify the IP address for some Chinese sites. The default proxy specified by --proxy (or none, if the option is
+                                     not present) is used for the actual downloading. (experimental)
 
 ## Video Selection:
-    --playlist-start NUMBER          playlist video to start at (default is 1)
-    --playlist-end NUMBER            playlist video to end at (default is last)
-    --playlist-items ITEM_SPEC       playlist video items to download. Specify
-                                     indices of the videos in the playlist
-                                     seperated by commas like: "--playlist-items
-                                     1,2,5,8" if you want to download videos
-                                     indexed 1, 2, 5, 8 in the playlist. You can
-                                     specify range: "--playlist-items
-                                     1-3,7,10-13", it will download the videos
-                                     at index 1, 2, 3, 7, 10, 11, 12 and 13.
-    --match-title REGEX              download only matching titles (regex or
-                                     caseless sub-string)
-    --reject-title REGEX             skip download for matching titles (regex or
-                                     caseless sub-string)
+    --playlist-start NUMBER          Playlist video to start at (default is 1)
+    --playlist-end NUMBER            Playlist video to end at (default is last)
+    --playlist-items ITEM_SPEC       Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8"
+                                     if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will
+                                     download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.
+    --match-title REGEX              Download only matching titles (regex or caseless sub-string)
+    --reject-title REGEX             Skip download for matching titles (regex or caseless sub-string)
     --max-downloads NUMBER           Abort after downloading NUMBER files
-    --min-filesize SIZE              Do not download any videos smaller than
-                                     SIZE (e.g. 50k or 44.6m)
-    --max-filesize SIZE              Do not download any videos larger than SIZE
-                                     (e.g. 50k or 44.6m)
-    --date DATE                      download only videos uploaded in this date
-    --datebefore DATE                download only videos uploaded on or before
-                                     this date (i.e. inclusive)
-    --dateafter DATE                 download only videos uploaded on or after
-                                     this date (i.e. inclusive)
-    --min-views COUNT                Do not download any videos with less than
-                                     COUNT views
-    --max-views COUNT                Do not download any videos with more than
-                                     COUNT views
-    --match-filter FILTER            (Experimental) Generic video filter.
-                                     Specify any key (see help for -o for a list
-                                     of available keys) to match if the key is
-                                     present, !key to check if the key is not
-                                     present,key > NUMBER (like "comment_count >
-                                     12", also works with >=, <, <=, !=, =) to
-                                     compare against a number, and & to require
-                                     multiple matches. Values which are not
-                                     known are excluded unless you put a
-                                     question mark (?) after the operator.For
-                                     example, to only match videos that have
-                                     been liked more than 100 times and disliked
-                                     less than 50 times (or the dislike
-                                     functionality is not available at the given
-                                     service), but who also have a description,
-                                     use  --match-filter "like_count > 100 &
+    --min-filesize SIZE              Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)
+    --max-filesize SIZE              Do not download any videos larger than SIZE (e.g. 50k or 44.6m)
+    --date DATE                      Download only videos uploaded on this date
+    --datebefore DATE                Download only videos uploaded on or before this date (i.e. inclusive)
+    --dateafter DATE                 Download only videos uploaded on or after this date (i.e. inclusive)
+    --min-views COUNT                Do not download any videos with less than COUNT views
+    --max-views COUNT                Do not download any videos with more than COUNT views
+    --match-filter FILTER            Generic video filter (experimental). Specify any key (see help for -o for a list of available keys) to match if the key is present,
+                                     !key to check if the key is not present, key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against
+                                     a number, and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the
+                                     operator. For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike
+                                     functionality is not available at the given service), but who also have a description, use  --match-filter "like_count > 100 &
                                      dislike_count <? 50 & description" .
-    --no-playlist                    If the URL refers to a video and a
-                                     playlist, download only the video.
-    --yes-playlist                   If the URL refers to a video and a
-                                     playlist, download the playlist.
-    --age-limit YEARS                download only videos suitable for the given
-                                     age
-    --download-archive FILE          Download only videos not listed in the
-                                     archive file. Record the IDs of all
-                                     downloaded videos in it.
-    --include-ads                    Download advertisements as well
-                                     (experimental)
+    --no-playlist                    Download only the video, if the URL refers to a video and a playlist.
+    --yes-playlist                   Download the playlist, if the URL refers to a video and a playlist.
+    --age-limit YEARS                Download only videos suitable for the given age
+    --download-archive FILE          Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.
+    --include-ads                    Download advertisements as well (experimental)
 
 ## Download Options:
-    -r, --rate-limit LIMIT           maximum download rate in bytes per second
-                                     (e.g. 50K or 4.2M)
-    -R, --retries RETRIES            number of retries (default is 10), or
-                                     "infinite".
-    --buffer-size SIZE               size of download buffer (e.g. 1024 or 16K)
-                                     (default is 1024)
-    --no-resize-buffer               do not automatically adjust the buffer
-                                     size. By default, the buffer size is
-                                     automatically resized from an initial value
-                                     of SIZE.
+    -r, --rate-limit LIMIT           Maximum download rate in bytes per second (e.g. 50K or 4.2M)
+    -R, --retries RETRIES            Number of retries (default is 10), or "infinite".
+    --buffer-size SIZE               Size of download buffer (e.g. 1024 or 16K) (default is 1024)
+    --no-resize-buffer               Do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.
     --playlist-reverse               Download playlist videos in reverse order
-    --xattr-set-filesize             (experimental) set file xattribute
-                                     ytdl.filesize with expected filesize
-    --hls-prefer-native              (experimental) Use the native HLS
-                                     downloader instead of ffmpeg.
-    --external-downloader COMMAND    (experimental) Use the specified external
-                                     downloader. Currently supports
-                                     aria2c,curl,wget
+    --xattr-set-filesize             Set file xattribute ytdl.filesize with expected filesize (experimental)
+    --hls-prefer-native              Use the native HLS downloader instead of ffmpeg (experimental)
+    --external-downloader COMMAND    Use the specified external downloader. Currently supports aria2c,curl,wget
+    --external-downloader-args ARGS  Give these arguments to the external downloader
 
 ## Filesystem Options:
-    -a, --batch-file FILE            file containing URLs to download ('-' for
-                                     stdin)
-    --id                             use only video ID in file name
-    -o, --output TEMPLATE            output filename template. Use %(title)s to
-                                     get the title, %(uploader)s for the
-                                     uploader name, %(uploader_id)s for the
-                                     uploader nickname if different,
-                                     %(autonumber)s to get an automatically
-                                     incremented number, %(ext)s for the
-                                     filename extension, %(format)s for the
-                                     format description (like "22 - 1280x720" or
-                                     "HD"), %(format_id)s for the unique id of
-                                     the format (like Youtube's itags: "137"),
-                                     %(upload_date)s for the upload date
-                                     (YYYYMMDD), %(extractor)s for the provider
-                                     (youtube, metacafe, etc), %(id)s for the
-                                     video id, %(playlist_title)s,
-                                     %(playlist_id)s, or %(playlist)s (=title if
-                                     present, ID otherwise) for the playlist the
-                                     video is in, %(playlist_index)s for the
-                                     position in the playlist. %(height)s and
-                                     %(width)s for the width and height of the
-                                     video format. %(resolution)s for a textual
-                                     description of the resolution of the video
-                                     format. %% for a literal percent. Use - to
-                                     output to stdout. Can also be used to
-                                     download to a different directory, for
-                                     example with -o '/my/downloads/%(uploader)s
-                                     /%(title)s-%(id)s.%(ext)s' .
-    --autonumber-size NUMBER         Specifies the number of digits in
-                                     %(autonumber)s when it is present in output
-                                     filename template or --auto-number option
-                                     is given
-    --restrict-filenames             Restrict filenames to only ASCII
-                                     characters, and avoid "&" and spaces in
-                                     filenames
-    -A, --auto-number                [deprecated; use  -o
-                                     "%(autonumber)s-%(title)s.%(ext)s" ] number
-                                     downloaded files starting from 00000
-    -t, --title                      [deprecated] use title in file name
-                                     (default)
-    -l, --literal                    [deprecated] alias of --title
-    -w, --no-overwrites              do not overwrite files
-    -c, --continue                   force resume of partially downloaded files.
-                                     By default, youtube-dl will resume
-                                     downloads if possible.
-    --no-continue                    do not resume partially downloaded files
-                                     (restart from beginning)
-    --no-part                        do not use .part files - write directly
-                                     into output file
-    --no-mtime                       do not use the Last-modified header to set
-                                     the file modification time
-    --write-description              write video description to a .description
-                                     file
-    --write-info-json                write video metadata to a .info.json file
-    --write-annotations              write video annotations to a .annotation
-                                     file
-    --load-info FILE                 json file containing the video information
-                                     (created with the "--write-json" option)
-    --cookies FILE                   file to read cookies from and dump cookie
-                                     jar in
-    --cache-dir DIR                  Location in the filesystem where youtube-dl
-                                     can store some downloaded information
-                                     permanently. By default $XDG_CACHE_HOME
-                                     /youtube-dl or ~/.cache/youtube-dl . At the
-                                     moment, only YouTube player files (for
-                                     videos with obfuscated signatures) are
-                                     cached, but that may change.
+    -a, --batch-file FILE            File containing URLs to download ('-' for stdin)
+    --id                             Use only video ID in file name
+    -o, --output TEMPLATE            Output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader
+                                     nickname if different, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(format)s for
+                                     the format description (like "22 - 1280x720" or "HD"), %(format_id)s for the unique id of the format (like YouTube's itags: "137"),
+                                     %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id,
+                                     %(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in,
+                                     %(playlist_index)s for the position in the playlist. %(height)s and %(width)s for the width and height of the video format.
+                                     %(resolution)s for a textual description of the resolution of the video format. %% for a literal percent. Use - to output to stdout.
+                                     Can also be used to download to a different directory, for example with -o '/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
+    --autonumber-size NUMBER         Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given
+    --restrict-filenames             Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames
+    -A, --auto-number                [deprecated; use  -o "%(autonumber)s-%(title)s.%(ext)s" ] Number downloaded files starting from 00000
+    -t, --title                      [deprecated] Use title in file name (default)
+    -l, --literal                    [deprecated] Alias of --title
+    -w, --no-overwrites              Do not overwrite files
+    -c, --continue                   Force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.
+    --no-continue                    Do not resume partially downloaded files (restart from beginning)
+    --no-part                        Do not use .part files - write directly into output file
+    --no-mtime                       Do not use the Last-modified header to set the file modification time
+    --write-description              Write video description to a .description file
+    --write-info-json                Write video metadata to a .info.json file
+    --write-annotations              Write video annotations to a .annotations.xml file
+    --load-info FILE                 JSON file containing the video information (created with the "--write-info-json" option)
+    --cookies FILE                   File to read cookies from and dump cookie jar in
+    --cache-dir DIR                  Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl
+                                     or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may
+                                     change.
     --no-cache-dir                   Disable filesystem caching
     --rm-cache-dir                   Delete all filesystem cache files
 
 ## Thumbnail images:
-    --write-thumbnail                write thumbnail image to disk
-    --write-all-thumbnails           write all thumbnail image formats to disk
-    --list-thumbnails                Simulate and list all available thumbnail
-                                     formats
+    --write-thumbnail                Write thumbnail image to disk
+    --write-all-thumbnails           Write all thumbnail image formats to disk
+    --list-thumbnails                Simulate and list all available thumbnail formats
 
 ## Verbosity / Simulation Options:
-    -q, --quiet                      activates quiet mode
+    -q, --quiet                      Activate quiet mode
     --no-warnings                    Ignore warnings
-    -s, --simulate                   do not download the video and do not write
-                                     anything to disk
-    --skip-download                  do not download the video
-    -g, --get-url                    simulate, quiet but print URL
-    -e, --get-title                  simulate, quiet but print title
-    --get-id                         simulate, quiet but print id
-    --get-thumbnail                  simulate, quiet but print thumbnail URL
-    --get-description                simulate, quiet but print video description
-    --get-duration                   simulate, quiet but print video length
-    --get-filename                   simulate, quiet but print output filename
-    --get-format                     simulate, quiet but print output format
-    -j, --dump-json                  simulate, quiet but print JSON information.
-                                     See --output for a description of available
-                                     keys.
-    -J, --dump-single-json           simulate, quiet but print JSON information
-                                     for each command-line argument. If the URL
-                                     refers to a playlist, dump the whole
-                                     playlist information in a single line.
-    --print-json                     Be quiet and print the video information as
-                                     JSON (video is still being downloaded).
-    --newline                        output progress bar as new lines
-    --no-progress                    do not print progress bar
-    --console-title                  display progress in console titlebar
-    -v, --verbose                    print various debugging information
-    --dump-intermediate-pages        print downloaded pages to debug problems
-                                     (very verbose)
-    --write-pages                    Write downloaded intermediary pages to
-                                     files in the current directory to debug
-                                     problems
+    -s, --simulate                   Do not download the video and do not write anything to disk
+    --skip-download                  Do not download the video
+    -g, --get-url                    Simulate, quiet but print URL
+    -e, --get-title                  Simulate, quiet but print title
+    --get-id                         Simulate, quiet but print id
+    --get-thumbnail                  Simulate, quiet but print thumbnail URL
+    --get-description                Simulate, quiet but print video description
+    --get-duration                   Simulate, quiet but print video length
+    --get-filename                   Simulate, quiet but print output filename
+    --get-format                     Simulate, quiet but print output format
+    -j, --dump-json                  Simulate, quiet but print JSON information. See --output for a description of available keys.
+    -J, --dump-single-json           Simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist
+                                     information in a single line.
+    --print-json                     Be quiet and print the video information as JSON (video is still being downloaded).
+    --newline                        Output progress bar as new lines
+    --no-progress                    Do not print progress bar
+    --console-title                  Display progress in console titlebar
+    -v, --verbose                    Print various debugging information
+    --dump-pages                     Print downloaded pages to debug problems (very verbose)
+    --write-pages                    Write downloaded intermediary pages to files in the current directory to debug problems
     --print-traffic                  Display sent and read HTTP traffic
-    -C, --call-home                  Contact the youtube-dl server for
-                                     debugging.
-    --no-call-home                   Do NOT contact the youtube-dl server for
-                                     debugging.
+    -C, --call-home                  Contact the youtube-dl server for debugging
+    --no-call-home                   Do NOT contact the youtube-dl server for debugging
 
 ## Workarounds:
     --encoding ENCODING              Force the specified encoding (experimental)
-    --no-check-certificate           Suppress HTTPS certificate validation.
-    --prefer-insecure                Use an unencrypted connection to retrieve
-                                     information about the video. (Currently
-                                     supported only for YouTube)
-    --user-agent UA                  specify a custom user agent
-    --referer URL                    specify a custom referer, use if the video
-                                     access is restricted to one domain
-    --add-header FIELD:VALUE         specify a custom HTTP header and its value,
-                                     separated by a colon ':'. You can use this
-                                     option multiple times
-    --bidi-workaround                Work around terminals that lack
-                                     bidirectional text support. Requires bidiv
-                                     or fribidi executable in PATH
-    --sleep-interval SECONDS         Number of seconds to sleep before each
-                                     download.
+    --no-check-certificate           Suppress HTTPS certificate validation
+    --prefer-insecure                Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)
+    --user-agent UA                  Specify a custom user agent
+    --referer URL                    Specify a custom referer, use if the video access is restricted to one domain
+    --add-header FIELD:VALUE         Specify a custom HTTP header and its value, separated by a colon ':'. You can use this option multiple times
+    --bidi-workaround                Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH
+    --sleep-interval SECONDS         Number of seconds to sleep before each download.
 
 ## Video Format Options:
-    -f, --format FORMAT              video format code, specify the order of
-                                     preference using slashes, as in -f 22/17/18
-                                     .  Instead of format codes, you can select
-                                     by extension for the extensions aac, m4a,
-                                     mp3, mp4, ogg, wav, webm. You can also use
-                                     the special names "best", "bestvideo",
-                                     "bestaudio", "worst".  You can filter the
-                                     video results by putting a condition in
-                                     brackets, as in -f "best[height=720]" (or
-                                     -f "[filesize>10M]").  This works for
-                                     filesize, height, width, tbr, abr, vbr,
-                                     asr, and fps and the comparisons <, <=, >,
-                                     >=, =, != and for ext, acodec, vcodec,
-                                     container, and protocol and the comparisons
-                                     =, != . Formats for which the value is not
-                                     known are excluded unless you put a
-                                     question mark (?) after the operator. You
-                                     can combine format filters, so  -f "[height
-                                     <=? 720][tbr>500]" selects up to 720p
-                                     videos (or videos where the height is not
-                                     known) with a bitrate of at least 500
-                                     KBit/s. By default, youtube-dl will pick
-                                     the best quality. Use commas to download
-                                     multiple audio formats, such as -f
-                                     136/137/mp4/bestvideo,140/m4a/bestaudio.
-                                     You can merge the video and audio of two
-                                     formats into a single file using -f <video-
-                                     format>+<audio-format> (requires ffmpeg or
-                                     avconv), for example -f
-                                     bestvideo+bestaudio.
-    --all-formats                    download all available video formats
-    --prefer-free-formats            prefer free video formats unless a specific
-                                     one is requested
-    --max-quality FORMAT             highest quality format to download
-    -F, --list-formats               list all available formats
-    --youtube-skip-dash-manifest     Do not download the DASH manifest on
-                                     YouTube videos
-    --merge-output-format FORMAT     If a merge is required (e.g.
-                                     bestvideo+bestaudio), output to given
-                                     container format. One of mkv, mp4, ogg,
-                                     webm, flv.Ignored if no merge is required
+    -f, --format FORMAT              Video format code, see the "FORMAT SELECTION" for all the info
+    --all-formats                    Download all available video formats
+    --prefer-free-formats            Prefer free video formats unless a specific one is requested
+    -F, --list-formats               List all available formats
+    --youtube-skip-dash-manifest     Do not download the DASH manifest on YouTube videos
+    --merge-output-format FORMAT     If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv. Ignored if no
+                                     merge is required
 
 ## Subtitle Options:
-    --write-sub                      write subtitle file
-    --write-auto-sub                 write automatic subtitle file (youtube
-                                     only)
-    --all-subs                       downloads all the available subtitles of
-                                     the video
-    --list-subs                      lists all available subtitles for the video
-    --sub-format FORMAT              subtitle format, accepts formats
-                                     preference, for example: "ass/srt/best"
-    --sub-lang LANGS                 languages of the subtitles to download
-                                     (optional) separated by commas, use IETF
-                                     language tags like 'en,pt'
+    --write-sub                      Write subtitle file
+    --write-auto-sub                 Write automatic subtitle file (YouTube only)
+    --all-subs                       Download all the available subtitles of the video
+    --list-subs                      List all available subtitles for the video
+    --sub-format FORMAT              Subtitle format, accepts formats preference, for example: "srt" or "ass/srt/best"
+    --sub-lang LANGS                 Languages of the subtitles to download (optional) separated by commas, use IETF language tags like 'en,pt'
 
 ## Authentication Options:
-    -u, --username USERNAME          login with this account ID
-    -p, --password PASSWORD          account password. If this option is left
-                                     out, youtube-dl will ask interactively.
-    -2, --twofactor TWOFACTOR        two-factor auth code
-    -n, --netrc                      use .netrc authentication data
-    --video-password PASSWORD        video password (vimeo, smotri)
+    -u, --username USERNAME          Login with this account ID
+    -p, --password PASSWORD          Account password. If this option is left out, youtube-dl will ask interactively.
+    -2, --twofactor TWOFACTOR        Two-factor auth code
+    -n, --netrc                      Use .netrc authentication data
+    --video-password PASSWORD        Video password (vimeo, smotri)
 
 ## Post-processing Options:
-    -x, --extract-audio              convert video files to audio-only files
-                                     (requires ffmpeg or avconv and ffprobe or
-                                     avprobe)
-    --audio-format FORMAT            "best", "aac", "vorbis", "mp3", "m4a",
-                                     "opus", or "wav"; "best" by default
-    --audio-quality QUALITY          ffmpeg/avconv audio quality specification,
-                                     insert a value between 0 (better) and 9
-                                     (worse) for VBR or a specific bitrate like
-                                     128K (default 5)
-    --recode-video FORMAT            Encode the video to another format if
-                                     necessary (currently supported:
-                                     mp4|flv|ogg|webm|mkv)
-    -k, --keep-video                 keeps the video file on disk after the
-                                     post-processing; the video is erased by
-                                     default
-    --no-post-overwrites             do not overwrite post-processed files; the
-                                     post-processed files are overwritten by
-                                     default
-    --embed-subs                     embed subtitles in the video (only for mp4
-                                     videos)
-    --embed-thumbnail                embed thumbnail in the audio as cover art
-    --add-metadata                   write metadata to the video file
-    --xattrs                         write metadata to the video file's xattrs
-                                     (using dublin core and xdg standards)
-    --fixup POLICY                   Automatically correct known faults of the
-                                     file. One of never (do nothing), warn (only
-                                     emit a warning), detect_or_warn(the
-                                     default; fix file if we can, warn
-                                     otherwise)
-    --prefer-avconv                  Prefer avconv over ffmpeg for running the
-                                     postprocessors (default)
-    --prefer-ffmpeg                  Prefer ffmpeg over avconv for running the
-                                     postprocessors
-    --ffmpeg-location PATH           Location of the ffmpeg/avconv binary;
-                                     either the path to the binary or its
-                                     containing directory.
-    --exec CMD                       Execute a command on the file after
-                                     downloading, similar to find's -exec
-                                     syntax. Example: --exec 'adb push {}
-                                     /sdcard/Music/ && rm {}'
-    --convert-subtitles FORMAT       Convert the subtitles to other format
-                                     (currently supported: srt|ass|vtt)
+    -x, --extract-audio              Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)
+    --audio-format FORMAT            Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "best" by default
+    --audio-quality QUALITY          Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default
+                                     5)
+    --recode-video FORMAT            Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)
+    -k, --keep-video                 Keep the video file on disk after the post-processing; the video is erased by default
+    --no-post-overwrites             Do not overwrite post-processed files; the post-processed files are overwritten by default
+    --embed-subs                     Embed subtitles in the video (only for mkv and mp4 videos)
+    --embed-thumbnail                Embed thumbnail in the audio as cover art
+    --add-metadata                   Write metadata to the video file
+    --metadata-from-title FORMAT     Parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed
+                                     parameters replace existing values. Additional templates: %(album)s, %(artist)s. Example: --metadata-from-title "%(artist)s -
+                                     %(title)s" matches a title like "Coldplay - Paradise"
+    --xattrs                         Write metadata to the video file's xattrs (using dublin core and xdg standards)
+    --fixup POLICY                   Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn (the default;
+                                     fix file if we can, warn otherwise)
+    --prefer-avconv                  Prefer avconv over ffmpeg for running the postprocessors (default)
+    --prefer-ffmpeg                  Prefer ffmpeg over avconv for running the postprocessors
+    --ffmpeg-location PATH           Location of the ffmpeg/avconv binary; either the path to the binary or its containing directory.
+    --exec CMD                       Execute a command on the file after downloading, similar to find's -exec syntax. Example: --exec 'adb push {} /sdcard/Music/ && rm
+                                     {}'
+    --convert-subtitles FORMAT       Convert the subtitles to other format (currently supported: srt|ass|vtt)
 
 # CONFIGURATION
 
@@ -441,6 +262,17 @@ $ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filena
 youtube-dl_test_video_.mp4          # A simple file name
 ```
 
+# FORMAT SELECTION
+
+By default youtube-dl tries to download the best quality, but sometimes you may want to download a different format.
+The simplest case is requesting a specific format, for example `-f 22`. You can get the list of available formats using `--list-formats`, you can also use a file extension (currently it supports aac, m4a, mp3, mp4, ogg, wav, webm) or the special names `best`, `bestvideo`, `bestaudio` and `worst`.
+
+If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes, as in `-f 22/17/18`. You can also filter the video results by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`).  This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec, vcodec, container, and protocol and the comparisons =, != . Formats for which the value is not known are excluded unless you put a question mark (?) after the operator. You can combine format filters, so  `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. Use commas to download multiple formats, such as `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`. You can merge the video and audio of two formats into a single file using `-f <video-format>+<audio-format>` (requires ffmpeg or avconv), for example `-f bestvideo+bestaudio`.
+
+Since the end of April 2015 and version 2015.04.26 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see #5447, #5456). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some dash formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded and muxed.
+
+If you want to preserve the old format selection behavior (prior to youtube-dl 2015.04.26), i.e. you want to download best available quality media served as a single file, you should explicitly specify your choice with `-f best`. You may want to add it to the [configuration file](#configuration) in order not to type it every time you run youtube-dl.
+
 # VIDEO SELECTION
 
 Videos can be filtered by their upload date using the options `--date`, `--datebefore` or `--dateafter`, they accept dates in two formats:
@@ -491,9 +323,9 @@ YouTube changed their playlist format in March 2014 and later on, so you'll need
 
 If you have installed youtube-dl with a package manager, pip, setup.py or a tarball, please use that to update. Note that Ubuntu packages do not seem to get updated anymore. Since we are not affiliated with Ubuntu, there is little we can do. Feel free to [report bugs](https://bugs.launchpad.net/ubuntu/+source/youtube-dl/+filebug) to the [Ubuntu packaging guys](mailto:ubuntu-motu@lists.ubuntu.com?subject=outdated%20version%20of%20youtube-dl) - all they have to do is update the package to a somewhat recent version. See above for a way to update.
 
-### Do I always have to pass in `--max-quality FORMAT`, or `-citw`?
+### Do I always have to pass `-citw`?
 
-By default, youtube-dl intends to have the best options (incidentally, if you have a convincing case that these should be different, [please file an issue where you explain that](https://yt-dl.org/bug)). Therefore, it is unnecessary and sometimes harmful to copy long option strings from webpages. In particular, `--max-quality` *limits* the video quality (so if you want the best quality, do NOT pass it in), and the only option out of `-citw` that is regularly useful is `-i`.
+By default, youtube-dl intends to have the best options (incidentally, if you have a convincing case that these should be different, [please file an issue where you explain that](https://yt-dl.org/bug)). Therefore, it is unnecessary and sometimes harmful to copy long option strings from webpages. In particular, the only option out of `-citw` that is regularly useful is `-i`.
 
 ### Can you please put the -b option back?
 
@@ -525,10 +357,30 @@ YouTube has switched to a new video info format in July 2011 which is not suppor
 
 YouTube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
 
+### Video URL contains an ampersand and I'm getting some strange output `[1] 2839` or `'v' is not recognized as an internal or external command` ###
+
+That's actually the output from your shell. Since the ampersand is one of the special shell characters, it's interpreted by the shell, preventing you from passing the whole URL to youtube-dl. To prevent your shell from interpreting the ampersands (or any other special characters) you have to either put the whole URL in quotes or escape them with a backslash (which approach will work depends on your shell).
+
+For example, if your URL is https://www.youtube.com/watch?t=4&v=BaW_jenozKc you should end up with the following command:
+
+```youtube-dl 'https://www.youtube.com/watch?t=4&v=BaW_jenozKc'```
+
+or
+
+```youtube-dl https://www.youtube.com/watch?t=4\&v=BaW_jenozKc```
+
+For Windows you have to use the double quotes:
+
+```youtube-dl "https://www.youtube.com/watch?t=4&v=BaW_jenozKc"```
+
 ### ExtractorError: Could not find JS function u'OF'
 
 In February 2015, the new YouTube player contained a character sequence in a string that was misinterpreted by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
 
+### HTTP Error 429: Too Many Requests or 402: Payment Required
+
+These two error codes indicate that the service is blocking your IP address because of overuse. Contact the service and ask them to unblock your IP address, or - if you have acquired a whitelisted IP address already - use the [`--proxy` or `--source-address` options](#network-options) to select another IP address.
+
 ### SyntaxError: Non-ASCII character ###
 
 The error
@@ -573,6 +425,18 @@ A note on the service that they don't host the infringing content, but just link
 
 Support requests for services that **do** purchase the rights to distribute their content are perfectly fine though. If in doubt, you can simply include a source that mentions the legitimate purchase of content.
 
+### How can I speed up work on my issue?
+
+(Also known as: Help, my important issue is not being solved!) The youtube-dl core developer team is quite small. While we do our best to solve as many issues as possible, sometimes that can take quite a while. To speed up your issue, here's what you can do:
+
+First of all, please do report the issue [at our issue tracker](https://yt-dl.org/bugs). That allows us to coordinate all efforts by users and developers, and serves as a unified point. Unfortunately, the youtube-dl project has grown too large to use personal email as an effective communication channel.
+
+Please read the [bug reporting instructions](#bugs) below. A lot of bugs lack all the necessary information. If you can, offer proxy, VPN, or shell access to the youtube-dl developers. If you are able to, test the issue from multiple computers in multiple countries to exclude local censorship or misconfiguration issues.
+
+If nobody is interested in solving your issue, you are welcome to take matters into your own hands and submit a pull request (or coerce/pay somebody else to do so).
+
+Feel free to bump the issue from time to time by writing a small comment ("Issue is still present in youtube-dl version ...from France, but fixed from Belgium"), but please not more than once a month. Please do not declare your issue as `important` or `urgent`.
+
 ### How can I detect whether a given URL is supported by youtube-dl?
 
 For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
@@ -672,6 +536,7 @@ youtube-dl makes the best effort to be a good command-line program, and thus sho
 From a Python program, you can embed youtube-dl in a more powerful fashion, like this:
 
 ```python
+from __future__ import unicode_literals
 import youtube_dl
 
 ydl_opts = {}
@@ -684,6 +549,7 @@ Most likely, you'll want to use various options. For a list of what can be done,
 Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
 
 ```python
+from __future__ import unicode_literals
 import youtube_dl
 
 
@@ -741,7 +607,9 @@ If your report is shorter than two lines, it is almost certainly missing some of
 
 For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the -v flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
 
-Site support requests **must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
+If your server has multiple IPs or you suspect censorship, adding --call-home may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
+
+**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
 
 ###  Are you using the latest version?
 
index 173daf0b6d3eda53e5249e3cd35fcff7c1b78b87..25ca37155c22e2d142449a320c44d5c8912034ee 100644 (file)
@@ -5,14 +5,17 @@ youtube-dl - download videos from youtube.com or other video platforms
 -   OPTIONS
 -   CONFIGURATION
 -   OUTPUT TEMPLATE
+-   FORMAT SELECTION
 -   VIDEO SELECTION
 -   FAQ
 -   DEVELOPER INSTRUCTIONS
 -   BUGS
 -   COPYRIGHT
 
+
+
 INSTALLATION
-============
+
 
 To install it right away for all UNIX users (Linux, OS X, etc.), type:
 
@@ -27,7 +30,7 @@ If you do not have curl, you can alternatively use a recent wget:
 Windows users can download a .exe file and place it in their home
 directory or any other location on their PATH.
 
-OS X users can install youtube-dl with Homebrew.
+OS X users can install YOUTUBE-DL with Homebrew.
 
     brew install youtube-dl
 
@@ -40,10 +43,12 @@ check out and work with the git repository. For further options,
 including PGP signatures, see
 https://rg3.github.io/youtube-dl/download.html .
 
+
+
 DESCRIPTION
-===========
 
-youtube-dl is a small command-line program to download videos from
+
+YOUTUBE-DL is a small command-line program to download videos from
 YouTube.com and a few more sites. It requires the Python interpreter,
 version 2.6, 2.7, or 3.2+, and it is not platform specific. It should
 work on your Unix box, on Windows or on Mac OS X. It is released to the
@@ -52,399 +57,223 @@ however you like.
 
     youtube-dl [OPTIONS] URL [URL...]
 
+
+
 OPTIONS
-=======
-
-    -h, --help                       print this help text and exit
-    --version                        print program version and exit
-    -U, --update                     update this program to latest version. Make
-                                     sure that you have sufficient permissions
-                                     (run with sudo if needed)
-    -i, --ignore-errors              continue on download errors, for example to
-                                     skip unavailable videos in a playlist
-    --abort-on-error                 Abort downloading of further videos (in the
-                                     playlist or the command line) if an error
-                                     occurs
-    --dump-user-agent                display the current browser identification
-    --list-extractors                List all supported extractors and the URLs
-                                     they would handle
-    --extractor-descriptions         Output descriptions of all supported
-                                     extractors
-    --default-search PREFIX          Use this prefix for unqualified URLs. For
-                                     example "gvsearch2:" downloads two videos
-                                     from google videos for  youtube-dl "large
-                                     apple". Use the value "auto" to let
-                                     youtube-dl guess ("auto_warning" to emit a
-                                     warning when guessing). "error" just throws
-                                     an error. The default value "fixup_error"
-                                     repairs broken URLs, but emits an error if
-                                     this is not possible instead of searching.
-    --ignore-config                  Do not read configuration files. When given
-                                     in the global configuration file /etc
-                                     /youtube-dl.conf: Do not read the user
-                                     configuration in ~/.config/youtube-
-                                     dl/config (%APPDATA%/youtube-dl/config.txt
-                                     on Windows)
-    --flat-playlist                  Do not extract the videos of a playlist,
-                                     only list them.
-    --no-color                       Do not emit color codes in output.
+
+
+    -h, --help                       Print this help text and exit
+    --version                        Print program version and exit
+    -U, --update                     Update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)
+    -i, --ignore-errors              Continue on download errors, for example to skip unavailable videos in a playlist
+    --abort-on-error                 Abort downloading of further videos (in the playlist or the command line) if an error occurs
+    --dump-user-agent                Display the current browser identification
+    --list-extractors                List all supported extractors and the URLs they would handle
+    --extractor-descriptions         Output descriptions of all supported extractors
+    --default-search PREFIX          Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple".
+                                     Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The
+                                     default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.
+    --ignore-config                  Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: Do not read the user configuration
+                                     in ~/.config/youtube-dl/config (%APPDATA%/youtube-dl/config.txt on Windows)
+    --flat-playlist                  Do not extract the videos of a playlist, only list them.
+    --no-color                       Do not emit color codes in output
+
 
 Network Options:
-----------------
 
-    --proxy URL                      Use the specified HTTP/HTTPS proxy. Pass in
-                                     an empty string (--proxy "") for direct
-                                     connection
+    --proxy URL                      Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection
     --socket-timeout SECONDS         Time to wait before giving up, in seconds
-    --source-address IP              Client-side IP address to bind to
-                                     (experimental)
-    -4, --force-ipv4                 Make all connections via IPv4
-                                     (experimental)
-    -6, --force-ipv6                 Make all connections via IPv6
-                                     (experimental)
+    --source-address IP              Client-side IP address to bind to (experimental)
+    -4, --force-ipv4                 Make all connections via IPv4 (experimental)
+    -6, --force-ipv6                 Make all connections via IPv6 (experimental)
+    --cn-verification-proxy URL      Use this proxy to verify the IP address for some Chinese sites. The default proxy specified by --proxy (or none, if the option is
+                                     not present) is used for the actual downloading. (experimental)
+
 
 Video Selection:
-----------------
-
-    --playlist-start NUMBER          playlist video to start at (default is 1)
-    --playlist-end NUMBER            playlist video to end at (default is last)
-    --playlist-items ITEM_SPEC       playlist video items to download. Specify
-                                     indices of the videos in the playlist
-                                     seperated by commas like: "--playlist-items
-                                     1,2,5,8" if you want to download videos
-                                     indexed 1, 2, 5, 8 in the playlist. You can
-                                     specify range: "--playlist-items
-                                     1-3,7,10-13", it will download the videos
-                                     at index 1, 2, 3, 7, 10, 11, 12 and 13.
-    --match-title REGEX              download only matching titles (regex or
-                                     caseless sub-string)
-    --reject-title REGEX             skip download for matching titles (regex or
-                                     caseless sub-string)
+
+    --playlist-start NUMBER          Playlist video to start at (default is 1)
+    --playlist-end NUMBER            Playlist video to end at (default is last)
+    --playlist-items ITEM_SPEC       Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8"
+                                     if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will
+                                     download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.
+    --match-title REGEX              Download only matching titles (regex or caseless sub-string)
+    --reject-title REGEX             Skip download for matching titles (regex or caseless sub-string)
     --max-downloads NUMBER           Abort after downloading NUMBER files
-    --min-filesize SIZE              Do not download any videos smaller than
-                                     SIZE (e.g. 50k or 44.6m)
-    --max-filesize SIZE              Do not download any videos larger than SIZE
-                                     (e.g. 50k or 44.6m)
-    --date DATE                      download only videos uploaded in this date
-    --datebefore DATE                download only videos uploaded on or before
-                                     this date (i.e. inclusive)
-    --dateafter DATE                 download only videos uploaded on or after
-                                     this date (i.e. inclusive)
-    --min-views COUNT                Do not download any videos with less than
-                                     COUNT views
-    --max-views COUNT                Do not download any videos with more than
-                                     COUNT views
-    --match-filter FILTER            (Experimental) Generic video filter.
-                                     Specify any key (see help for -o for a list
-                                     of available keys) to match if the key is
-                                     present, !key to check if the key is not
-                                     present,key > NUMBER (like "comment_count >
-                                     12", also works with >=, <, <=, !=, =) to
-                                     compare against a number, and & to require
-                                     multiple matches. Values which are not
-                                     known are excluded unless you put a
-                                     question mark (?) after the operator.For
-                                     example, to only match videos that have
-                                     been liked more than 100 times and disliked
-                                     less than 50 times (or the dislike
-                                     functionality is not available at the given
-                                     service), but who also have a description,
-                                     use  --match-filter "like_count > 100 &
+    --min-filesize SIZE              Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)
+    --max-filesize SIZE              Do not download any videos larger than SIZE (e.g. 50k or 44.6m)
+    --date DATE                      Download only videos uploaded on this date
+    --datebefore DATE                Download only videos uploaded on or before this date (i.e. inclusive)
+    --dateafter DATE                 Download only videos uploaded on or after this date (i.e. inclusive)
+    --min-views COUNT                Do not download any videos with less than COUNT views
+    --max-views COUNT                Do not download any videos with more than COUNT views
+    --match-filter FILTER            Generic video filter (experimental). Specify any key (see help for -o for a list of available keys) to match if the key is present,
+                                     !key to check if the key is not present, key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against
+                                     a number, and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the
+                                     operator. For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike
+                                     functionality is not available at the given service), but that also have a description, use  --match-filter "like_count > 100 &
                                      dislike_count <? 50 & description" .
-    --no-playlist                    If the URL refers to a video and a
-                                     playlist, download only the video.
-    --yes-playlist                   If the URL refers to a video and a
-                                     playlist, download the playlist.
-    --age-limit YEARS                download only videos suitable for the given
-                                     age
-    --download-archive FILE          Download only videos not listed in the
-                                     archive file. Record the IDs of all
-                                     downloaded videos in it.
-    --include-ads                    Download advertisements as well
-                                     (experimental)
+    --no-playlist                    Download only the video, if the URL refers to a video and a playlist.
+    --yes-playlist                   Download the playlist, if the URL refers to a video and a playlist.
+    --age-limit YEARS                Download only videos suitable for the given age
+    --download-archive FILE          Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.
+    --include-ads                    Download advertisements as well (experimental)
+
 
 Download Options:
------------------
-
-    -r, --rate-limit LIMIT           maximum download rate in bytes per second
-                                     (e.g. 50K or 4.2M)
-    -R, --retries RETRIES            number of retries (default is 10), or
-                                     "infinite".
-    --buffer-size SIZE               size of download buffer (e.g. 1024 or 16K)
-                                     (default is 1024)
-    --no-resize-buffer               do not automatically adjust the buffer
-                                     size. By default, the buffer size is
-                                     automatically resized from an initial value
-                                     of SIZE.
+
+    -r, --rate-limit LIMIT           Maximum download rate in bytes per second (e.g. 50K or 4.2M)
+    -R, --retries RETRIES            Number of retries (default is 10), or "infinite".
+    --buffer-size SIZE               Size of download buffer (e.g. 1024 or 16K) (default is 1024)
+    --no-resize-buffer               Do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.
     --playlist-reverse               Download playlist videos in reverse order
-    --xattr-set-filesize             (experimental) set file xattribute
-                                     ytdl.filesize with expected filesize
-    --hls-prefer-native              (experimental) Use the native HLS
-                                     downloader instead of ffmpeg.
-    --external-downloader COMMAND    (experimental) Use the specified external
-                                     downloader. Currently supports
-                                     aria2c,curl,wget
+    --xattr-set-filesize             Set file xattribute ytdl.filesize with expected filesize (experimental)
+    --hls-prefer-native              Use the native HLS downloader instead of ffmpeg (experimental)
+    --external-downloader COMMAND    Use the specified external downloader. Currently supports aria2c, curl, wget
+    --external-downloader-args ARGS  Give these arguments to the external downloader
+
 
 Filesystem Options:
--------------------
-
-    -a, --batch-file FILE            file containing URLs to download ('-' for
-                                     stdin)
-    --id                             use only video ID in file name
-    -o, --output TEMPLATE            output filename template. Use %(title)s to
-                                     get the title, %(uploader)s for the
-                                     uploader name, %(uploader_id)s for the
-                                     uploader nickname if different,
-                                     %(autonumber)s to get an automatically
-                                     incremented number, %(ext)s for the
-                                     filename extension, %(format)s for the
-                                     format description (like "22 - 1280x720" or
-                                     "HD"), %(format_id)s for the unique id of
-                                     the format (like Youtube's itags: "137"),
-                                     %(upload_date)s for the upload date
-                                     (YYYYMMDD), %(extractor)s for the provider
-                                     (youtube, metacafe, etc), %(id)s for the
-                                     video id, %(playlist_title)s,
-                                     %(playlist_id)s, or %(playlist)s (=title if
-                                     present, ID otherwise) for the playlist the
-                                     video is in, %(playlist_index)s for the
-                                     position in the playlist. %(height)s and
-                                     %(width)s for the width and height of the
-                                     video format. %(resolution)s for a textual
-                                     description of the resolution of the video
-                                     format. %% for a literal percent. Use - to
-                                     output to stdout. Can also be used to
-                                     download to a different directory, for
-                                     example with -o '/my/downloads/%(uploader)s
-                                     /%(title)s-%(id)s.%(ext)s' .
-    --autonumber-size NUMBER         Specifies the number of digits in
-                                     %(autonumber)s when it is present in output
-                                     filename template or --auto-number option
-                                     is given
-    --restrict-filenames             Restrict filenames to only ASCII
-                                     characters, and avoid "&" and spaces in
-                                     filenames
-    -A, --auto-number                [deprecated; use  -o
-                                     "%(autonumber)s-%(title)s.%(ext)s" ] number
-                                     downloaded files starting from 00000
-    -t, --title                      [deprecated] use title in file name
-                                     (default)
-    -l, --literal                    [deprecated] alias of --title
-    -w, --no-overwrites              do not overwrite files
-    -c, --continue                   force resume of partially downloaded files.
-                                     By default, youtube-dl will resume
-                                     downloads if possible.
-    --no-continue                    do not resume partially downloaded files
-                                     (restart from beginning)
-    --no-part                        do not use .part files - write directly
-                                     into output file
-    --no-mtime                       do not use the Last-modified header to set
-                                     the file modification time
-    --write-description              write video description to a .description
-                                     file
-    --write-info-json                write video metadata to a .info.json file
-    --write-annotations              write video annotations to a .annotation
-                                     file
-    --load-info FILE                 json file containing the video information
-                                     (created with the "--write-json" option)
-    --cookies FILE                   file to read cookies from and dump cookie
-                                     jar in
-    --cache-dir DIR                  Location in the filesystem where youtube-dl
-                                     can store some downloaded information
-                                     permanently. By default $XDG_CACHE_HOME
-                                     /youtube-dl or ~/.cache/youtube-dl . At the
-                                     moment, only YouTube player files (for
-                                     videos with obfuscated signatures) are
-                                     cached, but that may change.
+
+    -a, --batch-file FILE            File containing URLs to download ('-' for stdin)
+    --id                             Use only video ID in file name
+    -o, --output TEMPLATE            Output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader
+                                     nickname if different, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(format)s for
+                                     the format description (like "22 - 1280x720" or "HD"), %(format_id)s for the unique id of the format (like YouTube's itags: "137"),
+                                     %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id,
+                                     %(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in,
+                                     %(playlist_index)s for the position in the playlist. %(height)s and %(width)s for the width and height of the video format.
+                                     %(resolution)s for a textual description of the resolution of the video format. %% for a literal percent. Use - to output to stdout.
+                                     Can also be used to download to a different directory, for example with -o '/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
+    --autonumber-size NUMBER         Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given
+    --restrict-filenames             Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames
+    -A, --auto-number                [deprecated; use  -o "%(autonumber)s-%(title)s.%(ext)s" ] Number downloaded files starting from 00000
+    -t, --title                      [deprecated] Use title in file name (default)
+    -l, --literal                    [deprecated] Alias of --title
+    -w, --no-overwrites              Do not overwrite files
+    -c, --continue                   Force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.
+    --no-continue                    Do not resume partially downloaded files (restart from beginning)
+    --no-part                        Do not use .part files - write directly into output file
+    --no-mtime                       Do not use the Last-modified header to set the file modification time
+    --write-description              Write video description to a .description file
+    --write-info-json                Write video metadata to a .info.json file
+    --write-annotations              Write video annotations to a .annotations.xml file
+    --load-info FILE                 JSON file containing the video information (created with the "--write-info-json" option)
+    --cookies FILE                   File to read cookies from and dump cookie jar in
+    --cache-dir DIR                  Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl
+                                     or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may
+                                     change.
     --no-cache-dir                   Disable filesystem caching
     --rm-cache-dir                   Delete all filesystem cache files
 
+
 Thumbnail images:
------------------
 
-    --write-thumbnail                write thumbnail image to disk
-    --write-all-thumbnails           write all thumbnail image formats to disk
-    --list-thumbnails                Simulate and list all available thumbnail
-                                     formats
+    --write-thumbnail                Write thumbnail image to disk
+    --write-all-thumbnails           Write all thumbnail image formats to disk
+    --list-thumbnails                Simulate and list all available thumbnail formats
+
 
 Verbosity / Simulation Options:
--------------------------------
 
-    -q, --quiet                      activates quiet mode
+    -q, --quiet                      Activate quiet mode
     --no-warnings                    Ignore warnings
-    -s, --simulate                   do not download the video and do not write
-                                     anything to disk
-    --skip-download                  do not download the video
-    -g, --get-url                    simulate, quiet but print URL
-    -e, --get-title                  simulate, quiet but print title
-    --get-id                         simulate, quiet but print id
-    --get-thumbnail                  simulate, quiet but print thumbnail URL
-    --get-description                simulate, quiet but print video description
-    --get-duration                   simulate, quiet but print video length
-    --get-filename                   simulate, quiet but print output filename
-    --get-format                     simulate, quiet but print output format
-    -j, --dump-json                  simulate, quiet but print JSON information.
-                                     See --output for a description of available
-                                     keys.
-    -J, --dump-single-json           simulate, quiet but print JSON information
-                                     for each command-line argument. If the URL
-                                     refers to a playlist, dump the whole
-                                     playlist information in a single line.
-    --print-json                     Be quiet and print the video information as
-                                     JSON (video is still being downloaded).
-    --newline                        output progress bar as new lines
-    --no-progress                    do not print progress bar
-    --console-title                  display progress in console titlebar
-    -v, --verbose                    print various debugging information
-    --dump-intermediate-pages        print downloaded pages to debug problems
-                                     (very verbose)
-    --write-pages                    Write downloaded intermediary pages to
-                                     files in the current directory to debug
-                                     problems
+    -s, --simulate                   Do not download the video and do not write anything to disk
+    --skip-download                  Do not download the video
+    -g, --get-url                    Simulate, quiet but print URL
+    -e, --get-title                  Simulate, quiet but print title
+    --get-id                         Simulate, quiet but print id
+    --get-thumbnail                  Simulate, quiet but print thumbnail URL
+    --get-description                Simulate, quiet but print video description
+    --get-duration                   Simulate, quiet but print video length
+    --get-filename                   Simulate, quiet but print output filename
+    --get-format                     Simulate, quiet but print output format
+    -j, --dump-json                  Simulate, quiet but print JSON information. See --output for a description of available keys.
+    -J, --dump-single-json           Simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist
+                                     information in a single line.
+    --print-json                     Be quiet and print the video information as JSON (video is still being downloaded).
+    --newline                        Output progress bar as new lines
+    --no-progress                    Do not print progress bar
+    --console-title                  Display progress in console titlebar
+    -v, --verbose                    Print various debugging information
+    --dump-pages                     Print downloaded pages to debug problems (very verbose)
+    --write-pages                    Write downloaded intermediary pages to files in the current directory to debug problems
     --print-traffic                  Display sent and read HTTP traffic
-    -C, --call-home                  Contact the youtube-dl server for
-                                     debugging.
-    --no-call-home                   Do NOT contact the youtube-dl server for
-                                     debugging.
+    -C, --call-home                  Contact the youtube-dl server for debugging
+    --no-call-home                   Do NOT contact the youtube-dl server for debugging
+
 
 Workarounds:
-------------
 
     --encoding ENCODING              Force the specified encoding (experimental)
-    --no-check-certificate           Suppress HTTPS certificate validation.
-    --prefer-insecure                Use an unencrypted connection to retrieve
-                                     information about the video. (Currently
-                                     supported only for YouTube)
-    --user-agent UA                  specify a custom user agent
-    --referer URL                    specify a custom referer, use if the video
-                                     access is restricted to one domain
-    --add-header FIELD:VALUE         specify a custom HTTP header and its value,
-                                     separated by a colon ':'. You can use this
-                                     option multiple times
-    --bidi-workaround                Work around terminals that lack
-                                     bidirectional text support. Requires bidiv
-                                     or fribidi executable in PATH
-    --sleep-interval SECONDS         Number of seconds to sleep before each
-                                     download.
+    --no-check-certificate           Suppress HTTPS certificate validation
+    --prefer-insecure                Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)
+    --user-agent UA                  Specify a custom user agent
+    --referer URL                    Specify a custom referer, use if the video access is restricted to one domain
+    --add-header FIELD:VALUE         Specify a custom HTTP header and its value, separated by a colon ':'. You can use this option multiple times
+    --bidi-workaround                Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH
+    --sleep-interval SECONDS         Number of seconds to sleep before each download.
+
 
 Video Format Options:
----------------------
-
-    -f, --format FORMAT              video format code, specify the order of
-                                     preference using slashes, as in -f 22/17/18
-                                     .  Instead of format codes, you can select
-                                     by extension for the extensions aac, m4a,
-                                     mp3, mp4, ogg, wav, webm. You can also use
-                                     the special names "best", "bestvideo",
-                                     "bestaudio", "worst".  You can filter the
-                                     video results by putting a condition in
-                                     brackets, as in -f "best[height=720]" (or
-                                     -f "[filesize>10M]").  This works for
-                                     filesize, height, width, tbr, abr, vbr,
-                                     asr, and fps and the comparisons <, <=, >,
-                                     >=, =, != and for ext, acodec, vcodec,
-                                     container, and protocol and the comparisons
-                                     =, != . Formats for which the value is not
-                                     known are excluded unless you put a
-                                     question mark (?) after the operator. You
-                                     can combine format filters, so  -f "[height
-                                     <=? 720][tbr>500]" selects up to 720p
-                                     videos (or videos where the height is not
-                                     known) with a bitrate of at least 500
-                                     KBit/s. By default, youtube-dl will pick
-                                     the best quality. Use commas to download
-                                     multiple audio formats, such as -f
-                                     136/137/mp4/bestvideo,140/m4a/bestaudio.
-                                     You can merge the video and audio of two
-                                     formats into a single file using -f <video-
-                                     format>+<audio-format> (requires ffmpeg or
-                                     avconv), for example -f
-                                     bestvideo+bestaudio.
-    --all-formats                    download all available video formats
-    --prefer-free-formats            prefer free video formats unless a specific
-                                     one is requested
-    --max-quality FORMAT             highest quality format to download
-    -F, --list-formats               list all available formats
-    --youtube-skip-dash-manifest     Do not download the DASH manifest on
-                                     YouTube videos
-    --merge-output-format FORMAT     If a merge is required (e.g.
-                                     bestvideo+bestaudio), output to given
-                                     container format. One of mkv, mp4, ogg,
-                                     webm, flv.Ignored if no merge is required
+
+    -f, --format FORMAT              Video format code, see the "FORMAT SELECTION" for all the info
+    --all-formats                    Download all available video formats
+    --prefer-free-formats            Prefer free video formats unless a specific one is requested
+    -F, --list-formats               List all available formats
+    --youtube-skip-dash-manifest     Do not download the DASH manifest on YouTube videos
+    --merge-output-format FORMAT     If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv. Ignored if no
+                                     merge is required
+
 
 Subtitle Options:
------------------
-
-    --write-sub                      write subtitle file
-    --write-auto-sub                 write automatic subtitle file (youtube
-                                     only)
-    --all-subs                       downloads all the available subtitles of
-                                     the video
-    --list-subs                      lists all available subtitles for the video
-    --sub-format FORMAT              subtitle format, accepts formats
-                                     preference, for example: "ass/srt/best"
-    --sub-lang LANGS                 languages of the subtitles to download
-                                     (optional) separated by commas, use IETF
-                                     language tags like 'en,pt'
+
+    --write-sub                      Write subtitle file
+    --write-auto-sub                 Write automatic subtitle file (YouTube only)
+    --all-subs                       Download all the available subtitles of the video
+    --list-subs                      List all available subtitles for the video
+    --sub-format FORMAT              Subtitle format, accepts formats preference, for example: "srt" or "ass/srt/best"
+    --sub-lang LANGS                 Languages of the subtitles to download (optional) separated by commas, use IETF language tags like 'en,pt'
+
 
 Authentication Options:
------------------------
 
-    -u, --username USERNAME          login with this account ID
-    -p, --password PASSWORD          account password. If this option is left
-                                     out, youtube-dl will ask interactively.
-    -2, --twofactor TWOFACTOR        two-factor auth code
-    -n, --netrc                      use .netrc authentication data
-    --video-password PASSWORD        video password (vimeo, smotri)
+    -u, --username USERNAME          Login with this account ID
+    -p, --password PASSWORD          Account password. If this option is left out, youtube-dl will ask interactively.
+    -2, --twofactor TWOFACTOR        Two-factor auth code
+    -n, --netrc                      Use .netrc authentication data
+    --video-password PASSWORD        Video password (vimeo, smotri)
+
 
 Post-processing Options:
-------------------------
-
-    -x, --extract-audio              convert video files to audio-only files
-                                     (requires ffmpeg or avconv and ffprobe or
-                                     avprobe)
-    --audio-format FORMAT            "best", "aac", "vorbis", "mp3", "m4a",
-                                     "opus", or "wav"; "best" by default
-    --audio-quality QUALITY          ffmpeg/avconv audio quality specification,
-                                     insert a value between 0 (better) and 9
-                                     (worse) for VBR or a specific bitrate like
-                                     128K (default 5)
-    --recode-video FORMAT            Encode the video to another format if
-                                     necessary (currently supported:
-                                     mp4|flv|ogg|webm|mkv)
-    -k, --keep-video                 keeps the video file on disk after the
-                                     post-processing; the video is erased by
-                                     default
-    --no-post-overwrites             do not overwrite post-processed files; the
-                                     post-processed files are overwritten by
-                                     default
-    --embed-subs                     embed subtitles in the video (only for mp4
-                                     videos)
-    --embed-thumbnail                embed thumbnail in the audio as cover art
-    --add-metadata                   write metadata to the video file
-    --xattrs                         write metadata to the video file's xattrs
-                                     (using dublin core and xdg standards)
-    --fixup POLICY                   Automatically correct known faults of the
-                                     file. One of never (do nothing), warn (only
-                                     emit a warning), detect_or_warn(the
-                                     default; fix file if we can, warn
-                                     otherwise)
-    --prefer-avconv                  Prefer avconv over ffmpeg for running the
-                                     postprocessors (default)
-    --prefer-ffmpeg                  Prefer ffmpeg over avconv for running the
-                                     postprocessors
-    --ffmpeg-location PATH           Location of the ffmpeg/avconv binary;
-                                     either the path to the binary or its
-                                     containing directory.
-    --exec CMD                       Execute a command on the file after
-                                     downloading, similar to find's -exec
-                                     syntax. Example: --exec 'adb push {}
-                                     /sdcard/Music/ && rm {}'
-    --convert-subtitles FORMAT       Convert the subtitles to other format
-                                     (currently supported: srt|ass|vtt)
+
+    -x, --extract-audio              Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)
+    --audio-format FORMAT            Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "best" by default
+    --audio-quality QUALITY          Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default
+                                     5)
+    --recode-video FORMAT            Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)
+    -k, --keep-video                 Keep the video file on disk after the post-processing; the video is erased by default
+    --no-post-overwrites             Do not overwrite post-processed files; the post-processed files are overwritten by default
+    --embed-subs                     Embed subtitles in the video (only for mkv and mp4 videos)
+    --embed-thumbnail                Embed thumbnail in the audio as cover art
+    --add-metadata                   Write metadata to the video file
+    --metadata-from-title FORMAT     Parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed
+                                     parameters replace existing values. Additional templates: %(album), %(artist). Example: --metadata-from-title "%(artist)s -
+                                     %(title)s" matches a title like "Coldplay - Paradise"
+    --xattrs                         Write metadata to the video file's xattrs (using dublin core and xdg standards)
+    --fixup POLICY                   Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn (the default;
+                                     fix file if we can, warn otherwise)
+    --prefer-avconv                  Prefer avconv over ffmpeg for running the postprocessors (default)
+    --prefer-ffmpeg                  Prefer ffmpeg over avconv for running the postprocessors
+    --ffmpeg-location PATH           Location of the ffmpeg/avconv binary; either the path to the binary or its containing directory.
+    --exec CMD                       Execute a command on the file after downloading, similar to find's -exec syntax. Example: --exec 'adb push {} /sdcard/Music/ && rm
+                                     {}'
+    --convert-subtitles FORMAT       Convert the subtitles to another format (currently supported: srt|ass|vtt)
+
+
 
 CONFIGURATION
-=============
+
 
 You can configure youtube-dl by placing default arguments (such as
 --extract-audio --no-mtime to always extract the audio and not copy the
@@ -453,8 +282,10 @@ Windows, the configuration file locations are
 %APPDATA%\youtube-dl\config.txt and
 C:\Users\<user name>\youtube-dl.conf.
 
+
+
 OUTPUT TEMPLATE
-===============
+
 
 The -o option allows users to indicate a template for the output file
 names. The basic usage is not to set any template arguments when
@@ -497,8 +328,63 @@ $ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filena
 youtube-dl_test_video_.mp4          # A simple file name
 ```
 
+
+
+FORMAT SELECTION
+
+
+By default youtube-dl tries to download the best quality, but sometimes
+you may want to download another format. The simplest case is requesting a
+specific format, for example -f 22. You can get the list of available
+formats using --list-formats, you can also use a file extension
+(currently it supports aac, m4a, mp3, mp4, ogg, wav, webm) or the
+special names best, bestvideo, bestaudio and worst.
+
+If you want to download multiple videos and they don't have the same
+formats available, you can specify the order of preference using
+slashes, as in -f 22/17/18. You can also filter the video results by
+putting a condition in brackets, as in -f "best[height=720]" (or
+-f "[filesize>10M]"). This works for filesize, height, width, tbr, abr,
+vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext,
+acodec, vcodec, container, and protocol and the comparisons =, != .
+Formats for which the value is not known are excluded unless you put a
+question mark (?) after the operator. You can combine format filters, so
+-f "[height <=? 720][tbr>500]" selects up to 720p videos (or videos
+where the height is not known) with a bitrate of at least 500 KBit/s.
+Use commas to download multiple formats, such as
+-f 136/137/mp4/bestvideo,140/m4a/bestaudio. You can merge the video and
+audio of two formats into a single file using
+-f <video-format>+<audio-format> (requires ffmpeg or avconv), for
+example -f bestvideo+bestaudio.
+
+Since the end of April 2015 and version 2015.04.26 youtube-dl uses
+-f bestvideo+bestaudio/best as default format selection (see #5447,
+#5456). If ffmpeg or avconv are installed this results in downloading
+bestvideo and bestaudio separately and muxing them together into a
+single file giving the best overall quality available. Otherwise it
+falls back to best and results in downloading best available quality
+served as a single file. best is also needed for videos that don't come
+from YouTube because they don't provide the audio and video in two
+different files. If you want to only download some dash formats (for
+example if you are not interested in getting videos with a resolution
+higher than 1080p), you can add
+-f bestvideo[height<=?1080]+bestaudio/best to your configuration file.
+Note that if you use youtube-dl to stream to stdout (and most likely to
+pipe it to your media player then), i.e. you explicitly specify output
+template as -o -, youtube-dl still uses -f best format selection in
+order to start content delivery immediately to your player and not to
+wait until bestvideo and bestaudio are downloaded and muxed.
+
+If you want to preserve the old format selection behavior (prior to
+youtube-dl 2015.04.26), i.e. you want to download best available quality
+media served as a single file, you should explicitly specify your choice
+with -f best. You may want to add it to the configuration file in order
+not to type it every time you run youtube-dl.
+
+
+
 VIDEO SELECTION
-===============
+
 
 Videos can be filtered by their upload date using the options --date,
 --datebefore or --dateafter, they accept dates in two formats:
@@ -520,8 +406,10 @@ $ # will only download the videos uploaded in the 200x decade
 $ youtube-dl --dateafter 20000101 --datebefore 20091231
 ```
 
+
+
 FAQ
-===
+
 
 How do I update youtube-dl?
 
@@ -531,8 +419,8 @@ run youtube-dl -U (or, on Linux, sudo youtube-dl -U).
 If you have used pip, a simple sudo pip install -U youtube-dl is
 sufficient to update.
 
-If you have installed youtube-dl using a package manager like apt-get or
-yum, use the standard system update mechanism to update. Note that
+If you have installed youtube-dl using a package manager like _apt-get_
+or _yum_, use the standard system update mechanism to update. Note that
 distribution packages are often outdated. As a rule of thumb, youtube-dl
 releases at least once a month, and often weekly or even daily. Simply
 go to http://yt-dl.org/ to find out the current version. Unfortunately,
@@ -567,15 +455,13 @@ Ubuntu, there is little we can do. Feel free to report bugs to the
 Ubuntu packaging guys - all they have to do is update the package to a
 somewhat recent version. See above for a way to update.
 
-Do I always have to pass in --max-quality FORMAT, or -citw?
+Do I always have to pass -citw?
 
 By default, youtube-dl intends to have the best options (incidentally,
 if you have a convincing case that these should be different, please
 file an issue where you explain that). Therefore, it is unnecessary and
 sometimes harmful to copy long option strings from webpages. In
-particular, --max-quality limits the video quality (so if you want the
-best quality, do NOT pass it in), and the only option out of -citw that
-is regularly useful is -i.
+particular, the only option out of -citw that is regularly useful is -i.
 
 Can you please put the -b option back?
 
@@ -614,7 +500,7 @@ only applied to IPv4. Some services (sometimes only for a subset of
 videos) do not restrict the video URL by IP address, cookie, or
 user-agent, but these are the exception rather than the rule.
 
-Please bear in mind that some URL protocols are not supported by
+Please bear in mind that some URL protocols are NOT supported by
 browsers out of the box, including RTMP. If you are using -g, your own
 downloader must support these as well.
 
@@ -636,12 +522,43 @@ YouTube requires an additional signature since September 2012 which is
 not supported by old versions of youtube-dl. See above for how to update
 youtube-dl.
 
+Video URL contains an ampersand and I'm getting some strange output [1] 2839 or 'v' is not recognized as an internal or external command
+
+That's actually the output from your shell. Since the ampersand is one of
+the special shell characters, it's interpreted by the shell, preventing you
+from passing the whole URL to youtube-dl. To prevent your shell from
+interpreting the ampersands (or any other special characters) you have
+to either put the whole URL in quotes or escape them with a backslash
+(which approach will work depends on your shell).
+
+For example, if your URL is
+https://www.youtube.com/watch?t=4&v=BaW_jenozKc you should end up with
+the following command:
+
+youtube-dl 'https://www.youtube.com/watch?t=4&v=BaW_jenozKc'
+
+or
+
+youtube-dl https://www.youtube.com/watch?t=4\&v=BaW_jenozKc
+
+For Windows you have to use the double quotes:
+
+youtube-dl "https://www.youtube.com/watch?t=4&v=BaW_jenozKc"
+
 ExtractorError: Could not find JS function u'OF'
 
 In February 2015, the new YouTube player contained a character sequence
 in a string that was misinterpreted by old versions of youtube-dl. See
 above for how to update youtube-dl.
 
+HTTP Error 429: Too Many Requests or 402: Payment Required
+
+These two error codes indicate that the service is blocking your IP
+address because of overuse. Contact the service and ask them to unblock
+your IP address, or - if you have acquired a whitelisted IP address
+already - use the --proxy or --network-address options to select another
+IP address.
+
 SyntaxError: Non-ASCII character
 
 The error
@@ -660,7 +577,7 @@ systems) or clone the git repository, as laid out above. If you modify
 the code, you can run it by executing the __main__.py file. To recompile
 the executable, run make youtube-dl.
 
-The exe throws a Runtime error from Visual C++
+The exe throws a _Runtime error from Visual C++_
 
 To run the exe you need to install first the Microsoft Visual C++ 2008
 Redistributable Package.
@@ -704,16 +621,43 @@ creator, the creator's distributor, or is published under a free
 license), the service is probably unfit for inclusion to youtube-dl.
 
 A note on the service that they don't host the infringing content, but
-just link to those who do, is evidence that the service should not be
+just link to those who do, is evidence that the service should NOT be
 included into youtube-dl. The same goes for any DMCA note when the whole
 front page of the service is filled with videos they are not allowed to
 distribute. A "fair use" note is equally unconvincing if the service
 shows copyright-protected videos in full without authorization.
 
-Support requests for services that do purchase the rights to distribute
+Support requests for services that DO purchase the rights to distribute
 their content are perfectly fine though. If in doubt, you can simply
 include a source that mentions the legitimate purchase of content.
 
+How can I speed up work on my issue?
+
+(Also known as: Help, my important issue is not being solved!) The
+youtube-dl core developer team is quite small. While we do our best to
+solve as many issues as possible, sometimes that can take quite a while.
+To speed up your issue, here's what you can do:
+
+First of all, please do report the issue at our issue tracker. That
+allows us to coordinate all efforts by users and developers, and serves
+as a unified point. Unfortunately, the youtube-dl project has grown too
+large to use personal email as an effective communication channel.
+
+Please read the bug reporting instructions below. A lot of bugs lack all
+the necessary information. If you can, offer proxy, VPN, or shell access
+to the youtube-dl developers. If you are able to, test the issue from
+multiple computers in multiple countries to exclude local censorship or
+misconfiguration issues.
+
+If nobody is interested in solving your issue, you are welcome to take
+matters into your own hands and submit a pull request (or coerce/pay
+somebody else to do so).
+
+Feel free to bump the issue from time to time by writing a small comment
+("Issue is still present in youtube-dl version ...from France, but fixed
+from Belgium"), but please not more than once a month. Please do not
+declare your issue as important or urgent.
+
 How can I detect whether a given URL is supported by youtube-dl?
 
 For one, have a look at the list of supported sites. Note that it can
@@ -722,8 +666,8 @@ http://example.com/video/1234567 to http://example.com/v/1234567 ) and
 youtube-dl reports an URL of a service in that list as unsupported. In
 that case, simply report a bug.
 
-It is not possible to detect whether a URL is supported or not. That's
-because youtube-dl contains a generic extractor which matches all URLs.
+It is _not_ possible to detect whether a URL is supported or not. That's
+because youtube-dl contains a generic extractor which matches ALL URLs.
 You may be tempted to disable, exclude, or remove the generic extractor,
 but the generic extractor not only allows users to extract videos from
 lots of websites that embed a video from another service, but may also
@@ -738,8 +682,10 @@ by examining the output (if you run youtube-dl on the console) or
 catching an UnsupportedError exception if you run it from a Python
 program.
 
+
+
 DEVELOPER INSTRUCTIONS
-======================
+
 
 Most users do not need to build youtube-dl and can download the builds
 or get them from their distribution.
@@ -819,7 +765,7 @@ list (assuming your service is called yourextractor):
 
 5.  Add an import in youtube_dl/extractor/__init__.py.
 6.  Run python test/test_download.py TestDownload.test_YourExtractor.
-    This should fail at first, but you can continually re-run it until
+    This _should fail_ at first, but you can continually re-run it until
     you're done. If you decide to add more than one test, then rename
     _TEST to _TESTS and make it into a list of dictionaries. The tests
     will be then be named TestDownload.test_YourExtractor,
@@ -841,8 +787,10 @@ list (assuming your service is called yourextractor):
 
 In any case, thank you very much for your contributions!
 
+
+
 EMBEDDING YOUTUBE-DL
-====================
+
 
 youtube-dl makes the best effort to be a good command-line program, and
 thus should be callable from any programming language. If you encounter
@@ -852,6 +800,7 @@ From a Python program, you can embed youtube-dl in a more powerful
 fashion, like this:
 
 ``` {.python}
+from __future__ import unicode_literals
 import youtube_dl
 
 ydl_opts = {}
@@ -868,6 +817,7 @@ Here's a more complete example of a program that outputs only errors
 downloads/converts the video to an mp3 file:
 
 ``` {.python}
+from __future__ import unicode_literals
 import youtube_dl
 
 
@@ -901,8 +851,10 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
     ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
 ```
 
+
+
 BUGS
-====
+
 
 Bugs and suggestions should be reported at:
 https://github.com/rg3/youtube-dl/issues . Unless you were prompted so
@@ -910,7 +862,7 @@ or there is another pertinent reason (e.g. GitHub fails to accept the
 bug report), please do not send bug reports via personal email. For
 discussions, join us in the irc channel #youtube-dl on freenode.
 
-Please include the full output of youtube-dl when run with -v.
+PLEASE INCLUDE THE FULL OUTPUT OF YOUTUBE-DL WHEN RUN WITH -v.
 
 The output (including the first lines) contain important debugging
 information. Issues without the full output are often not reproducible
@@ -941,16 +893,24 @@ misinterpretation likely. As a commiter myself, I often get frustrated
 by these issues, since the only possible way for me to move forward on
 them is to ask for clarification over and over.
 
-For bug reports, this means that your report should contain the complete
-output of youtube-dl when called with the -v flag. The error message you
-get for (most) bugs even says so, but you would not believe how many of
-our bug reports do not contain this information.
+For bug reports, this means that your report should contain the
+_complete_ output of youtube-dl when called with the -v flag. The error
+message you get for (most) bugs even says so, but you would not believe
+how many of our bug reports do not contain this information.
+
+If your server has multiple IPs or you suspect censorship,
+adding --call-home may be a good idea to get more diagnostics. If the
+error is ERROR: Unable to extract ... and you cannot reproduce it from
+multiple countries, add --dump-pages (warning: this will yield a rather
+large output, redirect it to the file log.txt by adding >log.txt 2>&1 to
+your command-line) or upload the .dump files you get when you add
+--write-pages somewhere.
 
-Site support requests must contain an example URL. An example URL is a
+SITE SUPPORT REQUESTS MUST CONTAIN AN EXAMPLE URL. An example URL is a
 URL you might want to download, like
 http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious
 video present. Except under very special circumstances, the main page of
-a video service (e.g. http://www.youtube.com/ ) is not an example URL.
+a video service (e.g. http://www.youtube.com/ ) is _not_ an example URL.
 
 Are you using the latest version?
 
@@ -974,7 +934,7 @@ Why are existing options not enough?
 Before requesting a new feature, please have a quick peek at the list of
 supported options. Many feature requests are for features that actually
 exist already! Please, absolutely do show off your work in the issue
-report and detail how the existing similar options do not solve your
+report and detail how the existing similar options do _not_ solve your
 problem.
 
 Is there enough context in your bug report?
@@ -1031,12 +991,13 @@ maintainer of the actual application providing the UI. On the other
 hand, if your UI for youtube-dl fails in some way you believe is related
 to youtube-dl, by all means, go ahead and report the bug.
 
+
+
 COPYRIGHT
-=========
+
 
 youtube-dl is released into the public domain by the copyright holders.
 
 This README file was originally written by Daniel Bolton
 (https://github.com/dbbolton) and is likewise released into the public
 domain.
-
index 6a5bd9eda333246c47064bf84cfc03da09de4caf..7a219ebe97c555be79a55cf4dc30bf2cb823ca28 100644 (file)
@@ -28,7 +28,7 @@ for test in get_testcases():
     if METHOD == 'EURISTIC':
         try:
             webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
-        except:
+        except Exception:
             print('\nFail: {0}'.format(test['name']))
             continue
 
diff --git a/devscripts/generate_aes_testdata.py b/devscripts/generate_aes_testdata.py
new file mode 100644 (file)
index 0000000..2e389fc
--- /dev/null
@@ -0,0 +1,42 @@
+from __future__ import unicode_literals
+
+import codecs
+import subprocess
+
+import os
+import sys
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.utils import intlist_to_bytes
+from youtube_dl.aes import aes_encrypt, key_expansion
+
+secret_msg = b'Secret message goes here'
+
+
+def hex_str(int_list):
+    return codecs.encode(intlist_to_bytes(int_list), 'hex')
+
+
+def openssl_encode(algo, key, iv):
+    cmd = ['openssl', 'enc', '-e', '-' + algo, '-K', hex_str(key), '-iv', hex_str(iv)]
+    prog = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+    out, _ = prog.communicate(secret_msg)
+    return out
+
+iv = key = [0x20, 0x15] + 14 * [0]
+
+r = openssl_encode('aes-128-cbc', key, iv)
+print('aes_cbc_decrypt')
+print(repr(r))
+
+password = key
+new_key = aes_encrypt(password, key_expansion(password))
+r = openssl_encode('aes-128-ctr', new_key, iv)
+print('aes_decrypt_text 16')
+print(repr(r))
+
+password = key + 16 * [0]
+new_key = aes_encrypt(password, key_expansion(password)) * (32 // 16)
+r = openssl_encode('aes-256-ctr', new_key, iv)
+print('aes_decrypt_text 32')
+print(repr(r))
index 062cb3d626443e7f69059aaea10c3ec3a30a7f28..43fbe8b1d75bf260dd98d29ee7cb029b4c373365 100644 (file)
@@ -2,6 +2,8 @@
  - **1tv**: Первый канал
  - **1up.com**
  - **220.ro**
+ - **22tracks:genre**
+ - **22tracks:track**
  - **24video**
  - **3sat**
  - **4tube**
  - **audiomack**
  - **audiomack:album**
  - **Azubu**
+ - **BaiduVideo**
  - **bambuser**
  - **bambuser:channel**
  - **Bandcamp**
  - **Bandcamp:album**
  - **bbc.co.uk**: BBC iPlayer
+ - **BeatportPro**
  - **Beeg**
  - **BehindKink**
  - **Bet**
@@ -60,6 +64,8 @@
  - **BR**: Bayerischer Rundfunk Mediathek
  - **Break**
  - **Brightcove**
+ - **bt:article**: Bergens Tidende Articles
+ - **bt:vestlendingen**: Bergens Tidende - Vestlendingen
  - **BuzzFeed**
  - **BYUtv**
  - **Camdemy**
  - **CondeNast**: Condé Nast media group: Condé Nast, GQ, Glamour, Vanity Fair, Vogue, W Magazine, WIRED
  - **Cracked**
  - **Criterion**
+ - **CrooksAndLiars**
  - **Crunchyroll**
  - **crunchyroll:playlist**
  - **CSpan**: C-SPAN
  - **DctpTv**
  - **DeezerPlaylist**
  - **defense.gouv.fr**
+ - **DHM**: Filmarchiv - Deutsches Historisches Museum
  - **Discovery**
  - **divxstage**: DivxStage
  - **Dotsub**
+ - **DouyuTV**
  - **DRBonanza**
  - **Dropbox**
  - **DrTuber**
  - **DRTV**
  - **Dump**
+ - **Dumpert**
  - **dvtv**: http://video.aktualne.cz/
+ - **EaglePlatform**
  - **EbaumsWorld**
  - **EchoMsk**
  - **eHow**
  - **Firstpost**
  - **Flickr**
  - **Folketinget**: Folketinget (ft.dk; Danish parliament)
+ - **FootyRoom**
  - **Foxgay**
  - **FoxNews**
+ - **FoxSports**
  - **france2.fr:generation-quoi**
  - **FranceCulture**
  - **FranceInter**
  - **Gamekings**
  - **GameOne**
  - **gameone:playlist**
+ - **Gamersyde**
  - **GameSpot**
  - **GameStar**
  - **Gametrailers**
+ - **Gazeta**
  - **GDCVault**
  - **generic**: Generic downloader that works on some sites
+ - **Gfycat**
  - **GiantBomb**
  - **Giga**
  - **Glide**: Glide mobile video messages (glide.me)
  - **GodTube**
  - **GoldenMoustache**
  - **Golem**
- - **GorillaVid**: GorillaVid.in, daclips.in, movpod.in and fastvideo.in
+ - **GorillaVid**: GorillaVid.in, daclips.in, movpod.in, fastvideo.in and realvid.net
  - **Goshgay**
- - **Grooveshark**
  - **Groupon**
  - **Hark**
  - **HearThisAt**
  - **jpopsuki.tv**
  - **Jukebox**
  - **Kaltura**
+ - **KanalPlay**: Kanal 5/9/11 Play
  - **Kankan**
  - **Karaoketv**
  - **keek**
  - **Letv**
  - **LetvPlaylist**
  - **LetvTv**
+ - **Libsyn**
+ - **life:embed**
  - **lifenews**: LIFE | NEWS
  - **LiveLeak**
  - **livestream**
  - **Malemotion**
  - **MDR**
  - **media.ccc.de**
+ - **MegaVideoz**
  - **metacafe**
  - **Metacritic**
  - **Mgoon**
  - **Minhateca**
  - **MinistryGrid**
+ - **miomio.tv**
  - **mitele.es**
  - **mixcloud**
  - **MLB**
  - **MySpass**
  - **myvideo**
  - **MyVidster**
+ - **N-JOY**
  - **n-tv.de**
  - **NationalGeographic**
  - **Naver**
  - **NBA**
  - **NBC**
  - **NBCNews**
+ - **NBCSports**
+ - **NBCSportsVPlayer**
  - **ndr**: NDR.de - Mediathek
  - **NDTV**
  - **NerdCubedFeed**
  - **npo.nl:radio**
  - **npo.nl:radio:fragment**
  - **NRK**
+ - **NRKPlaylist**
  - **NRKTV**
  - **ntv.ru**
  - **Nuvid**
  - **NYTimes**
+ - **NYTimesArticle**
  - **ocw.mit.edu**
  - **Odnoklassniki**
  - **OktoberfestTV**
  - **Ooyala**
  - **OpenFilm**
  - **orf:fm4**: radio FM4
+ - **orf:iptv**: iptv.ORF.at
  - **orf:oe1**: Radio Österreich 1
  - **orf:tvthek**: ORF TVthek
  - **parliamentlive.tv**: UK parliament videos
  - **Patreon**
  - **PBS**
+ - **PhilharmonieDeParis**: Philharmonie de Paris
  - **Phoenix**
  - **Photobucket**
+ - **Pladform**
  - **PlanetaPlay**
  - **play.fm**
  - **played.to**
  - **Playvid**
+ - **Playwire**
  - **plus.google**: Google Plus
  - **pluzz.francetv.fr**
  - **podomatic**
  - **PornHub**
  - **PornHubPlaylist**
  - **Pornotube**
+ - **PornoVoisines**
  - **PornoXO**
+ - **PrimeShareTV**
  - **PromptFile**
  - **prosiebensat1**: ProSiebenSat.1 Digital
  - **Puls4**
  - **Pyvideo**
+ - **qqmusic**
+ - **qqmusic:album**
+ - **qqmusic:singer**
+ - **qqmusic:toplist**
  - **QuickVid**
  - **R7**
  - **radio.de**
  - **radiobremen**
  - **radiofrance**
+ - **RadioJavan**
  - **Rai**
  - **RBMARadio**
  - **RedTube**
  - **RTP**
  - **RTS**: RTS.ch
  - **rtve.es:alacarta**: RTVE a la carta
+ - **rtve.es:infantil**: RTVE infantil
  - **rtve.es:live**: RTVE.es live streams
  - **RUHD**
  - **rutube**: Rutube videos
  - **rutube:movie**: Rutube movies
  - **rutube:person**: Rutube person videos
  - **RUTV**: RUTV.RU
+ - **safari**: safaribooksonline.com online video
+ - **safari:course**: safaribooksonline.com online courses
  - **Sandia**: Sandia National Laboratories
  - **Sapo**: SAPO Vídeos
  - **savefrom.net**
  - **Screencast**
  - **ScreencastOMatic**
  - **ScreenwaveMedia**
+ - **SenateISVP**
  - **ServingSys**
  - **Sexu**
  - **SexyKarma**: Sexy Karma and Watch Indian Porn
  - **soundgasm**
  - **soundgasm:profile**
  - **southpark.cc.com**
+ - **southpark.cc.com:español**
  - **southpark.de**
+ - **southpark.nl**
+ - **southparkstudios.dk**
  - **Space**
+ - **SpankBang**
  - **Spankwire**
  - **Spiegel**
  - **Spiegel:Article**: Articles on spiegel.de
  - **Sport5**
  - **SportBox**
  - **SportDeutschland**
+ - **Srf**
  - **SRMediathek**: Saarländischer Rundfunk
+ - **SSA**
  - **stanfordoc**: Stanford Open ClassRoom
  - **Steam**
  - **streamcloud.eu**
  - **StreamCZ**
  - **StreetVoice**
  - **SunPorno**
+ - **SVT**
  - **SVTPlay**: SVT Play and Öppet arkiv
  - **SWRMediathek**
  - **Syfy**
  - **TeamFour**
  - **TechTalks**
  - **techtv.mit.edu**
- - **TED**
+ - **ted**
  - **tegenlicht.vpro.nl**
  - **TeleBruxelles**
  - **telecinco.es**
  - **tlc.com**
  - **tlc.de**
  - **TMZ**
+ - **TMZArticle**
  - **TNAFlix**
  - **tou.tv**
  - **Toypics**: Toypics user profile
  - **Ubu**
  - **udemy**
  - **udemy:course**
+ - **UDNEmbed**
+ - **Ultimedia**
  - **Unistra**
  - **Urort**: NRK P3 Urørt
  - **ustream**
  - **ustream:channel**
+ - **Varzesh3**
  - **Vbox7**
  - **VeeHD**
  - **Veoh**
+ - **Vessel**
  - **Vesti**: Вести.Ru
  - **Vevo**
- - **VGTV**
+ - **VGTV**: VGTV and BTTV
  - **vh1.com**
  - **Vice**
  - **Viddler**
  - **Vidzi**
  - **vier**
  - **vier:videos**
+ - **Viewster**
  - **viki**
  - **vimeo**
  - **vimeo:album**
  - **vimeo:review**: Review pages on vimeo
  - **vimeo:user**
  - **vimeo:watchlater**: Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)
- - **Vimple**: Vimple.ru
+ - **Vimple**: Vimple - one-click video hosting
  - **Vine**
  - **vine:user**
  - **vk.com**
  - **vk.com:user-videos**: vk.com:All of a user's videos
  - **Vodlocker**
+ - **VoiceRepublic**
  - **Vporn**
  - **VRT**
  - **vube**: Vube.com
  - **XHamster**
  - **XMinus**
  - **XNXX**
+ - **Xstream**
  - **XTube**
  - **XTubeUser**: XTube user profile
  - **Xuite**
  - **XXXYMovies**
  - **Yahoo**: Yahoo screen and movies
  - **Yam**
+ - **yandexmusic:album**: Яндекс.Музыка - Альбом
+ - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
+ - **yandexmusic:track**: Яндекс.Музыка - Трек
  - **YesJapan**
  - **Ynet**
  - **YouJizz**
  - **youtube:show**: YouTube.com (multi-season) shows
  - **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
  - **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
- - **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
+ - **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
  - **Zapiks**
  - **ZDF**
  - **ZDFChannel**
index 12afdf184f0215e9947515cd3a8516ccad2e480e..e1129e58f44c9f5118b16a52dacdd869d3dd0123 100644 (file)
@@ -150,7 +150,7 @@ def expect_info_dict(self, got_dict, expected_dict):
                              'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
 
     # Check for the presence of mandatory fields
-    if got_dict.get('_type') != 'playlist':
+    if got_dict.get('_type') not in ('playlist', 'multi_video'):
         for key in ('id', 'url', 'title', 'ext'):
             self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
     # Check for mandatory fields that are automatically set by YoutubeDL
index cbff9bd16486fcda2c155e6978c354e320cfc95b..7bf59c25fdf77dd4c736f21b9ede9304e9bb214c 100644 (file)
@@ -7,8 +7,7 @@
     "forcethumbnail": false, 
     "forcetitle": false, 
     "forceurl": false, 
-    "format": null, 
-    "format_limit": null, 
+    "format": "best",
     "ignoreerrors": false, 
     "listformats": null, 
     "logtostderr": false, 
index 055e4255583d500805facc4fc59e296170e876e4..82b827536d746246d3241fd856f749945cf6d561 100644 (file)
@@ -14,6 +14,9 @@ from test.helper import FakeYDL, assertRegexpMatches
 from youtube_dl import YoutubeDL
 from youtube_dl.extractor import YoutubeIE
 from youtube_dl.postprocessor.common import PostProcessor
+from youtube_dl.utils import match_filter_func
+
+TEST_URL = 'http://localhost/sample.mp4'
 
 
 class YDL(FakeYDL):
@@ -46,8 +49,8 @@ class TestFormatSelection(unittest.TestCase):
         ydl = YDL()
         ydl.params['prefer_free_formats'] = True
         formats = [
-            {'ext': 'webm', 'height': 460, 'url': 'x'},
-            {'ext': 'mp4', 'height': 460, 'url': 'y'},
+            {'ext': 'webm', 'height': 460, 'url': TEST_URL},
+            {'ext': 'mp4', 'height': 460, 'url': TEST_URL},
         ]
         info_dict = _make_result(formats)
         yie = YoutubeIE(ydl)
@@ -60,8 +63,8 @@ class TestFormatSelection(unittest.TestCase):
         ydl = YDL()
         ydl.params['prefer_free_formats'] = True
         formats = [
-            {'ext': 'webm', 'height': 720, 'url': 'a'},
-            {'ext': 'mp4', 'height': 1080, 'url': 'b'},
+            {'ext': 'webm', 'height': 720, 'url': TEST_URL},
+            {'ext': 'mp4', 'height': 1080, 'url': TEST_URL},
         ]
         info_dict['formats'] = formats
         yie = YoutubeIE(ydl)
@@ -74,9 +77,9 @@ class TestFormatSelection(unittest.TestCase):
         ydl = YDL()
         ydl.params['prefer_free_formats'] = False
         formats = [
-            {'ext': 'webm', 'height': 720, 'url': '_'},
-            {'ext': 'mp4', 'height': 720, 'url': '_'},
-            {'ext': 'flv', 'height': 720, 'url': '_'},
+            {'ext': 'webm', 'height': 720, 'url': TEST_URL},
+            {'ext': 'mp4', 'height': 720, 'url': TEST_URL},
+            {'ext': 'flv', 'height': 720, 'url': TEST_URL},
         ]
         info_dict['formats'] = formats
         yie = YoutubeIE(ydl)
@@ -88,8 +91,8 @@ class TestFormatSelection(unittest.TestCase):
         ydl = YDL()
         ydl.params['prefer_free_formats'] = False
         formats = [
-            {'ext': 'flv', 'height': 720, 'url': '_'},
-            {'ext': 'webm', 'height': 720, 'url': '_'},
+            {'ext': 'flv', 'height': 720, 'url': TEST_URL},
+            {'ext': 'webm', 'height': 720, 'url': TEST_URL},
         ]
         info_dict['formats'] = formats
         yie = YoutubeIE(ydl)
@@ -98,45 +101,12 @@ class TestFormatSelection(unittest.TestCase):
         downloaded = ydl.downloaded_info_dicts[0]
         self.assertEqual(downloaded['ext'], 'flv')
 
-    def test_format_limit(self):
-        formats = [
-            {'format_id': 'meh', 'url': 'http://example.com/meh', 'preference': 1},
-            {'format_id': 'good', 'url': 'http://example.com/good', 'preference': 2},
-            {'format_id': 'great', 'url': 'http://example.com/great', 'preference': 3},
-            {'format_id': 'excellent', 'url': 'http://example.com/exc', 'preference': 4},
-        ]
-        info_dict = _make_result(formats)
-
-        ydl = YDL()
-        ydl.process_ie_result(info_dict)
-        downloaded = ydl.downloaded_info_dicts[0]
-        self.assertEqual(downloaded['format_id'], 'excellent')
-
-        ydl = YDL({'format_limit': 'good'})
-        assert ydl.params['format_limit'] == 'good'
-        ydl.process_ie_result(info_dict.copy())
-        downloaded = ydl.downloaded_info_dicts[0]
-        self.assertEqual(downloaded['format_id'], 'good')
-
-        ydl = YDL({'format_limit': 'great', 'format': 'all'})
-        ydl.process_ie_result(info_dict.copy())
-        self.assertEqual(ydl.downloaded_info_dicts[0]['format_id'], 'meh')
-        self.assertEqual(ydl.downloaded_info_dicts[1]['format_id'], 'good')
-        self.assertEqual(ydl.downloaded_info_dicts[2]['format_id'], 'great')
-        self.assertTrue('3' in ydl.msgs[0])
-
-        ydl = YDL()
-        ydl.params['format_limit'] = 'excellent'
-        ydl.process_ie_result(info_dict.copy())
-        downloaded = ydl.downloaded_info_dicts[0]
-        self.assertEqual(downloaded['format_id'], 'excellent')
-
     def test_format_selection(self):
         formats = [
-            {'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': '_'},
-            {'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': '_'},
-            {'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': '_'},
-            {'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': '_'},
+            {'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
+            {'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': TEST_URL},
+            {'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': TEST_URL},
+            {'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': TEST_URL},
         ]
         info_dict = _make_result(formats)
 
@@ -167,10 +137,10 @@ class TestFormatSelection(unittest.TestCase):
 
     def test_format_selection_audio(self):
         formats = [
-            {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': '_'},
-            {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': '_'},
-            {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': '_'},
-            {'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': '_'},
+            {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
+            {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL},
+            {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': TEST_URL},
+            {'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': TEST_URL},
         ]
         info_dict = _make_result(formats)
 
@@ -185,8 +155,8 @@ class TestFormatSelection(unittest.TestCase):
         self.assertEqual(downloaded['format_id'], 'audio-low')
 
         formats = [
-            {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': '_'},
-            {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': '_'},
+            {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
+            {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': TEST_URL},
         ]
         info_dict = _make_result(formats)
 
@@ -228,9 +198,9 @@ class TestFormatSelection(unittest.TestCase):
 
     def test_format_selection_video(self):
         formats = [
-            {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': '_'},
-            {'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': '_'},
-            {'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': '_'},
+            {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': TEST_URL},
+            {'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': TEST_URL},
+            {'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': TEST_URL},
         ]
         info_dict = _make_result(formats)
 
@@ -267,7 +237,7 @@ class TestFormatSelection(unittest.TestCase):
             f2['url'] = 'url:' + f2id
 
             info_dict = _make_result([f1, f2], extractor='youtube')
-            ydl = YDL()
+            ydl = YDL({'format': 'best/bestvideo'})
             yie = YoutubeIE(ydl)
             yie._sort_formats(info_dict['formats'])
             ydl.process_ie_result(info_dict)
@@ -275,7 +245,7 @@ class TestFormatSelection(unittest.TestCase):
             self.assertEqual(downloaded['format_id'], f1id)
 
             info_dict = _make_result([f2, f1], extractor='youtube')
-            ydl = YDL()
+            ydl = YDL({'format': 'best/bestvideo'})
             yie = YoutubeIE(ydl)
             yie._sort_formats(info_dict['formats'])
             ydl.process_ie_result(info_dict)
@@ -337,6 +307,8 @@ class TestFormatSelection(unittest.TestCase):
         downloaded = ydl.downloaded_info_dicts[0]
         self.assertEqual(downloaded['format_id'], 'G')
 
+
+class TestYoutubeDL(unittest.TestCase):
     def test_subtitles(self):
         def s_formats(lang, autocaption=False):
             return [{
@@ -438,27 +410,103 @@ class TestFormatSelection(unittest.TestCase):
             def run(self, info):
                 with open(audiofile, 'wt') as f:
                     f.write('EXAMPLE')
-                info['filepath']
-                return False, info
+                return [info['filepath']], info
 
-        def run_pp(params):
+        def run_pp(params, PP):
             with open(filename, 'wt') as f:
                 f.write('EXAMPLE')
             ydl = YoutubeDL(params)
-            ydl.add_post_processor(SimplePP())
+            ydl.add_post_processor(PP())
             ydl.post_process(filename, {'filepath': filename})
 
-        run_pp({'keepvideo': True})
+        run_pp({'keepvideo': True}, SimplePP)
         self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename)
         self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
         os.unlink(filename)
         os.unlink(audiofile)
 
-        run_pp({'keepvideo': False})
+        run_pp({'keepvideo': False}, SimplePP)
         self.assertFalse(os.path.exists(filename), '%s exists' % filename)
         self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
         os.unlink(audiofile)
 
+        class ModifierPP(PostProcessor):
+            def run(self, info):
+                with open(info['filepath'], 'wt') as f:
+                    f.write('MODIFIED')
+                return [], info
+
+        run_pp({'keepvideo': False}, ModifierPP)
+        self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename)
+        os.unlink(filename)
+
+    def test_match_filter(self):
+        class FilterYDL(YDL):
+            def __init__(self, *args, **kwargs):
+                super(FilterYDL, self).__init__(*args, **kwargs)
+                self.params['simulate'] = True
+
+            def process_info(self, info_dict):
+                super(YDL, self).process_info(info_dict)
+
+            def _match_entry(self, info_dict, incomplete):
+                res = super(FilterYDL, self)._match_entry(info_dict, incomplete)
+                if res is None:
+                    self.downloaded_info_dicts.append(info_dict)
+                return res
+
+        first = {
+            'id': '1',
+            'url': TEST_URL,
+            'title': 'one',
+            'extractor': 'TEST',
+            'duration': 30,
+            'filesize': 10 * 1024,
+        }
+        second = {
+            'id': '2',
+            'url': TEST_URL,
+            'title': 'two',
+            'extractor': 'TEST',
+            'duration': 10,
+            'description': 'foo',
+            'filesize': 5 * 1024,
+        }
+        videos = [first, second]
+
+        def get_videos(filter_=None):
+            ydl = FilterYDL({'match_filter': filter_})
+            for v in videos:
+                ydl.process_ie_result(v, download=True)
+            return [v['id'] for v in ydl.downloaded_info_dicts]
+
+        res = get_videos()
+        self.assertEqual(res, ['1', '2'])
+
+        def f(v):
+            if v['id'] == '1':
+                return None
+            else:
+                return 'Video id is not 1'
+        res = get_videos(f)
+        self.assertEqual(res, ['1'])
+
+        f = match_filter_func('duration < 30')
+        res = get_videos(f)
+        self.assertEqual(res, ['2'])
+
+        f = match_filter_func('description = foo')
+        res = get_videos(f)
+        self.assertEqual(res, ['2'])
+
+        f = match_filter_func('description =? foo')
+        res = get_videos(f)
+        self.assertEqual(res, ['1', '2'])
+
+        f = match_filter_func('filesize > 5KiB')
+        res = get_videos(f)
+        self.assertEqual(res, ['1'])
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/test_aes.py b/test/test_aes.py
new file mode 100644 (file)
index 0000000..4dc7de7
--- /dev/null
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_decrypt_text
+from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes
+import base64
+
+# the encrypted data can be generated with 'devscripts/generate_aes_testdata.py'
+
+
+class TestAES(unittest.TestCase):
+    def setUp(self):
+        self.key = self.iv = [0x20, 0x15] + 14 * [0]
+        self.secret_msg = b'Secret message goes here'
+
+    def test_encrypt(self):
+        msg = b'message'
+        key = list(range(16))
+        encrypted = aes_encrypt(bytes_to_intlist(msg), key)
+        decrypted = intlist_to_bytes(aes_decrypt(encrypted, key))
+        self.assertEqual(decrypted, msg)
+
+    def test_cbc_decrypt(self):
+        data = bytes_to_intlist(
+            b"\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd"
+        )
+        decrypted = intlist_to_bytes(aes_cbc_decrypt(data, self.key, self.iv))
+        self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
+
+    def test_decrypt_text(self):
+        password = intlist_to_bytes(self.key).decode('utf-8')
+        encrypted = base64.b64encode(
+            intlist_to_bytes(self.iv[:8]) +
+            b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae'
+        )
+        decrypted = (aes_decrypt_text(encrypted, password, 16))
+        self.assertEqual(decrypted, self.secret_msg)
+
+        password = intlist_to_bytes(self.key).decode('utf-8')
+        encrypted = base64.b64encode(
+            intlist_to_bytes(self.iv[:8]) +
+            b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83'
+        )
+        decrypted = (aes_decrypt_text(encrypted, password, 32))
+        self.assertEqual(decrypted, self.secret_msg)
+
+if __name__ == '__main__':
+    unittest.main()
index e66264b4b16147cae6e41d329bf07dcc31ff83e4..a9db42b300864180c10dca730f772f7f5a26aad8 100644 (file)
@@ -59,7 +59,7 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
 
     def test_youtube_feeds(self):
-        self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watch_later'])
+        self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater'])
         self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions'])
         self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended'])
         self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites'])
@@ -104,11 +104,11 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertMatch(':tds', ['ComedyCentralShows'])
 
     def test_vimeo_matching(self):
-        self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel'])
-        self.assertMatch('http://vimeo.com/channels/31259', ['vimeo:channel'])
-        self.assertMatch('http://vimeo.com/channels/31259/53576664', ['vimeo'])
-        self.assertMatch('http://vimeo.com/user7108434', ['vimeo:user'])
-        self.assertMatch('http://vimeo.com/user7108434/videos', ['vimeo:user'])
+        self.assertMatch('https://vimeo.com/channels/tributes', ['vimeo:channel'])
+        self.assertMatch('https://vimeo.com/channels/31259', ['vimeo:channel'])
+        self.assertMatch('https://vimeo.com/channels/31259/53576664', ['vimeo'])
+        self.assertMatch('https://vimeo.com/user7108434', ['vimeo:user'])
+        self.assertMatch('https://vimeo.com/user7108434/videos', ['vimeo:user'])
         self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review'])
 
     # https://github.com/rg3/youtube-dl/issues/1930
index 6a149ae4f707e1dc048890b72a4903ccb8a5f785..1110357a7e8097eb38479d2a15837516af32a726 100644 (file)
@@ -153,7 +153,7 @@ def generator(test_case):
                     break
 
             if is_playlist:
-                self.assertEqual(res_dict['_type'], 'playlist')
+                self.assertTrue(res_dict['_type'] in ['playlist', 'multi_video'])
                 self.assertTrue('entries' in res_dict)
                 expect_info_dict(self, res_dict, test_case.get('info_dict', {}))
 
index 60df187de4921dfa7df808302f55f1ccd66bcb13..620db080e9bd836c7239a93e86e0944b95f793e0 100644 (file)
@@ -1,4 +1,6 @@
 #!/usr/bin/env python
+# coding: utf-8
+
 from __future__ import unicode_literals
 
 import unittest
@@ -6,6 +8,9 @@ import unittest
 import sys
 import os
 import subprocess
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.utils import encodeArgument
 
 rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 
@@ -27,5 +32,12 @@ class TestExecution(unittest.TestCase):
     def test_main_exec(self):
         subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL)
 
+    def test_cmdline_umlauts(self):
+        p = subprocess.Popen(
+            [sys.executable, 'youtube_dl/__main__.py', encodeArgument('ä'), '--version'],
+            cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE)
+        _, stderr = p.communicate()
+        self.assertFalse(stderr)
+
 if __name__ == '__main__':
     unittest.main()
index bd4d46fef95e60eb7d91752b4c8ba0462b4c469e..f2e305b6fed3ce2f0574a7c20e89ffb977934f28 100644 (file)
@@ -8,7 +8,7 @@ import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 from youtube_dl import YoutubeDL
-from youtube_dl.compat import compat_http_server
+from youtube_dl.compat import compat_http_server, compat_urllib_request
 import ssl
 import threading
 
@@ -68,5 +68,52 @@ class TestHTTP(unittest.TestCase):
         r = ydl.extract_info('https://localhost:%d/video.html' % self.port)
         self.assertEqual(r['url'], 'https://localhost:%d/vid.mp4' % self.port)
 
+
+def _build_proxy_handler(name):
+    class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+        proxy_name = name
+
+        def log_message(self, format, *args):
+            pass
+
+        def do_GET(self):
+            self.send_response(200)
+            self.send_header('Content-Type', 'text/plain; charset=utf-8')
+            self.end_headers()
+            self.wfile.write('{self.proxy_name}: {self.path}'.format(self=self).encode('utf-8'))
+    return HTTPTestRequestHandler
+
+
+class TestProxy(unittest.TestCase):
+    def setUp(self):
+        self.proxy = compat_http_server.HTTPServer(
+            ('localhost', 0), _build_proxy_handler('normal'))
+        self.port = self.proxy.socket.getsockname()[1]
+        self.proxy_thread = threading.Thread(target=self.proxy.serve_forever)
+        self.proxy_thread.daemon = True
+        self.proxy_thread.start()
+
+        self.cn_proxy = compat_http_server.HTTPServer(
+            ('localhost', 0), _build_proxy_handler('cn'))
+        self.cn_port = self.cn_proxy.socket.getsockname()[1]
+        self.cn_proxy_thread = threading.Thread(target=self.cn_proxy.serve_forever)
+        self.cn_proxy_thread.daemon = True
+        self.cn_proxy_thread.start()
+
+    def test_proxy(self):
+        cn_proxy = 'localhost:{0}'.format(self.cn_port)
+        ydl = YoutubeDL({
+            'proxy': 'localhost:{0}'.format(self.port),
+            'cn_verification_proxy': cn_proxy,
+        })
+        url = 'http://foo.com/bar'
+        response = ydl.urlopen(url).read().decode('utf-8')
+        self.assertEqual(response, 'normal: {0}'.format(url))
+
+        req = compat_urllib_request.Request(url)
+        req.add_header('Ytdl-request-proxy', cn_proxy)
+        response = ydl.urlopen(req).read().decode('utf-8')
+        self.assertEqual(response, 'cn: {0}'.format(url))
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/test_netrc.py b/test/test_netrc.py
new file mode 100644 (file)
index 0000000..7cf3a6a
--- /dev/null
@@ -0,0 +1,26 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+from youtube_dl.extractor import (
+    gen_extractors,
+)
+
+
+class TestNetRc(unittest.TestCase):
+    def test_netrc_present(self):
+        for ie in gen_extractors():
+            if not hasattr(ie, '_login'):
+                continue
+            self.assertTrue(
+                hasattr(ie, '_NETRC_MACHINE'),
+                'Extractor %s supports login, but is missing a _NETRC_MACHINE property' % ie.IE_NAME)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/test_postprocessors.py b/test/test_postprocessors.py
new file mode 100644 (file)
index 0000000..addb69d
--- /dev/null
@@ -0,0 +1,17 @@
+#!/usr/bin/env python
+
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.postprocessor import MetadataFromTitlePP
+
+
+class TestMetadataFromTitle(unittest.TestCase):
+    def test_format_to_regex(self):
+        pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s')
+        self.assertEqual(pp._titleregex, '(?P<title>.+)\ \-\ (?P<artist>.+)')
index 3f2d8a2ba74e6b4f04d4159a64deb1f69f9d105b..891ee620b1f2627dd6991e0cccfbc58b59fb6a95 100644 (file)
@@ -26,6 +26,7 @@ from youtube_dl.extractor import (
     VikiIE,
     ThePlatformIE,
     RTVEALaCartaIE,
+    FunnyOrDieIE,
 )
 
 
@@ -320,5 +321,17 @@ class TestRtveSubtitles(BaseTestSubtitles):
         self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
 
 
+class TestFunnyOrDieSubtitles(BaseTestSubtitles):
+    url = 'http://www.funnyordie.com/videos/224829ff6d/judd-apatow-will-direct-your-vine'
+    IE = FunnyOrDieIE
+
+    def test_allsubtitles(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(set(subtitles.keys()), set(['en']))
+        self.assertEqual(md5(subtitles['en']), 'c5593c193eacd353596c11c2d4f9ecc4')
+
+
 if __name__ == '__main__':
     unittest.main()
index 7f816698e7b2e20bc982b0eeb9638885edf19b48..6c1b7ec915c60321e62c7c44728f5486921e772f 100644 (file)
@@ -17,13 +17,22 @@ IGNORED_FILES = [
     'buildserver.py',
 ]
 
+IGNORED_DIRS = [
+    '.git',
+    '.tox',
+]
 
 from test.helper import assertRegexpMatches
 
 
 class TestUnicodeLiterals(unittest.TestCase):
     def test_all_files(self):
-        for dirpath, _, filenames in os.walk(rootDir):
+        for dirpath, dirnames, filenames in os.walk(rootDir):
+            for ignore_dir in IGNORED_DIRS:
+                if ignore_dir in dirnames:
+                    # If we remove the directory from dirnames os.walk won't
+                    # recurse into it
+                    dirnames.remove(ignore_dir)
             for basename in filenames:
                 if not basename.endswith('.py'):
                     continue
index 3fba8ae11c3b516d86d82051bb179ac5e15b0a91..b401070371bfcea183abc2b08419ddd0c75dd3fe 100644 (file)
@@ -24,6 +24,7 @@ from youtube_dl.utils import (
     encodeFilename,
     escape_rfc3986,
     escape_url,
+    ExtractorError,
     find_xpath_attr,
     fix_xml_ampersands,
     InAdvancePagedList,
@@ -38,6 +39,9 @@ from youtube_dl.utils import (
     parse_iso8601,
     read_batch_urls,
     sanitize_filename,
+    sanitize_path,
+    prepend_extension,
+    replace_extension,
     shell_quote,
     smuggle_url,
     str_to_int,
@@ -48,12 +52,16 @@ from youtube_dl.utils import (
     unified_strdate,
     unsmuggle_url,
     uppercase_escape,
+    lowercase_escape,
     url_basename,
     urlencode_postdata,
     version_tuple,
     xpath_with_ns,
+    xpath_text,
     render_table,
     match_str,
+    parse_dfxp_time_expr,
+    dfxp2srt,
 )
 
 
@@ -85,8 +93,11 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(
             sanitize_filename('New World record at 0:12:34'),
             'New World record at 0_12_34')
+
         self.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf')
         self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf')
+        self.assertEqual(sanitize_filename('.gasdgf'), 'gasdgf')
+        self.assertEqual(sanitize_filename('.gasdgf', is_id=True), '.gasdgf')
 
         forbidden = '"\0\\/'
         for fc in forbidden:
@@ -128,6 +139,58 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw')
         self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI')
 
+    def test_sanitize_path(self):
+        if sys.platform != 'win32':
+            return
+
+        self.assertEqual(sanitize_path('abc'), 'abc')
+        self.assertEqual(sanitize_path('abc/def'), 'abc\\def')
+        self.assertEqual(sanitize_path('abc\\def'), 'abc\\def')
+        self.assertEqual(sanitize_path('abc|def'), 'abc#def')
+        self.assertEqual(sanitize_path('<>:"|?*'), '#######')
+        self.assertEqual(sanitize_path('C:/abc/def'), 'C:\\abc\\def')
+        self.assertEqual(sanitize_path('C?:/abc/def'), 'C##\\abc\\def')
+
+        self.assertEqual(sanitize_path('\\\\?\\UNC\\ComputerName\\abc'), '\\\\?\\UNC\\ComputerName\\abc')
+        self.assertEqual(sanitize_path('\\\\?\\UNC/ComputerName/abc'), '\\\\?\\UNC\\ComputerName\\abc')
+
+        self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc')
+        self.assertEqual(sanitize_path('\\\\?\\C:/abc'), '\\\\?\\C:\\abc')
+        self.assertEqual(sanitize_path('\\\\?\\C:\\ab?c\\de:f'), '\\\\?\\C:\\ab#c\\de#f')
+        self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc')
+
+        self.assertEqual(
+            sanitize_path('youtube/%(uploader)s/%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s'),
+            'youtube\\%(uploader)s\\%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s')
+
+        self.assertEqual(
+            sanitize_path('youtube/TheWreckingYard ./00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part'),
+            'youtube\\TheWreckingYard #\\00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part')
+        self.assertEqual(sanitize_path('abc/def...'), 'abc\\def..#')
+        self.assertEqual(sanitize_path('abc.../def'), 'abc..#\\def')
+        self.assertEqual(sanitize_path('abc.../def...'), 'abc..#\\def..#')
+
+        self.assertEqual(sanitize_path('../abc'), '..\\abc')
+        self.assertEqual(sanitize_path('../../abc'), '..\\..\\abc')
+        self.assertEqual(sanitize_path('./abc'), 'abc')
+        self.assertEqual(sanitize_path('./../abc'), '..\\abc')
+
+    def test_prepend_extension(self):
+        self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext')
+        self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext')
+        self.assertEqual(prepend_extension('abc.unexpected_ext', 'temp', 'ext'), 'abc.unexpected_ext.temp')
+        self.assertEqual(prepend_extension('abc', 'temp'), 'abc.temp')
+        self.assertEqual(prepend_extension('.abc', 'temp'), '.abc.temp')
+        self.assertEqual(prepend_extension('.abc.ext', 'temp'), '.abc.temp.ext')
+
+    def test_replace_extension(self):
+        self.assertEqual(replace_extension('abc.ext', 'temp'), 'abc.temp')
+        self.assertEqual(replace_extension('abc.ext', 'temp', 'ext'), 'abc.temp')
+        self.assertEqual(replace_extension('abc.unexpected_ext', 'temp', 'ext'), 'abc.unexpected_ext.temp')
+        self.assertEqual(replace_extension('abc', 'temp'), 'abc.temp')
+        self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
+        self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
+
     def test_ordered_set(self):
         self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
         self.assertEqual(orderedSet([]), [])
@@ -137,6 +200,8 @@ class TestUtil(unittest.TestCase):
 
     def test_unescape_html(self):
         self.assertEqual(unescapeHTML('%20;'), '%20;')
+        self.assertEqual(unescapeHTML('&#x2F;'), '/')
+        self.assertEqual(unescapeHTML('&#47;'), '/')
         self.assertEqual(
             unescapeHTML('&eacute;'), 'é')
 
@@ -162,6 +227,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(
             unified_strdate('2/2/2015 6:47:40 PM', day_first=False),
             '20150202')
+        self.assertEqual(unified_strdate('25-09-2014'), '20140925')
 
     def test_find_xpath_attr(self):
         testxml = '''<root>
@@ -189,6 +255,17 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(find('media:song/media:author').text, 'The Author')
         self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3')
 
+    def test_xpath_text(self):
+        testxml = '''<root>
+            <div>
+                <p>Foo</p>
+            </div>
+        </root>'''
+        doc = xml.etree.ElementTree.fromstring(testxml)
+        self.assertEqual(xpath_text(doc, 'div/p'), 'Foo')
+        self.assertTrue(xpath_text(doc, 'div/bar') is None)
+        self.assertRaises(ExtractorError, xpath_text, doc, 'div/bar', fatal=True)
+
     def test_smuggle_url(self):
         data = {"ö": "ö", "abc": [3]}
         url = 'https://foo.bar/baz?x=y#a'
@@ -321,6 +398,10 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(uppercase_escape('aä'), 'aä')
         self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
 
+    def test_lowercase_escape(self):
+        self.assertEqual(lowercase_escape('aä'), 'aä')
+        self.assertEqual(lowercase_escape('\\u0026'), '&')
+
     def test_limit_length(self):
         self.assertEqual(limit_length(None, 12), None)
         self.assertEqual(limit_length('foo', 12), 'foo')
@@ -394,6 +475,12 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(d['x'], 1)
         self.assertEqual(d['y'], 'a')
 
+        on = js_to_json('["abc", "def",]')
+        self.assertEqual(json.loads(on), ['abc', 'def'])
+
+        on = js_to_json('{"abc": "def",}')
+        self.assertEqual(json.loads(on), {'abc': 'def'})
+
     def test_clean_html(self):
         self.assertEqual(clean_html('a:\nb'), 'a: b')
         self.assertEqual(clean_html('a:\n   "b"'), 'a:    "b"')
@@ -498,6 +585,42 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
             'like_count > 100 & dislike_count <? 50 & description',
             {'like_count': 190, 'dislike_count': 10}))
 
+    def test_parse_dfxp_time_expr(self):
+        self.assertEqual(parse_dfxp_time_expr(None), 0.0)
+        self.assertEqual(parse_dfxp_time_expr(''), 0.0)
+        self.assertEqual(parse_dfxp_time_expr('0.1'), 0.1)
+        self.assertEqual(parse_dfxp_time_expr('0.1s'), 0.1)
+        self.assertEqual(parse_dfxp_time_expr('00:00:01'), 1.0)
+        self.assertEqual(parse_dfxp_time_expr('00:00:01.100'), 1.1)
+
+    def test_dfxp2srt(self):
+        dfxp_data = '''<?xml version="1.0" encoding="UTF-8"?>
+            <tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
+            <body>
+                <div xml:lang="en">
+                    <p begin="0" end="1">The following line contains Chinese characters and special symbols</p>
+                    <p begin="1" end="2">第二行<br/>♪♪</p>
+                    <p begin="2" dur="1"><span>Third<br/>Line</span></p>
+                </div>
+            </body>
+            </tt>'''
+        srt_data = '''1
+00:00:00,000 --> 00:00:01,000
+The following line contains Chinese characters and special symbols
+
+2
+00:00:01,000 --> 00:00:02,000
+第二行
+♪♪
+
+3
+00:00:02,000 --> 00:00:03,000
+Third
+Line
+
+'''
+        self.assertEqual(dfxp2srt(dfxp_data), srt_data)
+
 
 if __name__ == '__main__':
     unittest.main()
index 7cdc1016c6d585be3eb423c866e56035ec0f8881..eb88c49a74e25c082ad86597daa5b3bb1935ff4a 100755 (executable)
Binary files a/youtube-dl and b/youtube-dl differ
index 65d4fc3086d3254abba340580d6dd333c60e663e..e09ef883d6f2bb7e7e89f28d426b5e5ea6efa96c 100644 (file)
@@ -18,215 +18,115 @@ redistribute it or use it however you like.
 .IP
 .nf
 \f[C]
-\-h,\ \-\-help\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ print\ this\ help\ text\ and\ exit
-\-\-version\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ print\ program\ version\ and\ exit
-\-U,\ \-\-update\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ update\ this\ program\ to\ latest\ version.\ Make
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ sure\ that\ you\ have\ sufficient\ permissions
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (run\ with\ sudo\ if\ needed)
-\-i,\ \-\-ignore\-errors\ \ \ \ \ \ \ \ \ \ \ \ \ \ continue\ on\ download\ errors,\ for\ example\ to
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ skip\ unavailable\ videos\ in\ a\ playlist
-\-\-abort\-on\-error\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Abort\ downloading\ of\ further\ videos\ (in\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ playlist\ or\ the\ command\ line)\ if\ an\ error
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ occurs
-\-\-dump\-user\-agent\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ display\ the\ current\ browser\ identification
-\-\-list\-extractors\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ List\ all\ supported\ extractors\ and\ the\ URLs
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ they\ would\ handle
-\-\-extractor\-descriptions\ \ \ \ \ \ \ \ \ Output\ descriptions\ of\ all\ supported
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ extractors
-\-\-default\-search\ PREFIX\ \ \ \ \ \ \ \ \ \ Use\ this\ prefix\ for\ unqualified\ URLs.\ For
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ example\ "gvsearch2:"\ downloads\ two\ videos
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ from\ google\ videos\ for\ \ youtube\-dl\ "large
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ apple".\ Use\ the\ value\ "auto"\ to\ let
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ youtube\-dl\ guess\ ("auto_warning"\ to\ emit\ a
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ warning\ when\ guessing).\ "error"\ just\ throws
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ an\ error.\ The\ default\ value\ "fixup_error"
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ repairs\ broken\ URLs,\ but\ emits\ an\ error\ if
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ this\ is\ not\ possible\ instead\ of\ searching.
-\-\-ignore\-config\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ read\ configuration\ files.\ When\ given
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ in\ the\ global\ configuration\ file\ /etc
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ /youtube\-dl.conf:\ Do\ not\ read\ the\ user
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ configuration\ in\ ~/.config/youtube\-
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ dl/config\ (%APPDATA%/youtube\-dl/config.txt
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ on\ Windows)
-\-\-flat\-playlist\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ extract\ the\ videos\ of\ a\ playlist,
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ only\ list\ them.
-\-\-no\-color\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ emit\ color\ codes\ in\ output.
+\-h,\ \-\-help\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Print\ this\ help\ text\ and\ exit
+\-\-version\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Print\ program\ version\ and\ exit
+\-U,\ \-\-update\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Update\ this\ program\ to\ latest\ version.\ Make\ sure\ that\ you\ have\ sufficient\ permissions\ (run\ with\ sudo\ if\ needed)
+\-i,\ \-\-ignore\-errors\ \ \ \ \ \ \ \ \ \ \ \ \ \ Continue\ on\ download\ errors,\ for\ example\ to\ skip\ unavailable\ videos\ in\ a\ playlist
+\-\-abort\-on\-error\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Abort\ downloading\ of\ further\ videos\ (in\ the\ playlist\ or\ the\ command\ line)\ if\ an\ error\ occurs
+\-\-dump\-user\-agent\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Display\ the\ current\ browser\ identification
+\-\-list\-extractors\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ List\ all\ supported\ extractors\ and\ the\ URLs\ they\ would\ handle
+\-\-extractor\-descriptions\ \ \ \ \ \ \ \ \ Output\ descriptions\ of\ all\ supported\ extractors
+\-\-default\-search\ PREFIX\ \ \ \ \ \ \ \ \ \ Use\ this\ prefix\ for\ unqualified\ URLs.\ For\ example\ "gvsearch2:"\ downloads\ two\ videos\ from\ google\ videos\ for\ youtube\-dl\ "large\ apple".
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Use\ the\ value\ "auto"\ to\ let\ youtube\-dl\ guess\ ("auto_warning"\ to\ emit\ a\ warning\ when\ guessing).\ "error"\ just\ throws\ an\ error.\ The
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ default\ value\ "fixup_error"\ repairs\ broken\ URLs,\ but\ emits\ an\ error\ if\ this\ is\ not\ possible\ instead\ of\ searching.
+\-\-ignore\-config\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ read\ configuration\ files.\ When\ given\ in\ the\ global\ configuration\ file\ /etc/youtube\-dl.conf:\ Do\ not\ read\ the\ user\ configuration
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ in\ ~/.config/youtube\-dl/config\ (%APPDATA%/youtube\-dl/config.txt\ on\ Windows)
+\-\-flat\-playlist\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ extract\ the\ videos\ of\ a\ playlist,\ only\ list\ them.
+\-\-no\-color\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ emit\ color\ codes\ in\ output
 \f[]
 .fi
 .SS Network Options:
 .IP
 .nf
 \f[C]
-\-\-proxy\ URL\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Use\ the\ specified\ HTTP/HTTPS\ proxy.\ Pass\ in
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ an\ empty\ string\ (\-\-proxy\ "")\ for\ direct
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ connection
+\-\-proxy\ URL\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Use\ the\ specified\ HTTP/HTTPS\ proxy.\ Pass\ in\ an\ empty\ string\ (\-\-proxy\ "")\ for\ direct\ connection
 \-\-socket\-timeout\ SECONDS\ \ \ \ \ \ \ \ \ Time\ to\ wait\ before\ giving\ up,\ in\ seconds
-\-\-source\-address\ IP\ \ \ \ \ \ \ \ \ \ \ \ \ \ Client\-side\ IP\ address\ to\ bind\ to
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (experimental)
-\-4,\ \-\-force\-ipv4\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Make\ all\ connections\ via\ IPv4
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (experimental)
-\-6,\ \-\-force\-ipv6\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Make\ all\ connections\ via\ IPv6
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (experimental)
+\-\-source\-address\ IP\ \ \ \ \ \ \ \ \ \ \ \ \ \ Client\-side\ IP\ address\ to\ bind\ to\ (experimental)
+\-4,\ \-\-force\-ipv4\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Make\ all\ connections\ via\ IPv4\ (experimental)
+\-6,\ \-\-force\-ipv6\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Make\ all\ connections\ via\ IPv6\ (experimental)
+\-\-cn\-verification\-proxy\ URL\ \ \ \ \ \ Use\ this\ proxy\ to\ verify\ the\ IP\ address\ for\ some\ Chinese\ sites.\ The\ default\ proxy\ specified\ by\ \-\-proxy\ (or\ none,\ if\ the\ option\ is
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ not\ present)\ is\ used\ for\ the\ actual\ downloading.\ (experimental)
 \f[]
 .fi
 .SS Video Selection:
 .IP
 .nf
 \f[C]
-\-\-playlist\-start\ NUMBER\ \ \ \ \ \ \ \ \ \ playlist\ video\ to\ start\ at\ (default\ is\ 1)
-\-\-playlist\-end\ NUMBER\ \ \ \ \ \ \ \ \ \ \ \ playlist\ video\ to\ end\ at\ (default\ is\ last)
-\-\-playlist\-items\ ITEM_SPEC\ \ \ \ \ \ \ playlist\ video\ items\ to\ download.\ Specify
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ indices\ of\ the\ videos\ in\ the\ playlist
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ seperated\ by\ commas\ like:\ "\-\-playlist\-items
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 1,2,5,8"\ if\ you\ want\ to\ download\ videos
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ indexed\ 1,\ 2,\ 5,\ 8\ in\ the\ playlist.\ You\ can
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ specify\ range:\ "\-\-playlist\-items
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 1\-3,7,10\-13",\ it\ will\ download\ the\ videos
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ at\ index\ 1,\ 2,\ 3,\ 7,\ 10,\ 11,\ 12\ and\ 13.
-\-\-match\-title\ REGEX\ \ \ \ \ \ \ \ \ \ \ \ \ \ download\ only\ matching\ titles\ (regex\ or
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ caseless\ sub\-string)
-\-\-reject\-title\ REGEX\ \ \ \ \ \ \ \ \ \ \ \ \ skip\ download\ for\ matching\ titles\ (regex\ or
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ caseless\ sub\-string)
+\-\-playlist\-start\ NUMBER\ \ \ \ \ \ \ \ \ \ Playlist\ video\ to\ start\ at\ (default\ is\ 1)
+\-\-playlist\-end\ NUMBER\ \ \ \ \ \ \ \ \ \ \ \ Playlist\ video\ to\ end\ at\ (default\ is\ last)
+\-\-playlist\-items\ ITEM_SPEC\ \ \ \ \ \ \ Playlist\ video\ items\ to\ download.\ Specify\ indices\ of\ the\ videos\ in\ the\ playlist\ separated\ by\ commas\ like:\ "\-\-playlist\-items\ 1,2,5,8"
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ if\ you\ want\ to\ download\ videos\ indexed\ 1,\ 2,\ 5,\ 8\ in\ the\ playlist.\ You\ can\ specify\ range:\ "\-\-playlist\-items\ 1\-3,7,10\-13",\ it\ will
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ download\ the\ videos\ at\ index\ 1,\ 2,\ 3,\ 7,\ 10,\ 11,\ 12\ and\ 13.
+\-\-match\-title\ REGEX\ \ \ \ \ \ \ \ \ \ \ \ \ \ Download\ only\ matching\ titles\ (regex\ or\ caseless\ sub\-string)
+\-\-reject\-title\ REGEX\ \ \ \ \ \ \ \ \ \ \ \ \ Skip\ download\ for\ matching\ titles\ (regex\ or\ caseless\ sub\-string)
 \-\-max\-downloads\ NUMBER\ \ \ \ \ \ \ \ \ \ \ Abort\ after\ downloading\ NUMBER\ files
-\-\-min\-filesize\ SIZE\ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ download\ any\ videos\ smaller\ than
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ SIZE\ (e.g.\ 50k\ or\ 44.6m)
-\-\-max\-filesize\ SIZE\ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ download\ any\ videos\ larger\ than\ SIZE
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (e.g.\ 50k\ or\ 44.6m)
-\-\-date\ DATE\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ download\ only\ videos\ uploaded\ in\ this\ date
-\-\-datebefore\ DATE\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ download\ only\ videos\ uploaded\ on\ or\ before
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ this\ date\ (i.e.\ inclusive)
-\-\-dateafter\ DATE\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ download\ only\ videos\ uploaded\ on\ or\ after
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ this\ date\ (i.e.\ inclusive)
-\-\-min\-views\ COUNT\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ download\ any\ videos\ with\ less\ than
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ COUNT\ views
-\-\-max\-views\ COUNT\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ download\ any\ videos\ with\ more\ than
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ COUNT\ views
-\-\-match\-filter\ FILTER\ \ \ \ \ \ \ \ \ \ \ \ (Experimental)\ Generic\ video\ filter.
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Specify\ any\ key\ (see\ help\ for\ \-o\ for\ a\ list
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ of\ available\ keys)\ to\ match\ if\ the\ key\ is
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ present,\ !key\ to\ check\ if\ the\ key\ is\ not
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ present,key\ >\ NUMBER\ (like\ "comment_count\ >
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 12",\ also\ works\ with\ >=,\ <,\ <=,\ !=,\ =)\ to
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ compare\ against\ a\ number,\ and\ &\ to\ require
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ multiple\ matches.\ Values\ which\ are\ not
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ known\ are\ excluded\ unless\ you\ put\ a
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ question\ mark\ (?)\ after\ the\ operator.For
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ example,\ to\ only\ match\ videos\ that\ have
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ been\ liked\ more\ than\ 100\ times\ and\ disliked
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ less\ than\ 50\ times\ (or\ the\ dislike
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ functionality\ is\ not\ available\ at\ the\ given
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ service),\ but\ who\ also\ have\ a\ description,
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ \ \-\-match\-filter\ "like_count\ >\ 100\ &
+\-\-min\-filesize\ SIZE\ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ download\ any\ videos\ smaller\ than\ SIZE\ (e.g.\ 50k\ or\ 44.6m)
+\-\-max\-filesize\ SIZE\ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ download\ any\ videos\ larger\ than\ SIZE\ (e.g.\ 50k\ or\ 44.6m)
+\-\-date\ DATE\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Download\ only\ videos\ uploaded\ on\ this\ date
+\-\-datebefore\ DATE\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Download\ only\ videos\ uploaded\ on\ or\ before\ this\ date\ (i.e.\ inclusive)
+\-\-dateafter\ DATE\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Download\ only\ videos\ uploaded\ on\ or\ after\ this\ date\ (i.e.\ inclusive)
+\-\-min\-views\ COUNT\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ download\ any\ videos\ with\ less\ than\ COUNT\ views
+\-\-max\-views\ COUNT\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ download\ any\ videos\ with\ more\ than\ COUNT\ views
+\-\-match\-filter\ FILTER\ \ \ \ \ \ \ \ \ \ \ \ Generic\ video\ filter\ (experimental).\ Specify\ any\ key\ (see\ help\ for\ \-o\ for\ a\ list\ of\ available\ keys)\ to\ match\ if\ the\ key\ is\ present,
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ !key\ to\ check\ if\ the\ key\ is\ not\ present,\ key\ >\ NUMBER\ (like\ "comment_count\ >\ 12",\ also\ works\ with\ >=,\ <,\ <=,\ !=,\ =)\ to\ compare\ against
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ a\ number,\ and\ &\ to\ require\ multiple\ matches.\ Values\ which\ are\ not\ known\ are\ excluded\ unless\ you\ put\ a\ question\ mark\ (?)\ after\ the
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ operator.\ For\ example,\ to\ only\ match\ videos\ that\ have\ been\ liked\ more\ than\ 100\ times\ and\ disliked\ less\ than\ 50\ times\ (or\ the\ dislike
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ functionality\ is\ not\ available\ at\ the\ given\ service),\ but\ who\ also\ have\ a\ description,\ use\ \ \-\-match\-filter\ "like_count\ >\ 100\ &
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ dislike_count\ <?\ 50\ &\ description"\ .
-\-\-no\-playlist\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ If\ the\ URL\ refers\ to\ a\ video\ and\ a
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ playlist,\ download\ only\ the\ video.
-\-\-yes\-playlist\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ If\ the\ URL\ refers\ to\ a\ video\ and\ a
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ playlist,\ download\ the\ playlist.
-\-\-age\-limit\ YEARS\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ download\ only\ videos\ suitable\ for\ the\ given
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ age
-\-\-download\-archive\ FILE\ \ \ \ \ \ \ \ \ \ Download\ only\ videos\ not\ listed\ in\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ archive\ file.\ Record\ the\ IDs\ of\ all
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ downloaded\ videos\ in\ it.
-\-\-include\-ads\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Download\ advertisements\ as\ well
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (experimental)
+\-\-no\-playlist\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Download\ only\ the\ video,\ if\ the\ URL\ refers\ to\ a\ video\ and\ a\ playlist.
+\-\-yes\-playlist\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Download\ the\ playlist,\ if\ the\ URL\ refers\ to\ a\ video\ and\ a\ playlist.
+\-\-age\-limit\ YEARS\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Download\ only\ videos\ suitable\ for\ the\ given\ age
+\-\-download\-archive\ FILE\ \ \ \ \ \ \ \ \ \ Download\ only\ videos\ not\ listed\ in\ the\ archive\ file.\ Record\ the\ IDs\ of\ all\ downloaded\ videos\ in\ it.
+\-\-include\-ads\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Download\ advertisements\ as\ well\ (experimental)
 \f[]
 .fi
 .SS Download Options:
 .IP
 .nf
 \f[C]
-\-r,\ \-\-rate\-limit\ LIMIT\ \ \ \ \ \ \ \ \ \ \ maximum\ download\ rate\ in\ bytes\ per\ second
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (e.g.\ 50K\ or\ 4.2M)
-\-R,\ \-\-retries\ RETRIES\ \ \ \ \ \ \ \ \ \ \ \ number\ of\ retries\ (default\ is\ 10),\ or
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ "infinite".
-\-\-buffer\-size\ SIZE\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ size\ of\ download\ buffer\ (e.g.\ 1024\ or\ 16K)
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (default\ is\ 1024)
-\-\-no\-resize\-buffer\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ automatically\ adjust\ the\ buffer
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ size.\ By\ default,\ the\ buffer\ size\ is
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ automatically\ resized\ from\ an\ initial\ value
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ of\ SIZE.
+\-r,\ \-\-rate\-limit\ LIMIT\ \ \ \ \ \ \ \ \ \ \ Maximum\ download\ rate\ in\ bytes\ per\ second\ (e.g.\ 50K\ or\ 4.2M)
+\-R,\ \-\-retries\ RETRIES\ \ \ \ \ \ \ \ \ \ \ \ Number\ of\ retries\ (default\ is\ 10),\ or\ "infinite".
+\-\-buffer\-size\ SIZE\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Size\ of\ download\ buffer\ (e.g.\ 1024\ or\ 16K)\ (default\ is\ 1024)
+\-\-no\-resize\-buffer\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ automatically\ adjust\ the\ buffer\ size.\ By\ default,\ the\ buffer\ size\ is\ automatically\ resized\ from\ an\ initial\ value\ of\ SIZE.
 \-\-playlist\-reverse\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Download\ playlist\ videos\ in\ reverse\ order
-\-\-xattr\-set\-filesize\ \ \ \ \ \ \ \ \ \ \ \ \ (experimental)\ set\ file\ xattribute
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ ytdl.filesize\ with\ expected\ filesize
-\-\-hls\-prefer\-native\ \ \ \ \ \ \ \ \ \ \ \ \ \ (experimental)\ Use\ the\ native\ HLS
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ downloader\ instead\ of\ ffmpeg.
-\-\-external\-downloader\ COMMAND\ \ \ \ (experimental)\ Use\ the\ specified\ external
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ downloader.\ Currently\ supports
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ aria2c,curl,wget
+\-\-xattr\-set\-filesize\ \ \ \ \ \ \ \ \ \ \ \ \ Set\ file\ xattribute\ ytdl.filesize\ with\ expected\ filesize\ (experimental)
+\-\-hls\-prefer\-native\ \ \ \ \ \ \ \ \ \ \ \ \ \ Use\ the\ native\ HLS\ downloader\ instead\ of\ ffmpeg\ (experimental)
+\-\-external\-downloader\ COMMAND\ \ \ \ Use\ the\ specified\ external\ downloader.\ Currently\ supports\ aria2c,curl,wget
+\-\-external\-downloader\-args\ ARGS\ \ Give\ these\ arguments\ to\ the\ external\ downloader
 \f[]
 .fi
 .SS Filesystem Options:
 .IP
 .nf
 \f[C]
-\-a,\ \-\-batch\-file\ FILE\ \ \ \ \ \ \ \ \ \ \ \ file\ containing\ URLs\ to\ download\ (\[aq]\-\[aq]\ for
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ stdin)
-\-\-id\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ only\ video\ ID\ in\ file\ name
-\-o,\ \-\-output\ TEMPLATE\ \ \ \ \ \ \ \ \ \ \ \ output\ filename\ template.\ Use\ %(title)s\ to
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ get\ the\ title,\ %(uploader)s\ for\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ uploader\ name,\ %(uploader_id)s\ for\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ uploader\ nickname\ if\ different,
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(autonumber)s\ to\ get\ an\ automatically
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ incremented\ number,\ %(ext)s\ for\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ filename\ extension,\ %(format)s\ for\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ format\ description\ (like\ "22\ \-\ 1280x720"\ or
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ "HD"),\ %(format_id)s\ for\ the\ unique\ id\ of
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ the\ format\ (like\ Youtube\[aq]s\ itags:\ "137"),
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(upload_date)s\ for\ the\ upload\ date
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (YYYYMMDD),\ %(extractor)s\ for\ the\ provider
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (youtube,\ metacafe,\ etc),\ %(id)s\ for\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ video\ id,\ %(playlist_title)s,
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(playlist_id)s,\ or\ %(playlist)s\ (=title\ if
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ present,\ ID\ otherwise)\ for\ the\ playlist\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ video\ is\ in,\ %(playlist_index)s\ for\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ position\ in\ the\ playlist.\ %(height)s\ and
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(width)s\ for\ the\ width\ and\ height\ of\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ video\ format.\ %(resolution)s\ for\ a\ textual
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ description\ of\ the\ resolution\ of\ the\ video
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ format.\ %%\ for\ a\ literal\ percent.\ Use\ \-\ to
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ output\ to\ stdout.\ Can\ also\ be\ used\ to
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ download\ to\ a\ different\ directory,\ for
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ example\ with\ \-o\ \[aq]/my/downloads/%(uploader)s
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ /%(title)s\-%(id)s.%(ext)s\[aq]\ .
-\-\-autonumber\-size\ NUMBER\ \ \ \ \ \ \ \ \ Specifies\ the\ number\ of\ digits\ in
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(autonumber)s\ when\ it\ is\ present\ in\ output
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ filename\ template\ or\ \-\-auto\-number\ option
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ is\ given
-\-\-restrict\-filenames\ \ \ \ \ \ \ \ \ \ \ \ \ Restrict\ filenames\ to\ only\ ASCII
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ characters,\ and\ avoid\ "&"\ and\ spaces\ in
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ filenames
-\-A,\ \-\-auto\-number\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ [deprecated;\ use\ \ \-o
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ "%(autonumber)s\-%(title)s.%(ext)s"\ ]\ number
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ downloaded\ files\ starting\ from\ 00000
-\-t,\ \-\-title\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ [deprecated]\ use\ title\ in\ file\ name
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (default)
-\-l,\ \-\-literal\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ [deprecated]\ alias\ of\ \-\-title
-\-w,\ \-\-no\-overwrites\ \ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ overwrite\ files
-\-c,\ \-\-continue\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ force\ resume\ of\ partially\ downloaded\ files.
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ By\ default,\ youtube\-dl\ will\ resume
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ downloads\ if\ possible.
-\-\-no\-continue\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ resume\ partially\ downloaded\ files
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (restart\ from\ beginning)
-\-\-no\-part\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ use\ .part\ files\ \-\ write\ directly
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ into\ output\ file
-\-\-no\-mtime\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ use\ the\ Last\-modified\ header\ to\ set
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ the\ file\ modification\ time
-\-\-write\-description\ \ \ \ \ \ \ \ \ \ \ \ \ \ write\ video\ description\ to\ a\ .description
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ file
-\-\-write\-info\-json\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ write\ video\ metadata\ to\ a\ .info.json\ file
-\-\-write\-annotations\ \ \ \ \ \ \ \ \ \ \ \ \ \ write\ video\ annotations\ to\ a\ .annotation
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ file
-\-\-load\-info\ FILE\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ json\ file\ containing\ the\ video\ information
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (created\ with\ the\ "\-\-write\-json"\ option)
-\-\-cookies\ FILE\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ file\ to\ read\ cookies\ from\ and\ dump\ cookie
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ jar\ in
-\-\-cache\-dir\ DIR\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Location\ in\ the\ filesystem\ where\ youtube\-dl
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ can\ store\ some\ downloaded\ information
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ permanently.\ By\ default\ $XDG_CACHE_HOME
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ /youtube\-dl\ or\ ~/.cache/youtube\-dl\ .\ At\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ moment,\ only\ YouTube\ player\ files\ (for
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ videos\ with\ obfuscated\ signatures)\ are
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ cached,\ but\ that\ may\ change.
+\-a,\ \-\-batch\-file\ FILE\ \ \ \ \ \ \ \ \ \ \ \ File\ containing\ URLs\ to\ download\ (\[aq]\-\[aq]\ for\ stdin)
+\-\-id\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Use\ only\ video\ ID\ in\ file\ name
+\-o,\ \-\-output\ TEMPLATE\ \ \ \ \ \ \ \ \ \ \ \ Output\ filename\ template.\ Use\ %(title)s\ to\ get\ the\ title,\ %(uploader)s\ for\ the\ uploader\ name,\ %(uploader_id)s\ for\ the\ uploader
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ nickname\ if\ different,\ %(autonumber)s\ to\ get\ an\ automatically\ incremented\ number,\ %(ext)s\ for\ the\ filename\ extension,\ %(format)s\ for
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ the\ format\ description\ (like\ "22\ \-\ 1280x720"\ or\ "HD"),\ %(format_id)s\ for\ the\ unique\ id\ of\ the\ format\ (like\ YouTube\[aq]s\ itags:\ "137"),
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(upload_date)s\ for\ the\ upload\ date\ (YYYYMMDD),\ %(extractor)s\ for\ the\ provider\ (youtube,\ metacafe,\ etc),\ %(id)s\ for\ the\ video\ id,
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(playlist_title)s,\ %(playlist_id)s,\ or\ %(playlist)s\ (=title\ if\ present,\ ID\ otherwise)\ for\ the\ playlist\ the\ video\ is\ in,
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(playlist_index)s\ for\ the\ position\ in\ the\ playlist.\ %(height)s\ and\ %(width)s\ for\ the\ width\ and\ height\ of\ the\ video\ format.
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(resolution)s\ for\ a\ textual\ description\ of\ the\ resolution\ of\ the\ video\ format.\ %%\ for\ a\ literal\ percent.\ Use\ \-\ to\ output\ to\ stdout.
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Can\ also\ be\ used\ to\ download\ to\ a\ different\ directory,\ for\ example\ with\ \-o\ \[aq]/my/downloads/%(uploader)s/%(title)s\-%(id)s.%(ext)s\[aq]\ .
+\-\-autonumber\-size\ NUMBER\ \ \ \ \ \ \ \ \ Specify\ the\ number\ of\ digits\ in\ %(autonumber)s\ when\ it\ is\ present\ in\ output\ filename\ template\ or\ \-\-auto\-number\ option\ is\ given
+\-\-restrict\-filenames\ \ \ \ \ \ \ \ \ \ \ \ \ Restrict\ filenames\ to\ only\ ASCII\ characters,\ and\ avoid\ "&"\ and\ spaces\ in\ filenames
+\-A,\ \-\-auto\-number\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ [deprecated;\ use\ \ \-o\ "%(autonumber)s\-%(title)s.%(ext)s"\ ]\ Number\ downloaded\ files\ starting\ from\ 00000
+\-t,\ \-\-title\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ [deprecated]\ Use\ title\ in\ file\ name\ (default)
+\-l,\ \-\-literal\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ [deprecated]\ Alias\ of\ \-\-title
+\-w,\ \-\-no\-overwrites\ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ overwrite\ files
+\-c,\ \-\-continue\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Force\ resume\ of\ partially\ downloaded\ files.\ By\ default,\ youtube\-dl\ will\ resume\ downloads\ if\ possible.
+\-\-no\-continue\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ resume\ partially\ downloaded\ files\ (restart\ from\ beginning)
+\-\-no\-part\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ use\ .part\ files\ \-\ write\ directly\ into\ output\ file
+\-\-no\-mtime\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ use\ the\ Last\-modified\ header\ to\ set\ the\ file\ modification\ time
+\-\-write\-description\ \ \ \ \ \ \ \ \ \ \ \ \ \ Write\ video\ description\ to\ a\ .description\ file
+\-\-write\-info\-json\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Write\ video\ metadata\ to\ a\ .info.json\ file
+\-\-write\-annotations\ \ \ \ \ \ \ \ \ \ \ \ \ \ Write\ video\ annotations\ to\ a\ .annotations.xml\ file
+\-\-load\-info\ FILE\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ JSON\ file\ containing\ the\ video\ information\ (created\ with\ the\ "\-\-write\-info\-json"\ option)
+\-\-cookies\ FILE\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ File\ to\ read\ cookies\ from\ and\ dump\ cookie\ jar\ in
+\-\-cache\-dir\ DIR\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Location\ in\ the\ filesystem\ where\ youtube\-dl\ can\ store\ some\ downloaded\ information\ permanently.\ By\ default\ $XDG_CACHE_HOME/youtube\-dl
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ or\ ~/.cache/youtube\-dl\ .\ At\ the\ moment,\ only\ YouTube\ player\ files\ (for\ videos\ with\ obfuscated\ signatures)\ are\ cached,\ but\ that\ may
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ change.
 \-\-no\-cache\-dir\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Disable\ filesystem\ caching
 \-\-rm\-cache\-dir\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Delete\ all\ filesystem\ cache\ files
 \f[]
@@ -235,52 +135,40 @@ redistribute it or use it however you like.
 .IP
 .nf
 \f[C]
-\-\-write\-thumbnail\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ write\ thumbnail\ image\ to\ disk
-\-\-write\-all\-thumbnails\ \ \ \ \ \ \ \ \ \ \ write\ all\ thumbnail\ image\ formats\ to\ disk
-\-\-list\-thumbnails\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Simulate\ and\ list\ all\ available\ thumbnail
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ formats
+\-\-write\-thumbnail\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Write\ thumbnail\ image\ to\ disk
+\-\-write\-all\-thumbnails\ \ \ \ \ \ \ \ \ \ \ Write\ all\ thumbnail\ image\ formats\ to\ disk
+\-\-list\-thumbnails\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Simulate\ and\ list\ all\ available\ thumbnail\ formats
 \f[]
 .fi
 .SS Verbosity / Simulation Options:
 .IP
 .nf
 \f[C]
-\-q,\ \-\-quiet\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ activates\ quiet\ mode
+\-q,\ \-\-quiet\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Activate\ quiet\ mode
 \-\-no\-warnings\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Ignore\ warnings
-\-s,\ \-\-simulate\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ download\ the\ video\ and\ do\ not\ write
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ anything\ to\ disk
-\-\-skip\-download\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ download\ the\ video
-\-g,\ \-\-get\-url\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ URL
-\-e,\ \-\-get\-title\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ title
-\-\-get\-id\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ id
-\-\-get\-thumbnail\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ thumbnail\ URL
-\-\-get\-description\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ video\ description
-\-\-get\-duration\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ video\ length
-\-\-get\-filename\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ output\ filename
-\-\-get\-format\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ output\ format
-\-j,\ \-\-dump\-json\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ JSON\ information.
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ See\ \-\-output\ for\ a\ description\ of\ available
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ keys.
-\-J,\ \-\-dump\-single\-json\ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ JSON\ information
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ for\ each\ command\-line\ argument.\ If\ the\ URL
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ refers\ to\ a\ playlist,\ dump\ the\ whole
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ playlist\ information\ in\ a\ single\ line.
-\-\-print\-json\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Be\ quiet\ and\ print\ the\ video\ information\ as
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ JSON\ (video\ is\ still\ being\ downloaded).
-\-\-newline\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ output\ progress\ bar\ as\ new\ lines
-\-\-no\-progress\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ print\ progress\ bar
-\-\-console\-title\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ display\ progress\ in\ console\ titlebar
-\-v,\ \-\-verbose\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ print\ various\ debugging\ information
-\-\-dump\-intermediate\-pages\ \ \ \ \ \ \ \ print\ downloaded\ pages\ to\ debug\ problems
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (very\ verbose)
-\-\-write\-pages\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Write\ downloaded\ intermediary\ pages\ to
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ files\ in\ the\ current\ directory\ to\ debug
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ problems
+\-s,\ \-\-simulate\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ download\ the\ video\ and\ do\ not\ write\ anything\ to\ disk
+\-\-skip\-download\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ download\ the\ video
+\-g,\ \-\-get\-url\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Simulate,\ quiet\ but\ print\ URL
+\-e,\ \-\-get\-title\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Simulate,\ quiet\ but\ print\ title
+\-\-get\-id\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Simulate,\ quiet\ but\ print\ id
+\-\-get\-thumbnail\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Simulate,\ quiet\ but\ print\ thumbnail\ URL
+\-\-get\-description\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Simulate,\ quiet\ but\ print\ video\ description
+\-\-get\-duration\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Simulate,\ quiet\ but\ print\ video\ length
+\-\-get\-filename\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Simulate,\ quiet\ but\ print\ output\ filename
+\-\-get\-format\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Simulate,\ quiet\ but\ print\ output\ format
+\-j,\ \-\-dump\-json\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Simulate,\ quiet\ but\ print\ JSON\ information.\ See\ \-\-output\ for\ a\ description\ of\ available\ keys.
+\-J,\ \-\-dump\-single\-json\ \ \ \ \ \ \ \ \ \ \ Simulate,\ quiet\ but\ print\ JSON\ information\ for\ each\ command\-line\ argument.\ If\ the\ URL\ refers\ to\ a\ playlist,\ dump\ the\ whole\ playlist
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ information\ in\ a\ single\ line.
+\-\-print\-json\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Be\ quiet\ and\ print\ the\ video\ information\ as\ JSON\ (video\ is\ still\ being\ downloaded).
+\-\-newline\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Output\ progress\ bar\ as\ new\ lines
+\-\-no\-progress\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ print\ progress\ bar
+\-\-console\-title\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Display\ progress\ in\ console\ titlebar
+\-v,\ \-\-verbose\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Print\ various\ debugging\ information
+\-\-dump\-pages\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Print\ downloaded\ pages\ to\ debug\ problems\ (very\ verbose)
+\-\-write\-pages\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Write\ downloaded\ intermediary\ pages\ to\ files\ in\ the\ current\ directory\ to\ debug\ problems
 \-\-print\-traffic\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Display\ sent\ and\ read\ HTTP\ traffic
-\-C,\ \-\-call\-home\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Contact\ the\ youtube\-dl\ server\ for
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ debugging.
-\-\-no\-call\-home\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ NOT\ contact\ the\ youtube\-dl\ server\ for
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ debugging.
+\-C,\ \-\-call\-home\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Contact\ the\ youtube\-dl\ server\ for\ debugging
+\-\-no\-call\-home\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ NOT\ contact\ the\ youtube\-dl\ server\ for\ debugging
 \f[]
 .fi
 .SS Workarounds:
@@ -288,145 +176,77 @@ redistribute it or use it however you like.
 .nf
 \f[C]
 \-\-encoding\ ENCODING\ \ \ \ \ \ \ \ \ \ \ \ \ \ Force\ the\ specified\ encoding\ (experimental)
-\-\-no\-check\-certificate\ \ \ \ \ \ \ \ \ \ \ Suppress\ HTTPS\ certificate\ validation.
-\-\-prefer\-insecure\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Use\ an\ unencrypted\ connection\ to\ retrieve
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ information\ about\ the\ video.\ (Currently
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ supported\ only\ for\ YouTube)
-\-\-user\-agent\ UA\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ specify\ a\ custom\ user\ agent
-\-\-referer\ URL\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ specify\ a\ custom\ referer,\ use\ if\ the\ video
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ access\ is\ restricted\ to\ one\ domain
-\-\-add\-header\ FIELD:VALUE\ \ \ \ \ \ \ \ \ specify\ a\ custom\ HTTP\ header\ and\ its\ value,
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ separated\ by\ a\ colon\ \[aq]:\[aq].\ You\ can\ use\ this
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ option\ multiple\ times
-\-\-bidi\-workaround\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Work\ around\ terminals\ that\ lack
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ bidirectional\ text\ support.\ Requires\ bidiv
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ or\ fribidi\ executable\ in\ PATH
-\-\-sleep\-interval\ SECONDS\ \ \ \ \ \ \ \ \ Number\ of\ seconds\ to\ sleep\ before\ each
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ download.
+\-\-no\-check\-certificate\ \ \ \ \ \ \ \ \ \ \ Suppress\ HTTPS\ certificate\ validation
+\-\-prefer\-insecure\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Use\ an\ unencrypted\ connection\ to\ retrieve\ information\ about\ the\ video.\ (Currently\ supported\ only\ for\ YouTube)
+\-\-user\-agent\ UA\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Specify\ a\ custom\ user\ agent
+\-\-referer\ URL\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Specify\ a\ custom\ referer,\ use\ if\ the\ video\ access\ is\ restricted\ to\ one\ domain
+\-\-add\-header\ FIELD:VALUE\ \ \ \ \ \ \ \ \ Specify\ a\ custom\ HTTP\ header\ and\ its\ value,\ separated\ by\ a\ colon\ \[aq]:\[aq].\ You\ can\ use\ this\ option\ multiple\ times
+\-\-bidi\-workaround\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Work\ around\ terminals\ that\ lack\ bidirectional\ text\ support.\ Requires\ bidiv\ or\ fribidi\ executable\ in\ PATH
+\-\-sleep\-interval\ SECONDS\ \ \ \ \ \ \ \ \ Number\ of\ seconds\ to\ sleep\ before\ each\ download.
 \f[]
 .fi
 .SS Video Format Options:
 .IP
 .nf
 \f[C]
-\-f,\ \-\-format\ FORMAT\ \ \ \ \ \ \ \ \ \ \ \ \ \ video\ format\ code,\ specify\ the\ order\ of
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ preference\ using\ slashes,\ as\ in\ \-f\ 22/17/18
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ .\ \ Instead\ of\ format\ codes,\ you\ can\ select
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ by\ extension\ for\ the\ extensions\ aac,\ m4a,
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ mp3,\ mp4,\ ogg,\ wav,\ webm.\ You\ can\ also\ use
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ the\ special\ names\ "best",\ "bestvideo",
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ "bestaudio",\ "worst".\ \ You\ can\ filter\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ video\ results\ by\ putting\ a\ condition\ in
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ brackets,\ as\ in\ \-f\ "best[height=720]"\ (or
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \-f\ "[filesize>10M]").\ \ This\ works\ for
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ filesize,\ height,\ width,\ tbr,\ abr,\ vbr,
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ asr,\ and\ fps\ and\ the\ comparisons\ <,\ <=,\ >,
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ >=,\ =,\ !=\ and\ for\ ext,\ acodec,\ vcodec,
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ container,\ and\ protocol\ and\ the\ comparisons
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ =,\ !=\ .\ Formats\ for\ which\ the\ value\ is\ not
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ known\ are\ excluded\ unless\ you\ put\ a
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ question\ mark\ (?)\ after\ the\ operator.\ You
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ can\ combine\ format\ filters,\ so\ \ \-f\ "[height
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ <=?\ 720][tbr>500]"\ selects\ up\ to\ 720p
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ videos\ (or\ videos\ where\ the\ height\ is\ not
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ known)\ with\ a\ bitrate\ of\ at\ least\ 500
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ KBit/s.\ By\ default,\ youtube\-dl\ will\ pick
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ the\ best\ quality.\ Use\ commas\ to\ download
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ multiple\ audio\ formats,\ such\ as\ \-f
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 136/137/mp4/bestvideo,140/m4a/bestaudio.
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ You\ can\ merge\ the\ video\ and\ audio\ of\ two
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ formats\ into\ a\ single\ file\ using\ \-f\ <video\-
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ format>+<audio\-format>\ (requires\ ffmpeg\ or
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ avconv),\ for\ example\ \-f
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ bestvideo+bestaudio.
-\-\-all\-formats\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ download\ all\ available\ video\ formats
-\-\-prefer\-free\-formats\ \ \ \ \ \ \ \ \ \ \ \ prefer\ free\ video\ formats\ unless\ a\ specific
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ one\ is\ requested
-\-\-max\-quality\ FORMAT\ \ \ \ \ \ \ \ \ \ \ \ \ highest\ quality\ format\ to\ download
-\-F,\ \-\-list\-formats\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ list\ all\ available\ formats
-\-\-youtube\-skip\-dash\-manifest\ \ \ \ \ Do\ not\ download\ the\ DASH\ manifest\ on
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ YouTube\ videos
-\-\-merge\-output\-format\ FORMAT\ \ \ \ \ If\ a\ merge\ is\ required\ (e.g.
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ bestvideo+bestaudio),\ output\ to\ given
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ container\ format.\ One\ of\ mkv,\ mp4,\ ogg,
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ webm,\ flv.Ignored\ if\ no\ merge\ is\ required
+\-f,\ \-\-format\ FORMAT\ \ \ \ \ \ \ \ \ \ \ \ \ \ Video\ format\ code,\ see\ the\ "FORMAT\ SELECTION"\ section\ for\ all\ the\ info
+\-\-all\-formats\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Download\ all\ available\ video\ formats
+\-\-prefer\-free\-formats\ \ \ \ \ \ \ \ \ \ \ \ Prefer\ free\ video\ formats\ unless\ a\ specific\ one\ is\ requested
+\-F,\ \-\-list\-formats\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ List\ all\ available\ formats
+\-\-youtube\-skip\-dash\-manifest\ \ \ \ \ Do\ not\ download\ the\ DASH\ manifest\ on\ YouTube\ videos
+\-\-merge\-output\-format\ FORMAT\ \ \ \ \ If\ a\ merge\ is\ required\ (e.g.\ bestvideo+bestaudio),\ output\ to\ given\ container\ format.\ One\ of\ mkv,\ mp4,\ ogg,\ webm,\ flv.\ Ignored\ if\ no
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ merge\ is\ required
 \f[]
 .fi
 .SS Subtitle Options:
 .IP
 .nf
 \f[C]
-\-\-write\-sub\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ write\ subtitle\ file
-\-\-write\-auto\-sub\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ write\ automatic\ subtitle\ file\ (youtube
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ only)
-\-\-all\-subs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ downloads\ all\ the\ available\ subtitles\ of
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ the\ video
-\-\-list\-subs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ lists\ all\ available\ subtitles\ for\ the\ video
-\-\-sub\-format\ FORMAT\ \ \ \ \ \ \ \ \ \ \ \ \ \ subtitle\ format,\ accepts\ formats
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ preference,\ for\ example:\ "ass/srt/best"
-\-\-sub\-lang\ LANGS\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ languages\ of\ the\ subtitles\ to\ download
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (optional)\ separated\ by\ commas,\ use\ IETF
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ language\ tags\ like\ \[aq]en,pt\[aq]
+\-\-write\-sub\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Write\ subtitle\ file
+\-\-write\-auto\-sub\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Write\ automatic\ subtitle\ file\ (YouTube\ only)
+\-\-all\-subs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Download\ all\ the\ available\ subtitles\ of\ the\ video
+\-\-list\-subs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ List\ all\ available\ subtitles\ for\ the\ video
+\-\-sub\-format\ FORMAT\ \ \ \ \ \ \ \ \ \ \ \ \ \ Subtitle\ format,\ accepts\ formats\ preference,\ for\ example:\ "srt"\ or\ "ass/srt/best"
+\-\-sub\-lang\ LANGS\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Languages\ of\ the\ subtitles\ to\ download\ (optional)\ separated\ by\ commas,\ use\ IETF\ language\ tags\ like\ \[aq]en,pt\[aq]
 \f[]
 .fi
 .SS Authentication Options:
 .IP
 .nf
 \f[C]
-\-u,\ \-\-username\ USERNAME\ \ \ \ \ \ \ \ \ \ login\ with\ this\ account\ ID
-\-p,\ \-\-password\ PASSWORD\ \ \ \ \ \ \ \ \ \ account\ password.\ If\ this\ option\ is\ left
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ out,\ youtube\-dl\ will\ ask\ interactively.
-\-2,\ \-\-twofactor\ TWOFACTOR\ \ \ \ \ \ \ \ two\-factor\ auth\ code
-\-n,\ \-\-netrc\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ .netrc\ authentication\ data
-\-\-video\-password\ PASSWORD\ \ \ \ \ \ \ \ video\ password\ (vimeo,\ smotri)
+\-u,\ \-\-username\ USERNAME\ \ \ \ \ \ \ \ \ \ Login\ with\ this\ account\ ID
+\-p,\ \-\-password\ PASSWORD\ \ \ \ \ \ \ \ \ \ Account\ password.\ If\ this\ option\ is\ left\ out,\ youtube\-dl\ will\ ask\ interactively.
+\-2,\ \-\-twofactor\ TWOFACTOR\ \ \ \ \ \ \ \ Two\-factor\ auth\ code
+\-n,\ \-\-netrc\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Use\ .netrc\ authentication\ data
+\-\-video\-password\ PASSWORD\ \ \ \ \ \ \ \ Video\ password\ (vimeo,\ smotri)
 \f[]
 .fi
 .SS Post\-processing Options:
 .IP
 .nf
 \f[C]
-\-x,\ \-\-extract\-audio\ \ \ \ \ \ \ \ \ \ \ \ \ \ convert\ video\ files\ to\ audio\-only\ files
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (requires\ ffmpeg\ or\ avconv\ and\ ffprobe\ or
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ avprobe)
-\-\-audio\-format\ FORMAT\ \ \ \ \ \ \ \ \ \ \ \ "best",\ "aac",\ "vorbis",\ "mp3",\ "m4a",
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ "opus",\ or\ "wav";\ "best"\ by\ default
-\-\-audio\-quality\ QUALITY\ \ \ \ \ \ \ \ \ \ ffmpeg/avconv\ audio\ quality\ specification,
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ insert\ a\ value\ between\ 0\ (better)\ and\ 9
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (worse)\ for\ VBR\ or\ a\ specific\ bitrate\ like
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 128K\ (default\ 5)
-\-\-recode\-video\ FORMAT\ \ \ \ \ \ \ \ \ \ \ \ Encode\ the\ video\ to\ another\ format\ if
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ necessary\ (currently\ supported:
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ mp4|flv|ogg|webm|mkv)
-\-k,\ \-\-keep\-video\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ keeps\ the\ video\ file\ on\ disk\ after\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ post\-processing;\ the\ video\ is\ erased\ by
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ default
-\-\-no\-post\-overwrites\ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ overwrite\ post\-processed\ files;\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ post\-processed\ files\ are\ overwritten\ by
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ default
-\-\-embed\-subs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ embed\ subtitles\ in\ the\ video\ (only\ for\ mp4
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ videos)
-\-\-embed\-thumbnail\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ embed\ thumbnail\ in\ the\ audio\ as\ cover\ art
-\-\-add\-metadata\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ write\ metadata\ to\ the\ video\ file
-\-\-xattrs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ write\ metadata\ to\ the\ video\ file\[aq]s\ xattrs
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (using\ dublin\ core\ and\ xdg\ standards)
-\-\-fixup\ POLICY\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Automatically\ correct\ known\ faults\ of\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ file.\ One\ of\ never\ (do\ nothing),\ warn\ (only
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ emit\ a\ warning),\ detect_or_warn(the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ default;\ fix\ file\ if\ we\ can,\ warn
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ otherwise)
-\-\-prefer\-avconv\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Prefer\ avconv\ over\ ffmpeg\ for\ running\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ postprocessors\ (default)
-\-\-prefer\-ffmpeg\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Prefer\ ffmpeg\ over\ avconv\ for\ running\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ postprocessors
-\-\-ffmpeg\-location\ PATH\ \ \ \ \ \ \ \ \ \ \ Location\ of\ the\ ffmpeg/avconv\ binary;
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ either\ the\ path\ to\ the\ binary\ or\ its
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ containing\ directory.
-\-\-exec\ CMD\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Execute\ a\ command\ on\ the\ file\ after
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ downloading,\ similar\ to\ find\[aq]s\ \-exec
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ syntax.\ Example:\ \-\-exec\ \[aq]adb\ push\ {}
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ /sdcard/Music/\ &&\ rm\ {}\[aq]
-\-\-convert\-subtitles\ FORMAT\ \ \ \ \ \ \ Convert\ the\ subtitles\ to\ other\ format
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (currently\ supported:\ srt|ass|vtt)
+\-x,\ \-\-extract\-audio\ \ \ \ \ \ \ \ \ \ \ \ \ \ Convert\ video\ files\ to\ audio\-only\ files\ (requires\ ffmpeg\ or\ avconv\ and\ ffprobe\ or\ avprobe)
+\-\-audio\-format\ FORMAT\ \ \ \ \ \ \ \ \ \ \ \ Specify\ audio\ format:\ "best",\ "aac",\ "vorbis",\ "mp3",\ "m4a",\ "opus",\ or\ "wav";\ "best"\ by\ default
+\-\-audio\-quality\ QUALITY\ \ \ \ \ \ \ \ \ \ Specify\ ffmpeg/avconv\ audio\ quality,\ insert\ a\ value\ between\ 0\ (better)\ and\ 9\ (worse)\ for\ VBR\ or\ a\ specific\ bitrate\ like\ 128K\ (default
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 5)
+\-\-recode\-video\ FORMAT\ \ \ \ \ \ \ \ \ \ \ \ Encode\ the\ video\ to\ another\ format\ if\ necessary\ (currently\ supported:\ mp4|flv|ogg|webm|mkv)
+\-k,\ \-\-keep\-video\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Keep\ the\ video\ file\ on\ disk\ after\ the\ post\-processing;\ the\ video\ is\ erased\ by\ default
+\-\-no\-post\-overwrites\ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ overwrite\ post\-processed\ files;\ the\ post\-processed\ files\ are\ overwritten\ by\ default
+\-\-embed\-subs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Embed\ subtitles\ in\ the\ video\ (only\ for\ mkv\ and\ mp4\ videos)
+\-\-embed\-thumbnail\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Embed\ thumbnail\ in\ the\ audio\ as\ cover\ art
+\-\-add\-metadata\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Write\ metadata\ to\ the\ video\ file
+\-\-metadata\-from\-title\ FORMAT\ \ \ \ \ Parse\ additional\ metadata\ like\ song\ title\ /\ artist\ from\ the\ video\ title.\ The\ format\ syntax\ is\ the\ same\ as\ \-\-output,\ the\ parsed
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ parameters\ replace\ existing\ values.\ Additional\ templates:\ %(album),\ %(artist).\ Example:\ \-\-metadata\-from\-title\ "%(artist)s\ \-
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(title)s"\ matches\ a\ title\ like\ "Coldplay\ \-\ Paradise"
+\-\-xattrs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Write\ metadata\ to\ the\ video\ file\[aq]s\ xattrs\ (using\ dublin\ core\ and\ xdg\ standards)
+\-\-fixup\ POLICY\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Automatically\ correct\ known\ faults\ of\ the\ file.\ One\ of\ never\ (do\ nothing),\ warn\ (only\ emit\ a\ warning),\ detect_or_warn\ (the\ default;
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ fix\ file\ if\ we\ can,\ warn\ otherwise)
+\-\-prefer\-avconv\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Prefer\ avconv\ over\ ffmpeg\ for\ running\ the\ postprocessors\ (default)
+\-\-prefer\-ffmpeg\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Prefer\ ffmpeg\ over\ avconv\ for\ running\ the\ postprocessors
+\-\-ffmpeg\-location\ PATH\ \ \ \ \ \ \ \ \ \ \ Location\ of\ the\ ffmpeg/avconv\ binary;\ either\ the\ path\ to\ the\ binary\ or\ its\ containing\ directory.
+\-\-exec\ CMD\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Execute\ a\ command\ on\ the\ file\ after\ downloading,\ similar\ to\ find\[aq]s\ \-exec\ syntax.\ Example:\ \-\-exec\ \[aq]adb\ push\ {}\ /sdcard/Music/\ &&\ rm
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ {}\[aq]
+\-\-convert\-subtitles\ FORMAT\ \ \ \ \ \ \ Convert\ the\ subtitles\ to\ another\ format\ (currently\ supported:\ srt|ass|vtt)
 \f[]
 .fi
 .SH CONFIGURATION
@@ -495,6 +315,67 @@ $\ youtube\-dl\ \-\-get\-filename\ \-o\ "%(title)s.%(ext)s"\ BaW_jenozKc\ \-\-re
 youtube\-dl_test_video_.mp4\ \ \ \ \ \ \ \ \ \ #\ A\ simple\ file\ name
 \f[]
 .fi
+.SH FORMAT SELECTION
+.PP
+By default youtube\-dl tries to download the best quality, but sometimes
+you may want to download another format.
+The simplest case is requesting a specific format, for example
+\f[C]\-f\ 22\f[].
+You can get the list of available formats using
+\f[C]\-\-list\-formats\f[], you can also use a file extension (currently
+it supports aac, m4a, mp3, mp4, ogg, wav, webm) or the special names
+\f[C]best\f[], \f[C]bestvideo\f[], \f[C]bestaudio\f[] and
+\f[C]worst\f[].
+.PP
+If you want to download multiple videos and they don\[aq]t have the same
+formats available, you can specify the order of preference using
+slashes, as in \f[C]\-f\ 22/17/18\f[].
+You can also filter the video results by putting a condition in
+brackets, as in \f[C]\-f\ "best[height=720]"\f[] (or
+\f[C]\-f\ "[filesize>10M]"\f[]).
+This works for filesize, height, width, tbr, abr, vbr, asr, and fps and
+the comparisons <, <=, >, >=, =, != and for ext, acodec, vcodec,
+container, and protocol and the comparisons =, != .
+Formats for which the value is not known are excluded unless you put a
+question mark (?) after the operator.
+You can combine format filters, so
+\f[C]\-f\ "[height\ <=?\ 720][tbr>500]"\f[] selects up to 720p videos
+(or videos where the height is not known) with a bitrate of at least 500
+KBit/s.
+Use commas to download multiple formats, such as
+\f[C]\-f\ 136/137/mp4/bestvideo,140/m4a/bestaudio\f[].
+You can merge the video and audio of two formats into a single file
+using \f[C]\-f\ <video\-format>+<audio\-format>\f[] (requires ffmpeg or
+avconv), for example \f[C]\-f\ bestvideo+bestaudio\f[].
+.PP
+Since the end of April 2015 and version 2015.04.26 youtube\-dl uses
+\f[C]\-f\ bestvideo+bestaudio/best\f[] as default format selection (see
+#5447, #5456).
+If ffmpeg or avconv are installed this results in downloading
+\f[C]bestvideo\f[] and \f[C]bestaudio\f[] separately and muxing them
+together into a single file giving the best overall quality available.
+Otherwise it falls back to \f[C]best\f[] and results in downloading the best
+available quality served as a single file.
+\f[C]best\f[] is also needed for videos that don\[aq]t come from YouTube
+because they don\[aq]t provide the audio and video in two different
+files.
+If you want to only download some DASH formats (for example if you are
+not interested in getting videos with a resolution higher than 1080p),
+you can add \f[C]\-f\ bestvideo[height<=?1080]+bestaudio/best\f[] to
+your configuration file.
+Note that if you use youtube\-dl to stream to \f[C]stdout\f[] (and most
+likely to pipe it to your media player then), i.e.
+you explicitly specify output template as \f[C]\-o\ \-\f[], youtube\-dl
+still uses \f[C]\-f\ best\f[] format selection in order to start content
+delivery immediately to your player and not to wait until
+\f[C]bestvideo\f[] and \f[C]bestaudio\f[] are downloaded and muxed.
+.PP
+If you want to preserve the old format selection behavior (prior to
+youtube\-dl 2015.04.26), i.e.
+you want to download best available quality media served as a single
+file, you should explicitly specify your choice with \f[C]\-f\ best\f[].
+You may want to add it to the configuration file (#configuration) in
+order not to type it every time you run youtube\-dl.
 .SH VIDEO SELECTION
 .PP
 Videos can be filtered by their upload date using the options
@@ -584,18 +465,15 @@ guys (mailto:ubuntu-motu@lists.ubuntu.com?subject=outdated%20version%20of%20yout
 \- all they have to do is update the package to a somewhat recent
 version.
 See above for a way to update.
-.SS Do I always have to pass in \f[C]\-\-max\-quality\ FORMAT\f[], or
-\f[C]\-citw\f[]?
+.SS Do I always have to pass \f[C]\-citw\f[]?
 .PP
 By default, youtube\-dl intends to have the best options (incidentally,
 if you have a convincing case that these should be different, please
 file an issue where you explain that (https://yt-dl.org/bug)).
 Therefore, it is unnecessary and sometimes harmful to copy long option
 strings from webpages.
-In particular, \f[C]\-\-max\-quality\f[] \f[I]limits\f[] the video
-quality (so if you want the best quality, do NOT pass it in), and the
-only option out of \f[C]\-citw\f[] that is regularly useful is
-\f[C]\-i\f[].
+In particular, the only option out of \f[C]\-citw\f[] that is regularly
+useful is \f[C]\-i\f[].
 .SS Can you please put the \-b option back?
 .PP
 Most people asking this question are not aware that youtube\-dl now
@@ -656,11 +534,45 @@ See above (#how-do-i-update-youtube-dl) for how to update youtube\-dl.
 YouTube requires an additional signature since September 2012 which is
 not supported by old versions of youtube\-dl.
 See above (#how-do-i-update-youtube-dl) for how to update youtube\-dl.
+.SS Video URL contains an ampersand and I\[aq]m getting some strange
+output \f[C][1]\ 2839\f[] or
+\f[C]\[aq]v\[aq]\ is\ not\ recognized\ as\ an\ internal\ or\ external\ command\f[]
+.PP
+That\[aq]s actually the output from your shell.
+Since the ampersand is one of the special shell characters, it\[aq]s
+interpreted by the shell, preventing you from passing the whole URL to
+youtube\-dl.
+To prevent your shell from interpreting the ampersands (or any other
+special characters) you have to either put the whole URL in quotes or
+escape them with a backslash (which approach will work depends on your
+shell).
+.PP
+For example if your URL is
+https://www.youtube.com/watch?t=4&v=BaW_jenozKc you should end up with
+the following command:
+.PP
+\f[C]youtube\-dl\ \[aq]https://www.youtube.com/watch?t=4&v=BaW_jenozKc\[aq]\f[]
+.PP
+or
+.PP
+\f[C]youtube\-dl\ https://www.youtube.com/watch?t=4\\&v=BaW_jenozKc\f[]
+.PP
+For Windows you have to use the double quotes:
+.PP
+\f[C]youtube\-dl\ "https://www.youtube.com/watch?t=4&v=BaW_jenozKc"\f[]
 .SS ExtractorError: Could not find JS function u\[aq]OF\[aq]
 .PP
 In February 2015, the new YouTube player contained a character sequence
 in a string that was misinterpreted by old versions of youtube\-dl.
 See above (#how-do-i-update-youtube-dl) for how to update youtube\-dl.
+.SS HTTP Error 429: Too Many Requests or 402: Payment Required
+.PP
+These two error codes indicate that the service is blocking your IP
+address because of overuse.
+Contact the service and ask them to unblock your IP address, or \- if
+you have acquired a whitelisted IP address already \- use the
+\f[C]\-\-proxy\f[] or \f[C]\-\-network\-address\f[]
+options (#network-options) to select another IP address.
 .SS SyntaxError: Non\-ASCII character
 .PP
 The error
@@ -745,6 +657,37 @@ Support requests for services that \f[B]do\f[] purchase the rights to
 distribute their content are perfectly fine though.
 If in doubt, you can simply include a source that mentions the
 legitimate purchase of content.
+.SS How can I speed up work on my issue?
+.PP
+(Also known as: Help, my important issue is not being solved!) The
+youtube\-dl core developer team is quite small.
+While we do our best to solve as many issues as possible, sometimes that
+can take quite a while.
+To speed up your issue, here\[aq]s what you can do:
+.PP
+First of all, please do report the issue at our issue
+tracker (https://yt-dl.org/bugs).
+That allows us to coordinate all efforts by users and developers, and
+serves as a unified point.
+Unfortunately, the youtube\-dl project has grown too large to use
+personal email as an effective communication channel.
+.PP
+Please read the bug reporting instructions (#bugs) below.
+A lot of bugs lack all the necessary information.
+If you can, offer proxy, VPN, or shell access to the youtube\-dl
+developers.
+If you are able to, test the issue from multiple computers in multiple
+countries to exclude local censorship or misconfiguration issues.
+.PP
+If nobody is interested in solving your issue, you are welcome to take
+matters into your own hands and submit a pull request (or coerce/pay
+somebody else to do so).
+.PP
+Feel free to bump the issue from time to time by writing a small comment
+("Issue is still present in youtube\-dl version ...from France, but
+fixed from Belgium"), but please not more than once a month.
+Please do not declare your issue as \f[C]important\f[] or
+\f[C]urgent\f[].
 .SS How can I detect whether a given URL is supported by youtube\-dl?
 .PP
 For one, have a look at the list of supported
@@ -926,6 +869,7 @@ fashion, like this:
 .IP
 .nf
 \f[C]
+from\ __future__\ import\ unicode_literals
 import\ youtube_dl
 
 ydl_opts\ =\ {}
@@ -946,6 +890,7 @@ downloads/converts the video to an mp3 file:
 .IP
 .nf
 \f[C]
+from\ __future__\ import\ unicode_literals
 import\ youtube_dl
 
 
@@ -1030,7 +975,16 @@ For bug reports, this means that your report should contain the
 The error message you get for (most) bugs even says so, but you would
 not believe how many of our bug reports do not contain this information.
 .PP
-Site support requests \f[B]must contain an example URL\f[].
+If your server has multiple IPs or you suspect censorship, adding
+\-\-call\-home may be a good idea to get more diagnostics.
+If the error is \f[C]ERROR:\ Unable\ to\ extract\ ...\f[] and you cannot
+reproduce it from multiple countries, add \f[C]\-\-dump\-pages\f[]
+(warning: this will yield a rather large output, redirect it to the file
+\f[C]log.txt\f[] by adding \f[C]>log.txt\ 2>&1\f[] to your
+command\-line) or upload the \f[C]\&.dump\f[] files you get when you add
+\f[C]\-\-write\-pages\f[] somewhere (https://gist.github.com/).
+.PP
+\f[B]Site support requests must contain an example URL\f[].
 An example URL is a URL you might want to download, like
 http://www.youtube.com/watch?v=BaW_jenozKc .
 There should be an obvious video present.
index 5457ada2f0375a885b6e69520ba073ed29a94cbc..8367bdbfddd573ab7c2579e9cbfd8a55719766ce 100644 (file)
@@ -4,7 +4,7 @@ __youtube_dl()
     COMPREPLY=()
     cur="${COMP_WORDS[COMP_CWORD]}"
     prev="${COMP_WORDS[COMP_CWORD-1]}"
-    opts="--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --default-search --ignore-config --flat-playlist --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --rate-limit --retries --buffer-size --no-resize-buffer --test --playlist-reverse --xattr-set-filesize --hls-prefer-native --external-downloader --batch-file --id --output --autonumber-size --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-intermediate-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --format --all-formats --prefer-free-formats --max-quality --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location 
--exec --convert-subtitles"
+    opts="--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --default-search --ignore-config --flat-playlist --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --cn-verification-proxy --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --rate-limit --retries --buffer-size --no-resize-buffer --test --playlist-reverse --xattr-set-filesize --hls-prefer-native --external-downloader --external-downloader-args --batch-file --id --output --autonumber-size --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --format --all-formats --prefer-free-formats --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --metadata-from-title --xattrs --fixup 
--prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subtitles"
     keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
     fileopts="-a|--batch-file|--download-archive|--cookies|--load-info"
     diropts="--cache-dir"
index f2f1636c4add7318efb50fb88bfe3c85b5a17603..0eaa6a0cc31917eed4ab98998449f19399a16fd5 100644 (file)
 
-complete --command youtube-dl --long-option help --short-option h --description 'print this help text and exit'
-complete --command youtube-dl --long-option version --description 'print program version and exit'
-complete --command youtube-dl --long-option update --short-option U --description 'update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)'
-complete --command youtube-dl --long-option ignore-errors --short-option i --description 'continue on download errors, for example to skip unavailable videos in a playlist'
+complete --command youtube-dl --long-option help --short-option h --description 'Print this help text and exit'
+complete --command youtube-dl --long-option version --description 'Print program version and exit'
+complete --command youtube-dl --long-option update --short-option U --description 'Update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)'
+complete --command youtube-dl --long-option ignore-errors --short-option i --description 'Continue on download errors, for example to skip unavailable videos in a playlist'
 complete --command youtube-dl --long-option abort-on-error --description 'Abort downloading of further videos (in the playlist or the command line) if an error occurs'
-complete --command youtube-dl --long-option dump-user-agent --description 'display the current browser identification'
+complete --command youtube-dl --long-option dump-user-agent --description 'Display the current browser identification'
 complete --command youtube-dl --long-option list-extractors --description 'List all supported extractors and the URLs they would handle'
 complete --command youtube-dl --long-option extractor-descriptions --description 'Output descriptions of all supported extractors'
-complete --command youtube-dl --long-option default-search --description 'Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.'
+complete --command youtube-dl --long-option default-search --description 'Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.'
 complete --command youtube-dl --long-option ignore-config --description 'Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: Do not read the user configuration in ~/.config/youtube-dl/config (%APPDATA%/youtube-dl/config.txt on Windows)'
 complete --command youtube-dl --long-option flat-playlist --description 'Do not extract the videos of a playlist, only list them.'
-complete --command youtube-dl --long-option no-color --description 'Do not emit color codes in output.'
+complete --command youtube-dl --long-option no-color --description 'Do not emit color codes in output'
 complete --command youtube-dl --long-option proxy --description 'Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection'
 complete --command youtube-dl --long-option socket-timeout --description 'Time to wait before giving up, in seconds'
 complete --command youtube-dl --long-option source-address --description 'Client-side IP address to bind to (experimental)'
 complete --command youtube-dl --long-option force-ipv4 --short-option 4 --description 'Make all connections via IPv4 (experimental)'
 complete --command youtube-dl --long-option force-ipv6 --short-option 6 --description 'Make all connections via IPv6 (experimental)'
-complete --command youtube-dl --long-option playlist-start --description 'playlist video to start at (default is %default)'
-complete --command youtube-dl --long-option playlist-end --description 'playlist video to end at (default is last)'
-complete --command youtube-dl --long-option playlist-items --description 'playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.'
-complete --command youtube-dl --long-option match-title --description 'download only matching titles (regex or caseless sub-string)'
-complete --command youtube-dl --long-option reject-title --description 'skip download for matching titles (regex or caseless sub-string)'
+complete --command youtube-dl --long-option cn-verification-proxy --description 'Use this proxy to verify the IP address for some Chinese sites. The default proxy specified by --proxy (or none, if the option is not present) is used for the actual downloading. (experimental)'
+complete --command youtube-dl --long-option playlist-start --description 'Playlist video to start at (default is %default)'
+complete --command youtube-dl --long-option playlist-end --description 'Playlist video to end at (default is last)'
+complete --command youtube-dl --long-option playlist-items --description 'Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify a range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.'
+complete --command youtube-dl --long-option match-title --description 'Download only matching titles (regex or caseless sub-string)'
+complete --command youtube-dl --long-option reject-title --description 'Skip download for matching titles (regex or caseless sub-string)'
 complete --command youtube-dl --long-option max-downloads --description 'Abort after downloading NUMBER files'
 complete --command youtube-dl --long-option min-filesize --description 'Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)'
 complete --command youtube-dl --long-option max-filesize --description 'Do not download any videos larger than SIZE (e.g. 50k or 44.6m)'
-complete --command youtube-dl --long-option date --description 'download only videos uploaded in this date'
-complete --command youtube-dl --long-option datebefore --description 'download only videos uploaded on or before this date (i.e. inclusive)'
-complete --command youtube-dl --long-option dateafter --description 'download only videos uploaded on or after this date (i.e. inclusive)'
+complete --command youtube-dl --long-option date --description 'Download only videos uploaded on this date'
+complete --command youtube-dl --long-option datebefore --description 'Download only videos uploaded on or before this date (i.e. inclusive)'
+complete --command youtube-dl --long-option dateafter --description 'Download only videos uploaded on or after this date (i.e. inclusive)'
 complete --command youtube-dl --long-option min-views --description 'Do not download any videos with less than COUNT views'
 complete --command youtube-dl --long-option max-views --description 'Do not download any videos with more than COUNT views'
-complete --command youtube-dl --long-option match-filter --description '(Experimental) Generic video filter. Specify any key (see help for -o for a list of available keys) to match if the key is present, !key to check if the key is not present,key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against a number, and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the operator.For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike functionality is not available at the given service), but who also have a description, use  --match-filter "like_count > 100 & dislike_count <? 50 & description" .'
-complete --command youtube-dl --long-option no-playlist --description 'If the URL refers to a video and a playlist, download only the video.'
-complete --command youtube-dl --long-option yes-playlist --description 'If the URL refers to a video and a playlist, download the playlist.'
-complete --command youtube-dl --long-option age-limit --description 'download only videos suitable for the given age'
+complete --command youtube-dl --long-option match-filter --description 'Generic video filter (experimental). Specify any key (see help for -o for a list of available keys) to match if the key is present, !key to check if the key is not present, key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against a number, and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the operator. For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike functionality is not available at the given service), but which also have a description, use  --match-filter "like_count > 100 & dislike_count <? 50 & description" .'
+complete --command youtube-dl --long-option no-playlist --description 'Download only the video, if the URL refers to a video and a playlist.'
+complete --command youtube-dl --long-option yes-playlist --description 'Download the playlist, if the URL refers to a video and a playlist.'
+complete --command youtube-dl --long-option age-limit --description 'Download only videos suitable for the given age'
 complete --command youtube-dl --long-option download-archive --description 'Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.' --require-parameter
 complete --command youtube-dl --long-option include-ads --description 'Download advertisements as well (experimental)'
-complete --command youtube-dl --long-option rate-limit --short-option r --description 'maximum download rate in bytes per second (e.g. 50K or 4.2M)'
-complete --command youtube-dl --long-option retries --short-option R --description 'number of retries (default is %default), or "infinite".'
-complete --command youtube-dl --long-option buffer-size --description 'size of download buffer (e.g. 1024 or 16K) (default is %default)'
-complete --command youtube-dl --long-option no-resize-buffer --description 'do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.'
+complete --command youtube-dl --long-option rate-limit --short-option r --description 'Maximum download rate in bytes per second (e.g. 50K or 4.2M)'
+complete --command youtube-dl --long-option retries --short-option R --description 'Number of retries (default is %default), or "infinite".'
+complete --command youtube-dl --long-option buffer-size --description 'Size of download buffer (e.g. 1024 or 16K) (default is %default)'
+complete --command youtube-dl --long-option no-resize-buffer --description 'Do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.'
 complete --command youtube-dl --long-option test
 complete --command youtube-dl --long-option playlist-reverse --description 'Download playlist videos in reverse order'
-complete --command youtube-dl --long-option xattr-set-filesize --description '(experimental) set file xattribute ytdl.filesize with expected filesize'
-complete --command youtube-dl --long-option hls-prefer-native --description '(experimental) Use the native HLS downloader instead of ffmpeg.'
-complete --command youtube-dl --long-option external-downloader --description '(experimental) Use the specified external downloader. Currently supports aria2c,curl,wget'
-complete --command youtube-dl --long-option batch-file --short-option a --description 'file containing URLs to download ('"'"'-'"'"' for stdin)' --require-parameter
-complete --command youtube-dl --long-option id --description 'use only video ID in file name'
-complete --command youtube-dl --long-option output --short-option o --description 'output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(format)s for the format description (like "22 - 1280x720" or "HD"), %(format_id)s for the unique id of the format (like Youtube'"'"'s itags: "137"), %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id, %(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in, %(playlist_index)s for the position in the playlist. %(height)s and %(width)s for the width and height of the video format. %(resolution)s for a textual description of the resolution of the video format. %% for a literal percent. Use - to output to stdout. Can also be used to download to a different directory, for example with -o '"'"'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s'"'"' .'
-complete --command youtube-dl --long-option autonumber-size --description 'Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given'
+complete --command youtube-dl --long-option xattr-set-filesize --description 'Set file xattribute ytdl.filesize with expected filesize (experimental)'
+complete --command youtube-dl --long-option hls-prefer-native --description 'Use the native HLS downloader instead of ffmpeg (experimental)'
+complete --command youtube-dl --long-option external-downloader --description 'Use the specified external downloader. Currently supports aria2c,curl,wget'
+complete --command youtube-dl --long-option external-downloader-args --description 'Give these arguments to the external downloader'
+complete --command youtube-dl --long-option batch-file --short-option a --description 'File containing URLs to download ('"'"'-'"'"' for stdin)' --require-parameter
+complete --command youtube-dl --long-option id --description 'Use only video ID in file name'
+complete --command youtube-dl --long-option output --short-option o --description 'Output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(format)s for the format description (like "22 - 1280x720" or "HD"), %(format_id)s for the unique id of the format (like YouTube'"'"'s itags: "137"), %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id, %(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in, %(playlist_index)s for the position in the playlist. %(height)s and %(width)s for the width and height of the video format. %(resolution)s for a textual description of the resolution of the video format. %% for a literal percent. Use - to output to stdout. Can also be used to download to a different directory, for example with -o '"'"'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s'"'"' .'
+complete --command youtube-dl --long-option autonumber-size --description 'Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given'
 complete --command youtube-dl --long-option restrict-filenames --description 'Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames'
-complete --command youtube-dl --long-option auto-number --short-option A --description '[deprecated; use  -o "%(autonumber)s-%(title)s.%(ext)s" ] number downloaded files starting from 00000'
-complete --command youtube-dl --long-option title --short-option t --description '[deprecated] use title in file name (default)'
-complete --command youtube-dl --long-option literal --short-option l --description '[deprecated] alias of --title'
-complete --command youtube-dl --long-option no-overwrites --short-option w --description 'do not overwrite files'
-complete --command youtube-dl --long-option continue --short-option c --description 'force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.'
-complete --command youtube-dl --long-option no-continue --description 'do not resume partially downloaded files (restart from beginning)'
-complete --command youtube-dl --long-option no-part --description 'do not use .part files - write directly into output file'
-complete --command youtube-dl --long-option no-mtime --description 'do not use the Last-modified header to set the file modification time'
-complete --command youtube-dl --long-option write-description --description 'write video description to a .description file'
-complete --command youtube-dl --long-option write-info-json --description 'write video metadata to a .info.json file'
-complete --command youtube-dl --long-option write-annotations --description 'write video annotations to a .annotation file'
-complete --command youtube-dl --long-option load-info --description 'json file containing the video information (created with the "--write-json" option)' --require-parameter
-complete --command youtube-dl --long-option cookies --description 'file to read cookies from and dump cookie jar in' --require-parameter
+complete --command youtube-dl --long-option auto-number --short-option A --description '[deprecated; use  -o "%(autonumber)s-%(title)s.%(ext)s" ] Number downloaded files starting from 00000'
+complete --command youtube-dl --long-option title --short-option t --description '[deprecated] Use title in file name (default)'
+complete --command youtube-dl --long-option literal --short-option l --description '[deprecated] Alias of --title'
+complete --command youtube-dl --long-option no-overwrites --short-option w --description 'Do not overwrite files'
+complete --command youtube-dl --long-option continue --short-option c --description 'Force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.'
+complete --command youtube-dl --long-option no-continue --description 'Do not resume partially downloaded files (restart from beginning)'
+complete --command youtube-dl --long-option no-part --description 'Do not use .part files - write directly into output file'
+complete --command youtube-dl --long-option no-mtime --description 'Do not use the Last-modified header to set the file modification time'
+complete --command youtube-dl --long-option write-description --description 'Write video description to a .description file'
+complete --command youtube-dl --long-option write-info-json --description 'Write video metadata to a .info.json file'
+complete --command youtube-dl --long-option write-annotations --description 'Write video annotations to a .annotations.xml file'
+complete --command youtube-dl --long-option load-info --description 'JSON file containing the video information (created with the "--write-info-json" option)' --require-parameter
+complete --command youtube-dl --long-option cookies --description 'File to read cookies from and dump cookie jar in' --require-parameter
 complete --command youtube-dl --long-option cache-dir --description 'Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.'
 complete --command youtube-dl --long-option no-cache-dir --description 'Disable filesystem caching'
 complete --command youtube-dl --long-option rm-cache-dir --description 'Delete all filesystem cache files'
-complete --command youtube-dl --long-option write-thumbnail --description 'write thumbnail image to disk'
-complete --command youtube-dl --long-option write-all-thumbnails --description 'write all thumbnail image formats to disk'
+complete --command youtube-dl --long-option write-thumbnail --description 'Write thumbnail image to disk'
+complete --command youtube-dl --long-option write-all-thumbnails --description 'Write all thumbnail image formats to disk'
 complete --command youtube-dl --long-option list-thumbnails --description 'Simulate and list all available thumbnail formats'
-complete --command youtube-dl --long-option quiet --short-option q --description 'activates quiet mode'
+complete --command youtube-dl --long-option quiet --short-option q --description 'Activate quiet mode'
 complete --command youtube-dl --long-option no-warnings --description 'Ignore warnings'
-complete --command youtube-dl --long-option simulate --short-option s --description 'do not download the video and do not write anything to disk'
-complete --command youtube-dl --long-option skip-download --description 'do not download the video'
-complete --command youtube-dl --long-option get-url --short-option g --description 'simulate, quiet but print URL'
-complete --command youtube-dl --long-option get-title --short-option e --description 'simulate, quiet but print title'
-complete --command youtube-dl --long-option get-id --description 'simulate, quiet but print id'
-complete --command youtube-dl --long-option get-thumbnail --description 'simulate, quiet but print thumbnail URL'
-complete --command youtube-dl --long-option get-description --description 'simulate, quiet but print video description'
-complete --command youtube-dl --long-option get-duration --description 'simulate, quiet but print video length'
-complete --command youtube-dl --long-option get-filename --description 'simulate, quiet but print output filename'
-complete --command youtube-dl --long-option get-format --description 'simulate, quiet but print output format'
-complete --command youtube-dl --long-option dump-json --short-option j --description 'simulate, quiet but print JSON information. See --output for a description of available keys.'
-complete --command youtube-dl --long-option dump-single-json --short-option J --description 'simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.'
+complete --command youtube-dl --long-option simulate --short-option s --description 'Do not download the video and do not write anything to disk'
+complete --command youtube-dl --long-option skip-download --description 'Do not download the video'
+complete --command youtube-dl --long-option get-url --short-option g --description 'Simulate, quiet but print URL'
+complete --command youtube-dl --long-option get-title --short-option e --description 'Simulate, quiet but print title'
+complete --command youtube-dl --long-option get-id --description 'Simulate, quiet but print id'
+complete --command youtube-dl --long-option get-thumbnail --description 'Simulate, quiet but print thumbnail URL'
+complete --command youtube-dl --long-option get-description --description 'Simulate, quiet but print video description'
+complete --command youtube-dl --long-option get-duration --description 'Simulate, quiet but print video length'
+complete --command youtube-dl --long-option get-filename --description 'Simulate, quiet but print output filename'
+complete --command youtube-dl --long-option get-format --description 'Simulate, quiet but print output format'
+complete --command youtube-dl --long-option dump-json --short-option j --description 'Simulate, quiet but print JSON information. See --output for a description of available keys.'
+complete --command youtube-dl --long-option dump-single-json --short-option J --description 'Simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.'
 complete --command youtube-dl --long-option print-json --description 'Be quiet and print the video information as JSON (video is still being downloaded).'
-complete --command youtube-dl --long-option newline --description 'output progress bar as new lines'
-complete --command youtube-dl --long-option no-progress --description 'do not print progress bar'
-complete --command youtube-dl --long-option console-title --description 'display progress in console titlebar'
-complete --command youtube-dl --long-option verbose --short-option v --description 'print various debugging information'
-complete --command youtube-dl --long-option dump-intermediate-pages --description 'print downloaded pages to debug problems (very verbose)'
+complete --command youtube-dl --long-option newline --description 'Output progress bar as new lines'
+complete --command youtube-dl --long-option no-progress --description 'Do not print progress bar'
+complete --command youtube-dl --long-option console-title --description 'Display progress in console titlebar'
+complete --command youtube-dl --long-option verbose --short-option v --description 'Print various debugging information'
+complete --command youtube-dl --long-option dump-pages --description 'Print downloaded pages to debug problems (very verbose)'
 complete --command youtube-dl --long-option write-pages --description 'Write downloaded intermediary pages to files in the current directory to debug problems'
 complete --command youtube-dl --long-option youtube-print-sig-code
 complete --command youtube-dl --long-option print-traffic --description 'Display sent and read HTTP traffic'
-complete --command youtube-dl --long-option call-home --short-option C --description 'Contact the youtube-dl server for debugging.'
-complete --command youtube-dl --long-option no-call-home --description 'Do NOT contact the youtube-dl server for debugging.'
+complete --command youtube-dl --long-option call-home --short-option C --description 'Contact the youtube-dl server for debugging'
+complete --command youtube-dl --long-option no-call-home --description 'Do NOT contact the youtube-dl server for debugging'
 complete --command youtube-dl --long-option encoding --description 'Force the specified encoding (experimental)'
-complete --command youtube-dl --long-option no-check-certificate --description 'Suppress HTTPS certificate validation.'
+complete --command youtube-dl --long-option no-check-certificate --description 'Suppress HTTPS certificate validation'
 complete --command youtube-dl --long-option prefer-insecure --description 'Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)'
-complete --command youtube-dl --long-option user-agent --description 'specify a custom user agent'
-complete --command youtube-dl --long-option referer --description 'specify a custom referer, use if the video access is restricted to one domain'
-complete --command youtube-dl --long-option add-header --description 'specify a custom HTTP header and its value, separated by a colon '"'"':'"'"'. You can use this option multiple times'
+complete --command youtube-dl --long-option user-agent --description 'Specify a custom user agent'
+complete --command youtube-dl --long-option referer --description 'Specify a custom referer, use if the video access is restricted to one domain'
+complete --command youtube-dl --long-option add-header --description 'Specify a custom HTTP header and its value, separated by a colon '"'"':'"'"'. You can use this option multiple times'
 complete --command youtube-dl --long-option bidi-workaround --description 'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH'
 complete --command youtube-dl --long-option sleep-interval --description 'Number of seconds to sleep before each download.'
-complete --command youtube-dl --long-option format --short-option f --description 'video format code, specify the order of preference using slashes, as in -f 22/17/18 .  Instead of format codes, you can select by extension for the extensions aac, m4a, mp3, mp4, ogg, wav, webm. You can also use the special names "best", "bestvideo", "bestaudio", "worst".  You can filter the video results by putting a condition in brackets, as in -f "best[height=720]" (or -f "[filesize>10M]").  This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec, vcodec, container, and protocol and the comparisons =, != . Formats for which the value is not known are excluded unless you put a question mark (?) after the operator. You can combine format filters, so  -f "[height <=? 720][tbr>500]" selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. By default, youtube-dl will pick the best quality. Use commas to download multiple audio formats, such as -f  136/137/mp4/bestvideo,140/m4a/bestaudio. You can merge the video and audio of two formats into a single file using -f <video-format>+<audio-format> (requires ffmpeg or avconv), for example -f bestvideo+bestaudio.'
-complete --command youtube-dl --long-option all-formats --description 'download all available video formats'
-complete --command youtube-dl --long-option prefer-free-formats --description 'prefer free video formats unless a specific one is requested'
-complete --command youtube-dl --long-option max-quality --description 'highest quality format to download'
-complete --command youtube-dl --long-option list-formats --short-option F --description 'list all available formats'
+complete --command youtube-dl --long-option format --short-option f --description 'Video format code, see the "FORMAT SELECTION" for all the info'
+complete --command youtube-dl --long-option all-formats --description 'Download all available video formats'
+complete --command youtube-dl --long-option prefer-free-formats --description 'Prefer free video formats unless a specific one is requested'
+complete --command youtube-dl --long-option list-formats --short-option F --description 'List all available formats'
 complete --command youtube-dl --long-option youtube-include-dash-manifest
 complete --command youtube-dl --long-option youtube-skip-dash-manifest --description 'Do not download the DASH manifest on YouTube videos'
 complete --command youtube-dl --long-option merge-output-format --description 'If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv.Ignored if no merge is required'
-complete --command youtube-dl --long-option write-sub --description 'write subtitle file'
-complete --command youtube-dl --long-option write-auto-sub --description 'write automatic subtitle file (youtube only)'
-complete --command youtube-dl --long-option all-subs --description 'downloads all the available subtitles of the video'
-complete --command youtube-dl --long-option list-subs --description 'lists all available subtitles for the video'
-complete --command youtube-dl --long-option sub-format --description 'subtitle format, accepts formats preference, for example: "ass/srt/best"'
-complete --command youtube-dl --long-option sub-lang --description 'languages of the subtitles to download (optional) separated by commas, use IETF language tags like '"'"'en,pt'"'"''
-complete --command youtube-dl --long-option username --short-option u --description 'login with this account ID'
-complete --command youtube-dl --long-option password --short-option p --description 'account password. If this option is left out, youtube-dl will ask interactively.'
-complete --command youtube-dl --long-option twofactor --short-option 2 --description 'two-factor auth code'
-complete --command youtube-dl --long-option netrc --short-option n --description 'use .netrc authentication data'
-complete --command youtube-dl --long-option video-password --description 'video password (vimeo, smotri)'
-complete --command youtube-dl --long-option extract-audio --short-option x --description 'convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)'
-complete --command youtube-dl --long-option audio-format --description '"best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default'
-complete --command youtube-dl --long-option audio-quality --description 'ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default %default)'
+complete --command youtube-dl --long-option write-sub --description 'Write subtitle file'
+complete --command youtube-dl --long-option write-auto-sub --description 'Write automatic subtitle file (YouTube only)'
+complete --command youtube-dl --long-option all-subs --description 'Download all the available subtitles of the video'
+complete --command youtube-dl --long-option list-subs --description 'List all available subtitles for the video'
+complete --command youtube-dl --long-option sub-format --description 'Subtitle format, accepts formats preference, for example: "srt" or "ass/srt/best"'
+complete --command youtube-dl --long-option sub-lang --description 'Languages of the subtitles to download (optional) separated by commas, use IETF language tags like '"'"'en,pt'"'"''
+complete --command youtube-dl --long-option username --short-option u --description 'Login with this account ID'
+complete --command youtube-dl --long-option password --short-option p --description 'Account password. If this option is left out, youtube-dl will ask interactively.'
+complete --command youtube-dl --long-option twofactor --short-option 2 --description 'Two-factor auth code'
+complete --command youtube-dl --long-option netrc --short-option n --description 'Use .netrc authentication data'
+complete --command youtube-dl --long-option video-password --description 'Video password (vimeo, smotri)'
+complete --command youtube-dl --long-option extract-audio --short-option x --description 'Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)'
+complete --command youtube-dl --long-option audio-format --description 'Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default'
+complete --command youtube-dl --long-option audio-quality --description 'Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default %default)'
 complete --command youtube-dl --long-option recode-video --description 'Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)' --arguments 'mp4 flv ogg webm mkv' --exclusive
-complete --command youtube-dl --long-option keep-video --short-option k --description 'keeps the video file on disk after the post-processing; the video is erased by default'
-complete --command youtube-dl --long-option no-post-overwrites --description 'do not overwrite post-processed files; the post-processed files are overwritten by default'
-complete --command youtube-dl --long-option embed-subs --description 'embed subtitles in the video (only for mp4 videos)'
-complete --command youtube-dl --long-option embed-thumbnail --description 'embed thumbnail in the audio as cover art'
-complete --command youtube-dl --long-option add-metadata --description 'write metadata to the video file'
-complete --command youtube-dl --long-option xattrs --description 'write metadata to the video file'"'"'s xattrs (using dublin core and xdg standards)'
+complete --command youtube-dl --long-option keep-video --short-option k --description 'Keep the video file on disk after the post-processing; the video is erased by default'
+complete --command youtube-dl --long-option no-post-overwrites --description 'Do not overwrite post-processed files; the post-processed files are overwritten by default'
+complete --command youtube-dl --long-option embed-subs --description 'Embed subtitles in the video (only for mkv and mp4 videos)'
+complete --command youtube-dl --long-option embed-thumbnail --description 'Embed thumbnail in the audio as cover art'
+complete --command youtube-dl --long-option add-metadata --description 'Write metadata to the video file'
+complete --command youtube-dl --long-option metadata-from-title --description 'Parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed parameters replace existing values. Additional templates: %(album), %(artist). Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like "Coldplay - Paradise"'
+complete --command youtube-dl --long-option xattrs --description 'Write metadata to the video file'"'"'s xattrs (using dublin core and xdg standards)'
 complete --command youtube-dl --long-option fixup --description 'Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn(the default; fix file if we can, warn otherwise)'
 complete --command youtube-dl --long-option prefer-avconv --description 'Prefer avconv over ffmpeg for running the postprocessors (default)'
 complete --command youtube-dl --long-option prefer-ffmpeg --description 'Prefer ffmpeg over avconv for running the postprocessors'
index 5071f16f2289f1c10f6e6c4007253bd9e0305a1d..7dc3ad403d89d2fb7076dae2c6a8c053780ff5bd 100644 (file)
@@ -19,7 +19,7 @@ __youtube_dl() {
             elif [[ ${prev} == "--recode-video" ]]; then
                 _arguments '*: :(mp4 flv ogg webm mkv)'
             else
-                _arguments '*: :(--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --default-search --ignore-config --flat-playlist --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --rate-limit --retries --buffer-size --no-resize-buffer --test --playlist-reverse --xattr-set-filesize --hls-prefer-native --external-downloader --batch-file --id --output --autonumber-size --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-intermediate-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --format --all-formats --prefer-free-formats --max-quality --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --xattrs --fixup --prefer-avconv 
--prefer-ffmpeg --ffmpeg-location --exec --convert-subtitles)'
+                _arguments '*: :(--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --default-search --ignore-config --flat-playlist --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --cn-verification-proxy --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --rate-limit --retries --buffer-size --no-resize-buffer --test --playlist-reverse --xattr-set-filesize --hls-prefer-native --external-downloader --external-downloader-args --batch-file --id --output --autonumber-size --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --format --all-formats --prefer-free-formats --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --metadata-from-title 
--xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subtitles)'
             fi
         ;;
     esac
index 74e4261680da06d9892b4c6f60a26481771c950b..691f3e09f807de52c1c19f334befd0ccc0d4f82c 100755 (executable)
@@ -4,8 +4,10 @@
 from __future__ import absolute_import, unicode_literals
 
 import collections
+import contextlib
 import datetime
 import errno
+import fileinput
 import io
 import itertools
 import json
@@ -28,6 +30,7 @@ from .compat import (
     compat_basestring,
     compat_cookiejar,
     compat_expanduser,
+    compat_get_terminal_size,
     compat_http_client,
     compat_kwargs,
     compat_str,
@@ -46,21 +49,21 @@ from .utils import (
     ExtractorError,
     format_bytes,
     formatSeconds,
-    get_term_width,
     locked_file,
     make_HTTPS_handler,
     MaxDownloadsReached,
     PagedList,
     parse_filesize,
+    PerRequestProxyHandler,
     PostProcessingError,
     platform_name,
     preferredencoding,
     render_table,
     SameFileError,
     sanitize_filename,
+    sanitize_path,
     std_headers,
     subtitles_filename,
-    takewhile_inclusive,
     UnavailableVideoError,
     url_basename,
     version_tuple,
@@ -68,6 +71,7 @@ from .utils import (
     write_string,
     YoutubeDLHandler,
     prepend_extension,
+    replace_extension,
     args_to_str,
     age_restricted,
 )
@@ -131,7 +135,6 @@ class YoutubeDL(object):
                        (or video) as a single JSON line.
     simulate:          Do not download the video files.
     format:            Video format code. See options.py for more information.
-    format_limit:      Highest quality format to try.
     outtmpl:           Template for output names.
     restrictfilenames: Do not allow "&" and spaces in file names
     ignoreerrors:      Do not stop on download errors.
@@ -181,6 +184,8 @@ class YoutubeDL(object):
     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                        At the moment, this is only supported by YouTube.
     proxy:             URL of the proxy server to use
+    cn_verification_proxy:  URL of the proxy to use for IP address verification
+                       on Chinese sites. (Experimental)
     socket_timeout:    Time to wait for unresponsive hosts, in seconds
     bidi_workaround:   Work around buggy terminals without bidirectional text
                        support, using fribidi
@@ -247,15 +252,14 @@ class YoutubeDL(object):
     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
 
     The following parameters are not used by YoutubeDL itself, they are used by
-    the FileDownloader:
+    the downloader (see youtube_dl/downloader/common.py):
     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
     noresizebuffer, retries, continuedl, noprogress, consoletitle,
-    xattr_set_filesize.
+    xattr_set_filesize, external_downloader_args.
 
     The following options are used by the post processors:
     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
                        otherwise prefer avconv.
-    exec_cmd:          Arbitrary command to run after downloading
     """
 
     params = None
@@ -284,7 +288,7 @@ class YoutubeDL(object):
             try:
                 import pty
                 master, slave = pty.openpty()
-                width = get_term_width()
+                width = compat_get_terminal_size().columns
                 if width is None:
                     width_args = []
                 else:
@@ -317,8 +321,10 @@ class YoutubeDL(object):
                 'Set the LC_ALL environment variable to fix this.')
             self.params['restrictfilenames'] = True
 
-        if '%(stitle)s' in self.params.get('outtmpl', ''):
-            self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
+        if isinstance(params.get('outtmpl'), bytes):
+            self.report_warning(
+                'Parameter outtmpl is bytes, but should be a unicode string. '
+                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 
         self._setup_opener()
 
@@ -557,7 +563,7 @@ class YoutubeDL(object):
                                  if v is not None)
             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 
-            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
+            outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
             tmpl = compat_expanduser(outtmpl)
             filename = tmpl % template_dict
             # Temporary fix for #4787
@@ -624,7 +630,7 @@ class YoutubeDL(object):
         Returns a list with a dictionary for each video we find.
         If 'download', also downloads the videos.
         extra_info is a dict containing the extra values to add to each result
-         '''
+        '''
 
         if ie_key:
             ies = [self.get_info_extractor(ie_key)]
@@ -908,10 +914,16 @@ class YoutubeDL(object):
         if not available_formats:
             return None
 
-        if format_spec == 'best' or format_spec is None:
-            return available_formats[-1]
-        elif format_spec == 'worst':
-            return available_formats[0]
+        if format_spec in ['best', 'worst', None]:
+            format_idx = 0 if format_spec == 'worst' else -1
+            audiovideo_formats = [
+                f for f in available_formats
+                if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
+            if audiovideo_formats:
+                return audiovideo_formats[format_idx]
+            # for audio only urls, select the best/worst audio format
+            elif all(f.get('acodec') != 'none' for f in available_formats):
+                return available_formats[format_idx]
         elif format_spec == 'bestaudio':
             audio_formats = [
                 f for f in available_formats
@@ -1055,12 +1067,6 @@ class YoutubeDL(object):
             full_format_info.update(format)
             format['http_headers'] = self._calc_headers(full_format_info)
 
-        format_limit = self.params.get('format_limit', None)
-        if format_limit:
-            formats = list(takewhile_inclusive(
-                lambda f: f['format_id'] != format_limit, formats
-            ))
-
         # TODO Central sorting goes here
 
         if formats[0] is not info_dict:
@@ -1078,10 +1084,16 @@ class YoutubeDL(object):
 
         req_format = self.params.get('format')
         if req_format is None:
-            req_format = 'best'
+            req_format_list = []
+            if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
+                    info_dict['extractor'] in ['youtube', 'ted']):
+                merger = FFmpegMergerPP(self)
+                if merger.available and merger.can_merge():
+                    req_format_list.append('bestvideo+bestaudio')
+            req_format_list.append('best')
+            req_format = '/'.join(req_format_list)
         formats_to_download = []
-        # The -1 is for supporting YoutubeIE
-        if req_format in ('-1', 'all'):
+        if req_format == 'all':
             formats_to_download = formats
         else:
             for rfstr in req_format.split(','):
@@ -1208,9 +1220,6 @@ class YoutubeDL(object):
         if len(info_dict['title']) > 200:
             info_dict['title'] = info_dict['title'][:197] + '...'
 
-        # Keep for backwards compatibility
-        info_dict['stitle'] = info_dict['title']
-
         if 'format' not in info_dict:
             info_dict['format'] = info_dict['ext']
 
@@ -1256,7 +1265,7 @@ class YoutubeDL(object):
             return
 
         try:
-            dn = os.path.dirname(encodeFilename(filename))
+            dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
             if dn and not os.path.exists(dn):
                 os.makedirs(dn)
         except (OSError, IOError) as err:
@@ -1264,7 +1273,7 @@ class YoutubeDL(object):
             return
 
         if self.params.get('writedescription', False):
-            descfn = filename + '.description'
+            descfn = replace_extension(filename, 'description', info_dict.get('ext'))
             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                 self.to_screen('[info] Video description is already present')
             elif info_dict.get('description') is None:
@@ -1279,7 +1288,7 @@ class YoutubeDL(object):
                     return
 
         if self.params.get('writeannotations', False):
-            annofn = filename + '.annotations.xml'
+            annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                 self.to_screen('[info] Video annotations are already present')
             else:
@@ -1326,13 +1335,13 @@ class YoutubeDL(object):
                     return
 
         if self.params.get('writeinfojson', False):
-            infofn = os.path.splitext(filename)[0] + '.info.json'
+            infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                 self.to_screen('[info] Video description metadata is already present')
             else:
                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                 try:
-                    write_json_file(info_dict, infofn)
+                    write_json_file(self.filter_requested_info(info_dict), infofn)
                 except (OSError, IOError):
                     self.report_error('Cannot write metadata to JSON file ' + infofn)
                     return
@@ -1352,7 +1361,7 @@ class YoutubeDL(object):
                 if info_dict.get('requested_formats') is not None:
                     downloaded = []
                     success = True
-                    merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
+                    merger = FFmpegMergerPP(self)
                     if not merger.available:
                         postprocessors = []
                         self.report_warning('You have requested multiple '
@@ -1360,16 +1369,49 @@ class YoutubeDL(object):
                                             ' The formats won\'t be merged')
                     else:
                         postprocessors = [merger]
-                    for f in info_dict['requested_formats']:
-                        new_info = dict(info_dict)
-                        new_info.update(f)
-                        fname = self.prepare_filename(new_info)
-                        fname = prepend_extension(fname, 'f%s' % f['format_id'])
-                        downloaded.append(fname)
-                        partial_success = dl(fname, new_info)
-                        success = success and partial_success
-                    info_dict['__postprocessors'] = postprocessors
-                    info_dict['__files_to_merge'] = downloaded
+
+                    def compatible_formats(formats):
+                        video, audio = formats
+                        # Check extension
+                        video_ext, audio_ext = audio.get('ext'), video.get('ext')
+                        if video_ext and audio_ext:
+                            COMPATIBLE_EXTS = (
+                                ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
+                                ('webm')
+                            )
+                            for exts in COMPATIBLE_EXTS:
+                                if video_ext in exts and audio_ext in exts:
+                                    return True
+                        # TODO: Check acodec/vcodec
+                        return False
+
+                    filename_real_ext = os.path.splitext(filename)[1][1:]
+                    filename_wo_ext = (
+                        os.path.splitext(filename)[0]
+                        if filename_real_ext == info_dict['ext']
+                        else filename)
+                    requested_formats = info_dict['requested_formats']
+                    if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
+                        info_dict['ext'] = 'mkv'
+                        self.report_warning('You have requested formats incompatible for merge. '
+                                            'The formats will be merged into mkv')
+                    # Ensure filename always has a correct extension for successful merge
+                    filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
+                    if os.path.exists(encodeFilename(filename)):
+                        self.to_screen(
+                            '[download] %s has already been downloaded and '
+                            'merged' % filename)
+                    else:
+                        for f in requested_formats:
+                            new_info = dict(info_dict)
+                            new_info.update(f)
+                            fname = self.prepare_filename(new_info)
+                            fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
+                            downloaded.append(fname)
+                            partial_success = dl(fname, new_info)
+                            success = success and partial_success
+                        info_dict['__postprocessors'] = postprocessors
+                        info_dict['__files_to_merge'] = downloaded
                 else:
                     # Just a single file
                     success = dl(filename, info_dict)
@@ -1452,8 +1494,11 @@ class YoutubeDL(object):
         return self._download_retcode
 
     def download_with_info_file(self, info_filename):
-        with io.open(info_filename, 'r', encoding='utf-8') as f:
-            info = json.load(f)
+        with contextlib.closing(fileinput.FileInput(
+                [info_filename], mode='r',
+                openhook=fileinput.hook_encoded('utf-8'))) as f:
+            # FileInput doesn't have a read method, we can't call json.load
+            info = self.filter_requested_info(json.loads('\n'.join(f)))
         try:
             self.process_ie_result(info, download=True)
         except DownloadError:
@@ -1465,6 +1510,12 @@ class YoutubeDL(object):
                 raise
         return self._download_retcode
 
+    @staticmethod
+    def filter_requested_info(info_dict):
+        return dict(
+            (k, v) for k, v in info_dict.items()
+            if k not in ['requested_formats', 'requested_subtitles'])
+
     def post_process(self, filename, ie_info):
         """Run all the postprocessors on the given file."""
         info = dict(ie_info)
@@ -1474,24 +1525,17 @@ class YoutubeDL(object):
             pps_chain.extend(ie_info['__postprocessors'])
         pps_chain.extend(self._pps)
         for pp in pps_chain:
-            keep_video = None
-            old_filename = info['filepath']
             try:
-                keep_video_wish, info = pp.run(info)
-                if keep_video_wish is not None:
-                    if keep_video_wish:
-                        keep_video = keep_video_wish
-                    elif keep_video is None:
-                        # No clear decision yet, let IE decide
-                        keep_video = keep_video_wish
+                files_to_delete, info = pp.run(info)
             except PostProcessingError as e:
                 self.report_error(e.msg)
-            if keep_video is False and not self.params.get('keepvideo', False):
-                try:
+            if files_to_delete and not self.params.get('keepvideo', False):
+                for old_filename in files_to_delete:
                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
-                    os.remove(encodeFilename(old_filename))
-                except (IOError, OSError):
-                    self.report_warning('Unable to remove downloaded video file')
+                    try:
+                        os.remove(encodeFilename(old_filename))
+                    except (IOError, OSError):
+                        self.report_warning('Unable to remove downloaded original file')
 
     def _make_archive_id(self, info_dict):
         # Future-proof against any change in case
@@ -1694,10 +1738,10 @@ class YoutubeDL(object):
             out = out.decode().strip()
             if re.match('[0-9a-f]+', out):
                 self._write_string('[debug] Git HEAD: ' + out + '\n')
-        except:
+        except Exception:
             try:
                 sys.exc_clear()
-            except:
+            except Exception:
                 pass
         self._write_string('[debug] Python version %s - %s\n' % (
             platform.python_version(), platform_name()))
@@ -1757,13 +1801,14 @@ class YoutubeDL(object):
             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
             if 'http' in proxies and 'https' not in proxies:
                 proxies['https'] = proxies['http']
-        proxy_handler = compat_urllib_request.ProxyHandler(proxies)
+        proxy_handler = PerRequestProxyHandler(proxies)
 
         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
         opener = compat_urllib_request.build_opener(
-            https_handler, proxy_handler, cookie_processor, ydlh)
+            proxy_handler, https_handler, cookie_processor, ydlh)
+
         # Delete the default user-agent header, which would otherwise apply in
         # cases where our custom HTTP handler doesn't come into play
         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
@@ -1804,7 +1849,7 @@ class YoutubeDL(object):
             thumb_ext = determine_ext(t['url'], 'jpg')
             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
-            thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
+            t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
 
             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
index 49f382695d478acad6892cbec3079d41eba64685..ace17857c8cb28320ba1fab2988e56c020583af7 100644 (file)
@@ -9,6 +9,7 @@ import codecs
 import io
 import os
 import random
+import shlex
 import sys
 
 
@@ -188,10 +189,6 @@ def _real_main(argv=None):
     if opts.allsubtitles and not opts.writeautomaticsub:
         opts.writesubtitles = True
 
-    if sys.version_info < (3,):
-        # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
-        if opts.outtmpl is not None:
-            opts.outtmpl = opts.outtmpl.decode(preferredencoding())
     outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or
                (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or
                (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or
@@ -212,6 +209,11 @@ def _real_main(argv=None):
     # PostProcessors
     postprocessors = []
     # Add the metadata pp first, the other pps will copy it
+    if opts.metafromtitle:
+        postprocessors.append({
+            'key': 'MetadataFromTitle',
+            'titleformat': opts.metafromtitle
+        })
     if opts.addmetadata:
         postprocessors.append({'key': 'FFmpegMetadata'})
     if opts.extractaudio:
@@ -238,15 +240,18 @@ def _real_main(argv=None):
     if opts.xattrs:
         postprocessors.append({'key': 'XAttrMetadata'})
     if opts.embedthumbnail:
-        if not opts.addmetadata:
-            postprocessors.append({'key': 'FFmpegAudioFix'})
-        postprocessors.append({'key': 'AtomicParsley'})
+        already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
+        postprocessors.append({
+            'key': 'EmbedThumbnail',
+            'already_have_thumbnail': already_have_thumbnail
+        })
+        if not already_have_thumbnail:
+            opts.writethumbnail = True
     # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
     # So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
     if opts.exec_cmd:
         postprocessors.append({
             'key': 'ExecAfterDownload',
-            'verboseOutput': opts.verbose,
             'exec_cmd': opts.exec_cmd,
         })
     if opts.xattr_set_filesize:
@@ -255,6 +260,9 @@ def _real_main(argv=None):
             xattr  # Confuse flake8
         except ImportError:
             parser.error('setting filesize xattr requested but python-xattr is not available')
+    external_downloader_args = None
+    if opts.external_downloader_args:
+        external_downloader_args = shlex.split(opts.external_downloader_args)
     match_filter = (
         None if opts.match_filter is None
         else match_filter_func(opts.match_filter))
@@ -280,7 +288,6 @@ def _real_main(argv=None):
         'simulate': opts.simulate or any_getting,
         'skip_download': opts.skip_download,
         'format': opts.format,
-        'format_limit': opts.format_limit,
         'listformats': opts.listformats,
         'outtmpl': outtmpl,
         'autonumber_size': opts.autonumber_size,
@@ -343,7 +350,6 @@ def _real_main(argv=None):
         'default_search': opts.default_search,
         'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
         'encoding': opts.encoding,
-        'exec_cmd': opts.exec_cmd,
         'extract_flat': opts.extract_flat,
         'merge_output_format': opts.merge_output_format,
         'postprocessors': postprocessors,
@@ -359,6 +365,8 @@ def _real_main(argv=None):
         'no_color': opts.no_color,
         'ffmpeg_location': opts.ffmpeg_location,
         'hls_prefer_native': opts.hls_prefer_native,
+        'external_downloader_args': external_downloader_args,
+        'cn_verification_proxy': opts.cn_verification_proxy,
     }
 
     with YoutubeDL(ydl_opts) as ydl:
index e989cdbbd180abf4543726e86d088cd45225bfca..f9529210dd955932eca837aa7022696470c557ed 100644 (file)
@@ -1,9 +1,11 @@
 from __future__ import unicode_literals
 
+import collections
 import getpass
 import optparse
 import os
 import re
+import shutil
 import socket
 import subprocess
 import sys
@@ -44,11 +46,6 @@ try:
 except ImportError:  # Python 2
     import htmlentitydefs as compat_html_entities
 
-try:
-    import html.parser as compat_html_parser
-except ImportError:  # Python 2
-    import HTMLParser as compat_html_parser
-
 try:
     import http.client as compat_http_client
 except ImportError:  # Python 2
@@ -364,6 +361,33 @@ def workaround_optparse_bug9161():
             return real_add_option(self, *bargs, **bkwargs)
         optparse.OptionGroup.add_option = _compat_add_option
 
+if hasattr(shutil, 'get_terminal_size'):  # Python >= 3.3
+    compat_get_terminal_size = shutil.get_terminal_size
+else:
+    _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
+
+    def compat_get_terminal_size():
+        columns = compat_getenv('COLUMNS', None)
+        if columns:
+            columns = int(columns)
+        else:
+            columns = None
+        lines = compat_getenv('LINES', None)
+        if lines:
+            lines = int(lines)
+        else:
+            lines = None
+
+        try:
+            sp = subprocess.Popen(
+                ['stty', 'size'],
+                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+            out, err = sp.communicate()
+            lines, columns = map(int, out.split())
+        except Exception:
+            pass
+        return _terminal_size(columns, lines)
+
 
 __all__ = [
     'compat_HTTPError',
@@ -371,10 +395,10 @@ __all__ = [
     'compat_chr',
     'compat_cookiejar',
     'compat_expanduser',
+    'compat_get_terminal_size',
     'compat_getenv',
     'compat_getpass',
     'compat_html_entities',
-    'compat_html_parser',
     'compat_http_client',
     'compat_http_server',
     'compat_kwargs',
index 9fb66e2f7f680a71c05fdd866c72b0db2dd91a77..f110830c472eb451d77b48fa9337bd5feee55952 100644 (file)
@@ -6,7 +6,7 @@ from .f4m import F4mFD
 from .hls import HlsFD
 from .hls import NativeHlsFD
 from .http import HttpFD
-from .mplayer import MplayerFD
+from .rtsp import RtspFD
 from .rtmp import RtmpFD
 
 from ..utils import (
@@ -17,8 +17,8 @@ PROTOCOL_MAP = {
     'rtmp': RtmpFD,
     'm3u8_native': NativeHlsFD,
     'm3u8': HlsFD,
-    'mms': MplayerFD,
-    'rtsp': MplayerFD,
+    'mms': RtspFD,
+    'rtsp': RtspFD,
     'f4m': F4mFD,
 }
 
index 3ae90021a28e661ab532a2d42a7c4e0826d1f46f..97e755d4baa56972a9a4e5223a6871edd8bf0565 100644 (file)
@@ -8,6 +8,7 @@ import time
 from ..compat import compat_str
 from ..utils import (
     encodeFilename,
+    decodeArgument,
     format_bytes,
     timeconvert,
 )
@@ -42,6 +43,8 @@ class FileDownloader(object):
     max_filesize:       Skip files larger than this size
     xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
                         (experimenatal)
+    external_downloader_args:  A list of additional command-line arguments for the
+                        external downloader.
 
     Subclasses of this one must re-define the real_download method.
     """
@@ -202,7 +205,7 @@ class FileDownloader(object):
             return
         try:
             os.utime(filename, (time.time(), filetime))
-        except:
+        except Exception:
             pass
         return filetime
 
@@ -316,7 +319,7 @@ class FileDownloader(object):
         )
 
         continuedl_and_exists = (
-            self.params.get('continuedl', False) and
+            self.params.get('continuedl', True) and
             os.path.isfile(encodeFilename(filename)) and
             not self.params.get('nopart', False)
         )
@@ -351,19 +354,15 @@ class FileDownloader(object):
         # this interface
         self._progress_hooks.append(ph)
 
-    def _debug_cmd(self, args, subprocess_encoding, exe=None):
+    def _debug_cmd(self, args, exe=None):
         if not self.params.get('verbose', False):
             return
 
+        str_args = [decodeArgument(a) for a in args]
+
         if exe is None:
-            exe = os.path.basename(args[0])
+            exe = os.path.basename(str_args[0])
 
-        if subprocess_encoding:
-            str_args = [
-                a.decode(subprocess_encoding) if isinstance(a, bytes) else a
-                for a in args]
-        else:
-            str_args = args
         try:
             import pipes
             shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
index 51c41c70462674ee3a07aae6f645c06ae7c88c71..7ca2d314348400fc6ba3095e23d69e1c925ad7dd 100644 (file)
@@ -2,11 +2,11 @@ from __future__ import unicode_literals
 
 import os.path
 import subprocess
-import sys
 
 from .common import FileDownloader
 from ..utils import (
     encodeFilename,
+    encodeArgument,
 )
 
 
@@ -51,19 +51,18 @@ class ExternalFD(FileDownloader):
             return []
         return [command_option, source_address]
 
+    def _configuration_args(self, default=[]):
+        ex_args = self.params.get('external_downloader_args')
+        if ex_args is None:
+            return default
+        assert isinstance(ex_args, list)
+        return ex_args
+
     def _call_downloader(self, tmpfilename, info_dict):
         """ Either overwrite this or implement _make_cmd """
-        cmd = self._make_cmd(tmpfilename, info_dict)
-
-        if sys.platform == 'win32' and sys.version_info < (3, 0):
-            # Windows subprocess module does not actually support Unicode
-            # on Python 2.x
-            # See http://stackoverflow.com/a/9951851/35070
-            subprocess_encoding = sys.getfilesystemencoding()
-            cmd = [a.encode(subprocess_encoding, 'ignore') for a in cmd]
-        else:
-            subprocess_encoding = None
-        self._debug_cmd(cmd, subprocess_encoding)
+        cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
+
+        self._debug_cmd(cmd)
 
         p = subprocess.Popen(
             cmd, stderr=subprocess.PIPE)
@@ -79,6 +78,7 @@ class CurlFD(ExternalFD):
         for key, val in info_dict['http_headers'].items():
             cmd += ['--header', '%s: %s' % (key, val)]
         cmd += self._source_address('--interface')
+        cmd += self._configuration_args()
         cmd += ['--', info_dict['url']]
         return cmd
 
@@ -89,15 +89,16 @@ class WgetFD(ExternalFD):
         for key, val in info_dict['http_headers'].items():
             cmd += ['--header', '%s: %s' % (key, val)]
         cmd += self._source_address('--bind-address')
+        cmd += self._configuration_args()
         cmd += ['--', info_dict['url']]
         return cmd
 
 
 class Aria2cFD(ExternalFD):
     def _make_cmd(self, tmpfilename, info_dict):
-        cmd = [
-            self.exe, '-c',
-            '--min-split-size', '1M', '--max-connection-per-server', '4']
+        cmd = [self.exe, '-c']
+        cmd += self._configuration_args([
+            '--min-split-size', '1M', '--max-connection-per-server', '4'])
         dn = os.path.dirname(tmpfilename)
         if dn:
             cmd += ['--dir', dn]
index 3dc796faaf038c00383089274c0005382977a431..b1a858c452617ed452bc0dcae8d612d22fd224d3 100644 (file)
@@ -281,7 +281,7 @@ class F4mFD(FileDownloader):
             boot_info = self._get_bootstrap_from_url(bootstrap_url)
         else:
             bootstrap_url = None
-            bootstrap = base64.b64decode(node.text)
+            bootstrap = base64.b64decode(node.text.encode('ascii'))
             boot_info = read_bootstrap_info(bootstrap)
         return (boot_info, bootstrap_url)
 
@@ -308,7 +308,7 @@ class F4mFD(FileDownloader):
         live = boot_info['live']
         metadata_node = media.find(_add_ns('metadata'))
         if metadata_node is not None:
-            metadata = base64.b64decode(metadata_node.text)
+            metadata = base64.b64decode(metadata_node.text.encode('ascii'))
         else:
             metadata = None
 
@@ -389,6 +389,8 @@ class F4mFD(FileDownloader):
             url = base_url + name
             if akamai_pv:
                 url += '?' + akamai_pv.strip(';')
+            if info_dict.get('extra_param_to_segment_url'):
+                url += info_dict.get('extra_param_to_segment_url')
             frag_filename = '%s-%s' % (tmpfilename, name)
             try:
                 success = http_dl.download(frag_filename, {'url': url})
index 2e3dac8251dbaf5d8b3e1a90bc459f362d14f72e..b7f144af9ea33a102246632e04e71707be3d98ad 100644 (file)
@@ -28,13 +28,8 @@ class HttpFD(FileDownloader):
         add_headers = info_dict.get('http_headers')
         if add_headers:
             headers.update(add_headers)
-        data = info_dict.get('http_post_data')
-        http_method = info_dict.get('http_method')
-        basic_request = compat_urllib_request.Request(url, data, headers)
-        request = compat_urllib_request.Request(url, data, headers)
-        if http_method is not None:
-            basic_request.get_method = lambda: http_method
-            request.get_method = lambda: http_method
+        basic_request = compat_urllib_request.Request(url, None, headers)
+        request = compat_urllib_request.Request(url, None, headers)
 
         is_test = self.params.get('test', False)
 
@@ -49,7 +44,7 @@ class HttpFD(FileDownloader):
 
         open_mode = 'wb'
         if resume_len != 0:
-            if self.params.get('continuedl', False):
+            if self.params.get('continuedl', True):
                 self.report_resuming_byte(resume_len)
                 request.add_header('Range', 'bytes=%d-' % resume_len)
                 open_mode = 'ab'
@@ -92,6 +87,8 @@ class HttpFD(FileDownloader):
                             self._hook_progress({
                                 'filename': filename,
                                 'status': 'finished',
+                                'downloaded_bytes': resume_len,
+                                'total_bytes': resume_len,
                             })
                             return True
                         else:
@@ -218,12 +215,6 @@ class HttpFD(FileDownloader):
         if tmpfilename != '-':
             stream.close()
 
-        self._hook_progress({
-            'downloaded_bytes': byte_counter,
-            'total_bytes': data_len,
-            'tmpfilename': tmpfilename,
-            'status': 'error',
-        })
         if data_len is not None and byte_counter != data_len:
             raise ContentTooShortError(byte_counter, int(data_len))
         self.try_rename(tmpfilename, filename)
index 89e98ae61e128c80eab5b0e04109b1baa2ecff7e..7d19bb808a820da77aeb21070ebbdec4355f6739 100644 (file)
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 import os
 import re
 import subprocess
-import sys
 import time
 
 from .common import FileDownloader
@@ -11,6 +10,7 @@ from ..compat import compat_str
 from ..utils import (
     check_executable,
     encodeFilename,
+    encodeArgument,
     get_exe_version,
 )
 
@@ -105,7 +105,7 @@ class RtmpFD(FileDownloader):
         protocol = info_dict.get('rtmp_protocol', None)
         real_time = info_dict.get('rtmp_real_time', False)
         no_resume = info_dict.get('no_resume', False)
-        continue_dl = info_dict.get('continuedl', False)
+        continue_dl = info_dict.get('continuedl', True)
 
         self.report_destination(filename)
         tmpfilename = self.temp_name(filename)
@@ -121,7 +121,7 @@ class RtmpFD(FileDownloader):
         # possible. This is part of rtmpdump's normal usage, AFAIK.
         basic_args = [
             'rtmpdump', '--verbose', '-r', url,
-            '-o', encodeFilename(tmpfilename, True)]
+            '-o', tmpfilename]
         if player_url is not None:
             basic_args += ['--swfVfy', player_url]
         if page_url is not None:
@@ -131,7 +131,7 @@ class RtmpFD(FileDownloader):
         if play_path is not None:
             basic_args += ['--playpath', play_path]
         if tc_url is not None:
-            basic_args += ['--tcUrl', url]
+            basic_args += ['--tcUrl', tc_url]
         if test:
             basic_args += ['--stop', '1']
         if flash_version is not None:
@@ -154,16 +154,9 @@ class RtmpFD(FileDownloader):
         if not live and continue_dl:
             args += ['--skip', '1']
 
-        if sys.platform == 'win32' and sys.version_info < (3, 0):
-            # Windows subprocess module does not actually support Unicode
-            # on Python 2.x
-            # See http://stackoverflow.com/a/9951851/35070
-            subprocess_encoding = sys.getfilesystemencoding()
-            args = [a.encode(subprocess_encoding, 'ignore') for a in args]
-        else:
-            subprocess_encoding = None
+        args = [encodeArgument(a) for a in args]
 
-        self._debug_cmd(args, subprocess_encoding, exe='rtmpdump')
+        self._debug_cmd(args, exe='rtmpdump')
 
         RD_SUCCESS = 0
         RD_FAILED = 1
@@ -180,7 +173,11 @@ class RtmpFD(FileDownloader):
             prevsize = os.path.getsize(encodeFilename(tmpfilename))
             self.to_screen('[rtmpdump] %s bytes' % prevsize)
             time.sleep(5.0)  # This seems to be needed
-            retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == RD_FAILED])
+            args = basic_args + ['--resume']
+            if retval == RD_FAILED:
+                args += ['--skip', '1']
+            args = [encodeArgument(a) for a in args]
+            retval = run_rtmpdump(args)
             cursize = os.path.getsize(encodeFilename(tmpfilename))
             if prevsize == cursize and retval == RD_FAILED:
                 break
similarity index 64%
rename from youtube_dl/downloader/mplayer.py
rename to youtube_dl/downloader/rtsp.py
index 72cef30eaf3718ad8932814a627042cc0bdff361..3eb29526cbc90cb3351c75876698a1b238c07ef8 100644 (file)
@@ -10,21 +10,23 @@ from ..utils import (
 )
 
 
-class MplayerFD(FileDownloader):
+class RtspFD(FileDownloader):
     def real_download(self, filename, info_dict):
         url = info_dict['url']
         self.report_destination(filename)
         tmpfilename = self.temp_name(filename)
 
-        args = [
-            'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',
-            '-dumpstream', '-dumpfile', tmpfilename, url]
-        # Check for mplayer first
-        if not check_executable('mplayer', ['-h']):
-            self.report_error('MMS or RTSP download detected but "%s" could not be run' % args[0])
+        if check_executable('mplayer', ['-h']):
+            args = [
+                'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',
+                '-dumpstream', '-dumpfile', tmpfilename, url]
+        elif check_executable('mpv', ['-h']):
+            args = [
+                'mpv', '-really-quiet', '--vo=null', '--stream-dump=' + tmpfilename, url]
+        else:
+            self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install any.')
             return False
 
-        # Download using mplayer.
         retval = subprocess.call(args)
         if retval == 0:
             fsize = os.path.getsize(encodeFilename(tmpfilename))
@@ -39,5 +41,5 @@ class MplayerFD(FileDownloader):
             return True
         else:
             self.to_stderr('\n')
-            self.report_error('mplayer exited with code %d' % retval)
+            self.report_error('%s exited with code %d' % (args[0], retval))
             return False
index ffcc7d9ab3c8fa44b99e5660ab179b84ace94429..8ec0c1032a1538f87a17a66e4211deb2db88418d 100644 (file)
@@ -32,11 +32,13 @@ from .atresplayer import AtresPlayerIE
 from .atttechchannel import ATTTechChannelIE
 from .audiomack import AudiomackIE, AudiomackAlbumIE
 from .azubu import AzubuIE
+from .baidu import BaiduVideoIE
 from .bambuser import BambuserIE, BambuserChannelIE
 from .bandcamp import BandcampIE, BandcampAlbumIE
 from .bbccouk import BBCCoUkIE
 from .beeg import BeegIE
 from .behindkink import BehindKinkIE
+from .beatportpro import BeatportProIE
 from .bet import BetIE
 from .bild import BildIE
 from .bilibili import BiliBiliIE
@@ -69,6 +71,7 @@ from .chirbit import (
     ChirbitProfileIE,
 )
 from .cinchcast import CinchcastIE
+from .cinemassacre import CinemassacreIE
 from .clipfish import ClipfishIE
 from .cliphunter import CliphunterIE
 from .clipsyndicate import ClipsyndicateIE
@@ -89,6 +92,7 @@ from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
 from .condenast import CondeNastIE
 from .cracked import CrackedIE
 from .criterion import CriterionIE
+from .crooksandliars import CrooksAndLiarsIE
 from .crunchyroll import (
     CrunchyrollIE,
     CrunchyrollShowPlaylistIE
@@ -105,17 +109,21 @@ from .dbtv import DBTVIE
 from .dctp import DctpTvIE
 from .deezer import DeezerPlaylistIE
 from .dfb import DFBIE
+from .dhm import DHMIE
 from .dotsub import DotsubIE
+from .douyutv import DouyuTVIE
 from .dreisat import DreiSatIE
 from .drbonanza import DRBonanzaIE
 from .drtuber import DrTuberIE
 from .drtv import DRTVIE
 from .dvtv import DVTVIE
 from .dump import DumpIE
+from .dumpert import DumpertIE
 from .defense import DefenseGouvFrIE
 from .discovery import DiscoveryIE
 from .divxstage import DivxStageIE
 from .dropbox import DropboxIE
+from .eagleplatform import EaglePlatformIE
 from .ebaumsworld import EbaumsWorldIE
 from .echomsk import EchoMskIE
 from .ehow import EHowIE
@@ -150,9 +158,11 @@ from .fktv import (
 )
 from .flickr import FlickrIE
 from .folketinget import FolketingetIE
+from .footyroom import FootyRoomIE
 from .fourtube import FourTubeIE
 from .foxgay import FoxgayIE
 from .foxnews import FoxNewsIE
+from .foxsports import FoxSportsIE
 from .franceculture import FranceCultureIE
 from .franceinter import FranceInterIE
 from .francetv import (
@@ -171,11 +181,14 @@ from .gameone import (
     GameOneIE,
     GameOnePlaylistIE,
 )
+from .gamersyde import GamersydeIE
 from .gamespot import GameSpotIE
 from .gamestar import GameStarIE
 from .gametrailers import GametrailersIE
+from .gazeta import GazetaIE
 from .gdcvault import GDCVaultIE
 from .generic import GenericIE
+from .gfycat import GfycatIE
 from .giantbomb import GiantBombIE
 from .giga import GigaIE
 from .glide import GlideIE
@@ -187,7 +200,6 @@ from .googleplus import GooglePlusIE
 from .googlesearch import GoogleSearchIE
 from .gorillavid import GorillaVidIE
 from .goshgay import GoshgayIE
-from .grooveshark import GroovesharkIE
 from .groupon import GrouponIE
 from .hark import HarkIE
 from .hearthisat import HearThisAtIE
@@ -228,6 +240,7 @@ from .jove import JoveIE
 from .jukebox import JukeboxIE
 from .jpopsukitv import JpopsukiIE
 from .kaltura import KalturaIE
+from .kanalplay import KanalPlayIE
 from .kankan import KankanIE
 from .karaoketv import KaraoketvIE
 from .keezmovies import KeezMoviesIE
@@ -244,7 +257,11 @@ from .letv import (
     LetvTvIE,
     LetvPlaylistIE
 )
-from .lifenews import LifeNewsIE
+from .libsyn import LibsynIE
+from .lifenews import (
+    LifeNewsIE,
+    LifeEmbedIE,
+)
 from .liveleak import LiveLeakIE
 from .livestream import (
     LivestreamIE,
@@ -262,11 +279,13 @@ from .macgamestore import MacGameStoreIE
 from .mailru import MailRuIE
 from .malemotion import MalemotionIE
 from .mdr import MDRIE
+from .megavideoz import MegaVideozIE
 from .metacafe import MetacafeIE
 from .metacritic import MetacriticIE
 from .mgoon import MgoonIE
 from .minhateca import MinhatecaIE
 from .ministrygrid import MinistryGridIE
+from .miomio import MioMioIE
 from .mit import TechTVMITIE, MITIE, OCWMITIE
 from .mitele import MiTeleIE
 from .mixcloud import MixcloudIE
@@ -302,8 +321,13 @@ from .nba import NBAIE
 from .nbc import (
     NBCIE,
     NBCNewsIE,
+    NBCSportsIE,
+    NBCSportsVPlayerIE,
+)
+from .ndr import (
+    NDRIE,
+    NJoyIE,
 )
-from .ndr import NDRIE
 from .ndtv import NDTVIE
 from .netzkino import NetzkinoIE
 from .nerdcubed import NerdCubedFeedIE
@@ -340,11 +364,15 @@ from .npo import (
 )
 from .nrk import (
     NRKIE,
+    NRKPlaylistIE,
     NRKTVIE,
 )
 from .ntvde import NTVDeIE
 from .ntvru import NTVRuIE
-from .nytimes import NYTimesIE
+from .nytimes import (
+    NYTimesIE,
+    NYTimesArticleIE,
+)
 from .nuvid import NuvidIE
 from .odnoklassniki import OdnoklassnikiIE
 from .oktoberfesttv import OktoberfestTVIE
@@ -354,16 +382,20 @@ from .orf import (
     ORFTVthekIE,
     ORFOE1IE,
     ORFFM4IE,
+    ORFIPTVIE,
 )
 from .parliamentliveuk import ParliamentLiveUKIE
 from .patreon import PatreonIE
 from .pbs import PBSIE
+from .philharmoniedeparis import PhilharmonieDeParisIE
 from .phoenix import PhoenixIE
 from .photobucket import PhotobucketIE
 from .planetaplay import PlanetaPlayIE
+from .pladform import PladformIE
 from .played import PlayedIE
 from .playfm import PlayFMIE
 from .playvid import PlayvidIE
+from .playwire import PlaywireIE
 from .podomatic import PodomaticIE
 from .pornhd import PornHdIE
 from .pornhub import (
@@ -371,14 +403,23 @@ from .pornhub import (
     PornHubPlaylistIE,
 )
 from .pornotube import PornotubeIE
+from .pornovoisines import PornoVoisinesIE
 from .pornoxo import PornoXOIE
+from .primesharetv import PrimeShareTVIE
 from .promptfile import PromptFileIE
 from .prosiebensat1 import ProSiebenSat1IE
 from .puls4 import Puls4IE
 from .pyvideo import PyvideoIE
+from .qqmusic import (
+    QQMusicIE,
+    QQMusicSingerIE,
+    QQMusicAlbumIE,
+    QQMusicToplistIE,
+)
 from .quickvid import QuickVidIE
 from .r7 import R7IE
 from .radiode import RadioDeIE
+from .radiojavan import RadioJavanIE
 from .radiobremen import RadioBremenIE
 from .radiofrance import RadioFranceIE
 from .rai import RaiIE
@@ -397,7 +438,7 @@ from .rtlnow import RTLnowIE
 from .rtl2 import RTL2IE
 from .rtp import RTPIE
 from .rts import RTSIE
-from .rtve import RTVEALaCartaIE, RTVELiveIE
+from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
 from .ruhd import RUHDIE
 from .rutube import (
     RutubeIE,
@@ -408,13 +449,18 @@ from .rutube import (
 )
 from .rutv import RUTVIE
 from .sandia import SandiaIE
+from .safari import (
+    SafariIE,
+    SafariCourseIE,
+)
 from .sapo import SapoIE
 from .savefrom import SaveFromIE
 from .sbs import SBSIE
 from .scivee import SciVeeIE
 from .screencast import ScreencastIE
 from .screencastomatic import ScreencastOMaticIE
-from .screenwavemedia import CinemassacreIE, ScreenwaveMediaIE, TeamFourIE
+from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE
+from .senateisvp import SenateISVPIE
 from .servingsys import ServingSysIE
 from .sexu import SexuIE
 from .sexykarma import SexyKarmaIE
@@ -444,9 +490,13 @@ from .soundgasm import (
 )
 from .southpark import (
     SouthParkIE,
-    SouthparkDeIE,
+    SouthParkDeIE,
+    SouthParkDkIE,
+    SouthParkEsIE,
+    SouthParkNlIE
 )
 from .space import SpaceIE
+from .spankbang import SpankBangIE
 from .spankwire import SpankwireIE
 from .spiegel import SpiegelIE, SpiegelArticleIE
 from .spiegeltv import SpiegeltvIE
@@ -454,14 +504,19 @@ from .spike import SpikeIE
 from .sport5 import Sport5IE
 from .sportbox import SportBoxIE
 from .sportdeutschland import SportDeutschlandIE
+from .srf import SrfIE
 from .srmediathek import SRMediathekIE
+from .ssa import SSAIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .steam import SteamIE
 from .streamcloud import StreamcloudIE
 from .streamcz import StreamCZIE
 from .streetvoice import StreetVoiceIE
 from .sunporno import SunPornoIE
-from .svtplay import SVTPlayIE
+from .svt import (
+    SVTIE,
+    SVTPlayIE,
+)
 from .swrmediathek import SWRMediathekIE
 from .syfy import SyfyIE
 from .sztvhu import SztvHuIE
@@ -490,7 +545,10 @@ from .thesixtyone import TheSixtyOneIE
 from .thisav import ThisAVIE
 from .tinypic import TinyPicIE
 from .tlc import TlcIE, TlcDeIE
-from .tmz import TMZIE
+from .tmz import (
+    TMZIE,
+    TMZArticleIE,
+)
 from .tnaflix import TNAFlixIE
 from .thvideo import (
     THVideoIE,
@@ -513,6 +571,10 @@ from .tvp import TvpIE, TvpSeriesIE
 from .tvplay import TVPlayIE
 from .tweakers import TweakersIE
 from .twentyfourvideo import TwentyFourVideoIE
+from .twentytwotracks import (
+    TwentyTwoTracksIE,
+    TwentyTwoTracksGenreIE
+)
 from .twitch import (
     TwitchVideoIE,
     TwitchChapterIE,
@@ -527,15 +589,23 @@ from .udemy import (
     UdemyIE,
     UdemyCourseIE
 )
+from .udn import UDNEmbedIE
+from .ultimedia import UltimediaIE
 from .unistra import UnistraIE
 from .urort import UrortIE
 from .ustream import UstreamIE, UstreamChannelIE
+from .varzesh3 import Varzesh3IE
 from .vbox7 import Vbox7IE
 from .veehd import VeeHDIE
 from .veoh import VeohIE
+from .vessel import VesselIE
 from .vesti import VestiIE
 from .vevo import VevoIE
-from .vgtv import VGTVIE
+from .vgtv import (
+    BTArticleIE,
+    BTVestlendingenIE,
+    VGTVIE,
+)
 from .vh1 import VH1IE
 from .vice import ViceIE
 from .viddler import ViddlerIE
@@ -550,6 +620,7 @@ from .videoweed import VideoWeedIE
 from .vidme import VidmeIE
 from .vidzi import VidziIE
 from .vier import VierIE, VierVideosIE
+from .viewster import ViewsterIE
 from .vimeo import (
     VimeoIE,
     VimeoAlbumIE,
@@ -571,6 +642,7 @@ from .vk import (
     VKUserVideosIE,
 )
 from .vodlocker import VodlockerIE
+from .voicerepublic import VoiceRepublicIE
 from .vporn import VpornIE
 from .vrt import VRTIE
 from .vube import VubeIE
@@ -597,15 +669,21 @@ from .xboxclips import XboxClipsIE
 from .xhamster import XHamsterIE
 from .xminus import XMinusIE
 from .xnxx import XNXXIE
-from .xvideos import XVideosIE
+from .xstream import XstreamIE
 from .xtube import XTubeUserIE, XTubeIE
 from .xuite import XuiteIE
+from .xvideos import XVideosIE
 from .xxxymovies import XXXYMoviesIE
 from .yahoo import (
     YahooIE,
     YahooSearchIE,
 )
 from .yam import YamIE
+from .yandexmusic import (
+    YandexMusicTrackIE,
+    YandexMusicAlbumIE,
+    YandexMusicPlaylistIE,
+)
 from .yesjapan import YesJapanIE
 from .ynet import YnetIE
 from .youjizz import YouJizzIE
index 203936e54a3797ae37535022ad02757a925f24d7..e3e6d21137994593d593fbc51313bf38032ce7f8 100644 (file)
@@ -11,12 +11,13 @@ from ..compat import (
 )
 from ..utils import (
     ExtractorError,
+    qualities,
 )
 
 
 class AddAnimeIE(InfoExtractor):
-    _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<id>[\w_]+)(?:.*)'
-    _TEST = {
+    _VALID_URL = r'http://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P<id>[\w_]+)'
+    _TESTS = [{
         'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
         'md5': '72954ea10bc979ab5e2eb288b21425a0',
         'info_dict': {
@@ -25,7 +26,10 @@ class AddAnimeIE(InfoExtractor):
             'description': 'One Piece 606',
             'title': 'One Piece 606',
         }
-    }
+    }, {
+        'url': 'http://add-anime.net/video/MDUGWYKNGBD8/One-Piece-687',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -63,8 +67,10 @@ class AddAnimeIE(InfoExtractor):
                 note='Confirming after redirect')
             webpage = self._download_webpage(url, video_id)
 
+        FORMATS = ('normal', 'hq')
+        quality = qualities(FORMATS)
         formats = []
-        for format_id in ('normal', 'hq'):
+        for format_id in FORMATS:
             rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id)
             video_url = self._search_regex(rex, webpage, 'video file URLx',
                                            fatal=False)
@@ -73,6 +79,7 @@ class AddAnimeIE(InfoExtractor):
             formats.append({
                 'format_id': format_id,
                 'url': video_url,
+                'quality': quality(format_id),
             })
         self._sort_formats(formats)
         video_title = self._og_search_title(webpage)
index 34b8b01157bb930937f6f69c4950d8d01c39ed6e..39335b8272295dbf2b640881cd29a6f5b99acaba 100644 (file)
@@ -2,13 +2,12 @@
 from __future__ import unicode_literals
 
 import re
-import json
 
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
-    xpath_text,
     float_or_none,
+    xpath_text,
 )
 
 
@@ -60,6 +59,24 @@ class AdultSwimIE(InfoExtractor):
             'title': 'American Dad - Putting Francine Out of Business',
             'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
         },
+    }, {
+        'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/',
+        'playlist': [
+            {
+                'md5': '3e346a2ab0087d687a05e1e7f3b3e529',
+                'info_dict': {
+                    'id': 'sY3cMUR_TbuE4YmdjzbIcQ-0',
+                    'ext': 'flv',
+                    'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
+                    'description': 'Dr. Brule reports live from Wine Country with a special report on wines.  \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
+                },
+            }
+        ],
+        'info_dict': {
+            'id': 'sY3cMUR_TbuE4YmdjzbIcQ',
+            'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
+            'description': 'Dr. Brule reports live from Wine Country with a special report on wines.  \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
+        },
     }]
 
     @staticmethod
@@ -80,6 +97,7 @@ class AdultSwimIE(InfoExtractor):
             for video in collection.get('videos'):
                 if video.get('slug') == slug:
                     return collection, video
+        return None, None
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -90,28 +108,30 @@ class AdultSwimIE(InfoExtractor):
         webpage = self._download_webpage(url, episode_path)
 
         # Extract the value of `bootstrappedData` from the Javascript in the page.
-        bootstrappedDataJS = self._search_regex(r'var bootstrappedData = ({.*});', webpage, episode_path)
-
-        try:
-            bootstrappedData = json.loads(bootstrappedDataJS)
-        except ValueError as ve:
-            errmsg = '%s: Failed to parse JSON ' % episode_path
-            raise ExtractorError(errmsg, cause=ve)
+        bootstrapped_data = self._parse_json(self._search_regex(
+            r'var bootstrappedData = ({.*});', webpage, 'bootstraped data'), episode_path)
 
         # Downloading videos from a /videos/playlist/ URL needs to be handled differently.
         # NOTE: We are only downloading one video (the current one) not the playlist
         if is_playlist:
-            collections = bootstrappedData['playlists']['collections']
+            collections = bootstrapped_data['playlists']['collections']
             collection = self.find_collection_by_linkURL(collections, show_path)
             video_info = self.find_video_info(collection, episode_path)
 
             show_title = video_info['showTitle']
             segment_ids = [video_info['videoPlaybackID']]
         else:
-            collections = bootstrappedData['show']['collections']
+            collections = bootstrapped_data['show']['collections']
             collection, video_info = self.find_collection_containing_video(collections, episode_path)
 
-            show = bootstrappedData['show']
+            # Video wasn't found in the collections, let's try `slugged_video`.
+            if video_info is None:
+                if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path:
+                    video_info = bootstrapped_data['slugged_video']
+                else:
+                    raise ExtractorError('Unable to find video info')
+
+            show = bootstrapped_data['show']
             show_title = show['title']
             segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
 
index 2b257ede7457e2844250f3034f6a8031d31624a9..0c00acfb5766ad6e899877b8ed173225c10fc4ec 100644 (file)
@@ -1,23 +1,13 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..utils import (
-    int_or_none,
-    parse_iso8601,
-    xpath_with_ns,
-    xpath_text,
-    find_xpath_attr,
-)
 
 
 class AftenpostenIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/([^/]+/)*(?P<id>[^/]+)-\d+\.html'
-
+    _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/(?:#!/)?video/(?P<id>\d+)'
     _TEST = {
-        'url': 'http://www.aftenposten.no/webtv/serier-og-programmer/sweatshopenglish/TRAILER-SWEATSHOP---I-cant-take-any-more-7800835.html?paging=&section=webtv_serierogprogrammer_sweatshop_sweatshopenglish',
+        'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
         'md5': 'fd828cd29774a729bf4d4425fe192972',
         'info_dict': {
             'id': '21039',
@@ -30,74 +20,4 @@ class AftenpostenIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        display_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, display_id)
-
-        video_id = self._html_search_regex(
-            r'data-xs-id="(\d+)"', webpage, 'video id')
-
-        data = self._download_xml(
-            'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=%s' % video_id, video_id)
-
-        NS_MAP = {
-            'atom': 'http://www.w3.org/2005/Atom',
-            'xt': 'http://xstream.dk/',
-            'media': 'http://search.yahoo.com/mrss/',
-        }
-
-        entry = data.find(xpath_with_ns('./atom:entry', NS_MAP))
-
-        title = xpath_text(
-            entry, xpath_with_ns('./atom:title', NS_MAP), 'title')
-        description = xpath_text(
-            entry, xpath_with_ns('./atom:summary', NS_MAP), 'description')
-        timestamp = parse_iso8601(xpath_text(
-            entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date'))
-
-        formats = []
-        media_group = entry.find(xpath_with_ns('./media:group', NS_MAP))
-        for media_content in media_group.findall(xpath_with_ns('./media:content', NS_MAP)):
-            media_url = media_content.get('url')
-            if not media_url:
-                continue
-            tbr = int_or_none(media_content.get('bitrate'))
-            mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', media_url)
-            if mobj:
-                formats.append({
-                    'url': mobj.group('url'),
-                    'play_path': 'mp4:%s' % mobj.group('playpath'),
-                    'app': mobj.group('app'),
-                    'ext': 'flv',
-                    'tbr': tbr,
-                    'format_id': 'rtmp-%d' % tbr,
-                })
-            else:
-                formats.append({
-                    'url': media_url,
-                    'tbr': tbr,
-                })
-        self._sort_formats(formats)
-
-        link = find_xpath_attr(
-            entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original')
-        if link is not None:
-            formats.append({
-                'url': link.get('href'),
-                'format_id': link.get('rel'),
-            })
-
-        thumbnails = [{
-            'url': splash.get('url'),
-            'width': int_or_none(splash.get('width')),
-            'height': int_or_none(splash.get('height')),
-        } for splash in media_group.findall(xpath_with_ns('./xt:splash', NS_MAP))]
-
-        return {
-            'id': video_id,
-            'title': title,
-            'description': description,
-            'timestamp': timestamp,
-            'formats': formats,
-            'thumbnails': thumbnails,
-        }
+        return self.url_result('xstream:ap:%s' % self._match_id(url), 'Xstream')
index 8442019eac3eaa0a373140d494ffa2ca420f4606..a117502bc0ad7bfec11592ec57da575898cacc3d 100644 (file)
@@ -2,10 +2,11 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..utils import int_or_none
 
 
 class AftonbladetIE(InfoExtractor):
-    _VALID_URL = r'^http://tv\.aftonbladet\.se/webbtv.+?(?P<video_id>article[0-9]+)\.ab(?:$|[?#])'
+    _VALID_URL = r'http://tv\.aftonbladet\.se/webbtv.+?(?P<id>article[0-9]+)\.ab(?:$|[?#])'
     _TEST = {
         'url': 'http://tv.aftonbladet.se/webbtv/nyheter/vetenskap/rymden/article36015.ab',
         'info_dict': {
@@ -43,9 +44,9 @@ class AftonbladetIE(InfoExtractor):
             formats.append({
                 'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']),
                 'ext': 'mp4',
-                'width': fmt['width'],
-                'height': fmt['height'],
-                'tbr': fmt['bitrate'],
+                'width': int_or_none(fmt.get('width')),
+                'height': int_or_none(fmt.get('height')),
+                'tbr': int_or_none(fmt.get('bitrate')),
                 'protocol': 'http',
             })
         self._sort_formats(formats)
@@ -54,9 +55,9 @@ class AftonbladetIE(InfoExtractor):
             'id': video_id,
             'title': internal_meta_json['title'],
             'formats': formats,
-            'thumbnail': internal_meta_json['imageUrl'],
-            'description': internal_meta_json['shortPreamble'],
-            'timestamp': internal_meta_json['timePublished'],
-            'duration': internal_meta_json['duration'],
-            'view_count': internal_meta_json['views'],
+            'thumbnail': internal_meta_json.get('imageUrl'),
+            'description': internal_meta_json.get('shortPreamble'),
+            'timestamp': int_or_none(internal_meta_json.get('timePublished')),
+            'duration': int_or_none(internal_meta_json.get('duration')),
+            'view_count': int_or_none(internal_meta_json.get('views')),
         }
index 9fc35a42b8612d828ccc3ae43c9e4f74782f5352..8feb7cb7456ec4db8d6a8f28b411a58cb5ac47a1 100644 (file)
@@ -33,7 +33,7 @@ class ArchiveOrgIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        json_url = url + ('?' if '?' in url else '&') + 'output=json'
+        json_url = url + ('&' if '?' in url else '?') + 'output=json'
         data = self._download_json(json_url, video_id)
 
         def get_optional(data_dict, field):
index 783b53e23035a7bd3f3feac628ff2de8daefbea5..6a35ea463edcafe3b9d7db4c53b9bf0c53198fd0 100644 (file)
@@ -50,6 +50,9 @@ class ARDMediathekIE(InfoExtractor):
         if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
             raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
 
+        if 'Diese Sendung ist für Jugendliche unter 12 Jahren nicht geeignet. Der Clip ist deshalb nur von 20 bis 6 Uhr verfügbar.' in webpage:
+            raise ExtractorError('This program is only suitable for those aged 12 and older. Video %s is therefore only available between 20 pm and 6 am.' % video_id, expected=True)
+
         if re.search(r'[\?&]rss($|[=&])', url):
             doc = parse_xml(webpage)
             if doc.tag == 'rss':
index 929dd3cc5550beb1b2da8874763084b5146d2f33..8273bd6c9ae3cdff82052c8f63efc68be97561b3 100644 (file)
@@ -146,6 +146,7 @@ class ArteTVPlus7IE(InfoExtractor):
 
             formats.append(format)
 
+        self._check_formats(formats, video_id)
         self._sort_formats(formats)
 
         info_dict['formats'] = formats
index 7669e0e3dc643b3bcf8d39663efcf6cba4540b04..29f8795d3dfe2bdae9993f9b1fd3d278cb8c3a9c 100644 (file)
@@ -19,6 +19,7 @@ from ..utils import (
 
 class AtresPlayerIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html'
+    _NETRC_MACHINE = 'atresplayer'
     _TESTS = [
         {
             'url': 'http://www.atresplayer.com/television/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_2014122100174.html',
diff --git a/youtube_dl/extractor/baidu.py b/youtube_dl/extractor/baidu.py
new file mode 100644 (file)
index 0000000..906895c
--- /dev/null
@@ -0,0 +1,84 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+
+
+class BaiduVideoIE(InfoExtractor):
+    """Extractor for v.baidu.com series/show pages.
+
+    Every page is returned as a playlist whose entries are the URLs that
+    Baidu's per-episode redirect pages forward to.
+    """
+
+    _VALID_URL = r'http://v\.baidu\.com/(?P<type>[a-z]+)/(?P<id>\d+)\.htm'
+    _TESTS = [{
+        'url': 'http://v.baidu.com/comic/1069.htm?frp=bdbrand&q=%E4%B8%AD%E5%8D%8E%E5%B0%8F%E5%BD%93%E5%AE%B6',
+        'info_dict': {
+            'id': '1069',
+            'title': '中华小当家 TV版 (全52集)',
+            'description': 'md5:395a419e41215e531c857bb037bbaf80',
+        },
+        'playlist_count': 52,
+    }, {
+        'url': 'http://v.baidu.com/show/11595.htm?frp=bdbrand',
+        'info_dict': {
+            'id': '11595',
+            'title': 're:^奔跑吧兄弟',
+            'description': 'md5:1bf88bad6d850930f542d51547c089b8',
+        },
+        'playlist_mincount': 3,
+    }]
+
+    def _real_extract(self, url):
+        """Build a playlist of the episodes listed for the page at ``url``."""
+        mobj = re.match(self._VALID_URL, url)
+        playlist_id = mobj.group('id')
+        # The API path uses the category from the URL, but its dtype
+        # parameter spells the 'show' category as 'tvshow'.
+        category = category2 = mobj.group('type')
+        if category == 'show':
+            category2 = 'tvshow'
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        # Match up to the closing quote that opened the value, so that a
+        # double-quoted title neither overruns nor chokes on an apostrophe.
+        playlist_title = self._html_search_regex(
+            r'title\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1', webpage,
+            'playlist title', group='title')
+        # The third positional argument is the field name used in messages;
+        # the description is optional, so a miss must yield None rather
+        # than abort or return a placeholder string.
+        playlist_description = self._html_search_regex(
+            r'<input[^>]+class="j-data-intro"[^>]+value="([^"]+)"/>', webpage,
+            'playlist description', fatal=False)
+
+        site = self._html_search_regex(
+            r'filterSite\s*:\s*["\']([^"]*)["\']', webpage,
+            'primary provider site')
+        api_result = self._download_json(
+            'http://v.baidu.com/%s_intro/?dtype=%sPlayUrl&id=%s&site=%s' % (
+                category, category2, playlist_id, site),
+            playlist_id, 'Get playlist links')
+
+        entries = []
+        for episode in api_result[0]['episodes']:
+            episode_id = '%s_%s' % (playlist_id, episode['episode'])
+
+            # Each episode link is a Baidu page that forwards to the real
+            # URL through a JavaScript location.replace() call.
+            redirect_page = self._download_webpage(
+                compat_urlparse.urljoin(url, episode['url']), episode_id,
+                note='Download Baidu redirect page')
+            real_url = self._html_search_regex(
+                r'location\.replace\("([^"]+)"\)', redirect_page, 'real URL')
+
+            entries.append(self.url_result(
+                real_url, video_title=episode['single_title']))
+
+        return self.playlist_result(
+            entries, playlist_id, playlist_title, playlist_description)
index c193e66cad7275cffb6ee96e051d567b9262e773..8dff1d6e377c0c246cfc958821b1d18cae4b2b64 100644 (file)
@@ -1,12 +1,18 @@
 from __future__ import unicode_literals
 
 import re
-import json
 import itertools
 
 from .common import InfoExtractor
 from ..compat import (
+    compat_urllib_parse,
     compat_urllib_request,
+    compat_str,
+)
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    float_or_none,
 )
 
 
@@ -14,6 +20,8 @@ class BambuserIE(InfoExtractor):
     IE_NAME = 'bambuser'
     _VALID_URL = r'https?://bambuser\.com/v/(?P<id>\d+)'
     _API_KEY = '005f64509e19a868399060af746a00aa'
+    _LOGIN_URL = 'https://bambuser.com/user'
+    _NETRC_MACHINE = 'bambuser'
 
     _TEST = {
         'url': 'http://bambuser.com/v/4050584',
@@ -26,6 +34,9 @@ class BambuserIE(InfoExtractor):
             'duration': 3741,
             'uploader': 'pixelversity',
             'uploader_id': '344706',
+            'timestamp': 1382976692,
+            'upload_date': '20131028',
+            'view_count': int,
         },
         'params': {
             # It doesn't respect the 'Range' header, it would download the whole video
@@ -34,23 +45,78 @@ class BambuserIE(InfoExtractor):
         },
     }
 
+    def _login(self):
+        """Log in with the configured credentials, if any.
+
+        Returns without doing anything when no username is available.
+        Raises ExtractorError (expected) when the login response contains
+        an error message block.
+        """
+        (username, password) = self._get_login_info()
+        if username is None:
+            return
+
+        login_form = {
+            'form_id': 'user_login',
+            'op': 'Log in',
+            'name': username,
+            'pass': password,
+        }
+
+        request = compat_urllib_request.Request(
+            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
+        request.add_header('Referer', self._LOGIN_URL)
+        response = self._download_webpage(
+            request, None, 'Logging in as %s' % username)
+
+        login_error = self._html_search_regex(
+            r'(?s)<div class="messages error">(.+?)</div>',
+            response, 'login error', default=None)
+        if login_error:
+            raise ExtractorError(
+                'Unable to login: %s' % login_error, expected=True)
+
+    def _real_initialize(self):
+        """Run the (optional) login step."""
+        self._login()
+
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        info_url = ('http://player-c.api.bambuser.com/getVideo.json?'
-                    '&api_key=%s&vid=%s' % (self._API_KEY, video_id))
-        info_json = self._download_webpage(info_url, video_id)
-        info = json.loads(info_json)['result']
+        """Return the info dict for the video at ``url``.
+
+        Raises ExtractorError (expected) when the API response carries an
+        'error' value.
+        """
+        video_id = self._match_id(url)
+
+        info = self._download_json(
+            'http://player-c.api.bambuser.com/getVideo.json?api_key=%s&vid=%s'
+            % (self._API_KEY, video_id), video_id)
+
+        error = info.get('error')
+        if error:
+            raise ExtractorError(
+                '%s returned error: %s' % (self.IE_NAME, error), expected=True)
+
+        result = info['result']
+
+        # compat_str(None) is the string 'None', so only stringify the
+        # owner uid when the API actually returned one.
+        uploader_id = (result.get('owner') or {}).get('uid')
+        if uploader_id is not None:
+            uploader_id = compat_str(uploader_id)
 
         return {
             'id': video_id,
-            'title': info['title'],
-            'url': info['url'],
-            'thumbnail': info.get('preview'),
-            'duration': int(info['length']),
-            'view_count': int(info['views_total']),
-            'uploader': info['username'],
-            'uploader_id': info['owner']['uid'],
+            'title': result['title'],
+            'url': result['url'],
+            'thumbnail': result.get('preview'),
+            'duration': int_or_none(result.get('length')),
+            'uploader': result.get('username'),
+            'uploader_id': uploader_id,
+            'timestamp': int_or_none(result.get('created')),
+            'fps': float_or_none(result.get('framerate')),
+            'view_count': int_or_none(result.get('views_total')),
+            'comment_count': int_or_none(result.get('comment_count')),
         }
 
 
index 86929496708fccf3bc0febe78cd1e599fda1ab97..505877b773d45b36be31d8dea8a6a1766d72d4ca 100644 (file)
@@ -72,7 +72,7 @@ class BandcampIE(InfoExtractor):
 
         download_link = m_download.group(1)
         video_id = self._search_regex(
-            r'(?ms)var TralbumData = {.*?id: (?P<id>\d+),?$',
+            r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
             webpage, 'video id')
 
         download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page')
index abc34a5761487b5a900294dac59db4a053b95cb0..249bc6bbde85dc568796f094f421f989df664a1c 100644 (file)
@@ -3,7 +3,10 @@ from __future__ import unicode_literals
 import xml.etree.ElementTree
 
 from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+)
 from ..compat import compat_HTTPError
 
 
@@ -112,6 +115,20 @@ class BBCCoUkIE(InfoExtractor):
                 # rtmp download
                 'skip_download': True,
             }
+        }, {
+            'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls',
+            'info_dict': {
+                'id': 'p02n76xf',
+                'ext': 'flv',
+                'title': 'Natural World, 2015-2016: 2. Super Powered Owls',
+                'description': 'md5:e4db5c937d0e95a7c6b5e654d429183d',
+                'duration': 3540,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+            'skip': 'geolocation',
         }, {
             'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
             'only_matching': True,
@@ -326,16 +343,30 @@ class BBCCoUkIE(InfoExtractor):
 
         webpage = self._download_webpage(url, group_id, 'Downloading video page')
 
-        programme_id = self._search_regex(
-            r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False, default=None)
+        programme_id = None
+        # duration is only assigned from the embedded player JSON below; give
+        # it a default so the vpid regex fallback cannot leave it unbound.
+        duration = None
+
+        tviplayer = self._search_regex(
+            r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById',
+            webpage, 'player', default=None)
+
+        if tviplayer:
+            player = self._parse_json(tviplayer, group_id).get('player', {})
+            duration = int_or_none(player.get('duration'))
+            programme_id = player.get('vpid')
+
+        if not programme_id:
+            programme_id = self._search_regex(
+                r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False, default=None)
+
         if programme_id:
-            player = self._download_json(
-                'http://www.bbc.co.uk/iplayer/episode/%s.json' % group_id,
-                group_id)['jsConf']['player']
-            title = player['title']
-            description = player['subtitle']
-            duration = player['duration']
             formats, subtitles = self._download_media_selector(programme_id)
+            title = self._og_search_title(webpage)
+            description = self._search_regex(
+                r'<p class="medium-description">([^<]+)</p>',
+                webpage, 'description', fatal=False)
         else:
             programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
 
@@ -345,6 +376,7 @@ class BBCCoUkIE(InfoExtractor):
             'id': programme_id,
             'title': title,
             'description': description,
+            'thumbnail': self._og_search_thumbnail(webpage, default=None),
             'duration': duration,
             'formats': formats,
             'subtitles': subtitles,
diff --git a/youtube_dl/extractor/beatportpro.py b/youtube_dl/extractor/beatportpro.py
new file mode 100644 (file)
index 0000000..3c7775d
--- /dev/null
@@ -0,0 +1,121 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+)
+
+
+class BeatportProIE(InfoExtractor):
+    """Extractor for the track previews on pro.beatport.com track pages."""
+
+    _VALID_URL = r'https?://pro\.beatport\.com/track/(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'https://pro.beatport.com/track/synesthesia-original-mix/5379371',
+        'md5': 'b3c34d8639a2f6a7f734382358478887',
+        'info_dict': {
+            'id': '5379371',
+            'display_id': 'synesthesia-original-mix',
+            'ext': 'mp4',
+            'title': 'Froxic - Synesthesia (Original Mix)',
+        },
+    }, {
+        'url': 'https://pro.beatport.com/track/love-and-war-original-mix/3756896',
+        'md5': 'e44c3025dfa38c6577fbaeb43da43514',
+        'info_dict': {
+            'id': '3756896',
+            'display_id': 'love-and-war-original-mix',
+            'ext': 'mp3',
+            'title': 'Wolfgang Gartner - Love & War (Original Mix)',
+        },
+    }, {
+        'url': 'https://pro.beatport.com/track/birds-original-mix/4991738',
+        'md5': 'a1fd8e8046de3950fd039304c186c05f',
+        'info_dict': {
+            'id': '4991738',
+            'display_id': 'birds-original-mix',
+            'ext': 'mp4',
+            'title': "Tos, Middle Milk, Mumblin' Johnsson - Birds (Original Mix)",
+        }
+    }]
+
+    # Per-container audio parameters merged into the preview formats.
+    _PREVIEW_PARAMS = {
+        'mp3': {'preference': 0, 'acodec': 'mp3', 'abr': 96, 'asr': 44100},
+        'mp4': {'preference': 1, 'acodec': 'aac', 'abr': 96, 'asr': 44100},
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the track page at ``url``.
+
+        Raises ExtractorError if the page's Playables data does not
+        contain the requested track.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        track_id = mobj.group('id')
+        display_id = mobj.group('display_id')
+
+        webpage = self._download_webpage(url, display_id)
+
+        playables = self._parse_json(
+            self._search_regex(
+                r'window\.Playables\s*=\s*({.+?});', webpage,
+                'playables info', flags=re.DOTALL),
+            track_id)
+
+        # playables['tracks'] is searched for the track named in the URL.  A
+        # bare next() would leak StopIteration when it is not listed.
+        track = next(
+            (t for t in playables['tracks'] if t['id'] == int(track_id)),
+            None)
+        if track is None:
+            raise ExtractorError(
+                'Unable to find track %s in playables info' % track_id)
+
+        title = ', '.join(a['name'] for a in track['artists']) + ' - ' + track['name']
+        if track.get('mix'):
+            title += ' (' + track['mix'] + ')'
+
+        formats = []
+        for ext, info in track['preview'].items():
+            preview_url = info.get('url')
+            if not preview_url:
+                continue
+            fmt = {
+                'url': preview_url,
+                'ext': ext,
+                'format_id': ext,
+                'vcodec': 'none',
+            }
+            fmt.update(self._PREVIEW_PARAMS.get(ext, {}))
+            formats.append(fmt)
+        self._sort_formats(formats)
+
+        images = []
+        for name, info in track['images'].items():
+            image_url = info.get('url')
+            # The 'dynamic' entry is skipped like entries without a URL.
+            if name == 'dynamic' or not image_url:
+                continue
+            images.append({
+                'id': name,
+                'url': image_url,
+                'height': int_or_none(info.get('height')),
+                'width': int_or_none(info.get('width')),
+            })
+
+        return {
+            # track['id'] is known to exist: the lookup above matched on it.
+            # (compat_str(track.get('id')) or track_id could never fall
+            # back, because compat_str(None) is the truthy string 'None'.)
+            'id': compat_str(track['id']),
+            'display_id': track.get('slug') or display_id,
+            'title': title,
+            'formats': formats,
+            'thumbnails': images,
+        }
index d2abd4d772c95e9877a607af7cc0b6e4d56e123a..26b934543a7ac0d28cabf0ca70610460fc253a2f 100644 (file)
@@ -16,11 +16,11 @@ class BetIE(InfoExtractor):
         {
             'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html',
             'info_dict': {
-                'id': '740ab250-bb94-4a8a-8787-fe0de7c74471',
+                'id': 'news/national/2014/a-conversation-with-president-obama',
                 'display_id': 'in-bet-exclusive-obama-talks-race-and-racism',
                 'ext': 'flv',
-                'title': 'BET News Presents: A Conversation With President Obama',
-                'description': 'md5:5a88d8ae912c1b33e090290af7ec33c6',
+                'title': 'A Conversation With President Obama',
+                'description': 'md5:699d0652a350cf3e491cd15cc745b5da',
                 'duration': 1534,
                 'timestamp': 1418075340,
                 'upload_date': '20141208',
@@ -35,7 +35,7 @@ class BetIE(InfoExtractor):
         {
             'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html',
             'info_dict': {
-                'id': 'bcd1b1df-673a-42cf-8d01-b282db608f2d',
+                'id': 'news/national/2014/justice-for-ferguson-a-community-reacts',
                 'display_id': 'justice-for-ferguson-a-community-reacts',
                 'ext': 'flv',
                 'title': 'Justice for Ferguson: A Community Reacts',
@@ -61,6 +61,9 @@ class BetIE(InfoExtractor):
             [r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"],
             webpage, 'media URL'))
 
+        video_id = self._search_regex(
+            r'/video/(.*)/_jcr_content/', media_url, 'video id')
+
         mrss = self._download_xml(media_url, display_id)
 
         item = mrss.find('./channel/item')
@@ -75,8 +78,6 @@ class BetIE(InfoExtractor):
         description = xpath_text(
             item, './description', 'description', fatal=False)
 
-        video_id = xpath_text(item, './guid', 'video id', fatal=False)
-
         timestamp = parse_iso8601(xpath_text(
             item, xpath_with_ns('./dc:date', NS_MAP),
             'upload date', fatal=False))
index 77b562d99625a30035a38d14e15c6927e8b007e9..4d8cce1ef252fde0ac02dc166d3fb4fff528d1a8 100644 (file)
@@ -2,7 +2,10 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+    int_or_none,
+    fix_xml_ampersands,
+)
 
 
 class BildIE(InfoExtractor):
@@ -15,7 +18,7 @@ class BildIE(InfoExtractor):
             'id': '38184146',
             'ext': 'mp4',
             'title': 'BILD hat sie getestet',
-            'thumbnail': 'http://bilder.bild.de/fotos/stand-das-koennen-die-neuen-ipads-38184138/Bild/1.bild.jpg',
+            'thumbnail': 're:^https?://.*\.jpg$',
             'duration': 196,
             'description': 'Mit dem iPad Air 2 und dem iPad Mini 3 hat Apple zwei neue Tablet-Modelle präsentiert. BILD-Reporter Sven Stein durfte die Geräte bereits testen. ',
         }
@@ -25,7 +28,7 @@ class BildIE(InfoExtractor):
         video_id = self._match_id(url)
 
         xml_url = url.split(".bild.html")[0] + ",view=xml.bild.xml"
-        doc = self._download_xml(xml_url, video_id)
+        doc = self._download_xml(xml_url, video_id, transform_source=fix_xml_ampersands)
 
         duration = int_or_none(doc.attrib.get('duration'), scale=1000)
 
index 75d744852edc382721cee8556067f89ccb0092df..7ca835e31f3477f8e46c74804aea36ecfe789686 100644 (file)
@@ -2,34 +2,45 @@
 from __future__ import unicode_literals
 
 import re
+import itertools
 
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
     unified_strdate,
+    ExtractorError,
 )
 
 
 class BiliBiliIE(InfoExtractor):
     _VALID_URL = r'http://www\.bilibili\.(?:tv|com)/video/av(?P<id>[0-9]+)/'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.bilibili.tv/video/av1074402/',
         'md5': '2c301e4dab317596e837c3e7633e7d86',
         'info_dict': {
-            'id': '1074402',
+            'id': '1074402_part1',
             'ext': 'flv',
             'title': '【金坷垃】金泡沫',
             'duration': 308,
             'upload_date': '20140420',
             'thumbnail': 're:^https?://.+\.jpg',
         },
-    }
+    }, {
+        'url': 'http://www.bilibili.com/video/av1041170/',
+        'info_dict': {
+            'id': '1041170',
+            'title': '【BD1080P】刀语【诸神&异域】',
+        },
+        'playlist_count': 9,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
+        if self._search_regex(r'(此视频不存在或被删除)', webpage, 'error message', default=None):
+            raise ExtractorError('The video does not exist or was deleted', expected=True)
         video_code = self._search_regex(
             r'(?s)<div itemprop="video".*?>(.*?)</div>', webpage, 'video code')
 
@@ -54,19 +65,14 @@ class BiliBiliIE(InfoExtractor):
 
         cid = self._search_regex(r'cid=(\d+)', webpage, 'cid')
 
+        entries = []
+
         lq_doc = self._download_xml(
             'http://interface.bilibili.com/v_cdn_play?appkey=1&cid=%s' % cid,
             video_id,
             note='Downloading LQ video info'
         )
-        lq_durl = lq_doc.find('./durl')
-        formats = [{
-            'format_id': 'lq',
-            'quality': 1,
-            'url': lq_durl.find('./url').text,
-            'filesize': int_or_none(
-                lq_durl.find('./size'), get_attr='text'),
-        }]
+        lq_durls = lq_doc.findall('./durl')
 
         hq_doc = self._download_xml(
             'http://interface.bilibili.com/playurl?appkey=1&cid=%s' % cid,
@@ -74,23 +80,44 @@ class BiliBiliIE(InfoExtractor):
             note='Downloading HQ video info',
             fatal=False,
         )
-        if hq_doc is not False:
-            hq_durl = hq_doc.find('./durl')
-            formats.append({
-                'format_id': 'hq',
-                'quality': 2,
-                'ext': 'flv',
-                'url': hq_durl.find('./url').text,
+        hq_durls = hq_doc.findall('./durl') if hq_doc is not False else itertools.repeat(None)
+
+        assert len(lq_durls) == len(hq_durls)
+
+        i = 1
+        for lq_durl, hq_durl in zip(lq_durls, hq_durls):
+            formats = [{
+                'format_id': 'lq',
+                'quality': 1,
+                'url': lq_durl.find('./url').text,
                 'filesize': int_or_none(
-                    hq_durl.find('./size'), get_attr='text'),
+                    lq_durl.find('./size'), get_attr='text'),
+            }]
+            if hq_durl:
+                formats.append({
+                    'format_id': 'hq',
+                    'quality': 2,
+                    'ext': 'flv',
+                    'url': hq_durl.find('./url').text,
+                    'filesize': int_or_none(
+                        hq_durl.find('./size'), get_attr='text'),
+                })
+            self._sort_formats(formats)
+
+            entries.append({
+                'id': '%s_part%d' % (video_id, i),
+                'title': title,
+                'formats': formats,
+                'duration': duration,
+                'upload_date': upload_date,
+                'thumbnail': thumbnail,
             })
 
-        self._sort_formats(formats)
+            i += 1
+
         return {
+            '_type': 'multi_video',
+            'entries': entries,
             'id': video_id,
-            'title': title,
-            'formats': formats,
-            'duration': duration,
-            'upload_date': upload_date,
-            'thumbnail': thumbnail,
+            'title': title
         }
index 8c7ba4b910bcc78e5e3fa02d7168a9e4f443bf65..fb56cd78d07ab396ae26ff5a15ac7ebdef933237 100644 (file)
@@ -102,6 +102,15 @@ class BlipTVIE(InfoExtractor):
         },
     ]
 
+    @staticmethod
+    def _extract_url(webpage):
+        mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
+        if mobj:
+            return 'http://blip.tv/a/a-' + mobj.group(1)
+        mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage)
+        if mobj:
+            return mobj.group(1)
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         lookup_id = mobj.group('lookup_id')
@@ -172,6 +181,7 @@ class BlipTVIE(InfoExtractor):
                     'width': int_or_none(media_content.get('width')),
                     'height': int_or_none(media_content.get('height')),
                 })
+        self._check_formats(formats, video_id)
         self._sort_formats(formats)
 
         subtitles = self.extract_subtitles(video_id, subtitles_urls)
index 4a88ccd13caf604f3ea892c6784d603434fb06ee..0dca29b712c79a27fb621f094a6f64ab503ba3df 100644 (file)
@@ -6,32 +6,39 @@ from .common import InfoExtractor
 
 
 class BloombergIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<id>.+?)\.html'
+    _VALID_URL = r'https?://www\.bloomberg\.com/news/videos/[^/]+/(?P<id>[^/?#]+)'
 
     _TEST = {
-        'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
+        'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2',
         # The md5 checksum changes
         'info_dict': {
             'id': 'qurhIVlJSB6hzkVi229d8g',
             'ext': 'flv',
             'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
-            'description': 'md5:0681e0d30dcdfc6abf34594961d8ea88',
+            'description': 'md5:a8ba0302912d03d246979735c17d2761',
         },
     }
 
     def _real_extract(self, url):
         name = self._match_id(url)
         webpage = self._download_webpage(url, name)
-
-        f4m_url = self._search_regex(
-            r'<source src="(https?://[^"]+\.f4m.*?)"', webpage,
-            'f4m url')
+        video_id = self._search_regex(r'"bmmrId":"(.+?)"', webpage, 'id')
         title = re.sub(': Video$', '', self._og_search_title(webpage))
 
+        embed_info = self._download_json(
+            'http://www.bloomberg.com/api/embed?id=%s' % video_id, video_id)
+        formats = []
+        for stream in embed_info['streams']:
+            if stream["muxing_format"] == "TS":
+                formats.extend(self._extract_m3u8_formats(stream['url'], video_id))
+            else:
+                formats.extend(self._extract_f4m_formats(stream['url'], video_id))
+        self._sort_formats(formats)
+
         return {
-            'id': name.split('-')[-1],
+            'id': video_id,
             'title': title,
-            'formats': self._extract_f4m_formats(f4m_url, name),
+            'formats': formats,
             'description': self._og_search_description(webpage),
             'thumbnail': self._og_search_thumbnail(webpage),
         }
index 45ba5173246575ab617dbab911280b75d61d61e8..66e394e1093105b936191da798734128d4ea1afe 100644 (file)
@@ -16,27 +16,38 @@ class BRIE(InfoExtractor):
 
     _TESTS = [
         {
-            'url': 'http://www.br.de/mediathek/video/sendungen/heimatsound/heimatsound-festival-2014-trailer-100.html',
-            'md5': '93556dd2bcb2948d9259f8670c516d59',
+            'url': 'http://www.br.de/mediathek/video/sendungen/abendschau/betriebliche-altersvorsorge-104.html',
+            'md5': '83a0477cf0b8451027eb566d88b51106',
             'info_dict': {
-                'id': '25e279aa-1ffd-40fd-9955-5325bd48a53a',
+                'id': '48f656ef-287e-486f-be86-459122db22cc',
                 'ext': 'mp4',
-                'title': 'Wenn das Traditions-Theater wackelt',
-                'description': 'Heimatsound-Festival 2014: Wenn das Traditions-Theater wackelt',
-                'duration': 34,
-                'uploader': 'BR',
-                'upload_date': '20140802',
+                'title': 'Die böse Überraschung',
+                'description': 'Betriebliche Altersvorsorge: Die böse Überraschung',
+                'duration': 180,
+                'uploader': 'Reinhard Weber',
+                'upload_date': '20150422',
             }
         },
         {
-            'url': 'http://www.br.de/nachrichten/schaeuble-haushaltsentwurf-bundestag-100.html',
-            'md5': '3db0df1a9a9cd9fa0c70e6ea8aa8e820',
+            'url': 'http://www.br.de/nachrichten/oberbayern/inhalt/muenchner-polizeipraesident-schreiber-gestorben-100.html',
+            'md5': 'a44396d73ab6a68a69a568fae10705bb',
             'info_dict': {
-                'id': 'c6aae3de-2cf9-43f2-957f-f17fef9afaab',
+                'id': 'a4b83e34-123d-4b81-9f4e-c0d3121a4e05',
+                'ext': 'mp4',
+                'title': 'Manfred Schreiber ist tot',
+                'description': 'Abendschau kompakt: Manfred Schreiber ist tot',
+                'duration': 26,
+            }
+        },
+        {
+            'url': 'http://www.br.de/radio/br-klassik/sendungen/allegro/premiere-urauffuehrung-the-land-2015-dance-festival-muenchen-100.html',
+            'md5': '8b5b27c0b090f3b35eac4ab3f7a73d3d',
+            'info_dict': {
+                'id': '74c603c9-26d3-48bb-b85b-079aeed66e0b',
                 'ext': 'aac',
-                'title': '"Keine neuen Schulden im nächsten Jahr"',
-                'description': 'Haushaltsentwurf: "Keine neuen Schulden im nächsten Jahr"',
-                'duration': 64,
+                'title': 'Kurzweilig und sehr bewegend',
+                'description': '"The Land" von Peeping Tom: Kurzweilig und sehr bewegend',
+                'duration': 296,
             }
         },
         {
index 4bcc897c95229ea0ee509fe53443d355309a66aa..809287d144ca7d629bf42bad7ac4e213a323e6dd 100644 (file)
@@ -41,7 +41,7 @@ class BreakIE(InfoExtractor):
             'tbr': media['bitRate'],
             'width': media['width'],
             'height': media['height'],
-        } for media in info['media']]
+        } for media in info['media'] if media.get('mediaPurpose') == 'play']
 
         if not formats:
             formats.append({
index 0733bece7c45880ab5c20b916d5bd8c9700da548..4f60d53660fa7777b9e1b6152967ce2e7e567ec9 100644 (file)
@@ -117,7 +117,10 @@ class BrightcoveIE(InfoExtractor):
         object_str = re.sub(r'(<object[^>]*)(xmlns=".*?")', r'\1', object_str)
         object_str = fix_xml_ampersands(object_str)
 
-        object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
+        try:
+            object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
+        except xml.etree.ElementTree.ParseError:
+            return
 
         fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
         if fv_el is not None:
@@ -183,9 +186,9 @@ class BrightcoveIE(InfoExtractor):
             (?:
                 [^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] |
                 [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
-            ).+?</object>''',
+            ).+?>\s*</object>''',
             webpage)
-        return [cls._build_brighcove_url(m) for m in matches]
+        return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))
 
     def _real_extract(self, url):
         url, smuggled_data = unsmuggle_url(url, {})
index 6252be05b7f4b57787152b4edae5378675a96847..3b2de517e53da39e06912ce1a97c4aafe7fa250e 100644 (file)
@@ -16,7 +16,7 @@ class BYUtvIE(InfoExtractor):
             'ext': 'mp4',
             'description': 'md5:5438d33774b6bdc662f9485a340401cc',
             'title': 'Season 5 Episode 5',
-            'thumbnail': 're:^https?://.*promo.*'
+            'thumbnail': 're:^https?://.*\.jpg$'
         },
         'params': {
             'skip_download': True,
index 1b14471e57198c2a04833089c174c0c6c3108ab8..699b4f7d08b1928ffa1799adc755774977a84237 100644 (file)
@@ -25,14 +25,14 @@ class CanalplusIE(InfoExtractor):
     }
 
     _TESTS = [{
-        'url': 'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470',
-        'md5': '3db39fb48b9685438ecf33a1078023e4',
+        'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1263092',
+        'md5': 'b3481d7ca972f61e37420798d0a9d934',
         'info_dict': {
-            'id': '922470',
+            'id': '1263092',
             'ext': 'flv',
-            'title': 'Zapping - 26/08/13',
-            'description': 'Le meilleur de toutes les chaînes, tous les jours.\nEmission du 26 août 2013',
-            'upload_date': '20130826',
+            'title': 'Le Zapping - 13/05/15',
+            'description': 'md5:09738c0d06be4b5d06a0940edb0da73f',
+            'upload_date': '20150513',
         },
     }, {
         'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190',
@@ -56,7 +56,7 @@ class CanalplusIE(InfoExtractor):
         'skip': 'videos get deleted after a while',
     }, {
         'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559',
-        'md5': '65aa83ad62fe107ce29e564bb8712580',
+        'md5': 'f3a46edcdf28006598ffaf5b30e6a2d4',
         'info_dict': {
             'id': '1213714',
             'ext': 'flv',
diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py
new file mode 100644 (file)
index 0000000..cf0a755
--- /dev/null
@@ -0,0 +1,99 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+from .bliptv import BlipTVIE
+
+
+class CinemassacreIE(InfoExtractor):
+    _VALID_URL = 'https?://(?:www\.)?cinemassacre\.com/(?P<date_y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
+    _TESTS = [
+        {
+            'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
+            'md5': 'fde81fbafaee331785f58cd6c0d46190',
+            'info_dict': {
+                'id': 'Cinemassacre-19911',
+                'ext': 'mp4',
+                'upload_date': '20121110',
+                'title': '“Angry Video Game Nerd: The Movie” – Trailer',
+                'description': 'md5:fb87405fcb42a331742a0dce2708560b',
+            },
+        },
+        {
+            'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
+            'md5': 'd72f10cd39eac4215048f62ab477a511',
+            'info_dict': {
+                'id': 'Cinemassacre-521be8ef82b16',
+                'ext': 'mp4',
+                'upload_date': '20131002',
+                'title': 'The Mummy’s Hand (1940)',
+            },
+        },
+        {
+            # blip.tv embedded video
+            'url': 'http://cinemassacre.com/2006/12/07/chronologically-confused-about-bad-movie-and-video-game-sequel-titles/',
+            'md5': 'ca9b3c8dd5a66f9375daeb5135f5a3de',
+            'info_dict': {
+                'id': '4065369',
+                'ext': 'flv',
+                'title': 'AVGN: Chronologically Confused about Bad Movie and Video Game Sequel Titles',
+                'upload_date': '20061207',
+                'uploader': 'cinemassacre',
+                'uploader_id': '250778',
+                'timestamp': 1283233867,
+                'description': 'md5:0a108c78d130676b207d0f6d029ecffd',
+            }
+        },
+        {
+            # Youtube embedded video
+            'url': 'http://cinemassacre.com/2006/09/01/mckids/',
+            'md5': '6eb30961fa795fedc750eac4881ad2e1',
+            'info_dict': {
+                'id': 'FnxsNhuikpo',
+                'ext': 'mp4',
+                'upload_date': '20060901',
+                'uploader': 'Cinemassacre Extras',
+                'description': 'md5:de9b751efa9e45fbaafd9c8a1123ed53',
+                'uploader_id': 'Cinemassacre',
+                'title': 'AVGN: McKids',
+            }
+        }
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('display_id')
+        video_date = mobj.group('date_y') + mobj.group('date_m') + mobj.group('date_d')
+
+        webpage = self._download_webpage(url, display_id)
+
+        playerdata_url = self._search_regex(
+            [
+                r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
+                r'<iframe[^>]+src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
+            ],
+            webpage, 'player data URL', default=None)
+        if not playerdata_url:
+            playerdata_url = BlipTVIE._extract_url(webpage)
+        if not playerdata_url:
+            raise ExtractorError('Unable to find player data')
+
+        video_title = self._html_search_regex(
+            r'<title>(?P<title>.+?)\|', webpage, 'title')
+        video_description = self._html_search_regex(
+            r'<div class="entry-content">(?P<description>.+?)</div>',
+            webpage, 'description', flags=re.DOTALL, fatal=False)
+        video_thumbnail = self._og_search_thumbnail(webpage)
+
+        return {
+            '_type': 'url_transparent',
+            'display_id': display_id,
+            'title': video_title,
+            'description': video_description,
+            'upload_date': video_date,
+            'thumbnail': video_thumbnail,
+            'url': playerdata_url,
+        }
index abf8cc280b3d6f1aeefe8219a1fd0ea5d1224be1..0fa720ee8745cfc728b4413b41888e17787fb5db 100644 (file)
@@ -105,6 +105,7 @@ class CloudyIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         file_key = self._search_regex(
-            r'filekey\s*=\s*"([^"]+)"', webpage, 'file_key')
+            [r'key\s*:\s*"([^"]+)"', r'filekey\s*=\s*"([^"]+)"'],
+            webpage, 'file_key')
 
         return self._extract_video(video_host, video_id, file_key)
index 90ea074387ef6afe4aaa87a41c13ec6cf5a1aa7b..5efc5f4fe556a4424542b441a83f2d6dbd5bc8e7 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 class CNNIE(InfoExtractor):
     _VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/
-        (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:cnn|hln)(?:-ap)?|(?=&)))'''
+        (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z]{3,5})(?:-ap)?|(?=&)))'''
 
     _TESTS = [{
         'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
@@ -45,6 +45,12 @@ class CNNIE(InfoExtractor):
             'description': 'md5:e7223a503315c9f150acac52e76de086',
             'upload_date': '20141222',
         }
+    }, {
+        'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk',
+        'only_matching': True,
+    }, {
+        'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index e5edcc84b69ef7bdffdbb7ed158c901c560a7575..91ebb0ce57136dc0076927acdca4e250774746e1 100644 (file)
@@ -201,7 +201,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
 
         uri = mMovieParams[0][1]
         # Correct cc.com in uri
-        uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.cc.com', uri)
+        uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.com', uri)
 
         index_url = 'http://%s.cc.com/feeds/mrss?%s' % (show_name, compat_urllib_parse.urlencode({'uri': uri}))
         idoc = self._download_xml(
index 7977fa8d00faa01e95665e347fda4c492ab91ec0..65bb7708638a20a7e162c8ad8a06321434461ae6 100644 (file)
@@ -23,6 +23,7 @@ from ..compat import (
 )
 from ..utils import (
     age_restricted,
+    bug_reports_message,
     clean_html,
     compiled_regex_type,
     ExtractorError,
@@ -46,7 +47,7 @@ class InfoExtractor(object):
     information possibly downloading the video to the file system, among
     other possible outcomes.
 
-    The type field determines the the type of the result.
+    The type field determines the type of the result.
     By far the most common value (and the default if _type is missing) is
     "video", which indicates a single video.
 
@@ -110,11 +111,8 @@ class InfoExtractor(object):
                                   (quality takes higher priority)
                                  -1 for default (order by other properties),
                                  -2 or smaller for less than default.
-                    * http_method  HTTP method to use for the download.
                     * http_headers  A dictionary of additional HTTP headers
                                  to add to the request.
-                    * http_post_data  Additional data to send with a POST
-                                 request.
                     * stretched_ratio  If given and not 1, indicates that the
                                  video's pixels are not square.
                                  width : height ratio as float.
@@ -324,7 +322,7 @@ class InfoExtractor(object):
                 self._downloader.report_warning(errmsg)
                 return False
 
-    def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
+    def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None):
         """ Returns a tuple (page content as string, URL handle) """
         # Strip hashes from the URL (#1038)
         if isinstance(url_or_request, (compat_str, str)):
@@ -334,14 +332,11 @@ class InfoExtractor(object):
         if urlh is False:
             assert not fatal
             return False
-        content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal)
+        content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal, encoding=encoding)
         return (content, urlh)
 
-    def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None):
-        content_type = urlh.headers.get('Content-Type', '')
-        webpage_bytes = urlh.read()
-        if prefix is not None:
-            webpage_bytes = prefix + webpage_bytes
+    @staticmethod
+    def _guess_encoding_from_content(content_type, webpage_bytes):
         m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
         if m:
             encoding = m.group(1)
@@ -354,6 +349,16 @@ class InfoExtractor(object):
                 encoding = 'utf-16'
             else:
                 encoding = 'utf-8'
+
+        return encoding
+
+    def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
+        content_type = urlh.headers.get('Content-Type', '')
+        webpage_bytes = urlh.read()
+        if prefix is not None:
+            webpage_bytes = prefix + webpage_bytes
+        if not encoding:
+            encoding = self._guess_encoding_from_content(content_type, webpage_bytes)
         if self._downloader.params.get('dump_intermediate_pages', False):
             try:
                 url = url_or_request.get_full_url()
@@ -410,13 +415,13 @@ class InfoExtractor(object):
 
         return content
 
-    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5):
+    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None):
         """ Returns the data of the page as a string """
         success = False
         try_count = 0
         while success is False:
             try:
-                res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal)
+                res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal, encoding=encoding)
                 success = True
             except compat_http_client.IncompleteRead as e:
                 try_count += 1
@@ -431,10 +436,10 @@ class InfoExtractor(object):
 
     def _download_xml(self, url_or_request, video_id,
                       note='Downloading XML', errnote='Unable to download XML',
-                      transform_source=None, fatal=True):
+                      transform_source=None, fatal=True, encoding=None):
         """Return the xml as an xml.etree.ElementTree.Element"""
         xml_string = self._download_webpage(
-            url_or_request, video_id, note, errnote, fatal=fatal)
+            url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding)
         if xml_string is False:
             return xml_string
         if transform_source:
@@ -445,9 +450,10 @@ class InfoExtractor(object):
                        note='Downloading JSON metadata',
                        errnote='Unable to download JSON metadata',
                        transform_source=None,
-                       fatal=True):
+                       fatal=True, encoding=None):
         json_string = self._download_webpage(
-            url_or_request, video_id, note, errnote, fatal=fatal)
+            url_or_request, video_id, note, errnote, fatal=fatal,
+            encoding=encoding)
         if (not fatal) and json_string is False:
             return None
         return self._parse_json(
@@ -492,7 +498,7 @@ class InfoExtractor(object):
 
     # Methods for following #608
     @staticmethod
-    def url_result(url, ie=None, video_id=None):
+    def url_result(url, ie=None, video_id=None, video_title=None):
         """Returns a url that points to a page that should be processed"""
         # TODO: ie should be the class used for getting the info
         video_info = {'_type': 'url',
@@ -500,6 +506,8 @@ class InfoExtractor(object):
                       'ie_key': ie}
         if video_id is not None:
             video_info['id'] = video_id
+        if video_title is not None:
+            video_info['title'] = video_title
         return video_info
 
     @staticmethod
@@ -546,8 +554,7 @@ class InfoExtractor(object):
         elif fatal:
             raise RegexNotFoundError('Unable to extract %s' % _name)
         else:
-            self._downloader.report_warning('unable to extract %s; '
-                                            'please report this issue on http://yt-dl.org/bug' % _name)
+            self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
             return None
 
     def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
@@ -562,7 +569,7 @@ class InfoExtractor(object):
 
     def _get_login_info(self):
         """
-        Get the the login info as (username, password)
+        Get the login info as (username, password)
         It will look in the netrc file using the _NETRC_MACHINE value
         If there's no info available, return (None, None)
         """
@@ -698,7 +705,7 @@ class InfoExtractor(object):
         return self._html_search_meta('twitter:player', html,
                                       'twitter card player')
 
-    def _sort_formats(self, formats):
+    def _sort_formats(self, formats, field_preference=None):
         if not formats:
             raise ExtractorError('No video formats found')
 
@@ -708,6 +715,9 @@ class InfoExtractor(object):
             if not f.get('ext') and 'url' in f:
                 f['ext'] = determine_ext(f['url'])
 
+            if isinstance(field_preference, (list, tuple)):
+                return tuple(f.get(field) if f.get(field) is not None else -1 for field in field_preference)
+
             preference = f.get('preference')
             if preference is None:
                 proto = f.get('protocol')
@@ -754,7 +764,7 @@ class InfoExtractor(object):
                 f.get('fps') if f.get('fps') is not None else -1,
                 f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
                 f.get('source_preference') if f.get('source_preference') is not None else -1,
-                f.get('format_id'),
+                f.get('format_id') if f.get('format_id') is not None else '',
             )
         formats.sort(key=_formats_key)
 
@@ -767,6 +777,10 @@ class InfoExtractor(object):
                 formats)
 
     def _is_valid_url(self, url, video_id, item='video'):
+        url = self._proto_relative_url(url, scheme='http:')
+        # For now assume non HTTP(S) URLs always valid
+        if not (url.startswith('http://') or url.startswith('https://')):
+            return True
         try:
             self._request_webpage(url, video_id, 'Checking %s URL' % item)
             return True
@@ -818,7 +832,7 @@ class InfoExtractor(object):
                                 (media_el.attrib.get('href') or media_el.attrib.get('url')))
             tbr = int_or_none(media_el.attrib.get('bitrate'))
             formats.append({
-                'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])),
+                'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])),
                 'url': manifest_url,
                 'ext': 'flv',
                 'tbr': tbr,
@@ -835,7 +849,7 @@ class InfoExtractor(object):
                               m3u8_id=None):
 
         formats = [{
-            'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-meta'])),
+            'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
             'url': m3u8_url,
             'ext': ext,
             'protocol': 'm3u8',
@@ -879,8 +893,13 @@ class InfoExtractor(object):
                     formats.append({'url': format_url(line)})
                     continue
                 tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
+                format_id = []
+                if m3u8_id:
+                    format_id.append(m3u8_id)
+                last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') != 'SUBTITLES' else None
+                format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats)))
                 f = {
-                    'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-%d' % (tbr if tbr else len(formats))])),
+                    'format_id': '-'.join(format_id),
                     'url': format_url(line.strip()),
                     'tbr': tbr,
                     'ext': ext,
index cf763ee7e03019adc5f957060b0f45e52e532084..94d03ce2af108a4a711f09f3db9fecd5bd62566e 100644 (file)
@@ -11,39 +11,65 @@ from ..utils import (
 
 class CrackedIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?cracked\.com/video_(?P<id>\d+)_[\da-z-]+\.html'
-    _TEST = {
+    _TESTS = [{
+        'url': 'http://www.cracked.com/video_19070_if-animal-actors-got-e21-true-hollywood-stories.html',
+        'md5': '89b90b9824e3806ca95072c4d78f13f7',
+        'info_dict': {
+            'id': '19070',
+            'ext': 'mp4',
+            'title': 'If Animal Actors Got E! True Hollywood Stories',
+            'timestamp': 1404954000,
+            'upload_date': '20140710',
+        }
+    }, {
+        # youtube embed
         'url': 'http://www.cracked.com/video_19006_4-plot-holes-you-didnt-notice-in-your-favorite-movies.html',
-        'md5': '4b29a5eeec292cd5eca6388c7558db9e',
+        'md5': 'ccd52866b50bde63a6ef3b35016ba8c7',
         'info_dict': {
-            'id': '19006',
+            'id': 'EjI00A3rZD0',
             'ext': 'mp4',
-            'title': '4 Plot Holes You Didn\'t Notice in Your Favorite Movies',
-            'description': 'md5:3b909e752661db86007d10e5ec2df769',
-            'timestamp': 1405659600,
-            'upload_date': '20140718',
+            'title': "4 Plot Holes You Didn't Notice in Your Favorite Movies - The Spit Take",
+            'description': 'md5:c603708c718b796fe6079e2b3351ffc7',
+            'upload_date': '20140725',
+            'uploader_id': 'Cracked',
+            'uploader': 'Cracked',
         }
-    }
+    }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
 
+        youtube_url = self._search_regex(
+            r'<iframe[^>]+src="((?:https?:)?//www\.youtube\.com/embed/[^"]+)"',
+            webpage, 'youtube url', default=None)
+        if youtube_url:
+            return self.url_result(youtube_url, 'Youtube')
+
         video_url = self._html_search_regex(
-            [r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'<video\s+src="([^"]+)"'], webpage, 'video URL')
+            [r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'<video\s+src="([^"]+)"'],
+            webpage, 'video URL')
+
+        title = self._search_regex(
+            [r'property="?og:title"?\s+content="([^"]+)"', r'class="?title"?>([^<]+)'],
+            webpage, 'title')
 
-        title = self._og_search_title(webpage)
-        description = self._og_search_description(webpage)
+        description = self._search_regex(
+            r'name="?(?:og:)?description"?\s+content="([^"]+)"',
+            webpage, 'description', default=None)
 
-        timestamp = self._html_search_regex(r'<time datetime="([^"]+)"', webpage, 'upload date', fatal=False)
+        timestamp = self._html_search_regex(
+            r'"date"\s*:\s*"([^"]+)"', webpage, 'upload date', fatal=False)
         if timestamp:
             timestamp = parse_iso8601(timestamp[:-6])
 
         view_count = str_to_int(self._html_search_regex(
-            r'<span class="views" id="viewCounts">([\d,\.]+) Views</span>', webpage, 'view count', fatal=False))
+            r'<span\s+class="?views"? id="?viewCounts"?>([\d,\.]+) Views</span>',
+            webpage, 'view count', fatal=False))
         comment_count = str_to_int(self._html_search_regex(
-            r'<span id="commentCounts">([\d,\.]+)</span>', webpage, 'comment count', fatal=False))
+            r'<span\s+id="?commentCounts"?>([\d,\.]+)</span>',
+            webpage, 'comment count', fatal=False))
 
         m = re.search(r'_(?P<width>\d+)X(?P<height>\d+)\.mp4$', video_url)
         if m:
diff --git a/youtube_dl/extractor/crooksandliars.py b/youtube_dl/extractor/crooksandliars.py
new file mode 100644 (file)
index 0000000..443eb76
--- /dev/null
@@ -0,0 +1,76 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    qualities,
+)
+
+
+class CrooksAndLiarsIE(InfoExtractor):
+    """Extract videos from the embed.crooksandliars.com player."""
+
+    _VALID_URL = r'https?://embed\.crooksandliars\.com/(?:embed|v)/(?P<id>[A-Za-z0-9]+)'
+    _TESTS = [{
+        'url': 'https://embed.crooksandliars.com/embed/8RUoRhRi',
+        'info_dict': {
+            'id': '8RUoRhRi',
+            'ext': 'mp4',
+            'title': 'Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!',
+            'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
+            # Raw string so that ``\.`` reaches the regex engine without
+            # relying on an invalid string escape sequence.
+            'thumbnail': r're:^https?://.*\.jpg',
+            'timestamp': 1428207000,
+            'upload_date': '20150405',
+            'uploader': 'Heather',
+            'duration': 236,
+        }
+    }, {
+        'url': 'http://embed.crooksandliars.com/v/MTE3MjUtMzQ2MzA',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dict for the video referenced by ``url``.
+
+        The player page embeds a ``manifest`` JSON object which supplies the
+        metadata and the list of ``flavors`` (media files).  Only flavors
+        whose MIME type starts with ``video/`` become formats.
+        """
+        video_id = self._match_id(url)
+
+        # The /embed/ page is always requested, whichever URL form was given.
+        webpage = self._download_webpage(
+            'http://embed.crooksandliars.com/embed/%s' % video_id, video_id)
+
+        manifest = self._parse_json(
+            self._search_regex(
+                r'var\s+manifest\s*=\s*({.+?})\n', webpage, 'manifest JSON'),
+            video_id)
+
+        quality = qualities(('webm_low', 'mp4_low', 'webm_high', 'mp4_high'))
+
+        formats = []
+        for item in manifest['flavors']:
+            # A flavor without a MIME type is skipped instead of raising.
+            if not (item.get('mime') or '').startswith('video/'):
+                continue
+            formats.append({
+                'url': item['url'],
+                'format_id': item['type'],
+                'quality': quality(item['type']),
+            })
+        self._sort_formats(formats)
+
+        return {
+            'url': url,
+            'id': video_id,
+            'title': manifest['title'],
+            'description': manifest.get('description'),
+            'thumbnail': self._proto_relative_url(manifest.get('poster')),
+            'timestamp': int_or_none(manifest.get('created')),
+            'uploader': manifest.get('author'),
+            'duration': int_or_none(manifest.get('duration')),
+            'formats': formats,
+        }
index f1da7d09bc934af86f08aa45f8a1a3de32fa4673..1c77df47ef346173fc11a58396c98768e5afc986 100644 (file)
@@ -23,12 +23,12 @@ from ..utils import (
 )
 from ..aes import (
     aes_cbc_decrypt,
-    inc,
 )
 
 
 class CrunchyrollIE(InfoExtractor):
     _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
+    _NETRC_MACHINE = 'crunchyroll'
     _TESTS = [{
         'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
         'info_dict': {
@@ -101,13 +101,6 @@ class CrunchyrollIE(InfoExtractor):
 
         key = obfuscate_key(id)
 
-        class Counter:
-            __value = iv
-
-            def next_value(self):
-                temp = self.__value
-                self.__value = inc(self.__value)
-                return temp
         decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
         return zlib.decompress(decrypted_data)
 
@@ -270,8 +263,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
             streamdata = self._download_xml(
                 streamdata_req, video_id,
                 note='Downloading media info for %s' % video_format)
-            video_url = streamdata.find('.//host').text
-            video_play_path = streamdata.find('.//file').text
+            video_url = streamdata.find('./host').text
+            video_play_path = streamdata.find('./file').text
             formats.append({
                 'url': video_url,
                 'play_path': video_play_path,
index 955119d40be3797e073b030790b3685b7ca4be15..fbefd37d09a98bb19c82b4c09b7b08c99d147d35 100644 (file)
@@ -7,7 +7,10 @@ from ..utils import (
     int_or_none,
     unescapeHTML,
     find_xpath_attr,
+    smuggle_url,
+    determine_ext,
 )
+from .senateisvp import SenateISVPIE
 
 
 class CSpanIE(InfoExtractor):
@@ -35,11 +38,22 @@ class CSpanIE(InfoExtractor):
         }
     }, {
         'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall',
+        'md5': '446562a736c6bf97118e389433ed88d4',
         'info_dict': {
             'id': '342759',
+            'ext': 'mp4',
             'title': 'General Motors Ignition Switch Recall',
+            'duration': 14848,
+            'description': 'md5:70c7c3b8fa63fa60d42772440596034c'
         },
-        'playlist_duration_sum': 14855,
+    }, {
+        # Video from senate.gov
+        'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers',
+        'info_dict': {
+            'id': 'judiciary031715',
+            'ext': 'flv',
+            'title': 'Immigration Reforms Needed to Protect Skilled American Workers',
+        }
     }]
 
     def _real_extract(self, url):
@@ -56,7 +70,7 @@ class CSpanIE(InfoExtractor):
                 # present, otherwise this is a stripped version
                 r'<p class=\'initial\'>(.*?)</p>'
             ],
-            webpage, 'description', flags=re.DOTALL)
+            webpage, 'description', flags=re.DOTALL, default=None)
 
         info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id
         data = self._download_json(info_url, video_id)
@@ -68,7 +82,16 @@ class CSpanIE(InfoExtractor):
         title = find_xpath_attr(doc, './/string', 'name', 'title').text
         thumbnail = find_xpath_attr(doc, './/string', 'name', 'poster').text
 
+        senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
+        if senate_isvp_url:
+            surl = smuggle_url(senate_isvp_url, {'force_title': title})
+            return self.url_result(surl, 'SenateISVP', video_id, title)
+
         files = data['video']['files']
+        try:
+            capfile = data['video']['capfile']['#text']
+        except KeyError:
+            capfile = None
 
         entries = [{
             'id': '%s_%d' % (video_id, partnum + 1),
@@ -79,11 +102,22 @@ class CSpanIE(InfoExtractor):
             'description': description,
             'thumbnail': thumbnail,
             'duration': int_or_none(f.get('length', {}).get('#text')),
+            'subtitles': {
+                'en': [{
+                    'url': capfile,
+                    'ext': determine_ext(capfile, 'dfxp')
+                }],
+            } if capfile else None,
         } for partnum, f in enumerate(files)]
 
-        return {
-            '_type': 'playlist',
-            'entries': entries,
-            'title': title,
-            'id': video_id,
-        }
+        if len(entries) == 1:
+            entry = dict(entries[0])
+            entry['id'] = video_id
+            return entry
+        else:
+            return {
+                '_type': 'playlist',
+                'entries': entries,
+                'title': title,
+                'id': video_id,
+            }
index 42b20a46ddefc1e4a7e66aacd0d959a1e062618f..db10b8d00b7482b157bd9dd0ecc9ef9b8191ce88 100644 (file)
@@ -25,8 +25,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
     def _build_request(url):
         """Build a request with the family filter disabled"""
         request = compat_urllib_request.Request(url)
-        request.add_header('Cookie', 'family_filter=off')
-        request.add_header('Cookie', 'ff=off')
+        request.add_header('Cookie', 'family_filter=off; ff=off')
         return request
 
 
@@ -46,13 +45,14 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
 
     _TESTS = [
         {
-            'url': 'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
-            'md5': '392c4b85a60a90dc4792da41ce3144eb',
+            'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
+            'md5': '2137c41a8e78554bb09225b8eb322406',
             'info_dict': {
-                'id': 'x33vw9',
+                'id': 'x2iuewm',
                 'ext': 'mp4',
-                'uploader': 'Amphora Alex and Van .',
-                'title': 'Tutoriel de Youtubeur"DL DES VIDEO DE YOUTUBE"',
+                'uploader': 'IGN',
+                'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
+                'upload_date': '20150306',
             }
         },
         # Vevo video
@@ -86,7 +86,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        url = 'http://www.dailymotion.com/video/%s' % video_id
+        url = 'https://www.dailymotion.com/video/%s' % video_id
 
         # Retrieve video webpage to extract further information
         request = self._build_request(url)
@@ -107,13 +107,14 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
         age_limit = self._rta_search(webpage)
 
         video_upload_date = None
-        mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
+        mobj = re.search(r'<meta property="video:release_date" content="([0-9]{4})-([0-9]{2})-([0-9]{2}).+?"/>', webpage)
         if mobj is not None:
-            video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
+            video_upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3)
 
-        embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
-        embed_page = self._download_webpage(embed_url, video_id,
-                                            'Downloading embed page')
+        embed_url = 'https://www.dailymotion.com/embed/video/%s' % video_id
+        embed_request = self._build_request(embed_url)
+        embed_page = self._download_webpage(
+            embed_request, video_id, 'Downloading embed page')
         info = self._search_regex(r'var info = ({.*?}),$', embed_page,
                                   'video info', flags=re.MULTILINE)
         info = json.loads(info)
@@ -224,7 +225,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
 
 class DailymotionUserIE(DailymotionPlaylistIE):
     IE_NAME = 'dailymotion:user'
-    _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?:old/)?user/(?P<user>[^/]+)'
     _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
     _TESTS = [{
         'url': 'https://www.dailymotion.com/user/nqtv',
diff --git a/youtube_dl/extractor/dhm.py b/youtube_dl/extractor/dhm.py
new file mode 100644 (file)
index 0000000..3ed1f16
--- /dev/null
@@ -0,0 +1,88 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    xpath_text,
+    parse_duration,
+)
+
+
+class DHMIE(InfoExtractor):
+    """Extract films from the Deutsches Historisches Museum film archive."""
+
+    IE_DESC = 'Filmarchiv - Deutsches Historisches Museum'
+    _VALID_URL = r'https?://(?:www\.)?dhm\.de/filmarchiv/(?:[^/]+/)+(?P<id>[^/]+)'
+
+    _TESTS = [{
+        'url': 'http://www.dhm.de/filmarchiv/die-filme/the-marshallplan-at-work-in-west-germany/',
+        'md5': '11c475f670209bf6acca0b2b7ef51827',
+        'info_dict': {
+            'id': 'the-marshallplan-at-work-in-west-germany',
+            'ext': 'flv',
+            'title': 'MARSHALL PLAN AT WORK IN WESTERN GERMANY, THE',
+            'description': 'md5:1fabd480c153f97b07add61c44407c82',
+            'duration': 660,
+            # Raw strings keep ``\.`` a regex escape, not a string escape.
+            'thumbnail': r're:^https?://.*\.jpg$',
+        },
+    }, {
+        'url': 'http://www.dhm.de/filmarchiv/02-mapping-the-wall/peter-g/rolle-1/',
+        'md5': '09890226332476a3e3f6f2cb74734aa5',
+        'info_dict': {
+            'id': 'rolle-1',
+            'ext': 'flv',
+            'title': 'ROLLE 1',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dict for the film page at ``url``.
+
+        The page names an XSPF playlist; the media URL and thumbnail come from
+        that playlist's track, the remaining metadata from the page itself.
+
+        Raises ExtractorError if the playlist contains no track.
+        """
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        playlist_url = self._search_regex(
+            r"file\s*:\s*'([^']+)'", webpage, 'playlist url')
+
+        playlist = self._download_xml(playlist_url, video_id)
+
+        track = playlist.find(
+            './{http://xspf.org/ns/0/}trackList/{http://xspf.org/ns/0/}track')
+        if track is None:
+            # Fail with a readable message instead of failing inside the
+            # element lookups below.
+            raise ExtractorError('Unable to find track in playlist')
+
+        video_url = xpath_text(
+            track, './{http://xspf.org/ns/0/}location',
+            'video url', fatal=True)
+        thumbnail = xpath_text(
+            track, './{http://xspf.org/ns/0/}image',
+            'thumbnail')
+
+        title = self._search_regex(
+            [r'dc:title="([^"]+)"', r'<title> &raquo;([^<]+)</title>'],
+            webpage, 'title').strip()
+        description = self._html_search_regex(
+            r'<p><strong>Description:</strong>(.+?)</p>',
+            webpage, 'description', default=None)
+        duration = parse_duration(self._search_regex(
+            r'<em>Length\s*</em>\s*:\s*</strong>([^<]+)',
+            webpage, 'duration', default=None))
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'thumbnail': thumbnail,
+        }
index f51d88a986b79d65cae3c1604ee3d16e9515c0fd..e9ca236d4a03c13b1b29b3386535c4262332dab0 100644 (file)
@@ -36,7 +36,8 @@ class DotsubIE(InfoExtractor):
         if not video_url:
             webpage = self._download_webpage(url, video_id)
             video_url = self._search_regex(
-                r'"file"\s*:\s*\'([^\']+)', webpage, 'video url')
+                [r'<source[^>]+src="([^"]+)"', r'"file"\s*:\s*\'([^\']+)'],
+                webpage, 'video url')
 
         return {
             'id': video_id,
diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py
new file mode 100644 (file)
index 0000000..479430c
--- /dev/null
@@ -0,0 +1,127 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import hashlib
+import time
+from .common import InfoExtractor
+from ..utils import (ExtractorError, unescapeHTML)
+from ..compat import (compat_str, compat_basestring)
+
+
+class DouyuTVIE(InfoExtractor):
+    """Extract live streams from douyutv.com rooms."""
+
+    _VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)'
+    _TESTS = [{
+        'url': 'http://www.douyutv.com/iseven',
+        'info_dict': {
+            'id': '17732',
+            'display_id': 'iseven',
+            'ext': 'flv',
+            'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'description': 'md5:c93d6692dde6fe33809a46edcbecca44',
+            # Raw strings keep ``\.`` a regex escape, not a string escape.
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'uploader': '7师傅',
+            'uploader_id': '431925',
+            'is_live': True,
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }, {
+        'url': 'http://www.douyutv.com/85982',
+        'info_dict': {
+            'id': '85982',
+            'display_id': '85982',
+            'ext': 'flv',
+            'title': 're:^小漠从零单排记!——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'description': 'md5:746a2f7a253966a06755a912f0acc0d2',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'uploader': 'douyu小漠',
+            'uploader_id': '3769985',
+            'is_live': True,
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dict for the live room referenced by ``url``.
+
+        Raises ExtractorError (expected) when the API reports an error or the
+        stream is offline.
+        """
+        video_id = self._match_id(url)
+
+        # A purely numeric path component is already the room id; otherwise
+        # the id has to be read from the room page.
+        if video_id.isdigit():
+            room_id = video_id
+        else:
+            page = self._download_webpage(url, video_id)
+            room_id = self._html_search_regex(
+                r'"room_id"\s*:\s*(\d+),', page, 'room id')
+
+        prefix = 'room/%s?aid=android&client_sys=android&time=%d' % (
+            room_id, int(time.time()))
+
+        # The auth parameter is the MD5 hex digest of the request path plus
+        # a fixed suffix.
+        auth = hashlib.md5((prefix + '1231').encode('ascii')).hexdigest()
+        config = self._download_json(
+            'http://www.douyutv.com/api/v1/%s&auth=%s' % (prefix, auth),
+            video_id)
+
+        data = config['data']
+
+        error_code = config.get('error', 0)
+        # Compare by value: ``is not 0`` tests object identity, which is an
+        # implementation detail for integers.
+        if error_code != 0:
+            error_desc = 'Server reported error %s' % error_code
+            if isinstance(data, (compat_str, compat_basestring)):
+                error_desc += ': ' + data
+            raise ExtractorError(error_desc, expected=True)
+
+        show_status = data.get('show_status')
+        # 1 = live, 2 = offline
+        if show_status == '2':
+            raise ExtractorError(
+                'Live stream is offline', expected=True)
+
+        base_url = data['rtmp_url']
+        live_path = data['rtmp_live']
+
+        title = self._live_title(unescapeHTML(data['room_name']))
+        description = data.get('show_details')
+        thumbnail = data.get('room_src')
+
+        uploader = data.get('nickname')
+        uploader_id = data.get('owner_uid')
+
+        multi_formats = data.get('rtmp_multi_bitrate')
+        if not isinstance(multi_formats, dict):
+            multi_formats = {}
+        multi_formats['live'] = live_path
+
+        formats = [{
+            'url': '%s/%s' % (base_url, format_path),
+            'format_id': format_id,
+            'preference': 1 if format_id == 'live' else 0,
+        } for format_id, format_path in multi_formats.items()]
+        self._sort_formats(formats)
+
+        return {
+            'id': room_id,
+            'display_id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'formats': formats,
+            'is_live': True,
+        }
index 69ca75423cb1d4692f1958829dc61bcf5c2bac73..8ac8587be6af564af3674c8ff7e7754364bc311e 100644 (file)
@@ -3,24 +3,33 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import unified_strdate
+from ..utils import (
+    ExtractorError,
+    unified_strdate,
+)
 
 
 class DreiSatIE(InfoExtractor):
     IE_NAME = '3sat'
-    _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
-    _TEST = {
-        'url': 'http://www.3sat.de/mediathek/index.php?obj=36983',
-        'md5': '9dcfe344732808dbfcc901537973c922',
-        'info_dict': {
-            'id': '36983',
-            'ext': 'mp4',
-            'title': 'Kaffeeland Schweiz',
-            'description': 'md5:cc4424b18b75ae9948b13929a0814033',
-            'uploader': '3sat',
-            'upload_date': '20130622'
-        }
-    }
+    _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
+    _TESTS = [
+        {
+            'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
+            'md5': 'be37228896d30a88f315b638900a026e',
+            'info_dict': {
+                'id': '45918',
+                'ext': 'mp4',
+                'title': 'Waidmannsheil',
+                'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
+                'uploader': '3sat',
+                'upload_date': '20140913'
+            }
+        },
+        {
+            'url': 'http://www.3sat.de/mediathek/mediathek.php?mode=play&obj=51066',
+            'only_matching': True,
+        },
+    ]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -28,6 +37,15 @@ class DreiSatIE(InfoExtractor):
         details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
         details_doc = self._download_xml(details_url, video_id, 'Downloading video details')
 
+        status_code = details_doc.find('./status/statuscode')
+        if status_code is not None and status_code.text != 'ok':
+            code = status_code.text
+            if code == 'notVisibleAnymore':
+                message = 'Video %s is not available' % video_id
+            else:
+                message = '%s returned error: %s' % (self.IE_NAME, code)
+            raise ExtractorError(message, expected=True)
+
         thumbnail_els = details_doc.findall('.//teaserimage')
         thumbnails = [{
             'width': int(te.attrib['key'].partition('x')[0]),
index 8257e35a437b075461114fbaf1b4dd2d578f56d8..f25ab319e66d4d5b151cd9a9d4509807b6a88617 100644 (file)
@@ -1,3 +1,4 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor, ExtractorError
@@ -8,16 +9,16 @@ class DRTVIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
 
     _TEST = {
-        'url': 'http://www.dr.dk/tv/se/partiets-mand/partiets-mand-7-8',
-        'md5': '4a7e1dd65cdb2643500a3f753c942f25',
+        'url': 'https://www.dr.dk/tv/se/boern/ultra/panisk-paske/panisk-paske-5',
+        'md5': 'dc515a9ab50577fa14cc4e4b0265168f',
         'info_dict': {
-            'id': 'partiets-mand-7-8',
+            'id': 'panisk-paske-5',
             'ext': 'mp4',
-            'title': 'Partiets mand (7:8)',
-            'description': 'md5:a684b90a8f9336cd4aab94b7647d7862',
-            'timestamp': 1403047940,
-            'upload_date': '20140617',
-            'duration': 1299.040,
+            'title': 'Panisk Påske (5)',
+            'description': 'md5:ca14173c5ab24cd26b0fcc074dff391c',
+            'timestamp': 1426984612,
+            'upload_date': '20150322',
+            'duration': 1455,
         },
     }
 
@@ -26,6 +27,10 @@ class DRTVIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
+        if '>Programmet er ikke længere tilgængeligt' in webpage:
+            raise ExtractorError(
+                'Video %s is not available' % video_id, expected=True)
+
         video_id = self._search_regex(
             r'data-(?:material-identifier|episode-slug)="([^"]+)"',
             webpage, 'video id')
index 6b651778afa2faa57237314cde7a25f3ebb06278..ff78d4fd2e84c390a929f29a052b1dd87abe03d1 100644 (file)
@@ -28,12 +28,12 @@ class DumpIE(InfoExtractor):
         video_url = self._search_regex(
             r's1.addVariable\("file",\s*"([^"]+)"', webpage, 'video URL')
 
-        thumb = self._og_search_thumbnail(webpage)
-        title = self._search_regex(r'<b>([^"]+)</b>', webpage, 'title')
+        title = self._og_search_title(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
 
         return {
             'id': video_id,
             'title': title,
             'url': video_url,
-            'thumbnail': thumb,
+            'thumbnail': thumbnail,
         }
diff --git a/youtube_dl/extractor/dumpert.py b/youtube_dl/extractor/dumpert.py
new file mode 100644 (file)
index 0000000..999fb56
--- /dev/null
@@ -0,0 +1,69 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_request
+from ..utils import qualities
+
+
+class DumpertIE(InfoExtractor):
+    """Extract videos from dumpert.nl mediabase pages."""
+
+    _VALID_URL = r'https?://(?:www\.)?dumpert\.nl/mediabase/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
+    _TEST = {
+        'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/',
+        'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
+        'info_dict': {
+            'id': '6646981/951bc60f',
+            'ext': 'mp4',
+            'title': 'Ik heb nieuws voor je',
+            'description': 'Niet schrikken hoor',
+            # Raw string keeps ``\.`` a regex escape, not a string escape.
+            'thumbnail': r're:^https?://.*\.jpg$',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the media page at ``url``.
+
+        The page carries a base64-encoded JSON object in ``data-files`` that
+        maps a format name to its URL; the ``still`` entry is the thumbnail.
+        """
+        video_id = self._match_id(url)
+
+        # The page is requested with the nsfw/cpc cookies set.
+        req = compat_urllib_request.Request(url)
+        req.add_header('Cookie', 'nsfw=1; cpc=10')
+        webpage = self._download_webpage(req, video_id)
+
+        files_base64 = self._search_regex(
+            r'data-files="([^"]+)"', webpage, 'data files')
+
+        files = self._parse_json(
+            base64.b64decode(files_base64.encode('utf-8')).decode('utf-8'),
+            video_id)
+
+        quality = qualities(['flv', 'mobile', 'tablet', '720p'])
+
+        formats = [{
+            'url': video_url,
+            'format_id': format_id,
+            'quality': quality(format_id),
+        } for format_id, video_url in files.items() if format_id != 'still']
+        self._sort_formats(formats)
+
+        title = self._html_search_meta(
+            'title', webpage) or self._og_search_title(webpage)
+        description = self._html_search_meta(
+            'description', webpage) or self._og_search_description(webpage)
+        thumbnail = files.get('still') or self._og_search_thumbnail(webpage)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'formats': formats
+        }
diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py
new file mode 100644 (file)
index 0000000..688dfc2
--- /dev/null
@@ -0,0 +1,120 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+)
+
+
+class EaglePlatformIE(InfoExtractor):
+    """Extract videos served by eagleplatform.com players."""
+
+    _VALID_URL = r'''(?x)
+                    (?:
+                        eagleplatform:(?P<custom_host>[^/]+):|
+                        https?://(?P<host>.+?\.media\.eagleplatform\.com)/index/player\?.*\brecord_id=
+                    )
+                    (?P<id>\d+)
+                '''
+    _TESTS = [{
+        # http://lenta.ru/news/2015/03/06/navalny/
+        'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201',
+        'md5': '0b7994faa2bd5c0f69a3db6db28d078d',
+        'info_dict': {
+            'id': '227304',
+            'ext': 'mp4',
+            'title': 'Навальный вышел на свободу',
+            'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
+            # Raw strings keep ``\.`` a regex escape, not a string escape.
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 87,
+            'view_count': int,
+            'age_limit': 0,
+        },
+    }, {
+        # http://muz-tv.ru/play/7129/
+        # http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true
+        'url': 'eagleplatform:media.clipyou.ru:12820',
+        'md5': '6c2ebeab03b739597ce8d86339d5a905',
+        'info_dict': {
+            'id': '12820',
+            'ext': 'mp4',
+            'title': "'O Sole Mio",
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 216,
+            'view_count': int,
+        },
+        'skip': 'Georestricted',
+    }]
+
+    def _handle_error(self, response):
+        """Raise ExtractorError if ``response`` carries a non-200 status.
+
+        A response without a ``status`` key is treated as successful.
+        """
+        status = int_or_none(response.get('status', 200))
+        if status != 200:
+            # ``errors`` may be missing; still report the failure then.
+            errors = response.get('errors') or []
+            raise ExtractorError(
+                ' '.join(errors) or 'Server returned status %s' % response.get('status'),
+                expected=True)
+
+    def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', *args, **kwargs):
+        """Download JSON like the base class, then check it for API errors.
+
+        Extra positional and keyword arguments are forwarded unchanged to
+        the base implementation.
+        """
+        response = super(EaglePlatformIE, self)._download_json(
+            url_or_request, video_id, note, *args, **kwargs)
+        # Only a JSON object can carry the status/errors fields.
+        if isinstance(response, dict):
+            self._handle_error(response)
+        return response
+
+    def _real_extract(self, url):
+        """Return the info dict for the record referenced by ``url``."""
+        mobj = re.match(self._VALID_URL, url)
+        host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id')
+
+        player_data = self._download_json(
+            'http://%s/api/player_data?id=%s' % (host, video_id), video_id)
+
+        media = player_data['data']['playlist']['viewports'][0]['medialist'][0]
+
+        title = media['title']
+        description = media.get('description')
+        thumbnail = media.get('snapshot')
+        duration = int_or_none(media.get('duration'))
+        view_count = int_or_none(media.get('views'))
+
+        age_restriction = media.get('age_restriction')
+        age_limit = None
+        if age_restriction:
+            age_limit = 0 if age_restriction == 'allow_all' else 18
+
+        # The "auto" source is itself a JSON document listing m3u8 URLs.
+        m3u8_data = self._download_json(
+            media['sources']['secure_m3u8']['auto'],
+            video_id, 'Downloading m3u8 JSON')
+
+        formats = self._extract_m3u8_formats(
+            m3u8_data['data'][0], video_id,
+            'mp4', entry_protocol='m3u8_native')
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'view_count': view_count,
+            'age_limit': age_limit,
+            'formats': formats,
+        }
index fb5dbbe2b0c7d9bd15b87426e446ce73f903a6eb..0b61ea0ba60218043156d4f90680ff0348e827c7 100644 (file)
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 
 import json
 import random
-import re
 
 from .common import InfoExtractor
 from ..compat import (
@@ -103,20 +102,23 @@ class EightTracksIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        playlist_id = mobj.group('id')
+        playlist_id = self._match_id(url)
 
         webpage = self._download_webpage(url, playlist_id)
 
-        json_like = self._search_regex(
-            r"(?s)PAGE.mix = (.*?);\n", webpage, 'trax information')
-        data = json.loads(json_like)
+        data = self._parse_json(
+            self._search_regex(
+                r"(?s)PAGE\.mix\s*=\s*({.+?});\n", webpage, 'trax information'),
+            playlist_id)
 
         session = str(random.randint(0, 1000000000))
         mix_id = data['id']
         track_count = data['tracks_count']
         duration = data['duration']
         avg_song_duration = float(duration) / track_count
+        # duration is sometimes negative, use predefined avg duration
+        if avg_song_duration <= 0:
+            avg_song_duration = 300
         first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
         next_url = first_url
         entries = []
index fc92ff8253734f151fa973ea4979adbe5c6063bf..02c6a4615c4436fecda86fb152a131f084640612 100644 (file)
@@ -6,56 +6,42 @@ import json
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
-    parse_iso8601,
 )
 
 
 class EllenTVIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)'
-    _TESTS = [{
-        'url': 'http://www.ellentv.com/videos/0-7jqrsr18/',
-        'md5': 'e4af06f3bf0d5f471921a18db5764642',
-        'info_dict': {
-            'id': '0-7jqrsr18',
-            'ext': 'mp4',
-            'title': 'What\'s Wrong with These Photos? A Whole Lot',
-            'description': 'md5:35f152dc66b587cf13e6d2cf4fa467f6',
-            'timestamp': 1406876400,
-            'upload_date': '20140801',
-        }
-    }, {
-        'url': 'http://ellentube.com/videos/0-dvzmabd5/',
-        'md5': '98238118eaa2bbdf6ad7f708e3e4f4eb',
+    _TEST = {
+        'url': 'http://www.ellentv.com/videos/0-ipq1gsai/',
+        'md5': '8e3c576bf2e9bfff4d76565f56f94c9c',
         'info_dict': {
-            'id': '0-dvzmabd5',
+            'id': '0_ipq1gsai',
             'ext': 'mp4',
-            'title': '1 year old twin sister makes her brother laugh',
-            'description': '1 year old twin sister makes her brother laugh',
-            'timestamp': 1419542075,
-            'upload_date': '20141225',
+            'title': 'Fast Fingers of Fate',
+            'description': 'md5:587e79fbbd0d73b148bc596d99ce48e6',
+            'timestamp': 1428035648,
+            'upload_date': '20150403',
+            'uploader_id': 'batchUser',
         }
-    }]
+    }
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        webpage = self._download_webpage(url, video_id)
-        video_url = self._html_search_meta('VideoURL', webpage, 'url')
-        title = self._og_search_title(webpage, default=None) or self._search_regex(
-            r'pageName\s*=\s*"([^"]+)"', webpage, 'title')
-        description = self._html_search_meta(
-            'description', webpage, 'description') or self._og_search_description(webpage)
-        timestamp = parse_iso8601(self._search_regex(
-            r'<span class="publish-date"><time datetime="([^"]+)">',
-            webpage, 'timestamp'))
+        webpage = self._download_webpage(
+            'http://widgets.ellentube.com/videos/%s' % video_id,
+            video_id)
 
-        return {
-            'id': video_id,
-            'url': video_url,
-            'title': title,
-            'description': description,
-            'timestamp': timestamp,
-        }
+        partner_id = self._search_regex(
+            r"var\s+partnerId\s*=\s*'([^']+)", webpage, 'partner id')
+
+        kaltura_id = self._search_regex(
+            [r'id="kaltura_player_([^"]+)"',
+             r"_wb_entry_id\s*:\s*'([^']+)",
+             r'data-kaltura-entry-id="([^"]+)'],
+            webpage, 'kaltura id')
+
+        return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura')
 
 
 class EllenTVClipsIE(InfoExtractor):
@@ -67,7 +53,7 @@ class EllenTVClipsIE(InfoExtractor):
             'id': 'meryl-streep-vanessa-hudgens',
             'title': 'Meryl Streep, Vanessa Hudgens',
         },
-        'playlist_mincount': 9,
+        'playlist_mincount': 7,
     }
 
     def _real_extract(self, url):
@@ -91,4 +77,8 @@ class EllenTVClipsIE(InfoExtractor):
             raise ExtractorError('Failed to download JSON', cause=ve)
 
     def _extract_entries(self, playlist):
-        return [self.url_result(item['url'], 'EllenTV') for item in playlist]
+        return [
+            self.url_result(
+                'kaltura:%s:%s' % (item['kaltura_partner_id'], item['kaltura_entry_id']),
+                'Kaltura')
+            for item in playlist]
index 79e2fbd394681283e07a7146bc51f39a7499324d..316033cf18b42cefead780ceca15b361ebbddac7 100644 (file)
@@ -1,11 +1,20 @@
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
+from ..compat import compat_urllib_parse
+from ..utils import (
+    ExtractorError,
+    unescapeHTML
+)
 
 
 class EroProfileIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/view/(?P<id>[^/]+)'
-    _TEST = {
+    _LOGIN_URL = 'http://www.eroprofile.com/auth/auth.php?'
+    _NETRC_MACHINE = 'eroprofile'
+    _TESTS = [{
         'url': 'http://www.eroprofile.com/m/videos/view/sexy-babe-softcore',
         'md5': 'c26f351332edf23e1ea28ce9ec9de32f',
         'info_dict': {
@@ -16,19 +25,61 @@ class EroProfileIE(InfoExtractor):
             'thumbnail': 're:https?://.*\.jpg',
             'age_limit': 18,
         }
-    }
+    }, {
+        'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file',
+        'md5': '1baa9602ede46ce904c431f5418d8916',
+        'info_dict': {
+            'id': '1133519',
+            'ext': 'm4v',
+            'title': 'Try It On Pee_cut_2.wmv - 4shared.com - file sharing - download movie file',
+            'thumbnail': 're:https?://.*\.jpg',
+            'age_limit': 18,
+        },
+        'skip': 'Requires login',
+    }]
+
+    def _login(self):
+        (username, password) = self._get_login_info()
+        if username is None:
+            return
+
+        query = compat_urllib_parse.urlencode({
+            'username': username,
+            'password': password,
+            'url': 'http://www.eroprofile.com/',
+        })
+        login_url = self._LOGIN_URL + query
+        login_page = self._download_webpage(login_url, None, False)
+
+        m = re.search(r'Your username or password was incorrect\.', login_page)
+        if m:
+            raise ExtractorError(
+                'Wrong username and/or password.', expected=True)
+
+        self.report_login()
+        redirect_url = self._search_regex(
+            r'<script[^>]+?src="([^"]+)"', login_page, 'login redirect url')
+        self._download_webpage(redirect_url, None, False)
+
+    def _real_initialize(self):
+        self._login()
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
 
         webpage = self._download_webpage(url, display_id)
 
+        m = re.search(r'You must be logged in to view this video\.', webpage)
+        if m:
+            raise ExtractorError(
+                'This video requires login. Please specify a username and password and try again.', expected=True)
+
         video_id = self._search_regex(
             [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
             webpage, 'video id', default=None)
 
-        video_url = self._search_regex(
-            r'<source src="([^"]+)', webpage, 'video url')
+        video_url = unescapeHTML(self._search_regex(
+            r'<source src="([^"]+)', webpage, 'video url'))
         title = self._html_search_regex(
             r'Title:</th><td>([^<]+)</td>', webpage, 'title')
         thumbnail = self._search_regex(
index e47f3e27a57aa14e3eee526af8998230b524bb4f..c85b4c458d95882f56675fa135aab1f3492b6194 100644 (file)
 from __future__ import unicode_literals
 
+import json
+
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-    compat_urllib_request,
-)
+from ..compat import compat_urllib_request
+
 from ..utils import (
-    ExtractorError,
-    js_to_json,
-    parse_duration,
+    determine_ext,
+    clean_html,
+    int_or_none,
+    float_or_none,
 )
 
 
+def _decrypt_config(key, string):
+    a = ''
+    i = ''
+    r = ''
+
+    while len(a) < (len(string) / 2):
+        a += key
+
+    a = a[0:int(len(string) / 2)]
+
+    t = 0
+    while t < len(string):
+        i += chr(int(string[t] + string[t + 1], 16))
+        t += 2
+
+    icko = [s for s in i]
+
+    for t, c in enumerate(a):
+        r += chr(ord(c) ^ ord(icko[t]))
+
+    return r
+
+
 class EscapistIE(InfoExtractor):
-    _VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
-    _USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
-    _TEST = {
+    _VALID_URL = r'https?://?(?:www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
+    _TESTS = [{
         'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
         'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
         'info_dict': {
             'id': '6618',
             'ext': 'mp4',
             'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
-            'uploader_id': 'the-escapist-presents',
-            'uploader': 'The Escapist Presents',
             'title': "Breaking Down Baldur's Gate",
             'thumbnail': 're:^https?://.*\.jpg$',
             'duration': 264,
+            'uploader': 'The Escapist',
+        }
+    }, {
+        'url': 'http://www.escapistmagazine.com/videos/view/zero-punctuation/10044-Evolve-One-vs-Multiplayer',
+        'md5': '9e8c437b0dbb0387d3bd3255ca77f6bf',
+        'info_dict': {
+            'id': '10044',
+            'ext': 'mp4',
+            'description': 'This week, Zero Punctuation reviews Evolve.',
+            'title': 'Evolve - One vs Multiplayer',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 304,
+            'uploader': 'The Escapist',
         }
-    }
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage_req = compat_urllib_request.Request(url)
-        webpage_req.add_header('User-Agent', self._USER_AGENT)
-        webpage = self._download_webpage(webpage_req, video_id)
-
-        uploader_id = self._html_search_regex(
-            r"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'",
-            webpage, 'uploader ID', fatal=False)
-        uploader = self._html_search_regex(
-            r"<h1\s+class='headline'>(.*?)</a>",
-            webpage, 'uploader', fatal=False)
-        description = self._html_search_meta('description', webpage)
-        duration = parse_duration(self._html_search_meta('duration', webpage))
-
-        raw_title = self._html_search_meta('title', webpage, fatal=True)
-        title = raw_title.partition(' : ')[2]
-
-        config_url = compat_urllib_parse.unquote(self._html_search_regex(
-            r'''(?x)
-            (?:
-                <param\s+name="flashvars".*?\s+value="config=|
-                flashvars=&quot;config=
-            )
-            (https?://[^"&]+)
-            ''',
-            webpage, 'config URL'))
-
-        formats = []
-        ad_formats = []
-
-        def _add_format(name, cfg_url, quality):
-            cfg_req = compat_urllib_request.Request(cfg_url)
-            cfg_req.add_header('User-Agent', self._USER_AGENT)
-            config = self._download_json(
-                cfg_req, video_id,
-                'Downloading ' + name + ' configuration',
-                'Unable to download ' + name + ' configuration',
-                transform_source=js_to_json)
-
-            playlist = config['playlist']
-            for p in playlist:
-                if p.get('eventCategory') == 'Video':
-                    ar = formats
-                elif p.get('eventCategory') == 'Video Postroll':
-                    ar = ad_formats
-                else:
-                    continue
-
-                ar.append({
-                    'url': p['url'],
-                    'format_id': name,
-                    'quality': quality,
-                    'http_headers': {
-                        'User-Agent': self._USER_AGENT,
-                    },
-                })
-
-        _add_format('normal', config_url, quality=0)
-        hq_url = (config_url +
-                  ('&hq=1' if '?' in config_url else config_url + '?hq=1'))
-        try:
-            _add_format('hq', hq_url, quality=1)
-        except ExtractorError:
-            pass  # That's fine, we'll just use normal quality
+        webpage = self._download_webpage(url, video_id)
+
+        ims_video = self._parse_json(
+            self._search_regex(
+                r'imsVideo\.play\(({.+?})\);', webpage, 'imsVideo'),
+            video_id)
+        video_id = ims_video['videoID']
+        key = ims_video['hash']
+
+        config_req = compat_urllib_request.Request(
+            'http://www.escapistmagazine.com/videos/'
+            'vidconfig.php?videoID=%s&hash=%s' % (video_id, key))
+        config_req.add_header('Referer', url)
+        config = self._download_webpage(config_req, video_id, 'Downloading video config')
+
+        data = json.loads(_decrypt_config(key, config))
+
+        video_data = data['videoData']
+
+        title = clean_html(video_data['title'])
+        duration = float_or_none(video_data.get('duration'), 1000)
+        uploader = video_data.get('publisher')
+
+        formats = [{
+            'url': video['src'],
+            'format_id': '%s-%sp' % (determine_ext(video['src']), video['res']),
+            'height': int_or_none(video.get('res')),
+        } for video in data['files']['videos']]
         self._sort_formats(formats)
 
-        if '/escapist/sales-marketing/' in formats[-1]['url']:
-            raise ExtractorError('This IP address has been blocked by The Escapist', expected=True)
-
-        res = {
+        return {
             'id': video_id,
             'formats': formats,
-            'uploader': uploader,
-            'uploader_id': uploader_id,
             'title': title,
             'thumbnail': self._og_search_thumbnail(webpage),
-            'description': description,
+            'description': self._og_search_description(webpage),
             'duration': duration,
+            'uploader': uploader,
         }
-
-        if self._downloader.params.get('include_ads') and ad_formats:
-            self._sort_formats(ad_formats)
-            ad_res = {
-                'id': '%s-ad' % video_id,
-                'title': '%s (Postroll)' % title,
-                'formats': ad_formats,
-            }
-            return {
-                '_type': 'playlist',
-                'entries': [res, ad_res],
-                'title': title,
-                'id': video_id,
-            }
-
-        return res
index 36ba331285b434136b8d3c10e6a8a16bef18e7b7..c826a5404a4f7da298927460f1f8e41dd013d3a7 100644 (file)
@@ -4,11 +4,11 @@ import re
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_parse_urlparse,
+    compat_parse_qs,
     compat_urllib_request,
-    compat_urllib_parse,
 )
 from ..utils import (
+    qualities,
     str_to_int,
 )
 
@@ -17,7 +17,7 @@ class ExtremeTubeIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<id>[0-9]+))(?:[/?&]|$)'
     _TESTS = [{
         'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
-        'md5': '1fb9228f5e3332ec8c057d6ac36f33e0',
+        'md5': '344d0c6d50e2f16b06e49ca011d8ac69',
         'info_dict': {
             'id': '652431',
             'ext': 'mp4',
@@ -49,19 +49,27 @@ class ExtremeTubeIE(InfoExtractor):
             r'Views:\s*</strong>\s*<span>([\d,\.]+)</span>',
             webpage, 'view count', fatal=False))
 
-        video_url = compat_urllib_parse.unquote(self._html_search_regex(
-            r'video_url=(.+?)&amp;', webpage, 'video_url'))
-        path = compat_urllib_parse_urlparse(video_url).path
-        format = path.split('/')[5].split('_')[:2]
-        format = "-".join(format)
+        flash_vars = compat_parse_qs(self._search_regex(
+            r'<param[^>]+?name="flashvars"[^>]+?value="([^"]+)"', webpage, 'flash vars'))
+
+        formats = []
+        quality = qualities(['180p', '240p', '360p', '480p', '720p', '1080p'])
+        for k, vals in flash_vars.items():
+            m = re.match(r'quality_(?P<quality>[0-9]+p)$', k)
+            if m is not None:
+                formats.append({
+                    'format_id': m.group('quality'),
+                    'quality': quality(m.group('quality')),
+                    'url': vals[0],
+                })
+
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
             'title': video_title,
+            'formats': formats,
             'uploader': uploader,
             'view_count': view_count,
-            'url': video_url,
-            'format': format,
-            'format_id': format,
             'age_limit': 18,
         }
index f0e575320015d435889b1bd610b4871dbd84ae21..937b28fcccf3bd58929adcca1bda9d05966460e5 100644 (file)
@@ -24,8 +24,12 @@ class FacebookIE(InfoExtractor):
     _VALID_URL = r'''(?x)
         https?://(?:\w+\.)?facebook\.com/
         (?:[^#]*?\#!/)?
-        (?:video/video\.php|photo\.php|video\.php|video/embed)\?(?:.*?)
-        (?:v|video_id)=(?P<id>[0-9]+)
+        (?:
+            (?:video/video\.php|photo\.php|video\.php|video/embed)\?(?:.*?)
+            (?:v|video_id)=|
+            [^/]+/videos/(?:[^/]+/)?
+        )
+        (?P<id>[0-9]+)
         (?:.*)'''
     _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
     _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
@@ -50,6 +54,12 @@ class FacebookIE(InfoExtractor):
     }, {
         'url': 'https://www.facebook.com/video.php?v=10204634152394104',
         'only_matching': True,
+    }, {
+        'url': 'https://www.facebook.com/amogood/videos/1618742068337349/?fref=nf',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.facebook.com/ChristyClarkForBC/videos/vb.22819070941/10153870694020942/?type=2&theater',
+        'only_matching': True,
     }]
 
     def _login(self):
index 0c858b6544b919b1b569b4c4102447631298046e..2fe76d661bb432580cd2bd3f48c85035a4b6d7d9 100644 (file)
@@ -3,9 +3,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_urllib_request
 from ..utils import (
     ExtractorError,
-    unescapeHTML,
+    find_xpath_attr,
 )
 
 
@@ -29,25 +30,31 @@ class FlickrIE(InfoExtractor):
         video_id = mobj.group('id')
         video_uploader_id = mobj.group('uploader_id')
         webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
-        webpage = self._download_webpage(webpage_url, video_id)
+        req = compat_urllib_request.Request(webpage_url)
+        req.add_header(
+            'User-Agent',
+            # it needs a more recent version
+            'Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20150101 Firefox/38.0 (Chrome)')
+        webpage = self._download_webpage(req, video_id)
 
-        secret = self._search_regex(r"photo_secret: '(\w+)'", webpage, 'secret')
+        secret = self._search_regex(r'secret"\s*:\s*"(\w+)"', webpage, 'secret')
 
         first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
-        first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')
+        first_xml = self._download_xml(first_url, video_id, 'Downloading first data webpage')
 
-        node_id = self._html_search_regex(r'<Item id="id">(\d+-\d+)</Item>',
-                                          first_xml, 'node_id')
+        node_id = find_xpath_attr(
+            first_xml, './/{http://video.yahoo.com/YEP/1.0/}Item', 'id',
+            'id').text
 
         second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
-        second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')
+        second_xml = self._download_xml(second_url, video_id, 'Downloading second data webpage')
 
         self.report_extraction(video_id)
 
-        mobj = re.search(r'<STREAM APP="(.+?)" FULLPATH="(.+?)"', second_xml)
-        if mobj is None:
+        stream = second_xml.find('.//STREAM')
+        if stream is None:
             raise ExtractorError('Unable to extract video url')
-        video_url = mobj.group(1) + unescapeHTML(mobj.group(2))
+        video_url = stream.attrib['APP'] + stream.attrib['FULLPATH']
 
         return {
             'id': video_id,
diff --git a/youtube_dl/extractor/footyroom.py b/youtube_dl/extractor/footyroom.py
new file mode 100644 (file)
index 0000000..4c7dbca
--- /dev/null
@@ -0,0 +1,49 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class FootyRoomIE(InfoExtractor):
+    _VALID_URL = r'http://footyroom\.com/(?P<id>[^/]+)'
+    _TESTS = [{
+        'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/',
+        'info_dict': {
+            'id': 'schalke-04-0-2-real-madrid-2015-02',
+            'title': 'Schalke 04 0 – 2 Real Madrid',
+        },
+        'playlist_count': 3,
+    }, {
+        'url': 'http://footyroom.com/georgia-0-2-germany-2015-03/',
+        'info_dict': {
+            'id': 'georgia-0-2-germany-2015-03',
+            'title': 'Georgia 0 – 2 Germany',
+        },
+        'playlist_count': 1,
+    }]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        playlist = self._parse_json(
+            self._search_regex(
+                r'VideoSelector\.load\((\[.+?\])\);', webpage, 'video selector'),
+            playlist_id)
+
+        playlist_title = self._og_search_title(webpage)
+
+        entries = []
+        for video in playlist:
+            payload = video.get('payload')
+            if not payload:
+                continue
+            playwire_url = self._search_regex(
+                r'data-config="([^"]+)"', payload,
+                'playwire url', default=None)
+            if playwire_url:
+                entries.append(self.url_result(self._proto_relative_url(
+                    playwire_url, 'http:'), 'Playwire'))
+
+        return self.playlist_result(entries, playlist_id, playlist_title)
diff --git a/youtube_dl/extractor/foxsports.py b/youtube_dl/extractor/foxsports.py
new file mode 100644 (file)
index 0000000..df76651
--- /dev/null
@@ -0,0 +1,32 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import smuggle_url
+
+
+class FoxSportsIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*(?P<id>[^/]+)'
+
+    _TEST = {
+        'url': 'http://www.foxsports.com/video?vid=432609859715',
+        'info_dict': {
+            'id': 'gA0bHB3Ladz3',
+            'ext': 'flv',
+            'title': 'Courtney Lee on going up 2-0 in series vs. Blazers',
+            'description': 'Courtney Lee talks about Memphis being focused.',
+        },
+        'add_ie': ['ThePlatform'],
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        config = self._parse_json(
+            self._search_regex(
+                r"data-player-config='([^']+)'", webpage, 'data player config'),
+            video_id)
+
+        return self.url_result(smuggle_url(
+            config['releaseURL'] + '&manifest=f4m', {'force_smil_url': True}))
index 170d6807529ac9b121187786cf9329b3b3525dc3..edf555b2987520618b70bf8bd423c5fc1f60e5a9 100644 (file)
@@ -14,7 +14,9 @@ from ..utils import (
     clean_html,
     ExtractorError,
     int_or_none,
+    float_or_none,
     parse_duration,
+    determine_ext,
 )
 
 
@@ -50,7 +52,8 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
             if not video_url:
                 continue
             format_id = video['format']
-            if video_url.endswith('.f4m'):
+            ext = determine_ext(video_url)
+            if ext == 'f4m':
                 if georestricted:
                     # See https://github.com/rg3/youtube-dl/issues/3963
                     # m3u8 urls work fine
@@ -60,12 +63,9 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
                     'http://hdfauth.francetv.fr/esi/urltokengen2.html?url=%s' % video_url_parsed.path,
                     video_id, 'Downloading f4m manifest token', fatal=False)
                 if f4m_url:
-                    f4m_formats = self._extract_f4m_formats(f4m_url, video_id)
-                    for f4m_format in f4m_formats:
-                        f4m_format['preference'] = 1
-                    formats.extend(f4m_formats)
-            elif video_url.endswith('.m3u8'):
-                formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4'))
+                    formats.extend(self._extract_f4m_formats(f4m_url, video_id, 1, format_id))
+            elif ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4', m3u8_id=format_id))
             elif video_url.startswith('rtmp'):
                 formats.append({
                     'url': video_url,
@@ -86,7 +86,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
             'title': info['titre'],
             'description': clean_html(info['synopsis']),
             'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
-            'duration': parse_duration(info['duree']),
+            'duration': float_or_none(info.get('real_duration'), 1000) or parse_duration(info['duree']),
             'timestamp': int_or_none(info['diffusion']['timestamp']),
             'formats': formats,
         }
@@ -260,22 +260,28 @@ class CultureboxIE(FranceTVBaseInfoExtractor):
     _VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
 
     _TEST = {
-        'url': 'http://culturebox.francetvinfo.fr/festivals/dans-les-jardins-de-william-christie/dans-les-jardins-de-william-christie-le-camus-162553',
-        'md5': '5ad6dec1ffb2a3fbcb20cc4b744be8d6',
+        'url': 'http://culturebox.francetvinfo.fr/live/musique/musique-classique/le-livre-vermeil-de-montserrat-a-la-cathedrale-delne-214511',
+        'md5': '9b88dc156781c4dbebd4c3e066e0b1d6',
         'info_dict': {
-            'id': 'EV_22853',
+            'id': 'EV_50111',
             'ext': 'flv',
-            'title': 'Dans les jardins de William Christie - Le Camus',
-            'description': 'md5:4710c82315c40f0c865ca8b9a68b5299',
-            'upload_date': '20140829',
-            'timestamp': 1409317200,
+            'title': "Le Livre Vermeil de Montserrat à la Cathédrale d'Elne",
+            'description': 'md5:f8a4ad202e8fe533e2c493cc12e739d9',
+            'upload_date': '20150320',
+            'timestamp': 1426892400,
+            'duration': 2760.9,
         },
     }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         name = mobj.group('name')
+
         webpage = self._download_webpage(url, name)
+
+        if ">Ce live n'est plus disponible en replay<" in webpage:
+            raise ExtractorError('Video %s is not available' % name, expected=True)
+
         video_id, catalogue = self._search_regex(
             r'"http://videos\.francetv\.fr/video/([^@]+@[^"]+)"', webpage, 'video id').split('@')
 
index a49fc1151cf324f5e4b61cbd4f1d586718410626..dd87257c465983dcda30a6faf5dbd7bc0950560c 100644 (file)
@@ -50,7 +50,6 @@ class FunnyOrDieIE(InfoExtractor):
         bitrates.sort()
 
         formats = []
-
         for bitrate in bitrates:
             for link in links:
                 formats.append({
@@ -59,6 +58,13 @@ class FunnyOrDieIE(InfoExtractor):
                     'vbr': bitrate,
                 })
 
+        subtitles = {}
+        for src, src_lang in re.findall(r'<track kind="captions" src="([^"]+)" srclang="([^"]+)"', webpage):
+            subtitles[src_lang] = [{
+                'ext': src.split('/')[-1],
+                'url': 'http://www.funnyordie.com%s' % src,
+            }]
+
         post_json = self._search_regex(
             r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details')
         post = json.loads(post_json)
@@ -69,4 +75,5 @@ class FunnyOrDieIE(InfoExtractor):
             'description': post.get('description'),
             'thumbnail': post.get('picture'),
             'formats': formats,
+            'subtitles': subtitles,
         }
diff --git a/youtube_dl/extractor/gamersyde.py b/youtube_dl/extractor/gamersyde.py
new file mode 100644 (file)
index 0000000..d545e01
--- /dev/null
@@ -0,0 +1,70 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    js_to_json,
+    parse_duration,
+    remove_start,
+)
+
+
+class GamersydeIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?gamersyde\.com/hqstream_(?P<display_id>[\da-z_]+)-(?P<id>\d+)_[a-z]{2}\.html'
+    _TEST = {
+        'url': 'http://www.gamersyde.com/hqstream_bloodborne_birth_of_a_hero-34371_en.html',
+        'md5': 'f38d400d32f19724570040d5ce3a505f',
+        'info_dict': {
+            'id': '34371',
+            'ext': 'mp4',
+            'duration': 372,
+            'title': 'Bloodborne - Birth of a hero',
+            'thumbnail': 're:^https?://.*\.jpg$',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id')
+
+        webpage = self._download_webpage(url, display_id)
+
+        playlist = self._parse_json(
+            self._search_regex(
+                r'(?s)playlist: \[({.+?})\]\s*}\);', webpage, 'files'),
+            display_id, transform_source=js_to_json)
+
+        formats = []
+        for source in playlist['sources']:
+            video_url = source.get('file')
+            if not video_url:
+                continue
+            format_id = source.get('label')
+            f = {
+                'url': video_url,
+                'format_id': format_id,
+            }
+            m = re.search(r'^(?P<height>\d+)[pP](?P<fps>\d+)fps', format_id)
+            if m:
+                f.update({
+                    'height': int(m.group('height')),
+                    'fps': int(m.group('fps')),
+                })
+            formats.append(f)
+        self._sort_formats(formats)
+
+        title = remove_start(playlist['title'], '%s - ' % video_id)
+        thumbnail = playlist.get('image')
+        duration = parse_duration(self._search_regex(
+            r'Length:</label>([^<]+)<', webpage, 'duration', fatal=False))
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/gazeta.py b/youtube_dl/extractor/gazeta.py
new file mode 100644 (file)
index 0000000..ea32b62
--- /dev/null
@@ -0,0 +1,38 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class GazetaIE(InfoExtractor):  # gazeta.ru video pages; the result is handed over to the 'EaglePlatform' extractor
+    _VALID_URL = r'(?P<url>https?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:(?:main|\d{4}/\d{2}/\d{2})/)?(?P<id>[A-Za-z0-9-_.]+)\.s?html)'
+    _TESTS = [{
+        'url': 'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml',
+        'md5': 'd49c9bdc6e5a7888f27475dc215ee789',
+        'info_dict': {
+            'id': '205566',
+            'ext': 'mp4',
+            'title': '«70–80 процентов гражданских в Донецке на грани голода»',
+            'description': 'md5:38617526050bd17b234728e7f9620a71',
+            'thumbnail': 're:^https?://.*\.jpg',
+        },
+    }, {
+        'url': 'http://www.gazeta.ru/lifestyle/video/2015/03/08/master-klass_krasivoi_byt._delaem_vesennii_makiyazh.shtml',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):  # resolve the page to an "eagleplatform:<host>:<id>" URL result
+        mobj = re.match(self._VALID_URL, url)
+
+        display_id = mobj.group('id')  # page slug captured by _VALID_URL (not the numeric video id)
+        embed_url = '%s?p=embed' % mobj.group('url')  # 'url' group is the whole matched page URL
+        embed_page = self._download_webpage(
+            embed_url, display_id, 'Downloading embed page')
+
+        video_id = self._search_regex(  # data-id attribute of the <div class="eagleplayer"> element
+            r'<div[^>]*?class="eagleplayer"[^>]*?data-id="([^"]+)"', embed_page, 'video id')
+
+        return self.url_result(  # the host part is hard-coded to gazeta's Eagle.Platform domain
+            'eagleplatform:gazeta.media.eagleplatform.com:%s' % video_id, 'EaglePlatform')
index f7b467b0aff8f46aa028d1898f5909277e973318..43f916412d9b97f3ca93cea830e5390bdcc70db0 100644 (file)
@@ -11,13 +11,15 @@ from ..utils import remove_end
 
 
 class GDCVaultIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)'
+    _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)?'
+    _NETRC_MACHINE = 'gdcvault'
     _TESTS = [
         {
             'url': 'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple',
             'md5': '7ce8388f544c88b7ac11c7ab1b593704',
             'info_dict': {
                 'id': '1019721',
+                'display_id': 'Doki-Doki-Universe-Sweet-Simple',
                 'ext': 'mp4',
                 'title': 'Doki-Doki Universe: Sweet, Simple and Genuine (GDC Next 10)'
             }
@@ -26,6 +28,7 @@ class GDCVaultIE(InfoExtractor):
             'url': 'http://www.gdcvault.com/play/1015683/Embracing-the-Dark-Art-of',
             'info_dict': {
                 'id': '1015683',
+                'display_id': 'Embracing-the-Dark-Art-of',
                 'ext': 'flv',
                 'title': 'Embracing the Dark Art of Mathematical Modeling in AI'
             },
@@ -38,10 +41,15 @@ class GDCVaultIE(InfoExtractor):
             'md5': 'a5eb77996ef82118afbbe8e48731b98e',
             'info_dict': {
                 'id': '1015301',
+                'display_id': 'Thexder-Meets-Windows-95-or',
                 'ext': 'flv',
                 'title': 'Thexder Meets Windows 95, or Writing Great Games in the Windows 95 Environment',
             },
             'skip': 'Requires login',
+        },
+        {
+            'url': 'http://gdcvault.com/play/1020791/',
+            'only_matching': True,
         }
     ]
 
@@ -89,7 +97,7 @@ class GDCVaultIE(InfoExtractor):
         })
         return video_formats
 
-    def _login(self, webpage_url, video_id):
+    def _login(self, webpage_url, display_id):
         (username, password) = self._get_login_info()
         if username is None or password is None:
             self.report_warning('It looks like ' + webpage_url + ' requires a login. Try specifying a username and password and try again.')
@@ -106,9 +114,9 @@ class GDCVaultIE(InfoExtractor):
 
         request = compat_urllib_request.Request(login_url, compat_urllib_parse.urlencode(login_form))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')
-        self._download_webpage(request, video_id, 'Logging in')
-        start_page = self._download_webpage(webpage_url, video_id, 'Getting authenticated video page')
-        self._download_webpage(logout_url, video_id, 'Logging out')
+        self._download_webpage(request, display_id, 'Logging in')
+        start_page = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page')
+        self._download_webpage(logout_url, display_id, 'Logging out')
 
         return start_page
 
@@ -116,8 +124,10 @@ class GDCVaultIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
 
         video_id = mobj.group('id')
+        display_id = mobj.group('name') or video_id
+
         webpage_url = 'http://www.gdcvault.com/play/' + video_id
-        start_page = self._download_webpage(webpage_url, video_id)
+        start_page = self._download_webpage(webpage_url, display_id)
 
         direct_url = self._search_regex(
             r's1\.addVariable\("file",\s*encodeURIComponent\("(/[^"]+)"\)\);',
@@ -130,6 +140,7 @@ class GDCVaultIE(InfoExtractor):
 
             return {
                 'id': video_id,
+                'display_id': display_id,
                 'url': video_url,
                 'ext': 'flv',
                 'title': title,
@@ -140,7 +151,7 @@ class GDCVaultIE(InfoExtractor):
             start_page, 'xml root', default=None)
         if xml_root is None:
             # Probably need to authenticate
-            login_res = self._login(webpage_url, video_id)
+            login_res = self._login(webpage_url, display_id)
             if login_res is None:
                 self.report_warning('Could not login.')
             else:
@@ -158,7 +169,7 @@ class GDCVaultIE(InfoExtractor):
             xml_name = self._html_search_regex(r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename')
 
         xml_decription_url = xml_root + 'xml/' + xml_name
-        xml_description = self._download_xml(xml_decription_url, video_id)
+        xml_description = self._download_xml(xml_decription_url, display_id)
 
         video_title = xml_description.find('./metadata/title').text
         video_formats = self._parse_mp4(xml_description)
@@ -167,6 +178,7 @@ class GDCVaultIE(InfoExtractor):
 
         return {
             'id': video_id,
+            'display_id': display_id,
             'title': video_title,
             'formats': video_formats,
         }
index 27e2bc3001c27750378cd790763d86b38442ffa7..3d756e8481e0aba09d2290cfd5a4a8b21369aa7d 100644 (file)
@@ -26,12 +26,18 @@ from ..utils import (
     unsmuggle_url,
     UnsupportedError,
     url_basename,
+    xpath_text,
 )
 from .brightcove import BrightcoveIE
+from .nbc import NBCSportsVPlayerIE
 from .ooyala import OoyalaIE
 from .rutv import RUTVIE
 from .smotri import SmotriIE
 from .condenast import CondeNastIE
+from .udn import UDNEmbedIE
+from .senateisvp import SenateISVPIE
+from .bliptv import BlipTVIE
+from .svt import SVTIE
 
 
 class GenericIE(InfoExtractor):
@@ -526,6 +532,17 @@ class GenericIE(InfoExtractor):
             },
             'add_ie': ['Viddler'],
         },
+        # Libsyn embed
+        {
+            'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
+            'info_dict': {
+                'id': '3377616',
+                'ext': 'mp3',
+                'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
+                'description': 'md5:601cb790edd05908957dae8aaa866465',
+                'upload_date': '20150220',
+            },
+        },
         # jwplayer YouTube
         {
             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
@@ -569,6 +586,162 @@ class GenericIE(InfoExtractor):
                 'title': 'John Carlson Postgame 2/25/15',
             },
         },
+        # Eagle.Platform embed (generic URL)
+        {
+            'url': 'http://lenta.ru/news/2015/03/06/navalny/',
+            'info_dict': {
+                'id': '227304',
+                'ext': 'mp4',
+                'title': 'Навальный вышел на свободу',
+                'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'duration': 87,
+                'view_count': int,
+                'age_limit': 0,
+            },
+        },
+        # ClipYou (Eagle.Platform) embed (custom URL)
+        {
+            'url': 'http://muz-tv.ru/play/7129/',
+            'info_dict': {
+                'id': '12820',
+                'ext': 'mp4',
+                'title': "'O Sole Mio",
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'duration': 216,
+                'view_count': int,
+            },
+        },
+        # Pladform embed
+        {
+            'url': 'http://muz-tv.ru/kinozal/view/7400/',
+            'info_dict': {
+                'id': '100183293',
+                'ext': 'mp4',
+                'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
+                'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'duration': 694,
+                'age_limit': 0,
+            },
+        },
+        # Playwire embed
+        {
+            'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
+            'info_dict': {
+                'id': '3519514',
+                'ext': 'mp4',
+                'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
+                'thumbnail': 're:^https?://.*\.png$',
+                'duration': 45.115,
+            },
+        },
+        # 5min embed
+        {
+            'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
+            'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
+            'info_dict': {
+                'id': '518726732',
+                'ext': 'mp4',
+                'title': 'Facebook Creates "On This Day" | Crunch Report',
+            },
+        },
+        # SVT embed
+        {
+            'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
+            'info_dict': {
+                'id': '2900353',
+                'ext': 'flv',
+                'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
+                'duration': 27,
+                'age_limit': 0,
+            },
+        },
+        # RSS feed with enclosure
+        {
+            'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
+            'info_dict': {
+                'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
+                'ext': 'm4v',
+                'upload_date': '20150228',
+                'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
+            }
+        },
+        # Crooks and Liars embed
+        {
+            'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
+            'info_dict': {
+                'id': '8RUoRhRi',
+                'ext': 'mp4',
+                'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
+                'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
+                'timestamp': 1428207000,
+                'upload_date': '20150405',
+                'uploader': 'Heather',
+            },
+        },
+        # Crooks and Liars external embed
+        {
+            'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
+            'info_dict': {
+                'id': 'MTE3MjUtMzQ2MzA',
+                'ext': 'mp4',
+                'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
+                'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
+                'timestamp': 1265032391,
+                'upload_date': '20100201',
+                'uploader': 'Heather',
+            },
+        },
+        # NBC Sports vplayer embed
+        {
+            'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
+            'info_dict': {
+                'id': 'ln7x1qSThw4k',
+                'ext': 'flv',
+                'title': "PFT Live: New leader in the 'new-look' defense",
+                'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
+            },
+        },
+        # UDN embed
+        {
+            'url': 'http://www.udn.com/news/story/7314/822787',
+            'md5': 'fd2060e988c326991037b9aff9df21a6',
+            'info_dict': {
+                'id': '300346',
+                'ext': 'mp4',
+                'title': '中一中男師變性 全校師生力挺',
+                'thumbnail': 're:^https?://.*\.jpg$',
+            }
+        },
+        # Ooyala embed
+        {
+            'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
+            'info_dict': {
+                'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
+                'ext': 'mp4',
+                'description': 'VIDEO: Index/Match versus VLOOKUP.',
+                'title': 'This is what separates the Excel masters from the wannabes',
+            },
+            'params': {
+                # m3u8 downloads
+                'skip_download': True,
+            }
+        },
+        # Contains a SMIL manifest
+        {
+            'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
+            'info_dict': {
+                'id': 'file',
+                'ext': 'flv',
+                'title': '+ Football: Lottery Champions League Europe',
+                'uploader': 'www.telewebion.com',
+            },
+            'params': {
+                # rtmpe downloads
+                'skip_download': True,
+            }
+        }
     ]
 
     def report_following_redirect(self, new_url):
@@ -580,11 +753,24 @@ class GenericIE(InfoExtractor):
         playlist_desc_el = doc.find('./channel/description')
         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
 
-        entries = [{
-            '_type': 'url',
-            'url': e.find('link').text,
-            'title': e.find('title').text,
-        } for e in doc.findall('./channel/item')]
+        entries = []
+        for it in doc.findall('./channel/item'):
+            next_url = xpath_text(it, 'link', fatal=False)
+            if not next_url:
+                enclosure_nodes = it.findall('./enclosure')
+                for e in enclosure_nodes:
+                    next_url = e.attrib.get('url')
+                    if next_url:
+                        break
+
+            if not next_url:
+                continue
+
+            entries.append({
+                '_type': 'url',
+                'url': next_url,
+                'title': it.find('title').text,
+            })
 
         return {
             '_type': 'playlist',
@@ -900,12 +1086,14 @@ class GenericIE(InfoExtractor):
             }
 
         # Look for embedded blip.tv player
-        mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
-        if mobj:
-            return self.url_result('http://blip.tv/a/a-' + mobj.group(1), 'BlipTV')
-        mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage)
-        if mobj:
-            return self.url_result(mobj.group(1), 'BlipTV')
+        bliptv_url = BlipTVIE._extract_url(webpage)
+        if bliptv_url:
+            return self.url_result(bliptv_url, 'BlipTV')
+
+        # Look for SVT player
+        svt_url = SVTIE._extract_url(webpage)
+        if svt_url:
+            return self.url_result(svt_url, 'SVT')
 
         # Look for embedded condenast player
         matches = re.findall(
@@ -943,10 +1131,24 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             return self.url_result(mobj.group('url'))
 
+        # Look for NYTimes player
+        mobj = re.search(
+            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
+            webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'))
+
+        # Look for Libsyn player
+        mobj = re.search(
+            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'))
+
         # Look for Ooyala videos
         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
-                re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage))
+                re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
+                re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
         if mobj is not None:
             return OoyalaIE._build_url_result(mobj.group('ec'))
 
@@ -1104,6 +1306,10 @@ class GenericIE(InfoExtractor):
         mobj = re.search(
             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
             webpage)
+        if not mobj:
+            mobj = re.search(
+                r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
+                webpage)
         if mobj is not None:
             return self.url_result(mobj.group('url'), 'MLB')
 
@@ -1131,6 +1337,59 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
 
+        # Look for Eagle.Platform embeds
+        mobj = re.search(
+            r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'EaglePlatform')
+
+        # Look for ClipYou (uses Eagle.Platform) embeds
+        mobj = re.search(
+            r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
+        if mobj is not None:
+            return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
+
+        # Look for Pladform embeds
+        mobj = re.search(
+            r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'Pladform')
+
+        # Look for Playwire embeds
+        mobj = re.search(
+            r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'))
+
+        # Look for 5min embeds
+        mobj = re.search(
+            r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
+        if mobj is not None:
+            return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
+
+        # Look for Crooks and Liars embeds
+        mobj = re.search(
+            r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'))
+
+        # Look for NBC Sports VPlayer embeds
+        nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
+        if nbc_sports_url:
+            return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
+
+        # Look for UDN embeds
+        mobj = re.search(
+            r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
+        if mobj is not None:
+            return self.url_result(
+                compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
+
+        # Look for Senate ISVP iframe
+        senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
+        if senate_isvp_url:
+            return self.url_result(senate_isvp_url, 'SenateISVP')
+
         def check_video(vurl):
             if YoutubeIE.suitable(vurl):
                 return True
@@ -1187,12 +1446,18 @@ class GenericIE(InfoExtractor):
             # HTML5 video
             found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
         if not found:
+            REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
             found = re.search(
                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
-                r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'?([^\'"]+)',
+                r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
                 webpage)
+            if not found:
+                # Look also in Refresh HTTP header
+                refresh_header = head_response.headers.get('Refresh')
+                if refresh_header:
+                    found = re.search(REDIRECT_REGEX, refresh_header)
             if found:
-                new_url = found.group(1)
+                new_url = compat_urlparse.urljoin(url, found.group(1))
                 self.report_following_redirect(new_url)
                 return {
                     '_type': 'url',
@@ -1214,13 +1479,22 @@ class GenericIE(InfoExtractor):
             # here's a fun little line of code for you:
             video_id = os.path.splitext(video_id)[0]
 
-            entries.append({
-                'id': video_id,
-                'url': video_url,
-                'uploader': video_uploader,
-                'title': video_title,
-                'age_limit': age_limit,
-            })
+            if determine_ext(video_url) == 'smil':
+                entries.append({
+                    'id': video_id,
+                    'formats': self._extract_smil_formats(video_url, video_id),
+                    'uploader': video_uploader,
+                    'title': video_title,
+                    'age_limit': age_limit,
+                })
+            else:
+                entries.append({
+                    'id': video_id,
+                    'url': video_url,
+                    'uploader': video_uploader,
+                    'title': video_title,
+                    'age_limit': age_limit,
+                })
 
         if len(entries) == 1:
             return entries[0]
diff --git a/youtube_dl/extractor/gfycat.py b/youtube_dl/extractor/gfycat.py
new file mode 100644 (file)
index 0000000..397f1d4
--- /dev/null
@@ -0,0 +1,90 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    float_or_none,
+    qualities,
+)
+
+
+class GfycatIE(InfoExtractor):  # gfycat.com items, resolved through the site's cajax JSON endpoint
+    _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?P<id>[^/?#]+)'
+    _TEST = {
+        'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
+        'info_dict': {
+            'id': 'DeadlyDecisiveGermanpinscher',
+            'ext': 'mp4',
+            'title': 'Ghost in the Shell',
+            'timestamp': 1410656006,
+            'upload_date': '20140914',
+            'uploader': 'anonymous',
+            'duration': 10.4,
+            'view_count': int,
+            'like_count': int,
+            'dislike_count': int,
+            'categories': list,
+            'age_limit': 0,
+        }
+    }
+
+    def _real_extract(self, url):  # returns the info dict (metadata plus one format per available container)
+        video_id = self._match_id(url)
+
+        gfy = self._download_json(  # all metadata lives under the 'gfyItem' key of the response
+            'http://gfycat.com/cajax/get/%s' % video_id,
+            video_id, 'Downloading video info')['gfyItem']
+
+        title = gfy.get('title') or gfy['gfyName']  # fall back to the gfy name when no title is set
+        description = gfy.get('description')
+        timestamp = int_or_none(gfy.get('createDate'))
+        uploader = gfy.get('userName')
+        view_count = int_or_none(gfy.get('views'))
+        like_count = int_or_none(gfy.get('likes'))
+        dislike_count = int_or_none(gfy.get('dislikes'))
+        age_limit = 18 if gfy.get('nsfw') == '1' else 0  # the flag is compared against the string '1'
+
+        width = int_or_none(gfy.get('width'))
+        height = int_or_none(gfy.get('height'))
+        fps = int_or_none(gfy.get('frameRate'))
+        num_frames = int_or_none(gfy.get('numFrames'))
+
+        duration = float_or_none(num_frames, fps) if num_frames and fps else None  # presumably frames / fps - confirm in utils
+
+        categories = gfy.get('tags') or gfy.get('extraLemmas') or []
+
+        FORMATS = ('gif', 'webm', 'mp4')  # presumably least to most preferred - confirm against utils.qualities
+        quality = qualities(FORMATS)
+
+        formats = []
+        for format_id in FORMATS:
+            video_url = gfy.get('%sUrl' % format_id)  # e.g. 'mp4Url'
+            if not video_url:  # this container is not offered for the item
+                continue
+            filesize = int_or_none(gfy.get('%sSize' % format_id))  # coerced like the other numeric fields
+            formats.append({
+                'url': video_url,
+                'format_id': format_id,
+                'width': width,
+                'height': height,
+                'fps': fps,
+                'filesize': filesize,
+                'quality': quality(format_id),
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'timestamp': timestamp,
+            'uploader': uploader,
+            'duration': duration,
+            'view_count': view_count,
+            'like_count': like_count,
+            'dislike_count': dislike_count,
+            'categories': categories,
+            'age_limit': age_limit,
+            'formats': formats,
+        }
index 775890112d219cf14a7c78d8504c62a65c612e28..28eb733e2bac89818a54952f77b342fec6ebe4ff 100644 (file)
@@ -85,7 +85,8 @@ class GigaIE(InfoExtractor):
             r'class="author">([^<]+)</a>', webpage, 'uploader', fatal=False)
 
         view_count = str_to_int(self._search_regex(
-            r'<span class="views"><strong>([\d.]+)</strong>', webpage, 'view count', fatal=False))
+            r'<span class="views"><strong>([\d.,]+)</strong>',
+            webpage, 'view count', fatal=False))
 
         return {
             'id': video_id,
index 29638a1948ff1230403f313f1c7725ab69224434..8a95793cae07734e67340bf49db088cdb043d1cb 100644 (file)
@@ -20,7 +20,7 @@ class GloboIE(InfoExtractor):
     _VALID_URL = 'https?://.+?\.globo\.com/(?P<id>.+)'
 
     _API_URL_TEMPLATE = 'http://api.globovideos.com/videos/%s/playlist'
-    _SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=2.9.9.50&resource_id=%s'
+    _SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=17.0.0.132&resource_id=%s'
 
     _VIDEOID_REGEXES = [
         r'\bdata-video-id="(\d+)"',
index ae24aff84fd85c6796c7a4374964f70629175f43..6147596e4c5d082d54f975b7428a400679b9dc32 100644 (file)
@@ -15,10 +15,10 @@ from ..utils import (
 
 
 class GorillaVidIE(InfoExtractor):
-    IE_DESC = 'GorillaVid.in, daclips.in, movpod.in and fastvideo.in'
+    IE_DESC = 'GorillaVid.in, daclips.in, movpod.in, fastvideo.in and realvid.net'
     _VALID_URL = r'''(?x)
         https?://(?P<host>(?:www\.)?
-            (?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in))/
+            (?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in|realvid\.net))/
         (?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?
     '''
 
@@ -35,13 +35,7 @@ class GorillaVidIE(InfoExtractor):
         },
     }, {
         'url': 'http://gorillavid.in/embed-z08zf8le23c6-960x480.html',
-        'md5': 'c9e293ca74d46cad638e199c3f3fe604',
-        'info_dict': {
-            'id': 'z08zf8le23c6',
-            'ext': 'mp4',
-            'title': 'Say something nice',
-            'thumbnail': 're:http://.*\.jpg',
-        },
+        'only_matching': True,
     }, {
         'url': 'http://daclips.in/3rso4kdn6f9m',
         'md5': '1ad8fd39bb976eeb66004d3a4895f106',
@@ -61,6 +55,15 @@ class GorillaVidIE(InfoExtractor):
             'title': 'Man of Steel - Trailer',
             'thumbnail': 're:http://.*\.jpg',
         },
+    }, {
+        'url': 'http://realvid.net/ctn2y6p2eviw',
+        'md5': 'b2166d2cf192efd6b6d764c18fd3710e',
+        'info_dict': {
+            'id': 'ctn2y6p2eviw',
+            'ext': 'flv',
+            'title': 'rdx 1955',
+            'thumbnail': 're:http://.*\.jpg',
+        },
     }, {
         'url': 'http://movpod.in/0wguyyxi1yca',
         'only_matching': True,
@@ -97,7 +100,7 @@ class GorillaVidIE(InfoExtractor):
             webpage = self._download_webpage(req, video_id, 'Downloading video page')
 
         title = self._search_regex(
-            r'style="z-index: [0-9]+;">([^<]+)</span>',
+            [r'style="z-index: [0-9]+;">([^<]+)</span>', r'>Watch (.+) '],
             webpage, 'title', default=None) or self._og_search_title(webpage)
         video_url = self._search_regex(
             r'file\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'file url')
diff --git a/youtube_dl/extractor/grooveshark.py b/youtube_dl/extractor/grooveshark.py
deleted file mode 100644 (file)
index 848d17b..0000000
+++ /dev/null
@@ -1,191 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import time
-import math
-import os.path
-import re
-
-
-from .common import InfoExtractor
-from ..compat import (
-    compat_html_parser,
-    compat_urllib_parse,
-    compat_urllib_request,
-    compat_urlparse,
-)
-from ..utils import ExtractorError
-
-
-class GroovesharkHtmlParser(compat_html_parser.HTMLParser):
-    def __init__(self):
-        self._current_object = None
-        self.objects = []
-        compat_html_parser.HTMLParser.__init__(self)
-
-    def handle_starttag(self, tag, attrs):
-        attrs = dict((k, v) for k, v in attrs)
-        if tag == 'object':
-            self._current_object = {'attrs': attrs, 'params': []}
-        elif tag == 'param':
-            self._current_object['params'].append(attrs)
-
-    def handle_endtag(self, tag):
-        if tag == 'object':
-            self.objects.append(self._current_object)
-            self._current_object = None
-
-    @classmethod
-    def extract_object_tags(cls, html):
-        p = cls()
-        p.feed(html)
-        p.close()
-        return p.objects
-
-
-class GroovesharkIE(InfoExtractor):
-    _VALID_URL = r'https?://(www\.)?grooveshark\.com/#!/s/([^/]+)/([^/]+)'
-    _TEST = {
-        'url': 'http://grooveshark.com/#!/s/Jolene+Tenth+Key+Remix+Ft+Will+Sessions/6SS1DW?src=5',
-        'md5': '7ecf8aefa59d6b2098517e1baa530023',
-        'info_dict': {
-            'id': '6SS1DW',
-            'title': 'Jolene (Tenth Key Remix ft. Will Sessions)',
-            'ext': 'mp3',
-            'duration': 227,
-        }
-    }
-
-    do_playerpage_request = True
-    do_bootstrap_request = True
-
-    def _parse_target(self, target):
-        uri = compat_urlparse.urlparse(target)
-        hash = uri.fragment[1:].split('?')[0]
-        token = os.path.basename(hash.rstrip('/'))
-        return (uri, hash, token)
-
-    def _build_bootstrap_url(self, target):
-        (uri, hash, token) = self._parse_target(target)
-        query = 'getCommunicationToken=1&hash=%s&%d' % (compat_urllib_parse.quote(hash, safe=''), self.ts)
-        return (compat_urlparse.urlunparse((uri.scheme, uri.netloc, '/preload.php', None, query, None)), token)
-
-    def _build_meta_url(self, target):
-        (uri, hash, token) = self._parse_target(target)
-        query = 'hash=%s&%d' % (compat_urllib_parse.quote(hash, safe=''), self.ts)
-        return (compat_urlparse.urlunparse((uri.scheme, uri.netloc, '/preload.php', None, query, None)), token)
-
-    def _build_stream_url(self, meta):
-        return compat_urlparse.urlunparse(('http', meta['streamKey']['ip'], '/stream.php', None, None, None))
-
-    def _build_swf_referer(self, target, obj):
-        (uri, _, _) = self._parse_target(target)
-        return compat_urlparse.urlunparse((uri.scheme, uri.netloc, obj['attrs']['data'], None, None, None))
-
-    def _transform_bootstrap(self, js):
-        return re.split('(?m)^\s*try\s*\{', js)[0] \
-                 .split(' = ', 1)[1].strip().rstrip(';')
-
-    def _transform_meta(self, js):
-        return js.split('\n')[0].split('=')[1].rstrip(';')
-
-    def _get_meta(self, target):
-        (meta_url, token) = self._build_meta_url(target)
-        self.to_screen('Metadata URL: %s' % meta_url)
-
-        headers = {'Referer': compat_urlparse.urldefrag(target)[0]}
-        req = compat_urllib_request.Request(meta_url, headers=headers)
-        res = self._download_json(req, token,
-                                  transform_source=self._transform_meta)
-
-        if 'getStreamKeyWithSong' not in res:
-            raise ExtractorError(
-                'Metadata not found. URL may be malformed, or Grooveshark API may have changed.')
-
-        if res['getStreamKeyWithSong'] is None:
-            raise ExtractorError(
-                'Metadata download failed, probably due to Grooveshark anti-abuse throttling. Wait at least an hour before retrying from this IP.',
-                expected=True)
-
-        return res['getStreamKeyWithSong']
-
-    def _get_bootstrap(self, target):
-        (bootstrap_url, token) = self._build_bootstrap_url(target)
-
-        headers = {'Referer': compat_urlparse.urldefrag(target)[0]}
-        req = compat_urllib_request.Request(bootstrap_url, headers=headers)
-        res = self._download_json(req, token, fatal=False,
-                                  note='Downloading player bootstrap data',
-                                  errnote='Unable to download player bootstrap data',
-                                  transform_source=self._transform_bootstrap)
-        return res
-
-    def _get_playerpage(self, target):
-        (_, _, token) = self._parse_target(target)
-
-        webpage = self._download_webpage(
-            target, token,
-            note='Downloading player page',
-            errnote='Unable to download player page',
-            fatal=False)
-
-        if webpage is not None:
-            # Search (for example German) error message
-            error_msg = self._html_search_regex(
-                r'<div id="content">\s*<h2>(.*?)</h2>', webpage,
-                'error message', default=None)
-            if error_msg is not None:
-                error_msg = error_msg.replace('\n', ' ')
-                raise ExtractorError('Grooveshark said: %s' % error_msg)
-
-        if webpage is not None:
-            o = GroovesharkHtmlParser.extract_object_tags(webpage)
-            return (webpage, [x for x in o if x['attrs']['id'] == 'jsPlayerEmbed'])
-
-        return (webpage, None)
-
-    def _real_initialize(self):
-        self.ts = int(time.time() * 1000)  # timestamp in millis
-
-    def _real_extract(self, url):
-        (target_uri, _, token) = self._parse_target(url)
-
-        # 1. Fill cookiejar by making a request to the player page
-        swf_referer = None
-        if self.do_playerpage_request:
-            (_, player_objs) = self._get_playerpage(url)
-            if player_objs is not None:
-                swf_referer = self._build_swf_referer(url, player_objs[0])
-                self.to_screen('SWF Referer: %s' % swf_referer)
-
-        # 2. Ask preload.php for swf bootstrap data to better mimic webapp
-        if self.do_bootstrap_request:
-            bootstrap = self._get_bootstrap(url)
-            self.to_screen('CommunicationToken: %s' % bootstrap['getCommunicationToken'])
-
-        # 3. Ask preload.php for track metadata.
-        meta = self._get_meta(url)
-
-        # 4. Construct stream request for track.
-        stream_url = self._build_stream_url(meta)
-        duration = int(math.ceil(float(meta['streamKey']['uSecs']) / 1000000))
-        post_dict = {'streamKey': meta['streamKey']['streamKey']}
-        post_data = compat_urllib_parse.urlencode(post_dict).encode('utf-8')
-        headers = {
-            'Content-Length': len(post_data),
-            'Content-Type': 'application/x-www-form-urlencoded'
-        }
-        if swf_referer is not None:
-            headers['Referer'] = swf_referer
-
-        return {
-            'id': token,
-            'title': meta['song']['Name'],
-            'http_method': 'POST',
-            'url': stream_url,
-            'ext': 'mp3',
-            'format': 'mp3 audio',
-            'duration': duration,
-            'http_post_data': post_data,
-            'http_headers': headers,
-        }
index 40afbe537c6eb930216afbe2afda23002151d8f2..6a36933ac2c98ada87b21af4089aa158d42a3112 100644 (file)
@@ -25,7 +25,8 @@ class HistoricFilmsIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         tape_id = self._search_regex(
-            r'class="tapeId">([^<]+)<', webpage, 'tape id')
+            [r'class="tapeId"[^>]*>([^<]+)<', r'tapeId\s*:\s*"([^"]+)"'],
+            webpage, 'tape id')
 
         title = self._og_search_title(webpage)
         description = self._og_search_description(webpage)
index 84bd7c0804eb2966c7062288a1a190c8509f62fc..421f55bbeaed2c1249833e5136ff479557c1bccc 100644 (file)
@@ -10,6 +10,7 @@ from ..utils import (
     float_or_none,
     int_or_none,
     compat_str,
+    determine_ext,
 )
 
 
@@ -42,7 +43,8 @@ class HitboxIE(InfoExtractor):
     def _extract_metadata(self, url, video_id):
         thumb_base = 'https://edge.sf.hitbox.tv'
         metadata = self._download_json(
-            '%s/%s' % (url, video_id), video_id)
+            '%s/%s' % (url, video_id), video_id,
+            'Downloading metadata JSON')
 
         date = 'media_live_since'
         media_type = 'livestream'
@@ -87,21 +89,41 @@ class HitboxIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        metadata = self._extract_metadata(
-            'https://www.hitbox.tv/api/media/video',
-            video_id)
-
         player_config = self._download_json(
             'https://www.hitbox.tv/api/player/config/video/%s' % video_id,
-            video_id)
+            video_id, 'Downloading video JSON')
 
-        clip = player_config.get('clip')
-        video_url = clip.get('url')
-        res = clip.get('bitrates', [])[0].get('label')
+        formats = []
+        for video in player_config['clip']['bitrates']:
+            label = video.get('label')
+            if label == 'Auto':
+                continue
+            video_url = video.get('url')
+            if not video_url:
+                continue
+            bitrate = int_or_none(video.get('bitrate'))
+            if determine_ext(video_url) == 'm3u8':
+                if not video_url.startswith('http'):
+                    continue
+                formats.append({
+                    'url': video_url,
+                    'ext': 'mp4',
+                    'tbr': bitrate,
+                    'format_note': label,
+                    'protocol': 'm3u8_native',
+                })
+            else:
+                formats.append({
+                    'url': video_url,
+                    'tbr': bitrate,
+                    'format_note': label,
+                })
+        self._sort_formats(formats)
 
-        metadata['resolution'] = res
-        metadata['url'] = video_url
-        metadata['protocol'] = 'm3u8'
+        metadata = self._extract_metadata(
+            'https://www.hitbox.tv/api/media/video',
+            video_id)
+        metadata['formats'] = formats
 
         return metadata
 
@@ -129,10 +151,6 @@ class HitboxLiveIE(HitboxIE):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        metadata = self._extract_metadata(
-            'https://www.hitbox.tv/api/media/live',
-            video_id)
-
         player_config = self._download_json(
             'https://www.hitbox.tv/api/player/config/live/%s' % video_id,
             video_id)
@@ -147,20 +165,39 @@ class HitboxLiveIE(HitboxIE):
                 servers.append(base_url)
                 for stream in cdn.get('bitrates'):
                     label = stream.get('label')
-                    if label != 'Auto':
+                    if label == 'Auto':
+                        continue
+                    stream_url = stream.get('url')
+                    if not stream_url:
+                        continue
+                    bitrate = int_or_none(stream.get('bitrate'))
+                    if stream.get('provider') == 'hls' or determine_ext(stream_url) == 'm3u8':
+                        if not stream_url.startswith('http'):
+                            continue
+                        formats.append({
+                            'url': stream_url,
+                            'ext': 'mp4',
+                            'tbr': bitrate,
+                            'format_note': label,
+                            'rtmp_live': True,
+                        })
+                    else:
                         formats.append({
-                            'url': '%s/%s' % (base_url, stream.get('url')),
+                            'url': '%s/%s' % (base_url, stream_url),
                             'ext': 'mp4',
-                            'vbr': stream.get('bitrate'),
-                            'resolution': label,
+                            'tbr': bitrate,
                             'rtmp_live': True,
                             'format_note': host,
                             'page_url': url,
                             'player_url': 'http://www.hitbox.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf',
                         })
-
         self._sort_formats(formats)
+
+        metadata = self._extract_metadata(
+            'https://www.hitbox.tv/api/media/live',
+            video_id)
         metadata['formats'] = formats
         metadata['is_live'] = True
         metadata['title'] = self._live_title(metadata.get('title'))
+
         return metadata
index 370e86e5ac7ce497c8b3c658805374246fb9690a..70e4c0d4173816e990749759cf2d36fe902904ee 100644 (file)
@@ -1,36 +1,75 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..utils import int_or_none
 
 
 class IconosquareIE(InfoExtractor):
-    _VALID_URL = r'https?://(www\.)?(?:iconosquare\.com|statigr\.am)/p/(?P<id>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:iconosquare\.com|statigr\.am)/p/(?P<id>[^/]+)'
     _TEST = {
         'url': 'http://statigr.am/p/522207370455279102_24101272',
         'md5': '6eb93b882a3ded7c378ee1d6884b1814',
         'info_dict': {
             'id': '522207370455279102_24101272',
             'ext': 'mp4',
-            'uploader_id': 'aguynamedpatrick',
-            'title': 'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
+            'title': 'Instagram media by @aguynamedpatrick (Patrick Janelle)',
             'description': 'md5:644406a9ec27457ed7aa7a9ebcd4ce3d',
+            'timestamp': 1376471991,
+            'upload_date': '20130814',
+            'uploader': 'aguynamedpatrick',
+            'uploader_id': '24101272',
+            'comment_count': int,
+            'like_count': int,
         },
     }
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
+
         webpage = self._download_webpage(url, video_id)
+
+        media = self._parse_json(
+            self._search_regex(
+                r'window\.media\s*=\s*({.+?});\n', webpage, 'media'),
+            video_id)
+
+        formats = [{
+            'url': f['url'],
+            'format_id': format_id,
+            'width': int_or_none(f.get('width')),
+            'height': int_or_none(f.get('height'))
+        } for format_id, f in media['videos'].items()]
+        self._sort_formats(formats)
+
         title = self._html_search_regex(
             r'<title>(.+?)(?: *\(Videos?\))? \| (?:Iconosquare|Statigram)</title>',
             webpage, 'title')
-        uploader_id = self._html_search_regex(
-            r'@([^ ]+)', title, 'uploader name', fatal=False)
+
+        timestamp = int_or_none(media.get('created_time') or media.get('caption', {}).get('created_time'))
+        description = media.get('caption', {}).get('text')
+
+        uploader = media.get('user', {}).get('username')
+        uploader_id = media.get('user', {}).get('id')
+
+        comment_count = int_or_none(media.get('comments', {}).get('count'))
+        like_count = int_or_none(media.get('likes', {}).get('count'))
+
+        thumbnails = [{
+            'url': t['url'],
+            'id': thumbnail_id,
+            'width': int_or_none(t.get('width')),
+            'height': int_or_none(t.get('height'))
+        } for thumbnail_id, t in media.get('images', {}).items()]
 
         return {
             'id': video_id,
-            'url': self._og_search_video_url(webpage),
             'title': title,
-            'description': self._og_search_description(webpage),
-            'thumbnail': self._og_search_thumbnail(webpage),
-            'uploader_id': uploader_id
+            'description': description,
+            'thumbnails': thumbnails,
+            'timestamp': timestamp,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'comment_count': comment_count,
+            'like_count': like_count,
+            'formats': formats,
         }
index 3aade9e740673da3193324add6a8a3ac4eff8b1f..bf2d2041b91a261d81e891bffee42966e0e53146 100644 (file)
@@ -61,7 +61,7 @@ class IGNIE(InfoExtractor):
         },
         {
             'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
-            'md5': '4e9a0bda1e5eebd31ddcf86ec0b9b3c7',
+            'md5': '618fedb9c901fd086f6f093564ef8558',
             'info_dict': {
                 'id': '078fdd005f6d3c02f63d795faa1b984f',
                 'ext': 'mp4',
@@ -77,10 +77,10 @@ class IGNIE(InfoExtractor):
     def _find_video_id(self, webpage):
         res_id = [
             r'"video_id"\s*:\s*"(.*?)"',
+            r'class="hero-poster[^"]*?"[^>]*id="(.+?)"',
             r'data-video-id="(.+?)"',
             r'<object id="vid_(.+?)"',
             r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
-            r'class="hero-poster[^"]*?"[^>]*id="(.+?)"',
         ]
         return self._search_regex(res_id, webpage, 'video id')
 
index b020e2621a5cc3c8d7ef6a1bc2cb6aaea989f779..65f6ca103973bb25c016ae92fcb551c65def31d1 100644 (file)
@@ -3,13 +3,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
-    int_or_none,
-)
+from ..utils import int_or_none
 
 
 class InstagramIE(InfoExtractor):
-    _VALID_URL = r'http://instagram\.com/p/(?P<id>.*?)/'
+    _VALID_URL = r'https?://instagram\.com/p/(?P<id>[\da-zA-Z]+)'
     _TEST = {
         'url': 'http://instagram.com/p/aye83DjauH/?foo=bar#abc',
         'md5': '0d2da106a9d2631273e192b372806516',
@@ -23,8 +21,8 @@ class InstagramIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
+
         webpage = self._download_webpage(url, video_id)
         uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
                                          webpage, 'uploader id', fatal=False)
index 8094cc2e487f2880a66178d0a07c97a2ef9432f5..d0720ff561c16e8c0816c5ff7ab333e54c297dbc 100644 (file)
@@ -2,7 +2,6 @@
 
 from __future__ import unicode_literals
 
-import json
 import re
 
 from .common import InfoExtractor
@@ -15,10 +14,10 @@ class JeuxVideoIE(InfoExtractor):
         'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
         'md5': '046e491afb32a8aaac1f44dd4ddd54ee',
         'info_dict': {
-            'id': '5182',
+            'id': '114765',
             'ext': 'mp4',
-            'title': 'GC 2013 : Tearaway nous présente ses papiers d\'identité',
-            'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
+            'title': 'Tearaway : GC 2013 : Tearaway nous présente ses papiers d\'identité',
+            'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.',
         },
     }
 
@@ -26,26 +25,29 @@ class JeuxVideoIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         title = mobj.group(1)
         webpage = self._download_webpage(url, title)
-        xml_link = self._html_search_regex(
-            r'<param name="flashvars" value="config=(.*?)" />',
+        title = self._html_search_meta('name', webpage)
+        config_url = self._html_search_regex(
+            r'data-src="(/contenu/medias/video.php.*?)"',
             webpage, 'config URL')
+        config_url = 'http://www.jeuxvideo.com' + config_url
 
         video_id = self._search_regex(
-            r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
-            xml_link, 'video ID')
+            r'id=(\d+)',
+            config_url, 'video ID')
 
-        config = self._download_xml(
-            xml_link, title, 'Downloading XML config')
-        info_json = config.find('format.json').text
-        info = json.loads(info_json)['versions'][0]
+        config = self._download_json(
+            config_url, title, 'Downloading JSON config')
 
-        video_url = 'http://video720.jeuxvideo.com/' + info['file']
+        formats = [{
+            'url': source['file'],
+            'format_id': source['label'],
+            'resolution': source['label'],
+        } for source in reversed(config['sources'])]
 
         return {
             'id': video_id,
-            'title': config.find('titre_video').text,
-            'ext': 'mp4',
-            'url': video_url,
+            'title': title,
+            'formats': formats,
             'description': self._og_search_description(webpage),
-            'thumbnail': config.find('image').text,
+            'thumbnail': config.get('image'),
         }
diff --git a/youtube_dl/extractor/kanalplay.py b/youtube_dl/extractor/kanalplay.py
new file mode 100644 (file)
index 0000000..4597d1b
--- /dev/null
@@ -0,0 +1,97 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    float_or_none,
+    srt_subtitles_timecode,
+)
+
+
+class KanalPlayIE(InfoExtractor):
+    IE_DESC = 'Kanal 5/9/11 Play'
+    _VALID_URL = r'https?://(?:www\.)?kanal(?P<channel_id>5|9|11)play\.se/(?:#!/)?(?:play/)?program/\d+/video/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://www.kanal5play.se/#!/play/program/3060212363/video/3270012277',
+        'info_dict': {
+            'id': '3270012277',
+            'ext': 'flv',
+            'title': 'Saknar både dusch och avlopp',
+            'description': 'md5:6023a95832a06059832ae93bc3c7efb7',
+            'duration': 2636.36,
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        }
+    }, {
+        'url': 'http://www.kanal9play.se/#!/play/program/335032/video/246042',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.kanal11play.se/#!/play/program/232835958/video/367135199',
+        'only_matching': True,
+    }]
+
+    def _fix_subtitles(self, subs):
+        return '\r\n\r\n'.join(
+            '%s\r\n%s --> %s\r\n%s'
+            % (
+                num,
+                srt_subtitles_timecode(item['startMillis'] / 1000.0),
+                srt_subtitles_timecode(item['endMillis'] / 1000.0),
+                item['text'],
+            ) for num, item in enumerate(subs, 1))
+
+    def _get_subtitles(self, channel_id, video_id):
+        subs = self._download_json(
+            'http://www.kanal%splay.se/api/subtitles/%s' % (channel_id, video_id),
+            video_id, 'Downloading subtitles JSON', fatal=False)
+        return {'se': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]} if subs else {}
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        channel_id = mobj.group('channel_id')
+
+        video = self._download_json(
+            'http://www.kanal%splay.se/api/getVideo?format=FLASH&videoId=%s' % (channel_id, video_id),
+            video_id)
+
+        reasons_for_no_streams = video.get('reasonsForNoStreams')
+        if reasons_for_no_streams:
+            raise ExtractorError(
+                '%s returned error: %s' % (self.IE_NAME, '\n'.join(reasons_for_no_streams)),
+                expected=True)
+
+        title = video['title']
+        description = video.get('description')
+        duration = float_or_none(video.get('length'), 1000)
+        thumbnail = video.get('posterUrl')
+
+        stream_base_url = video['streamBaseUrl']
+
+        formats = [{
+            'url': stream_base_url,
+            'play_path': stream['source'],
+            'ext': 'flv',
+            'tbr': float_or_none(stream.get('bitrate'), 1000),
+            'rtmp_real_time': True,
+        } for stream in video['streams']]
+        self._sort_formats(formats)
+
+        subtitles = {}
+        if video.get('hasSubtitle'):
+            subtitles = self.extract_subtitles(channel_id, video_id)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'formats': formats,
+            'subtitles': subtitles,
+        }
index e46954b47449b11be795c17478e168a6a57af0fd..96f95979a22429d2a19af3575ad1ca25c463b13e 100644 (file)
@@ -40,8 +40,10 @@ class KrasViewIE(InfoExtractor):
         description = self._og_search_description(webpage, default=None)
         thumbnail = flashvars.get('image') or self._og_search_thumbnail(webpage)
         duration = int_or_none(flashvars.get('duration'))
-        width = int_or_none(self._og_search_property('video:width', webpage, 'video width'))
-        height = int_or_none(self._og_search_property('video:height', webpage, 'video height'))
+        width = int_or_none(self._og_search_property(
+            'video:width', webpage, 'video width', default=None))
+        height = int_or_none(self._og_search_property(
+            'video:height', webpage, 'video height', default=None))
 
         return {
             'id': video_id,
index 583ce35b903dd5ee3214db7c7d3f0b604e0f4cf3..1484ac0d267697dceb34c9e406e3a26b26a37f54 100644 (file)
@@ -7,8 +7,9 @@ import time
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_urlparse,
     compat_urllib_parse,
+    compat_urllib_request,
+    compat_urlparse,
 )
 from ..utils import (
     determine_ext,
@@ -39,12 +40,20 @@ class LetvIE(InfoExtractor):
             'title': '美人天下01',
             'description': 'md5:f88573d9d7225ada1359eaf0dbf8bcda',
         },
-        'expected_warnings': [
-            'publish time'
-        ]
+    }, {
+        'note': 'This video is available only in Mainland China, thus a proxy is needed',
+        'url': 'http://www.letv.com/ptv/vplay/1118082.html',
+        'md5': 'f80936fbe20fb2f58648e81386ff7927',
+        'info_dict': {
+            'id': '1118082',
+            'ext': 'mp4',
+            'title': '与龙共舞 完整版',
+            'description': 'md5:7506a5eeb1722bb9d4068f85024e3986',
+        },
+        'params': {
+            'cn_verification_proxy': 'http://proxy.uku.im:8888'
+        },
     }]
-    # http://www.letv.com/ptv/vplay/1118082.html
-    # This video is available only in Mainland China
 
     @staticmethod
     def urshift(val, n):
@@ -76,9 +85,16 @@ class LetvIE(InfoExtractor):
             'tkey': self.calc_time_key(int(time.time())),
             'domain': 'www.letv.com'
         }
+        play_json_req = compat_urllib_request.Request(
+            'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params)
+        )
+        cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
+        if cn_verification_proxy:
+            play_json_req.add_header('Ytdl-request-proxy', cn_verification_proxy)
+
         play_json = self._download_json(
-            'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params),
-            media_id, 'playJson data')
+            play_json_req,
+            media_id, 'Downloading playJson data')
 
         # Check for errors
         playstatus = play_json['playstatus']
@@ -114,7 +130,8 @@ class LetvIE(InfoExtractor):
 
                 url_info_dict = {
                     'url': media_url,
-                    'ext': determine_ext(dispatch[format_id][1])
+                    'ext': determine_ext(dispatch[format_id][1]),
+                    'format_id': format_id,
                 }
 
                 if format_id[-1:] == 'p':
@@ -123,7 +140,7 @@ class LetvIE(InfoExtractor):
                 urls.append(url_info_dict)
 
         publish_time = parse_iso8601(self._html_search_regex(
-            r'发布时间&nbsp;([^<>]+) ', page, 'publish time', fatal=False),
+            r'发布时间&nbsp;([^<>]+) ', page, 'publish time', default=None),
             delimiter=' ', timezone=datetime.timedelta(hours=8))
         description = self._html_search_meta('description', page, fatal=False)
 
diff --git a/youtube_dl/extractor/libsyn.py b/youtube_dl/extractor/libsyn.py
new file mode 100644 (file)
index 0000000..9ab1416
--- /dev/null
@@ -0,0 +1,59 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import unified_strdate
+
+
+class LibsynIE(InfoExtractor):
+    _VALID_URL = r'https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+)'
+
+    _TEST = {
+        'url': 'http://html5-player.libsyn.com/embed/episode/id/3377616/',
+        'md5': '443360ee1b58007bc3dcf09b41d093bb',
+        'info_dict': {
+            'id': '3377616',
+            'ext': 'mp3',
+            'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
+            'description': 'md5:601cb790edd05908957dae8aaa866465',
+            'upload_date': '20150220',
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        formats = [{
+            'url': media_url,
+        } for media_url in set(re.findall('var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage))]
+
+        podcast_title = self._search_regex(
+            r'<h2>([^<]+)</h2>', webpage, 'title')
+        episode_title = self._search_regex(
+            r'<h3>([^<]+)</h3>', webpage, 'title', default=None)
+
+        title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title
+
+        description = self._html_search_regex(
+            r'<div id="info_text_body">(.+?)</div>', webpage,
+            'description', fatal=False)
+
+        thumbnail = self._search_regex(
+            r'<img[^>]+class="info-show-icon"[^>]+src="([^"]+)"',
+            webpage, 'thumbnail', fatal=False)
+
+        release_date = unified_strdate(self._search_regex(
+            r'<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'upload_date': release_date,
+            'formats': formats,
+        }
index 1dfe7f77f4ccdefa0b076f71f6467644e465cb52..42cb6e35f821256e90c8eef4f176812e3e0f42d0 100644 (file)
@@ -4,7 +4,9 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_urlparse
 from ..utils import (
+    determine_ext,
     int_or_none,
     unified_strdate,
     ExtractorError,
@@ -14,9 +16,9 @@ from ..utils import (
 class LifeNewsIE(InfoExtractor):
     IE_NAME = 'lifenews'
     IE_DESC = 'LIFE | NEWS'
-    _VALID_URL = r'http://lifenews\.ru/(?:mobile/)?news/(?P<id>\d+)'
+    _VALID_URL = r'http://lifenews\.ru/(?:mobile/)?(?P<section>news|video)/(?P<id>\d+)'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://lifenews.ru/news/126342',
         'md5': 'e1b50a5c5fb98a6a544250f2e0db570a',
         'info_dict': {
@@ -27,16 +29,47 @@ class LifeNewsIE(InfoExtractor):
             'thumbnail': 're:http://.*\.jpg',
             'upload_date': '20140130',
         }
-    }
+    }, {
+        # video in <iframe>
+        'url': 'http://lifenews.ru/news/152125',
+        'md5': '77d19a6f0886cd76bdbf44b4d971a273',
+        'info_dict': {
+            'id': '152125',
+            'ext': 'mp4',
+            'title': 'В Сети появилось видео захвата «Правым сектором» колхозных полей ',
+            'description': 'Жители двух поселков Днепропетровской области не простили радикалам угрозу лишения плодородных земель и пошли в лобовую. ',
+            'upload_date': '20150402',
+            'uploader': 'embed.life.ru',
+        }
+    }, {
+        'url': 'http://lifenews.ru/news/153461',
+        'md5': '9b6ef8bc0ffa25aebc8bdb40d89ab795',
+        'info_dict': {
+            'id': '153461',
+            'ext': 'mp4',
+            'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве',
+            'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
+            'upload_date': '20150505',
+            'uploader': 'embed.life.ru',
+        }
+    }, {
+        'url': 'http://lifenews.ru/video/13035',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
+        section = mobj.group('section')
 
-        webpage = self._download_webpage('http://lifenews.ru/news/%s' % video_id, video_id, 'Downloading page')
+        webpage = self._download_webpage(
+            'http://lifenews.ru/%s/%s' % (section, video_id),
+            video_id, 'Downloading page')
 
         videos = re.findall(r'<video.*?poster="(?P<poster>[^"]+)".*?src="(?P<video>[^"]+)".*?></video>', webpage)
-        if not videos:
+        iframe_link = self._html_search_regex(
+            '<iframe[^>]+src=["\']([^"\']+)["\']', webpage, 'iframe link', default=None)
+        if not videos and not iframe_link:
             raise ExtractorError('No media links available for %s' % video_id)
 
         title = self._og_search_title(webpage)
@@ -47,28 +80,90 @@ class LifeNewsIE(InfoExtractor):
         description = self._og_search_description(webpage)
 
         view_count = self._html_search_regex(
-            r'<div class=\'views\'>(\d+)</div>', webpage, 'view count', fatal=False)
+            r'<div class=\'views\'>\s*(\d+)\s*</div>', webpage, 'view count', fatal=False)
         comment_count = self._html_search_regex(
-            r'<div class=\'comments\'>\s*<span class=\'counter\'>(\d+)</span>', webpage, 'comment count', fatal=False)
+            r'<div class=\'comments\'>\s*<span class=\'counter\'>\s*(\d+)\s*</span>', webpage, 'comment count', fatal=False)
 
         upload_date = self._html_search_regex(
             r'<time datetime=\'([^\']+)\'>', webpage, 'upload date', fatal=False)
         if upload_date is not None:
             upload_date = unified_strdate(upload_date)
 
+        common_info = {
+            'description': description,
+            'view_count': int_or_none(view_count),
+            'comment_count': int_or_none(comment_count),
+            'upload_date': upload_date,
+        }
+
         def make_entry(video_id, media, video_number=None):
-            return {
+            cur_info = dict(common_info)
+            cur_info.update({
                 'id': video_id,
                 'url': media[1],
                 'thumbnail': media[0],
                 'title': title if video_number is None else '%s-video%s' % (title, video_number),
-                'description': description,
-                'view_count': int_or_none(view_count),
-                'comment_count': int_or_none(comment_count),
-                'upload_date': upload_date,
-            }
+            })
+            return cur_info
+
+        if iframe_link:
+            iframe_link = self._proto_relative_url(iframe_link, 'http:')
+            cur_info = dict(common_info)
+            cur_info.update({
+                '_type': 'url_transparent',
+                'id': video_id,
+                'title': title,
+                'url': iframe_link,
+            })
+            return cur_info
 
         if len(videos) == 1:
             return make_entry(video_id, videos[0])
         else:
             return [make_entry(video_id, media, video_number + 1) for video_number, media in enumerate(videos)]
+
+
+class LifeEmbedIE(InfoExtractor):
+    """Extractor for player pages hosted under embed.life.ru/embed/<hash>."""
+    IE_NAME = 'life:embed'
+    # The video id is the 32-character lowercase hex token in the URL path.
+    _VALID_URL = r'http://embed\.life\.ru/embed/(?P<id>[\da-f]{32})'
+
+    _TEST = {
+        'url': 'http://embed.life.ru/embed/e50c2dec2867350528e2574c899b8291',
+        'md5': 'b889715c9e49cb1981281d0e5458fbbe',
+        'info_dict': {
+            'id': 'e50c2dec2867350528e2574c899b8291',
+            'ext': 'mp4',
+            'title': 'e50c2dec2867350528e2574c899b8291',
+            'thumbnail': 're:http://.*\.jpg',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Download the embed page and build an info dict from it.
+
+        Every `"file": "..."` value found in the page becomes one or more
+        formats; the optional `"image": "..."` value becomes the thumbnail.
+        The page is not searched for a title, so the video id doubles as it.
+        """
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        formats = []
+        for video_url in re.findall(r'"file"\s*:\s*"([^"]+)', webpage):
+            # The page may reference files relatively; resolve against the
+            # embed page URL.
+            video_url = compat_urlparse.urljoin(url, video_url)
+            ext = determine_ext(video_url)
+            if ext == 'm3u8':
+                # Playlists are expanded into their individual variants.
+                formats.extend(self._extract_m3u8_formats(
+                    video_url, video_id, 'mp4', m3u8_id='m3u8'))
+            else:
+                # Direct file link: identified by its extension and given an
+                # explicit preference of 1.
+                formats.append({
+                    'url': video_url,
+                    'format_id': ext,
+                    'preference': 1,
+                })
+        self._sort_formats(formats)
+
+        # The thumbnail is optional: default=None means a missing match
+        # yields None instead of an error.
+        thumbnail = self._search_regex(
+            r'"image"\s*:\s*"([^"]+)', webpage, 'thumbnail', default=None)
+
+        return {
+            'id': video_id,
+            'title': video_id,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
index 3642089f7802238d77ec5c18e4f96b5cb21e3d72..6d7733e4111355a5011765336333f229596b8356 100644 (file)
@@ -2,6 +2,7 @@ from __future__ import unicode_literals
 
 import re
 import json
+import itertools
 
 from .common import InfoExtractor
 from ..compat import (
@@ -20,7 +21,7 @@ from ..utils import (
 
 class LivestreamIE(InfoExtractor):
     IE_NAME = 'livestream'
-    _VALID_URL = r'https?://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>[0-9]+)(?:/player)?)?/?(?:$|[?#])'
+    _VALID_URL = r'https?://(?:new\.)?livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>[0-9]+)(?:/player)?)?/?(?:$|[?#])'
     _TESTS = [{
         'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
         'md5': '53274c76ba7754fb0e8d072716f2292b',
@@ -40,9 +41,19 @@ class LivestreamIE(InfoExtractor):
             'id': '2245590',
         },
         'playlist_mincount': 4,
+    }, {
+        'url': 'http://new.livestream.com/chess24/tatasteelchess',
+        'info_dict': {
+            'title': 'Tata Steel Chess',
+            'id': '3705884',
+        },
+        'playlist_mincount': 60,
     }, {
         'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640',
         'only_matching': True,
+    }, {
+        'url': 'http://livestream.com/bsww/concacafbeachsoccercampeonato2015',
+        'only_matching': True,
     }]
 
     def _parse_smil(self, video_id, smil_url):
@@ -117,6 +128,30 @@ class LivestreamIE(InfoExtractor):
             'view_count': video_data.get('views'),
         }
 
+    def _extract_event(self, info):
+        """Return a playlist result containing the videos of an event.
+
+        `info` is the event dict; its 'id', 'owner_account_id' and
+        'full_name' keys are read here. The event feed is fetched page by
+        page, and entries are produced lazily by a generator.
+        """
+        event_id = compat_str(info['id'])
+        account = compat_str(info['owner_account_id'])
+        root_url = (
+            'https://new.livestream.com/api/accounts/{account}/events/{event}/'
+            'feed.json'.format(account=account, event=event_id))
+
+        def _extract_videos():
+            # Id of the last video on the previous page; None until the
+            # first page has been fetched.
+            last_video = None
+            for i in itertools.count(1):
+                if last_video is None:
+                    # First page: plain feed URL.
+                    info_url = root_url
+                else:
+                    # Later pages are requested relative to the last video
+                    # already seen.
+                    # NOTE(review): 'newer=-1' presumably asks for older
+                    # entries -- confirm against the feed API.
+                    info_url = '{root}?&id={id}&newer=-1&type=video'.format(
+                        root=root_url, id=last_video)
+                videos_info = self._download_json(info_url, event_id, 'Downloading page {0}'.format(i))['data']
+                # Keep only feed items of type 'video' and unwrap their
+                # payload.
+                videos_info = [v['data'] for v in videos_info if v['type'] == 'video']
+                if not videos_info:
+                    # A page without videos ends the pagination.
+                    break
+                for v in videos_info:
+                    yield self._extract_video_info(v)
+                last_video = videos_info[-1]['id']
+        return self.playlist_result(_extract_videos(), event_id, info['full_name'])
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
@@ -144,14 +179,13 @@ class LivestreamIE(InfoExtractor):
                 result = result and compat_str(vdata['data']['id']) == vid
             return result
 
-        videos = [self._extract_video_info(video_data['data'])
-                  for video_data in info['feed']['data']
-                  if is_relevant(video_data, video_id)]
         if video_id is None:
             # This is an event page:
-            return self.playlist_result(
-                videos, '%s' % info['id'], info['full_name'])
+            return self._extract_event(info)
         else:
+            videos = [self._extract_video_info(video_data['data'])
+                      for video_data in info['feed']['data']
+                      if is_relevant(video_data, video_id)]
             if not videos:
                 raise ExtractorError('Cannot find video %s' % video_id)
             return videos[0]
@@ -160,23 +194,19 @@ class LivestreamIE(InfoExtractor):
 # The original version of Livestream uses a different system
 class LivestreamOriginalIE(InfoExtractor):
     IE_NAME = 'livestream:original'
-    _VALID_URL = r'''(?x)https?://www\.livestream\.com/
+    _VALID_URL = r'''(?x)https?://original\.livestream\.com/
         (?P<user>[^/]+)/(?P<type>video|folder)
         (?:\?.*?Id=|/)(?P<id>.*?)(&|$)
         '''
     _TESTS = [{
-        'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
+        'url': 'http://original.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
         'info_dict': {
             'id': 'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital',
         },
-        'params': {
-            # rtmp
-            'skip_download': True,
-        },
     }, {
-        'url': 'https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3',
+        'url': 'https://original.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3',
         'info_dict': {
             'id': 'a07bf706-d0e4-4e75-a747-b021d84f2fd3',
         },
@@ -187,19 +217,17 @@ class LivestreamOriginalIE(InfoExtractor):
         api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
 
         info = self._download_xml(api_url, video_id)
+        # this url is used on mobile devices
+        stream_url = 'http://x{0}x.api.channel.livestream.com/3.0/getstream.json?id={1}'.format(user, video_id)
+        stream_info = self._download_json(stream_url, video_id)
         item = info.find('channel').find('item')
         ns = {'media': 'http://search.yahoo.com/mrss'}
         thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url']
-        # Remove the extension and number from the path (like 1.jpg)
-        path = self._search_regex(r'(user-files/.+)_.*?\.jpg$', thumbnail_url, 'path')
 
         return {
             'id': video_id,
             'title': item.find('title').text,
-            'url': 'rtmp://extondemand.livestream.com/ondemand',
-            'play_path': 'trans/dv15/mogulus-{0}'.format(path),
-            'player_url': 'http://static.livestream.com/chromelessPlayer/v21/playerapi.swf?hash=5uetk&v=0803&classid=D27CDB6E-AE6D-11cf-96B8-444553540000&jsEnabled=false&wmode=opaque',
-            'ext': 'flv',
+            'url': stream_info['progressiveUrl'],
             'thumbnail': thumbnail_url,
         }
 
index 9c2fbdd96788aeb4a854777cfcb6c67dff18f0ce..e3236f7b5797ab80431ed11b7027249354015a33 100644 (file)
@@ -52,6 +52,7 @@ class LRTIE(InfoExtractor):
                     'url': data['streamer'],
                     'play_path': 'mp4:%s' % data['file'],
                     'preference': -1,
+                    'rtmp_real_time': True,
                 })
             else:
                 formats.extend(
index 5dc22da22a6a5324887379fdff339f09f13d0309..cfd3b14f4bfd755a7600701e86900ece12b0c3ac 100644 (file)
@@ -15,18 +15,73 @@ from ..utils import (
 )
 
 
-class LyndaIE(InfoExtractor):
+class LyndaBaseIE(InfoExtractor):
+    """Base class holding the lynda.com login flow shared by subclasses."""
+    _LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
+    # Marker searched for in the login response to confirm success.
+    _SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true'
+    _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
+    _NETRC_MACHINE = 'lynda'
+
+    def _real_initialize(self):
+        # Attempt the login once, before any extraction takes place.
+        self._login()
+
+    def _login(self):
+        """Log in to lynda.com when credentials are available.
+
+        Returns silently when no username is configured. Raises
+        ExtractorError when the credentials are rejected or when the
+        response does not contain the successful-login marker.
+        """
+        (username, password) = self._get_login_info()
+        if username is None:
+            return
+
+        login_form = {
+            'username': username,
+            'password': password,
+            'remember': 'false',
+            'stayPut': 'false'
+        }
+        # urlencode() returns text; urllib on Python 3 only accepts bytes as
+        # POST data, so encode explicitly (the encoded form is pure ASCII).
+        request = compat_urllib_request.Request(
+            self._LOGIN_URL,
+            compat_urllib_parse.urlencode(login_form).encode('utf-8'))
+        login_page = self._download_webpage(
+            request, None, 'Logging in as %s' % username)
+
+        # Not (yet) logged in: the page embeds a JSON blob describing why.
+        m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page)
+        if m is not None:
+            response = m.group('json')
+            response_json = json.loads(response)
+            state = response_json['state']
+
+            if state == 'notlogged':
+                raise ExtractorError(
+                    'Unable to login, incorrect username and/or password',
+                    expected=True)
+
+            # This state corresponds to the popup:
+            # > You're already logged in to lynda.com on two devices.
+            # > If you log in here, we'll log you out of another device.
+            # It has to be confirmed by re-posting the form with
+            # 'resolve' set.
+            if state == 'conflicted':
+                confirm_form = {
+                    'username': '',
+                    'password': '',
+                    'resolve': 'true',
+                    'remember': 'false',
+                    'stayPut': 'false',
+                }
+                request = compat_urllib_request.Request(
+                    self._LOGIN_URL,
+                    compat_urllib_parse.urlencode(confirm_form).encode('utf-8'))
+                login_page = self._download_webpage(
+                    request, None,
+                    'Confirming log in and log out from another device')
+
+        # Whatever path was taken, the final page must carry the marker.
+        if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
+            raise ExtractorError('Unable to log in')
+
+
+class LyndaIE(LyndaBaseIE):
     IE_NAME = 'lynda'
     IE_DESC = 'lynda.com videos'
-    _VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(\d+)'
-    _LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
+    _VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(?P<id>\d+)'
     _NETRC_MACHINE = 'lynda'
 
-    _SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true'
     _TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'
 
-    ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
-
     _TESTS = [{
         'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
         'md5': 'ecfc6862da89489161fb9cd5f5a6fac1',
@@ -41,23 +96,22 @@ class LyndaIE(InfoExtractor):
         'only_matching': True,
     }]
 
-    def _real_initialize(self):
-        self._login()
-
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(1)
+        video_id = self._match_id(url)
 
-        page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, video_id,
-                                      'Downloading video JSON')
+        page = self._download_webpage(
+            'http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id,
+            video_id, 'Downloading video JSON')
         video_json = json.loads(page)
 
         if 'Status' in video_json:
-            raise ExtractorError('lynda returned error: %s' % video_json['Message'], expected=True)
+            raise ExtractorError(
+                'lynda returned error: %s' % video_json['Message'], expected=True)
 
         if video_json['HasAccess'] is False:
             raise ExtractorError(
-                'Video %s is only available for members. ' % video_id + self.ACCOUNT_CREDENTIALS_HINT, expected=True)
+                'Video %s is only available for members. '
+                % video_id + self._ACCOUNT_CREDENTIALS_HINT, expected=True)
 
         video_id = compat_str(video_json['ID'])
         duration = video_json['DurationInSeconds']
@@ -100,50 +154,9 @@ class LyndaIE(InfoExtractor):
             'formats': formats
         }
 
-    def _login(self):
-        (username, password) = self._get_login_info()
-        if username is None:
-            return
-
-        login_form = {
-            'username': username,
-            'password': password,
-            'remember': 'false',
-            'stayPut': 'false'
-        }
-        request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
-        login_page = self._download_webpage(request, None, 'Logging in as %s' % username)
-
-        # Not (yet) logged in
-        m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page)
-        if m is not None:
-            response = m.group('json')
-            response_json = json.loads(response)
-            state = response_json['state']
-
-            if state == 'notlogged':
-                raise ExtractorError('Unable to login, incorrect username and/or password', expected=True)
-
-            # This is when we get popup:
-            # > You're already logged in to lynda.com on two devices.
-            # > If you log in here, we'll log you out of another device.
-            # So, we need to confirm this.
-            if state == 'conflicted':
-                confirm_form = {
-                    'username': '',
-                    'password': '',
-                    'resolve': 'true',
-                    'remember': 'false',
-                    'stayPut': 'false',
-                }
-                request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form))
-                login_page = self._download_webpage(request, None, 'Confirming log in and log out from another device')
-
-        if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
-            raise ExtractorError('Unable to log in')
-
     def _fix_subtitles(self, subs):
         srt = ''
+        seq_counter = 0
         for pos in range(0, len(subs) - 1):
             seq_current = subs[pos]
             m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
@@ -155,8 +168,10 @@ class LyndaIE(InfoExtractor):
                 continue
             appear_time = m_current.group('timecode')
             disappear_time = m_next.group('timecode')
-            text = seq_current['Caption'].lstrip()
-            srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text)
+            text = seq_current['Caption'].strip()
+            if text:
+                seq_counter += 1
+                srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (seq_counter, appear_time, disappear_time, text)
         if srt:
             return srt
 
@@ -169,7 +184,7 @@ class LyndaIE(InfoExtractor):
             return {}
 
 
-class LyndaCourseIE(InfoExtractor):
+class LyndaCourseIE(LyndaBaseIE):
     IE_NAME = 'lynda:course'
     IE_DESC = 'lynda.com online courses'
 
@@ -182,35 +197,37 @@ class LyndaCourseIE(InfoExtractor):
         course_path = mobj.group('coursepath')
         course_id = mobj.group('courseid')
 
-        page = self._download_webpage('http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
-                                      course_id, 'Downloading course JSON')
+        page = self._download_webpage(
+            'http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
+            course_id, 'Downloading course JSON')
         course_json = json.loads(page)
 
         if 'Status' in course_json and course_json['Status'] == 'NotFound':
-            raise ExtractorError('Course %s does not exist' % course_id, expected=True)
+            raise ExtractorError(
+                'Course %s does not exist' % course_id, expected=True)
 
         unaccessible_videos = 0
         videos = []
-        (username, _) = self._get_login_info()
 
         # Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided
         # by single video API anymore
 
         for chapter in course_json['Chapters']:
             for video in chapter['Videos']:
-                if username is None and video['HasAccess'] is False:
+                if video['HasAccess'] is False:
                     unaccessible_videos += 1
                     continue
                 videos.append(video['ID'])
 
         if unaccessible_videos > 0:
-            self._downloader.report_warning('%s videos are only available for members and will not be downloaded. '
-                                            % unaccessible_videos + LyndaIE.ACCOUNT_CREDENTIALS_HINT)
+            self._downloader.report_warning(
+                '%s videos are only available for members (or paid members) and will not be downloaded. '
+                % unaccessible_videos + self._ACCOUNT_CREDENTIALS_HINT)
 
         entries = [
-            self.url_result('http://www.lynda.com/%s/%s-4.html' %
-                            (course_path, video_id),
-                            'Lynda')
+            self.url_result(
+                'http://www.lynda.com/%s/%s-4.html' % (course_path, video_id),
+                'Lynda')
             for video_id in videos]
 
         course_title = course_json['Title']
diff --git a/youtube_dl/extractor/megavideoz.py b/youtube_dl/extractor/megavideoz.py
new file mode 100644 (file)
index 0000000..af7ff07
--- /dev/null
@@ -0,0 +1,56 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    float_or_none,
+    xpath_text,
+)
+
+
+class MegaVideozIE(InfoExtractor):
+    """Extractor for megavideoz.eu video pages."""
+    # 'id' is the token after /video/; 'display_id' is the optional slug
+    # that may follow it.
+    _VALID_URL = r'https?://(?:www\.)?megavideoz\.eu/video/(?P<id>[^/]+)(?:/(?P<display_id>[^/]+))?'
+    _TEST = {
+        'url': 'http://megavideoz.eu/video/WM6UB919XMXH/SMPTE-Universal-Film-Leader',
+        'info_dict': {
+            'id': '48723',
+            'display_id': 'SMPTE-Universal-Film-Leader',
+            'ext': 'mp4',
+            'title': 'SMPTE Universal Film Leader',
+            'thumbnail': 're:https?://.*?\.jpg',
+            'duration': 10.93,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Fetch the video page, then its XML config, and build the info dict.
+
+        Raises ExtractorError (expected) when the page carries one of the
+        known "not found" markers.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        # Fall back to the URL id when the URL has no slug.
+        display_id = mobj.group('display_id') or video_id
+
+        webpage = self._download_webpage(url, display_id)
+
+        if any(p in webpage for p in ('>Video Not Found<', '>404 Error<')):
+            raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+
+        # The page stores the config URL in a JavaScript variable `cnf`.
+        config = self._download_xml(
+            self._search_regex(
+                r"var\s+cnf\s*=\s*'([^']+)'", webpage, 'cnf url'),
+            display_id)
+
+        # The file URL and title are mandatory; the remaining fields are
+        # optional.
+        video_url = xpath_text(config, './file', 'video url', fatal=True)
+        title = xpath_text(config, './title', 'title', fatal=True)
+        thumbnail = xpath_text(config, './image', 'thumbnail')
+        duration = float_or_none(xpath_text(config, './duration', 'duration'))
+        # Prefer the media id from the config over the token taken from the
+        # URL (the test expects '48723' rather than the URL token).
+        video_id = xpath_text(config, './mediaid', 'video id') or video_id
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'url': video_url,
+            'title': title,
+            'thumbnail': thumbnail,
+            'duration': duration
+        }
diff --git a/youtube_dl/extractor/miomio.py b/youtube_dl/extractor/miomio.py
new file mode 100644 (file)
index 0000000..a784fc5
--- /dev/null
@@ -0,0 +1,106 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import random
+
+from .common import InfoExtractor
+from ..utils import (
+    xpath_text,
+    int_or_none,
+    ExtractorError,
+)
+
+
+class MioMioIE(InfoExtractor):
+    """Extractor for miomio.tv watch pages (single or multi-segment videos)."""
+    IE_NAME = 'miomio.tv'
+    _VALID_URL = r'https?://(?:www\.)?miomio\.tv/watch/cc(?P<id>[0-9]+)'
+    _TESTS = [{
+        # "type=video" in flashvars
+        'url': 'http://www.miomio.tv/watch/cc88912/',
+        'md5': '317a5f7f6b544ce8419b784ca8edae65',
+        'info_dict': {
+            'id': '88912',
+            'ext': 'flv',
+            'title': '【SKY】字幕 铠武昭和VS平成 假面骑士大战FEAT战队 魔星字幕组 字幕',
+            'duration': 5923,
+        },
+    }, {
+        'url': 'http://www.miomio.tv/watch/cc184024/',
+        'info_dict': {
+            'id': '43729',
+            'title': '《动漫同人插画绘制》',
+        },
+        'playlist_mincount': 86,
+        'skip': 'This video takes time too long for retrieving the URL',
+    }, {
+        'url': 'http://www.miomio.tv/watch/cc173113/',
+        'info_dict': {
+            'id': '173113',
+            'title': 'The New Macbook 2015 上手试玩与简评'
+        },
+        'playlist_mincount': 2,
+    }]
+
+    def _real_extract(self, url):
+        """Return a single video or a 'multi_video' result for a watch page.
+
+        Raises ExtractorError (expected) when the video config reports no
+        usable 'timelength'.
+        """
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        # The title is read from the page's "description" meta tag.
+        title = self._html_search_meta(
+            'description', webpage, 'title', fatal=True)
+
+        mioplayer_path = self._search_regex(
+            r'src="(/mioplayer/[^"]+)"', webpage, 'ref_path')
+
+        # Query-string fragment from the flashvars; forwarded verbatim to
+        # sina.php below.
+        xml_config = self._search_regex(
+            r'flashvars="type=(?:sina|video)&amp;(.+?)&amp;',
+            webpage, 'xml config')
+
+        # skipping the following page causes lags and eventually connection drop-outs
+        # (the id parameter must be the video id, not the builtin `id`)
+        self._request_webpage(
+            'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/xml.php?id=%s&r=%s' % (video_id, random.randint(100, 999)),
+            video_id)
+
+        # the following xml contains the actual configuration information on the video file(s)
+        vid_config = self._download_xml(
+            'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?{0}'.format(xml_config),
+            video_id)
+
+        # The same Referer header is attached to every segment and to the
+        # multi-video result.
+        http_headers = {
+            'Referer': 'http://www.miomio.tv%s' % mioplayer_path,
+        }
+
+        if not int_or_none(xpath_text(vid_config, 'timelength')):
+            raise ExtractorError('Unable to load videos!', expected=True)
+
+        entries = []
+        for f in vid_config.findall('./durl'):
+            segment_url = xpath_text(f, 'url', 'video url')
+            if not segment_url:
+                # Skip <durl> elements that carry no URL.
+                continue
+            order = xpath_text(f, 'order', 'order')
+            segment_id = video_id
+            segment_title = title
+            if order:
+                segment_id += '-%s' % order
+                segment_title += ' part %s' % order
+            entries.append({
+                'id': segment_id,
+                'url': segment_url,
+                'title': segment_title,
+                # NOTE(review): 1000 is presumably a scale converting
+                # milliseconds to seconds -- confirm against int_or_none.
+                'duration': int_or_none(xpath_text(f, 'length', 'duration'), 1000),
+                'http_headers': http_headers,
+            })
+
+        if len(entries) == 1:
+            # A single segment is returned as a plain video under the
+            # original id and title (without the "part N" suffix).
+            segment = entries[0]
+            segment['id'] = video_id
+            segment['title'] = title
+            return segment
+
+        return {
+            '_type': 'multi_video',
+            'id': video_id,
+            'entries': entries,
+            'title': title,
+            'http_headers': http_headers,
+        }
index 1831c6749401405c5a39ca60f4347df03bfe3631..425a4ccf16fff96b1bface874748b93762d2194b 100644 (file)
@@ -10,7 +10,6 @@ from ..utils import (
     ExtractorError,
     HEADRequest,
     str_to_int,
-    parse_iso8601,
 )
 
 
@@ -27,8 +26,6 @@ class MixcloudIE(InfoExtractor):
             'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
             'uploader': 'Daniel Holbach',
             'uploader_id': 'dholbach',
-            'upload_date': '20111115',
-            'timestamp': 1321359578,
             'thumbnail': 're:https?://.*\.jpg',
             'view_count': int,
             'like_count': int,
@@ -37,32 +34,27 @@ class MixcloudIE(InfoExtractor):
         'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
         'info_dict': {
             'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat',
-            'ext': 'm4a',
-            'title': 'Electric Relaxation vol. 3',
+            'ext': 'mp3',
+            'title': 'Caribou 7 inch Vinyl Mix & Chat',
             'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
-            'uploader': 'Daniel Drumz',
+            'uploader': 'Gilles Peterson Worldwide',
             'uploader_id': 'gillespeterson',
-            'thumbnail': 're:https?://.*\.jpg',
+            'thumbnail': 're:https?://.*/images/',
             'view_count': int,
             'like_count': int,
         },
     }]
 
-    def _get_url(self, track_id, template_url):
-        server_count = 30
-        for i in range(server_count):
-            url = template_url % i
-            try:
-                # We only want to know if the request succeed
-                # don't download the whole file
-                self._request_webpage(
-                    HEADRequest(url), track_id,
-                    'Checking URL %d/%d ...' % (i + 1, server_count + 1))
-                return url
-            except ExtractorError:
-                pass
-
-        return None
+    def _check_url(self, url, track_id, ext):
+        """Return True if a HEAD request for `url` succeeds, else False.
+
+        `ext` is only used in the progress note shown for the request.
+        """
+        try:
+            # We only want to know whether the request succeeds;
+            # don't download the whole file
+            self._request_webpage(
+                HEADRequest(url), track_id,
+                'Trying %s URL' % ext)
+            return True
+        except ExtractorError:
+            return False
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -75,17 +67,13 @@ class MixcloudIE(InfoExtractor):
         preview_url = self._search_regex(
             r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url')
         song_url = preview_url.replace('/previews/', '/c/originals/')
-        template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
-        final_song_url = self._get_url(track_id, template_url)
-        if final_song_url is None:
-            self.to_screen('Trying with m4a extension')
-            template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
-            final_song_url = self._get_url(track_id, template_url)
-        if final_song_url is None:
-            raise ExtractorError('Unable to extract track url')
+        if not self._check_url(song_url, track_id, 'mp3'):
+            song_url = song_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
+            if not self._check_url(song_url, track_id, 'm4a'):
+                raise ExtractorError('Unable to extract track url')
 
         PREFIX = (
-            r'<span class="play-button[^"]*?"'
+            r'm-play-on-spacebar[^>]+'
             r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
         title = self._html_search_regex(
             PREFIX + r'm-title="([^"]+)"', webpage, 'title')
@@ -99,26 +87,21 @@ class MixcloudIE(InfoExtractor):
             r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
         description = self._og_search_description(webpage)
         like_count = str_to_int(self._search_regex(
-            [r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"',
-             r'/favorites/?">([0-9]+)<'],
+            r'\bbutton-favorite\b[^>]+m-ajax-toggle-count="([^"]+)"',
             webpage, 'like count', fatal=False))
         view_count = str_to_int(self._search_regex(
             [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
              r'/listeners/?">([0-9,.]+)</a>'],
             webpage, 'play count', fatal=False))
-        timestamp = parse_iso8601(self._search_regex(
-            r'<time itemprop="dateCreated" datetime="([^"]+)">',
-            webpage, 'upload date', default=None))
 
         return {
             'id': track_id,
             'title': title,
-            'url': final_song_url,
+            'url': song_url,
             'description': description,
             'thumbnail': thumbnail,
             'uploader': uploader,
             'uploader_id': uploader_id,
-            'timestamp': timestamp,
             'view_count': view_count,
             'like_count': like_count,
         }
index 1a241aca77983ac9626a53e59bf85ad4394cc8fd..e242b897f2b63cf624805c7564cf7e2f02a9d16b 100644 (file)
@@ -10,7 +10,21 @@ from ..utils import (
 
 
 class MLBIE(InfoExtractor):
-    _VALID_URL = r'https?://m(?:lb)?\.mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|(?:shared/video/embed/embed\.html|[^/]+/video/play\.jsp)\?.*?\bcontent_id=)(?P<id>n?\d+)'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:[\da-z_-]+\.)*mlb\.com/
+                        (?:
+                            (?:
+                                (?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|
+                                (?:
+                                    shared/video/embed/(?:embed|m-internal-embed)\.html|
+                                    (?:[^/]+/)+(?:play|index)\.jsp|
+                                )\?.*?\bcontent_id=
+                            )
+                            (?P<id>n?\d+)|
+                            (?:[^/]+/)*(?P<path>[^/]+)
+                        )
+                    '''
     _TESTS = [
         {
             'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
@@ -68,6 +82,18 @@ class MLBIE(InfoExtractor):
                 'thumbnail': 're:^https?://.*\.jpg$',
             },
         },
+        {
+            'url': 'http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer',
+            'md5': 'b190e70141fb9a1552a85426b4da1b5d',
+            'info_dict': {
+                'id': '75609783',
+                'ext': 'mp4',
+                'title': 'Must C: Pillar climbs for catch',
+                'description': '4/15/15: Blue Jays outfielder Kevin Pillar continues his defensive dominance by climbing the wall in left to rob Tim Beckham of a home run',
+                'timestamp': 1429124820,
+                'upload_date': '20150415',
+            }
+        },
         {
             'url': 'http://m.mlb.com/shared/video/embed/embed.html?content_id=35692085&topic_id=6479266&width=400&height=224&property=mlb',
             'only_matching': True,
@@ -80,12 +106,31 @@ class MLBIE(InfoExtractor):
             'url': 'http://mlb.mlb.com/es/video/play.jsp?content_id=36599553',
             'only_matching': True,
         },
+        {
+            'url': 'http://m.cardinals.mlb.com/stl/video/v51175783/atlstl-piscotty-makes-great-sliding-catch-on-line/?partnerId=as_mlb_20150321_42500876&adbid=579409712979910656&adbpl=tw&adbpr=52847728',
+            'only_matching': True,
+        },
+        {
+            # From http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer
+            'url': 'http://mlb.mlb.com/shared/video/embed/m-internal-embed.html?content_id=75609783&property=mlb&autoplay=true&hashmode=false&siteSection=mlb/multimedia/article_118550098/article_embed&club=mlb',
+            'only_matching': True,
+        },
+        {
+            'url': 'http://washington.nationals.mlb.com/mlb/gameday/index.jsp?c_id=was&gid=2015_05_09_atlmlb_wasmlb_1&lang=en&content_id=108309983&mode=video#',
+            'only_matching': True,
+        }
     ]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
 
+        if not video_id:
+            video_path = mobj.group('path')
+            webpage = self._download_webpage(url, video_path)
+            video_id = self._search_regex(
+                [r'data-video-?id="(\d+)"', r'content_id=(\d+)'], webpage, 'video id')
+
         detail = self._download_xml(
             'http://m.mlb.com/gen/multimedia/detail/%s/%s/%s/%s.xml'
             % (video_id[-3], video_id[-2], video_id[-1], video_id), video_id)
index 5de719bdc41d2af56d6133a85b998c4ed85af726..88dcd4f737544356091220d53078bc1c2e222d76 100644 (file)
@@ -9,6 +9,7 @@ from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
 )
+from ..utils import ExtractorError
 
 
 class MonikerIE(InfoExtractor):
@@ -40,6 +41,15 @@ class MonikerIE(InfoExtractor):
         video_id = self._match_id(url)
         orig_webpage = self._download_webpage(url, video_id)
 
+        if '>File Not Found<' in orig_webpage:
+            raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+
+        error = self._search_regex(
+            r'class="err">([^<]+)<', orig_webpage, 'error', default=None)
+        if error:
+            raise ExtractorError(
+                '%s returned error: %s' % (self.IE_NAME, error), expected=True)
+
         fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage)
         data = dict(fields)
 
index c11de1cb61b28d03ab2430ff1db3a82d317dc718..b48fac5e3e434569642284d0b6388cab34696b01 100644 (file)
@@ -25,6 +25,7 @@ def _media_xml_tag(tag):
 
 class MTVServicesInfoExtractor(InfoExtractor):
     _MOBILE_TEMPLATE = None
+    _LANG = None
 
     @staticmethod
     def _id_from_uri(uri):
@@ -118,6 +119,14 @@ class MTVServicesInfoExtractor(InfoExtractor):
         mediagen_doc = self._download_xml(mediagen_url, video_id,
                                           'Downloading video urls')
 
+        item = mediagen_doc.find('./video/item')
+        if item is not None and item.get('type') == 'text':
+            message = '%s returned error: ' % self.IE_NAME
+            if item.get('code') is not None:
+                message += '%s - ' % item.get('code')
+            message += item.text
+            raise ExtractorError(message, expected=True)
+
         description_node = itemdoc.find('description')
         if description_node is not None:
             description = description_node.text.strip()
@@ -161,8 +170,12 @@ class MTVServicesInfoExtractor(InfoExtractor):
         video_id = self._id_from_uri(uri)
         feed_url = self._get_feed_url(uri)
         data = compat_urllib_parse.urlencode({'uri': uri})
+        info_url = feed_url + '?'
+        if self._LANG:
+            info_url += 'lang=%s&' % self._LANG
+        info_url += data
         idoc = self._download_xml(
-            feed_url + '?' + data, video_id,
+            info_url, video_id,
             'Downloading info', transform_source=fix_xml_ampersands)
         return self.playlist_result(
             [self._get_video_info(item) for item in idoc.findall('.//item')])
index 3645d3033f74ae174e3eaa85ad55bbe677d9daba..dc2091be0d0c8706b2f3b6d78d88fa22fcb8b6d1 100644 (file)
@@ -10,11 +10,13 @@ from ..compat import (
 from ..utils import (
     ExtractorError,
     find_xpath_attr,
+    lowercase_escape,
+    unescapeHTML,
 )
 
 
 class NBCIE(InfoExtractor):
-    _VALID_URL = r'http://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
+    _VALID_URL = r'https?://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
 
     _TESTS = [
         {
@@ -37,19 +39,88 @@ class NBCIE(InfoExtractor):
             },
             'skip': 'Only works from US',
         },
+        {
+            'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821',
+            'info_dict': {
+                'id': '8iUuyzWDdYUZ',
+                'ext': 'flv',
+                'title': 'Star Wars Teaser',
+                'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442',
+            },
+            'skip': 'Only works from US',
+        },
+        {
+            # This video has expired but with an escaped embedURL
+            'url': 'http://www.nbc.com/parenthood/episode-guide/season-5/just-like-at-home/515',
+            'skip': 'Expired'
+        }
     ]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-        theplatform_url = self._search_regex(
-            '(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
-            webpage, 'theplatform url').replace('_no_endcard', '')
+        theplatform_url = unescapeHTML(lowercase_escape(self._html_search_regex(
+            [
+                r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
+                r'"embedURL"\s*:\s*"([^"]+)"'
+            ],
+            webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/')))
         if theplatform_url.startswith('//'):
             theplatform_url = 'http:' + theplatform_url
         return self.url_result(theplatform_url)
 
 
+class NBCSportsVPlayerIE(InfoExtractor):
+    _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
+
+    _TESTS = [{
+        'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI',
+        'info_dict': {
+            'id': '9CsDKds0kvHI',
+            'ext': 'flv',
+            'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
+            'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
+        }
+    }, {
+        'url': 'http://vplayer.nbcsports.com/p/BxmELC/nbc_embedshare/select/_hqLjQ95yx8Z',
+        'only_matching': True,
+    }]
+
+    @staticmethod
+    def _extract_url(webpage):
+        iframe_m = re.search(
+            r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage)
+        if iframe_m:
+            return iframe_m.group('url')
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        theplatform_url = self._og_search_video_url(webpage)
+        return self.url_result(theplatform_url, 'ThePlatform')
+
+
+class NBCSportsIE(InfoExtractor):
+    # Does not include https because its certificate is invalid
+    _VALID_URL = r'http://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
+
+    _TEST = {
+        'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
+        'info_dict': {
+            'id': 'PHJSaFWbrTY9',
+            'ext': 'flv',
+            'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
+            'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        return self.url_result(
+            NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer')
+
+
 class NBCNewsIE(InfoExtractor):
     _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
         (?:video/.+?/(?P<id>\d+)|
index f49c666909a270ad18e36a2d1177ef681adc3121..79a13958b05e25a1c9e586168bb3a10742fbe01f 100644 (file)
@@ -8,41 +8,11 @@ from ..utils import (
     ExtractorError,
     int_or_none,
     qualities,
+    parse_duration,
 )
 
 
-class NDRIE(InfoExtractor):
-    IE_NAME = 'ndr'
-    IE_DESC = 'NDR.de - Mediathek'
-    _VALID_URL = r'https?://www\.ndr\.de/.+?(?P<id>\d+)\.html'
-
-    _TESTS = [
-        {
-            'url': 'http://www.ndr.de/fernsehen/sendungen/nordmagazin/Kartoffeltage-in-der-Lewitz,nordmagazin25866.html',
-            'md5': '5bc5f5b92c82c0f8b26cddca34f8bb2c',
-            'note': 'Video file',
-            'info_dict': {
-                'id': '25866',
-                'ext': 'mp4',
-                'title': 'Kartoffeltage in der Lewitz',
-                'description': 'md5:48c4c04dde604c8a9971b3d4e3b9eaa8',
-                'duration': 166,
-            }
-        },
-        {
-            'url': 'http://www.ndr.de/info/audio51535.html',
-            'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
-            'note': 'Audio file',
-            'info_dict': {
-                'id': '51535',
-                'ext': 'mp3',
-                'title': 'La Valette entgeht der Hinrichtung',
-                'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
-                'duration': 884,
-            }
-        }
-    ]
-
+class NDRBaseIE(InfoExtractor):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
@@ -54,7 +24,11 @@ class NDRIE(InfoExtractor):
         if description:
             description = description.strip()
 
-        duration = int_or_none(self._html_search_regex(r'duration: (\d+),\n', page, 'duration', fatal=False))
+        duration = int_or_none(self._html_search_regex(r'duration: (\d+),\n', page, 'duration', default=None))
+        if not duration:
+            duration = parse_duration(self._html_search_regex(
+                r'(<span class="min">\d+</span>:<span class="sec">\d+</span>)',
+                page, 'duration', default=None))
 
         formats = []
 
@@ -92,3 +66,65 @@ class NDRIE(InfoExtractor):
             'duration': duration,
             'formats': formats,
         }
+
+
+class NDRIE(NDRBaseIE):
+    IE_NAME = 'ndr'
+    IE_DESC = 'NDR.de - Mediathek'
+    _VALID_URL = r'https?://www\.ndr\.de/.+?(?P<id>\d+)\.html'
+
+    _TESTS = [
+        {
+            'url': 'http://www.ndr.de/fernsehen/sendungen/nordmagazin/Kartoffeltage-in-der-Lewitz,nordmagazin25866.html',
+            'md5': '5bc5f5b92c82c0f8b26cddca34f8bb2c',
+            'note': 'Video file',
+            'info_dict': {
+                'id': '25866',
+                'ext': 'mp4',
+                'title': 'Kartoffeltage in der Lewitz',
+                'description': 'md5:48c4c04dde604c8a9971b3d4e3b9eaa8',
+                'duration': 166,
+            },
+            'skip': '404 Not found',
+        },
+        {
+            'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
+            'md5': 'dadc003c55ae12a5d2f6bd436cd73f59',
+            'info_dict': {
+                'id': '988',
+                'ext': 'mp4',
+                'title': 'Party, Pötte und Parade',
+                'description': 'Hunderttausende feiern zwischen Speicherstadt und St. Pauli den 826. Hafengeburtstag. Die NDR Sondersendung zeigt die schönsten und spektakulärsten Bilder vom Auftakt.',
+                'duration': 3498,
+            },
+        },
+        {
+            'url': 'http://www.ndr.de/info/audio51535.html',
+            'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
+            'note': 'Audio file',
+            'info_dict': {
+                'id': '51535',
+                'ext': 'mp3',
+                'title': 'La Valette entgeht der Hinrichtung',
+                'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
+                'duration': 884,
+            }
+        }
+    ]
+
+
+class NJoyIE(NDRBaseIE):
+    IE_NAME = 'N-JOY'
+    _VALID_URL = r'https?://www\.n-joy\.de/.+?(?P<id>\d+)\.html'
+
+    _TEST = {
+        'url': 'http://www.n-joy.de/entertainment/comedy/comedy_contest/Benaissa-beim-NDR-Comedy-Contest,comedycontest2480.html',
+        'md5': 'cb63be60cd6f9dd75218803146d8dc67',
+        'info_dict': {
+            'id': '2480',
+            'ext': 'mp4',
+            'title': 'Benaissa beim NDR Comedy Contest',
+            'description': 'Von seinem sehr "behaarten" Leben lässt sich Benaissa trotz aller Schwierigkeiten nicht unterkriegen.',
+            'duration': 654,
+        }
+    }
index bc17e20aa9d736eb9e4ba0a39929f20db47d8465..0d165a82ad53ac8ac16ca8943c934db9fb28b720 100644 (file)
@@ -49,7 +49,7 @@ class NetzkinoIE(InfoExtractor):
             'http://www.netzkino.de/beta/dist/production.min.js', video_id,
             note='Downloading player code')
         avo_js = self._search_regex(
-            r'window\.avoCore\s*=.*?urlTemplate:\s*(\{.*?"\})',
+            r'var urlTemplate=(\{.*?"\})',
             production_js, 'URL templates')
         templates = self._parse_json(
             avo_js, video_id, transform_source=js_to_json)
index 40746599880469f5c79110020f39d31f2a8cbff6..279b18386197560346e1cbce716ecf7ff61af2f9 100644 (file)
@@ -21,6 +21,9 @@ class NHLBaseInfoExtractor(InfoExtractor):
         return json_string.replace('\\\'', '\'')
 
     def _real_extract_video(self, video_id):
+        vid_parts = video_id.split(',')
+        if len(vid_parts) == 3:
+            video_id = '%s0%s%s-X-h' % (vid_parts[0][:4], vid_parts[1], vid_parts[2].rjust(4, '0'))
         json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id
         data = self._download_json(
             json_url, video_id, transform_source=self._fix_json)
@@ -47,7 +50,7 @@ class NHLBaseInfoExtractor(InfoExtractor):
             video_url = initial_video_url
 
         join = compat_urlparse.urljoin
-        return {
+        ret = {
             'id': video_id,
             'title': info['name'],
             'url': video_url,
@@ -56,11 +59,20 @@ class NHLBaseInfoExtractor(InfoExtractor):
             'thumbnail': join(join(video_url, '/u/'), info['bigImage']),
             'upload_date': unified_strdate(info['releaseDate'].split('.')[0]),
         }
+        if video_url.startswith('rtmp:'):
+            mobj = re.match(r'(?P<tc_url>rtmp://[^/]+/(?P<app>[a-z0-9/]+))/(?P<play_path>mp4:.*)', video_url)
+            ret.update({
+                'tc_url': mobj.group('tc_url'),
+                'play_path': mobj.group('play_path'),
+                'app': mobj.group('app'),
+                'no_resume': True,
+            })
+        return ret
 
 
 class NHLIE(NHLBaseInfoExtractor):
     IE_NAME = 'nhl.com'
-    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console)?(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
+    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console)?(?:\?(?:.*?[?&])?)(?:id|hlg)=(?P<id>[-0-9a-zA-Z,]+)'
 
     _TESTS = [{
         'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
@@ -101,6 +113,29 @@ class NHLIE(NHLBaseInfoExtractor):
     }, {
         'url': 'http://video.nhl.com/videocenter/?id=736722',
         'only_matching': True,
+    }, {
+        'url': 'http://video.nhl.com/videocenter/console?hlg=20142015,2,299&lang=en',
+        'md5': '076fcb88c255154aacbf0a7accc3f340',
+        'info_dict': {
+            'id': '2014020299-X-h',
+            'ext': 'mp4',
+            'title': 'Penguins at Islanders / Game Highlights',
+            'description': 'Home broadcast - Pittsburgh Penguins at New York Islanders - November 22, 2014',
+            'duration': 268,
+            'upload_date': '20141122',
+        }
+    }, {
+        'url': 'http://video.oilers.nhl.com/videocenter/console?id=691469&catid=4',
+        'info_dict': {
+            'id': '691469',
+            'ext': 'mp4',
+            'title': 'RAW | Craig MacTavish Full Press Conference',
+            'description': 'Oilers GM Craig MacTavish addresses the media at Rexall Place on Friday.',
+            'upload_date': '20141205',
+        },
+        'params': {
+            'skip_download': True,  # Requires rtmpdump
+        }
     }]
 
     def _real_extract(self, url):
index 4c18904169d3f69a0bf7e95fb21d98218bca7e91..3cecebf95a4acd0a388da5984aebc2822e79b32c 100644 (file)
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
 
 import re
 import json
+import datetime
 
 from .common import InfoExtractor
 from ..compat import (
@@ -14,7 +15,9 @@ from ..utils import (
     ExtractorError,
     int_or_none,
     parse_duration,
-    unified_strdate,
+    parse_iso8601,
+    xpath_text,
+    determine_ext,
 )
 
 
@@ -22,7 +25,7 @@ class NiconicoIE(InfoExtractor):
     IE_NAME = 'niconico'
     IE_DESC = 'ニコニコ動画'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.nicovideo.jp/watch/sm22312215',
         'md5': 'd1a75c0823e2f629128c43e1212760f9',
         'info_dict': {
@@ -32,16 +35,53 @@ class NiconicoIE(InfoExtractor):
             'uploader': 'takuya0301',
             'uploader_id': '2698420',
             'upload_date': '20131123',
+            'timestamp': 1385182762,
             'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
             'duration': 33,
         },
-        'params': {
-            'username': 'ydl.niconico@gmail.com',
-            'password': 'youtube-dl',
+    }, {
+        # Files downloaded with and without credentials are different, so omit
+        # the md5 field
+        'url': 'http://www.nicovideo.jp/watch/nm14296458',
+        'info_dict': {
+            'id': 'nm14296458',
+            'ext': 'swf',
+            'title': '【鏡音リン】Dance on media【オリジナル】take2!',
+            'description': 'md5:689f066d74610b3b22e0f1739add0f58',
+            'uploader': 'りょうた',
+            'uploader_id': '18822557',
+            'upload_date': '20110429',
+            'timestamp': 1304065916,
+            'duration': 209,
         },
-    }
+    }, {
+        # video exists but is marked as "deleted"
+        # md5 is unstable
+        'url': 'http://www.nicovideo.jp/watch/sm10000',
+        'info_dict': {
+            'id': 'sm10000',
+            'ext': 'unknown_video',
+            'description': 'deleted',
+            'title': 'ドラえもんエターナル第3話「決戦第3新東京市」<前編>',
+            'upload_date': '20071224',
+            'timestamp': 1198527840,  # timestamp field has different value if logged in
+            'duration': 304,
+        },
+    }, {
+        'url': 'http://www.nicovideo.jp/watch/so22543406',
+        'info_dict': {
+            'id': '1388129933',
+            'ext': 'mp4',
+            'title': '【第1回】RADIOアニメロミックス ラブライブ!~のぞえりRadio Garden~',
+            'description': 'md5:b27d224bb0ff53d3c8269e9f8b561cf1',
+            'timestamp': 1388851200,
+            'upload_date': '20140104',
+            'uploader': 'アニメロチャンネル',
+            'uploader_id': '312',
+        }
+    }]
 
-    _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/((?:[a-z]{2})?[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
     _NETRC_MACHINE = 'niconico'
     # Determine whether the downloader used authentication to download video
     _AUTHENTICATED = False
@@ -76,12 +116,15 @@ class NiconicoIE(InfoExtractor):
         return True
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(1)
+        video_id = self._match_id(url)
 
-        # Get video webpage. We are not actually interested in it, but need
-        # the cookies in order to be able to download the info webpage
-        self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id)
+        # Get video webpage. We are not actually interested in it for normal
+        # cases, but need the cookies in order to be able to download the
+        # info webpage
+        webpage, handle = self._download_webpage_handle(
+            'http://www.nicovideo.jp/watch/' + video_id, video_id)
+        if video_id.startswith('so'):
+            video_id = self._match_id(handle.geturl())
 
         video_info = self._download_xml(
             'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
@@ -90,7 +133,7 @@ class NiconicoIE(InfoExtractor):
         if self._AUTHENTICATED:
             # Get flv info
             flv_info_webpage = self._download_webpage(
-                'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
+                'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
                 video_id, 'Downloading flv info')
         else:
             # Get external player info
@@ -111,22 +154,78 @@ class NiconicoIE(InfoExtractor):
                 flv_info_request, video_id,
                 note='Downloading flv info', errnote='Unable to download flv info')
 
-        if 'deleted=' in flv_info_webpage:
-            raise ExtractorError('The video has been deleted.',
-                                 expected=True)
-        video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
+        flv_info = compat_urlparse.parse_qs(flv_info_webpage)
+        if 'url' not in flv_info:
+            if 'deleted' in flv_info:
+                raise ExtractorError('The video has been deleted.',
+                                     expected=True)
+            else:
+                raise ExtractorError('Unable to find video URL')
+
+        video_real_url = flv_info['url'][0]
 
         # Start extracting information
-        title = video_info.find('.//title').text
-        extension = video_info.find('.//movie_type').text
+        title = xpath_text(video_info, './/title')
+        if not title:
+            title = self._og_search_title(webpage, default=None)
+        if not title:
+            title = self._html_search_regex(
+                r'<span[^>]+class="videoHeaderTitle"[^>]*>([^<]+)</span>',
+                webpage, 'video title')
+
+        watch_api_data_string = self._html_search_regex(
+            r'<div[^>]+id="watchAPIDataContainer"[^>]+>([^<]+)</div>',
+            webpage, 'watch api data', default=None)
+        watch_api_data = self._parse_json(watch_api_data_string, video_id) if watch_api_data_string else {}
+        video_detail = watch_api_data.get('videoDetail', {})
+
+        extension = xpath_text(video_info, './/movie_type')
+        if not extension:
+            extension = determine_ext(video_real_url)
         video_format = extension.upper()
-        thumbnail = video_info.find('.//thumbnail_url').text
-        description = video_info.find('.//description').text
-        upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0])
-        view_count = int_or_none(video_info.find('.//view_counter').text)
-        comment_count = int_or_none(video_info.find('.//comment_num').text)
-        duration = parse_duration(video_info.find('.//length').text)
-        webpage_url = video_info.find('.//watch_url').text
+
+        thumbnail = (
+            xpath_text(video_info, './/thumbnail_url') or
+            self._html_search_meta('image', webpage, 'thumbnail', default=None) or
+            video_detail.get('thumbnail'))
+
+        description = xpath_text(video_info, './/description')
+
+        timestamp = parse_iso8601(xpath_text(video_info, './/first_retrieve'))
+        if not timestamp:
+            match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
+            if match:
+                timestamp = parse_iso8601(match.replace('+', ':00+'))
+        if not timestamp and video_detail.get('postedAt'):
+            timestamp = parse_iso8601(
+                video_detail['postedAt'].replace('/', '-'),
+                delimiter=' ', timezone=datetime.timedelta(hours=9))
+
+        view_count = int_or_none(xpath_text(video_info, './/view_counter'))
+        if not view_count:
+            match = self._html_search_regex(
+                r'>Views: <strong[^>]*>([^<]+)</strong>',
+                webpage, 'view count', default=None)
+            if match:
+                view_count = int_or_none(match.replace(',', ''))
+        view_count = view_count or video_detail.get('viewCount')
+
+        comment_count = int_or_none(xpath_text(video_info, './/comment_num'))
+        if not comment_count:
+            match = self._html_search_regex(
+                r'>Comments: <strong[^>]*>([^<]+)</strong>',
+                webpage, 'comment count', default=None)
+            if match:
+                comment_count = int_or_none(match.replace(',', ''))
+        comment_count = comment_count or video_detail.get('commentCount')
+
+        duration = (parse_duration(
+            xpath_text(video_info, './/length') or
+            self._html_search_meta(
+                'video:duration', webpage, 'video duration', default=None)) or
+            video_detail.get('length'))
+
+        webpage_url = xpath_text(video_info, './/watch_url') or url
 
         if video_info.find('.//ch_id') is not None:
             uploader_id = video_info.find('.//ch_id').text
@@ -146,7 +245,7 @@ class NiconicoIE(InfoExtractor):
             'thumbnail': thumbnail,
             'description': description,
             'uploader': uploader,
-            'upload_date': upload_date,
+            'timestamp': timestamp,
             'uploader_id': uploader_id,
             'view_count': view_count,
             'comment_count': comment_count,
index 251e6da07457b7e7be6b5703b5769214ae299c3d..664dc81d47ce7af613636022f4e540dffd67f8b6 100644 (file)
@@ -14,7 +14,9 @@ from ..compat import (
 from ..utils import (
     clean_html,
     ExtractorError,
-    unified_strdate,
+    int_or_none,
+    float_or_none,
+    parse_iso8601,
 )
 
 
@@ -25,21 +27,38 @@ class NocoIE(InfoExtractor):
     _SUB_LANG_TEMPLATE = '&sub_lang=%s'
     _NETRC_MACHINE = 'noco'
 
-    _TEST = {
-        'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/',
-        'md5': '0a993f0058ddbcd902630b2047ef710e',
-        'info_dict': {
-            'id': '11538',
-            'ext': 'mp4',
-            'title': 'Ami Ami Idol - Hello! France',
-            'description': 'md5:4eaab46ab68fa4197a317a88a53d3b86',
-            'upload_date': '20140412',
-            'uploader': 'Nolife',
-            'uploader_id': 'NOL',
-            'duration': 2851.2,
+    _TESTS = [
+        {
+            'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/',
+            'md5': '0a993f0058ddbcd902630b2047ef710e',
+            'info_dict': {
+                'id': '11538',
+                'ext': 'mp4',
+                'title': 'Ami Ami Idol - Hello! France',
+                'description': 'md5:4eaab46ab68fa4197a317a88a53d3b86',
+                'upload_date': '20140412',
+                'uploader': 'Nolife',
+                'uploader_id': 'NOL',
+                'duration': 2851.2,
+            },
+            'skip': 'Requires noco account',
         },
-        'skip': 'Requires noco account',
-    }
+        {
+            'url': 'http://noco.tv/emission/12610/lbl42/the-guild/s01e01-wake-up-call',
+            'md5': 'c190f1f48e313c55838f1f412225934d',
+            'info_dict': {
+                'id': '12610',
+                'ext': 'mp4',
+                'title': 'The Guild #1 - Wake-Up Call',
+                'timestamp': 1403863200,
+                'upload_date': '20140627',
+                'uploader': 'LBL42',
+                'uploader_id': 'LBL',
+                'duration': 233.023,
+            },
+            'skip': 'Requires noco account',
+        }
+    ]
 
     def _real_initialize(self):
         self._login()
@@ -90,51 +109,66 @@ class NocoIE(InfoExtractor):
             'shows/%s/medias' % video_id,
             video_id, 'Downloading video JSON')
 
+        show = self._call_api(
+            'shows/by_id/%s' % video_id,
+            video_id, 'Downloading show JSON')[0]
+
+        options = self._call_api(
+            'users/init', video_id,
+            'Downloading user options JSON')['options']
+        audio_lang_pref = options.get('audio_language') or options.get('language', 'fr')
+
+        if audio_lang_pref == 'original':
+            audio_lang_pref = show['original_lang']
+        if len(medias) == 1:
+            audio_lang_pref = list(medias.keys())[0]
+        elif audio_lang_pref not in medias:
+            audio_lang_pref = 'fr'
+
         qualities = self._call_api(
             'qualities',
             video_id, 'Downloading qualities JSON')
 
         formats = []
 
-        for lang, lang_dict in medias['fr']['video_list'].items():
-            for format_id, fmt in lang_dict['quality_list'].items():
-                format_id_extended = '%s-%s' % (lang, format_id) if lang != 'none' else format_id
-
-                video = self._call_api(
-                    'shows/%s/video/%s/fr' % (video_id, format_id.lower()),
-                    video_id, 'Downloading %s video JSON' % format_id_extended,
-                    lang if lang != 'none' else None)
-
-                file_url = video['file']
-                if not file_url:
-                    continue
-
-                if file_url in ['forbidden', 'not found']:
-                    popmessage = video['popmessage']
-                    self._raise_error(popmessage['title'], popmessage['message'])
-
-                formats.append({
-                    'url': file_url,
-                    'format_id': format_id_extended,
-                    'width': fmt['res_width'],
-                    'height': fmt['res_lines'],
-                    'abr': fmt['audiobitrate'],
-                    'vbr': fmt['videobitrate'],
-                    'filesize': fmt['filesize'],
-                    'format_note': qualities[format_id]['quality_name'],
-                    'preference': qualities[format_id]['priority'],
-                })
+        for audio_lang, audio_lang_dict in medias.items():
+            preference = 1 if audio_lang == audio_lang_pref else 0
+            for sub_lang, lang_dict in audio_lang_dict['video_list'].items():
+                for format_id, fmt in lang_dict['quality_list'].items():
+                    format_id_extended = 'audio-%s_sub-%s_%s' % (audio_lang, sub_lang, format_id)
+
+                    video = self._call_api(
+                        'shows/%s/video/%s/%s' % (video_id, format_id.lower(), audio_lang),
+                        video_id, 'Downloading %s video JSON' % format_id_extended,
+                        sub_lang if sub_lang != 'none' else None)
+
+                    file_url = video['file']
+                    if not file_url:
+                        continue
+
+                    if file_url in ['forbidden', 'not found']:
+                        popmessage = video['popmessage']
+                        self._raise_error(popmessage['title'], popmessage['message'])
+
+                    formats.append({
+                        'url': file_url,
+                        'format_id': format_id_extended,
+                        'width': int_or_none(fmt.get('res_width')),
+                        'height': int_or_none(fmt.get('res_lines')),
+                        'abr': int_or_none(fmt.get('audiobitrate')),
+                        'vbr': int_or_none(fmt.get('videobitrate')),
+                        'filesize': int_or_none(fmt.get('filesize')),
+                        'format_note': qualities[format_id].get('quality_name'),
+                        'quality': qualities[format_id].get('priority'),
+                        'preference': preference,
+                    })
 
         self._sort_formats(formats)
 
-        show = self._call_api(
-            'shows/by_id/%s' % video_id,
-            video_id, 'Downloading show JSON')[0]
-
-        upload_date = unified_strdate(show['online_date_start_utc'])
-        uploader = show['partner_name']
-        uploader_id = show['partner_key']
-        duration = show['duration_ms'] / 1000.0
+        timestamp = parse_iso8601(show.get('online_date_start_utc'), ' ')
+        uploader = show.get('partner_name')
+        uploader_id = show.get('partner_key')
+        duration = float_or_none(show.get('duration_ms'), 1000)
 
         thumbnails = []
         for thumbnail_key, thumbnail_url in show.items():
@@ -166,7 +200,7 @@ class NocoIE(InfoExtractor):
             'title': title,
             'description': description,
             'thumbnails': thumbnails,
-            'upload_date': upload_date,
+            'timestamp': timestamp,
             'uploader': uploader,
             'uploader_id': uploader_id,
             'duration': duration,
index 9c01eb0af8067948878581a0a30d9be326f990e9..5d84485714b9f360d47c8676710e9c3e6d9578c7 100644 (file)
@@ -219,7 +219,8 @@ class NPOLiveIE(NPOBaseIE):
         if streams:
             for stream in streams:
                 stream_type = stream.get('type').lower()
-                if stream_type == 'ss':
+                # smooth streaming is not supported
+                if stream_type in ['ss', 'ms']:
                     continue
                 stream_info = self._download_json(
                     'http://ida.omroep.nl/aapi/?stream=%s&token=%s&type=jsonp'
@@ -230,7 +231,10 @@ class NPOLiveIE(NPOBaseIE):
                 stream_url = self._download_json(
                     stream_info['stream'], display_id,
                     'Downloading %s URL' % stream_type,
-                    transform_source=strip_jsonp)
+                    'Unable to download %s URL' % stream_type,
+                    transform_source=strip_jsonp, fatal=False)
+                if not stream_url:
+                    continue
                 if stream_type == 'hds':
                     f4m_formats = self._extract_f4m_formats(stream_url, display_id)
                     # f4m downloader downloads only piece of live stream
@@ -242,6 +246,7 @@ class NPOLiveIE(NPOBaseIE):
                 else:
                     formats.append({
                         'url': stream_url,
+                        'preference': -10,
                     })
 
         self._sort_formats(formats)
index 1e4cfa2e7c8c5e3ae05c7d5fbc11242a334a5322..cc70c295014f95fcb7e74f2f009889b5ca135663 100644 (file)
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     ExtractorError,
     float_or_none,
@@ -14,46 +13,48 @@ from ..utils import (
 
 
 class NRKIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?nrk\.no/(?:video|lyd)/[^/]+/(?P<id>[\dA-F]{16})'
+    _VALID_URL = r'(?:nrk:|http://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)'
 
     _TESTS = [
         {
-            'url': 'http://www.nrk.no/video/dompap_og_andre_fugler_i_piip_show/D0FA54B5C8B6CE59/emne/piipshow/',
-            'md5': 'a6eac35052f3b242bb6bb7f43aed5886',
+            'url': 'http://www.nrk.no/video/PS*150533',
+            'md5': 'bccd850baebefe23b56d708a113229c2',
             'info_dict': {
                 'id': '150533',
                 'ext': 'flv',
                 'title': 'Dompap og andre fugler i Piip-Show',
-                'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f'
+                'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
+                'duration': 263,
             }
         },
         {
-            'url': 'http://www.nrk.no/lyd/lyd_av_oppleser_for_blinde/AEFDDD5473BA0198/',
-            'md5': '3471f2a51718195164e88f46bf427668',
+            'url': 'http://www.nrk.no/video/PS*154915',
+            'md5': '0b1493ba1aae7d9579a5ad5531bc395a',
             'info_dict': {
                 'id': '154915',
                 'ext': 'flv',
                 'title': 'Slik høres internett ut når du er blind',
                 'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
+                'duration': 20,
             }
         },
     ]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        page = self._download_webpage(url, video_id)
-
-        video_id = self._html_search_regex(r'<div class="nrk-video" data-nrk-id="(\d+)">', page, 'video id')
+        video_id = self._match_id(url)
 
         data = self._download_json(
-            'http://v7.psapi.nrk.no/mediaelement/%s' % video_id, video_id, 'Downloading media JSON')
+            'http://v8.psapi.nrk.no/mediaelement/%s' % video_id,
+            video_id, 'Downloading media JSON')
 
         if data['usageRights']['isGeoBlocked']:
-            raise ExtractorError('NRK har ikke rettig-heter til å vise dette programmet utenfor Norge', expected=True)
+            raise ExtractorError(
+                'NRK har ikke rettig-heter til å vise dette programmet utenfor Norge',
+                expected=True)
+
+        video_url = data['mediaUrl'] + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81'
 
-        video_url = data['mediaUrl'] + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124'
+        duration = parse_duration(data.get('duration'))
 
         images = data.get('images')
         if images:
@@ -69,10 +70,51 @@ class NRKIE(InfoExtractor):
             'ext': 'flv',
             'title': data['title'],
             'description': data['description'],
+            'duration': duration,
             'thumbnail': thumbnail,
         }
 
 
+class NRKPlaylistIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?nrk\.no/(?!video)(?:[^/]+/)+(?P<id>[^/]+)'
+
+    _TESTS = [{
+        'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
+        'info_dict': {
+            'id': 'gjenopplev-den-historiske-solformorkelsen-1.12270763',
+            'title': 'Gjenopplev den historiske solformørkelsen',
+            'description': 'md5:c2df8ea3bac5654a26fc2834a542feed',
+        },
+        'playlist_count': 2,
+    }, {
+        'url': 'http://www.nrk.no/kultur/bok/rivertonprisen-til-karin-fossum-1.12266449',
+        'info_dict': {
+            'id': 'rivertonprisen-til-karin-fossum-1.12266449',
+            'title': 'Rivertonprisen til Karin Fossum',
+            'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.',
+        },
+        'playlist_count': 5,
+    }]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        entries = [
+            self.url_result('nrk:%s' % video_id, 'NRK')
+            for video_id in re.findall(
+                r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"',
+                webpage)
+        ]
+
+        playlist_title = self._og_search_title(webpage)
+        playlist_description = self._og_search_description(webpage)
+
+        return self.playlist_result(
+            entries, playlist_id, playlist_title, playlist_description)
+
+
 class NRKTVIE(InfoExtractor):
     _VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
 
@@ -149,9 +191,6 @@ class NRKTVIE(InfoExtractor):
         }
     ]
 
-    def _seconds2str(self, s):
-        return '%02d:%02d:%02d.%03d' % (s / 3600, (s % 3600) / 60, s % 60, (s % 1) * 1000)
-
     def _debug_print(self, txt):
         if self._downloader.params.get('verbose', False):
             self.to_screen('[debug] %s' % txt)
@@ -160,20 +199,10 @@ class NRKTVIE(InfoExtractor):
         url = "%s%s" % (baseurl, subtitlesurl)
         self._debug_print('%s: Subtitle url: %s' % (video_id, url))
         captions = self._download_xml(
-            url, video_id, 'Downloading subtitles',
-            transform_source=lambda s: s.replace(r'<br />', '\r\n'))
+            url, video_id, 'Downloading subtitles')
         lang = captions.get('lang', 'no')
-        ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/ns/ttml}'))
-        srt = ''
-        for pos, p in enumerate(ps):
-            begin = parse_duration(p.get('begin'))
-            duration = parse_duration(p.get('dur'))
-            starttime = self._seconds2str(begin)
-            endtime = self._seconds2str(begin + duration)
-            srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (compat_str(pos), starttime, endtime, p.text)
         return {lang: [
             {'ext': 'ttml', 'url': url},
-            {'ext': 'srt', 'data': srt},
         ]}
 
     def _extract_f4m(self, manifest_url, video_id):
index 56e1cad3b0021431721b59df2162feaf7e0c357b..7f254b867da66f70a79ff7aac5d81eb6f37bd997 100644 (file)
@@ -1,39 +1,22 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..utils import parse_iso8601
-
+from ..utils import (
+    float_or_none,
+    int_or_none,
+    parse_iso8601,
+)
 
-class NYTimesIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?nytimes\.com/video/(?:[^/]+/)+(?P<id>\d+)'
-
-    _TEST = {
-        'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
-        'md5': '18a525a510f942ada2720db5f31644c0',
-        'info_dict': {
-            'id': '100000002847155',
-            'ext': 'mov',
-            'title': 'Verbatim: What Is a Photocopier?',
-            'description': 'md5:93603dada88ddbda9395632fdc5da260',
-            'timestamp': 1398631707,
-            'upload_date': '20140427',
-            'uploader': 'Brett Weiner',
-            'duration': 419,
-        }
-    }
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
 
+class NYTimesBaseIE(InfoExtractor):
+    def _extract_video_from_id(self, video_id):
         video_data = self._download_json(
-            'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id, video_id, 'Downloading video JSON')
+            'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id,
+            video_id, 'Downloading video JSON')
 
         title = video_data['headline']
-        description = video_data['summary']
-        duration = video_data['duration'] / 1000.0
+        description = video_data.get('summary')
+        duration = float_or_none(video_data.get('duration'), 1000)
 
         uploader = video_data['byline']
         timestamp = parse_iso8601(video_data['publication_date'][:-8])
@@ -49,11 +32,11 @@ class NYTimesIE(InfoExtractor):
         formats = [
             {
                 'url': video['url'],
-                'format_id': video['type'],
-                'vcodec': video['video_codec'],
-                'width': video['width'],
-                'height': video['height'],
-                'filesize': get_file_size(video['fileSize']),
+                'format_id': video.get('type'),
+                'vcodec': video.get('video_codec'),
+                'width': int_or_none(video.get('width')),
+                'height': int_or_none(video.get('height')),
+                'filesize': get_file_size(video.get('fileSize')),
             } for video in video_data['renditions']
         ]
         self._sort_formats(formats)
@@ -61,7 +44,8 @@ class NYTimesIE(InfoExtractor):
         thumbnails = [
             {
                 'url': 'http://www.nytimes.com/%s' % image['url'],
-                'resolution': '%dx%d' % (image['width'], image['height']),
+                'width': int_or_none(image.get('width')),
+                'height': int_or_none(image.get('height')),
             } for image in video_data['images']
         ]
 
@@ -75,3 +59,59 @@ class NYTimesIE(InfoExtractor):
             'formats': formats,
             'thumbnails': thumbnails,
         }
+
+
+class NYTimesIE(NYTimesBaseIE):
+    _VALID_URL = r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)'
+
+    _TESTS = [{
+        'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
+        'md5': '18a525a510f942ada2720db5f31644c0',
+        'info_dict': {
+            'id': '100000002847155',
+            'ext': 'mov',
+            'title': 'Verbatim: What Is a Photocopier?',
+            'description': 'md5:93603dada88ddbda9395632fdc5da260',
+            'timestamp': 1398631707,
+            'upload_date': '20140427',
+            'uploader': 'Brett Weiner',
+            'duration': 419,
+        }
+    }, {
+        'url': 'http://www.nytimes.com/video/travel/100000003550828/36-hours-in-dubai.html',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        return self._extract_video_from_id(video_id)
+
+
+class NYTimesArticleIE(NYTimesBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?nytimes\.com/(.(?<!video))*?/(?:[^/]+/)*(?P<id>[^.]+)(?:\.html)?'
+    _TESTS = [{
+        'url': 'http://www.nytimes.com/2015/04/14/business/owner-of-gravity-payments-a-credit-card-processor-is-setting-a-new-minimum-wage-70000-a-year.html?_r=0',
+        'md5': 'e2076d58b4da18e6a001d53fd56db3c9',
+        'info_dict': {
+            'id': '100000003628438',
+            'ext': 'mov',
+            'title': 'New Minimum Wage: $70,000 a Year',
+            'description': 'Dan Price, C.E.O. of Gravity Payments, surprised his 120-person staff by announcing that he planned over the next three years to raise the salary of every employee to $70,000 a year.',
+            'timestamp': 1429033037,
+            'upload_date': '20150414',
+            'uploader': 'Matthew Williams',
+        }
+    }, {
+        'url': 'http://www.nytimes.com/news/minute/2014/03/17/times-minute-whats-next-in-crimea/?_php=true&_type=blogs&_php=true&_type=blogs&_r=1',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_id = self._html_search_regex(r'data-videoid="(\d+)"', webpage, 'video id')
+
+        return self._extract_video_from_id(video_id)
index 155d0ee6a834fa6fb551900e0d0c35dcf0007a5c..fbc521d1aae02077ae62c5cd0a6c5f9cdcff014a 100644 (file)
@@ -6,6 +6,7 @@ from ..utils import (
     unified_strdate,
     int_or_none,
     qualities,
+    unescapeHTML,
 )
 
 
@@ -36,8 +37,8 @@ class OdnoklassnikiIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         player = self._parse_json(
-            self._search_regex(
-                r"OKVideo\.start\(({.+?})\s*,\s*'VideoAutoplay_player'", webpage, 'player'),
+            unescapeHTML(self._search_regex(
+                r'data-attributes="([^"]+)"', webpage, 'player')),
             video_id)
 
         metadata = self._parse_json(player['flashvars']['metadata'], video_id)
index d5b05c18febb580a448263b4f7b2876ef3234957..c0e6d643d51982a8f8e0694329f940345c5f1284 100644 (file)
@@ -1,11 +1,14 @@
 from __future__ import unicode_literals
 import re
 import json
+import base64
 
 from .common import InfoExtractor
 from ..utils import (
     unescapeHTML,
     ExtractorError,
+    determine_ext,
+    int_or_none,
 )
 
 
@@ -32,6 +35,17 @@ class OoyalaIE(InfoExtractor):
                 'description': '',
             },
         },
+        {
+            # Information available only through SAS api
+            # From http://community.plm.automation.siemens.com/t5/News-NX-Manufacturing/Tool-Path-Divide/ba-p/4187
+            'url': 'http://player.ooyala.com/player.js?embedCode=FiOG81ZTrvckcchQxmalf4aQj590qTEx',
+            'md5': 'a84001441b35ea492bc03736e59e7935',
+            'info_dict': {
+                'id': 'FiOG81ZTrvckcchQxmalf4aQj590qTEx',
+                'ext': 'mp4',
+                'title': 'Ooyala video',
+            }
+        }
     ]
 
     @staticmethod
@@ -44,11 +58,21 @@ class OoyalaIE(InfoExtractor):
                               ie=cls.ie_key())
 
     def _extract_result(self, info, more_info):
+        embedCode = info['embedCode']
+        video_url = info.get('ipad_url') or info['url']
+
+        if determine_ext(video_url) == 'm3u8':
+            formats = self._extract_m3u8_formats(video_url, embedCode, ext='mp4')
+        else:
+            formats = [{
+                'url': video_url,
+                'ext': 'mp4',
+            }]
+
         return {
-            'id': info['embedCode'],
-            'ext': 'mp4',
+            'id': embedCode,
             'title': unescapeHTML(info['title']),
-            'url': info.get('ipad_url') or info['url'],
+            'formats': formats,
             'description': unescapeHTML(more_info['description']),
             'thumbnail': more_info['promo'],
         }
@@ -77,6 +101,36 @@ class OoyalaIE(InfoExtractor):
                 mobile_player, 'info', fatal=False, default=None)
             if videos_info:
                 break
+
+        if not videos_info:
+            formats = []
+            auth_data = self._download_json(
+                'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?domain=www.example.org&supportedFormats=mp4,webm' % (embedCode, embedCode),
+                embedCode)
+
+            cur_auth_data = auth_data['authorization_data'][embedCode]
+
+            for stream in cur_auth_data['streams']:
+                formats.append({
+                    'url': base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8'),
+                    'ext': stream.get('delivery_type'),
+                    'format': stream.get('video_codec'),
+                    'format_id': stream.get('profile'),
+                    'width': int_or_none(stream.get('width')),
+                    'height': int_or_none(stream.get('height')),
+                    'abr': int_or_none(stream.get('audio_bitrate')),
+                    'vbr': int_or_none(stream.get('video_bitrate')),
+                })
+            if formats:
+                return {
+                    'id': embedCode,
+                    'formats': formats,
+                    'title': 'Ooyala video',
+                }
+
+            if not cur_auth_data['authorized']:
+                raise ExtractorError(cur_auth_data['message'], expected=True)
+
         if not videos_info:
             raise ExtractorError('Unable to extract info')
         videos_info = videos_info.replace('\\"', '"')
index 4e293392b3d39b46ad1612d884068a2dbfaeef23..2e6c9872b5d251be4eb3c61addab113aad4d2416 100644 (file)
@@ -11,6 +11,11 @@ from ..utils import (
     HEADRequest,
     unified_strdate,
     ExtractorError,
+    strip_jsonp,
+    int_or_none,
+    float_or_none,
+    determine_ext,
+    remove_end,
 )
 
 
@@ -197,3 +202,92 @@ class ORFFM4IE(InfoExtractor):
             'description': data['subtitle'],
             'entries': entries
         }
+
+
+class ORFIPTVIE(InfoExtractor):
+    IE_NAME = 'orf:iptv'
+    IE_DESC = 'iptv.ORF.at'
+    _VALID_URL = r'http://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)'
+
+    _TEST = {
+        'url': 'http://iptv.orf.at/stories/2275236/',
+        'md5': 'c8b22af4718a4b4af58342529453e3e5',
+        'info_dict': {
+            'id': '350612',
+            'ext': 'flv',
+            'title': 'Weitere Evakuierungen um Vulkan Calbuco',
+            'description': 'md5:d689c959bdbcf04efeddedbf2299d633',
+            'duration': 68.197,
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'upload_date': '20150425',
+        },
+    }
+
+    def _real_extract(self, url):
+        story_id = self._match_id(url)
+
+        webpage = self._download_webpage(
+            'http://iptv.orf.at/stories/%s' % story_id, story_id)
+
+        video_id = self._search_regex(
+            r'data-video(?:id)?="(\d+)"', webpage, 'video id')
+
+        data = self._download_json(
+            'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id,
+            video_id)[0]
+
+        duration = float_or_none(data['duration'], 1000)
+
+        video = data['sources']['default']
+        load_balancer_url = video['loadBalancerUrl']
+        abr = int_or_none(video.get('audioBitrate'))
+        vbr = int_or_none(video.get('bitrate'))
+        fps = int_or_none(video.get('videoFps'))
+        width = int_or_none(video.get('videoWidth'))
+        height = int_or_none(video.get('videoHeight'))
+        thumbnail = video.get('preview')
+
+        rendition = self._download_json(
+            load_balancer_url, video_id, transform_source=strip_jsonp)
+
+        f = {
+            'abr': abr,
+            'vbr': vbr,
+            'fps': fps,
+            'width': width,
+            'height': height,
+        }
+
+        formats = []
+        for format_id, format_url in rendition['redirect'].items():
+            if format_id == 'rtmp':
+                ff = f.copy()
+                ff.update({
+                    'url': format_url,
+                    'format_id': format_id,
+                })
+                formats.append(ff)
+            elif determine_ext(format_url) == 'f4m':
+                formats.extend(self._extract_f4m_formats(
+                    format_url, video_id, f4m_id=format_id))
+            elif determine_ext(format_url) == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    format_url, video_id, 'mp4', m3u8_id=format_id))
+            else:
+                continue
+        self._sort_formats(formats)
+
+        title = remove_end(self._og_search_title(webpage), ' - iptv.ORF.at')
+        description = self._og_search_description(webpage)
+        upload_date = unified_strdate(self._html_search_meta(
+            'dc.date', webpage, 'upload date'))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'thumbnail': thumbnail,
+            'upload_date': upload_date,
+            'formats': formats,
+        }
index afce732e141a1ae6cec78cc28ed4376fa174ab1f..143a7669639770e0cdfddc55e0b9395893301736 100644 (file)
@@ -5,6 +5,8 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
+    determine_ext,
+    int_or_none,
     unified_strdate,
     US_RATINGS,
 )
@@ -149,21 +151,45 @@ class PBSIE(InfoExtractor):
                 for vid_id in video_id]
             return self.playlist_result(entries, display_id)
 
-        info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
-        info = self._download_json(info_url, display_id)
-
-        redirect_url = info['alternate_encoding']['url']
-        redirect_info = self._download_json(
-            redirect_url + '?format=json', display_id,
-            'Downloading video url info')
-        if redirect_info['status'] == 'error':
-            if redirect_info['http_code'] == 403:
-                message = (
-                    'The video is not available in your region due to '
-                    'right restrictions')
+        info = self._download_json(
+            'http://video.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
+            display_id)
+
+        formats = []
+        for encoding_name in ('recommended_encoding', 'alternate_encoding'):
+            redirect = info.get(encoding_name)
+            if not redirect:
+                continue
+            redirect_url = redirect.get('url')
+            if not redirect_url:
+                continue
+
+            redirect_info = self._download_json(
+                redirect_url + '?format=json', display_id,
+                'Downloading %s video url info' % encoding_name)
+
+            if redirect_info['status'] == 'error':
+                if redirect_info['http_code'] == 403:
+                    message = (
+                        'The video is not available in your region due to '
+                        'right restrictions')
+                else:
+                    message = redirect_info['message']
+                raise ExtractorError(message, expected=True)
+
+            format_url = redirect_info.get('url')
+            if not format_url:
+                continue
+
+            if determine_ext(format_url) == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    format_url, display_id, 'mp4', preference=1, m3u8_id='hls'))
             else:
-                message = redirect_info['message']
-            raise ExtractorError(message, expected=True)
+                formats.append({
+                    'url': format_url,
+                    'format_id': redirect.get('eeid'),
+                })
+        self._sort_formats(formats)
 
         rating_str = info.get('rating')
         if rating_str is not None:
@@ -174,11 +200,10 @@ class PBSIE(InfoExtractor):
             'id': video_id,
             'display_id': display_id,
             'title': info['title'],
-            'url': redirect_info['url'],
-            'ext': 'mp4',
             'description': info['program'].get('description'),
             'thumbnail': info.get('image_url'),
-            'duration': info.get('duration'),
+            'duration': int_or_none(info.get('duration')),
             'age_limit': age_limit,
             'upload_date': upload_date,
+            'formats': formats,
         }
diff --git a/youtube_dl/extractor/philharmoniedeparis.py b/youtube_dl/extractor/philharmoniedeparis.py
new file mode 100644 (file)
index 0000000..6e60e5f
--- /dev/null
@@ -0,0 +1,78 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    float_or_none,
+    int_or_none,
+    parse_iso8601,
+    xpath_text,
+)
+
+
+class PhilharmonieDeParisIE(InfoExtractor):
+    IE_DESC = 'Philharmonie de Paris'
+    _VALID_URL = r'http://live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://live.philharmoniedeparis.fr/concert/1032066.html',
+        'info_dict': {
+            'id': '1032066',
+            'ext': 'flv',
+            'title': 'md5:d1f5585d87d041d07ce9434804bc8425',
+            'timestamp': 1428179400,
+            'upload_date': '20150404',
+            'duration': 6592.278,
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        }
+    }, {
+        'url': 'http://live.philharmoniedeparis.fr/Concert/1030324.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=1030324&track=&lang=fr',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        concert = self._download_xml(
+            'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=%s' % video_id,
+            video_id).find('./concert')
+
+        formats = []
+        info_dict = {
+            'id': video_id,
+            'title': xpath_text(concert, './titre', 'title', fatal=True),
+            'formats': formats,
+        }
+
+        fichiers = concert.find('./fichiers')
+        stream = fichiers.attrib['serveurstream']
+        for fichier in fichiers.findall('./fichier'):
+            info_dict['duration'] = float_or_none(fichier.get('timecodefin'))
+            for quality, (format_id, suffix) in enumerate([('lq', ''), ('hq', '_hd')]):
+                format_url = fichier.get('url%s' % suffix)
+                if not format_url:
+                    continue
+                formats.append({
+                    'url': stream,
+                    'play_path': format_url,
+                    'ext': 'flv',
+                    'format_id': format_id,
+                    'width': int_or_none(concert.get('largeur%s' % suffix)),
+                    'height': int_or_none(concert.get('hauteur%s' % suffix)),
+                    'quality': quality,
+                })
+        self._sort_formats(formats)
+
+        date, hour = concert.get('date'), concert.get('heure')
+        if date and hour:
+            info_dict['timestamp'] = parse_iso8601(
+                '%s-%s-%sT%s:00' % (date[0:4], date[4:6], date[6:8], hour))
+        elif date:
+            info_dict['upload_date'] = date
+
+        return info_dict
index a20672c0cc7fea5309e77bb193b887ced2b8d7d5..46cebc0d7b05080491d5f1d32ee8a709b549debc 100644 (file)
@@ -5,19 +5,33 @@ from .zdf import extract_from_xml_url
 
 
 class PhoenixIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?phoenix\.de/content/(?P<id>[0-9]+)'
-    _TEST = {
-        'url': 'http://www.phoenix.de/content/884301',
-        'md5': 'ed249f045256150c92e72dbb70eadec6',
-        'info_dict': {
-            'id': '884301',
-            'ext': 'mp4',
-            'title': 'Michael Krons mit Hans-Werner Sinn',
-            'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr',
-            'upload_date': '20141025',
-            'uploader': 'Im Dialog',
-        }
-    }
+    _VALID_URL = r'''(?x)https?://(?:www\.)?phoenix\.de/content/
+        (?:
+            phoenix/die_sendungen/(?:[^/]+/)?
+        )?
+        (?P<id>[0-9]+)'''
+    _TESTS = [
+        {
+            'url': 'http://www.phoenix.de/content/884301',
+            'md5': 'ed249f045256150c92e72dbb70eadec6',
+            'info_dict': {
+                'id': '884301',
+                'ext': 'mp4',
+                'title': 'Michael Krons mit Hans-Werner Sinn',
+                'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr',
+                'upload_date': '20141025',
+                'uploader': 'Im Dialog',
+            }
+        },
+        {
+            'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/869815',
+            'only_matching': True,
+        },
+        {
+            'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/diskussionen/928234',
+            'only_matching': True,
+        },
+    ]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
diff --git a/youtube_dl/extractor/pladform.py b/youtube_dl/extractor/pladform.py
new file mode 100644 (file)
index 0000000..551c8c9
--- /dev/null
@@ -0,0 +1,90 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    xpath_text,
+    qualities,
+)
+
+
+class PladformIE(InfoExtractor):
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            (?:
+                                out\.pladform\.ru/player|
+                                static\.pladform\.ru/player\.swf
+                            )
+                            \?.*\bvideoid=|
+                            video\.pladform\.ru/catalog/video/videoid/
+                        )
+                        (?P<id>\d+)
+                    '''
+    _TESTS = [{
+        # http://muz-tv.ru/kinozal/view/7400/
+        'url': 'http://out.pladform.ru/player?pl=24822&videoid=100183293',
+        'md5': '61f37b575dd27f1bb2e1854777fe31f4',
+        'info_dict': {
+            'id': '100183293',
+            'ext': 'mp4',
+            'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
+            'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 694,
+            'age_limit': 0,
+        },
+    }, {
+        'url': 'http://static.pladform.ru/player.swf?pl=21469&videoid=100183293&vkcid=0',
+        'only_matching': True,
+    }, {
+        'url': 'http://video.pladform.ru/catalog/video/videoid/100183293/vkcid/0',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        video = self._download_xml(
+            'http://out.pladform.ru/getVideo?pl=1&videoid=%s' % video_id,
+            video_id)
+
+        if video.tag == 'error':
+            raise ExtractorError(
+                '%s returned error: %s' % (self.IE_NAME, video.text),
+                expected=True)
+
+        quality = qualities(('ld', 'sd', 'hd'))
+
+        formats = [{
+            'url': src.text,
+            'format_id': src.get('quality'),
+            'quality': quality(src.get('quality')),
+        } for src in video.findall('./src')]
+        self._sort_formats(formats)
+
+        webpage = self._download_webpage(
+            'http://video.pladform.ru/catalog/video/videoid/%s' % video_id,
+            video_id)
+
+        title = self._og_search_title(webpage, fatal=False) or xpath_text(
+            video, './/title', 'title', fatal=True)
+        description = self._search_regex(
+            r'</h3>\s*<p>([^<]+)</p>', webpage, 'description', fatal=False)
+        thumbnail = self._og_search_thumbnail(webpage) or xpath_text(
+            video, './/cover', 'cover')
+
+        duration = int_or_none(xpath_text(video, './/time', 'duration'))
+        age_limit = int_or_none(xpath_text(video, './/age18', 'age limit'))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'age_limit': age_limit,
+            'formats': formats,
+        }
index 9576aed0e6668189c1959df3166b1e550facc7b0..e766ccca322da0e17389e949cd11fed3a5cb1910 100644 (file)
@@ -4,85 +4,72 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-    compat_urllib_request,
-)
+from ..compat import compat_str
 from ..utils import (
     ExtractorError,
-    float_or_none,
     int_or_none,
-    str_to_int,
+    parse_iso8601,
 )
 
 
 class PlayFMIE(InfoExtractor):
     IE_NAME = 'play.fm'
-    _VALID_URL = r'https?://(?:www\.)?play\.fm/[^?#]*(?P<upload_date>[0-9]{8})(?P<id>[0-9]{6})(?:$|[?#])'
+    _VALID_URL = r'https?://(?:www\.)?play\.fm/(?P<slug>(?:[^/]+/)+(?P<id>[^/]+))/?(?:$|[?#])'
 
     _TEST = {
-        'url': 'http://www.play.fm/recording/leipzigelectronicmusicbatofarparis_fr20140712137220',
+        'url': 'https://www.play.fm/dan-drastic/sven-tasnadi-leipzig-electronic-music-batofar-paris-fr-2014-07-12',
         'md5': 'c505f8307825a245d0c7ad1850001f22',
         'info_dict': {
-            'id': '137220',
+            'id': '71276',
             'ext': 'mp3',
-            'title': 'LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12',
-            'uploader': 'Sven Tasnadi',
-            'uploader_id': 'sventasnadi',
-            'duration': 5627.428,
-            'upload_date': '20140712',
+            'title': 'Sven Tasnadi - LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12',
+            'description': '',
+            'duration': 5627,
+            'timestamp': 1406033781,
+            'upload_date': '20140722',
+            'uploader': 'Dan Drastic',
+            'uploader_id': '71170',
             'view_count': int,
             'comment_count': int,
-            'thumbnail': 're:^https?://.*\.jpg$',
         },
     }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
-        upload_date = mobj.group('upload_date')
-
-        rec_data = compat_urllib_parse.urlencode({'rec_id': video_id})
-        req = compat_urllib_request.Request(
-            'http://www.play.fm/flexRead/recording', data=rec_data)
-        req.add_header('Content-Type', 'application/x-www-form-urlencoded')
-        rec_doc = self._download_xml(req, video_id)
+        slug = mobj.group('slug')
 
-        error_node = rec_doc.find('./error')
-        if error_node is not None:
-            raise ExtractorError('An error occured: %s (code %s)' % (
-                error_node.text, rec_doc.find('./status').text))
+        recordings = self._download_json(
+            'http://v2api.play.fm/recordings/slug/%s' % slug, video_id)
 
-        recording = rec_doc.find('./recording')
-        title = recording.find('./title').text
-        view_count = str_to_int(recording.find('./stats/playcount').text)
-        comment_count = str_to_int(recording.find('./stats/comments').text)
-        duration = float_or_none(recording.find('./duration').text, scale=1000)
-        thumbnail = recording.find('./image').text
+        error = recordings.get('error')
+        if isinstance(error, dict):
+            raise ExtractorError(
+                '%s returned error: %s' % (self.IE_NAME, error.get('message')),
+                expected=True)
 
-        artist = recording.find('./artists/artist')
-        uploader = artist.find('./name').text
-        uploader_id = artist.find('./slug').text
-
-        video_url = '%s//%s/%s/%s/offset/0/sh/%s/rec/%s/jingle/%s/loc/%s' % (
-            'http:', recording.find('./url').text,
-            recording.find('./_class').text, recording.find('./file_id').text,
-            rec_doc.find('./uuid').text, video_id,
-            rec_doc.find('./jingle/file_id').text,
-            'http%3A%2F%2Fwww.play.fm%2Fplayer',
-        )
+        audio_url = recordings['audio']
+        video_id = compat_str(recordings.get('id') or video_id)
+        title = recordings['title']
+        description = recordings.get('description')
+        duration = int_or_none(recordings.get('recordingDuration'))
+        timestamp = parse_iso8601(recordings.get('created_at'))
+        uploader = recordings.get('page', {}).get('title')
+        uploader_id = compat_str(recordings.get('page', {}).get('id'))
+        view_count = int_or_none(recordings.get('playCount'))
+        comment_count = int_or_none(recordings.get('commentCount'))
+        categories = [tag['name'] for tag in recordings.get('tags', []) if tag.get('name')]
 
         return {
             'id': video_id,
-            'url': video_url,
-            'ext': 'mp3',
-            'filesize': int_or_none(recording.find('./size').text),
+            'url': audio_url,
             'title': title,
-            'upload_date': upload_date,
-            'view_count': view_count,
-            'comment_count': comment_count,
+            'description': description,
             'duration': duration,
-            'thumbnail': thumbnail,
+            'timestamp': timestamp,
             'uploader': uploader,
             'uploader_id': uploader_id,
+            'view_count': view_count,
+            'comment_count': comment_count,
+            'categories': categories,
         }
diff --git a/youtube_dl/extractor/playwire.py b/youtube_dl/extractor/playwire.py
new file mode 100644 (file)
index 0000000..bdc7101
--- /dev/null
@@ -0,0 +1,78 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    xpath_text,
+    float_or_none,
+    int_or_none,
+)
+
+
+class PlaywireIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:config|cdn)\.playwire\.com(?:/v2)?/(?P<publisher_id>\d+)/(?:videos/v2|embed|config)/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://config.playwire.com/14907/videos/v2/3353705/player.json',
+        'md5': 'e6398701e3595888125729eaa2329ed9',
+        'info_dict': {
+            'id': '3353705',
+            'ext': 'mp4',
+            'title': 'S04_RM_UCL_Rus',
+            'thumbnail': 're:^http://.*\.png$',
+            'duration': 145.94,
+        },
+    }, {
+        'url': 'http://cdn.playwire.com/11625/embed/85228.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://config.playwire.com/12421/videos/v2/3389892/zeus.json',
+        'only_matching': True,
+    }, {
+        'url': 'http://cdn.playwire.com/v2/12342/config/1532636.json',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        publisher_id, video_id = mobj.group('publisher_id'), mobj.group('id')
+
+        player = self._download_json(
+            'http://config.playwire.com/%s/videos/v2/%s/zeus.json' % (publisher_id, video_id),
+            video_id)
+
+        title = player['settings']['title']
+        duration = float_or_none(player.get('duration'), 1000)
+
+        content = player['content']
+        thumbnail = content.get('poster')
+        src = content['media']['f4m']
+
+        f4m = self._download_xml(src, video_id)
+        base_url = xpath_text(f4m, './{http://ns.adobe.com/f4m/1.0}baseURL', 'base url', fatal=True)
+        formats = []
+        for media in f4m.findall('./{http://ns.adobe.com/f4m/1.0}media'):
+            media_url = media.get('url')
+            if not media_url:
+                continue
+            tbr = int_or_none(media.get('bitrate'))
+            width = int_or_none(media.get('width'))
+            height = int_or_none(media.get('height'))
+            f = {
+                'url': '%s/%s' % (base_url, media.attrib['url']),
+                'tbr': tbr,
+                'width': width,
+                'height': height,
+            }
+            if not (tbr or width or height):
+                f['quality'] = 1 if '-hd.' in media_url else 0
+            formats.append(f)
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'formats': formats,
+        }
index 3a27e37890dc78b26af866c9884807c97c56ccb9..0c8b731cf47267568e43ccd09ff21f1683b4d992 100644 (file)
@@ -33,10 +33,8 @@ class PornHubIE(InfoExtractor):
     }
 
     def _extract_count(self, pattern, webpage, name):
-        count = self._html_search_regex(pattern, webpage, '%s count' % name, fatal=False)
-        if count:
-            count = str_to_int(count)
-        return count
+        return str_to_int(self._search_regex(
+            pattern, webpage, '%s count' % name, fatal=False))
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -62,11 +60,14 @@ class PornHubIE(InfoExtractor):
         if thumbnail:
             thumbnail = compat_urllib_parse.unquote(thumbnail)
 
-        view_count = self._extract_count(r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
-        like_count = self._extract_count(r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
-        dislike_count = self._extract_count(r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
+        view_count = self._extract_count(
+            r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
+        like_count = self._extract_count(
+            r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
+        dislike_count = self._extract_count(
+            r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
         comment_count = self._extract_count(
-            r'All comments \(<var class="videoCommentCount">([\d,\.]+)</var>', webpage, 'comment')
+            r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
 
         video_urls = list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
         if webpage.find('"encrypted":true') != -1:
diff --git a/youtube_dl/extractor/pornovoisines.py b/youtube_dl/extractor/pornovoisines.py
new file mode 100644 (file)
index 0000000..9688ed9
--- /dev/null
@@ -0,0 +1,96 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import random
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    float_or_none,
+    unified_strdate,
+)
+
+
+class PornoVoisinesIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?pornovoisines\.com/showvideo/(?P<id>\d+)/(?P<display_id>[^/]+)'
+
+    _VIDEO_URL_TEMPLATE = 'http://stream%d.pornovoisines.com' \
+        '/static/media/video/transcoded/%s-640x360-1000-trscded.mp4'
+
+    _SERVER_NUMBERS = (1, 2)
+
+    _TEST = {
+        'url': 'http://www.pornovoisines.com/showvideo/1285/recherche-appartement/',
+        'md5': '5ac670803bc12e9e7f9f662ce64cf1d1',
+        'info_dict': {
+            'id': '1285',
+            'display_id': 'recherche-appartement',
+            'ext': 'mp4',
+            'title': 'Recherche appartement',
+            'description': 'md5:819ea0b785e2a04667a1a01cdc89594e',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'upload_date': '20140925',
+            'duration': 120,
+            'view_count': int,
+            'average_rating': float,
+            'categories': ['Débutante', 'Scénario', 'Sodomie'],
+            'age_limit': 18,
+        }
+    }
+
+    @classmethod
+    def build_video_url(cls, num):
+        return cls._VIDEO_URL_TEMPLATE % (random.choice(cls._SERVER_NUMBERS), num)
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_url = self.build_video_url(video_id)
+
+        title = self._html_search_regex(
+            r'<h1>(.+?)</h1>', webpage, 'title', flags=re.DOTALL)
+        description = self._html_search_regex(
+            r'<article id="descriptif">(.+?)</article>',
+            webpage, "description", fatal=False, flags=re.DOTALL)
+
+        thumbnail = self._search_regex(
+            r'<div id="mediaspace%s">\s*<img src="/?([^"]+)"' % video_id,
+            webpage, 'thumbnail', fatal=False)
+        if thumbnail:
+            thumbnail = 'http://www.pornovoisines.com/%s' % thumbnail
+
+        upload_date = unified_strdate(self._search_regex(
+            r'Publié le ([\d-]+)', webpage, 'upload date', fatal=False))
+        duration = int_or_none(self._search_regex(
+            'Durée (\d+)', webpage, 'duration', fatal=False))
+        view_count = int_or_none(self._search_regex(
+            r'(\d+) vues', webpage, 'view count', fatal=False))
+        average_rating = self._search_regex(
+            r'Note : (\d+,\d+)', webpage, 'average rating', fatal=False)
+        if average_rating:
+            average_rating = float_or_none(average_rating.replace(',', '.'))
+
+        categories = self._html_search_meta(
+            'keywords', webpage, 'categories', fatal=False)
+        if categories:
+            categories = [category.strip() for category in categories.split(',')]
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'upload_date': upload_date,
+            'duration': duration,
+            'view_count': view_count,
+            'average_rating': average_rating,
+            'categories': categories,
+            'age_limit': 18,
+        }
diff --git a/youtube_dl/extractor/primesharetv.py b/youtube_dl/extractor/primesharetv.py
new file mode 100644 (file)
index 0000000..01cc3d9
--- /dev/null
@@ -0,0 +1,69 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_parse,
+    compat_urllib_request,
+)
+from ..utils import ExtractorError
+
+
+class PrimeShareTVIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?primeshare\.tv/download/(?P<id>[\da-zA-Z]+)'
+
+    _TEST = {
+        'url': 'http://primeshare.tv/download/238790B611',
+        'md5': 'b92d9bf5461137c36228009f31533fbc',
+        'info_dict': {
+            'id': '238790B611',
+            'ext': 'mp4',
+            'title': 'Public Domain - 1960s Commercial - Crest Toothpaste-YKsuFona',
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        if '>File not exist<' in webpage:
+            raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+
+        fields = dict(re.findall(r'''(?x)<input\s+
+            type="hidden"\s+
+            name="([^"]+)"\s+
+            (?:id="[^"]+"\s+)?
+            value="([^"]*)"
+            ''', webpage))
+
+        headers = {
+            'Referer': url,
+            'Content-Type': 'application/x-www-form-urlencoded',
+        }
+
+        wait_time = int(self._search_regex(
+            r'var\s+cWaitTime\s*=\s*(\d+)',
+            webpage, 'wait time', default=7)) + 1
+        self._sleep(wait_time, video_id)
+
+        req = compat_urllib_request.Request(
+            url, compat_urllib_parse.urlencode(fields), headers)
+        video_page = self._download_webpage(
+            req, video_id, 'Downloading video page')
+
+        video_url = self._search_regex(
+            r"url\s*:\s*'([^']+\.primeshare\.tv(?::443)?/file/[^']+)'",
+            video_page, 'video url')
+
+        title = self._html_search_regex(
+            r'<h1>Watch\s*(?:&nbsp;)?\s*\((.+?)(?:\s*\[\.\.\.\])?\)\s*(?:&nbsp;)?\s*<strong>',
+            video_page, 'title')
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'ext': 'mp4',
+        }
index 385681d06e3dda356193d9f89c7ccbdd4cbde453..7cc7996642cae1de1ca2a585391d167025b92162 100644 (file)
@@ -10,6 +10,7 @@ from ..compat import (
 )
 from ..utils import (
     unified_strdate,
+    int_or_none,
 )
 
 
@@ -24,7 +25,7 @@ class ProSiebenSat1IE(InfoExtractor):
             'info_dict': {
                 'id': '2104602',
                 'ext': 'mp4',
-                'title': 'Staffel 2, Episode 18 - Jahresrückblick',
+                'title': 'Episode 18 - Staffel 2',
                 'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
                 'upload_date': '20131231',
                 'duration': 5845.04,
@@ -266,6 +267,9 @@ class ProSiebenSat1IE(InfoExtractor):
             urls_sources = urls_sources.values()
 
         def fix_bitrate(bitrate):
+            bitrate = int_or_none(bitrate)
+            if not bitrate:
+                return None
             return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate
 
         for source in urls_sources:
diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py
new file mode 100644 (file)
index 0000000..1311382
--- /dev/null
@@ -0,0 +1,238 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import random
+import time
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    strip_jsonp,
+    unescapeHTML,
+    js_to_json,
+)
+from ..compat import compat_urllib_request
+
+
+class QQMusicIE(InfoExtractor):
+    IE_NAME = 'qqmusic'
+    _VALID_URL = r'http://y.qq.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)'
+    _TESTS = [{
+        'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD',
+        'md5': 'bed90b6db2a7a7a7e11bc585f471f63a',
+        'info_dict': {
+            'id': '004295Et37taLD',
+            'ext': 'm4a',
+            'title': '可惜没如果',
+            'upload_date': '20141227',
+            'creator': '林俊杰',
+            'description': 'md5:4348ff1dd24036906baa7b6f973f8d30',
+        }
+    }]
+
+    # Reference: m_r_GetRUin() in top_player.js
+    # http://imgcache.gtimg.cn/music/portal_v3/y/top_player.js
+    @staticmethod
+    def m_r_get_ruin():
+        curMs = int(time.time() * 1000) % 1000
+        return int(round(random.random() * 2147483647) * curMs % 1E10)
+
+    def _real_extract(self, url):
+        mid = self._match_id(url)
+
+        detail_info_page = self._download_webpage(
+            'http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songmid=%s&play=0' % mid,
+            mid, note='Download song detail info',
+            errnote='Unable to get song detail info', encoding='gbk')
+
+        song_name = self._html_search_regex(
+            r"songname:\s*'([^']+)'", detail_info_page, 'song name')
+
+        publish_time = self._html_search_regex(
+            r'发行时间:(\d{4}-\d{2}-\d{2})', detail_info_page,
+            'publish time', default=None)
+        if publish_time:
+            publish_time = publish_time.replace('-', '')
+
+        singer = self._html_search_regex(
+            r"singer:\s*'([^']+)", detail_info_page, 'singer', default=None)
+
+        lrc_content = self._html_search_regex(
+            r'<div class="content" id="lrc_content"[^<>]*>([^<>]+)</div>',
+            detail_info_page, 'LRC lyrics', default=None)
+
+        guid = self.m_r_get_ruin()
+
+        vkey = self._download_json(
+            'http://base.music.qq.com/fcgi-bin/fcg_musicexpress.fcg?json=3&guid=%s' % guid,
+            mid, note='Retrieve vkey', errnote='Unable to get vkey',
+            transform_source=strip_jsonp)['key']
+        song_url = 'http://cc.stream.qqmusic.qq.com/C200%s.m4a?vkey=%s&guid=%s&fromtag=0' % (mid, vkey, guid)
+
+        return {
+            'id': mid,
+            'url': song_url,
+            'title': song_name,
+            'upload_date': publish_time,
+            'creator': singer,
+            'description': lrc_content,
+        }
+
+
+class QQPlaylistBaseIE(InfoExtractor):
+    @staticmethod
+    def qq_static_url(category, mid):
+        return 'http://y.qq.com/y/static/%s/%s/%s/%s.html' % (category, mid[-2], mid[-1], mid)
+
+    @classmethod
+    def get_entries_from_page(cls, page):
+        entries = []
+
+        for item in re.findall(r'class="data"[^<>]*>([^<>]+)</', page):
+            song_mid = unescapeHTML(item).split('|')[-5]
+            entries.append(cls.url_result(
+                'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic',
+                song_mid))
+
+        return entries
+
+
+class QQMusicSingerIE(QQPlaylistBaseIE):
+    IE_NAME = 'qqmusic:singer'
+    _VALID_URL = r'http://y.qq.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)'
+    _TEST = {
+        'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2',
+        'info_dict': {
+            'id': '001BLpXF2DyJe2',
+            'title': '林俊杰',
+            'description': 'md5:2a222d89ba4455a3af19940c0481bb78',
+        },
+        'playlist_count': 12,
+    }
+
+    def _real_extract(self, url):
+        mid = self._match_id(url)
+
+        singer_page = self._download_webpage(
+            self.qq_static_url('singer', mid), mid, 'Download singer page')
+
+        entries = self.get_entries_from_page(singer_page)
+
+        singer_name = self._html_search_regex(
+            r"singername\s*:\s*'([^']+)'", singer_page, 'singer name',
+            default=None)
+
+        singer_id = self._html_search_regex(
+            r"singerid\s*:\s*'([0-9]+)'", singer_page, 'singer id',
+            default=None)
+
+        singer_desc = None
+
+        if singer_id:
+            req = compat_urllib_request.Request(
+                'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg?utf8=1&outCharset=utf-8&format=xml&singerid=%s' % singer_id)
+            req.add_header(
+                'Referer', 'http://s.plcloud.music.qq.com/xhr_proxy_utf8.html')
+            singer_desc_page = self._download_xml(
+                req, mid, 'Donwload singer description XML')
+
+            singer_desc = singer_desc_page.find('./data/info/desc').text
+
+        return self.playlist_result(entries, mid, singer_name, singer_desc)
+
+
+class QQMusicAlbumIE(QQPlaylistBaseIE):
+    IE_NAME = 'qqmusic:album'
+    _VALID_URL = r'http://y.qq.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)'
+
+    _TEST = {
+        'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1&play=0',
+        'info_dict': {
+            'id': '000gXCTb2AhRR1',
+            'title': '我们都是这样长大的',
+            'description': 'md5:d216c55a2d4b3537fe4415b8767d74d6',
+        },
+        'playlist_count': 4,
+    }
+
+    def _real_extract(self, url):
+        mid = self._match_id(url)
+
+        album_page = self._download_webpage(
+            self.qq_static_url('album', mid), mid, 'Download album page')
+
+        entries = self.get_entries_from_page(album_page)
+
+        album_name = self._html_search_regex(
+            r"albumname\s*:\s*'([^']+)',", album_page, 'album name',
+            default=None)
+
+        album_detail = self._html_search_regex(
+            r'<div class="album_detail close_detail">\s*<p>((?:[^<>]+(?:<br />)?)+)</p>',
+            album_page, 'album details', default=None)
+
+        return self.playlist_result(entries, mid, album_name, album_detail)
+
+
+class QQMusicToplistIE(QQPlaylistBaseIE):
+    IE_NAME = 'qqmusic:toplist'
+    _VALID_URL = r'http://y\.qq\.com/#type=toplist&p=(?P<id>(top|global)_[0-9]+)'
+
+    _TESTS = [{
+        'url': 'http://y.qq.com/#type=toplist&p=global_12',
+        'info_dict': {
+            'id': 'global_12',
+            'title': 'itunes榜',
+        },
+        'playlist_count': 10,
+    }, {
+        'url': 'http://y.qq.com/#type=toplist&p=top_6',
+        'info_dict': {
+            'id': 'top_6',
+            'title': 'QQ音乐巅峰榜·欧美',
+        },
+        'playlist_count': 100,
+    }, {
+        'url': 'http://y.qq.com/#type=toplist&p=global_5',
+        'info_dict': {
+            'id': 'global_5',
+            'title': '韩国mnet排行榜',
+        },
+        'playlist_count': 50,
+    }]
+
+    @staticmethod
+    def strip_qq_jsonp(code):
+        return js_to_json(re.sub(r'^MusicJsonCallback\((.*?)\)/\*.+?\*/$', r'\1', code))
+
+    def _real_extract(self, url):
+        list_id = self._match_id(url)
+
+        list_type, num_id = list_id.split("_")
+
+        list_page = self._download_webpage(
+            "http://y.qq.com/y/static/toplist/index/%s.html" % list_id,
+            list_id, 'Download toplist page')
+
+        entries = []
+        if list_type == 'top':
+            jsonp_url = "http://y.qq.com/y/static/toplist/json/top/%s/1.js" % num_id
+        else:
+            jsonp_url = "http://y.qq.com/y/static/toplist/json/global/%s/1_1.js" % num_id
+
+        toplist_json = self._download_json(
+            jsonp_url, list_id, note='Retrieve toplist json',
+            errnote='Unable to get toplist json', transform_source=self.strip_qq_jsonp)
+
+        for song in toplist_json['l']:
+            s = song['s']
+            song_mid = s.split("|")[20]
+            entries.append(self.url_result(
+                'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic',
+                song_mid))
+
+        list_name = self._html_search_regex(
+            r'<h2 id="top_name">([^\']+)</h2>', list_page, 'top list name',
+            default=None)
+
+        return self.playlist_result(entries, list_id, list_name)
diff --git a/youtube_dl/extractor/radiojavan.py b/youtube_dl/extractor/radiojavan.py
new file mode 100644 (file)
index 0000000..884c284
--- /dev/null
@@ -0,0 +1,67 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import(
+    unified_strdate,
+    str_to_int,
+)
+
+
+class RadioJavanIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?'
+    _TEST = {
+        'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam',
+        'md5': 'e85208ffa3ca8b83534fca9fe19af95b',
+        'info_dict': {
+            'id': 'chaartaar-ashoobam',
+            'ext': 'mp4',
+            'title': 'Chaartaar - Ashoobam',
+            'thumbnail': 're:^https?://.*\.jpe?g$',
+            'upload_date': '20150215',
+            'view_count': int,
+            'like_count': int,
+            'dislike_count': int,
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        formats = [{
+            'url': 'https://media.rdjavan.com/media/music_video/%s' % video_path,
+            'format_id': '%sp' % height,
+            'height': int(height),
+        } for height, video_path in re.findall(r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage)]
+        self._sort_formats(formats)
+
+        title = self._og_search_title(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        upload_date = unified_strdate(self._search_regex(
+            r'class="date_added">Date added: ([^<]+)<',
+            webpage, 'upload date', fatal=False))
+
+        view_count = str_to_int(self._search_regex(
+            r'class="views">Plays: ([\d,]+)',
+            webpage, 'view count', fatal=False))
+        like_count = str_to_int(self._search_regex(
+            r'class="rating">([\d,]+) likes',
+            webpage, 'like count', fatal=False))
+        dislike_count = str_to_int(self._search_regex(
+            r'class="rating">([\d,]+) dislikes',
+            webpage, 'dislike count', fatal=False))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'upload_date': upload_date,
+            'view_count': view_count,
+            'like_count': like_count,
+            'dislike_count': dislike_count,
+            'formats': formats,
+        }
index 144e3398259179e396206d0ea059e334953dcfd7..1631faf29f61c9cc15bca99394966c1917ca1a08 100644 (file)
@@ -13,7 +13,7 @@ from ..utils import (
 
 
 class RaiIE(InfoExtractor):
-    _VALID_URL = r'(?P<url>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)'
+    _VALID_URL = r'(?P<url>(?P<host>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it))/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)'
     _TESTS = [
         {
             'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
@@ -62,34 +62,78 @@ class RaiIE(InfoExtractor):
                 'description': 'Edizione delle ore 20:30 ',
             }
         },
+        {
+            'url': 'http://www.ilcandidato.rai.it/dl/ray/media/Il-Candidato---Primo-episodio-Le-Primarie-28e5525a-b495-45e8-a7c3-bc48ba45d2b6.html',
+            'md5': '02b64456f7cc09f96ff14e7dd489017e',
+            'info_dict': {
+                'id': '28e5525a-b495-45e8-a7c3-bc48ba45d2b6',
+                'ext': 'flv',
+                'title': 'Il Candidato - Primo episodio: "Le Primarie"',
+                'description': 'Primo appuntamento con "Il candidato" con Filippo Timi, alias Piero Zucca presidente!',
+                'uploader': 'RaiTre',
+            }
+        }
     ]
 
+    def _extract_relinker_url(self, webpage):
+        return self._proto_relative_url(self._search_regex(
+            [r'name="videourl" content="([^"]+)"', r'var\s+videoURL(?:_MP4)?\s*=\s*"([^"]+)"'],
+            webpage, 'relinker url', default=None))
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
+        host = mobj.group('host')
 
-        media = self._download_json('%s?json' % mobj.group('url'), video_id, 'Downloading video JSON')
+        webpage = self._download_webpage(url, video_id)
 
-        title = media.get('name')
-        description = media.get('desc')
-        thumbnail = media.get('image_300') or media.get('image_medium') or media.get('image')
-        duration = parse_duration(media.get('length'))
-        uploader = media.get('author')
-        upload_date = unified_strdate(media.get('date'))
+        relinker_url = self._extract_relinker_url(webpage)
 
-        formats = []
+        if not relinker_url:
+            iframe_path = self._search_regex(
+                r'<iframe[^>]+src="/?(dl/[^"]+\?iframe\b[^"]*)"',
+                webpage, 'iframe')
+            webpage = self._download_webpage(
+                '%s/%s' % (host, iframe_path), video_id)
+            relinker_url = self._extract_relinker_url(webpage)
 
-        for format_id in ['wmv', 'm3u8', 'mediaUri', 'h264']:
-            media_url = media.get(format_id)
-            if not media_url:
-                continue
-            formats.append({
+        relinker = self._download_json(
+            '%s&output=47' % relinker_url, video_id)
+
+        media_url = relinker['video'][0]
+        ct = relinker.get('ct')
+        if ct == 'f4m':
+            formats = self._extract_f4m_formats(
+                media_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id)
+        else:
+            formats = [{
                 'url': media_url,
-                'format_id': format_id,
-                'ext': 'mp4',
-            })
+                'format_id': ct,
+            }]
 
-        subtitles = self.extract_subtitles(video_id, url)
+        json_link = self._html_search_meta(
+            'jsonlink', webpage, 'JSON link', default=None)
+        if json_link:
+            media = self._download_json(
+                host + json_link, video_id, 'Downloading video JSON')
+            title = media.get('name')
+            description = media.get('desc')
+            thumbnail = media.get('image_300') or media.get('image_medium') or media.get('image')
+            duration = parse_duration(media.get('length'))
+            uploader = media.get('author')
+            upload_date = unified_strdate(media.get('date'))
+        else:
+            title = (self._search_regex(
+                r'var\s+videoTitolo\s*=\s*"(.+?)";',
+                webpage, 'title', default=None) or self._og_search_title(webpage)).replace('\\"', '"')
+            description = self._og_search_description(webpage)
+            thumbnail = self._og_search_thumbnail(webpage)
+            duration = None
+            uploader = self._html_search_meta('Editore', webpage, 'uploader')
+            upload_date = unified_strdate(self._html_search_meta(
+                'item-date', webpage, 'upload date', default=None))
+
+        subtitles = self.extract_subtitles(video_id, webpage)
 
         return {
             'id': video_id,
@@ -103,8 +147,7 @@ class RaiIE(InfoExtractor):
             'subtitles': subtitles,
         }
 
-    def _get_subtitles(self, video_id, url):
-        webpage = self._download_webpage(url, video_id)
+    def _get_subtitles(self, video_id, webpage):
         subtitles = {}
         m = re.search(r'<meta name="closedcaption" content="(?P<captions>[^"]+)"', webpage)
         if m:
index 846b76c81528431c0faf8ea3fc9bbd6b017db099..d6054d7175fd49a22117dd357bea7905f6e739be 100644 (file)
@@ -1,17 +1,19 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..utils import ExtractorError
 
 
 class RedTubeIE(InfoExtractor):
     _VALID_URL = r'http://(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://www.redtube.com/66418',
+        'md5': '7b8c22b5e7098a3e1c09709df1126d2d',
         'info_dict': {
             'id': '66418',
             'ext': 'mp4',
-            "title": "Sucked on a toilet",
-            "age_limit": 18,
+            'title': 'Sucked on a toilet',
+            'age_limit': 18,
         }
     }
 
@@ -19,6 +21,9 @@ class RedTubeIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
+        if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']):
+            raise ExtractorError('Video %s has been removed' % video_id, expected=True)
+
         video_url = self._html_search_regex(
             r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
         video_title = self._html_search_regex(
index b42442d127c13e69fc81cb27e71cf117d2cb96b2..849300140ecbf598874d22b090262eabec1e7ea5 100644 (file)
@@ -8,8 +8,10 @@ import time
 from .common import InfoExtractor
 from ..compat import compat_urlparse
 from ..utils import (
+    ExtractorError,
     float_or_none,
     remove_end,
+    std_headers,
     struct_unpack,
 )
 
@@ -84,13 +86,22 @@ class RTVEALaCartaIE(InfoExtractor):
         'only_matching': True,
     }]
 
+    def _real_initialize(self):
+        user_agent_b64 = base64.b64encode(std_headers['User-Agent'].encode('utf-8')).decode('utf-8')
+        manager_info = self._download_json(
+            'http://www.rtve.es/odin/loki/' + user_agent_b64,
+            None, 'Fetching manager info')
+        self._manager = manager_info['manager']
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
         info = self._download_json(
             'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
             video_id)['page']['items'][0]
-        png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id
+        if info['state'] == 'DESPU':
+            raise ExtractorError('The video is no longer available', expected=True)
+        png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id)
         png = self._download_webpage(png_url, video_id, 'Downloading url information')
         video_url = _decrypt_url(png)
         if not video_url.endswith('.f4m'):
@@ -127,6 +138,47 @@ class RTVEALaCartaIE(InfoExtractor):
             for s in subs)
 
 
+class RTVEInfantilIE(InfoExtractor):
+    IE_NAME = 'rtve.es:infantil'
+    IE_DESC = 'RTVE infantil'
+    _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/(?P<show>[^/]*)/video/(?P<short_title>[^/]*)/(?P<id>[0-9]+)/'
+
+    _TESTS = [{
+        'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/',
+        'md5': '915319587b33720b8e0357caaa6617e6',
+        'info_dict': {
+            'id': '3040283',
+            'ext': 'mp4',
+            'title': 'Maneras de vivir',
+            'thumbnail': 'http://www.rtve.es/resources/jpg/6/5/1426182947956.JPG',
+            'duration': 357.958,
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        info = self._download_json(
+            'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
+            video_id)['page']['items'][0]
+
+        webpage = self._download_webpage(url, video_id)
+        vidplayer_id = self._search_regex(
+            r' id="vidplayer([0-9]+)"', webpage, 'internal video ID')
+
+        png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % vidplayer_id
+        png = self._download_webpage(png_url, video_id, 'Downloading url information')
+        video_url = _decrypt_url(png)
+
+        return {
+            'id': video_id,
+            'ext': 'mp4',
+            'title': info['title'],
+            'url': video_url,
+            'thumbnail': info.get('image'),
+            'duration': float_or_none(info.get('duration'), scale=1000),
+        }
+
+
 class RTVELiveIE(InfoExtractor):
     IE_NAME = 'rtve.es:live'
     IE_DESC = 'RTVE.es live streams'
index ef766237bf318d40da067a6a820a725fbe0da286..55604637dca22533cd765529dcb2abfb759fd9c1 100644 (file)
@@ -84,11 +84,20 @@ class RUTVIE(InfoExtractor):
                 'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ',
                 'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c',
             },
+            'skip': 'Translation has finished',
+        },
+        {
+            'url': 'http://player.rutv.ru/iframe/live/id/21/showZoomBtn/false/isPlay/true/',
+            'info_dict': {
+                'id': '21',
+                'ext': 'mp4',
+                'title': 're:^Россия 24. Прямой эфир [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+                'is_live': True,
+            },
             'params': {
-                # rtmp download
+                # m3u8 download
                 'skip_download': True,
             },
-            'skip': 'Translation has finished',
         },
     ]
 
@@ -119,8 +128,10 @@ class RUTVIE(InfoExtractor):
         elif video_path.startswith('index/iframe/cast_id'):
             video_type = 'live'
 
+        is_live = video_type == 'live'
+
         json_data = self._download_json(
-            'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if video_type == 'live' else '', video_id),
+            'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if is_live else '', video_id),
             video_id, 'Downloading JSON')
 
         if json_data['errors']:
@@ -147,6 +158,7 @@ class RUTVIE(InfoExtractor):
 
         for transport, links in media['sources'].items():
             for quality, url in links.items():
+                preference = -1 if priority_transport == transport else -2
                 if transport == 'rtmp':
                     mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>.+)$', url)
                     if not mobj:
@@ -160,9 +172,11 @@ class RUTVIE(InfoExtractor):
                         'rtmp_live': True,
                         'ext': 'flv',
                         'vbr': int(quality),
+                        'preference': preference,
                     }
                 elif transport == 'm3u8':
-                    formats.extend(self._extract_m3u8_formats(url, video_id, 'mp4'))
+                    formats.extend(self._extract_m3u8_formats(
+                        url, video_id, 'mp4', preference=preference, m3u8_id='hls'))
                     continue
                 else:
                     fmt = {
@@ -172,21 +186,18 @@ class RUTVIE(InfoExtractor):
                     'width': width,
                     'height': height,
                     'format_id': '%s-%s' % (transport, quality),
-                    'preference': -1 if priority_transport == transport else -2,
                 })
                 formats.append(fmt)
 
-        if not formats:
-            raise ExtractorError('No media links available for %s' % video_id)
-
         self._sort_formats(formats)
 
         return {
             'id': video_id,
-            'title': title,
+            'title': self._live_title(title) if is_live else title,
             'description': description,
             'thumbnail': thumbnail,
             'view_count': view_count,
             'duration': duration,
             'formats': formats,
+            'is_live': is_live,
         }
diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py
new file mode 100644 (file)
index 0000000..10251f2
--- /dev/null
@@ -0,0 +1,157 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .brightcove import BrightcoveIE
+
+from ..compat import (
+    compat_urllib_parse,
+    compat_urllib_request,
+)
+from ..utils import (
+    ExtractorError,
+    smuggle_url,
+    std_headers,
+)
+
+
+class SafariBaseIE(InfoExtractor):
+    _LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/'
+    _SUCCESSFUL_LOGIN_REGEX = r'<a href="/accounts/logout/"[^>]*>Sign Out</a>'
+    _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to supply credentials for safaribooksonline.com'
+    _NETRC_MACHINE = 'safari'
+
+    _API_BASE = 'https://www.safaribooksonline.com/api/v1/book'
+    _API_FORMAT = 'json'
+
+    LOGGED_IN = False
+
+    def _real_initialize(self):
+        # We only need to log in once for courses or individual videos
+        if not self.LOGGED_IN:
+            self._login()
+            SafariBaseIE.LOGGED_IN = True
+
+    def _login(self):
+        (username, password) = self._get_login_info()
+        if username is None:
+            raise ExtractorError(
+                self._ACCOUNT_CREDENTIALS_HINT,
+                expected=True)
+
+        headers = std_headers
+        if 'Referer' not in headers:
+            headers['Referer'] = self._LOGIN_URL
+
+        login_page = self._download_webpage(
+            self._LOGIN_URL, None,
+            'Downloading login form')
+
+        csrf = self._html_search_regex(
+            r"name='csrfmiddlewaretoken'\s+value='([^']+)'",
+            login_page, 'csrf token')
+
+        login_form = {
+            'csrfmiddlewaretoken': csrf,
+            'email': username,
+            'password1': password,
+            'login': 'Sign In',
+            'next': '',
+        }
+
+        request = compat_urllib_request.Request(
+            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form), headers=headers)
+        login_page = self._download_webpage(
+            request, None, 'Logging in as %s' % username)
+
+        if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
+            raise ExtractorError(
+                'Login failed; make sure your credentials are correct and try again.',
+                expected=True)
+
+        self.to_screen('Login successful')
+
+
+class SafariIE(SafariBaseIE):
+    IE_NAME = 'safari'
+    IE_DESC = 'safaribooksonline.com online video'
+    _VALID_URL = r'''(?x)https?://
+                            (?:www\.)?safaribooksonline\.com/
+                                (?:
+                                    library/view/[^/]+|
+                                    api/v1/book
+                                )/
+                                (?P<course_id>\d+)/
+                                    (?:chapter(?:-content)?/)?
+                                (?P<part>part\d+)\.html
+    '''
+
+    _TESTS = [{
+        'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
+        'md5': '5b0c4cc1b3c1ba15dda7344085aa5592',
+        'info_dict': {
+            'id': '2842601850001',
+            'ext': 'mp4',
+            'title': 'Introduction',
+        },
+        'skip': 'Requires safaribooksonline account credentials',
+    }, {
+        'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        course_id = mobj.group('course_id')
+        part = mobj.group('part')
+
+        webpage = self._download_webpage(
+            '%s/%s/chapter-content/%s.html' % (self._API_BASE, course_id, part),
+            part)
+
+        bc_url = BrightcoveIE._extract_brightcove_url(webpage)
+        if not bc_url:
+            raise ExtractorError('Could not extract Brightcove URL from %s' % url, expected=True)
+
+        return self.url_result(smuggle_url(bc_url, {'Referer': url}), 'Brightcove')
+
+
+class SafariCourseIE(SafariBaseIE):
+    IE_NAME = 'safari:course'
+    IE_DESC = 'safaribooksonline.com online courses'
+
+    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<id>\d+)/?(?:[#?]|$)'
+
+    _TESTS = [{
+        'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
+        'info_dict': {
+            'id': '9780133392838',
+            'title': 'Hadoop Fundamentals LiveLessons',
+        },
+        'playlist_count': 22,
+        'skip': 'Requires safaribooksonline account credentials',
+    }, {
+        'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        course_id = self._match_id(url)
+
+        course_json = self._download_json(
+            '%s/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT),
+            course_id, 'Downloading course JSON')
+
+        if 'chapters' not in course_json:
+            raise ExtractorError(
+                'No chapters found for course %s' % course_id, expected=True)
+
+        entries = [
+            self.url_result(chapter, 'Safari')
+            for chapter in course_json['chapters']]
+
+        course_title = course_json['title']
+
+        return self.playlist_result(entries, course_id, course_title)
index 6c9fdb7c1aceb35efc166c9207fd503603040b9b..d1ab66b3216d5153a5480769fb0723919f3fdb37 100644 (file)
@@ -11,7 +11,7 @@ from ..utils import (
 
 
 class ScreenwaveMediaIE(InfoExtractor):
-    _VALID_URL = r'http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<id>.+)'
+    _VALID_URL = r'http://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?[^"]*\bid=(?P<id>.+)'
 
     _TESTS = [{
         'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911',
@@ -20,7 +20,10 @@ class ScreenwaveMediaIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        playerdata = self._download_webpage(url, video_id, 'Downloading player webpage')
+
+        playerdata = self._download_webpage(
+            'http://player.screenwavemedia.com/play/player.php?id=%s' % video_id,
+            video_id, 'Downloading player webpage')
 
         vidtitle = self._search_regex(
             r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/')
@@ -81,60 +84,6 @@ class ScreenwaveMediaIE(InfoExtractor):
         }
 
 
-class CinemassacreIE(InfoExtractor):
-    _VALID_URL = 'https?://(?:www\.)?cinemassacre\.com/(?P<date_y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
-    _TESTS = [
-        {
-            'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
-            'md5': 'fde81fbafaee331785f58cd6c0d46190',
-            'info_dict': {
-                'id': 'Cinemassacre-19911',
-                'ext': 'mp4',
-                'upload_date': '20121110',
-                'title': '“Angry Video Game Nerd: The Movie” – Trailer',
-                'description': 'md5:fb87405fcb42a331742a0dce2708560b',
-            },
-        },
-        {
-            'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
-            'md5': 'd72f10cd39eac4215048f62ab477a511',
-            'info_dict': {
-                'id': 'Cinemassacre-521be8ef82b16',
-                'ext': 'mp4',
-                'upload_date': '20131002',
-                'title': 'The Mummy’s Hand (1940)',
-            },
-        }
-    ]
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('display_id')
-        video_date = mobj.group('date_y') + mobj.group('date_m') + mobj.group('date_d')
-
-        webpage = self._download_webpage(url, display_id)
-
-        playerdata_url = self._search_regex(
-            r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
-            webpage, 'player data URL')
-        video_title = self._html_search_regex(
-            r'<title>(?P<title>.+?)\|', webpage, 'title')
-        video_description = self._html_search_regex(
-            r'<div class="entry-content">(?P<description>.+?)</div>',
-            webpage, 'description', flags=re.DOTALL, fatal=False)
-        video_thumbnail = self._og_search_thumbnail(webpage)
-
-        return {
-            '_type': 'url_transparent',
-            'display_id': display_id,
-            'title': video_title,
-            'description': video_description,
-            'upload_date': video_date,
-            'thumbnail': video_thumbnail,
-            'url': playerdata_url,
-        }
-
-
 class TeamFourIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/video/(?P<id>[a-z0-9\-]+)/?'
     _TEST = {
@@ -153,7 +102,7 @@ class TeamFourIE(InfoExtractor):
         webpage = self._download_webpage(url, display_id)
 
         playerdata_url = self._search_regex(
-            r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
+            r'src="(http://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
             webpage, 'player data URL')
 
         video_title = self._html_search_regex(
diff --git a/youtube_dl/extractor/senateisvp.py b/youtube_dl/extractor/senateisvp.py
new file mode 100644 (file)
index 0000000..d3b8a1b
--- /dev/null
@@ -0,0 +1,141 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    unsmuggle_url,
+)
+from ..compat import (
+    compat_parse_qs,
+    compat_urlparse,
+)
+
+
+class SenateISVPIE(InfoExtractor):
+    _COMM_MAP = [
+        ["ag", "76440", "http://ag-f.akamaihd.net"],
+        ["aging", "76442", "http://aging-f.akamaihd.net"],
+        ["approps", "76441", "http://approps-f.akamaihd.net"],
+        ["armed", "76445", "http://armed-f.akamaihd.net"],
+        ["banking", "76446", "http://banking-f.akamaihd.net"],
+        ["budget", "76447", "http://budget-f.akamaihd.net"],
+        ["cecc", "76486", "http://srs-f.akamaihd.net"],
+        ["commerce", "80177", "http://commerce1-f.akamaihd.net"],
+        ["csce", "75229", "http://srs-f.akamaihd.net"],
+        ["dpc", "76590", "http://dpc-f.akamaihd.net"],
+        ["energy", "76448", "http://energy-f.akamaihd.net"],
+        ["epw", "76478", "http://epw-f.akamaihd.net"],
+        ["ethics", "76449", "http://ethics-f.akamaihd.net"],
+        ["finance", "76450", "http://finance-f.akamaihd.net"],
+        ["foreign", "76451", "http://foreign-f.akamaihd.net"],
+        ["govtaff", "76453", "http://govtaff-f.akamaihd.net"],
+        ["help", "76452", "http://help-f.akamaihd.net"],
+        ["indian", "76455", "http://indian-f.akamaihd.net"],
+        ["intel", "76456", "http://intel-f.akamaihd.net"],
+        ["intlnarc", "76457", "http://intlnarc-f.akamaihd.net"],
+        ["jccic", "85180", "http://jccic-f.akamaihd.net"],
+        ["jec", "76458", "http://jec-f.akamaihd.net"],
+        ["judiciary", "76459", "http://judiciary-f.akamaihd.net"],
+        ["rpc", "76591", "http://rpc-f.akamaihd.net"],
+        ["rules", "76460", "http://rules-f.akamaihd.net"],
+        ["saa", "76489", "http://srs-f.akamaihd.net"],
+        ["smbiz", "76461", "http://smbiz-f.akamaihd.net"],
+        ["srs", "75229", "http://srs-f.akamaihd.net"],
+        ["uscc", "76487", "http://srs-f.akamaihd.net"],
+        ["vetaff", "76462", "http://vetaff-f.akamaihd.net"],
+        ["arch", "", "http://ussenate-f.akamaihd.net/"]
+    ]
+    _IE_NAME = 'senate.gov'
+    _VALID_URL = r'http://www\.senate\.gov/isvp/\?(?P<qs>.+)'
+    _TESTS = [{
+        'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',
+        'info_dict': {
+            'id': 'judiciary031715',
+            'ext': 'flv',
+            'title': 'Integrated Senate Video Player',
+            'thumbnail': 're:^https?://.*\.(?:jpg|png)$',
+        }
+    }, {
+        'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false',
+        'info_dict': {
+            'id': 'commerce011514',
+            'ext': 'flv',
+            'title': 'Integrated Senate Video Player'
+        }
+    }, {
+        'url': 'http://www.senate.gov/isvp/?type=arch&comm=intel&filename=intel090613&hc_location=ufi',
+        # checksum differs each time
+        'info_dict': {
+            'id': 'intel090613',
+            'ext': 'mp4',
+            'title': 'Integrated Senate Video Player'
+        }
+    }]
+
+    @staticmethod
+    def _search_iframe_url(webpage):
+        mobj = re.search(
+            r"<iframe[^>]+src=['\"](?P<url>http://www\.senate\.gov/isvp/\?[^'\"]+)['\"]",
+            webpage)
+        if mobj:
+            return mobj.group('url')
+
+    def _get_info_for_comm(self, committee):
+        for entry in self._COMM_MAP:
+            if entry[0] == committee:
+                return entry[1:]
+
+    def _real_extract(self, url):
+        url, smuggled_data = unsmuggle_url(url, {})
+
+        qs = compat_parse_qs(re.match(self._VALID_URL, url).group('qs'))
+        if not qs.get('filename') or not qs.get('type') or not qs.get('comm'):
+            raise ExtractorError('Invalid URL', expected=True)
+
+        video_id = re.sub(r'.mp4$', '', qs['filename'][0])
+
+        webpage = self._download_webpage(url, video_id)
+
+        if smuggled_data.get('force_title'):
+            title = smuggled_data['force_title']
+        else:
+            title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, video_id)
+        poster = qs.get('poster')
+        thumbnail = poster[0] if poster else None
+
+        video_type = qs['type'][0]
+        committee = video_type if video_type == 'arch' else qs['comm'][0]
+        stream_num, domain = self._get_info_for_comm(committee)
+
+        formats = []
+        if video_type == 'arch':
+            filename = video_id if '.' in video_id else video_id + '.mp4'
+            formats = [{
+                # All parameters in the query string are necessary to prevent a 403 error
+                'url': compat_urlparse.urljoin(domain, filename) + '?v=3.1.0&fp=&r=&g=',
+            }]
+        else:
+            hdcore_sign = '?hdcore=3.1.0'
+            url_params = (domain, video_id, stream_num)
+            f4m_url = '%s/z/%s_1@%s/manifest.f4m' % url_params + hdcore_sign
+            m3u8_url = '%s/i/%s_1@%s/master.m3u8' % url_params
+            for entry in self._extract_f4m_formats(f4m_url, video_id, f4m_id='f4m'):
+                # URLs without the extra param induce an 404 error
+                entry.update({'extra_param_to_segment_url': hdcore_sign})
+                formats.append(entry)
+            for entry in self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8'):
+                mobj = re.search(r'(?P<tag>(?:-p|-b)).m3u8', entry['url'])
+                if mobj:
+                    entry['format_id'] += mobj.group('tag')
+                formats.append(entry)
+
+            self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': thumbnail,
+        }
index 9f79ff5c1b66d2bf37369a6009a914043493b407..0b717a1e42b8dd2c3d8a88d602f001876cf99e03 100644 (file)
@@ -30,7 +30,7 @@ class SlideshareIE(InfoExtractor):
         page_title = mobj.group('title')
         webpage = self._download_webpage(url, page_title)
         slideshare_obj = self._search_regex(
-            r'var\s+slideshare_object\s*=\s*({.*?});\s*var\s+user_info\s*=',
+            r'\$\.extend\(slideshare_object,\s*(\{.*?\})\);',
             webpage, 'slideshare object')
         info = json.loads(slideshare_obj)
         if info['slideshow']['type'] != 'video':
index c04791997f3672cdb643870086c7ed7f52db54c1..eab4adfca46f3686ac7d23ca581681e56d5066f1 100644 (file)
@@ -4,22 +4,88 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from .common import compat_str
+from ..compat import (
+    compat_str,
+    compat_urllib_request
+)
+from ..utils import ExtractorError
 
 
 class SohuIE(InfoExtractor):
     _VALID_URL = r'https?://(?P<mytv>my\.)?tv\.sohu\.com/.+?/(?(mytv)|n)(?P<id>\d+)\.shtml.*?'
 
-    _TEST = {
+    _TESTS = [{
+        'note': 'This video is available only in Mainland China',
         'url': 'http://tv.sohu.com/20130724/n382479172.shtml#super',
-        'md5': 'bde8d9a6ffd82c63a1eefaef4eeefec7',
+        'md5': '29175c8cadd8b5cc4055001e85d6b372',
         'info_dict': {
             'id': '382479172',
             'ext': 'mp4',
             'title': 'MV:Far East Movement《The Illest》',
         },
-        'skip': 'Only available from China',
-    }
+        'params': {
+            'cn_verification_proxy': 'proxy.uku.im:8888'
+        }
+    }, {
+        'url': 'http://tv.sohu.com/20150305/n409385080.shtml',
+        'md5': 'ac9a5d322b4bf9ae184d53e4711e4f1a',
+        'info_dict': {
+            'id': '409385080',
+            'ext': 'mp4',
+            'title': '《2015湖南卫视羊年元宵晚会》唐嫣《花好月圆》',
+        }
+    }, {
+        'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml',
+        'md5': '49308ff6dafde5ece51137d04aec311e',
+        'info_dict': {
+            'id': '78693464',
+            'ext': 'mp4',
+            'title': '【爱范品】第31期:MWC见不到的奇葩手机',
+        }
+    }, {
+        'note': 'Multipart video',
+        'url': 'http://my.tv.sohu.com/pl/8384802/78910339.shtml',
+        'info_dict': {
+            'id': '78910339',
+            'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+        },
+        'playlist': [{
+            'md5': '492923eac023ba2f13ff69617c32754a',
+            'info_dict': {
+                'id': '78910339_part1',
+                'ext': 'mp4',
+                'duration': 294,
+                'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+            }
+        }, {
+            'md5': 'de604848c0e8e9c4a4dde7e1347c0637',
+            'info_dict': {
+                'id': '78910339_part2',
+                'ext': 'mp4',
+                'duration': 300,
+                'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+            }
+        }, {
+            'md5': '93584716ee0657c0b205b8aa3d27aa13',
+            'info_dict': {
+                'id': '78910339_part3',
+                'ext': 'mp4',
+                'duration': 150,
+                'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+            }
+        }]
+    }, {
+        'note': 'Video with title containing dash',
+        'url': 'http://my.tv.sohu.com/us/249884221/78932792.shtml',
+        'info_dict': {
+            'id': '78932792',
+            'ext': 'mp4',
+            'title': 'youtube-dl testing video',
+        },
+        'params': {
+            'skip_download': True
+        }
+    }]
 
     def _real_extract(self, url):
 
@@ -29,8 +95,14 @@ class SohuIE(InfoExtractor):
             else:
                 base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
 
+            req = compat_urllib_request.Request(base_data_url + vid_id)
+
+            cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
+            if cn_verification_proxy:
+                req.add_header('Ytdl-request-proxy', cn_verification_proxy)
+
             return self._download_json(
-                base_data_url + vid_id, video_id,
+                req, video_id,
                 'Downloading JSON data for %s' % vid_id)
 
         mobj = re.match(self._VALID_URL, url)
@@ -38,15 +110,22 @@ class SohuIE(InfoExtractor):
         mytv = mobj.group('mytv') is not None
 
         webpage = self._download_webpage(url, video_id)
-        raw_title = self._html_search_regex(
-            r'(?s)<title>(.+?)</title>',
-            webpage, 'video title')
-        title = raw_title.partition('-')[0].strip()
+
+        title = re.sub(r' - 搜狐视频$', '', self._og_search_title(webpage))
 
         vid = self._html_search_regex(
             r'var vid ?= ?["\'](\d+)["\']',
             webpage, 'video path')
         vid_data = _fetch_data(vid, mytv)
+        if vid_data['play'] != 1:
+            if vid_data.get('status') == 12:
+                raise ExtractorError(
+                    'Sohu said: There\'s something wrong in the video.',
+                    expected=True)
+            else:
+                raise ExtractorError(
+                    'Sohu said: The video is only licensed to users in Mainland China.',
+                    expected=True)
 
         formats_json = {}
         for format_id in ('nor', 'high', 'super', 'ori', 'h2644k', 'h2654k'):
@@ -62,22 +141,21 @@ class SohuIE(InfoExtractor):
         for i in range(part_count):
             formats = []
             for format_id, format_data in formats_json.items():
-                allot = format_data['allot']
-                prot = format_data['prot']
-
                 data = format_data['data']
-                clips_url = data['clipsURL']
-                su = data['su']
 
-                part_str = self._download_webpage(
-                    'http://%s/?prot=%s&file=%s&new=%s' %
-                    (allot, prot, clips_url[i], su[i]),
-                    video_id,
-                    'Downloading %s video URL part %d of %d'
-                    % (format_id, i + 1, part_count))
-
-                part_info = part_str.split('|')
-                video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])
+                # URLs starting with http://newflv.sohu.ccgslb.net/ are not usable,
+                # so retry (up to 5 times) until we get a working URL
+                video_url = 'newflv.sohu.ccgslb.net'
+                retries = 0
+                while 'newflv.sohu.ccgslb.net' in video_url and retries < 5:
+                    download_note = 'Download information from CDN gateway for format ' + format_id
+                    if retries > 0:
+                        download_note += ' (retry #%d)' % retries
+                    retries += 1
+                    cdn_info = self._download_json(
+                        'http://data.vod.itc.cn/cdnList?new=' + data['su'][i],
+                        video_id, download_note)
+                    video_url = cdn_info['url']
 
                 formats.append({
                     'url': video_url,
@@ -101,9 +179,10 @@ class SohuIE(InfoExtractor):
             info['id'] = video_id
         else:
             info = {
-                '_type': 'playlist',
+                '_type': 'multi_video',
                 'entries': playlist,
                 'id': video_id,
+                'title': title,
             }
 
         return info
index c5284fa673b7eda4f74191fba6a788df39939a51..c23c5ee0fb853f86e4662ad574b587610456159c 100644 (file)
@@ -180,7 +180,7 @@ class SoundcloudIE(InfoExtractor):
                     'format_id': key,
                     'url': url,
                     'play_path': 'mp3:' + path,
-                    'ext': ext,
+                    'ext': 'flv',
                     'vcodec': 'none',
                 })
 
@@ -200,8 +200,9 @@ class SoundcloudIE(InfoExtractor):
                 if f['format_id'].startswith('rtmp'):
                     f['protocol'] = 'rtmp'
 
-            self._sort_formats(formats)
-            result['formats'] = formats
+        self._check_formats(formats, track_id)
+        self._sort_formats(formats)
+        result['formats'] = formats
 
         return result
 
@@ -220,7 +221,12 @@ class SoundcloudIE(InfoExtractor):
                 info_json_url += "&secret_token=" + token
         elif mobj.group('player'):
             query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
-            return self.url_result(query['url'][0])
+            real_url = query['url'][0]
+            # If the token is in the query of the original url we have to
+            # manually add it
+            if 'secret_token' in query:
+                real_url += '?secret_token=' + query['secret_token'][0]
+            return self.url_result(real_url)
         else:
             # extract uploader (which is in the url)
             uploader = mobj.group('uploader')
@@ -241,7 +247,7 @@ class SoundcloudIE(InfoExtractor):
 
 
 class SoundcloudSetIE(SoundcloudIE):
-    _VALID_URL = r'https?://(?:www\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
+    _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
     IE_NAME = 'soundcloud:set'
     _TESTS = [{
         'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
@@ -273,9 +279,8 @@ class SoundcloudSetIE(SoundcloudIE):
         info = self._download_json(resolv_url, full_title)
 
         if 'errors' in info:
-            for err in info['errors']:
-                self._downloader.report_error('unable to download video webpage: %s' % compat_str(err['error_message']))
-            return
+            msgs = (compat_str(err['error_message']) for err in info['errors'])
+            raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs))
 
         return {
             '_type': 'playlist',
@@ -286,7 +291,7 @@ class SoundcloudSetIE(SoundcloudIE):
 
 
 class SoundcloudUserIE(SoundcloudIE):
-    _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'
+    _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'
     IE_NAME = 'soundcloud:user'
     _TESTS = [{
         'url': 'https://soundcloud.com/the-concept-band',
@@ -331,7 +336,7 @@ class SoundcloudUserIE(SoundcloudIE):
             if len(new_entries) == 0:
                 self.to_screen('%s: End page received' % uploader)
                 break
-            entries.extend(self._extract_info_dict(e, quiet=True) for e in new_entries)
+            entries.extend(self.url_result(e['permalink_url'], 'Soundcloud') for e in new_entries)
 
         return {
             '_type': 'playlist',
index c20397b3d1bbffb69188ac872facd36cce5b11f7..7fb165a872766f4c1917d2929ec8a73b8f74434e 100644 (file)
@@ -1,3 +1,4 @@
+# encoding: utf-8
 from __future__ import unicode_literals
 
 from .mtv import MTVServicesInfoExtractor
@@ -5,7 +6,7 @@ from .mtv import MTVServicesInfoExtractor
 
 class SouthParkIE(MTVServicesInfoExtractor):
     IE_NAME = 'southpark.cc.com'
-    _VALID_URL = r'https?://(www\.)?(?P<url>southpark\.cc\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
+    _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/(?:clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
 
     _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
 
@@ -20,9 +21,20 @@ class SouthParkIE(MTVServicesInfoExtractor):
     }]
 
 
-class SouthparkDeIE(SouthParkIE):
+class SouthParkEsIE(SouthParkIE):  # Spanish-language episode pages hosted on southpark.cc.com
+    IE_NAME = 'southpark.cc.com:español'
+    _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/episodios-en-espanol/(?P<id>.+?)(\?|#|$))'
+    _LANG = 'es'  # NOTE(review): presumably read by the MTV services base class to pick the language -- confirm
+
+    _TESTS = [{
+        'url': 'http://southpark.cc.com/episodios-en-espanol/s01e01-cartman-consigue-una-sonda-anal#source=351c1323-0b96-402d-a8b9-40d01b2e9bde&position=1&sort=!airdate',
+        'playlist_count': 4,
+    }]
+
+
+class SouthParkDeIE(SouthParkIE):
     IE_NAME = 'southpark.de'
-    _VALID_URL = r'https?://(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
+    _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.de/(?:clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
     _FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'
 
     _TESTS = [{
@@ -34,3 +46,25 @@ class SouthparkDeIE(SouthParkIE):
             'description': 'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
         },
     }]
+
+
+class SouthParkNlIE(SouthParkIE):  # southpark.nl; overrides only the name, URL pattern, feed URL and tests
+    IE_NAME = 'southpark.nl'
+    _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.nl/(?:clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
+    _FEED_URL = 'http://www.southpark.nl/feeds/video-player/mrss/'
+
+    _TESTS = [{
+        'url': 'http://www.southpark.nl/full-episodes/s18e06-freemium-isnt-free',
+        'playlist_count': 4,  # the episode page is expected to yield a 4-entry playlist
+    }]
+
+
+class SouthParkDkIE(SouthParkIE):  # southparkstudios.dk; overrides only the name, URL pattern, feed URL and tests
+    IE_NAME = 'southparkstudios.dk'
+    _VALID_URL = r'https?://(?:www\.)?(?P<url>southparkstudios\.dk/(?:clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
+    _FEED_URL = 'http://www.southparkstudios.dk/feeds/video-player/mrss/'
+
+    _TESTS = [{
+        'url': 'http://www.southparkstudios.dk/full-episodes/s18e07-grounded-vindaloop',
+        'playlist_count': 4,  # the episode page is expected to yield a 4-entry playlist
+    }]
diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py
new file mode 100644 (file)
index 0000000..7f060b1
--- /dev/null
@@ -0,0 +1,64 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class SpankBangIE(InfoExtractor):
+    """Information extractor for spankbang.com video pages."""
+
+    _VALID_URL = r'https?://(?:(?:www|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video'
+    _TEST = {
+        'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
+        'md5': '1cc433e1d6aa14bc376535b8679302f7',
+        'info_dict': {
+            'id': '3vvn',
+            'ext': 'mp4',
+            'title': 'fantasy solo',
+            'description': 'dillion harper masturbates on a bed',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'uploader': 'silly2587',
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Scrape a video page and return its info dict.
+
+        One format is built per ``q_<height>p`` marker found in the page,
+        using the page's ``stream_key`` variable. The description falls
+        back to None and the uploader lookup is non-fatal.
+        """
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        stream_key = self._html_search_regex(
+            r'''var\s+stream_key\s*=\s*['"](.+?)['"]''',
+            webpage, 'stream key')
+
+        # Every advertised quality shares one URL template.
+        formats = []
+        for height in re.findall(r'<span[^>]+q_(\d+)p', webpage):
+            formats.append({
+                'url': 'http://spankbang.com/_%s/%s/title/%sp__mp4' % (video_id, stream_key, height),
+                'ext': 'mp4',
+                'format_id': '%sp' % height,
+                'height': int(height),
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': self._html_search_regex(
+                r'(?s)<h1>(.+?)</h1>', webpage, 'title'),
+            'description': self._search_regex(
+                r'class="desc"[^>]*>([^<]+)',
+                webpage, 'description', default=None),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'uploader': self._search_regex(
+                r'class="user"[^>]*>([^<]+)',
+                webpage, 'uploader', fatal=False),
+            'formats': formats,
+            'age_limit': self._rta_search(webpage),
+        }
index e529bb55ccccb1beefdf12d2df1ea689dd0d6f2e..182f286dfefc4023483c422fbf6c6a73203b86ff 100644 (file)
@@ -5,7 +5,7 @@ from .mtv import MTVServicesInfoExtractor
 
 class SpikeIE(MTVServicesInfoExtractor):
     _VALID_URL = r'''(?x)https?://
-        (?:www\.spike\.com/(?:video-clips|(?:full-)?episodes)/.+|
+        (?:www\.spike\.com/(?:video-(?:clips|playlists)|(?:full-)?episodes)/.+|
          m\.spike\.com/videos/video\.rbml\?id=(?P<id>[^&]+))
         '''
     _TEST = {
diff --git a/youtube_dl/extractor/srf.py b/youtube_dl/extractor/srf.py
new file mode 100644 (file)
index 0000000..77eec0b
--- /dev/null
@@ -0,0 +1,118 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    parse_iso8601,
+    xpath_text,
+)
+
+
+class SrfIE(InfoExtractor):
+    """Information extractor for srf.ch play pages and tp.srgssr.ch flash player URLs."""
+
+    _VALID_URL = r'https?://(?:www\.srf\.ch/play(?:er)?/tv/[^/]+/video/(?P<display_id>[^?]+)\?id=|tp\.srgssr\.ch/p/flash\?urn=urn:srf:ais:video:)(?P<id>[0-9a-f\-]{36})'
+    _TESTS = [{
+        'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
+        'md5': '4cd93523723beff51bb4bee974ee238d',
+        'info_dict': {
+            'id': '28e1a57d-5b76-4399-8ab3-9097f071e6c5',
+            'display_id': 'snowden-beantragt-asyl-in-russland',
+            'ext': 'm4v',
+            'upload_date': '20130701',
+            'title': 'Snowden beantragt Asyl in Russland',
+            'timestamp': 1372713995,
+        }
+    }, {
+        # No Speichern (Save) button
+        'url': 'http://www.srf.ch/play/tv/top-gear/video/jaguar-xk120-shadow-und-tornado-dampflokomotive?id=677f5829-e473-4823-ac83-a1087fe97faa',
+        'md5': 'd97e236e80d1d24729e5d0953d276a4f',
+        'info_dict': {
+            'id': '677f5829-e473-4823-ac83-a1087fe97faa',
+            'display_id': 'jaguar-xk120-shadow-und-tornado-dampflokomotive',
+            'ext': 'flv',
+            'upload_date': '20130710',
+            'title': 'Jaguar XK120, Shadow und Tornado-Dampflokomotive',
+            'timestamp': 1373493600,
+        },
+    }, {
+        'url': 'http://www.srf.ch/player/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
+        'only_matching': True,
+    }, {
+        'url': 'https://tp.srgssr.ch/p/flash?urn=urn:srf:ais:video:28e1a57d-5b76-4399-8ab3-9097f071e6c5',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Download the video's metadata XML and turn it into an info dict.
+
+        Formats come from the XML's Playlist and Download entries; f4m and
+        m3u8 URLs are expanded through the manifest helpers, anything else
+        is used as a direct URL. Subtitles, when present, are listed
+        under the 'de' key.
+        """
+        video_id = self._match_id(url)
+        url_match = re.match(self._VALID_URL, url)
+        # tp.srgssr.ch URLs carry no display id, so fall back to the video id.
+        display_id = url_match.group('display_id') or video_id
+
+        metadata_url = 'http://il.srgssr.ch/integrationlayer/1.0/ue/srf/video/play/%s.xml' % video_id
+        video_data = self._download_xml(metadata_url, display_id)
+
+        title = xpath_text(
+            video_data, './AssetMetadatas/AssetMetadata/title', fatal=True)
+        thumbnails = []
+        for image_url in video_data.findall('.//ImageRepresentation/url'):
+            thumbnails.append({'url': image_url.text})
+        # The <duration> field in XML is different from the exact duration, skipping
+        timestamp = parse_iso8601(xpath_text(video_data, './createdDate'))
+
+        sources = (
+            video_data.findall('./Playlists/Playlist') +
+            video_data.findall('./Downloads/Download'))
+        formats = []
+        for source in sources:
+            for url_node in source.findall('url'):
+                quality = url_node.attrib['quality']
+                media_url = url_node.text
+                media_ext = determine_ext(media_url)
+                format_id = '%s-%s' % (quality, source.attrib['protocol'])
+                if media_ext == 'f4m':
+                    formats.extend(self._extract_f4m_formats(
+                        media_url + '?hdcore=3.4.0', display_id, f4m_id=format_id))
+                    continue
+                if media_ext == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        media_url, display_id, 'mp4', m3u8_id=format_id))
+                    continue
+                # Plain media URL: qualities containing 'HD' get 0, all others -1.
+                formats.append({
+                    'url': media_url,
+                    'ext': media_ext,
+                    'format_id': format_id,
+                    'quality': -1 if 'HD' not in quality else 0,
+                    'preference': 1,
+                })
+
+        self._sort_formats(formats)
+
+        subtitles = {}
+        subtitles_node = video_data.find('Subtitles')
+        subtitle_entries = [] if subtitles_node is None else [{
+            'url': sub.text,
+            'ext': determine_ext(sub.text),
+        } for sub in subtitles_node]
+        if subtitle_entries:
+            subtitles['de'] = subtitle_entries
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'formats': formats,
+            'title': title,
+            'thumbnails': thumbnails,
+            'timestamp': timestamp,
+            'subtitles': subtitles,
+        }
diff --git a/youtube_dl/extractor/ssa.py b/youtube_dl/extractor/ssa.py
new file mode 100644 (file)
index 0000000..13101c7
--- /dev/null
@@ -0,0 +1,68 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    unescapeHTML,
+    parse_duration,
+)
+
+
+class SSAIE(InfoExtractor):
+    """Information extractor for film pages on ssa.nls.uk (RTMP streams)."""
+
+    _VALID_URL = r'http://ssa\.nls\.uk/film/(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://ssa.nls.uk/film/3561',
+        'info_dict': {
+            'id': '3561',
+            'ext': 'flv',
+            'title': 'SHETLAND WOOL',
+            'description': 'md5:c5afca6871ad59b4271e7704fe50ab04',
+            'duration': 900,
+            'thumbnail': r're:^https?://.*\.jpg$',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        """Scrape the film page's player setup and metadata fields.
+
+        Returns an info dict whose 'url' is the RTMP streamer address and
+        whose 'play_path' is the player's file value up to its last dot.
+        """
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        # The player is configured through 'key', 'value' pairs; allow
+        # optional whitespace on both sides of the comma.
+        streamer = self._search_regex(
+            r"'streamer'\s*,\s*'(rtmp[^']+)'", webpage, 'streamer')
+        play_path = self._search_regex(
+            r"'file'\s*,\s*'([^']+)'", webpage, 'file').rpartition('.')[0]
+
+        def search_field(field_name, fatal=False):
+            """Return the content of the page's "<field_name>:" metadata field."""
+            return self._search_regex(
+                r'<span\s+class="field_title">%s:</span>\s*<span\s+class="field_content">([^<]+)</span>' % field_name,
+                webpage, field_name.lower(), fatal=fatal)
+
+        title = unescapeHTML(search_field('Title', fatal=True)).strip('()[]')
+        description = unescapeHTML(search_field('Description'))
+        duration = parse_duration(search_field('Running time'))
+        thumbnail = self._search_regex(
+            r"'image'\s*,\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
+
+        return {
+            'id': video_id,
+            'url': streamer,
+            'play_path': play_path,
+            'ext': 'flv',
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'thumbnail': thumbnail,
+        }
similarity index 66%
rename from youtube_dl/extractor/svtplay.py
rename to youtube_dl/extractor/svt.py
index 433dfd1cb0f27f066e37b26508a97a388d0046d1..fc20f664b7f4e1e6267e5cbad7a191e723e204e3 100644 (file)
@@ -9,41 +9,9 @@ from ..utils import (
 )
 
 
-class SVTPlayIE(InfoExtractor):
-    IE_DESC = 'SVT Play and Öppet arkiv'
-    _VALID_URL = r'https?://(?:www\.)?(?P<host>svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)'
-    _TESTS = [{
-        'url': 'http://www.svtplay.se/video/2609989/sm-veckan/sm-veckan-rally-final-sasong-1-sm-veckan-rally-final',
-        'md5': 'ade3def0643fa1c40587a422f98edfd9',
-        'info_dict': {
-            'id': '2609989',
-            'ext': 'flv',
-            'title': 'SM veckan vinter, Örebro - Rally, final',
-            'duration': 4500,
-            'thumbnail': 're:^https?://.*[\.-]jpg$',
-            'age_limit': 0,
-        },
-    }, {
-        'url': 'http://www.oppetarkiv.se/video/1058509/rederiet-sasong-1-avsnitt-1-av-318',
-        'md5': 'c3101a17ce9634f4c1f9800f0746c187',
-        'info_dict': {
-            'id': '1058509',
-            'ext': 'flv',
-            'title': 'Farlig kryssning',
-            'duration': 2566,
-            'thumbnail': 're:^https?://.*[\.-]jpg$',
-            'age_limit': 0,
-        },
-        'skip': 'Only works from Sweden',
-    }]
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        host = mobj.group('host')
-
-        info = self._download_json(
-            'http://www.%s.se/video/%s?output=json' % (host, video_id), video_id)
+class SVTBaseIE(InfoExtractor):
+    def _extract_video(self, url, video_id):
+        info = self._download_json(url, video_id)
 
         title = info['context']['title']
         thumbnail = info['context'].get('thumbnailImage')
@@ -80,3 +48,70 @@ class SVTPlayIE(InfoExtractor):
             'duration': duration,
             'age_limit': age_limit,
         }
+
+
+class SVTIE(SVTBaseIE):
+    """Extractor for svt.se embeddable widget URLs (widgetId plus articleId)."""
+    _VALID_URL = r'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P<widget_id>\d+)&.*?\barticleId=(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://www.svt.se/wd?widgetId=23991&sectionId=541&articleId=2900353&type=embed&contextSectionId=123&autostart=false',
+        'md5': '9648197555fc1b49e3dc22db4af51d46',
+        'info_dict': {
+            'id': '2900353',
+            'ext': 'flv',
+            'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
+            'duration': 27,
+            'age_limit': 0,
+        },
+    }
+
+    @staticmethod
+    def _extract_url(webpage):
+        """Return the first iframe/href widget URL found in webpage, or None."""
+        found = re.search(r'(?:<iframe src|href)="(?P<url>%s[^"]*)"' % SVTIE._VALID_URL, webpage)
+        return found.group('url') if found else None
+
+    def _real_extract(self, url):
+        """Rebuild the widget URL in its JSON form and extract the video from it."""
+        widget_id, article_id = re.match(
+            self._VALID_URL, url).group('widget_id', 'id')
+        json_url = 'http://www.svt.se/wd?widgetId=%s&articleId=%s&format=json&type=embed&output=json' % (
+            widget_id, article_id)
+        return self._extract_video(json_url, article_id)
+
+
+class SVTPlayIE(SVTBaseIE):
+    IE_DESC = 'SVT Play and Öppet arkiv'
+    _VALID_URL = r'https?://(?:www\.)?(?P<host>svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'http://www.svtplay.se/video/2609989/sm-veckan/sm-veckan-rally-final-sasong-1-sm-veckan-rally-final',
+        'md5': 'ade3def0643fa1c40587a422f98edfd9',
+        'info_dict': {
+            'id': '2609989',
+            'ext': 'flv',
+            'title': 'SM veckan vinter, Örebro - Rally, final',
+            'duration': 4500,
+            'thumbnail': 're:^https?://.*[\.-]jpg$',
+            'age_limit': 0,
+        },
+    }, {
+        'url': 'http://www.oppetarkiv.se/video/1058509/rederiet-sasong-1-avsnitt-1-av-318',
+        'md5': 'c3101a17ce9634f4c1f9800f0746c187',
+        'info_dict': {
+            'id': '1058509',
+            'ext': 'flv',
+            'title': 'Farlig kryssning',
+            'duration': 2566,
+            'thumbnail': 're:^https?://.*[\.-]jpg$',
+            'age_limit': 0,
+        },
+        'skip': 'Only works from Sweden',
+    }]
+
+    def _real_extract(self, url):
+        """Extract the video through the matched host's ?output=json page."""
+        # The JSON URL is built on whichever of the two domains the URL matched.
+        host, video_id = re.match(
+            self._VALID_URL, url).group('host', 'id')
+        json_url = 'http://www.%s.se/video/%s?output=json' % (host, video_id)
+        return self._extract_video(json_url, video_id)
index 5793dbc1085a86fdf573a432805be129dc62de94..56be526383b590d9b00759400a269d5164beef2f 100644 (file)
@@ -1,10 +1,17 @@
+# -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 
 import base64
+import binascii
 import re
+import json
 
 from .common import InfoExtractor
-from ..utils import qualities
+from ..utils import (
+    ExtractorError,
+    qualities,
+)
+from ..compat import compat_ord
 
 
 class TeamcocoIE(InfoExtractor):
@@ -18,6 +25,7 @@ class TeamcocoIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'Conan Becomes A Mary Kay Beauty Consultant',
                 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.',
+                'duration': 504,
                 'age_limit': 0,
             }
         }, {
@@ -28,8 +36,20 @@ class TeamcocoIE(InfoExtractor):
                 'ext': 'mp4',
                 'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.',
                 'title': 'Louis C.K. Interview Pt. 1 11/3/11',
+                'duration': 288,
                 'age_limit': 0,
             }
+        }, {
+            'url': 'http://teamcoco.com/video/timothy-olyphant-drinking-whiskey',
+            'info_dict': {
+                'id': '88748',
+                'ext': 'mp4',
+                'title': 'Timothy Olyphant Raises A Toast To “Justified”',
+                'description': 'md5:15501f23f020e793aeca761205e42c24',
+            },
+            'params': {
+                'skip_download': True,  # m3u8 downloads
+            }
         }
     ]
     _VIDEO_ID_REGEXES = (
@@ -42,42 +62,73 @@ class TeamcocoIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
 
         display_id = mobj.group('display_id')
-        webpage = self._download_webpage(url, display_id)
+        webpage, urlh = self._download_webpage_handle(url, display_id)
+        if 'src=expired' in urlh.geturl():
+            raise ExtractorError('This video is expired.', expected=True)
 
         video_id = mobj.group('video_id')
         if not video_id:
             video_id = self._html_search_regex(
                 self._VIDEO_ID_REGEXES, webpage, 'video id')
 
-        embed_url = 'http://teamcoco.com/embed/v/%s' % video_id
-        embed = self._download_webpage(
-            embed_url, video_id, 'Downloading embed page')
+        data = None
+
+        preload_codes = self._html_search_regex(
+            r'(function.+)setTimeout\(function\(\)\{playlist',
+            webpage, 'preload codes')
+        base64_fragments = re.findall(r'"([a-zA-z0-9+/=]+)"', preload_codes)
+        base64_fragments.remove('init')
 
-        encoded_data = self._search_regex(
-            r'"preload"\s*:\s*"([^"]+)"', embed, 'encoded data')
-        data = self._parse_json(
-            base64.b64decode(encoded_data.encode('ascii')).decode('utf-8'), video_id)
+        def _check_sequence(cur_fragments):
+            if not cur_fragments:
+                return
+            for i in range(len(cur_fragments)):
+                cur_sequence = (''.join(cur_fragments[i:] + cur_fragments[:i])).encode('ascii')
+                try:
+                    raw_data = base64.b64decode(cur_sequence)
+                    if compat_ord(raw_data[0]) == compat_ord('{'):
+                        return json.loads(raw_data.decode('utf-8'))
+                except (TypeError, binascii.Error, UnicodeDecodeError, ValueError):
+                    continue
+
+        def _check_data():
+            for i in range(len(base64_fragments) + 1):
+                for j in range(i, len(base64_fragments) + 1):
+                    data = _check_sequence(base64_fragments[:i] + base64_fragments[j:])
+                    if data:
+                        return data
+
+        self.to_screen('Try to compute possible data sequence. This may take some time.')
+        data = _check_data()
+
+        if not data:
+            raise ExtractorError(
+                'Preload information could not be extracted', expected=True)
 
         formats = []
         get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])
         for filed in data['files']:
-            m_format = re.search(r'(\d+(k|p))\.mp4', filed['url'])
-            if m_format is not None:
-                format_id = m_format.group(1)
+            if filed['type'] == 'hls':
+                formats.extend(self._extract_m3u8_formats(
+                    filed['url'], video_id, ext='mp4'))
             else:
-                format_id = filed['bitrate']
-            tbr = (
-                int(filed['bitrate'])
-                if filed['bitrate'].isdigit()
-                else None)
-
-            formats.append({
-                'url': filed['url'],
-                'ext': 'mp4',
-                'tbr': tbr,
-                'format_id': format_id,
-                'quality': get_quality(format_id),
-            })
+                m_format = re.search(r'(\d+(k|p))\.mp4', filed['url'])
+                if m_format is not None:
+                    format_id = m_format.group(1)
+                else:
+                    format_id = filed['bitrate']
+                tbr = (
+                    int(filed['bitrate'])
+                    if filed['bitrate'].isdigit()
+                    else None)
+
+                formats.append({
+                    'url': filed['url'],
+                    'ext': 'mp4',
+                    'tbr': tbr,
+                    'format_id': format_id,
+                    'quality': get_quality(format_id),
+                })
 
         self._sort_formats(formats)
 
@@ -88,5 +139,6 @@ class TeamcocoIE(InfoExtractor):
             'title': data['title'],
             'thumbnail': data.get('thumb', {}).get('href'),
             'description': data.get('teaser'),
+            'duration': data.get('duration'),
             'age_limit': self._family_friendly_search(webpage),
         }
index 4cec06f8bd6e2a18ac3062e916225746f5153c93..a48d77c309dcd1f9984cd0a6c71b7af574ca5498 100644 (file)
@@ -5,12 +5,12 @@ import re
 
 from .common import InfoExtractor
 
-from ..compat import (
-    compat_str,
-)
+from ..compat import compat_str
+from ..utils import int_or_none
 
 
 class TEDIE(InfoExtractor):
+    IE_NAME = 'ted'
     _VALID_URL = r'''(?x)
         (?P<proto>https?://)
         (?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/
@@ -170,17 +170,51 @@ class TEDIE(InfoExtractor):
                 finfo = self._NATIVE_FORMATS.get(f['format_id'])
                 if finfo:
                     f.update(finfo)
-        else:
-            # Use rtmp downloads
-            formats = [{
-                'format_id': f['name'],
-                'url': talk_info['streamer'],
-                'play_path': f['file'],
-                'ext': 'flv',
-                'width': f['width'],
-                'height': f['height'],
-                'tbr': f['bitrate'],
-            } for f in talk_info['resources']['rtmp']]
+
+        for format_id, resources in talk_info['resources'].items():
+            if format_id == 'h264':
+                for resource in resources:
+                    bitrate = int_or_none(resource.get('bitrate'))
+                    formats.append({
+                        'url': resource['file'],
+                        'format_id': '%s-%sk' % (format_id, bitrate),
+                        'tbr': bitrate,
+                    })
+            elif format_id == 'rtmp':
+                streamer = talk_info.get('streamer')
+                if not streamer:
+                    continue
+                for resource in resources:
+                    formats.append({
+                        'format_id': '%s-%s' % (format_id, resource.get('name')),
+                        'url': streamer,
+                        'play_path': resource['file'],
+                        'ext': 'flv',
+                        'width': int_or_none(resource.get('width')),
+                        'height': int_or_none(resource.get('height')),
+                        'tbr': int_or_none(resource.get('bitrate')),
+                    })
+            elif format_id == 'hls':
+                hls_formats = self._extract_m3u8_formats(
+                    resources.get('stream'), video_name, 'mp4', m3u8_id=format_id)
+                for f in hls_formats:
+                    if f.get('format_id') == 'hls-meta':
+                        continue
+                    if not f.get('height'):
+                        f['vcodec'] = 'none'
+                    else:
+                        f['acodec'] = 'none'
+                formats.extend(hls_formats)
+
+        audio_download = talk_info.get('audioDownload')
+        if audio_download:
+            formats.append({
+                'url': audio_download,
+                'format_id': 'audio',
+                'vcodec': 'none',
+                'preference': -0.5,
+            })
+
         self._sort_formats(formats)
 
         video_id = compat_str(talk_info['id'])
index 6a7b5e49de2d348cb76b88abd57bea59113138a6..26655d690250f495caf98de2cfaad6aff3eda331 100644 (file)
@@ -15,19 +15,37 @@ class TestTubeIE(InfoExtractor):
             'id': '60163',
             'display_id': '5-weird-ways-plants-can-eat-animals',
             'duration': 275,
-            'ext': 'mp4',
+            'ext': 'webm',
             'title': '5 Weird Ways Plants Can Eat Animals',
             'description': 'Why have some plants evolved to eat meat?',
             'thumbnail': 're:^https?://.*\.jpg$',
             'uploader': 'DNews',
             'uploader_id': 'dnews',
         },
+    }, {
+        'url': 'https://testtube.com/iflscience/insane-jet-ski-flipping',
+        'info_dict': {
+            'id': 'fAGfJ4YjVus',
+            'ext': 'mp4',
+            'title': 'Flipping Jet-Ski Skills | Outrageous Acts of Science',
+            'uploader': 'Science Channel',
+            'uploader_id': 'ScienceChannel',
+            'upload_date': '20150203',
+            'description': 'md5:e61374030015bae1d2e22f096d4769d6',
+        }
     }]
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
 
         webpage = self._download_webpage(url, display_id)
+
+        youtube_url = self._html_search_regex(
+            r'<iframe[^>]+src="((?:https?:)?//www.youtube.com/embed/[^"]+)"',
+            webpage, 'youtube iframe', default=None)
+        if youtube_url:
+            return self.url_result(youtube_url, 'Youtube', video_id=display_id)
+
         video_id = self._search_regex(
             r"player\.loadRevision3Item\('video_id',\s*([0-9]+)\);",
             webpage, 'video ID')
index feac666f78baff49f4fb312a147acad67d320bc2..92731ad3d7e8dcc3167b50ce1a15e3b035fb7721 100644 (file)
@@ -17,6 +17,7 @@ from ..utils import (
     ExtractorError,
     xpath_with_ns,
     unsmuggle_url,
+    int_or_none,
 )
 
 _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
@@ -28,7 +29,7 @@ class ThePlatformIE(InfoExtractor):
            (?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
          |theplatform:)(?P<id>[^/\?&]+)'''
 
-    _TEST = {
+    _TESTS = [{
         # from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/
         'url': 'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true',
         'info_dict': {
@@ -42,7 +43,20 @@ class ThePlatformIE(InfoExtractor):
             # rtmp download
             'skip_download': True,
         },
-    }
+    }, {
+        # from http://www.cnet.com/videos/tesla-model-s-a-second-step-towards-a-cleaner-motoring-future/
+        'url': 'http://link.theplatform.com/s/kYEXFC/22d_qsQ6MIRT',
+        'info_dict': {
+            'id': '22d_qsQ6MIRT',
+            'ext': 'flv',
+            'description': 'md5:ac330c9258c04f9d7512cf26b9595409',
+            'title': 'Tesla Model S: A second step towards a cleaner motoring future',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        }
+    }]
 
     @staticmethod
     def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
@@ -92,7 +106,7 @@ class ThePlatformIE(InfoExtractor):
             error_msg = next(
                 n.attrib['abstract']
                 for n in meta.findall(_x('.//smil:ref'))
-                if n.attrib.get('title') == 'Geographic Restriction')
+                if n.attrib.get('title') == 'Geographic Restriction' or n.attrib.get('title') == 'Expired')
         except StopIteration:
             pass
         else:
@@ -116,6 +130,8 @@ class ThePlatformIE(InfoExtractor):
         body = meta.find(_x('smil:body'))
 
         f4m_node = body.find(_x('smil:seq//smil:video'))
+        if f4m_node is None:
+            f4m_node = body.find(_x('smil:seq/smil:video'))
         if f4m_node is not None and '.f4m' in f4m_node.attrib['src']:
             f4m_url = f4m_node.attrib['src']
             if 'manifest.f4m?' not in f4m_url:
@@ -127,13 +143,19 @@ class ThePlatformIE(InfoExtractor):
         else:
             formats = []
             switch = body.find(_x('smil:switch'))
+            if switch is None:
+                switch = body.find(_x('smil:par//smil:switch'))
+            if switch is None:
+                switch = body.find(_x('smil:par/smil:switch'))
+            if switch is None:
+                switch = body.find(_x('smil:par'))
             if switch is not None:
                 base_url = head.find(_x('smil:meta')).attrib['base']
                 for f in switch.findall(_x('smil:video')):
                     attr = f.attrib
-                    width = int(attr['width'])
-                    height = int(attr['height'])
-                    vbr = int(attr['system-bitrate']) // 1000
+                    width = int_or_none(attr.get('width'))
+                    height = int_or_none(attr.get('height'))
+                    vbr = int_or_none(attr.get('system-bitrate'), 1000)
                     format_id = '%dx%d_%dk' % (width, height, vbr)
                     formats.append({
                         'format_id': format_id,
@@ -146,9 +168,11 @@ class ThePlatformIE(InfoExtractor):
                     })
             else:
                 switch = body.find(_x('smil:seq//smil:switch'))
+                if switch is None:
+                    switch = body.find(_x('smil:seq/smil:switch'))
                 for f in switch.findall(_x('smil:video')):
                     attr = f.attrib
-                    vbr = int(attr['system-bitrate']) // 1000
+                    vbr = int_or_none(attr.get('system-bitrate'), 1000)
                     ext = determine_ext(attr['src'])
                     if ext == 'once':
                         ext = 'mp4'
@@ -167,5 +191,5 @@ class ThePlatformIE(InfoExtractor):
             'formats': formats,
             'description': info['description'],
             'thumbnail': info['defaultThumbnailUrl'],
-            'duration': info['duration'] // 1000,
+            'duration': int_or_none(info.get('duration'), 1000),
         }
index c5c6fdc51b19fce90d45298858ce345bc901307e..7dbe68b5c228ea6d3fa20d835a60ecf38b820816 100644 (file)
@@ -30,3 +30,31 @@ class TMZIE(InfoExtractor):
             'description': self._og_search_description(webpage),
             'thumbnail': self._html_search_meta('ThumbURL', webpage),
         }
+
+
+class TMZArticleIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?tmz\.com/\d{4}/\d{2}/\d{2}/(?P<id>[^/]+)/?'
+    _TEST = {
+        'url': 'http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert',
+        'md5': 'e482a414a38db73087450e3a6ce69d00',
+        'info_dict': {
+            'id': '0_6snoelag',
+            'ext': 'mp4',
+            'title': 'Bobby Brown Tells Crowd ... Bobbi Kristina is Awake',
+            'description': 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake.  She\'s watching me."',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+        embedded_video_info_str = self._html_search_regex(
+            r'tmzVideoEmbedV2\("([^)]+)"\);', webpage, 'embedded video info')
+
+        embedded_video_info = self._parse_json(
+            embedded_video_info_str, video_id,
+            transform_source=lambda s: s.replace('\\', ''))
+
+        return self.url_result(
+            'http://www.tmz.com/videos/%s/' % embedded_video_info['id'])
index 2a1ae5a717cf7b2af16bf5a1ce3ef7494e28a7a6..828c808a6456b6b99b134cb7ae9d9017de9ad3aa 100644 (file)
@@ -56,6 +56,6 @@ class TumblrIE(InfoExtractor):
             'url': video_url,
             'ext': 'mp4',
             'title': video_title,
-            'description': self._og_search_description(webpage),
-            'thumbnail': self._og_search_thumbnail(webpage),
+            'description': self._og_search_description(webpage, default=None),
+            'thumbnail': self._og_search_thumbnail(webpage, default=None),
         }
index 9a53a3c74143d72a14842ea70ce4063a8d28a30c..e83e31a31640fa32e4a19a48a745d279a14d3753 100644 (file)
@@ -16,6 +16,7 @@ class TVPlayIE(InfoExtractor):
     _VALID_URL = r'''(?x)http://(?:www\.)?
         (?:tvplay\.lv/parraides|
            tv3play\.lt/programos|
+           play\.tv3\.lt/programos|
            tv3play\.ee/sisu|
            tv3play\.se/program|
            tv6play\.se/program|
@@ -45,7 +46,7 @@ class TVPlayIE(InfoExtractor):
             },
         },
         {
-            'url': 'http://www.tv3play.lt/programos/moterys-meluoja-geriau/409229?autostart=true',
+            'url': 'http://play.tv3.lt/programos/moterys-meluoja-geriau/409229?autostart=true',
             'info_dict': {
                 'id': '409229',
                 'ext': 'flv',
diff --git a/youtube_dl/extractor/twentytwotracks.py b/youtube_dl/extractor/twentytwotracks.py
new file mode 100644 (file)
index 0000000..d6c0ab1
--- /dev/null
@@ -0,0 +1,86 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+# 22Tracks regularly replace the audio tracks that can be streamed on their
+# site. The tracks usually expire after 1 month, so we can't add tests.
+
+
+class TwentyTwoTracksIE(InfoExtractor):
+    _VALID_URL = r'https?://22tracks\.com/(?P<city>[a-z]+)/(?P<genre>[\da-z]+)/(?P<id>\d+)'
+    IE_NAME = '22tracks:track'
+
+    _API_BASE = 'http://22tracks.com/api'
+
+    def _extract_info(self, city, genre_name, track_id=None):
+        item_id = track_id if track_id else genre_name
+
+        cities = self._download_json(
+            '%s/cities' % self._API_BASE, item_id,
+            'Downloading cities info',
+            'Unable to download cities info')
+        city_id = [x['id'] for x in cities if x['slug'] == city][0]
+
+        genres = self._download_json(
+            '%s/genres/%s' % (self._API_BASE, city_id), item_id,
+            'Downloading %s genres info' % city,
+            'Unable to download %s genres info' % city)
+        genre = [x for x in genres if x['slug'] == genre_name][0]
+        genre_id = genre['id']
+
+        tracks = self._download_json(
+            '%s/tracks/%s' % (self._API_BASE, genre_id), item_id,
+            'Downloading %s genre tracks info' % genre_name,
+            'Unable to download track info')
+
+        return [x for x in tracks if x['id'] == item_id][0] if track_id else [genre['title'], tracks]
+
+    def _get_track_url(self, filename, track_id):
+        token = self._download_json(
+            'http://22tracks.com/token.php?desktop=true&u=/128/%s' % filename,
+            track_id, 'Downloading token', 'Unable to download token')
+        return 'http://audio.22tracks.com%s?st=%s&e=%d' % (token['filename'], token['st'], token['e'])
+
+    def _extract_track_info(self, track_info, track_id):
+        download_url = self._get_track_url(track_info['filename'], track_id)
+        title = '%s - %s' % (track_info['artist'].strip(), track_info['title'].strip())
+        return {
+            'id': track_id,
+            'url': download_url,
+            'ext': 'mp3',
+            'title': title,
+            'duration': int_or_none(track_info.get('duration')),
+            'timestamp': int_or_none(track_info.get('published_at') or track_info.get('created'))
+        }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+
+        city = mobj.group('city')
+        genre = mobj.group('genre')
+        track_id = mobj.group('id')
+
+        track_info = self._extract_info(city, genre, track_id)
+        return self._extract_track_info(track_info, track_id)
+
+
+class TwentyTwoTracksGenreIE(TwentyTwoTracksIE):
+    _VALID_URL = r'https?://22tracks\.com/(?P<city>[a-z]+)/(?P<genre>[\da-z]+)/?$'
+    IE_NAME = '22tracks:genre'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+
+        city = mobj.group('city')
+        genre = mobj.group('genre')
+
+        genre_title, tracks = self._extract_info(city, genre)
+
+        entries = [
+            self._extract_track_info(track_info, track_info['id'])
+            for track_info in tracks]
+
+        return self.playlist_result(entries, genre, genre_title)
index 4b0ce54df4d329cf24e76621c6064c67a4befaaa..94bd6345da18815a50b72502a8b91ae4e30ae2b5 100644 (file)
@@ -23,6 +23,8 @@ class TwitchBaseIE(InfoExtractor):
     _API_BASE = 'https://api.twitch.tv'
     _USHER_BASE = 'http://usher.twitch.tv'
     _LOGIN_URL = 'https://secure.twitch.tv/user/login'
+    _LOGIN_POST_URL = 'https://secure-login.twitch.tv/login'
+    _NETRC_MACHINE = 'twitch'
 
     def _handle_error(self, response):
         if not isinstance(response, dict):
@@ -66,14 +68,14 @@ class TwitchBaseIE(InfoExtractor):
             'authenticity_token': authenticity_token,
             'redirect_on_login': '',
             'embed_form': 'false',
-            'mp_source_action': '',
+            'mp_source_action': 'login-button',
             'follow': '',
-            'user[login]': username,
-            'user[password]': password,
+            'login': username,
+            'password': password,
         }
 
         request = compat_urllib_request.Request(
-            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
+            self._LOGIN_POST_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
         request.add_header('Referer', self._LOGIN_URL)
         response = self._download_webpage(
             request, None, 'Logging in as %s' % username)
@@ -84,6 +86,14 @@ class TwitchBaseIE(InfoExtractor):
             raise ExtractorError(
                 'Unable to login: %s' % m.group('msg').strip(), expected=True)
 
+    def _prefer_source(self, formats):
+        try:
+            source = next(f for f in formats if f['format_id'] == 'Source')
+            source['preference'] = 10
+        except StopIteration:
+            pass  # No Source stream present
+        self._sort_formats(formats)
+
 
 class TwitchItemBaseIE(TwitchBaseIE):
     def _download_info(self, item, item_id):
@@ -139,7 +149,7 @@ class TwitchItemBaseIE(TwitchBaseIE):
 
 class TwitchVideoIE(TwitchItemBaseIE):
     IE_NAME = 'twitch:video'
-    _VALID_URL = r'%s/[^/]+/b/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
+    _VALID_URL = r'%s/[^/]+/b/(?P<id>\d+)' % TwitchBaseIE._VALID_URL_BASE
     _ITEM_TYPE = 'video'
     _ITEM_SHORTCUT = 'a'
 
@@ -155,7 +165,7 @@ class TwitchVideoIE(TwitchItemBaseIE):
 
 class TwitchChapterIE(TwitchItemBaseIE):
     IE_NAME = 'twitch:chapter'
-    _VALID_URL = r'%s/[^/]+/c/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
+    _VALID_URL = r'%s/[^/]+/c/(?P<id>\d+)' % TwitchBaseIE._VALID_URL_BASE
     _ITEM_TYPE = 'chapter'
     _ITEM_SHORTCUT = 'c'
 
@@ -174,7 +184,7 @@ class TwitchChapterIE(TwitchItemBaseIE):
 
 class TwitchVodIE(TwitchItemBaseIE):
     IE_NAME = 'twitch:vod'
-    _VALID_URL = r'%s/[^/]+/v/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
+    _VALID_URL = r'%s/[^/]+/v/(?P<id>\d+)' % TwitchBaseIE._VALID_URL_BASE
     _ITEM_TYPE = 'vod'
     _ITEM_SHORTCUT = 'v'
 
@@ -208,6 +218,7 @@ class TwitchVodIE(TwitchItemBaseIE):
             '%s/vod/%s?nauth=%s&nauthsig=%s'
             % (self._USHER_BASE, item_id, access_token['token'], access_token['sig']),
             item_id, 'mp4')
+        self._prefer_source(formats)
         info['formats'] = formats
         return info
 
@@ -348,21 +359,14 @@ class TwitchStreamIE(TwitchBaseIE):
             'p': random.randint(1000000, 10000000),
             'player': 'twitchweb',
             'segment_preference': '4',
-            'sig': access_token['sig'],
-            'token': access_token['token'],
+            'sig': access_token['sig'].encode('utf-8'),
+            'token': access_token['token'].encode('utf-8'),
         }
-
         formats = self._extract_m3u8_formats(
             '%s/api/channel/hls/%s.m3u8?%s'
-            % (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query).encode('utf-8')),
+            % (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query)),
             channel_id, 'mp4')
-
-        # prefer the 'source' stream, the others are limited to 30 fps
-        def _sort_source(f):
-            if f.get('m3u8_media') is not None and f['m3u8_media'].get('NAME') == 'Source':
-                return 1
-            return 0
-        formats = sorted(formats, key=_sort_source)
+        self._prefer_source(formats)
 
         view_count = stream.get('viewers')
         timestamp = parse_iso8601(stream.get('created_at'))
diff --git a/youtube_dl/extractor/udn.py b/youtube_dl/extractor/udn.py
new file mode 100644 (file)
index 0000000..c08428a
--- /dev/null
@@ -0,0 +1,74 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+from .common import InfoExtractor
+from ..utils import (
+    js_to_json,
+    ExtractorError,
+)
+from ..compat import compat_urlparse
+
+
+class UDNEmbedIE(InfoExtractor):
+    _VALID_URL = r'https?://video\.udn\.com/(?:embed|play)/news/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://video.udn.com/embed/news/300040',
+        'md5': 'de06b4c90b042c128395a88f0384817e',
+        'info_dict': {
+            'id': '300040',
+            'ext': 'mp4',
+            'title': '生物老師男變女 全校挺"做自己"',
+            'thumbnail': 're:^https?://.*\.jpg$',
+        }
+    }, {
+        'url': 'https://video.udn.com/embed/news/300040',
+        'only_matching': True,
+    }, {
+        # From https://video.udn.com/news/303776
+        'url': 'https://video.udn.com/play/news/303776',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        page = self._download_webpage(url, video_id)
+
+        options = json.loads(js_to_json(self._html_search_regex(
+            r'var options\s*=\s*([^;]+);', page, 'video urls dictionary')))
+
+        video_urls = options['video']
+
+        if video_urls.get('youtube'):
+            return self.url_result(video_urls.get('youtube'), 'Youtube')
+
+        try:
+            del video_urls['youtube']
+        except KeyError:
+            pass
+
+        formats = [{
+            'url': self._download_webpage(
+                compat_urlparse.urljoin(url, api_url), video_id,
+                'retrieve url for %s video' % video_type),
+            'format_id': video_type,
+            'preference': 0 if video_type == 'mp4' else -1,
+        } for video_type, api_url in video_urls.items() if api_url]
+
+        if not formats:
+            raise ExtractorError('No videos found', expected=True)
+
+        self._sort_formats(formats)
+
+        thumbnail = None
+
+        if options.get('gallery') and len(options['gallery']):
+            thumbnail = options['gallery'][0].get('original')
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'title': options['title'],
+            'thumbnail': thumbnail
+        }
diff --git a/youtube_dl/extractor/ultimedia.py b/youtube_dl/extractor/ultimedia.py
new file mode 100644 (file)
index 0000000..96c809e
--- /dev/null
@@ -0,0 +1,103 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    qualities,
+    unified_strdate,
+    clean_html,
+)
+
+
+class UltimediaIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?ultimedia\.com/default/index/video[^/]+/id/(?P<id>[\d+a-z]+)'
+    _TESTS = [{
+        # news
+        'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r',
+        'md5': '276a0e49de58c7e85d32b057837952a2',
+        'info_dict': {
+            'id': 's8uk0r',
+            'ext': 'mp4',
+            'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées',
+            'description': 'md5:3e5c8fd65791487333dda5db8aed32af',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'upload_date': '20150317',
+        },
+    }, {
+        # music
+        'url': 'https://www.ultimedia.com/default/index/videomusic/id/xvpfp8',
+        'md5': '2ea3513813cf230605c7e2ffe7eca61c',
+        'info_dict': {
+            'id': 'xvpfp8',
+            'ext': 'mp4',
+            'title': "Two - C'est la vie (Clip)",
+            'description': 'Two',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'upload_date': '20150224',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        deliver_url = self._search_regex(
+            r'<iframe[^>]+src="(https?://(?:www\.)?ultimedia\.com/deliver/[^"]+)"',
+            webpage, 'deliver URL')
+
+        deliver_page = self._download_webpage(
+            deliver_url, video_id, 'Downloading iframe page')
+
+        if '>This video is currently not available' in deliver_page:
+            raise ExtractorError(
+                'Video %s is currently not available' % video_id, expected=True)
+
+        player = self._parse_json(
+            self._search_regex(
+                r"jwplayer\('player(?:_temp)?'\)\.setup\(({.+?})\)\.on", deliver_page, 'player'),
+            video_id)
+
+        quality = qualities(['flash', 'html5'])
+        formats = []
+        for mode in player['modes']:
+            video_url = mode.get('config', {}).get('file')
+            if not video_url:
+                continue
+            if re.match(r'https?://www\.youtube\.com/.+?', video_url):
+                return self.url_result(video_url, 'Youtube')
+            formats.append({
+                'url': video_url,
+                'format_id': mode.get('type'),
+                'quality': quality(mode.get('type')),
+            })
+        self._sort_formats(formats)
+
+        thumbnail = player.get('image')
+
+        title = clean_html((
+            self._html_search_regex(
+                r'(?s)<div\s+id="catArticle">.+?</div>(.+?)</h1>',
+                webpage, 'title', default=None) or
+            self._search_regex(
+                r"var\s+nameVideo\s*=\s*'([^']+)'",
+                deliver_page, 'title')))
+
+        description = clean_html(self._html_search_regex(
+            r'(?s)<span>Description</span>(.+?)</p>', webpage,
+            'description', fatal=False))
+
+        upload_date = unified_strdate(self._search_regex(
+            r'Ajouté le\s*<span>([^<]+)', webpage,
+            'upload date', fatal=False))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'upload_date': upload_date,
+            'formats': formats,
+        }
index 68d03b99905cce848eb38fde8b6d8e643c548105..c39c278ab211c45809e594f64cc90f71304e9d92 100644 (file)
@@ -1,17 +1,19 @@
 from __future__ import unicode_literals
 
+import json
 import re
 
 from .common import InfoExtractor
 from ..compat import (
     compat_urlparse,
 )
+from ..utils import ExtractorError
 
 
 class UstreamIE(InfoExtractor):
     _VALID_URL = r'https?://www\.ustream\.tv/(?P<type>recorded|embed|embed/recorded)/(?P<videoID>\d+)'
     IE_NAME = 'ustream'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.ustream.tv/recorded/20274954',
         'md5': '088f151799e8f572f84eb62f17d73e5c',
         'info_dict': {
@@ -20,7 +22,18 @@ class UstreamIE(InfoExtractor):
             'uploader': 'Young Americans for Liberty',
             'title': 'Young Americans for Liberty February 7, 2012 2:28 AM',
         },
-    }
+    }, {
+        # From http://sportscanada.tv/canadagames/index.php/week2/figure-skating/444
+        # Title and uploader available only from params JSON
+        'url': 'http://www.ustream.tv/embed/recorded/59307601?ub=ff0000&lc=ff0000&oc=ffffff&uc=ffffff&v=3&wmode=direct',
+        'md5': '5a2abf40babeac9812ed20ae12d34e10',
+        'info_dict': {
+            'id': '59307601',
+            'ext': 'flv',
+            'title': '-CG11- Canada Games Figure Skating',
+            'uploader': 'sportscanadatv',
+        }
+    }]
 
     def _real_extract(self, url):
         m = re.match(self._VALID_URL, url)
@@ -39,16 +52,42 @@ class UstreamIE(InfoExtractor):
             desktop_url = 'http://www.ustream.tv/recorded/' + desktop_video_id
             return self.url_result(desktop_url, 'Ustream')
 
-        video_url = 'http://tcdn.ustream.tv/video/%s' % video_id
+        params = self._download_json(
+            'http://cdngw.ustream.tv/rgwjson/Viewer.getVideo/' + json.dumps({
+                'brandId': 1,
+                'videoId': int(video_id),
+                'autoplay': False,
+            }), video_id)
+
+        if 'error' in params:
+            raise ExtractorError(params['error']['message'], expected=True)
+
+        video_url = params['flv']
+
         webpage = self._download_webpage(url, video_id)
 
         self.report_extraction(video_id)
 
         video_title = self._html_search_regex(r'data-title="(?P<title>.+)"',
-                                              webpage, 'title')
+                                              webpage, 'title', default=None)
+
+        if not video_title:
+            try:
+                video_title = params['moduleConfig']['meta']['title']
+            except KeyError:
+                pass
+
+        if not video_title:
+            video_title = 'Ustream video ' + video_id
 
         uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
-                                           webpage, 'uploader', fatal=False, flags=re.DOTALL)
+                                           webpage, 'uploader', fatal=False, flags=re.DOTALL, default=None)
+
+        if not uploader:
+            try:
+                uploader = params['moduleConfig']['meta']['userName']
+            except KeyError:
+                uploader = None
 
         thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"',
                                             webpage, 'thumbnail', fatal=False)
diff --git a/youtube_dl/extractor/varzesh3.py b/youtube_dl/extractor/varzesh3.py
new file mode 100644 (file)
index 0000000..9369aba
--- /dev/null
@@ -0,0 +1,45 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class Varzesh3IE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?video\.varzesh3\.com/(?:[^/]+/)+(?P<id>[^/]+)/?'
+    _TEST = {
+        'url': 'http://video.varzesh3.com/germany/bundesliga/5-%D9%88%D8%A7%DA%A9%D9%86%D8%B4-%D8%A8%D8%B1%D8%AA%D8%B1-%D8%AF%D8%B1%D9%88%D8%A7%D8%B2%D9%87%E2%80%8C%D8%A8%D8%A7%D9%86%D8%A7%D9%86%D8%9B%D9%87%D9%81%D8%AA%D9%87-26-%D8%A8%D9%88%D9%86%D8%AF%D8%B3/',
+        'md5': '2a933874cb7dce4366075281eb49e855',
+        'info_dict': {
+            'id': '76337',
+            'ext': 'mp4',
+            'title': '۵ واکنش برتر دروازه‌بانان؛هفته ۲۶ بوندسلیگا',
+            'description': 'فصل ۲۰۱۵-۲۰۱۴',
+            'thumbnail': 're:^https?://.*\.jpg$',
+        }
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        video_url = self._search_regex(
+            r'<source[^>]+src="([^"]+)"', webpage, 'video url')
+
+        title = self._og_search_title(webpage)
+        description = self._html_search_regex(
+            r'(?s)<div class="matn">(.+?)</div>',
+            webpage, 'description', fatal=False)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        video_id = self._search_regex(
+            r"<link[^>]+rel='(?:canonical|shortlink)'[^>]+href='/\?p=([^']+)'",
+            webpage, display_id, default=display_id)
+
+        return {
+            'url': video_url,
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+        }
index 96353f5250783be95fd4bf308190309baae70187..346edf485998e973f4a56f8dee3213093f90aae1 100644 (file)
@@ -17,7 +17,9 @@ from ..utils import (
 class VeeHDIE(InfoExtractor):
     _VALID_URL = r'https?://veehd\.com/video/(?P<id>\d+)'
 
-    _TEST = {
+    # It seems VeeHD videos have multiple copies on several servers, all of
+    # which have different MD5 checksums, so omit the md5 field in all tests
+    _TESTS = [{
         'url': 'http://veehd.com/video/4639434_Solar-Sinter',
         'info_dict': {
             'id': '4639434',
@@ -26,7 +28,26 @@ class VeeHDIE(InfoExtractor):
             'uploader_id': 'VideoEyes',
             'description': 'md5:46a840e8692ddbaffb5f81d9885cb457',
         },
-    }
+        'skip': 'Video deleted',
+    }, {
+        'url': 'http://veehd.com/video/4905758_Elysian-Fields-Channeling',
+        'info_dict': {
+            'id': '4905758',
+            'ext': 'mp4',
+            'title': 'Elysian Fields - Channeling',
+            'description': 'md5:360e4e95fdab58aefbea0f2a19e5604b',
+            'uploader_id': 'spotted',
+        }
+    }, {
+        'url': 'http://veehd.com/video/2046729_2012-2009-DivX-Trailer',
+        'info_dict': {
+            'id': '2046729',
+            'ext': 'avi',
+            'title': '2012 (2009) DivX Trailer',
+            'description': 'md5:75435ee95255e6a9838ac6f6f3a2396b',
+            'uploader_id': 'Movie_Trailers',
+        }
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -48,13 +69,21 @@ class VeeHDIE(InfoExtractor):
         player_page = self._download_webpage(
             player_url, video_id, 'Downloading player page')
 
+        video_url = None
+
         config_json = self._search_regex(
             r'value=\'config=({.+?})\'', player_page, 'config json', default=None)
 
         if config_json:
             config = json.loads(config_json)
             video_url = compat_urlparse.unquote(config['clip']['url'])
-        else:
+
+        if not video_url:
+            video_url = self._html_search_regex(
+                r'<embed[^>]+type="video/divx"[^>]+src="([^"]+)"',
+                player_page, 'video url', default=None)
+
+        if not video_url:
             iframe_src = self._search_regex(
                 r'<iframe[^>]+src="/?([^"]+)"', player_page, 'iframe url')
             iframe_url = 'http://veehd.com/%s' % iframe_src
@@ -82,7 +111,6 @@ class VeeHDIE(InfoExtractor):
             'id': video_id,
             'title': title,
             'url': video_url,
-            'ext': 'mp4',
             'uploader_id': uploader_id,
             'thumbnail': thumbnail,
             'description': description,
diff --git a/youtube_dl/extractor/vessel.py b/youtube_dl/extractor/vessel.py
new file mode 100644 (file)
index 0000000..3c8d2a9
--- /dev/null
@@ -0,0 +1,133 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_request
+from ..utils import (
+    ExtractorError,
+    parse_iso8601,
+)
+
+
+class VesselIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?vessel\.com/videos/(?P<id>[0-9a-zA-Z]+)'
+    _API_URL_TEMPLATE = 'https://www.vessel.com/api/view/items/%s'
+    _LOGIN_URL = 'https://www.vessel.com/api/account/login'
+    _NETRC_MACHINE = 'vessel'
+    _TEST = {
+        'url': 'https://www.vessel.com/videos/HDN7G5UMs',
+        'md5': '455cdf8beb71c6dd797fd2f3818d05c4',
+        'info_dict': {
+            'id': 'HDN7G5UMs',
+            'ext': 'mp4',
+            'title': 'Nvidia GeForce GTX Titan X - The Best Video Card on the Market?',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'upload_date': '20150317',
+            'description': 'Did Nvidia pull out all the stops on the Titan X, or does its performance leave something to be desired?',
+            'timestamp': int,
+        },
+    }
+
+    @staticmethod
+    def make_json_request(url, data):
+        payload = json.dumps(data).encode('utf-8')
+        req = compat_urllib_request.Request(url, payload)
+        req.add_header('Content-Type', 'application/json; charset=utf-8')
+        return req
+
+    @staticmethod
+    def find_assets(data, asset_type, asset_id=None):
+        for asset in data.get('assets', []):
+            if not asset.get('type') == asset_type:
+                continue
+            elif asset_id is not None and not asset.get('id') == asset_id:
+                continue
+            else:
+                yield asset
+
+    def _check_access_rights(self, data):
+        access_info = data.get('__view', {})
+        if not access_info.get('allow_access', True):
+            err_code = access_info.get('error_code') or ''
+            if err_code == 'ITEM_PAID_ONLY':
+                raise ExtractorError(
+                    'This video requires subscription.', expected=True)
+            else:
+                raise ExtractorError(
+                    'Access to this content is restricted. (%s said: %s)' % (self.IE_NAME, err_code), expected=True)
+
+    def _login(self):
+        (username, password) = self._get_login_info()
+        if username is None:
+            return
+        self.report_login()
+        data = {
+            'client_id': 'web',
+            'type': 'password',
+            'user_key': username,
+            'password': password,
+        }
+        login_request = VesselIE.make_json_request(self._LOGIN_URL, data)
+        self._download_webpage(login_request, None, False, 'Wrong login info')
+
+    def _real_initialize(self):
+        self._login()
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+        data = self._parse_json(self._search_regex(
+            r'App\.bootstrapData\((.*?)\);', webpage, 'data'), video_id)
+        asset_id = data['model']['data']['id']
+
+        req = VesselIE.make_json_request(
+            self._API_URL_TEMPLATE % asset_id, {'client': 'web'})
+        data = self._download_json(req, video_id)
+        video_asset_id = data.get('main_video_asset')
+
+        self._check_access_rights(data)
+
+        try:
+            video_asset = next(
+                VesselIE.find_assets(data, 'video', asset_id=video_asset_id))
+        except StopIteration:
+            raise ExtractorError('No video assets found')
+
+        formats = []
+        for f in video_asset.get('sources', []):
+            if f['name'] == 'hls-index':
+                formats.extend(self._extract_m3u8_formats(
+                    f['location'], video_id, ext='mp4', m3u8_id='m3u8'))
+            else:
+                formats.append({
+                    'format_id': f['name'],
+                    'tbr': f.get('bitrate'),
+                    'height': f.get('height'),
+                    'width': f.get('width'),
+                    'url': f['location'],
+                })
+        self._sort_formats(formats)
+
+        thumbnails = []
+        for im_asset in VesselIE.find_assets(data, 'image'):
+            thumbnails.append({
+                'url': im_asset['location'],
+                'width': im_asset.get('width', 0),
+                'height': im_asset.get('height', 0),
+            })
+
+        return {
+            'id': video_id,
+            'title': data['title'],
+            'formats': formats,
+            'thumbnails': thumbnails,
+            'description': data.get('short_description'),
+            'duration': data.get('duration'),
+            'comment_count': data.get('comment_count'),
+            'like_count': data.get('like_count'),
+            'view_count': data.get('view_count'),
+            'timestamp': parse_iso8601(data.get('released_at')),
+        }
index 2f111bf7ee042de1fce790a3f0d0f13be7f1feff..e6ee1e4715efc5d47dd3f9aa32d6559a5737a8ea 100644 (file)
@@ -8,7 +8,19 @@ from ..utils import float_or_none
 
 
 class VGTVIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?vgtv\.no/#!/(?:.*)/(?P<id>[0-9]+)'
+    IE_DESC = 'VGTV and BTTV'
+    _VALID_URL = r'''(?x)
+                    (?:
+                        vgtv:|
+                        http://(?:www\.)?
+                    )
+                    (?P<host>vgtv|bt)
+                    (?:
+                        :|
+                        \.no/(?:tv/)?\#!/(?:video|live)/
+                    )
+                    (?P<id>[0-9]+)
+                    '''
     _TESTS = [
         {
             # streamType: vod
@@ -64,12 +76,25 @@ class VGTVIE(InfoExtractor):
                 'skip_download': True,
             },
         },
+        {
+            'url': 'http://www.bt.no/tv/#!/video/100250/norling-dette-er-forskjellen-paa-1-divisjon-og-eliteserien',
+            'only_matching': True,
+        },
     ]
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        host = mobj.group('host')
+
+        HOST_WEBSITES = {
+            'vgtv': 'vgtv',
+            'bt': 'bttv',
+        }
+
         data = self._download_json(
-            'http://svp.vg.no/svp/api/v1/vgtv/assets/%s?appName=vgtv-website' % video_id,
+            'http://svp.vg.no/svp/api/v1/%s/assets/%s?appName=%s-website'
+            % (host, video_id, HOST_WEBSITES[host]),
             video_id, 'Downloading media JSON')
 
         streams = data['streamUrls']
@@ -78,11 +103,14 @@ class VGTVIE(InfoExtractor):
 
         hls_url = streams.get('hls')
         if hls_url:
-            formats.extend(self._extract_m3u8_formats(hls_url, video_id, 'mp4'))
+            formats.extend(self._extract_m3u8_formats(
+                hls_url, video_id, 'mp4', m3u8_id='hls'))
 
         hds_url = streams.get('hds')
         if hds_url:
-            formats.extend(self._extract_f4m_formats(hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', video_id))
+            formats.extend(self._extract_f4m_formats(
+                hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
+                video_id, f4m_id='hds'))
 
         mp4_url = streams.get('mp4')
         if mp4_url:
@@ -115,3 +143,51 @@ class VGTVIE(InfoExtractor):
             'view_count': data['displays'],
             'formats': formats,
         }
+
+
+class BTArticleIE(InfoExtractor):
+    IE_NAME = 'bt:article'
+    IE_DESC = 'Bergens Tidende Articles'
+    _VALID_URL = 'http://(?:www\.)?bt\.no/(?:[^/]+/)+(?P<id>[^/]+)-\d+\.html'
+    _TEST = {
+        'url': 'http://www.bt.no/nyheter/lokalt/Kjemper-for-internatet-1788214.html',
+        'md5': 'd055e8ee918ef2844745fcfd1a4175fb',
+        'info_dict': {
+            'id': '23199',
+            'ext': 'mp4',
+            'title': 'Alrekstad internat',
+            'description': 'md5:dc81a9056c874fedb62fc48a300dac58',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'duration': 191,
+            'timestamp': 1289991323,
+            'upload_date': '20101117',
+            'view_count': int,
+        },
+    }
+
+    def _real_extract(self, url):
+        webpage = self._download_webpage(url, self._match_id(url))
+        video_id = self._search_regex(
+            r'SVP\.Player\.load\(\s*(\d+)', webpage, 'video id')
+        return self.url_result('vgtv:bt:%s' % video_id, 'VGTV')
+
+
+class BTVestlendingenIE(InfoExtractor):
+    IE_NAME = 'bt:vestlendingen'
+    IE_DESC = 'Bergens Tidende - Vestlendingen'
+    _VALID_URL = 'http://(?:www\.)?bt\.no/spesial/vestlendingen/#!/(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://www.bt.no/spesial/vestlendingen/#!/86588',
+        'md5': 'd7d17e3337dc80de6d3a540aefbe441b',
+        'info_dict': {
+            'id': '86588',
+            'ext': 'mov',
+            'title': 'Otto Wollertsen',
+            'description': 'Vestlendingen Otto Fredrik Wollertsen',
+            'timestamp': 1430473209,
+            'upload_date': '20150501',
+        },
+    }
+
+    def _real_extract(self, url):
+        return self.url_result('xstream:btno:%s' % self._match_id(url), 'Xstream')
index 71f520fb525a5bef424061be1ff881408b762624..04e2b0ba7849adee473a42e471d77ced7df0652c 100644 (file)
@@ -31,7 +31,6 @@ class ViceIE(InfoExtractor):
                 r'embedCode=([^&\'"]+)', webpage,
                 'ooyala embed code')
             ooyala_url = OoyalaIE._url_for_embed_code(embed_code)
-            print(ooyala_url)
         except ExtractorError:
             raise ExtractorError('The page doesn\'t contain a video', expected=True)
         return self.url_result(ooyala_url, ie='Ooyala')
index 27303031620a8c126797bcdd6207d2f2355c74be..eb309a7cdf99b3ebc4bde755fe09d47505516f28 100644 (file)
@@ -4,28 +4,21 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-    compat_urllib_request,
-)
-from ..utils import (
-    ExtractorError,
-    remove_start,
-)
+from ..compat import compat_urllib_request
 
 
 class VideoMegaIE(InfoExtractor):
     _VALID_URL = r'''(?x)https?://
         (?:www\.)?videomega\.tv/
-        (?:iframe\.php)?\?ref=(?P<id>[A-Za-z0-9]+)
+        (?:iframe\.php|cdn\.php)?\?ref=(?P<id>[A-Za-z0-9]+)
         '''
     _TEST = {
-        'url': 'http://videomega.tv/?ref=QR0HCUHI1661IHUCH0RQ',
+        'url': 'http://videomega.tv/?ref=4GNA688SU99US886ANG4',
         'md5': 'bf5c2f95c4c917536e80936af7bc51e1',
         'info_dict': {
-            'id': 'QR0HCUHI1661IHUCH0RQ',
+            'id': '4GNA688SU99US886ANG4',
             'ext': 'mp4',
-            'title': 'Big Buck Bunny',
+            'title': 'BigBuckBunny_320x180',
             'thumbnail': 're:^https?://.*\.jpg$',
         }
     }
@@ -33,34 +26,24 @@ class VideoMegaIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        iframe_url = 'http://videomega.tv/iframe.php?ref={0:}'.format(video_id)
+        iframe_url = 'http://videomega.tv/cdn.php?ref=%s' % video_id
         req = compat_urllib_request.Request(iframe_url)
         req.add_header('Referer', url)
         webpage = self._download_webpage(req, video_id)
 
-        try:
-            escaped_data = re.findall(r'unescape\("([^"]+)"\)', webpage)[-1]
-        except IndexError:
-            raise ExtractorError('Unable to extract escaped data')
-
-        playlist = compat_urllib_parse.unquote(escaped_data)
-
+        title = self._html_search_regex(
+            r'<title>(.*?)</title>', webpage, 'title')
+        title = re.sub(
+            r'(?:^[Vv]ideo[Mm]ega\.tv\s-\s?|\s?-\svideomega\.tv$)', '', title)
         thumbnail = self._search_regex(
-            r'image:\s*"([^"]+)"', playlist, 'thumbnail', fatal=False)
-        video_url = self._search_regex(r'file:\s*"([^"]+)"', playlist, 'URL')
-        title = remove_start(self._html_search_regex(
-            r'<title>(.*?)</title>', webpage, 'title'), 'VideoMega.tv - ')
-
-        formats = [{
-            'format_id': 'sd',
-            'url': video_url,
-        }]
-        self._sort_formats(formats)
+            r'<video[^>]+?poster="([^"]+)"', webpage, 'thumbnail', fatal=False)
+        video_url = self._search_regex(
+            r'<source[^>]+?src="([^"]+)"', webpage, 'video URL')
 
         return {
             'id': video_id,
             'title': title,
-            'formats': formats,
+            'url': video_url,
             'thumbnail': thumbnail,
             'http_headers': {
                 'Referer': iframe_url,
index 5c89824c164272358923e245e60a4f86466287c4..bd953fb4cc212f50dce2cac624c9391a14e82898 100644 (file)
@@ -1,7 +1,5 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
@@ -28,12 +26,11 @@ class VidmeIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        video_url = self._html_search_regex(r'<source src="([^"]+)"', webpage, 'video URL')
+        video_url = self._html_search_regex(
+            r'<source src="([^"]+)"', webpage, 'video URL')
 
         title = self._og_search_title(webpage)
         description = self._og_search_description(webpage, default='')
@@ -44,13 +41,10 @@ class VidmeIE(InfoExtractor):
         duration = float_or_none(self._html_search_regex(
             r'data-duration="([^"]+)"', webpage, 'duration', fatal=False))
         view_count = str_to_int(self._html_search_regex(
-            r'<span class="video_views">\s*([\d,\.]+)\s*plays?', webpage, 'view count', fatal=False))
+            r'<(?:li|span) class="video_views">\s*([\d,\.]+)\s*plays?', webpage, 'view count', fatal=False))
         like_count = str_to_int(self._html_search_regex(
             r'class="score js-video-vote-score"[^>]+data-score="([\d,\.\s]+)">',
             webpage, 'like count', fatal=False))
-        comment_count = str_to_int(self._html_search_regex(
-            r'class="js-comment-count"[^>]+data-count="([\d,\.\s]+)">',
-            webpage, 'comment count', fatal=False))
 
         return {
             'id': video_id,
@@ -64,5 +58,4 @@ class VidmeIE(InfoExtractor):
             'duration': duration,
             'view_count': view_count,
             'like_count': like_count,
-            'comment_count': comment_count,
         }
diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py
new file mode 100644 (file)
index 0000000..1742e66
--- /dev/null
@@ -0,0 +1,129 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_request
+
+
+class ViewsterIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?viewster\.com/movie/(?P<id>\d+-\d+-\d+)'
+    _TESTS = [{
+        # movielink, paymethod=fre
+        'url': 'http://www.viewster.com/movie/1293-19341-000/hout-wood/',
+        'playlist': [{
+            'md5': '8f9d94b282d80c42b378dffdbb11caf3',
+            'info_dict': {
+                'id': '1293-19341-000-movie',
+                'ext': 'flv',
+                'title': "'Hout' (Wood) - Movie",
+            },
+        }],
+        'info_dict': {
+            'id': '1293-19341-000',
+            'title': "'Hout' (Wood)",
+            'description': 'md5:925733185a9242ef96f436937683f33b',
+        }
+    }, {
+        # movielink, paymethod=adv
+        'url': 'http://www.viewster.com/movie/1140-11855-000/the-listening-project/',
+        'playlist': [{
+            'md5': '77a005453ca7396cbe3d35c9bea30aef',
+            'info_dict': {
+                'id': '1140-11855-000-movie',
+                'ext': 'flv',
+                'title': "THE LISTENING PROJECT - Movie",
+            },
+        }],
+        'info_dict': {
+            'id': '1140-11855-000',
+            'title': "THE LISTENING PROJECT",
+            'description': 'md5:714421ae9957e112e672551094bf3b08',
+        }
+    }, {
+        # direct links, no movielink
+        'url': 'http://www.viewster.com/movie/1198-56411-000/sinister/',
+        'playlist': [{
+            'md5': '0307b7eac6bfb21ab0577a71f6eebd8f',
+            'info_dict': {
+                'id': '1198-56411-000-trailer',
+                'ext': 'mp4',
+                'title': "Sinister - Trailer",
+            },
+        }, {
+            'md5': '80b9ee3ad69fb368f104cb5d9732ae95',
+            'info_dict': {
+                'id': '1198-56411-000-behind-scenes',
+                'ext': 'mp4',
+                'title': "Sinister - Behind Scenes",
+            },
+        }, {
+            'md5': '3b3ea897ecaa91fca57a8a94ac1b15c5',
+            'info_dict': {
+                'id': '1198-56411-000-scene-from-movie',
+                'ext': 'mp4',
+                'title': "Sinister - Scene from movie",
+            },
+        }],
+        'info_dict': {
+            'id': '1198-56411-000',
+            'title': "Sinister",
+            'description': 'md5:014c40b0488848de9683566a42e33372',
+        }
+    }]
+
+    _ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01'
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        request = compat_urllib_request.Request(
+            'http://api.live.viewster.com/api/v1/movie/%s' % video_id)
+        request.add_header('Accept', self._ACCEPT_HEADER)
+
+        movie = self._download_json(
+            request, video_id, 'Downloading movie metadata JSON')
+
+        title = movie.get('title') or movie['original_title']
+        description = movie.get('synopsis')
+        thumbnail = movie.get('large_artwork') or movie.get('artwork')
+
+        entries = []
+        for clip in movie['play_list']:
+            entry = None
+
+            # movielink api
+            link_request = clip.get('link_request')
+            if link_request:
+                request = compat_urllib_request.Request(
+                    'http://api.live.viewster.com/api/v1/movielink?movieid=%(movieid)s&action=%(action)s&paymethod=%(paymethod)s&price=%(price)s&currency=%(currency)s&language=%(language)s&subtitlelanguage=%(subtitlelanguage)s&ischromecast=%(ischromecast)s'
+                    % link_request)
+                request.add_header('Accept', self._ACCEPT_HEADER)
+
+                movie_link = self._download_json(
+                    request, video_id, 'Downloading movie link JSON', fatal=False)
+
+                if movie_link:
+                    formats = self._extract_f4m_formats(
+                        movie_link['url'] + '&hdcore=3.2.0&plugin=flowplayer-3.2.0.1', video_id)
+                    self._sort_formats(formats)
+                    entry = {
+                        'formats': formats,
+                    }
+
+            # direct link
+            clip_url = clip.get('clip_data', {}).get('url')
+            if clip_url:
+                entry = {
+                    'url': clip_url,
+                    'ext': 'mp4',
+                }
+
+            if entry:
+                entry.update({
+                    'id': '%s-%s' % (video_id, clip['canonical_title']),
+                    'title': '%s - %s' % (title, clip['title']),
+                })
+                entries.append(entry)
+
+        playlist = self.playlist_result(entries, video_id, title, description)
+        playlist['thumbnail'] = thumbnail
+        return playlist
index 6816dacb665e2253a132cfe678999a1129860a0b..cf6af1e5cdb6315d325d2bd355d384cc283a3e0c 100644 (file)
@@ -2,12 +2,17 @@ from __future__ import unicode_literals
 
 import re
 
-from ..compat import compat_urlparse
+from ..compat import (
+    compat_urlparse,
+    compat_urllib_request,
+)
 from ..utils import (
     ExtractorError,
     unescapeHTML,
     unified_strdate,
     US_RATINGS,
+    determine_ext,
+    mimetype2ext,
 )
 from .common import InfoExtractor
 
@@ -15,8 +20,11 @@ from .common import InfoExtractor
 class VikiIE(InfoExtractor):
     IE_NAME = 'viki'
 
+    # iPad2
+    _USER_AGENT = 'Mozilla/5.0(iPad; U; CPU OS 4_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8F191 Safari/6533.18.5'
+
     _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
         'info_dict': {
             'id': '1023585v',
@@ -28,7 +36,30 @@ class VikiIE(InfoExtractor):
             'age_limit': 13,
         },
         'skip': 'Blocked in the US',
-    }
+    }, {
+        'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference',
+        'md5': 'ca6493e6f0a6ec07da9aa8d6304b4b2c',
+        'info_dict': {
+            'id': '1067139v',
+            'ext': 'mp4',
+            'description': 'md5:d70b2f9428f5488321bfe1db10d612ea',
+            'upload_date': '20150430',
+            'title': '\'The Avengers: Age of Ultron\' Press Conference',
+        }
+    }, {
+        'url': 'http://www.viki.com/videos/1048879v-ankhon-dekhi',
+        'info_dict': {
+            'id': '1048879v',
+            'ext': 'mp4',
+            'upload_date': '20140820',
+            'description': 'md5:54ff56d51bdfc7a30441ec967394e91c',
+            'title': 'Ankhon Dekhi',
+        },
+        'params': {
+            # requires ffmpeg
+            'skip_download': True,
+        }
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -50,15 +81,34 @@ class VikiIE(InfoExtractor):
             'rating information', default='').strip()
         age_limit = US_RATINGS.get(rating_str)
 
-        info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
+        req = compat_urllib_request.Request(
+            'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id)
+        req.add_header('User-Agent', self._USER_AGENT)
         info_webpage = self._download_webpage(
-            info_url, video_id, note='Downloading info page')
-        if re.match(r'\s*<div\s+class="video-error', info_webpage):
-            raise ExtractorError(
-                'Video %s is blocked from your location.' % video_id,
-                expected=True)
-        video_url = self._html_search_regex(
-            r'<source[^>]+src="([^"]+)"', info_webpage, 'video URL')
+            req, video_id, note='Downloading info page')
+        err_msg = self._html_search_regex(r'<div[^>]+class="video-error[^>]+>(.+)</div>', info_webpage, 'error message', default=None)
+        if err_msg:
+            if 'not available in your region' in err_msg:
+                raise ExtractorError(
+                    'Video %s is blocked from your location.' % video_id,
+                    expected=True)
+            else:
+                raise ExtractorError('Viki said: ' + err_msg)
+        mobj = re.search(
+            r'<source[^>]+type="(?P<mime_type>[^"]+)"[^>]+src="(?P<url>[^"]+)"', info_webpage)
+        if not mobj:
+            raise ExtractorError('Unable to find video URL')
+        video_url = unescapeHTML(mobj.group('url'))
+        video_ext = mimetype2ext(mobj.group('mime_type'))
+
+        if determine_ext(video_url) == 'm3u8':
+            formats = self._extract_m3u8_formats(
+                video_url, video_id, ext=video_ext)
+        else:
+            formats = [{
+                'url': video_url,
+                'ext': video_ext,
+            }]
 
         upload_date_str = self._html_search_regex(
             r'"created_at":"([^"]+)"', info_webpage, 'upload date')
@@ -74,7 +124,7 @@ class VikiIE(InfoExtractor):
         return {
             'id': video_id,
             'title': title,
-            'url': video_url,
+            'formats': formats,
             'description': description,
             'thumbnail': thumbnail,
             'age_limit': age_limit,
index 8f540f5780570d06fa10e695555026c537b7c0f0..f300c7ca40eeba75f872bbd6cd79c3f0720e0955 100644 (file)
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
 import json
 import re
 import itertools
-import hashlib
 
 from .common import InfoExtractor
 from ..compat import (
@@ -20,6 +19,7 @@ from ..utils import (
     RegexNotFoundError,
     smuggle_url,
     std_headers,
+    unified_strdate,
     unsmuggle_url,
     urlencode_postdata,
 )
@@ -38,7 +38,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
         self.report_login()
         login_url = 'https://vimeo.com/log_in'
         webpage = self._download_webpage(login_url, None, False)
-        token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
+        token = self._search_regex(r'xsrft":"(.*?)"', webpage, 'login token')
         data = urlencode_postdata({
             'email': username,
             'password': password,
@@ -140,6 +140,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
                 'description': 'md5:8678b246399b070816b12313e8b4eb5c',
                 'uploader_id': 'atencio',
                 'uploader': 'Peter Atencio',
+                'upload_date': '20130927',
                 'duration': 187,
             },
         },
@@ -176,17 +177,15 @@ class VimeoIE(VimeoBaseInfoExtractor):
         password = self._downloader.params.get('videopassword', None)
         if password is None:
             raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
-        token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
-        data = compat_urllib_parse.urlencode({
+        token = self._search_regex(r'xsrft[\s=:"\']+([^"\']+)', webpage, 'login token')
+        data = urlencode_postdata({
             'password': password,
             'token': token,
         })
-        # I didn't manage to use the password with https
-        if url.startswith('https'):
-            pass_url = url.replace('https', 'http')
-        else:
-            pass_url = url
-        password_request = compat_urllib_request.Request(pass_url + '/password', data)
+        if url.startswith('http://'):
+            # vimeo only supports https now, but the user can give an http url
+            url = url.replace('http://', 'https://')
+        password_request = compat_urllib_request.Request(url + '/password', data)
         password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
         password_request.add_header('Cookie', 'xsrft=%s' % token)
         return self._download_webpage(
@@ -223,12 +222,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
         video_id = mobj.group('id')
         orig_url = url
         if mobj.group('pro') or mobj.group('player'):
-            url = 'http://player.vimeo.com/video/' + video_id
-
-        password = self._downloader.params.get('videopassword', None)
-        if password:
-            headers['Cookie'] = '%s_password=%s' % (
-                video_id, hashlib.md5(password.encode('utf-8')).hexdigest())
+            url = 'https://player.vimeo.com/video/' + video_id
+        else:
+            url = 'https://vimeo.com/' + video_id
 
         # Retrieve video webpage to extract further information
         request = compat_urllib_request.Request(url, None, headers)
@@ -250,6 +246,16 @@ class VimeoIE(VimeoBaseInfoExtractor):
         # and latter we extract those that are Vimeo specific.
         self.report_extraction(video_id)
 
+        vimeo_config = self._search_regex(
+            r'vimeo\.config\s*=\s*({.+?});', webpage,
+            'vimeo config', default=None)
+        if vimeo_config:
+            seed_status = self._parse_json(vimeo_config, video_id).get('seed_status', {})
+            if seed_status.get('state') == 'failed':
+                raise ExtractorError(
+                    '%s returned error: %s' % (self.IE_NAME, seed_status['title']),
+                    expected=True)
+
         # Extract the config JSON
         try:
             try:
@@ -323,9 +329,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
 
         # Extract upload date
         video_upload_date = None
-        mobj = re.search(r'<meta itemprop="dateCreated" content="(\d{4})-(\d{2})-(\d{2})T', webpage)
+        mobj = re.search(r'<time[^>]+datetime="([^"]+)"', webpage)
         if mobj is not None:
-            video_upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3)
+            video_upload_date = unified_strdate(mobj.group(1))
 
         try:
             view_count = int(self._search_regex(r'UserPlays:(\d+)', webpage, 'view count'))
@@ -379,7 +385,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
             for tt in text_tracks:
                 subtitles[tt['lang']] = [{
                     'ext': 'vtt',
-                    'url': 'http://vimeo.com' + tt['url'],
+                    'url': 'https://vimeo.com' + tt['url'],
                 }]
 
         return {
@@ -402,11 +408,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
 
 class VimeoChannelIE(InfoExtractor):
     IE_NAME = 'vimeo:channel'
-    _VALID_URL = r'https?://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])'
+    _VALID_URL = r'https://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])'
     _MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
     _TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'
     _TESTS = [{
-        'url': 'http://vimeo.com/channels/tributes',
+        'url': 'https://vimeo.com/channels/tributes',
         'info_dict': {
             'id': 'tributes',
             'title': 'Vimeo Tributes',
@@ -435,10 +441,10 @@ class VimeoChannelIE(InfoExtractor):
             name="([^"]+)"\s+
             value="([^"]*)"
             ''', login_form))
-        token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
+        token = self._search_regex(r'xsrft[\s=:"\']+([^"\']+)', webpage, 'login token')
         fields['token'] = token
         fields['password'] = password
-        post = compat_urllib_parse.urlencode(fields)
+        post = urlencode_postdata(fields)
         password_path = self._search_regex(
             r'action="([^"]+)"', login_form, 'password URL')
         password_url = compat_urlparse.urljoin(page_url, password_path)
@@ -465,7 +471,7 @@ class VimeoChannelIE(InfoExtractor):
             if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
                 break
 
-        entries = [self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
+        entries = [self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo')
                    for video_id in video_ids]
         return {'_type': 'playlist',
                 'id': list_id,
@@ -476,15 +482,15 @@ class VimeoChannelIE(InfoExtractor):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         channel_id = mobj.group('id')
-        return self._extract_videos(channel_id, 'http://vimeo.com/channels/%s' % channel_id)
+        return self._extract_videos(channel_id, 'https://vimeo.com/channels/%s' % channel_id)
 
 
 class VimeoUserIE(VimeoChannelIE):
     IE_NAME = 'vimeo:user'
-    _VALID_URL = r'https?://vimeo\.com/(?![0-9]+(?:$|[?#/]))(?P<name>[^/]+)(?:/videos|[#?]|$)'
+    _VALID_URL = r'https://vimeo\.com/(?![0-9]+(?:$|[?#/]))(?P<name>[^/]+)(?:/videos|[#?]|$)'
     _TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'
     _TESTS = [{
-        'url': 'http://vimeo.com/nkistudio/videos',
+        'url': 'https://vimeo.com/nkistudio/videos',
         'info_dict': {
             'title': 'Nki',
             'id': 'nkistudio',
@@ -495,15 +501,15 @@ class VimeoUserIE(VimeoChannelIE):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         name = mobj.group('name')
-        return self._extract_videos(name, 'http://vimeo.com/%s' % name)
+        return self._extract_videos(name, 'https://vimeo.com/%s' % name)
 
 
 class VimeoAlbumIE(VimeoChannelIE):
     IE_NAME = 'vimeo:album'
-    _VALID_URL = r'https?://vimeo\.com/album/(?P<id>\d+)'
+    _VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)'
     _TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
     _TESTS = [{
-        'url': 'http://vimeo.com/album/2632481',
+        'url': 'https://vimeo.com/album/2632481',
         'info_dict': {
             'id': '2632481',
             'title': 'Staff Favorites: November 2013',
@@ -527,14 +533,14 @@ class VimeoAlbumIE(VimeoChannelIE):
 
     def _real_extract(self, url):
         album_id = self._match_id(url)
-        return self._extract_videos(album_id, 'http://vimeo.com/album/%s' % album_id)
+        return self._extract_videos(album_id, 'https://vimeo.com/album/%s' % album_id)
 
 
 class VimeoGroupsIE(VimeoAlbumIE):
     IE_NAME = 'vimeo:group'
-    _VALID_URL = r'(?:https?://)?vimeo\.com/groups/(?P<name>[^/]+)'
+    _VALID_URL = r'https://vimeo\.com/groups/(?P<name>[^/]+)'
     _TESTS = [{
-        'url': 'http://vimeo.com/groups/rolexawards',
+        'url': 'https://vimeo.com/groups/rolexawards',
         'info_dict': {
             'id': 'rolexawards',
             'title': 'Rolex Awards for Enterprise',
@@ -548,13 +554,13 @@ class VimeoGroupsIE(VimeoAlbumIE):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         name = mobj.group('name')
-        return self._extract_videos(name, 'http://vimeo.com/groups/%s' % name)
+        return self._extract_videos(name, 'https://vimeo.com/groups/%s' % name)
 
 
 class VimeoReviewIE(InfoExtractor):
     IE_NAME = 'vimeo:review'
     IE_DESC = 'Review pages on vimeo'
-    _VALID_URL = r'https?://vimeo\.com/[^/]+/review/(?P<id>[^/]+)'
+    _VALID_URL = r'https://vimeo\.com/[^/]+/review/(?P<id>[^/]+)'
     _TESTS = [{
         'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
         'md5': 'c507a72f780cacc12b2248bb4006d253',
@@ -566,7 +572,7 @@ class VimeoReviewIE(InfoExtractor):
         }
     }, {
         'note': 'video player needs Referer',
-        'url': 'http://vimeo.com/user22258446/review/91613211/13f927e053',
+        'url': 'https://vimeo.com/user22258446/review/91613211/13f927e053',
         'md5': '6295fdab8f4bf6a002d058b2c6dce276',
         'info_dict': {
             'id': '91613211',
@@ -588,11 +594,11 @@ class VimeoReviewIE(InfoExtractor):
 class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE):
     IE_NAME = 'vimeo:watchlater'
     IE_DESC = 'Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)'
-    _VALID_URL = r'https?://vimeo\.com/home/watchlater|:vimeowatchlater'
+    _VALID_URL = r'https://vimeo\.com/home/watchlater|:vimeowatchlater'
     _LOGIN_REQUIRED = True
     _TITLE_RE = r'href="/home/watchlater".*?>(.*?)<'
     _TESTS = [{
-        'url': 'http://vimeo.com/home/watchlater',
+        'url': 'https://vimeo.com/home/watchlater',
         'only_matching': True,
     }]
 
@@ -612,7 +618,7 @@ class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE):
 
 
 class VimeoLikesIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?vimeo\.com/user(?P<id>[0-9]+)/likes/?(?:$|[?#]|sort:)'
+    _VALID_URL = r'https://(?:www\.)?vimeo\.com/user(?P<id>[0-9]+)/likes/?(?:$|[?#]|sort:)'
     IE_NAME = 'vimeo:likes'
     IE_DESC = 'Vimeo user likes'
     _TEST = {
@@ -640,8 +646,8 @@ class VimeoLikesIE(InfoExtractor):
         description = self._html_search_meta('description', webpage)
 
         def _get_page(idx):
-            page_url = '%s//vimeo.com/user%s/likes/page:%d/sort:date' % (
-                self.http_scheme(), user_id, idx + 1)
+            page_url = 'https://vimeo.com/user%s/likes/page:%d/sort:date' % (
+                user_id, idx + 1)
             webpage = self._download_webpage(
                 page_url, user_id,
                 note='Downloading page %d/%d' % (idx + 1, page_count))
index ee3d86117e625cca66303aeeee229f1a091b4602..aa3d6ddfd2420524fd87f85819d2611225224e79 100644 (file)
@@ -1,75 +1,54 @@
-# coding: utf-8
 from __future__ import unicode_literals
 
-import base64
-import re
-import xml.etree.ElementTree
-import zlib
-
 from .common import InfoExtractor
 from ..utils import int_or_none
 
 
 class VimpleIE(InfoExtractor):
-    IE_DESC = 'Vimple.ru'
-    _VALID_URL = r'https?://(player.vimple.ru/iframe|vimple.ru)/(?P<id>[a-f0-9]{10,})'
+    IE_DESC = 'Vimple - one-click video hosting'
+    _VALID_URL = r'https?://(?:player\.vimple\.ru/iframe|vimple\.ru)/(?P<id>[\da-f-]{32,36})'
     _TESTS = [
         {
             'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf',
             'md5': '2e750a330ed211d3fd41821c6ad9a279',
             'info_dict': {
-                'id': 'c0f6b1687dcd4000a97ebe70068039cf',
+                'id': 'c0f6b168-7dcd-4000-a97e-be70068039cf',
                 'ext': 'mp4',
                 'title': 'Sunset',
                 'duration': 20,
                 'thumbnail': 're:https?://.*?\.jpg',
             },
-        },
+        }, {
+            'url': 'http://player.vimple.ru/iframe/52e1beec-1314-4a83-aeac-c61562eadbf9',
+            'only_matching': True,
+        }
     ]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        iframe_url = 'http://player.vimple.ru/iframe/%s' % video_id
-
-        iframe = self._download_webpage(
-            iframe_url, video_id,
-            note='Downloading iframe', errnote='unable to fetch iframe')
-        player_url = self._html_search_regex(
-            r'"(http://player.vimple.ru/flash/.+?)"', iframe, 'player url')
+        video_id = self._match_id(url)
 
-        player = self._request_webpage(
-            player_url, video_id, note='Downloading swf player').read()
+        webpage = self._download_webpage(
+            'http://player.vimple.ru/iframe/%s' % video_id, video_id)
 
-        player = zlib.decompress(player[8:])
+        playlist = self._parse_json(
+            self._search_regex(
+                r'sprutoData\s*:\s*({.+?}),\r\n', webpage, 'spruto data'),
+            video_id)['playlist'][0]
 
-        xml_pieces = re.findall(b'([a-zA-Z0-9 =+/]{500})', player)
-        xml_pieces = [piece[1:-1] for piece in xml_pieces]
+        title = playlist['title']
+        video_id = playlist.get('videoId') or video_id
+        thumbnail = playlist.get('posterUrl') or playlist.get('thumbnailUrl')
+        duration = int_or_none(playlist.get('duration'))
 
-        xml_data = b''.join(xml_pieces)
-        xml_data = base64.b64decode(xml_data)
-
-        xml_data = xml.etree.ElementTree.fromstring(xml_data)
-
-        video = xml_data.find('Video')
-        quality = video.get('quality')
-        q_tag = video.find(quality.capitalize())
-
-        formats = [
-            {
-                'url': q_tag.get('url'),
-                'tbr': int(q_tag.get('bitrate')),
-                'filesize': int(q_tag.get('filesize')),
-                'format_id': quality,
-            },
-        ]
+        formats = [{
+            'url': f['url'],
+        } for f in playlist['video']]
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
-            'title': video.find('Title').text,
+            'title': title,
+            'thumbnail': thumbnail,
+            'duration': duration,
             'formats': formats,
-            'thumbnail': video.find('Poster').get('url'),
-            'duration': int_or_none(video.get('duration')),
-            'webpage_url': video.find('Share').get('videoPageUrl'),
         }
index 0b58fe0fe0b5188e9c9865e56ce064e94dbc45e5..c733a48fa26edce6b219d3bf2404267b8c346bf6 100644 (file)
@@ -1,7 +1,6 @@
 from __future__ import unicode_literals
 
 import re
-import json
 import itertools
 
 from .common import InfoExtractor
@@ -9,8 +8,8 @@ from ..utils import unified_strdate
 
 
 class VineIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?vine\.co/v/(?P<id>\w+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?vine\.co/(?:v|oembed)/(?P<id>\w+)'
+    _TESTS = [{
         'url': 'https://vine.co/v/b9KOOWX7HUx',
         'md5': '2f36fed6235b16da96ce9b4dc890940d',
         'info_dict': {
@@ -23,29 +22,60 @@ class VineIE(InfoExtractor):
             'uploader': 'Jack Dorsey',
             'uploader_id': '76',
         },
-    }
+    }, {
+        'url': 'https://vine.co/v/MYxVapFvz2z',
+        'md5': '7b9a7cbc76734424ff942eb52c8f1065',
+        'info_dict': {
+            'id': 'MYxVapFvz2z',
+            'ext': 'mp4',
+            'title': 'Fuck Da Police #Mikebrown #justice #ferguson #prayforferguson #protesting #NMOS14',
+            'alt_title': 'Vine by Luna',
+            'description': 'Fuck Da Police #Mikebrown #justice #ferguson #prayforferguson #protesting #NMOS14',
+            'upload_date': '20140815',
+            'uploader': 'Luna',
+            'uploader_id': '1102363502380728320',
+        },
+    }, {
+        'url': 'https://vine.co/v/bxVjBbZlPUH',
+        'md5': 'ea27decea3fa670625aac92771a96b73',
+        'info_dict': {
+            'id': 'bxVjBbZlPUH',
+            'ext': 'mp4',
+            'title': '#mw3 #ac130 #killcam #angelofdeath',
+            'alt_title': 'Vine by Z3k3',
+            'description': '#mw3 #ac130 #killcam #angelofdeath',
+            'upload_date': '20130430',
+            'uploader': 'Z3k3',
+            'uploader_id': '936470460173008896',
+        },
+    }, {
+        'url': 'https://vine.co/oembed/MYxVapFvz2z.json',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id)
 
-        data = json.loads(self._html_search_regex(
-            r'window\.POST_DATA = { %s: ({.+?}) }' % video_id, webpage, 'vine data'))
+        data = self._parse_json(
+            self._html_search_regex(
+                r'window\.POST_DATA = { %s: ({.+?}) };\s*</script>' % video_id,
+                webpage, 'vine data'),
+            video_id)
 
         formats = [{
-            'url': data['videoLowURL'],
-            'ext': 'mp4',
-            'format_id': 'low',
-        }, {
-            'url': data['videoUrl'],
-            'ext': 'mp4',
-            'format_id': 'standard',
-        }]
+            'format_id': '%(format)s-%(rate)s' % f,
+            'vcodec': f['format'],
+            'quality': f['rate'],
+            'url': f['videoUrl'],
+        } for f in data['videoUrls']]
+
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
             'title': self._og_search_title(webpage),
-            'alt_title': self._og_search_description(webpage),
+            'alt_title': self._og_search_description(webpage, default=None),
             'description': data['description'],
             'thumbnail': data['thumbnailUrl'],
             'upload_date': unified_strdate(data['created']),
index 7dea8c59d2a30673b93ae181bab128b8ef0a8b58..cc384adbf9837f35f90c64d0e8dc0396b0b601ec 100644 (file)
@@ -31,7 +31,7 @@ class VKIE(InfoExtractor):
                 'id': '162222515',
                 'ext': 'flv',
                 'title': 'ProtivoGunz - Хуёвая песня',
-                'uploader': 're:Noize MC.*',
+                'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
                 'duration': 195,
                 'upload_date': '20120212',
             },
@@ -140,7 +140,7 @@ class VKIE(InfoExtractor):
         if not video_id:
             video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))
 
-        info_url = 'http://vk.com/al_video.php?act=show&al=1&video=%s' % video_id
+        info_url = 'http://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id
         info_page = self._download_webpage(info_url, video_id)
 
         ERRORS = {
@@ -152,7 +152,10 @@ class VKIE(InfoExtractor):
             'use --username and --password options to provide account credentials.',
 
             r'<!>Unknown error':
-            'Video %s does not exist.'
+            'Video %s does not exist.',
+
+            r'<!>Видео временно недоступно':
+            'Video %s is temporarily unavailable.',
         }
 
         for error_re, error_msg in ERRORS.items():
diff --git a/youtube_dl/extractor/voicerepublic.py b/youtube_dl/extractor/voicerepublic.py
new file mode 100644 (file)
index 0000000..254383d
--- /dev/null
@@ -0,0 +1,99 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_request,
+    compat_urlparse,
+)
+from ..utils import (
+    ExtractorError,
+    determine_ext,
+    int_or_none,
+)
+
+
+class VoiceRepublicIE(InfoExtractor):
+    _VALID_URL = r'https?://voicerepublic\.com/(?:talks|embed)/(?P<id>[0-9a-z-]+)'
+    _TESTS = [{
+        'url': 'http://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state',
+        'md5': '0554a24d1657915aa8e8f84e15dc9353',
+        'info_dict': {
+            'id': '2296',
+            'display_id': 'watching-the-watchers-building-a-sousveillance-state',
+            'ext': 'm4a',
+            'title': 'Watching the Watchers: Building a Sousveillance State',
+            'description': 'md5:715ba964958afa2398df615809cfecb1',
+            'thumbnail': 're:^https?://.*\.(?:png|jpg)$',
+            'duration': 1800,
+            'view_count': int,
+        }
+    }, {
+        'url': 'http://voicerepublic.com/embed/watching-the-watchers-building-a-sousveillance-state',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        req = compat_urllib_request.Request(
+            compat_urlparse.urljoin(url, '/talks/%s' % display_id))
+        # Older versions of Firefox get redirected to an "upgrade browser" page
+        req.add_header('User-Agent', 'youtube-dl')
+        webpage = self._download_webpage(req, display_id)
+
+        if '>Queued for processing, please stand by...<' in webpage:
+            raise ExtractorError(
+                'Audio is still queued for processing', expected=True)
+
+        config = self._search_regex(
+            r'(?s)return ({.+?});\s*\n', webpage,
+            'data', default=None)
+        data = self._parse_json(config, display_id, fatal=False) if config else None
+        if data:
+            title = data['title']
+            description = data.get('teaser')
+            talk_id = data.get('talk_id') or display_id
+            talk = data['talk']
+            duration = int_or_none(talk.get('duration'))
+            formats = [{
+                'url': compat_urlparse.urljoin(url, talk_url),
+                'format_id': format_id,
+                'ext': determine_ext(talk_url) or format_id,
+                'vcodec': 'none',
+            } for format_id, talk_url in talk['links'].items()]
+        else:
+            title = self._og_search_title(webpage)
+            description = self._html_search_regex(
+                r"(?s)<div class='talk-teaser'[^>]*>(.+?)</div>",
+                webpage, 'description', fatal=False)
+            talk_id = self._search_regex(
+                [r"id='jc-(\d+)'", r"data-shareable-id='(\d+)'"],
+                webpage, 'talk id', default=None) or display_id
+            duration = None
+            player = self._search_regex(
+                r"class='vr-player jp-jplayer'([^>]+)>", webpage, 'player')
+            formats = [{
+                'url': compat_urlparse.urljoin(url, talk_url),
+                'format_id': format_id,
+                'ext': determine_ext(talk_url) or format_id,
+                'vcodec': 'none',
+            } for format_id, talk_url in re.findall(r"data-([^=]+)='([^']+)'", player)]
+        self._sort_formats(formats)
+
+        thumbnail = self._og_search_thumbnail(webpage)
+        view_count = int_or_none(self._search_regex(
+            r"class='play-count[^']*'>\s*(\d+) plays",
+            webpage, 'play count', fatal=False))
+
+        return {
+            'id': talk_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'view_count': view_count,
+            'formats': formats,
+        }
index 2d23effccdff0ba49ff628ded1f72d044fe609d6..92c90e5172e89b98c3309bb01dc9787f15c24859 100644 (file)
@@ -27,9 +27,6 @@ class VpornIE(InfoExtractor):
                 'duration': 393,
                 'age_limit': 18,
                 'view_count': int,
-                'like_count': int,
-                'dislike_count': int,
-                'comment_count': int,
             }
         },
         {
@@ -47,9 +44,6 @@ class VpornIE(InfoExtractor):
                 'duration': 588,
                 'age_limit': 18,
                 'view_count': int,
-                'like_count': int,
-                'dislike_count': int,
-                'comment_count': int,
             }
         },
     ]
@@ -64,29 +58,29 @@ class VpornIE(InfoExtractor):
         title = self._html_search_regex(
             r'videoname\s*=\s*\'([^\']+)\'', webpage, 'title').strip()
         description = self._html_search_regex(
-            r'<div class="description_txt">(.*?)</div>', webpage, 'description', fatal=False)
+            r'class="(?:descr|description_txt)">(.*?)</div>',
+            webpage, 'description', fatal=False)
         thumbnail = self._html_search_regex(
             r'flashvars\.imageUrl\s*=\s*"([^"]+)"', webpage, 'description', fatal=False, default=None)
         if thumbnail:
             thumbnail = 'http://www.vporn.com' + thumbnail
 
         uploader = self._html_search_regex(
-            r'(?s)UPLOADED BY.*?<a href="/user/[^"]+">([^<]+)</a>',
+            r'(?s)Uploaded by:.*?<a href="/user/[^"]+"[^>]*>(.+?)</a>',
             webpage, 'uploader', fatal=False)
 
-        categories = re.findall(r'<a href="/cat/[^"]+">([^<]+)</a>', webpage)
+        categories = re.findall(r'<a href="/cat/[^"]+"[^>]*>([^<]+)</a>', webpage)
 
         duration = parse_duration(self._search_regex(
-            r'duration (\d+ min \d+ sec)', webpage, 'duration', fatal=False))
+            r'Runtime:\s*</span>\s*(\d+ min \d+ sec)',
+            webpage, 'duration', fatal=False))
 
-        view_count = str_to_int(self._html_search_regex(
-            r'<span>([\d,\.]+) VIEWS</span>', webpage, 'view count', fatal=False))
-        like_count = str_to_int(self._html_search_regex(
-            r'<span id="like" class="n">([\d,\.]+)</span>', webpage, 'like count', fatal=False))
-        dislike_count = str_to_int(self._html_search_regex(
-            r'<span id="dislike" class="n">([\d,\.]+)</span>', webpage, 'dislike count', fatal=False))
+        view_count = str_to_int(self._search_regex(
+            r'class="views">([\d,\.]+) [Vv]iews<',
+            webpage, 'view count', fatal=False))
         comment_count = str_to_int(self._html_search_regex(
-            r'<h4>Comments \(<b>([\d,\.]+)</b>\)</h4>', webpage, 'comment count', fatal=False))
+            r"'Comments \(([\d,\.]+)\)'",
+            webpage, 'comment count', default=None))
 
         formats = []
 
@@ -117,8 +111,6 @@ class VpornIE(InfoExtractor):
             'categories': categories,
             'duration': duration,
             'view_count': view_count,
-            'like_count': like_count,
-            'dislike_count': dislike_count,
             'comment_count': comment_count,
             'age_limit': 18,
             'formats': formats,
index bf9e40bad7c29e01b231b2ecf9e0cbb53295d52f..affcc52f6e244c40bbca6381700c2f15e645580f 100644 (file)
@@ -113,7 +113,7 @@ class WatIE(InfoExtractor):
             video_url = self._download_webpage(
                 'http://www.wat.tv/get%s?token=%s&getURL=1&country=%s' % (webid, compute_token(webid), country),
                 real_id,
-                'Downloding %s video URL' % fmt[0],
+                'Downloading %s video URL' % fmt[0],
                 'Failed to download %s video URL' % fmt[0],
                 False)
             if not video_url:
index d5c26a032bcf28a9c8ae79e1d083d67ed29b2726..a3ea26feb38257071c8ae5d3c1702cf0fcd2650a 100644 (file)
@@ -6,8 +6,8 @@ from .common import InfoExtractor
 
 
 class WorldStarHipHopIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/videos/video\.php\?v=(?P<id>.*)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/(?:videos|android)/video\.php\?v=(?P<id>.*)'
+    _TESTS = [{
         "url": "http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO",
         "md5": "9d04de741161603bf7071bbf4e883186",
         "info_dict": {
@@ -15,7 +15,15 @@ class WorldStarHipHopIE(InfoExtractor):
             "ext": "mp4",
             "title": "KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!"
         }
-    }
+    }, {
+        'url': 'http://m.worldstarhiphop.com/android/video.php?v=wshh6a7q1ny0G34ZwuIO',
+        'md5': 'dc1c76c83ecc4190bb1eb143899b87d3',
+        'info_dict': {
+            'id': 'wshh6a7q1ny0G34ZwuIO',
+            'ext': 'mp4',
+            "title": "KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!"
+        }
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -26,19 +34,22 @@ class WorldStarHipHopIE(InfoExtractor):
             return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo')
 
         video_url = self._search_regex(
-            r'so\.addVariable\("file","(.*?)"\)', webpage, 'video URL')
+            [r'so\.addVariable\("file","(.*?)"\)',
+             r'<div class="artlist">\s*<a[^>]+href="([^"]+)">'],
+            webpage, 'video URL')
 
         if 'youtube' in video_url:
             return self.url_result(video_url, ie='Youtube')
 
         video_title = self._html_search_regex(
-            r'(?s)<div class="content-heading">\s*<h1>(.*?)</h1>',
+            [r'(?s)<div class="content-heading">\s*<h1>(.*?)</h1>',
+             r'<span[^>]+class="tc-sp-pinned-title">(.*)</span>'],
             webpage, 'title')
 
         # Getting thumbnail and if not thumbnail sets correct title for WSHH candy video.
         thumbnail = self._html_search_regex(
             r'rel="image_src" href="(.*)" />', webpage, 'thumbnail',
-            fatal=False)
+            default=None)
         if not thumbnail:
             _title = r'candytitles.*>(.*)</span>'
             mobj = re.search(_title, webpage)
diff --git a/youtube_dl/extractor/xstream.py b/youtube_dl/extractor/xstream.py
new file mode 100644 (file)
index 0000000..71584c2
--- /dev/null
@@ -0,0 +1,115 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    parse_iso8601,
+    xpath_with_ns,
+    xpath_text,
+    find_xpath_attr,
+)
+
+
+class XstreamIE(InfoExtractor):
+    _VALID_URL = r'''(?x)
+                    (?:
+                        xstream:|
+                        https?://frontend\.xstream\.(?:dk|net)/
+                    )
+                    (?P<partner_id>[^/]+)
+                    (?:
+                        :|
+                        /feed/video/\?.*?\bid=
+                    )
+                    (?P<id>\d+)
+                    '''
+    _TESTS = [{
+        'url': 'http://frontend.xstream.dk/btno/feed/video/?platform=web&id=86588',
+        'md5': 'd7d17e3337dc80de6d3a540aefbe441b',
+        'info_dict': {
+            'id': '86588',
+            'ext': 'mov',
+            'title': 'Otto Wollertsen',
+            'description': 'Vestlendingen Otto Fredrik Wollertsen',
+            'timestamp': 1430473209,
+            'upload_date': '20150501',
+        },
+    }, {
+        'url': 'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=21039',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        partner_id = mobj.group('partner_id')
+        video_id = mobj.group('id')
+
+        data = self._download_xml(
+            'http://frontend.xstream.dk/%s/feed/video/?platform=web&id=%s'
+            % (partner_id, video_id),
+            video_id)
+
+        NS_MAP = {
+            'atom': 'http://www.w3.org/2005/Atom',
+            'xt': 'http://xstream.dk/',
+            'media': 'http://search.yahoo.com/mrss/',
+        }
+
+        entry = data.find(xpath_with_ns('./atom:entry', NS_MAP))
+
+        title = xpath_text(
+            entry, xpath_with_ns('./atom:title', NS_MAP), 'title')
+        description = xpath_text(
+            entry, xpath_with_ns('./atom:summary', NS_MAP), 'description')
+        timestamp = parse_iso8601(xpath_text(
+            entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date'))
+
+        formats = []
+        media_group = entry.find(xpath_with_ns('./media:group', NS_MAP))
+        for media_content in media_group.findall(xpath_with_ns('./media:content', NS_MAP)):
+            media_url = media_content.get('url')
+            if not media_url:
+                continue
+            tbr = int_or_none(media_content.get('bitrate'))
+            mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', media_url)
+            if mobj:
+                formats.append({
+                    'url': mobj.group('url'),
+                    'play_path': 'mp4:%s' % mobj.group('playpath'),
+                    'app': mobj.group('app'),
+                    'ext': 'flv',
+                    'tbr': tbr,
+                    'format_id': 'rtmp-%d' % tbr,
+                })
+            else:
+                formats.append({
+                    'url': media_url,
+                    'tbr': tbr,
+                })
+        self._sort_formats(formats)
+
+        link = find_xpath_attr(
+            entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original')
+        if link is not None:
+            formats.append({
+                'url': link.get('href'),
+                'format_id': link.get('rel'),
+            })
+
+        thumbnails = [{
+            'url': splash.get('url'),
+            'width': int_or_none(splash.get('width')),
+            'height': int_or_none(splash.get('height')),
+        } for splash in media_group.findall(xpath_with_ns('./xt:splash', NS_MAP))]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'timestamp': timestamp,
+            'formats': formats,
+            'thumbnails': thumbnails,
+        }
index 4971965f9d090cce61a2e8b6d1486fadc873b4dc..81d885fdcee1cf788c217e862629df58f386d73c 100644 (file)
@@ -69,18 +69,26 @@ class XuiteIE(InfoExtractor):
         'only_matching': True,
     }]
 
+    @staticmethod
+    def base64_decode_utf8(data):
+        return base64.b64decode(data.encode('utf-8')).decode('utf-8')
+
+    @staticmethod
+    def base64_encode_utf8(data):
+        return base64.b64encode(data.encode('utf-8')).decode('utf-8')
+
     def _extract_flv_config(self, media_id):
-        base64_media_id = base64.b64encode(media_id.encode('utf-8')).decode('utf-8')
+        base64_media_id = self.base64_encode_utf8(media_id)
         flv_config = self._download_xml(
             'http://vlog.xuite.net/flash/player?media=%s' % base64_media_id,
             'flv config')
         prop_dict = {}
         for prop in flv_config.findall('./property'):
-            prop_id = base64.b64decode(prop.attrib['id']).decode('utf-8')
+            prop_id = self.base64_decode_utf8(prop.attrib['id'])
             # CDATA may be empty in flv config
             if not prop.text:
                 continue
-            encoded_content = base64.b64decode(prop.text).decode('utf-8')
+            encoded_content = self.base64_decode_utf8(prop.text)
             prop_dict[prop_id] = compat_urllib_parse_unquote(encoded_content)
         return prop_dict
 
index 97dbac4cce53d7fe956b074fddbe40993fd5681f..bf4e659ac6981c77f7e5f3c77578c4808634d766 100644 (file)
@@ -17,10 +17,12 @@ from ..utils import (
     int_or_none,
 )
 
+from .nbc import NBCSportsVPlayerIE
+
 
 class YahooIE(InfoExtractor):
     IE_DESC = 'Yahoo screen and movies'
-    _VALID_URL = r'(?P<url>(?P<host>https?://(?:[a-zA-Z]{2}\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?P<display_id>.+?)-(?P<id>[0-9]+)(?:-[a-z]+)?\.html)'
+    _VALID_URL = r'(?P<url>(?P<host>https?://(?:[a-zA-Z]{2}\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?P<display_id>.+)?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html)'
     _TESTS = [
         {
             'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
@@ -129,12 +131,24 @@ class YahooIE(InfoExtractor):
         }, {
             'url': 'https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html',
             'only_matching': True,
+        }, {
+            'note': 'NBC Sports embeds',
+            'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313',
+            'info_dict': {
+                'id': '9CsDKds0kvHI',
+                'ext': 'flv',
+                'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
+                'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
+            }
+        }, {
+            'url': 'https://tw.news.yahoo.com/-100120367.html',
+            'only_matching': True,
         }
     ]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('display_id')
+        display_id = mobj.group('display_id') or self._match_id(url)
         page_id = mobj.group('id')
         url = mobj.group('url')
         host = mobj.group('host')
@@ -151,6 +165,10 @@ class YahooIE(InfoExtractor):
                 items = json.loads(items_json)
                 video_id = items[0]['id']
                 return self._get_info(video_id, display_id, webpage)
+        # Look for NBCSports iframes
+        nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
+        if nbc_sports_url:
+            return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
 
         items_json = self._search_regex(
             r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
index b294767c5b6bc5a7ec918e9108cc4cff1a5fac40..9d851bae3b779d8ce434742d6f5ef70d3f423936 100644 (file)
@@ -8,6 +8,8 @@ from ..compat import compat_urlparse
 from ..utils import (
     float_or_none,
     month_by_abbreviation,
+    ExtractorError,
+    get_element_by_attribute,
 )
 
 
@@ -22,22 +24,49 @@ class YamIE(InfoExtractor):
             'id': '2283921',
             'ext': 'mp3',
             'title': '發現 - 趙薇 京華煙雲主題曲',
+            'description': '發現 - 趙薇 京華煙雲主題曲',
             'uploader_id': 'princekt',
             'upload_date': '20080807',
             'duration': 313.0,
         }
     }, {
         # An external video hosted on YouTube
-        'url': 'http://mymedia.yam.com/m/3598173',
-        'md5': '0238ceec479c654e8c2f1223755bf3e9',
+        'url': 'http://mymedia.yam.com/m/3599430',
+        'md5': '03127cf10d8f35d120a9e8e52e3b17c6',
         'info_dict': {
-            'id': 'pJ2Deys283c',
+            'id': 'CNpEoQlrIgA',
             'ext': 'mp4',
-            'upload_date': '20150202',
+            'upload_date': '20150306',
             'uploader': '新莊社大瑜伽社',
-            'description': 'md5:f5cc72f0baf259a70fb731654b0d2eff',
+            'description': 'md5:11e2e405311633ace874f2e6226c8b17',
             'uploader_id': '2323agoy',
-            'title': '外婆的澎湖灣KTV-潘安邦',
+            'title': '20090412陽明山二子坪-1',
+        },
+        'skip': 'Video does not exist',
+    }, {
+        'url': 'http://mymedia.yam.com/m/3598173',
+        'info_dict': {
+            'id': '3598173',
+            'ext': 'mp4',
+        },
+        'skip': 'cause Yam system error',
+    }, {
+        'url': 'http://mymedia.yam.com/m/3599437',
+        'info_dict': {
+            'id': '3599437',
+            'ext': 'mp4',
+        },
+        'skip': 'invalid YouTube URL',
+    }, {
+        'url': 'http://mymedia.yam.com/m/2373534',
+        'md5': '7ff74b91b7a817269d83796f8c5890b1',
+        'info_dict': {
+            'id': '2373534',
+            'ext': 'mp3',
+            'title': '林俊傑&蔡卓妍-小酒窩',
+            'description': 'md5:904003395a0fcce6cfb25028ff468420',
+            'upload_date': '20080928',
+            'uploader_id': 'onliner2',
         }
     }]
 
@@ -45,6 +74,13 @@ class YamIE(InfoExtractor):
         video_id = self._match_id(url)
         page = self._download_webpage(url, video_id)
 
+        # Check for errors
+        system_msg = self._html_search_regex(
+            r'系統訊息(?:<br>|\n|\r)*([^<>]+)<br>', page, 'system message',
+            default=None)
+        if system_msg:
+            raise ExtractorError(system_msg, expected=True)
+
         # Is it hosted externally on YouTube?
         youtube_url = self._html_search_regex(
             r'<embed src="(http://www.youtube.com/[^"]+)"',
@@ -52,15 +88,19 @@ class YamIE(InfoExtractor):
         if youtube_url:
             return self.url_result(youtube_url, 'Youtube')
 
+        title = self._html_search_regex(
+            r'<h1[^>]+class="heading"[^>]*>\s*(.+)\s*</h1>', page, 'title')
+
         api_page = self._download_webpage(
             'http://mymedia.yam.com/api/a/?pID=' + video_id, video_id,
             note='Downloading API page')
         api_result_obj = compat_urlparse.parse_qs(api_page)
 
+        info_table = get_element_by_attribute('class', 'info', page)
         uploader_id = self._html_search_regex(
-            r'<!-- 發表作者 -->:[\n ]+<a href="/([a-z]+)"',
-            page, 'uploader id', fatal=False)
-        mobj = re.search(r'<!-- 發表於 -->(?P<mon>[A-Z][a-z]{2})  ' +
+            r'<!-- 發表作者 -->:[\n ]+<a href="/([a-z0-9]+)"',
+            info_table, 'uploader id', fatal=False)
+        mobj = re.search(r'<!-- 發表於 -->(?P<mon>[A-Z][a-z]{2})\s+' +
                          r'(?P<day>\d{1,2}), (?P<year>\d{4})', page)
         if mobj:
             upload_date = '%s%02d%02d' % (
@@ -74,7 +114,8 @@ class YamIE(InfoExtractor):
         return {
             'id': video_id,
             'url': api_result_obj['mp3file'][0],
-            'title': self._html_search_meta('description', page),
+            'title': title,
+            'description': self._html_search_meta('description', page),
             'duration': duration,
             'uploader_id': uploader_id,
             'upload_date': upload_date,
diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py
new file mode 100644 (file)
index 0000000..f4c0f57
--- /dev/null
@@ -0,0 +1,127 @@
+# coding=utf-8
+from __future__ import unicode_literals
+
+import re
+import hashlib
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    int_or_none,
+    float_or_none,
+)
+
+
+class YandexMusicBaseIE(InfoExtractor):
+    def _get_track_url(self, storage_dir, track_id):
+        data = self._download_json(
+            'http://music.yandex.ru/api/v1.5/handlers/api-jsonp.jsx?action=getTrackSrc&p=download-info/%s'
+            % storage_dir,
+            track_id, 'Downloading track location JSON')
+
+        key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + data['path'][1:] + data['s']).encode('utf-8')).hexdigest()
+        storage = storage_dir.split('.')
+
+        return ('http://%s/get-mp3/%s/%s?track-id=%s&from=service-10-track&similarities-experiment=default'
+                % (data['host'], key, data['ts'] + data['path'], storage[1]))
+
+    def _get_track_info(self, track):
+        return {
+            'id': track['id'],
+            'ext': 'mp3',
+            'url': self._get_track_url(track['storageDir'], track['id']),
+            'title': '%s - %s' % (track['artists'][0]['name'], track['title']),
+            'filesize': int_or_none(track.get('fileSize')),
+            'duration': float_or_none(track.get('durationMs'), 1000),
+        }
+
+
+class YandexMusicTrackIE(YandexMusicBaseIE):
+    IE_NAME = 'yandexmusic:track'
+    IE_DESC = 'Яндекс.Музыка - Трек'
+    _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
+
+    _TEST = {
+        'url': 'http://music.yandex.ru/album/540508/track/4878838',
+        'md5': 'f496818aa2f60b6c0062980d2e00dc20',
+        'info_dict': {
+            'id': '4878838',
+            'ext': 'mp3',
+            'title': 'Carlo Ambrosio - Gypsy Eyes 1',
+            'filesize': 4628061,
+            'duration': 193.04,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        album_id, track_id = mobj.group('album_id'), mobj.group('id')
+
+        track = self._download_json(
+            'http://music.yandex.ru/handlers/track.jsx?track=%s:%s' % (track_id, album_id),
+            track_id, 'Downloading track JSON')['track']
+
+        return self._get_track_info(track)
+
+
+class YandexMusicAlbumIE(YandexMusicBaseIE):
+    IE_NAME = 'yandexmusic:album'
+    IE_DESC = 'Яндекс.Музыка - Альбом'
+    _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<id>\d+)/?(\?|$)'
+
+    _TEST = {
+        'url': 'http://music.yandex.ru/album/540508',
+        'info_dict': {
+            'id': '540508',
+            'title': 'Carlo Ambrosio - Gypsy Soul (2009)',
+        },
+        'playlist_count': 50,
+    }
+
+    def _real_extract(self, url):
+        album_id = self._match_id(url)
+
+        album = self._download_json(
+            'http://music.yandex.ru/handlers/album.jsx?album=%s' % album_id,
+            album_id, 'Downloading album JSON')
+
+        entries = [self._get_track_info(track) for track in album['volumes'][0]]
+
+        title = '%s - %s' % (album['artists'][0]['name'], album['title'])
+        year = album.get('year')
+        if year:
+            title += ' (%s)' % year
+
+        return self.playlist_result(entries, compat_str(album['id']), title)
+
+
+class YandexMusicPlaylistIE(YandexMusicBaseIE):
+    IE_NAME = 'yandexmusic:playlist'
+    IE_DESC = 'Яндекс.Музыка - Плейлист'
+    _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/users/[^/]+/playlists/(?P<id>\d+)'
+
+    _TEST = {
+        'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
+        'info_dict': {
+            'id': '1245',
+            'title': 'Что слушают Enter Shikari',
+            'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
+        },
+        'playlist_count': 6,
+    }
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        playlist = self._parse_json(
+            self._search_regex(
+                r'var\s+Mu\s*=\s*({.+?});\s*</script>', webpage, 'player'),
+            playlist_id)['pageData']['playlist']
+
+        entries = [self._get_track_info(track) for track in playlist['tracks']]
+
+        return self.playlist_result(
+            entries, compat_str(playlist_id),
+            playlist['title'], playlist.get('description'))
index 107c9ac36e4f4f48bd768567e4399af15fd07743..4ba7c36db78fb457b63e05fe161a75b00383c78c 100644 (file)
@@ -47,11 +47,12 @@ class YouPornIE(InfoExtractor):
 
         # Get JSON parameters
         json_params = self._search_regex(
-            r'var currentVideo = new Video\((.*)\)[,;]',
+            [r'videoJa?son\s*=\s*({.+})',
+             r'var\s+currentVideo\s*=\s*new\s+Video\((.+?)\)[,;]'],
             webpage, 'JSON parameters')
         try:
             params = json.loads(json_params)
-        except:
+        except ValueError:
             raise ExtractorError('Invalid JSON')
 
         self.report_extraction(video_id)
index 40fc4165f402722eca38cbfbc7c06cf2e4409a64..4e25d6f22312a0dca9f1997baa3bacd1c3fd263d 100644 (file)
@@ -1,8 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 
 
@@ -16,7 +14,7 @@ class YourUploadIE(InfoExtractor):
     _TESTS = [
         {
             'url': 'http://yourupload.com/watch/14i14h',
-            'md5': 'bf5c2f95c4c917536e80936af7bc51e1',
+            'md5': '5e2c63385454c557f97c4c4131a393cd',
             'info_dict': {
                 'id': '14i14h',
                 'ext': 'mp4',
@@ -35,24 +33,21 @@ class YourUploadIE(InfoExtractor):
     ]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
-        url = 'http://embed.yucache.net/{0:}'.format(video_id)
-        webpage = self._download_webpage(url, video_id)
+        embed_url = 'http://embed.yucache.net/{0:}'.format(video_id)
+        webpage = self._download_webpage(embed_url, video_id)
 
         title = self._og_search_title(webpage)
-        thumbnail = self._og_search_thumbnail(webpage)
-        url = self._og_search_video_url(webpage)
-
-        formats = [{
-            'format_id': 'sd',
-            'url': url,
-        }]
+        video_url = self._og_search_video_url(webpage)
+        thumbnail = self._og_search_thumbnail(webpage, default=None)
 
         return {
             'id': video_id,
             'title': title,
-            'formats': formats,
+            'url': video_url,
             'thumbnail': thumbnail,
+            'http_headers': {
+                'Referer': embed_url,
+            },
         }
index 3690f8021267b30171be4f2e7a019133aaeaaca9..e58184adcf4b329c85ddb8bde2f352e383404ed8 100644 (file)
@@ -28,7 +28,6 @@ from ..utils import (
     get_element_by_attribute,
     get_element_by_id,
     int_or_none,
-    OnDemandPagedList,
     orderedSet,
     unescapeHTML,
     unified_strdate,
@@ -495,7 +494,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader': '孫艾倫',
                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
             },
-        }
+        },
+        # url_encoded_fmt_stream_map is empty string
+        {
+            'url': 'qEJwOuvDf7I',
+            'info_dict': {
+                'id': 'qEJwOuvDf7I',
+                'ext': 'mp4',
+                'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
+                'description': '',
+                'upload_date': '20150404',
+                'uploader_id': 'spbelect',
+                'uploader': 'Наблюдатели Петербурга',
+            },
+            'params': {
+                'skip_download': 'requires avconv',
+            }
+        },
     ]
 
     def __init__(self, *args, **kwargs):
@@ -772,33 +787,41 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             errnote='Could not download DASH manifest')
 
         formats = []
-        for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
-            url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
-            if url_el is None:
-                continue
-            format_id = r.attrib['id']
-            video_url = url_el.text
-            filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
-            f = {
-                'format_id': format_id,
-                'url': video_url,
-                'width': int_or_none(r.attrib.get('width')),
-                'height': int_or_none(r.attrib.get('height')),
-                'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
-                'asr': int_or_none(r.attrib.get('audioSamplingRate')),
-                'filesize': filesize,
-                'fps': int_or_none(r.attrib.get('frameRate')),
-            }
-            try:
-                existing_format = next(
-                    fo for fo in formats
-                    if fo['format_id'] == format_id)
-            except StopIteration:
-                full_info = self._formats.get(format_id, {}).copy()
-                full_info.update(f)
-                formats.append(full_info)
-            else:
-                existing_format.update(f)
+        for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'):
+            mime_type = a.attrib.get('mimeType')
+            for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'):
+                url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
+                if url_el is None:
+                    continue
+                if mime_type == 'text/vtt':
+                    # TODO implement WebVTT downloading
+                    pass
+                elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
+                    format_id = r.attrib['id']
+                    video_url = url_el.text
+                    filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
+                    f = {
+                        'format_id': format_id,
+                        'url': video_url,
+                        'width': int_or_none(r.attrib.get('width')),
+                        'height': int_or_none(r.attrib.get('height')),
+                        'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
+                        'asr': int_or_none(r.attrib.get('audioSamplingRate')),
+                        'filesize': filesize,
+                        'fps': int_or_none(r.attrib.get('frameRate')),
+                    }
+                    try:
+                        existing_format = next(
+                            fo for fo in formats
+                            if fo['format_id'] == format_id)
+                    except StopIteration:
+                        full_info = self._formats.get(format_id, {}).copy()
+                        full_info.update(f)
+                        formats.append(full_info)
+                    else:
+                        existing_format.update(f)
+                else:
+                    self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
         return formats
 
     def _real_extract(self, url):
@@ -855,7 +878,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 args = ytplayer_config['args']
                 # Convert to the same format returned by compat_parse_qs
                 video_info = dict((k, [v]) for k, v in args.items())
-                if 'url_encoded_fmt_stream_map' not in args:
+                if not args.get('url_encoded_fmt_stream_map'):
                     raise ValueError('No stream_map present')  # caught below
             except ValueError:
                 # We fallback to the get_video_info pages (used by the embed page)
@@ -1263,37 +1286,27 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
 
         return self.playlist_result(url_results, playlist_id, title)
 
-    def _real_extract(self, url):
-        # Extract playlist id
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError('Invalid URL: %s' % url)
-        playlist_id = mobj.group(1) or mobj.group(2)
-
-        # Check if it's a video-specific URL
-        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
-        if 'v' in query_dict:
-            video_id = query_dict['v'][0]
-            if self._downloader.params.get('noplaylist'):
-                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
-                return self.url_result(video_id, 'Youtube', video_id=video_id)
-            else:
-                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
-
-        if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
-            # Mixes require a custom extraction process
-            return self._extract_mix(playlist_id)
-
+    def _extract_playlist(self, playlist_id):
         url = self._TEMPLATE_URL % playlist_id
         page = self._download_webpage(url, playlist_id)
         more_widget_html = content_html = page
 
-        # Check if the playlist exists or is private
-        if re.search(r'<div class="yt-alert-message">[^<]*?(The|This) playlist (does not exist|is private)[^<]*?</div>', page) is not None:
-            raise ExtractorError(
-                'The playlist doesn\'t exist or is private, use --username or '
-                '--netrc to access it.',
-                expected=True)
+        for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page):
+            match = match.strip()
+            # Check if the playlist exists or is private
+            if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match):
+                raise ExtractorError(
+                    'The playlist doesn\'t exist or is private, use --username or '
+                    '--netrc to access it.',
+                    expected=True)
+            elif re.match(r'[^<]*Invalid parameters[^<]*', match):
+                raise ExtractorError(
+                    'Invalid parameters. Maybe URL is incorrect.',
+                    expected=True)
+            elif re.match(r'[^<]*Choose your language[^<]*', match):
+                continue
+            else:
+                self.report_warning('Youtube gives an alert message: ' + match)
 
         # Extract the video ids from the playlist pages
         ids = []
@@ -1327,10 +1340,34 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         url_results = self._ids_to_results(ids)
         return self.playlist_result(url_results, playlist_id, playlist_title)
 
+    def _real_extract(self, url):
+        # Extract playlist id
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            raise ExtractorError('Invalid URL: %s' % url)
+        playlist_id = mobj.group(1) or mobj.group(2)
+
+        # Check if it's a video-specific URL
+        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+        if 'v' in query_dict:
+            video_id = query_dict['v'][0]
+            if self._downloader.params.get('noplaylist'):
+                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+                return self.url_result(video_id, 'Youtube', video_id=video_id)
+            else:
+                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
+
+        if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
+            # Mixes require a custom extraction process
+            return self._extract_mix(playlist_id)
+
+        return self._extract_playlist(playlist_id)
+
 
 class YoutubeChannelIE(InfoExtractor):
     IE_DESC = 'YouTube.com channels'
     _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
+    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
     IE_NAME = 'youtube:channel'
     _TESTS = [{
         'note': 'paginated channel',
@@ -1341,19 +1378,27 @@ class YoutubeChannelIE(InfoExtractor):
         }
     }]
 
-    def extract_videos_from_page(self, page):
+    @staticmethod
+    def extract_videos_from_page(page):
         ids_in_page = []
-        for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
-            if mobj.group(1) not in ids_in_page:
-                ids_in_page.append(mobj.group(1))
-        return ids_in_page
+        titles_in_page = []
+        for mobj in re.finditer(r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?', page):
+            video_id = mobj.group('id')
+            video_title = unescapeHTML(mobj.group('title'))
+            try:
+                idx = ids_in_page.index(video_id)
+                if video_title and not titles_in_page[idx]:
+                    titles_in_page[idx] = video_title
+            except ValueError:
+                ids_in_page.append(video_id)
+                titles_in_page.append(video_title)
+        return zip(ids_in_page, titles_in_page)
 
     def _real_extract(self, url):
         channel_id = self._match_id(url)
 
-        video_ids = []
-        url = 'https://www.youtube.com/channel/%s/videos' % channel_id
-        channel_page = self._download_webpage(url, channel_id)
+        url = self._TEMPLATE_URL % channel_id
+        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
         autogenerated = re.search(r'''(?x)
                 class="[^"]*?(?:
                     channel-header-autogenerated-label|
@@ -1363,20 +1408,21 @@ class YoutubeChannelIE(InfoExtractor):
         if autogenerated:
             # The videos are contained in a single page
             # the ajax pages can't be used, they are empty
-            video_ids = self.extract_videos_from_page(channel_page)
             entries = [
-                self.url_result(video_id, 'Youtube', video_id=video_id)
-                for video_id in video_ids]
+                self.url_result(
+                    video_id, 'Youtube', video_id=video_id,
+                    video_title=video_title)
+                for video_id, video_title in self.extract_videos_from_page(channel_page)]
             return self.playlist_result(entries, channel_id)
 
         def _entries():
             more_widget_html = content_html = channel_page
             for pagenum in itertools.count(1):
 
-                ids_in_page = self.extract_videos_from_page(content_html)
-                for video_id in ids_in_page:
+                for video_id, video_title in self.extract_videos_from_page(content_html):
                     yield self.url_result(
-                        video_id, 'Youtube', video_id=video_id)
+                        video_id, 'Youtube', video_id=video_id,
+                        video_title=video_title)
 
                 mobj = re.search(
                     r'data-uix-load-more-href="/?(?P<more>[^"]+)"',
@@ -1394,12 +1440,10 @@ class YoutubeChannelIE(InfoExtractor):
         return self.playlist_result(_entries(), channel_id)
 
 
-class YoutubeUserIE(InfoExtractor):
+class YoutubeUserIE(YoutubeChannelIE):
     IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
     _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
-    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
-    _GDATA_PAGE_SIZE = 50
-    _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
+    _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'
     IE_NAME = 'youtube:user'
 
     _TESTS = [{
@@ -1423,95 +1467,57 @@ class YoutubeUserIE(InfoExtractor):
         else:
             return super(YoutubeUserIE, cls).suitable(url)
 
-    def _real_extract(self, url):
-        username = self._match_id(url)
-
-        # Download video ids using YouTube Data API. Result size per
-        # query is limited (currently to 50 videos) so we need to query
-        # page by page until there are no video ids - it means we got
-        # all of them.
-
-        def download_page(pagenum):
-            start_index = pagenum * self._GDATA_PAGE_SIZE + 1
-
-            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
-            page = self._download_webpage(
-                gdata_url, username,
-                'Downloading video ids from %d to %d' % (
-                    start_index, start_index + self._GDATA_PAGE_SIZE))
 
-            try:
-                response = json.loads(page)
-            except ValueError as err:
-                raise ExtractorError('Invalid JSON in API response: ' + compat_str(err))
-            if 'entry' not in response['feed']:
-                return
-
-            # Extract video identifiers
-            entries = response['feed']['entry']
-            for entry in entries:
-                title = entry['title']['$t']
-                video_id = entry['id']['$t'].split('/')[-1]
-                yield {
-                    '_type': 'url',
-                    'url': video_id,
-                    'ie_key': 'Youtube',
-                    'id': video_id,
-                    'title': title,
-                }
-        url_results = OnDemandPagedList(download_page, self._GDATA_PAGE_SIZE)
-
-        return self.playlist_result(url_results, playlist_title=username)
-
-
-class YoutubeSearchIE(SearchInfoExtractor):
+class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
     IE_DESC = 'YouTube.com searches'
-    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
-    _MAX_RESULTS = 1000
+    # there doesn't appear to be a real limit, for example if you search for
+    # 'python' you get more than 8.000.000 results
+    _MAX_RESULTS = float('inf')
     IE_NAME = 'youtube:search'
     _SEARCH_KEY = 'ytsearch'
+    _EXTRA_QUERY_ARGS = {}
+    _TESTS = []
 
     def _get_n_results(self, query, n):
         """Get a specified number of results for a query"""
 
-        video_ids = []
-        pagenum = 0
+        videos = []
         limit = n
-        PAGE_SIZE = 50
 
-        while (PAGE_SIZE * pagenum) < limit:
-            result_url = self._API_URL % (
-                compat_urllib_parse.quote_plus(query.encode('utf-8')),
-                (PAGE_SIZE * pagenum) + 1)
-            data_json = self._download_webpage(
+        for pagenum in itertools.count(1):
+            url_query = {
+                'search_query': query,
+                'page': pagenum,
+                'spf': 'navigate',
+            }
+            url_query.update(self._EXTRA_QUERY_ARGS)
+            result_url = 'https://www.youtube.com/results?' + compat_urllib_parse.urlencode(url_query)
+            data = self._download_json(
                 result_url, video_id='query "%s"' % query,
-                note='Downloading page %s' % (pagenum + 1),
+                note='Downloading page %s' % pagenum,
                 errnote='Unable to download API page')
-            data = json.loads(data_json)
-            api_response = data['data']
+            html_content = data[1]['body']['content']
 
-            if 'items' not in api_response:
+            if 'class="search-message' in html_content:
                 raise ExtractorError(
                     '[youtube] No video results', expected=True)
 
-            new_ids = list(video['id'] for video in api_response['items'])
-            video_ids += new_ids
-
-            limit = min(n, api_response['totalItems'])
-            pagenum += 1
+            new_videos = self._ids_to_results(orderedSet(re.findall(
+                r'href="/watch\?v=(.{11})', html_content)))
+            videos += new_videos
+            if not new_videos or len(videos) > limit:
+                break
 
-        if len(video_ids) > n:
-            video_ids = video_ids[:n]
-        videos = [self.url_result(video_id, 'Youtube', video_id=video_id)
-                  for video_id in video_ids]
+        if len(videos) > n:
+            videos = videos[:n]
         return self.playlist_result(videos, query)
 
 
 class YoutubeSearchDateIE(YoutubeSearchIE):
     IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
-    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
     _SEARCH_KEY = 'ytsearchdate'
     IE_DESC = 'YouTube.com searches, newest videos first'
+    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
 
 
 class YoutubeSearchURLIE(InfoExtractor):
@@ -1532,7 +1538,7 @@ class YoutubeSearchURLIE(InfoExtractor):
 
         webpage = self._download_webpage(url, query)
         result_code = self._search_regex(
-            r'(?s)<ol class="item-section"(.*?)</ol>', webpage, 'result HTML')
+            r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML')
 
         part_codes = re.findall(
             r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
@@ -1643,26 +1649,60 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
 
 
 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
+    IE_NAME = 'youtube:recommended'
     IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
     _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
     _FEED_NAME = 'recommended'
     _PLAYLIST_TITLE = 'Youtube Recommended videos'
 
 
-class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
+class YoutubeWatchLaterIE(YoutubePlaylistIE):
+    IE_NAME = 'youtube:watchlater'
     IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
-    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
-    _FEED_NAME = 'watch_later'
-    _PLAYLIST_TITLE = 'Youtube Watch Later'
-    _PERSONAL_FEED = True
+    _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|playlist\?list=WL)|:ytwatchlater'
 
+    _TESTS = []  # override PlaylistIE tests
+
+    def _real_extract(self, url):
+        return self._extract_playlist('WL')
 
-class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
+
+class YoutubeHistoryIE(YoutubePlaylistIE):
+    IE_NAME = 'youtube:history'
     IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
     _VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory'
-    _FEED_NAME = 'history'
-    _PERSONAL_FEED = True
-    _PLAYLIST_TITLE = 'Youtube Watch History'
+    _TESTS = []
+
+    def _real_extract(self, url):
+        title = 'Youtube History'
+        page = self._download_webpage('https://www.youtube.com/feed/history', title)
+
+        # The extraction process is the same as for playlists, but the regex
+        # for the video ids doesn't contain an index
+        ids = []
+        more_widget_html = content_html = page
+
+        for page_num in itertools.count(1):
+            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
+            new_ids = orderedSet(matches)
+            ids.extend(new_ids)
+
+            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
+            if not mobj:
+                break
+
+            more = self._download_json(
+                'https://youtube.com/%s' % mobj.group('more'), title,
+                'Downloading page #%s' % page_num,
+                transform_source=uppercase_escape)
+            content_html = more['content_html']
+            more_widget_html = more['load_more_widget_html']
+
+        return {
+            '_type': 'playlist',
+            'title': title,
+            'entries': self._ids_to_results(ids),
+        }
 
 
 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
index 1afbe68ed68e084028cda0c0f9d7d80a385118fb..7dc1e2f2bd3f36e2b71e199fb5e5f6f2cc4e18e9 100644 (file)
@@ -4,12 +4,18 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..utils import ExtractorError
 
 
 class ZingMp3BaseInfoExtractor(InfoExtractor):
 
-    @staticmethod
-    def _extract_item(item):
+    def _extract_item(self, item):
+        error_message = item.find('./errormessage').text
+        if error_message:
+            raise ExtractorError(
+                '%s returned error: %s' % (self.IE_NAME, error_message),
+                expected=True)
+
         title = item.find('./title').text.strip()
         source = item.find('./source').text
         extension = item.attrib['type']
index 58f811162eadc27996d4c3687b297f8c96217741..22dbc3aec7866ad3f5d048c35737486a8fdac8fc 100644 (file)
@@ -8,11 +8,12 @@ import sys
 from .downloader.external import list_external_downloaders
 from .compat import (
     compat_expanduser,
+    compat_get_terminal_size,
     compat_getenv,
     compat_kwargs,
 )
 from .utils import (
-    get_term_width,
+    preferredencoding,
     write_string,
 )
 from .version import __version__
@@ -100,7 +101,7 @@ def parseOpts(overrideArguments=None):
         return opts
 
     # No need to wrap help messages if we're on a wide console
-    columns = get_term_width()
+    columns = compat_get_terminal_size().columns
     max_width = columns if columns else 80
     max_help_position = 80
 
@@ -120,19 +121,19 @@ def parseOpts(overrideArguments=None):
     general.add_option(
         '-h', '--help',
         action='help',
-        help='print this help text and exit')
+        help='Print this help text and exit')
     general.add_option(
         '-v', '--version',
         action='version',
-        help='print program version and exit')
+        help='Print program version and exit')
     general.add_option(
         '-U', '--update',
         action='store_true', dest='update_self',
-        help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
+        help='Update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
     general.add_option(
         '-i', '--ignore-errors',
         action='store_true', dest='ignoreerrors', default=False,
-        help='continue on download errors, for example to skip unavailable videos in a playlist')
+        help='Continue on download errors, for example to skip unavailable videos in a playlist')
     general.add_option(
         '--abort-on-error',
         action='store_false', dest='ignoreerrors',
@@ -140,7 +141,7 @@ def parseOpts(overrideArguments=None):
     general.add_option(
         '--dump-user-agent',
         action='store_true', dest='dump_user_agent', default=False,
-        help='display the current browser identification')
+        help='Display the current browser identification')
     general.add_option(
         '--list-extractors',
         action='store_true', dest='list_extractors', default=False,
@@ -152,7 +153,7 @@ def parseOpts(overrideArguments=None):
     general.add_option(
         '--default-search',
         dest='default_search', metavar='PREFIX',
-        help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.')
+        help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.')
     general.add_option(
         '--ignore-config',
         action='store_true',
@@ -169,7 +170,7 @@ def parseOpts(overrideArguments=None):
         '--no-color', '--no-colors',
         action='store_true', dest='no_color',
         default=False,
-        help='Do not emit color codes in output.')
+        help='Do not emit color codes in output')
 
     network = optparse.OptionGroup(parser, 'Network Options')
     network.add_option(
@@ -195,28 +196,34 @@ def parseOpts(overrideArguments=None):
         action='store_const', const='::', dest='source_address',
         help='Make all connections via IPv6 (experimental)',
     )
+    network.add_option(
+        '--cn-verification-proxy',
+        dest='cn_verification_proxy', default=None, metavar='URL',
+        help='Use this proxy to verify the IP address for some Chinese sites. '
+        'The default proxy specified by --proxy (or none, if the option is not present) is used for the actual downloading. (experimental)'
+    )
 
     selection = optparse.OptionGroup(parser, 'Video Selection')
     selection.add_option(
         '--playlist-start',
         dest='playliststart', metavar='NUMBER', default=1, type=int,
-        help='playlist video to start at (default is %default)')
+        help='Playlist video to start at (default is %default)')
     selection.add_option(
         '--playlist-end',
         dest='playlistend', metavar='NUMBER', default=None, type=int,
-        help='playlist video to end at (default is last)')
+        help='Playlist video to end at (default is last)')
     selection.add_option(
         '--playlist-items',
         dest='playlist_items', metavar='ITEM_SPEC', default=None,
-        help='playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.')
+        help='Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.')
     selection.add_option(
         '--match-title',
         dest='matchtitle', metavar='REGEX',
-        help='download only matching titles (regex or caseless sub-string)')
+        help='Download only matching titles (regex or caseless sub-string)')
     selection.add_option(
         '--reject-title',
         dest='rejecttitle', metavar='REGEX',
-        help='skip download for matching titles (regex or caseless sub-string)')
+        help='Skip download for matching titles (regex or caseless sub-string)')
     selection.add_option(
         '--max-downloads',
         dest='max_downloads', metavar='NUMBER', type=int, default=None,
@@ -232,19 +239,19 @@ def parseOpts(overrideArguments=None):
     selection.add_option(
         '--date',
         metavar='DATE', dest='date', default=None,
-        help='download only videos uploaded in this date')
+        help='Download only videos uploaded on this date')
     selection.add_option(
         '--datebefore',
         metavar='DATE', dest='datebefore', default=None,
-        help='download only videos uploaded on or before this date (i.e. inclusive)')
+        help='Download only videos uploaded on or before this date (i.e. inclusive)')
     selection.add_option(
         '--dateafter',
         metavar='DATE', dest='dateafter', default=None,
-        help='download only videos uploaded on or after this date (i.e. inclusive)')
+        help='Download only videos uploaded on or after this date (i.e. inclusive)')
     selection.add_option(
         '--min-views',
         metavar='COUNT', dest='min_views', default=None, type=int,
-        help='Do not download any videos with less than COUNT views',)
+        help='Do not download any videos with less than COUNT views')
     selection.add_option(
         '--max-views',
         metavar='COUNT', dest='max_views', default=None, type=int,
@@ -253,7 +260,7 @@ def parseOpts(overrideArguments=None):
         '--match-filter',
         metavar='FILTER', dest='match_filter', default=None,
         help=(
-            '(Experimental) Generic video filter. '
+            'Generic video filter (experimental). '
             'Specify any key (see help for -o for a list of available keys) to'
             ' match if the key is present, '
             '!key to check if the key is not present,'
@@ -271,15 +278,15 @@ def parseOpts(overrideArguments=None):
     selection.add_option(
         '--no-playlist',
         action='store_true', dest='noplaylist', default=False,
-        help='If the URL refers to a video and a playlist, download only the video.')
+        help='Download only the video, if the URL refers to a video and a playlist.')
     selection.add_option(
         '--yes-playlist',
         action='store_false', dest='noplaylist', default=False,
-        help='If the URL refers to a video and a playlist, download the playlist.')
+        help='Download the playlist, if the URL refers to a video and a playlist.')
     selection.add_option(
         '--age-limit',
         metavar='YEARS', dest='age_limit', default=None, type=int,
-        help='download only videos suitable for the given age')
+        help='Download only videos suitable for the given age')
     selection.add_option(
         '--download-archive', metavar='FILE',
         dest='download_archive',
@@ -293,70 +300,41 @@ def parseOpts(overrideArguments=None):
     authentication.add_option(
         '-u', '--username',
         dest='username', metavar='USERNAME',
-        help='login with this account ID')
+        help='Login with this account ID')
     authentication.add_option(
         '-p', '--password',
         dest='password', metavar='PASSWORD',
-        help='account password. If this option is left out, youtube-dl will ask interactively.')
+        help='Account password. If this option is left out, youtube-dl will ask interactively.')
     authentication.add_option(
         '-2', '--twofactor',
         dest='twofactor', metavar='TWOFACTOR',
-        help='two-factor auth code')
+        help='Two-factor auth code')
     authentication.add_option(
         '-n', '--netrc',
         action='store_true', dest='usenetrc', default=False,
-        help='use .netrc authentication data')
+        help='Use .netrc authentication data')
     authentication.add_option(
         '--video-password',
         dest='videopassword', metavar='PASSWORD',
-        help='video password (vimeo, smotri)')
+        help='Video password (vimeo, smotri)')
 
     video_format = optparse.OptionGroup(parser, 'Video Format Options')
     video_format.add_option(
         '-f', '--format',
         action='store', dest='format', metavar='FORMAT', default=None,
-        help=(
-            'video format code, specify the order of preference using'
-            ' slashes, as in -f 22/17/18 . '
-            ' Instead of format codes, you can select by extension for the '
-            'extensions aac, m4a, mp3, mp4, ogg, wav, webm. '
-            'You can also use the special names "best",'
-            ' "bestvideo", "bestaudio", "worst". '
-            ' You can filter the video results by putting a condition in'
-            ' brackets, as in -f "best[height=720]"'
-            ' (or -f "[filesize>10M]"). '
-            ' This works for filesize, height, width, tbr, abr, vbr, asr, and fps'
-            ' and the comparisons <, <=, >, >=, =, !='
-            ' and for ext, acodec, vcodec, container, and protocol'
-            ' and the comparisons =, != .'
-            ' Formats for which the value is not known are excluded unless you'
-            ' put a question mark (?) after the operator.'
-            ' You can combine format filters, so  '
-            '-f "[height <=? 720][tbr>500]" '
-            'selects up to 720p videos (or videos where the height is not '
-            'known) with a bitrate of at least 500 KBit/s.'
-            ' By default, youtube-dl will pick the best quality.'
-            ' Use commas to download multiple audio formats, such as'
-            ' -f  136/137/mp4/bestvideo,140/m4a/bestaudio.'
-            ' You can merge the video and audio of two formats into a single'
-            ' file using -f <video-format>+<audio-format> (requires ffmpeg or'
-            ' avconv), for example -f bestvideo+bestaudio.'))
+        help='Video format code, see the "FORMAT SELECTION" for all the info')
     video_format.add_option(
         '--all-formats',
         action='store_const', dest='format', const='all',
-        help='download all available video formats')
+        help='Download all available video formats')
     video_format.add_option(
         '--prefer-free-formats',
         action='store_true', dest='prefer_free_formats', default=False,
-        help='prefer free video formats unless a specific one is requested')
-    video_format.add_option(
-        '--max-quality',
-        action='store', dest='format_limit', metavar='FORMAT',
-        help='highest quality format to download')
+        help='Prefer free video formats unless a specific one is requested')
     video_format.add_option(
         '-F', '--list-formats',
         action='store_true', dest='listformats',
-        help='list all available formats')
+        help='List all available formats')
     video_format.add_option(
         '--youtube-include-dash-manifest',
         action='store_true', dest='youtube_include_dash_manifest', default=True,
@@ -376,46 +354,46 @@ def parseOpts(overrideArguments=None):
     subtitles.add_option(
         '--write-sub', '--write-srt',
         action='store_true', dest='writesubtitles', default=False,
-        help='write subtitle file')
+        help='Write subtitle file')
     subtitles.add_option(
         '--write-auto-sub', '--write-automatic-sub',
         action='store_true', dest='writeautomaticsub', default=False,
-        help='write automatic subtitle file (youtube only)')
+        help='Write automatic subtitle file (YouTube only)')
     subtitles.add_option(
         '--all-subs',
         action='store_true', dest='allsubtitles', default=False,
-        help='downloads all the available subtitles of the video')
+        help='Download all the available subtitles of the video')
     subtitles.add_option(
         '--list-subs',
         action='store_true', dest='listsubtitles', default=False,
-        help='lists all available subtitles for the video')
+        help='List all available subtitles for the video')
     subtitles.add_option(
         '--sub-format',
         action='store', dest='subtitlesformat', metavar='FORMAT', default='best',
-        help='subtitle format, accepts formats preference, for example: "ass/srt/best"')
+        help='Subtitle format, accepts formats preference, for example: "srt" or "ass/srt/best"')
     subtitles.add_option(
         '--sub-lang', '--sub-langs', '--srt-lang',
         action='callback', dest='subtitleslangs', metavar='LANGS', type='str',
         default=[], callback=_comma_separated_values_options_callback,
-        help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'')
+        help='Languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'')
 
     downloader = optparse.OptionGroup(parser, 'Download Options')
     downloader.add_option(
         '-r', '--rate-limit',
         dest='ratelimit', metavar='LIMIT',
-        help='maximum download rate in bytes per second (e.g. 50K or 4.2M)')
+        help='Maximum download rate in bytes per second (e.g. 50K or 4.2M)')
     downloader.add_option(
         '-R', '--retries',
         dest='retries', metavar='RETRIES', default=10,
-        help='number of retries (default is %default), or "infinite".')
+        help='Number of retries (default is %default), or "infinite".')
     downloader.add_option(
         '--buffer-size',
         dest='buffersize', metavar='SIZE', default='1024',
-        help='size of download buffer (e.g. 1024 or 16K) (default is %default)')
+        help='Size of download buffer (e.g. 1024 or 16K) (default is %default)')
     downloader.add_option(
         '--no-resize-buffer',
         action='store_true', dest='noresizebuffer', default=False,
-        help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.')
+        help='Do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.')
     downloader.add_option(
         '--test',
         action='store_true', dest='test', default=False,
@@ -427,16 +405,20 @@ def parseOpts(overrideArguments=None):
     downloader.add_option(
         '--xattr-set-filesize',
         dest='xattr_set_filesize', action='store_true',
-        help='(experimental) set file xattribute ytdl.filesize with expected filesize')
+        help='Set file xattribute ytdl.filesize with expected filesize (experimental)')
     downloader.add_option(
         '--hls-prefer-native',
         dest='hls_prefer_native', action='store_true',
-        help='(experimental) Use the native HLS downloader instead of ffmpeg.')
+        help='Use the native HLS downloader instead of ffmpeg (experimental)')
     downloader.add_option(
         '--external-downloader',
         dest='external_downloader', metavar='COMMAND',
-        help='(experimental) Use the specified external downloader. '
+        help='Use the specified external downloader. '
              'Currently supports %s' % ','.join(list_external_downloaders()))
+    downloader.add_option(
+        '--external-downloader-args',
+        dest='external_downloader_args', metavar='ARGS',
+        help='Give these arguments to the external downloader')
 
     workarounds = optparse.OptionGroup(parser, 'Workarounds')
     workarounds.add_option(
@@ -446,7 +428,7 @@ def parseOpts(overrideArguments=None):
     workarounds.add_option(
         '--no-check-certificate',
         action='store_true', dest='no_check_certificate', default=False,
-        help='Suppress HTTPS certificate validation.')
+        help='Suppress HTTPS certificate validation')
     workarounds.add_option(
         '--prefer-insecure',
         '--prefer-unsecure', action='store_true', dest='prefer_insecure',
@@ -454,16 +436,16 @@ def parseOpts(overrideArguments=None):
     workarounds.add_option(
         '--user-agent',
         metavar='UA', dest='user_agent',
-        help='specify a custom user agent')
+        help='Specify a custom user agent')
     workarounds.add_option(
         '--referer',
         metavar='URL', dest='referer', default=None,
-        help='specify a custom referer, use if the video access is restricted to one domain',
+        help='Specify a custom referer, use if the video access is restricted to one domain',
     )
     workarounds.add_option(
         '--add-header',
         metavar='FIELD:VALUE', dest='headers', action='append',
-        help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times',
+        help='Specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times',
     )
     workarounds.add_option(
         '--bidi-workaround',
@@ -478,7 +460,7 @@ def parseOpts(overrideArguments=None):
     verbosity.add_option(
         '-q', '--quiet',
         action='store_true', dest='quiet', default=False,
-        help='activates quiet mode')
+        help='Activate quiet mode')
     verbosity.add_option(
         '--no-warnings',
         dest='no_warnings', action='store_true', default=False,
@@ -486,51 +468,51 @@ def parseOpts(overrideArguments=None):
     verbosity.add_option(
         '-s', '--simulate',
         action='store_true', dest='simulate', default=False,
-        help='do not download the video and do not write anything to disk',)
+        help='Do not download the video and do not write anything to disk')
     verbosity.add_option(
         '--skip-download',
         action='store_true', dest='skip_download', default=False,
-        help='do not download the video',)
+        help='Do not download the video')
     verbosity.add_option(
         '-g', '--get-url',
         action='store_true', dest='geturl', default=False,
-        help='simulate, quiet but print URL')
+        help='Simulate, quiet but print URL')
     verbosity.add_option(
         '-e', '--get-title',
         action='store_true', dest='gettitle', default=False,
-        help='simulate, quiet but print title')
+        help='Simulate, quiet but print title')
     verbosity.add_option(
         '--get-id',
         action='store_true', dest='getid', default=False,
-        help='simulate, quiet but print id')
+        help='Simulate, quiet but print id')
     verbosity.add_option(
         '--get-thumbnail',
         action='store_true', dest='getthumbnail', default=False,
-        help='simulate, quiet but print thumbnail URL')
+        help='Simulate, quiet but print thumbnail URL')
     verbosity.add_option(
         '--get-description',
         action='store_true', dest='getdescription', default=False,
-        help='simulate, quiet but print video description')
+        help='Simulate, quiet but print video description')
     verbosity.add_option(
         '--get-duration',
         action='store_true', dest='getduration', default=False,
-        help='simulate, quiet but print video length')
+        help='Simulate, quiet but print video length')
     verbosity.add_option(
         '--get-filename',
         action='store_true', dest='getfilename', default=False,
-        help='simulate, quiet but print output filename')
+        help='Simulate, quiet but print output filename')
     verbosity.add_option(
         '--get-format',
         action='store_true', dest='getformat', default=False,
-        help='simulate, quiet but print output format')
+        help='Simulate, quiet but print output format')
     verbosity.add_option(
         '-j', '--dump-json',
         action='store_true', dest='dumpjson', default=False,
-        help='simulate, quiet but print JSON information. See --output for a description of available keys.')
+        help='Simulate, quiet but print JSON information. See --output for a description of available keys.')
     verbosity.add_option(
         '-J', '--dump-single-json',
         action='store_true', dest='dump_single_json', default=False,
-        help='simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.')
+        help='Simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.')
     verbosity.add_option(
         '--print-json',
         action='store_true', dest='print_json', default=False,
@@ -539,23 +521,23 @@ def parseOpts(overrideArguments=None):
     verbosity.add_option(
         '--newline',
         action='store_true', dest='progress_with_newline', default=False,
-        help='output progress bar as new lines')
+        help='Output progress bar as new lines')
     verbosity.add_option(
         '--no-progress',
         action='store_true', dest='noprogress', default=False,
-        help='do not print progress bar')
+        help='Do not print progress bar')
     verbosity.add_option(
         '--console-title',
         action='store_true', dest='consoletitle', default=False,
-        help='display progress in console titlebar')
+        help='Display progress in console titlebar')
     verbosity.add_option(
         '-v', '--verbose',
         action='store_true', dest='verbose', default=False,
-        help='print various debugging information')
+        help='Print various debugging information')
     verbosity.add_option(
-        '--dump-intermediate-pages',
+        '--dump-pages', '--dump-intermediate-pages',
         action='store_true', dest='dump_intermediate_pages', default=False,
-        help='print downloaded pages to debug problems (very verbose)')
+        help='Print downloaded pages to debug problems (very verbose)')
     verbosity.add_option(
         '--write-pages',
         action='store_true', dest='write_pages', default=False,
@@ -571,29 +553,29 @@ def parseOpts(overrideArguments=None):
     verbosity.add_option(
         '-C', '--call-home',
         dest='call_home', action='store_true', default=False,
-        help='Contact the youtube-dl server for debugging.')
+        help='Contact the youtube-dl server for debugging')
     verbosity.add_option(
         '--no-call-home',
         dest='call_home', action='store_false', default=False,
-        help='Do NOT contact the youtube-dl server for debugging.')
+        help='Do NOT contact the youtube-dl server for debugging')
 
     filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
     filesystem.add_option(
         '-a', '--batch-file',
         dest='batchfile', metavar='FILE',
-        help='file containing URLs to download (\'-\' for stdin)')
+        help='File containing URLs to download (\'-\' for stdin)')
     filesystem.add_option(
         '--id', default=False,
-        action='store_true', dest='useid', help='use only video ID in file name')
+        action='store_true', dest='useid', help='Use only video ID in file name')
     filesystem.add_option(
         '-o', '--output',
         dest='outtmpl', metavar='TEMPLATE',
-        help=('output filename template. Use %(title)s to get the title, '
+        help=('Output filename template. Use %(title)s to get the title, '
               '%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, '
               '%(autonumber)s to get an automatically incremented number, '
               '%(ext)s for the filename extension, '
               '%(format)s for the format description (like "22 - 1280x720" or "HD"), '
-              '%(format_id)s for the unique id of the format (like Youtube\'s itags: "137"), '
+              '%(format_id)s for the unique id of the format (like YouTube\'s itags: "137"), '
               '%(upload_date)s for the upload date (YYYYMMDD), '
               '%(extractor)s for the provider (youtube, metacafe, etc), '
               '%(id)s for the video id, '
@@ -607,7 +589,7 @@ def parseOpts(overrideArguments=None):
     filesystem.add_option(
         '--autonumber-size',
         dest='autonumber_size', metavar='NUMBER',
-        help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given')
+        help='Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given')
     filesystem.add_option(
         '--restrict-filenames',
         action='store_true', dest='restrictfilenames', default=False,
@@ -615,55 +597,55 @@ def parseOpts(overrideArguments=None):
     filesystem.add_option(
         '-A', '--auto-number',
         action='store_true', dest='autonumber', default=False,
-        help='[deprecated; use  -o "%(autonumber)s-%(title)s.%(ext)s" ] number downloaded files starting from 00000')
+        help='[deprecated; use  -o "%(autonumber)s-%(title)s.%(ext)s" ] Number downloaded files starting from 00000')
     filesystem.add_option(
         '-t', '--title',
         action='store_true', dest='usetitle', default=False,
-        help='[deprecated] use title in file name (default)')
+        help='[deprecated] Use title in file name (default)')
     filesystem.add_option(
         '-l', '--literal', default=False,
         action='store_true', dest='usetitle',
-        help='[deprecated] alias of --title')
+        help='[deprecated] Alias of --title')
     filesystem.add_option(
         '-w', '--no-overwrites',
         action='store_true', dest='nooverwrites', default=False,
-        help='do not overwrite files')
+        help='Do not overwrite files')
     filesystem.add_option(
         '-c', '--continue',
         action='store_true', dest='continue_dl', default=True,
-        help='force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.')
+        help='Force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.')
     filesystem.add_option(
         '--no-continue',
         action='store_false', dest='continue_dl',
-        help='do not resume partially downloaded files (restart from beginning)')
+        help='Do not resume partially downloaded files (restart from beginning)')
     filesystem.add_option(
         '--no-part',
         action='store_true', dest='nopart', default=False,
-        help='do not use .part files - write directly into output file')
+        help='Do not use .part files - write directly into output file')
     filesystem.add_option(
         '--no-mtime',
         action='store_false', dest='updatetime', default=True,
-        help='do not use the Last-modified header to set the file modification time')
+        help='Do not use the Last-modified header to set the file modification time')
     filesystem.add_option(
         '--write-description',
         action='store_true', dest='writedescription', default=False,
-        help='write video description to a .description file')
+        help='Write video description to a .description file')
     filesystem.add_option(
         '--write-info-json',
         action='store_true', dest='writeinfojson', default=False,
-        help='write video metadata to a .info.json file')
+        help='Write video metadata to a .info.json file')
     filesystem.add_option(
         '--write-annotations',
         action='store_true', dest='writeannotations', default=False,
-        help='write video annotations to a .annotation file')
+        help='Write video annotations to a .annotations.xml file')
     filesystem.add_option(
         '--load-info',
         dest='load_info_filename', metavar='FILE',
-        help='json file containing the video information (created with the "--write-json" option)')
+        help='JSON file containing the video information (created with the "--write-info-json" option)')
     filesystem.add_option(
         '--cookies',
         dest='cookiefile', metavar='FILE',
-        help='file to read cookies from and dump cookie jar in')
+        help='File to read cookies from and dump cookie jar in')
     filesystem.add_option(
         '--cache-dir', dest='cachedir', default=None, metavar='DIR',
         help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
@@ -679,11 +661,11 @@ def parseOpts(overrideArguments=None):
     thumbnail.add_option(
         '--write-thumbnail',
         action='store_true', dest='writethumbnail', default=False,
-        help='write thumbnail image to disk')
+        help='Write thumbnail image to disk')
     thumbnail.add_option(
         '--write-all-thumbnails',
         action='store_true', dest='write_all_thumbnails', default=False,
-        help='write all thumbnail image formats to disk')
+        help='Write all thumbnail image formats to disk')
     thumbnail.add_option(
         '--list-thumbnails',
         action='store_true', dest='list_thumbnails', default=False,
@@ -693,14 +675,14 @@ def parseOpts(overrideArguments=None):
     postproc.add_option(
         '-x', '--extract-audio',
         action='store_true', dest='extractaudio', default=False,
-        help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
+        help='Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
     postproc.add_option(
         '--audio-format', metavar='FORMAT', dest='audioformat', default='best',
-        help='"best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default')
+        help='Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default')
     postproc.add_option(
         '--audio-quality', metavar='QUALITY',
         dest='audioquality', default='5',
-        help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default %default)')
+        help='Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default %default)')
     postproc.add_option(
         '--recode-video',
         metavar='FORMAT', dest='recodevideo', default=None,
@@ -708,27 +690,36 @@ def parseOpts(overrideArguments=None):
     postproc.add_option(
         '-k', '--keep-video',
         action='store_true', dest='keepvideo', default=False,
-        help='keeps the video file on disk after the post-processing; the video is erased by default')
+        help='Keep the video file on disk after the post-processing; the video is erased by default')
     postproc.add_option(
         '--no-post-overwrites',
         action='store_true', dest='nopostoverwrites', default=False,
-        help='do not overwrite post-processed files; the post-processed files are overwritten by default')
+        help='Do not overwrite post-processed files; the post-processed files are overwritten by default')
     postproc.add_option(
         '--embed-subs',
         action='store_true', dest='embedsubtitles', default=False,
-        help='embed subtitles in the video (only for mp4 videos)')
+        help='Embed subtitles in the video (only for mkv and mp4 videos)')
     postproc.add_option(
         '--embed-thumbnail',
         action='store_true', dest='embedthumbnail', default=False,
-        help='embed thumbnail in the audio as cover art')
+        help='Embed thumbnail in the audio as cover art')
     postproc.add_option(
         '--add-metadata',
         action='store_true', dest='addmetadata', default=False,
-        help='write metadata to the video file')
+        help='Write metadata to the video file')
+    postproc.add_option(
+        '--metadata-from-title',
+        metavar='FORMAT', dest='metafromtitle',
+        help='Parse additional metadata like song title / artist from the video title. '
+             'The format syntax is the same as --output, '
+             'the parsed parameters replace existing values. '
+             'Additional templates: %(album), %(artist). '
+             'Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like '
+             '"Coldplay - Paradise"')
     postproc.add_option(
         '--xattrs',
         action='store_true', dest='xattrs', default=False,
-        help='write metadata to the video file\'s xattrs (using dublin core and xdg standards)')
+        help='Write metadata to the video file\'s xattrs (using dublin core and xdg standards)')
     postproc.add_option(
         '--fixup',
         metavar='POLICY', dest='fixup', default='detect_or_warn',
@@ -774,16 +765,22 @@ def parseOpts(overrideArguments=None):
         if opts.verbose:
             write_string('[debug] Override config: ' + repr(overrideArguments) + '\n')
     else:
-        command_line_conf = sys.argv[1:]
+        def compat_conf(conf):
+            if sys.version_info < (3,):
+                return [a.decode(preferredencoding(), 'replace') for a in conf]
+            return conf
+
+        command_line_conf = compat_conf(sys.argv[1:])
+
         if '--ignore-config' in command_line_conf:
             system_conf = []
             user_conf = []
         else:
-            system_conf = _readOptions('/etc/youtube-dl.conf')
+            system_conf = compat_conf(_readOptions('/etc/youtube-dl.conf'))
             if '--ignore-config' in system_conf:
                 user_conf = []
             else:
-                user_conf = _readUserConf()
+                user_conf = compat_conf(_readUserConf())
         argv = system_conf + user_conf + command_line_conf
 
         opts, args = parser.parse_args(argv)
index 708df3dd493ca97e6d1649d572ac68a0d6847464..0d8ef6ca26c6ef7f1b7b402b387d20eebd3f8a8f 100644 (file)
@@ -1,9 +1,8 @@
 from __future__ import unicode_literals
 
-from .atomicparsley import AtomicParsleyPP
+from .embedthumbnail import EmbedThumbnailPP
 from .ffmpeg import (
     FFmpegPostProcessor,
-    FFmpegAudioFixPP,
     FFmpegEmbedSubtitlePP,
     FFmpegExtractAudioPP,
     FFmpegFixupStretchedPP,
@@ -15,6 +14,7 @@ from .ffmpeg import (
 )
 from .xattrpp import XAttrMetadataPP
 from .execafterdownload import ExecAfterDownloadPP
+from .metadatafromtitle import MetadataFromTitlePP
 
 
 def get_postprocessor(key):
@@ -22,9 +22,8 @@ def get_postprocessor(key):
 
 
 __all__ = [
-    'AtomicParsleyPP',
+    'EmbedThumbnailPP',
     'ExecAfterDownloadPP',
-    'FFmpegAudioFixPP',
     'FFmpegEmbedSubtitlePP',
     'FFmpegExtractAudioPP',
     'FFmpegFixupM4aPP',
@@ -34,5 +33,6 @@ __all__ = [
     'FFmpegPostProcessor',
     'FFmpegSubtitlesConvertorPP',
     'FFmpegVideoConvertorPP',
+    'MetadataFromTitlePP',
     'XAttrMetadataPP',
 ]
diff --git a/youtube_dl/postprocessor/atomicparsley.py b/youtube_dl/postprocessor/atomicparsley.py
deleted file mode 100644 (file)
index 448ccc5..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import unicode_literals
-
-
-import os
-import subprocess
-
-from .common import PostProcessor
-from ..compat import (
-    compat_urlretrieve,
-)
-from ..utils import (
-    check_executable,
-    encodeFilename,
-    PostProcessingError,
-    prepend_extension,
-    shell_quote
-)
-
-
-class AtomicParsleyPPError(PostProcessingError):
-    pass
-
-
-class AtomicParsleyPP(PostProcessor):
-    def run(self, info):
-        if not check_executable('AtomicParsley', ['-v']):
-            raise AtomicParsleyPPError('AtomicParsley was not found. Please install.')
-
-        filename = info['filepath']
-        temp_filename = prepend_extension(filename, 'temp')
-        temp_thumbnail = prepend_extension(filename, 'thumb')
-
-        if not info.get('thumbnail'):
-            raise AtomicParsleyPPError('Thumbnail was not found. Nothing to do.')
-
-        compat_urlretrieve(info['thumbnail'], temp_thumbnail)
-
-        cmd = ['AtomicParsley', filename, '--artwork', temp_thumbnail, '-o', temp_filename]
-
-        self._downloader.to_screen('[atomicparsley] Adding thumbnail to "%s"' % filename)
-
-        if self._downloader.params.get('verbose', False):
-            self._downloader.to_screen('[debug] AtomicParsley command line: %s' % shell_quote(cmd))
-
-        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        stdout, stderr = p.communicate()
-
-        if p.returncode != 0:
-            msg = stderr.decode('utf-8', 'replace').strip()
-            raise AtomicParsleyPPError(msg)
-
-        os.remove(encodeFilename(filename))
-        os.remove(encodeFilename(temp_thumbnail))
-        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
-
-        return True, info
index e54ae678da17bef5c5848bc7165d42d5eec912a4..3b0e8ddd8bfed92f2e9043b0adea2655a9e5f67b 100644 (file)
@@ -1,6 +1,11 @@
 from __future__ import unicode_literals
 
-from ..utils import PostProcessingError
+import os
+
+from ..utils import (
+    PostProcessingError,
+    encodeFilename,
+)
 
 
 class PostProcessor(object):
@@ -37,14 +42,20 @@ class PostProcessor(object):
         one has an extra field called "filepath" that points to the
         downloaded file.
 
-        This method returns a tuple, the first element of which describes
-        whether the original file should be kept (i.e. not deleted - None for
-        no preference), and the second of which is the updated information.
+        This method returns a tuple, the first element of which is a list of
+        the files that can be deleted, and the second of which is the updated
+        information.
 
         In addition, this method may raise a PostProcessingError
         exception if post processing fails.
         """
-        return None, information  # by default, keep file and do nothing
+        return [], information  # by default, keep file and do nothing
+
+    def try_utime(self, path, atime, mtime, errnote='Cannot update utime of file'):
+        try:
+            os.utime(encodeFilename(path), (atime, mtime))
+        except Exception:
+            self._downloader.report_warning(errnote)
 
 
 class AudioConversionError(PostProcessingError):
diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py
new file mode 100644 (file)
index 0000000..8f825f7
--- /dev/null
@@ -0,0 +1,87 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+
+import os
+import subprocess
+
+from .ffmpeg import FFmpegPostProcessor
+
+from ..utils import (
+    check_executable,
+    encodeArgument,
+    encodeFilename,
+    PostProcessingError,
+    prepend_extension,
+    shell_quote
+)
+
+
+class EmbedThumbnailPPError(PostProcessingError):
+    pass
+
+
+class EmbedThumbnailPP(FFmpegPostProcessor):
+    def __init__(self, downloader=None, already_have_thumbnail=False):
+        super(EmbedThumbnailPP, self).__init__(downloader)
+        self._already_have_thumbnail = already_have_thumbnail
+
+    def run(self, info):
+        filename = info['filepath']
+        temp_filename = prepend_extension(filename, 'temp')
+
+        if not info.get('thumbnails'):
+            raise EmbedThumbnailPPError('Thumbnail was not found. Nothing to do.')
+
+        thumbnail_filename = info['thumbnails'][-1]['filename']
+
+        if info['ext'] == 'mp3':
+            options = [
+                '-c', 'copy', '-map', '0', '-map', '1',
+                '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment="Cover (Front)"']
+
+            self._downloader.to_screen('[ffmpeg] Adding thumbnail to "%s"' % filename)
+
+            self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)
+
+            if not self._already_have_thumbnail:
+                os.remove(encodeFilename(thumbnail_filename))
+            os.remove(encodeFilename(filename))
+            os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+
+        elif info['ext'] == 'm4a':
+            if not check_executable('AtomicParsley', ['-v']):
+                raise EmbedThumbnailPPError('AtomicParsley was not found. Please install.')
+
+            cmd = [encodeFilename('AtomicParsley', True),
+                   encodeFilename(filename, True),
+                   encodeArgument('--artwork'),
+                   encodeFilename(thumbnail_filename, True),
+                   encodeArgument('-o'),
+                   encodeFilename(temp_filename, True)]
+
+            self._downloader.to_screen('[atomicparsley] Adding thumbnail to "%s"' % filename)
+
+            if self._downloader.params.get('verbose', False):
+                self._downloader.to_screen('[debug] AtomicParsley command line: %s' % shell_quote(cmd))
+
+            p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+            stdout, stderr = p.communicate()
+
+            if p.returncode != 0:
+                msg = stderr.decode('utf-8', 'replace').strip()
+                raise EmbedThumbnailPPError(msg)
+
+            if not self._already_have_thumbnail:
+                os.remove(encodeFilename(thumbnail_filename))
+            # for formats that don't support thumbnails (like 3gp) AtomicParsley
+            # won't create the temporary file
+            if b'No changes' in stdout:
+                self._downloader.report_warning('The file format doesn\'t support embedding a thumbnail')
+            else:
+                os.remove(encodeFilename(filename))
+                os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+        else:
+            raise EmbedThumbnailPPError('Only mp3 and m4a are supported for thumbnail embedding for now.')
+
+        return [], info
index 75c0f7bbe86ef8e19f41fd61e1bbd58678474d8a..13794b7ba8653b179a08a348744441ae5c296852 100644 (file)
@@ -8,8 +8,8 @@ from ..utils import PostProcessingError
 
 
 class ExecAfterDownloadPP(PostProcessor):
-    def __init__(self, downloader=None, verboseOutput=None, exec_cmd=None):
-        self.verboseOutput = verboseOutput
+    def __init__(self, downloader, exec_cmd):
+        super(ExecAfterDownloadPP, self).__init__(downloader)
         self.exec_cmd = exec_cmd
 
     def run(self, information):
@@ -25,4 +25,4 @@ class ExecAfterDownloadPP(PostProcessor):
             raise PostProcessingError(
                 'Command returned error code %d' % retCode)
 
-        return None, information  # by default, keep file and do nothing
+        return [], information
index 30094c2f37f767f937052306ddf3967279858a01..cc65b34e71a28cfb0947b9441d5dcc006baf47ba 100644 (file)
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 import io
 import os
 import subprocess
-import sys
 import time
 
 
@@ -21,6 +20,7 @@ from ..utils import (
     prepend_extension,
     shell_quote,
     subtitles_filename,
+    dfxp2srt,
 )
 
 
@@ -29,9 +29,8 @@ class FFmpegPostProcessorError(PostProcessingError):
 
 
 class FFmpegPostProcessor(PostProcessor):
-    def __init__(self, downloader=None, deletetempfiles=False):
+    def __init__(self, downloader=None):
         PostProcessor.__init__(self, downloader)
-        self._deletetempfiles = deletetempfiles
         self._determine_executables()
 
     def check_version(self):
@@ -117,6 +116,10 @@ class FFmpegPostProcessor(PostProcessor):
     def executable(self):
         return self._paths[self.basename]
 
+    @property
+    def probe_available(self):
+        return self.probe_basename is not None
+
     @property
     def probe_executable(self):
         return self._paths[self.probe_basename]
@@ -143,10 +146,7 @@ class FFmpegPostProcessor(PostProcessor):
             stderr = stderr.decode('utf-8', 'replace')
             msg = stderr.strip().split('\n')[-1]
             raise FFmpegPostProcessorError(msg)
-        os.utime(encodeFilename(out_path), (oldest_mtime, oldest_mtime))
-        if self._deletetempfiles:
-            for ipath in input_paths:
-                os.remove(ipath)
+        self.try_utime(out_path, oldest_mtime, oldest_mtime)
 
     def run_ffmpeg(self, path, out_path, opts):
         self.run_ffmpeg_multiple_files([path], out_path, opts)
@@ -169,7 +169,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
 
     def get_audio_codec(self, path):
 
-        if not self.probe_executable:
+        if not self.probe_available:
             raise PostProcessingError('ffprobe or avprobe not found. Please install one.')
         try:
             cmd = [
@@ -260,32 +260,30 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
         new_path = prefix + sep + extension
 
         # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
-        if new_path == path:
-            self._nopostoverwrites = True
+        if (new_path == path or
+                (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))):
+            self._downloader.to_screen('[youtube] Post-process file %s exists, skipping' % new_path)
+            return [], information
 
         try:
-            if self._nopostoverwrites and os.path.exists(encodeFilename(new_path)):
-                self._downloader.to_screen('[youtube] Post-process file %s exists, skipping' % new_path)
-            else:
-                self._downloader.to_screen('[' + self.basename + '] Destination: ' + new_path)
-                self.run_ffmpeg(path, new_path, acodec, more_opts)
-        except:
-            etype, e, tb = sys.exc_info()
-            if isinstance(e, AudioConversionError):
-                msg = 'audio conversion failed: ' + e.msg
-            else:
-                msg = 'error running ' + self.basename
-            raise PostProcessingError(msg)
+            self._downloader.to_screen('[' + self.basename + '] Destination: ' + new_path)
+            self.run_ffmpeg(path, new_path, acodec, more_opts)
+        except AudioConversionError as e:
+            raise PostProcessingError(
+                'audio conversion failed: ' + e.msg)
+        except Exception:
+            raise PostProcessingError('error running ' + self.basename)
 
         # Try to update the date time for extracted audio file.
         if information.get('filetime') is not None:
-            try:
-                os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
-            except:
-                self._downloader.report_warning('Cannot update utime of audio file')
+            self.try_utime(
+                new_path, time.time(), information['filetime'],
+                errnote='Cannot update utime of audio file')
 
         information['filepath'] = new_path
-        return self._nopostoverwrites, information
+        information['ext'] = extension
+
+        return [path], information
 
 
 class FFmpegVideoConvertorPP(FFmpegPostProcessor):
@@ -299,13 +297,13 @@ class FFmpegVideoConvertorPP(FFmpegPostProcessor):
         outpath = prefix + sep + self._preferedformat
         if information['ext'] == self._preferedformat:
             self._downloader.to_screen('[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat))
-            return True, information
+            return [], information
         self._downloader.to_screen('[' + 'ffmpeg' + '] Converting video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) + outpath)
         self.run_ffmpeg(path, outpath, [])
         information['filepath'] = outpath
         information['format'] = self._preferedformat
         information['ext'] = self._preferedformat
-        return False, information
+        return [path], information
 
 
 class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
@@ -503,17 +501,18 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
         return cls._lang_map.get(code[:2])
 
     def run(self, information):
-        if information['ext'] != 'mp4':
-            self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 files')
-            return True, information
+        if information['ext'] not in ['mp4', 'mkv']:
+            self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 or mkv files')
+            return [], information
         subtitles = information.get('requested_subtitles')
         if not subtitles:
             self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to embed')
-            return True, information
+            return [], information
 
         sub_langs = list(subtitles.keys())
         filename = information['filepath']
-        input_files = [filename] + [subtitles_filename(filename, lang, sub_info['ext']) for lang, sub_info in subtitles.items()]
+        sub_filenames = [subtitles_filename(filename, lang, sub_info['ext']) for lang, sub_info in subtitles.items()]
+        input_files = [filename] + sub_filenames
 
         opts = [
             '-map', '0',
@@ -521,8 +520,9 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
             # Don't copy the existing subtitles, we may be running the
             # postprocessor a second time
             '-map', '-0:s',
-            '-c:s', 'mov_text',
         ]
+        if information['ext'] == 'mp4':
+            opts += ['-c:s', 'mov_text']
         for (i, lang) in enumerate(sub_langs):
             opts.extend(['-map', '%d:0' % (i + 1)])
             lang_code = self._conver_lang_code(lang)
@@ -535,7 +535,7 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
         os.remove(encodeFilename(filename))
         os.rename(encodeFilename(temp_filename), encodeFilename(filename))
 
-        return True, information
+        return sub_filenames, information
 
 
 class FFmpegMetadataPP(FFmpegPostProcessor):
@@ -545,7 +545,9 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
             metadata['title'] = info['title']
         if info.get('upload_date') is not None:
             metadata['date'] = info['upload_date']
-        if info.get('uploader') is not None:
+        if info.get('artist') is not None:
+            metadata['artist'] = info['artist']
+        elif info.get('uploader') is not None:
             metadata['artist'] = info['uploader']
         elif info.get('uploader_id') is not None:
             metadata['artist'] = info['uploader_id']
@@ -554,10 +556,12 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
             metadata['comment'] = info['description']
         if info.get('webpage_url') is not None:
             metadata['purl'] = info['webpage_url']
+        if info.get('album') is not None:
+            metadata['album'] = info['album']
 
         if not metadata:
             self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add')
-            return True, info
+            return [], info
 
         filename = info['filepath']
         temp_filename = prepend_extension(filename, 'temp')
@@ -574,38 +578,42 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
         self.run_ffmpeg(filename, temp_filename, options)
         os.remove(encodeFilename(filename))
         os.rename(encodeFilename(temp_filename), encodeFilename(filename))
-        return True, info
+        return [], info
 
 
 class FFmpegMergerPP(FFmpegPostProcessor):
     def run(self, info):
         filename = info['filepath']
+        temp_filename = prepend_extension(filename, 'temp')
         args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0']
         self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % filename)
-        self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args)
-        return True, info
-
-
-class FFmpegAudioFixPP(FFmpegPostProcessor):
-    def run(self, info):
-        filename = info['filepath']
-        temp_filename = prepend_extension(filename, 'temp')
-
-        options = ['-vn', '-acodec', 'copy']
-        self._downloader.to_screen('[ffmpeg] Fixing audio file "%s"' % filename)
-        self.run_ffmpeg(filename, temp_filename, options)
-
-        os.remove(encodeFilename(filename))
+        self.run_ffmpeg_multiple_files(info['__files_to_merge'], temp_filename, args)
         os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+        return info['__files_to_merge'], info
 
-        return True, info
+    def can_merge(self):
+        # TODO: figure out merge-capable ffmpeg version
+        if self.basename != 'avconv':
+            return True
+
+        required_version = '10-0'
+        if is_outdated_version(
+                self._versions[self.basename], required_version):
+            warning = ('Your copy of %s is outdated and unable to properly mux separate video and audio files, '
+                       'youtube-dl will download single file media. '
+                       'Update %s to version %s or newer to fix this.') % (
+                           self.basename, self.basename, required_version)
+            if self._downloader:
+                self._downloader.report_warning(warning)
+            return False
+        return True
 
 
 class FFmpegFixupStretchedPP(FFmpegPostProcessor):
     def run(self, info):
         stretched_ratio = info.get('stretched_ratio')
         if stretched_ratio is None or stretched_ratio == 1:
-            return True, info
+            return [], info
 
         filename = info['filepath']
         temp_filename = prepend_extension(filename, 'temp')
@@ -617,13 +625,13 @@ class FFmpegFixupStretchedPP(FFmpegPostProcessor):
         os.remove(encodeFilename(filename))
         os.rename(encodeFilename(temp_filename), encodeFilename(filename))
 
-        return True, info
+        return [], info
 
 
 class FFmpegFixupM4aPP(FFmpegPostProcessor):
     def run(self, info):
         if info.get('container') != 'm4a_dash':
-            return True, info
+            return [], info
 
         filename = info['filepath']
         temp_filename = prepend_extension(filename, 'temp')
@@ -635,7 +643,7 @@ class FFmpegFixupM4aPP(FFmpegPostProcessor):
         os.remove(encodeFilename(filename))
         os.rename(encodeFilename(temp_filename), encodeFilename(filename))
 
-        return True, info
+        return [], info
 
 
 class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
@@ -652,7 +660,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
             new_format = 'webvtt'
         if subs is None:
             self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to convert')
-            return True, info
+            return [], info
         self._downloader.to_screen('[ffmpeg] Converting subtitles')
         for lang, sub in subs.items():
             ext = sub['ext']
@@ -662,6 +670,30 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
                     'format' % new_ext)
                 continue
             new_file = subtitles_filename(filename, lang, new_ext)
+
+            if ext == 'dfxp' or ext == 'ttml':
+                self._downloader.report_warning(
+                    'You have requested to convert dfxp (TTML) subtitles into another format, '
+                    'which results in style information loss')
+
+                dfxp_file = subtitles_filename(filename, lang, ext)
+                srt_file = subtitles_filename(filename, lang, 'srt')
+
+                with io.open(dfxp_file, 'rt', encoding='utf-8') as f:
+                    srt_data = dfxp2srt(f.read())
+
+                with io.open(srt_file, 'wt', encoding='utf-8') as f:
+                    f.write(srt_data)
+
+                ext = 'srt'
+                subs[lang] = {
+                    'ext': 'srt',
+                    'data': srt_data
+                }
+
+                if new_ext == 'srt':
+                    continue
+
             self.run_ffmpeg(
                 subtitles_filename(filename, lang, ext),
                 new_file, ['-f', new_format])
@@ -672,4 +704,4 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
                     'data': f.read(),
                 }
 
-        return True, info
+        return [], info
diff --git a/youtube_dl/postprocessor/metadatafromtitle.py b/youtube_dl/postprocessor/metadatafromtitle.py
new file mode 100644 (file)
index 0000000..a56077f
--- /dev/null
@@ -0,0 +1,47 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import PostProcessor
+from ..utils import PostProcessingError
+
+
+class MetadataFromTitlePPError(PostProcessingError):
+    pass
+
+
+class MetadataFromTitlePP(PostProcessor):
+    def __init__(self, downloader, titleformat):
+        super(MetadataFromTitlePP, self).__init__(downloader)
+        self._titleformat = titleformat
+        self._titleregex = self.format_to_regex(titleformat)
+
+    def format_to_regex(self, fmt):
+        """
+        Converts a string like
+           '%(title)s - %(artist)s'
+        to a regex like
+           '(?P<title>.+)\ \-\ (?P<artist>.+)'
+        """
+        lastpos = 0
+        regex = ""
+        # replace %(..)s with regex group and escape other string parts
+        for match in re.finditer(r'%\((\w+)\)s', fmt):
+            regex += re.escape(fmt[lastpos:match.start()])
+            regex += r'(?P<' + match.group(1) + '>.+)'
+            lastpos = match.end()
+        if lastpos < len(fmt):
+            regex += re.escape(fmt[lastpos:len(fmt)])
+        return regex
+
+    def run(self, info):
+        title = info['title']
+        match = re.match(self._titleregex, title)
+        if match is None:
+            raise MetadataFromTitlePPError('Could not interpret title of video as "%s"' % self._titleformat)
+        for attribute, value in match.groupdict().items():
+            value = match.group(attribute)
+            info[attribute] = value
+            self._downloader.to_screen('[fromtitle] parsed ' + attribute + ': ' + value)
+
+        return [], info
index f6c63fe97545d86947ef1ef4bf2d70e9ea7144be..7d88e130820e073af1b6fd527390cb1cb5dc8dec 100644 (file)
@@ -3,17 +3,34 @@ from __future__ import unicode_literals
 import os
 import subprocess
 import sys
+import errno
 
 from .common import PostProcessor
-from ..compat import (
-    subprocess_check_output
-)
 from ..utils import (
     check_executable,
     hyphenate_date,
+    version_tuple,
+    PostProcessingError,
+    encodeArgument,
+    encodeFilename,
 )
 
 
+class XAttrMetadataError(PostProcessingError):
+    def __init__(self, code=None, msg='Unknown error'):
+        super(XAttrMetadataError, self).__init__(msg)
+        self.code = code
+
+        # Parsing code and msg
+        if (self.code in (errno.ENOSPC, errno.EDQUOT) or
+                'No space left' in self.msg or 'Disk quota excedded' in self.msg):
+            self.reason = 'NO_SPACE'
+        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
+            self.reason = 'VALUE_TOO_LONG'
+        else:
+            self.reason = 'NOT_SUPPORTED'
+
+
 class XAttrMetadataPP(PostProcessor):
 
     #
@@ -36,8 +53,24 @@ class XAttrMetadataPP(PostProcessor):
             # try the pyxattr module...
             import xattr
 
+            # Unicode arguments are not supported in python-pyxattr until
+            # version 0.5.0
+            # See https://github.com/rg3/youtube-dl/issues/5498
+            pyxattr_required_version = '0.5.0'
+            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
+                self._downloader.report_warning(
+                    'python-pyxattr is detected but is too old. '
+                    'youtube-dl requires %s or above while your version is %s. '
+                    'Falling back to other xattr implementations' % (
+                        pyxattr_required_version, xattr.__version__))
+
+                raise ImportError
+
             def write_xattr(path, key, value):
-                return xattr.setxattr(path, key, value)
+                try:
+                    xattr.set(path, key, value)
+                except EnvironmentError as e:
+                    raise XAttrMetadataError(e.errno, e.strerror)
 
         except ImportError:
             if os.name == 'nt':
@@ -48,8 +81,11 @@ class XAttrMetadataPP(PostProcessor):
                     assert os.path.exists(path)
 
                     ads_fn = path + ":" + key
-                    with open(ads_fn, "wb") as f:
-                        f.write(value)
+                    try:
+                        with open(ads_fn, "wb") as f:
+                            f.write(value)
+                    except EnvironmentError as e:
+                        raise XAttrMetadataError(e.errno, e.strerror)
             else:
                 user_has_setfattr = check_executable("setfattr", ['--version'])
                 user_has_xattr = check_executable("xattr", ['-h'])
@@ -57,12 +93,27 @@ class XAttrMetadataPP(PostProcessor):
                 if user_has_setfattr or user_has_xattr:
 
                     def write_xattr(path, key, value):
+                        value = value.decode('utf-8')
                         if user_has_setfattr:
-                            cmd = ['setfattr', '-n', key, '-v', value, path]
+                            executable = 'setfattr'
+                            opts = ['-n', key, '-v', value]
                         elif user_has_xattr:
-                            cmd = ['xattr', '-w', key, value, path]
-
-                        subprocess_check_output(cmd)
+                            executable = 'xattr'
+                            opts = ['-w', key, value]
+
+                        cmd = ([encodeFilename(executable, True)] +
+                               [encodeArgument(o) for o in opts] +
+                               [encodeFilename(path, True)])
+
+                        try:
+                            p = subprocess.Popen(
+                                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+                        except EnvironmentError as e:
+                            raise XAttrMetadataError(e.errno, e.strerror)
+                        stdout, stderr = p.communicate()
+                        stderr = stderr.decode('utf-8', 'replace')
+                        if p.returncode != 0:
+                            raise XAttrMetadataError(p.returncode, stderr)
 
                 else:
                     # On Unix, and can't find pyxattr, setfattr, or xattr.
@@ -105,8 +156,21 @@ class XAttrMetadataPP(PostProcessor):
                     byte_value = value.encode('utf-8')
                     write_xattr(filename, xattrname, byte_value)
 
-            return True, info
+            return [], info
 
-        except (subprocess.CalledProcessError, OSError):
-            self._downloader.report_error("This filesystem doesn't support extended attributes. (You may have to enable them in your /etc/fstab)")
-            return False, info
+        except XAttrMetadataError as e:
+            if e.reason == 'NO_SPACE':
+                self._downloader.report_warning(
+                    'There\'s no disk space left or disk quota exceeded. ' +
+                    'Extended attributes are not written.')
+            elif e.reason == 'VALUE_TOO_LONG':
+                self._downloader.report_warning(
+                    'Unable to write extended attributes due to too long values.')
+            else:
+                msg = 'This filesystem doesn\'t support extended attributes. '
+                if os.name == 'nt':
+                    msg += 'You need to use NTFS.'
+                else:
+                    msg += '(You may have to enable them in your /etc/fstab)'
+                self._downloader.report_error(msg)
+            return [], info
index d8be4049f5dce0fdd9a61f2aff3c4284d494e598..de3169eef1d6ec29d82a60b2f4b6a68f49d7dd4e 100644 (file)
@@ -65,7 +65,7 @@ def update_self(to_screen, verbose):
     # Check if there is a new version
     try:
         newversion = opener.open(VERSION_URL).read().decode('utf-8').strip()
-    except:
+    except Exception:
         if verbose:
             to_screen(compat_str(traceback.format_exc()))
         to_screen('ERROR: can\'t find the current version. Please try again later.')
@@ -78,7 +78,7 @@ def update_self(to_screen, verbose):
     try:
         versions_info = opener.open(JSON_URL).read().decode('utf-8')
         versions_info = json.loads(versions_info)
-    except:
+    except Exception:
         if verbose:
             to_screen(compat_str(traceback.format_exc()))
         to_screen('ERROR: can\'t obtain versions info. Please try again later.')
index 1f3bfef7d562e3fb0e63db16e644b86819eaaa5c..ed9ed9ed63ec9b40d929f83cb2e56ee4d63f9e7f 100644 (file)
@@ -35,9 +35,9 @@ import zlib
 from .compat import (
     compat_basestring,
     compat_chr,
-    compat_getenv,
     compat_html_entities,
     compat_http_client,
+    compat_kwargs,
     compat_parse_qs,
     compat_socket_create_connection,
     compat_str,
@@ -76,7 +76,7 @@ def preferredencoding():
     try:
         pref = locale.getpreferredencoding()
         'TEST'.encode(pref)
-    except:
+    except Exception:
         pref = 'UTF-8'
 
     return pref
@@ -115,7 +115,7 @@ def write_json_file(obj, fn):
             'encoding': 'utf-8',
         })
 
-    tf = tempfile.NamedTemporaryFile(**args)
+    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
 
     try:
         with tf:
@@ -128,7 +128,7 @@ def write_json_file(obj, fn):
             except OSError:
                 pass
         os.rename(tf.name, fn)
-    except:
+    except Exception:
         try:
             os.remove(tf.name)
         except OSError:
@@ -253,15 +253,12 @@ def sanitize_open(filename, open_mode):
             raise
 
         # In case of error, try to remove win32 forbidden chars
-        alt_filename = os.path.join(
-            re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
-            for path_part in os.path.split(filename)
-        )
+        alt_filename = sanitize_path(filename)
         if alt_filename == filename:
             raise
         else:
             # An exception here should be caught in the caller
-            stream = open(encodeFilename(filename), open_mode)
+            stream = open(encodeFilename(alt_filename), open_mode)
             return (stream, alt_filename)
 
 
@@ -306,11 +303,30 @@ def sanitize_filename(s, restricted=False, is_id=False):
             result = result[2:]
         if result.startswith('-'):
             result = '_' + result[len('-'):]
+        result = result.lstrip('.')
         if not result:
             result = '_'
     return result
 
 
+def sanitize_path(s):
+    """Sanitizes and normalizes path on Windows"""
+    if sys.platform != 'win32':
+        return s
+    drive_or_unc, _ = os.path.splitdrive(s)
+    if sys.version_info < (2, 7) and not drive_or_unc:
+        drive_or_unc, _ = os.path.splitunc(s)
+    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
+    if drive_or_unc:
+        norm_path.pop(0)
+    sanitized_path = [
+        path_part if path_part in ['.', '..'] else re.sub('(?:[/<>:"\\|\\\\?\\*]|\.$)', '#', path_part)
+        for path_part in norm_path]
+    if drive_or_unc:
+        sanitized_path.insert(0, drive_or_unc + os.path.sep)
+    return os.path.join(*sanitized_path)
+
+
 def orderedSet(iterable):
     """ Remove all duplicates from the input iterable """
     res = []
@@ -326,7 +342,7 @@ def _htmlentity_transform(entity):
     if entity in compat_html_entities.name2codepoint:
         return compat_chr(compat_html_entities.name2codepoint[entity])
 
-    mobj = re.match(r'#(x?[0-9]+)', entity)
+    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
     if mobj is not None:
         numstr = mobj.group(1)
         if numstr.startswith('x'):
@@ -349,6 +365,18 @@ def unescapeHTML(s):
         r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s)
 
 
+def get_subprocess_encoding():
+    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
+        # For subprocess calls, encode with locale encoding
+        # Refer to http://stackoverflow.com/a/9951851/35070
+        encoding = preferredencoding()
+    else:
+        encoding = sys.getfilesystemencoding()
+    if encoding is None:
+        encoding = 'utf-8'
+    return encoding
+
+
 def encodeFilename(s, for_subprocess=False):
     """
     @param s The name of the file
@@ -360,21 +388,24 @@ def encodeFilename(s, for_subprocess=False):
     if sys.version_info >= (3, 0):
         return s
 
-    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
-        # Pass '' directly to use Unicode APIs on Windows 2000 and up
-        # (Detecting Windows NT 4 is tricky because 'major >= 4' would
-        # match Windows 9x series as well. Besides, NT 4 is obsolete.)
-        if not for_subprocess:
-            return s
-        else:
-            # For subprocess calls, encode with locale encoding
-            # Refer to http://stackoverflow.com/a/9951851/35070
-            encoding = preferredencoding()
-    else:
-        encoding = sys.getfilesystemencoding()
-    if encoding is None:
-        encoding = 'utf-8'
-    return s.encode(encoding, 'ignore')
+    # Pass '' directly to use Unicode APIs on Windows 2000 and up
+    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
+    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
+    if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
+        return s
+
+    return s.encode(get_subprocess_encoding(), 'ignore')
+
+
+def decodeFilename(b, for_subprocess=False):
+
+    if sys.version_info >= (3, 0):
+        return b
+
+    if not isinstance(b, bytes):
+        return b
+
+    return b.decode(get_subprocess_encoding(), 'ignore')
 
 
 def encodeArgument(s):
@@ -386,6 +417,10 @@ def encodeArgument(s):
     return encodeFilename(s, True)
 
 
+def decodeArgument(b):
+    return decodeFilename(b, True)
+
+
 def decodeOption(optval):
     if optval is None:
         return optval
@@ -430,6 +465,17 @@ def make_HTTPS_handler(params, **kwargs):
         return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
 
 
+def bug_reports_message():
+    if ytdl_is_updateable():
+        update_cmd = 'type  youtube-dl -U  to update'
+    else:
+        update_cmd = 'see  https://yt-dl.org/update  on how to update'
+    msg = '; please report this issue on https://yt-dl.org/bug .'
+    msg += ' Make sure you are using the latest version; %s.' % update_cmd
+    msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
+    return msg
+
+
 class ExtractorError(Exception):
     """Error during info extraction."""
 
@@ -445,13 +491,7 @@ class ExtractorError(Exception):
         if cause:
             msg += ' (caused by %r)' % cause
         if not expected:
-            if ytdl_is_updateable():
-                update_cmd = 'type  youtube-dl -U  to update'
-            else:
-                update_cmd = 'see  https://yt-dl.org/update  on how to update'
-            msg += '; please report this issue on https://yt-dl.org/bug .'
-            msg += ' Make sure you are using the latest version; %s.' % update_cmd
-            msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
+            msg += bug_reports_message()
         super(ExtractorError, self).__init__(msg)
 
         self.traceback = tb
@@ -708,7 +748,8 @@ def unified_strdate(date_str, day_first=True):
     # Replace commas
     date_str = date_str.replace(',', ' ')
     # %z (UTC offset) is only supported in python>=3.2
-    date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
+    if not re.match(r'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str):
+        date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
     # Remove AM/PM + timezone
     date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
 
@@ -737,6 +778,7 @@ def unified_strdate(date_str, day_first=True):
     ]
     if day_first:
         format_expressions.extend([
+            '%d-%m-%Y',
             '%d.%m.%Y',
             '%d/%m/%Y',
             '%d/%m/%y',
@@ -744,6 +786,7 @@ def unified_strdate(date_str, day_first=True):
         ])
     else:
         format_expressions.extend([
+            '%m-%d-%Y',
             '%m.%d.%Y',
             '%m/%d/%Y',
             '%m/%d/%y',
@@ -1079,15 +1122,6 @@ def shell_quote(args):
     return ' '.join(quoted_args)
 
 
-def takewhile_inclusive(pred, seq):
-    """ Like itertools.takewhile, but include the latest evaluated element
-        (the first element so that Not pred(e)) """
-    for e in seq:
-        yield e
-        if not pred(e):
-            return
-
-
 def smuggle_url(url, data):
     """ Pass additional data in a URL for internal use. """
 
@@ -1173,22 +1207,6 @@ def parse_filesize(s):
     return int(float(num_str) * mult)
 
 
-def get_term_width():
-    columns = compat_getenv('COLUMNS', None)
-    if columns:
-        return int(columns)
-
-    try:
-        sp = subprocess.Popen(
-            ['stty', 'size'],
-            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        out, err = sp.communicate()
-        return int(out.split()[1])
-    except:
-        pass
-    return None
-
-
 def month_by_name(name):
     """ Return the number of a month by (locale-independently) English name """
 
@@ -1324,9 +1342,19 @@ def parse_duration(s):
     return res
 
 
-def prepend_extension(filename, ext):
+def prepend_extension(filename, ext, expected_real_ext=None):
     name, real_ext = os.path.splitext(filename)
-    return '{0}.{1}{2}'.format(name, ext, real_ext)
+    return (
+        '{0}.{1}{2}'.format(name, ext, real_ext)
+        if not expected_real_ext or real_ext[1:] == expected_real_ext
+        else '{0}.{1}'.format(filename, ext))
+
+
+def replace_extension(filename, ext, expected_real_ext=None):
+    name, real_ext = os.path.splitext(filename)
+    return '{0}.{1}'.format(
+        name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
+        ext)
 
 
 def check_executable(exe, args=[]):
@@ -1345,7 +1373,7 @@ def get_exe_version(exe, args=['--version'],
     or False if the executable is not present """
     try:
         out, _ = subprocess.Popen(
-            [exe] + args,
+            [encodeArgument(exe)] + args,
             stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
     except OSError:
         return False
@@ -1451,6 +1479,14 @@ def uppercase_escape(s):
         s)
 
 
+def lowercase_escape(s):
+    unicode_escape = codecs.getdecoder('unicode_escape')
+    return re.sub(
+        r'\\u[0-9a-fA-F]{4}',
+        lambda m: unicode_escape(m.group(0))[0],
+        s)
+
+
 def escape_rfc3986(s):
     """Escape non-ASCII characters as suggested by RFC 3986"""
     if sys.version_info < (3, 0) and isinstance(s, compat_str):
@@ -1571,7 +1607,7 @@ def js_to_json(code):
         '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
         [a-zA-Z_][.a-zA-Z_0-9]*
         ''', fix_kv, code)
-    res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
+    res = re.sub(r',(\s*[\]}])', lambda m: m.group(1), res)
     return res
 
 
@@ -1784,3 +1820,77 @@ def match_filter_func(filter_str):
             video_title = info_dict.get('title', info_dict.get('id', 'video'))
             return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
     return _match_func
+
+
+def parse_dfxp_time_expr(time_expr):
+    if not time_expr:
+        return 0.0
+
+    mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
+    if mobj:
+        return float(mobj.group('time_offset'))
+
+    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:\.\d+)?)$', time_expr)
+    if mobj:
+        return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3))
+
+
+def srt_subtitles_timecode(seconds):
+    return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
+
+
+def dfxp2srt(dfxp_data):
+    _x = functools.partial(xpath_with_ns, ns_map={'ttml': 'http://www.w3.org/ns/ttml'})
+
+    def parse_node(node):
+        str_or_empty = functools.partial(str_or_none, default='')
+
+        out = str_or_empty(node.text)
+
+        for child in node:
+            if child.tag == _x('ttml:br'):
+                out += '\n' + str_or_empty(child.tail)
+            elif child.tag == _x('ttml:span'):
+                out += str_or_empty(parse_node(child))
+            else:
+                out += str_or_empty(xml.etree.ElementTree.tostring(child))
+
+        return out
+
+    dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
+    out = []
+    paras = dfxp.findall(_x('.//ttml:p'))
+
+    for para, index in zip(paras, itertools.count(1)):
+        begin_time = parse_dfxp_time_expr(para.attrib['begin'])
+        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
+        if not end_time:
+            end_time = begin_time + parse_dfxp_time_expr(para.attrib['dur'])
+        out.append('%d\n%s --> %s\n%s\n\n' % (
+            index,
+            srt_subtitles_timecode(begin_time),
+            srt_subtitles_timecode(end_time),
+            parse_node(para)))
+
+    return ''.join(out)
+
+
+class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
+    def __init__(self, proxies=None):
+        # Set default handlers
+        for type in ('http', 'https'):
+            setattr(self, '%s_open' % type,
+                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
+                        meth(r, proxy, type))
+        return compat_urllib_request.ProxyHandler.__init__(self, proxies)
+
+    def proxy_open(self, req, proxy, type):
+        req_proxy = req.headers.get('Ytdl-request-proxy')
+        if req_proxy is not None:
+            proxy = req_proxy
+            del req.headers['Ytdl-request-proxy']
+
+        if proxy == '__noproxy__':
+            return None  # No Proxy
+        return compat_urllib_request.ProxyHandler.proxy_open(
+            self, req, proxy, type)
index 5582348bae3d442aac3e51218bad00ab5b71494b..38f00bc9bc2ef476ddb813d7bcd26e5d13f4947d 100644 (file)
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2015.02.28'
+__version__ = '2015.05.15'