clean:
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
- find -name "*.pyc" -delete
+ find . -name "*.pyc" -delete
PREFIX ?= /usr/local
BINDIR ?= $(PREFIX)/bin
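+# Both locations can be overridden when invoking make, e.g. (a sketch, assuming
+# the usual `install` target): make install PREFIX=/opt/youtube-dl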
- [OPTIONS](#options)
- [CONFIGURATION](#configuration)
- [OUTPUT TEMPLATE](#output-template)
+- [FORMAT SELECTION](#format-selection)
- [VIDEO SELECTION](#video-selection)
- [FAQ](#faq)
- [DEVELOPER INSTRUCTIONS](#developer-instructions)
youtube-dl [OPTIONS] URL [URL...]
# OPTIONS
- -h, --help print this help text and exit
- --version print program version and exit
- -U, --update update this program to latest version. Make
- sure that you have sufficient permissions
- (run with sudo if needed)
- -i, --ignore-errors continue on download errors, for example to
- skip unavailable videos in a playlist
- --abort-on-error Abort downloading of further videos (in the
- playlist or the command line) if an error
- occurs
- --dump-user-agent display the current browser identification
- --list-extractors List all supported extractors and the URLs
- they would handle
- --extractor-descriptions Output descriptions of all supported
- extractors
- --default-search PREFIX Use this prefix for unqualified URLs. For
- example "gvsearch2:" downloads two videos
- from google videos for youtube-dl "large
- apple". Use the value "auto" to let
- youtube-dl guess ("auto_warning" to emit a
- warning when guessing). "error" just throws
- an error. The default value "fixup_error"
- repairs broken URLs, but emits an error if
- this is not possible instead of searching.
- --ignore-config Do not read configuration files. When given
- in the global configuration file /etc
- /youtube-dl.conf: Do not read the user
- configuration in ~/.config/youtube-
- dl/config (%APPDATA%/youtube-dl/config.txt
- on Windows)
- --flat-playlist Do not extract the videos of a playlist,
- only list them.
- --no-color Do not emit color codes in output.
+ -h, --help Print this help text and exit
+ --version Print program version and exit
+ -U, --update Update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)
+ -i, --ignore-errors Continue on download errors, for example to skip unavailable videos in a playlist
+ --abort-on-error Abort downloading of further videos (in the playlist or the command line) if an error occurs
+ --dump-user-agent Display the current browser identification
+ --list-extractors List all supported extractors and the URLs they would handle
+ --extractor-descriptions Output descriptions of all supported extractors
+ --default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple".
+ Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The
+ default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.
+ --ignore-config Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: Do not read the user configuration
+ in ~/.config/youtube-dl/config (%APPDATA%/youtube-dl/config.txt on Windows)
+ --flat-playlist Do not extract the videos of a playlist, only list them.
+ --no-color Do not emit color codes in output
## Network Options:
- --proxy URL Use the specified HTTP/HTTPS proxy. Pass in
- an empty string (--proxy "") for direct
- connection
+ --proxy URL Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection
--socket-timeout SECONDS Time to wait before giving up, in seconds
- --source-address IP Client-side IP address to bind to
- (experimental)
- -4, --force-ipv4 Make all connections via IPv4
- (experimental)
- -6, --force-ipv6 Make all connections via IPv6
- (experimental)
+ --source-address IP Client-side IP address to bind to (experimental)
+ -4, --force-ipv4 Make all connections via IPv4 (experimental)
+ -6, --force-ipv6 Make all connections via IPv6 (experimental)
+ --cn-verification-proxy URL Use this proxy to verify the IP address for some Chinese sites. The default proxy specified by --proxy (or none, if the option is
+ not present) is used for the actual downloading. (experimental)
## Video Selection:
- --playlist-start NUMBER playlist video to start at (default is 1)
- --playlist-end NUMBER playlist video to end at (default is last)
- --playlist-items ITEM_SPEC playlist video items to download. Specify
- indices of the videos in the playlist
- seperated by commas like: "--playlist-items
- 1,2,5,8" if you want to download videos
- indexed 1, 2, 5, 8 in the playlist. You can
- specify range: "--playlist-items
- 1-3,7,10-13", it will download the videos
- at index 1, 2, 3, 7, 10, 11, 12 and 13.
- --match-title REGEX download only matching titles (regex or
- caseless sub-string)
- --reject-title REGEX skip download for matching titles (regex or
- caseless sub-string)
+ --playlist-start NUMBER Playlist video to start at (default is 1)
+ --playlist-end NUMBER Playlist video to end at (default is last)
+ --playlist-items ITEM_SPEC Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8"
+ if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify a range: "--playlist-items 1-3,7,10-13", it will
+ download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.
+ --match-title REGEX Download only matching titles (regex or caseless sub-string)
+ --reject-title REGEX Skip download for matching titles (regex or caseless sub-string)
--max-downloads NUMBER Abort after downloading NUMBER files
- --min-filesize SIZE Do not download any videos smaller than
- SIZE (e.g. 50k or 44.6m)
- --max-filesize SIZE Do not download any videos larger than SIZE
- (e.g. 50k or 44.6m)
- --date DATE download only videos uploaded in this date
- --datebefore DATE download only videos uploaded on or before
- this date (i.e. inclusive)
- --dateafter DATE download only videos uploaded on or after
- this date (i.e. inclusive)
- --min-views COUNT Do not download any videos with less than
- COUNT views
- --max-views COUNT Do not download any videos with more than
- COUNT views
- --match-filter FILTER (Experimental) Generic video filter.
- Specify any key (see help for -o for a list
- of available keys) to match if the key is
- present, !key to check if the key is not
- present,key > NUMBER (like "comment_count >
- 12", also works with >=, <, <=, !=, =) to
- compare against a number, and & to require
- multiple matches. Values which are not
- known are excluded unless you put a
- question mark (?) after the operator.For
- example, to only match videos that have
- been liked more than 100 times and disliked
- less than 50 times (or the dislike
- functionality is not available at the given
- service), but who also have a description,
- use --match-filter "like_count > 100 &
+ --min-filesize SIZE Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)
+ --max-filesize SIZE Do not download any videos larger than SIZE (e.g. 50k or 44.6m)
+ --date DATE Download only videos uploaded in this date
+ --datebefore DATE Download only videos uploaded on or before this date (i.e. inclusive)
+ --dateafter DATE Download only videos uploaded on or after this date (i.e. inclusive)
+ --min-views COUNT Do not download any videos with less than COUNT views
+ --max-views COUNT Do not download any videos with more than COUNT views
+ --match-filter FILTER Generic video filter (experimental). Specify any key (see help for -o for a list of available keys) to match if the key is present,
+ !key to check if the key is not present, key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against
+ a number, and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the
+ operator. For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike
+ functionality is not available at the given service), but that also have a description, use --match-filter "like_count > 100 &
dislike_count <? 50 & description" .
- --no-playlist If the URL refers to a video and a
- playlist, download only the video.
- --yes-playlist If the URL refers to a video and a
- playlist, download the playlist.
- --age-limit YEARS download only videos suitable for the given
- age
- --download-archive FILE Download only videos not listed in the
- archive file. Record the IDs of all
- downloaded videos in it.
- --include-ads Download advertisements as well
- (experimental)
+ --no-playlist Download only the video, if the URL refers to a video and a playlist.
+ --yes-playlist Download the playlist, if the URL refers to a video and a playlist.
+ --age-limit YEARS Download only videos suitable for the given age
+ --download-archive FILE Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.
+ --include-ads Download advertisements as well (experimental)
## Download Options:
- -r, --rate-limit LIMIT maximum download rate in bytes per second
- (e.g. 50K or 4.2M)
- -R, --retries RETRIES number of retries (default is 10), or
- "infinite".
- --buffer-size SIZE size of download buffer (e.g. 1024 or 16K)
- (default is 1024)
- --no-resize-buffer do not automatically adjust the buffer
- size. By default, the buffer size is
- automatically resized from an initial value
- of SIZE.
+ -r, --rate-limit LIMIT Maximum download rate in bytes per second (e.g. 50K or 4.2M)
+ -R, --retries RETRIES Number of retries (default is 10), or "infinite".
+ --buffer-size SIZE Size of download buffer (e.g. 1024 or 16K) (default is 1024)
+ --no-resize-buffer Do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.
--playlist-reverse Download playlist videos in reverse order
- --xattr-set-filesize (experimental) set file xattribute
- ytdl.filesize with expected filesize
- --hls-prefer-native (experimental) Use the native HLS
- downloader instead of ffmpeg.
- --external-downloader COMMAND (experimental) Use the specified external
- downloader. Currently supports
- aria2c,curl,wget
+ --xattr-set-filesize Set file xattribute ytdl.filesize with expected filesize (experimental)
+ --hls-prefer-native Use the native HLS downloader instead of ffmpeg (experimental)
+ --external-downloader COMMAND Use the specified external downloader. Currently supports aria2c,curl,wget
+ --external-downloader-args ARGS Give these arguments to the external downloader
## Filesystem Options:
- -a, --batch-file FILE file containing URLs to download ('-' for
- stdin)
- --id use only video ID in file name
- -o, --output TEMPLATE output filename template. Use %(title)s to
- get the title, %(uploader)s for the
- uploader name, %(uploader_id)s for the
- uploader nickname if different,
- %(autonumber)s to get an automatically
- incremented number, %(ext)s for the
- filename extension, %(format)s for the
- format description (like "22 - 1280x720" or
- "HD"), %(format_id)s for the unique id of
- the format (like Youtube's itags: "137"),
- %(upload_date)s for the upload date
- (YYYYMMDD), %(extractor)s for the provider
- (youtube, metacafe, etc), %(id)s for the
- video id, %(playlist_title)s,
- %(playlist_id)s, or %(playlist)s (=title if
- present, ID otherwise) for the playlist the
- video is in, %(playlist_index)s for the
- position in the playlist. %(height)s and
- %(width)s for the width and height of the
- video format. %(resolution)s for a textual
- description of the resolution of the video
- format. %% for a literal percent. Use - to
- output to stdout. Can also be used to
- download to a different directory, for
- example with -o '/my/downloads/%(uploader)s
- /%(title)s-%(id)s.%(ext)s' .
- --autonumber-size NUMBER Specifies the number of digits in
- %(autonumber)s when it is present in output
- filename template or --auto-number option
- is given
- --restrict-filenames Restrict filenames to only ASCII
- characters, and avoid "&" and spaces in
- filenames
- -A, --auto-number [deprecated; use -o
- "%(autonumber)s-%(title)s.%(ext)s" ] number
- downloaded files starting from 00000
- -t, --title [deprecated] use title in file name
- (default)
- -l, --literal [deprecated] alias of --title
- -w, --no-overwrites do not overwrite files
- -c, --continue force resume of partially downloaded files.
- By default, youtube-dl will resume
- downloads if possible.
- --no-continue do not resume partially downloaded files
- (restart from beginning)
- --no-part do not use .part files - write directly
- into output file
- --no-mtime do not use the Last-modified header to set
- the file modification time
- --write-description write video description to a .description
- file
- --write-info-json write video metadata to a .info.json file
- --write-annotations write video annotations to a .annotation
- file
- --load-info FILE json file containing the video information
- (created with the "--write-json" option)
- --cookies FILE file to read cookies from and dump cookie
- jar in
- --cache-dir DIR Location in the filesystem where youtube-dl
- can store some downloaded information
- permanently. By default $XDG_CACHE_HOME
- /youtube-dl or ~/.cache/youtube-dl . At the
- moment, only YouTube player files (for
- videos with obfuscated signatures) are
- cached, but that may change.
+ -a, --batch-file FILE File containing URLs to download ('-' for stdin)
+ --id Use only video ID in file name
+ -o, --output TEMPLATE Output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader
+ nickname if different, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(format)s for
+ the format description (like "22 - 1280x720" or "HD"), %(format_id)s for the unique id of the format (like YouTube's itags: "137"),
+ %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id,
+ %(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in,
+ %(playlist_index)s for the position in the playlist. %(height)s and %(width)s for the width and height of the video format.
+ %(resolution)s for a textual description of the resolution of the video format. %% for a literal percent. Use - to output to stdout.
+ Can also be used to download to a different directory, for example with -o '/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
+ --autonumber-size NUMBER Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given
+ --restrict-filenames Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames
+ -A, --auto-number [deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] Number downloaded files starting from 00000
+ -t, --title [deprecated] Use title in file name (default)
+ -l, --literal [deprecated] Alias of --title
+ -w, --no-overwrites Do not overwrite files
+ -c, --continue Force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.
+ --no-continue Do not resume partially downloaded files (restart from beginning)
+ --no-part Do not use .part files - write directly into output file
+ --no-mtime Do not use the Last-modified header to set the file modification time
+ --write-description Write video description to a .description file
+ --write-info-json Write video metadata to a .info.json file
+ --write-annotations Write video annotations to a .annotations.xml file
+ --load-info FILE JSON file containing the video information (created with the "--write-info-json" option)
+ --cookies FILE File to read cookies from and dump cookie jar in
+ --cache-dir DIR Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl
+ or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may
+ change.
--no-cache-dir Disable filesystem caching
--rm-cache-dir Delete all filesystem cache files
## Thumbnail images:
- --write-thumbnail write thumbnail image to disk
- --write-all-thumbnails write all thumbnail image formats to disk
- --list-thumbnails Simulate and list all available thumbnail
- formats
+ --write-thumbnail Write thumbnail image to disk
+ --write-all-thumbnails Write all thumbnail image formats to disk
+ --list-thumbnails Simulate and list all available thumbnail formats
## Verbosity / Simulation Options:
- -q, --quiet activates quiet mode
+ -q, --quiet Activate quiet mode
--no-warnings Ignore warnings
- -s, --simulate do not download the video and do not write
- anything to disk
- --skip-download do not download the video
- -g, --get-url simulate, quiet but print URL
- -e, --get-title simulate, quiet but print title
- --get-id simulate, quiet but print id
- --get-thumbnail simulate, quiet but print thumbnail URL
- --get-description simulate, quiet but print video description
- --get-duration simulate, quiet but print video length
- --get-filename simulate, quiet but print output filename
- --get-format simulate, quiet but print output format
- -j, --dump-json simulate, quiet but print JSON information.
- See --output for a description of available
- keys.
- -J, --dump-single-json simulate, quiet but print JSON information
- for each command-line argument. If the URL
- refers to a playlist, dump the whole
- playlist information in a single line.
- --print-json Be quiet and print the video information as
- JSON (video is still being downloaded).
- --newline output progress bar as new lines
- --no-progress do not print progress bar
- --console-title display progress in console titlebar
- -v, --verbose print various debugging information
- --dump-intermediate-pages print downloaded pages to debug problems
- (very verbose)
- --write-pages Write downloaded intermediary pages to
- files in the current directory to debug
- problems
+ -s, --simulate Do not download the video and do not write anything to disk
+ --skip-download Do not download the video
+ -g, --get-url Simulate, quiet but print URL
+ -e, --get-title Simulate, quiet but print title
+ --get-id Simulate, quiet but print id
+ --get-thumbnail Simulate, quiet but print thumbnail URL
+ --get-description Simulate, quiet but print video description
+ --get-duration Simulate, quiet but print video length
+ --get-filename Simulate, quiet but print output filename
+ --get-format Simulate, quiet but print output format
+ -j, --dump-json Simulate, quiet but print JSON information. See --output for a description of available keys.
+ -J, --dump-single-json Simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist
+ information in a single line.
+ --print-json Be quiet and print the video information as JSON (video is still being downloaded).
+ --newline Output progress bar as new lines
+ --no-progress Do not print progress bar
+ --console-title Display progress in console titlebar
+ -v, --verbose Print various debugging information
+ --dump-pages Print downloaded pages to debug problems (very verbose)
+ --write-pages Write downloaded intermediary pages to files in the current directory to debug problems
--print-traffic Display sent and read HTTP traffic
- -C, --call-home Contact the youtube-dl server for
- debugging.
- --no-call-home Do NOT contact the youtube-dl server for
- debugging.
+ -C, --call-home Contact the youtube-dl server for debugging
+ --no-call-home Do NOT contact the youtube-dl server for debugging
## Workarounds:
--encoding ENCODING Force the specified encoding (experimental)
- --no-check-certificate Suppress HTTPS certificate validation.
- --prefer-insecure Use an unencrypted connection to retrieve
- information about the video. (Currently
- supported only for YouTube)
- --user-agent UA specify a custom user agent
- --referer URL specify a custom referer, use if the video
- access is restricted to one domain
- --add-header FIELD:VALUE specify a custom HTTP header and its value,
- separated by a colon ':'. You can use this
- option multiple times
- --bidi-workaround Work around terminals that lack
- bidirectional text support. Requires bidiv
- or fribidi executable in PATH
- --sleep-interval SECONDS Number of seconds to sleep before each
- download.
+ --no-check-certificate Suppress HTTPS certificate validation
+ --prefer-insecure Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)
+ --user-agent UA Specify a custom user agent
+ --referer URL Specify a custom referer, use if the video access is restricted to one domain
+ --add-header FIELD:VALUE Specify a custom HTTP header and its value, separated by a colon ':'. You can use this option multiple times
+ --bidi-workaround Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH
+ --sleep-interval SECONDS Number of seconds to sleep before each download.
## Video Format Options:
- -f, --format FORMAT video format code, specify the order of
- preference using slashes, as in -f 22/17/18
- . Instead of format codes, you can select
- by extension for the extensions aac, m4a,
- mp3, mp4, ogg, wav, webm. You can also use
- the special names "best", "bestvideo",
- "bestaudio", "worst". You can filter the
- video results by putting a condition in
- brackets, as in -f "best[height=720]" (or
- -f "[filesize>10M]"). This works for
- filesize, height, width, tbr, abr, vbr,
- asr, and fps and the comparisons <, <=, >,
- >=, =, != and for ext, acodec, vcodec,
- container, and protocol and the comparisons
- =, != . Formats for which the value is not
- known are excluded unless you put a
- question mark (?) after the operator. You
- can combine format filters, so -f "[height
- <=? 720][tbr>500]" selects up to 720p
- videos (or videos where the height is not
- known) with a bitrate of at least 500
- KBit/s. By default, youtube-dl will pick
- the best quality. Use commas to download
- multiple audio formats, such as -f
- 136/137/mp4/bestvideo,140/m4a/bestaudio.
- You can merge the video and audio of two
- formats into a single file using -f <video-
- format>+<audio-format> (requires ffmpeg or
- avconv), for example -f
- bestvideo+bestaudio.
- --all-formats download all available video formats
- --prefer-free-formats prefer free video formats unless a specific
- one is requested
- --max-quality FORMAT highest quality format to download
- -F, --list-formats list all available formats
- --youtube-skip-dash-manifest Do not download the DASH manifest on
- YouTube videos
- --merge-output-format FORMAT If a merge is required (e.g.
- bestvideo+bestaudio), output to given
- container format. One of mkv, mp4, ogg,
- webm, flv.Ignored if no merge is required
+ -f, --format FORMAT Video format code, see the "FORMAT SELECTION" section for all the info
+ --all-formats Download all available video formats
+ --prefer-free-formats Prefer free video formats unless a specific one is requested
+ -F, --list-formats List all available formats
+ --youtube-skip-dash-manifest Do not download the DASH manifest on YouTube videos
+ --merge-output-format FORMAT If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv. Ignored if no
+ merge is required
## Subtitle Options:
- --write-sub write subtitle file
- --write-auto-sub write automatic subtitle file (youtube
- only)
- --all-subs downloads all the available subtitles of
- the video
- --list-subs lists all available subtitles for the video
- --sub-format FORMAT subtitle format, accepts formats
- preference, for example: "ass/srt/best"
- --sub-lang LANGS languages of the subtitles to download
- (optional) separated by commas, use IETF
- language tags like 'en,pt'
+ --write-sub Write subtitle file
+ --write-auto-sub Write automatic subtitle file (YouTube only)
+ --all-subs Download all the available subtitles of the video
+ --list-subs List all available subtitles for the video
+ --sub-format FORMAT Subtitle format, accepts formats preference, for example: "srt" or "ass/srt/best"
+ --sub-lang LANGS Languages of the subtitles to download (optional) separated by commas, use IETF language tags like 'en,pt'
## Authentication Options:
- -u, --username USERNAME login with this account ID
- -p, --password PASSWORD account password. If this option is left
- out, youtube-dl will ask interactively.
- -2, --twofactor TWOFACTOR two-factor auth code
- -n, --netrc use .netrc authentication data
- --video-password PASSWORD video password (vimeo, smotri)
+ -u, --username USERNAME Login with this account ID
+ -p, --password PASSWORD Account password. If this option is left out, youtube-dl will ask interactively.
+ -2, --twofactor TWOFACTOR Two-factor auth code
+ -n, --netrc Use .netrc authentication data
+ --video-password PASSWORD Video password (vimeo, smotri)
## Post-processing Options:
- -x, --extract-audio convert video files to audio-only files
- (requires ffmpeg or avconv and ffprobe or
- avprobe)
- --audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a",
- "opus", or "wav"; "best" by default
- --audio-quality QUALITY ffmpeg/avconv audio quality specification,
- insert a value between 0 (better) and 9
- (worse) for VBR or a specific bitrate like
- 128K (default 5)
- --recode-video FORMAT Encode the video to another format if
- necessary (currently supported:
- mp4|flv|ogg|webm|mkv)
- -k, --keep-video keeps the video file on disk after the
- post-processing; the video is erased by
- default
- --no-post-overwrites do not overwrite post-processed files; the
- post-processed files are overwritten by
- default
- --embed-subs embed subtitles in the video (only for mp4
- videos)
- --embed-thumbnail embed thumbnail in the audio as cover art
- --add-metadata write metadata to the video file
- --xattrs write metadata to the video file's xattrs
- (using dublin core and xdg standards)
- --fixup POLICY Automatically correct known faults of the
- file. One of never (do nothing), warn (only
- emit a warning), detect_or_warn(the
- default; fix file if we can, warn
- otherwise)
- --prefer-avconv Prefer avconv over ffmpeg for running the
- postprocessors (default)
- --prefer-ffmpeg Prefer ffmpeg over avconv for running the
- postprocessors
- --ffmpeg-location PATH Location of the ffmpeg/avconv binary;
- either the path to the binary or its
- containing directory.
- --exec CMD Execute a command on the file after
- downloading, similar to find's -exec
- syntax. Example: --exec 'adb push {}
- /sdcard/Music/ && rm {}'
- --convert-subtitles FORMAT Convert the subtitles to other format
- (currently supported: srt|ass|vtt)
+ -x, --extract-audio Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)
+ --audio-format FORMAT Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "best" by default
+ --audio-quality QUALITY Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default
+ 5)
+ --recode-video FORMAT Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)
+ -k, --keep-video Keep the video file on disk after the post-processing; the video is erased by default
+ --no-post-overwrites Do not overwrite post-processed files; the post-processed files are overwritten by default
+ --embed-subs Embed subtitles in the video (only for mkv and mp4 videos)
+ --embed-thumbnail Embed thumbnail in the audio as cover art
+ --add-metadata Write metadata to the video file
+ --metadata-from-title FORMAT Parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed
+ parameters replace existing values. Additional templates: %(album), %(artist). Example: --metadata-from-title "%(artist)s -
+ %(title)s" matches a title like "Coldplay - Paradise"
+ --xattrs Write metadata to the video file's xattrs (using dublin core and xdg standards)
+ --fixup POLICY Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn (the default;
+ fix file if we can, warn otherwise)
+ --prefer-avconv Prefer avconv over ffmpeg for running the postprocessors (default)
+ --prefer-ffmpeg Prefer ffmpeg over avconv for running the postprocessors
+ --ffmpeg-location PATH Location of the ffmpeg/avconv binary; either the path to the binary or its containing directory.
+ --exec CMD Execute a command on the file after downloading, similar to find's -exec syntax. Example: --exec 'adb push {} /sdcard/Music/ && rm
+ {}'
+ --convert-subtitles FORMAT Convert the subtitles to other format (currently supported: srt|ass|vtt)
# CONFIGURATION
youtube-dl_test_video_.mp4 # A simple file name
```
+# FORMAT SELECTION
+
+By default youtube-dl tries to download the best quality, but sometimes you may want to download a different format.
+The simplest case is requesting a specific format, for example `-f 22`. You can get the list of available formats using `--list-formats`, you can also use a file extension (currently it supports aac, m4a, mp3, mp4, ogg, wav, webm) or the special names `best`, `bestvideo`, `bestaudio` and `worst`.
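+
+For instance, a minimal session could look like this (a sketch; the URL is the example video used elsewhere in this document, and format code 22 is only one of the codes `--list-formats` may report):
+
+```
+# inspect the available formats, then request one by its format code
+youtube-dl --list-formats 'https://www.youtube.com/watch?v=BaW_jenozKc'
+youtube-dl -f 22 'https://www.youtube.com/watch?v=BaW_jenozKc'
+```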
+
+If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes, as in `-f 22/17/18`. You can also filter the video results by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`). This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec, vcodec, container, and protocol and the comparisons =, != . Formats for which the value is not known are excluded unless you put a question mark (?) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. Use commas to download multiple formats, such as `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`. You can merge the video and audio of two formats into a single file using `-f <video-format>+<audio-format>` (requires ffmpeg or avconv), for example `-f bestvideo+bestaudio`.
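+
+As a concrete sketch combining a filter with merging (this needs ffmpeg or avconv, as noted above):
+
+```
+# best video stream that is at most 720p tall (or of unknown height), merged with the best audio
+youtube-dl -f 'bestvideo[height<=?720]+bestaudio' 'https://www.youtube.com/watch?v=BaW_jenozKc'
+```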
+
+Since the end of April 2015 and version 2015.04.26 youtube-dl uses `-f bestvideo+bestaudio/best` as the default format selection (see #5447, #5456). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading the best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some DASH formats (for example, if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify the output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded and muxed.
+
+If you want to preserve the old format selection behavior (prior to youtube-dl 2015.04.26), i.e. you want to download best available quality media served as a single file, you should explicitly specify your choice with `-f best`. You may want to add it to the [configuration file](#configuration) in order not to type it every time you run youtube-dl.
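+
+For example, a hypothetical `~/.config/youtube-dl/config` restoring the old behavior would contain just the line:
+
+```
+-f best
+```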
+
# VIDEO SELECTION
Videos can be filtered by their upload date using the options `--date`, `--datebefore` or `--dateafter`, they accept dates in two formats:
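+
+For example, using the absolute YYYYMMDD form that the %(upload_date)s output template key also uses (a sketch):
+
+```
+# only keep videos uploaded during 2014
+youtube-dl --dateafter 20140101 --datebefore 20141231 'https://www.youtube.com/watch?v=BaW_jenozKc'
+```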
If you have installed youtube-dl with a package manager, pip, setup.py or a tarball, please use that to update. Note that Ubuntu packages do not seem to get updated anymore. Since we are not affiliated with Ubuntu, there is little we can do. Feel free to [report bugs](https://bugs.launchpad.net/ubuntu/+source/youtube-dl/+filebug) to the [Ubuntu packaging guys](mailto:ubuntu-motu@lists.ubuntu.com?subject=outdated%20version%20of%20youtube-dl) - all they have to do is update the package to a somewhat recent version. See above for a way to update.
-### Do I always have to pass in `--max-quality FORMAT`, or `-citw`?
+### Do I always have to pass `-citw`?
-By default, youtube-dl intends to have the best options (incidentally, if you have a convincing case that these should be different, [please file an issue where you explain that](https://yt-dl.org/bug)). Therefore, it is unnecessary and sometimes harmful to copy long option strings from webpages. In particular, `--max-quality` *limits* the video quality (so if you want the best quality, do NOT pass it in), and the only option out of `-citw` that is regularly useful is `-i`.
+By default, youtube-dl intends to have the best options (incidentally, if you have a convincing case that these should be different, [please file an issue where you explain that](https://yt-dl.org/bug)). Therefore, it is unnecessary and sometimes harmful to copy long option strings from webpages. In particular, the only option out of `-citw` that is regularly useful is `-i`.
### Can you please put the -b option back?
YouTube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
+### Video URL contains an ampersand and I'm getting some strange output `[1] 2839` or `'v' is not recognized as an internal or external command` ###
+
+That's actually the output from your shell. Since the ampersand is one of the special shell characters, it's interpreted by the shell, preventing you from passing the whole URL to youtube-dl. To keep your shell from interpreting the ampersands (or any other special characters) you have to either put the whole URL in quotes or escape them with a backslash (which approach works depends on your shell).
+
+For example if your URL is https://www.youtube.com/watch?t=4&v=BaW_jenozKc you should end up with the following command:
+
+```youtube-dl 'https://www.youtube.com/watch?t=4&v=BaW_jenozKc'```
+
+or
+
+```youtube-dl https://www.youtube.com/watch?t=4\&v=BaW_jenozKc```
+
+On Windows you have to use double quotes:
+
+```youtube-dl "https://www.youtube.com/watch?t=4&v=BaW_jenozKc"```
+
### ExtractorError: Could not find JS function u'OF'
In February 2015, the new YouTube player contained a character sequence in a string that was misinterpreted by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
+### HTTP Error 429: Too Many Requests or 402: Payment Required
+
+These two error codes indicate that the service is blocking your IP address because of overuse. Contact the service and ask them to unblock your IP address, or - if you have acquired a whitelisted IP address already - use the [`--proxy` or `--source-address` options](#network-options) to select another IP address.
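+
+For example (the proxy address below is a hypothetical placeholder, not a real service):
+
+```youtube-dl --proxy http://203.0.113.5:3128 'https://www.youtube.com/watch?v=BaW_jenozKc'```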
+
### SyntaxError: Non-ASCII character ###
The error
Support requests for services that **do** purchase the rights to distribute their content are perfectly fine though. If in doubt, you can simply include a source that mentions the legitimate purchase of content.
+### How can I speed up work on my issue?
+
+(Also known as: Help, my important issue is not being solved!) The youtube-dl core developer team is quite small. While we do our best to solve as many issues as possible, sometimes that can take quite a while. To speed up your issue, here's what you can do:
+
+First of all, please do report the issue [at our issue tracker](https://yt-dl.org/bugs). That allows us to coordinate all efforts by users and developers, and serves as a unified point. Unfortunately, the youtube-dl project has grown too large to use personal email as an effective communication channel.
+
+Please read the [bug reporting instructions](#bugs) below. A lot of bug reports lack the necessary information. If you can, offer proxy, VPN, or shell access to the youtube-dl developers. If you are able to, test the issue from multiple computers in multiple countries to exclude local censorship or misconfiguration issues.
+
+If nobody is interested in solving your issue, you are welcome to take matters into your own hands and submit a pull request (or coerce/pay somebody else to do so).
+
+Feel free to bump the issue from time to time by writing a small comment ("Issue is still present in youtube-dl version ... from France, but fixed from Belgium"), but please not more than once a month. Please do not declare your issue as `important` or `urgent`.
+
### How can I detect whether a given URL is supported by youtube-dl?
For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports a URL of a service in that list as unsupported. In that case, simply report a bug.
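+
+A quick check from the shell (a sketch; it relies on youtube-dl exiting with a non-zero status when it cannot extract a video):
+
+```youtube-dl --simulate --quiet 'http://example.com/video/1234567' && echo supported || echo 'not supported (or extraction failed)'```
+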
From a Python program, you can embed youtube-dl in a more powerful fashion, like this:
```python
+from __future__ import unicode_literals
import youtube_dl
ydl_opts = {}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
```
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
```python
+from __future__ import unicode_literals
import youtube_dl


class MyLogger(object):
    def debug(self, msg):
        pass

    def warning(self, msg):
        pass

    def error(self, msg):
        print(msg)


def my_hook(d):
    if d['status'] == 'finished':
        print('Done downloading, now converting ...')


ydl_opts = {
    'format': 'bestaudio/best',
    'postprocessors': [{
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
        'preferredquality': '192',
    }],
    'logger': MyLogger(),
    'progress_hooks': [my_hook],
}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
```
For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the -v flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
-Site support requests **must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
+If your server has multiple IPs or you suspect censorship, adding --call-home may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
+
+**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
### Are you using the latest version?
- OPTIONS
- CONFIGURATION
- OUTPUT TEMPLATE
+- FORMAT SELECTION
- VIDEO SELECTION
- FAQ
- DEVELOPER INSTRUCTIONS
- BUGS
- COPYRIGHT
+
+
INSTALLATION
-============
+
To install it right away for all UNIX users (Linux, OS X, etc.), type:
Windows users can download a .exe file and place it in their home
directory or any other location on their PATH.
-OS X users can install youtube-dl with Homebrew.
+OS X users can install YOUTUBE-DL with Homebrew.
brew install youtube-dl
including PGP signatures, see
https://rg3.github.io/youtube-dl/download.html .
+
+
DESCRIPTION
-===========
-youtube-dl is a small command-line program to download videos from
+
+YOUTUBE-DL is a small command-line program to download videos from
YouTube.com and a few more sites. It requires the Python interpreter,
version 2.6, 2.7, or 3.2+, and it is not platform specific. It should
work on your Unix box, on Windows or on Mac OS X. It is released to the
youtube-dl [OPTIONS] URL [URL...]
+
+
OPTIONS
-=======
-
- -h, --help print this help text and exit
- --version print program version and exit
- -U, --update update this program to latest version. Make
- sure that you have sufficient permissions
- (run with sudo if needed)
- -i, --ignore-errors continue on download errors, for example to
- skip unavailable videos in a playlist
- --abort-on-error Abort downloading of further videos (in the
- playlist or the command line) if an error
- occurs
- --dump-user-agent display the current browser identification
- --list-extractors List all supported extractors and the URLs
- they would handle
- --extractor-descriptions Output descriptions of all supported
- extractors
- --default-search PREFIX Use this prefix for unqualified URLs. For
- example "gvsearch2:" downloads two videos
- from google videos for youtube-dl "large
- apple". Use the value "auto" to let
- youtube-dl guess ("auto_warning" to emit a
- warning when guessing). "error" just throws
- an error. The default value "fixup_error"
- repairs broken URLs, but emits an error if
- this is not possible instead of searching.
- --ignore-config Do not read configuration files. When given
- in the global configuration file /etc
- /youtube-dl.conf: Do not read the user
- configuration in ~/.config/youtube-
- dl/config (%APPDATA%/youtube-dl/config.txt
- on Windows)
- --flat-playlist Do not extract the videos of a playlist,
- only list them.
- --no-color Do not emit color codes in output.
+
+
+ -h, --help Print this help text and exit
+ --version Print program version and exit
+ -U, --update Update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)
+ -i, --ignore-errors Continue on download errors, for example to skip unavailable videos in a playlist
+ --abort-on-error Abort downloading of further videos (in the playlist or the command line) if an error occurs
+ --dump-user-agent Display the current browser identification
+ --list-extractors List all supported extractors and the URLs they would handle
+ --extractor-descriptions Output descriptions of all supported extractors
+ --default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple".
+ Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The
+ default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.
+ --ignore-config Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: Do not read the user configuration
+ in ~/.config/youtube-dl/config (%APPDATA%/youtube-dl/config.txt on Windows)
+ --flat-playlist Do not extract the videos of a playlist, only list them.
+ --no-color Do not emit color codes in output
+
Network Options:
-----------------
- --proxy URL Use the specified HTTP/HTTPS proxy. Pass in
- an empty string (--proxy "") for direct
- connection
+ --proxy URL Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection
--socket-timeout SECONDS Time to wait before giving up, in seconds
- --source-address IP Client-side IP address to bind to
- (experimental)
- -4, --force-ipv4 Make all connections via IPv4
- (experimental)
- -6, --force-ipv6 Make all connections via IPv6
- (experimental)
+ --source-address IP Client-side IP address to bind to (experimental)
+ -4, --force-ipv4 Make all connections via IPv4 (experimental)
+ -6, --force-ipv6 Make all connections via IPv6 (experimental)
+ --cn-verification-proxy URL Use this proxy to verify the IP address for some Chinese sites. The default proxy specified by --proxy (or none, if the option is
+ not present) is used for the actual downloading. (experimental)
+
Video Selection:
-----------------
-
- --playlist-start NUMBER playlist video to start at (default is 1)
- --playlist-end NUMBER playlist video to end at (default is last)
- --playlist-items ITEM_SPEC playlist video items to download. Specify
- indices of the videos in the playlist
- seperated by commas like: "--playlist-items
- 1,2,5,8" if you want to download videos
- indexed 1, 2, 5, 8 in the playlist. You can
- specify range: "--playlist-items
- 1-3,7,10-13", it will download the videos
- at index 1, 2, 3, 7, 10, 11, 12 and 13.
- --match-title REGEX download only matching titles (regex or
- caseless sub-string)
- --reject-title REGEX skip download for matching titles (regex or
- caseless sub-string)
+
+ --playlist-start NUMBER Playlist video to start at (default is 1)
+ --playlist-end NUMBER Playlist video to end at (default is last)
+ --playlist-items ITEM_SPEC Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8"
+ if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify a range: "--playlist-items 1-3,7,10-13", it will
+ download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.
+ --match-title REGEX Download only matching titles (regex or caseless sub-string)
+ --reject-title REGEX Skip download for matching titles (regex or caseless sub-string)
--max-downloads NUMBER Abort after downloading NUMBER files
- --min-filesize SIZE Do not download any videos smaller than
- SIZE (e.g. 50k or 44.6m)
- --max-filesize SIZE Do not download any videos larger than SIZE
- (e.g. 50k or 44.6m)
- --date DATE download only videos uploaded in this date
- --datebefore DATE download only videos uploaded on or before
- this date (i.e. inclusive)
- --dateafter DATE download only videos uploaded on or after
- this date (i.e. inclusive)
- --min-views COUNT Do not download any videos with less than
- COUNT views
- --max-views COUNT Do not download any videos with more than
- COUNT views
- --match-filter FILTER (Experimental) Generic video filter.
- Specify any key (see help for -o for a list
- of available keys) to match if the key is
- present, !key to check if the key is not
- present,key > NUMBER (like "comment_count >
- 12", also works with >=, <, <=, !=, =) to
- compare against a number, and & to require
- multiple matches. Values which are not
- known are excluded unless you put a
- question mark (?) after the operator.For
- example, to only match videos that have
- been liked more than 100 times and disliked
- less than 50 times (or the dislike
- functionality is not available at the given
- service), but who also have a description,
- use --match-filter "like_count > 100 &
+ --min-filesize SIZE Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)
+ --max-filesize SIZE Do not download any videos larger than SIZE (e.g. 50k or 44.6m)
+ --date DATE Download only videos uploaded in this date
+ --datebefore DATE Download only videos uploaded on or before this date (i.e. inclusive)
+ --dateafter DATE Download only videos uploaded on or after this date (i.e. inclusive)
+ --min-views COUNT Do not download any videos with less than COUNT views
+ --max-views COUNT Do not download any videos with more than COUNT views
+ --match-filter FILTER Generic video filter (experimental). Specify any key (see help for -o for a list of available keys) to match if the key is present,
+ !key to check if the key is not present, key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against
+ a number, and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the
+ operator. For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike
+ functionality is not available at the given service), but that also have a description, use --match-filter "like_count > 100 &
dislike_count <? 50 & description" .
- --no-playlist If the URL refers to a video and a
- playlist, download only the video.
- --yes-playlist If the URL refers to a video and a
- playlist, download the playlist.
- --age-limit YEARS download only videos suitable for the given
- age
- --download-archive FILE Download only videos not listed in the
- archive file. Record the IDs of all
- downloaded videos in it.
- --include-ads Download advertisements as well
- (experimental)
+ --no-playlist Download only the video, if the URL refers to a video and a playlist.
+ --yes-playlist Download the playlist, if the URL refers to a video and a playlist.
+ --age-limit YEARS Download only videos suitable for the given age
+ --download-archive FILE Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.
+ --include-ads Download advertisements as well (experimental)
+
Download Options:
------------------
-
- -r, --rate-limit LIMIT maximum download rate in bytes per second
- (e.g. 50K or 4.2M)
- -R, --retries RETRIES number of retries (default is 10), or
- "infinite".
- --buffer-size SIZE size of download buffer (e.g. 1024 or 16K)
- (default is 1024)
- --no-resize-buffer do not automatically adjust the buffer
- size. By default, the buffer size is
- automatically resized from an initial value
- of SIZE.
+
+ -r, --rate-limit LIMIT Maximum download rate in bytes per second (e.g. 50K or 4.2M)
+ -R, --retries RETRIES Number of retries (default is 10), or "infinite".
+ --buffer-size SIZE Size of download buffer (e.g. 1024 or 16K) (default is 1024)
+ --no-resize-buffer Do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.
--playlist-reverse Download playlist videos in reverse order
- --xattr-set-filesize (experimental) set file xattribute
- ytdl.filesize with expected filesize
- --hls-prefer-native (experimental) Use the native HLS
- downloader instead of ffmpeg.
- --external-downloader COMMAND (experimental) Use the specified external
- downloader. Currently supports
- aria2c,curl,wget
+ --xattr-set-filesize Set file xattribute ytdl.filesize with expected filesize (experimental)
+ --hls-prefer-native Use the native HLS downloader instead of ffmpeg (experimental)
+ --external-downloader COMMAND Use the specified external downloader. Currently supports aria2c,curl,wget
+ --external-downloader-args ARGS Give these arguments to the external downloader
+
Filesystem Options:
--------------------
-
- -a, --batch-file FILE file containing URLs to download ('-' for
- stdin)
- --id use only video ID in file name
- -o, --output TEMPLATE output filename template. Use %(title)s to
- get the title, %(uploader)s for the
- uploader name, %(uploader_id)s for the
- uploader nickname if different,
- %(autonumber)s to get an automatically
- incremented number, %(ext)s for the
- filename extension, %(format)s for the
- format description (like "22 - 1280x720" or
- "HD"), %(format_id)s for the unique id of
- the format (like Youtube's itags: "137"),
- %(upload_date)s for the upload date
- (YYYYMMDD), %(extractor)s for the provider
- (youtube, metacafe, etc), %(id)s for the
- video id, %(playlist_title)s,
- %(playlist_id)s, or %(playlist)s (=title if
- present, ID otherwise) for the playlist the
- video is in, %(playlist_index)s for the
- position in the playlist. %(height)s and
- %(width)s for the width and height of the
- video format. %(resolution)s for a textual
- description of the resolution of the video
- format. %% for a literal percent. Use - to
- output to stdout. Can also be used to
- download to a different directory, for
- example with -o '/my/downloads/%(uploader)s
- /%(title)s-%(id)s.%(ext)s' .
- --autonumber-size NUMBER Specifies the number of digits in
- %(autonumber)s when it is present in output
- filename template or --auto-number option
- is given
- --restrict-filenames Restrict filenames to only ASCII
- characters, and avoid "&" and spaces in
- filenames
- -A, --auto-number [deprecated; use -o
- "%(autonumber)s-%(title)s.%(ext)s" ] number
- downloaded files starting from 00000
- -t, --title [deprecated] use title in file name
- (default)
- -l, --literal [deprecated] alias of --title
- -w, --no-overwrites do not overwrite files
- -c, --continue force resume of partially downloaded files.
- By default, youtube-dl will resume
- downloads if possible.
- --no-continue do not resume partially downloaded files
- (restart from beginning)
- --no-part do not use .part files - write directly
- into output file
- --no-mtime do not use the Last-modified header to set
- the file modification time
- --write-description write video description to a .description
- file
- --write-info-json write video metadata to a .info.json file
- --write-annotations write video annotations to a .annotation
- file
- --load-info FILE json file containing the video information
- (created with the "--write-json" option)
- --cookies FILE file to read cookies from and dump cookie
- jar in
- --cache-dir DIR Location in the filesystem where youtube-dl
- can store some downloaded information
- permanently. By default $XDG_CACHE_HOME
- /youtube-dl or ~/.cache/youtube-dl . At the
- moment, only YouTube player files (for
- videos with obfuscated signatures) are
- cached, but that may change.
+
+ -a, --batch-file FILE File containing URLs to download ('-' for stdin)
+ --id Use only video ID in file name
+ -o, --output TEMPLATE Output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader
+ nickname if different, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(format)s for
+ the format description (like "22 - 1280x720" or "HD"), %(format_id)s for the unique id of the format (like YouTube's itags: "137"),
+ %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id,
+ %(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in,
+ %(playlist_index)s for the position in the playlist. %(height)s and %(width)s for the width and height of the video format.
+ %(resolution)s for a textual description of the resolution of the video format. %% for a literal percent. Use - to output to stdout.
+ Can also be used to download to a different directory, for example with -o '/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
+ --autonumber-size NUMBER Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given
+ --restrict-filenames Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames
+ -A, --auto-number [deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] Number downloaded files starting from 00000
+ -t, --title [deprecated] Use title in file name (default)
+ -l, --literal [deprecated] Alias of --title
+ -w, --no-overwrites Do not overwrite files
+ -c, --continue Force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.
+ --no-continue Do not resume partially downloaded files (restart from beginning)
+ --no-part Do not use .part files - write directly into output file
+ --no-mtime Do not use the Last-modified header to set the file modification time
+ --write-description Write video description to a .description file
+ --write-info-json Write video metadata to a .info.json file
+ --write-annotations Write video annotations to a .annotations.xml file
+ --load-info FILE JSON file containing the video information (created with the "--write-info-json" option)
+ --cookies FILE File to read cookies from and dump cookie jar in
+ --cache-dir DIR Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl
+ or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may
+ change.
--no-cache-dir Disable filesystem caching
--rm-cache-dir Delete all filesystem cache files
+
Thumbnail images:
------------------
- --write-thumbnail write thumbnail image to disk
- --write-all-thumbnails write all thumbnail image formats to disk
- --list-thumbnails Simulate and list all available thumbnail
- formats
+ --write-thumbnail Write thumbnail image to disk
+ --write-all-thumbnails Write all thumbnail image formats to disk
+ --list-thumbnails Simulate and list all available thumbnail formats
+
Verbosity / Simulation Options:
--------------------------------
- -q, --quiet activates quiet mode
+ -q, --quiet Activate quiet mode
--no-warnings Ignore warnings
- -s, --simulate do not download the video and do not write
- anything to disk
- --skip-download do not download the video
- -g, --get-url simulate, quiet but print URL
- -e, --get-title simulate, quiet but print title
- --get-id simulate, quiet but print id
- --get-thumbnail simulate, quiet but print thumbnail URL
- --get-description simulate, quiet but print video description
- --get-duration simulate, quiet but print video length
- --get-filename simulate, quiet but print output filename
- --get-format simulate, quiet but print output format
- -j, --dump-json simulate, quiet but print JSON information.
- See --output for a description of available
- keys.
- -J, --dump-single-json simulate, quiet but print JSON information
- for each command-line argument. If the URL
- refers to a playlist, dump the whole
- playlist information in a single line.
- --print-json Be quiet and print the video information as
- JSON (video is still being downloaded).
- --newline output progress bar as new lines
- --no-progress do not print progress bar
- --console-title display progress in console titlebar
- -v, --verbose print various debugging information
- --dump-intermediate-pages print downloaded pages to debug problems
- (very verbose)
- --write-pages Write downloaded intermediary pages to
- files in the current directory to debug
- problems
+ -s, --simulate Do not download the video and do not write anything to disk
+ --skip-download Do not download the video
+ -g, --get-url Simulate, quiet but print URL
+ -e, --get-title Simulate, quiet but print title
+ --get-id Simulate, quiet but print id
+ --get-thumbnail Simulate, quiet but print thumbnail URL
+ --get-description Simulate, quiet but print video description
+ --get-duration Simulate, quiet but print video length
+ --get-filename Simulate, quiet but print output filename
+ --get-format Simulate, quiet but print output format
+ -j, --dump-json Simulate, quiet but print JSON information. See --output for a description of available keys.
+ -J, --dump-single-json Simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist
+ information in a single line.
+ --print-json Be quiet and print the video information as JSON (video is still being downloaded).
+ --newline Output progress bar as new lines
+ --no-progress Do not print progress bar
+ --console-title Display progress in console titlebar
+ -v, --verbose Print various debugging information
+ --dump-pages Print downloaded pages to debug problems (very verbose)
+ --write-pages Write downloaded intermediary pages to files in the current directory to debug problems
--print-traffic Display sent and read HTTP traffic
- -C, --call-home Contact the youtube-dl server for
- debugging.
- --no-call-home Do NOT contact the youtube-dl server for
- debugging.
+ -C, --call-home Contact the youtube-dl server for debugging
+ --no-call-home Do NOT contact the youtube-dl server for debugging
+
Workarounds:
-------------
--encoding ENCODING Force the specified encoding (experimental)
- --no-check-certificate Suppress HTTPS certificate validation.
- --prefer-insecure Use an unencrypted connection to retrieve
- information about the video. (Currently
- supported only for YouTube)
- --user-agent UA specify a custom user agent
- --referer URL specify a custom referer, use if the video
- access is restricted to one domain
- --add-header FIELD:VALUE specify a custom HTTP header and its value,
- separated by a colon ':'. You can use this
- option multiple times
- --bidi-workaround Work around terminals that lack
- bidirectional text support. Requires bidiv
- or fribidi executable in PATH
- --sleep-interval SECONDS Number of seconds to sleep before each
- download.
+ --no-check-certificate Suppress HTTPS certificate validation
+ --prefer-insecure Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)
+ --user-agent UA Specify a custom user agent
+ --referer URL Specify a custom referer, use if the video access is restricted to one domain
+ --add-header FIELD:VALUE Specify a custom HTTP header and its value, separated by a colon ':'. You can use this option multiple times
+ --bidi-workaround Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH
+ --sleep-interval SECONDS Number of seconds to sleep before each download.
+
Video Format Options:
----------------------
-
- -f, --format FORMAT video format code, specify the order of
- preference using slashes, as in -f 22/17/18
- . Instead of format codes, you can select
- by extension for the extensions aac, m4a,
- mp3, mp4, ogg, wav, webm. You can also use
- the special names "best", "bestvideo",
- "bestaudio", "worst". You can filter the
- video results by putting a condition in
- brackets, as in -f "best[height=720]" (or
- -f "[filesize>10M]"). This works for
- filesize, height, width, tbr, abr, vbr,
- asr, and fps and the comparisons <, <=, >,
- >=, =, != and for ext, acodec, vcodec,
- container, and protocol and the comparisons
- =, != . Formats for which the value is not
- known are excluded unless you put a
- question mark (?) after the operator. You
- can combine format filters, so -f "[height
- <=? 720][tbr>500]" selects up to 720p
- videos (or videos where the height is not
- known) with a bitrate of at least 500
- KBit/s. By default, youtube-dl will pick
- the best quality. Use commas to download
- multiple audio formats, such as -f
- 136/137/mp4/bestvideo,140/m4a/bestaudio.
- You can merge the video and audio of two
- formats into a single file using -f <video-
- format>+<audio-format> (requires ffmpeg or
- avconv), for example -f
- bestvideo+bestaudio.
- --all-formats download all available video formats
- --prefer-free-formats prefer free video formats unless a specific
- one is requested
- --max-quality FORMAT highest quality format to download
- -F, --list-formats list all available formats
- --youtube-skip-dash-manifest Do not download the DASH manifest on
- YouTube videos
- --merge-output-format FORMAT If a merge is required (e.g.
- bestvideo+bestaudio), output to given
- container format. One of mkv, mp4, ogg,
- webm, flv.Ignored if no merge is required
+
+ -f, --format FORMAT Video format code, see the "FORMAT SELECTION" section for all the info
+ --all-formats Download all available video formats
+ --prefer-free-formats Prefer free video formats unless a specific one is requested
+ -F, --list-formats List all available formats
+ --youtube-skip-dash-manifest Do not download the DASH manifest on YouTube videos
+ --merge-output-format FORMAT If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv. Ignored if
+ no merge is required
+
Subtitle Options:
------------------
-
- --write-sub write subtitle file
- --write-auto-sub write automatic subtitle file (youtube
- only)
- --all-subs downloads all the available subtitles of
- the video
- --list-subs lists all available subtitles for the video
- --sub-format FORMAT subtitle format, accepts formats
- preference, for example: "ass/srt/best"
- --sub-lang LANGS languages of the subtitles to download
- (optional) separated by commas, use IETF
- language tags like 'en,pt'
+
+ --write-sub Write subtitle file
+ --write-auto-sub Write automatic subtitle file (YouTube only)
+ --all-subs Download all the available subtitles of the video
+ --list-subs List all available subtitles for the video
+ --sub-format FORMAT Subtitle format, accepts formats preference, for example: "srt" or "ass/srt/best"
+ --sub-lang LANGS Languages of the subtitles to download (optional) separated by commas, use IETF language tags like 'en,pt'
+
Authentication Options:
------------------------
- -u, --username USERNAME login with this account ID
- -p, --password PASSWORD account password. If this option is left
- out, youtube-dl will ask interactively.
- -2, --twofactor TWOFACTOR two-factor auth code
- -n, --netrc use .netrc authentication data
- --video-password PASSWORD video password (vimeo, smotri)
+ -u, --username USERNAME Login with this account ID
+ -p, --password PASSWORD Account password. If this option is left out, youtube-dl will ask interactively.
+ -2, --twofactor TWOFACTOR Two-factor auth code
+ -n, --netrc Use .netrc authentication data
+ --video-password PASSWORD Video password (vimeo, smotri)
+
Post-processing Options:
-------------------------
-
- -x, --extract-audio convert video files to audio-only files
- (requires ffmpeg or avconv and ffprobe or
- avprobe)
- --audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a",
- "opus", or "wav"; "best" by default
- --audio-quality QUALITY ffmpeg/avconv audio quality specification,
- insert a value between 0 (better) and 9
- (worse) for VBR or a specific bitrate like
- 128K (default 5)
- --recode-video FORMAT Encode the video to another format if
- necessary (currently supported:
- mp4|flv|ogg|webm|mkv)
- -k, --keep-video keeps the video file on disk after the
- post-processing; the video is erased by
- default
- --no-post-overwrites do not overwrite post-processed files; the
- post-processed files are overwritten by
- default
- --embed-subs embed subtitles in the video (only for mp4
- videos)
- --embed-thumbnail embed thumbnail in the audio as cover art
- --add-metadata write metadata to the video file
- --xattrs write metadata to the video file's xattrs
- (using dublin core and xdg standards)
- --fixup POLICY Automatically correct known faults of the
- file. One of never (do nothing), warn (only
- emit a warning), detect_or_warn(the
- default; fix file if we can, warn
- otherwise)
- --prefer-avconv Prefer avconv over ffmpeg for running the
- postprocessors (default)
- --prefer-ffmpeg Prefer ffmpeg over avconv for running the
- postprocessors
- --ffmpeg-location PATH Location of the ffmpeg/avconv binary;
- either the path to the binary or its
- containing directory.
- --exec CMD Execute a command on the file after
- downloading, similar to find's -exec
- syntax. Example: --exec 'adb push {}
- /sdcard/Music/ && rm {}'
- --convert-subtitles FORMAT Convert the subtitles to other format
- (currently supported: srt|ass|vtt)
+
+ -x, --extract-audio Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)
+ --audio-format FORMAT Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "best" by default
+ --audio-quality QUALITY Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default
+ 5)
+ --recode-video FORMAT Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)
+ -k, --keep-video Keep the video file on disk after the post-processing; the video is erased by default
+ --no-post-overwrites Do not overwrite post-processed files; the post-processed files are overwritten by default
+ --embed-subs Embed subtitles in the video (only for mkv and mp4 videos)
+ --embed-thumbnail Embed thumbnail in the audio as cover art
+ --add-metadata Write metadata to the video file
+ --metadata-from-title FORMAT Parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed
+ parameters replace existing values. Additional templates: %(album), %(artist). Example: --metadata-from-title "%(artist)s -
+ %(title)s" matches a title like "Coldplay - Paradise"
+ --xattrs Write metadata to the video file's xattrs (using dublin core and xdg standards)
+ --fixup POLICY Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn (the
+ default; fix file if we can, warn otherwise)
+ --prefer-avconv Prefer avconv over ffmpeg for running the postprocessors (default)
+ --prefer-ffmpeg Prefer ffmpeg over avconv for running the postprocessors
+ --ffmpeg-location PATH Location of the ffmpeg/avconv binary; either the path to the binary or its containing directory.
+ --exec CMD Execute a command on the file after downloading, similar to find's -exec syntax. Example: --exec 'adb push {} /sdcard/Music/ && rm
+ {}'
+ --convert-subtitles FORMAT Convert the subtitles to other format (currently supported: srt|ass|vtt)
+
+
CONFIGURATION
-=============
+
You can configure youtube-dl by placing default arguments (such as
--extract-audio --no-mtime to always extract the audio and not copy the
%APPDATA%\youtube-dl\config.txt and
C:\Users\<user name>\youtube-dl.conf.
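
For example, assuming you always want the audio extracted and the file
modification time left untouched, a minimal sketch of such a
configuration file could look like this (each line holds one of the
usual command-line options):

```
--extract-audio
--no-mtime
```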
+
+
OUTPUT TEMPLATE
-===============
+
The -o option allows users to indicate a template for the output file
names. The basic usage is not to set any template arguments when
youtube-dl_test_video_.mp4 # A simple file name
```
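
As a further sketch, a command that sorts downloads into per-uploader
directories (the URL is just an example):

```
$ youtube-dl -o '%(uploader)s/%(title)s-%(id)s.%(ext)s' https://www.youtube.com/watch?v=BaW_jenozKc
```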
+
+
+FORMAT SELECTION
+
+
+By default youtube-dl tries to download the best quality, but sometimes
+you may want to download some other format. The simplest case is
+requesting a specific format, for example -f 22. You can get the list
+of available formats using --list-formats; you can also use a file
+extension (currently it supports aac, m4a, mp3, mp4, ogg, wav, webm) or
+the special names best, bestvideo, bestaudio and worst.
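+
+For instance, a hypothetical session might first list the available
+formats and then request one of them (the URL is just an example):
+
+youtube-dl -F 'https://www.youtube.com/watch?v=BaW_jenozKc'
+
+youtube-dl -f 22 'https://www.youtube.com/watch?v=BaW_jenozKc'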
+
+If you want to download multiple videos and they don't have the same
+formats available, you can specify the order of preference using
+slashes, as in -f 22/17/18. You can also filter the video results by
+putting a condition in brackets, as in -f "best[height=720]" (or
+-f "[filesize>10M]"). This works for filesize, height, width, tbr, abr,
+vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext,
+acodec, vcodec, container, and protocol and the comparisons =, != .
+Formats for which the value is not known are excluded unless you put a
+question mark (?) after the operator. You can combine format filters, so
+-f "[height <=? 720][tbr>500]" selects up to 720p videos (or videos
+where the height is not known) with a bitrate of at least 500 KBit/s.
+Use commas to download multiple formats, such as
+-f 136/137/mp4/bestvideo,140/m4a/bestaudio. You can merge the video and
+audio of two formats into a single file using
+-f <video-format>+<audio-format> (requires ffmpeg or avconv), for
+example -f bestvideo+bestaudio.
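+
+As a sketch of how these pieces combine (again with an example URL),
+the following would pick an mp4 video no taller than 720p with a
+bitrate of at least 500 KBit/s and mux it with the best audio, assuming
+ffmpeg or avconv is installed:
+
+youtube-dl -f "bestvideo[ext=mp4][height<=?720][tbr>500]+bestaudio" 'https://www.youtube.com/watch?v=BaW_jenozKc'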
+
+Since the end of April 2015 and version 2015.04.26, youtube-dl uses
+-f bestvideo+bestaudio/best as the default format selection (see #5447,
+#5456). If ffmpeg or avconv are installed this results in downloading
+bestvideo and bestaudio separately and muxing them together into a
+single file giving the best overall quality available. Otherwise it
+falls back to best and results in downloading best available quality
+served as a single file. best is also needed for videos that don't come
+from YouTube because they don't provide the audio and video in two
+different files. If you want to download only some DASH formats (for
+example if you are not interested in getting videos with a resolution
+higher than 1080p), you can add
+-f bestvideo[height<=?1080]+bestaudio/best to your configuration file.
+Note that if you use youtube-dl to stream to stdout (most likely piping
+it to your media player), i.e. you explicitly specify the output
+template as -o -, youtube-dl still uses -f best format selection in
+order to start content delivery to your player immediately, rather than
+waiting until bestvideo and bestaudio are downloaded and muxed.
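+
+For example, a hypothetical one-liner that streams straight into a
+player reading from stdin (mpv is just one such player):
+
+youtube-dl -o - 'https://www.youtube.com/watch?v=BaW_jenozKc' | mpv -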
+
+If you want to preserve the old format selection behavior (prior to
+youtube-dl 2015.04.26), i.e. you want to download the best available
+quality media served as a single file, you should explicitly specify
+your choice with -f best. You may want to add it to the configuration
+file so that you do not have to type it every time you run youtube-dl.
+
+
+
VIDEO SELECTION
-===============
+
Videos can be filtered by their upload date using the options --date,
--datebefore or --dateafter, they accept dates in two formats:
$ youtube-dl --dateafter 20000101 --datebefore 20091231
```
+
+
FAQ
-===
+
How do I update youtube-dl?
If you have used pip, a simple sudo pip install -U youtube-dl is
sufficient to update.
-If you have installed youtube-dl using a package manager like apt-get or
-yum, use the standard system update mechanism to update. Note that
+If you have installed youtube-dl using a package manager like _apt-get_
+or _yum_, use the standard system update mechanism to update. Note that
distribution packages are often outdated. As a rule of thumb, youtube-dl
releases at least once a month, and often weekly or even daily. Simply
go to http://yt-dl.org/ to find out the current version. Unfortunately,
Ubuntu packaging guys - all they have to do is update the package to a
somewhat recent version. See above for a way to update.
-Do I always have to pass in --max-quality FORMAT, or -citw?
+Do I always have to pass -citw?
By default, youtube-dl intends to have the best options (incidentally,
if you have a convincing case that these should be different, please
file an issue where you explain that). Therefore, it is unnecessary and
sometimes harmful to copy long option strings from webpages. In
-particular, --max-quality limits the video quality (so if you want the
-best quality, do NOT pass it in), and the only option out of -citw that
-is regularly useful is -i.
+particular, the only option out of -citw that is regularly useful is -i.
Can you please put the -b option back?
videos) do not restrict the video URL by IP address, cookie, or
user-agent, but these are the exception rather than the rule.
-Please bear in mind that some URL protocols are not supported by
+Please bear in mind that some URL protocols are NOT supported by
browsers out of the box, including RTMP. If you are using -g, your own
downloader must support these as well.
not supported by old versions of youtube-dl. See above for how to update
youtube-dl.
+Video URL contains an ampersand and I'm getting some strange output "[1] 2839" or "'v' is not recognized as an internal or external command"
+
+That's actually the output from your shell. Since the ampersand is one
+of the special shell characters, it's interpreted by the shell,
+preventing you from passing the whole URL to youtube-dl. To keep your
+shell from interpreting the ampersands (or any other special
+characters) you have to either put the whole URL in quotes or escape
+them with a backslash (which approach will work depends on your shell).
+
+For example if your URL is
+https://www.youtube.com/watch?t=4&v=BaW_jenozKc you should end up with
+the following command:
+
+youtube-dl 'https://www.youtube.com/watch?t=4&v=BaW_jenozKc'
+
+or
+
+youtube-dl https://www.youtube.com/watch?t=4\&v=BaW_jenozKc
+
+On Windows you have to use double quotes:
+
+youtube-dl "https://www.youtube.com/watch?t=4&v=BaW_jenozKc"
+
ExtractorError: Could not find JS function u'OF'
In February 2015, the new YouTube player contained a character sequence
in a string that was misinterpreted by old versions of youtube-dl. See
above for how to update youtube-dl.
+HTTP Error 429: Too Many Requests or 402: Payment Required
+
+These two error codes indicate that the service is blocking your IP
+address because of overuse. Contact the service and ask them to unblock
+your IP address, or - if you have acquired a whitelisted IP address
+already - use the --proxy or --source-address options to select another
+IP address.
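+
+For example, a hypothetical invocation through an HTTP proxy you
+control (host and port are placeholders):
+
+youtube-dl --proxy http://127.0.0.1:3128 'https://www.youtube.com/watch?v=BaW_jenozKc'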
+
SyntaxError: Non-ASCII character
The error
the code, you can run it by executing the __main__.py file. To recompile
the executable, run make youtube-dl.
-The exe throws a Runtime error from Visual C++
+The exe throws a _Runtime error from Visual C++_
To run the exe you need to install first the Microsoft Visual C++ 2008
Redistributable Package.
license), the service is probably unfit for inclusion in youtube-dl.
A note on the service that they don't host the infringing content, but
-just link to those who do, is evidence that the service should not be
+just link to those who do, is evidence that the service should NOT be
included into youtube-dl. The same goes for any DMCA note when the whole
front page of the service is filled with videos they are not allowed to
distribute. A "fair use" note is equally unconvincing if the service
shows copyright-protected videos in full without authorization.
-Support requests for services that do purchase the rights to distribute
+Support requests for services that DO purchase the rights to distribute
their content are perfectly fine though. If in doubt, you can simply
include a source that mentions the legitimate purchase of content.
+How can I speed up work on my issue?
+
+(Also known as: Help, my important issue is not being solved!) The
+youtube-dl core developer team is quite small. While we do our best to
+solve as many issues as possible, sometimes that can take quite a while.
+To speed up the resolution of your issue, here's what you can do:
+
+First of all, please do report the issue at our issue tracker. That
+allows us to coordinate all efforts by users and developers, and serves
+as a unified point. Unfortunately, the youtube-dl project has grown too
+large to use personal email as an effective communication channel.
+
+Please read the bug reporting instructions below. A lot of bugs lack all
+the necessary information. If you can, offer proxy, VPN, or shell access
+to the youtube-dl developers. If you are able to, test the issue from
+multiple computers in multiple countries to exclude local censorship or
+misconfiguration issues.
+
+If nobody is interested in solving your issue, you are welcome to take
+matters into your own hands and submit a pull request (or coerce/pay
+somebody else to do so).
+
+Feel free to bump the issue from time to time by writing a small comment
+("Issue is still present in youtube-dl version ... from France, but
+fixed from Belgium"), but please no more than once a month. Please do
+not declare your issue as important or urgent.
+
How can I detect whether a given URL is supported by youtube-dl?
For one, have a look at the list of supported sites. Note that it can
youtube-dl reports a URL of a service in that list as unsupported. In
that case, simply report a bug.
-It is not possible to detect whether a URL is supported or not. That's
-because youtube-dl contains a generic extractor which matches all URLs.
+It is _not_ possible to detect whether a URL is supported or not. That's
+because youtube-dl contains a generic extractor which matches ALL URLs.
You may be tempted to disable, exclude, or remove the generic extractor,
but the generic extractor not only allows users to extract videos from
lots of websites that embed a video from another service, but may also
catching an UnsupportedError exception if you run it from a Python
program.
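
As a rough sketch of that last approach (assuming the Python API shown
in the embedding section below; depending on your options the exception
may surface directly or wrapped in a DownloadError):

``` {.python}
from __future__ import unicode_literals

import youtube_dl
from youtube_dl.utils import DownloadError, UnsupportedError


def is_supported(url):
    # Simulate only - never write anything to disk
    ydl = youtube_dl.YoutubeDL({'quiet': True, 'simulate': True})
    try:
        ydl.extract_info(url, download=False)
        return True
    except (UnsupportedError, DownloadError):
        # The generic extractor raises UnsupportedError for URLs it
        # cannot handle; YoutubeDL may re-raise it as a DownloadError.
        return False
```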
+
+
DEVELOPER INSTRUCTIONS
-======================
+
Most users do not need to build youtube-dl and can download the builds
or get them from their distribution.
5. Add an import in youtube_dl/extractor/__init__.py.
6. Run python test/test_download.py TestDownload.test_YourExtractor.
- This should fail at first, but you can continually re-run it until
+ This _should fail_ at first, but you can continually re-run it until
you're done. If you decide to add more than one test, then rename
_TEST to _TESTS and make it into a list of dictionaries. The tests
   will then be named TestDownload.test_YourExtractor,
In any case, thank you very much for your contributions!
+
+
EMBEDDING YOUTUBE-DL
-====================
+
youtube-dl makes the best effort to be a good command-line program, and
thus should be callable from any programming language. If you encounter
fashion, like this:
``` {.python}
+from __future__ import unicode_literals
import youtube_dl
ydl_opts = {}
downloads/converts the video to an mp3 file:
``` {.python}
+from __future__ import unicode_literals
import youtube_dl
ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
```
+
+
BUGS
-====
+
Bugs and suggestions should be reported at:
https://github.com/rg3/youtube-dl/issues . Unless you were prompted so
bug report), please do not send bug reports via personal email. For
discussions, join us in the irc channel #youtube-dl on freenode.
-Please include the full output of youtube-dl when run with -v.
+PLEASE INCLUDE THE FULL OUTPUT OF YOUTUBE-DL WHEN RUN WITH -v.
The output (including the first lines) contains important debugging
information. Issues without the full output are often not reproducible
by these issues, since the only possible way for me to move forward on
them is to ask for clarification over and over.
-For bug reports, this means that your report should contain the complete
-output of youtube-dl when called with the -v flag. The error message you
-get for (most) bugs even says so, but you would not believe how many of
-our bug reports do not contain this information.
+For bug reports, this means that your report should contain the
+_complete_ output of youtube-dl when called with the -v flag. The error
+message you get for (most) bugs even says so, but you would not believe
+how many of our bug reports do not contain this information.
+
+If your server has multiple IPs or you suspect censorship,
+adding --call-home may be a good idea to get more diagnostics. If the
+error is ERROR: Unable to extract ... and you cannot reproduce it from
+multiple countries, add --dump-pages (warning: this will yield a rather
+large output, redirect it to the file log.txt by adding >log.txt 2>&1 to
+your command-line) or upload the .dump files you get when you add
+--write-pages somewhere.
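+
+For example, a hypothetical debugging run that captures the full
+output into a log file (URL is just an example):
+
+youtube-dl -v --dump-pages 'https://www.youtube.com/watch?v=BaW_jenozKc' >log.txt 2>&1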
-Site support requests must contain an example URL. An example URL is a
+SITE SUPPORT REQUESTS MUST CONTAIN AN EXAMPLE URL. An example URL is a
URL you might want to download, like
http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious
video present. Except under very special circumstances, the main page of
-a video service (e.g. http://www.youtube.com/ ) is not an example URL.
+a video service (e.g. http://www.youtube.com/ ) is _not_ an example URL.
Are you using the latest version?
Before requesting a new feature, please have a quick peek at the list of
supported options. Many feature requests are for features that actually
exist already! Please, absolutely do show off your work in the issue
-report and detail how the existing similar options do not solve your
+report and detail how the existing similar options do _not_ solve your
problem.
Is there enough context in your bug report?
hand, if your UI for youtube-dl fails in some way you believe is related
to youtube-dl, by all means, go ahead and report the bug.
+
+
COPYRIGHT
-=========
+
youtube-dl is released into the public domain by the copyright holders.
This README file was originally written by Daniel Bolton
(https://github.com/dbbolton) and is likewise released into the public
domain.
-
if METHOD == 'EURISTIC':
try:
webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
- except:
+ except Exception:
print('\nFail: {0}'.format(test['name']))
continue
--- /dev/null
+from __future__ import unicode_literals
+
+import codecs
+import subprocess
+
+import os
+import sys
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.utils import intlist_to_bytes
+from youtube_dl.aes import aes_encrypt, key_expansion
+
+secret_msg = b'Secret message goes here'
+
+
+def hex_str(int_list):
+ return codecs.encode(intlist_to_bytes(int_list), 'hex')
+
+
+def openssl_encode(algo, key, iv):
+ cmd = ['openssl', 'enc', '-e', '-' + algo, '-K', hex_str(key), '-iv', hex_str(iv)]
+ prog = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+ out, _ = prog.communicate(secret_msg)
+ return out
+
+iv = key = [0x20, 0x15] + 14 * [0]
+
+r = openssl_encode('aes-128-cbc', key, iv)
+print('aes_cbc_decrypt')
+print(repr(r))
+
+password = key
+new_key = aes_encrypt(password, key_expansion(password))
+r = openssl_encode('aes-128-ctr', new_key, iv)
+print('aes_decrypt_text 16')
+print(repr(r))
+
+password = key + 16 * [0]
+new_key = aes_encrypt(password, key_expansion(password)) * (32 // 16)
+r = openssl_encode('aes-256-ctr', new_key, iv)
+print('aes_decrypt_text 32')
+print(repr(r))
- **1tv**: Первый канал
- **1up.com**
- **220.ro**
+ - **22tracks:genre**
+ - **22tracks:track**
- **24video**
- **3sat**
- **4tube**
- **audiomack**
- **audiomack:album**
- **Azubu**
+ - **BaiduVideo**
- **bambuser**
- **bambuser:channel**
- **Bandcamp**
- **Bandcamp:album**
- **bbc.co.uk**: BBC iPlayer
+ - **BeatportPro**
- **Beeg**
- **BehindKink**
- **Bet**
- **BR**: Bayerischer Rundfunk Mediathek
- **Break**
- **Brightcove**
+ - **bt:article**: Bergens Tidende Articles
+ - **bt:vestlendingen**: Bergens Tidende - Vestlendingen
- **BuzzFeed**
- **BYUtv**
- **Camdemy**
- **CondeNast**: Condé Nast media group: Condé Nast, GQ, Glamour, Vanity Fair, Vogue, W Magazine, WIRED
- **Cracked**
- **Criterion**
+ - **CrooksAndLiars**
- **Crunchyroll**
- **crunchyroll:playlist**
- **CSpan**: C-SPAN
- **DctpTv**
- **DeezerPlaylist**
- **defense.gouv.fr**
+ - **DHM**: Filmarchiv - Deutsches Historisches Museum
- **Discovery**
- **divxstage**: DivxStage
- **Dotsub**
+ - **DouyuTV**
- **DRBonanza**
- **Dropbox**
- **DrTuber**
- **DRTV**
- **Dump**
+ - **Dumpert**
- **dvtv**: http://video.aktualne.cz/
+ - **EaglePlatform**
- **EbaumsWorld**
- **EchoMsk**
- **eHow**
- **Firstpost**
- **Flickr**
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
+ - **FootyRoom**
- **Foxgay**
- **FoxNews**
+ - **FoxSports**
- **france2.fr:generation-quoi**
- **FranceCulture**
- **FranceInter**
- **Gamekings**
- **GameOne**
- **gameone:playlist**
+ - **Gamersyde**
- **GameSpot**
- **GameStar**
- **Gametrailers**
+ - **Gazeta**
- **GDCVault**
- **generic**: Generic downloader that works on some sites
+ - **Gfycat**
- **GiantBomb**
- **Giga**
- **Glide**: Glide mobile video messages (glide.me)
- **GodTube**
- **GoldenMoustache**
- **Golem**
- - **GorillaVid**: GorillaVid.in, daclips.in, movpod.in and fastvideo.in
+ - **GorillaVid**: GorillaVid.in, daclips.in, movpod.in, fastvideo.in and realvid.net
- **Goshgay**
- - **Grooveshark**
- **Groupon**
- **Hark**
- **HearThisAt**
- **jpopsuki.tv**
- **Jukebox**
- **Kaltura**
+ - **KanalPlay**: Kanal 5/9/11 Play
- **Kankan**
- **Karaoketv**
- **keek**
- **Letv**
- **LetvPlaylist**
- **LetvTv**
+ - **Libsyn**
+ - **life:embed**
- **lifenews**: LIFE | NEWS
- **LiveLeak**
- **livestream**
- **Malemotion**
- **MDR**
- **media.ccc.de**
+ - **MegaVideoz**
- **metacafe**
- **Metacritic**
- **Mgoon**
- **Minhateca**
- **MinistryGrid**
+ - **miomio.tv**
- **mitele.es**
- **mixcloud**
- **MLB**
- **MySpass**
- **myvideo**
- **MyVidster**
+ - **N-JOY**
- **n-tv.de**
- **NationalGeographic**
- **Naver**
- **NBA**
- **NBC**
- **NBCNews**
+ - **NBCSports**
+ - **NBCSportsVPlayer**
- **ndr**: NDR.de - Mediathek
- **NDTV**
- **NerdCubedFeed**
- **npo.nl:radio**
- **npo.nl:radio:fragment**
- **NRK**
+ - **NRKPlaylist**
- **NRKTV**
- **ntv.ru**
- **Nuvid**
- **NYTimes**
+ - **NYTimesArticle**
- **ocw.mit.edu**
- **Odnoklassniki**
- **OktoberfestTV**
- **Ooyala**
- **OpenFilm**
- **orf:fm4**: radio FM4
+ - **orf:iptv**: iptv.ORF.at
- **orf:oe1**: Radio Österreich 1
- **orf:tvthek**: ORF TVthek
- **parliamentlive.tv**: UK parliament videos
- **Patreon**
- **PBS**
+ - **PhilharmonieDeParis**: Philharmonie de Paris
- **Phoenix**
- **Photobucket**
+ - **Pladform**
- **PlanetaPlay**
- **play.fm**
- **played.to**
- **Playvid**
+ - **Playwire**
- **plus.google**: Google Plus
- **pluzz.francetv.fr**
- **podomatic**
- **PornHub**
- **PornHubPlaylist**
- **Pornotube**
+ - **PornoVoisines**
- **PornoXO**
+ - **PrimeShareTV**
- **PromptFile**
- **prosiebensat1**: ProSiebenSat.1 Digital
- **Puls4**
- **Pyvideo**
+ - **qqmusic**
+ - **qqmusic:album**
+ - **qqmusic:singer**
+ - **qqmusic:toplist**
- **QuickVid**
- **R7**
- **radio.de**
- **radiobremen**
- **radiofrance**
+ - **RadioJavan**
- **Rai**
- **RBMARadio**
- **RedTube**
- **RTP**
- **RTS**: RTS.ch
- **rtve.es:alacarta**: RTVE a la carta
+ - **rtve.es:infantil**: RTVE infantil
- **rtve.es:live**: RTVE.es live streams
- **RUHD**
- **rutube**: Rutube videos
- **rutube:movie**: Rutube movies
- **rutube:person**: Rutube person videos
- **RUTV**: RUTV.RU
+ - **safari**: safaribooksonline.com online video
+ - **safari:course**: safaribooksonline.com online courses
- **Sandia**: Sandia National Laboratories
- **Sapo**: SAPO Vídeos
- **savefrom.net**
- **Screencast**
- **ScreencastOMatic**
- **ScreenwaveMedia**
+ - **SenateISVP**
- **ServingSys**
- **Sexu**
- **SexyKarma**: Sexy Karma and Watch Indian Porn
- **soundgasm**
- **soundgasm:profile**
- **southpark.cc.com**
+ - **southpark.cc.com:español**
- **southpark.de**
+ - **southpark.nl**
+ - **southparkstudios.dk**
- **Space**
+ - **SpankBang**
- **Spankwire**
- **Spiegel**
- **Spiegel:Article**: Articles on spiegel.de
- **Sport5**
- **SportBox**
- **SportDeutschland**
+ - **Srf**
- **SRMediathek**: Saarländischer Rundfunk
+ - **SSA**
- **stanfordoc**: Stanford Open ClassRoom
- **Steam**
- **streamcloud.eu**
- **StreamCZ**
- **StreetVoice**
- **SunPorno**
+ - **SVT**
- **SVTPlay**: SVT Play and Öppet arkiv
- **SWRMediathek**
- **Syfy**
- **TeamFour**
- **TechTalks**
- **techtv.mit.edu**
- - **TED**
+ - **ted**
- **tegenlicht.vpro.nl**
- **TeleBruxelles**
- **telecinco.es**
- **tlc.com**
- **tlc.de**
- **TMZ**
+ - **TMZArticle**
- **TNAFlix**
- **tou.tv**
- **Toypics**: Toypics user profile
- **Ubu**
- **udemy**
- **udemy:course**
+ - **UDNEmbed**
+ - **Ultimedia**
- **Unistra**
- **Urort**: NRK P3 Urørt
- **ustream**
- **ustream:channel**
+ - **Varzesh3**
- **Vbox7**
- **VeeHD**
- **Veoh**
+ - **Vessel**
- **Vesti**: Вести.Ru
- **Vevo**
- - **VGTV**
+ - **VGTV**: VGTV and BTTV
- **vh1.com**
- **Vice**
- **Viddler**
- **Vidzi**
- **vier**
- **vier:videos**
+ - **Viewster**
- **viki**
- **vimeo**
- **vimeo:album**
- **vimeo:review**: Review pages on vimeo
- **vimeo:user**
- **vimeo:watchlater**: Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)
- - **Vimple**: Vimple.ru
+ - **Vimple**: Vimple - one-click video hosting
- **Vine**
- **vine:user**
- **vk.com**
- **vk.com:user-videos**: vk.com:All of a user's videos
- **Vodlocker**
+ - **VoiceRepublic**
- **Vporn**
- **VRT**
- **vube**: Vube.com
- **XHamster**
- **XMinus**
- **XNXX**
+ - **Xstream**
- **XTube**
- **XTubeUser**: XTube user profile
- **Xuite**
- **XXXYMovies**
- **Yahoo**: Yahoo screen and movies
- **Yam**
+ - **yandexmusic:album**: Яндекс.Музыка - Альбом
+ - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
+ - **yandexmusic:track**: Яндекс.Музыка - Трек
- **YesJapan**
- **Ynet**
- **YouJizz**
- **youtube:show**: YouTube.com (multi-season) shows
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
- - **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
+ - **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
- **Zapiks**
- **ZDF**
- **ZDFChannel**
'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
# Check for the presence of mandatory fields
- if got_dict.get('_type') != 'playlist':
+ if got_dict.get('_type') not in ('playlist', 'multi_video'):
for key in ('id', 'url', 'title', 'ext'):
self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
# Check for mandatory fields that are automatically set by YoutubeDL
"forcethumbnail": false,
"forcetitle": false,
"forceurl": false,
- "format": null,
- "format_limit": null,
+ "format": "best",
"ignoreerrors": false,
"listformats": null,
"logtostderr": false,
from youtube_dl import YoutubeDL
from youtube_dl.extractor import YoutubeIE
from youtube_dl.postprocessor.common import PostProcessor
+from youtube_dl.utils import match_filter_func
+
+TEST_URL = 'http://localhost/sample.mp4'
class YDL(FakeYDL):
ydl = YDL()
ydl.params['prefer_free_formats'] = True
formats = [
- {'ext': 'webm', 'height': 460, 'url': 'x'},
- {'ext': 'mp4', 'height': 460, 'url': 'y'},
+ {'ext': 'webm', 'height': 460, 'url': TEST_URL},
+ {'ext': 'mp4', 'height': 460, 'url': TEST_URL},
]
info_dict = _make_result(formats)
yie = YoutubeIE(ydl)
ydl = YDL()
ydl.params['prefer_free_formats'] = True
formats = [
- {'ext': 'webm', 'height': 720, 'url': 'a'},
- {'ext': 'mp4', 'height': 1080, 'url': 'b'},
+ {'ext': 'webm', 'height': 720, 'url': TEST_URL},
+ {'ext': 'mp4', 'height': 1080, 'url': TEST_URL},
]
info_dict['formats'] = formats
yie = YoutubeIE(ydl)
ydl = YDL()
ydl.params['prefer_free_formats'] = False
formats = [
- {'ext': 'webm', 'height': 720, 'url': '_'},
- {'ext': 'mp4', 'height': 720, 'url': '_'},
- {'ext': 'flv', 'height': 720, 'url': '_'},
+ {'ext': 'webm', 'height': 720, 'url': TEST_URL},
+ {'ext': 'mp4', 'height': 720, 'url': TEST_URL},
+ {'ext': 'flv', 'height': 720, 'url': TEST_URL},
]
info_dict['formats'] = formats
yie = YoutubeIE(ydl)
ydl = YDL()
ydl.params['prefer_free_formats'] = False
formats = [
- {'ext': 'flv', 'height': 720, 'url': '_'},
- {'ext': 'webm', 'height': 720, 'url': '_'},
+ {'ext': 'flv', 'height': 720, 'url': TEST_URL},
+ {'ext': 'webm', 'height': 720, 'url': TEST_URL},
]
info_dict['formats'] = formats
yie = YoutubeIE(ydl)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['ext'], 'flv')
- def test_format_limit(self):
- formats = [
- {'format_id': 'meh', 'url': 'http://example.com/meh', 'preference': 1},
- {'format_id': 'good', 'url': 'http://example.com/good', 'preference': 2},
- {'format_id': 'great', 'url': 'http://example.com/great', 'preference': 3},
- {'format_id': 'excellent', 'url': 'http://example.com/exc', 'preference': 4},
- ]
- info_dict = _make_result(formats)
-
- ydl = YDL()
- ydl.process_ie_result(info_dict)
- downloaded = ydl.downloaded_info_dicts[0]
- self.assertEqual(downloaded['format_id'], 'excellent')
-
- ydl = YDL({'format_limit': 'good'})
- assert ydl.params['format_limit'] == 'good'
- ydl.process_ie_result(info_dict.copy())
- downloaded = ydl.downloaded_info_dicts[0]
- self.assertEqual(downloaded['format_id'], 'good')
-
- ydl = YDL({'format_limit': 'great', 'format': 'all'})
- ydl.process_ie_result(info_dict.copy())
- self.assertEqual(ydl.downloaded_info_dicts[0]['format_id'], 'meh')
- self.assertEqual(ydl.downloaded_info_dicts[1]['format_id'], 'good')
- self.assertEqual(ydl.downloaded_info_dicts[2]['format_id'], 'great')
- self.assertTrue('3' in ydl.msgs[0])
-
- ydl = YDL()
- ydl.params['format_limit'] = 'excellent'
- ydl.process_ie_result(info_dict.copy())
- downloaded = ydl.downloaded_info_dicts[0]
- self.assertEqual(downloaded['format_id'], 'excellent')
-
def test_format_selection(self):
formats = [
- {'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': '_'},
- {'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': '_'},
- {'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': '_'},
- {'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': '_'},
+ {'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
+ {'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': TEST_URL},
+ {'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': TEST_URL},
+ {'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': TEST_URL},
]
info_dict = _make_result(formats)
def test_format_selection_audio(self):
formats = [
- {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': '_'},
- {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': '_'},
- {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': '_'},
- {'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': '_'},
+ {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
+ {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL},
+ {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': TEST_URL},
+ {'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': TEST_URL},
]
info_dict = _make_result(formats)
self.assertEqual(downloaded['format_id'], 'audio-low')
formats = [
- {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': '_'},
- {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': '_'},
+ {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
+ {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': TEST_URL},
]
info_dict = _make_result(formats)
def test_format_selection_video(self):
formats = [
- {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': '_'},
- {'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': '_'},
- {'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': '_'},
+ {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': TEST_URL},
+ {'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': TEST_URL},
+ {'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': TEST_URL},
]
info_dict = _make_result(formats)
f2['url'] = 'url:' + f2id
info_dict = _make_result([f1, f2], extractor='youtube')
- ydl = YDL()
+ ydl = YDL({'format': 'best/bestvideo'})
yie = YoutubeIE(ydl)
yie._sort_formats(info_dict['formats'])
ydl.process_ie_result(info_dict)
self.assertEqual(downloaded['format_id'], f1id)
info_dict = _make_result([f2, f1], extractor='youtube')
- ydl = YDL()
+ ydl = YDL({'format': 'best/bestvideo'})
yie = YoutubeIE(ydl)
yie._sort_formats(info_dict['formats'])
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'G')
+
+class TestYoutubeDL(unittest.TestCase):
def test_subtitles(self):
def s_formats(lang, autocaption=False):
return [{
def run(self, info):
with open(audiofile, 'wt') as f:
f.write('EXAMPLE')
- info['filepath']
- return False, info
+ return [info['filepath']], info
- def run_pp(params):
+ def run_pp(params, PP):
with open(filename, 'wt') as f:
f.write('EXAMPLE')
ydl = YoutubeDL(params)
- ydl.add_post_processor(SimplePP())
+ ydl.add_post_processor(PP())
ydl.post_process(filename, {'filepath': filename})
- run_pp({'keepvideo': True})
+ run_pp({'keepvideo': True}, SimplePP)
self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename)
self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
os.unlink(filename)
os.unlink(audiofile)
- run_pp({'keepvideo': False})
+ run_pp({'keepvideo': False}, SimplePP)
self.assertFalse(os.path.exists(filename), '%s exists' % filename)
self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
os.unlink(audiofile)
+ class ModifierPP(PostProcessor):
+ def run(self, info):
+ with open(info['filepath'], 'wt') as f:
+ f.write('MODIFIED')
+ return [], info
+
+ run_pp({'keepvideo': False}, ModifierPP)
+ self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename)
+ os.unlink(filename)
+
+ def test_match_filter(self):
+ class FilterYDL(YDL):
+ def __init__(self, *args, **kwargs):
+ super(FilterYDL, self).__init__(*args, **kwargs)
+ self.params['simulate'] = True
+
+ def process_info(self, info_dict):
+ super(YDL, self).process_info(info_dict)
+
+ def _match_entry(self, info_dict, incomplete):
+ res = super(FilterYDL, self)._match_entry(info_dict, incomplete)
+ if res is None:
+ self.downloaded_info_dicts.append(info_dict)
+ return res
+
+ first = {
+ 'id': '1',
+ 'url': TEST_URL,
+ 'title': 'one',
+ 'extractor': 'TEST',
+ 'duration': 30,
+ 'filesize': 10 * 1024,
+ }
+ second = {
+ 'id': '2',
+ 'url': TEST_URL,
+ 'title': 'two',
+ 'extractor': 'TEST',
+ 'duration': 10,
+ 'description': 'foo',
+ 'filesize': 5 * 1024,
+ }
+ videos = [first, second]
+
+ def get_videos(filter_=None):
+ ydl = FilterYDL({'match_filter': filter_})
+ for v in videos:
+ ydl.process_ie_result(v, download=True)
+ return [v['id'] for v in ydl.downloaded_info_dicts]
+
+ res = get_videos()
+ self.assertEqual(res, ['1', '2'])
+
+ def f(v):
+ if v['id'] == '1':
+ return None
+ else:
+ return 'Video id is not 1'
+ res = get_videos(f)
+ self.assertEqual(res, ['1'])
+
+ f = match_filter_func('duration < 30')
+ res = get_videos(f)
+ self.assertEqual(res, ['2'])
+
+ f = match_filter_func('description = foo')
+ res = get_videos(f)
+ self.assertEqual(res, ['2'])
+
+ f = match_filter_func('description =? foo')
+ res = get_videos(f)
+ self.assertEqual(res, ['1', '2'])
+
+ f = match_filter_func('filesize > 5KiB')
+ res = get_videos(f)
+ self.assertEqual(res, ['1'])
+
if __name__ == '__main__':
unittest.main()
--- /dev/null
+#!/usr/bin/env python
+
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_decrypt_text
+from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes
+import base64
+
+# the encrypted data can be generated with 'devscripts/generate_aes_testdata.py'
+
+
+class TestAES(unittest.TestCase):
+ def setUp(self):
+ self.key = self.iv = [0x20, 0x15] + 14 * [0]
+ self.secret_msg = b'Secret message goes here'
+
+ def test_encrypt(self):
+ msg = b'message'
+ key = list(range(16))
+ encrypted = aes_encrypt(bytes_to_intlist(msg), key)
+ decrypted = intlist_to_bytes(aes_decrypt(encrypted, key))
+ self.assertEqual(decrypted, msg)
+
+ def test_cbc_decrypt(self):
+ data = bytes_to_intlist(
+ b"\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd"
+ )
+ decrypted = intlist_to_bytes(aes_cbc_decrypt(data, self.key, self.iv))
+ self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
+
+ def test_decrypt_text(self):
+ password = intlist_to_bytes(self.key).decode('utf-8')
+ encrypted = base64.b64encode(
+ intlist_to_bytes(self.iv[:8]) +
+ b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae'
+ )
+ decrypted = (aes_decrypt_text(encrypted, password, 16))
+ self.assertEqual(decrypted, self.secret_msg)
+
+ password = intlist_to_bytes(self.key).decode('utf-8')
+ encrypted = base64.b64encode(
+ intlist_to_bytes(self.iv[:8]) +
+ b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83'
+ )
+ decrypted = (aes_decrypt_text(encrypted, password, 32))
+ self.assertEqual(decrypted, self.secret_msg)
+
+if __name__ == '__main__':
+ unittest.main()
self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
def test_youtube_feeds(self):
- self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watch_later'])
+ self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater'])
self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions'])
self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended'])
self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites'])
self.assertMatch(':tds', ['ComedyCentralShows'])
def test_vimeo_matching(self):
- self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel'])
- self.assertMatch('http://vimeo.com/channels/31259', ['vimeo:channel'])
- self.assertMatch('http://vimeo.com/channels/31259/53576664', ['vimeo'])
- self.assertMatch('http://vimeo.com/user7108434', ['vimeo:user'])
- self.assertMatch('http://vimeo.com/user7108434/videos', ['vimeo:user'])
+ self.assertMatch('https://vimeo.com/channels/tributes', ['vimeo:channel'])
+ self.assertMatch('https://vimeo.com/channels/31259', ['vimeo:channel'])
+ self.assertMatch('https://vimeo.com/channels/31259/53576664', ['vimeo'])
+ self.assertMatch('https://vimeo.com/user7108434', ['vimeo:user'])
+ self.assertMatch('https://vimeo.com/user7108434/videos', ['vimeo:user'])
self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review'])
# https://github.com/rg3/youtube-dl/issues/1930
break
if is_playlist:
- self.assertEqual(res_dict['_type'], 'playlist')
+ self.assertTrue(res_dict['_type'] in ['playlist', 'multi_video'])
self.assertTrue('entries' in res_dict)
expect_info_dict(self, res_dict, test_case.get('info_dict', {}))
#!/usr/bin/env python
+# coding: utf-8
+
from __future__ import unicode_literals
import unittest
import sys
import os
import subprocess
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.utils import encodeArgument
rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
def test_main_exec(self):
subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL)
+ def test_cmdline_umlauts(self):
+ p = subprocess.Popen(
+ [sys.executable, 'youtube_dl/__main__.py', encodeArgument('ä'), '--version'],
+ cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE)
+ _, stderr = p.communicate()
+ self.assertFalse(stderr)
+
if __name__ == '__main__':
unittest.main()
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl import YoutubeDL
-from youtube_dl.compat import compat_http_server
+from youtube_dl.compat import compat_http_server, compat_urllib_request
import ssl
import threading
r = ydl.extract_info('https://localhost:%d/video.html' % self.port)
self.assertEqual(r['url'], 'https://localhost:%d/vid.mp4' % self.port)
+
+def _build_proxy_handler(name):
+ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+ proxy_name = name
+
+ def log_message(self, format, *args):
+ pass
+
+ def do_GET(self):
+ self.send_response(200)
+ self.send_header('Content-Type', 'text/plain; charset=utf-8')
+ self.end_headers()
+ self.wfile.write('{self.proxy_name}: {self.path}'.format(self=self).encode('utf-8'))
+ return HTTPTestRequestHandler
+
+
+class TestProxy(unittest.TestCase):
+ def setUp(self):
+ self.proxy = compat_http_server.HTTPServer(
+ ('localhost', 0), _build_proxy_handler('normal'))
+ self.port = self.proxy.socket.getsockname()[1]
+ self.proxy_thread = threading.Thread(target=self.proxy.serve_forever)
+ self.proxy_thread.daemon = True
+ self.proxy_thread.start()
+
+ self.cn_proxy = compat_http_server.HTTPServer(
+ ('localhost', 0), _build_proxy_handler('cn'))
+ self.cn_port = self.cn_proxy.socket.getsockname()[1]
+ self.cn_proxy_thread = threading.Thread(target=self.cn_proxy.serve_forever)
+ self.cn_proxy_thread.daemon = True
+ self.cn_proxy_thread.start()
+
+ def test_proxy(self):
+ cn_proxy = 'localhost:{0}'.format(self.cn_port)
+ ydl = YoutubeDL({
+ 'proxy': 'localhost:{0}'.format(self.port),
+ 'cn_verification_proxy': cn_proxy,
+ })
+ url = 'http://foo.com/bar'
+ response = ydl.urlopen(url).read().decode('utf-8')
+ self.assertEqual(response, 'normal: {0}'.format(url))
+
+ req = compat_urllib_request.Request(url)
+ req.add_header('Ytdl-request-proxy', cn_proxy)
+ response = ydl.urlopen(req).read().decode('utf-8')
+ self.assertEqual(response, 'cn: {0}'.format(url))
+
if __name__ == '__main__':
unittest.main()
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+from youtube_dl.extractor import (
+ gen_extractors,
+)
+
+
+class TestNetRc(unittest.TestCase):
+ def test_netrc_present(self):
+ for ie in gen_extractors():
+ if not hasattr(ie, '_login'):
+ continue
+ self.assertTrue(
+ hasattr(ie, '_NETRC_MACHINE'),
+ 'Extractor %s supports login, but is missing a _NETRC_MACHINE property' % ie.IE_NAME)
+
+
+if __name__ == '__main__':
+ unittest.main()
--- /dev/null
+#!/usr/bin/env python
+
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.postprocessor import MetadataFromTitlePP
+
+
+class TestMetadataFromTitle(unittest.TestCase):
+ def test_format_to_regex(self):
+ pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s')
+ self.assertEqual(pp._titleregex, '(?P<title>.+)\ \-\ (?P<artist>.+)')
VikiIE,
ThePlatformIE,
RTVEALaCartaIE,
+ FunnyOrDieIE,
)
self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
+class TestFunnyOrDieSubtitles(BaseTestSubtitles):
+ url = 'http://www.funnyordie.com/videos/224829ff6d/judd-apatow-will-direct-your-vine'
+ IE = FunnyOrDieIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['en']))
+ self.assertEqual(md5(subtitles['en']), 'c5593c193eacd353596c11c2d4f9ecc4')
+
+
if __name__ == '__main__':
unittest.main()
'buildserver.py',
]
+IGNORED_DIRS = [
+ '.git',
+ '.tox',
+]
from test.helper import assertRegexpMatches
class TestUnicodeLiterals(unittest.TestCase):
def test_all_files(self):
- for dirpath, _, filenames in os.walk(rootDir):
+ for dirpath, dirnames, filenames in os.walk(rootDir):
+ for ignore_dir in IGNORED_DIRS:
+ if ignore_dir in dirnames:
+ # If we remove the directory from dirnames os.walk won't
+ # recurse into it
+ dirnames.remove(ignore_dir)
for basename in filenames:
if not basename.endswith('.py'):
continue
encodeFilename,
escape_rfc3986,
escape_url,
+ ExtractorError,
find_xpath_attr,
fix_xml_ampersands,
InAdvancePagedList,
parse_iso8601,
read_batch_urls,
sanitize_filename,
+ sanitize_path,
+ prepend_extension,
+ replace_extension,
shell_quote,
smuggle_url,
str_to_int,
unified_strdate,
unsmuggle_url,
uppercase_escape,
+ lowercase_escape,
url_basename,
urlencode_postdata,
version_tuple,
xpath_with_ns,
+ xpath_text,
render_table,
match_str,
+ parse_dfxp_time_expr,
+ dfxp2srt,
)
self.assertEqual(
sanitize_filename('New World record at 0:12:34'),
'New World record at 0_12_34')
+
self.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf')
self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf')
+ self.assertEqual(sanitize_filename('.gasdgf'), 'gasdgf')
+ self.assertEqual(sanitize_filename('.gasdgf', is_id=True), '.gasdgf')
forbidden = '"\0\\/'
for fc in forbidden:
self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw')
self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI')
+ def test_sanitize_path(self):
+ if sys.platform != 'win32':
+ return
+
+ self.assertEqual(sanitize_path('abc'), 'abc')
+ self.assertEqual(sanitize_path('abc/def'), 'abc\\def')
+ self.assertEqual(sanitize_path('abc\\def'), 'abc\\def')
+ self.assertEqual(sanitize_path('abc|def'), 'abc#def')
+ self.assertEqual(sanitize_path('<>:"|?*'), '#######')
+ self.assertEqual(sanitize_path('C:/abc/def'), 'C:\\abc\\def')
+ self.assertEqual(sanitize_path('C?:/abc/def'), 'C##\\abc\\def')
+
+ self.assertEqual(sanitize_path('\\\\?\\UNC\\ComputerName\\abc'), '\\\\?\\UNC\\ComputerName\\abc')
+ self.assertEqual(sanitize_path('\\\\?\\UNC/ComputerName/abc'), '\\\\?\\UNC\\ComputerName\\abc')
+
+ self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc')
+ self.assertEqual(sanitize_path('\\\\?\\C:/abc'), '\\\\?\\C:\\abc')
+ self.assertEqual(sanitize_path('\\\\?\\C:\\ab?c\\de:f'), '\\\\?\\C:\\ab#c\\de#f')
+ self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc')
+
+ self.assertEqual(
+ sanitize_path('youtube/%(uploader)s/%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s'),
+ 'youtube\\%(uploader)s\\%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s')
+
+ self.assertEqual(
+ sanitize_path('youtube/TheWreckingYard ./00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part'),
+ 'youtube\\TheWreckingYard #\\00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part')
+ self.assertEqual(sanitize_path('abc/def...'), 'abc\\def..#')
+ self.assertEqual(sanitize_path('abc.../def'), 'abc..#\\def')
+ self.assertEqual(sanitize_path('abc.../def...'), 'abc..#\\def..#')
+
+ self.assertEqual(sanitize_path('../abc'), '..\\abc')
+ self.assertEqual(sanitize_path('../../abc'), '..\\..\\abc')
+ self.assertEqual(sanitize_path('./abc'), 'abc')
+ self.assertEqual(sanitize_path('./../abc'), '..\\abc')
+
+ def test_prepend_extension(self):
+ self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext')
+ self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext')
+ self.assertEqual(prepend_extension('abc.unexpected_ext', 'temp', 'ext'), 'abc.unexpected_ext.temp')
+ self.assertEqual(prepend_extension('abc', 'temp'), 'abc.temp')
+ self.assertEqual(prepend_extension('.abc', 'temp'), '.abc.temp')
+ self.assertEqual(prepend_extension('.abc.ext', 'temp'), '.abc.temp.ext')
+
+ def test_replace_extension(self):
+ self.assertEqual(replace_extension('abc.ext', 'temp'), 'abc.temp')
+ self.assertEqual(replace_extension('abc.ext', 'temp', 'ext'), 'abc.temp')
+ self.assertEqual(replace_extension('abc.unexpected_ext', 'temp', 'ext'), 'abc.unexpected_ext.temp')
+ self.assertEqual(replace_extension('abc', 'temp'), 'abc.temp')
+ self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
+ self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
+
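The third argument exercised above is the expected current extension: when it matches (or is omitted), `prepend_extension` splices the new extension in before the real one and `replace_extension` swaps it, while a mismatch makes both fall back to plain appending. A sketch consistent with these assertions, not necessarily the shipped implementation:

```python
import os.path

def prepend_extension(filename, ext, expected_real_ext=None):
    # abc.ext + 'temp' -> abc.temp.ext, unless the real extension
    # differs from the expected one, in which case just append.
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext is None or real_ext[1:] == expected_real_ext:
        return '{0}.{1}{2}'.format(name, ext, real_ext)
    return '{0}.{1}'.format(filename, ext)

def replace_extension(filename, ext, expected_real_ext=None):
    # abc.ext + 'temp' -> abc.temp, with the same fallback rule.
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext is None or real_ext[1:] == expected_real_ext:
        return '{0}.{1}'.format(name, ext)
    return '{0}.{1}'.format(filename, ext)
```

Note that `os.path.splitext('.abc')` treats the whole name as the stem, which is exactly what the dotfile assertions above rely on.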
def test_ordered_set(self):
self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
self.assertEqual(orderedSet([]), [])
def test_unescape_html(self):
self.assertEqual(unescapeHTML('%20;'), '%20;')
+        self.assertEqual(unescapeHTML('&#47;'), '/')
+        self.assertEqual(unescapeHTML('&#x2F;'), '/')
self.assertEqual(
unescapeHTML('&eacute;'), 'é')
self.assertEqual(
unified_strdate('2/2/2015 6:47:40 PM', day_first=False),
'20150202')
+ self.assertEqual(unified_strdate('25-09-2014'), '20140925')
def test_find_xpath_attr(self):
testxml = '''<root>
self.assertEqual(find('media:song/media:author').text, 'The Author')
self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3')
+ def test_xpath_text(self):
+ testxml = '''<root>
+ <div>
+ <p>Foo</p>
+ </div>
+ </root>'''
+ doc = xml.etree.ElementTree.fromstring(testxml)
+ self.assertEqual(xpath_text(doc, 'div/p'), 'Foo')
+ self.assertTrue(xpath_text(doc, 'div/bar') is None)
+ self.assertRaises(ExtractorError, xpath_text, doc, 'div/bar', fatal=True)
+
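The contract pinned down by this test: `xpath_text` returns the text of the first matching element, `None` when nothing matches, and only raises when asked to. A stand-alone sketch (the `ExtractorError` class here is a stand-in for `youtube_dl.utils.ExtractorError`):

```python
import xml.etree.ElementTree as etree

class ExtractorError(Exception):
    """Stand-in for youtube_dl.utils.ExtractorError."""

def xpath_text(node, xpath, fatal=False):
    # Return the text of the first match, None when absent;
    # with fatal=True a missing element raises instead.
    n = node.find(xpath)
    if n is None or n.text is None:
        if fatal:
            raise ExtractorError('Could not find XML element %s' % xpath)
        return None
    return n.text

doc = etree.fromstring('<root><div><p>Foo</p></div></root>')
assert xpath_text(doc, 'div/p') == 'Foo'
assert xpath_text(doc, 'div/bar') is None
```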
def test_smuggle_url(self):
data = {"ö": "ö", "abc": [3]}
url = 'https://foo.bar/baz?x=y#a'
self.assertEqual(uppercase_escape('aä'), 'aä')
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
+ def test_lowercase_escape(self):
+ self.assertEqual(lowercase_escape('aä'), 'aä')
+ self.assertEqual(lowercase_escape('\\u0026'), '&')
+
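`lowercase_escape` is the `\uXXXX` counterpart of `uppercase_escape` tested just above: it decodes four-digit `\u` escapes and leaves everything else, including already-decoded non-ASCII text, alone. A minimal sketch of that behaviour:

```python
import re

def lowercase_escape(s):
    # Decode \uXXXX escape sequences only; plain text passes through.
    return re.sub(
        r'\\u[0-9a-fA-F]{4}',
        lambda m: chr(int(m.group(0)[2:], 16)),
        s)

assert lowercase_escape('\\u0026') == '&'
assert lowercase_escape('aä') == 'aä'
```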
def test_limit_length(self):
self.assertEqual(limit_length(None, 12), None)
self.assertEqual(limit_length('foo', 12), 'foo')
self.assertEqual(d['x'], 1)
self.assertEqual(d['y'], 'a')
+ on = js_to_json('["abc", "def",]')
+ self.assertEqual(json.loads(on), ['abc', 'def'])
+
+ on = js_to_json('{"abc": "def",}')
+ self.assertEqual(json.loads(on), {'abc': 'def'})
+
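These two cases pin down one JavaScript-ism that strict JSON forbids: a trailing comma before a closing bracket. The real `js_to_json` handles more (single-quoted strings among them), but the comma rule alone looks roughly like this; note that a full converter must also avoid touching commas inside string literals:

```python
import json
import re

def strip_trailing_commas(code):
    # Drop a comma that sits directly before ']' or '}'.
    return re.sub(r',\s*([\]}])', r'\1', code)

assert json.loads(strip_trailing_commas('["abc", "def",]')) == ['abc', 'def']
assert json.loads(strip_trailing_commas('{"abc": "def",}')) == {'abc': 'def'}
```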
def test_clean_html(self):
self.assertEqual(clean_html('a:\nb'), 'a: b')
self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
'like_count > 100 & dislike_count <? 50 & description',
{'like_count': 190, 'dislike_count': 10}))
+ def test_parse_dfxp_time_expr(self):
+ self.assertEqual(parse_dfxp_time_expr(None), 0.0)
+ self.assertEqual(parse_dfxp_time_expr(''), 0.0)
+ self.assertEqual(parse_dfxp_time_expr('0.1'), 0.1)
+ self.assertEqual(parse_dfxp_time_expr('0.1s'), 0.1)
+ self.assertEqual(parse_dfxp_time_expr('00:00:01'), 1.0)
+ self.assertEqual(parse_dfxp_time_expr('00:00:01.100'), 1.1)
+
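Spelled out, the DFXP/TTML clock formats asserted above are: empty or None maps to 0.0, a bare or `s`-suffixed offset is seconds, and `HH:MM:SS` takes an optional fraction. A sketch that satisfies exactly these cases (the shipped parser may accept more variants):

```python
import re

def parse_dfxp_time_expr(time_expr):
    if not time_expr:
        return 0.0
    # '0.1' or '0.1s': a plain offset in seconds
    mobj = re.match(r'^(?P<offset>\d+(?:\.\d+)?)s?$', time_expr)
    if mobj:
        return float(mobj.group('offset'))
    # '00:00:01' or '00:00:01.100': a clock time
    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:\.\d+)?)$', time_expr)
    if mobj:
        return (3600 * int(mobj.group(1)) +
                60 * int(mobj.group(2)) + float(mobj.group(3)))
    return 0.0
```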
+ def test_dfxp2srt(self):
+ dfxp_data = '''<?xml version="1.0" encoding="UTF-8"?>
+ <tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
+ <body>
+ <div xml:lang="en">
+ <p begin="0" end="1">The following line contains Chinese characters and special symbols</p>
+ <p begin="1" end="2">第二行<br/>♪♪</p>
+ <p begin="2" dur="1"><span>Third<br/>Line</span></p>
+ </div>
+ </body>
+ </tt>'''
+ srt_data = '''1
+00:00:00,000 --> 00:00:01,000
+The following line contains Chinese characters and special symbols
+
+2
+00:00:01,000 --> 00:00:02,000
+第二行
+♪♪
+
+3
+00:00:02,000 --> 00:00:03,000
+Third
+Line
+
+'''
+ self.assertEqual(dfxp2srt(dfxp_data), srt_data)
+
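The expected SRT above also shows the timecode format the converter has to emit: zero-padded fields with comma-separated milliseconds. Turning the float seconds from `parse_dfxp_time_expr` into that form is a one-liner along these lines (a sketch, not necessarily the shipped helper):

```python
def srt_subtitles_timecode(seconds):
    # Format float seconds as SRT 'HH:MM:SS,mmm'; %d truncates the floats.
    return '%02d:%02d:%02d,%03d' % (
        seconds / 3600, (seconds % 3600) / 60,
        seconds % 60, (seconds % 1) * 1000)

assert srt_subtitles_timecode(0) == '00:00:00,000'
assert srt_subtitles_timecode(1.1) == '00:00:01,100'
```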
if __name__ == '__main__':
unittest.main()
.IP
.nf
\f[C]
-\-h,\ \-\-help\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ print\ this\ help\ text\ and\ exit
-\-\-version\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ print\ program\ version\ and\ exit
-\-U,\ \-\-update\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ update\ this\ program\ to\ latest\ version.\ Make
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ sure\ that\ you\ have\ sufficient\ permissions
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (run\ with\ sudo\ if\ needed)
-\-i,\ \-\-ignore\-errors\ \ \ \ \ \ \ \ \ \ \ \ \ \ continue\ on\ download\ errors,\ for\ example\ to
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ skip\ unavailable\ videos\ in\ a\ playlist
-\-\-abort\-on\-error\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Abort\ downloading\ of\ further\ videos\ (in\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ playlist\ or\ the\ command\ line)\ if\ an\ error
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ occurs
-\-\-dump\-user\-agent\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ display\ the\ current\ browser\ identification
-\-\-list\-extractors\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ List\ all\ supported\ extractors\ and\ the\ URLs
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ they\ would\ handle
-\-\-extractor\-descriptions\ \ \ \ \ \ \ \ \ Output\ descriptions\ of\ all\ supported
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ extractors
-\-\-default\-search\ PREFIX\ \ \ \ \ \ \ \ \ \ Use\ this\ prefix\ for\ unqualified\ URLs.\ For
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ example\ "gvsearch2:"\ downloads\ two\ videos
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ from\ google\ videos\ for\ \ youtube\-dl\ "large
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ apple".\ Use\ the\ value\ "auto"\ to\ let
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ youtube\-dl\ guess\ ("auto_warning"\ to\ emit\ a
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ warning\ when\ guessing).\ "error"\ just\ throws
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ an\ error.\ The\ default\ value\ "fixup_error"
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ repairs\ broken\ URLs,\ but\ emits\ an\ error\ if
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ this\ is\ not\ possible\ instead\ of\ searching.
-\-\-ignore\-config\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ read\ configuration\ files.\ When\ given
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ in\ the\ global\ configuration\ file\ /etc
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ /youtube\-dl.conf:\ Do\ not\ read\ the\ user
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ configuration\ in\ ~/.config/youtube\-
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ dl/config\ (%APPDATA%/youtube\-dl/config.txt
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ on\ Windows)
-\-\-flat\-playlist\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ extract\ the\ videos\ of\ a\ playlist,
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ only\ list\ them.
-\-\-no\-color\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ emit\ color\ codes\ in\ output.
+\-h,\ \-\-help\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Print\ this\ help\ text\ and\ exit
+\-\-version\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Print\ program\ version\ and\ exit
+\-U,\ \-\-update\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Update\ this\ program\ to\ latest\ version.\ Make\ sure\ that\ you\ have\ sufficient\ permissions\ (run\ with\ sudo\ if\ needed)
+\-i,\ \-\-ignore\-errors\ \ \ \ \ \ \ \ \ \ \ \ \ \ Continue\ on\ download\ errors,\ for\ example\ to\ skip\ unavailable\ videos\ in\ a\ playlist
+\-\-abort\-on\-error\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Abort\ downloading\ of\ further\ videos\ (in\ the\ playlist\ or\ the\ command\ line)\ if\ an\ error\ occurs
+\-\-dump\-user\-agent\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Display\ the\ current\ browser\ identification
+\-\-list\-extractors\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ List\ all\ supported\ extractors\ and\ the\ URLs\ they\ would\ handle
+\-\-extractor\-descriptions\ \ \ \ \ \ \ \ \ Output\ descriptions\ of\ all\ supported\ extractors
+\-\-default\-search\ PREFIX\ \ \ \ \ \ \ \ \ \ Use\ this\ prefix\ for\ unqualified\ URLs.\ For\ example\ "gvsearch2:"\ downloads\ two\ videos\ from\ google\ videos\ for\ youtube\-dl\ "large\ apple".
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Use\ the\ value\ "auto"\ to\ let\ youtube\-dl\ guess\ ("auto_warning"\ to\ emit\ a\ warning\ when\ guessing).\ "error"\ just\ throws\ an\ error.\ The
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ default\ value\ "fixup_error"\ repairs\ broken\ URLs,\ but\ emits\ an\ error\ if\ this\ is\ not\ possible\ instead\ of\ searching.
+\-\-ignore\-config\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ read\ configuration\ files.\ When\ given\ in\ the\ global\ configuration\ file\ /etc/youtube\-dl.conf:\ Do\ not\ read\ the\ user\ configuration
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ in\ ~/.config/youtube\-dl/config\ (%APPDATA%/youtube\-dl/config.txt\ on\ Windows)
+\-\-flat\-playlist\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ extract\ the\ videos\ of\ a\ playlist,\ only\ list\ them.
+\-\-no\-color\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ emit\ color\ codes\ in\ output
\f[]
.fi
.SS Network Options:
.IP
.nf
\f[C]
-\-\-proxy\ URL\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Use\ the\ specified\ HTTP/HTTPS\ proxy.\ Pass\ in
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ an\ empty\ string\ (\-\-proxy\ "")\ for\ direct
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ connection
+\-\-proxy\ URL\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Use\ the\ specified\ HTTP/HTTPS\ proxy.\ Pass\ in\ an\ empty\ string\ (\-\-proxy\ "")\ for\ direct\ connection
\-\-socket\-timeout\ SECONDS\ \ \ \ \ \ \ \ \ Time\ to\ wait\ before\ giving\ up,\ in\ seconds
-\-\-source\-address\ IP\ \ \ \ \ \ \ \ \ \ \ \ \ \ Client\-side\ IP\ address\ to\ bind\ to
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (experimental)
-\-4,\ \-\-force\-ipv4\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Make\ all\ connections\ via\ IPv4
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (experimental)
-\-6,\ \-\-force\-ipv6\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Make\ all\ connections\ via\ IPv6
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (experimental)
+\-\-source\-address\ IP\ \ \ \ \ \ \ \ \ \ \ \ \ \ Client\-side\ IP\ address\ to\ bind\ to\ (experimental)
+\-4,\ \-\-force\-ipv4\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Make\ all\ connections\ via\ IPv4\ (experimental)
+\-6,\ \-\-force\-ipv6\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Make\ all\ connections\ via\ IPv6\ (experimental)
+\-\-cn\-verification\-proxy\ URL\ \ \ \ \ \ Use\ this\ proxy\ to\ verify\ the\ IP\ address\ for\ some\ Chinese\ sites.\ The\ default\ proxy\ specified\ by\ \-\-proxy\ (or\ none,\ if\ the\ option\ is
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ not\ present)\ is\ used\ for\ the\ actual\ downloading.\ (experimental)
\f[]
.fi
.SS Video Selection:
.IP
.nf
\f[C]
-\-\-playlist\-start\ NUMBER\ \ \ \ \ \ \ \ \ \ playlist\ video\ to\ start\ at\ (default\ is\ 1)
-\-\-playlist\-end\ NUMBER\ \ \ \ \ \ \ \ \ \ \ \ playlist\ video\ to\ end\ at\ (default\ is\ last)
-\-\-playlist\-items\ ITEM_SPEC\ \ \ \ \ \ \ playlist\ video\ items\ to\ download.\ Specify
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ indices\ of\ the\ videos\ in\ the\ playlist
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ seperated\ by\ commas\ like:\ "\-\-playlist\-items
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 1,2,5,8"\ if\ you\ want\ to\ download\ videos
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ indexed\ 1,\ 2,\ 5,\ 8\ in\ the\ playlist.\ You\ can
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ specify\ range:\ "\-\-playlist\-items
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 1\-3,7,10\-13",\ it\ will\ download\ the\ videos
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ at\ index\ 1,\ 2,\ 3,\ 7,\ 10,\ 11,\ 12\ and\ 13.
-\-\-match\-title\ REGEX\ \ \ \ \ \ \ \ \ \ \ \ \ \ download\ only\ matching\ titles\ (regex\ or
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ caseless\ sub\-string)
-\-\-reject\-title\ REGEX\ \ \ \ \ \ \ \ \ \ \ \ \ skip\ download\ for\ matching\ titles\ (regex\ or
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ caseless\ sub\-string)
+\-\-playlist\-start\ NUMBER\ \ \ \ \ \ \ \ \ \ Playlist\ video\ to\ start\ at\ (default\ is\ 1)
+\-\-playlist\-end\ NUMBER\ \ \ \ \ \ \ \ \ \ \ \ Playlist\ video\ to\ end\ at\ (default\ is\ last)
+\-\-playlist\-items\ ITEM_SPEC\ \ \ \ \ \ \ Playlist\ video\ items\ to\ download.\ Specify\ indices\ of\ the\ videos\ in\ the\ playlist\ separated\ by\ commas\ like:\ "\-\-playlist\-items\ 1,2,5,8"
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ if\ you\ want\ to\ download\ videos\ indexed\ 1,\ 2,\ 5,\ 8\ in\ the\ playlist.\ You\ can\ specify\ range:\ "\-\-playlist\-items\ 1\-3,7,10\-13",\ it\ will
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ download\ the\ videos\ at\ index\ 1,\ 2,\ 3,\ 7,\ 10,\ 11,\ 12\ and\ 13.
+\-\-match\-title\ REGEX\ \ \ \ \ \ \ \ \ \ \ \ \ \ Download\ only\ matching\ titles\ (regex\ or\ caseless\ sub\-string)
+\-\-reject\-title\ REGEX\ \ \ \ \ \ \ \ \ \ \ \ \ Skip\ download\ for\ matching\ titles\ (regex\ or\ caseless\ sub\-string)
\-\-max\-downloads\ NUMBER\ \ \ \ \ \ \ \ \ \ \ Abort\ after\ downloading\ NUMBER\ files
-\-\-min\-filesize\ SIZE\ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ download\ any\ videos\ smaller\ than
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ SIZE\ (e.g.\ 50k\ or\ 44.6m)
-\-\-max\-filesize\ SIZE\ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ download\ any\ videos\ larger\ than\ SIZE
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (e.g.\ 50k\ or\ 44.6m)
-\-\-date\ DATE\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ download\ only\ videos\ uploaded\ in\ this\ date
-\-\-datebefore\ DATE\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ download\ only\ videos\ uploaded\ on\ or\ before
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ this\ date\ (i.e.\ inclusive)
-\-\-dateafter\ DATE\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ download\ only\ videos\ uploaded\ on\ or\ after
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ this\ date\ (i.e.\ inclusive)
-\-\-min\-views\ COUNT\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ download\ any\ videos\ with\ less\ than
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ COUNT\ views
-\-\-max\-views\ COUNT\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ download\ any\ videos\ with\ more\ than
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ COUNT\ views
-\-\-match\-filter\ FILTER\ \ \ \ \ \ \ \ \ \ \ \ (Experimental)\ Generic\ video\ filter.
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Specify\ any\ key\ (see\ help\ for\ \-o\ for\ a\ list
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ of\ available\ keys)\ to\ match\ if\ the\ key\ is
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ present,\ !key\ to\ check\ if\ the\ key\ is\ not
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ present,key\ >\ NUMBER\ (like\ "comment_count\ >
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 12",\ also\ works\ with\ >=,\ <,\ <=,\ !=,\ =)\ to
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ compare\ against\ a\ number,\ and\ &\ to\ require
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ multiple\ matches.\ Values\ which\ are\ not
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ known\ are\ excluded\ unless\ you\ put\ a
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ question\ mark\ (?)\ after\ the\ operator.For
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ example,\ to\ only\ match\ videos\ that\ have
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ been\ liked\ more\ than\ 100\ times\ and\ disliked
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ less\ than\ 50\ times\ (or\ the\ dislike
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ functionality\ is\ not\ available\ at\ the\ given
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ service),\ but\ who\ also\ have\ a\ description,
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ \ \-\-match\-filter\ "like_count\ >\ 100\ &
+\-\-min\-filesize\ SIZE\ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ download\ any\ videos\ smaller\ than\ SIZE\ (e.g.\ 50k\ or\ 44.6m)
+\-\-max\-filesize\ SIZE\ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ download\ any\ videos\ larger\ than\ SIZE\ (e.g.\ 50k\ or\ 44.6m)
+\-\-date\ DATE\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Download\ only\ videos\ uploaded\ in\ this\ date
+\-\-datebefore\ DATE\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Download\ only\ videos\ uploaded\ on\ or\ before\ this\ date\ (i.e.\ inclusive)
+\-\-dateafter\ DATE\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Download\ only\ videos\ uploaded\ on\ or\ after\ this\ date\ (i.e.\ inclusive)
+\-\-min\-views\ COUNT\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ download\ any\ videos\ with\ less\ than\ COUNT\ views
+\-\-max\-views\ COUNT\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ download\ any\ videos\ with\ more\ than\ COUNT\ views
+\-\-match\-filter\ FILTER\ \ \ \ \ \ \ \ \ \ \ \ Generic\ video\ filter\ (experimental).\ Specify\ any\ key\ (see\ help\ for\ \-o\ for\ a\ list\ of\ available\ keys)\ to\ match\ if\ the\ key\ is\ present,
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ !key\ to\ check\ if\ the\ key\ is\ not\ present,key\ >\ NUMBER\ (like\ "comment_count\ >\ 12",\ also\ works\ with\ >=,\ <,\ <=,\ !=,\ =)\ to\ compare\ against
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ a\ number,\ and\ &\ to\ require\ multiple\ matches.\ Values\ which\ are\ not\ known\ are\ excluded\ unless\ you\ put\ a\ question\ mark\ (?)\ after\ the
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ operator.\ For\ example,\ to\ only\ match\ videos\ that\ have\ been\ liked\ more\ than\ 100\ times\ and\ disliked\ less\ than\ 50\ times\ (or\ the\ dislike
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ functionality\ is\ not\ available\ at\ the\ given\ service),\ but\ who\ also\ have\ a\ description,\ use\ \ \-\-match\-filter\ "like_count\ >\ 100\ &
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ dislike_count\ <?\ 50\ &\ description"\ .
-\-\-no\-playlist\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ If\ the\ URL\ refers\ to\ a\ video\ and\ a
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ playlist,\ download\ only\ the\ video.
-\-\-yes\-playlist\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ If\ the\ URL\ refers\ to\ a\ video\ and\ a
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ playlist,\ download\ the\ playlist.
-\-\-age\-limit\ YEARS\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ download\ only\ videos\ suitable\ for\ the\ given
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ age
-\-\-download\-archive\ FILE\ \ \ \ \ \ \ \ \ \ Download\ only\ videos\ not\ listed\ in\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ archive\ file.\ Record\ the\ IDs\ of\ all
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ downloaded\ videos\ in\ it.
-\-\-include\-ads\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Download\ advertisements\ as\ well
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (experimental)
+\-\-no\-playlist\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Download\ only\ the\ video,\ if\ the\ URL\ refers\ to\ a\ video\ and\ a\ playlist.
+\-\-yes\-playlist\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Download\ the\ playlist,\ if\ the\ URL\ refers\ to\ a\ video\ and\ a\ playlist.
+\-\-age\-limit\ YEARS\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Download\ only\ videos\ suitable\ for\ the\ given\ age
+\-\-download\-archive\ FILE\ \ \ \ \ \ \ \ \ \ Download\ only\ videos\ not\ listed\ in\ the\ archive\ file.\ Record\ the\ IDs\ of\ all\ downloaded\ videos\ in\ it.
+\-\-include\-ads\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Download\ advertisements\ as\ well\ (experimental)
\f[]
.fi
.SS Download Options:
.IP
.nf
\f[C]
-\-r,\ \-\-rate\-limit\ LIMIT\ \ \ \ \ \ \ \ \ \ \ maximum\ download\ rate\ in\ bytes\ per\ second
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (e.g.\ 50K\ or\ 4.2M)
-\-R,\ \-\-retries\ RETRIES\ \ \ \ \ \ \ \ \ \ \ \ number\ of\ retries\ (default\ is\ 10),\ or
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ "infinite".
-\-\-buffer\-size\ SIZE\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ size\ of\ download\ buffer\ (e.g.\ 1024\ or\ 16K)
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (default\ is\ 1024)
-\-\-no\-resize\-buffer\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ automatically\ adjust\ the\ buffer
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ size.\ By\ default,\ the\ buffer\ size\ is
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ automatically\ resized\ from\ an\ initial\ value
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ of\ SIZE.
+\-r,\ \-\-rate\-limit\ LIMIT\ \ \ \ \ \ \ \ \ \ \ Maximum\ download\ rate\ in\ bytes\ per\ second\ (e.g.\ 50K\ or\ 4.2M)
+\-R,\ \-\-retries\ RETRIES\ \ \ \ \ \ \ \ \ \ \ \ Number\ of\ retries\ (default\ is\ 10),\ or\ "infinite".
+\-\-buffer\-size\ SIZE\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Size\ of\ download\ buffer\ (e.g.\ 1024\ or\ 16K)\ (default\ is\ 1024)
+\-\-no\-resize\-buffer\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ automatically\ adjust\ the\ buffer\ size.\ By\ default,\ the\ buffer\ size\ is\ automatically\ resized\ from\ an\ initial\ value\ of\ SIZE.
\-\-playlist\-reverse\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Download\ playlist\ videos\ in\ reverse\ order
-\-\-xattr\-set\-filesize\ \ \ \ \ \ \ \ \ \ \ \ \ (experimental)\ set\ file\ xattribute
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ ytdl.filesize\ with\ expected\ filesize
-\-\-hls\-prefer\-native\ \ \ \ \ \ \ \ \ \ \ \ \ \ (experimental)\ Use\ the\ native\ HLS
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ downloader\ instead\ of\ ffmpeg.
-\-\-external\-downloader\ COMMAND\ \ \ \ (experimental)\ Use\ the\ specified\ external
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ downloader.\ Currently\ supports
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ aria2c,curl,wget
+\-\-xattr\-set\-filesize\ \ \ \ \ \ \ \ \ \ \ \ \ Set\ file\ xattribute\ ytdl.filesize\ with\ expected\ filesize\ (experimental)
+\-\-hls\-prefer\-native\ \ \ \ \ \ \ \ \ \ \ \ \ \ Use\ the\ native\ HLS\ downloader\ instead\ of\ ffmpeg\ (experimental)
+\-\-external\-downloader\ COMMAND\ \ \ \ Use\ the\ specified\ external\ downloader.\ Currently\ supports\ aria2c,curl,wget
+\-\-external\-downloader\-args\ ARGS\ \ Give\ these\ arguments\ to\ the\ external\ downloader
\f[]
.fi
.SS Filesystem Options:
.IP
.nf
\f[C]
-\-a,\ \-\-batch\-file\ FILE\ \ \ \ \ \ \ \ \ \ \ \ file\ containing\ URLs\ to\ download\ (\[aq]\-\[aq]\ for
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ stdin)
-\-\-id\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ only\ video\ ID\ in\ file\ name
-\-o,\ \-\-output\ TEMPLATE\ \ \ \ \ \ \ \ \ \ \ \ output\ filename\ template.\ Use\ %(title)s\ to
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ get\ the\ title,\ %(uploader)s\ for\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ uploader\ name,\ %(uploader_id)s\ for\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ uploader\ nickname\ if\ different,
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(autonumber)s\ to\ get\ an\ automatically
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ incremented\ number,\ %(ext)s\ for\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ filename\ extension,\ %(format)s\ for\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ format\ description\ (like\ "22\ \-\ 1280x720"\ or
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ "HD"),\ %(format_id)s\ for\ the\ unique\ id\ of
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ the\ format\ (like\ Youtube\[aq]s\ itags:\ "137"),
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(upload_date)s\ for\ the\ upload\ date
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (YYYYMMDD),\ %(extractor)s\ for\ the\ provider
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (youtube,\ metacafe,\ etc),\ %(id)s\ for\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ video\ id,\ %(playlist_title)s,
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(playlist_id)s,\ or\ %(playlist)s\ (=title\ if
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ present,\ ID\ otherwise)\ for\ the\ playlist\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ video\ is\ in,\ %(playlist_index)s\ for\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ position\ in\ the\ playlist.\ %(height)s\ and
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(width)s\ for\ the\ width\ and\ height\ of\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ video\ format.\ %(resolution)s\ for\ a\ textual
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ description\ of\ the\ resolution\ of\ the\ video
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ format.\ %%\ for\ a\ literal\ percent.\ Use\ \-\ to
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ output\ to\ stdout.\ Can\ also\ be\ used\ to
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ download\ to\ a\ different\ directory,\ for
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ example\ with\ \-o\ \[aq]/my/downloads/%(uploader)s
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ /%(title)s\-%(id)s.%(ext)s\[aq]\ .
-\-\-autonumber\-size\ NUMBER\ \ \ \ \ \ \ \ \ Specifies\ the\ number\ of\ digits\ in
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(autonumber)s\ when\ it\ is\ present\ in\ output
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ filename\ template\ or\ \-\-auto\-number\ option
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ is\ given
-\-\-restrict\-filenames\ \ \ \ \ \ \ \ \ \ \ \ \ Restrict\ filenames\ to\ only\ ASCII
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ characters,\ and\ avoid\ "&"\ and\ spaces\ in
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ filenames
-\-A,\ \-\-auto\-number\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ [deprecated;\ use\ \ \-o
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ "%(autonumber)s\-%(title)s.%(ext)s"\ ]\ number
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ downloaded\ files\ starting\ from\ 00000
-\-t,\ \-\-title\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ [deprecated]\ use\ title\ in\ file\ name
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (default)
-\-l,\ \-\-literal\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ [deprecated]\ alias\ of\ \-\-title
-\-w,\ \-\-no\-overwrites\ \ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ overwrite\ files
-\-c,\ \-\-continue\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ force\ resume\ of\ partially\ downloaded\ files.
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ By\ default,\ youtube\-dl\ will\ resume
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ downloads\ if\ possible.
-\-\-no\-continue\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ resume\ partially\ downloaded\ files
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (restart\ from\ beginning)
-\-\-no\-part\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ use\ .part\ files\ \-\ write\ directly
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ into\ output\ file
-\-\-no\-mtime\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ use\ the\ Last\-modified\ header\ to\ set
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ the\ file\ modification\ time
-\-\-write\-description\ \ \ \ \ \ \ \ \ \ \ \ \ \ write\ video\ description\ to\ a\ .description
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ file
-\-\-write\-info\-json\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ write\ video\ metadata\ to\ a\ .info.json\ file
-\-\-write\-annotations\ \ \ \ \ \ \ \ \ \ \ \ \ \ write\ video\ annotations\ to\ a\ .annotation
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ file
-\-\-load\-info\ FILE\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ json\ file\ containing\ the\ video\ information
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (created\ with\ the\ "\-\-write\-json"\ option)
-\-\-cookies\ FILE\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ file\ to\ read\ cookies\ from\ and\ dump\ cookie
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ jar\ in
-\-\-cache\-dir\ DIR\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Location\ in\ the\ filesystem\ where\ youtube\-dl
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ can\ store\ some\ downloaded\ information
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ permanently.\ By\ default\ $XDG_CACHE_HOME
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ /youtube\-dl\ or\ ~/.cache/youtube\-dl\ .\ At\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ moment,\ only\ YouTube\ player\ files\ (for
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ videos\ with\ obfuscated\ signatures)\ are
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ cached,\ but\ that\ may\ change.
+\-a,\ \-\-batch\-file\ FILE\ \ \ \ \ \ \ \ \ \ \ \ File\ containing\ URLs\ to\ download\ (\[aq]\-\[aq]\ for\ stdin)
+\-\-id\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Use\ only\ video\ ID\ in\ file\ name
+\-o,\ \-\-output\ TEMPLATE\ \ \ \ \ \ \ \ \ \ \ \ Output\ filename\ template.\ Use\ %(title)s\ to\ get\ the\ title,\ %(uploader)s\ for\ the\ uploader\ name,\ %(uploader_id)s\ for\ the\ uploader
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ nickname\ if\ different,\ %(autonumber)s\ to\ get\ an\ automatically\ incremented\ number,\ %(ext)s\ for\ the\ filename\ extension,\ %(format)s\ for
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ the\ format\ description\ (like\ "22\ \-\ 1280x720"\ or\ "HD"),\ %(format_id)s\ for\ the\ unique\ id\ of\ the\ format\ (like\ YouTube\[aq]s\ itags:\ "137"),
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(upload_date)s\ for\ the\ upload\ date\ (YYYYMMDD),\ %(extractor)s\ for\ the\ provider\ (youtube,\ metacafe,\ etc),\ %(id)s\ for\ the\ video\ id,
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(playlist_title)s,\ %(playlist_id)s,\ or\ %(playlist)s\ (=title\ if\ present,\ ID\ otherwise)\ for\ the\ playlist\ the\ video\ is\ in,
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(playlist_index)s\ for\ the\ position\ in\ the\ playlist.\ %(height)s\ and\ %(width)s\ for\ the\ width\ and\ height\ of\ the\ video\ format.
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(resolution)s\ for\ a\ textual\ description\ of\ the\ resolution\ of\ the\ video\ format.\ %%\ for\ a\ literal\ percent.\ Use\ \-\ to\ output\ to\ stdout.
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Can\ also\ be\ used\ to\ download\ to\ a\ different\ directory,\ for\ example\ with\ \-o\ \[aq]/my/downloads/%(uploader)s/%(title)s\-%(id)s.%(ext)s\[aq]\ .
+\-\-autonumber\-size\ NUMBER\ \ \ \ \ \ \ \ \ Specify\ the\ number\ of\ digits\ in\ %(autonumber)s\ when\ it\ is\ present\ in\ output\ filename\ template\ or\ \-\-auto\-number\ option\ is\ given
+\-\-restrict\-filenames\ \ \ \ \ \ \ \ \ \ \ \ \ Restrict\ filenames\ to\ only\ ASCII\ characters,\ and\ avoid\ "&"\ and\ spaces\ in\ filenames
+\-A,\ \-\-auto\-number\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ [deprecated;\ use\ \ \-o\ "%(autonumber)s\-%(title)s.%(ext)s"\ ]\ Number\ downloaded\ files\ starting\ from\ 00000
+\-t,\ \-\-title\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ [deprecated]\ Use\ title\ in\ file\ name\ (default)
+\-l,\ \-\-literal\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ [deprecated]\ Alias\ of\ \-\-title
+\-w,\ \-\-no\-overwrites\ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ overwrite\ files
+\-c,\ \-\-continue\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Force\ resume\ of\ partially\ downloaded\ files.\ By\ default,\ youtube\-dl\ will\ resume\ downloads\ if\ possible.
+\-\-no\-continue\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ resume\ partially\ downloaded\ files\ (restart\ from\ beginning)
+\-\-no\-part\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ use\ .part\ files\ \-\ write\ directly\ into\ output\ file
+\-\-no\-mtime\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ use\ the\ Last\-modified\ header\ to\ set\ the\ file\ modification\ time
+\-\-write\-description\ \ \ \ \ \ \ \ \ \ \ \ \ \ Write\ video\ description\ to\ a\ .description\ file
+\-\-write\-info\-json\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Write\ video\ metadata\ to\ a\ .info.json\ file
+\-\-write\-annotations\ \ \ \ \ \ \ \ \ \ \ \ \ \ Write\ video\ annotations\ to\ a\ .annotations.xml\ file
+\-\-load\-info\ FILE\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ JSON\ file\ containing\ the\ video\ information\ (created\ with\ the\ "\-\-write\-info\-json"\ option)
+\-\-cookies\ FILE\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ File\ to\ read\ cookies\ from\ and\ dump\ cookie\ jar\ in
+\-\-cache\-dir\ DIR\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Location\ in\ the\ filesystem\ where\ youtube\-dl\ can\ store\ some\ downloaded\ information\ permanently.\ By\ default\ $XDG_CACHE_HOME/youtube\-dl
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ or\ ~/.cache/youtube\-dl\ .\ At\ the\ moment,\ only\ YouTube\ player\ files\ (for\ videos\ with\ obfuscated\ signatures)\ are\ cached,\ but\ that\ may
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ change.
\-\-no\-cache\-dir\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Disable\ filesystem\ caching
\-\-rm\-cache\-dir\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Delete\ all\ filesystem\ cache\ files
\f[]
.IP
.nf
\f[C]
-\-\-write\-thumbnail\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ write\ thumbnail\ image\ to\ disk
-\-\-write\-all\-thumbnails\ \ \ \ \ \ \ \ \ \ \ write\ all\ thumbnail\ image\ formats\ to\ disk
-\-\-list\-thumbnails\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Simulate\ and\ list\ all\ available\ thumbnail
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ formats
+\-\-write\-thumbnail\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Write\ thumbnail\ image\ to\ disk
+\-\-write\-all\-thumbnails\ \ \ \ \ \ \ \ \ \ \ Write\ all\ thumbnail\ image\ formats\ to\ disk
+\-\-list\-thumbnails\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Simulate\ and\ list\ all\ available\ thumbnail\ formats
\f[]
.fi
.SS Verbosity / Simulation Options:
.IP
.nf
\f[C]
-\-q,\ \-\-quiet\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ activates\ quiet\ mode
+\-q,\ \-\-quiet\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Activate\ quiet\ mode
\-\-no\-warnings\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Ignore\ warnings
-\-s,\ \-\-simulate\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ download\ the\ video\ and\ do\ not\ write
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ anything\ to\ disk
-\-\-skip\-download\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ download\ the\ video
-\-g,\ \-\-get\-url\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ URL
-\-e,\ \-\-get\-title\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ title
-\-\-get\-id\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ id
-\-\-get\-thumbnail\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ thumbnail\ URL
-\-\-get\-description\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ video\ description
-\-\-get\-duration\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ video\ length
-\-\-get\-filename\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ output\ filename
-\-\-get\-format\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ output\ format
-\-j,\ \-\-dump\-json\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ JSON\ information.
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ See\ \-\-output\ for\ a\ description\ of\ available
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ keys.
-\-J,\ \-\-dump\-single\-json\ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ JSON\ information
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ for\ each\ command\-line\ argument.\ If\ the\ URL
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ refers\ to\ a\ playlist,\ dump\ the\ whole
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ playlist\ information\ in\ a\ single\ line.
-\-\-print\-json\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Be\ quiet\ and\ print\ the\ video\ information\ as
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ JSON\ (video\ is\ still\ being\ downloaded).
-\-\-newline\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ output\ progress\ bar\ as\ new\ lines
-\-\-no\-progress\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ print\ progress\ bar
-\-\-console\-title\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ display\ progress\ in\ console\ titlebar
-\-v,\ \-\-verbose\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ print\ various\ debugging\ information
-\-\-dump\-intermediate\-pages\ \ \ \ \ \ \ \ print\ downloaded\ pages\ to\ debug\ problems
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (very\ verbose)
-\-\-write\-pages\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Write\ downloaded\ intermediary\ pages\ to
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ files\ in\ the\ current\ directory\ to\ debug
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ problems
+\-s,\ \-\-simulate\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ download\ the\ video\ and\ do\ not\ write\ anything\ to\ disk
+\-\-skip\-download\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ download\ the\ video
+\-g,\ \-\-get\-url\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Simulate,\ quiet\ but\ print\ URL
+\-e,\ \-\-get\-title\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Simulate,\ quiet\ but\ print\ title
+\-\-get\-id\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Simulate,\ quiet\ but\ print\ id
+\-\-get\-thumbnail\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Simulate,\ quiet\ but\ print\ thumbnail\ URL
+\-\-get\-description\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Simulate,\ quiet\ but\ print\ video\ description
+\-\-get\-duration\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Simulate,\ quiet\ but\ print\ video\ length
+\-\-get\-filename\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Simulate,\ quiet\ but\ print\ output\ filename
+\-\-get\-format\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Simulate,\ quiet\ but\ print\ output\ format
+\-j,\ \-\-dump\-json\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Simulate,\ quiet\ but\ print\ JSON\ information.\ See\ \-\-output\ for\ a\ description\ of\ available\ keys.
+\-J,\ \-\-dump\-single\-json\ \ \ \ \ \ \ \ \ \ \ Simulate,\ quiet\ but\ print\ JSON\ information\ for\ each\ command\-line\ argument.\ If\ the\ URL\ refers\ to\ a\ playlist,\ dump\ the\ whole\ playlist
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ information\ in\ a\ single\ line.
+\-\-print\-json\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Be\ quiet\ and\ print\ the\ video\ information\ as\ JSON\ (video\ is\ still\ being\ downloaded).
+\-\-newline\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Output\ progress\ bar\ as\ new\ lines
+\-\-no\-progress\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ print\ progress\ bar
+\-\-console\-title\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Display\ progress\ in\ console\ titlebar
+\-v,\ \-\-verbose\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Print\ various\ debugging\ information
+\-\-dump\-pages\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Print\ downloaded\ pages\ to\ debug\ problems\ (very\ verbose)
+\-\-write\-pages\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Write\ downloaded\ intermediary\ pages\ to\ files\ in\ the\ current\ directory\ to\ debug\ problems
\-\-print\-traffic\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Display\ sent\ and\ read\ HTTP\ traffic
-\-C,\ \-\-call\-home\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Contact\ the\ youtube\-dl\ server\ for
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ debugging.
-\-\-no\-call\-home\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ NOT\ contact\ the\ youtube\-dl\ server\ for
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ debugging.
+\-C,\ \-\-call\-home\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Contact\ the\ youtube\-dl\ server\ for\ debugging
+\-\-no\-call\-home\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ NOT\ contact\ the\ youtube\-dl\ server\ for\ debugging
\f[]
.fi
.SS Workarounds:
.nf
\f[C]
\-\-encoding\ ENCODING\ \ \ \ \ \ \ \ \ \ \ \ \ \ Force\ the\ specified\ encoding\ (experimental)
-\-\-no\-check\-certificate\ \ \ \ \ \ \ \ \ \ \ Suppress\ HTTPS\ certificate\ validation.
-\-\-prefer\-insecure\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Use\ an\ unencrypted\ connection\ to\ retrieve
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ information\ about\ the\ video.\ (Currently
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ supported\ only\ for\ YouTube)
-\-\-user\-agent\ UA\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ specify\ a\ custom\ user\ agent
-\-\-referer\ URL\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ specify\ a\ custom\ referer,\ use\ if\ the\ video
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ access\ is\ restricted\ to\ one\ domain
-\-\-add\-header\ FIELD:VALUE\ \ \ \ \ \ \ \ \ specify\ a\ custom\ HTTP\ header\ and\ its\ value,
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ separated\ by\ a\ colon\ \[aq]:\[aq].\ You\ can\ use\ this
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ option\ multiple\ times
-\-\-bidi\-workaround\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Work\ around\ terminals\ that\ lack
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ bidirectional\ text\ support.\ Requires\ bidiv
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ or\ fribidi\ executable\ in\ PATH
-\-\-sleep\-interval\ SECONDS\ \ \ \ \ \ \ \ \ Number\ of\ seconds\ to\ sleep\ before\ each
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ download.
+\-\-no\-check\-certificate\ \ \ \ \ \ \ \ \ \ \ Suppress\ HTTPS\ certificate\ validation
+\-\-prefer\-insecure\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Use\ an\ unencrypted\ connection\ to\ retrieve\ information\ about\ the\ video.\ (Currently\ supported\ only\ for\ YouTube)
+\-\-user\-agent\ UA\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Specify\ a\ custom\ user\ agent
+\-\-referer\ URL\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Specify\ a\ custom\ referer,\ use\ if\ the\ video\ access\ is\ restricted\ to\ one\ domain
+\-\-add\-header\ FIELD:VALUE\ \ \ \ \ \ \ \ \ Specify\ a\ custom\ HTTP\ header\ and\ its\ value,\ separated\ by\ a\ colon\ \[aq]:\[aq].\ You\ can\ use\ this\ option\ multiple\ times
+\-\-bidi\-workaround\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Work\ around\ terminals\ that\ lack\ bidirectional\ text\ support.\ Requires\ bidiv\ or\ fribidi\ executable\ in\ PATH
+\-\-sleep\-interval\ SECONDS\ \ \ \ \ \ \ \ \ Number\ of\ seconds\ to\ sleep\ before\ each\ download.
\f[]
.fi
.SS Video Format Options:
.IP
.nf
\f[C]
-\-f,\ \-\-format\ FORMAT\ \ \ \ \ \ \ \ \ \ \ \ \ \ video\ format\ code,\ specify\ the\ order\ of
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ preference\ using\ slashes,\ as\ in\ \-f\ 22/17/18
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ .\ \ Instead\ of\ format\ codes,\ you\ can\ select
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ by\ extension\ for\ the\ extensions\ aac,\ m4a,
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ mp3,\ mp4,\ ogg,\ wav,\ webm.\ You\ can\ also\ use
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ the\ special\ names\ "best",\ "bestvideo",
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ "bestaudio",\ "worst".\ \ You\ can\ filter\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ video\ results\ by\ putting\ a\ condition\ in
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ brackets,\ as\ in\ \-f\ "best[height=720]"\ (or
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \-f\ "[filesize>10M]").\ \ This\ works\ for
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ filesize,\ height,\ width,\ tbr,\ abr,\ vbr,
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ asr,\ and\ fps\ and\ the\ comparisons\ <,\ <=,\ >,
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ >=,\ =,\ !=\ and\ for\ ext,\ acodec,\ vcodec,
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ container,\ and\ protocol\ and\ the\ comparisons
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ =,\ !=\ .\ Formats\ for\ which\ the\ value\ is\ not
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ known\ are\ excluded\ unless\ you\ put\ a
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ question\ mark\ (?)\ after\ the\ operator.\ You
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ can\ combine\ format\ filters,\ so\ \ \-f\ "[height
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ <=?\ 720][tbr>500]"\ selects\ up\ to\ 720p
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ videos\ (or\ videos\ where\ the\ height\ is\ not
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ known)\ with\ a\ bitrate\ of\ at\ least\ 500
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ KBit/s.\ By\ default,\ youtube\-dl\ will\ pick
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ the\ best\ quality.\ Use\ commas\ to\ download
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ multiple\ audio\ formats,\ such\ as\ \-f
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 136/137/mp4/bestvideo,140/m4a/bestaudio.
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ You\ can\ merge\ the\ video\ and\ audio\ of\ two
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ formats\ into\ a\ single\ file\ using\ \-f\ <video\-
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ format>+<audio\-format>\ (requires\ ffmpeg\ or
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ avconv),\ for\ example\ \-f
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ bestvideo+bestaudio.
-\-\-all\-formats\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ download\ all\ available\ video\ formats
-\-\-prefer\-free\-formats\ \ \ \ \ \ \ \ \ \ \ \ prefer\ free\ video\ formats\ unless\ a\ specific
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ one\ is\ requested
-\-\-max\-quality\ FORMAT\ \ \ \ \ \ \ \ \ \ \ \ \ highest\ quality\ format\ to\ download
-\-F,\ \-\-list\-formats\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ list\ all\ available\ formats
-\-\-youtube\-skip\-dash\-manifest\ \ \ \ \ Do\ not\ download\ the\ DASH\ manifest\ on
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ YouTube\ videos
-\-\-merge\-output\-format\ FORMAT\ \ \ \ \ If\ a\ merge\ is\ required\ (e.g.
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ bestvideo+bestaudio),\ output\ to\ given
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ container\ format.\ One\ of\ mkv,\ mp4,\ ogg,
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ webm,\ flv.Ignored\ if\ no\ merge\ is\ required
+\-f,\ \-\-format\ FORMAT\ \ \ \ \ \ \ \ \ \ \ \ \ \ Video\ format\ code,\ see\ the\ "FORMAT\ SELECTION"\ section\ for\ all\ the\ info
+\-\-all\-formats\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Download\ all\ available\ video\ formats
+\-\-prefer\-free\-formats\ \ \ \ \ \ \ \ \ \ \ \ Prefer\ free\ video\ formats\ unless\ a\ specific\ one\ is\ requested
+\-F,\ \-\-list\-formats\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ List\ all\ available\ formats
+\-\-youtube\-skip\-dash\-manifest\ \ \ \ \ Do\ not\ download\ the\ DASH\ manifest\ on\ YouTube\ videos
+\-\-merge\-output\-format\ FORMAT\ \ \ \ \ If\ a\ merge\ is\ required\ (e.g.\ bestvideo+bestaudio),\ output\ to\ given\ container\ format.\ One\ of\ mkv,\ mp4,\ ogg,\ webm,\ flv.\ Ignored\ if\ no
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ merge\ is\ required
\f[]
.fi
.SS Subtitle Options:
.IP
.nf
\f[C]
-\-\-write\-sub\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ write\ subtitle\ file
-\-\-write\-auto\-sub\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ write\ automatic\ subtitle\ file\ (youtube
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ only)
-\-\-all\-subs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ downloads\ all\ the\ available\ subtitles\ of
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ the\ video
-\-\-list\-subs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ lists\ all\ available\ subtitles\ for\ the\ video
-\-\-sub\-format\ FORMAT\ \ \ \ \ \ \ \ \ \ \ \ \ \ subtitle\ format,\ accepts\ formats
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ preference,\ for\ example:\ "ass/srt/best"
-\-\-sub\-lang\ LANGS\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ languages\ of\ the\ subtitles\ to\ download
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (optional)\ separated\ by\ commas,\ use\ IETF
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ language\ tags\ like\ \[aq]en,pt\[aq]
+\-\-write\-sub\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Write\ subtitle\ file
+\-\-write\-auto\-sub\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Write\ automatic\ subtitle\ file\ (YouTube\ only)
+\-\-all\-subs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Download\ all\ the\ available\ subtitles\ of\ the\ video
+\-\-list\-subs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ List\ all\ available\ subtitles\ for\ the\ video
+\-\-sub\-format\ FORMAT\ \ \ \ \ \ \ \ \ \ \ \ \ \ Subtitle\ format,\ accepts\ formats\ preference,\ for\ example:\ "srt"\ or\ "ass/srt/best"
+\-\-sub\-lang\ LANGS\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Languages\ of\ the\ subtitles\ to\ download\ (optional)\ separated\ by\ commas,\ use\ IETF\ language\ tags\ like\ \[aq]en,pt\[aq]
\f[]
.fi
.SS Authentication Options:
.IP
.nf
\f[C]
-\-u,\ \-\-username\ USERNAME\ \ \ \ \ \ \ \ \ \ login\ with\ this\ account\ ID
-\-p,\ \-\-password\ PASSWORD\ \ \ \ \ \ \ \ \ \ account\ password.\ If\ this\ option\ is\ left
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ out,\ youtube\-dl\ will\ ask\ interactively.
-\-2,\ \-\-twofactor\ TWOFACTOR\ \ \ \ \ \ \ \ two\-factor\ auth\ code
-\-n,\ \-\-netrc\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ .netrc\ authentication\ data
-\-\-video\-password\ PASSWORD\ \ \ \ \ \ \ \ video\ password\ (vimeo,\ smotri)
+\-u,\ \-\-username\ USERNAME\ \ \ \ \ \ \ \ \ \ Login\ with\ this\ account\ ID
+\-p,\ \-\-password\ PASSWORD\ \ \ \ \ \ \ \ \ \ Account\ password.\ If\ this\ option\ is\ left\ out,\ youtube\-dl\ will\ ask\ interactively.
+\-2,\ \-\-twofactor\ TWOFACTOR\ \ \ \ \ \ \ \ Two\-factor\ auth\ code
+\-n,\ \-\-netrc\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Use\ .netrc\ authentication\ data
+\-\-video\-password\ PASSWORD\ \ \ \ \ \ \ \ Video\ password\ (vimeo,\ smotri)
\f[]
.fi
.SS Post\-processing Options:
.IP
.nf
\f[C]
-\-x,\ \-\-extract\-audio\ \ \ \ \ \ \ \ \ \ \ \ \ \ convert\ video\ files\ to\ audio\-only\ files
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (requires\ ffmpeg\ or\ avconv\ and\ ffprobe\ or
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ avprobe)
-\-\-audio\-format\ FORMAT\ \ \ \ \ \ \ \ \ \ \ \ "best",\ "aac",\ "vorbis",\ "mp3",\ "m4a",
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ "opus",\ or\ "wav";\ "best"\ by\ default
-\-\-audio\-quality\ QUALITY\ \ \ \ \ \ \ \ \ \ ffmpeg/avconv\ audio\ quality\ specification,
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ insert\ a\ value\ between\ 0\ (better)\ and\ 9
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (worse)\ for\ VBR\ or\ a\ specific\ bitrate\ like
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 128K\ (default\ 5)
-\-\-recode\-video\ FORMAT\ \ \ \ \ \ \ \ \ \ \ \ Encode\ the\ video\ to\ another\ format\ if
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ necessary\ (currently\ supported:
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ mp4|flv|ogg|webm|mkv)
-\-k,\ \-\-keep\-video\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ keeps\ the\ video\ file\ on\ disk\ after\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ post\-processing;\ the\ video\ is\ erased\ by
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ default
-\-\-no\-post\-overwrites\ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ overwrite\ post\-processed\ files;\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ post\-processed\ files\ are\ overwritten\ by
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ default
-\-\-embed\-subs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ embed\ subtitles\ in\ the\ video\ (only\ for\ mp4
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ videos)
-\-\-embed\-thumbnail\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ embed\ thumbnail\ in\ the\ audio\ as\ cover\ art
-\-\-add\-metadata\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ write\ metadata\ to\ the\ video\ file
-\-\-xattrs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ write\ metadata\ to\ the\ video\ file\[aq]s\ xattrs
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (using\ dublin\ core\ and\ xdg\ standards)
-\-\-fixup\ POLICY\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Automatically\ correct\ known\ faults\ of\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ file.\ One\ of\ never\ (do\ nothing),\ warn\ (only
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ emit\ a\ warning),\ detect_or_warn(the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ default;\ fix\ file\ if\ we\ can,\ warn
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ otherwise)
-\-\-prefer\-avconv\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Prefer\ avconv\ over\ ffmpeg\ for\ running\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ postprocessors\ (default)
-\-\-prefer\-ffmpeg\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Prefer\ ffmpeg\ over\ avconv\ for\ running\ the
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ postprocessors
-\-\-ffmpeg\-location\ PATH\ \ \ \ \ \ \ \ \ \ \ Location\ of\ the\ ffmpeg/avconv\ binary;
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ either\ the\ path\ to\ the\ binary\ or\ its
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ containing\ directory.
-\-\-exec\ CMD\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Execute\ a\ command\ on\ the\ file\ after
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ downloading,\ similar\ to\ find\[aq]s\ \-exec
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ syntax.\ Example:\ \-\-exec\ \[aq]adb\ push\ {}
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ /sdcard/Music/\ &&\ rm\ {}\[aq]
-\-\-convert\-subtitles\ FORMAT\ \ \ \ \ \ \ Convert\ the\ subtitles\ to\ other\ format
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (currently\ supported:\ srt|ass|vtt)
+\-x,\ \-\-extract\-audio\ \ \ \ \ \ \ \ \ \ \ \ \ \ Convert\ video\ files\ to\ audio\-only\ files\ (requires\ ffmpeg\ or\ avconv\ and\ ffprobe\ or\ avprobe)
+\-\-audio\-format\ FORMAT\ \ \ \ \ \ \ \ \ \ \ \ Specify\ audio\ format:\ "best",\ "aac",\ "vorbis",\ "mp3",\ "m4a",\ "opus",\ or\ "wav";\ "best"\ by\ default
+\-\-audio\-quality\ QUALITY\ \ \ \ \ \ \ \ \ \ Specify\ ffmpeg/avconv\ audio\ quality,\ insert\ a\ value\ between\ 0\ (better)\ and\ 9\ (worse)\ for\ VBR\ or\ a\ specific\ bitrate\ like\ 128K\ (default
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 5)
+\-\-recode\-video\ FORMAT\ \ \ \ \ \ \ \ \ \ \ \ Encode\ the\ video\ to\ another\ format\ if\ necessary\ (currently\ supported:\ mp4|flv|ogg|webm|mkv)
+\-k,\ \-\-keep\-video\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Keep\ the\ video\ file\ on\ disk\ after\ the\ post\-processing;\ the\ video\ is\ erased\ by\ default
+\-\-no\-post\-overwrites\ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ overwrite\ post\-processed\ files;\ the\ post\-processed\ files\ are\ overwritten\ by\ default
+\-\-embed\-subs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Embed\ subtitles\ in\ the\ video\ (only\ for\ mkv\ and\ mp4\ videos)
+\-\-embed\-thumbnail\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Embed\ thumbnail\ in\ the\ audio\ as\ cover\ art
+\-\-add\-metadata\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Write\ metadata\ to\ the\ video\ file
+\-\-metadata\-from\-title\ FORMAT\ \ \ \ \ Parse\ additional\ metadata\ like\ song\ title\ /\ artist\ from\ the\ video\ title.\ The\ format\ syntax\ is\ the\ same\ as\ \-\-output,\ the\ parsed
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ parameters\ replace\ existing\ values.\ Additional\ templates:\ %(album),\ %(artist).\ Example:\ \-\-metadata\-from\-title\ "%(artist)s\ \-
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(title)s"\ matches\ a\ title\ like\ "Coldplay\ \-\ Paradise"
+\-\-xattrs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Write\ metadata\ to\ the\ video\ file\[aq]s\ xattrs\ (using\ dublin\ core\ and\ xdg\ standards)
+\-\-fixup\ POLICY\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Automatically\ correct\ known\ faults\ of\ the\ file.\ One\ of\ never\ (do\ nothing),\ warn\ (only\ emit\ a\ warning),\ detect_or_warn\ (the\ default;
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ fix\ file\ if\ we\ can,\ warn\ otherwise)
+\-\-prefer\-avconv\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Prefer\ avconv\ over\ ffmpeg\ for\ running\ the\ postprocessors\ (default)
+\-\-prefer\-ffmpeg\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Prefer\ ffmpeg\ over\ avconv\ for\ running\ the\ postprocessors
+\-\-ffmpeg\-location\ PATH\ \ \ \ \ \ \ \ \ \ \ Location\ of\ the\ ffmpeg/avconv\ binary;\ either\ the\ path\ to\ the\ binary\ or\ its\ containing\ directory.
+\-\-exec\ CMD\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Execute\ a\ command\ on\ the\ file\ after\ downloading,\ similar\ to\ find\[aq]s\ \-exec\ syntax.\ Example:\ \-\-exec\ \[aq]adb\ push\ {}\ /sdcard/Music/\ &&\ rm
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ {}\[aq]
+\-\-convert\-subtitles\ FORMAT\ \ \ \ \ \ \ Convert\ the\ subtitles\ to\ other\ format\ (currently\ supported:\ srt|ass|vtt)
\f[]
.fi
.SH CONFIGURATION
youtube\-dl_test_video_.mp4\ \ \ \ \ \ \ \ \ \ #\ A\ simple\ file\ name
\f[]
.fi
+.SH FORMAT SELECTION
+.PP
+By default youtube\-dl tries to download the best quality, but
+sometimes you may want to download some other format.
+The simplest case is requesting a specific format, for example
+\f[C]\-f\ 22\f[].
+You can get the list of available formats using
+\f[C]\-\-list\-formats\f[].
+You can also use a file extension (currently aac, m4a, mp3, mp4, ogg,
+wav, and webm are supported) or the special names \f[C]best\f[],
+\f[C]bestvideo\f[], \f[C]bestaudio\f[] and \f[C]worst\f[].
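+.PP
+As a quick illustration, a sketch of a typical session (the URL is the
+example video used in the BUGS section below; format codes differ per
+site and video, 22 is just the example from the text above):
+.IP
+.nf
+\f[C]
+#\ list\ what\ is\ available,\ then\ request\ a\ specific\ format\ code
+youtube\-dl\ \-F\ http://www.youtube.com/watch?v=BaW_jenozKc
+youtube\-dl\ \-f\ 22\ http://www.youtube.com/watch?v=BaW_jenozKc
+\f[]
+.fi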
+.PP
+If you want to download multiple videos and they don\[aq]t have the same
+formats available, you can specify the order of preference using
+slashes, as in \f[C]\-f\ 22/17/18\f[].
+You can also filter the video results by putting a condition in
+brackets, as in \f[C]\-f\ "best[height=720]"\f[] (or
+\f[C]\-f\ "[filesize>10M]"\f[]).
+This works for filesize, height, width, tbr, abr, vbr, asr, and fps
+with the comparisons <, <=, >, >=, =, !=, and for ext, acodec, vcodec,
+container, and protocol with the comparisons =, !=.
+Formats for which the value is not known are excluded unless you put a
+question mark (?) after the operator.
+You can combine format filters, so
+\f[C]\-f\ "[height\ <=?\ 720][tbr>500]"\f[] selects up to 720p videos
+(or videos where the height is not known) with a bitrate of at least 500
+KBit/s.
+Use commas to download multiple formats, such as
+\f[C]\-f\ 136/137/mp4/bestvideo,140/m4a/bestaudio\f[].
+You can merge the video and audio of two formats into a single file
+using \f[C]\-f\ <video\-format>+<audio\-format>\f[] (requires ffmpeg or
+avconv), for example \f[C]\-f\ bestvideo+bestaudio\f[].
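+.PP
+Putting the pieces above together, a sketch of a filtered merge
+(assuming ffmpeg or avconv is installed; the URL is a placeholder):
+.IP
+.nf
+\f[C]
+#\ <=720p\ video\ (or\ unknown\ height)\ above\ 500\ KBit/s,\ plus\ best\ audio
+youtube\-dl\ \-f\ \[aq]bestvideo[height<=?720][tbr>500]+bestaudio\[aq]\ URL
+\f[]
+.fi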
+.PP
+Since the end of April 2015 (version 2015.04.26), youtube\-dl uses
+\f[C]\-f\ bestvideo+bestaudio/best\f[] as the default format selection
+(see #5447, #5456).
+If ffmpeg or avconv are installed this results in downloading
+\f[C]bestvideo\f[] and \f[C]bestaudio\f[] separately and muxing them
+together into a single file giving the best overall quality available.
+Otherwise it falls back to \f[C]best\f[] and results in downloading best
+available quality served as a single file.
+\f[C]best\f[] is also needed for videos that don\[aq]t come from YouTube
+because they don\[aq]t provide the audio and video in two different
+files.
+If you want to download only some DASH formats (for example, if you
+are not interested in videos with a resolution higher than 1080p), you
+can add \f[C]\-f\ bestvideo[height<=?1080]+bestaudio/best\f[] to your
+configuration file.
+Note that if you use youtube\-dl to stream to \f[C]stdout\f[] (most
+likely to pipe it to your media player), i.e.
+you explicitly specify the output template as \f[C]\-o\ \-\f[],
+youtube\-dl still uses \f[C]\-f\ best\f[] format selection in order to
+start content delivery to your player immediately instead of waiting
+until \f[C]bestvideo\f[] and \f[C]bestaudio\f[] are downloaded and
+muxed.
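+.PP
+For example, to cap DASH downloads at 1080p while keeping the
+single\-file fallback, or to stream to a player (mpv is just one
+possible choice; the URL is a placeholder):
+.IP
+.nf
+\f[C]
+youtube\-dl\ \-f\ \[aq]bestvideo[height<=?1080]+bestaudio/best\[aq]\ URL
+youtube\-dl\ \-o\ \-\ URL\ |\ mpv\ \-\ \ \ #\ implicitly\ behaves\ like\ \-f\ best
+\f[]
+.fi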
+.PP
+If you want to preserve the old format selection behavior (prior to
+youtube\-dl 2015.04.26), i.e.
+you want to download the best available quality media served as a
+single file, you should explicitly specify your choice with
+\f[C]\-f\ best\f[].
+You may want to add it to the configuration file (#configuration) so
+that you do not have to type it every time you run youtube\-dl.
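+.PP
+A minimal sketch of such a configuration file (options are written
+exactly as on the command line):
+.IP
+.nf
+\f[C]
+#\ ~/.config/youtube\-dl/config
+\-f\ best
+\f[]
+.fi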
.SH VIDEO SELECTION
.PP
Videos can be filtered by their upload date using the options
\- all they have to do is update the package to a somewhat recent
version.
See above for a way to update.
-.SS Do I always have to pass in \f[C]\-\-max\-quality\ FORMAT\f[], or
-\f[C]\-citw\f[]?
+.SS Do I always have to pass \f[C]\-citw\f[]?
.PP
By default, youtube\-dl intends to have the best options (incidentally,
if you have a convincing case that these should be different, please
file an issue where you explain that (https://yt-dl.org/bug)).
Therefore, it is unnecessary and sometimes harmful to copy long option
strings from webpages.
-In particular, \f[C]\-\-max\-quality\f[] \f[I]limits\f[] the video
-quality (so if you want the best quality, do NOT pass it in), and the
-only option out of \f[C]\-citw\f[] that is regularly useful is
-\f[C]\-i\f[].
+In particular, the only option out of \f[C]\-citw\f[] that is regularly
+useful is \f[C]\-i\f[].
.SS Can you please put the \-b option back?
.PP
Most people asking this question are not aware that youtube\-dl now
YouTube requires an additional signature since September 2012 which is
not supported by old versions of youtube\-dl.
See above (#how-do-i-update-youtube-dl) for how to update youtube\-dl.
+.SS Video URL contains an ampersand and I\[aq]m getting some strange
+output \f[C][1]\ 2839\f[] or
+\f[C]\[aq]v\[aq]\ is\ not\ recognized\ as\ an\ internal\ or\ external\ command\f[]
+.PP
+That\[aq]s actually the output from your shell.
+Since the ampersand is one of the special shell characters, it\[aq]s
+interpreted by the shell, preventing you from passing the whole URL to
+youtube\-dl.
+To keep your shell from interpreting the ampersands (or any other
+special characters), you have to either put the whole URL in quotes or
+escape them with a backslash (which approach works depends on your
+shell).
+.PP
+For example, if your URL is
+https://www.youtube.com/watch?t=4&v=BaW_jenozKc you should end up with
+the following command:
+.PP
+\f[C]youtube\-dl\ \[aq]https://www.youtube.com/watch?t=4&v=BaW_jenozKc\[aq]\f[]
+.PP
+or
+.PP
+\f[C]youtube\-dl\ https://www.youtube.com/watch?t=4\\&v=BaW_jenozKc\f[]
+.PP
+On Windows you have to use double quotes:
+.PP
+\f[C]youtube\-dl\ "https://www.youtube.com/watch?t=4&v=BaW_jenozKc"\f[]
.SS ExtractorError: Could not find JS function u\[aq]OF\[aq]
.PP
In February 2015, the new YouTube player contained a character sequence
in a string that was misinterpreted by old versions of youtube\-dl.
See above (#how-do-i-update-youtube-dl) for how to update youtube\-dl.
+.SS HTTP Error 429: Too Many Requests or 402: Payment Required
+.PP
+These two error codes indicate that the service is blocking your IP
+address because of overuse.
+Contact the service and ask them to unblock your IP address, or \- if
+you have already acquired a whitelisted IP address \- use the
+\f[C]\-\-proxy\f[] or \f[C]\-\-source\-address\f[]
+options (#network-options) to select another IP address.
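+.PP
+For example, assuming you already have a whitelisted proxy reachable
+at proxy.example.com:8080 (a placeholder address):
+.IP
+.nf
+\f[C]
+youtube\-dl\ \-\-proxy\ http://proxy.example.com:8080/\ URL
+\f[]
+.fi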
.SS SyntaxError: Non\-ASCII character
.PP
The error
distribute their content are perfectly fine though.
If in doubt, you can simply include a source that mentions the
legitimate purchase of content.
+.SS How can I speed up work on my issue?
+.PP
+(Also known as: Help, my important issue is not being solved!) The
+youtube\-dl core developer team is quite small.
+While we do our best to solve as many issues as possible, sometimes that
+can take quite a while.
+To speed up your issue, here\[aq]s what you can do:
+.PP
+First of all, please do report the issue at our issue
+tracker (https://yt-dl.org/bugs).
+That allows us to coordinate all efforts by users and developers, and
+serves as a unified point of contact.
+Unfortunately, the youtube\-dl project has grown too large to use
+personal email as an effective communication channel.
+.PP
+Please read the bug reporting instructions (#bugs) below.
+A lot of bug reports lack all the necessary information.
+If you can, offer proxy, VPN, or shell access to the youtube\-dl
+developers.
+If you are able to, test the issue from multiple computers in multiple
+countries to exclude local censorship or misconfiguration issues.
+.PP
+If nobody is interested in solving your issue, you are welcome to take
+matters into your own hands and submit a pull request (or coerce/pay
+somebody else to do so).
+.PP
+Feel free to bump the issue from time to time by writing a small comment
+("Issue is still present in youtube\-dl version ... from France, but
+fixed from Belgium"), but please not more than once a month.
+Please do not declare your issue as \f[C]important\f[] or
+\f[C]urgent\f[].
.SS How can I detect whether a given URL is supported by youtube\-dl?
.PP
For one, have a look at the list of supported
.IP
.nf
\f[C]
+from\ __future__\ import\ unicode_literals
import\ youtube_dl
ydl_opts\ =\ {}
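#\ a\ minimal\ sketch\ of\ the\ call\ this\ example\ builds\ up\ to\ (assumed\ continuation):
with\ youtube_dl.YoutubeDL(ydl_opts)\ as\ ydl:
\ \ \ \ ydl.download([\[aq]http://www.youtube.com/watch?v=BaW_jenozKc\[aq]])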
.IP
.nf
\f[C]
+from\ __future__\ import\ unicode_literals
import\ youtube_dl
The error message you get for (most) bugs even says so, but you would
not believe how many of our bug reports do not contain this information.
.PP
-Site support requests \f[B]must contain an example URL\f[].
+If your server has multiple IPs or you suspect censorship, adding
+\f[C]\-\-call\-home\f[] may be a good idea to get more diagnostics.
+If the error is \f[C]ERROR:\ Unable\ to\ extract\ ...\f[] and you cannot
+reproduce it from multiple countries, add \f[C]\-\-dump\-pages\f[]
+(warning: this will yield a rather large output, redirect it to the file
+\f[C]log.txt\f[] by adding \f[C]>log.txt\ 2>&1\f[] to your
+command\-line) or upload the \f[C]\&.dump\f[] files you get when you add
+\f[C]\-\-write\-pages\f[] somewhere (https://gist.github.com/).
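+.PP
+A full diagnostic invocation could therefore look like this (the URL
+stands in for the one that fails for you):
+.IP
+.nf
+\f[C]
+youtube\-dl\ \-v\ \-\-dump\-pages\ URL\ >log.txt\ 2>&1
+\f[]
+.fi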
+.PP
+\f[B]Site support requests must contain an example URL\f[].
An example URL is a URL you might want to download, like
http://www.youtube.com/watch?v=BaW_jenozKc .
There should be an obvious video present.
COMPREPLY=()
cur="${COMP_WORDS[COMP_CWORD]}"
prev="${COMP_WORDS[COMP_CWORD-1]}"
- opts="--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --default-search --ignore-config --flat-playlist --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --rate-limit --retries --buffer-size --no-resize-buffer --test --playlist-reverse --xattr-set-filesize --hls-prefer-native --external-downloader --batch-file --id --output --autonumber-size --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-intermediate-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --format --all-formats --prefer-free-formats --max-quality --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subtitles"
+ opts="--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --default-search --ignore-config --flat-playlist --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --cn-verification-proxy --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --rate-limit --retries --buffer-size --no-resize-buffer --test --playlist-reverse --xattr-set-filesize --hls-prefer-native --external-downloader --external-downloader-args --batch-file --id --output --autonumber-size --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --format --all-formats --prefer-free-formats --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --metadata-from-title --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subtitles"
keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
fileopts="-a|--batch-file|--download-archive|--cookies|--load-info"
diropts="--cache-dir"
-complete --command youtube-dl --long-option help --short-option h --description 'print this help text and exit'
-complete --command youtube-dl --long-option version --description 'print program version and exit'
-complete --command youtube-dl --long-option update --short-option U --description 'update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)'
-complete --command youtube-dl --long-option ignore-errors --short-option i --description 'continue on download errors, for example to skip unavailable videos in a playlist'
+complete --command youtube-dl --long-option help --short-option h --description 'Print this help text and exit'
+complete --command youtube-dl --long-option version --description 'Print program version and exit'
+complete --command youtube-dl --long-option update --short-option U --description 'Update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)'
+complete --command youtube-dl --long-option ignore-errors --short-option i --description 'Continue on download errors, for example to skip unavailable videos in a playlist'
complete --command youtube-dl --long-option abort-on-error --description 'Abort downloading of further videos (in the playlist or the command line) if an error occurs'
-complete --command youtube-dl --long-option dump-user-agent --description 'display the current browser identification'
+complete --command youtube-dl --long-option dump-user-agent --description 'Display the current browser identification'
complete --command youtube-dl --long-option list-extractors --description 'List all supported extractors and the URLs they would handle'
complete --command youtube-dl --long-option extractor-descriptions --description 'Output descriptions of all supported extractors'
-complete --command youtube-dl --long-option default-search --description 'Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.'
+complete --command youtube-dl --long-option default-search --description 'Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.'
complete --command youtube-dl --long-option ignore-config --description 'Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: Do not read the user configuration in ~/.config/youtube-dl/config (%APPDATA%/youtube-dl/config.txt on Windows)'
complete --command youtube-dl --long-option flat-playlist --description 'Do not extract the videos of a playlist, only list them.'
-complete --command youtube-dl --long-option no-color --description 'Do not emit color codes in output.'
+complete --command youtube-dl --long-option no-color --description 'Do not emit color codes in output'
complete --command youtube-dl --long-option proxy --description 'Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection'
complete --command youtube-dl --long-option socket-timeout --description 'Time to wait before giving up, in seconds'
complete --command youtube-dl --long-option source-address --description 'Client-side IP address to bind to (experimental)'
complete --command youtube-dl --long-option force-ipv4 --short-option 4 --description 'Make all connections via IPv4 (experimental)'
complete --command youtube-dl --long-option force-ipv6 --short-option 6 --description 'Make all connections via IPv6 (experimental)'
-complete --command youtube-dl --long-option playlist-start --description 'playlist video to start at (default is %default)'
-complete --command youtube-dl --long-option playlist-end --description 'playlist video to end at (default is last)'
-complete --command youtube-dl --long-option playlist-items --description 'playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.'
-complete --command youtube-dl --long-option match-title --description 'download only matching titles (regex or caseless sub-string)'
-complete --command youtube-dl --long-option reject-title --description 'skip download for matching titles (regex or caseless sub-string)'
+complete --command youtube-dl --long-option cn-verification-proxy --description 'Use this proxy to verify the IP address for some Chinese sites. The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading. (experimental)'
+complete --command youtube-dl --long-option playlist-start --description 'Playlist video to start at (default is %default)'
+complete --command youtube-dl --long-option playlist-end --description 'Playlist video to end at (default is last)'
+complete --command youtube-dl --long-option playlist-items --description 'Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify a range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.'
+complete --command youtube-dl --long-option match-title --description 'Download only matching titles (regex or caseless sub-string)'
+complete --command youtube-dl --long-option reject-title --description 'Skip download for matching titles (regex or caseless sub-string)'
complete --command youtube-dl --long-option max-downloads --description 'Abort after downloading NUMBER files'
complete --command youtube-dl --long-option min-filesize --description 'Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)'
complete --command youtube-dl --long-option max-filesize --description 'Do not download any videos larger than SIZE (e.g. 50k or 44.6m)'
-complete --command youtube-dl --long-option date --description 'download only videos uploaded in this date'
-complete --command youtube-dl --long-option datebefore --description 'download only videos uploaded on or before this date (i.e. inclusive)'
-complete --command youtube-dl --long-option dateafter --description 'download only videos uploaded on or after this date (i.e. inclusive)'
+complete --command youtube-dl --long-option date --description 'Download only videos uploaded in this date'
+complete --command youtube-dl --long-option datebefore --description 'Download only videos uploaded on or before this date (i.e. inclusive)'
+complete --command youtube-dl --long-option dateafter --description 'Download only videos uploaded on or after this date (i.e. inclusive)'
complete --command youtube-dl --long-option min-views --description 'Do not download any videos with less than COUNT views'
complete --command youtube-dl --long-option max-views --description 'Do not download any videos with more than COUNT views'
-complete --command youtube-dl --long-option match-filter --description '(Experimental) Generic video filter. Specify any key (see help for -o for a list of available keys) to match if the key is present, !key to check if the key is not present,key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against a number, and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the operator.For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike functionality is not available at the given service), but who also have a description, use --match-filter "like_count > 100 & dislike_count <? 50 & description" .'
-complete --command youtube-dl --long-option no-playlist --description 'If the URL refers to a video and a playlist, download only the video.'
-complete --command youtube-dl --long-option yes-playlist --description 'If the URL refers to a video and a playlist, download the playlist.'
-complete --command youtube-dl --long-option age-limit --description 'download only videos suitable for the given age'
+complete --command youtube-dl --long-option match-filter --description 'Generic video filter (experimental). Specify any key (see help for -o for a list of available keys) to match if the key is present, !key to check if the key is not present, key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against a number, and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the operator. For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike functionality is not available at the given service), but which also have a description, use --match-filter "like_count > 100 & dislike_count <? 50 & description" .'
+complete --command youtube-dl --long-option no-playlist --description 'Download only the video, if the URL refers to a video and a playlist.'
+complete --command youtube-dl --long-option yes-playlist --description 'Download the playlist, if the URL refers to a video and a playlist.'
+complete --command youtube-dl --long-option age-limit --description 'Download only videos suitable for the given age'
complete --command youtube-dl --long-option download-archive --description 'Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.' --require-parameter
complete --command youtube-dl --long-option include-ads --description 'Download advertisements as well (experimental)'
-complete --command youtube-dl --long-option rate-limit --short-option r --description 'maximum download rate in bytes per second (e.g. 50K or 4.2M)'
-complete --command youtube-dl --long-option retries --short-option R --description 'number of retries (default is %default), or "infinite".'
-complete --command youtube-dl --long-option buffer-size --description 'size of download buffer (e.g. 1024 or 16K) (default is %default)'
-complete --command youtube-dl --long-option no-resize-buffer --description 'do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.'
+complete --command youtube-dl --long-option rate-limit --short-option r --description 'Maximum download rate in bytes per second (e.g. 50K or 4.2M)'
+complete --command youtube-dl --long-option retries --short-option R --description 'Number of retries (default is %default), or "infinite".'
+complete --command youtube-dl --long-option buffer-size --description 'Size of download buffer (e.g. 1024 or 16K) (default is %default)'
+complete --command youtube-dl --long-option no-resize-buffer --description 'Do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.'
complete --command youtube-dl --long-option test
complete --command youtube-dl --long-option playlist-reverse --description 'Download playlist videos in reverse order'
-complete --command youtube-dl --long-option xattr-set-filesize --description '(experimental) set file xattribute ytdl.filesize with expected filesize'
-complete --command youtube-dl --long-option hls-prefer-native --description '(experimental) Use the native HLS downloader instead of ffmpeg.'
-complete --command youtube-dl --long-option external-downloader --description '(experimental) Use the specified external downloader. Currently supports aria2c,curl,wget'
-complete --command youtube-dl --long-option batch-file --short-option a --description 'file containing URLs to download ('"'"'-'"'"' for stdin)' --require-parameter
-complete --command youtube-dl --long-option id --description 'use only video ID in file name'
-complete --command youtube-dl --long-option output --short-option o --description 'output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(format)s for the format description (like "22 - 1280x720" or "HD"), %(format_id)s for the unique id of the format (like Youtube'"'"'s itags: "137"), %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id, %(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in, %(playlist_index)s for the position in the playlist. %(height)s and %(width)s for the width and height of the video format. %(resolution)s for a textual description of the resolution of the video format. %% for a literal percent. Use - to output to stdout. Can also be used to download to a different directory, for example with -o '"'"'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s'"'"' .'
-complete --command youtube-dl --long-option autonumber-size --description 'Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given'
+complete --command youtube-dl --long-option xattr-set-filesize --description 'Set file xattribute ytdl.filesize with expected filesize (experimental)'
+complete --command youtube-dl --long-option hls-prefer-native --description 'Use the native HLS downloader instead of ffmpeg (experimental)'
+complete --command youtube-dl --long-option external-downloader --description 'Use the specified external downloader. Currently supports aria2c, curl, wget'
+complete --command youtube-dl --long-option external-downloader-args --description 'Give these arguments to the external downloader'
+complete --command youtube-dl --long-option batch-file --short-option a --description 'File containing URLs to download ('"'"'-'"'"' for stdin)' --require-parameter
+complete --command youtube-dl --long-option id --description 'Use only video ID in file name'
+complete --command youtube-dl --long-option output --short-option o --description 'Output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(format)s for the format description (like "22 - 1280x720" or "HD"), %(format_id)s for the unique id of the format (like YouTube'"'"'s itags: "137"), %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id, %(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in, %(playlist_index)s for the position in the playlist. %(height)s and %(width)s for the width and height of the video format. %(resolution)s for a textual description of the resolution of the video format. %% for a literal percent. Use - to output to stdout. Can also be used to download to a different directory, for example with -o '"'"'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s'"'"' .'
+complete --command youtube-dl --long-option autonumber-size --description 'Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given'
complete --command youtube-dl --long-option restrict-filenames --description 'Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames'
-complete --command youtube-dl --long-option auto-number --short-option A --description '[deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] number downloaded files starting from 00000'
-complete --command youtube-dl --long-option title --short-option t --description '[deprecated] use title in file name (default)'
-complete --command youtube-dl --long-option literal --short-option l --description '[deprecated] alias of --title'
-complete --command youtube-dl --long-option no-overwrites --short-option w --description 'do not overwrite files'
-complete --command youtube-dl --long-option continue --short-option c --description 'force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.'
-complete --command youtube-dl --long-option no-continue --description 'do not resume partially downloaded files (restart from beginning)'
-complete --command youtube-dl --long-option no-part --description 'do not use .part files - write directly into output file'
-complete --command youtube-dl --long-option no-mtime --description 'do not use the Last-modified header to set the file modification time'
-complete --command youtube-dl --long-option write-description --description 'write video description to a .description file'
-complete --command youtube-dl --long-option write-info-json --description 'write video metadata to a .info.json file'
-complete --command youtube-dl --long-option write-annotations --description 'write video annotations to a .annotation file'
-complete --command youtube-dl --long-option load-info --description 'json file containing the video information (created with the "--write-json" option)' --require-parameter
-complete --command youtube-dl --long-option cookies --description 'file to read cookies from and dump cookie jar in' --require-parameter
+complete --command youtube-dl --long-option auto-number --short-option A --description '[deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] Number downloaded files starting from 00000'
+complete --command youtube-dl --long-option title --short-option t --description '[deprecated] Use title in file name (default)'
+complete --command youtube-dl --long-option literal --short-option l --description '[deprecated] Alias of --title'
+complete --command youtube-dl --long-option no-overwrites --short-option w --description 'Do not overwrite files'
+complete --command youtube-dl --long-option continue --short-option c --description 'Force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.'
+complete --command youtube-dl --long-option no-continue --description 'Do not resume partially downloaded files (restart from beginning)'
+complete --command youtube-dl --long-option no-part --description 'Do not use .part files - write directly into output file'
+complete --command youtube-dl --long-option no-mtime --description 'Do not use the Last-modified header to set the file modification time'
+complete --command youtube-dl --long-option write-description --description 'Write video description to a .description file'
+complete --command youtube-dl --long-option write-info-json --description 'Write video metadata to a .info.json file'
+complete --command youtube-dl --long-option write-annotations --description 'Write video annotations to a .annotations.xml file'
+complete --command youtube-dl --long-option load-info --description 'JSON file containing the video information (created with the "--write-info-json" option)' --require-parameter
+complete --command youtube-dl --long-option cookies --description 'File to read cookies from and dump cookie jar in' --require-parameter
complete --command youtube-dl --long-option cache-dir --description 'Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.'
complete --command youtube-dl --long-option no-cache-dir --description 'Disable filesystem caching'
complete --command youtube-dl --long-option rm-cache-dir --description 'Delete all filesystem cache files'
-complete --command youtube-dl --long-option write-thumbnail --description 'write thumbnail image to disk'
-complete --command youtube-dl --long-option write-all-thumbnails --description 'write all thumbnail image formats to disk'
+complete --command youtube-dl --long-option write-thumbnail --description 'Write thumbnail image to disk'
+complete --command youtube-dl --long-option write-all-thumbnails --description 'Write all thumbnail image formats to disk'
complete --command youtube-dl --long-option list-thumbnails --description 'Simulate and list all available thumbnail formats'
-complete --command youtube-dl --long-option quiet --short-option q --description 'activates quiet mode'
+complete --command youtube-dl --long-option quiet --short-option q --description 'Activate quiet mode'
complete --command youtube-dl --long-option no-warnings --description 'Ignore warnings'
-complete --command youtube-dl --long-option simulate --short-option s --description 'do not download the video and do not write anything to disk'
-complete --command youtube-dl --long-option skip-download --description 'do not download the video'
-complete --command youtube-dl --long-option get-url --short-option g --description 'simulate, quiet but print URL'
-complete --command youtube-dl --long-option get-title --short-option e --description 'simulate, quiet but print title'
-complete --command youtube-dl --long-option get-id --description 'simulate, quiet but print id'
-complete --command youtube-dl --long-option get-thumbnail --description 'simulate, quiet but print thumbnail URL'
-complete --command youtube-dl --long-option get-description --description 'simulate, quiet but print video description'
-complete --command youtube-dl --long-option get-duration --description 'simulate, quiet but print video length'
-complete --command youtube-dl --long-option get-filename --description 'simulate, quiet but print output filename'
-complete --command youtube-dl --long-option get-format --description 'simulate, quiet but print output format'
-complete --command youtube-dl --long-option dump-json --short-option j --description 'simulate, quiet but print JSON information. See --output for a description of available keys.'
-complete --command youtube-dl --long-option dump-single-json --short-option J --description 'simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.'
+complete --command youtube-dl --long-option simulate --short-option s --description 'Do not download the video and do not write anything to disk'
+complete --command youtube-dl --long-option skip-download --description 'Do not download the video'
+complete --command youtube-dl --long-option get-url --short-option g --description 'Simulate, quiet but print URL'
+complete --command youtube-dl --long-option get-title --short-option e --description 'Simulate, quiet but print title'
+complete --command youtube-dl --long-option get-id --description 'Simulate, quiet but print id'
+complete --command youtube-dl --long-option get-thumbnail --description 'Simulate, quiet but print thumbnail URL'
+complete --command youtube-dl --long-option get-description --description 'Simulate, quiet but print video description'
+complete --command youtube-dl --long-option get-duration --description 'Simulate, quiet but print video length'
+complete --command youtube-dl --long-option get-filename --description 'Simulate, quiet but print output filename'
+complete --command youtube-dl --long-option get-format --description 'Simulate, quiet but print output format'
+complete --command youtube-dl --long-option dump-json --short-option j --description 'Simulate, quiet but print JSON information. See --output for a description of available keys.'
+complete --command youtube-dl --long-option dump-single-json --short-option J --description 'Simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.'
complete --command youtube-dl --long-option print-json --description 'Be quiet and print the video information as JSON (video is still being downloaded).'
-complete --command youtube-dl --long-option newline --description 'output progress bar as new lines'
-complete --command youtube-dl --long-option no-progress --description 'do not print progress bar'
-complete --command youtube-dl --long-option console-title --description 'display progress in console titlebar'
-complete --command youtube-dl --long-option verbose --short-option v --description 'print various debugging information'
-complete --command youtube-dl --long-option dump-intermediate-pages --description 'print downloaded pages to debug problems (very verbose)'
+complete --command youtube-dl --long-option newline --description 'Output progress bar as new lines'
+complete --command youtube-dl --long-option no-progress --description 'Do not print progress bar'
+complete --command youtube-dl --long-option console-title --description 'Display progress in console titlebar'
+complete --command youtube-dl --long-option verbose --short-option v --description 'Print various debugging information'
+complete --command youtube-dl --long-option dump-pages --description 'Print downloaded pages to debug problems (very verbose)'
complete --command youtube-dl --long-option write-pages --description 'Write downloaded intermediary pages to files in the current directory to debug problems'
complete --command youtube-dl --long-option youtube-print-sig-code
complete --command youtube-dl --long-option print-traffic --description 'Display sent and read HTTP traffic'
-complete --command youtube-dl --long-option call-home --short-option C --description 'Contact the youtube-dl server for debugging.'
-complete --command youtube-dl --long-option no-call-home --description 'Do NOT contact the youtube-dl server for debugging.'
+complete --command youtube-dl --long-option call-home --short-option C --description 'Contact the youtube-dl server for debugging'
+complete --command youtube-dl --long-option no-call-home --description 'Do NOT contact the youtube-dl server for debugging'
complete --command youtube-dl --long-option encoding --description 'Force the specified encoding (experimental)'
-complete --command youtube-dl --long-option no-check-certificate --description 'Suppress HTTPS certificate validation.'
+complete --command youtube-dl --long-option no-check-certificate --description 'Suppress HTTPS certificate validation'
complete --command youtube-dl --long-option prefer-insecure --description 'Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)'
-complete --command youtube-dl --long-option user-agent --description 'specify a custom user agent'
-complete --command youtube-dl --long-option referer --description 'specify a custom referer, use if the video access is restricted to one domain'
-complete --command youtube-dl --long-option add-header --description 'specify a custom HTTP header and its value, separated by a colon '"'"':'"'"'. You can use this option multiple times'
+complete --command youtube-dl --long-option user-agent --description 'Specify a custom user agent'
+complete --command youtube-dl --long-option referer --description 'Specify a custom referer, use if the video access is restricted to one domain'
+complete --command youtube-dl --long-option add-header --description 'Specify a custom HTTP header and its value, separated by a colon '"'"':'"'"'. You can use this option multiple times'
complete --command youtube-dl --long-option bidi-workaround --description 'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH'
complete --command youtube-dl --long-option sleep-interval --description 'Number of seconds to sleep before each download.'
-complete --command youtube-dl --long-option format --short-option f --description 'video format code, specify the order of preference using slashes, as in -f 22/17/18 . Instead of format codes, you can select by extension for the extensions aac, m4a, mp3, mp4, ogg, wav, webm. You can also use the special names "best", "bestvideo", "bestaudio", "worst". You can filter the video results by putting a condition in brackets, as in -f "best[height=720]" (or -f "[filesize>10M]"). This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec, vcodec, container, and protocol and the comparisons =, != . Formats for which the value is not known are excluded unless you put a question mark (?) after the operator. You can combine format filters, so -f "[height <=? 720][tbr>500]" selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. By default, youtube-dl will pick the best quality. Use commas to download multiple audio formats, such as -f 136/137/mp4/bestvideo,140/m4a/bestaudio. You can merge the video and audio of two formats into a single file using -f <video-format>+<audio-format> (requires ffmpeg or avconv), for example -f bestvideo+bestaudio.'
-complete --command youtube-dl --long-option all-formats --description 'download all available video formats'
-complete --command youtube-dl --long-option prefer-free-formats --description 'prefer free video formats unless a specific one is requested'
-complete --command youtube-dl --long-option max-quality --description 'highest quality format to download'
-complete --command youtube-dl --long-option list-formats --short-option F --description 'list all available formats'
+complete --command youtube-dl --long-option format --short-option f --description 'Video format code, see the "FORMAT SELECTION" section for all the info'
+complete --command youtube-dl --long-option all-formats --description 'Download all available video formats'
+complete --command youtube-dl --long-option prefer-free-formats --description 'Prefer free video formats unless a specific one is requested'
+complete --command youtube-dl --long-option list-formats --short-option F --description 'List all available formats'
complete --command youtube-dl --long-option youtube-include-dash-manifest
complete --command youtube-dl --long-option youtube-skip-dash-manifest --description 'Do not download the DASH manifest on YouTube videos'
complete --command youtube-dl --long-option merge-output-format --description 'If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv. Ignored if no merge is required'
-complete --command youtube-dl --long-option write-sub --description 'write subtitle file'
-complete --command youtube-dl --long-option write-auto-sub --description 'write automatic subtitle file (youtube only)'
-complete --command youtube-dl --long-option all-subs --description 'downloads all the available subtitles of the video'
-complete --command youtube-dl --long-option list-subs --description 'lists all available subtitles for the video'
-complete --command youtube-dl --long-option sub-format --description 'subtitle format, accepts formats preference, for example: "ass/srt/best"'
-complete --command youtube-dl --long-option sub-lang --description 'languages of the subtitles to download (optional) separated by commas, use IETF language tags like '"'"'en,pt'"'"''
-complete --command youtube-dl --long-option username --short-option u --description 'login with this account ID'
-complete --command youtube-dl --long-option password --short-option p --description 'account password. If this option is left out, youtube-dl will ask interactively.'
-complete --command youtube-dl --long-option twofactor --short-option 2 --description 'two-factor auth code'
-complete --command youtube-dl --long-option netrc --short-option n --description 'use .netrc authentication data'
-complete --command youtube-dl --long-option video-password --description 'video password (vimeo, smotri)'
-complete --command youtube-dl --long-option extract-audio --short-option x --description 'convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)'
-complete --command youtube-dl --long-option audio-format --description '"best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default'
-complete --command youtube-dl --long-option audio-quality --description 'ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default %default)'
+complete --command youtube-dl --long-option write-sub --description 'Write subtitle file'
+complete --command youtube-dl --long-option write-auto-sub --description 'Write automatic subtitle file (YouTube only)'
+complete --command youtube-dl --long-option all-subs --description 'Download all the available subtitles of the video'
+complete --command youtube-dl --long-option list-subs --description 'List all available subtitles for the video'
+complete --command youtube-dl --long-option sub-format --description 'Subtitle format, accepts formats preference, for example: "srt" or "ass/srt/best"'
+complete --command youtube-dl --long-option sub-lang --description 'Languages of the subtitles to download (optional) separated by commas, use IETF language tags like '"'"'en,pt'"'"''
+complete --command youtube-dl --long-option username --short-option u --description 'Login with this account ID'
+complete --command youtube-dl --long-option password --short-option p --description 'Account password. If this option is left out, youtube-dl will ask interactively.'
+complete --command youtube-dl --long-option twofactor --short-option 2 --description 'Two-factor auth code'
+complete --command youtube-dl --long-option netrc --short-option n --description 'Use .netrc authentication data'
+complete --command youtube-dl --long-option video-password --description 'Video password (vimeo, smotri)'
+complete --command youtube-dl --long-option extract-audio --short-option x --description 'Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)'
+complete --command youtube-dl --long-option audio-format --description 'Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default'
+complete --command youtube-dl --long-option audio-quality --description 'Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default %default)'
complete --command youtube-dl --long-option recode-video --description 'Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)' --arguments 'mp4 flv ogg webm mkv' --exclusive
-complete --command youtube-dl --long-option keep-video --short-option k --description 'keeps the video file on disk after the post-processing; the video is erased by default'
-complete --command youtube-dl --long-option no-post-overwrites --description 'do not overwrite post-processed files; the post-processed files are overwritten by default'
-complete --command youtube-dl --long-option embed-subs --description 'embed subtitles in the video (only for mp4 videos)'
-complete --command youtube-dl --long-option embed-thumbnail --description 'embed thumbnail in the audio as cover art'
-complete --command youtube-dl --long-option add-metadata --description 'write metadata to the video file'
-complete --command youtube-dl --long-option xattrs --description 'write metadata to the video file'"'"'s xattrs (using dublin core and xdg standards)'
+complete --command youtube-dl --long-option keep-video --short-option k --description 'Keep the video file on disk after the post-processing; the video is erased by default'
+complete --command youtube-dl --long-option no-post-overwrites --description 'Do not overwrite post-processed files; the post-processed files are overwritten by default'
+complete --command youtube-dl --long-option embed-subs --description 'Embed subtitles in the video (only for mkv and mp4 videos)'
+complete --command youtube-dl --long-option embed-thumbnail --description 'Embed thumbnail in the audio as cover art'
+complete --command youtube-dl --long-option add-metadata --description 'Write metadata to the video file'
+complete --command youtube-dl --long-option metadata-from-title --description 'Parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed parameters replace existing values. Additional templates: %(album), %(artist). Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like "Coldplay - Paradise"'
+complete --command youtube-dl --long-option xattrs --description 'Write metadata to the video file'"'"'s xattrs (using dublin core and xdg standards)'
complete --command youtube-dl --long-option fixup --description 'Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn (the default; fix file if we can, warn otherwise)'
complete --command youtube-dl --long-option prefer-avconv --description 'Prefer avconv over ffmpeg for running the postprocessors (default)'
complete --command youtube-dl --long-option prefer-ffmpeg --description 'Prefer ffmpeg over avconv for running the postprocessors'
elif [[ ${prev} == "--recode-video" ]]; then
_arguments '*: :(mp4 flv ogg webm mkv)'
else
- _arguments '*: :(--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --default-search --ignore-config --flat-playlist --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --rate-limit --retries --buffer-size --no-resize-buffer --test --playlist-reverse --xattr-set-filesize --hls-prefer-native --external-downloader --batch-file --id --output --autonumber-size --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-intermediate-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --format --all-formats --prefer-free-formats --max-quality --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subtitles)'
+ _arguments '*: :(--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --default-search --ignore-config --flat-playlist --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --cn-verification-proxy --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --rate-limit --retries --buffer-size --no-resize-buffer --test --playlist-reverse --xattr-set-filesize --hls-prefer-native --external-downloader --external-downloader-args --batch-file --id --output --autonumber-size --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --format --all-formats --prefer-free-formats --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --metadata-from-title --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subtitles)'
fi
;;
esac
from __future__ import absolute_import, unicode_literals
import collections
+import contextlib
import datetime
import errno
+import fileinput
import io
import itertools
import json
compat_basestring,
compat_cookiejar,
compat_expanduser,
+ compat_get_terminal_size,
compat_http_client,
compat_kwargs,
compat_str,
ExtractorError,
format_bytes,
formatSeconds,
- get_term_width,
locked_file,
make_HTTPS_handler,
MaxDownloadsReached,
PagedList,
parse_filesize,
+ PerRequestProxyHandler,
PostProcessingError,
platform_name,
preferredencoding,
render_table,
SameFileError,
sanitize_filename,
+ sanitize_path,
std_headers,
subtitles_filename,
- takewhile_inclusive,
UnavailableVideoError,
url_basename,
version_tuple,
write_string,
YoutubeDLHandler,
prepend_extension,
+ replace_extension,
args_to_str,
age_restricted,
)
(or video) as a single JSON line.
simulate: Do not download the video files.
format: Video format code. See options.py for more information.
- format_limit: Highest quality format to try.
outtmpl: Template for output names.
restrictfilenames: Do not allow "&" and spaces in file names
ignoreerrors: Do not stop on download errors.
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
At the moment, this is only supported by YouTube.
proxy: URL of the proxy server to use
+ cn_verification_proxy: URL of the proxy to use for IP address verification
+ on Chinese sites. (Experimental)
socket_timeout: Time to wait for unresponsive hosts, in seconds
bidi_workaround: Work around buggy terminals without bidirectional text
support, using fribidi
hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
The following parameters are not used by YoutubeDL itself, they are used by
- the FileDownloader:
+ the downloader (see youtube_dl/downloader/common.py):
nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
noresizebuffer, retries, continuedl, noprogress, consoletitle,
- xattr_set_filesize.
+ xattr_set_filesize, external_downloader_args.
The following options are used by the post processors:
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
otherwise prefer avconv.
- exec_cmd: Arbitrary command to run after downloading
"""
params = None
try:
import pty
master, slave = pty.openpty()
- width = get_term_width()
+ width = compat_get_terminal_size().columns
if width is None:
width_args = []
else:
'Set the LC_ALL environment variable to fix this.')
self.params['restrictfilenames'] = True
- if '%(stitle)s' in self.params.get('outtmpl', ''):
- self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
+ if isinstance(params.get('outtmpl'), bytes):
+ self.report_warning(
+ 'Parameter outtmpl is bytes, but should be a unicode string. '
+ 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
self._setup_opener()
if v is not None)
template_dict = collections.defaultdict(lambda: 'NA', template_dict)
- outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
+ outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
tmpl = compat_expanduser(outtmpl)
filename = tmpl % template_dict
# Temporary fix for #4787
Returns a list with a dictionary for each video we find.
If 'download', also downloads the videos.
extra_info is a dict containing the extra values to add to each result
- '''
+ '''
if ie_key:
ies = [self.get_info_extractor(ie_key)]
if not available_formats:
return None
- if format_spec == 'best' or format_spec is None:
- return available_formats[-1]
- elif format_spec == 'worst':
- return available_formats[0]
+ if format_spec in ['best', 'worst', None]:
+ format_idx = 0 if format_spec == 'worst' else -1
+ audiovideo_formats = [
+ f for f in available_formats
+ if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
+ if audiovideo_formats:
+ return audiovideo_formats[format_idx]
+ # For audio-only URLs, select the best/worst audio format
+ elif all(f.get('acodec') != 'none' for f in available_formats):
+ return available_formats[format_idx]
elif format_spec == 'bestaudio':
audio_formats = [
f for f in available_formats
full_format_info.update(format)
format['http_headers'] = self._calc_headers(full_format_info)
- format_limit = self.params.get('format_limit', None)
- if format_limit:
- formats = list(takewhile_inclusive(
- lambda f: f['format_id'] != format_limit, formats
- ))
-
# TODO Central sorting goes here
if formats[0] is not info_dict:
req_format = self.params.get('format')
if req_format is None:
- req_format = 'best'
+ req_format_list = []
+ if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
+ info_dict['extractor'] in ['youtube', 'ted']):
+ merger = FFmpegMergerPP(self)
+ if merger.available and merger.can_merge():
+ req_format_list.append('bestvideo+bestaudio')
+ req_format_list.append('best')
+ req_format = '/'.join(req_format_list)
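# For illustration: on a merge-capable setup the default spec built above is
# 'bestvideo+bestaudio/best', where '/' separates alternatives tried in order
# and '+' requests a video+audio merge:
# assert '/'.join(['bestvideo+bestaudio', 'best']) == 'bestvideo+bestaudio/best'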
formats_to_download = []
- # The -1 is for supporting YoutubeIE
- if req_format in ('-1', 'all'):
+ if req_format == 'all':
formats_to_download = formats
else:
for rfstr in req_format.split(','):
if len(info_dict['title']) > 200:
info_dict['title'] = info_dict['title'][:197] + '...'
- # Keep for backwards compatibility
- info_dict['stitle'] = info_dict['title']
-
if 'format' not in info_dict:
info_dict['format'] = info_dict['ext']
return
try:
- dn = os.path.dirname(encodeFilename(filename))
+ dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
if dn and not os.path.exists(dn):
os.makedirs(dn)
except (OSError, IOError) as err:
return
if self.params.get('writedescription', False):
- descfn = filename + '.description'
+ descfn = replace_extension(filename, 'description', info_dict.get('ext'))
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
self.to_screen('[info] Video description is already present')
elif info_dict.get('description') is None:
return
if self.params.get('writeannotations', False):
- annofn = filename + '.annotations.xml'
+ annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
self.to_screen('[info] Video annotations are already present')
else:
return
if self.params.get('writeinfojson', False):
- infofn = os.path.splitext(filename)[0] + '.info.json'
+ infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
self.to_screen('[info] Video description metadata is already present')
else:
self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
try:
- write_json_file(info_dict, infofn)
+ write_json_file(self.filter_requested_info(info_dict), infofn)
except (OSError, IOError):
self.report_error('Cannot write metadata to JSON file ' + infofn)
return
if info_dict.get('requested_formats') is not None:
downloaded = []
success = True
- merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
+ merger = FFmpegMergerPP(self)
if not merger.available:
postprocessors = []
self.report_warning('You have requested multiple '
'formats but ffmpeg or avconv are not installed.'
' The formats won\'t be merged')
else:
postprocessors = [merger]
- for f in info_dict['requested_formats']:
- new_info = dict(info_dict)
- new_info.update(f)
- fname = self.prepare_filename(new_info)
- fname = prepend_extension(fname, 'f%s' % f['format_id'])
- downloaded.append(fname)
- partial_success = dl(fname, new_info)
- success = success and partial_success
- info_dict['__postprocessors'] = postprocessors
- info_dict['__files_to_merge'] = downloaded
+
+ def compatible_formats(formats):
+ video, audio = formats
+ # Check extension
+ video_ext, audio_ext = video.get('ext'), audio.get('ext')
+ if video_ext and audio_ext:
+ COMPATIBLE_EXTS = (
+ ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
+ ('webm',)  # trailing comma makes this a one-element tuple, not a string
+ )
+ for exts in COMPATIBLE_EXTS:
+ if video_ext in exts and audio_ext in exts:
+ return True
+ # TODO: Check acodec/vcodec
+ return False
+
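# Self-contained illustration (assumed example extensions) of the check above:
# COMPATIBLE_EXTS = (('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'), ('webm',))
# video_ext, audio_ext = 'mp4', 'm4a'
# any(video_ext in exts and audio_ext in exts for exts in COMPATIBLE_EXTS)
# # -> True: an mp4 video and an m4a audio stream merge without falling back to mkv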
+ filename_real_ext = os.path.splitext(filename)[1][1:]
+ filename_wo_ext = (
+ os.path.splitext(filename)[0]
+ if filename_real_ext == info_dict['ext']
+ else filename)
+ requested_formats = info_dict['requested_formats']
+ if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
+ info_dict['ext'] = 'mkv'
+ self.report_warning('You have requested formats incompatible for merge. '
+ 'The formats will be merged into mkv')
+ # Ensure filename always has a correct extension for successful merge
+ filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
+ if os.path.exists(encodeFilename(filename)):
+ self.to_screen(
+ '[download] %s has already been downloaded and '
+ 'merged' % filename)
+ else:
+ for f in requested_formats:
+ new_info = dict(info_dict)
+ new_info.update(f)
+ fname = self.prepare_filename(new_info)
+ fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
+ downloaded.append(fname)
+ partial_success = dl(fname, new_info)
+ success = success and partial_success
+ info_dict['__postprocessors'] = postprocessors
+ info_dict['__files_to_merge'] = downloaded
else:
# Just a single file
success = dl(filename, info_dict)
return self._download_retcode
def download_with_info_file(self, info_filename):
- with io.open(info_filename, 'r', encoding='utf-8') as f:
- info = json.load(f)
+ with contextlib.closing(fileinput.FileInput(
+ [info_filename], mode='r',
+ openhook=fileinput.hook_encoded('utf-8'))) as f:
+ # FileInput doesn't have a read method, so json.load can't be used directly
+ info = self.filter_requested_info(json.loads('\n'.join(f)))
try:
self.process_ie_result(info, download=True)
except DownloadError:
raise
return self._download_retcode
+ @staticmethod
+ def filter_requested_info(info_dict):
+ return dict(
+ (k, v) for k, v in info_dict.items()
+ if k not in ['requested_formats', 'requested_subtitles'])
+
def post_process(self, filename, ie_info):
"""Run all the postprocessors on the given file."""
info = dict(ie_info)
pps_chain.extend(ie_info['__postprocessors'])
pps_chain.extend(self._pps)
for pp in pps_chain:
- keep_video = None
- old_filename = info['filepath']
try:
- keep_video_wish, info = pp.run(info)
- if keep_video_wish is not None:
- if keep_video_wish:
- keep_video = keep_video_wish
- elif keep_video is None:
- # No clear decision yet, let IE decide
- keep_video = keep_video_wish
+ files_to_delete, info = pp.run(info)
except PostProcessingError as e:
self.report_error(e.msg)
- if keep_video is False and not self.params.get('keepvideo', False):
- try:
+ if files_to_delete and not self.params.get('keepvideo', False):
+ for old_filename in files_to_delete:
self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
- os.remove(encodeFilename(old_filename))
- except (IOError, OSError):
- self.report_warning('Unable to remove downloaded video file')
+ try:
+ os.remove(encodeFilename(old_filename))
+ except (IOError, OSError):
+ self.report_warning('Unable to remove downloaded original file')
def _make_archive_id(self, info_dict):
# Future-proof against any change in case
out = out.decode().strip()
if re.match('[0-9a-f]+', out):
self._write_string('[debug] Git HEAD: ' + out + '\n')
- except:
+ except Exception:
try:
sys.exc_clear()
- except:
+ except Exception:
pass
self._write_string('[debug] Python version %s - %s\n' % (
platform.python_version(), platform_name()))
# Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
if 'http' in proxies and 'https' not in proxies:
proxies['https'] = proxies['http']
- proxy_handler = compat_urllib_request.ProxyHandler(proxies)
+ proxy_handler = PerRequestProxyHandler(proxies)
debuglevel = 1 if self.params.get('debug_printtraffic') else 0
https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
opener = compat_urllib_request.build_opener(
- https_handler, proxy_handler, cookie_processor, ydlh)
+ proxy_handler, https_handler, cookie_processor, ydlh)
+
# Delete the default user-agent header, which would otherwise apply in
# cases where our custom HTTP handler doesn't come into play
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
thumb_ext = determine_ext(t['url'], 'jpg')
suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
- thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
+ t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
self.to_screen('[%s] %s: Thumbnail %sis already present' %
import io
import os
import random
+import shlex
import sys
if opts.allsubtitles and not opts.writeautomaticsub:
opts.writesubtitles = True
- if sys.version_info < (3,):
- # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
- if opts.outtmpl is not None:
- opts.outtmpl = opts.outtmpl.decode(preferredencoding())
outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or
(opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or
(opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or
# PostProcessors
postprocessors = []
# Add the metadata pp first; the other pps will copy it
+ if opts.metafromtitle:
+ postprocessors.append({
+ 'key': 'MetadataFromTitle',
+ 'titleformat': opts.metafromtitle
+ })
if opts.addmetadata:
postprocessors.append({'key': 'FFmpegMetadata'})
if opts.extractaudio:
if opts.xattrs:
postprocessors.append({'key': 'XAttrMetadata'})
if opts.embedthumbnail:
- if not opts.addmetadata:
- postprocessors.append({'key': 'FFmpegAudioFix'})
- postprocessors.append({'key': 'AtomicParsley'})
+ already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
+ postprocessors.append({
+ 'key': 'EmbedThumbnail',
+ 'already_have_thumbnail': already_have_thumbnail
+ })
+ if not already_have_thumbnail:
+ opts.writethumbnail = True
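+ # EmbedThumbnail needs the thumbnail file on disk, so writing it is forced on
+ # when the user did not request it; the postprocessor can clean it up afterwards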
# Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
# So if the user removes the file before a later postprocessor runs, it might cause problems.
if opts.exec_cmd:
postprocessors.append({
'key': 'ExecAfterDownload',
- 'verboseOutput': opts.verbose,
'exec_cmd': opts.exec_cmd,
})
if opts.xattr_set_filesize:
xattr # Confuse flake8
except ImportError:
parser.error('setting filesize xattr requested but python-xattr is not available')
+ external_downloader_args = None
+ if opts.external_downloader_args:
+ external_downloader_args = shlex.split(opts.external_downloader_args)
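# Example (hypothetical option value): shlex.split tokenizes shell-style, so
# quoted arguments survive as single list items:
# shlex.split('--min-split-size 1M --header "X-Test: 1"')
# # -> ['--min-split-size', '1M', '--header', 'X-Test: 1']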
match_filter = (
None if opts.match_filter is None
else match_filter_func(opts.match_filter))
'simulate': opts.simulate or any_getting,
'skip_download': opts.skip_download,
'format': opts.format,
- 'format_limit': opts.format_limit,
'listformats': opts.listformats,
'outtmpl': outtmpl,
'autonumber_size': opts.autonumber_size,
'default_search': opts.default_search,
'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
'encoding': opts.encoding,
- 'exec_cmd': opts.exec_cmd,
'extract_flat': opts.extract_flat,
'merge_output_format': opts.merge_output_format,
'postprocessors': postprocessors,
'no_color': opts.no_color,
'ffmpeg_location': opts.ffmpeg_location,
'hls_prefer_native': opts.hls_prefer_native,
+ 'external_downloader_args': external_downloader_args,
+ 'cn_verification_proxy': opts.cn_verification_proxy,
}
with YoutubeDL(ydl_opts) as ydl:
from __future__ import unicode_literals
+import collections
import getpass
import optparse
import os
import re
+import shutil
import socket
import subprocess
import sys
except ImportError: # Python 2
import htmlentitydefs as compat_html_entities
-try:
- import html.parser as compat_html_parser
-except ImportError: # Python 2
- import HTMLParser as compat_html_parser
-
try:
import http.client as compat_http_client
except ImportError: # Python 2
return real_add_option(self, *bargs, **bkwargs)
optparse.OptionGroup.add_option = _compat_add_option
+if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3
+ compat_get_terminal_size = shutil.get_terminal_size
+else:
+ _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
+
+ def compat_get_terminal_size():
+ columns = compat_getenv('COLUMNS', None)
+ if columns:
+ columns = int(columns)
+ else:
+ columns = None
+ lines = compat_getenv('LINES', None)
+ if lines:
+ lines = int(lines)
+ else:
+ lines = None
+
+ try:
+ sp = subprocess.Popen(
+ ['stty', 'size'],
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ out, err = sp.communicate()
+ lines, columns = map(int, out.split())
+ except Exception:
+ pass
+ return _terminal_size(columns, lines)
+
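# Usage sketch: both branches return a namedtuple exposing .columns and .lines
# (either may be None in the environment-based fallback):
# size = compat_get_terminal_size()
# width = size.columns if size.columns is not None else 80  # assumed fallback width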
__all__ = [
'compat_HTTPError',
'compat_chr',
'compat_cookiejar',
'compat_expanduser',
+ 'compat_get_terminal_size',
'compat_getenv',
'compat_getpass',
'compat_html_entities',
- 'compat_html_parser',
'compat_http_client',
'compat_http_server',
'compat_kwargs',
from .hls import HlsFD
from .hls import NativeHlsFD
from .http import HttpFD
-from .mplayer import MplayerFD
+from .rtsp import RtspFD
from .rtmp import RtmpFD
from ..utils import (
'rtmp': RtmpFD,
'm3u8_native': NativeHlsFD,
'm3u8': HlsFD,
- 'mms': MplayerFD,
- 'rtsp': MplayerFD,
+ 'mms': RtspFD,
+ 'rtsp': RtspFD,
'f4m': F4mFD,
}
from ..compat import compat_str
from ..utils import (
encodeFilename,
+ decodeArgument,
format_bytes,
timeconvert,
)
max_filesize: Skip files larger than this size
xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
(experimental)
+ external_downloader_args: A list of additional command-line arguments for the
+ external downloader.
Subclasses of this one must re-define the real_download method.
"""
return
try:
os.utime(filename, (time.time(), filetime))
- except:
+ except Exception:
pass
return filetime
)
continuedl_and_exists = (
- self.params.get('continuedl', False) and
+ self.params.get('continuedl', True) and
os.path.isfile(encodeFilename(filename)) and
not self.params.get('nopart', False)
)
# this interface
self._progress_hooks.append(ph)
- def _debug_cmd(self, args, subprocess_encoding, exe=None):
+ def _debug_cmd(self, args, exe=None):
if not self.params.get('verbose', False):
return
+ str_args = [decodeArgument(a) for a in args]
+
if exe is None:
- exe = os.path.basename(args[0])
+ exe = os.path.basename(str_args[0])
- if subprocess_encoding:
- str_args = [
- a.decode(subprocess_encoding) if isinstance(a, bytes) else a
- for a in args]
- else:
- str_args = args
try:
import pipes
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
import os.path
import subprocess
-import sys
from .common import FileDownloader
from ..utils import (
encodeFilename,
+ encodeArgument,
)
return []
return [command_option, source_address]
+ def _configuration_args(self, default=[]):
+ ex_args = self.params.get('external_downloader_args')
+ if ex_args is None:
+ return default
+ assert isinstance(ex_args, list)
+ return ex_args
+
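# Behaviour sketch (assumed params values):
# self.params = {'external_downloader_args': ['--retry-wait', '5']}
# self._configuration_args(['--min-split-size', '1M'])  # -> ['--retry-wait', '5']
# self.params = {}
# self._configuration_args(['--min-split-size', '1M'])  # -> ['--min-split-size', '1M']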
def _call_downloader(self, tmpfilename, info_dict):
""" Either overwrite this or implement _make_cmd """
- cmd = self._make_cmd(tmpfilename, info_dict)
-
- if sys.platform == 'win32' and sys.version_info < (3, 0):
- # Windows subprocess module does not actually support Unicode
- # on Python 2.x
- # See http://stackoverflow.com/a/9951851/35070
- subprocess_encoding = sys.getfilesystemencoding()
- cmd = [a.encode(subprocess_encoding, 'ignore') for a in cmd]
- else:
- subprocess_encoding = None
- self._debug_cmd(cmd, subprocess_encoding)
+ cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
+
+ self._debug_cmd(cmd)
p = subprocess.Popen(
cmd, stderr=subprocess.PIPE)
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
cmd += self._source_address('--interface')
+ cmd += self._configuration_args()
cmd += ['--', info_dict['url']]
return cmd
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
cmd += self._source_address('--bind-address')
+ cmd += self._configuration_args()
cmd += ['--', info_dict['url']]
return cmd
class Aria2cFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict):
- cmd = [
- self.exe, '-c',
- '--min-split-size', '1M', '--max-connection-per-server', '4']
+ cmd = [self.exe, '-c']
+ cmd += self._configuration_args([
+ '--min-split-size', '1M', '--max-connection-per-server', '4'])
dn = os.path.dirname(tmpfilename)
if dn:
cmd += ['--dir', dn]
boot_info = self._get_bootstrap_from_url(bootstrap_url)
else:
bootstrap_url = None
- bootstrap = base64.b64decode(node.text)
+ bootstrap = base64.b64decode(node.text.encode('ascii'))
boot_info = read_bootstrap_info(bootstrap)
return (boot_info, bootstrap_url)
live = boot_info['live']
metadata_node = media.find(_add_ns('metadata'))
if metadata_node is not None:
- metadata = base64.b64decode(metadata_node.text)
+ metadata = base64.b64decode(metadata_node.text.encode('ascii'))
else:
metadata = None
url = base_url + name
if akamai_pv:
url += '?' + akamai_pv.strip(';')
+ if info_dict.get('extra_param_to_segment_url'):
+ url += info_dict.get('extra_param_to_segment_url')
frag_filename = '%s-%s' % (tmpfilename, name)
try:
success = http_dl.download(frag_filename, {'url': url})
add_headers = info_dict.get('http_headers')
if add_headers:
headers.update(add_headers)
- data = info_dict.get('http_post_data')
- http_method = info_dict.get('http_method')
- basic_request = compat_urllib_request.Request(url, data, headers)
- request = compat_urllib_request.Request(url, data, headers)
- if http_method is not None:
- basic_request.get_method = lambda: http_method
- request.get_method = lambda: http_method
+ basic_request = compat_urllib_request.Request(url, None, headers)
+ request = compat_urllib_request.Request(url, None, headers)
is_test = self.params.get('test', False)
open_mode = 'wb'
if resume_len != 0:
- if self.params.get('continuedl', False):
+ if self.params.get('continuedl', True):
self.report_resuming_byte(resume_len)
request.add_header('Range', 'bytes=%d-' % resume_len)
open_mode = 'ab'
self._hook_progress({
'filename': filename,
'status': 'finished',
+ 'downloaded_bytes': resume_len,
+ 'total_bytes': resume_len,
})
return True
else:
if tmpfilename != '-':
stream.close()
- self._hook_progress({
- 'downloaded_bytes': byte_counter,
- 'total_bytes': data_len,
- 'tmpfilename': tmpfilename,
- 'status': 'error',
- })
if data_len is not None and byte_counter != data_len:
raise ContentTooShortError(byte_counter, int(data_len))
self.try_rename(tmpfilename, filename)
import os
import re
import subprocess
-import sys
import time
from .common import FileDownloader
from ..utils import (
check_executable,
encodeFilename,
+ encodeArgument,
get_exe_version,
)
protocol = info_dict.get('rtmp_protocol', None)
real_time = info_dict.get('rtmp_real_time', False)
no_resume = info_dict.get('no_resume', False)
- continue_dl = info_dict.get('continuedl', False)
+ continue_dl = info_dict.get('continuedl', True)
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
# possible. This is part of rtmpdump's normal usage, AFAIK.
basic_args = [
'rtmpdump', '--verbose', '-r', url,
- '-o', encodeFilename(tmpfilename, True)]
+ '-o', tmpfilename]
if player_url is not None:
basic_args += ['--swfVfy', player_url]
if page_url is not None:
if play_path is not None:
basic_args += ['--playpath', play_path]
if tc_url is not None:
- basic_args += ['--tcUrl', url]
+ basic_args += ['--tcUrl', tc_url]
if test:
basic_args += ['--stop', '1']
if flash_version is not None:
if not live and continue_dl:
args += ['--skip', '1']
- if sys.platform == 'win32' and sys.version_info < (3, 0):
- # Windows subprocess module does not actually support Unicode
- # on Python 2.x
- # See http://stackoverflow.com/a/9951851/35070
- subprocess_encoding = sys.getfilesystemencoding()
- args = [a.encode(subprocess_encoding, 'ignore') for a in args]
- else:
- subprocess_encoding = None
+ args = [encodeArgument(a) for a in args]
- self._debug_cmd(args, subprocess_encoding, exe='rtmpdump')
+ self._debug_cmd(args, exe='rtmpdump')
RD_SUCCESS = 0
RD_FAILED = 1
prevsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen('[rtmpdump] %s bytes' % prevsize)
time.sleep(5.0) # This seems to be needed
- retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == RD_FAILED])
+ args = basic_args + ['--resume']
+ if retval == RD_FAILED:
+ args += ['--skip', '1']
+ args = [encodeArgument(a) for a in args]
+ retval = run_rtmpdump(args)
cursize = os.path.getsize(encodeFilename(tmpfilename))
if prevsize == cursize and retval == RD_FAILED:
break
)
-class MplayerFD(FileDownloader):
+class RtspFD(FileDownloader):
def real_download(self, filename, info_dict):
url = info_dict['url']
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
- args = [
- 'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',
- '-dumpstream', '-dumpfile', tmpfilename, url]
- # Check for mplayer first
- if not check_executable('mplayer', ['-h']):
- self.report_error('MMS or RTSP download detected but "%s" could not be run' % args[0])
+ if check_executable('mplayer', ['-h']):
+ args = [
+ 'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',
+ '-dumpstream', '-dumpfile', tmpfilename, url]
+ elif check_executable('mpv', ['-h']):
+ args = [
+ 'mpv', '-really-quiet', '--vo=null', '--stream-dump=' + tmpfilename, url]
+ else:
+ self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install one of them.')
return False
- # Download using mplayer.
retval = subprocess.call(args)
if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename))
return True
else:
self.to_stderr('\n')
- self.report_error('mplayer exited with code %d' % retval)
+ self.report_error('%s exited with code %d' % (args[0], retval))
return False
from .atttechchannel import ATTTechChannelIE
from .audiomack import AudiomackIE, AudiomackAlbumIE
from .azubu import AzubuIE
+from .baidu import BaiduVideoIE
from .bambuser import BambuserIE, BambuserChannelIE
from .bandcamp import BandcampIE, BandcampAlbumIE
from .bbccouk import BBCCoUkIE
from .beeg import BeegIE
from .behindkink import BehindKinkIE
+from .beatportpro import BeatportProIE
from .bet import BetIE
from .bild import BildIE
from .bilibili import BiliBiliIE
ChirbitProfileIE,
)
from .cinchcast import CinchcastIE
+from .cinemassacre import CinemassacreIE
from .clipfish import ClipfishIE
from .cliphunter import CliphunterIE
from .clipsyndicate import ClipsyndicateIE
from .condenast import CondeNastIE
from .cracked import CrackedIE
from .criterion import CriterionIE
+from .crooksandliars import CrooksAndLiarsIE
from .crunchyroll import (
CrunchyrollIE,
CrunchyrollShowPlaylistIE
from .dctp import DctpTvIE
from .deezer import DeezerPlaylistIE
from .dfb import DFBIE
+from .dhm import DHMIE
from .dotsub import DotsubIE
+from .douyutv import DouyuTVIE
from .dreisat import DreiSatIE
from .drbonanza import DRBonanzaIE
from .drtuber import DrTuberIE
from .drtv import DRTVIE
from .dvtv import DVTVIE
from .dump import DumpIE
+from .dumpert import DumpertIE
from .defense import DefenseGouvFrIE
from .discovery import DiscoveryIE
from .divxstage import DivxStageIE
from .dropbox import DropboxIE
+from .eagleplatform import EaglePlatformIE
from .ebaumsworld import EbaumsWorldIE
from .echomsk import EchoMskIE
from .ehow import EHowIE
)
from .flickr import FlickrIE
from .folketinget import FolketingetIE
+from .footyroom import FootyRoomIE
from .fourtube import FourTubeIE
from .foxgay import FoxgayIE
from .foxnews import FoxNewsIE
+from .foxsports import FoxSportsIE
from .franceculture import FranceCultureIE
from .franceinter import FranceInterIE
from .francetv import (
GameOneIE,
GameOnePlaylistIE,
)
+from .gamersyde import GamersydeIE
from .gamespot import GameSpotIE
from .gamestar import GameStarIE
from .gametrailers import GametrailersIE
+from .gazeta import GazetaIE
from .gdcvault import GDCVaultIE
from .generic import GenericIE
+from .gfycat import GfycatIE
from .giantbomb import GiantBombIE
from .giga import GigaIE
from .glide import GlideIE
from .googlesearch import GoogleSearchIE
from .gorillavid import GorillaVidIE
from .goshgay import GoshgayIE
-from .grooveshark import GroovesharkIE
from .groupon import GrouponIE
from .hark import HarkIE
from .hearthisat import HearThisAtIE
from .jukebox import JukeboxIE
from .jpopsukitv import JpopsukiIE
from .kaltura import KalturaIE
+from .kanalplay import KanalPlayIE
from .kankan import KankanIE
from .karaoketv import KaraoketvIE
from .keezmovies import KeezMoviesIE
LetvTvIE,
LetvPlaylistIE
)
-from .lifenews import LifeNewsIE
+from .libsyn import LibsynIE
+from .lifenews import (
+ LifeNewsIE,
+ LifeEmbedIE,
+)
from .liveleak import LiveLeakIE
from .livestream import (
LivestreamIE,
from .mailru import MailRuIE
from .malemotion import MalemotionIE
from .mdr import MDRIE
+from .megavideoz import MegaVideozIE
from .metacafe import MetacafeIE
from .metacritic import MetacriticIE
from .mgoon import MgoonIE
from .minhateca import MinhatecaIE
from .ministrygrid import MinistryGridIE
+from .miomio import MioMioIE
from .mit import TechTVMITIE, MITIE, OCWMITIE
from .mitele import MiTeleIE
from .mixcloud import MixcloudIE
from .nbc import (
NBCIE,
NBCNewsIE,
+ NBCSportsIE,
+ NBCSportsVPlayerIE,
+)
+from .ndr import (
+ NDRIE,
+ NJoyIE,
)
-from .ndr import NDRIE
from .ndtv import NDTVIE
from .netzkino import NetzkinoIE
from .nerdcubed import NerdCubedFeedIE
)
from .nrk import (
NRKIE,
+ NRKPlaylistIE,
NRKTVIE,
)
from .ntvde import NTVDeIE
from .ntvru import NTVRuIE
-from .nytimes import NYTimesIE
+from .nytimes import (
+ NYTimesIE,
+ NYTimesArticleIE,
+)
from .nuvid import NuvidIE
from .odnoklassniki import OdnoklassnikiIE
from .oktoberfesttv import OktoberfestTVIE
ORFTVthekIE,
ORFOE1IE,
ORFFM4IE,
+ ORFIPTVIE,
)
from .parliamentliveuk import ParliamentLiveUKIE
from .patreon import PatreonIE
from .pbs import PBSIE
+from .philharmoniedeparis import PhilharmonieDeParisIE
from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE
from .planetaplay import PlanetaPlayIE
+from .pladform import PladformIE
from .played import PlayedIE
from .playfm import PlayFMIE
from .playvid import PlayvidIE
+from .playwire import PlaywireIE
from .podomatic import PodomaticIE
from .pornhd import PornHdIE
from .pornhub import (
PornHubPlaylistIE,
)
from .pornotube import PornotubeIE
+from .pornovoisines import PornoVoisinesIE
from .pornoxo import PornoXOIE
+from .primesharetv import PrimeShareTVIE
from .promptfile import PromptFileIE
from .prosiebensat1 import ProSiebenSat1IE
from .puls4 import Puls4IE
from .pyvideo import PyvideoIE
+from .qqmusic import (
+ QQMusicIE,
+ QQMusicSingerIE,
+ QQMusicAlbumIE,
+ QQMusicToplistIE,
+)
from .quickvid import QuickVidIE
from .r7 import R7IE
from .radiode import RadioDeIE
+from .radiojavan import RadioJavanIE
from .radiobremen import RadioBremenIE
from .radiofrance import RadioFranceIE
from .rai import RaiIE
from .rtl2 import RTL2IE
from .rtp import RTPIE
from .rts import RTSIE
-from .rtve import RTVEALaCartaIE, RTVELiveIE
+from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
from .ruhd import RUHDIE
from .rutube import (
RutubeIE,
)
from .rutv import RUTVIE
from .sandia import SandiaIE
+from .safari import (
+ SafariIE,
+ SafariCourseIE,
+)
from .sapo import SapoIE
from .savefrom import SaveFromIE
from .sbs import SBSIE
from .scivee import SciVeeIE
from .screencast import ScreencastIE
from .screencastomatic import ScreencastOMaticIE
-from .screenwavemedia import CinemassacreIE, ScreenwaveMediaIE, TeamFourIE
+from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE
+from .senateisvp import SenateISVPIE
from .servingsys import ServingSysIE
from .sexu import SexuIE
from .sexykarma import SexyKarmaIE
)
from .southpark import (
SouthParkIE,
- SouthparkDeIE,
+ SouthParkDeIE,
+ SouthParkDkIE,
+ SouthParkEsIE,
+ SouthParkNlIE
)
from .space import SpaceIE
+from .spankbang import SpankBangIE
from .spankwire import SpankwireIE
from .spiegel import SpiegelIE, SpiegelArticleIE
from .spiegeltv import SpiegeltvIE
from .sport5 import Sport5IE
from .sportbox import SportBoxIE
from .sportdeutschland import SportDeutschlandIE
+from .srf import SrfIE
from .srmediathek import SRMediathekIE
+from .ssa import SSAIE
from .stanfordoc import StanfordOpenClassroomIE
from .steam import SteamIE
from .streamcloud import StreamcloudIE
from .streamcz import StreamCZIE
from .streetvoice import StreetVoiceIE
from .sunporno import SunPornoIE
-from .svtplay import SVTPlayIE
+from .svt import (
+ SVTIE,
+ SVTPlayIE,
+)
from .swrmediathek import SWRMediathekIE
from .syfy import SyfyIE
from .sztvhu import SztvHuIE
from .thisav import ThisAVIE
from .tinypic import TinyPicIE
from .tlc import TlcIE, TlcDeIE
-from .tmz import TMZIE
+from .tmz import (
+ TMZIE,
+ TMZArticleIE,
+)
from .tnaflix import TNAFlixIE
from .thvideo import (
THVideoIE,
from .tvplay import TVPlayIE
from .tweakers import TweakersIE
from .twentyfourvideo import TwentyFourVideoIE
+from .twentytwotracks import (
+ TwentyTwoTracksIE,
+ TwentyTwoTracksGenreIE
+)
from .twitch import (
TwitchVideoIE,
TwitchChapterIE,
UdemyIE,
UdemyCourseIE
)
+from .udn import UDNEmbedIE
+from .ultimedia import UltimediaIE
from .unistra import UnistraIE
from .urort import UrortIE
from .ustream import UstreamIE, UstreamChannelIE
+from .varzesh3 import Varzesh3IE
from .vbox7 import Vbox7IE
from .veehd import VeeHDIE
from .veoh import VeohIE
+from .vessel import VesselIE
from .vesti import VestiIE
from .vevo import VevoIE
-from .vgtv import VGTVIE
+from .vgtv import (
+ BTArticleIE,
+ BTVestlendingenIE,
+ VGTVIE,
+)
from .vh1 import VH1IE
from .vice import ViceIE
from .viddler import ViddlerIE
from .vidme import VidmeIE
from .vidzi import VidziIE
from .vier import VierIE, VierVideosIE
+from .viewster import ViewsterIE
from .vimeo import (
VimeoIE,
VimeoAlbumIE,
VKUserVideosIE,
)
from .vodlocker import VodlockerIE
+from .voicerepublic import VoiceRepublicIE
from .vporn import VpornIE
from .vrt import VRTIE
from .vube import VubeIE
from .xhamster import XHamsterIE
from .xminus import XMinusIE
from .xnxx import XNXXIE
-from .xvideos import XVideosIE
+from .xstream import XstreamIE
from .xtube import XTubeUserIE, XTubeIE
from .xuite import XuiteIE
+from .xvideos import XVideosIE
from .xxxymovies import XXXYMoviesIE
from .yahoo import (
YahooIE,
YahooSearchIE,
)
from .yam import YamIE
+from .yandexmusic import (
+ YandexMusicTrackIE,
+ YandexMusicAlbumIE,
+ YandexMusicPlaylistIE,
+)
from .yesjapan import YesJapanIE
from .ynet import YnetIE
from .youjizz import YouJizzIE
)
from ..utils import (
ExtractorError,
+ qualities,
)
class AddAnimeIE(InfoExtractor):
- _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<id>[\w_]+)(?:.*)'
- _TEST = {
+ _VALID_URL = r'http://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P<id>[\w_]+)'
+ _TESTS = [{
'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
'md5': '72954ea10bc979ab5e2eb288b21425a0',
'info_dict': {
'description': 'One Piece 606',
'title': 'One Piece 606',
}
- }
+ }, {
+ 'url': 'http://add-anime.net/video/MDUGWYKNGBD8/One-Piece-687',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
note='Confirming after redirect')
webpage = self._download_webpage(url, video_id)
+ FORMATS = ('normal', 'hq')
+ quality = qualities(FORMATS)
formats = []
- for format_id in ('normal', 'hq'):
+ for format_id in FORMATS:
rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id)
video_url = self._search_regex(rex, webpage, 'video file URL',
fatal=False)
formats.append({
'format_id': format_id,
'url': video_url,
+ 'quality': quality(format_id),
})
self._sort_formats(formats)
video_title = self._og_search_title(webpage)
from __future__ import unicode_literals
import re
-import json
from .common import InfoExtractor
from ..utils import (
ExtractorError,
- xpath_text,
float_or_none,
+ xpath_text,
)
'title': 'American Dad - Putting Francine Out of Business',
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
},
+ }, {
+ 'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/',
+ 'playlist': [
+ {
+ 'md5': '3e346a2ab0087d687a05e1e7f3b3e529',
+ 'info_dict': {
+ 'id': 'sY3cMUR_TbuE4YmdjzbIcQ-0',
+ 'ext': 'flv',
+ 'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
+ 'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
+ },
+ }
+ ],
+ 'info_dict': {
+ 'id': 'sY3cMUR_TbuE4YmdjzbIcQ',
+ 'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
+ 'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
+ },
}]
@staticmethod
for video in collection.get('videos'):
if video.get('slug') == slug:
return collection, video
+ return None, None
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
webpage = self._download_webpage(url, episode_path)
# Extract the value of `bootstrappedData` from the Javascript in the page.
- bootstrappedDataJS = self._search_regex(r'var bootstrappedData = ({.*});', webpage, episode_path)
-
- try:
- bootstrappedData = json.loads(bootstrappedDataJS)
- except ValueError as ve:
- errmsg = '%s: Failed to parse JSON ' % episode_path
- raise ExtractorError(errmsg, cause=ve)
+ bootstrapped_data = self._parse_json(self._search_regex(
+ r'var bootstrappedData = ({.*});', webpage, 'bootstrapped data'), episode_path)
# Downloading videos from a /videos/playlist/ URL needs to be handled differently.
# NOTE: We are only downloading one video (the current one) not the playlist
if is_playlist:
- collections = bootstrappedData['playlists']['collections']
+ collections = bootstrapped_data['playlists']['collections']
collection = self.find_collection_by_linkURL(collections, show_path)
video_info = self.find_video_info(collection, episode_path)
show_title = video_info['showTitle']
segment_ids = [video_info['videoPlaybackID']]
else:
- collections = bootstrappedData['show']['collections']
+ collections = bootstrapped_data['show']['collections']
collection, video_info = self.find_collection_containing_video(collections, episode_path)
- show = bootstrappedData['show']
+ # Video wasn't found in the collections, let's try `slugged_video`.
+ if video_info is None:
+ if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path:
+ video_info = bootstrapped_data['slugged_video']
+ else:
+ raise ExtractorError('Unable to find video info')
+
+ show = bootstrapped_data['show']
show_title = show['title']
segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- parse_iso8601,
- xpath_with_ns,
- xpath_text,
- find_xpath_attr,
-)
class AftenpostenIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/([^/]+/)*(?P<id>[^/]+)-\d+\.html'
-
+ _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/(?:#!/)?video/(?P<id>\d+)'
_TEST = {
- 'url': 'http://www.aftenposten.no/webtv/serier-og-programmer/sweatshopenglish/TRAILER-SWEATSHOP---I-cant-take-any-more-7800835.html?paging=§ion=webtv_serierogprogrammer_sweatshop_sweatshopenglish',
+ 'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
'md5': 'fd828cd29774a729bf4d4425fe192972',
'info_dict': {
'id': '21039',
}
def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
-
- video_id = self._html_search_regex(
- r'data-xs-id="(\d+)"', webpage, 'video id')
-
- data = self._download_xml(
- 'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=%s' % video_id, video_id)
-
- NS_MAP = {
- 'atom': 'http://www.w3.org/2005/Atom',
- 'xt': 'http://xstream.dk/',
- 'media': 'http://search.yahoo.com/mrss/',
- }
-
- entry = data.find(xpath_with_ns('./atom:entry', NS_MAP))
-
- title = xpath_text(
- entry, xpath_with_ns('./atom:title', NS_MAP), 'title')
- description = xpath_text(
- entry, xpath_with_ns('./atom:summary', NS_MAP), 'description')
- timestamp = parse_iso8601(xpath_text(
- entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date'))
-
- formats = []
- media_group = entry.find(xpath_with_ns('./media:group', NS_MAP))
- for media_content in media_group.findall(xpath_with_ns('./media:content', NS_MAP)):
- media_url = media_content.get('url')
- if not media_url:
- continue
- tbr = int_or_none(media_content.get('bitrate'))
- mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', media_url)
- if mobj:
- formats.append({
- 'url': mobj.group('url'),
- 'play_path': 'mp4:%s' % mobj.group('playpath'),
- 'app': mobj.group('app'),
- 'ext': 'flv',
- 'tbr': tbr,
- 'format_id': 'rtmp-%d' % tbr,
- })
- else:
- formats.append({
- 'url': media_url,
- 'tbr': tbr,
- })
- self._sort_formats(formats)
-
- link = find_xpath_attr(
- entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original')
- if link is not None:
- formats.append({
- 'url': link.get('href'),
- 'format_id': link.get('rel'),
- })
-
- thumbnails = [{
- 'url': splash.get('url'),
- 'width': int_or_none(splash.get('width')),
- 'height': int_or_none(splash.get('height')),
- } for splash in media_group.findall(xpath_with_ns('./xt:splash', NS_MAP))]
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'timestamp': timestamp,
- 'formats': formats,
- 'thumbnails': thumbnails,
- }
+ return self.url_result('xstream:ap:%s' % self._match_id(url), 'Xstream')
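# The XML feed parsing removed above now lives in the shared XstreamIE;
# AftenpostenIE just maps its URL to an 'xstream:ap:<id>' key for that extractor.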
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..utils import int_or_none
class AftonbladetIE(InfoExtractor):
- _VALID_URL = r'^http://tv\.aftonbladet\.se/webbtv.+?(?P<video_id>article[0-9]+)\.ab(?:$|[?#])'
+ _VALID_URL = r'http://tv\.aftonbladet\.se/webbtv.+?(?P<id>article[0-9]+)\.ab(?:$|[?#])'
_TEST = {
'url': 'http://tv.aftonbladet.se/webbtv/nyheter/vetenskap/rymden/article36015.ab',
'info_dict': {
formats.append({
'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']),
'ext': 'mp4',
- 'width': fmt['width'],
- 'height': fmt['height'],
- 'tbr': fmt['bitrate'],
+ 'width': int_or_none(fmt.get('width')),
+ 'height': int_or_none(fmt.get('height')),
+ 'tbr': int_or_none(fmt.get('bitrate')),
'protocol': 'http',
})
self._sort_formats(formats)
'id': video_id,
'title': internal_meta_json['title'],
'formats': formats,
- 'thumbnail': internal_meta_json['imageUrl'],
- 'description': internal_meta_json['shortPreamble'],
- 'timestamp': internal_meta_json['timePublished'],
- 'duration': internal_meta_json['duration'],
- 'view_count': internal_meta_json['views'],
+ 'thumbnail': internal_meta_json.get('imageUrl'),
+ 'description': internal_meta_json.get('shortPreamble'),
+ 'timestamp': int_or_none(internal_meta_json.get('timePublished')),
+ 'duration': int_or_none(internal_meta_json.get('duration')),
+ 'view_count': int_or_none(internal_meta_json.get('views')),
}
def _real_extract(self, url):
video_id = self._match_id(url)
- json_url = url + ('?' if '?' in url else '&') + 'output=json'
+ json_url = url + ('&' if '?' in url else '?') + 'output=json'
data = self._download_json(json_url, video_id)
def get_optional(data_dict, field):
if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
+ if 'Diese Sendung ist für Jugendliche unter 12 Jahren nicht geeignet. Der Clip ist deshalb nur von 20 bis 6 Uhr verfügbar.' in webpage:
+ raise ExtractorError('This program is only suitable for those aged 12 and older. Video %s is therefore only available between 8 pm and 6 am.' % video_id, expected=True)
+
if re.search(r'[\?&]rss($|[=&])', url):
doc = parse_xml(webpage)
if doc.tag == 'rss':
formats.append(format)
+ self._check_formats(formats, video_id)
self._sort_formats(formats)
info_dict['formats'] = formats
class AtresPlayerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html'
+ _NETRC_MACHINE = 'atresplayer'
_TESTS = [
{
'url': 'http://www.atresplayer.com/television/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_2014122100174.html',
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+
+
+class BaiduVideoIE(InfoExtractor):
+ _VALID_URL = r'http://v\.baidu\.com/(?P<type>[a-z]+)/(?P<id>\d+)\.htm'
+ _TESTS = [{
+ 'url': 'http://v.baidu.com/comic/1069.htm?frp=bdbrand&q=%E4%B8%AD%E5%8D%8E%E5%B0%8F%E5%BD%93%E5%AE%B6',
+ 'info_dict': {
+ 'id': '1069',
+ 'title': '中华小当家 TV版 (全52集)',
+ 'description': 'md5:395a419e41215e531c857bb037bbaf80',
+ },
+ 'playlist_count': 52,
+ }, {
+ 'url': 'http://v.baidu.com/show/11595.htm?frp=bdbrand',
+ 'info_dict': {
+ 'id': '11595',
+ 'title': 're:^奔跑吧兄弟',
+ 'description': 'md5:1bf88bad6d850930f542d51547c089b8',
+ },
+ 'playlist_mincount': 3,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ playlist_id = mobj.group('id')
+ category = category2 = mobj.group('type')
+ if category == 'show':
+ category2 = 'tvshow'
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ playlist_title = self._html_search_regex(
+ r'title\s*:\s*(["\'])(?P<title>[^\']+)\1', webpage,
+ 'playlist title', group='title')
+ playlist_description = self._html_search_regex(
+ r'<input[^>]+class="j-data-intro"[^>]+value="([^"]+)"/>', webpage,
+ 'playlist description')
+
+ site = self._html_search_regex(
+ r'filterSite\s*:\s*["\']([^"]*)["\']', webpage,
+ 'primary provider site')
+ api_result = self._download_json(
+ 'http://v.baidu.com/%s_intro/?dtype=%sPlayUrl&id=%s&site=%s' % (
+ category, category2, playlist_id, site),
+ playlist_id, 'Get playlist links')
+
+ entries = []
+ for episode in api_result[0]['episodes']:
+ episode_id = '%s_%s' % (playlist_id, episode['episode'])
+
+ redirect_page = self._download_webpage(
+ compat_urlparse.urljoin(url, episode['url']), episode_id,
+ note='Download Baidu redirect page')
+ real_url = self._html_search_regex(
+ r'location\.replace\("([^"]+)"\)', redirect_page, 'real URL')
+
+ entries.append(self.url_result(
+ real_url, video_title=episode['single_title']))
+
+ return self.playlist_result(
+ entries, playlist_id, playlist_title, playlist_description)
from __future__ import unicode_literals
import re
-import json
import itertools
from .common import InfoExtractor
from ..compat import (
+ compat_urllib_parse,
compat_urllib_request,
+ compat_str,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ float_or_none,
)
IE_NAME = 'bambuser'
_VALID_URL = r'https?://bambuser\.com/v/(?P<id>\d+)'
_API_KEY = '005f64509e19a868399060af746a00aa'
+ _LOGIN_URL = 'https://bambuser.com/user'
+ _NETRC_MACHINE = 'bambuser'
_TEST = {
'url': 'http://bambuser.com/v/4050584',
'duration': 3741,
'uploader': 'pixelversity',
'uploader_id': '344706',
+ 'timestamp': 1382976692,
+ 'upload_date': '20131028',
+ 'view_count': int,
},
'params': {
# It doesn't respect the 'Range' header, it would download the whole video
},
}
+ def _login(self):
+ (username, password) = self._get_login_info()
+ if username is None:
+ return
+
+ login_form = {
+ 'form_id': 'user_login',
+ 'op': 'Log in',
+ 'name': username,
+ 'pass': password,
+ }
+
+ request = compat_urllib_request.Request(
+ self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
+ request.add_header('Referer', self._LOGIN_URL)
+ response = self._download_webpage(
+ request, None, 'Logging in as %s' % username)
+
+ login_error = self._html_search_regex(
+ r'(?s)<div class="messages error">(.+?)</div>',
+ response, 'login error', default=None)
+ if login_error:
+ raise ExtractorError(
+ 'Unable to login: %s' % login_error, expected=True)
+
+ def _real_initialize(self):
+ self._login()
+
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- info_url = ('http://player-c.api.bambuser.com/getVideo.json?'
- '&api_key=%s&vid=%s' % (self._API_KEY, video_id))
- info_json = self._download_webpage(info_url, video_id)
- info = json.loads(info_json)['result']
+ video_id = self._match_id(url)
+
+ info = self._download_json(
+ 'http://player-c.api.bambuser.com/getVideo.json?api_key=%s&vid=%s'
+ % (self._API_KEY, video_id), video_id)
+
+ error = info.get('error')
+ if error:
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, error), expected=True)
+
+ result = info['result']
return {
'id': video_id,
- 'title': info['title'],
- 'url': info['url'],
- 'thumbnail': info.get('preview'),
- 'duration': int(info['length']),
- 'view_count': int(info['views_total']),
- 'uploader': info['username'],
- 'uploader_id': info['owner']['uid'],
+ 'title': result['title'],
+ 'url': result['url'],
+ 'thumbnail': result.get('preview'),
+ 'duration': int_or_none(result.get('length')),
+ 'uploader': result.get('username'),
+ 'uploader_id': compat_str(result.get('owner', {}).get('uid')),
+ 'timestamp': int_or_none(result.get('created')),
+ 'fps': float_or_none(result.get('framerate')),
+ 'view_count': int_or_none(result.get('views_total')),
+ 'comment_count': int_or_none(result.get('comment_count')),
}
download_link = m_download.group(1)
video_id = self._search_regex(
- r'(?ms)var TralbumData = {.*?id: (?P<id>\d+),?$',
+ r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
webpage, 'video id')
download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page')
import xml.etree.ElementTree
from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+)
from ..compat import compat_HTTPError
# rtmp download
'skip_download': True,
}
+ }, {
+ 'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls',
+ 'info_dict': {
+ 'id': 'p02n76xf',
+ 'ext': 'flv',
+ 'title': 'Natural World, 2015-2016: 2. Super Powered Owls',
+ 'description': 'md5:e4db5c937d0e95a7c6b5e654d429183d',
+ 'duration': 3540,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'geolocation',
}, {
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
'only_matching': True,
webpage = self._download_webpage(url, group_id, 'Downloading video page')
- programme_id = self._search_regex(
- r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False, default=None)
+ programme_id = None
+
+ tviplayer = self._search_regex(
+ r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById',
+ webpage, 'player', default=None)
+
+ if tviplayer:
+ player = self._parse_json(tviplayer, group_id).get('player', {})
+ duration = int_or_none(player.get('duration'))
+ programme_id = player.get('vpid')
+
+ if not programme_id:
+ programme_id = self._search_regex(
+ r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False, default=None)
+
if programme_id:
- player = self._download_json(
- 'http://www.bbc.co.uk/iplayer/episode/%s.json' % group_id,
- group_id)['jsConf']['player']
- title = player['title']
- description = player['subtitle']
- duration = player['duration']
formats, subtitles = self._download_media_selector(programme_id)
+ title = self._og_search_title(webpage)
+ description = self._search_regex(
+ r'<p class="medium-description">([^<]+)</p>',
+ webpage, 'description', fatal=False)
else:
programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
'id': programme_id,
'title': title,
'description': description,
+ 'thumbnail': self._og_search_thumbnail(webpage, default=None),
'duration': duration,
'formats': formats,
'subtitles': subtitles,
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import int_or_none
+
+
+class BeatportProIE(InfoExtractor):
+ _VALID_URL = r'https?://pro\.beatport\.com/track/(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://pro.beatport.com/track/synesthesia-original-mix/5379371',
+ 'md5': 'b3c34d8639a2f6a7f734382358478887',
+ 'info_dict': {
+ 'id': '5379371',
+ 'display_id': 'synesthesia-original-mix',
+ 'ext': 'mp4',
+ 'title': 'Froxic - Synesthesia (Original Mix)',
+ },
+ }, {
+ 'url': 'https://pro.beatport.com/track/love-and-war-original-mix/3756896',
+ 'md5': 'e44c3025dfa38c6577fbaeb43da43514',
+ 'info_dict': {
+ 'id': '3756896',
+ 'display_id': 'love-and-war-original-mix',
+ 'ext': 'mp3',
+ 'title': 'Wolfgang Gartner - Love & War (Original Mix)',
+ },
+ }, {
+ 'url': 'https://pro.beatport.com/track/birds-original-mix/4991738',
+ 'md5': 'a1fd8e8046de3950fd039304c186c05f',
+ 'info_dict': {
+ 'id': '4991738',
+ 'display_id': 'birds-original-mix',
+ 'ext': 'mp4',
+ 'title': "Tos, Middle Milk, Mumblin' Johnsson - Birds (Original Mix)",
+ }
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ track_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ playables = self._parse_json(
+ self._search_regex(
+ r'window\.Playables\s*=\s*({.+?});', webpage,
+ 'playables info', flags=re.DOTALL),
+ track_id)
+
+ track = next(t for t in playables['tracks'] if t['id'] == int(track_id))
+
+ title = ', '.join((a['name'] for a in track['artists'])) + ' - ' + track['name']
+ if track['mix']:
+ title += ' (' + track['mix'] + ')'
+
+ formats = []
+ for ext, info in track['preview'].items():
+ if not info['url']:
+ continue
+ fmt = {
+ 'url': info['url'],
+ 'ext': ext,
+ 'format_id': ext,
+ 'vcodec': 'none',
+ }
+ if ext == 'mp3':
+ fmt['preference'] = 0
+ fmt['acodec'] = 'mp3'
+ fmt['abr'] = 96
+ fmt['asr'] = 44100
+ elif ext == 'mp4':
+ fmt['preference'] = 1
+ fmt['acodec'] = 'aac'
+ fmt['abr'] = 96
+ fmt['asr'] = 44100
+ formats.append(fmt)
+ self._sort_formats(formats)
+
+ images = []
+ for name, info in track['images'].items():
+ image_url = info.get('url')
+ if name == 'dynamic' or not image_url:
+ continue
+ image = {
+ 'id': name,
+ 'url': image_url,
+ 'height': int_or_none(info.get('height')),
+ 'width': int_or_none(info.get('width')),
+ }
+ images.append(image)
+
+ return {
+ 'id': compat_str(track.get('id') or track_id),
+ 'display_id': track.get('slug') or display_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnails': images,
+ }
{
'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html',
'info_dict': {
- 'id': '740ab250-bb94-4a8a-8787-fe0de7c74471',
+ 'id': 'news/national/2014/a-conversation-with-president-obama',
'display_id': 'in-bet-exclusive-obama-talks-race-and-racism',
'ext': 'flv',
- 'title': 'BET News Presents: A Conversation With President Obama',
- 'description': 'md5:5a88d8ae912c1b33e090290af7ec33c6',
+ 'title': 'A Conversation With President Obama',
+ 'description': 'md5:699d0652a350cf3e491cd15cc745b5da',
'duration': 1534,
'timestamp': 1418075340,
'upload_date': '20141208',
{
'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html',
'info_dict': {
- 'id': 'bcd1b1df-673a-42cf-8d01-b282db608f2d',
+ 'id': 'news/national/2014/justice-for-ferguson-a-community-reacts',
'display_id': 'justice-for-ferguson-a-community-reacts',
'ext': 'flv',
'title': 'Justice for Ferguson: A Community Reacts',
[r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"],
webpage, 'media URL'))
+ video_id = self._search_regex(
+ r'/video/(.*)/_jcr_content/', media_url, 'video id')
+
mrss = self._download_xml(media_url, display_id)
item = mrss.find('./channel/item')
description = xpath_text(
item, './description', 'description', fatal=False)
- video_id = xpath_text(item, './guid', 'video id', fatal=False)
-
timestamp = parse_iso8601(xpath_text(
item, xpath_with_ns('./dc:date', NS_MAP),
'upload date', fatal=False))
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+ int_or_none,
+ fix_xml_ampersands,
+)
class BildIE(InfoExtractor):
'id': '38184146',
'ext': 'mp4',
'title': 'BILD hat sie getestet',
- 'thumbnail': 'http://bilder.bild.de/fotos/stand-das-koennen-die-neuen-ipads-38184138/Bild/1.bild.jpg',
+ 'thumbnail': 're:^https?://.*\.jpg$',
'duration': 196,
'description': 'Mit dem iPad Air 2 und dem iPad Mini 3 hat Apple zwei neue Tablet-Modelle präsentiert. BILD-Reporter Sven Stein durfte die Geräte bereits testen. ',
}
video_id = self._match_id(url)
xml_url = url.split(".bild.html")[0] + ",view=xml.bild.xml"
- doc = self._download_xml(xml_url, video_id)
+ doc = self._download_xml(xml_url, video_id, transform_source=fix_xml_ampersands)
duration = int_or_none(doc.attrib.get('duration'), scale=1000)
from __future__ import unicode_literals
import re
+import itertools
from .common import InfoExtractor
from ..utils import (
int_or_none,
unified_strdate,
+ ExtractorError,
)
class BiliBiliIE(InfoExtractor):
_VALID_URL = r'http://www\.bilibili\.(?:tv|com)/video/av(?P<id>[0-9]+)/'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.bilibili.tv/video/av1074402/',
'md5': '2c301e4dab317596e837c3e7633e7d86',
'info_dict': {
- 'id': '1074402',
+ 'id': '1074402_part1',
'ext': 'flv',
'title': '【金坷垃】金泡沫',
'duration': 308,
'upload_date': '20140420',
'thumbnail': 're:^https?://.+\.jpg',
},
- }
+ }, {
+ 'url': 'http://www.bilibili.com/video/av1041170/',
+ 'info_dict': {
+ 'id': '1041170',
+ 'title': '【BD1080P】刀语【诸神&异域】',
+ },
+ 'playlist_count': 9,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
+ if self._search_regex(r'(此视频不存在或被删除)', webpage, 'error message', default=None):
+ raise ExtractorError('The video does not exist or was deleted', expected=True)
video_code = self._search_regex(
r'(?s)<div itemprop="video".*?>(.*?)</div>', webpage, 'video code')
cid = self._search_regex(r'cid=(\d+)', webpage, 'cid')
+ entries = []
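+ # Each <durl> element is one part of a possibly multi-part video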
+
lq_doc = self._download_xml(
'http://interface.bilibili.com/v_cdn_play?appkey=1&cid=%s' % cid,
video_id,
note='Downloading LQ video info'
)
- lq_durl = lq_doc.find('./durl')
- formats = [{
- 'format_id': 'lq',
- 'quality': 1,
- 'url': lq_durl.find('./url').text,
- 'filesize': int_or_none(
- lq_durl.find('./size'), get_attr='text'),
- }]
+ lq_durls = lq_doc.findall('./durl')
hq_doc = self._download_xml(
'http://interface.bilibili.com/playurl?appkey=1&cid=%s' % cid,
note='Downloading HQ video info',
fatal=False,
)
- if hq_doc is not False:
- hq_durl = hq_doc.find('./durl')
- formats.append({
- 'format_id': 'hq',
- 'quality': 2,
- 'ext': 'flv',
- 'url': hq_durl.find('./url').text,
+ hq_durls = hq_doc.findall('./durl') if hq_doc is not False else itertools.repeat(None)
+
+ # itertools.repeat has no len(), so only check part counts when HQ info was fetched
+ if hq_doc is not False:
+ assert len(lq_durls) == len(hq_durls)
+
+ for i, (lq_durl, hq_durl) in enumerate(zip(lq_durls, hq_durls), start=1):
+ formats = [{
+ 'format_id': 'lq',
+ 'quality': 1,
+ 'url': lq_durl.find('./url').text,
'filesize': int_or_none(
- hq_durl.find('./size'), get_attr='text'),
+ lq_durl.find('./size'), get_attr='text'),
+ }]
+ if hq_durl is not None:
+ formats.append({
+ 'format_id': 'hq',
+ 'quality': 2,
+ 'ext': 'flv',
+ 'url': hq_durl.find('./url').text,
+ 'filesize': int_or_none(
+ hq_durl.find('./size'), get_attr='text'),
+ })
+ self._sort_formats(formats)
+
+ entries.append({
+ 'id': '%s_part%d' % (video_id, i),
+ 'title': title,
+ 'formats': formats,
+ 'duration': duration,
+ 'upload_date': upload_date,
+ 'thumbnail': thumbnail,
})
- self._sort_formats(formats)
+
return {
+ '_type': 'multi_video',
+ 'entries': entries,
'id': video_id,
- 'title': title,
- 'formats': formats,
- 'duration': duration,
- 'upload_date': upload_date,
- 'thumbnail': thumbnail,
+ 'title': title
}
},
]
+ @staticmethod
+ def _extract_url(webpage):
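+ # Return the URL of a blip.tv embed found in webpage, or None if there is none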
+ mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
+ if mobj:
+ return 'http://blip.tv/a/a-' + mobj.group(1)
+ mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage)
+ if mobj:
+ return mobj.group(1)
+
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
lookup_id = mobj.group('lookup_id')
'width': int_or_none(media_content.get('width')),
'height': int_or_none(media_content.get('height')),
})
+ self._check_formats(formats, video_id)
self._sort_formats(formats)
subtitles = self.extract_subtitles(video_id, subtitles_urls)
class BloombergIE(InfoExtractor):
- _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<id>.+?)\.html'
+ _VALID_URL = r'https?://www\.bloomberg\.com/news/videos/[^/]+/(?P<id>[^/?#]+)'
_TEST = {
- 'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
+ 'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2',
# The md5 checksum changes
'info_dict': {
'id': 'qurhIVlJSB6hzkVi229d8g',
'ext': 'flv',
'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
- 'description': 'md5:0681e0d30dcdfc6abf34594961d8ea88',
+ 'description': 'md5:a8ba0302912d03d246979735c17d2761',
},
}
def _real_extract(self, url):
name = self._match_id(url)
webpage = self._download_webpage(url, name)
-
- f4m_url = self._search_regex(
- r'<source src="(https?://[^"]+\.f4m.*?)"', webpage,
- 'f4m url')
+ video_id = self._search_regex(r'"bmmrId":"(.+?)"', webpage, 'id')
title = re.sub(': Video$', '', self._og_search_title(webpage))
+ embed_info = self._download_json(
+ 'http://www.bloomberg.com/api/embed?id=%s' % video_id, video_id)
+ formats = []
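+ # TS streams are HLS (m3u8) playlists; everything else is an f4m manifest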
+ for stream in embed_info['streams']:
+ if stream['muxing_format'] == 'TS':
+ formats.extend(self._extract_m3u8_formats(stream['url'], video_id))
+ else:
+ formats.extend(self._extract_f4m_formats(stream['url'], video_id))
+ self._sort_formats(formats)
+
return {
- 'id': name.split('-')[-1],
+ 'id': video_id,
'title': title,
- 'formats': self._extract_f4m_formats(f4m_url, name),
+ 'formats': formats,
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
}
_TESTS = [
{
- 'url': 'http://www.br.de/mediathek/video/sendungen/heimatsound/heimatsound-festival-2014-trailer-100.html',
- 'md5': '93556dd2bcb2948d9259f8670c516d59',
+ 'url': 'http://www.br.de/mediathek/video/sendungen/abendschau/betriebliche-altersvorsorge-104.html',
+ 'md5': '83a0477cf0b8451027eb566d88b51106',
'info_dict': {
- 'id': '25e279aa-1ffd-40fd-9955-5325bd48a53a',
+ 'id': '48f656ef-287e-486f-be86-459122db22cc',
'ext': 'mp4',
- 'title': 'Wenn das Traditions-Theater wackelt',
- 'description': 'Heimatsound-Festival 2014: Wenn das Traditions-Theater wackelt',
- 'duration': 34,
- 'uploader': 'BR',
- 'upload_date': '20140802',
+ 'title': 'Die böse Überraschung',
+ 'description': 'Betriebliche Altersvorsorge: Die böse Überraschung',
+ 'duration': 180,
+ 'uploader': 'Reinhard Weber',
+ 'upload_date': '20150422',
}
},
{
- 'url': 'http://www.br.de/nachrichten/schaeuble-haushaltsentwurf-bundestag-100.html',
- 'md5': '3db0df1a9a9cd9fa0c70e6ea8aa8e820',
+ 'url': 'http://www.br.de/nachrichten/oberbayern/inhalt/muenchner-polizeipraesident-schreiber-gestorben-100.html',
+ 'md5': 'a44396d73ab6a68a69a568fae10705bb',
'info_dict': {
- 'id': 'c6aae3de-2cf9-43f2-957f-f17fef9afaab',
+ 'id': 'a4b83e34-123d-4b81-9f4e-c0d3121a4e05',
+ 'ext': 'mp4',
+ 'title': 'Manfred Schreiber ist tot',
+ 'description': 'Abendschau kompakt: Manfred Schreiber ist tot',
+ 'duration': 26,
+ }
+ },
+ {
+ 'url': 'http://www.br.de/radio/br-klassik/sendungen/allegro/premiere-urauffuehrung-the-land-2015-dance-festival-muenchen-100.html',
+ 'md5': '8b5b27c0b090f3b35eac4ab3f7a73d3d',
+ 'info_dict': {
+ 'id': '74c603c9-26d3-48bb-b85b-079aeed66e0b',
'ext': 'aac',
- 'title': '"Keine neuen Schulden im nächsten Jahr"',
- 'description': 'Haushaltsentwurf: "Keine neuen Schulden im nächsten Jahr"',
- 'duration': 64,
+ 'title': 'Kurzweilig und sehr bewegend',
+ 'description': '"The Land" von Peeping Tom: Kurzweilig und sehr bewegend',
+ 'duration': 296,
}
},
{
'tbr': media['bitRate'],
'width': media['width'],
'height': media['height'],
- } for media in info['media']]
+ } for media in info['media'] if media.get('mediaPurpose') == 'play']
if not formats:
formats.append({
object_str = re.sub(r'(<object[^>]*)(xmlns=".*?")', r'\1', object_str)
object_str = fix_xml_ampersands(object_str)
- object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
+ try:
+ object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
+ except xml.etree.ElementTree.ParseError:
+ return
fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
if fv_el is not None:
(?:
[^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] |
[^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
- ).+?</object>''',
+ ).+?>\s*</object>''',
webpage)
- return [cls._build_brighcove_url(m) for m in matches]
+ return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
'ext': 'mp4',
'description': 'md5:5438d33774b6bdc662f9485a340401cc',
'title': 'Season 5 Episode 5',
- 'thumbnail': 're:^https?://.*promo.*'
+ 'thumbnail': 're:^https?://.*\.jpg$'
},
'params': {
'skip_download': True,
}
_TESTS = [{
- 'url': 'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470',
- 'md5': '3db39fb48b9685438ecf33a1078023e4',
+ 'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1263092',
+ 'md5': 'b3481d7ca972f61e37420798d0a9d934',
'info_dict': {
- 'id': '922470',
+ 'id': '1263092',
'ext': 'flv',
- 'title': 'Zapping - 26/08/13',
- 'description': 'Le meilleur de toutes les chaînes, tous les jours.\nEmission du 26 août 2013',
- 'upload_date': '20130826',
+ 'title': 'Le Zapping - 13/05/15',
+ 'description': 'md5:09738c0d06be4b5d06a0940edb0da73f',
+ 'upload_date': '20150513',
},
}, {
'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190',
'skip': 'videos get deleted after a while',
}, {
'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559',
- 'md5': '65aa83ad62fe107ce29e564bb8712580',
+ 'md5': 'f3a46edcdf28006598ffaf5b30e6a2d4',
'info_dict': {
'id': '1213714',
'ext': 'flv',
--- /dev/null
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+from .bliptv import BlipTVIE
+
+
+class CinemassacreIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?cinemassacre\.com/(?P<date_y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
+ _TESTS = [
+ {
+ 'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
+ 'md5': 'fde81fbafaee331785f58cd6c0d46190',
+ 'info_dict': {
+ 'id': 'Cinemassacre-19911',
+ 'ext': 'mp4',
+ 'upload_date': '20121110',
+ 'title': '“Angry Video Game Nerd: The Movie” – Trailer',
+ 'description': 'md5:fb87405fcb42a331742a0dce2708560b',
+ },
+ },
+ {
+ 'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
+ 'md5': 'd72f10cd39eac4215048f62ab477a511',
+ 'info_dict': {
+ 'id': 'Cinemassacre-521be8ef82b16',
+ 'ext': 'mp4',
+ 'upload_date': '20131002',
+ 'title': 'The Mummy’s Hand (1940)',
+ },
+ },
+ {
+ # blip.tv embedded video
+ 'url': 'http://cinemassacre.com/2006/12/07/chronologically-confused-about-bad-movie-and-video-game-sequel-titles/',
+ 'md5': 'ca9b3c8dd5a66f9375daeb5135f5a3de',
+ 'info_dict': {
+ 'id': '4065369',
+ 'ext': 'flv',
+ 'title': 'AVGN: Chronologically Confused about Bad Movie and Video Game Sequel Titles',
+ 'upload_date': '20061207',
+ 'uploader': 'cinemassacre',
+ 'uploader_id': '250778',
+ 'timestamp': 1283233867,
+ 'description': 'md5:0a108c78d130676b207d0f6d029ecffd',
+ }
+ },
+ {
+ # Youtube embedded video
+ 'url': 'http://cinemassacre.com/2006/09/01/mckids/',
+ 'md5': '6eb30961fa795fedc750eac4881ad2e1',
+ 'info_dict': {
+ 'id': 'FnxsNhuikpo',
+ 'ext': 'mp4',
+ 'upload_date': '20060901',
+ 'uploader': 'Cinemassacre Extras',
+ 'description': 'md5:de9b751efa9e45fbaafd9c8a1123ed53',
+ 'uploader_id': 'Cinemassacre',
+ 'title': 'AVGN: McKids',
+ }
+ }
+ ]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ display_id = mobj.group('display_id')
+ video_date = mobj.group('date_y') + mobj.group('date_m') + mobj.group('date_d')
+
+ webpage = self._download_webpage(url, display_id)
+
+ playerdata_url = self._search_regex(
+ [
+ r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
+ r'<iframe[^>]+src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
+ ],
+ webpage, 'player data URL', default=None)
+ if not playerdata_url:
+ playerdata_url = BlipTVIE._extract_url(webpage)
+ if not playerdata_url:
+ raise ExtractorError('Unable to find player data')
+
+ video_title = self._html_search_regex(
+ r'<title>(?P<title>.+?)\|', webpage, 'title')
+ video_description = self._html_search_regex(
+ r'<div class="entry-content">(?P<description>.+?)</div>',
+ webpage, 'description', flags=re.DOTALL, fatal=False)
+ video_thumbnail = self._og_search_thumbnail(webpage)
+
+ return {
+ '_type': 'url_transparent',
+ 'display_id': display_id,
+ 'title': video_title,
+ 'description': video_description,
+ 'upload_date': video_date,
+ 'thumbnail': video_thumbnail,
+ 'url': playerdata_url,
+ }
webpage = self._download_webpage(url, video_id)
file_key = self._search_regex(
- r'filekey\s*=\s*"([^"]+)"', webpage, 'file_key')
+ [r'key\s*:\s*"([^"]+)"', r'filekey\s*=\s*"([^"]+)"'],
+ webpage, 'file_key')
return self._extract_video(video_host, video_id, file_key)
class CNNIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/
- (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:cnn|hln)(?:-ap)?|(?=&)))'''
+ (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z]{3,5})(?:-ap)?|(?=&)))'''
_TESTS = [{
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
'description': 'md5:e7223a503315c9f150acac52e76de086',
'upload_date': '20141222',
}
+ }, {
+ 'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg',
+ 'only_matching': True,
}]
def _real_extract(self, url):
uri = mMovieParams[0][1]
# Correct the uri: drop any .cc part so it ends in plain .com
- uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.cc.com', uri)
+ uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.com', uri)
index_url = 'http://%s.cc.com/feeds/mrss?%s' % (show_name, compat_urllib_parse.urlencode({'uri': uri}))
idoc = self._download_xml(
)
from ..utils import (
age_restricted,
+ bug_reports_message,
clean_html,
compiled_regex_type,
ExtractorError,
information possibly downloading the video to the file system, among
other possible outcomes.
- The type field determines the the type of the result.
+ The type field determines the type of the result.
By far the most common value (and the default if _type is missing) is
"video", which indicates a single video.
(quality takes higher priority)
-1 for default (order by other properties),
-2 or smaller for less than default.
- * http_method HTTP method to use for the download.
* http_headers A dictionary of additional HTTP headers
to add to the request.
- * http_post_data Additional data to send with a POST
- request.
* stretched_ratio If given and not 1, indicates that the
video's pixels are not square.
width : height ratio as float.
self._downloader.report_warning(errmsg)
return False
- def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
+ def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None):
""" Returns a tuple (page content as string, URL handle) """
# Strip hashes from the URL (#1038)
if isinstance(url_or_request, (compat_str, str)):
if urlh is False:
assert not fatal
return False
- content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal)
+ content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal, encoding=encoding)
return (content, urlh)
- def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None):
- content_type = urlh.headers.get('Content-Type', '')
- webpage_bytes = urlh.read()
- if prefix is not None:
- webpage_bytes = prefix + webpage_bytes
+ @staticmethod
+ def _guess_encoding_from_content(content_type, webpage_bytes):
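+ # Prefer an explicit charset from the Content-Type header before falling back to content sniffing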
m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
if m:
encoding = m.group(1)
encoding = 'utf-16'
else:
encoding = 'utf-8'
+
+ return encoding
+
+ def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
+ content_type = urlh.headers.get('Content-Type', '')
+ webpage_bytes = urlh.read()
+ if prefix is not None:
+ webpage_bytes = prefix + webpage_bytes
+ if not encoding:
+ encoding = self._guess_encoding_from_content(content_type, webpage_bytes)
if self._downloader.params.get('dump_intermediate_pages', False):
try:
url = url_or_request.get_full_url()
return content
- def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5):
+ def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None):
""" Returns the data of the page as a string """
success = False
try_count = 0
while success is False:
try:
- res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal)
+ res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal, encoding=encoding)
success = True
except compat_http_client.IncompleteRead as e:
try_count += 1
def _download_xml(self, url_or_request, video_id,
note='Downloading XML', errnote='Unable to download XML',
- transform_source=None, fatal=True):
+ transform_source=None, fatal=True, encoding=None):
"""Return the xml as an xml.etree.ElementTree.Element"""
xml_string = self._download_webpage(
- url_or_request, video_id, note, errnote, fatal=fatal)
+ url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding)
if xml_string is False:
return xml_string
if transform_source:
note='Downloading JSON metadata',
errnote='Unable to download JSON metadata',
transform_source=None,
- fatal=True):
+ fatal=True, encoding=None):
json_string = self._download_webpage(
- url_or_request, video_id, note, errnote, fatal=fatal)
+ url_or_request, video_id, note, errnote, fatal=fatal,
+ encoding=encoding)
if (not fatal) and json_string is False:
return None
return self._parse_json(
# Methods for following #608
@staticmethod
- def url_result(url, ie=None, video_id=None):
+ def url_result(url, ie=None, video_id=None, video_title=None):
"""Returns a url that points to a page that should be processed"""
# TODO: ie should be the class used for getting the info
video_info = {'_type': 'url',
'ie_key': ie}
if video_id is not None:
video_info['id'] = video_id
+ if video_title is not None:
+ video_info['title'] = video_title
return video_info
@staticmethod
elif fatal:
raise RegexNotFoundError('Unable to extract %s' % _name)
else:
- self._downloader.report_warning('unable to extract %s; '
- 'please report this issue on http://yt-dl.org/bug' % _name)
+ self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
return None
def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
def _get_login_info(self):
"""
- Get the the login info as (username, password)
+ Get the login info as (username, password)
It will look in the netrc file using the _NETRC_MACHINE value
If there's no info available, return (None, None)
"""
return self._html_search_meta('twitter:player', html,
'twitter card player')
- def _sort_formats(self, formats):
+ def _sort_formats(self, formats, field_preference=None):
if not formats:
raise ExtractorError('No video formats found')
if not f.get('ext') and 'url' in f:
f['ext'] = determine_ext(f['url'])
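+ # Callers may pass field_preference to sort by an explicit list of fields instead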
+ if isinstance(field_preference, (list, tuple)):
+ return tuple(f.get(field) if f.get(field) is not None else -1 for field in field_preference)
+
preference = f.get('preference')
if preference is None:
proto = f.get('protocol')
f.get('fps') if f.get('fps') is not None else -1,
f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
f.get('source_preference') if f.get('source_preference') is not None else -1,
- f.get('format_id'),
+ f.get('format_id') if f.get('format_id') is not None else '',
)
formats.sort(key=_formats_key)
formats)
def _is_valid_url(self, url, video_id, item='video'):
+ url = self._proto_relative_url(url, scheme='http:')
+ # For now assume non HTTP(S) URLs always valid
+ if not (url.startswith('http://') or url.startswith('https://')):
+ return True
try:
self._request_webpage(url, video_id, 'Checking %s URL' % item)
return True
(media_el.attrib.get('href') or media_el.attrib.get('url')))
tbr = int_or_none(media_el.attrib.get('bitrate'))
formats.append({
- 'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])),
+ 'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])),
'url': manifest_url,
'ext': 'flv',
'tbr': tbr,
m3u8_id=None):
formats = [{
- 'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-meta'])),
+ 'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
'url': m3u8_url,
'ext': ext,
'protocol': 'm3u8',
formats.append({'url': format_url(line)})
continue
tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
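+ # Build format_id from the m3u8 id plus the variant NAME, bitrate or index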
+ format_id = []
+ if m3u8_id:
+ format_id.append(m3u8_id)
+ last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') != 'SUBTITLES' else None
+ format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats)))
f = {
- 'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-%d' % (tbr if tbr else len(formats))])),
+ 'format_id': '-'.join(format_id),
'url': format_url(line.strip()),
'tbr': tbr,
'ext': ext,
class CrackedIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?cracked\.com/video_(?P<id>\d+)_[\da-z-]+\.html'
- _TEST = {
+ _TESTS = [{
+ 'url': 'http://www.cracked.com/video_19070_if-animal-actors-got-e21-true-hollywood-stories.html',
+ 'md5': '89b90b9824e3806ca95072c4d78f13f7',
+ 'info_dict': {
+ 'id': '19070',
+ 'ext': 'mp4',
+ 'title': 'If Animal Actors Got E! True Hollywood Stories',
+ 'timestamp': 1404954000,
+ 'upload_date': '20140710',
+ }
+ }, {
+ # youtube embed
'url': 'http://www.cracked.com/video_19006_4-plot-holes-you-didnt-notice-in-your-favorite-movies.html',
- 'md5': '4b29a5eeec292cd5eca6388c7558db9e',
+ 'md5': 'ccd52866b50bde63a6ef3b35016ba8c7',
'info_dict': {
- 'id': '19006',
+ 'id': 'EjI00A3rZD0',
'ext': 'mp4',
- 'title': '4 Plot Holes You Didn\'t Notice in Your Favorite Movies',
- 'description': 'md5:3b909e752661db86007d10e5ec2df769',
- 'timestamp': 1405659600,
- 'upload_date': '20140718',
+ 'title': "4 Plot Holes You Didn't Notice in Your Favorite Movies - The Spit Take",
+ 'description': 'md5:c603708c718b796fe6079e2b3351ffc7',
+ 'upload_date': '20140725',
+ 'uploader_id': 'Cracked',
+ 'uploader': 'Cracked',
}
- }
+ }]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
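+ # Some Cracked videos are plain YouTube embeds; delegate those to the Youtube extractor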
+ youtube_url = self._search_regex(
+ r'<iframe[^>]+src="((?:https?:)?//www\.youtube\.com/embed/[^"]+)"',
+ webpage, 'youtube url', default=None)
+ if youtube_url:
+ return self.url_result(youtube_url, 'Youtube')
+
video_url = self._html_search_regex(
- [r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'<video\s+src="([^"]+)"'], webpage, 'video URL')
+ [r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'<video\s+src="([^"]+)"'],
+ webpage, 'video URL')
+
+ title = self._search_regex(
+ [r'property="?og:title"?\s+content="([^"]+)"', r'class="?title"?>([^<]+)'],
+ webpage, 'title')
- title = self._og_search_title(webpage)
- description = self._og_search_description(webpage)
+ description = self._search_regex(
+ r'name="?(?:og:)?description"?\s+content="([^"]+)"',
+ webpage, 'description', default=None)
- timestamp = self._html_search_regex(r'<time datetime="([^"]+)"', webpage, 'upload date', fatal=False)
+ timestamp = self._html_search_regex(
+ r'"date"\s*:\s*"([^"]+)"', webpage, 'upload date', fatal=False)
if timestamp:
timestamp = parse_iso8601(timestamp[:-6])
view_count = str_to_int(self._html_search_regex(
- r'<span class="views" id="viewCounts">([\d,\.]+) Views</span>', webpage, 'view count', fatal=False))
+ r'<span\s+class="?views"? id="?viewCounts"?>([\d,\.]+) Views</span>',
+ webpage, 'view count', fatal=False))
comment_count = str_to_int(self._html_search_regex(
- r'<span id="commentCounts">([\d,\.]+)</span>', webpage, 'comment count', fatal=False))
+ r'<span\s+id="?commentCounts"?>([\d,\.]+)</span>',
+ webpage, 'comment count', fatal=False))
m = re.search(r'_(?P<width>\d+)X(?P<height>\d+)\.mp4$', video_url)
if m:
--- /dev/null
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ qualities,
+)
+
+
+class CrooksAndLiarsIE(InfoExtractor):
+ _VALID_URL = r'https?://embed\.crooksandliars\.com/(?:embed|v)/(?P<id>[A-Za-z0-9]+)'
+ _TESTS = [{
+ 'url': 'https://embed.crooksandliars.com/embed/8RUoRhRi',
+ 'info_dict': {
+ 'id': '8RUoRhRi',
+ 'ext': 'mp4',
+ 'title': 'Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!',
+ 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
+ 'thumbnail': 're:^https?://.*\.jpg',
+ 'timestamp': 1428207000,
+ 'upload_date': '20150405',
+ 'uploader': 'Heather',
+ 'duration': 236,
+ }
+ }, {
+ 'url': 'http://embed.crooksandliars.com/v/MTE3MjUtMzQ2MzA',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://embed.crooksandliars.com/embed/%s' % video_id, video_id)
+
+ manifest = self._parse_json(
+ self._search_regex(
+ r'var\s+manifest\s*=\s*({.+?})\n', webpage, 'manifest JSON'),
+ video_id)
+
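+ # qualities() ranks format ids by position, lowest quality first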
+ quality = qualities(('webm_low', 'mp4_low', 'webm_high', 'mp4_high'))
+
+ formats = [{
+ 'url': item['url'],
+ 'format_id': item['type'],
+ 'quality': quality(item['type']),
+ } for item in manifest['flavors'] if item['mime'].startswith('video/')]
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': manifest['title'],
+ 'description': manifest.get('description'),
+ 'thumbnail': self._proto_relative_url(manifest.get('poster')),
+ 'timestamp': int_or_none(manifest.get('created')),
+ 'uploader': manifest.get('author'),
+ 'duration': int_or_none(manifest.get('duration')),
+ 'formats': formats,
+ }
)
from ..aes import (
aes_cbc_decrypt,
- inc,
)
class CrunchyrollIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
+ _NETRC_MACHINE = 'crunchyroll'
_TESTS = [{
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
'info_dict': {
key = obfuscate_key(id)
- class Counter:
- __value = iv
-
- def next_value(self):
- temp = self.__value
- self.__value = inc(self.__value)
- return temp
decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
return zlib.decompress(decrypted_data)
streamdata = self._download_xml(
streamdata_req, video_id,
note='Downloading media info for %s' % video_format)
- video_url = streamdata.find('.//host').text
- video_play_path = streamdata.find('.//file').text
+ video_url = streamdata.find('./host').text
+ video_play_path = streamdata.find('./file').text
formats.append({
'url': video_url,
'play_path': video_play_path,
int_or_none,
unescapeHTML,
find_xpath_attr,
+ smuggle_url,
+ determine_ext,
)
+from .senateisvp import SenateISVPIE
class CSpanIE(InfoExtractor):
}
}, {
'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall',
+ 'md5': '446562a736c6bf97118e389433ed88d4',
'info_dict': {
'id': '342759',
+ 'ext': 'mp4',
'title': 'General Motors Ignition Switch Recall',
+ 'duration': 14848,
+ 'description': 'md5:70c7c3b8fa63fa60d42772440596034c'
},
- 'playlist_duration_sum': 14855,
+ }, {
+ # Video from senate.gov
+ 'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers',
+ 'info_dict': {
+ 'id': 'judiciary031715',
+ 'ext': 'flv',
+ 'title': 'Immigration Reforms Needed to Protect Skilled American Workers',
+ }
}]
def _real_extract(self, url):
# present, otherwise this is a stripped version
r'<p class=\'initial\'>(.*?)</p>'
],
- webpage, 'description', flags=re.DOTALL)
+ webpage, 'description', flags=re.DOTALL, default=None)
info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id
data = self._download_json(info_url, video_id)
title = find_xpath_attr(doc, './/string', 'name', 'title').text
thumbnail = find_xpath_attr(doc, './/string', 'name', 'poster').text
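+ # Some C-SPAN pages merely wrap a Senate ISVP embed; delegate those to SenateISVPIE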
+ senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
+ if senate_isvp_url:
+ surl = smuggle_url(senate_isvp_url, {'force_title': title})
+ return self.url_result(surl, 'SenateISVP', video_id, title)
+
files = data['video']['files']
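+ # A closed-caption file is optional in the metadata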
+ try:
+ capfile = data['video']['capfile']['#text']
+ except KeyError:
+ capfile = None
entries = [{
'id': '%s_%d' % (video_id, partnum + 1),
'description': description,
'thumbnail': thumbnail,
'duration': int_or_none(f.get('length', {}).get('#text')),
+ 'subtitles': {
+ 'en': [{
+ 'url': capfile,
+ 'ext': determine_ext(capfile, 'dfxp')
+ }],
+ } if capfile else None,
} for partnum, f in enumerate(files)]
- return {
- '_type': 'playlist',
- 'entries': entries,
- 'title': title,
- 'id': video_id,
- }
+ if len(entries) == 1:
+ entry = dict(entries[0])
+ entry['id'] = video_id
+ return entry
+ else:
+ return {
+ '_type': 'playlist',
+ 'entries': entries,
+ 'title': title,
+ 'id': video_id,
+ }
def _build_request(url):
"""Build a request with the family filter disabled"""
request = compat_urllib_request.Request(url)
- request.add_header('Cookie', 'family_filter=off')
- request.add_header('Cookie', 'ff=off')
+ request.add_header('Cookie', 'family_filter=off; ff=off')
return request
_TESTS = [
{
- 'url': 'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
- 'md5': '392c4b85a60a90dc4792da41ce3144eb',
+ 'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
+ 'md5': '2137c41a8e78554bb09225b8eb322406',
'info_dict': {
- 'id': 'x33vw9',
+ 'id': 'x2iuewm',
'ext': 'mp4',
- 'uploader': 'Amphora Alex and Van .',
- 'title': 'Tutoriel de Youtubeur"DL DES VIDEO DE YOUTUBE"',
+ 'uploader': 'IGN',
+ 'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
+ 'upload_date': '20150306',
}
},
# Vevo video
def _real_extract(self, url):
video_id = self._match_id(url)
- url = 'http://www.dailymotion.com/video/%s' % video_id
+ url = 'https://www.dailymotion.com/video/%s' % video_id
# Retrieve video webpage to extract further information
request = self._build_request(url)
age_limit = self._rta_search(webpage)
video_upload_date = None
- mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
+ mobj = re.search(r'<meta property="video:release_date" content="([0-9]{4})-([0-9]{2})-([0-9]{2}).+?"/>', webpage)
if mobj is not None:
- video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
+ video_upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3)
- embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
- embed_page = self._download_webpage(embed_url, video_id,
- 'Downloading embed page')
+ embed_url = 'https://www.dailymotion.com/embed/video/%s' % video_id
+ embed_request = self._build_request(embed_url)
+ embed_page = self._download_webpage(
+ embed_request, video_id, 'Downloading embed page')
info = self._search_regex(r'var info = ({.*?}),$', embed_page,
'video info', flags=re.MULTILINE)
info = json.loads(info)
class DailymotionUserIE(DailymotionPlaylistIE):
IE_NAME = 'dailymotion:user'
- _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)'
+ _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?:old/)?user/(?P<user>[^/]+)'
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
_TESTS = [{
'url': 'https://www.dailymotion.com/user/nqtv',
--- /dev/null
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ xpath_text,
+ parse_duration,
+)
+
+
+class DHMIE(InfoExtractor):
+ IE_DESC = 'Filmarchiv - Deutsches Historisches Museum'
+ _VALID_URL = r'https?://(?:www\.)?dhm\.de/filmarchiv/(?:[^/]+/)+(?P<id>[^/]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.dhm.de/filmarchiv/die-filme/the-marshallplan-at-work-in-west-germany/',
+ 'md5': '11c475f670209bf6acca0b2b7ef51827',
+ 'info_dict': {
+ 'id': 'the-marshallplan-at-work-in-west-germany',
+ 'ext': 'flv',
+ 'title': 'MARSHALL PLAN AT WORK IN WESTERN GERMANY, THE',
+ 'description': 'md5:1fabd480c153f97b07add61c44407c82',
+ 'duration': 660,
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ }, {
+ 'url': 'http://www.dhm.de/filmarchiv/02-mapping-the-wall/peter-g/rolle-1/',
+ 'md5': '09890226332476a3e3f6f2cb74734aa5',
+ 'info_dict': {
+ 'id': 'rolle-1',
+ 'ext': 'flv',
+ 'title': 'ROLLE 1',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ playlist_url = self._search_regex(
+ r"file\s*:\s*'([^']+)'", webpage, 'playlist url')
+
+ playlist = self._download_xml(playlist_url, video_id)
+
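+ # The playlist is an XSPF document; all elements live in the http://xspf.org/ns/0/ namespace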
+ track = playlist.find(
+ './{http://xspf.org/ns/0/}trackList/{http://xspf.org/ns/0/}track')
+
+ video_url = xpath_text(
+ track, './{http://xspf.org/ns/0/}location',
+ 'video url', fatal=True)
+ thumbnail = xpath_text(
+ track, './{http://xspf.org/ns/0/}image',
+ 'thumbnail')
+
+ title = self._search_regex(
+ [r'dc:title="([^"]+)"', r'<title> »([^<]+)</title>'],
+ webpage, 'title').strip()
+ description = self._html_search_regex(
+ r'<p><strong>Description:</strong>(.+?)</p>',
+ webpage, 'description', default=None)
+ duration = parse_duration(self._search_regex(
+ r'<em>Length\s*</em>\s*:\s*</strong>([^<]+)',
+ webpage, 'duration', default=None))
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
+ }
if not video_url:
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(
- r'"file"\s*:\s*\'([^\']+)', webpage, 'video url')
+ [r'<source[^>]+src="([^"]+)"', r'"file"\s*:\s*\'([^\']+)'],
+ webpage, 'video url')
return {
'id': video_id,
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import hashlib
+import time
+from .common import InfoExtractor
+from ..utils import (ExtractorError, unescapeHTML)
+from ..compat import (compat_str, compat_basestring)
+
+
+class DouyuTVIE(InfoExtractor):
+ _VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.douyutv.com/iseven',
+ 'info_dict': {
+ 'id': '17732',
+ 'display_id': 'iseven',
+ 'ext': 'flv',
+ 'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': 'md5:c93d6692dde6fe33809a46edcbecca44',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'uploader': '7师傅',
+ 'uploader_id': '431925',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://www.douyutv.com/85982',
+ 'info_dict': {
+ 'id': '85982',
+ 'display_id': '85982',
+ 'ext': 'flv',
+ 'title': 're:^小漠从零单排记!——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': 'md5:746a2f7a253966a06755a912f0acc0d2',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'uploader': 'douyu小漠',
+ 'uploader_id': '3769985',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ if video_id.isdigit():
+ room_id = video_id
+ else:
+ page = self._download_webpage(url, video_id)
+ room_id = self._html_search_regex(
+ r'"room_id"\s*:\s*(\d+),', page, 'room id')
+
+ prefix = 'room/%s?aid=android&client_sys=android&time=%d' % (
+ room_id, int(time.time()))
+
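+ # auth is the md5 of the request path and query plus the fixed salt '1231'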
+ auth = hashlib.md5((prefix + '1231').encode('ascii')).hexdigest()
+ config = self._download_json(
+ 'http://www.douyutv.com/api/v1/%s&auth=%s' % (prefix, auth),
+ video_id)
+
+ data = config['data']
+
+ error_code = config.get('error', 0)
+ if error_code != 0:
+ error_desc = 'Server reported error %i' % error_code
+ if isinstance(data, (compat_str, compat_basestring)):
+ error_desc += ': ' + data
+ raise ExtractorError(error_desc, expected=True)
+
+ show_status = data.get('show_status')
+ # 1 = live, 2 = offline
+ if show_status == '2':
+ raise ExtractorError(
+ 'Live stream is offline', expected=True)
+
+ base_url = data['rtmp_url']
+ live_path = data['rtmp_live']
+
+ title = self._live_title(unescapeHTML(data['room_name']))
+ description = data.get('show_details')
+ thumbnail = data.get('room_src')
+
+ uploader = data.get('nickname')
+ uploader_id = data.get('owner_uid')
+
+ multi_formats = data.get('rtmp_multi_bitrate')
+ if not isinstance(multi_formats, dict):
+ multi_formats = {}
+ multi_formats['live'] = live_path
+
+ formats = [{
+ 'url': '%s/%s' % (base_url, format_path),
+ 'format_id': format_id,
+ 'preference': 1 if format_id == 'live' else 0,
+ } for format_id, format_path in multi_formats.items()]
+ self._sort_formats(formats)
+
+ return {
+ 'id': room_id,
+ 'display_id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'formats': formats,
+ 'is_live': True,
+ }
import re
from .common import InfoExtractor
-from ..utils import unified_strdate
+from ..utils import (
+ ExtractorError,
+ unified_strdate,
+)
class DreiSatIE(InfoExtractor):
IE_NAME = '3sat'
- _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
- _TEST = {
- 'url': 'http://www.3sat.de/mediathek/index.php?obj=36983',
- 'md5': '9dcfe344732808dbfcc901537973c922',
- 'info_dict': {
- 'id': '36983',
- 'ext': 'mp4',
- 'title': 'Kaffeeland Schweiz',
- 'description': 'md5:cc4424b18b75ae9948b13929a0814033',
- 'uploader': '3sat',
- 'upload_date': '20130622'
- }
- }
+ _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
+ _TESTS = [
+ {
+ 'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
+ 'md5': 'be37228896d30a88f315b638900a026e',
+ 'info_dict': {
+ 'id': '45918',
+ 'ext': 'mp4',
+ 'title': 'Waidmannsheil',
+ 'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
+ 'uploader': '3sat',
+ 'upload_date': '20140913'
+ }
+ },
+ {
+ 'url': 'http://www.3sat.de/mediathek/mediathek.php?mode=play&obj=51066',
+ 'only_matching': True,
+ },
+ ]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
details_doc = self._download_xml(details_url, video_id, 'Downloading video details')
+ status_code = details_doc.find('./status/statuscode')
+ if status_code is not None and status_code.text != 'ok':
+ code = status_code.text
+ if code == 'notVisibleAnymore':
+ message = 'Video %s is not available' % video_id
+ else:
+ message = '%s returned error: %s' % (self.IE_NAME, code)
+ raise ExtractorError(message, expected=True)
+
thumbnail_els = details_doc.findall('.//teaserimage')
thumbnails = [{
'width': int(te.attrib['key'].partition('x')[0]),
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor, ExtractorError
_VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
_TEST = {
- 'url': 'http://www.dr.dk/tv/se/partiets-mand/partiets-mand-7-8',
- 'md5': '4a7e1dd65cdb2643500a3f753c942f25',
+ 'url': 'https://www.dr.dk/tv/se/boern/ultra/panisk-paske/panisk-paske-5',
+ 'md5': 'dc515a9ab50577fa14cc4e4b0265168f',
'info_dict': {
- 'id': 'partiets-mand-7-8',
+ 'id': 'panisk-paske-5',
'ext': 'mp4',
- 'title': 'Partiets mand (7:8)',
- 'description': 'md5:a684b90a8f9336cd4aab94b7647d7862',
- 'timestamp': 1403047940,
- 'upload_date': '20140617',
- 'duration': 1299.040,
+ 'title': 'Panisk Påske (5)',
+ 'description': 'md5:ca14173c5ab24cd26b0fcc074dff391c',
+ 'timestamp': 1426984612,
+ 'upload_date': '20150322',
+ 'duration': 1455,
},
}
webpage = self._download_webpage(url, video_id)
+ if '>Programmet er ikke længere tilgængeligt' in webpage:
+ raise ExtractorError(
+ 'Video %s is not available' % video_id, expected=True)
+
video_id = self._search_regex(
r'data-(?:material-identifier|episode-slug)="([^"]+)"',
webpage, 'video id')
video_url = self._search_regex(
r's1.addVariable\("file",\s*"([^"]+)"', webpage, 'video URL')
- thumb = self._og_search_thumbnail(webpage)
- title = self._search_regex(r'<b>([^"]+)</b>', webpage, 'title')
+ title = self._og_search_title(webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
return {
'id': video_id,
'title': title,
'url': video_url,
- 'thumbnail': thumb,
+ 'thumbnail': thumbnail,
}
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_request
+from ..utils import qualities
+
+
+class DumpertIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?dumpert\.nl/mediabase/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
+ _TEST = {
+ 'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/',
+ 'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
+ 'info_dict': {
+ 'id': '6646981/951bc60f',
+ 'ext': 'mp4',
+ 'title': 'Ik heb nieuws voor je',
+ 'description': 'Niet schrikken hoor',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ req = compat_urllib_request.Request(url)
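+ # These cookies skip the site's NSFW confirmation page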
+ req.add_header('Cookie', 'nsfw=1; cpc=10')
+ webpage = self._download_webpage(req, video_id)
+
+ files_base64 = self._search_regex(
+ r'data-files="([^"]+)"', webpage, 'data files')
+
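+ # data-files holds base64-encoded JSON mapping format ids to stream URLs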
+ files = self._parse_json(
+ base64.b64decode(files_base64.encode('utf-8')).decode('utf-8'),
+ video_id)
+
+ quality = qualities(['flv', 'mobile', 'tablet', '720p'])
+
+ formats = [{
+ 'url': video_url,
+ 'format_id': format_id,
+ 'quality': quality(format_id),
+ } for format_id, video_url in files.items() if format_id != 'still']
+ self._sort_formats(formats)
+
+ title = self._html_search_meta(
+ 'title', webpage) or self._og_search_title(webpage)
+ description = self._html_search_meta(
+ 'description', webpage) or self._og_search_description(webpage)
+ thumbnail = files.get('still') or self._og_search_thumbnail(webpage)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'formats': formats
+ }
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+)
+
+
+class EaglePlatformIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ eagleplatform:(?P<custom_host>[^/]+):|
+ https?://(?P<host>.+?\.media\.eagleplatform\.com)/index/player\?.*\brecord_id=
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ # http://lenta.ru/news/2015/03/06/navalny/
+ 'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201',
+ 'md5': '0b7994faa2bd5c0f69a3db6db28d078d',
+ 'info_dict': {
+ 'id': '227304',
+ 'ext': 'mp4',
+ 'title': 'Навальный вышел на свободу',
+ 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 87,
+ 'view_count': int,
+ 'age_limit': 0,
+ },
+ }, {
+ # http://muz-tv.ru/play/7129/
+ # http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true
+ 'url': 'eagleplatform:media.clipyou.ru:12820',
+ 'md5': '6c2ebeab03b739597ce8d86339d5a905',
+ 'info_dict': {
+ 'id': '12820',
+ 'ext': 'mp4',
+ 'title': "'O Sole Mio",
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 216,
+ 'view_count': int,
+ },
+ 'skip': 'Georestricted',
+ }]
+
+ def _handle_error(self, response):
+ status = int_or_none(response.get('status', 200))
+ if status != 200:
+ raise ExtractorError(' '.join(response['errors']), expected=True)
+
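+ # Route all JSON downloads through API-level error handling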
+ def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'):
+ response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note)
+ self._handle_error(response)
+ return response
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id')
+
+ player_data = self._download_json(
+ 'http://%s/api/player_data?id=%s' % (host, video_id), video_id)
+
+ media = player_data['data']['playlist']['viewports'][0]['medialist'][0]
+
+ title = media['title']
+ description = media.get('description')
+ thumbnail = media.get('snapshot')
+ duration = int_or_none(media.get('duration'))
+ view_count = int_or_none(media.get('views'))
+
+ age_restriction = media.get('age_restriction')
+ age_limit = None
+ if age_restriction:
+ age_limit = 0 if age_restriction == 'allow_all' else 18
+
+ m3u8_data = self._download_json(
+ media['sources']['secure_m3u8']['auto'],
+ video_id, 'Downloading m3u8 JSON')
+
+ formats = self._extract_m3u8_formats(
+ m3u8_data['data'][0], video_id,
+ 'mp4', entry_protocol='m3u8_native')
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
import json
import random
-import re
from .common import InfoExtractor
from ..compat import (
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- playlist_id = mobj.group('id')
+ playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
- json_like = self._search_regex(
- r"(?s)PAGE.mix = (.*?);\n", webpage, 'trax information')
- data = json.loads(json_like)
+ data = self._parse_json(
+ self._search_regex(
+ r"(?s)PAGE\.mix\s*=\s*({.+?});\n", webpage, 'trax information'),
+ playlist_id)
session = str(random.randint(0, 1000000000))
mix_id = data['id']
track_count = data['tracks_count']
duration = data['duration']
avg_song_duration = float(duration) / track_count
+ # duration is sometimes negative, use predefined avg duration
+ if avg_song_duration <= 0:
+ avg_song_duration = 300
first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
next_url = first_url
entries = []
from .common import InfoExtractor
from ..utils import (
ExtractorError,
- parse_iso8601,
)
class EllenTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)'
- _TESTS = [{
- 'url': 'http://www.ellentv.com/videos/0-7jqrsr18/',
- 'md5': 'e4af06f3bf0d5f471921a18db5764642',
- 'info_dict': {
- 'id': '0-7jqrsr18',
- 'ext': 'mp4',
- 'title': 'What\'s Wrong with These Photos? A Whole Lot',
- 'description': 'md5:35f152dc66b587cf13e6d2cf4fa467f6',
- 'timestamp': 1406876400,
- 'upload_date': '20140801',
- }
- }, {
- 'url': 'http://ellentube.com/videos/0-dvzmabd5/',
- 'md5': '98238118eaa2bbdf6ad7f708e3e4f4eb',
+ _TEST = {
+ 'url': 'http://www.ellentv.com/videos/0-ipq1gsai/',
+ 'md5': '8e3c576bf2e9bfff4d76565f56f94c9c',
'info_dict': {
- 'id': '0-dvzmabd5',
+ 'id': '0_ipq1gsai',
'ext': 'mp4',
- 'title': '1 year old twin sister makes her brother laugh',
- 'description': '1 year old twin sister makes her brother laugh',
- 'timestamp': 1419542075,
- 'upload_date': '20141225',
+ 'title': 'Fast Fingers of Fate',
+ 'description': 'md5:587e79fbbd0d73b148bc596d99ce48e6',
+ 'timestamp': 1428035648,
+ 'upload_date': '20150403',
+ 'uploader_id': 'batchUser',
}
- }]
+ }
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- video_url = self._html_search_meta('VideoURL', webpage, 'url')
- title = self._og_search_title(webpage, default=None) or self._search_regex(
- r'pageName\s*=\s*"([^"]+)"', webpage, 'title')
- description = self._html_search_meta(
- 'description', webpage, 'description') or self._og_search_description(webpage)
- timestamp = parse_iso8601(self._search_regex(
- r'<span class="publish-date"><time datetime="([^"]+)">',
- webpage, 'timestamp'))
+ webpage = self._download_webpage(
+ 'http://widgets.ellentube.com/videos/%s' % video_id,
+ video_id)
- return {
- 'id': video_id,
- 'url': video_url,
- 'title': title,
- 'description': description,
- 'timestamp': timestamp,
- }
+ partner_id = self._search_regex(
+ r"var\s+partnerId\s*=\s*'([^']+)", webpage, 'partner id')
+
+ kaltura_id = self._search_regex(
+ [r'id="kaltura_player_([^"]+)"',
+ r"_wb_entry_id\s*:\s*'([^']+)",
+ r'data-kaltura-entry-id="([^"]+)'],
+ webpage, 'kaltura id')
+
+ return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura')
class EllenTVClipsIE(InfoExtractor):
'id': 'meryl-streep-vanessa-hudgens',
'title': 'Meryl Streep, Vanessa Hudgens',
},
- 'playlist_mincount': 9,
+ 'playlist_mincount': 7,
}
def _real_extract(self, url):
raise ExtractorError('Failed to download JSON', cause=ve)
def _extract_entries(self, playlist):
- return [self.url_result(item['url'], 'EllenTV') for item in playlist]
+ return [
+ self.url_result(
+ 'kaltura:%s:%s' % (item['kaltura_partner_id'], item['kaltura_entry_id']),
+ 'Kaltura')
+ for item in playlist]
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
+from ..compat import compat_urllib_parse
+from ..utils import (
+ ExtractorError,
+ unescapeHTML
+)
class EroProfileIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/view/(?P<id>[^/]+)'
- _TEST = {
+ _LOGIN_URL = 'http://www.eroprofile.com/auth/auth.php?'
+ _NETRC_MACHINE = 'eroprofile'
+ _TESTS = [{
'url': 'http://www.eroprofile.com/m/videos/view/sexy-babe-softcore',
'md5': 'c26f351332edf23e1ea28ce9ec9de32f',
'info_dict': {
'thumbnail': 're:https?://.*\.jpg',
'age_limit': 18,
}
- }
+ }, {
+ 'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file',
+ 'md5': '1baa9602ede46ce904c431f5418d8916',
+ 'info_dict': {
+ 'id': '1133519',
+ 'ext': 'm4v',
+ 'title': 'Try It On Pee_cut_2.wmv - 4shared.com - file sharing - download movie file',
+ 'thumbnail': 're:https?://.*\.jpg',
+ 'age_limit': 18,
+ },
+ 'skip': 'Requires login',
+ }]
+
+ def _login(self):
+ (username, password) = self._get_login_info()
+ if username is None:
+ return
+
+ query = compat_urllib_parse.urlencode({
+ 'username': username,
+ 'password': password,
+ 'url': 'http://www.eroprofile.com/',
+ })
+ login_url = self._LOGIN_URL + query
+ login_page = self._download_webpage(login_url, None, False)
+
+ m = re.search(r'Your username or password was incorrect\.', login_page)
+ if m:
+ raise ExtractorError(
+ 'Wrong username and/or password.', expected=True)
+
+ self.report_login()
+ redirect_url = self._search_regex(
+ r'<script[^>]+?src="([^"]+)"', login_page, 'login redirect url')
+ self._download_webpage(redirect_url, None, False)
+
+ def _real_initialize(self):
+ self._login()
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
+ m = re.search(r'You must be logged in to view this video\.', webpage)
+ if m:
+ raise ExtractorError(
+ 'This video requires login. Please specify a username and password and try again.', expected=True)
+
video_id = self._search_regex(
[r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
webpage, 'video id', default=None)
- video_url = self._search_regex(
- r'<source src="([^"]+)', webpage, 'video url')
+ video_url = unescapeHTML(self._search_regex(
+ r'<source src="([^"]+)', webpage, 'video url'))
title = self._html_search_regex(
r'Title:</th><td>([^<]+)</td>', webpage, 'title')
thumbnail = self._search_regex(
from __future__ import unicode_literals
+import json
+
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse,
- compat_urllib_request,
-)
+from ..compat import compat_urllib_request
+
from ..utils import (
- ExtractorError,
- js_to_json,
- parse_duration,
+ determine_ext,
+ clean_html,
+ int_or_none,
+ float_or_none,
)
+def _decrypt_config(key, string):
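+ # Cycle the key to half the ciphertext length, hex-decode the payload and XOR the two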
+ a = ''
+ i = ''
+ r = ''
+
+ while len(a) < (len(string) / 2):
+ a += key
+
+ a = a[0:int(len(string) / 2)]
+
+ t = 0
+ while t < len(string):
+ i += chr(int(string[t] + string[t + 1], 16))
+ t += 2
+
+ icko = list(i)
+
+ for t, c in enumerate(a):
+ r += chr(ord(c) ^ ord(icko[t]))
+
+ return r
+
+
class EscapistIE(InfoExtractor):
- _VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
- _USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
- _TEST = {
+ _VALID_URL = r'https?://(?:www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
+ _TESTS = [{
'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
'info_dict': {
'id': '6618',
'ext': 'mp4',
'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
- 'uploader_id': 'the-escapist-presents',
- 'uploader': 'The Escapist Presents',
'title': "Breaking Down Baldur's Gate",
'thumbnail': 're:^https?://.*\.jpg$',
'duration': 264,
+ 'uploader': 'The Escapist',
+ }
+ }, {
+ 'url': 'http://www.escapistmagazine.com/videos/view/zero-punctuation/10044-Evolve-One-vs-Multiplayer',
+ 'md5': '9e8c437b0dbb0387d3bd3255ca77f6bf',
+ 'info_dict': {
+ 'id': '10044',
+ 'ext': 'mp4',
+ 'description': 'This week, Zero Punctuation reviews Evolve.',
+ 'title': 'Evolve - One vs Multiplayer',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 304,
+ 'uploader': 'The Escapist',
}
- }
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage_req = compat_urllib_request.Request(url)
- webpage_req.add_header('User-Agent', self._USER_AGENT)
- webpage = self._download_webpage(webpage_req, video_id)
-
- uploader_id = self._html_search_regex(
- r"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'",
- webpage, 'uploader ID', fatal=False)
- uploader = self._html_search_regex(
- r"<h1\s+class='headline'>(.*?)</a>",
- webpage, 'uploader', fatal=False)
- description = self._html_search_meta('description', webpage)
- duration = parse_duration(self._html_search_meta('duration', webpage))
-
- raw_title = self._html_search_meta('title', webpage, fatal=True)
- title = raw_title.partition(' : ')[2]
-
- config_url = compat_urllib_parse.unquote(self._html_search_regex(
- r'''(?x)
- (?:
- <param\s+name="flashvars".*?\s+value="config=|
- flashvars="config=
- )
- (https?://[^"&]+)
- ''',
- webpage, 'config URL'))
-
- formats = []
- ad_formats = []
-
- def _add_format(name, cfg_url, quality):
- cfg_req = compat_urllib_request.Request(cfg_url)
- cfg_req.add_header('User-Agent', self._USER_AGENT)
- config = self._download_json(
- cfg_req, video_id,
- 'Downloading ' + name + ' configuration',
- 'Unable to download ' + name + ' configuration',
- transform_source=js_to_json)
-
- playlist = config['playlist']
- for p in playlist:
- if p.get('eventCategory') == 'Video':
- ar = formats
- elif p.get('eventCategory') == 'Video Postroll':
- ar = ad_formats
- else:
- continue
-
- ar.append({
- 'url': p['url'],
- 'format_id': name,
- 'quality': quality,
- 'http_headers': {
- 'User-Agent': self._USER_AGENT,
- },
- })
-
- _add_format('normal', config_url, quality=0)
- hq_url = (config_url +
- ('&hq=1' if '?' in config_url else config_url + '?hq=1'))
- try:
- _add_format('hq', hq_url, quality=1)
- except ExtractorError:
- pass # That's fine, we'll just use normal quality
+ webpage = self._download_webpage(url, video_id)
+
+ ims_video = self._parse_json(
+ self._search_regex(
+ r'imsVideo\.play\(({.+?})\);', webpage, 'imsVideo'),
+ video_id)
+ video_id = ims_video['videoID']
+ key = ims_video['hash']
+
+ config_req = compat_urllib_request.Request(
+ 'http://www.escapistmagazine.com/videos/'
+ 'vidconfig.php?videoID=%s&hash=%s' % (video_id, key))
+ config_req.add_header('Referer', url)
+ config = self._download_webpage(config_req, video_id, 'Downloading video config')
+
+ data = json.loads(_decrypt_config(key, config))
+
+ video_data = data['videoData']
+
+ title = clean_html(video_data['title'])
+ duration = float_or_none(video_data.get('duration'), 1000)
+ uploader = video_data.get('publisher')
+
+ formats = [{
+ 'url': video['src'],
+ 'format_id': '%s-%sp' % (determine_ext(video['src']), video['res']),
+ 'height': int_or_none(video.get('res')),
+ } for video in data['files']['videos']]
self._sort_formats(formats)
- if '/escapist/sales-marketing/' in formats[-1]['url']:
- raise ExtractorError('This IP address has been blocked by The Escapist', expected=True)
-
- res = {
+ return {
'id': video_id,
'formats': formats,
- 'uploader': uploader,
- 'uploader_id': uploader_id,
'title': title,
'thumbnail': self._og_search_thumbnail(webpage),
- 'description': description,
+ 'description': self._og_search_description(webpage),
'duration': duration,
+ 'uploader': uploader,
}
-
- if self._downloader.params.get('include_ads') and ad_formats:
- self._sort_formats(ad_formats)
- ad_res = {
- 'id': '%s-ad' % video_id,
- 'title': '%s (Postroll)' % title,
- 'formats': ad_formats,
- }
- return {
- '_type': 'playlist',
- 'entries': [res, ad_res],
- 'title': title,
- 'id': video_id,
- }
-
- return res
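The rewritten extractor no longer scrapes flashvars: it lifts the JSON argument of the page's imsVideo.play({...}) call and feeds its videoID and hash into the vidconfig.php request above. A standalone sketch of that first step, with an invented page fragment:

import json
import re

webpage = '<script>imsVideo.play({"videoID": 10044, "hash": "abc123"});</script>'  # illustrative

blob = re.search(r'imsVideo\.play\(({.+?})\);', webpage).group(1)
ims_video = json.loads(blob)  # the extractor goes through _parse_json for nicer error reporting
assert ims_video['videoID'] == 10044 and ims_video['hash'] == 'abc123'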
from .common import InfoExtractor
from ..compat import (
- compat_urllib_parse_urlparse,
+ compat_parse_qs,
compat_urllib_request,
- compat_urllib_parse,
)
from ..utils import (
+ qualities,
str_to_int,
)
_VALID_URL = r'https?://(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<id>[0-9]+))(?:[/?&]|$)'
_TESTS = [{
'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
- 'md5': '1fb9228f5e3332ec8c057d6ac36f33e0',
+ 'md5': '344d0c6d50e2f16b06e49ca011d8ac69',
'info_dict': {
'id': '652431',
'ext': 'mp4',
r'Views:\s*</strong>\s*<span>([\d,\.]+)</span>',
webpage, 'view count', fatal=False))
- video_url = compat_urllib_parse.unquote(self._html_search_regex(
- r'video_url=(.+?)&', webpage, 'video_url'))
- path = compat_urllib_parse_urlparse(video_url).path
- format = path.split('/')[5].split('_')[:2]
- format = "-".join(format)
+ flash_vars = compat_parse_qs(self._search_regex(
+ r'<param[^>]+?name="flashvars"[^>]+?value="([^"]+)"', webpage, 'flash vars'))
+
+ formats = []
+ quality = qualities(['180p', '240p', '360p', '480p', '720p', '1080p'])
+ for k, vals in flash_vars.items():
+ m = re.match(r'quality_(?P<quality>[0-9]+p)$', k)
+ if m is not None:
+ formats.append({
+ 'format_id': m.group('quality'),
+ 'quality': quality(m.group('quality')),
+ 'url': vals[0],
+ })
+
+ self._sort_formats(formats)
return {
'id': video_id,
'title': video_title,
+ 'formats': formats,
'uploader': uploader,
'view_count': view_count,
- 'url': video_url,
- 'format': format,
- 'format_id': format,
'age_limit': 18,
}
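The quality_* flashvars are ranked with the qualities() helper from ..utils; behaviorally it is an index lookup over the ordered label list. A minimal sketch mirroring (not importing) the helper:

def qualities(quality_ids):
    # ranking function: position in the list, -1 for unknown labels
    def q(qid):
        try:
            return quality_ids.index(qid)
        except ValueError:
            return -1
    return q

quality = qualities(['180p', '240p', '360p', '480p', '720p', '1080p'])
assert quality('720p') > quality('240p')
assert quality('4k') == -1  # unknown labels sort below everything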
_VALID_URL = r'''(?x)
https?://(?:\w+\.)?facebook\.com/
(?:[^#]*?\#!/)?
- (?:video/video\.php|photo\.php|video\.php|video/embed)\?(?:.*?)
- (?:v|video_id)=(?P<id>[0-9]+)
+ (?:
+ (?:video/video\.php|photo\.php|video\.php|video/embed)\?(?:.*?)
+ (?:v|video_id)=|
+ [^/]+/videos/(?:[^/]+/)?
+ )
+ (?P<id>[0-9]+)
(?:.*)'''
_LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
_CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
}, {
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
'only_matching': True,
+ }, {
+ 'url': 'https://www.facebook.com/amogood/videos/1618742068337349/?fref=nf',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.facebook.com/ChristyClarkForBC/videos/vb.22819070941/10153870694020942/?type=2&theater',
+ 'only_matching': True,
}]
def _login(self):
import re
from .common import InfoExtractor
+from ..compat import compat_urllib_request
from ..utils import (
ExtractorError,
- unescapeHTML,
+ find_xpath_attr,
)
video_id = mobj.group('id')
video_uploader_id = mobj.group('uploader_id')
webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
- webpage = self._download_webpage(webpage_url, video_id)
+ req = compat_urllib_request.Request(webpage_url)
+ req.add_header(
+ 'User-Agent',
+ # Flickr requires a sufficiently recent browser User-Agent
+ 'Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20150101 Firefox/38.0 (Chrome)')
+ webpage = self._download_webpage(req, video_id)
- secret = self._search_regex(r"photo_secret: '(\w+)'", webpage, 'secret')
+ secret = self._search_regex(r'secret"\s*:\s*"(\w+)"', webpage, 'secret')
first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
- first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')
+ first_xml = self._download_xml(first_url, video_id, 'Downloading first data webpage')
- node_id = self._html_search_regex(r'<Item id="id">(\d+-\d+)</Item>',
- first_xml, 'node_id')
+ node_id = find_xpath_attr(
+ first_xml, './/{http://video.yahoo.com/YEP/1.0/}Item', 'id',
+ 'id').text
second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
- second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')
+ second_xml = self._download_xml(second_url, video_id, 'Downloading second data webpage')
self.report_extraction(video_id)
- mobj = re.search(r'<STREAM APP="(.+?)" FULLPATH="(.+?)"', second_xml)
- if mobj is None:
+ stream = second_xml.find('.//STREAM')
+ if stream is None:
raise ExtractorError('Unable to extract video url')
- video_url = mobj.group(1) + unescapeHTML(mobj.group(2))
+ video_url = stream.attrib['APP'] + stream.attrib['FULLPATH']
return {
'id': video_id,
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class FootyRoomIE(InfoExtractor):
+ _VALID_URL = r'http://footyroom\.com/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/',
+ 'info_dict': {
+ 'id': 'schalke-04-0-2-real-madrid-2015-02',
+ 'title': 'Schalke 04 0 – 2 Real Madrid',
+ },
+ 'playlist_count': 3,
+ }, {
+ 'url': 'http://footyroom.com/georgia-0-2-germany-2015-03/',
+ 'info_dict': {
+ 'id': 'georgia-0-2-germany-2015-03',
+ 'title': 'Georgia 0 – 2 Germany',
+ },
+ 'playlist_count': 1,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ playlist = self._parse_json(
+ self._search_regex(
+ r'VideoSelector\.load\((\[.+?\])\);', webpage, 'video selector'),
+ playlist_id)
+
+ playlist_title = self._og_search_title(webpage)
+
+ entries = []
+ for video in playlist:
+ payload = video.get('payload')
+ if not payload:
+ continue
+ playwire_url = self._search_regex(
+ r'data-config="([^"]+)"', payload,
+ 'playwire url', default=None)
+ if playwire_url:
+ entries.append(self.url_result(self._proto_relative_url(
+ playwire_url, 'http:'), 'Playwire'))
+
+ return self.playlist_result(entries, playlist_id, playlist_title)
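A FootyRoom page can hold several Playwire embeds, so the extractor returns a playlist of url_result() stubs rather than one video. playlist_result() just wraps them in a '_type': 'playlist' info dict, roughly (a sketch of the shape, not the exact common.py helper):

def playlist_result(entries, playlist_id=None, playlist_title=None):
    # each entry is itself an info dict or a url_result() stub
    return {
        '_type': 'playlist',
        'id': playlist_id,
        'title': playlist_title,
        'entries': entries,
    }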
--- /dev/null
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import smuggle_url
+
+
+class FoxSportsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*(?P<id>[^/]+)'
+
+ _TEST = {
+ 'url': 'http://www.foxsports.com/video?vid=432609859715',
+ 'info_dict': {
+ 'id': 'gA0bHB3Ladz3',
+ 'ext': 'flv',
+ 'title': 'Courtney Lee on going up 2-0 in series vs. Blazers',
+ 'description': 'Courtney Lee talks about Memphis being focused.',
+ },
+ 'add_ie': ['ThePlatform'],
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ config = self._parse_json(
+ self._search_regex(
+ r"data-player-config='([^']+)'", webpage, 'data player config'),
+ video_id)
+
+ return self.url_result(smuggle_url(
+ config['releaseURL'] + '&manifest=f4m', {'force_smil_url': True}))
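FoxSports delegates to ThePlatform but must pass the force_smil_url flag along with the URL; smuggle_url() hides such data in the URL fragment and the receiving extractor unsmuggles it. A simplified round-trip sketch of the two ..utils helpers (URL invented):

import json

try:
    from urllib.parse import urlencode, parse_qsl  # Python 3
except ImportError:  # Python 2
    from urllib import urlencode
    from urlparse import parse_qsl

def smuggle_url(url, data):
    return url + '#' + urlencode({'__youtubedl_smuggle': json.dumps(data)})

def unsmuggle_url(smug_url, default=None):
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition('#')
    return url, json.loads(dict(parse_qsl(sdata))['__youtubedl_smuggle'])

url, data = unsmuggle_url(smuggle_url('http://example.com/video', {'force_smil_url': True}))
assert url == 'http://example.com/video' and data == {'force_smil_url': True}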
clean_html,
ExtractorError,
int_or_none,
+ float_or_none,
parse_duration,
+ determine_ext,
)
if not video_url:
continue
format_id = video['format']
- if video_url.endswith('.f4m'):
+ ext = determine_ext(video_url)
+ if ext == 'f4m':
if georestricted:
# See https://github.com/rg3/youtube-dl/issues/3963
# m3u8 urls work fine
'http://hdfauth.francetv.fr/esi/urltokengen2.html?url=%s' % video_url_parsed.path,
video_id, 'Downloading f4m manifest token', fatal=False)
if f4m_url:
- f4m_formats = self._extract_f4m_formats(f4m_url, video_id)
- for f4m_format in f4m_formats:
- f4m_format['preference'] = 1
- formats.extend(f4m_formats)
- elif video_url.endswith('.m3u8'):
- formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4'))
+ formats.extend(self._extract_f4m_formats(f4m_url, video_id, 1, format_id))
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4', m3u8_id=format_id))
elif video_url.startswith('rtmp'):
formats.append({
'url': video_url,
'title': info['titre'],
'description': clean_html(info['synopsis']),
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
- 'duration': parse_duration(info['duree']),
+ 'duration': float_or_none(info.get('real_duration'), 1000) or parse_duration(info['duree']),
'timestamp': int_or_none(info['diffusion']['timestamp']),
'formats': formats,
}
_VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
_TEST = {
- 'url': 'http://culturebox.francetvinfo.fr/festivals/dans-les-jardins-de-william-christie/dans-les-jardins-de-william-christie-le-camus-162553',
- 'md5': '5ad6dec1ffb2a3fbcb20cc4b744be8d6',
+ 'url': 'http://culturebox.francetvinfo.fr/live/musique/musique-classique/le-livre-vermeil-de-montserrat-a-la-cathedrale-delne-214511',
+ 'md5': '9b88dc156781c4dbebd4c3e066e0b1d6',
'info_dict': {
- 'id': 'EV_22853',
+ 'id': 'EV_50111',
'ext': 'flv',
- 'title': 'Dans les jardins de William Christie - Le Camus',
- 'description': 'md5:4710c82315c40f0c865ca8b9a68b5299',
- 'upload_date': '20140829',
- 'timestamp': 1409317200,
+ 'title': "Le Livre Vermeil de Montserrat à la Cathédrale d'Elne",
+ 'description': 'md5:f8a4ad202e8fe533e2c493cc12e739d9',
+ 'upload_date': '20150320',
+ 'timestamp': 1426892400,
+ 'duration': 2760.9,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
name = mobj.group('name')
+
webpage = self._download_webpage(url, name)
+
+ if ">Ce live n'est plus disponible en replay<" in webpage:
+ raise ExtractorError('Video %s is not available' % name, expected=True)
+
video_id, catalogue = self._search_regex(
r'"http://videos\.francetv\.fr/video/([^@]+@[^"]+)"', webpage, 'video id').split('@')
bitrates.sort()
formats = []
-
for bitrate in bitrates:
for link in links:
formats.append({
'vbr': bitrate,
})
+ subtitles = {}
+ for src, src_lang in re.findall(r'<track kind="captions" src="([^"]+)" srclang="([^"]+)"', webpage):
+ subtitles[src_lang] = [{
+ 'ext': src.split('/')[-1],
+ 'url': 'http://www.funnyordie.com%s' % src,
+ }]
+
post_json = self._search_regex(
r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details')
post = json.loads(post_json)
'description': post.get('description'),
'thumbnail': post.get('picture'),
'formats': formats,
+ 'subtitles': subtitles,
}
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ js_to_json,
+ parse_duration,
+ remove_start,
+)
+
+
+class GamersydeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?gamersyde\.com/hqstream_(?P<display_id>[\da-z_]+)-(?P<id>\d+)_[a-z]{2}\.html'
+ _TEST = {
+ 'url': 'http://www.gamersyde.com/hqstream_bloodborne_birth_of_a_hero-34371_en.html',
+ 'md5': 'f38d400d32f19724570040d5ce3a505f',
+ 'info_dict': {
+ 'id': '34371',
+ 'ext': 'mp4',
+ 'duration': 372,
+ 'title': 'Bloodborne - Birth of a hero',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ playlist = self._parse_json(
+ self._search_regex(
+ r'(?s)playlist: \[({.+?})\]\s*}\);', webpage, 'files'),
+ display_id, transform_source=js_to_json)
+
+ formats = []
+ for source in playlist['sources']:
+ video_url = source.get('file')
+ if not video_url:
+ continue
+ format_id = source.get('label')
+ f = {
+ 'url': video_url,
+ 'format_id': format_id,
+ }
+ m = re.search(r'^(?P<height>\d+)[pP](?P<fps>\d+)fps', format_id)
+ if m:
+ f.update({
+ 'height': int(m.group('height')),
+ 'fps': int(m.group('fps')),
+ })
+ formats.append(f)
+ self._sort_formats(formats)
+
+ title = remove_start(playlist['title'], '%s - ' % video_id)
+ thumbnail = playlist.get('image')
+ duration = parse_duration(self._search_regex(
+ r'Length:</label>([^<]+)<', webpage, 'duration', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ }
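Gamersyde playlist titles come prefixed with the numeric id ('34371 - Bloodborne - Birth of a hero'), hence the remove_start() call. The helper is a guarded slice; sketch:

def remove_start(s, start):
    return s[len(start):] if s is not None and s.startswith(start) else s

assert remove_start('34371 - Bloodborne - Birth of a hero', '34371 - ') == 'Bloodborne - Birth of a hero'
assert remove_start(None, '34371 - ') is None  # tolerates a missing title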
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class GazetaIE(InfoExtractor):
+ _VALID_URL = r'(?P<url>https?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:(?:main|\d{4}/\d{2}/\d{2})/)?(?P<id>[A-Za-z0-9-_.]+)\.s?html)'
+ _TESTS = [{
+ 'url': 'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml',
+ 'md5': 'd49c9bdc6e5a7888f27475dc215ee789',
+ 'info_dict': {
+ 'id': '205566',
+ 'ext': 'mp4',
+ 'title': '«70–80 процентов гражданских в Донецке на грани голода»',
+ 'description': 'md5:38617526050bd17b234728e7f9620a71',
+ 'thumbnail': 're:^https?://.*\.jpg',
+ },
+ }, {
+ 'url': 'http://www.gazeta.ru/lifestyle/video/2015/03/08/master-klass_krasivoi_byt._delaem_vesennii_makiyazh.shtml',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+
+ display_id = mobj.group('id')
+ embed_url = '%s?p=embed' % mobj.group('url')
+ embed_page = self._download_webpage(
+ embed_url, display_id, 'Downloading embed page')
+
+ video_id = self._search_regex(
+ r'<div[^>]*?class="eagleplayer"[^>]*?data-id="([^"]+)"', embed_page, 'video id')
+
+ return self.url_result(
+ 'eagleplatform:gazeta.media.eagleplatform.com:%s' % video_id, 'EaglePlatform')
class GDCVaultIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)'
+ _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)?'
+ _NETRC_MACHINE = 'gdcvault'
_TESTS = [
{
'url': 'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple',
'md5': '7ce8388f544c88b7ac11c7ab1b593704',
'info_dict': {
'id': '1019721',
+ 'display_id': 'Doki-Doki-Universe-Sweet-Simple',
'ext': 'mp4',
'title': 'Doki-Doki Universe: Sweet, Simple and Genuine (GDC Next 10)'
}
'url': 'http://www.gdcvault.com/play/1015683/Embracing-the-Dark-Art-of',
'info_dict': {
'id': '1015683',
+ 'display_id': 'Embracing-the-Dark-Art-of',
'ext': 'flv',
'title': 'Embracing the Dark Art of Mathematical Modeling in AI'
},
'md5': 'a5eb77996ef82118afbbe8e48731b98e',
'info_dict': {
'id': '1015301',
+ 'display_id': 'Thexder-Meets-Windows-95-or',
'ext': 'flv',
'title': 'Thexder Meets Windows 95, or Writing Great Games in the Windows 95 Environment',
},
'skip': 'Requires login',
+ },
+ {
+ 'url': 'http://gdcvault.com/play/1020791/',
+ 'only_matching': True,
}
]
})
return video_formats
- def _login(self, webpage_url, video_id):
+ def _login(self, webpage_url, display_id):
(username, password) = self._get_login_info()
if username is None or password is None:
self.report_warning('It looks like ' + webpage_url + ' requires a login. Try specifying a username and password and try again.')
request = compat_urllib_request.Request(login_url, compat_urllib_parse.urlencode(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
- self._download_webpage(request, video_id, 'Logging in')
- start_page = self._download_webpage(webpage_url, video_id, 'Getting authenticated video page')
- self._download_webpage(logout_url, video_id, 'Logging out')
+ self._download_webpage(request, display_id, 'Logging in')
+ start_page = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page')
+ self._download_webpage(logout_url, display_id, 'Logging out')
return start_page
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
+ display_id = mobj.group('name') or video_id
+
webpage_url = 'http://www.gdcvault.com/play/' + video_id
- start_page = self._download_webpage(webpage_url, video_id)
+ start_page = self._download_webpage(webpage_url, display_id)
direct_url = self._search_regex(
r's1\.addVariable\("file",\s*encodeURIComponent\("(/[^"]+)"\)\);',
return {
'id': video_id,
+ 'display_id': display_id,
'url': video_url,
'ext': 'flv',
'title': title,
start_page, 'xml root', default=None)
if xml_root is None:
# Probably need to authenticate
- login_res = self._login(webpage_url, video_id)
+ login_res = self._login(webpage_url, display_id)
if login_res is None:
self.report_warning('Could not login.')
else:
xml_name = self._html_search_regex(r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename')
xml_decription_url = xml_root + 'xml/' + xml_name
- xml_description = self._download_xml(xml_decription_url, video_id)
+ xml_description = self._download_xml(xml_decription_url, display_id)
video_title = xml_description.find('./metadata/title').text
video_formats = self._parse_mp4(xml_description)
return {
'id': video_id,
+ 'display_id': display_id,
'title': video_title,
'formats': video_formats,
}
unsmuggle_url,
UnsupportedError,
url_basename,
+ xpath_text,
)
from .brightcove import BrightcoveIE
+from .nbc import NBCSportsVPlayerIE
from .ooyala import OoyalaIE
from .rutv import RUTVIE
from .smotri import SmotriIE
from .condenast import CondeNastIE
+from .udn import UDNEmbedIE
+from .senateisvp import SenateISVPIE
+from .bliptv import BlipTVIE
+from .svt import SVTIE
class GenericIE(InfoExtractor):
},
'add_ie': ['Viddler'],
},
+ # Libsyn embed
+ {
+ 'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
+ 'info_dict': {
+ 'id': '3377616',
+ 'ext': 'mp3',
+ 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
+ 'description': 'md5:601cb790edd05908957dae8aaa866465',
+ 'upload_date': '20150220',
+ },
+ },
# jwplayer YouTube
{
'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
'title': 'John Carlson Postgame 2/25/15',
},
},
+ # Eagle.Platform embed (generic URL)
+ {
+ 'url': 'http://lenta.ru/news/2015/03/06/navalny/',
+ 'info_dict': {
+ 'id': '227304',
+ 'ext': 'mp4',
+ 'title': 'Навальный вышел на свободу',
+ 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 87,
+ 'view_count': int,
+ 'age_limit': 0,
+ },
+ },
+ # ClipYou (Eagle.Platform) embed (custom URL)
+ {
+ 'url': 'http://muz-tv.ru/play/7129/',
+ 'info_dict': {
+ 'id': '12820',
+ 'ext': 'mp4',
+ 'title': "'O Sole Mio",
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 216,
+ 'view_count': int,
+ },
+ },
+ # Pladform embed
+ {
+ 'url': 'http://muz-tv.ru/kinozal/view/7400/',
+ 'info_dict': {
+ 'id': '100183293',
+ 'ext': 'mp4',
+ 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
+ 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 694,
+ 'age_limit': 0,
+ },
+ },
+ # Playwire embed
+ {
+ 'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
+ 'info_dict': {
+ 'id': '3519514',
+ 'ext': 'mp4',
+ 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
+ 'thumbnail': 're:^https?://.*\.png$',
+ 'duration': 45.115,
+ },
+ },
+ # 5min embed
+ {
+ 'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
+ 'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
+ 'info_dict': {
+ 'id': '518726732',
+ 'ext': 'mp4',
+ 'title': 'Facebook Creates "On This Day" | Crunch Report',
+ },
+ },
+ # SVT embed
+ {
+ 'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
+ 'info_dict': {
+ 'id': '2900353',
+ 'ext': 'flv',
+ 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
+ 'duration': 27,
+ 'age_limit': 0,
+ },
+ },
+ # RSS feed with enclosure
+ {
+ 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
+ 'info_dict': {
+ 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
+ 'ext': 'm4v',
+ 'upload_date': '20150228',
+ 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
+ }
+ },
+ # Crooks and Liars embed
+ {
+ 'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
+ 'info_dict': {
+ 'id': '8RUoRhRi',
+ 'ext': 'mp4',
+ 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
+ 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
+ 'timestamp': 1428207000,
+ 'upload_date': '20150405',
+ 'uploader': 'Heather',
+ },
+ },
+ # Crooks and Liars external embed
+ {
+ 'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
+ 'info_dict': {
+ 'id': 'MTE3MjUtMzQ2MzA',
+ 'ext': 'mp4',
+ 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
+ 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
+ 'timestamp': 1265032391,
+ 'upload_date': '20100201',
+ 'uploader': 'Heather',
+ },
+ },
+ # NBC Sports vplayer embed
+ {
+ 'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
+ 'info_dict': {
+ 'id': 'ln7x1qSThw4k',
+ 'ext': 'flv',
+ 'title': "PFT Live: New leader in the 'new-look' defense",
+ 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
+ },
+ },
+ # UDN embed
+ {
+ 'url': 'http://www.udn.com/news/story/7314/822787',
+ 'md5': 'fd2060e988c326991037b9aff9df21a6',
+ 'info_dict': {
+ 'id': '300346',
+ 'ext': 'mp4',
+ 'title': '中一中男師變性 全校師生力挺',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ }
+ },
+ # Ooyala embed
+ {
+ 'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
+ 'info_dict': {
+ 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
+ 'ext': 'mp4',
+ 'description': 'VIDEO: Index/Match versus VLOOKUP.',
+ 'title': 'This is what separates the Excel masters from the wannabes',
+ },
+ 'params': {
+ # m3u8 downloads
+ 'skip_download': True,
+ }
+ },
+ # Contains a SMIL manifest
+ {
+ 'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
+ 'info_dict': {
+ 'id': 'file',
+ 'ext': 'flv',
+ 'title': '+ Football: Lottery Champions League Europe',
+ 'uploader': 'www.telewebion.com',
+ },
+ 'params': {
+ # rtmpe downloads
+ 'skip_download': True,
+ }
+ }
]
def report_following_redirect(self, new_url):
playlist_desc_el = doc.find('./channel/description')
playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
- entries = [{
- '_type': 'url',
- 'url': e.find('link').text,
- 'title': e.find('title').text,
- } for e in doc.findall('./channel/item')]
+ entries = []
+ for it in doc.findall('./channel/item'):
+ next_url = xpath_text(it, 'link', fatal=False)
+ if not next_url:
+ enclosure_nodes = it.findall('./enclosure')
+ for e in enclosure_nodes:
+ next_url = e.attrib.get('url')
+ if next_url:
+ break
+
+ if not next_url:
+ continue
+
+ entries.append({
+ '_type': 'url',
+ 'url': next_url,
+ 'title': it.find('title').text,
+ })
return {
'_type': 'playlist',
}
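The old RSS path assumed every <item> carries a <link>; podcast feeds such as the Maddow test above put the media URL in an <enclosure> instead. A standalone toy of the new fallback (feed content invented):

import xml.etree.ElementTree as ET

item = ET.fromstring(
    '<item><title>episode</title>'
    '<enclosure url="http://example.com/episode.m4v" type="video/x-m4v"/></item>')

next_url = item.findtext('link')  # None: this item has no <link>
if not next_url:
    for enclosure in item.findall('./enclosure'):
        next_url = enclosure.attrib.get('url')
        if next_url:
            break
assert next_url == 'http://example.com/episode.m4v'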
# Look for embedded blip.tv player
- mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
- if mobj:
- return self.url_result('http://blip.tv/a/a-' + mobj.group(1), 'BlipTV')
- mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage)
- if mobj:
- return self.url_result(mobj.group(1), 'BlipTV')
+ bliptv_url = BlipTVIE._extract_url(webpage)
+ if bliptv_url:
+ return self.url_result(bliptv_url, 'BlipTV')
+
+ # Look for SVT player
+ svt_url = SVTIE._extract_url(webpage)
+ if svt_url:
+ return self.url_result(svt_url, 'SVT')
# Look for embedded condenast player
matches = re.findall(
if mobj is not None:
return self.url_result(mobj.group('url'))
+ # Look for NYTimes player
+ mobj = re.search(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
+ # Look for Libsyn player
+ mobj = re.search(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
# Look for Ooyala videos
mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
- re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage))
+ re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
+ re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
if mobj is not None:
return OoyalaIE._build_url_result(mobj.group('ec'))
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
webpage)
+ if not mobj:
+ mobj = re.search(
+ r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
+ webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'MLB')
if mobj is not None:
return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
+ # Look for Eagle.Platform embeds
+ mobj = re.search(
+ r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'EaglePlatform')
+
+ # Look for ClipYou (uses Eagle.Platform) embeds
+ mobj = re.search(
+ r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
+ if mobj is not None:
+ return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
+
+ # Look for Pladform embeds
+ mobj = re.search(
+ r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Pladform')
+
+ # Look for Playwire embeds
+ mobj = re.search(
+ r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
+ # Look for 5min embeds
+ mobj = re.search(
+ r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
+ if mobj is not None:
+ return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
+
+ # Look for Crooks and Liars embeds
+ mobj = re.search(
+ r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
+ # Look for NBC Sports VPlayer embeds
+ nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
+ if nbc_sports_url:
+ return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
+
+ # Look for UDN embeds
+ mobj = re.search(
+ r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
+ if mobj is not None:
+ return self.url_result(
+ compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
+
+ # Look for Senate ISVP iframe
+ senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
+ if senate_isvp_url:
+ return self.url_result(senate_isvp_url, 'SenateISVP')
+
def check_video(vurl):
if YoutubeIE.suitable(vurl):
return True
# HTML5 video
found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
if not found:
+ REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
found = re.search(
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
- r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'?([^\'"]+)',
+ r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
webpage)
+ if not found:
+ # Look also in Refresh HTTP header
+ refresh_header = head_response.headers.get('Refresh')
+ if refresh_header:
+ found = re.search(REDIRECT_REGEX, refresh_header)
if found:
- new_url = found.group(1)
+ new_url = compat_urlparse.urljoin(url, found.group(1))
self.report_following_redirect(new_url)
return {
'_type': 'url',
# here's a fun little line of code for you:
video_id = os.path.splitext(video_id)[0]
- entries.append({
- 'id': video_id,
- 'url': video_url,
- 'uploader': video_uploader,
- 'title': video_title,
- 'age_limit': age_limit,
- })
+ if determine_ext(video_url) == 'smil':
+ entries.append({
+ 'id': video_id,
+ 'formats': self._extract_smil_formats(video_url, video_id),
+ 'uploader': video_uploader,
+ 'title': video_title,
+ 'age_limit': age_limit,
+ })
+ else:
+ entries.append({
+ 'id': video_id,
+ 'url': video_url,
+ 'uploader': video_uploader,
+ 'title': video_title,
+ 'age_limit': age_limit,
+ })
if len(entries) == 1:
return entries[0]
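Factoring the pattern into REDIRECT_REGEX lets one expression serve both the <meta http-equiv="refresh"> tag and the Refresh HTTP header, and the new compat_urlparse.urljoin() makes relative redirect targets resolve against the page URL. A quick check against a typical header value (illustrative):

import re

REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
found = re.search(REDIRECT_REGEX, "3; url=videos/latest.mp4")
assert found.group(1) == 'videos/latest.mp4'  # relative, hence the urljoin above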
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ float_or_none,
+ qualities,
+)
+
+
+class GfycatIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?P<id>[^/?#]+)'
+ _TEST = {
+ 'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
+ 'info_dict': {
+ 'id': 'DeadlyDecisiveGermanpinscher',
+ 'ext': 'mp4',
+ 'title': 'Ghost in the Shell',
+ 'timestamp': 1410656006,
+ 'upload_date': '20140914',
+ 'uploader': 'anonymous',
+ 'duration': 10.4,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'categories': list,
+ 'age_limit': 0,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ gfy = self._download_json(
+ 'http://gfycat.com/cajax/get/%s' % video_id,
+ video_id, 'Downloading video info')['gfyItem']
+
+ title = gfy.get('title') or gfy['gfyName']
+ description = gfy.get('description')
+ timestamp = int_or_none(gfy.get('createDate'))
+ uploader = gfy.get('userName')
+ view_count = int_or_none(gfy.get('views'))
+ like_count = int_or_none(gfy.get('likes'))
+ dislike_count = int_or_none(gfy.get('dislikes'))
+ age_limit = 18 if gfy.get('nsfw') == '1' else 0
+
+ width = int_or_none(gfy.get('width'))
+ height = int_or_none(gfy.get('height'))
+ fps = int_or_none(gfy.get('frameRate'))
+ num_frames = int_or_none(gfy.get('numFrames'))
+
+ duration = float_or_none(num_frames, fps) if num_frames and fps else None
+
+ categories = gfy.get('tags') or gfy.get('extraLemmas') or []
+
+ FORMATS = ('gif', 'webm', 'mp4')
+ quality = qualities(FORMATS)
+
+ formats = []
+ for format_id in FORMATS:
+ video_url = gfy.get('%sUrl' % format_id)
+ if not video_url:
+ continue
+ filesize = gfy.get('%sSize' % format_id)
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ 'width': width,
+ 'height': height,
+ 'fps': fps,
+ 'filesize': filesize,
+ 'quality': quality(format_id),
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'uploader': uploader,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'dislike_count': dislike_count,
+ 'categories': categories,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
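Gfycat's API reports numFrames and frameRate rather than a duration, so the code divides one by the other via float_or_none(num_frames, fps); the helper's scale argument is a divisor. Sketch of its behavior (frame numbers invented to land on the 10.4 s test value):

def float_or_none(v, scale=1, invscale=1, default=None):
    return default if v is None else float(v) * invscale / scale

assert float_or_none(104, 10) == 10.4   # e.g. 104 frames at 10 fps
assert float_or_none(None, 10) is None  # missing fields stay None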
r'class="author">([^<]+)</a>', webpage, 'uploader', fatal=False)
view_count = str_to_int(self._search_regex(
- r'<span class="views"><strong>([\d.]+)</strong>', webpage, 'view count', fatal=False))
+ r'<span class="views"><strong>([\d.,]+)</strong>',
+ webpage, 'view count', fatal=False))
return {
'id': video_id,
_VALID_URL = 'https?://.+?\.globo\.com/(?P<id>.+)'
_API_URL_TEMPLATE = 'http://api.globovideos.com/videos/%s/playlist'
- _SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=2.9.9.50&resource_id=%s'
+ _SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=17.0.0.132&resource_id=%s'
_VIDEOID_REGEXES = [
r'\bdata-video-id="(\d+)"',
class GorillaVidIE(InfoExtractor):
- IE_DESC = 'GorillaVid.in, daclips.in, movpod.in and fastvideo.in'
+ IE_DESC = 'GorillaVid.in, daclips.in, movpod.in, fastvideo.in and realvid.net'
_VALID_URL = r'''(?x)
https?://(?P<host>(?:www\.)?
- (?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in))/
+ (?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in|realvid\.net))/
(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?
'''
},
}, {
'url': 'http://gorillavid.in/embed-z08zf8le23c6-960x480.html',
- 'md5': 'c9e293ca74d46cad638e199c3f3fe604',
- 'info_dict': {
- 'id': 'z08zf8le23c6',
- 'ext': 'mp4',
- 'title': 'Say something nice',
- 'thumbnail': 're:http://.*\.jpg',
- },
+ 'only_matching': True,
}, {
'url': 'http://daclips.in/3rso4kdn6f9m',
'md5': '1ad8fd39bb976eeb66004d3a4895f106',
'title': 'Man of Steel - Trailer',
'thumbnail': 're:http://.*\.jpg',
},
+ }, {
+ 'url': 'http://realvid.net/ctn2y6p2eviw',
+ 'md5': 'b2166d2cf192efd6b6d764c18fd3710e',
+ 'info_dict': {
+ 'id': 'ctn2y6p2eviw',
+ 'ext': 'flv',
+ 'title': 'rdx 1955',
+ 'thumbnail': 're:http://.*\.jpg',
+ },
}, {
'url': 'http://movpod.in/0wguyyxi1yca',
'only_matching': True,
webpage = self._download_webpage(req, video_id, 'Downloading video page')
title = self._search_regex(
- r'style="z-index: [0-9]+;">([^<]+)</span>',
+ [r'style="z-index: [0-9]+;">([^<]+)</span>', r'>Watch (.+) '],
webpage, 'title', default=None) or self._og_search_title(webpage)
video_url = self._search_regex(
r'file\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'file url')
+++ /dev/null
-# coding: utf-8
-from __future__ import unicode_literals
-
-import time
-import math
-import os.path
-import re
-
-
-from .common import InfoExtractor
-from ..compat import (
- compat_html_parser,
- compat_urllib_parse,
- compat_urllib_request,
- compat_urlparse,
-)
-from ..utils import ExtractorError
-
-
-class GroovesharkHtmlParser(compat_html_parser.HTMLParser):
- def __init__(self):
- self._current_object = None
- self.objects = []
- compat_html_parser.HTMLParser.__init__(self)
-
- def handle_starttag(self, tag, attrs):
- attrs = dict((k, v) for k, v in attrs)
- if tag == 'object':
- self._current_object = {'attrs': attrs, 'params': []}
- elif tag == 'param':
- self._current_object['params'].append(attrs)
-
- def handle_endtag(self, tag):
- if tag == 'object':
- self.objects.append(self._current_object)
- self._current_object = None
-
- @classmethod
- def extract_object_tags(cls, html):
- p = cls()
- p.feed(html)
- p.close()
- return p.objects
-
-
-class GroovesharkIE(InfoExtractor):
- _VALID_URL = r'https?://(www\.)?grooveshark\.com/#!/s/([^/]+)/([^/]+)'
- _TEST = {
- 'url': 'http://grooveshark.com/#!/s/Jolene+Tenth+Key+Remix+Ft+Will+Sessions/6SS1DW?src=5',
- 'md5': '7ecf8aefa59d6b2098517e1baa530023',
- 'info_dict': {
- 'id': '6SS1DW',
- 'title': 'Jolene (Tenth Key Remix ft. Will Sessions)',
- 'ext': 'mp3',
- 'duration': 227,
- }
- }
-
- do_playerpage_request = True
- do_bootstrap_request = True
-
- def _parse_target(self, target):
- uri = compat_urlparse.urlparse(target)
- hash = uri.fragment[1:].split('?')[0]
- token = os.path.basename(hash.rstrip('/'))
- return (uri, hash, token)
-
- def _build_bootstrap_url(self, target):
- (uri, hash, token) = self._parse_target(target)
- query = 'getCommunicationToken=1&hash=%s&%d' % (compat_urllib_parse.quote(hash, safe=''), self.ts)
- return (compat_urlparse.urlunparse((uri.scheme, uri.netloc, '/preload.php', None, query, None)), token)
-
- def _build_meta_url(self, target):
- (uri, hash, token) = self._parse_target(target)
- query = 'hash=%s&%d' % (compat_urllib_parse.quote(hash, safe=''), self.ts)
- return (compat_urlparse.urlunparse((uri.scheme, uri.netloc, '/preload.php', None, query, None)), token)
-
- def _build_stream_url(self, meta):
- return compat_urlparse.urlunparse(('http', meta['streamKey']['ip'], '/stream.php', None, None, None))
-
- def _build_swf_referer(self, target, obj):
- (uri, _, _) = self._parse_target(target)
- return compat_urlparse.urlunparse((uri.scheme, uri.netloc, obj['attrs']['data'], None, None, None))
-
- def _transform_bootstrap(self, js):
- return re.split('(?m)^\s*try\s*\{', js)[0] \
- .split(' = ', 1)[1].strip().rstrip(';')
-
- def _transform_meta(self, js):
- return js.split('\n')[0].split('=')[1].rstrip(';')
-
- def _get_meta(self, target):
- (meta_url, token) = self._build_meta_url(target)
- self.to_screen('Metadata URL: %s' % meta_url)
-
- headers = {'Referer': compat_urlparse.urldefrag(target)[0]}
- req = compat_urllib_request.Request(meta_url, headers=headers)
- res = self._download_json(req, token,
- transform_source=self._transform_meta)
-
- if 'getStreamKeyWithSong' not in res:
- raise ExtractorError(
- 'Metadata not found. URL may be malformed, or Grooveshark API may have changed.')
-
- if res['getStreamKeyWithSong'] is None:
- raise ExtractorError(
- 'Metadata download failed, probably due to Grooveshark anti-abuse throttling. Wait at least an hour before retrying from this IP.',
- expected=True)
-
- return res['getStreamKeyWithSong']
-
- def _get_bootstrap(self, target):
- (bootstrap_url, token) = self._build_bootstrap_url(target)
-
- headers = {'Referer': compat_urlparse.urldefrag(target)[0]}
- req = compat_urllib_request.Request(bootstrap_url, headers=headers)
- res = self._download_json(req, token, fatal=False,
- note='Downloading player bootstrap data',
- errnote='Unable to download player bootstrap data',
- transform_source=self._transform_bootstrap)
- return res
-
- def _get_playerpage(self, target):
- (_, _, token) = self._parse_target(target)
-
- webpage = self._download_webpage(
- target, token,
- note='Downloading player page',
- errnote='Unable to download player page',
- fatal=False)
-
- if webpage is not None:
- # Search (for example German) error message
- error_msg = self._html_search_regex(
- r'<div id="content">\s*<h2>(.*?)</h2>', webpage,
- 'error message', default=None)
- if error_msg is not None:
- error_msg = error_msg.replace('\n', ' ')
- raise ExtractorError('Grooveshark said: %s' % error_msg)
-
- if webpage is not None:
- o = GroovesharkHtmlParser.extract_object_tags(webpage)
- return (webpage, [x for x in o if x['attrs']['id'] == 'jsPlayerEmbed'])
-
- return (webpage, None)
-
- def _real_initialize(self):
- self.ts = int(time.time() * 1000) # timestamp in millis
-
- def _real_extract(self, url):
- (target_uri, _, token) = self._parse_target(url)
-
- # 1. Fill cookiejar by making a request to the player page
- swf_referer = None
- if self.do_playerpage_request:
- (_, player_objs) = self._get_playerpage(url)
- if player_objs is not None:
- swf_referer = self._build_swf_referer(url, player_objs[0])
- self.to_screen('SWF Referer: %s' % swf_referer)
-
- # 2. Ask preload.php for swf bootstrap data to better mimic webapp
- if self.do_bootstrap_request:
- bootstrap = self._get_bootstrap(url)
- self.to_screen('CommunicationToken: %s' % bootstrap['getCommunicationToken'])
-
- # 3. Ask preload.php for track metadata.
- meta = self._get_meta(url)
-
- # 4. Construct stream request for track.
- stream_url = self._build_stream_url(meta)
- duration = int(math.ceil(float(meta['streamKey']['uSecs']) / 1000000))
- post_dict = {'streamKey': meta['streamKey']['streamKey']}
- post_data = compat_urllib_parse.urlencode(post_dict).encode('utf-8')
- headers = {
- 'Content-Length': len(post_data),
- 'Content-Type': 'application/x-www-form-urlencoded'
- }
- if swf_referer is not None:
- headers['Referer'] = swf_referer
-
- return {
- 'id': token,
- 'title': meta['song']['Name'],
- 'http_method': 'POST',
- 'url': stream_url,
- 'ext': 'mp3',
- 'format': 'mp3 audio',
- 'duration': duration,
- 'http_post_data': post_data,
- 'http_headers': headers,
- }
webpage = self._download_webpage(url, video_id)
tape_id = self._search_regex(
- r'class="tapeId">([^<]+)<', webpage, 'tape id')
+ [r'class="tapeId"[^>]*>([^<]+)<', r'tapeId\s*:\s*"([^"]+)"'],
+ webpage, 'tape id')
title = self._og_search_title(webpage)
description = self._og_search_description(webpage)
float_or_none,
int_or_none,
compat_str,
+ determine_ext,
)
def _extract_metadata(self, url, video_id):
thumb_base = 'https://edge.sf.hitbox.tv'
metadata = self._download_json(
- '%s/%s' % (url, video_id), video_id)
+ '%s/%s' % (url, video_id), video_id,
+ 'Downloading metadata JSON')
date = 'media_live_since'
media_type = 'livestream'
def _real_extract(self, url):
video_id = self._match_id(url)
- metadata = self._extract_metadata(
- 'https://www.hitbox.tv/api/media/video',
- video_id)
-
player_config = self._download_json(
'https://www.hitbox.tv/api/player/config/video/%s' % video_id,
- video_id)
+ video_id, 'Downloading video JSON')
- clip = player_config.get('clip')
- video_url = clip.get('url')
- res = clip.get('bitrates', [])[0].get('label')
+ formats = []
+ for video in player_config['clip']['bitrates']:
+ label = video.get('label')
+ if label == 'Auto':
+ continue
+ video_url = video.get('url')
+ if not video_url:
+ continue
+ bitrate = int_or_none(video.get('bitrate'))
+ if determine_ext(video_url) == 'm3u8':
+ if not video_url.startswith('http'):
+ continue
+ formats.append({
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'tbr': bitrate,
+ 'format_note': label,
+ 'protocol': 'm3u8_native',
+ })
+ else:
+ formats.append({
+ 'url': video_url,
+ 'tbr': bitrate,
+ 'format_note': label,
+ })
+ self._sort_formats(formats)
- metadata['resolution'] = res
- metadata['url'] = video_url
- metadata['protocol'] = 'm3u8'
+ metadata = self._extract_metadata(
+ 'https://www.hitbox.tv/api/media/video',
+ video_id)
+ metadata['formats'] = formats
return metadata
def _real_extract(self, url):
video_id = self._match_id(url)
- metadata = self._extract_metadata(
- 'https://www.hitbox.tv/api/media/live',
- video_id)
-
player_config = self._download_json(
'https://www.hitbox.tv/api/player/config/live/%s' % video_id,
video_id)
servers.append(base_url)
for stream in cdn.get('bitrates'):
label = stream.get('label')
- if label != 'Auto':
+ if label == 'Auto':
+ continue
+ stream_url = stream.get('url')
+ if not stream_url:
+ continue
+ bitrate = int_or_none(stream.get('bitrate'))
+ if stream.get('provider') == 'hls' or determine_ext(stream_url) == 'm3u8':
+ if not stream_url.startswith('http'):
+ continue
+ formats.append({
+ 'url': stream_url,
+ 'ext': 'mp4',
+ 'tbr': bitrate,
+ 'format_note': label,
+ 'rtmp_live': True,
+ })
+ else:
formats.append({
- 'url': '%s/%s' % (base_url, stream.get('url')),
+ 'url': '%s/%s' % (base_url, stream_url),
'ext': 'mp4',
- 'vbr': stream.get('bitrate'),
- 'resolution': label,
+ 'tbr': bitrate,
'rtmp_live': True,
'format_note': host,
'page_url': url,
'player_url': 'http://www.hitbox.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf',
})
-
self._sort_formats(formats)
+
+ metadata = self._extract_metadata(
+ 'https://www.hitbox.tv/api/media/live',
+ video_id)
metadata['formats'] = formats
metadata['is_live'] = True
metadata['title'] = self._live_title(metadata.get('title'))
+
return metadata
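Both Hitbox loops branch on determine_ext(), which only inspects the URL path's suffix: HLS playlists end in .m3u8, while Hitbox's RTMP play paths do not parse as an extension at all. A simplified sketch (URLs invented):

import re

def determine_ext(url, default_ext='unknown_video'):
    guess = url.partition('?')[0].rpartition('.')[2]
    return guess if re.match(r'^[A-Za-z0-9]+$', guess) else default_ext

assert determine_ext('http://edge.example.tv/live/chan.m3u8?token=x') == 'm3u8'
assert determine_ext('mp4:stream_480p') == 'unknown_video'  # falls through to the RTMP branch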
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..utils import int_or_none
class IconosquareIE(InfoExtractor):
- _VALID_URL = r'https?://(www\.)?(?:iconosquare\.com|statigr\.am)/p/(?P<id>[^/]+)'
+ _VALID_URL = r'https?://(?:www\.)?(?:iconosquare\.com|statigr\.am)/p/(?P<id>[^/]+)'
_TEST = {
'url': 'http://statigr.am/p/522207370455279102_24101272',
'md5': '6eb93b882a3ded7c378ee1d6884b1814',
'info_dict': {
'id': '522207370455279102_24101272',
'ext': 'mp4',
- 'uploader_id': 'aguynamedpatrick',
- 'title': 'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
+ 'title': 'Instagram media by @aguynamedpatrick (Patrick Janelle)',
'description': 'md5:644406a9ec27457ed7aa7a9ebcd4ce3d',
+ 'timestamp': 1376471991,
+ 'upload_date': '20130814',
+ 'uploader': 'aguynamedpatrick',
+ 'uploader_id': '24101272',
+ 'comment_count': int,
+ 'like_count': int,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
+
webpage = self._download_webpage(url, video_id)
+
+ media = self._parse_json(
+ self._search_regex(
+ r'window\.media\s*=\s*({.+?});\n', webpage, 'media'),
+ video_id)
+
+ formats = [{
+ 'url': f['url'],
+ 'format_id': format_id,
+ 'width': int_or_none(f.get('width')),
+ 'height': int_or_none(f.get('height'))
+ } for format_id, f in media['videos'].items()]
+ self._sort_formats(formats)
+
title = self._html_search_regex(
r'<title>(.+?)(?: *\(Videos?\))? \| (?:Iconosquare|Statigram)</title>',
webpage, 'title')
- uploader_id = self._html_search_regex(
- r'@([^ ]+)', title, 'uploader name', fatal=False)
+
+ timestamp = int_or_none(media.get('created_time') or media.get('caption', {}).get('created_time'))
+ description = media.get('caption', {}).get('text')
+
+ uploader = media.get('user', {}).get('username')
+ uploader_id = media.get('user', {}).get('id')
+
+ comment_count = int_or_none(media.get('comments', {}).get('count'))
+ like_count = int_or_none(media.get('likes', {}).get('count'))
+
+ thumbnails = [{
+ 'url': t['url'],
+ 'id': thumbnail_id,
+ 'width': int_or_none(t.get('width')),
+ 'height': int_or_none(t.get('height'))
+ } for thumbnail_id, t in media.get('images', {}).items()]
return {
'id': video_id,
- 'url': self._og_search_video_url(webpage),
'title': title,
- 'description': self._og_search_description(webpage),
- 'thumbnail': self._og_search_thumbnail(webpage),
- 'uploader_id': uploader_id
+ 'description': description,
+ 'thumbnails': thumbnails,
+ 'timestamp': timestamp,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'comment_count': comment_count,
+ 'like_count': like_count,
+ 'formats': formats,
}
},
{
'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
- 'md5': '4e9a0bda1e5eebd31ddcf86ec0b9b3c7',
+ 'md5': '618fedb9c901fd086f6f093564ef8558',
'info_dict': {
'id': '078fdd005f6d3c02f63d795faa1b984f',
'ext': 'mp4',
def _find_video_id(self, webpage):
res_id = [
r'"video_id"\s*:\s*"(.*?)"',
+ r'class="hero-poster[^"]*?"[^>]*id="(.+?)"',
r'data-video-id="(.+?)"',
r'<object id="vid_(.+?)"',
r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
- r'class="hero-poster[^"]*?"[^>]*id="(.+?)"',
]
return self._search_regex(res_id, webpage, 'video id')
import re
from .common import InfoExtractor
-from ..utils import (
- int_or_none,
-)
+from ..utils import int_or_none
class InstagramIE(InfoExtractor):
- _VALID_URL = r'http://instagram\.com/p/(?P<id>.*?)/'
+ _VALID_URL = r'https?://instagram\.com/p/(?P<id>[\da-zA-Z]+)'
_TEST = {
'url': 'http://instagram.com/p/aye83DjauH/?foo=bar#abc',
'md5': '0d2da106a9d2631273e192b372806516',
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
+
webpage = self._download_webpage(url, video_id)
uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
webpage, 'uploader id', fatal=False)
from __future__ import unicode_literals
-import json
import re
from .common import InfoExtractor
'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
'md5': '046e491afb32a8aaac1f44dd4ddd54ee',
'info_dict': {
- 'id': '5182',
+ 'id': '114765',
'ext': 'mp4',
- 'title': 'GC 2013 : Tearaway nous présente ses papiers d\'identité',
- 'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
+ 'title': 'Tearaway : GC 2013 : Tearaway nous présente ses papiers d\'identité',
+ 'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.',
},
}
mobj = re.match(self._VALID_URL, url)
title = mobj.group(1)
webpage = self._download_webpage(url, title)
- xml_link = self._html_search_regex(
- r'<param name="flashvars" value="config=(.*?)" />',
+ title = self._html_search_meta('name', webpage)
+ config_url = self._html_search_regex(
+ r'data-src="(/contenu/medias/video.php.*?)"',
webpage, 'config URL')
+ config_url = 'http://www.jeuxvideo.com' + config_url
video_id = self._search_regex(
- r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
- xml_link, 'video ID')
+ r'id=(\d+)',
+ config_url, 'video ID')
- config = self._download_xml(
- xml_link, title, 'Downloading XML config')
- info_json = config.find('format.json').text
- info = json.loads(info_json)['versions'][0]
+ config = self._download_json(
+ config_url, title, 'Downloading JSON config')
- video_url = 'http://video720.jeuxvideo.com/' + info['file']
+ formats = [{
+ 'url': source['file'],
+ 'format_id': source['label'],
+ 'resolution': source['label'],
+ } for source in reversed(config['sources'])]
return {
'id': video_id,
- 'title': config.find('titre_video').text,
- 'ext': 'mp4',
- 'url': video_url,
+ 'title': title,
+ 'formats': formats,
'description': self._og_search_description(webpage),
- 'thumbnail': config.find('image').text,
+ 'thumbnail': config.get('image'),
}
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ srt_subtitles_timecode,
+)
+
+
+class KanalPlayIE(InfoExtractor):
+ IE_DESC = 'Kanal 5/9/11 Play'
+ _VALID_URL = r'https?://(?:www\.)?kanal(?P<channel_id>5|9|11)play\.se/(?:#!/)?(?:play/)?program/\d+/video/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.kanal5play.se/#!/play/program/3060212363/video/3270012277',
+ 'info_dict': {
+ 'id': '3270012277',
+ 'ext': 'flv',
+ 'title': 'Saknar både dusch och avlopp',
+ 'description': 'md5:6023a95832a06059832ae93bc3c7efb7',
+ 'duration': 2636.36,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://www.kanal9play.se/#!/play/program/335032/video/246042',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.kanal11play.se/#!/play/program/232835958/video/367135199',
+ 'only_matching': True,
+ }]
+
+ def _fix_subtitles(self, subs):
+ return '\r\n\r\n'.join(
+ '%s\r\n%s --> %s\r\n%s'
+ % (
+ num,
+ srt_subtitles_timecode(item['startMillis'] / 1000.0),
+ srt_subtitles_timecode(item['endMillis'] / 1000.0),
+ item['text'],
+ ) for num, item in enumerate(subs, 1))
+
+ def _get_subtitles(self, channel_id, video_id):
+ subs = self._download_json(
+ 'http://www.kanal%splay.se/api/subtitles/%s' % (channel_id, video_id),
+ video_id, 'Downloading subtitles JSON', fatal=False)
+ return {'se': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]} if subs else {}
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ channel_id = mobj.group('channel_id')
+
+ video = self._download_json(
+ 'http://www.kanal%splay.se/api/getVideo?format=FLASH&videoId=%s' % (channel_id, video_id),
+ video_id)
+
+ reasons_for_no_streams = video.get('reasonsForNoStreams')
+ if reasons_for_no_streams:
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, '\n'.join(reasons_for_no_streams)),
+ expected=True)
+
+ title = video['title']
+ description = video.get('description')
+ duration = float_or_none(video.get('length'), 1000)
+ thumbnail = video.get('posterUrl')
+
+ stream_base_url = video['streamBaseUrl']
+
+ formats = [{
+ 'url': stream_base_url,
+ 'play_path': stream['source'],
+ 'ext': 'flv',
+ 'tbr': float_or_none(stream.get('bitrate'), 1000),
+ 'rtmp_real_time': True,
+ } for stream in video['streams']]
+ self._sort_formats(formats)
+
+ subtitles = {}
+ if video.get('hasSubtitle'):
+ subtitles = self.extract_subtitles(channel_id, video_id)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
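KanalPlay's subtitle API returns JSON cues with millisecond offsets, which _fix_subtitles() serializes into SRT; srt_subtitles_timecode() does the timestamp formatting. A sketch with one invented cue (the formatter mirrors the ..utils helper):

def srt_subtitles_timecode(seconds):
    return '%02d:%02d:%02d,%03d' % (
        seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)

cue = {'startMillis': 1500, 'endMillis': 3250, 'text': 'Hej!'}
block = '1\r\n%s --> %s\r\n%s' % (
    srt_subtitles_timecode(cue['startMillis'] / 1000.0),
    srt_subtitles_timecode(cue['endMillis'] / 1000.0),
    cue['text'])
assert block == '1\r\n00:00:01,500 --> 00:00:03,250\r\nHej!'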
description = self._og_search_description(webpage, default=None)
thumbnail = flashvars.get('image') or self._og_search_thumbnail(webpage)
duration = int_or_none(flashvars.get('duration'))
- width = int_or_none(self._og_search_property('video:width', webpage, 'video width'))
- height = int_or_none(self._og_search_property('video:height', webpage, 'video height'))
+ width = int_or_none(self._og_search_property(
+ 'video:width', webpage, 'video width', default=None))
+ height = int_or_none(self._og_search_property(
+ 'video:height', webpage, 'video height', default=None))
return {
'id': video_id,
from .common import InfoExtractor
from ..compat import (
- compat_urlparse,
compat_urllib_parse,
+ compat_urllib_request,
+ compat_urlparse,
)
from ..utils import (
determine_ext,
'title': '美人天下01',
'description': 'md5:f88573d9d7225ada1359eaf0dbf8bcda',
},
- 'expected_warnings': [
- 'publish time'
- ]
+ }, {
+ 'note': 'This video is available only in Mainland China, thus a proxy is needed',
+ 'url': 'http://www.letv.com/ptv/vplay/1118082.html',
+ 'md5': 'f80936fbe20fb2f58648e81386ff7927',
+ 'info_dict': {
+ 'id': '1118082',
+ 'ext': 'mp4',
+ 'title': '与龙共舞 完整版',
+ 'description': 'md5:7506a5eeb1722bb9d4068f85024e3986',
+ },
+ 'params': {
+ 'cn_verification_proxy': 'http://proxy.uku.im:8888'
+ },
}]
- # http://www.letv.com/ptv/vplay/1118082.html
- # This video is available only in Mainland China
@staticmethod
def urshift(val, n):
'tkey': self.calc_time_key(int(time.time())),
'domain': 'www.letv.com'
}
+ play_json_req = compat_urllib_request.Request(
+ 'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params)
+ )
+ cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
+ if cn_verification_proxy:
+ play_json_req.add_header('Ytdl-request-proxy', cn_verification_proxy)
+
play_json = self._download_json(
- 'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params),
- media_id, 'playJson data')
+ play_json_req,
+ media_id, 'Downloading playJson data')
# Check for errors
playstatus = play_json['playstatus']
url_info_dict = {
'url': media_url,
- 'ext': determine_ext(dispatch[format_id][1])
+ 'ext': determine_ext(dispatch[format_id][1]),
+ 'format_id': format_id,
}
if format_id[-1:] == 'p':
urls.append(url_info_dict)
publish_time = parse_iso8601(self._html_search_regex(
- r'发布时间 ([^<>]+) ', page, 'publish time', fatal=False),
+ r'发布时间 ([^<>]+) ', page, 'publish time', default=None),
delimiter=' ', timezone=datetime.timedelta(hours=8))
description = self._html_search_meta('description', page, fatal=False)
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import unified_strdate
+
+
+class LibsynIE(InfoExtractor):
+ _VALID_URL = r'https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+)'
+
+ _TEST = {
+ 'url': 'http://html5-player.libsyn.com/embed/episode/id/3377616/',
+ 'md5': '443360ee1b58007bc3dcf09b41d093bb',
+ 'info_dict': {
+ 'id': '3377616',
+ 'ext': 'mp3',
+ 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
+ 'description': 'md5:601cb790edd05908957dae8aaa866465',
+ 'upload_date': '20150220',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ formats = [{
+ 'url': media_url,
+ } for media_url in set(re.findall(r'var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage))]
+
+ podcast_title = self._search_regex(
+ r'<h2>([^<]+)</h2>', webpage, 'title')
+ episode_title = self._search_regex(
+ r'<h3>([^<]+)</h3>', webpage, 'title', default=None)
+
+ title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title
+
+ description = self._html_search_regex(
+ r'<div id="info_text_body">(.+?)</div>', webpage,
+ 'description', fatal=False)
+
+ thumbnail = self._search_regex(
+ r'<img[^>]+class="info-show-icon"[^>]+src="([^"]+)"',
+ webpage, 'thumbnail', fatal=False)
+
+ release_date = unified_strdate(self._search_regex(
+ r'<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'upload_date': release_date,
+ 'formats': formats,
+ }
import re
from .common import InfoExtractor
+from ..compat import compat_urlparse
from ..utils import (
+ determine_ext,
int_or_none,
unified_strdate,
ExtractorError,
class LifeNewsIE(InfoExtractor):
IE_NAME = 'lifenews'
IE_DESC = 'LIFE | NEWS'
- _VALID_URL = r'http://lifenews\.ru/(?:mobile/)?news/(?P<id>\d+)'
+ _VALID_URL = r'http://lifenews\.ru/(?:mobile/)?(?P<section>news|video)/(?P<id>\d+)'
- _TEST = {
+ _TESTS = [{
'url': 'http://lifenews.ru/news/126342',
'md5': 'e1b50a5c5fb98a6a544250f2e0db570a',
'info_dict': {
'thumbnail': 're:http://.*\.jpg',
'upload_date': '20140130',
}
- }
+ }, {
+ # video in <iframe>
+ 'url': 'http://lifenews.ru/news/152125',
+ 'md5': '77d19a6f0886cd76bdbf44b4d971a273',
+ 'info_dict': {
+ 'id': '152125',
+ 'ext': 'mp4',
+ 'title': 'В Сети появилось видео захвата «Правым сектором» колхозных полей ',
+ 'description': 'Жители двух поселков Днепропетровской области не простили радикалам угрозу лишения плодородных земель и пошли в лобовую. ',
+ 'upload_date': '20150402',
+ 'uploader': 'embed.life.ru',
+ }
+ }, {
+ 'url': 'http://lifenews.ru/news/153461',
+ 'md5': '9b6ef8bc0ffa25aebc8bdb40d89ab795',
+ 'info_dict': {
+ 'id': '153461',
+ 'ext': 'mp4',
+ 'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве',
+ 'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
+ 'upload_date': '20150505',
+ 'uploader': 'embed.life.ru',
+ }
+ }, {
+ 'url': 'http://lifenews.ru/video/13035',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
+ section = mobj.group('section')
- webpage = self._download_webpage('http://lifenews.ru/news/%s' % video_id, video_id, 'Downloading page')
+ webpage = self._download_webpage(
+ 'http://lifenews.ru/%s/%s' % (section, video_id),
+ video_id, 'Downloading page')
videos = re.findall(r'<video.*?poster="(?P<poster>[^"]+)".*?src="(?P<video>[^"]+)".*?></video>', webpage)
- if not videos:
+ iframe_link = self._html_search_regex(
+ '<iframe[^>]+src=["\']([^"\']+)["\']', webpage, 'iframe link', default=None)
+ if not videos and not iframe_link:
raise ExtractorError('No media links available for %s' % video_id)
title = self._og_search_title(webpage)
description = self._og_search_description(webpage)
view_count = self._html_search_regex(
- r'<div class=\'views\'>(\d+)</div>', webpage, 'view count', fatal=False)
+ r'<div class=\'views\'>\s*(\d+)\s*</div>', webpage, 'view count', fatal=False)
comment_count = self._html_search_regex(
- r'<div class=\'comments\'>\s*<span class=\'counter\'>(\d+)</span>', webpage, 'comment count', fatal=False)
+ r'<div class=\'comments\'>\s*<span class=\'counter\'>\s*(\d+)\s*</span>', webpage, 'comment count', fatal=False)
upload_date = self._html_search_regex(
r'<time datetime=\'([^\']+)\'>', webpage, 'upload date', fatal=False)
if upload_date is not None:
upload_date = unified_strdate(upload_date)
+ common_info = {
+ 'description': description,
+ 'view_count': int_or_none(view_count),
+ 'comment_count': int_or_none(comment_count),
+ 'upload_date': upload_date,
+ }
+
def make_entry(video_id, media, video_number=None):
- return {
+ cur_info = dict(common_info)
+ cur_info.update({
'id': video_id,
'url': media[1],
'thumbnail': media[0],
'title': title if video_number is None else '%s-video%s' % (title, video_number),
- 'description': description,
- 'view_count': int_or_none(view_count),
- 'comment_count': int_or_none(comment_count),
- 'upload_date': upload_date,
- }
+ })
+ return cur_info
+
+ if iframe_link:
+ iframe_link = self._proto_relative_url(iframe_link, 'http:')
+ cur_info = dict(common_info)
+ cur_info.update({
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'title': title,
+ 'url': iframe_link,
+ })
+ return cur_info
if len(videos) == 1:
return make_entry(video_id, videos[0])
else:
return [make_entry(video_id, media, video_number + 1) for video_number, media in enumerate(videos)]
+
+
+class LifeEmbedIE(InfoExtractor):
+ IE_NAME = 'life:embed'
+ _VALID_URL = r'http://embed\.life\.ru/embed/(?P<id>[\da-f]{32})'
+
+ _TEST = {
+ 'url': 'http://embed.life.ru/embed/e50c2dec2867350528e2574c899b8291',
+ 'md5': 'b889715c9e49cb1981281d0e5458fbbe',
+ 'info_dict': {
+ 'id': 'e50c2dec2867350528e2574c899b8291',
+ 'ext': 'mp4',
+ 'title': 'e50c2dec2867350528e2574c899b8291',
+ 'thumbnail': 're:http://.*\.jpg',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ formats = []
+ for video_url in re.findall(r'"file"\s*:\s*"([^"]+)', webpage):
+ video_url = compat_urlparse.urljoin(url, video_url)
+ ext = determine_ext(video_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', m3u8_id='m3u8'))
+ else:
+ formats.append({
+ 'url': video_url,
+ 'format_id': ext,
+ 'preference': 1,
+ })
+ self._sort_formats(formats)
+
+ thumbnail = self._search_regex(
+ r'"image"\s*:\s*"([^"]+)', webpage, 'thumbnail', default=None)
+
+ return {
+ 'id': video_id,
+ 'title': video_id,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
import re
import json
+import itertools
from .common import InfoExtractor
from ..compat import (
class LivestreamIE(InfoExtractor):
IE_NAME = 'livestream'
- _VALID_URL = r'https?://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>[0-9]+)(?:/player)?)?/?(?:$|[?#])'
+ _VALID_URL = r'https?://(?:new\.)?livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>[0-9]+)(?:/player)?)?/?(?:$|[?#])'
_TESTS = [{
'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
'md5': '53274c76ba7754fb0e8d072716f2292b',
'id': '2245590',
},
'playlist_mincount': 4,
+ }, {
+ 'url': 'http://new.livestream.com/chess24/tatasteelchess',
+ 'info_dict': {
+ 'title': 'Tata Steel Chess',
+ 'id': '3705884',
+ },
+ 'playlist_mincount': 60,
}, {
'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640',
'only_matching': True,
+ }, {
+ 'url': 'http://livestream.com/bsww/concacafbeachsoccercampeonato2015',
+ 'only_matching': True,
}]
def _parse_smil(self, video_id, smil_url):
'view_count': video_data.get('views'),
}
+ def _extract_event(self, info):
+ event_id = compat_str(info['id'])
+ account = compat_str(info['owner_account_id'])
+ root_url = (
+ 'https://new.livestream.com/api/accounts/{account}/events/{event}/'
+ 'feed.json'.format(account=account, event=event_id))
+
+ def _extract_videos():
+ last_video = None
+ for i in itertools.count(1):
+ if last_video is None:
+ info_url = root_url
+ else:
+ info_url = '{root}?&id={id}&newer=-1&type=video'.format(
+ root=root_url, id=last_video)
+ videos_info = self._download_json(info_url, event_id, 'Downloading page {0}'.format(i))['data']
+ videos_info = [v['data'] for v in videos_info if v['type'] == 'video']
+ if not videos_info:
+ break
+ for v in videos_info:
+ yield self._extract_video_info(v)
+ last_video = videos_info[-1]['id']
+ return self.playlist_result(_extract_videos(), event_id, info['full_name'])
+
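# A standalone sketch of the cursor-style pagination in _extract_videos
# above: keep asking the feed for items newer than the last seen id until
# a page comes back empty. fetch_page() is a stand-in for the feed.json
# request; the data is invented.
import itertools

FEED = [{'id': n} for n in range(1, 8)]

def fetch_page(last_id, page_size=3):
    items = FEED if last_id is None else [v for v in FEED if v['id'] > last_id]
    return items[:page_size]

def iter_videos():
    last_id = None
    for page_num in itertools.count(1):  # mirrors the 'Downloading page N' note
        page = fetch_page(last_id)
        if not page:
            break
        for video in page:
            yield video
        last_id = page[-1]['id']

assert [v['id'] for v in iter_videos()] == list(range(1, 8))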
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
result = result and compat_str(vdata['data']['id']) == vid
return result
- videos = [self._extract_video_info(video_data['data'])
- for video_data in info['feed']['data']
- if is_relevant(video_data, video_id)]
if video_id is None:
# This is an event page:
- return self.playlist_result(
- videos, '%s' % info['id'], info['full_name'])
+ return self._extract_event(info)
else:
+ videos = [self._extract_video_info(video_data['data'])
+ for video_data in info['feed']['data']
+ if is_relevant(video_data, video_id)]
if not videos:
raise ExtractorError('Cannot find video %s' % video_id)
return videos[0]
# The original version of Livestream uses a different system
class LivestreamOriginalIE(InfoExtractor):
IE_NAME = 'livestream:original'
- _VALID_URL = r'''(?x)https?://www\.livestream\.com/
+ _VALID_URL = r'''(?x)https?://original\.livestream\.com/
(?P<user>[^/]+)/(?P<type>video|folder)
(?:\?.*?Id=|/)(?P<id>.*?)(&|$)
'''
_TESTS = [{
- 'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
+ 'url': 'http://original.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
'info_dict': {
'id': 'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital',
},
- 'params': {
- # rtmp
- 'skip_download': True,
- },
}, {
- 'url': 'https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3',
+ 'url': 'https://original.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3',
'info_dict': {
'id': 'a07bf706-d0e4-4e75-a747-b021d84f2fd3',
},
api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
info = self._download_xml(api_url, video_id)
+        # This URL is used on mobile devices
+ stream_url = 'http://x{0}x.api.channel.livestream.com/3.0/getstream.json?id={1}'.format(user, video_id)
+ stream_info = self._download_json(stream_url, video_id)
item = info.find('channel').find('item')
ns = {'media': 'http://search.yahoo.com/mrss'}
thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url']
- # Remove the extension and number from the path (like 1.jpg)
- path = self._search_regex(r'(user-files/.+)_.*?\.jpg$', thumbnail_url, 'path')
return {
'id': video_id,
'title': item.find('title').text,
- 'url': 'rtmp://extondemand.livestream.com/ondemand',
- 'play_path': 'trans/dv15/mogulus-{0}'.format(path),
- 'player_url': 'http://static.livestream.com/chromelessPlayer/v21/playerapi.swf?hash=5uetk&v=0803&classid=D27CDB6E-AE6D-11cf-96B8-444553540000&jsEnabled=false&wmode=opaque',
- 'ext': 'flv',
+ 'url': stream_info['progressiveUrl'],
'thumbnail': thumbnail_url,
}
'url': data['streamer'],
'play_path': 'mp4:%s' % data['file'],
'preference': -1,
+ 'rtmp_real_time': True,
})
else:
formats.extend(
)
-class LyndaIE(InfoExtractor):
+class LyndaBaseIE(InfoExtractor):
+ _LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
+ _SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true'
+ _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
+ _NETRC_MACHINE = 'lynda'
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ (username, password) = self._get_login_info()
+ if username is None:
+ return
+
+ login_form = {
+ 'username': username,
+ 'password': password,
+ 'remember': 'false',
+ 'stayPut': 'false'
+ }
+ request = compat_urllib_request.Request(
+ self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
+ login_page = self._download_webpage(
+ request, None, 'Logging in as %s' % username)
+
+ # Not (yet) logged in
+ m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page)
+ if m is not None:
+ response = m.group('json')
+ response_json = json.loads(response)
+ state = response_json['state']
+
+ if state == 'notlogged':
+ raise ExtractorError(
+ 'Unable to login, incorrect username and/or password',
+ expected=True)
+
+            # This happens when we get the popup:
+ # > You're already logged in to lynda.com on two devices.
+ # > If you log in here, we'll log you out of another device.
+ # So, we need to confirm this.
+ if state == 'conflicted':
+ confirm_form = {
+ 'username': '',
+ 'password': '',
+ 'resolve': 'true',
+ 'remember': 'false',
+ 'stayPut': 'false',
+ }
+ request = compat_urllib_request.Request(
+ self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form))
+ login_page = self._download_webpage(
+ request, None,
+ 'Confirming log in and log out from another device')
+
+ if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
+ raise ExtractorError('Unable to log in')
+
+
+class LyndaIE(LyndaBaseIE):
IE_NAME = 'lynda'
IE_DESC = 'lynda.com videos'
- _VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(\d+)'
- _LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
+ _VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(?P<id>\d+)'
_NETRC_MACHINE = 'lynda'
- _SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true'
_TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'
- ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
-
_TESTS = [{
'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
'md5': 'ecfc6862da89489161fb9cd5f5a6fac1',
'only_matching': True,
}]
- def _real_initialize(self):
- self._login()
-
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group(1)
+ video_id = self._match_id(url)
- page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, video_id,
- 'Downloading video JSON')
+ page = self._download_webpage(
+ 'http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id,
+ video_id, 'Downloading video JSON')
video_json = json.loads(page)
if 'Status' in video_json:
- raise ExtractorError('lynda returned error: %s' % video_json['Message'], expected=True)
+ raise ExtractorError(
+ 'lynda returned error: %s' % video_json['Message'], expected=True)
if video_json['HasAccess'] is False:
raise ExtractorError(
- 'Video %s is only available for members. ' % video_id + self.ACCOUNT_CREDENTIALS_HINT, expected=True)
+ 'Video %s is only available for members. '
+ % video_id + self._ACCOUNT_CREDENTIALS_HINT, expected=True)
video_id = compat_str(video_json['ID'])
duration = video_json['DurationInSeconds']
'formats': formats
}
- def _login(self):
- (username, password) = self._get_login_info()
- if username is None:
- return
-
- login_form = {
- 'username': username,
- 'password': password,
- 'remember': 'false',
- 'stayPut': 'false'
- }
- request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
- login_page = self._download_webpage(request, None, 'Logging in as %s' % username)
-
- # Not (yet) logged in
- m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page)
- if m is not None:
- response = m.group('json')
- response_json = json.loads(response)
- state = response_json['state']
-
- if state == 'notlogged':
- raise ExtractorError('Unable to login, incorrect username and/or password', expected=True)
-
- # This is when we get popup:
- # > You're already logged in to lynda.com on two devices.
- # > If you log in here, we'll log you out of another device.
- # So, we need to confirm this.
- if state == 'conflicted':
- confirm_form = {
- 'username': '',
- 'password': '',
- 'resolve': 'true',
- 'remember': 'false',
- 'stayPut': 'false',
- }
- request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form))
- login_page = self._download_webpage(request, None, 'Confirming log in and log out from another device')
-
- if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
- raise ExtractorError('Unable to log in')
-
def _fix_subtitles(self, subs):
srt = ''
+ seq_counter = 0
for pos in range(0, len(subs) - 1):
seq_current = subs[pos]
m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
continue
appear_time = m_current.group('timecode')
disappear_time = m_next.group('timecode')
- text = seq_current['Caption'].lstrip()
- srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text)
+ text = seq_current['Caption'].strip()
+ if text:
+ seq_counter += 1
+ srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (seq_counter, appear_time, disappear_time, text)
if srt:
return srt
return {}
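# A standalone sketch of the subtitle fix-up above: each caption ends
# where the next one begins, empty captions are dropped, and the
# surviving cues are numbered sequentially (the bug being fixed was
# numbering cues by list position and omitting the blank line that
# terminates an SRT block). The sample captions are invented.
import re

TIMECODE = r'\[(?P<tc>\d+:\d+:\d+[\.,]\d+)\]'
subs = [
    {'Timecode': '[00:00:01.000]', 'Caption': 'Hello.'},
    {'Timecode': '[00:00:03.000]', 'Caption': '   '},  # blank, dropped
    {'Timecode': '[00:00:05.000]', 'Caption': 'World.'},
    {'Timecode': '[00:00:07.000]', 'Caption': ''},
]
srt, counter = '', 0
for pos in range(len(subs) - 1):
    m_cur = re.match(TIMECODE, subs[pos]['Timecode'])
    m_next = re.match(TIMECODE, subs[pos + 1]['Timecode'])
    if not (m_cur and m_next):
        continue
    text = subs[pos]['Caption'].strip()
    if not text:
        continue
    counter += 1
    srt += '%d\r\n%s --> %s\r\n%s\r\n\r\n' % (
        counter, m_cur.group('tc'), m_next.group('tc'), text)
assert srt.startswith('1\r\n00:00:01.000 --> 00:00:03.000\r\nHello.')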
-class LyndaCourseIE(InfoExtractor):
+class LyndaCourseIE(LyndaBaseIE):
IE_NAME = 'lynda:course'
IE_DESC = 'lynda.com online courses'
course_path = mobj.group('coursepath')
course_id = mobj.group('courseid')
- page = self._download_webpage('http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
- course_id, 'Downloading course JSON')
+ page = self._download_webpage(
+ 'http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
+ course_id, 'Downloading course JSON')
course_json = json.loads(page)
if 'Status' in course_json and course_json['Status'] == 'NotFound':
- raise ExtractorError('Course %s does not exist' % course_id, expected=True)
+ raise ExtractorError(
+ 'Course %s does not exist' % course_id, expected=True)
unaccessible_videos = 0
videos = []
- (username, _) = self._get_login_info()
# Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided
        # by the single video API anymore
for chapter in course_json['Chapters']:
for video in chapter['Videos']:
- if username is None and video['HasAccess'] is False:
+ if video['HasAccess'] is False:
unaccessible_videos += 1
continue
videos.append(video['ID'])
if unaccessible_videos > 0:
- self._downloader.report_warning('%s videos are only available for members and will not be downloaded. '
- % unaccessible_videos + LyndaIE.ACCOUNT_CREDENTIALS_HINT)
+ self._downloader.report_warning(
+ '%s videos are only available for members (or paid members) and will not be downloaded. '
+ % unaccessible_videos + self._ACCOUNT_CREDENTIALS_HINT)
entries = [
- self.url_result('http://www.lynda.com/%s/%s-4.html' %
- (course_path, video_id),
- 'Lynda')
+ self.url_result(
+ 'http://www.lynda.com/%s/%s-4.html' % (course_path, video_id),
+ 'Lynda')
for video_id in videos]
course_title = course_json['Title']
--- /dev/null
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ xpath_text,
+)
+
+
+class MegaVideozIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?megavideoz\.eu/video/(?P<id>[^/]+)(?:/(?P<display_id>[^/]+))?'
+ _TEST = {
+ 'url': 'http://megavideoz.eu/video/WM6UB919XMXH/SMPTE-Universal-Film-Leader',
+ 'info_dict': {
+ 'id': '48723',
+ 'display_id': 'SMPTE-Universal-Film-Leader',
+ 'ext': 'mp4',
+ 'title': 'SMPTE Universal Film Leader',
+ 'thumbnail': 're:https?://.*?\.jpg',
+ 'duration': 10.93,
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id') or video_id
+
+ webpage = self._download_webpage(url, display_id)
+
+ if any(p in webpage for p in ('>Video Not Found<', '>404 Error<')):
+ raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+
+ config = self._download_xml(
+ self._search_regex(
+ r"var\s+cnf\s*=\s*'([^']+)'", webpage, 'cnf url'),
+ display_id)
+
+ video_url = xpath_text(config, './file', 'video url', fatal=True)
+ title = xpath_text(config, './title', 'title', fatal=True)
+ thumbnail = xpath_text(config, './image', 'thumbnail')
+ duration = float_or_none(xpath_text(config, './duration', 'duration'))
+ video_id = xpath_text(config, './mediaid', 'video id') or video_id
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': video_url,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration
+ }
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import random
+
+from .common import InfoExtractor
+from ..utils import (
+ xpath_text,
+ int_or_none,
+ ExtractorError,
+)
+
+
+class MioMioIE(InfoExtractor):
+ IE_NAME = 'miomio.tv'
+ _VALID_URL = r'https?://(?:www\.)?miomio\.tv/watch/cc(?P<id>[0-9]+)'
+ _TESTS = [{
+ # "type=video" in flashvars
+ 'url': 'http://www.miomio.tv/watch/cc88912/',
+ 'md5': '317a5f7f6b544ce8419b784ca8edae65',
+ 'info_dict': {
+ 'id': '88912',
+ 'ext': 'flv',
+ 'title': '【SKY】字幕 铠武昭和VS平成 假面骑士大战FEAT战队 魔星字幕组 字幕',
+ 'duration': 5923,
+ },
+ }, {
+ 'url': 'http://www.miomio.tv/watch/cc184024/',
+ 'info_dict': {
+ 'id': '43729',
+ 'title': '《动漫同人插画绘制》',
+ },
+ 'playlist_mincount': 86,
+        'skip': 'Retrieving the video URL takes too long',
+ }, {
+ 'url': 'http://www.miomio.tv/watch/cc173113/',
+ 'info_dict': {
+ 'id': '173113',
+ 'title': 'The New Macbook 2015 上手试玩与简评'
+ },
+ 'playlist_mincount': 2,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_meta(
+ 'description', webpage, 'title', fatal=True)
+
+ mioplayer_path = self._search_regex(
+ r'src="(/mioplayer/[^"]+)"', webpage, 'ref_path')
+
+ xml_config = self._search_regex(
+ r'flashvars="type=(?:sina|video)&(.+?)&',
+ webpage, 'xml config')
+
+        # Skipping the following page request causes lags and eventual connection drop-outs
+ self._request_webpage(
+            'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/xml.php?id=%s&r=%s' % (video_id, random.randint(100, 999)),
+ video_id)
+
+        # The following XML contains the actual configuration information on the video file(s)
+ vid_config = self._download_xml(
+ 'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?{0}'.format(xml_config),
+ video_id)
+
+ http_headers = {
+ 'Referer': 'http://www.miomio.tv%s' % mioplayer_path,
+ }
+
+ if not int_or_none(xpath_text(vid_config, 'timelength')):
+ raise ExtractorError('Unable to load videos!', expected=True)
+
+ entries = []
+ for f in vid_config.findall('./durl'):
+ segment_url = xpath_text(f, 'url', 'video url')
+ if not segment_url:
+ continue
+ order = xpath_text(f, 'order', 'order')
+ segment_id = video_id
+ segment_title = title
+ if order:
+ segment_id += '-%s' % order
+ segment_title += ' part %s' % order
+ entries.append({
+ 'id': segment_id,
+ 'url': segment_url,
+ 'title': segment_title,
+ 'duration': int_or_none(xpath_text(f, 'length', 'duration'), 1000),
+ 'http_headers': http_headers,
+ })
+
+ if len(entries) == 1:
+ segment = entries[0]
+ segment['id'] = video_id
+ segment['title'] = title
+ return segment
+
+ return {
+ '_type': 'multi_video',
+ 'id': video_id,
+ 'entries': entries,
+ 'title': title,
+ 'http_headers': http_headers,
+ }
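# A standalone sketch of the single-versus-multi handling above: one
# entry is built per <durl> segment, but when only one segment exists it
# is returned directly as a plain video rather than wrapped in a
# multi_video playlist. The segment data is invented.
def build_result(video_id, title, segments):
    entries = []
    for seg in segments:
        order = seg.get('order')
        entries.append({
            'id': video_id + ('-%s' % order if order else ''),
            'url': seg['url'],
            'title': title + (' part %s' % order if order else ''),
        })
    if len(entries) == 1:
        entry = entries[0]
        entry.update({'id': video_id, 'title': title})
        return entry
    return {'_type': 'multi_video', 'id': video_id,
            'title': title, 'entries': entries}

single = build_result('88912', 'clip', [{'url': 'http://cdn.example/a.flv'}])
assert single['id'] == '88912' and '_type' not in single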
ExtractorError,
HEADRequest,
str_to_int,
- parse_iso8601,
)
'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
'uploader': 'Daniel Holbach',
'uploader_id': 'dholbach',
- 'upload_date': '20111115',
- 'timestamp': 1321359578,
'thumbnail': 're:https?://.*\.jpg',
'view_count': int,
'like_count': int,
'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
'info_dict': {
'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat',
- 'ext': 'm4a',
- 'title': 'Electric Relaxation vol. 3',
+ 'ext': 'mp3',
+ 'title': 'Caribou 7 inch Vinyl Mix & Chat',
'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
- 'uploader': 'Daniel Drumz',
+ 'uploader': 'Gilles Peterson Worldwide',
'uploader_id': 'gillespeterson',
- 'thumbnail': 're:https?://.*\.jpg',
+ 'thumbnail': 're:https?://.*/images/',
'view_count': int,
'like_count': int,
},
}]
- def _get_url(self, track_id, template_url):
- server_count = 30
- for i in range(server_count):
- url = template_url % i
- try:
- # We only want to know if the request succeed
- # don't download the whole file
- self._request_webpage(
- HEADRequest(url), track_id,
- 'Checking URL %d/%d ...' % (i + 1, server_count + 1))
- return url
- except ExtractorError:
- pass
-
- return None
+ def _check_url(self, url, track_id, ext):
+ try:
+            # We only want to know if the request succeeds,
+            # so don't download the whole file
+ self._request_webpage(
+ HEADRequest(url), track_id,
+ 'Trying %s URL' % ext)
+ return True
+ except ExtractorError:
+ return False
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
preview_url = self._search_regex(
r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url')
song_url = preview_url.replace('/previews/', '/c/originals/')
- template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
- final_song_url = self._get_url(track_id, template_url)
- if final_song_url is None:
- self.to_screen('Trying with m4a extension')
- template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
- final_song_url = self._get_url(track_id, template_url)
- if final_song_url is None:
- raise ExtractorError('Unable to extract track url')
+ if not self._check_url(song_url, track_id, 'mp3'):
+ song_url = song_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
+ if not self._check_url(song_url, track_id, 'm4a'):
+ raise ExtractorError('Unable to extract track url')
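# A standalone sketch of the HEAD probe behind _check_url above: request
# only the headers (no body) to test whether the mp3 URL is live, and
# fall back to the m4a variant otherwise. Plain Python 3 urllib stands in
# for youtube-dl's HEADRequest helper; the URLs are placeholders.
from urllib.request import Request, urlopen

def url_ok(url):
    try:
        urlopen(Request(url, method='HEAD'), timeout=10)
        return True
    except OSError:  # URLError, HTTPError and timeouts all derive from OSError
        return False

song_url = 'https://stream.example.com/c/originals/track.mp3'
if not url_ok(song_url):
    song_url = song_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')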
PREFIX = (
- r'<span class="play-button[^"]*?"'
+ r'm-play-on-spacebar[^>]+'
r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
title = self._html_search_regex(
PREFIX + r'm-title="([^"]+)"', webpage, 'title')
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
description = self._og_search_description(webpage)
like_count = str_to_int(self._search_regex(
- [r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"',
- r'/favorites/?">([0-9]+)<'],
+ r'\bbutton-favorite\b[^>]+m-ajax-toggle-count="([^"]+)"',
webpage, 'like count', fatal=False))
view_count = str_to_int(self._search_regex(
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
r'/listeners/?">([0-9,.]+)</a>'],
webpage, 'play count', fatal=False))
- timestamp = parse_iso8601(self._search_regex(
- r'<time itemprop="dateCreated" datetime="([^"]+)">',
- webpage, 'upload date', default=None))
return {
'id': track_id,
'title': title,
- 'url': final_song_url,
+ 'url': song_url,
'description': description,
'thumbnail': thumbnail,
'uploader': uploader,
'uploader_id': uploader_id,
- 'timestamp': timestamp,
'view_count': view_count,
'like_count': like_count,
}
class MLBIE(InfoExtractor):
- _VALID_URL = r'https?://m(?:lb)?\.mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|(?:shared/video/embed/embed\.html|[^/]+/video/play\.jsp)\?.*?\bcontent_id=)(?P<id>n?\d+)'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:[\da-z_-]+\.)*mlb\.com/
+ (?:
+ (?:
+ (?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|
+ (?:
+ shared/video/embed/(?:embed|m-internal-embed)\.html|
+ (?:[^/]+/)+(?:play|index)\.jsp|
+ )\?.*?\bcontent_id=
+ )
+ (?P<id>n?\d+)|
+ (?:[^/]+/)*(?P<path>[^/]+)
+ )
+ '''
_TESTS = [
{
'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
'thumbnail': 're:^https?://.*\.jpg$',
},
},
+ {
+ 'url': 'http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer',
+ 'md5': 'b190e70141fb9a1552a85426b4da1b5d',
+ 'info_dict': {
+ 'id': '75609783',
+ 'ext': 'mp4',
+ 'title': 'Must C: Pillar climbs for catch',
+ 'description': '4/15/15: Blue Jays outfielder Kevin Pillar continues his defensive dominance by climbing the wall in left to rob Tim Beckham of a home run',
+ 'timestamp': 1429124820,
+ 'upload_date': '20150415',
+ }
+ },
{
'url': 'http://m.mlb.com/shared/video/embed/embed.html?content_id=35692085&topic_id=6479266&width=400&height=224&property=mlb',
'only_matching': True,
'url': 'http://mlb.mlb.com/es/video/play.jsp?content_id=36599553',
'only_matching': True,
},
+ {
+ 'url': 'http://m.cardinals.mlb.com/stl/video/v51175783/atlstl-piscotty-makes-great-sliding-catch-on-line/?partnerId=as_mlb_20150321_42500876&adbid=579409712979910656&adbpl=tw&adbpr=52847728',
+ 'only_matching': True,
+ },
+ {
+ # From http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer
+ 'url': 'http://mlb.mlb.com/shared/video/embed/m-internal-embed.html?content_id=75609783&property=mlb&autoplay=true&hashmode=false&siteSection=mlb/multimedia/article_118550098/article_embed&club=mlb',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://washington.nationals.mlb.com/mlb/gameday/index.jsp?c_id=was&gid=2015_05_09_atlmlb_wasmlb_1&lang=en&content_id=108309983&mode=video#',
+ 'only_matching': True,
+ }
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
+ if not video_id:
+ video_path = mobj.group('path')
+ webpage = self._download_webpage(url, video_path)
+ video_id = self._search_regex(
+ [r'data-video-?id="(\d+)"', r'content_id=(\d+)'], webpage, 'video id')
+
detail = self._download_xml(
'http://m.mlb.com/gen/multimedia/detail/%s/%s/%s/%s.xml'
% (video_id[-3], video_id[-2], video_id[-1], video_id), video_id)
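# A standalone sketch of how the detail XML URL is built from the id
# resolved above: MLB shards its metadata paths on the last three digits
# of the content id. The id is the one from the article test case.
video_id = '75609783'
detail_url = ('http://m.mlb.com/gen/multimedia/detail/%s/%s/%s/%s.xml'
              % (video_id[-3], video_id[-2], video_id[-1], video_id))
assert detail_url.endswith('/detail/7/8/3/75609783.xml')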
compat_urllib_parse,
compat_urllib_request,
)
+from ..utils import ExtractorError
class MonikerIE(InfoExtractor):
video_id = self._match_id(url)
orig_webpage = self._download_webpage(url, video_id)
+ if '>File Not Found<' in orig_webpage:
+ raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+
+ error = self._search_regex(
+ r'class="err">([^<]+)<', orig_webpage, 'error', default=None)
+ if error:
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, error), expected=True)
+
fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage)
data = dict(fields)
class MTVServicesInfoExtractor(InfoExtractor):
_MOBILE_TEMPLATE = None
+ _LANG = None
@staticmethod
def _id_from_uri(uri):
mediagen_doc = self._download_xml(mediagen_url, video_id,
'Downloading video urls')
+ item = mediagen_doc.find('./video/item')
+ if item is not None and item.get('type') == 'text':
+ message = '%s returned error: ' % self.IE_NAME
+ if item.get('code') is not None:
+ message += '%s - ' % item.get('code')
+ message += item.text
+ raise ExtractorError(message, expected=True)
+
description_node = itemdoc.find('description')
if description_node is not None:
description = description_node.text.strip()
video_id = self._id_from_uri(uri)
feed_url = self._get_feed_url(uri)
data = compat_urllib_parse.urlencode({'uri': uri})
+ info_url = feed_url + '?'
+ if self._LANG:
+ info_url += 'lang=%s&' % self._LANG
+ info_url += data
idoc = self._download_xml(
- feed_url + '?' + data, video_id,
+ info_url, video_id,
'Downloading info', transform_source=fix_xml_ampersands)
return self.playlist_result(
[self._get_video_info(item) for item in idoc.findall('.//item')])
from ..utils import (
ExtractorError,
find_xpath_attr,
+ lowercase_escape,
+ unescapeHTML,
)
class NBCIE(InfoExtractor):
- _VALID_URL = r'http://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
+ _VALID_URL = r'https?://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
_TESTS = [
{
},
'skip': 'Only works from US',
},
+ {
+ 'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821',
+ 'info_dict': {
+ 'id': '8iUuyzWDdYUZ',
+ 'ext': 'flv',
+ 'title': 'Star Wars Teaser',
+ 'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442',
+ },
+ 'skip': 'Only works from US',
+ },
+ {
+ # This video has expired but with an escaped embedURL
+ 'url': 'http://www.nbc.com/parenthood/episode-guide/season-5/just-like-at-home/515',
+ 'skip': 'Expired'
+ }
]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- theplatform_url = self._search_regex(
- '(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
- webpage, 'theplatform url').replace('_no_endcard', '')
+ theplatform_url = unescapeHTML(lowercase_escape(self._html_search_regex(
+ [
+ r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
+ r'"embedURL"\s*:\s*"([^"]+)"'
+ ],
+ webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/')))
if theplatform_url.startswith('//'):
theplatform_url = 'http:' + theplatform_url
return self.url_result(theplatform_url)
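# A standalone sketch of the embedURL clean-up above: undo the
# JSON-escaped slashes, decode HTML entities (html.unescape stands in for
# youtube-dl's unescapeHTML/lowercase_escape pair, Python 3 only), and
# promote a protocol-relative URL to http. The sample value is invented.
import html

embed_url = '\\/\\/player.theplatform.com\\/p\\/NnzsPC\\/select\\/8iUuyzWDdYUZ&amp;form=html'
theplatform_url = html.unescape(embed_url.replace('\\/', '/'))
if theplatform_url.startswith('//'):
    theplatform_url = 'http:' + theplatform_url
assert theplatform_url == 'http://player.theplatform.com/p/NnzsPC/select/8iUuyzWDdYUZ&form=html'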
+class NBCSportsVPlayerIE(InfoExtractor):
+ _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
+
+ _TESTS = [{
+ 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI',
+ 'info_dict': {
+ 'id': '9CsDKds0kvHI',
+ 'ext': 'flv',
+ 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
+ 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
+ }
+ }, {
+ 'url': 'http://vplayer.nbcsports.com/p/BxmELC/nbc_embedshare/select/_hqLjQ95yx8Z',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_url(webpage):
+ iframe_m = re.search(
+ r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage)
+ if iframe_m:
+ return iframe_m.group('url')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ theplatform_url = self._og_search_video_url(webpage)
+ return self.url_result(theplatform_url, 'ThePlatform')
+
+
+class NBCSportsIE(InfoExtractor):
+    # Does not include https because its certificate is invalid
+ _VALID_URL = r'http://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
+
+ _TEST = {
+ 'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
+ 'info_dict': {
+ 'id': 'PHJSaFWbrTY9',
+ 'ext': 'flv',
+ 'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
+ 'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ return self.url_result(
+ NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer')
+
+
class NBCNewsIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
(?:video/.+?/(?P<id>\d+)|
ExtractorError,
int_or_none,
qualities,
+ parse_duration,
)
-class NDRIE(InfoExtractor):
- IE_NAME = 'ndr'
- IE_DESC = 'NDR.de - Mediathek'
- _VALID_URL = r'https?://www\.ndr\.de/.+?(?P<id>\d+)\.html'
-
- _TESTS = [
- {
- 'url': 'http://www.ndr.de/fernsehen/sendungen/nordmagazin/Kartoffeltage-in-der-Lewitz,nordmagazin25866.html',
- 'md5': '5bc5f5b92c82c0f8b26cddca34f8bb2c',
- 'note': 'Video file',
- 'info_dict': {
- 'id': '25866',
- 'ext': 'mp4',
- 'title': 'Kartoffeltage in der Lewitz',
- 'description': 'md5:48c4c04dde604c8a9971b3d4e3b9eaa8',
- 'duration': 166,
- }
- },
- {
- 'url': 'http://www.ndr.de/info/audio51535.html',
- 'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
- 'note': 'Audio file',
- 'info_dict': {
- 'id': '51535',
- 'ext': 'mp3',
- 'title': 'La Valette entgeht der Hinrichtung',
- 'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
- 'duration': 884,
- }
- }
- ]
-
+class NDRBaseIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
if description:
description = description.strip()
- duration = int_or_none(self._html_search_regex(r'duration: (\d+),\n', page, 'duration', fatal=False))
+ duration = int_or_none(self._html_search_regex(r'duration: (\d+),\n', page, 'duration', default=None))
+ if not duration:
+ duration = parse_duration(self._html_search_regex(
+ r'(<span class="min">\d+</span>:<span class="sec">\d+</span>)',
+ page, 'duration', default=None))
formats = []
'duration': duration,
'formats': formats,
}
+
+
+class NDRIE(NDRBaseIE):
+ IE_NAME = 'ndr'
+ IE_DESC = 'NDR.de - Mediathek'
+ _VALID_URL = r'https?://www\.ndr\.de/.+?(?P<id>\d+)\.html'
+
+ _TESTS = [
+ {
+ 'url': 'http://www.ndr.de/fernsehen/sendungen/nordmagazin/Kartoffeltage-in-der-Lewitz,nordmagazin25866.html',
+ 'md5': '5bc5f5b92c82c0f8b26cddca34f8bb2c',
+ 'note': 'Video file',
+ 'info_dict': {
+ 'id': '25866',
+ 'ext': 'mp4',
+ 'title': 'Kartoffeltage in der Lewitz',
+ 'description': 'md5:48c4c04dde604c8a9971b3d4e3b9eaa8',
+ 'duration': 166,
+ },
+ 'skip': '404 Not found',
+ },
+ {
+ 'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
+ 'md5': 'dadc003c55ae12a5d2f6bd436cd73f59',
+ 'info_dict': {
+ 'id': '988',
+ 'ext': 'mp4',
+ 'title': 'Party, Pötte und Parade',
+ 'description': 'Hunderttausende feiern zwischen Speicherstadt und St. Pauli den 826. Hafengeburtstag. Die NDR Sondersendung zeigt die schönsten und spektakulärsten Bilder vom Auftakt.',
+ 'duration': 3498,
+ },
+ },
+ {
+ 'url': 'http://www.ndr.de/info/audio51535.html',
+ 'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
+ 'note': 'Audio file',
+ 'info_dict': {
+ 'id': '51535',
+ 'ext': 'mp3',
+ 'title': 'La Valette entgeht der Hinrichtung',
+ 'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
+ 'duration': 884,
+ }
+ }
+ ]
+
+
+class NJoyIE(NDRBaseIE):
+ IE_NAME = 'N-JOY'
+ _VALID_URL = r'https?://www\.n-joy\.de/.+?(?P<id>\d+)\.html'
+
+ _TEST = {
+ 'url': 'http://www.n-joy.de/entertainment/comedy/comedy_contest/Benaissa-beim-NDR-Comedy-Contest,comedycontest2480.html',
+ 'md5': 'cb63be60cd6f9dd75218803146d8dc67',
+ 'info_dict': {
+ 'id': '2480',
+ 'ext': 'mp4',
+ 'title': 'Benaissa beim NDR Comedy Contest',
+ 'description': 'Von seinem sehr "behaarten" Leben lässt sich Benaissa trotz aller Schwierigkeiten nicht unterkriegen.',
+ 'duration': 654,
+ }
+ }
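# A standalone sketch of the duration fallback above: when the numeric
# "duration:" player variable is absent, the value is recovered from the
# visible <span class="min">MM</span>:<span class="sec">SS</span> markup.
# The snippet of markup is invented (values match the NDR test above).
import re

page = '<span class="min">58</span>:<span class="sec">18</span>'
m = re.search(r'<span class="min">(\d+)</span>:<span class="sec">(\d+)</span>', page)
duration = int(m.group(1)) * 60 + int(m.group(2))
assert duration == 3498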
'http://www.netzkino.de/beta/dist/production.min.js', video_id,
note='Downloading player code')
avo_js = self._search_regex(
- r'window\.avoCore\s*=.*?urlTemplate:\s*(\{.*?"\})',
+ r'var urlTemplate=(\{.*?"\})',
production_js, 'URL templates')
templates = self._parse_json(
avo_js, video_id, transform_source=js_to_json)
return json_string.replace('\\\'', '\'')
def _real_extract_video(self, video_id):
+ vid_parts = video_id.split(',')
+ if len(vid_parts) == 3:
+ video_id = '%s0%s%s-X-h' % (vid_parts[0][:4], vid_parts[1], vid_parts[2].rjust(4, '0'))
json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id
data = self._download_json(
json_url, video_id, transform_source=self._fix_json)
video_url = initial_video_url
join = compat_urlparse.urljoin
- return {
+ ret = {
'id': video_id,
'title': info['name'],
'url': video_url,
'thumbnail': join(join(video_url, '/u/'), info['bigImage']),
'upload_date': unified_strdate(info['releaseDate'].split('.')[0]),
}
+ if video_url.startswith('rtmp:'):
+ mobj = re.match(r'(?P<tc_url>rtmp://[^/]+/(?P<app>[a-z0-9/]+))/(?P<play_path>mp4:.*)', video_url)
+ ret.update({
+ 'tc_url': mobj.group('tc_url'),
+ 'play_path': mobj.group('play_path'),
+ 'app': mobj.group('app'),
+ 'no_resume': True,
+ })
+ return ret
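# A standalone sketch of the RTMP split added above: one named-group
# regex carves an rtmp:// URL into the tc_url, app and play_path pieces
# that rtmpdump needs. The URL is a made-up example.
import re

video_url = 'rtmp://cp12345.edgefcs.net/ondemand/mp4:path/to/clip.mp4'
mobj = re.match(
    r'(?P<tc_url>rtmp://[^/]+/(?P<app>[a-z0-9/]+))/(?P<play_path>mp4:.*)',
    video_url)
assert mobj.group('tc_url') == 'rtmp://cp12345.edgefcs.net/ondemand'
assert mobj.group('app') == 'ondemand'
assert mobj.group('play_path') == 'mp4:path/to/clip.mp4'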
class NHLIE(NHLBaseInfoExtractor):
IE_NAME = 'nhl.com'
- _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console)?(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
+ _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console)?(?:\?(?:.*?[?&])?)(?:id|hlg)=(?P<id>[-0-9a-zA-Z,]+)'
_TESTS = [{
'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
}, {
'url': 'http://video.nhl.com/videocenter/?id=736722',
'only_matching': True,
+ }, {
+ 'url': 'http://video.nhl.com/videocenter/console?hlg=20142015,2,299&lang=en',
+ 'md5': '076fcb88c255154aacbf0a7accc3f340',
+ 'info_dict': {
+ 'id': '2014020299-X-h',
+ 'ext': 'mp4',
+ 'title': 'Penguins at Islanders / Game Highlights',
+ 'description': 'Home broadcast - Pittsburgh Penguins at New York Islanders - November 22, 2014',
+ 'duration': 268,
+ 'upload_date': '20141122',
+ }
+ }, {
+ 'url': 'http://video.oilers.nhl.com/videocenter/console?id=691469&catid=4',
+ 'info_dict': {
+ 'id': '691469',
+ 'ext': 'mp4',
+ 'title': 'RAW | Craig MacTavish Full Press Conference',
+ 'description': 'Oilers GM Craig MacTavish addresses the media at Rexall Place on Friday.',
+ 'upload_date': '20141205',
+ },
+ 'params': {
+ 'skip_download': True, # Requires rtmpdump
+ }
}]
def _real_extract(self, url):
import re
import json
+import datetime
from .common import InfoExtractor
from ..compat import (
ExtractorError,
int_or_none,
parse_duration,
- unified_strdate,
+ parse_iso8601,
+ xpath_text,
+ determine_ext,
)
IE_NAME = 'niconico'
IE_DESC = 'ニコニコ動画'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.nicovideo.jp/watch/sm22312215',
'md5': 'd1a75c0823e2f629128c43e1212760f9',
'info_dict': {
'uploader': 'takuya0301',
'uploader_id': '2698420',
'upload_date': '20131123',
+ 'timestamp': 1385182762,
'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
'duration': 33,
},
- 'params': {
- 'username': 'ydl.niconico@gmail.com',
- 'password': 'youtube-dl',
+ }, {
+        # Files downloaded with and without credentials differ, so omit
+        # the md5 field
+ 'url': 'http://www.nicovideo.jp/watch/nm14296458',
+ 'info_dict': {
+ 'id': 'nm14296458',
+ 'ext': 'swf',
+ 'title': '【鏡音リン】Dance on media【オリジナル】take2!',
+ 'description': 'md5:689f066d74610b3b22e0f1739add0f58',
+ 'uploader': 'りょうた',
+ 'uploader_id': '18822557',
+ 'upload_date': '20110429',
+ 'timestamp': 1304065916,
+ 'duration': 209,
},
- }
+ }, {
+        # Video exists but is marked as "deleted"
+ # md5 is unstable
+ 'url': 'http://www.nicovideo.jp/watch/sm10000',
+ 'info_dict': {
+ 'id': 'sm10000',
+ 'ext': 'unknown_video',
+ 'description': 'deleted',
+ 'title': 'ドラえもんエターナル第3話「決戦第3新東京市」<前編>',
+ 'upload_date': '20071224',
+ 'timestamp': 1198527840, # timestamp field has different value if logged in
+ 'duration': 304,
+ },
+ }, {
+ 'url': 'http://www.nicovideo.jp/watch/so22543406',
+ 'info_dict': {
+ 'id': '1388129933',
+ 'ext': 'mp4',
+ 'title': '【第1回】RADIOアニメロミックス ラブライブ!~のぞえりRadio Garden~',
+ 'description': 'md5:b27d224bb0ff53d3c8269e9f8b561cf1',
+ 'timestamp': 1388851200,
+ 'upload_date': '20140104',
+ 'uploader': 'アニメロチャンネル',
+ 'uploader_id': '312',
+ }
+ }]
- _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/((?:[a-z]{2})?[0-9]+)'
+ _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
_NETRC_MACHINE = 'niconico'
# Determine whether the downloader used authentication to download video
_AUTHENTICATED = False
return True
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group(1)
+ video_id = self._match_id(url)
- # Get video webpage. We are not actually interested in it, but need
- # the cookies in order to be able to download the info webpage
- self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id)
+ # Get video webpage. We are not actually interested in it for normal
+ # cases, but need the cookies in order to be able to download the
+ # info webpage
+ webpage, handle = self._download_webpage_handle(
+ 'http://www.nicovideo.jp/watch/' + video_id, video_id)
+ if video_id.startswith('so'):
+ video_id = self._match_id(handle.geturl())
video_info = self._download_xml(
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
if self._AUTHENTICATED:
# Get flv info
flv_info_webpage = self._download_webpage(
- 'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
+ 'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
video_id, 'Downloading flv info')
else:
# Get external player info
flv_info_request, video_id,
note='Downloading flv info', errnote='Unable to download flv info')
- if 'deleted=' in flv_info_webpage:
- raise ExtractorError('The video has been deleted.',
- expected=True)
- video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
+ flv_info = compat_urlparse.parse_qs(flv_info_webpage)
+ if 'url' not in flv_info:
+ if 'deleted' in flv_info:
+ raise ExtractorError('The video has been deleted.',
+ expected=True)
+ else:
+ raise ExtractorError('Unable to find video URL')
+
+ video_real_url = flv_info['url'][0]
# Start extracting information
- title = video_info.find('.//title').text
- extension = video_info.find('.//movie_type').text
+ title = xpath_text(video_info, './/title')
+ if not title:
+ title = self._og_search_title(webpage, default=None)
+ if not title:
+ title = self._html_search_regex(
+ r'<span[^>]+class="videoHeaderTitle"[^>]*>([^<]+)</span>',
+ webpage, 'video title')
+
+ watch_api_data_string = self._html_search_regex(
+ r'<div[^>]+id="watchAPIDataContainer"[^>]+>([^<]+)</div>',
+ webpage, 'watch api data', default=None)
+ watch_api_data = self._parse_json(watch_api_data_string, video_id) if watch_api_data_string else {}
+ video_detail = watch_api_data.get('videoDetail', {})
+
+ extension = xpath_text(video_info, './/movie_type')
+ if not extension:
+ extension = determine_ext(video_real_url)
video_format = extension.upper()
- thumbnail = video_info.find('.//thumbnail_url').text
- description = video_info.find('.//description').text
- upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0])
- view_count = int_or_none(video_info.find('.//view_counter').text)
- comment_count = int_or_none(video_info.find('.//comment_num').text)
- duration = parse_duration(video_info.find('.//length').text)
- webpage_url = video_info.find('.//watch_url').text
+
+ thumbnail = (
+ xpath_text(video_info, './/thumbnail_url') or
+ self._html_search_meta('image', webpage, 'thumbnail', default=None) or
+ video_detail.get('thumbnail'))
+
+ description = xpath_text(video_info, './/description')
+
+ timestamp = parse_iso8601(xpath_text(video_info, './/first_retrieve'))
+ if not timestamp:
+ match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
+ if match:
+ timestamp = parse_iso8601(match.replace('+', ':00+'))
+ if not timestamp and video_detail.get('postedAt'):
+ timestamp = parse_iso8601(
+ video_detail['postedAt'].replace('/', '-'),
+ delimiter=' ', timezone=datetime.timedelta(hours=9))
+
+ view_count = int_or_none(xpath_text(video_info, './/view_counter'))
+ if not view_count:
+ match = self._html_search_regex(
+ r'>Views: <strong[^>]*>([^<]+)</strong>',
+ webpage, 'view count', default=None)
+ if match:
+ view_count = int_or_none(match.replace(',', ''))
+ view_count = view_count or video_detail.get('viewCount')
+
+ comment_count = int_or_none(xpath_text(video_info, './/comment_num'))
+ if not comment_count:
+ match = self._html_search_regex(
+ r'>Comments: <strong[^>]*>([^<]+)</strong>',
+ webpage, 'comment count', default=None)
+ if match:
+ comment_count = int_or_none(match.replace(',', ''))
+ comment_count = comment_count or video_detail.get('commentCount')
+
+ duration = (parse_duration(
+ xpath_text(video_info, './/length') or
+ self._html_search_meta(
+ 'video:duration', webpage, 'video duration', default=None)) or
+ video_detail.get('length'))
+
+ webpage_url = xpath_text(video_info, './/watch_url') or url
if video_info.find('.//ch_id') is not None:
uploader_id = video_info.find('.//ch_id').text
'thumbnail': thumbnail,
'description': description,
'uploader': uploader,
- 'upload_date': upload_date,
+ 'timestamp': timestamp,
'uploader_id': uploader_id,
'view_count': view_count,
'comment_count': comment_count,
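# A standalone sketch of the postedAt fallback above: niconico's watch
# page reports "YYYY/MM/DD HH:MM:SS" in JST (UTC+9), so the date is
# normalized and shifted by a fixed datetime.timedelta, mirroring
# parse_iso8601(..., delimiter=' ', timezone=...). The sample value is
# invented.
import calendar
import datetime

posted_at = '2011/04/29 16:51:56'  # JST, as found in videoDetail
dt = datetime.datetime.strptime(
    posted_at.replace('/', '-'), '%Y-%m-%d %H:%M:%S')
timestamp = calendar.timegm((dt - datetime.timedelta(hours=9)).timetuple())
print(timestamp)  # seconds since the epoch, in UTC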
from ..utils import (
clean_html,
ExtractorError,
- unified_strdate,
+ int_or_none,
+ float_or_none,
+ parse_iso8601,
)
_SUB_LANG_TEMPLATE = '&sub_lang=%s'
_NETRC_MACHINE = 'noco'
- _TEST = {
- 'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/',
- 'md5': '0a993f0058ddbcd902630b2047ef710e',
- 'info_dict': {
- 'id': '11538',
- 'ext': 'mp4',
- 'title': 'Ami Ami Idol - Hello! France',
- 'description': 'md5:4eaab46ab68fa4197a317a88a53d3b86',
- 'upload_date': '20140412',
- 'uploader': 'Nolife',
- 'uploader_id': 'NOL',
- 'duration': 2851.2,
+ _TESTS = [
+ {
+ 'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/',
+ 'md5': '0a993f0058ddbcd902630b2047ef710e',
+ 'info_dict': {
+ 'id': '11538',
+ 'ext': 'mp4',
+ 'title': 'Ami Ami Idol - Hello! France',
+ 'description': 'md5:4eaab46ab68fa4197a317a88a53d3b86',
+ 'upload_date': '20140412',
+ 'uploader': 'Nolife',
+ 'uploader_id': 'NOL',
+ 'duration': 2851.2,
+ },
+ 'skip': 'Requires noco account',
},
- 'skip': 'Requires noco account',
- }
+ {
+ 'url': 'http://noco.tv/emission/12610/lbl42/the-guild/s01e01-wake-up-call',
+ 'md5': 'c190f1f48e313c55838f1f412225934d',
+ 'info_dict': {
+ 'id': '12610',
+ 'ext': 'mp4',
+ 'title': 'The Guild #1 - Wake-Up Call',
+ 'timestamp': 1403863200,
+ 'upload_date': '20140627',
+ 'uploader': 'LBL42',
+ 'uploader_id': 'LBL',
+ 'duration': 233.023,
+ },
+ 'skip': 'Requires noco account',
+ }
+ ]
def _real_initialize(self):
self._login()
'shows/%s/medias' % video_id,
video_id, 'Downloading video JSON')
+ show = self._call_api(
+ 'shows/by_id/%s' % video_id,
+ video_id, 'Downloading show JSON')[0]
+
+ options = self._call_api(
+ 'users/init', video_id,
+ 'Downloading user options JSON')['options']
+ audio_lang_pref = options.get('audio_language') or options.get('language', 'fr')
+
+ if audio_lang_pref == 'original':
+ audio_lang_pref = show['original_lang']
+ if len(medias) == 1:
+ audio_lang_pref = list(medias.keys())[0]
+ elif audio_lang_pref not in medias:
+ audio_lang_pref = 'fr'
+
qualities = self._call_api(
'qualities',
video_id, 'Downloading qualities JSON')
formats = []
- for lang, lang_dict in medias['fr']['video_list'].items():
- for format_id, fmt in lang_dict['quality_list'].items():
- format_id_extended = '%s-%s' % (lang, format_id) if lang != 'none' else format_id
-
- video = self._call_api(
- 'shows/%s/video/%s/fr' % (video_id, format_id.lower()),
- video_id, 'Downloading %s video JSON' % format_id_extended,
- lang if lang != 'none' else None)
-
- file_url = video['file']
- if not file_url:
- continue
-
- if file_url in ['forbidden', 'not found']:
- popmessage = video['popmessage']
- self._raise_error(popmessage['title'], popmessage['message'])
-
- formats.append({
- 'url': file_url,
- 'format_id': format_id_extended,
- 'width': fmt['res_width'],
- 'height': fmt['res_lines'],
- 'abr': fmt['audiobitrate'],
- 'vbr': fmt['videobitrate'],
- 'filesize': fmt['filesize'],
- 'format_note': qualities[format_id]['quality_name'],
- 'preference': qualities[format_id]['priority'],
- })
+ for audio_lang, audio_lang_dict in medias.items():
+ preference = 1 if audio_lang == audio_lang_pref else 0
+ for sub_lang, lang_dict in audio_lang_dict['video_list'].items():
+ for format_id, fmt in lang_dict['quality_list'].items():
+ format_id_extended = 'audio-%s_sub-%s_%s' % (audio_lang, sub_lang, format_id)
+
+ video = self._call_api(
+ 'shows/%s/video/%s/%s' % (video_id, format_id.lower(), audio_lang),
+ video_id, 'Downloading %s video JSON' % format_id_extended,
+ sub_lang if sub_lang != 'none' else None)
+
+ file_url = video['file']
+ if not file_url:
+ continue
+
+ if file_url in ['forbidden', 'not found']:
+ popmessage = video['popmessage']
+ self._raise_error(popmessage['title'], popmessage['message'])
+
+ formats.append({
+ 'url': file_url,
+ 'format_id': format_id_extended,
+ 'width': int_or_none(fmt.get('res_width')),
+ 'height': int_or_none(fmt.get('res_lines')),
+ 'abr': int_or_none(fmt.get('audiobitrate')),
+ 'vbr': int_or_none(fmt.get('videobitrate')),
+ 'filesize': int_or_none(fmt.get('filesize')),
+ 'format_note': qualities[format_id].get('quality_name'),
+ 'quality': qualities[format_id].get('priority'),
+ 'preference': preference,
+ })
self._sort_formats(formats)
- show = self._call_api(
- 'shows/by_id/%s' % video_id,
- video_id, 'Downloading show JSON')[0]
-
- upload_date = unified_strdate(show['online_date_start_utc'])
- uploader = show['partner_name']
- uploader_id = show['partner_key']
- duration = show['duration_ms'] / 1000.0
+ timestamp = parse_iso8601(show.get('online_date_start_utc'), ' ')
+ uploader = show.get('partner_name')
+ uploader_id = show.get('partner_key')
+ duration = float_or_none(show.get('duration_ms'), 1000)
thumbnails = []
for thumbnail_key, thumbnail_url in show.items():
'title': title,
'description': description,
'thumbnails': thumbnails,
- 'upload_date': upload_date,
+ 'timestamp': timestamp,
'uploader': uploader,
'uploader_id': uploader_id,
'duration': duration,
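# A standalone sketch of the audio-language preference above: honour the
# account's language option, map "original" to the show's original
# language, and fall back to the only available track or to 'fr' when the
# preferred language is missing. The data is invented.
def pick_audio_lang(options, show, medias):
    pref = options.get('audio_language') or options.get('language', 'fr')
    if pref == 'original':
        pref = show['original_lang']
    if len(medias) == 1:
        pref = list(medias.keys())[0]
    elif pref not in medias:
        pref = 'fr'
    return pref

medias = {'fr': {}, 'en': {}}
assert pick_audio_lang({'audio_language': 'de'}, {'original_lang': 'ja'}, medias) == 'fr'
assert pick_audio_lang({'language': 'en'}, {'original_lang': 'ja'}, medias) == 'en'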
if streams:
for stream in streams:
stream_type = stream.get('type').lower()
- if stream_type == 'ss':
+            # Smooth Streaming is not supported
+ if stream_type in ['ss', 'ms']:
continue
stream_info = self._download_json(
'http://ida.omroep.nl/aapi/?stream=%s&token=%s&type=jsonp'
stream_url = self._download_json(
stream_info['stream'], display_id,
'Downloading %s URL' % stream_type,
- transform_source=strip_jsonp)
+ 'Unable to download %s URL' % stream_type,
+ transform_source=strip_jsonp, fatal=False)
+ if not stream_url:
+ continue
if stream_type == 'hds':
f4m_formats = self._extract_f4m_formats(stream_url, display_id)
# f4m downloader downloads only piece of live stream
else:
formats.append({
'url': stream_url,
+ 'preference': -10,
})
self._sort_formats(formats)
import re
from .common import InfoExtractor
-from ..compat import compat_str
from ..utils import (
ExtractorError,
float_or_none,
class NRKIE(InfoExtractor):
- _VALID_URL = r'http://(?:www\.)?nrk\.no/(?:video|lyd)/[^/]+/(?P<id>[\dA-F]{16})'
+ _VALID_URL = r'(?:nrk:|http://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)'
_TESTS = [
{
- 'url': 'http://www.nrk.no/video/dompap_og_andre_fugler_i_piip_show/D0FA54B5C8B6CE59/emne/piipshow/',
- 'md5': 'a6eac35052f3b242bb6bb7f43aed5886',
+ 'url': 'http://www.nrk.no/video/PS*150533',
+ 'md5': 'bccd850baebefe23b56d708a113229c2',
'info_dict': {
'id': '150533',
'ext': 'flv',
'title': 'Dompap og andre fugler i Piip-Show',
- 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f'
+ 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
+ 'duration': 263,
}
},
{
- 'url': 'http://www.nrk.no/lyd/lyd_av_oppleser_for_blinde/AEFDDD5473BA0198/',
- 'md5': '3471f2a51718195164e88f46bf427668',
+ 'url': 'http://www.nrk.no/video/PS*154915',
+ 'md5': '0b1493ba1aae7d9579a5ad5531bc395a',
'info_dict': {
'id': '154915',
'ext': 'flv',
'title': 'Slik høres internett ut når du er blind',
'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
+ 'duration': 20,
}
},
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
- page = self._download_webpage(url, video_id)
-
- video_id = self._html_search_regex(r'<div class="nrk-video" data-nrk-id="(\d+)">', page, 'video id')
+ video_id = self._match_id(url)
data = self._download_json(
- 'http://v7.psapi.nrk.no/mediaelement/%s' % video_id, video_id, 'Downloading media JSON')
+ 'http://v8.psapi.nrk.no/mediaelement/%s' % video_id,
+ video_id, 'Downloading media JSON')
if data['usageRights']['isGeoBlocked']:
- raise ExtractorError('NRK har ikke rettig-heter til å vise dette programmet utenfor Norge', expected=True)
+ raise ExtractorError(
+            'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
+ expected=True)
+
+ video_url = data['mediaUrl'] + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81'
- video_url = data['mediaUrl'] + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124'
+ duration = parse_duration(data.get('duration'))
images = data.get('images')
if images:
'ext': 'flv',
'title': data['title'],
'description': data['description'],
+ 'duration': duration,
'thumbnail': thumbnail,
}
+class NRKPlaylistIE(InfoExtractor):
+ _VALID_URL = r'http://(?:www\.)?nrk\.no/(?!video)(?:[^/]+/)+(?P<id>[^/]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
+ 'info_dict': {
+ 'id': 'gjenopplev-den-historiske-solformorkelsen-1.12270763',
+ 'title': 'Gjenopplev den historiske solformørkelsen',
+ 'description': 'md5:c2df8ea3bac5654a26fc2834a542feed',
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'http://www.nrk.no/kultur/bok/rivertonprisen-til-karin-fossum-1.12266449',
+ 'info_dict': {
+ 'id': 'rivertonprisen-til-karin-fossum-1.12266449',
+ 'title': 'Rivertonprisen til Karin Fossum',
+ 'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.',
+ },
+ 'playlist_count': 5,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ entries = [
+ self.url_result('nrk:%s' % video_id, 'NRK')
+ for video_id in re.findall(
+ r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"',
+ webpage)
+ ]
+
+ playlist_title = self._og_search_title(webpage)
+ playlist_description = self._og_search_description(webpage)
+
+ return self.playlist_result(
+ entries, playlist_id, playlist_title, playlist_description)
+
+
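# A standalone sketch of the playlist handling above: scrape every
# data-video-id attribute from the "rich" player boxes and emit one
# internal 'nrk:<id>' reference per hit for NRKIE to resolve. The markup
# is invented.
import re

page = '''
<div class="rich video" data-video-id="150533"></div>
<div class="rich video" data-video-id="154915"></div>
'''
entries = ['nrk:%s' % vid for vid in re.findall(
    r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"', page)]
assert entries == ['nrk:150533', 'nrk:154915']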
class NRKTVIE(InfoExtractor):
_VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
}
]
- def _seconds2str(self, s):
- return '%02d:%02d:%02d.%03d' % (s / 3600, (s % 3600) / 60, s % 60, (s % 1) * 1000)
-
def _debug_print(self, txt):
if self._downloader.params.get('verbose', False):
self.to_screen('[debug] %s' % txt)
url = "%s%s" % (baseurl, subtitlesurl)
self._debug_print('%s: Subtitle url: %s' % (video_id, url))
captions = self._download_xml(
- url, video_id, 'Downloading subtitles',
- transform_source=lambda s: s.replace(r'<br />', '\r\n'))
+ url, video_id, 'Downloading subtitles')
lang = captions.get('lang', 'no')
- ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/ns/ttml}'))
- srt = ''
- for pos, p in enumerate(ps):
- begin = parse_duration(p.get('begin'))
- duration = parse_duration(p.get('dur'))
- starttime = self._seconds2str(begin)
- endtime = self._seconds2str(begin + duration)
- srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (compat_str(pos), starttime, endtime, p.text)
return {lang: [
{'ext': 'ttml', 'url': url},
- {'ext': 'srt', 'data': srt},
]}
def _extract_f4m(self, manifest_url, video_id):
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
-from ..utils import parse_iso8601
-
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ parse_iso8601,
+)
-class NYTimesIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?nytimes\.com/video/(?:[^/]+/)+(?P<id>\d+)'
-
- _TEST = {
- 'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
- 'md5': '18a525a510f942ada2720db5f31644c0',
- 'info_dict': {
- 'id': '100000002847155',
- 'ext': 'mov',
- 'title': 'Verbatim: What Is a Photocopier?',
- 'description': 'md5:93603dada88ddbda9395632fdc5da260',
- 'timestamp': 1398631707,
- 'upload_date': '20140427',
- 'uploader': 'Brett Weiner',
- 'duration': 419,
- }
- }
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+class NYTimesBaseIE(InfoExtractor):
+ def _extract_video_from_id(self, video_id):
video_data = self._download_json(
- 'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id, video_id, 'Downloading video JSON')
+ 'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id,
+ video_id, 'Downloading video JSON')
title = video_data['headline']
- description = video_data['summary']
- duration = video_data['duration'] / 1000.0
+ description = video_data.get('summary')
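+ # the API reports duration in milliseconds; scale down to seconds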
+ duration = float_or_none(video_data.get('duration'), 1000)
uploader = video_data['byline']
timestamp = parse_iso8601(video_data['publication_date'][:-8])
formats = [
{
'url': video['url'],
- 'format_id': video['type'],
- 'vcodec': video['video_codec'],
- 'width': video['width'],
- 'height': video['height'],
- 'filesize': get_file_size(video['fileSize']),
+ 'format_id': video.get('type'),
+ 'vcodec': video.get('video_codec'),
+ 'width': int_or_none(video.get('width')),
+ 'height': int_or_none(video.get('height')),
+ 'filesize': get_file_size(video.get('fileSize')),
} for video in video_data['renditions']
]
self._sort_formats(formats)
thumbnails = [
{
'url': 'http://www.nytimes.com/%s' % image['url'],
- 'resolution': '%dx%d' % (image['width'], image['height']),
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
} for image in video_data['images']
]
'formats': formats,
'thumbnails': thumbnails,
}
+
+
+class NYTimesIE(NYTimesBaseIE):
+ _VALID_URL = r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
+ 'md5': '18a525a510f942ada2720db5f31644c0',
+ 'info_dict': {
+ 'id': '100000002847155',
+ 'ext': 'mov',
+ 'title': 'Verbatim: What Is a Photocopier?',
+ 'description': 'md5:93603dada88ddbda9395632fdc5da260',
+ 'timestamp': 1398631707,
+ 'upload_date': '20140427',
+ 'uploader': 'Brett Weiner',
+ 'duration': 419,
+ }
+ }, {
+ 'url': 'http://www.nytimes.com/video/travel/100000003550828/36-hours-in-dubai.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ return self._extract_video_from_id(video_id)
+
+
+class NYTimesArticleIE(NYTimesBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?nytimes\.com/(.(?<!video))*?/(?:[^/]+/)*(?P<id>[^.]+)(?:\.html)?'
+ _TESTS = [{
+ 'url': 'http://www.nytimes.com/2015/04/14/business/owner-of-gravity-payments-a-credit-card-processor-is-setting-a-new-minimum-wage-70000-a-year.html?_r=0',
+ 'md5': 'e2076d58b4da18e6a001d53fd56db3c9',
+ 'info_dict': {
+ 'id': '100000003628438',
+ 'ext': 'mov',
+ 'title': 'New Minimum Wage: $70,000 a Year',
+ 'description': 'Dan Price, C.E.O. of Gravity Payments, surprised his 120-person staff by announcing that he planned over the next three years to raise the salary of every employee to $70,000 a year.',
+ 'timestamp': 1429033037,
+ 'upload_date': '20150414',
+ 'uploader': 'Matthew Williams',
+ }
+ }, {
+ 'url': 'http://www.nytimes.com/news/minute/2014/03/17/times-minute-whats-next-in-crimea/?_php=true&_type=blogs&_php=true&_type=blogs&_r=1',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
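+ # the article page embeds its player with the numeric video id in data-videoid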
+ video_id = self._html_search_regex(r'data-videoid="(\d+)"', webpage, 'video id')
+
+ return self._extract_video_from_id(video_id)
unified_strdate,
int_or_none,
qualities,
+ unescapeHTML,
)
webpage = self._download_webpage(url, video_id)
player = self._parse_json(
- self._search_regex(
- r"OKVideo\.start\(({.+?})\s*,\s*'VideoAutoplay_player'", webpage, 'player'),
+ unescapeHTML(self._search_regex(
+ r'data-attributes="([^"]+)"', webpage, 'player')),
video_id)
metadata = self._parse_json(player['flashvars']['metadata'], video_id)
from __future__ import unicode_literals
import re
import json
+import base64
from .common import InfoExtractor
from ..utils import (
unescapeHTML,
ExtractorError,
+ determine_ext,
+ int_or_none,
)
'description': '',
},
},
+ {
+ # Information available only through SAS api
+ # From http://community.plm.automation.siemens.com/t5/News-NX-Manufacturing/Tool-Path-Divide/ba-p/4187
+ 'url': 'http://player.ooyala.com/player.js?embedCode=FiOG81ZTrvckcchQxmalf4aQj590qTEx',
+ 'md5': 'a84001441b35ea492bc03736e59e7935',
+ 'info_dict': {
+ 'id': 'FiOG81ZTrvckcchQxmalf4aQj590qTEx',
+ 'ext': 'mp4',
+ 'title': 'Ooyala video',
+ }
+ }
]
@staticmethod
ie=cls.ie_key())
def _extract_result(self, info, more_info):
+ embedCode = info['embedCode']
+ video_url = info.get('ipad_url') or info['url']
+
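+ # the preferred iPad URL may point at an HLS manifest; expand it into individual formats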
+ if determine_ext(video_url) == 'm3u8':
+ formats = self._extract_m3u8_formats(video_url, embedCode, ext='mp4')
+ else:
+ formats = [{
+ 'url': video_url,
+ 'ext': 'mp4',
+ }]
+
return {
- 'id': info['embedCode'],
- 'ext': 'mp4',
+ 'id': embedCode,
'title': unescapeHTML(info['title']),
- 'url': info.get('ipad_url') or info['url'],
+ 'formats': formats,
'description': unescapeHTML(more_info['description']),
'thumbnail': more_info['promo'],
}
mobile_player, 'info', fatal=False, default=None)
if videos_info:
break
+
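+ # Fall back to the SAS authorization API when the mobile player yields nothing;
+ # the stream URLs it returns are base64-encoded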
+ if not videos_info:
+ formats = []
+ auth_data = self._download_json(
+ 'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?domain=www.example.org&supportedFormats=mp4,webm' % (embedCode, embedCode),
+ embedCode)
+
+ cur_auth_data = auth_data['authorization_data'][embedCode]
+
+ for stream in cur_auth_data['streams']:
+ formats.append({
+ 'url': base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8'),
+ 'ext': stream.get('delivery_type'),
+ 'format': stream.get('video_codec'),
+ 'format_id': stream.get('profile'),
+ 'width': int_or_none(stream.get('width')),
+ 'height': int_or_none(stream.get('height')),
+ 'abr': int_or_none(stream.get('audio_bitrate')),
+ 'vbr': int_or_none(stream.get('video_bitrate')),
+ })
+ if formats:
+ return {
+ 'id': embedCode,
+ 'formats': formats,
+ 'title': 'Ooyala video',
+ }
+
+ if not cur_auth_data['authorized']:
+ raise ExtractorError(cur_auth_data['message'], expected=True)
+
if not videos_info:
raise ExtractorError('Unable to extract info')
videos_info = videos_info.replace('\\"', '"')
HEADRequest,
unified_strdate,
ExtractorError,
+ strip_jsonp,
+ int_or_none,
+ float_or_none,
+ determine_ext,
+ remove_end,
)
'description': data['subtitle'],
'entries': entries
}
+
+
+class ORFIPTVIE(InfoExtractor):
+ IE_NAME = 'orf:iptv'
+ IE_DESC = 'iptv.ORF.at'
+ _VALID_URL = r'http://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://iptv.orf.at/stories/2275236/',
+ 'md5': 'c8b22af4718a4b4af58342529453e3e5',
+ 'info_dict': {
+ 'id': '350612',
+ 'ext': 'flv',
+ 'title': 'Weitere Evakuierungen um Vulkan Calbuco',
+ 'description': 'md5:d689c959bdbcf04efeddedbf2299d633',
+ 'duration': 68.197,
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'upload_date': '20150425',
+ },
+ }
+
+ def _real_extract(self, url):
+ story_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://iptv.orf.at/stories/%s' % story_id, story_id)
+
+ video_id = self._search_regex(
+ r'data-video(?:id)?="(\d+)"', webpage, 'video id')
+
+ data = self._download_json(
+ 'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id,
+ video_id)[0]
+
+ duration = float_or_none(data['duration'], 1000)
+
+ video = data['sources']['default']
+ load_balancer_url = video['loadBalancerUrl']
+ abr = int_or_none(video.get('audioBitrate'))
+ vbr = int_or_none(video.get('bitrate'))
+ fps = int_or_none(video.get('videoFps'))
+ width = int_or_none(video.get('videoWidth'))
+ height = int_or_none(video.get('videoHeight'))
+ thumbnail = video.get('preview')
+
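+ # the load balancer answers with JSONP mapping each transport (rtmp, f4m, m3u8) to a redirect URL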
+ rendition = self._download_json(
+ load_balancer_url, video_id, transform_source=strip_jsonp)
+
+ f = {
+ 'abr': abr,
+ 'vbr': vbr,
+ 'fps': fps,
+ 'width': width,
+ 'height': height,
+ }
+
+ formats = []
+ for format_id, format_url in rendition['redirect'].items():
+ if format_id == 'rtmp':
+ ff = f.copy()
+ ff.update({
+ 'url': format_url,
+ 'format_id': format_id,
+ })
+ formats.append(ff)
+ elif determine_ext(format_url) == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ format_url, video_id, f4m_id=format_id))
+ elif determine_ext(format_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', m3u8_id=format_id))
+ else:
+ continue
+ self._sort_formats(formats)
+
+ title = remove_end(self._og_search_title(webpage), ' - iptv.ORF.at')
+ description = self._og_search_description(webpage)
+ upload_date = unified_strdate(self._html_search_meta(
+ 'dc.date', webpage, 'upload date'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'formats': formats,
+ }
from .common import InfoExtractor
from ..utils import (
ExtractorError,
+ determine_ext,
+ int_or_none,
unified_strdate,
US_RATINGS,
)
for vid_id in video_id]
return self.playlist_result(entries, display_id)
- info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
- info = self._download_json(info_url, display_id)
-
- redirect_url = info['alternate_encoding']['url']
- redirect_info = self._download_json(
- redirect_url + '?format=json', display_id,
- 'Downloading video url info')
- if redirect_info['status'] == 'error':
- if redirect_info['http_code'] == 403:
- message = (
- 'The video is not available in your region due to '
- 'right restrictions')
+ info = self._download_json(
+ 'http://video.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
+ display_id)
+
+ formats = []
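+ # try both the recommended and the alternate encoding and collect every working format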
+ for encoding_name in ('recommended_encoding', 'alternate_encoding'):
+ redirect = info.get(encoding_name)
+ if not redirect:
+ continue
+ redirect_url = redirect.get('url')
+ if not redirect_url:
+ continue
+
+ redirect_info = self._download_json(
+ redirect_url + '?format=json', display_id,
+ 'Downloading %s video url info' % encoding_name)
+
+ if redirect_info['status'] == 'error':
+ if redirect_info['http_code'] == 403:
+ message = (
+ 'The video is not available in your region due to '
+ 'right restrictions')
+ else:
+ message = redirect_info['message']
+ raise ExtractorError(message, expected=True)
+
+ format_url = redirect_info.get('url')
+ if not format_url:
+ continue
+
+ if determine_ext(format_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, display_id, 'mp4', preference=1, m3u8_id='hls'))
else:
- message = redirect_info['message']
- raise ExtractorError(message, expected=True)
+ formats.append({
+ 'url': format_url,
+ 'format_id': redirect.get('eeid'),
+ })
+ self._sort_formats(formats)
rating_str = info.get('rating')
if rating_str is not None:
'id': video_id,
'display_id': display_id,
'title': info['title'],
- 'url': redirect_info['url'],
- 'ext': 'mp4',
'description': info['program'].get('description'),
'thumbnail': info.get('image_url'),
- 'duration': info.get('duration'),
+ 'duration': int_or_none(info.get('duration')),
'age_limit': age_limit,
'upload_date': upload_date,
+ 'formats': formats,
}
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ parse_iso8601,
+ xpath_text,
+)
+
+
+class PhilharmonieDeParisIE(InfoExtractor):
+ IE_DESC = 'Philharmonie de Paris'
+ _VALID_URL = r'http://live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://live.philharmoniedeparis.fr/concert/1032066.html',
+ 'info_dict': {
+ 'id': '1032066',
+ 'ext': 'flv',
+ 'title': 'md5:d1f5585d87d041d07ce9434804bc8425',
+ 'timestamp': 1428179400,
+ 'upload_date': '20150404',
+ 'duration': 6592.278,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://live.philharmoniedeparis.fr/Concert/1030324.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=1030324&track=&lang=fr',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ concert = self._download_xml(
+ 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=%s' % video_id,
+ video_id).find('./concert')
+
+ formats = []
+ info_dict = {
+ 'id': video_id,
+ 'title': xpath_text(concert, './titre', 'title', fatal=True),
+ 'formats': formats,
+ }
+
+ fichiers = concert.find('./fichiers')
+ stream = fichiers.attrib['serveurstream']
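+ # each <fichier> node lists plain and _hd RTMP play paths served from the shared stream server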
+ for fichier in fichiers.findall('./fichier'):
+ info_dict['duration'] = float_or_none(fichier.get('timecodefin'))
+ for quality, (format_id, suffix) in enumerate([('lq', ''), ('hq', '_hd')]):
+ format_url = fichier.get('url%s' % suffix)
+ if not format_url:
+ continue
+ formats.append({
+ 'url': stream,
+ 'play_path': format_url,
+ 'ext': 'flv',
+ 'format_id': format_id,
+ 'width': int_or_none(concert.get('largeur%s' % suffix)),
+ 'height': int_or_none(concert.get('hauteur%s' % suffix)),
+ 'quality': quality,
+ })
+ self._sort_formats(formats)
+
+ date, hour = concert.get('date'), concert.get('heure')
+ if date and hour:
+ info_dict['timestamp'] = parse_iso8601(
+ '%s-%s-%sT%s:00' % (date[0:4], date[4:6], date[6:8], hour))
+ elif date:
+ info_dict['upload_date'] = date
+
+ return info_dict
class PhoenixIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?phoenix\.de/content/(?P<id>[0-9]+)'
- _TEST = {
- 'url': 'http://www.phoenix.de/content/884301',
- 'md5': 'ed249f045256150c92e72dbb70eadec6',
- 'info_dict': {
- 'id': '884301',
- 'ext': 'mp4',
- 'title': 'Michael Krons mit Hans-Werner Sinn',
- 'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr',
- 'upload_date': '20141025',
- 'uploader': 'Im Dialog',
- }
- }
+ _VALID_URL = r'''(?x)https?://(?:www\.)?phoenix\.de/content/
+ (?:
+ phoenix/die_sendungen/(?:[^/]+/)?
+ )?
+ (?P<id>[0-9]+)'''
+ _TESTS = [
+ {
+ 'url': 'http://www.phoenix.de/content/884301',
+ 'md5': 'ed249f045256150c92e72dbb70eadec6',
+ 'info_dict': {
+ 'id': '884301',
+ 'ext': 'mp4',
+ 'title': 'Michael Krons mit Hans-Werner Sinn',
+ 'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr',
+ 'upload_date': '20141025',
+ 'uploader': 'Im Dialog',
+ }
+ },
+ {
+ 'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/869815',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/diskussionen/928234',
+ 'only_matching': True,
+ },
+ ]
def _real_extract(self, url):
video_id = self._match_id(url)
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ xpath_text,
+ qualities,
+)
+
+
+class PladformIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:
+ out\.pladform\.ru/player|
+ static\.pladform\.ru/player\.swf
+ )
+ \?.*\bvideoid=|
+ video\.pladform\.ru/catalog/video/videoid/
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ # http://muz-tv.ru/kinozal/view/7400/
+ 'url': 'http://out.pladform.ru/player?pl=24822&videoid=100183293',
+ 'md5': '61f37b575dd27f1bb2e1854777fe31f4',
+ 'info_dict': {
+ 'id': '100183293',
+ 'ext': 'mp4',
+ 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
+ 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 694,
+ 'age_limit': 0,
+ },
+ }, {
+ 'url': 'http://static.pladform.ru/player.swf?pl=21469&videoid=100183293&vkcid=0',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://video.pladform.ru/catalog/video/videoid/100183293/vkcid/0',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
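+ # getVideo returns <src> nodes per quality on success and an <error> root on failure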
+ video = self._download_xml(
+ 'http://out.pladform.ru/getVideo?pl=1&videoid=%s' % video_id,
+ video_id)
+
+ if video.tag == 'error':
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, video.text),
+ expected=True)
+
+ quality = qualities(('ld', 'sd', 'hd'))
+
+ formats = [{
+ 'url': src.text,
+ 'format_id': src.get('quality'),
+ 'quality': quality(src.get('quality')),
+ } for src in video.findall('./src')]
+ self._sort_formats(formats)
+
+ webpage = self._download_webpage(
+ 'http://video.pladform.ru/catalog/video/videoid/%s' % video_id,
+ video_id)
+
+ title = self._og_search_title(webpage, fatal=False) or xpath_text(
+ video, './/title', 'title', fatal=True)
+ description = self._search_regex(
+ r'</h3>\s*<p>([^<]+)</p>', webpage, 'description', fatal=False)
+ thumbnail = self._og_search_thumbnail(webpage) or xpath_text(
+ video, './/cover', 'cover')
+
+ duration = int_or_none(xpath_text(video, './/time', 'duration'))
+ age_limit = int_or_none(xpath_text(video, './/age18', 'age limit'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
import re
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse,
- compat_urllib_request,
-)
+from ..compat import compat_str
from ..utils import (
ExtractorError,
- float_or_none,
int_or_none,
- str_to_int,
+ parse_iso8601,
)
class PlayFMIE(InfoExtractor):
IE_NAME = 'play.fm'
- _VALID_URL = r'https?://(?:www\.)?play\.fm/[^?#]*(?P<upload_date>[0-9]{8})(?P<id>[0-9]{6})(?:$|[?#])'
+ _VALID_URL = r'https?://(?:www\.)?play\.fm/(?P<slug>(?:[^/]+/)+(?P<id>[^/]+))/?(?:$|[?#])'
_TEST = {
- 'url': 'http://www.play.fm/recording/leipzigelectronicmusicbatofarparis_fr20140712137220',
+ 'url': 'https://www.play.fm/dan-drastic/sven-tasnadi-leipzig-electronic-music-batofar-paris-fr-2014-07-12',
'md5': 'c505f8307825a245d0c7ad1850001f22',
'info_dict': {
- 'id': '137220',
+ 'id': '71276',
'ext': 'mp3',
- 'title': 'LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12',
- 'uploader': 'Sven Tasnadi',
- 'uploader_id': 'sventasnadi',
- 'duration': 5627.428,
- 'upload_date': '20140712',
+ 'title': 'Sven Tasnadi - LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12',
+ 'description': '',
+ 'duration': 5627,
+ 'timestamp': 1406033781,
+ 'upload_date': '20140722',
+ 'uploader': 'Dan Drastic',
+ 'uploader_id': '71170',
'view_count': int,
'comment_count': int,
- 'thumbnail': 're:^https?://.*\.jpg$',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
- upload_date = mobj.group('upload_date')
-
- rec_data = compat_urllib_parse.urlencode({'rec_id': video_id})
- req = compat_urllib_request.Request(
- 'http://www.play.fm/flexRead/recording', data=rec_data)
- req.add_header('Content-Type', 'application/x-www-form-urlencoded')
- rec_doc = self._download_xml(req, video_id)
+ slug = mobj.group('slug')
- error_node = rec_doc.find('./error')
- if error_node is not None:
- raise ExtractorError('An error occured: %s (code %s)' % (
- error_node.text, rec_doc.find('./status').text))
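+ # the v2 API resolves the URL slug to full recording metadata, including the numeric id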
+ recordings = self._download_json(
+ 'http://v2api.play.fm/recordings/slug/%s' % slug, video_id)
- recording = rec_doc.find('./recording')
- title = recording.find('./title').text
- view_count = str_to_int(recording.find('./stats/playcount').text)
- comment_count = str_to_int(recording.find('./stats/comments').text)
- duration = float_or_none(recording.find('./duration').text, scale=1000)
- thumbnail = recording.find('./image').text
+ error = recordings.get('error')
+ if isinstance(error, dict):
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, error.get('message')),
+ expected=True)
- artist = recording.find('./artists/artist')
- uploader = artist.find('./name').text
- uploader_id = artist.find('./slug').text
-
- video_url = '%s//%s/%s/%s/offset/0/sh/%s/rec/%s/jingle/%s/loc/%s' % (
- 'http:', recording.find('./url').text,
- recording.find('./_class').text, recording.find('./file_id').text,
- rec_doc.find('./uuid').text, video_id,
- rec_doc.find('./jingle/file_id').text,
- 'http%3A%2F%2Fwww.play.fm%2Fplayer',
- )
+ audio_url = recordings['audio']
+ video_id = compat_str(recordings.get('id') or video_id)
+ title = recordings['title']
+ description = recordings.get('description')
+ duration = int_or_none(recordings.get('recordingDuration'))
+ timestamp = parse_iso8601(recordings.get('created_at'))
+ uploader = recordings.get('page', {}).get('title')
+ uploader_id = compat_str(recordings.get('page', {}).get('id'))
+ view_count = int_or_none(recordings.get('playCount'))
+ comment_count = int_or_none(recordings.get('commentCount'))
+ categories = [tag['name'] for tag in recordings.get('tags', []) if tag.get('name')]
return {
'id': video_id,
- 'url': video_url,
- 'ext': 'mp3',
- 'filesize': int_or_none(recording.find('./size').text),
+ 'url': audio_url,
'title': title,
- 'upload_date': upload_date,
- 'view_count': view_count,
- 'comment_count': comment_count,
+ 'description': description,
'duration': duration,
- 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
'uploader': uploader,
'uploader_id': uploader_id,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'categories': categories,
}
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ xpath_text,
+ float_or_none,
+ int_or_none,
+)
+
+
+class PlaywireIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:config|cdn)\.playwire\.com(?:/v2)?/(?P<publisher_id>\d+)/(?:videos/v2|embed|config)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://config.playwire.com/14907/videos/v2/3353705/player.json',
+ 'md5': 'e6398701e3595888125729eaa2329ed9',
+ 'info_dict': {
+ 'id': '3353705',
+ 'ext': 'mp4',
+ 'title': 'S04_RM_UCL_Rus',
+ 'thumbnail': 're:^http://.*\.png$',
+ 'duration': 145.94,
+ },
+ }, {
+ 'url': 'http://cdn.playwire.com/11625/embed/85228.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://config.playwire.com/12421/videos/v2/3389892/zeus.json',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://cdn.playwire.com/v2/12342/config/1532636.json',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ publisher_id, video_id = mobj.group('publisher_id'), mobj.group('id')
+
+ player = self._download_json(
+ 'http://config.playwire.com/%s/videos/v2/%s/zeus.json' % (publisher_id, video_id),
+ video_id)
+
+ title = player['settings']['title']
+ duration = float_or_none(player.get('duration'), 1000)
+
+ content = player['content']
+ thumbnail = content.get('poster')
+ src = content['media']['f4m']
+
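+ # the f4m manifest enumerates progressive renditions with URLs relative to its baseURL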
+ f4m = self._download_xml(src, video_id)
+ base_url = xpath_text(f4m, './{http://ns.adobe.com/f4m/1.0}baseURL', 'base url', fatal=True)
+ formats = []
+ for media in f4m.findall('./{http://ns.adobe.com/f4m/1.0}media'):
+ media_url = media.get('url')
+ if not media_url:
+ continue
+ tbr = int_or_none(media.get('bitrate'))
+ width = int_or_none(media.get('width'))
+ height = int_or_none(media.get('height'))
+ f = {
+ 'url': '%s/%s' % (base_url, media.attrib['url']),
+ 'tbr': tbr,
+ 'width': width,
+ 'height': height,
+ }
+ if not (tbr or width or height):
+ f['quality'] = 1 if '-hd.' in media_url else 0
+ formats.append(f)
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ }
}
def _extract_count(self, pattern, webpage, name):
- count = self._html_search_regex(pattern, webpage, '%s count' % name, fatal=False)
- if count:
- count = str_to_int(count)
- return count
+ return str_to_int(self._search_regex(
+ pattern, webpage, '%s count' % name, fatal=False))
def _real_extract(self, url):
video_id = self._match_id(url)
if thumbnail:
thumbnail = compat_urllib_parse.unquote(thumbnail)
- view_count = self._extract_count(r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
- like_count = self._extract_count(r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
- dislike_count = self._extract_count(r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
+ view_count = self._extract_count(
+ r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
+ like_count = self._extract_count(
+ r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
+ dislike_count = self._extract_count(
+ r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
comment_count = self._extract_count(
- r'All comments \(<var class="videoCommentCount">([\d,\.]+)</var>', webpage, 'comment')
+ r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
video_urls = list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
if webpage.find('"encrypted":true') != -1:
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import random
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ float_or_none,
+ unified_strdate,
+)
+
+
+class PornoVoisinesIE(InfoExtractor):
+ _VALID_URL = r'http://(?:www\.)?pornovoisines\.com/showvideo/(?P<id>\d+)/(?P<display_id>[^/]+)'
+
+ _VIDEO_URL_TEMPLATE = 'http://stream%d.pornovoisines.com' \
+ '/static/media/video/transcoded/%s-640x360-1000-trscded.mp4'
+
+ _SERVER_NUMBERS = (1, 2)
+
+ _TEST = {
+ 'url': 'http://www.pornovoisines.com/showvideo/1285/recherche-appartement/',
+ 'md5': '5ac670803bc12e9e7f9f662ce64cf1d1',
+ 'info_dict': {
+ 'id': '1285',
+ 'display_id': 'recherche-appartement',
+ 'ext': 'mp4',
+ 'title': 'Recherche appartement',
+ 'description': 'md5:819ea0b785e2a04667a1a01cdc89594e',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'upload_date': '20140925',
+ 'duration': 120,
+ 'view_count': int,
+ 'average_rating': float,
+ 'categories': ['Débutante', 'Scénario', 'Sodomie'],
+ 'age_limit': 18,
+ }
+ }
+
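+ # media files are mirrored across numbered servers; pick one at random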
+ @classmethod
+ def build_video_url(cls, num):
+ return cls._VIDEO_URL_TEMPLATE % (random.choice(cls._SERVER_NUMBERS), num)
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ webpage = self._download_webpage(url, video_id)
+
+ video_url = self.build_video_url(video_id)
+
+ title = self._html_search_regex(
+ r'<h1>(.+?)</h1>', webpage, 'title', flags=re.DOTALL)
+ description = self._html_search_regex(
+ r'<article id="descriptif">(.+?)</article>',
+ webpage, "description", fatal=False, flags=re.DOTALL)
+
+ thumbnail = self._search_regex(
+ r'<div id="mediaspace%s">\s*<img src="/?([^"]+)"' % video_id,
+ webpage, 'thumbnail', fatal=False)
+ if thumbnail:
+ thumbnail = 'http://www.pornovoisines.com/%s' % thumbnail
+
+ upload_date = unified_strdate(self._search_regex(
+ r'Publié le ([\d-]+)', webpage, 'upload date', fatal=False))
+ duration = int_or_none(self._search_regex(
+ r'Durée (\d+)', webpage, 'duration', fatal=False))
+ view_count = int_or_none(self._search_regex(
+ r'(\d+) vues', webpage, 'view count', fatal=False))
+ average_rating = self._search_regex(
+ r'Note : (\d+,\d+)', webpage, 'average rating', fatal=False)
+ if average_rating:
+ average_rating = float_or_none(average_rating.replace(',', '.'))
+
+ categories = self._html_search_meta(
+ 'keywords', webpage, 'categories', fatal=False)
+ if categories:
+ categories = [category.strip() for category in categories.split(',')]
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'average_rating': average_rating,
+ 'categories': categories,
+ 'age_limit': 18,
+ }
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse,
+ compat_urllib_request,
+)
+from ..utils import ExtractorError
+
+
+class PrimeShareTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?primeshare\.tv/download/(?P<id>[\da-zA-Z]+)'
+
+ _TEST = {
+ 'url': 'http://primeshare.tv/download/238790B611',
+ 'md5': 'b92d9bf5461137c36228009f31533fbc',
+ 'info_dict': {
+ 'id': '238790B611',
+ 'ext': 'mp4',
+ 'title': 'Public Domain - 1960s Commercial - Crest Toothpaste-YKsuFona',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ if '>File not exist<' in webpage:
+ raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+
+ fields = dict(re.findall(r'''(?x)<input\s+
+ type="hidden"\s+
+ name="([^"]+)"\s+
+ (?:id="[^"]+"\s+)?
+ value="([^"]*)"
+ ''', webpage))
+
+ headers = {
+ 'Referer': url,
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ }
+
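+ # the page enforces a JS countdown (cWaitTime) before the form may be submitted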
+ wait_time = int(self._search_regex(
+ r'var\s+cWaitTime\s*=\s*(\d+)',
+ webpage, 'wait time', default=7)) + 1
+ self._sleep(wait_time, video_id)
+
+ req = compat_urllib_request.Request(
+ url, compat_urllib_parse.urlencode(fields).encode('utf-8'), headers)
+ video_page = self._download_webpage(
+ req, video_id, 'Downloading video page')
+
+ video_url = self._search_regex(
+ r"url\s*:\s*'([^']+\.primeshare\.tv(?::443)?/file/[^']+)'",
+ video_page, 'video url')
+
+ title = self._html_search_regex(
+ r'<h1>Watch\s*(?: )?\s*\((.+?)(?:\s*\[\.\.\.\])?\)\s*(?: )?\s*<strong>',
+ video_page, 'title')
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'ext': 'mp4',
+ }
)
from ..utils import (
unified_strdate,
+ int_or_none,
)
'info_dict': {
'id': '2104602',
'ext': 'mp4',
- 'title': 'Staffel 2, Episode 18 - Jahresrückblick',
+ 'title': 'Episode 18 - Staffel 2',
'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
'upload_date': '20131231',
'duration': 5845.04,
urls_sources = urls_sources.values()
def fix_bitrate(bitrate):
+ bitrate = int_or_none(bitrate)
+ if not bitrate:
+ return None
return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate
for source in urls_sources:
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import random
+import time
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ strip_jsonp,
+ unescapeHTML,
+ js_to_json,
+)
+from ..compat import compat_urllib_request
+
+
+class QQMusicIE(InfoExtractor):
+ IE_NAME = 'qqmusic'
+ _VALID_URL = r'http://y\.qq\.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)'
+ _TESTS = [{
+ 'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD',
+ 'md5': 'bed90b6db2a7a7a7e11bc585f471f63a',
+ 'info_dict': {
+ 'id': '004295Et37taLD',
+ 'ext': 'm4a',
+ 'title': '可惜没如果',
+ 'upload_date': '20141227',
+ 'creator': '林俊杰',
+ 'description': 'md5:4348ff1dd24036906baa7b6f973f8d30',
+ }
+ }]
+
+ # Reference: m_r_GetRUin() in top_player.js
+ # http://imgcache.gtimg.cn/music/portal_v3/y/top_player.js
+ @staticmethod
+ def m_r_get_ruin():
+ curMs = int(time.time() * 1000) % 1000
+ return int(round(random.random() * 2147483647) * curMs % 1E10)
+
+ def _real_extract(self, url):
+ mid = self._match_id(url)
+
+ detail_info_page = self._download_webpage(
+ 'http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songmid=%s&play=0' % mid,
+ mid, note='Download song detail info',
+ errnote='Unable to get song detail info', encoding='gbk')
+
+ song_name = self._html_search_regex(
+ r"songname:\s*'([^']+)'", detail_info_page, 'song name')
+
+ publish_time = self._html_search_regex(
+ r'发行时间:(\d{4}-\d{2}-\d{2})', detail_info_page,
+ 'publish time', default=None)
+ if publish_time:
+ publish_time = publish_time.replace('-', '')
+
+ singer = self._html_search_regex(
+ r"singer:\s*'([^']+)", detail_info_page, 'singer', default=None)
+
+ lrc_content = self._html_search_regex(
+ r'<div class="content" id="lrc_content"[^<>]*>([^<>]+)</div>',
+ detail_info_page, 'LRC lyrics', default=None)
+
+ guid = self.m_r_get_ruin()
+
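+ # playback URLs require a vkey tied to the randomly generated guid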
+ vkey = self._download_json(
+ 'http://base.music.qq.com/fcgi-bin/fcg_musicexpress.fcg?json=3&guid=%s' % guid,
+ mid, note='Retrieve vkey', errnote='Unable to get vkey',
+ transform_source=strip_jsonp)['key']
+ song_url = 'http://cc.stream.qqmusic.qq.com/C200%s.m4a?vkey=%s&guid=%s&fromtag=0' % (mid, vkey, guid)
+
+ return {
+ 'id': mid,
+ 'url': song_url,
+ 'title': song_name,
+ 'upload_date': publish_time,
+ 'creator': singer,
+ 'description': lrc_content,
+ }
+
+
+class QQPlaylistBaseIE(InfoExtractor):
+ @staticmethod
+ def qq_static_url(category, mid):
+ return 'http://y.qq.com/y/static/%s/%s/%s/%s.html' % (category, mid[-2], mid[-1], mid)
+
+ @classmethod
+ def get_entries_from_page(cls, page):
+ entries = []
+
+ for item in re.findall(r'class="data"[^<>]*>([^<>]+)</', page):
+ song_mid = unescapeHTML(item).split('|')[-5]
+ entries.append(cls.url_result(
+ 'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic',
+ song_mid))
+
+ return entries
+
+
+class QQMusicSingerIE(QQPlaylistBaseIE):
+ IE_NAME = 'qqmusic:singer'
+ _VALID_URL = r'http://y\.qq\.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)'
+ _TEST = {
+ 'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2',
+ 'info_dict': {
+ 'id': '001BLpXF2DyJe2',
+ 'title': '林俊杰',
+ 'description': 'md5:2a222d89ba4455a3af19940c0481bb78',
+ },
+ 'playlist_count': 12,
+ }
+
+ def _real_extract(self, url):
+ mid = self._match_id(url)
+
+ singer_page = self._download_webpage(
+ self.qq_static_url('singer', mid), mid, 'Download singer page')
+
+ entries = self.get_entries_from_page(singer_page)
+
+ singer_name = self._html_search_regex(
+ r"singername\s*:\s*'([^']+)'", singer_page, 'singer name',
+ default=None)
+
+ singer_id = self._html_search_regex(
+ r"singerid\s*:\s*'([0-9]+)'", singer_page, 'singer id',
+ default=None)
+
+ singer_desc = None
+
+ if singer_id:
+ req = compat_urllib_request.Request(
+ 'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg?utf8=1&outCharset=utf-8&format=xml&singerid=%s' % singer_id)
+ req.add_header(
+ 'Referer', 'http://s.plcloud.music.qq.com/xhr_proxy_utf8.html')
+ singer_desc_page = self._download_xml(
+ req, mid, 'Download singer description XML')
+
+ singer_desc = singer_desc_page.find('./data/info/desc').text
+
+ return self.playlist_result(entries, mid, singer_name, singer_desc)
+
+
+class QQMusicAlbumIE(QQPlaylistBaseIE):
+ IE_NAME = 'qqmusic:album'
+ _VALID_URL = r'http://y\.qq\.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)'
+
+ _TEST = {
+ 'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1&play=0',
+ 'info_dict': {
+ 'id': '000gXCTb2AhRR1',
+ 'title': '我们都是这样长大的',
+ 'description': 'md5:d216c55a2d4b3537fe4415b8767d74d6',
+ },
+ 'playlist_count': 4,
+ }
+
+ def _real_extract(self, url):
+ mid = self._match_id(url)
+
+ album_page = self._download_webpage(
+ self.qq_static_url('album', mid), mid, 'Download album page')
+
+ entries = self.get_entries_from_page(album_page)
+
+ album_name = self._html_search_regex(
+ r"albumname\s*:\s*'([^']+)',", album_page, 'album name',
+ default=None)
+
+ album_detail = self._html_search_regex(
+ r'<div class="album_detail close_detail">\s*<p>((?:[^<>]+(?:<br />)?)+)</p>',
+ album_page, 'album details', default=None)
+
+ return self.playlist_result(entries, mid, album_name, album_detail)
+
+
+class QQMusicToplistIE(QQPlaylistBaseIE):
+ IE_NAME = 'qqmusic:toplist'
+ _VALID_URL = r'http://y\.qq\.com/#type=toplist&p=(?P<id>(top|global)_[0-9]+)'
+
+ _TESTS = [{
+ 'url': 'http://y.qq.com/#type=toplist&p=global_12',
+ 'info_dict': {
+ 'id': 'global_12',
+ 'title': 'itunes榜',
+ },
+ 'playlist_count': 10,
+ }, {
+ 'url': 'http://y.qq.com/#type=toplist&p=top_6',
+ 'info_dict': {
+ 'id': 'top_6',
+ 'title': 'QQ音乐巅峰榜·欧美',
+ },
+ 'playlist_count': 100,
+ }, {
+ 'url': 'http://y.qq.com/#type=toplist&p=global_5',
+ 'info_dict': {
+ 'id': 'global_5',
+ 'title': '韩国mnet排行榜',
+ },
+ 'playlist_count': 50,
+ }]
+
+ @staticmethod
+ def strip_qq_jsonp(code):
+ return js_to_json(re.sub(r'^MusicJsonCallback\((.*?)\)/\*.+?\*/$', r'\1', code))
+
+ def _real_extract(self, url):
+ list_id = self._match_id(url)
+
+ list_type, num_id = list_id.split('_')
+
+ list_page = self._download_webpage(
+ "http://y.qq.com/y/static/toplist/index/%s.html" % list_id,
+ list_id, 'Download toplist page')
+
+ entries = []
+ if list_type == 'top':
+ jsonp_url = 'http://y.qq.com/y/static/toplist/json/top/%s/1.js' % num_id
+ else:
+ jsonp_url = 'http://y.qq.com/y/static/toplist/json/global/%s/1_1.js' % num_id
+
+ toplist_json = self._download_json(
+ jsonp_url, list_id, note='Retrieve toplist json',
+ errnote='Unable to get toplist json', transform_source=self.strip_qq_jsonp)
+
+ for song in toplist_json['l']:
+ s = song['s']
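+ # each song record is a |-separated string; field 20 holds the song mid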
+ song_mid = s.split("|")[20]
+ entries.append(self.url_result(
+ 'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic',
+ song_mid))
+
+ list_name = self._html_search_regex(
+ r'<h2 id="top_name">([^\']+)</h2>', list_page, 'top list name',
+ default=None)
+
+ return self.playlist_result(entries, list_id, list_name)
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ unified_strdate,
+ str_to_int,
+)
+
+
+class RadioJavanIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?'
+ _TEST = {
+ 'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam',
+ 'md5': 'e85208ffa3ca8b83534fca9fe19af95b',
+ 'info_dict': {
+ 'id': 'chaartaar-ashoobam',
+ 'ext': 'mp4',
+ 'title': 'Chaartaar - Ashoobam',
+ 'thumbnail': 're:^https?://.*\.jpe?g$',
+ 'upload_date': '20150215',
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
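+ # available resolutions are exposed as RJ.video<height>p variables in the page JS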
+ formats = [{
+ 'url': 'https://media.rdjavan.com/media/music_video/%s' % video_path,
+ 'format_id': '%sp' % height,
+ 'height': int(height),
+ } for height, video_path in re.findall(r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage)]
+ self._sort_formats(formats)
+
+ title = self._og_search_title(webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ upload_date = unified_strdate(self._search_regex(
+ r'class="date_added">Date added: ([^<]+)<',
+ webpage, 'upload date', fatal=False))
+
+ view_count = str_to_int(self._search_regex(
+ r'class="views">Plays: ([\d,]+)',
+ webpage, 'view count', fatal=False))
+ like_count = str_to_int(self._search_regex(
+ r'class="rating">([\d,]+) likes',
+ webpage, 'like count', fatal=False))
+ dislike_count = str_to_int(self._search_regex(
+ r'class="rating">([\d,]+) dislikes',
+ webpage, 'dislike count', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'dislike_count': dislike_count,
+ 'formats': formats,
+ }
class RaiIE(InfoExtractor):
- _VALID_URL = r'(?P<url>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)'
+ _VALID_URL = r'(?P<url>(?P<host>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it))/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)'
_TESTS = [
{
'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
'description': 'Edizione delle ore 20:30 ',
}
},
+ {
+ 'url': 'http://www.ilcandidato.rai.it/dl/ray/media/Il-Candidato---Primo-episodio-Le-Primarie-28e5525a-b495-45e8-a7c3-bc48ba45d2b6.html',
+ 'md5': '02b64456f7cc09f96ff14e7dd489017e',
+ 'info_dict': {
+ 'id': '28e5525a-b495-45e8-a7c3-bc48ba45d2b6',
+ 'ext': 'flv',
+ 'title': 'Il Candidato - Primo episodio: "Le Primarie"',
+ 'description': 'Primo appuntamento con "Il candidato" con Filippo Timi, alias Piero Zucca presidente!',
+ 'uploader': 'RaiTre',
+ }
+ }
]
+ def _extract_relinker_url(self, webpage):
+ return self._proto_relative_url(self._search_regex(
+ [r'name="videourl" content="([^"]+)"', r'var\s+videoURL(?:_MP4)?\s*=\s*"([^"]+)"'],
+ webpage, 'relinker url', default=None))
+
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
+ host = mobj.group('host')
- media = self._download_json('%s?json' % mobj.group('url'), video_id, 'Downloading video JSON')
+ webpage = self._download_webpage(url, video_id)
- title = media.get('name')
- description = media.get('desc')
- thumbnail = media.get('image_300') or media.get('image_medium') or media.get('image')
- duration = parse_duration(media.get('length'))
- uploader = media.get('author')
- upload_date = unified_strdate(media.get('date'))
+ relinker_url = self._extract_relinker_url(webpage)
- formats = []
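+ # some pages only embed the player in an iframe; fetch it and look for the relinker there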
+ if not relinker_url:
+ iframe_path = self._search_regex(
+ r'<iframe[^>]+src="/?(dl/[^"]+\?iframe\b[^"]*)"',
+ webpage, 'iframe')
+ webpage = self._download_webpage(
+ '%s/%s' % (host, iframe_path), video_id)
+ relinker_url = self._extract_relinker_url(webpage)
- for format_id in ['wmv', 'm3u8', 'mediaUri', 'h264']:
- media_url = media.get(format_id)
- if not media_url:
- continue
- formats.append({
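+ # output=47 requests the JSON variant of the relinker response, which carries
+ # the final media URL and the container type (ct)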
+ relinker = self._download_json(
+ '%s&output=47' % relinker_url, video_id)
+
+ media_url = relinker['video'][0]
+ ct = relinker.get('ct')
+ if ct == 'f4m':
+ formats = self._extract_f4m_formats(
+ media_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id)
+ else:
+ formats = [{
'url': media_url,
- 'format_id': format_id,
- 'ext': 'mp4',
- })
+ 'format_id': ct,
+ }]
- subtitles = self.extract_subtitles(video_id, url)
+ json_link = self._html_search_meta(
+ 'jsonlink', webpage, 'JSON link', default=None)
+ if json_link:
+ media = self._download_json(
+ host + json_link, video_id, 'Downloading video JSON')
+ title = media.get('name')
+ description = media.get('desc')
+ thumbnail = media.get('image_300') or media.get('image_medium') or media.get('image')
+ duration = parse_duration(media.get('length'))
+ uploader = media.get('author')
+ upload_date = unified_strdate(media.get('date'))
+ else:
+ title = (self._search_regex(
+ r'var\s+videoTitolo\s*=\s*"(.+?)";',
+ webpage, 'title', default=None) or self._og_search_title(webpage)).replace('\\"', '"')
+ description = self._og_search_description(webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+ duration = None
+ uploader = self._html_search_meta('Editore', webpage, 'uploader')
+ upload_date = unified_strdate(self._html_search_meta(
+ 'item-date', webpage, 'upload date', default=None))
+
+ subtitles = self.extract_subtitles(video_id, webpage)
return {
'id': video_id,
'subtitles': subtitles,
}
- def _get_subtitles(self, video_id, url):
- webpage = self._download_webpage(url, video_id)
+ def _get_subtitles(self, video_id, webpage):
subtitles = {}
m = re.search(r'<meta name="closedcaption" content="(?P<captions>[^"]+)"', webpage)
if m:
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..utils import ExtractorError
class RedTubeIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.redtube.com/66418',
+ 'md5': '7b8c22b5e7098a3e1c09709df1126d2d',
'info_dict': {
'id': '66418',
'ext': 'mp4',
- "title": "Sucked on a toilet",
- "age_limit": 18,
+ 'title': 'Sucked on a toilet',
+ 'age_limit': 18,
}
}
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
+ if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']):
+ raise ExtractorError('Video %s has been removed' % video_id, expected=True)
+
video_url = self._html_search_regex(
r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
video_title = self._html_search_regex(
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
+ ExtractorError,
float_or_none,
remove_end,
+ std_headers,
struct_unpack,
)
'only_matching': True,
}]
+ def _real_initialize(self):
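+ # the thumbnail host ("manager") is assigned per client; fetch it once using the base64-encoded User-Agent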
+ user_agent_b64 = base64.b64encode(std_headers['User-Agent'].encode('utf-8')).decode('utf-8')
+ manager_info = self._download_json(
+ 'http://www.rtve.es/odin/loki/' + user_agent_b64,
+ None, 'Fetching manager info')
+ self._manager = manager_info['manager']
+
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
info = self._download_json(
'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
video_id)['page']['items'][0]
- png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id
+ if info['state'] == 'DESPU':
+ raise ExtractorError('The video is no longer available', expected=True)
+ png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id)
png = self._download_webpage(png_url, video_id, 'Downloading url information')
video_url = _decrypt_url(png)
if not video_url.endswith('.f4m'):
for s in subs)
+class RTVEInfantilIE(InfoExtractor):
+ IE_NAME = 'rtve.es:infantil'
+ IE_DESC = 'RTVE infantil'
+ _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/(?P<show>[^/]*)/video/(?P<short_title>[^/]*)/(?P<id>[0-9]+)/'
+
+ _TESTS = [{
+ 'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/',
+ 'md5': '915319587b33720b8e0357caaa6617e6',
+ 'info_dict': {
+ 'id': '3040283',
+ 'ext': 'mp4',
+ 'title': 'Maneras de vivir',
+ 'thumbnail': 'http://www.rtve.es/resources/jpg/6/5/1426182947956.JPG',
+ 'duration': 357.958,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ info = self._download_json(
+ 'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
+ video_id)['page']['items'][0]
+
+ webpage = self._download_webpage(url, video_id)
+ vidplayer_id = self._search_regex(
+ r' id="vidplayer([0-9]+)"', webpage, 'internal video ID')
+
+ png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % vidplayer_id
+ png = self._download_webpage(png_url, video_id, 'Downloading url information')
+ video_url = _decrypt_url(png)
+
+ return {
+ 'id': video_id,
+ 'ext': 'mp4',
+ 'title': info['title'],
+ 'url': video_url,
+ 'thumbnail': info.get('image'),
+ 'duration': float_or_none(info.get('duration'), scale=1000),
+ }
+
+
class RTVELiveIE(InfoExtractor):
IE_NAME = 'rtve.es:live'
IE_DESC = 'RTVE.es live streams'
'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ',
'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c',
},
+ 'skip': 'Translation has finished',
+ },
+ {
+ 'url': 'http://player.rutv.ru/iframe/live/id/21/showZoomBtn/false/isPlay/true/',
+ 'info_dict': {
+ 'id': '21',
+ 'ext': 'mp4',
+ 'title': 're:^Россия 24. Прямой эфир [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'is_live': True,
+ },
'params': {
- # rtmp download
+ # m3u8 download
'skip_download': True,
},
- 'skip': 'Translation has finished',
},
]
elif video_path.startswith('index/iframe/cast_id'):
video_type = 'live'
+ is_live = video_type == 'live'
+
json_data = self._download_json(
- 'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if video_type == 'live' else '', video_id),
+ 'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if is_live else '', video_id),
video_id, 'Downloading JSON')
if json_data['errors']:
for transport, links in media['sources'].items():
for quality, url in links.items():
+ preference = -1 if priority_transport == transport else -2
if transport == 'rtmp':
mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>.+)$', url)
if not mobj:
'rtmp_live': True,
'ext': 'flv',
'vbr': int(quality),
+ 'preference': preference,
}
elif transport == 'm3u8':
- formats.extend(self._extract_m3u8_formats(url, video_id, 'mp4'))
+ formats.extend(self._extract_m3u8_formats(
+ url, video_id, 'mp4', preference=preference, m3u8_id='hls'))
continue
else:
fmt = {
'width': width,
'height': height,
'format_id': '%s-%s' % (transport, quality),
- 'preference': -1 if priority_transport == transport else -2,
})
formats.append(fmt)
- if not formats:
- raise ExtractorError('No media links available for %s' % video_id)
-
self._sort_formats(formats)
return {
'id': video_id,
- 'title': title,
+ 'title': self._live_title(title) if is_live else title,
'description': description,
'thumbnail': thumbnail,
'view_count': view_count,
'duration': duration,
'formats': formats,
+ 'is_live': is_live,
}
--- /dev/null
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .brightcove import BrightcoveIE
+
+from ..compat import (
+ compat_urllib_parse,
+ compat_urllib_request,
+)
+from ..utils import (
+ ExtractorError,
+ smuggle_url,
+ std_headers,
+)
+
+
+class SafariBaseIE(InfoExtractor):
+ _LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/'
+ _SUCCESSFUL_LOGIN_REGEX = r'<a href="/accounts/logout/"[^>]*>Sign Out</a>'
+ _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to supply credentials for safaribooksonline.com'
+ _NETRC_MACHINE = 'safari'
+
+ _API_BASE = 'https://www.safaribooksonline.com/api/v1/book'
+ _API_FORMAT = 'json'
+
+ LOGGED_IN = False
+
+ def _real_initialize(self):
+ # We only need to log in once for courses or individual videos
+ if not self.LOGGED_IN:
+ self._login()
+ SafariBaseIE.LOGGED_IN = True
+
+ def _login(self):
+ (username, password) = self._get_login_info()
+ if username is None:
+ raise ExtractorError(
+ self._ACCOUNT_CREDENTIALS_HINT,
+ expected=True)
+
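+ # work on a copy so the shared std_headers dict is not mutated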
+ headers = std_headers.copy()
+ if 'Referer' not in headers:
+ headers['Referer'] = self._LOGIN_URL
+
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None,
+ 'Downloading login form')
+
+ csrf = self._html_search_regex(
+ r"name='csrfmiddlewaretoken'\s+value='([^']+)'",
+ login_page, 'csrf token')
+
+ login_form = {
+ 'csrfmiddlewaretoken': csrf,
+ 'email': username,
+ 'password1': password,
+ 'login': 'Sign In',
+ 'next': '',
+ }
+
+ request = compat_urllib_request.Request(
+ self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'), headers=headers)
+ login_page = self._download_webpage(
+ request, None, 'Logging in as %s' % username)
+
+ if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
+ raise ExtractorError(
+ 'Login failed; make sure your credentials are correct and try again.',
+ expected=True)
+
+ self.to_screen('Login successful')
+
+
+class SafariIE(SafariBaseIE):
+ IE_NAME = 'safari'
+ IE_DESC = 'safaribooksonline.com online video'
+ _VALID_URL = r'''(?x)https?://
+ (?:www\.)?safaribooksonline\.com/
+ (?:
+ library/view/[^/]+|
+ api/v1/book
+ )/
+ (?P<course_id>\d+)/
+ (?:chapter(?:-content)?/)?
+ (?P<part>part\d+)\.html
+ '''
+
+ _TESTS = [{
+ 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
+ 'md5': '5b0c4cc1b3c1ba15dda7344085aa5592',
+ 'info_dict': {
+ 'id': '2842601850001',
+ 'ext': 'mp4',
+ 'title': 'Introduction',
+ },
+ 'skip': 'Requires safaribooksonline account credentials',
+ }, {
+ 'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ course_id = mobj.group('course_id')
+ part = mobj.group('part')
+
+ webpage = self._download_webpage(
+ '%s/%s/chapter-content/%s.html' % (self._API_BASE, course_id, part),
+ part)
+
+ bc_url = BrightcoveIE._extract_brightcove_url(webpage)
+ if not bc_url:
+ raise ExtractorError('Could not extract Brightcove URL from %s' % url, expected=True)
+
+ return self.url_result(smuggle_url(bc_url, {'Referer': url}), 'Brightcove')
+
+
+class SafariCourseIE(SafariBaseIE):
+ IE_NAME = 'safari:course'
+ IE_DESC = 'safaribooksonline.com online courses'
+
+ _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<id>\d+)/?(?:[#?]|$)'
+
+ _TESTS = [{
+ 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
+ 'info_dict': {
+ 'id': '9780133392838',
+ 'title': 'Hadoop Fundamentals LiveLessons',
+ },
+ 'playlist_count': 22,
+ 'skip': 'Requires safaribooksonline account credentials',
+ }, {
+ 'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ course_id = self._match_id(url)
+
+ course_json = self._download_json(
+ '%s/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT),
+ course_id, 'Downloading course JSON')
+
+ if 'chapters' not in course_json:
+ raise ExtractorError(
+ 'No chapters found for course %s' % course_id, expected=True)
+
+ entries = [
+ self.url_result(chapter, 'Safari')
+ for chapter in course_json['chapters']]
+
+ course_title = course_json['title']
+
+ return self.playlist_result(entries, course_id, course_title)
class ScreenwaveMediaIE(InfoExtractor):
- _VALID_URL = r'http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<id>.+)'
+ _VALID_URL = r'http://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?[^"]*\bid=(?P<id>.+)'
_TESTS = [{
'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911',
def _real_extract(self, url):
video_id = self._match_id(url)
- playerdata = self._download_webpage(url, video_id, 'Downloading player webpage')
+
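+ # fetch the player page directly by id rather than relying on the particular embed URL variant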
+ playerdata = self._download_webpage(
+ 'http://player.screenwavemedia.com/play/player.php?id=%s' % video_id,
+ video_id, 'Downloading player webpage')
vidtitle = self._search_regex(
r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/')
}
-class CinemassacreIE(InfoExtractor):
- _VALID_URL = 'https?://(?:www\.)?cinemassacre\.com/(?P<date_y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
- _TESTS = [
- {
- 'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
- 'md5': 'fde81fbafaee331785f58cd6c0d46190',
- 'info_dict': {
- 'id': 'Cinemassacre-19911',
- 'ext': 'mp4',
- 'upload_date': '20121110',
- 'title': '“Angry Video Game Nerd: The Movie” – Trailer',
- 'description': 'md5:fb87405fcb42a331742a0dce2708560b',
- },
- },
- {
- 'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
- 'md5': 'd72f10cd39eac4215048f62ab477a511',
- 'info_dict': {
- 'id': 'Cinemassacre-521be8ef82b16',
- 'ext': 'mp4',
- 'upload_date': '20131002',
- 'title': 'The Mummy’s Hand (1940)',
- },
- }
- ]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- display_id = mobj.group('display_id')
- video_date = mobj.group('date_y') + mobj.group('date_m') + mobj.group('date_d')
-
- webpage = self._download_webpage(url, display_id)
-
- playerdata_url = self._search_regex(
- r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
- webpage, 'player data URL')
- video_title = self._html_search_regex(
- r'<title>(?P<title>.+?)\|', webpage, 'title')
- video_description = self._html_search_regex(
- r'<div class="entry-content">(?P<description>.+?)</div>',
- webpage, 'description', flags=re.DOTALL, fatal=False)
- video_thumbnail = self._og_search_thumbnail(webpage)
-
- return {
- '_type': 'url_transparent',
- 'display_id': display_id,
- 'title': video_title,
- 'description': video_description,
- 'upload_date': video_date,
- 'thumbnail': video_thumbnail,
- 'url': playerdata_url,
- }
-
-
class TeamFourIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/video/(?P<id>[a-z0-9\-]+)/?'
_TEST = {
webpage = self._download_webpage(url, display_id)
playerdata_url = self._search_regex(
- r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
+ r'src="(http://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
webpage, 'player data URL')
video_title = self._html_search_regex(
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ unsmuggle_url,
+)
+from ..compat import (
+ compat_parse_qs,
+ compat_urlparse,
+)
+
+
+class SenateISVPIE(InfoExtractor):
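+ # each entry maps a committee slug to its Akamai stream number and CDN domain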
+ _COMM_MAP = [
+ ["ag", "76440", "http://ag-f.akamaihd.net"],
+ ["aging", "76442", "http://aging-f.akamaihd.net"],
+ ["approps", "76441", "http://approps-f.akamaihd.net"],
+ ["armed", "76445", "http://armed-f.akamaihd.net"],
+ ["banking", "76446", "http://banking-f.akamaihd.net"],
+ ["budget", "76447", "http://budget-f.akamaihd.net"],
+ ["cecc", "76486", "http://srs-f.akamaihd.net"],
+ ["commerce", "80177", "http://commerce1-f.akamaihd.net"],
+ ["csce", "75229", "http://srs-f.akamaihd.net"],
+ ["dpc", "76590", "http://dpc-f.akamaihd.net"],
+ ["energy", "76448", "http://energy-f.akamaihd.net"],
+ ["epw", "76478", "http://epw-f.akamaihd.net"],
+ ["ethics", "76449", "http://ethics-f.akamaihd.net"],
+ ["finance", "76450", "http://finance-f.akamaihd.net"],
+ ["foreign", "76451", "http://foreign-f.akamaihd.net"],
+ ["govtaff", "76453", "http://govtaff-f.akamaihd.net"],
+ ["help", "76452", "http://help-f.akamaihd.net"],
+ ["indian", "76455", "http://indian-f.akamaihd.net"],
+ ["intel", "76456", "http://intel-f.akamaihd.net"],
+ ["intlnarc", "76457", "http://intlnarc-f.akamaihd.net"],
+ ["jccic", "85180", "http://jccic-f.akamaihd.net"],
+ ["jec", "76458", "http://jec-f.akamaihd.net"],
+ ["judiciary", "76459", "http://judiciary-f.akamaihd.net"],
+ ["rpc", "76591", "http://rpc-f.akamaihd.net"],
+ ["rules", "76460", "http://rules-f.akamaihd.net"],
+ ["saa", "76489", "http://srs-f.akamaihd.net"],
+ ["smbiz", "76461", "http://smbiz-f.akamaihd.net"],
+ ["srs", "75229", "http://srs-f.akamaihd.net"],
+ ["uscc", "76487", "http://srs-f.akamaihd.net"],
+ ["vetaff", "76462", "http://vetaff-f.akamaihd.net"],
+ ["arch", "", "http://ussenate-f.akamaihd.net/"]
+ ]
+    IE_NAME = 'senate.gov'
+ _VALID_URL = r'http://www\.senate\.gov/isvp/\?(?P<qs>.+)'
+ _TESTS = [{
+ 'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',
+ 'info_dict': {
+ 'id': 'judiciary031715',
+ 'ext': 'flv',
+ 'title': 'Integrated Senate Video Player',
+ 'thumbnail': 're:^https?://.*\.(?:jpg|png)$',
+ }
+ }, {
+ 'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false',
+ 'info_dict': {
+ 'id': 'commerce011514',
+ 'ext': 'flv',
+ 'title': 'Integrated Senate Video Player'
+ }
+ }, {
+ 'url': 'http://www.senate.gov/isvp/?type=arch&comm=intel&filename=intel090613&hc_location=ufi',
+ # checksum differs each time
+ 'info_dict': {
+ 'id': 'intel090613',
+ 'ext': 'mp4',
+ 'title': 'Integrated Senate Video Player'
+ }
+ }]
+
+ @staticmethod
+ def _search_iframe_url(webpage):
+ mobj = re.search(
+ r"<iframe[^>]+src=['\"](?P<url>http://www\.senate\.gov/isvp/\?[^'\"]+)['\"]",
+ webpage)
+ if mobj:
+ return mobj.group('url')
+
+ def _get_info_for_comm(self, committee):
+ for entry in self._COMM_MAP:
+ if entry[0] == committee:
+ return entry[1:]
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+
+ qs = compat_parse_qs(re.match(self._VALID_URL, url).group('qs'))
+ if not qs.get('filename') or not qs.get('type') or not qs.get('comm'):
+ raise ExtractorError('Invalid URL', expected=True)
+
+        video_id = re.sub(r'\.mp4$', '', qs['filename'][0])
+
+ webpage = self._download_webpage(url, video_id)
+
+ if smuggled_data.get('force_title'):
+ title = smuggled_data['force_title']
+ else:
+ title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, video_id)
+ poster = qs.get('poster')
+ thumbnail = poster[0] if poster else None
+
+ video_type = qs['type'][0]
+ committee = video_type if video_type == 'arch' else qs['comm'][0]
+ stream_num, domain = self._get_info_for_comm(committee)
+
+ formats = []
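+        # Archived ('arch') videos are plain progressive downloads; live
+        # committee streams expose both an HDS (f4m) and an HLS (m3u8) manifest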
+ if video_type == 'arch':
+ filename = video_id if '.' in video_id else video_id + '.mp4'
+ formats = [{
+ # All parameters in the query string are necessary to prevent a 403 error
+ 'url': compat_urlparse.urljoin(domain, filename) + '?v=3.1.0&fp=&r=&g=',
+ }]
+ else:
+ hdcore_sign = '?hdcore=3.1.0'
+ url_params = (domain, video_id, stream_num)
+ f4m_url = '%s/z/%s_1@%s/manifest.f4m' % url_params + hdcore_sign
+ m3u8_url = '%s/i/%s_1@%s/master.m3u8' % url_params
+ for entry in self._extract_f4m_formats(f4m_url, video_id, f4m_id='f4m'):
+                # URLs without the extra param induce a 404 error
+ entry.update({'extra_param_to_segment_url': hdcore_sign})
+ formats.append(entry)
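+            # m3u8 variants differ only by a -p/-b suffix in their URLs; append
+            # it to the format id, presumably to keep the entries distinguishable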
+ for entry in self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8'):
+ mobj = re.search(r'(?P<tag>(?:-p|-b)).m3u8', entry['url'])
+ if mobj:
+ entry['format_id'] += mobj.group('tag')
+ formats.append(entry)
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ }
page_title = mobj.group('title')
webpage = self._download_webpage(url, page_title)
slideshare_obj = self._search_regex(
- r'var\s+slideshare_object\s*=\s*({.*?});\s*var\s+user_info\s*=',
+ r'\$\.extend\(slideshare_object,\s*(\{.*?\})\);',
webpage, 'slideshare object')
info = json.loads(slideshare_obj)
if info['slideshow']['type'] != 'video':
import re
from .common import InfoExtractor
-from .common import compat_str
+from ..compat import (
+ compat_str,
+ compat_urllib_request
+)
+from ..utils import ExtractorError
class SohuIE(InfoExtractor):
_VALID_URL = r'https?://(?P<mytv>my\.)?tv\.sohu\.com/.+?/(?(mytv)|n)(?P<id>\d+)\.shtml.*?'
- _TEST = {
+ _TESTS = [{
+ 'note': 'This video is available only in Mainland China',
'url': 'http://tv.sohu.com/20130724/n382479172.shtml#super',
- 'md5': 'bde8d9a6ffd82c63a1eefaef4eeefec7',
+ 'md5': '29175c8cadd8b5cc4055001e85d6b372',
'info_dict': {
'id': '382479172',
'ext': 'mp4',
'title': 'MV:Far East Movement《The Illest》',
},
- 'skip': 'Only available from China',
- }
+ 'params': {
+ 'cn_verification_proxy': 'proxy.uku.im:8888'
+ }
+ }, {
+ 'url': 'http://tv.sohu.com/20150305/n409385080.shtml',
+ 'md5': 'ac9a5d322b4bf9ae184d53e4711e4f1a',
+ 'info_dict': {
+ 'id': '409385080',
+ 'ext': 'mp4',
+ 'title': '《2015湖南卫视羊年元宵晚会》唐嫣《花好月圆》',
+ }
+ }, {
+ 'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml',
+ 'md5': '49308ff6dafde5ece51137d04aec311e',
+ 'info_dict': {
+ 'id': '78693464',
+ 'ext': 'mp4',
+ 'title': '【爱范品】第31期:MWC见不到的奇葩手机',
+ }
+ }, {
+ 'note': 'Multipart video',
+ 'url': 'http://my.tv.sohu.com/pl/8384802/78910339.shtml',
+ 'info_dict': {
+ 'id': '78910339',
+ 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+ },
+ 'playlist': [{
+ 'md5': '492923eac023ba2f13ff69617c32754a',
+ 'info_dict': {
+ 'id': '78910339_part1',
+ 'ext': 'mp4',
+ 'duration': 294,
+ 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+ }
+ }, {
+ 'md5': 'de604848c0e8e9c4a4dde7e1347c0637',
+ 'info_dict': {
+ 'id': '78910339_part2',
+ 'ext': 'mp4',
+ 'duration': 300,
+ 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+ }
+ }, {
+ 'md5': '93584716ee0657c0b205b8aa3d27aa13',
+ 'info_dict': {
+ 'id': '78910339_part3',
+ 'ext': 'mp4',
+ 'duration': 150,
+ 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+ }
+ }]
+ }, {
+ 'note': 'Video with title containing dash',
+ 'url': 'http://my.tv.sohu.com/us/249884221/78932792.shtml',
+ 'info_dict': {
+ 'id': '78932792',
+ 'ext': 'mp4',
+ 'title': 'youtube-dl testing video',
+ },
+ 'params': {
+ 'skip_download': True
+ }
+ }]
def _real_extract(self, url):
else:
base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
+ req = compat_urllib_request.Request(base_data_url + vid_id)
+
+ cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
+ if cn_verification_proxy:
+ req.add_header('Ytdl-request-proxy', cn_verification_proxy)
+
return self._download_json(
- base_data_url + vid_id, video_id,
+ req, video_id,
'Downloading JSON data for %s' % vid_id)
mobj = re.match(self._VALID_URL, url)
mytv = mobj.group('mytv') is not None
webpage = self._download_webpage(url, video_id)
- raw_title = self._html_search_regex(
- r'(?s)<title>(.+?)</title>',
- webpage, 'video title')
- title = raw_title.partition('-')[0].strip()
+
+ title = re.sub(r' - 搜狐视频$', '', self._og_search_title(webpage))
vid = self._html_search_regex(
r'var vid ?= ?["\'](\d+)["\']',
webpage, 'video path')
vid_data = _fetch_data(vid, mytv)
+ if vid_data['play'] != 1:
+ if vid_data.get('status') == 12:
+ raise ExtractorError(
+                    'Sohu said: There\'s something wrong with the video.',
+ expected=True)
+ else:
+ raise ExtractorError(
+ 'Sohu said: The video is only licensed to users in Mainland China.',
+ expected=True)
formats_json = {}
for format_id in ('nor', 'high', 'super', 'ori', 'h2644k', 'h2654k'):
for i in range(part_count):
formats = []
for format_id, format_data in formats_json.items():
- allot = format_data['allot']
- prot = format_data['prot']
-
data = format_data['data']
- clips_url = data['clipsURL']
- su = data['su']
- part_str = self._download_webpage(
- 'http://%s/?prot=%s&file=%s&new=%s' %
- (allot, prot, clips_url[i], su[i]),
- video_id,
- 'Downloading %s video URL part %d of %d'
- % (format_id, i + 1, part_count))
-
- part_info = part_str.split('|')
- video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])
+                # URLs starting with http://newflv.sohu.ccgslb.net/ are not
+                # usable, so retry until we get a working one
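+                # video_url is seeded with the bad host so the loop below
+                # runs at least once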
+ video_url = 'newflv.sohu.ccgslb.net'
+ retries = 0
+ while 'newflv.sohu.ccgslb.net' in video_url and retries < 5:
+                    download_note = 'Downloading information from CDN gateway for format ' + format_id
+ if retries > 0:
+ download_note += ' (retry #%d)' % retries
+ retries += 1
+ cdn_info = self._download_json(
+ 'http://data.vod.itc.cn/cdnList?new=' + data['su'][i],
+ video_id, download_note)
+ video_url = cdn_info['url']
formats.append({
'url': video_url,
info['id'] = video_id
else:
info = {
- '_type': 'playlist',
+ '_type': 'multi_video',
'entries': playlist,
'id': video_id,
+ 'title': title,
}
return info
'format_id': key,
'url': url,
'play_path': 'mp3:' + path,
- 'ext': ext,
+ 'ext': 'flv',
'vcodec': 'none',
})
if f['format_id'].startswith('rtmp'):
f['protocol'] = 'rtmp'
- self._sort_formats(formats)
- result['formats'] = formats
+ self._check_formats(formats, track_id)
+ self._sort_formats(formats)
+ result['formats'] = formats
return result
info_json_url += "&secret_token=" + token
elif mobj.group('player'):
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
- return self.url_result(query['url'][0])
+ real_url = query['url'][0]
+            # If the token is in the query of the original URL, we have to
+            # add it manually
+ if 'secret_token' in query:
+ real_url += '?secret_token=' + query['secret_token'][0]
+ return self.url_result(real_url)
else:
# extract uploader (which is in the url)
uploader = mobj.group('uploader')
class SoundcloudSetIE(SoundcloudIE):
- _VALID_URL = r'https?://(?:www\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
IE_NAME = 'soundcloud:set'
_TESTS = [{
'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
info = self._download_json(resolv_url, full_title)
if 'errors' in info:
- for err in info['errors']:
- self._downloader.report_error('unable to download video webpage: %s' % compat_str(err['error_message']))
- return
+ msgs = (compat_str(err['error_message']) for err in info['errors'])
+ raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs))
return {
'_type': 'playlist',
class SoundcloudUserIE(SoundcloudIE):
- _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'
IE_NAME = 'soundcloud:user'
_TESTS = [{
'url': 'https://soundcloud.com/the-concept-band',
if len(new_entries) == 0:
self.to_screen('%s: End page received' % uploader)
break
- entries.extend(self._extract_info_dict(e, quiet=True) for e in new_entries)
+ entries.extend(self.url_result(e['permalink_url'], 'Soundcloud') for e in new_entries)
return {
'_type': 'playlist',
+# encoding: utf-8
from __future__ import unicode_literals
from .mtv import MTVServicesInfoExtractor
class SouthParkIE(MTVServicesInfoExtractor):
IE_NAME = 'southpark.cc.com'
- _VALID_URL = r'https?://(www\.)?(?P<url>southpark\.cc\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
+ _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/(?:clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
}]
-class SouthparkDeIE(SouthParkIE):
+class SouthParkEsIE(SouthParkIE):
+ IE_NAME = 'southpark.cc.com:español'
+ _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/episodios-en-espanol/(?P<id>.+?)(\?|#|$))'
+ _LANG = 'es'
+
+ _TESTS = [{
+ 'url': 'http://southpark.cc.com/episodios-en-espanol/s01e01-cartman-consigue-una-sonda-anal#source=351c1323-0b96-402d-a8b9-40d01b2e9bde&position=1&sort=!airdate',
+ 'playlist_count': 4,
+ }]
+
+
+class SouthParkDeIE(SouthParkIE):
IE_NAME = 'southpark.de'
- _VALID_URL = r'https?://(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
+ _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.de/(?:clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
_FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'
_TESTS = [{
'description': 'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
},
}]
+
+
+class SouthParkNlIE(SouthParkIE):
+ IE_NAME = 'southpark.nl'
+ _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.nl/(?:clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
+ _FEED_URL = 'http://www.southpark.nl/feeds/video-player/mrss/'
+
+ _TESTS = [{
+ 'url': 'http://www.southpark.nl/full-episodes/s18e06-freemium-isnt-free',
+ 'playlist_count': 4,
+ }]
+
+
+class SouthParkDkIE(SouthParkIE):
+ IE_NAME = 'southparkstudios.dk'
+ _VALID_URL = r'https?://(?:www\.)?(?P<url>southparkstudios\.dk/(?:clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
+ _FEED_URL = 'http://www.southparkstudios.dk/feeds/video-player/mrss/'
+
+ _TESTS = [{
+ 'url': 'http://www.southparkstudios.dk/full-episodes/s18e07-grounded-vindaloop',
+ 'playlist_count': 4,
+ }]
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class SpankBangIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video'
+ _TEST = {
+ 'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
+ 'md5': '1cc433e1d6aa14bc376535b8679302f7',
+ 'info_dict': {
+ 'id': '3vvn',
+ 'ext': 'mp4',
+ 'title': 'fantasy solo',
+ 'description': 'dillion harper masturbates on a bed',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'uploader': 'silly2587',
+ 'age_limit': 18,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ stream_key = self._html_search_regex(
+ r'''var\s+stream_key\s*=\s*['"](.+?)['"]''',
+ webpage, 'stream key')
+
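+        # Available resolutions are advertised as q_<height>p spans in the page;
+        # the MP4 URL for each is derived from the video id, stream key and height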
+ formats = [{
+ 'url': 'http://spankbang.com/_%s/%s/title/%sp__mp4' % (video_id, stream_key, height),
+ 'ext': 'mp4',
+ 'format_id': '%sp' % height,
+ 'height': int(height),
+ } for height in re.findall(r'<span[^>]+q_(\d+)p', webpage)]
+ self._sort_formats(formats)
+
+ title = self._html_search_regex(
+ r'(?s)<h1>(.+?)</h1>', webpage, 'title')
+ description = self._search_regex(
+ r'class="desc"[^>]*>([^<]+)',
+ webpage, 'description', default=None)
+ thumbnail = self._og_search_thumbnail(webpage)
+ uploader = self._search_regex(
+ r'class="user"[^>]*>([^<]+)',
+ webpage, 'uploader', fatal=False)
+
+ age_limit = self._rta_search(webpage)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'formats': formats,
+ 'age_limit': age_limit,
+ }
class SpikeIE(MTVServicesInfoExtractor):
_VALID_URL = r'''(?x)https?://
- (?:www\.spike\.com/(?:video-clips|(?:full-)?episodes)/.+|
+ (?:www\.spike\.com/(?:video-(?:clips|playlists)|(?:full-)?episodes)/.+|
m\.spike\.com/videos/video\.rbml\?id=(?P<id>[^&]+))
'''
_TEST = {
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ parse_iso8601,
+ xpath_text,
+)
+
+
+class SrfIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.srf\.ch/play(?:er)?/tv/[^/]+/video/(?P<display_id>[^?]+)\?id=|tp\.srgssr\.ch/p/flash\?urn=urn:srf:ais:video:)(?P<id>[0-9a-f\-]{36})'
+ _TESTS = [{
+ 'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
+ 'md5': '4cd93523723beff51bb4bee974ee238d',
+ 'info_dict': {
+ 'id': '28e1a57d-5b76-4399-8ab3-9097f071e6c5',
+ 'display_id': 'snowden-beantragt-asyl-in-russland',
+ 'ext': 'm4v',
+ 'upload_date': '20130701',
+ 'title': 'Snowden beantragt Asyl in Russland',
+ 'timestamp': 1372713995,
+ }
+ }, {
+ # No Speichern (Save) button
+ 'url': 'http://www.srf.ch/play/tv/top-gear/video/jaguar-xk120-shadow-und-tornado-dampflokomotive?id=677f5829-e473-4823-ac83-a1087fe97faa',
+ 'md5': 'd97e236e80d1d24729e5d0953d276a4f',
+ 'info_dict': {
+ 'id': '677f5829-e473-4823-ac83-a1087fe97faa',
+ 'display_id': 'jaguar-xk120-shadow-und-tornado-dampflokomotive',
+ 'ext': 'flv',
+ 'upload_date': '20130710',
+ 'title': 'Jaguar XK120, Shadow und Tornado-Dampflokomotive',
+ 'timestamp': 1373493600,
+ },
+ }, {
+ 'url': 'http://www.srf.ch/player/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tp.srgssr.ch/p/flash?urn=urn:srf:ais:video:28e1a57d-5b76-4399-8ab3-9097f071e6c5',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ display_id = re.match(self._VALID_URL, url).group('display_id') or video_id
+
+ video_data = self._download_xml(
+ 'http://il.srgssr.ch/integrationlayer/1.0/ue/srf/video/play/%s.xml' % video_id,
+ display_id)
+
+ title = xpath_text(
+ video_data, './AssetMetadatas/AssetMetadata/title', fatal=True)
+ thumbnails = [{
+ 'url': s.text
+ } for s in video_data.findall('.//ImageRepresentation/url')]
+ timestamp = parse_iso8601(xpath_text(video_data, './createdDate'))
+        # The <duration> field in the XML differs from the actual duration, so it is skipped
+
+ formats = []
+ for item in video_data.findall('./Playlists/Playlist') + video_data.findall('./Downloads/Download'):
+ for url_node in item.findall('url'):
+ quality = url_node.attrib['quality']
+ full_url = url_node.text
+ original_ext = determine_ext(full_url)
+ format_id = '%s-%s' % (quality, item.attrib['protocol'])
+ if original_ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ full_url + '?hdcore=3.4.0', display_id, f4m_id=format_id))
+ elif original_ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ full_url, display_id, 'mp4', m3u8_id=format_id))
+ else:
+ formats.append({
+ 'url': full_url,
+ 'ext': original_ext,
+ 'format_id': format_id,
+ 'quality': 0 if 'HD' in quality else -1,
+ 'preference': 1,
+ })
+
+ self._sort_formats(formats)
+
+ subtitles = {}
+ subtitles_data = video_data.find('Subtitles')
+ if subtitles_data is not None:
+ subtitles_list = [{
+ 'url': sub.text,
+ 'ext': determine_ext(sub.text),
+ } for sub in subtitles_data]
+ if subtitles_list:
+ subtitles['de'] = subtitles_list
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'formats': formats,
+ 'title': title,
+ 'thumbnails': thumbnails,
+ 'timestamp': timestamp,
+ 'subtitles': subtitles,
+ }
--- /dev/null
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ unescapeHTML,
+ parse_duration,
+)
+
+
+class SSAIE(InfoExtractor):
+ _VALID_URL = r'http://ssa\.nls\.uk/film/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://ssa.nls.uk/film/3561',
+ 'info_dict': {
+ 'id': '3561',
+ 'ext': 'flv',
+ 'title': 'SHETLAND WOOL',
+ 'description': 'md5:c5afca6871ad59b4271e7704fe50ab04',
+ 'duration': 900,
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ streamer = self._search_regex(
+            r"'streamer'\s*,\s*'(rtmp[^']+)'", webpage, 'streamer')
+ play_path = self._search_regex(
+ r"'file'\s*,\s*'([^']+)'", webpage, 'file').rpartition('.')[0]
+
+ def search_field(field_name, fatal=False):
+ return self._search_regex(
+ r'<span\s+class="field_title">%s:</span>\s*<span\s+class="field_content">([^<]+)</span>' % field_name,
+                webpage, field_name, fatal=fatal)
+
+ title = unescapeHTML(search_field('Title', fatal=True)).strip('()[]')
+ description = unescapeHTML(search_field('Description'))
+ duration = parse_duration(search_field('Running time'))
+ thumbnail = self._search_regex(
+            r"'image'\s*,\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'url': streamer,
+ 'play_path': play_path,
+ 'ext': 'flv',
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
+ }
)
-class SVTPlayIE(InfoExtractor):
- IE_DESC = 'SVT Play and Öppet arkiv'
- _VALID_URL = r'https?://(?:www\.)?(?P<host>svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)'
- _TESTS = [{
- 'url': 'http://www.svtplay.se/video/2609989/sm-veckan/sm-veckan-rally-final-sasong-1-sm-veckan-rally-final',
- 'md5': 'ade3def0643fa1c40587a422f98edfd9',
- 'info_dict': {
- 'id': '2609989',
- 'ext': 'flv',
- 'title': 'SM veckan vinter, Örebro - Rally, final',
- 'duration': 4500,
- 'thumbnail': 're:^https?://.*[\.-]jpg$',
- 'age_limit': 0,
- },
- }, {
- 'url': 'http://www.oppetarkiv.se/video/1058509/rederiet-sasong-1-avsnitt-1-av-318',
- 'md5': 'c3101a17ce9634f4c1f9800f0746c187',
- 'info_dict': {
- 'id': '1058509',
- 'ext': 'flv',
- 'title': 'Farlig kryssning',
- 'duration': 2566,
- 'thumbnail': 're:^https?://.*[\.-]jpg$',
- 'age_limit': 0,
- },
- 'skip': 'Only works from Sweden',
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- host = mobj.group('host')
-
- info = self._download_json(
- 'http://www.%s.se/video/%s?output=json' % (host, video_id), video_id)
+class SVTBaseIE(InfoExtractor):
+ def _extract_video(self, url, video_id):
+ info = self._download_json(url, video_id)
title = info['context']['title']
thumbnail = info['context'].get('thumbnailImage')
'duration': duration,
'age_limit': age_limit,
}
+
+
+class SVTIE(SVTBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P<widget_id>\d+)&.*?\barticleId=(?P<id>\d+)'
+ _TEST = {
+        'url': 'http://www.svt.se/wd?widgetId=23991&sectionId=541&articleId=2900353&type=embed&contextSectionId=123&autostart=false',
+ 'md5': '9648197555fc1b49e3dc22db4af51d46',
+ 'info_dict': {
+ 'id': '2900353',
+ 'ext': 'flv',
+ 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
+ 'duration': 27,
+ 'age_limit': 0,
+ },
+ }
+
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'(?:<iframe src|href)="(?P<url>%s[^"]*)"' % SVTIE._VALID_URL, webpage)
+ if mobj:
+ return mobj.group('url')
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ widget_id = mobj.group('widget_id')
+ article_id = mobj.group('id')
+ return self._extract_video(
+ 'http://www.svt.se/wd?widgetId=%s&articleId=%s&format=json&type=embed&output=json' % (widget_id, article_id),
+ article_id)
+
+
+class SVTPlayIE(SVTBaseIE):
+ IE_DESC = 'SVT Play and Öppet arkiv'
+ _VALID_URL = r'https?://(?:www\.)?(?P<host>svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.svtplay.se/video/2609989/sm-veckan/sm-veckan-rally-final-sasong-1-sm-veckan-rally-final',
+ 'md5': 'ade3def0643fa1c40587a422f98edfd9',
+ 'info_dict': {
+ 'id': '2609989',
+ 'ext': 'flv',
+ 'title': 'SM veckan vinter, Örebro - Rally, final',
+ 'duration': 4500,
+ 'thumbnail': 're:^https?://.*[\.-]jpg$',
+ 'age_limit': 0,
+ },
+ }, {
+ 'url': 'http://www.oppetarkiv.se/video/1058509/rederiet-sasong-1-avsnitt-1-av-318',
+ 'md5': 'c3101a17ce9634f4c1f9800f0746c187',
+ 'info_dict': {
+ 'id': '1058509',
+ 'ext': 'flv',
+ 'title': 'Farlig kryssning',
+ 'duration': 2566,
+ 'thumbnail': 're:^https?://.*[\.-]jpg$',
+ 'age_limit': 0,
+ },
+ 'skip': 'Only works from Sweden',
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ host = mobj.group('host')
+ return self._extract_video(
+ 'http://www.%s.se/video/%s?output=json' % (host, video_id),
+ video_id)
+# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import base64
+import binascii
import re
+import json
from .common import InfoExtractor
-from ..utils import qualities
+from ..utils import (
+ ExtractorError,
+ qualities,
+)
+from ..compat import compat_ord
class TeamcocoIE(InfoExtractor):
'ext': 'mp4',
'title': 'Conan Becomes A Mary Kay Beauty Consultant',
'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.',
+ 'duration': 504,
'age_limit': 0,
}
}, {
'ext': 'mp4',
'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.',
'title': 'Louis C.K. Interview Pt. 1 11/3/11',
+ 'duration': 288,
'age_limit': 0,
}
+ }, {
+ 'url': 'http://teamcoco.com/video/timothy-olyphant-drinking-whiskey',
+ 'info_dict': {
+ 'id': '88748',
+ 'ext': 'mp4',
+ 'title': 'Timothy Olyphant Raises A Toast To “Justified”',
+ 'description': 'md5:15501f23f020e793aeca761205e42c24',
+ },
+ 'params': {
+ 'skip_download': True, # m3u8 downloads
+ }
}
]
_VIDEO_ID_REGEXES = (
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('display_id')
- webpage = self._download_webpage(url, display_id)
+ webpage, urlh = self._download_webpage_handle(url, display_id)
+ if 'src=expired' in urlh.geturl():
+ raise ExtractorError('This video is expired.', expected=True)
video_id = mobj.group('video_id')
if not video_id:
video_id = self._html_search_regex(
self._VIDEO_ID_REGEXES, webpage, 'video id')
- embed_url = 'http://teamcoco.com/embed/v/%s' % video_id
- embed = self._download_webpage(
- embed_url, video_id, 'Downloading embed page')
+ data = None
+
+ preload_codes = self._html_search_regex(
+ r'(function.+)setTimeout\(function\(\)\{playlist',
+ webpage, 'preload codes')
+        base64_fragments = re.findall(r'"([a-zA-Z0-9+/=]+)"', preload_codes)
+ base64_fragments.remove('init')
- encoded_data = self._search_regex(
- r'"preload"\s*:\s*"([^"]+)"', embed, 'encoded data')
- data = self._parse_json(
- base64.b64decode(encoded_data.encode('ascii')).decode('utf-8'), video_id)
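+        # The preload JSON is no longer served as a single base64 string; the
+        # page embeds it as shuffled base64 fragments instead. Drop a contiguous
+        # run of fragments, then try every rotation of the remainder until one
+        # decodes to a JSON object (first byte '{')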
+ def _check_sequence(cur_fragments):
+ if not cur_fragments:
+ return
+ for i in range(len(cur_fragments)):
+ cur_sequence = (''.join(cur_fragments[i:] + cur_fragments[:i])).encode('ascii')
+ try:
+ raw_data = base64.b64decode(cur_sequence)
+ if compat_ord(raw_data[0]) == compat_ord('{'):
+ return json.loads(raw_data.decode('utf-8'))
+ except (TypeError, binascii.Error, UnicodeDecodeError, ValueError):
+ continue
+
+ def _check_data():
+ for i in range(len(base64_fragments) + 1):
+ for j in range(i, len(base64_fragments) + 1):
+ data = _check_sequence(base64_fragments[:i] + base64_fragments[j:])
+ if data:
+ return data
+
+        self.to_screen('Trying to compute the data sequence. This may take some time.')
+ data = _check_data()
+
+ if not data:
+ raise ExtractorError(
+ 'Preload information could not be extracted', expected=True)
formats = []
get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])
for filed in data['files']:
- m_format = re.search(r'(\d+(k|p))\.mp4', filed['url'])
- if m_format is not None:
- format_id = m_format.group(1)
+ if filed['type'] == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ filed['url'], video_id, ext='mp4'))
else:
- format_id = filed['bitrate']
- tbr = (
- int(filed['bitrate'])
- if filed['bitrate'].isdigit()
- else None)
-
- formats.append({
- 'url': filed['url'],
- 'ext': 'mp4',
- 'tbr': tbr,
- 'format_id': format_id,
- 'quality': get_quality(format_id),
- })
+ m_format = re.search(r'(\d+(k|p))\.mp4', filed['url'])
+ if m_format is not None:
+ format_id = m_format.group(1)
+ else:
+ format_id = filed['bitrate']
+ tbr = (
+ int(filed['bitrate'])
+ if filed['bitrate'].isdigit()
+ else None)
+
+ formats.append({
+ 'url': filed['url'],
+ 'ext': 'mp4',
+ 'tbr': tbr,
+ 'format_id': format_id,
+ 'quality': get_quality(format_id),
+ })
self._sort_formats(formats)
'title': data['title'],
'thumbnail': data.get('thumb', {}).get('href'),
'description': data.get('teaser'),
+ 'duration': data.get('duration'),
'age_limit': self._family_friendly_search(webpage),
}
from .common import InfoExtractor
-from ..compat import (
- compat_str,
-)
+from ..compat import compat_str
+from ..utils import int_or_none
class TEDIE(InfoExtractor):
+ IE_NAME = 'ted'
_VALID_URL = r'''(?x)
(?P<proto>https?://)
(?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/
finfo = self._NATIVE_FORMATS.get(f['format_id'])
if finfo:
f.update(finfo)
- else:
- # Use rtmp downloads
- formats = [{
- 'format_id': f['name'],
- 'url': talk_info['streamer'],
- 'play_path': f['file'],
- 'ext': 'flv',
- 'width': f['width'],
- 'height': f['height'],
- 'tbr': f['bitrate'],
- } for f in talk_info['resources']['rtmp']]
+
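+        # talk_info['resources'] maps each delivery method ('h264', 'rtmp',
+        # 'hls') to its resource list; build formats for every method we recognize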
+ for format_id, resources in talk_info['resources'].items():
+ if format_id == 'h264':
+ for resource in resources:
+ bitrate = int_or_none(resource.get('bitrate'))
+ formats.append({
+ 'url': resource['file'],
+ 'format_id': '%s-%sk' % (format_id, bitrate),
+ 'tbr': bitrate,
+ })
+ elif format_id == 'rtmp':
+ streamer = talk_info.get('streamer')
+ if not streamer:
+ continue
+ for resource in resources:
+ formats.append({
+ 'format_id': '%s-%s' % (format_id, resource.get('name')),
+ 'url': streamer,
+ 'play_path': resource['file'],
+ 'ext': 'flv',
+ 'width': int_or_none(resource.get('width')),
+ 'height': int_or_none(resource.get('height')),
+ 'tbr': int_or_none(resource.get('bitrate')),
+ })
+ elif format_id == 'hls':
+ hls_formats = self._extract_m3u8_formats(
+ resources.get('stream'), video_name, 'mp4', m3u8_id=format_id)
+ for f in hls_formats:
+ if f.get('format_id') == 'hls-meta':
+ continue
+ if not f.get('height'):
+ f['vcodec'] = 'none'
+ else:
+ f['acodec'] = 'none'
+ formats.extend(hls_formats)
+
+ audio_download = talk_info.get('audioDownload')
+ if audio_download:
+ formats.append({
+ 'url': audio_download,
+ 'format_id': 'audio',
+ 'vcodec': 'none',
+ 'preference': -0.5,
+ })
+
self._sort_formats(formats)
video_id = compat_str(talk_info['id'])
'id': '60163',
'display_id': '5-weird-ways-plants-can-eat-animals',
'duration': 275,
- 'ext': 'mp4',
+ 'ext': 'webm',
'title': '5 Weird Ways Plants Can Eat Animals',
'description': 'Why have some plants evolved to eat meat?',
'thumbnail': 're:^https?://.*\.jpg$',
'uploader': 'DNews',
'uploader_id': 'dnews',
},
+ }, {
+ 'url': 'https://testtube.com/iflscience/insane-jet-ski-flipping',
+ 'info_dict': {
+ 'id': 'fAGfJ4YjVus',
+ 'ext': 'mp4',
+ 'title': 'Flipping Jet-Ski Skills | Outrageous Acts of Science',
+ 'uploader': 'Science Channel',
+ 'uploader_id': 'ScienceChannel',
+ 'upload_date': '20150203',
+ 'description': 'md5:e61374030015bae1d2e22f096d4769d6',
+ }
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
+
+ youtube_url = self._html_search_regex(
+            r'<iframe[^>]+src="((?:https?:)?//www\.youtube\.com/embed/[^"]+)"',
+ webpage, 'youtube iframe', default=None)
+ if youtube_url:
+ return self.url_result(youtube_url, 'Youtube', video_id=display_id)
+
video_id = self._search_regex(
r"player\.loadRevision3Item\('video_id',\s*([0-9]+)\);",
webpage, 'video ID')
ExtractorError,
xpath_with_ns,
unsmuggle_url,
+ int_or_none,
)
_x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
|theplatform:)(?P<id>[^/\?&]+)'''
- _TEST = {
+ _TESTS = [{
# from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/
'url': 'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true',
'info_dict': {
# rtmp download
'skip_download': True,
},
- }
+ }, {
+ # from http://www.cnet.com/videos/tesla-model-s-a-second-step-towards-a-cleaner-motoring-future/
+ 'url': 'http://link.theplatform.com/s/kYEXFC/22d_qsQ6MIRT',
+ 'info_dict': {
+ 'id': '22d_qsQ6MIRT',
+ 'ext': 'flv',
+ 'description': 'md5:ac330c9258c04f9d7512cf26b9595409',
+ 'title': 'Tesla Model S: A second step towards a cleaner motoring future',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }]
@staticmethod
def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
error_msg = next(
n.attrib['abstract']
for n in meta.findall(_x('.//smil:ref'))
- if n.attrib.get('title') == 'Geographic Restriction')
+            if n.attrib.get('title') in ('Geographic Restriction', 'Expired'))
except StopIteration:
pass
else:
body = meta.find(_x('smil:body'))
f4m_node = body.find(_x('smil:seq//smil:video'))
+ if f4m_node is None:
+ f4m_node = body.find(_x('smil:seq/smil:video'))
if f4m_node is not None and '.f4m' in f4m_node.attrib['src']:
f4m_url = f4m_node.attrib['src']
if 'manifest.f4m?' not in f4m_url:
else:
formats = []
switch = body.find(_x('smil:switch'))
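+            # Some SMIL documents nest the <switch> inside one or more <par>
+            # elements, so fall back to progressively different locations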
+ if switch is None:
+ switch = body.find(_x('smil:par//smil:switch'))
+ if switch is None:
+ switch = body.find(_x('smil:par/smil:switch'))
+ if switch is None:
+ switch = body.find(_x('smil:par'))
if switch is not None:
base_url = head.find(_x('smil:meta')).attrib['base']
for f in switch.findall(_x('smil:video')):
attr = f.attrib
- width = int(attr['width'])
- height = int(attr['height'])
- vbr = int(attr['system-bitrate']) // 1000
+ width = int_or_none(attr.get('width'))
+ height = int_or_none(attr.get('height'))
+ vbr = int_or_none(attr.get('system-bitrate'), 1000)
-                    format_id = '%dx%d_%dk' % (width, height, vbr)
+                    format_id = '%dx%d_%dk' % (width or 0, height or 0, vbr or 0)
formats.append({
'format_id': format_id,
})
else:
switch = body.find(_x('smil:seq//smil:switch'))
+ if switch is None:
+ switch = body.find(_x('smil:seq/smil:switch'))
for f in switch.findall(_x('smil:video')):
attr = f.attrib
- vbr = int(attr['system-bitrate']) // 1000
+ vbr = int_or_none(attr.get('system-bitrate'), 1000)
ext = determine_ext(attr['src'])
if ext == 'once':
ext = 'mp4'
'formats': formats,
'description': info['description'],
'thumbnail': info['defaultThumbnailUrl'],
- 'duration': info['duration'] // 1000,
+ 'duration': int_or_none(info.get('duration'), 1000),
}
'description': self._og_search_description(webpage),
'thumbnail': self._html_search_meta('ThumbURL', webpage),
}
+
+
+class TMZArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tmz\.com/\d{4}/\d{2}/\d{2}/(?P<id>[^/]+)/?'
+ _TEST = {
+ 'url': 'http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert',
+ 'md5': 'e482a414a38db73087450e3a6ce69d00',
+ 'info_dict': {
+ 'id': '0_6snoelag',
+ 'ext': 'mp4',
+ 'title': 'Bobby Brown Tells Crowd ... Bobbi Kristina is Awake',
+ 'description': 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+ embedded_video_info_str = self._html_search_regex(
+ r'tmzVideoEmbedV2\("([^)]+)"\);', webpage, 'embedded video info')
+
+ embedded_video_info = self._parse_json(
+ embedded_video_info_str, video_id,
+ transform_source=lambda s: s.replace('\\', ''))
+
+ return self.url_result(
+ 'http://www.tmz.com/videos/%s/' % embedded_video_info['id'])
'url': video_url,
'ext': 'mp4',
'title': video_title,
- 'description': self._og_search_description(webpage),
- 'thumbnail': self._og_search_thumbnail(webpage),
+ 'description': self._og_search_description(webpage, default=None),
+ 'thumbnail': self._og_search_thumbnail(webpage, default=None),
}
_VALID_URL = r'''(?x)http://(?:www\.)?
(?:tvplay\.lv/parraides|
tv3play\.lt/programos|
+ play\.tv3\.lt/programos|
tv3play\.ee/sisu|
tv3play\.se/program|
tv6play\.se/program|
},
},
{
- 'url': 'http://www.tv3play.lt/programos/moterys-meluoja-geriau/409229?autostart=true',
+ 'url': 'http://play.tv3.lt/programos/moterys-meluoja-geriau/409229?autostart=true',
'info_dict': {
'id': '409229',
'ext': 'flv',
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+# 22Tracks regularly replaces the audio tracks that can be streamed on its
+# site. Tracks usually expire after one month, so we can't add tests.
+
+
+class TwentyTwoTracksIE(InfoExtractor):
+ _VALID_URL = r'https?://22tracks\.com/(?P<city>[a-z]+)/(?P<genre>[\da-z]+)/(?P<id>\d+)'
+ IE_NAME = '22tracks:track'
+
+ _API_BASE = 'http://22tracks.com/api'
+
+ def _extract_info(self, city, genre_name, track_id=None):
+ item_id = track_id if track_id else genre_name
+
+ cities = self._download_json(
+ '%s/cities' % self._API_BASE, item_id,
+ 'Downloading cities info',
+ 'Unable to download cities info')
+ city_id = [x['id'] for x in cities if x['slug'] == city][0]
+
+ genres = self._download_json(
+ '%s/genres/%s' % (self._API_BASE, city_id), item_id,
+ 'Downloading %s genres info' % city,
+ 'Unable to download %s genres info' % city)
+ genre = [x for x in genres if x['slug'] == genre_name][0]
+ genre_id = genre['id']
+
+ tracks = self._download_json(
+ '%s/tracks/%s' % (self._API_BASE, genre_id), item_id,
+ 'Downloading %s genre tracks info' % genre_name,
+ 'Unable to download track info')
+
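+        # With a track_id, return that single track's info dict; otherwise
+        # return the genre title together with the full track list for the playlist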
+ return [x for x in tracks if x['id'] == item_id][0] if track_id else [genre['title'], tracks]
+
+ def _get_track_url(self, filename, track_id):
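+        # Track files sit behind signed URLs; the token endpoint returns the
+        # real path plus the 'st' and 'e' signature parameters to append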
+ token = self._download_json(
+ 'http://22tracks.com/token.php?desktop=true&u=/128/%s' % filename,
+ track_id, 'Downloading token', 'Unable to download token')
+ return 'http://audio.22tracks.com%s?st=%s&e=%d' % (token['filename'], token['st'], token['e'])
+
+ def _extract_track_info(self, track_info, track_id):
+ download_url = self._get_track_url(track_info['filename'], track_id)
+ title = '%s - %s' % (track_info['artist'].strip(), track_info['title'].strip())
+ return {
+ 'id': track_id,
+ 'url': download_url,
+ 'ext': 'mp3',
+ 'title': title,
+ 'duration': int_or_none(track_info.get('duration')),
+ 'timestamp': int_or_none(track_info.get('published_at') or track_info.get('created'))
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+
+ city = mobj.group('city')
+ genre = mobj.group('genre')
+ track_id = mobj.group('id')
+
+ track_info = self._extract_info(city, genre, track_id)
+ return self._extract_track_info(track_info, track_id)
+
+
+class TwentyTwoTracksGenreIE(TwentyTwoTracksIE):
+ _VALID_URL = r'https?://22tracks\.com/(?P<city>[a-z]+)/(?P<genre>[\da-z]+)/?$'
+ IE_NAME = '22tracks:genre'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+
+ city = mobj.group('city')
+ genre = mobj.group('genre')
+
+ genre_title, tracks = self._extract_info(city, genre)
+
+ entries = [
+ self._extract_track_info(track_info, track_info['id'])
+ for track_info in tracks]
+
+ return self.playlist_result(entries, genre, genre_title)
_API_BASE = 'https://api.twitch.tv'
_USHER_BASE = 'http://usher.twitch.tv'
_LOGIN_URL = 'https://secure.twitch.tv/user/login'
+ _LOGIN_POST_URL = 'https://secure-login.twitch.tv/login'
+ _NETRC_MACHINE = 'twitch'
def _handle_error(self, response):
if not isinstance(response, dict):
'authenticity_token': authenticity_token,
'redirect_on_login': '',
'embed_form': 'false',
- 'mp_source_action': '',
+ 'mp_source_action': 'login-button',
'follow': '',
- 'user[login]': username,
- 'user[password]': password,
+ 'login': username,
+ 'password': password,
}
request = compat_urllib_request.Request(
- self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
+ self._LOGIN_POST_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
request.add_header('Referer', self._LOGIN_URL)
response = self._download_webpage(
request, None, 'Logging in as %s' % username)
raise ExtractorError(
'Unable to login: %s' % m.group('msg').strip(), expected=True)
+ def _prefer_source(self, formats):
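+        # 'Source' is the original broadcast quality; the remaining renditions
+        # are transcoded (and limited to 30 fps), so rank Source above the rest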
+ try:
+ source = next(f for f in formats if f['format_id'] == 'Source')
+ source['preference'] = 10
+ except StopIteration:
+ pass # No Source stream present
+ self._sort_formats(formats)
+
class TwitchItemBaseIE(TwitchBaseIE):
def _download_info(self, item, item_id):
class TwitchVideoIE(TwitchItemBaseIE):
IE_NAME = 'twitch:video'
- _VALID_URL = r'%s/[^/]+/b/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
+ _VALID_URL = r'%s/[^/]+/b/(?P<id>\d+)' % TwitchBaseIE._VALID_URL_BASE
_ITEM_TYPE = 'video'
_ITEM_SHORTCUT = 'a'
class TwitchChapterIE(TwitchItemBaseIE):
IE_NAME = 'twitch:chapter'
- _VALID_URL = r'%s/[^/]+/c/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
+ _VALID_URL = r'%s/[^/]+/c/(?P<id>\d+)' % TwitchBaseIE._VALID_URL_BASE
_ITEM_TYPE = 'chapter'
_ITEM_SHORTCUT = 'c'
class TwitchVodIE(TwitchItemBaseIE):
IE_NAME = 'twitch:vod'
- _VALID_URL = r'%s/[^/]+/v/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
+ _VALID_URL = r'%s/[^/]+/v/(?P<id>\d+)' % TwitchBaseIE._VALID_URL_BASE
_ITEM_TYPE = 'vod'
_ITEM_SHORTCUT = 'v'
'%s/vod/%s?nauth=%s&nauthsig=%s'
% (self._USHER_BASE, item_id, access_token['token'], access_token['sig']),
item_id, 'mp4')
+ self._prefer_source(formats)
info['formats'] = formats
return info
'p': random.randint(1000000, 10000000),
'player': 'twitchweb',
'segment_preference': '4',
- 'sig': access_token['sig'],
- 'token': access_token['token'],
+ 'sig': access_token['sig'].encode('utf-8'),
+ 'token': access_token['token'].encode('utf-8'),
}
-
formats = self._extract_m3u8_formats(
'%s/api/channel/hls/%s.m3u8?%s'
- % (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query).encode('utf-8')),
+ % (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query)),
channel_id, 'mp4')
-
- # prefer the 'source' stream, the others are limited to 30 fps
- def _sort_source(f):
- if f.get('m3u8_media') is not None and f['m3u8_media'].get('NAME') == 'Source':
- return 1
- return 0
- formats = sorted(formats, key=_sort_source)
+ self._prefer_source(formats)
view_count = stream.get('viewers')
timestamp = parse_iso8601(stream.get('created_at'))
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+from .common import InfoExtractor
+from ..utils import (
+ js_to_json,
+ ExtractorError,
+)
+from ..compat import compat_urlparse
+
+
+class UDNEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://video\.udn\.com/(?:embed|play)/news/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://video.udn.com/embed/news/300040',
+ 'md5': 'de06b4c90b042c128395a88f0384817e',
+ 'info_dict': {
+ 'id': '300040',
+ 'ext': 'mp4',
+ 'title': '生物老師男變女 全校挺"做自己"',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ }
+ }, {
+ 'url': 'https://video.udn.com/embed/news/300040',
+ 'only_matching': True,
+ }, {
+ # From https://video.udn.com/news/303776
+ 'url': 'https://video.udn.com/play/news/303776',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ page = self._download_webpage(url, video_id)
+
+ options = json.loads(js_to_json(self._html_search_regex(
+ r'var options\s*=\s*([^;]+);', page, 'video urls dictionary')))
+
+ video_urls = options['video']
+
+ if video_urls.get('youtube'):
+ return self.url_result(video_urls.get('youtube'), 'Youtube')
+
+        # A falsy 'youtube' entry may still be present; drop it before building formats
+        video_urls.pop('youtube', None)
+
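+        # Each remaining entry maps a delivery type to an API path whose
+        # response body is the actual media URL, hence _download_webpage below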
+ formats = [{
+ 'url': self._download_webpage(
+ compat_urlparse.urljoin(url, api_url), video_id,
+                'Retrieving URL for %s video' % video_type),
+ 'format_id': video_type,
+ 'preference': 0 if video_type == 'mp4' else -1,
+ } for video_type, api_url in video_urls.items() if api_url]
+
+ if not formats:
+ raise ExtractorError('No videos found', expected=True)
+
+ self._sort_formats(formats)
+
+ thumbnail = None
+
+ if options.get('gallery') and len(options['gallery']):
+ thumbnail = options['gallery'][0].get('original')
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': options['title'],
+ 'thumbnail': thumbnail
+ }
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ qualities,
+ unified_strdate,
+ clean_html,
+)
+
+
+class UltimediaIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?ultimedia\.com/default/index/video[^/]+/id/(?P<id>[\da-z]+)'
+ _TESTS = [{
+ # news
+ 'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r',
+ 'md5': '276a0e49de58c7e85d32b057837952a2',
+ 'info_dict': {
+ 'id': 's8uk0r',
+ 'ext': 'mp4',
+ 'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées',
+ 'description': 'md5:3e5c8fd65791487333dda5db8aed32af',
+ 'thumbnail': 're:^https?://.*\.jpg',
+ 'upload_date': '20150317',
+ },
+ }, {
+ # music
+ 'url': 'https://www.ultimedia.com/default/index/videomusic/id/xvpfp8',
+ 'md5': '2ea3513813cf230605c7e2ffe7eca61c',
+ 'info_dict': {
+ 'id': 'xvpfp8',
+ 'ext': 'mp4',
+ 'title': "Two - C'est la vie (Clip)",
+ 'description': 'Two',
+ 'thumbnail': 're:^https?://.*\.jpg',
+ 'upload_date': '20150224',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ deliver_url = self._search_regex(
+ r'<iframe[^>]+src="(https?://(?:www\.)?ultimedia\.com/deliver/[^"]+)"',
+ webpage, 'deliver URL')
+
+ deliver_page = self._download_webpage(
+ deliver_url, video_id, 'Downloading iframe page')
+
+ if '>This video is currently not available' in deliver_page:
+ raise ExtractorError(
+ 'Video %s is currently not available' % video_id, expected=True)
+
+ player = self._parse_json(
+ self._search_regex(
+ r"jwplayer\('player(?:_temp)?'\)\.setup\(({.+?})\)\.on", deliver_page, 'player'),
+ video_id)
+
+ quality = qualities(['flash', 'html5'])
+ formats = []
+ for mode in player['modes']:
+ video_url = mode.get('config', {}).get('file')
+ if not video_url:
+ continue
+ if re.match(r'https?://www\.youtube\.com/.+?', video_url):
+ return self.url_result(video_url, 'Youtube')
+ formats.append({
+ 'url': video_url,
+ 'format_id': mode.get('type'),
+ 'quality': quality(mode.get('type')),
+ })
+ self._sort_formats(formats)
+
+ thumbnail = player.get('image')
+
+ title = clean_html((
+ self._html_search_regex(
+ r'(?s)<div\s+id="catArticle">.+?</div>(.+?)</h1>',
+ webpage, 'title', default=None) or
+ self._search_regex(
+ r"var\s+nameVideo\s*=\s*'([^']+)'",
+ deliver_page, 'title')))
+
+ description = clean_html(self._html_search_regex(
+ r'(?s)<span>Description</span>(.+?)</p>', webpage,
+ 'description', fatal=False))
+
+ upload_date = unified_strdate(self._search_regex(
+ r'Ajouté le\s*<span>([^<]+)', webpage,
+ 'upload date', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'formats': formats,
+ }
from __future__ import unicode_literals
+import json
import re
from .common import InfoExtractor
from ..compat import (
compat_urlparse,
)
+from ..utils import ExtractorError
class UstreamIE(InfoExtractor):
_VALID_URL = r'https?://www\.ustream\.tv/(?P<type>recorded|embed|embed/recorded)/(?P<videoID>\d+)'
IE_NAME = 'ustream'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.ustream.tv/recorded/20274954',
'md5': '088f151799e8f572f84eb62f17d73e5c',
'info_dict': {
'uploader': 'Young Americans for Liberty',
'title': 'Young Americans for Liberty February 7, 2012 2:28 AM',
},
- }
+ }, {
+ # From http://sportscanada.tv/canadagames/index.php/week2/figure-skating/444
+        # Title and uploader are available only from the params JSON
+ 'url': 'http://www.ustream.tv/embed/recorded/59307601?ub=ff0000&lc=ff0000&oc=ffffff&uc=ffffff&v=3&wmode=direct',
+ 'md5': '5a2abf40babeac9812ed20ae12d34e10',
+ 'info_dict': {
+ 'id': '59307601',
+ 'ext': 'flv',
+ 'title': '-CG11- Canada Games Figure Skating',
+ 'uploader': 'sportscanadatv',
+ }
+ }]
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
desktop_url = 'http://www.ustream.tv/recorded/' + desktop_video_id
return self.url_result(desktop_url, 'Ustream')
- video_url = 'http://tcdn.ustream.tv/video/%s' % video_id
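+        # The Viewer.getVideo gateway call takes its arguments as a JSON literal
+        # appended to the request URL and returns the flv URL and player metadata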
+ params = self._download_json(
+ 'http://cdngw.ustream.tv/rgwjson/Viewer.getVideo/' + json.dumps({
+ 'brandId': 1,
+ 'videoId': int(video_id),
+ 'autoplay': False,
+ }), video_id)
+
+ if 'error' in params:
+ raise ExtractorError(params['error']['message'], expected=True)
+
+ video_url = params['flv']
+
webpage = self._download_webpage(url, video_id)
self.report_extraction(video_id)
video_title = self._html_search_regex(r'data-title="(?P<title>.+)"',
- webpage, 'title')
+ webpage, 'title', default=None)
+
+ if not video_title:
+ try:
+ video_title = params['moduleConfig']['meta']['title']
+ except KeyError:
+ pass
+
+ if not video_title:
+ video_title = 'Ustream video ' + video_id
uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
- webpage, 'uploader', fatal=False, flags=re.DOTALL)
+ webpage, 'uploader', fatal=False, flags=re.DOTALL, default=None)
+
+ if not uploader:
+ try:
+ uploader = params['moduleConfig']['meta']['userName']
+ except KeyError:
+ uploader = None
thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"',
webpage, 'thumbnail', fatal=False)
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class Varzesh3IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?video\.varzesh3\.com/(?:[^/]+/)+(?P<id>[^/]+)/?'
+ _TEST = {
+ 'url': 'http://video.varzesh3.com/germany/bundesliga/5-%D9%88%D8%A7%DA%A9%D9%86%D8%B4-%D8%A8%D8%B1%D8%AA%D8%B1-%D8%AF%D8%B1%D9%88%D8%A7%D8%B2%D9%87%E2%80%8C%D8%A8%D8%A7%D9%86%D8%A7%D9%86%D8%9B%D9%87%D9%81%D8%AA%D9%87-26-%D8%A8%D9%88%D9%86%D8%AF%D8%B3/',
+ 'md5': '2a933874cb7dce4366075281eb49e855',
+ 'info_dict': {
+ 'id': '76337',
+ 'ext': 'mp4',
+ 'title': '۵ واکنش برتر دروازهبانان؛هفته ۲۶ بوندسلیگا',
+ 'description': 'فصل ۲۰۱۵-۲۰۱۴',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ }
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ video_url = self._search_regex(
+ r'<source[^>]+src="([^"]+)"', webpage, 'video url')
+
+ title = self._og_search_title(webpage)
+ description = self._html_search_regex(
+ r'(?s)<div class="matn">(.+?)</div>',
+ webpage, 'description', fatal=False)
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ video_id = self._search_regex(
+ r"<link[^>]+rel='(?:canonical|shortlink)'[^>]+href='/\?p=([^']+)'",
+ webpage, display_id, default=display_id)
+
+ return {
+ 'url': video_url,
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ }
class VeeHDIE(InfoExtractor):
_VALID_URL = r'https?://veehd\.com/video/(?P<id>\d+)'
- _TEST = {
+    # VeeHD videos seem to have multiple copies on several servers, each with
+    # a different MD5 checksum, so omit the md5 field in all tests
+ _TESTS = [{
'url': 'http://veehd.com/video/4639434_Solar-Sinter',
'info_dict': {
'id': '4639434',
'uploader_id': 'VideoEyes',
'description': 'md5:46a840e8692ddbaffb5f81d9885cb457',
},
- }
+ 'skip': 'Video deleted',
+ }, {
+ 'url': 'http://veehd.com/video/4905758_Elysian-Fields-Channeling',
+ 'info_dict': {
+ 'id': '4905758',
+ 'ext': 'mp4',
+ 'title': 'Elysian Fields - Channeling',
+ 'description': 'md5:360e4e95fdab58aefbea0f2a19e5604b',
+ 'uploader_id': 'spotted',
+ }
+ }, {
+ 'url': 'http://veehd.com/video/2046729_2012-2009-DivX-Trailer',
+ 'info_dict': {
+ 'id': '2046729',
+ 'ext': 'avi',
+ 'title': '2012 (2009) DivX Trailer',
+ 'description': 'md5:75435ee95255e6a9838ac6f6f3a2396b',
+ 'uploader_id': 'Movie_Trailers',
+ }
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
player_page = self._download_webpage(
player_url, video_id, 'Downloading player page')
+ video_url = None
+
config_json = self._search_regex(
r'value=\'config=({.+?})\'', player_page, 'config json', default=None)
if config_json:
config = json.loads(config_json)
video_url = compat_urlparse.unquote(config['clip']['url'])
- else:
+
+ if not video_url:
+ video_url = self._html_search_regex(
+ r'<embed[^>]+type="video/divx"[^>]+src="([^"]+)"',
+ player_page, 'video url', default=None)
+
+ if not video_url:
iframe_src = self._search_regex(
r'<iframe[^>]+src="/?([^"]+)"', player_page, 'iframe url')
iframe_url = 'http://veehd.com/%s' % iframe_src
'id': video_id,
'title': title,
'url': video_url,
- 'ext': 'mp4',
'uploader_id': uploader_id,
'thumbnail': thumbnail,
'description': description,
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_request
+from ..utils import (
+ ExtractorError,
+ parse_iso8601,
+)
+
+
+class VesselIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?vessel\.com/videos/(?P<id>[0-9a-zA-Z]+)'
+ _API_URL_TEMPLATE = 'https://www.vessel.com/api/view/items/%s'
+ _LOGIN_URL = 'https://www.vessel.com/api/account/login'
+ _NETRC_MACHINE = 'vessel'
+ _TEST = {
+ 'url': 'https://www.vessel.com/videos/HDN7G5UMs',
+ 'md5': '455cdf8beb71c6dd797fd2f3818d05c4',
+ 'info_dict': {
+ 'id': 'HDN7G5UMs',
+ 'ext': 'mp4',
+ 'title': 'Nvidia GeForce GTX Titan X - The Best Video Card on the Market?',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'upload_date': '20150317',
+ 'description': 'Did Nvidia pull out all the stops on the Titan X, or does its performance leave something to be desired?',
+ 'timestamp': int,
+ },
+ }
+
+ @staticmethod
+ def make_json_request(url, data):
+ payload = json.dumps(data).encode('utf-8')
+ req = compat_urllib_request.Request(url, payload)
+ req.add_header('Content-Type', 'application/json; charset=utf-8')
+ return req
+
+ @staticmethod
+ def find_assets(data, asset_type, asset_id=None):
+ for asset in data.get('assets', []):
+            if asset.get('type') != asset_type:
+                continue
+            if asset_id is not None and asset.get('id') != asset_id:
+                continue
+            yield asset
+
+ def _check_access_rights(self, data):
+ access_info = data.get('__view', {})
+ if not access_info.get('allow_access', True):
+ err_code = access_info.get('error_code') or ''
+ if err_code == 'ITEM_PAID_ONLY':
+ raise ExtractorError(
+                    'This video requires a subscription.', expected=True)
+ else:
+ raise ExtractorError(
+ 'Access to this content is restricted. (%s said: %s)' % (self.IE_NAME, err_code), expected=True)
+
+ def _login(self):
+ (username, password) = self._get_login_info()
+ if username is None:
+ return
+ self.report_login()
+ data = {
+ 'client_id': 'web',
+ 'type': 'password',
+ 'user_key': username,
+ 'password': password,
+ }
+ login_request = VesselIE.make_json_request(self._LOGIN_URL, data)
+ self._download_webpage(login_request, None, False, 'Wrong login info')
+
+ def _real_initialize(self):
+ self._login()
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+ data = self._parse_json(self._search_regex(
+ r'App\.bootstrapData\((.*?)\);', webpage, 'data'), video_id)
+ asset_id = data['model']['data']['id']
+
+ req = VesselIE.make_json_request(
+ self._API_URL_TEMPLATE % asset_id, {'client': 'web'})
+ data = self._download_json(req, video_id)
+ video_asset_id = data.get('main_video_asset')
+
+ self._check_access_rights(data)
+
+ try:
+ video_asset = next(
+ VesselIE.find_assets(data, 'video', asset_id=video_asset_id))
+ except StopIteration:
+ raise ExtractorError('No video assets found')
+
+ formats = []
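+        # An 'hls-index' source points at a master m3u8 playlist; every other
+        # source is a progressive rendition with explicit bitrate and dimensions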
+ for f in video_asset.get('sources', []):
+ if f['name'] == 'hls-index':
+ formats.extend(self._extract_m3u8_formats(
+ f['location'], video_id, ext='mp4', m3u8_id='m3u8'))
+ else:
+ formats.append({
+ 'format_id': f['name'],
+ 'tbr': f.get('bitrate'),
+ 'height': f.get('height'),
+ 'width': f.get('width'),
+ 'url': f['location'],
+ })
+ self._sort_formats(formats)
+
+ thumbnails = []
+ for im_asset in VesselIE.find_assets(data, 'image'):
+ thumbnails.append({
+ 'url': im_asset['location'],
+ 'width': im_asset.get('width', 0),
+ 'height': im_asset.get('height', 0),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': data['title'],
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'description': data.get('short_description'),
+ 'duration': data.get('duration'),
+ 'comment_count': data.get('comment_count'),
+ 'like_count': data.get('like_count'),
+ 'view_count': data.get('view_count'),
+ 'timestamp': parse_iso8601(data.get('released_at')),
+ }
class VGTVIE(InfoExtractor):
- _VALID_URL = r'http://(?:www\.)?vgtv\.no/#!/(?:.*)/(?P<id>[0-9]+)'
+ IE_DESC = 'VGTV and BTTV'
+ _VALID_URL = r'''(?x)
+ (?:
+ vgtv:|
+ http://(?:www\.)?
+ )
+ (?P<host>vgtv|bt)
+ (?:
+ :|
+ \.no/(?:tv/)?\#!/(?:video|live)/
+ )
+ (?P<id>[0-9]+)
+ '''
_TESTS = [
{
# streamType: vod
'skip_download': True,
},
},
+ {
+ 'url': 'http://www.bt.no/tv/#!/video/100250/norling-dette-er-forskjellen-paa-1-divisjon-og-eliteserien',
+ 'only_matching': True,
+ },
]
def _real_extract(self, url):
- video_id = self._match_id(url)
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ host = mobj.group('host')
+
+ HOST_WEBSITES = {
+ 'vgtv': 'vgtv',
+ 'bt': 'bttv',
+ }
+
data = self._download_json(
- 'http://svp.vg.no/svp/api/v1/vgtv/assets/%s?appName=vgtv-website' % video_id,
+ 'http://svp.vg.no/svp/api/v1/%s/assets/%s?appName=%s-website'
+ % (host, video_id, HOST_WEBSITES[host]),
video_id, 'Downloading media JSON')
streams = data['streamUrls']
hls_url = streams.get('hls')
if hls_url:
- formats.extend(self._extract_m3u8_formats(hls_url, video_id, 'mp4'))
+ formats.extend(self._extract_m3u8_formats(
+ hls_url, video_id, 'mp4', m3u8_id='hls'))
hds_url = streams.get('hds')
if hds_url:
- formats.extend(self._extract_f4m_formats(hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', video_id))
+ formats.extend(self._extract_f4m_formats(
+ hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
+ video_id, f4m_id='hds'))
mp4_url = streams.get('mp4')
if mp4_url:
'view_count': data['displays'],
'formats': formats,
}
+
+
+class BTArticleIE(InfoExtractor):
+ IE_NAME = 'bt:article'
+ IE_DESC = 'Bergens Tidende Articles'
+    _VALID_URL = r'http://(?:www\.)?bt\.no/(?:[^/]+/)+(?P<id>[^/]+)-\d+\.html'
+ _TEST = {
+ 'url': 'http://www.bt.no/nyheter/lokalt/Kjemper-for-internatet-1788214.html',
+ 'md5': 'd055e8ee918ef2844745fcfd1a4175fb',
+ 'info_dict': {
+ 'id': '23199',
+ 'ext': 'mp4',
+ 'title': 'Alrekstad internat',
+ 'description': 'md5:dc81a9056c874fedb62fc48a300dac58',
+ 'thumbnail': 're:^https?://.*\.jpg',
+ 'duration': 191,
+ 'timestamp': 1289991323,
+ 'upload_date': '20101117',
+ 'view_count': int,
+ },
+ }
+
+ def _real_extract(self, url):
+ webpage = self._download_webpage(url, self._match_id(url))
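+ # Article pages embed the player via an SVP.Player.load(<id>) call; delegate to the VGTV extractor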
+ video_id = self._search_regex(
+ r'SVP\.Player\.load\(\s*(\d+)', webpage, 'video id')
+ return self.url_result('vgtv:bt:%s' % video_id, 'VGTV')
+
+
+class BTVestlendingenIE(InfoExtractor):
+ IE_NAME = 'bt:vestlendingen'
+ IE_DESC = 'Bergens Tidende - Vestlendingen'
+ _VALID_URL = r'http://(?:www\.)?bt\.no/spesial/vestlendingen/#!/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.bt.no/spesial/vestlendingen/#!/86588',
+ 'md5': 'd7d17e3337dc80de6d3a540aefbe441b',
+ 'info_dict': {
+ 'id': '86588',
+ 'ext': 'mov',
+ 'title': 'Otto Wollertsen',
+ 'description': 'Vestlendingen Otto Fredrik Wollertsen',
+ 'timestamp': 1430473209,
+ 'upload_date': '20150501',
+ },
+ }
+
+ def _real_extract(self, url):
+ return self.url_result('xstream:btno:%s' % self._match_id(url), 'Xstream')
r'embedCode=([^&\'"]+)', webpage,
'ooyala embed code')
ooyala_url = OoyalaIE._url_for_embed_code(embed_code)
- print(ooyala_url)
except ExtractorError:
raise ExtractorError('The page doesn\'t contain a video', expected=True)
return self.url_result(ooyala_url, ie='Ooyala')
import re
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse,
- compat_urllib_request,
-)
-from ..utils import (
- ExtractorError,
- remove_start,
-)
+from ..compat import compat_urllib_request
class VideoMegaIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://
(?:www\.)?videomega\.tv/
- (?:iframe\.php)?\?ref=(?P<id>[A-Za-z0-9]+)
+ (?:iframe\.php|cdn\.php)?\?ref=(?P<id>[A-Za-z0-9]+)
'''
_TEST = {
- 'url': 'http://videomega.tv/?ref=QR0HCUHI1661IHUCH0RQ',
+ 'url': 'http://videomega.tv/?ref=4GNA688SU99US886ANG4',
'md5': 'bf5c2f95c4c917536e80936af7bc51e1',
'info_dict': {
- 'id': 'QR0HCUHI1661IHUCH0RQ',
+ 'id': '4GNA688SU99US886ANG4',
'ext': 'mp4',
- 'title': 'Big Buck Bunny',
+ 'title': 'BigBuckBunny_320x180',
'thumbnail': 're:^https?://.*\.jpg$',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
- iframe_url = 'http://videomega.tv/iframe.php?ref={0:}'.format(video_id)
+ iframe_url = 'http://videomega.tv/cdn.php?ref=%s' % video_id
req = compat_urllib_request.Request(iframe_url)
req.add_header('Referer', url)
webpage = self._download_webpage(req, video_id)
- try:
- escaped_data = re.findall(r'unescape\("([^"]+)"\)', webpage)[-1]
- except IndexError:
- raise ExtractorError('Unable to extract escaped data')
-
- playlist = compat_urllib_parse.unquote(escaped_data)
-
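+ # The page <title> wraps the video name in site branding ("VideoMega.tv - ..." or "... - videomega.tv"), so strip it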
+ title = self._html_search_regex(
+ r'<title>(.*?)</title>', webpage, 'title')
+ title = re.sub(
+ r'(?:^[Vv]ideo[Mm]ega\.tv\s-\s?|\s?-\svideomega\.tv$)', '', title)
thumbnail = self._search_regex(
- r'image:\s*"([^"]+)"', playlist, 'thumbnail', fatal=False)
- video_url = self._search_regex(r'file:\s*"([^"]+)"', playlist, 'URL')
- title = remove_start(self._html_search_regex(
- r'<title>(.*?)</title>', webpage, 'title'), 'VideoMega.tv - ')
-
- formats = [{
- 'format_id': 'sd',
- 'url': video_url,
- }]
- self._sort_formats(formats)
+ r'<video[^>]+?poster="([^"]+)"', webpage, 'thumbnail', fatal=False)
+ video_url = self._search_regex(
+ r'<source[^>]+?src="([^"]+)"', webpage, 'video URL')
return {
'id': video_id,
'title': title,
- 'formats': formats,
+ 'url': video_url,
'thumbnail': thumbnail,
'http_headers': {
'Referer': iframe_url,
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- video_url = self._html_search_regex(r'<source src="([^"]+)"', webpage, 'video URL')
+ video_url = self._html_search_regex(
+ r'<source src="([^"]+)"', webpage, 'video URL')
title = self._og_search_title(webpage)
description = self._og_search_description(webpage, default='')
duration = float_or_none(self._html_search_regex(
r'data-duration="([^"]+)"', webpage, 'duration', fatal=False))
view_count = str_to_int(self._html_search_regex(
- r'<span class="video_views">\s*([\d,\.]+)\s*plays?', webpage, 'view count', fatal=False))
+ r'<(?:li|span) class="video_views">\s*([\d,\.]+)\s*plays?', webpage, 'view count', fatal=False))
like_count = str_to_int(self._html_search_regex(
r'class="score js-video-vote-score"[^>]+data-score="([\d,\.\s]+)">',
webpage, 'like count', fatal=False))
- comment_count = str_to_int(self._html_search_regex(
- r'class="js-comment-count"[^>]+data-count="([\d,\.\s]+)">',
- webpage, 'comment count', fatal=False))
return {
'id': video_id,
'duration': duration,
'view_count': view_count,
'like_count': like_count,
- 'comment_count': comment_count,
}
--- /dev/null
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_request
+
+
+class ViewsterIE(InfoExtractor):
+ _VALID_URL = r'http://(?:www\.)?viewster\.com/movie/(?P<id>\d+-\d+-\d+)'
+ _TESTS = [{
+ # movielink, paymethod=fre
+ 'url': 'http://www.viewster.com/movie/1293-19341-000/hout-wood/',
+ 'playlist': [{
+ 'md5': '8f9d94b282d80c42b378dffdbb11caf3',
+ 'info_dict': {
+ 'id': '1293-19341-000-movie',
+ 'ext': 'flv',
+ 'title': "'Hout' (Wood) - Movie",
+ },
+ }],
+ 'info_dict': {
+ 'id': '1293-19341-000',
+ 'title': "'Hout' (Wood)",
+ 'description': 'md5:925733185a9242ef96f436937683f33b',
+ }
+ }, {
+ # movielink, paymethod=adv
+ 'url': 'http://www.viewster.com/movie/1140-11855-000/the-listening-project/',
+ 'playlist': [{
+ 'md5': '77a005453ca7396cbe3d35c9bea30aef',
+ 'info_dict': {
+ 'id': '1140-11855-000-movie',
+ 'ext': 'flv',
+ 'title': "THE LISTENING PROJECT - Movie",
+ },
+ }],
+ 'info_dict': {
+ 'id': '1140-11855-000',
+ 'title': "THE LISTENING PROJECT",
+ 'description': 'md5:714421ae9957e112e672551094bf3b08',
+ }
+ }, {
+ # direct links, no movielink
+ 'url': 'http://www.viewster.com/movie/1198-56411-000/sinister/',
+ 'playlist': [{
+ 'md5': '0307b7eac6bfb21ab0577a71f6eebd8f',
+ 'info_dict': {
+ 'id': '1198-56411-000-trailer',
+ 'ext': 'mp4',
+ 'title': "Sinister - Trailer",
+ },
+ }, {
+ 'md5': '80b9ee3ad69fb368f104cb5d9732ae95',
+ 'info_dict': {
+ 'id': '1198-56411-000-behind-scenes',
+ 'ext': 'mp4',
+ 'title': "Sinister - Behind Scenes",
+ },
+ }, {
+ 'md5': '3b3ea897ecaa91fca57a8a94ac1b15c5',
+ 'info_dict': {
+ 'id': '1198-56411-000-scene-from-movie',
+ 'ext': 'mp4',
+ 'title': "Sinister - Scene from movie",
+ },
+ }],
+ 'info_dict': {
+ 'id': '1198-56411-000',
+ 'title': "Sinister",
+ 'description': 'md5:014c40b0488848de9683566a42e33372',
+ }
+ }]
+
+ _ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ request = compat_urllib_request.Request(
+ 'http://api.live.viewster.com/api/v1/movie/%s' % video_id)
+ request.add_header('Accept', self._ACCEPT_HEADER)
+
+ movie = self._download_json(
+ request, video_id, 'Downloading movie metadata JSON')
+
+ title = movie.get('title') or movie['original_title']
+ description = movie.get('synopsis')
+ thumbnail = movie.get('large_artwork') or movie.get('artwork')
+
+ entries = []
+ for clip in movie['play_list']:
+ entry = None
+
+ # movielink api
+ link_request = clip.get('link_request')
+ if link_request:
+ request = compat_urllib_request.Request(
+ 'http://api.live.viewster.com/api/v1/movielink?movieid=%(movieid)s&action=%(action)s&paymethod=%(paymethod)s&price=%(price)s&currency=%(currency)s&language=%(language)s&subtitlelanguage=%(subtitlelanguage)s&ischromecast=%(ischromecast)s'
+ % link_request)
+ request.add_header('Accept', self._ACCEPT_HEADER)
+
+ movie_link = self._download_json(
+ request, video_id, 'Downloading movie link JSON', fatal=False)
+
+ if movie_link:
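+ # The movielink URL points at an f4m manifest; the hdcore/plugin parameters appear to be required for it to be served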
+ formats = self._extract_f4m_formats(
+ movie_link['url'] + '&hdcore=3.2.0&plugin=flowplayer-3.2.0.1', video_id)
+ self._sort_formats(formats)
+ entry = {
+ 'formats': formats,
+ }
+
+ # direct link
+ clip_url = clip.get('clip_data', {}).get('url')
+ if clip_url:
+ entry = {
+ 'url': clip_url,
+ 'ext': 'mp4',
+ }
+
+ if entry:
+ entry.update({
+ 'id': '%s-%s' % (video_id, clip['canonical_title']),
+ 'title': '%s - %s' % (title, clip['title']),
+ })
+ entries.append(entry)
+
+ playlist = self.playlist_result(entries, video_id, title, description)
+ playlist['thumbnail'] = thumbnail
+ return playlist
import re
-from ..compat import compat_urlparse
+from ..compat import (
+ compat_urlparse,
+ compat_urllib_request,
+)
from ..utils import (
ExtractorError,
unescapeHTML,
unified_strdate,
US_RATINGS,
+ determine_ext,
+ mimetype2ext,
)
from .common import InfoExtractor
class VikiIE(InfoExtractor):
IE_NAME = 'viki'
+ # iPad2
+ _USER_AGENT = 'Mozilla/5.0 (iPad; U; CPU OS 4_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8F191 Safari/6533.18.5'
+
_VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
'info_dict': {
'id': '1023585v',
'age_limit': 13,
},
'skip': 'Blocked in the US',
- }
+ }, {
+ 'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference',
+ 'md5': 'ca6493e6f0a6ec07da9aa8d6304b4b2c',
+ 'info_dict': {
+ 'id': '1067139v',
+ 'ext': 'mp4',
+ 'description': 'md5:d70b2f9428f5488321bfe1db10d612ea',
+ 'upload_date': '20150430',
+ 'title': '\'The Avengers: Age of Ultron\' Press Conference',
+ }
+ }, {
+ 'url': 'http://www.viki.com/videos/1048879v-ankhon-dekhi',
+ 'info_dict': {
+ 'id': '1048879v',
+ 'ext': 'mp4',
+ 'upload_date': '20140820',
+ 'description': 'md5:54ff56d51bdfc7a30441ec967394e91c',
+ 'title': 'Ankhon Dekhi',
+ },
+ 'params': {
+ # requires ffmpeg
+ 'skip_download': True,
+ }
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
'rating information', default='').strip()
age_limit = US_RATINGS.get(rating_str)
- info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
+ req = compat_urllib_request.Request(
+ 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id)
+ req.add_header('User-Agent', self._USER_AGENT)
info_webpage = self._download_webpage(
- info_url, video_id, note='Downloading info page')
- if re.match(r'\s*<div\s+class="video-error', info_webpage):
- raise ExtractorError(
- 'Video %s is blocked from your location.' % video_id,
- expected=True)
- video_url = self._html_search_regex(
- r'<source[^>]+src="([^"]+)"', info_webpage, 'video URL')
+ req, video_id, note='Downloading info page')
+ err_msg = self._html_search_regex(
+ r'<div[^>]+class="video-error[^>]+>(.+)</div>',
+ info_webpage, 'error message', default=None)
+ if err_msg:
+ if 'not available in your region' in err_msg:
+ raise ExtractorError(
+ 'Video %s is blocked from your location.' % video_id,
+ expected=True)
+ else:
+ raise ExtractorError('Viki said: ' + err_msg)
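+ # The fragment page exposes a single <source> tag; keep its MIME type to derive the extension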
+ mobj = re.search(
+ r'<source[^>]+type="(?P<mime_type>[^"]+)"[^>]+src="(?P<url>[^"]+)"', info_webpage)
+ if not mobj:
+ raise ExtractorError('Unable to find video URL')
+ video_url = unescapeHTML(mobj.group('url'))
+ video_ext = mimetype2ext(mobj.group('mime_type'))
+
+ if determine_ext(video_url) == 'm3u8':
+ formats = self._extract_m3u8_formats(
+ video_url, video_id, ext=video_ext)
+ else:
+ formats = [{
+ 'url': video_url,
+ 'ext': video_ext,
+ }]
upload_date_str = self._html_search_regex(
r'"created_at":"([^"]+)"', info_webpage, 'upload date')
return {
'id': video_id,
'title': title,
- 'url': video_url,
+ 'formats': formats,
'description': description,
'thumbnail': thumbnail,
'age_limit': age_limit,
import json
import re
import itertools
-import hashlib
from .common import InfoExtractor
from ..compat import (
RegexNotFoundError,
smuggle_url,
std_headers,
+ unified_strdate,
unsmuggle_url,
urlencode_postdata,
)
self.report_login()
login_url = 'https://vimeo.com/log_in'
webpage = self._download_webpage(login_url, None, False)
- token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
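+ # The xsrft token is now embedded in a JSON blob instead of a JS assignment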
+ token = self._search_regex(r'xsrft":"(.*?)"', webpage, 'login token')
data = urlencode_postdata({
'email': username,
'password': password,
'description': 'md5:8678b246399b070816b12313e8b4eb5c',
'uploader_id': 'atencio',
'uploader': 'Peter Atencio',
+ 'upload_date': '20130927',
'duration': 187,
},
},
password = self._downloader.params.get('videopassword', None)
if password is None:
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
- token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
- data = compat_urllib_parse.urlencode({
+ token = self._search_regex(r'xsrft[\s=:"\']+([^"\']+)', webpage, 'login token')
+ data = urlencode_postdata({
'password': password,
'token': token,
})
- # I didn't manage to use the password with https
- if url.startswith('https'):
- pass_url = url.replace('https', 'http')
- else:
- pass_url = url
- password_request = compat_urllib_request.Request(pass_url + '/password', data)
+ if url.startswith('http://'):
+ # vimeo only supports https now, but the user can give an http url
+ url = url.replace('http://', 'https://')
+ password_request = compat_urllib_request.Request(url + '/password', data)
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
password_request.add_header('Cookie', 'xsrft=%s' % token)
return self._download_webpage(
video_id = mobj.group('id')
orig_url = url
if mobj.group('pro') or mobj.group('player'):
- url = 'http://player.vimeo.com/video/' + video_id
-
- password = self._downloader.params.get('videopassword', None)
- if password:
- headers['Cookie'] = '%s_password=%s' % (
- video_id, hashlib.md5(password.encode('utf-8')).hexdigest())
+ url = 'https://player.vimeo.com/video/' + video_id
+ else:
+ url = 'https://vimeo.com/' + video_id
# Retrieve video webpage to extract further information
request = compat_urllib_request.Request(url, None, headers)
# and latter we extract those that are Vimeo specific.
self.report_extraction(video_id)
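+ # vimeo.config carries a seed_status block when transcoding failed; surface that error early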
+ vimeo_config = self._search_regex(
+ r'vimeo\.config\s*=\s*({.+?});', webpage,
+ 'vimeo config', default=None)
+ if vimeo_config:
+ seed_status = self._parse_json(vimeo_config, video_id).get('seed_status', {})
+ if seed_status.get('state') == 'failed':
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, seed_status['title']),
+ expected=True)
+
# Extract the config JSON
try:
try:
# Extract upload date
video_upload_date = None
- mobj = re.search(r'<meta itemprop="dateCreated" content="(\d{4})-(\d{2})-(\d{2})T', webpage)
+ mobj = re.search(r'<time[^>]+datetime="([^"]+)"', webpage)
if mobj is not None:
- video_upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3)
+ video_upload_date = unified_strdate(mobj.group(1))
try:
view_count = int(self._search_regex(r'UserPlays:(\d+)', webpage, 'view count'))
for tt in text_tracks:
subtitles[tt['lang']] = [{
'ext': 'vtt',
- 'url': 'http://vimeo.com' + tt['url'],
+ 'url': 'https://vimeo.com' + tt['url'],
}]
return {
class VimeoChannelIE(InfoExtractor):
IE_NAME = 'vimeo:channel'
- _VALID_URL = r'https?://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])'
+ _VALID_URL = r'https://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])'
_MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
_TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'
_TESTS = [{
- 'url': 'http://vimeo.com/channels/tributes',
+ 'url': 'https://vimeo.com/channels/tributes',
'info_dict': {
'id': 'tributes',
'title': 'Vimeo Tributes',
name="([^"]+)"\s+
value="([^"]*)"
''', login_form))
- token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
+ token = self._search_regex(r'xsrft[\s=:"\']+([^"\']+)', webpage, 'login token')
fields['token'] = token
fields['password'] = password
- post = compat_urllib_parse.urlencode(fields)
+ post = urlencode_postdata(fields)
password_path = self._search_regex(
r'action="([^"]+)"', login_form, 'password URL')
password_url = compat_urlparse.urljoin(page_url, password_path)
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
break
- entries = [self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
+ entries = [self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo')
for video_id in video_ids]
return {'_type': 'playlist',
'id': list_id,
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
channel_id = mobj.group('id')
- return self._extract_videos(channel_id, 'http://vimeo.com/channels/%s' % channel_id)
+ return self._extract_videos(channel_id, 'https://vimeo.com/channels/%s' % channel_id)
class VimeoUserIE(VimeoChannelIE):
IE_NAME = 'vimeo:user'
- _VALID_URL = r'https?://vimeo\.com/(?![0-9]+(?:$|[?#/]))(?P<name>[^/]+)(?:/videos|[#?]|$)'
+ _VALID_URL = r'https://vimeo\.com/(?![0-9]+(?:$|[?#/]))(?P<name>[^/]+)(?:/videos|[#?]|$)'
_TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'
_TESTS = [{
- 'url': 'http://vimeo.com/nkistudio/videos',
+ 'url': 'https://vimeo.com/nkistudio/videos',
'info_dict': {
'title': 'Nki',
'id': 'nkistudio',
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
name = mobj.group('name')
- return self._extract_videos(name, 'http://vimeo.com/%s' % name)
+ return self._extract_videos(name, 'https://vimeo.com/%s' % name)
class VimeoAlbumIE(VimeoChannelIE):
IE_NAME = 'vimeo:album'
- _VALID_URL = r'https?://vimeo\.com/album/(?P<id>\d+)'
+ _VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)'
_TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
_TESTS = [{
- 'url': 'http://vimeo.com/album/2632481',
+ 'url': 'https://vimeo.com/album/2632481',
'info_dict': {
'id': '2632481',
'title': 'Staff Favorites: November 2013',
def _real_extract(self, url):
album_id = self._match_id(url)
- return self._extract_videos(album_id, 'http://vimeo.com/album/%s' % album_id)
+ return self._extract_videos(album_id, 'https://vimeo.com/album/%s' % album_id)
class VimeoGroupsIE(VimeoAlbumIE):
IE_NAME = 'vimeo:group'
- _VALID_URL = r'(?:https?://)?vimeo\.com/groups/(?P<name>[^/]+)'
+ _VALID_URL = r'https://vimeo\.com/groups/(?P<name>[^/]+)'
_TESTS = [{
- 'url': 'http://vimeo.com/groups/rolexawards',
+ 'url': 'https://vimeo.com/groups/rolexawards',
'info_dict': {
'id': 'rolexawards',
'title': 'Rolex Awards for Enterprise',
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
name = mobj.group('name')
- return self._extract_videos(name, 'http://vimeo.com/groups/%s' % name)
+ return self._extract_videos(name, 'https://vimeo.com/groups/%s' % name)
class VimeoReviewIE(InfoExtractor):
IE_NAME = 'vimeo:review'
IE_DESC = 'Review pages on vimeo'
- _VALID_URL = r'https?://vimeo\.com/[^/]+/review/(?P<id>[^/]+)'
+ _VALID_URL = r'https://vimeo\.com/[^/]+/review/(?P<id>[^/]+)'
_TESTS = [{
'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
'md5': 'c507a72f780cacc12b2248bb4006d253',
}
}, {
'note': 'video player needs Referer',
- 'url': 'http://vimeo.com/user22258446/review/91613211/13f927e053',
+ 'url': 'https://vimeo.com/user22258446/review/91613211/13f927e053',
'md5': '6295fdab8f4bf6a002d058b2c6dce276',
'info_dict': {
'id': '91613211',
class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE):
IE_NAME = 'vimeo:watchlater'
IE_DESC = 'Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)'
- _VALID_URL = r'https?://vimeo\.com/home/watchlater|:vimeowatchlater'
+ _VALID_URL = r'https://vimeo\.com/home/watchlater|:vimeowatchlater'
_LOGIN_REQUIRED = True
_TITLE_RE = r'href="/home/watchlater".*?>(.*?)<'
_TESTS = [{
- 'url': 'http://vimeo.com/home/watchlater',
+ 'url': 'https://vimeo.com/home/watchlater',
'only_matching': True,
}]
class VimeoLikesIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?vimeo\.com/user(?P<id>[0-9]+)/likes/?(?:$|[?#]|sort:)'
+ _VALID_URL = r'https://(?:www\.)?vimeo\.com/user(?P<id>[0-9]+)/likes/?(?:$|[?#]|sort:)'
IE_NAME = 'vimeo:likes'
IE_DESC = 'Vimeo user likes'
_TEST = {
description = self._html_search_meta('description', webpage)
def _get_page(idx):
- page_url = '%s//vimeo.com/user%s/likes/page:%d/sort:date' % (
- self.http_scheme(), user_id, idx + 1)
+ page_url = 'https://vimeo.com/user%s/likes/page:%d/sort:date' % (
+ user_id, idx + 1)
webpage = self._download_webpage(
page_url, user_id,
note='Downloading page %d/%d' % (idx + 1, page_count))
-# coding: utf-8
from __future__ import unicode_literals
-import base64
-import re
-import xml.etree.ElementTree
-import zlib
-
from .common import InfoExtractor
from ..utils import int_or_none
class VimpleIE(InfoExtractor):
- IE_DESC = 'Vimple.ru'
- _VALID_URL = r'https?://(player.vimple.ru/iframe|vimple.ru)/(?P<id>[a-f0-9]{10,})'
+ IE_DESC = 'Vimple - one-click video hosting'
+ _VALID_URL = r'https?://(?:player\.vimple\.ru/iframe|vimple\.ru)/(?P<id>[\da-f-]{32,36})'
_TESTS = [
{
'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf',
'md5': '2e750a330ed211d3fd41821c6ad9a279',
'info_dict': {
- 'id': 'c0f6b1687dcd4000a97ebe70068039cf',
+ 'id': 'c0f6b168-7dcd-4000-a97e-be70068039cf',
'ext': 'mp4',
'title': 'Sunset',
'duration': 20,
'thumbnail': 're:https?://.*?\.jpg',
},
- },
+ }, {
+ 'url': 'http://player.vimple.ru/iframe/52e1beec-1314-4a83-aeac-c61562eadbf9',
+ 'only_matching': True,
+ }
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
- iframe_url = 'http://player.vimple.ru/iframe/%s' % video_id
-
- iframe = self._download_webpage(
- iframe_url, video_id,
- note='Downloading iframe', errnote='unable to fetch iframe')
- player_url = self._html_search_regex(
- r'"(http://player.vimple.ru/flash/.+?)"', iframe, 'player url')
+ video_id = self._match_id(url)
- player = self._request_webpage(
- player_url, video_id, note='Downloading swf player').read()
+ webpage = self._download_webpage(
+ 'http://player.vimple.ru/iframe/%s' % video_id, video_id)
- player = zlib.decompress(player[8:])
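+ # The iframe inlines a sprutoData JSON config whose first playlist entry describes the video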
+ playlist = self._parse_json(
+ self._search_regex(
+ r'sprutoData\s*:\s*({.+?}),\r\n', webpage, 'spruto data'),
+ video_id)['playlist'][0]
- xml_pieces = re.findall(b'([a-zA-Z0-9 =+/]{500})', player)
- xml_pieces = [piece[1:-1] for piece in xml_pieces]
+ title = playlist['title']
+ video_id = playlist.get('videoId') or video_id
+ thumbnail = playlist.get('posterUrl') or playlist.get('thumbnailUrl')
+ duration = int_or_none(playlist.get('duration'))
- xml_data = b''.join(xml_pieces)
- xml_data = base64.b64decode(xml_data)
-
- xml_data = xml.etree.ElementTree.fromstring(xml_data)
-
- video = xml_data.find('Video')
- quality = video.get('quality')
- q_tag = video.find(quality.capitalize())
-
- formats = [
- {
- 'url': q_tag.get('url'),
- 'tbr': int(q_tag.get('bitrate')),
- 'filesize': int(q_tag.get('filesize')),
- 'format_id': quality,
- },
- ]
+ formats = [{
+ 'url': f['url'],
+ } for f in playlist['video']]
+ self._sort_formats(formats)
return {
'id': video_id,
- 'title': video.find('Title').text,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
'formats': formats,
- 'thumbnail': video.find('Poster').get('url'),
- 'duration': int_or_none(video.get('duration')),
- 'webpage_url': video.find('Share').get('videoPageUrl'),
}
from __future__ import unicode_literals
import re
-import json
import itertools
from .common import InfoExtractor
class VineIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?vine\.co/v/(?P<id>\w+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:www\.)?vine\.co/(?:v|oembed)/(?P<id>\w+)'
+ _TESTS = [{
'url': 'https://vine.co/v/b9KOOWX7HUx',
'md5': '2f36fed6235b16da96ce9b4dc890940d',
'info_dict': {
'uploader': 'Jack Dorsey',
'uploader_id': '76',
},
- }
+ }, {
+ 'url': 'https://vine.co/v/MYxVapFvz2z',
+ 'md5': '7b9a7cbc76734424ff942eb52c8f1065',
+ 'info_dict': {
+ 'id': 'MYxVapFvz2z',
+ 'ext': 'mp4',
+ 'title': 'Fuck Da Police #Mikebrown #justice #ferguson #prayforferguson #protesting #NMOS14',
+ 'alt_title': 'Vine by Luna',
+ 'description': 'Fuck Da Police #Mikebrown #justice #ferguson #prayforferguson #protesting #NMOS14',
+ 'upload_date': '20140815',
+ 'uploader': 'Luna',
+ 'uploader_id': '1102363502380728320',
+ },
+ }, {
+ 'url': 'https://vine.co/v/bxVjBbZlPUH',
+ 'md5': 'ea27decea3fa670625aac92771a96b73',
+ 'info_dict': {
+ 'id': 'bxVjBbZlPUH',
+ 'ext': 'mp4',
+ 'title': '#mw3 #ac130 #killcam #angelofdeath',
+ 'alt_title': 'Vine by Z3k3',
+ 'description': '#mw3 #ac130 #killcam #angelofdeath',
+ 'upload_date': '20130430',
+ 'uploader': 'Z3k3',
+ 'uploader_id': '936470460173008896',
+ },
+ }, {
+ 'url': 'https://vine.co/oembed/MYxVapFvz2z.json',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id)
- data = json.loads(self._html_search_regex(
- r'window\.POST_DATA = { %s: ({.+?}) }' % video_id, webpage, 'vine data'))
+ data = self._parse_json(
+ self._html_search_regex(
+ r'window\.POST_DATA = { %s: ({.+?}) };\s*</script>' % video_id,
+ webpage, 'vine data'),
+ video_id)
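+ # data['videoUrls'] lists one entry per format/bitrate combination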
formats = [{
- 'url': data['videoLowURL'],
- 'ext': 'mp4',
- 'format_id': 'low',
- }, {
- 'url': data['videoUrl'],
- 'ext': 'mp4',
- 'format_id': 'standard',
- }]
+ 'format_id': '%(format)s-%(rate)s' % f,
+ 'vcodec': f['format'],
+ 'quality': f['rate'],
+ 'url': f['videoUrl'],
+ } for f in data['videoUrls']]
+
+ self._sort_formats(formats)
return {
'id': video_id,
'title': self._og_search_title(webpage),
- 'alt_title': self._og_search_description(webpage),
+ 'alt_title': self._og_search_description(webpage, default=None),
'description': data['description'],
'thumbnail': data['thumbnailUrl'],
'upload_date': unified_strdate(data['created']),
'id': '162222515',
'ext': 'flv',
'title': 'ProtivoGunz - Хуёвая песня',
- 'uploader': 're:Noize MC.*',
+ 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
'duration': 195,
'upload_date': '20120212',
},
if not video_id:
video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))
- info_url = 'http://vk.com/al_video.php?act=show&al=1&video=%s' % video_id
+ info_url = 'http://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id
info_page = self._download_webpage(info_url, video_id)
ERRORS = {
'use --username and --password options to provide account credentials.',
r'<!>Unknown error':
- 'Video %s does not exist.'
+ 'Video %s does not exist.',
+
+ r'<!>Видео временно недоступно':
+ 'Video %s is temporarily unavailable.',
}
for error_re, error_msg in ERRORS.items():
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_request,
+ compat_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ int_or_none,
+)
+
+
+class VoiceRepublicIE(InfoExtractor):
+ _VALID_URL = r'https?://voicerepublic\.com/(?:talks|embed)/(?P<id>[0-9a-z-]+)'
+ _TESTS = [{
+ 'url': 'http://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state',
+ 'md5': '0554a24d1657915aa8e8f84e15dc9353',
+ 'info_dict': {
+ 'id': '2296',
+ 'display_id': 'watching-the-watchers-building-a-sousveillance-state',
+ 'ext': 'm4a',
+ 'title': 'Watching the Watchers: Building a Sousveillance State',
+ 'description': 'md5:715ba964958afa2398df615809cfecb1',
+ 'thumbnail': 're:^https?://.*\.(?:png|jpg)$',
+ 'duration': 1800,
+ 'view_count': int,
+ }
+ }, {
+ 'url': 'http://voicerepublic.com/embed/watching-the-watchers-building-a-sousveillance-state',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ req = compat_urllib_request.Request(
+ compat_urlparse.urljoin(url, '/talks/%s' % display_id))
+ # Older versions of Firefox get redirected to an "upgrade browser" page
+ req.add_header('User-Agent', 'youtube-dl')
+ webpage = self._download_webpage(req, display_id)
+
+ if '>Queued for processing, please stand by...<' in webpage:
+ raise ExtractorError(
+ 'Audio is still queued for processing', expected=True)
+
+ config = self._search_regex(
+ r'(?s)return ({.+?});\s*\n', webpage,
+ 'data', default=None)
+ data = self._parse_json(config, display_id, fatal=False) if config else None
+ if data:
+ title = data['title']
+ description = data.get('teaser')
+ talk_id = data.get('talk_id') or display_id
+ talk = data['talk']
+ duration = int_or_none(talk.get('duration'))
+ formats = [{
+ 'url': compat_urlparse.urljoin(url, talk_url),
+ 'format_id': format_id,
+ 'ext': determine_ext(talk_url) or format_id,
+ 'vcodec': 'none',
+ } for format_id, talk_url in talk['links'].items()]
+ else:
+ title = self._og_search_title(webpage)
+ description = self._html_search_regex(
+ r"(?s)<div class='talk-teaser'[^>]*>(.+?)</div>",
+ webpage, 'description', fatal=False)
+ talk_id = self._search_regex(
+ [r"id='jc-(\d+)'", r"data-shareable-id='(\d+)'"],
+ webpage, 'talk id', default=None) or display_id
+ duration = None
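+ # Fall back to the jPlayer element, whose data-* attributes map format ids to stream URLs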
+ player = self._search_regex(
+ r"class='vr-player jp-jplayer'([^>]+)>", webpage, 'player')
+ formats = [{
+ 'url': compat_urlparse.urljoin(url, talk_url),
+ 'format_id': format_id,
+ 'ext': determine_ext(talk_url) or format_id,
+ 'vcodec': 'none',
+ } for format_id, talk_url in re.findall(r"data-([^=]+)='([^']+)'", player)]
+ self._sort_formats(formats)
+
+ thumbnail = self._og_search_thumbnail(webpage)
+ view_count = int_or_none(self._search_regex(
+ r"class='play-count[^']*'>\s*(\d+) plays",
+ webpage, 'play count', fatal=False))
+
+ return {
+ 'id': talk_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'formats': formats,
+ }
'duration': 393,
'age_limit': 18,
'view_count': int,
- 'like_count': int,
- 'dislike_count': int,
- 'comment_count': int,
}
},
{
'duration': 588,
'age_limit': 18,
'view_count': int,
- 'like_count': int,
- 'dislike_count': int,
- 'comment_count': int,
}
},
]
title = self._html_search_regex(
r'videoname\s*=\s*\'([^\']+)\'', webpage, 'title').strip()
description = self._html_search_regex(
- r'<div class="description_txt">(.*?)</div>', webpage, 'description', fatal=False)
+ r'class="(?:descr|description_txt)">(.*?)</div>',
+ webpage, 'description', fatal=False)
thumbnail = self._html_search_regex(
r'flashvars\.imageUrl\s*=\s*"([^"]+)"', webpage, 'thumbnail', fatal=False, default=None)
if thumbnail:
thumbnail = 'http://www.vporn.com' + thumbnail
uploader = self._html_search_regex(
- r'(?s)UPLOADED BY.*?<a href="/user/[^"]+">([^<]+)</a>',
+ r'(?s)Uploaded by:.*?<a href="/user/[^"]+"[^>]*>(.+?)</a>',
webpage, 'uploader', fatal=False)
- categories = re.findall(r'<a href="/cat/[^"]+">([^<]+)</a>', webpage)
+ categories = re.findall(r'<a href="/cat/[^"]+"[^>]*>([^<]+)</a>', webpage)
duration = parse_duration(self._search_regex(
- r'duration (\d+ min \d+ sec)', webpage, 'duration', fatal=False))
+ r'Runtime:\s*</span>\s*(\d+ min \d+ sec)',
+ webpage, 'duration', fatal=False))
- view_count = str_to_int(self._html_search_regex(
- r'<span>([\d,\.]+) VIEWS</span>', webpage, 'view count', fatal=False))
- like_count = str_to_int(self._html_search_regex(
- r'<span id="like" class="n">([\d,\.]+)</span>', webpage, 'like count', fatal=False))
- dislike_count = str_to_int(self._html_search_regex(
- r'<span id="dislike" class="n">([\d,\.]+)</span>', webpage, 'dislike count', fatal=False))
+ view_count = str_to_int(self._search_regex(
+ r'class="views">([\d,\.]+) [Vv]iews<',
+ webpage, 'view count', fatal=False))
comment_count = str_to_int(self._html_search_regex(
- r'<h4>Comments \(<b>([\d,\.]+)</b>\)</h4>', webpage, 'comment count', fatal=False))
+ r"'Comments \(([\d,\.]+)\)'",
+ webpage, 'comment count', default=None))
formats = []
'categories': categories,
'duration': duration,
'view_count': view_count,
- 'like_count': like_count,
- 'dislike_count': dislike_count,
'comment_count': comment_count,
'age_limit': 18,
'formats': formats,
video_url = self._download_webpage(
'http://www.wat.tv/get%s?token=%s&getURL=1&country=%s' % (webid, compute_token(webid), country),
real_id,
- 'Downloding %s video URL' % fmt[0],
+ 'Downloading %s video URL' % fmt[0],
'Failed to download %s video URL' % fmt[0],
False)
if not video_url:
class WorldStarHipHopIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/videos/video\.php\?v=(?P<id>.*)'
- _TEST = {
+ _VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/(?:videos|android)/video\.php\?v=(?P<id>.*)'
+ _TESTS = [{
"url": "http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO",
"md5": "9d04de741161603bf7071bbf4e883186",
"info_dict": {
"ext": "mp4",
"title": "KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!"
}
- }
+ }, {
+ 'url': 'http://m.worldstarhiphop.com/android/video.php?v=wshh6a7q1ny0G34ZwuIO',
+ 'md5': 'dc1c76c83ecc4190bb1eb143899b87d3',
+ 'info_dict': {
+ 'id': 'wshh6a7q1ny0G34ZwuIO',
+ 'ext': 'mp4',
+ "title": "KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!"
+ }
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo')
video_url = self._search_regex(
- r'so\.addVariable\("file","(.*?)"\)', webpage, 'video URL')
+ [r'so\.addVariable\("file","(.*?)"\)',
+ r'<div class="artlist">\s*<a[^>]+href="([^"]+)">'],
+ webpage, 'video URL')
if 'youtube' in video_url:
return self.url_result(video_url, ie='Youtube')
video_title = self._html_search_regex(
- r'(?s)<div class="content-heading">\s*<h1>(.*?)</h1>',
+ [r'(?s)<div class="content-heading">\s*<h1>(.*?)</h1>',
+ r'<span[^>]+class="tc-sp-pinned-title">(.*)</span>'],
webpage, 'title')
# Getting thumbnail and if not thumbnail sets correct title for WSHH candy video.
thumbnail = self._html_search_regex(
r'rel="image_src" href="(.*)" />', webpage, 'thumbnail',
- fatal=False)
+ default=None)
if not thumbnail:
_title = r'candytitles.*>(.*)</span>'
mobj = re.search(_title, webpage)
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ xpath_with_ns,
+ xpath_text,
+ find_xpath_attr,
+)
+
+
+class XstreamIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ xstream:|
+ https?://frontend\.xstream\.(?:dk|net)/
+ )
+ (?P<partner_id>[^/]+)
+ (?:
+ :|
+ /feed/video/\?.*?\bid=
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ 'url': 'http://frontend.xstream.dk/btno/feed/video/?platform=web&id=86588',
+ 'md5': 'd7d17e3337dc80de6d3a540aefbe441b',
+ 'info_dict': {
+ 'id': '86588',
+ 'ext': 'mov',
+ 'title': 'Otto Wollertsen',
+ 'description': 'Vestlendingen Otto Fredrik Wollertsen',
+ 'timestamp': 1430473209,
+ 'upload_date': '20150501',
+ },
+ }, {
+ 'url': 'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=21039',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ partner_id = mobj.group('partner_id')
+ video_id = mobj.group('id')
+
+ data = self._download_xml(
+ 'http://frontend.xstream.dk/%s/feed/video/?platform=web&id=%s'
+ % (partner_id, video_id),
+ video_id)
+
+ NS_MAP = {
+ 'atom': 'http://www.w3.org/2005/Atom',
+ 'xt': 'http://xstream.dk/',
+ 'media': 'http://search.yahoo.com/mrss/',
+ }
+
+ entry = data.find(xpath_with_ns('./atom:entry', NS_MAP))
+
+ title = xpath_text(
+ entry, xpath_with_ns('./atom:title', NS_MAP), 'title')
+ description = xpath_text(
+ entry, xpath_with_ns('./atom:summary', NS_MAP), 'description')
+ timestamp = parse_iso8601(xpath_text(
+ entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date'))
+
+ formats = []
+ media_group = entry.find(xpath_with_ns('./media:group', NS_MAP))
+ for media_content in media_group.findall(xpath_with_ns('./media:content', NS_MAP)):
+ media_url = media_content.get('url')
+ if not media_url:
+ continue
+ tbr = int_or_none(media_content.get('bitrate'))
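+ # rtmpdump needs the RTMP URL split into base URL, app and play path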
+ mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', media_url)
+ if mobj:
+ formats.append({
+ 'url': mobj.group('url'),
+ 'play_path': 'mp4:%s' % mobj.group('playpath'),
+ 'app': mobj.group('app'),
+ 'ext': 'flv',
+ 'tbr': tbr,
+ 'format_id': 'rtmp-%d' % tbr if tbr else 'rtmp',
+ })
+ else:
+ formats.append({
+ 'url': media_url,
+ 'tbr': tbr,
+ })
+ self._sort_formats(formats)
+
+ link = find_xpath_attr(
+ entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original')
+ if link is not None:
+ formats.append({
+ 'url': link.get('href'),
+ 'format_id': link.get('rel'),
+ })
+
+ thumbnails = [{
+ 'url': splash.get('url'),
+ 'width': int_or_none(splash.get('width')),
+ 'height': int_or_none(splash.get('height')),
+ } for splash in media_group.findall(xpath_with_ns('./xt:splash', NS_MAP))]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ }
'only_matching': True,
}]
+ @staticmethod
+ def base64_decode_utf8(data):
+ return base64.b64decode(data.encode('utf-8')).decode('utf-8')
+
+ @staticmethod
+ def base64_encode_utf8(data):
+ return base64.b64encode(data.encode('utf-8')).decode('utf-8')
+
def _extract_flv_config(self, media_id):
- base64_media_id = base64.b64encode(media_id.encode('utf-8')).decode('utf-8')
+ base64_media_id = self.base64_encode_utf8(media_id)
flv_config = self._download_xml(
'http://vlog.xuite.net/flash/player?media=%s' % base64_media_id,
'flv config')
prop_dict = {}
for prop in flv_config.findall('./property'):
- prop_id = base64.b64decode(prop.attrib['id']).decode('utf-8')
+ prop_id = self.base64_decode_utf8(prop.attrib['id'])
# CDATA may be empty in flv config
if not prop.text:
continue
- encoded_content = base64.b64decode(prop.text).decode('utf-8')
+ encoded_content = self.base64_decode_utf8(prop.text)
prop_dict[prop_id] = compat_urllib_parse_unquote(encoded_content)
return prop_dict
int_or_none,
)
+from .nbc import NBCSportsVPlayerIE
+
class YahooIE(InfoExtractor):
IE_DESC = 'Yahoo screen and movies'
- _VALID_URL = r'(?P<url>(?P<host>https?://(?:[a-zA-Z]{2}\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?P<display_id>.+?)-(?P<id>[0-9]+)(?:-[a-z]+)?\.html)'
+ _VALID_URL = r'(?P<url>(?P<host>https?://(?:[a-zA-Z]{2}\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?P<display_id>.+)?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html)'
_TESTS = [
{
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
}, {
'url': 'https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html',
'only_matching': True,
+ }, {
+ 'note': 'NBC Sports embeds',
+ 'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313',
+ 'info_dict': {
+ 'id': '9CsDKds0kvHI',
+ 'ext': 'flv',
+ 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
+ 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
+ }
+ }, {
+ 'url': 'https://tw.news.yahoo.com/-100120367.html',
+ 'only_matching': True,
}
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
- display_id = mobj.group('display_id')
+ display_id = mobj.group('display_id') or self._match_id(url)
page_id = mobj.group('id')
url = mobj.group('url')
host = mobj.group('host')
items = json.loads(items_json)
video_id = items[0]['id']
return self._get_info(video_id, display_id, webpage)
+ # Look for NBCSports iframes
+ nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
+ if nbc_sports_url:
+ return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
items_json = self._search_regex(
r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
from ..utils import (
float_or_none,
month_by_abbreviation,
+ ExtractorError,
+ get_element_by_attribute,
)
'id': '2283921',
'ext': 'mp3',
'title': '發現 - 趙薇 京華煙雲主題曲',
+ 'description': '發現 - 趙薇 京華煙雲主題曲',
'uploader_id': 'princekt',
'upload_date': '20080807',
'duration': 313.0,
}
}, {
# An external video hosted on YouTube
- 'url': 'http://mymedia.yam.com/m/3598173',
- 'md5': '0238ceec479c654e8c2f1223755bf3e9',
+ 'url': 'http://mymedia.yam.com/m/3599430',
+ 'md5': '03127cf10d8f35d120a9e8e52e3b17c6',
'info_dict': {
- 'id': 'pJ2Deys283c',
+ 'id': 'CNpEoQlrIgA',
'ext': 'mp4',
- 'upload_date': '20150202',
+ 'upload_date': '20150306',
'uploader': '新莊社大瑜伽社',
- 'description': 'md5:f5cc72f0baf259a70fb731654b0d2eff',
+ 'description': 'md5:11e2e405311633ace874f2e6226c8b17',
'uploader_id': '2323agoy',
- 'title': '外婆的澎湖灣KTV-潘安邦',
+ 'title': '20090412陽明山二子坪-1',
+ },
+ 'skip': 'Video does not exist',
+ }, {
+ 'url': 'http://mymedia.yam.com/m/3598173',
+ 'info_dict': {
+ 'id': '3598173',
+ 'ext': 'mp4',
+ },
+ 'skip': 'Yam system error',
+ }, {
+ 'url': 'http://mymedia.yam.com/m/3599437',
+ 'info_dict': {
+ 'id': '3599437',
+ 'ext': 'mp4',
+ },
+ 'skip': 'invalid YouTube URL',
+ }, {
+ 'url': 'http://mymedia.yam.com/m/2373534',
+ 'md5': '7ff74b91b7a817269d83796f8c5890b1',
+ 'info_dict': {
+ 'id': '2373534',
+ 'ext': 'mp3',
+ 'title': '林俊傑&蔡卓妍-小酒窩',
+ 'description': 'md5:904003395a0fcce6cfb25028ff468420',
+ 'upload_date': '20080928',
+ 'uploader_id': 'onliner2',
}
}]
video_id = self._match_id(url)
page = self._download_webpage(url, video_id)
+ # Check for errors
+ system_msg = self._html_search_regex(
+ r'系統訊息(?:<br>|\n|\r)*([^<>]+)<br>', page, 'system message',
+ default=None)
+ if system_msg:
+ raise ExtractorError(system_msg, expected=True)
+
# Is it hosted externally on YouTube?
youtube_url = self._html_search_regex(
r'<embed src="(http://www.youtube.com/[^"]+)"',
if youtube_url:
return self.url_result(youtube_url, 'Youtube')
+ title = self._html_search_regex(
+ r'<h1[^>]+class="heading"[^>]*>\s*(.+)\s*</h1>', page, 'title')
+
api_page = self._download_webpage(
'http://mymedia.yam.com/api/a/?pID=' + video_id, video_id,
note='Downloading API page')
api_result_obj = compat_urlparse.parse_qs(api_page)
+ info_table = get_element_by_attribute('class', 'info', page)
uploader_id = self._html_search_regex(
- r'<!-- 發表作者 -->:[\n ]+<a href="/([a-z]+)"',
- page, 'uploader id', fatal=False)
- mobj = re.search(r'<!-- 發表於 -->(?P<mon>[A-Z][a-z]{2}) ' +
+ r'<!-- 發表作者 -->:[\n ]+<a href="/([a-z0-9]+)"',
+ info_table, 'uploader id', fatal=False)
+ mobj = re.search(r'<!-- 發表於 -->(?P<mon>[A-Z][a-z]{2})\s+' +
r'(?P<day>\d{1,2}), (?P<year>\d{4})', page)
if mobj:
upload_date = '%s%02d%02d' % (
return {
'id': video_id,
'url': api_result_obj['mp3file'][0],
- 'title': self._html_search_meta('description', page),
+ 'title': title,
+ 'description': self._html_search_meta('description', page),
'duration': duration,
'uploader_id': uploader_id,
'upload_date': upload_date,
--- /dev/null
+# coding=utf-8
+from __future__ import unicode_literals
+
+import re
+import hashlib
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ float_or_none,
+)
+
+
+class YandexMusicBaseIE(InfoExtractor):
+ def _get_track_url(self, storage_dir, track_id):
+ data = self._download_json(
+ 'http://music.yandex.ru/api/v1.5/handlers/api-jsonp.jsx?action=getTrackSrc&p=download-info/%s'
+ % storage_dir,
+ track_id, 'Downloading track location JSON')
+
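+ # The download URL embeds an MD5 over a fixed salt, the path without its leading character and the 's' token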
+ key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + data['path'][1:] + data['s']).encode('utf-8')).hexdigest()
+ storage = storage_dir.split('.')
+
+ return ('http://%s/get-mp3/%s/%s?track-id=%s&from=service-10-track&similarities-experiment=default'
+ % (data['host'], key, data['ts'] + data['path'], storage[1]))
+
+ def _get_track_info(self, track):
+ return {
+ 'id': track['id'],
+ 'ext': 'mp3',
+ 'url': self._get_track_url(track['storageDir'], track['id']),
+ 'title': '%s - %s' % (track['artists'][0]['name'], track['title']),
+ 'filesize': int_or_none(track.get('fileSize')),
+ 'duration': float_or_none(track.get('durationMs'), 1000),
+ }
+
+
+class YandexMusicTrackIE(YandexMusicBaseIE):
+ IE_NAME = 'yandexmusic:track'
+ IE_DESC = 'Яндекс.Музыка - Трек'
+ _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://music.yandex.ru/album/540508/track/4878838',
+ 'md5': 'f496818aa2f60b6c0062980d2e00dc20',
+ 'info_dict': {
+ 'id': '4878838',
+ 'ext': 'mp3',
+ 'title': 'Carlo Ambrosio - Gypsy Eyes 1',
+ 'filesize': 4628061,
+ 'duration': 193.04,
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ album_id, track_id = mobj.group('album_id'), mobj.group('id')
+
+ track = self._download_json(
+ 'http://music.yandex.ru/handlers/track.jsx?track=%s:%s' % (track_id, album_id),
+ track_id, 'Downloading track JSON')['track']
+
+ return self._get_track_info(track)
+
+
+class YandexMusicAlbumIE(YandexMusicBaseIE):
+ IE_NAME = 'yandexmusic:album'
+ IE_DESC = 'Яндекс.Музыка - Альбом'
+ _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<id>\d+)/?(\?|$)'
+
+ _TEST = {
+ 'url': 'http://music.yandex.ru/album/540508',
+ 'info_dict': {
+ 'id': '540508',
+ 'title': 'Carlo Ambrosio - Gypsy Soul (2009)',
+ },
+ 'playlist_count': 50,
+ }
+
+ def _real_extract(self, url):
+ album_id = self._match_id(url)
+
+ album = self._download_json(
+ 'http://music.yandex.ru/handlers/album.jsx?album=%s' % album_id,
+ album_id, 'Downloading album JSON')
+
+ entries = [self._get_track_info(track) for track in album['volumes'][0]]
+
+ title = '%s - %s' % (album['artists'][0]['name'], album['title'])
+ year = album.get('year')
+ if year:
+ title += ' (%s)' % year
+
+ return self.playlist_result(entries, compat_str(album['id']), title)
+
+
+class YandexMusicPlaylistIE(YandexMusicBaseIE):
+ IE_NAME = 'yandexmusic:playlist'
+ IE_DESC = 'Яндекс.Музыка - Плейлист'
+ _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/users/[^/]+/playlists/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
+ 'info_dict': {
+ 'id': '1245',
+ 'title': 'Что слушают Enter Shikari',
+ 'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
+ },
+ 'playlist_count': 6,
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ playlist = self._parse_json(
+ self._search_regex(
+ r'var\s+Mu\s*=\s*({.+?});\s*</script>', webpage, 'player'),
+ playlist_id)['pageData']['playlist']
+
+ entries = [self._get_track_info(track) for track in playlist['tracks']]
+
+ return self.playlist_result(
+ entries, compat_str(playlist_id),
+ playlist['title'], playlist.get('description'))
# Get JSON parameters
json_params = self._search_regex(
- r'var currentVideo = new Video\((.*)\)[,;]',
+ [r'videoJa?son\s*=\s*({.+})',
+ r'var\s+currentVideo\s*=\s*new\s+Video\((.+?)\)[,;]'],
webpage, 'JSON parameters')
try:
params = json.loads(json_params)
- except:
+ except ValueError:
raise ExtractorError('Invalid JSON')
self.report_extraction(video_id)
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
_TESTS = [
{
'url': 'http://yourupload.com/watch/14i14h',
- 'md5': 'bf5c2f95c4c917536e80936af7bc51e1',
+ 'md5': '5e2c63385454c557f97c4c4131a393cd',
'info_dict': {
'id': '14i14h',
'ext': 'mp4',
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
- url = 'http://embed.yucache.net/{0:}'.format(video_id)
- webpage = self._download_webpage(url, video_id)
+ embed_url = 'http://embed.yucache.net/%s' % video_id
+ webpage = self._download_webpage(embed_url, video_id)
title = self._og_search_title(webpage)
- thumbnail = self._og_search_thumbnail(webpage)
- url = self._og_search_video_url(webpage)
-
- formats = [{
- 'format_id': 'sd',
- 'url': url,
- }]
+ video_url = self._og_search_video_url(webpage)
+ thumbnail = self._og_search_thumbnail(webpage, default=None)
return {
'id': video_id,
'title': title,
- 'formats': formats,
+ 'url': video_url,
'thumbnail': thumbnail,
+ 'http_headers': {
+ 'Referer': embed_url,
+ },
}
get_element_by_attribute,
get_element_by_id,
int_or_none,
- OnDemandPagedList,
orderedSet,
unescapeHTML,
unified_strdate,
'uploader': '孫艾倫',
'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
},
- }
+ },
+ # url_encoded_fmt_stream_map is empty string
+ {
+ 'url': 'qEJwOuvDf7I',
+ 'info_dict': {
+ 'id': 'qEJwOuvDf7I',
+ 'ext': 'mp4',
+ 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
+ 'description': '',
+ 'upload_date': '20150404',
+ 'uploader_id': 'spbelect',
+ 'uploader': 'Наблюдатели Петербурга',
+ },
+ 'params': {
+ 'skip_download': 'requires avconv',
+ }
+ },
]
def __init__(self, *args, **kwargs):
errnote='Could not download DASH manifest')
formats = []
- for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
- url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
- if url_el is None:
- continue
- format_id = r.attrib['id']
- video_url = url_el.text
- filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
- f = {
- 'format_id': format_id,
- 'url': video_url,
- 'width': int_or_none(r.attrib.get('width')),
- 'height': int_or_none(r.attrib.get('height')),
- 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
- 'asr': int_or_none(r.attrib.get('audioSamplingRate')),
- 'filesize': filesize,
- 'fps': int_or_none(r.attrib.get('frameRate')),
- }
- try:
- existing_format = next(
- fo for fo in formats
- if fo['format_id'] == format_id)
- except StopIteration:
- full_info = self._formats.get(format_id, {}).copy()
- full_info.update(f)
- formats.append(full_info)
- else:
- existing_format.update(f)
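+ # Iterate per AdaptationSet so each Representation can be filtered by the set's mimeType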
+ for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'):
+ mime_type = a.attrib.get('mimeType')
+ for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'):
+ url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
+ if url_el is None:
+ continue
+ if mime_type == 'text/vtt':
+ # TODO implement WebVTT downloading
+ pass
+ elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
+ format_id = r.attrib['id']
+ video_url = url_el.text
+ filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
+ f = {
+ 'format_id': format_id,
+ 'url': video_url,
+ 'width': int_or_none(r.attrib.get('width')),
+ 'height': int_or_none(r.attrib.get('height')),
+ 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
+ 'asr': int_or_none(r.attrib.get('audioSamplingRate')),
+ 'filesize': filesize,
+ 'fps': int_or_none(r.attrib.get('frameRate')),
+ }
+ try:
+ existing_format = next(
+ fo for fo in formats
+ if fo['format_id'] == format_id)
+ except StopIteration:
+ full_info = self._formats.get(format_id, {}).copy()
+ full_info.update(f)
+ formats.append(full_info)
+ else:
+ existing_format.update(f)
+ else:
+ self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
return formats
def _real_extract(self, url):
args = ytplayer_config['args']
# Convert to the same format returned by compat_parse_qs
video_info = dict((k, [v]) for k, v in args.items())
- if 'url_encoded_fmt_stream_map' not in args:
+ if not args.get('url_encoded_fmt_stream_map'):
raise ValueError('No stream_map present') # caught below
except ValueError:
# We fallback to the get_video_info pages (used by the embed page)
return self.playlist_result(url_results, playlist_id, title)
- def _real_extract(self, url):
- # Extract playlist id
- mobj = re.match(self._VALID_URL, url)
- if mobj is None:
- raise ExtractorError('Invalid URL: %s' % url)
- playlist_id = mobj.group(1) or mobj.group(2)
-
- # Check if it's a video-specific URL
- query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
- if 'v' in query_dict:
- video_id = query_dict['v'][0]
- if self._downloader.params.get('noplaylist'):
- self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
- return self.url_result(video_id, 'Youtube', video_id=video_id)
- else:
- self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
-
- if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
- # Mixes require a custom extraction process
- return self._extract_mix(playlist_id)
-
+ def _extract_playlist(self, playlist_id):
url = self._TEMPLATE_URL % playlist_id
page = self._download_webpage(url, playlist_id)
more_widget_html = content_html = page
- # Check if the playlist exists or is private
- if re.search(r'<div class="yt-alert-message">[^<]*?(The|This) playlist (does not exist|is private)[^<]*?</div>', page) is not None:
- raise ExtractorError(
- 'The playlist doesn\'t exist or is private, use --username or '
- '--netrc to access it.',
- expected=True)
+ for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page):
+ match = match.strip()
+ # Check if the playlist exists or is private
+ if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match):
+ raise ExtractorError(
+ 'The playlist doesn\'t exist or is private, use --username or '
+ '--netrc to access it.',
+ expected=True)
+ elif re.match(r'[^<]*Invalid parameters[^<]*', match):
+ raise ExtractorError(
+ 'Invalid parameters. Maybe URL is incorrect.',
+ expected=True)
+ elif re.match(r'[^<]*Choose your language[^<]*', match):
+ continue
+ else:
+ self.report_warning('Youtube gives an alert message: ' + match)
# Extract the video ids from the playlist pages
ids = []
url_results = self._ids_to_results(ids)
return self.playlist_result(url_results, playlist_id, playlist_title)
+ def _real_extract(self, url):
+ # Extract playlist id
+ mobj = re.match(self._VALID_URL, url)
+ if mobj is None:
+ raise ExtractorError('Invalid URL: %s' % url)
+ playlist_id = mobj.group(1) or mobj.group(2)
+
+ # Check if it's a video-specific URL
+ query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ if 'v' in query_dict:
+ video_id = query_dict['v'][0]
+ if self._downloader.params.get('noplaylist'):
+ self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+ return self.url_result(video_id, 'Youtube', video_id=video_id)
+ else:
+ self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
+
+ if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
+ # Mixes require a custom extraction process
+ return self._extract_mix(playlist_id)
+
+ return self._extract_playlist(playlist_id)
+
class YoutubeChannelIE(InfoExtractor):
IE_DESC = 'YouTube.com channels'
_VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
+ _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
IE_NAME = 'youtube:channel'
_TESTS = [{
'note': 'paginated channel',
}
}]
- def extract_videos_from_page(self, page):
+ @staticmethod
+ def extract_videos_from_page(page):
ids_in_page = []
- for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
- if mobj.group(1) not in ids_in_page:
- ids_in_page.append(mobj.group(1))
- return ids_in_page
+ titles_in_page = []
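+ # Deduplicate by id, but backfill the title if a later match for the same id carries one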
+ for mobj in re.finditer(r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?', page):
+ video_id = mobj.group('id')
+ video_title = unescapeHTML(mobj.group('title'))
+ try:
+ idx = ids_in_page.index(video_id)
+ if video_title and not titles_in_page[idx]:
+ titles_in_page[idx] = video_title
+ except ValueError:
+ ids_in_page.append(video_id)
+ titles_in_page.append(video_title)
+ return zip(ids_in_page, titles_in_page)
def _real_extract(self, url):
channel_id = self._match_id(url)
- video_ids = []
- url = 'https://www.youtube.com/channel/%s/videos' % channel_id
- channel_page = self._download_webpage(url, channel_id)
+ url = self._TEMPLATE_URL % channel_id
+ channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
autogenerated = re.search(r'''(?x)
class="[^"]*?(?:
channel-header-autogenerated-label|
if autogenerated:
# The videos are contained in a single page
# the ajax pages can't be used, they are empty
- video_ids = self.extract_videos_from_page(channel_page)
entries = [
- self.url_result(video_id, 'Youtube', video_id=video_id)
- for video_id in video_ids]
+ self.url_result(
+ video_id, 'Youtube', video_id=video_id,
+ video_title=video_title)
+ for video_id, video_title in self.extract_videos_from_page(channel_page)]
return self.playlist_result(entries, channel_id)
def _entries():
more_widget_html = content_html = channel_page
for pagenum in itertools.count(1):
- ids_in_page = self.extract_videos_from_page(content_html)
- for video_id in ids_in_page:
+ for video_id, video_title in self.extract_videos_from_page(content_html):
yield self.url_result(
- video_id, 'Youtube', video_id=video_id)
+ video_id, 'Youtube', video_id=video_id,
+ video_title=video_title)
mobj = re.search(
r'data-uix-load-more-href="/?(?P<more>[^"]+)"',
return self.playlist_result(_entries(), channel_id)
-class YoutubeUserIE(InfoExtractor):
+class YoutubeUserIE(YoutubeChannelIE):
IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
- _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
- _GDATA_PAGE_SIZE = 50
- _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
+ _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'
IE_NAME = 'youtube:user'
_TESTS = [{
else:
return super(YoutubeUserIE, cls).suitable(url)
- def _real_extract(self, url):
- username = self._match_id(url)
-
- # Download video ids using YouTube Data API. Result size per
- # query is limited (currently to 50 videos) so we need to query
- # page by page until there are no video ids - it means we got
- # all of them.
-
- def download_page(pagenum):
- start_index = pagenum * self._GDATA_PAGE_SIZE + 1
-
- gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
- page = self._download_webpage(
- gdata_url, username,
- 'Downloading video ids from %d to %d' % (
- start_index, start_index + self._GDATA_PAGE_SIZE))
- try:
- response = json.loads(page)
- except ValueError as err:
- raise ExtractorError('Invalid JSON in API response: ' + compat_str(err))
- if 'entry' not in response['feed']:
- return
-
- # Extract video identifiers
- entries = response['feed']['entry']
- for entry in entries:
- title = entry['title']['$t']
- video_id = entry['id']['$t'].split('/')[-1]
- yield {
- '_type': 'url',
- 'url': video_id,
- 'ie_key': 'Youtube',
- 'id': video_id,
- 'title': title,
- }
- url_results = OnDemandPagedList(download_page, self._GDATA_PAGE_SIZE)
-
- return self.playlist_result(url_results, playlist_title=username)
-
-
-class YoutubeSearchIE(SearchInfoExtractor):
+class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
IE_DESC = 'YouTube.com searches'
- _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
- _MAX_RESULTS = 1000
+    # There doesn't appear to be a real limit; for example, a search for
+    # 'python' returns more than 8,000,000 results
+ _MAX_RESULTS = float('inf')
IE_NAME = 'youtube:search'
_SEARCH_KEY = 'ytsearch'
+ _EXTRA_QUERY_ARGS = {}
+ _TESTS = []
def _get_n_results(self, query, n):
"""Get a specified number of results for a query"""
- video_ids = []
- pagenum = 0
+ videos = []
limit = n
- PAGE_SIZE = 50
- while (PAGE_SIZE * pagenum) < limit:
- result_url = self._API_URL % (
- compat_urllib_parse.quote_plus(query.encode('utf-8')),
- (PAGE_SIZE * pagenum) + 1)
- data_json = self._download_webpage(
+ for pagenum in itertools.count(1):
+ url_query = {
+ 'search_query': query,
+ 'page': pagenum,
+ 'spf': 'navigate',
+ }
+ url_query.update(self._EXTRA_QUERY_ARGS)
+ result_url = 'https://www.youtube.com/results?' + compat_urllib_parse.urlencode(url_query)
+ data = self._download_json(
result_url, video_id='query "%s"' % query,
- note='Downloading page %s' % (pagenum + 1),
+ note='Downloading page %s' % pagenum,
errnote='Unable to download API page')
- data = json.loads(data_json)
- api_response = data['data']
+ html_content = data[1]['body']['content']
- if 'items' not in api_response:
+ if 'class="search-message' in html_content:
raise ExtractorError(
'[youtube] No video results', expected=True)
- new_ids = list(video['id'] for video in api_response['items'])
- video_ids += new_ids
-
- limit = min(n, api_response['totalItems'])
- pagenum += 1
+ new_videos = self._ids_to_results(orderedSet(re.findall(
+ r'href="/watch\?v=(.{11})', html_content)))
+ videos += new_videos
+ if not new_videos or len(videos) > limit:
+ break
- if len(video_ids) > n:
- video_ids = video_ids[:n]
- videos = [self.url_result(video_id, 'Youtube', video_id=video_id)
- for video_id in video_ids]
+ if len(videos) > n:
+ videos = videos[:n]
return self.playlist_result(videos, query)
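Stripped of the YouTube specifics, _get_n_results is plain forward pagination: keep requesting pages until one contributes nothing new or enough entries have been collected. A reduced sketch (fetch_page is a hypothetical callable returning a list of results per page):

    import itertools

    def first_n_results(query, n, fetch_page):
        results = []
        for pagenum in itertools.count(1):
            new = fetch_page(query, pagenum)  # hypothetical page fetcher
            results += new
            if not new or len(results) >= n:
                break
        return results[:n]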
class YoutubeSearchDateIE(YoutubeSearchIE):
IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
- _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
_SEARCH_KEY = 'ytsearchdate'
IE_DESC = 'YouTube.com searches, newest videos first'
+ _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
class YoutubeSearchURLIE(InfoExtractor):
webpage = self._download_webpage(url, query)
result_code = self._search_regex(
- r'(?s)<ol class="item-section"(.*?)</ol>', webpage, 'result HTML')
+ r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML')
part_codes = re.findall(
r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
+ IE_NAME = 'youtube:recommended'
IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
_VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
_FEED_NAME = 'recommended'
_PLAYLIST_TITLE = 'Youtube Recommended videos'
-class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
+class YoutubeWatchLaterIE(YoutubePlaylistIE):
+ IE_NAME = 'youtube:watchlater'
IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
- _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
- _FEED_NAME = 'watch_later'
- _PLAYLIST_TITLE = 'Youtube Watch Later'
- _PERSONAL_FEED = True
+ _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|playlist\?list=WL)|:ytwatchlater'
+ _TESTS = [] # override PlaylistIE tests
+
+ def _real_extract(self, url):
+ return self._extract_playlist('WL')
-class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
+
+class YoutubeHistoryIE(YoutubePlaylistIE):
+ IE_NAME = 'youtube:history'
IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
_VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory'
- _FEED_NAME = 'history'
- _PERSONAL_FEED = True
- _PLAYLIST_TITLE = 'Youtube Watch History'
+ _TESTS = []
+
+ def _real_extract(self, url):
+ title = 'Youtube History'
+ page = self._download_webpage('https://www.youtube.com/feed/history', title)
+
+ # The extraction process is the same as for playlists, but the regex
+ # for the video ids doesn't contain an index
+ ids = []
+ more_widget_html = content_html = page
+
+ for page_num in itertools.count(1):
+ matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
+ new_ids = orderedSet(matches)
+ ids.extend(new_ids)
+
+ mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
+ if not mobj:
+ break
+
+ more = self._download_json(
+ 'https://youtube.com/%s' % mobj.group('more'), title,
+ 'Downloading page #%s' % page_num,
+ transform_source=uppercase_escape)
+ content_html = more['content_html']
+ more_widget_html = more['load_more_widget_html']
+
+ return {
+ '_type': 'playlist',
+ 'title': title,
+ 'entries': self._ids_to_results(ids),
+ }
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
import re
from .common import InfoExtractor
+from ..utils import ExtractorError
class ZingMp3BaseInfoExtractor(InfoExtractor):
- @staticmethod
- def _extract_item(item):
+ def _extract_item(self, item):
+ error_message = item.find('./errormessage').text
+ if error_message:
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, error_message),
+ expected=True)
+
title = item.find('./title').text.strip()
source = item.find('./source').text
extension = item.attrib['type']
from .downloader.external import list_external_downloaders
from .compat import (
compat_expanduser,
+ compat_get_terminal_size,
compat_getenv,
compat_kwargs,
)
from .utils import (
- get_term_width,
+ preferredencoding,
write_string,
)
from .version import __version__
return opts
# No need to wrap help messages if we're on a wide console
- columns = get_term_width()
+ columns = compat_get_terminal_size().columns
max_width = columns if columns else 80
max_help_position = 80
general.add_option(
'-h', '--help',
action='help',
- help='print this help text and exit')
+ help='Print this help text and exit')
general.add_option(
'-v', '--version',
action='version',
- help='print program version and exit')
+ help='Print program version and exit')
general.add_option(
'-U', '--update',
action='store_true', dest='update_self',
- help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
+ help='Update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
general.add_option(
'-i', '--ignore-errors',
action='store_true', dest='ignoreerrors', default=False,
- help='continue on download errors, for example to skip unavailable videos in a playlist')
+ help='Continue on download errors, for example to skip unavailable videos in a playlist')
general.add_option(
'--abort-on-error',
action='store_false', dest='ignoreerrors',
general.add_option(
'--dump-user-agent',
action='store_true', dest='dump_user_agent', default=False,
- help='display the current browser identification')
+ help='Display the current browser identification')
general.add_option(
'--list-extractors',
action='store_true', dest='list_extractors', default=False,
general.add_option(
'--default-search',
dest='default_search', metavar='PREFIX',
- help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.')
+ help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.')
general.add_option(
'--ignore-config',
action='store_true',
'--no-color', '--no-colors',
action='store_true', dest='no_color',
default=False,
- help='Do not emit color codes in output.')
+ help='Do not emit color codes in output')
network = optparse.OptionGroup(parser, 'Network Options')
network.add_option(
action='store_const', const='::', dest='source_address',
help='Make all connections via IPv6 (experimental)',
)
+ network.add_option(
+ '--cn-verification-proxy',
+ dest='cn_verification_proxy', default=None, metavar='URL',
+ help='Use this proxy to verify the IP address for some Chinese sites. '
+             'The default proxy specified by --proxy (or none, if the option is not present) is used for the actual downloading. (experimental)'
+ )
selection = optparse.OptionGroup(parser, 'Video Selection')
selection.add_option(
'--playlist-start',
dest='playliststart', metavar='NUMBER', default=1, type=int,
- help='playlist video to start at (default is %default)')
+ help='Playlist video to start at (default is %default)')
selection.add_option(
'--playlist-end',
dest='playlistend', metavar='NUMBER', default=None, type=int,
- help='playlist video to end at (default is last)')
+ help='Playlist video to end at (default is last)')
selection.add_option(
'--playlist-items',
dest='playlist_items', metavar='ITEM_SPEC', default=None,
- help='playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.')
+        help='Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify a range: "--playlist-items 1-3,7,10-13"; it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.')
selection.add_option(
'--match-title',
dest='matchtitle', metavar='REGEX',
- help='download only matching titles (regex or caseless sub-string)')
+ help='Download only matching titles (regex or caseless sub-string)')
selection.add_option(
'--reject-title',
dest='rejecttitle', metavar='REGEX',
- help='skip download for matching titles (regex or caseless sub-string)')
+ help='Skip download for matching titles (regex or caseless sub-string)')
selection.add_option(
'--max-downloads',
dest='max_downloads', metavar='NUMBER', type=int, default=None,
selection.add_option(
'--date',
metavar='DATE', dest='date', default=None,
- help='download only videos uploaded in this date')
+        help='Download only videos uploaded on this date')
selection.add_option(
'--datebefore',
metavar='DATE', dest='datebefore', default=None,
- help='download only videos uploaded on or before this date (i.e. inclusive)')
+ help='Download only videos uploaded on or before this date (i.e. inclusive)')
selection.add_option(
'--dateafter',
metavar='DATE', dest='dateafter', default=None,
- help='download only videos uploaded on or after this date (i.e. inclusive)')
+ help='Download only videos uploaded on or after this date (i.e. inclusive)')
selection.add_option(
'--min-views',
metavar='COUNT', dest='min_views', default=None, type=int,
- help='Do not download any videos with less than COUNT views',)
+ help='Do not download any videos with less than COUNT views')
selection.add_option(
'--max-views',
metavar='COUNT', dest='max_views', default=None, type=int,
'--match-filter',
metavar='FILTER', dest='match_filter', default=None,
help=(
- '(Experimental) Generic video filter. '
+ 'Generic video filter (experimental). '
'Specify any key (see help for -o for a list of available keys) to'
' match if the key is present, '
'!key to check if the key is not present,'
selection.add_option(
'--no-playlist',
action='store_true', dest='noplaylist', default=False,
- help='If the URL refers to a video and a playlist, download only the video.')
+ help='Download only the video, if the URL refers to a video and a playlist.')
selection.add_option(
'--yes-playlist',
action='store_false', dest='noplaylist', default=False,
- help='If the URL refers to a video and a playlist, download the playlist.')
+ help='Download the playlist, if the URL refers to a video and a playlist.')
selection.add_option(
'--age-limit',
metavar='YEARS', dest='age_limit', default=None, type=int,
- help='download only videos suitable for the given age')
+ help='Download only videos suitable for the given age')
selection.add_option(
'--download-archive', metavar='FILE',
dest='download_archive',
authentication.add_option(
'-u', '--username',
dest='username', metavar='USERNAME',
- help='login with this account ID')
+ help='Login with this account ID')
authentication.add_option(
'-p', '--password',
dest='password', metavar='PASSWORD',
- help='account password. If this option is left out, youtube-dl will ask interactively.')
+ help='Account password. If this option is left out, youtube-dl will ask interactively.')
authentication.add_option(
'-2', '--twofactor',
dest='twofactor', metavar='TWOFACTOR',
- help='two-factor auth code')
+ help='Two-factor auth code')
authentication.add_option(
'-n', '--netrc',
action='store_true', dest='usenetrc', default=False,
- help='use .netrc authentication data')
+ help='Use .netrc authentication data')
authentication.add_option(
'--video-password',
dest='videopassword', metavar='PASSWORD',
- help='video password (vimeo, smotri)')
+ help='Video password (vimeo, smotri)')
video_format = optparse.OptionGroup(parser, 'Video Format Options')
video_format.add_option(
'-f', '--format',
action='store', dest='format', metavar='FORMAT', default=None,
- help=(
- 'video format code, specify the order of preference using'
- ' slashes, as in -f 22/17/18 . '
- ' Instead of format codes, you can select by extension for the '
- 'extensions aac, m4a, mp3, mp4, ogg, wav, webm. '
- 'You can also use the special names "best",'
- ' "bestvideo", "bestaudio", "worst". '
- ' You can filter the video results by putting a condition in'
- ' brackets, as in -f "best[height=720]"'
- ' (or -f "[filesize>10M]"). '
- ' This works for filesize, height, width, tbr, abr, vbr, asr, and fps'
- ' and the comparisons <, <=, >, >=, =, !='
- ' and for ext, acodec, vcodec, container, and protocol'
- ' and the comparisons =, != .'
- ' Formats for which the value is not known are excluded unless you'
- ' put a question mark (?) after the operator.'
- ' You can combine format filters, so '
- '-f "[height <=? 720][tbr>500]" '
- 'selects up to 720p videos (or videos where the height is not '
- 'known) with a bitrate of at least 500 KBit/s.'
- ' By default, youtube-dl will pick the best quality.'
- ' Use commas to download multiple audio formats, such as'
- ' -f 136/137/mp4/bestvideo,140/m4a/bestaudio.'
- ' You can merge the video and audio of two formats into a single'
- ' file using -f <video-format>+<audio-format> (requires ffmpeg or'
- ' avconv), for example -f bestvideo+bestaudio.'))
+ help='Video format code, see the "FORMAT SELECTION" for all the info')
video_format.add_option(
'--all-formats',
action='store_const', dest='format', const='all',
- help='download all available video formats')
+ help='Download all available video formats')
video_format.add_option(
'--prefer-free-formats',
action='store_true', dest='prefer_free_formats', default=False,
- help='prefer free video formats unless a specific one is requested')
- video_format.add_option(
- '--max-quality',
- action='store', dest='format_limit', metavar='FORMAT',
- help='highest quality format to download')
+ help='Prefer free video formats unless a specific one is requested')
video_format.add_option(
'-F', '--list-formats',
action='store_true', dest='listformats',
- help='list all available formats')
+ help='List all available formats')
video_format.add_option(
'--youtube-include-dash-manifest',
action='store_true', dest='youtube_include_dash_manifest', default=True,
subtitles.add_option(
'--write-sub', '--write-srt',
action='store_true', dest='writesubtitles', default=False,
- help='write subtitle file')
+ help='Write subtitle file')
subtitles.add_option(
'--write-auto-sub', '--write-automatic-sub',
action='store_true', dest='writeautomaticsub', default=False,
- help='write automatic subtitle file (youtube only)')
+ help='Write automatic subtitle file (YouTube only)')
subtitles.add_option(
'--all-subs',
action='store_true', dest='allsubtitles', default=False,
- help='downloads all the available subtitles of the video')
+ help='Download all the available subtitles of the video')
subtitles.add_option(
'--list-subs',
action='store_true', dest='listsubtitles', default=False,
- help='lists all available subtitles for the video')
+ help='List all available subtitles for the video')
subtitles.add_option(
'--sub-format',
action='store', dest='subtitlesformat', metavar='FORMAT', default='best',
- help='subtitle format, accepts formats preference, for example: "ass/srt/best"')
+        help='Subtitle format; accepts a format preference list, for example: "srt" or "ass/srt/best"')
subtitles.add_option(
'--sub-lang', '--sub-langs', '--srt-lang',
action='callback', dest='subtitleslangs', metavar='LANGS', type='str',
default=[], callback=_comma_separated_values_options_callback,
- help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'')
+        help='Languages of the subtitles to download (optional), separated by commas; use IETF language tags like \'en,pt\'')
downloader = optparse.OptionGroup(parser, 'Download Options')
downloader.add_option(
'-r', '--rate-limit',
dest='ratelimit', metavar='LIMIT',
- help='maximum download rate in bytes per second (e.g. 50K or 4.2M)')
+ help='Maximum download rate in bytes per second (e.g. 50K or 4.2M)')
downloader.add_option(
'-R', '--retries',
dest='retries', metavar='RETRIES', default=10,
- help='number of retries (default is %default), or "infinite".')
+ help='Number of retries (default is %default), or "infinite".')
downloader.add_option(
'--buffer-size',
dest='buffersize', metavar='SIZE', default='1024',
- help='size of download buffer (e.g. 1024 or 16K) (default is %default)')
+ help='Size of download buffer (e.g. 1024 or 16K) (default is %default)')
downloader.add_option(
'--no-resize-buffer',
action='store_true', dest='noresizebuffer', default=False,
- help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.')
+ help='Do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.')
downloader.add_option(
'--test',
action='store_true', dest='test', default=False,
downloader.add_option(
'--xattr-set-filesize',
dest='xattr_set_filesize', action='store_true',
- help='(experimental) set file xattribute ytdl.filesize with expected filesize')
+ help='Set file xattribute ytdl.filesize with expected filesize (experimental)')
downloader.add_option(
'--hls-prefer-native',
dest='hls_prefer_native', action='store_true',
- help='(experimental) Use the native HLS downloader instead of ffmpeg.')
+ help='Use the native HLS downloader instead of ffmpeg (experimental)')
downloader.add_option(
'--external-downloader',
dest='external_downloader', metavar='COMMAND',
- help='(experimental) Use the specified external downloader. '
+ help='Use the specified external downloader. '
'Currently supports %s' % ','.join(list_external_downloaders()))
+ downloader.add_option(
+ '--external-downloader-args',
+ dest='external_downloader_args', metavar='ARGS',
+ help='Give these arguments to the external downloader')
workarounds = optparse.OptionGroup(parser, 'Workarounds')
workarounds.add_option(
workarounds.add_option(
'--no-check-certificate',
action='store_true', dest='no_check_certificate', default=False,
- help='Suppress HTTPS certificate validation.')
+ help='Suppress HTTPS certificate validation')
workarounds.add_option(
'--prefer-insecure',
'--prefer-unsecure', action='store_true', dest='prefer_insecure',
workarounds.add_option(
'--user-agent',
metavar='UA', dest='user_agent',
- help='specify a custom user agent')
+ help='Specify a custom user agent')
workarounds.add_option(
'--referer',
metavar='URL', dest='referer', default=None,
- help='specify a custom referer, use if the video access is restricted to one domain',
+ help='Specify a custom referer, use if the video access is restricted to one domain',
)
workarounds.add_option(
'--add-header',
metavar='FIELD:VALUE', dest='headers', action='append',
- help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times',
+ help='Specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times',
)
workarounds.add_option(
'--bidi-workaround',
verbosity.add_option(
'-q', '--quiet',
action='store_true', dest='quiet', default=False,
- help='activates quiet mode')
+ help='Activate quiet mode')
verbosity.add_option(
'--no-warnings',
dest='no_warnings', action='store_true', default=False,
verbosity.add_option(
'-s', '--simulate',
action='store_true', dest='simulate', default=False,
- help='do not download the video and do not write anything to disk',)
+ help='Do not download the video and do not write anything to disk')
verbosity.add_option(
'--skip-download',
action='store_true', dest='skip_download', default=False,
- help='do not download the video',)
+ help='Do not download the video')
verbosity.add_option(
'-g', '--get-url',
action='store_true', dest='geturl', default=False,
- help='simulate, quiet but print URL')
+ help='Simulate, quiet but print URL')
verbosity.add_option(
'-e', '--get-title',
action='store_true', dest='gettitle', default=False,
- help='simulate, quiet but print title')
+ help='Simulate, quiet but print title')
verbosity.add_option(
'--get-id',
action='store_true', dest='getid', default=False,
- help='simulate, quiet but print id')
+ help='Simulate, quiet but print id')
verbosity.add_option(
'--get-thumbnail',
action='store_true', dest='getthumbnail', default=False,
- help='simulate, quiet but print thumbnail URL')
+ help='Simulate, quiet but print thumbnail URL')
verbosity.add_option(
'--get-description',
action='store_true', dest='getdescription', default=False,
- help='simulate, quiet but print video description')
+ help='Simulate, quiet but print video description')
verbosity.add_option(
'--get-duration',
action='store_true', dest='getduration', default=False,
- help='simulate, quiet but print video length')
+ help='Simulate, quiet but print video length')
verbosity.add_option(
'--get-filename',
action='store_true', dest='getfilename', default=False,
- help='simulate, quiet but print output filename')
+ help='Simulate, quiet but print output filename')
verbosity.add_option(
'--get-format',
action='store_true', dest='getformat', default=False,
- help='simulate, quiet but print output format')
+ help='Simulate, quiet but print output format')
verbosity.add_option(
'-j', '--dump-json',
action='store_true', dest='dumpjson', default=False,
- help='simulate, quiet but print JSON information. See --output for a description of available keys.')
+ help='Simulate, quiet but print JSON information. See --output for a description of available keys.')
verbosity.add_option(
'-J', '--dump-single-json',
action='store_true', dest='dump_single_json', default=False,
- help='simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.')
+ help='Simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.')
verbosity.add_option(
'--print-json',
action='store_true', dest='print_json', default=False,
verbosity.add_option(
'--newline',
action='store_true', dest='progress_with_newline', default=False,
- help='output progress bar as new lines')
+ help='Output progress bar as new lines')
verbosity.add_option(
'--no-progress',
action='store_true', dest='noprogress', default=False,
- help='do not print progress bar')
+ help='Do not print progress bar')
verbosity.add_option(
'--console-title',
action='store_true', dest='consoletitle', default=False,
- help='display progress in console titlebar')
+ help='Display progress in console titlebar')
verbosity.add_option(
'-v', '--verbose',
action='store_true', dest='verbose', default=False,
- help='print various debugging information')
+ help='Print various debugging information')
verbosity.add_option(
- '--dump-intermediate-pages',
+ '--dump-pages', '--dump-intermediate-pages',
action='store_true', dest='dump_intermediate_pages', default=False,
- help='print downloaded pages to debug problems (very verbose)')
+ help='Print downloaded pages to debug problems (very verbose)')
verbosity.add_option(
'--write-pages',
action='store_true', dest='write_pages', default=False,
verbosity.add_option(
'-C', '--call-home',
dest='call_home', action='store_true', default=False,
- help='Contact the youtube-dl server for debugging.')
+ help='Contact the youtube-dl server for debugging')
verbosity.add_option(
'--no-call-home',
dest='call_home', action='store_false', default=False,
- help='Do NOT contact the youtube-dl server for debugging.')
+ help='Do NOT contact the youtube-dl server for debugging')
filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
filesystem.add_option(
'-a', '--batch-file',
dest='batchfile', metavar='FILE',
- help='file containing URLs to download (\'-\' for stdin)')
+ help='File containing URLs to download (\'-\' for stdin)')
filesystem.add_option(
'--id', default=False,
- action='store_true', dest='useid', help='use only video ID in file name')
+ action='store_true', dest='useid', help='Use only video ID in file name')
filesystem.add_option(
'-o', '--output',
dest='outtmpl', metavar='TEMPLATE',
- help=('output filename template. Use %(title)s to get the title, '
+ help=('Output filename template. Use %(title)s to get the title, '
'%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, '
'%(autonumber)s to get an automatically incremented number, '
'%(ext)s for the filename extension, '
'%(format)s for the format description (like "22 - 1280x720" or "HD"), '
- '%(format_id)s for the unique id of the format (like Youtube\'s itags: "137"), '
+ '%(format_id)s for the unique id of the format (like YouTube\'s itags: "137"), '
'%(upload_date)s for the upload date (YYYYMMDD), '
'%(extractor)s for the provider (youtube, metacafe, etc), '
'%(id)s for the video id, '
filesystem.add_option(
'--autonumber-size',
dest='autonumber_size', metavar='NUMBER',
- help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given')
+ help='Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given')
filesystem.add_option(
'--restrict-filenames',
action='store_true', dest='restrictfilenames', default=False,
filesystem.add_option(
'-A', '--auto-number',
action='store_true', dest='autonumber', default=False,
- help='[deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] number downloaded files starting from 00000')
+ help='[deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] Number downloaded files starting from 00000')
filesystem.add_option(
'-t', '--title',
action='store_true', dest='usetitle', default=False,
- help='[deprecated] use title in file name (default)')
+ help='[deprecated] Use title in file name (default)')
filesystem.add_option(
'-l', '--literal', default=False,
action='store_true', dest='usetitle',
- help='[deprecated] alias of --title')
+ help='[deprecated] Alias of --title')
filesystem.add_option(
'-w', '--no-overwrites',
action='store_true', dest='nooverwrites', default=False,
- help='do not overwrite files')
+ help='Do not overwrite files')
filesystem.add_option(
'-c', '--continue',
action='store_true', dest='continue_dl', default=True,
- help='force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.')
+ help='Force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.')
filesystem.add_option(
'--no-continue',
action='store_false', dest='continue_dl',
- help='do not resume partially downloaded files (restart from beginning)')
+ help='Do not resume partially downloaded files (restart from beginning)')
filesystem.add_option(
'--no-part',
action='store_true', dest='nopart', default=False,
- help='do not use .part files - write directly into output file')
+ help='Do not use .part files - write directly into output file')
filesystem.add_option(
'--no-mtime',
action='store_false', dest='updatetime', default=True,
- help='do not use the Last-modified header to set the file modification time')
+ help='Do not use the Last-modified header to set the file modification time')
filesystem.add_option(
'--write-description',
action='store_true', dest='writedescription', default=False,
- help='write video description to a .description file')
+ help='Write video description to a .description file')
filesystem.add_option(
'--write-info-json',
action='store_true', dest='writeinfojson', default=False,
- help='write video metadata to a .info.json file')
+ help='Write video metadata to a .info.json file')
filesystem.add_option(
'--write-annotations',
action='store_true', dest='writeannotations', default=False,
- help='write video annotations to a .annotation file')
+ help='Write video annotations to a .annotations.xml file')
filesystem.add_option(
'--load-info',
dest='load_info_filename', metavar='FILE',
- help='json file containing the video information (created with the "--write-json" option)')
+ help='JSON file containing the video information (created with the "--write-info-json" option)')
filesystem.add_option(
'--cookies',
dest='cookiefile', metavar='FILE',
- help='file to read cookies from and dump cookie jar in')
+ help='File to read cookies from and dump cookie jar in')
filesystem.add_option(
'--cache-dir', dest='cachedir', default=None, metavar='DIR',
help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
thumbnail.add_option(
'--write-thumbnail',
action='store_true', dest='writethumbnail', default=False,
- help='write thumbnail image to disk')
+ help='Write thumbnail image to disk')
thumbnail.add_option(
'--write-all-thumbnails',
action='store_true', dest='write_all_thumbnails', default=False,
- help='write all thumbnail image formats to disk')
+ help='Write all thumbnail image formats to disk')
thumbnail.add_option(
'--list-thumbnails',
action='store_true', dest='list_thumbnails', default=False,
postproc.add_option(
'-x', '--extract-audio',
action='store_true', dest='extractaudio', default=False,
- help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
+ help='Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
postproc.add_option(
'--audio-format', metavar='FORMAT', dest='audioformat', default='best',
- help='"best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default')
+ help='Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default')
postproc.add_option(
'--audio-quality', metavar='QUALITY',
dest='audioquality', default='5',
- help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default %default)')
+ help='Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default %default)')
postproc.add_option(
'--recode-video',
metavar='FORMAT', dest='recodevideo', default=None,
postproc.add_option(
'-k', '--keep-video',
action='store_true', dest='keepvideo', default=False,
- help='keeps the video file on disk after the post-processing; the video is erased by default')
+ help='Keep the video file on disk after the post-processing; the video is erased by default')
postproc.add_option(
'--no-post-overwrites',
action='store_true', dest='nopostoverwrites', default=False,
- help='do not overwrite post-processed files; the post-processed files are overwritten by default')
+ help='Do not overwrite post-processed files; the post-processed files are overwritten by default')
postproc.add_option(
'--embed-subs',
action='store_true', dest='embedsubtitles', default=False,
- help='embed subtitles in the video (only for mp4 videos)')
+ help='Embed subtitles in the video (only for mkv and mp4 videos)')
postproc.add_option(
'--embed-thumbnail',
action='store_true', dest='embedthumbnail', default=False,
- help='embed thumbnail in the audio as cover art')
+ help='Embed thumbnail in the audio as cover art')
postproc.add_option(
'--add-metadata',
action='store_true', dest='addmetadata', default=False,
- help='write metadata to the video file')
+ help='Write metadata to the video file')
+ postproc.add_option(
+ '--metadata-from-title',
+ metavar='FORMAT', dest='metafromtitle',
+ help='Parse additional metadata like song title / artist from the video title. '
+ 'The format syntax is the same as --output, '
+ 'the parsed parameters replace existing values. '
+ 'Additional templates: %(album), %(artist). '
+ 'Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like '
+ '"Coldplay - Paradise"')
postproc.add_option(
'--xattrs',
action='store_true', dest='xattrs', default=False,
- help='write metadata to the video file\'s xattrs (using dublin core and xdg standards)')
+ help='Write metadata to the video file\'s xattrs (using dublin core and xdg standards)')
postproc.add_option(
'--fixup',
metavar='POLICY', dest='fixup', default='detect_or_warn',
if opts.verbose:
write_string('[debug] Override config: ' + repr(overrideArguments) + '\n')
else:
- command_line_conf = sys.argv[1:]
+ def compat_conf(conf):
+ if sys.version_info < (3,):
+ return [a.decode(preferredencoding(), 'replace') for a in conf]
+ return conf
+
+ command_line_conf = compat_conf(sys.argv[1:])
+
if '--ignore-config' in command_line_conf:
system_conf = []
user_conf = []
else:
- system_conf = _readOptions('/etc/youtube-dl.conf')
+ system_conf = compat_conf(_readOptions('/etc/youtube-dl.conf'))
if '--ignore-config' in system_conf:
user_conf = []
else:
- user_conf = _readUserConf()
+ user_conf = compat_conf(_readUserConf())
argv = system_conf + user_conf + command_line_conf
opts, args = parser.parse_args(argv)
from __future__ import unicode_literals
-from .atomicparsley import AtomicParsleyPP
+from .embedthumbnail import EmbedThumbnailPP
from .ffmpeg import (
FFmpegPostProcessor,
- FFmpegAudioFixPP,
FFmpegEmbedSubtitlePP,
FFmpegExtractAudioPP,
FFmpegFixupStretchedPP,
)
from .xattrpp import XAttrMetadataPP
from .execafterdownload import ExecAfterDownloadPP
+from .metadatafromtitle import MetadataFromTitlePP
def get_postprocessor(key):
__all__ = [
- 'AtomicParsleyPP',
+ 'EmbedThumbnailPP',
'ExecAfterDownloadPP',
- 'FFmpegAudioFixPP',
'FFmpegEmbedSubtitlePP',
'FFmpegExtractAudioPP',
'FFmpegFixupM4aPP',
'FFmpegPostProcessor',
'FFmpegSubtitlesConvertorPP',
'FFmpegVideoConvertorPP',
+ 'MetadataFromTitlePP',
'XAttrMetadataPP',
]
+++ /dev/null
-# -*- coding: utf-8 -*-
-from __future__ import unicode_literals
-
-
-import os
-import subprocess
-
-from .common import PostProcessor
-from ..compat import (
- compat_urlretrieve,
-)
-from ..utils import (
- check_executable,
- encodeFilename,
- PostProcessingError,
- prepend_extension,
- shell_quote
-)
-
-
-class AtomicParsleyPPError(PostProcessingError):
- pass
-
-
-class AtomicParsleyPP(PostProcessor):
- def run(self, info):
- if not check_executable('AtomicParsley', ['-v']):
- raise AtomicParsleyPPError('AtomicParsley was not found. Please install.')
-
- filename = info['filepath']
- temp_filename = prepend_extension(filename, 'temp')
- temp_thumbnail = prepend_extension(filename, 'thumb')
-
- if not info.get('thumbnail'):
- raise AtomicParsleyPPError('Thumbnail was not found. Nothing to do.')
-
- compat_urlretrieve(info['thumbnail'], temp_thumbnail)
-
- cmd = ['AtomicParsley', filename, '--artwork', temp_thumbnail, '-o', temp_filename]
-
- self._downloader.to_screen('[atomicparsley] Adding thumbnail to "%s"' % filename)
-
- if self._downloader.params.get('verbose', False):
- self._downloader.to_screen('[debug] AtomicParsley command line: %s' % shell_quote(cmd))
-
- p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- stdout, stderr = p.communicate()
-
- if p.returncode != 0:
- msg = stderr.decode('utf-8', 'replace').strip()
- raise AtomicParsleyPPError(msg)
-
- os.remove(encodeFilename(filename))
- os.remove(encodeFilename(temp_thumbnail))
- os.rename(encodeFilename(temp_filename), encodeFilename(filename))
-
- return True, info
from __future__ import unicode_literals
-from ..utils import PostProcessingError
+import os
+
+from ..utils import (
+ PostProcessingError,
+ encodeFilename,
+)
class PostProcessor(object):
one has an extra field called "filepath" that points to the
downloaded file.
- This method returns a tuple, the first element of which describes
- whether the original file should be kept (i.e. not deleted - None for
- no preference), and the second of which is the updated information.
+        This method returns a tuple: the first element is a list of the
+        files that can be deleted, and the second is the updated
+        information.
In addition, this method may raise a PostProcessingError
exception if post processing fails.
"""
- return None, information # by default, keep file and do nothing
+ return [], information # by default, keep file and do nothing
+
+ def try_utime(self, path, atime, mtime, errnote='Cannot update utime of file'):
+ try:
+ os.utime(encodeFilename(path), (atime, mtime))
+ except Exception:
+ self._downloader.report_warning(errnote)
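Under the revised contract even a do-nothing postprocessor is short to state: return an empty deletion list plus the unchanged info dict. A minimal sketch (the class name is illustrative):

    class NoOpPP(PostProcessor):
        def run(self, information):
            # Nothing is eligible for deletion; keep every file.
            return [], information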
class AudioConversionError(PostProcessingError):
--- /dev/null
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+
+import os
+import subprocess
+
+from .ffmpeg import FFmpegPostProcessor
+
+from ..utils import (
+ check_executable,
+ encodeArgument,
+ encodeFilename,
+ PostProcessingError,
+ prepend_extension,
+ shell_quote
+)
+
+
+class EmbedThumbnailPPError(PostProcessingError):
+ pass
+
+
+class EmbedThumbnailPP(FFmpegPostProcessor):
+ def __init__(self, downloader=None, already_have_thumbnail=False):
+ super(EmbedThumbnailPP, self).__init__(downloader)
+ self._already_have_thumbnail = already_have_thumbnail
+
+ def run(self, info):
+ filename = info['filepath']
+ temp_filename = prepend_extension(filename, 'temp')
+
+ if not info.get('thumbnails'):
+ raise EmbedThumbnailPPError('Thumbnail was not found. Nothing to do.')
+
+ thumbnail_filename = info['thumbnails'][-1]['filename']
+
+ if info['ext'] == 'mp3':
+ options = [
+ '-c', 'copy', '-map', '0', '-map', '1',
+ '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment="Cover (Front)"']
+
+ self._downloader.to_screen('[ffmpeg] Adding thumbnail to "%s"' % filename)
+
+ self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)
+
+ if not self._already_have_thumbnail:
+ os.remove(encodeFilename(thumbnail_filename))
+ os.remove(encodeFilename(filename))
+ os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+
+ elif info['ext'] == 'm4a':
+ if not check_executable('AtomicParsley', ['-v']):
+ raise EmbedThumbnailPPError('AtomicParsley was not found. Please install.')
+
+ cmd = [encodeFilename('AtomicParsley', True),
+ encodeFilename(filename, True),
+ encodeArgument('--artwork'),
+ encodeFilename(thumbnail_filename, True),
+ encodeArgument('-o'),
+ encodeFilename(temp_filename, True)]
+
+ self._downloader.to_screen('[atomicparsley] Adding thumbnail to "%s"' % filename)
+
+ if self._downloader.params.get('verbose', False):
+ self._downloader.to_screen('[debug] AtomicParsley command line: %s' % shell_quote(cmd))
+
+ p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ stdout, stderr = p.communicate()
+
+ if p.returncode != 0:
+ msg = stderr.decode('utf-8', 'replace').strip()
+ raise EmbedThumbnailPPError(msg)
+
+ if not self._already_have_thumbnail:
+ os.remove(encodeFilename(thumbnail_filename))
+ # for formats that don't support thumbnails (like 3gp) AtomicParsley
+            # won't create the temporary file
+ if b'No changes' in stdout:
+ self._downloader.report_warning('The file format doesn\'t support embedding a thumbnail')
+ else:
+ os.remove(encodeFilename(filename))
+ os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+ else:
+ raise EmbedThumbnailPPError('Only mp3 and m4a are supported for thumbnail embedding for now.')
+
+ return [], info
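For the mp3 branch, the options assembled above correspond to an ffmpeg invocation of roughly this shape (a sketch with made-up file names; run_ffmpeg_multiple_files adds its own housekeeping flags on top):

    import subprocess

    subprocess.check_call([
        'ffmpeg', '-i', 'song.mp3', '-i', 'cover.jpg',
        '-c', 'copy', '-map', '0', '-map', '1',
        '-metadata:s:v', 'title="Album cover"',
        '-metadata:s:v', 'comment="Cover (Front)"',
        'song.temp.mp3'])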
class ExecAfterDownloadPP(PostProcessor):
- def __init__(self, downloader=None, verboseOutput=None, exec_cmd=None):
- self.verboseOutput = verboseOutput
+ def __init__(self, downloader, exec_cmd):
+ super(ExecAfterDownloadPP, self).__init__(downloader)
self.exec_cmd = exec_cmd
def run(self, information):
raise PostProcessingError(
'Command returned error code %d' % retCode)
- return None, information # by default, keep file and do nothing
+ return [], information
import io
import os
import subprocess
-import sys
import time
prepend_extension,
shell_quote,
subtitles_filename,
+ dfxp2srt,
)
class FFmpegPostProcessor(PostProcessor):
- def __init__(self, downloader=None, deletetempfiles=False):
+ def __init__(self, downloader=None):
PostProcessor.__init__(self, downloader)
- self._deletetempfiles = deletetempfiles
self._determine_executables()
def check_version(self):
def executable(self):
return self._paths[self.basename]
+ @property
+ def probe_available(self):
+ return self.probe_basename is not None
+
@property
def probe_executable(self):
return self._paths[self.probe_basename]
stderr = stderr.decode('utf-8', 'replace')
msg = stderr.strip().split('\n')[-1]
raise FFmpegPostProcessorError(msg)
- os.utime(encodeFilename(out_path), (oldest_mtime, oldest_mtime))
- if self._deletetempfiles:
- for ipath in input_paths:
- os.remove(ipath)
+ self.try_utime(out_path, oldest_mtime, oldest_mtime)
def run_ffmpeg(self, path, out_path, opts):
self.run_ffmpeg_multiple_files([path], out_path, opts)
def get_audio_codec(self, path):
- if not self.probe_executable:
+ if not self.probe_available:
raise PostProcessingError('ffprobe or avprobe not found. Please install one.')
try:
cmd = [
new_path = prefix + sep + extension
# If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
- if new_path == path:
- self._nopostoverwrites = True
+ if (new_path == path or
+ (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))):
+ self._downloader.to_screen('[youtube] Post-process file %s exists, skipping' % new_path)
+ return [], information
try:
- if self._nopostoverwrites and os.path.exists(encodeFilename(new_path)):
- self._downloader.to_screen('[youtube] Post-process file %s exists, skipping' % new_path)
- else:
- self._downloader.to_screen('[' + self.basename + '] Destination: ' + new_path)
- self.run_ffmpeg(path, new_path, acodec, more_opts)
- except:
- etype, e, tb = sys.exc_info()
- if isinstance(e, AudioConversionError):
- msg = 'audio conversion failed: ' + e.msg
- else:
- msg = 'error running ' + self.basename
- raise PostProcessingError(msg)
+ self._downloader.to_screen('[' + self.basename + '] Destination: ' + new_path)
+ self.run_ffmpeg(path, new_path, acodec, more_opts)
+ except AudioConversionError as e:
+ raise PostProcessingError(
+ 'audio conversion failed: ' + e.msg)
+ except Exception:
+ raise PostProcessingError('error running ' + self.basename)
# Try to update the date time for extracted audio file.
if information.get('filetime') is not None:
- try:
- os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
- except:
- self._downloader.report_warning('Cannot update utime of audio file')
+ self.try_utime(
+ new_path, time.time(), information['filetime'],
+ errnote='Cannot update utime of audio file')
information['filepath'] = new_path
- return self._nopostoverwrites, information
+ information['ext'] = extension
+
+ return [path], information
class FFmpegVideoConvertorPP(FFmpegPostProcessor):
outpath = prefix + sep + self._preferedformat
if information['ext'] == self._preferedformat:
self._downloader.to_screen('[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat))
- return True, information
+ return [], information
self._downloader.to_screen('[' + 'ffmpeg' + '] Converting video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) + outpath)
self.run_ffmpeg(path, outpath, [])
information['filepath'] = outpath
information['format'] = self._preferedformat
information['ext'] = self._preferedformat
- return False, information
+ return [path], information
class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
return cls._lang_map.get(code[:2])
def run(self, information):
- if information['ext'] != 'mp4':
- self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 files')
- return True, information
+ if information['ext'] not in ['mp4', 'mkv']:
+ self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 or mkv files')
+ return [], information
subtitles = information.get('requested_subtitles')
if not subtitles:
self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to embed')
- return True, information
+ return [], information
sub_langs = list(subtitles.keys())
filename = information['filepath']
- input_files = [filename] + [subtitles_filename(filename, lang, sub_info['ext']) for lang, sub_info in subtitles.items()]
+ sub_filenames = [subtitles_filename(filename, lang, sub_info['ext']) for lang, sub_info in subtitles.items()]
+ input_files = [filename] + sub_filenames
opts = [
'-map', '0',
# Don't copy the existing subtitles, we may be running the
# postprocessor a second time
'-map', '-0:s',
- '-c:s', 'mov_text',
]
+ if information['ext'] == 'mp4':
+ opts += ['-c:s', 'mov_text']
for (i, lang) in enumerate(sub_langs):
opts.extend(['-map', '%d:0' % (i + 1)])
lang_code = self._conver_lang_code(lang)
os.remove(encodeFilename(filename))
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
- return True, information
+ return sub_filenames, information
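Spelled out for an mp4 with a single subtitle input, the option list assembled above begins roughly like this (the per-language metadata options added in the loop are omitted here):

    opts = [
        '-map', '0',         # take every stream from the video file
        '-map', '-0:s',      # ...but drop any subtitle streams it already has
        '-c:s', 'mov_text',  # mp4 only; for mkv the subtitle codec is left alone
        '-map', '1:0',       # the first subtitle input file
    ]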
class FFmpegMetadataPP(FFmpegPostProcessor):
metadata['title'] = info['title']
if info.get('upload_date') is not None:
metadata['date'] = info['upload_date']
- if info.get('uploader') is not None:
+ if info.get('artist') is not None:
+ metadata['artist'] = info['artist']
+ elif info.get('uploader') is not None:
metadata['artist'] = info['uploader']
elif info.get('uploader_id') is not None:
metadata['artist'] = info['uploader_id']
metadata['comment'] = info['description']
if info.get('webpage_url') is not None:
metadata['purl'] = info['webpage_url']
+ if info.get('album') is not None:
+ metadata['album'] = info['album']
if not metadata:
self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add')
- return True, info
+ return [], info
filename = info['filepath']
temp_filename = prepend_extension(filename, 'temp')
self.run_ffmpeg(filename, temp_filename, options)
os.remove(encodeFilename(filename))
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
- return True, info
+ return [], info
class FFmpegMergerPP(FFmpegPostProcessor):
def run(self, info):
filename = info['filepath']
+ temp_filename = prepend_extension(filename, 'temp')
args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0']
self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % filename)
- self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args)
- return True, info
-
-
-class FFmpegAudioFixPP(FFmpegPostProcessor):
- def run(self, info):
- filename = info['filepath']
- temp_filename = prepend_extension(filename, 'temp')
-
- options = ['-vn', '-acodec', 'copy']
- self._downloader.to_screen('[ffmpeg] Fixing audio file "%s"' % filename)
- self.run_ffmpeg(filename, temp_filename, options)
-
- os.remove(encodeFilename(filename))
+ self.run_ffmpeg_multiple_files(info['__files_to_merge'], temp_filename, args)
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+ return info['__files_to_merge'], info
- return True, info
+ def can_merge(self):
+ # TODO: figure out merge-capable ffmpeg version
+ if self.basename != 'avconv':
+ return True
+
+ required_version = '10-0'
+ if is_outdated_version(
+ self._versions[self.basename], required_version):
+ warning = ('Your copy of %s is outdated and unable to properly mux separate video and audio files, '
+ 'youtube-dl will download single file media. '
+ 'Update %s to version %s or newer to fix this.') % (
+ self.basename, self.basename, required_version)
+ if self._downloader:
+ self._downloader.report_warning(warning)
+ return False
+ return True
class FFmpegFixupStretchedPP(FFmpegPostProcessor):
def run(self, info):
stretched_ratio = info.get('stretched_ratio')
if stretched_ratio is None or stretched_ratio == 1:
- return True, info
+ return [], info
filename = info['filepath']
temp_filename = prepend_extension(filename, 'temp')
os.remove(encodeFilename(filename))
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
- return True, info
+ return [], info
class FFmpegFixupM4aPP(FFmpegPostProcessor):
def run(self, info):
if info.get('container') != 'm4a_dash':
- return True, info
+ return [], info
filename = info['filepath']
temp_filename = prepend_extension(filename, 'temp')
os.remove(encodeFilename(filename))
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
- return True, info
+ return [], info
class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
new_format = 'webvtt'
if subs is None:
self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to convert')
- return True, info
+ return [], info
self._downloader.to_screen('[ffmpeg] Converting subtitles')
for lang, sub in subs.items():
ext = sub['ext']
'format' % new_ext)
continue
new_file = subtitles_filename(filename, lang, new_ext)
+
+ if ext == 'dfxp' or ext == 'ttml':
+ self._downloader.report_warning(
+ 'You have requested to convert dfxp (TTML) subtitles into another format, '
+ 'which results in style information loss')
+
+ dfxp_file = subtitles_filename(filename, lang, ext)
+ srt_file = subtitles_filename(filename, lang, 'srt')
+
+ with io.open(dfxp_file, 'rt', encoding='utf-8') as f:
+ srt_data = dfxp2srt(f.read())
+
+ with io.open(srt_file, 'wt', encoding='utf-8') as f:
+ f.write(srt_data)
+
+ ext = 'srt'
+ subs[lang] = {
+ 'ext': 'srt',
+ 'data': srt_data
+ }
+
+ if new_ext == 'srt':
+ continue
+
self.run_ffmpeg(
subtitles_filename(filename, lang, ext),
new_file, ['-f', new_format])
'data': f.read(),
}
- return True, info
+ return [], info
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import PostProcessor
+from ..utils import PostProcessingError
+
+
+class MetadataFromTitlePPError(PostProcessingError):
+ pass
+
+
+class MetadataFromTitlePP(PostProcessor):
+ def __init__(self, downloader, titleformat):
+ super(MetadataFromTitlePP, self).__init__(downloader)
+ self._titleformat = titleformat
+ self._titleregex = self.format_to_regex(titleformat)
+
+ def format_to_regex(self, fmt):
+ """
+ Converts a string like
+ '%(title)s - %(artist)s'
+ to a regex like
+ '(?P<title>.+)\ \-\ (?P<artist>.+)'
+ """
+ lastpos = 0
+ regex = ""
+ # replace %(..)s with regex group and escape other string parts
+ for match in re.finditer(r'%\((\w+)\)s', fmt):
+ regex += re.escape(fmt[lastpos:match.start()])
+ regex += r'(?P<' + match.group(1) + '>.+)'
+ lastpos = match.end()
+ if lastpos < len(fmt):
+ regex += re.escape(fmt[lastpos:len(fmt)])
+ return regex
+
+ def run(self, info):
+ title = info['title']
+ match = re.match(self._titleregex, title)
+ if match is None:
+ raise MetadataFromTitlePPError('Could not interpret title of video as "%s"' % self._titleformat)
+ for attribute, value in match.groupdict().items():
+ value = match.group(attribute)
+ info[attribute] = value
+ self._downloader.to_screen('[fromtitle] parsed ' + attribute + ': ' + value)
+
+ return [], info
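Exercising the parser directly shows the round trip from format string to named groups (a sketch; no downloader is needed for the regex itself):

    import re

    pp = MetadataFromTitlePP(None, '%(artist)s - %(title)s')
    m = re.match(pp._titleregex, 'Coldplay - Paradise')
    print(m.group('artist'), '|', m.group('title'))
    # Coldplay | Paradise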
import os
import subprocess
import sys
+import errno
from .common import PostProcessor
-from ..compat import (
- subprocess_check_output
-)
from ..utils import (
check_executable,
hyphenate_date,
+ version_tuple,
+ PostProcessingError,
+ encodeArgument,
+ encodeFilename,
)
+class XAttrMetadataError(PostProcessingError):
+ def __init__(self, code=None, msg='Unknown error'):
+ super(XAttrMetadataError, self).__init__(msg)
+ self.code = code
+
+ # Parsing code and msg
+ if (self.code in (errno.ENOSPC, errno.EDQUOT) or
+            'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
+ self.reason = 'NO_SPACE'
+ elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
+ self.reason = 'VALUE_TOO_LONG'
+ else:
+ self.reason = 'NOT_SUPPORTED'
+
+
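A quick sanity check of the classification above, using errno values from the standard library (the messages are illustrative):

    import errno

    assert XAttrMetadataError(errno.ENOSPC, 'No space left on device').reason == 'NO_SPACE'
    assert XAttrMetadataError(errno.E2BIG, 'Argument list too long').reason == 'VALUE_TOO_LONG'
    assert XAttrMetadataError(None, 'Operation not supported').reason == 'NOT_SUPPORTED'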
class XAttrMetadataPP(PostProcessor):
#
# try the pyxattr module...
import xattr
+ # Unicode arguments are not supported in python-pyxattr until
+ # version 0.5.0
+ # See https://github.com/rg3/youtube-dl/issues/5498
+ pyxattr_required_version = '0.5.0'
+ if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
+ self._downloader.report_warning(
+ 'python-pyxattr is detected but is too old. '
+ 'youtube-dl requires %s or above while your version is %s. '
+ 'Falling back to other xattr implementations' % (
+ pyxattr_required_version, xattr.__version__))
+
+ raise ImportError
+
def write_xattr(path, key, value):
- return xattr.setxattr(path, key, value)
+ try:
+ xattr.set(path, key, value)
+ except EnvironmentError as e:
+ raise XAttrMetadataError(e.errno, e.strerror)
except ImportError:
if os.name == 'nt':
assert os.path.exists(path)
ads_fn = path + ":" + key
- with open(ads_fn, "wb") as f:
- f.write(value)
+ try:
+ with open(ads_fn, "wb") as f:
+ f.write(value)
+ except EnvironmentError as e:
+ raise XAttrMetadataError(e.errno, e.strerror)
else:
user_has_setfattr = check_executable("setfattr", ['--version'])
user_has_xattr = check_executable("xattr", ['-h'])
if user_has_setfattr or user_has_xattr:
def write_xattr(path, key, value):
+ value = value.decode('utf-8')
if user_has_setfattr:
- cmd = ['setfattr', '-n', key, '-v', value, path]
+ executable = 'setfattr'
+ opts = ['-n', key, '-v', value]
elif user_has_xattr:
- cmd = ['xattr', '-w', key, value, path]
-
- subprocess_check_output(cmd)
+ executable = 'xattr'
+ opts = ['-w', key, value]
+
+ cmd = ([encodeFilename(executable, True)] +
+ [encodeArgument(o) for o in opts] +
+ [encodeFilename(path, True)])
+
+ try:
+ p = subprocess.Popen(
+ cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+ except EnvironmentError as e:
+ raise XAttrMetadataError(e.errno, e.strerror)
+ stdout, stderr = p.communicate()
+ stderr = stderr.decode('utf-8', 'replace')
+ if p.returncode != 0:
+ raise XAttrMetadataError(p.returncode, stderr)
else:
# On Unix, but none of pyxattr, setfattr, or xattr is available.
byte_value = value.encode('utf-8')
write_xattr(filename, xattrname, byte_value)
- return True, info
+ return [], info
- except (subprocess.CalledProcessError, OSError):
- self._downloader.report_error("This filesystem doesn't support extended attributes. (You may have to enable them in your /etc/fstab)")
- return False, info
+ except XAttrMetadataError as e:
+ if e.reason == 'NO_SPACE':
+ self._downloader.report_warning(
+ 'There is no disk space left or the disk quota is exceeded. ' +
+ 'Extended attributes are not written.')
+ elif e.reason == 'VALUE_TOO_LONG':
+ self._downloader.report_warning(
+ 'Unable to write extended attributes: the attribute values are too long.')
+ else:
+ msg = 'This filesystem doesn\'t support extended attributes. '
+ if os.name == 'nt':
+ msg += 'You need to use NTFS.'
+ else:
+ msg += '(You may have to enable them in your /etc/fstab)'
+ self._downloader.report_error(msg)
+ return [], info
# Check if there is a new version
try:
newversion = opener.open(VERSION_URL).read().decode('utf-8').strip()
- except:
+ except Exception:
if verbose:
to_screen(compat_str(traceback.format_exc()))
to_screen('ERROR: can\'t find the current version. Please try again later.')
try:
versions_info = opener.open(JSON_URL).read().decode('utf-8')
versions_info = json.loads(versions_info)
- except:
+ except Exception:
if verbose:
to_screen(compat_str(traceback.format_exc()))
to_screen('ERROR: can\'t obtain versions info. Please try again later.')
from .compat import (
compat_basestring,
compat_chr,
- compat_getenv,
compat_html_entities,
compat_http_client,
+ compat_kwargs,
compat_parse_qs,
compat_socket_create_connection,
compat_str,
try:
pref = locale.getpreferredencoding()
'TEST'.encode(pref)
- except:
+ except Exception:
pref = 'UTF-8'
return pref
'encoding': 'utf-8',
})
- tf = tempfile.NamedTemporaryFile(**args)
+ tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
try:
with tf:
except OSError:
pass
os.rename(tf.name, fn)
- except:
+ except Exception:
try:
os.remove(tf.name)
except OSError:
raise
# In case of error, try to remove win32 forbidden chars
- alt_filename = os.path.join(
- re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
- for path_part in os.path.split(filename)
- )
+ alt_filename = sanitize_path(filename)
if alt_filename == filename:
raise
else:
# An exception here should be caught in the caller
- stream = open(encodeFilename(filename), open_mode)
+ stream = open(encodeFilename(alt_filename), open_mode)
return (stream, alt_filename)
result = result[2:]
if result.startswith('-'):
result = '_' + result[len('-'):]
+ result = result.lstrip('.')
if not result:
result = '_'
return result
+def sanitize_path(s):
+ """Sanitizes and normalizes path on Windows"""
+ if sys.platform != 'win32':
+ return s
+ drive_or_unc, _ = os.path.splitdrive(s)
+ if sys.version_info < (2, 7) and not drive_or_unc:
+ drive_or_unc, _ = os.path.splitunc(s)
+ norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
+ if drive_or_unc:
+ norm_path.pop(0)
+ sanitized_path = [
+ path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|\.$)', '#', path_part)
+ for path_part in norm_path]
+ if drive_or_unc:
+ sanitized_path.insert(0, drive_or_unc + os.path.sep)
+ return os.path.join(*sanitized_path)
+
+
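# Editor's sketch (not part of the diff) of the intended behavior; on
# non-Windows platforms sanitize_path returns its input unchanged:
def _demo_sanitize_path():
    if sys.platform == 'win32':
        assert sanitize_path('abc|def') == 'abc#def'
        assert sanitize_path('C:\\foo\\bar?baz') == 'C:\\foo\\bar#baz'
    else:
        assert sanitize_path('abc|def') == 'abc|def'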
def orderedSet(iterable):
""" Remove all duplicates from the input iterable """
res = []
if entity in compat_html_entities.name2codepoint:
return compat_chr(compat_html_entities.name2codepoint[entity])
- mobj = re.match(r'#(x?[0-9]+)', entity)
+ mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
if mobj is not None:
numstr = mobj.group(1)
if numstr.startswith('x'):
r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s)
+def get_subprocess_encoding():
+ if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
+ # For subprocess calls, encode with locale encoding
+ # Refer to http://stackoverflow.com/a/9951851/35070
+ encoding = preferredencoding()
+ else:
+ encoding = sys.getfilesystemencoding()
+ if encoding is None:
+ encoding = 'utf-8'
+ return encoding
+
+
def encodeFilename(s, for_subprocess=False):
"""
@param s The name of the file
if sys.version_info >= (3, 0):
return s
- if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
- # Pass '' directly to use Unicode APIs on Windows 2000 and up
- # (Detecting Windows NT 4 is tricky because 'major >= 4' would
- # match Windows 9x series as well. Besides, NT 4 is obsolete.)
- if not for_subprocess:
- return s
- else:
- # For subprocess calls, encode with locale encoding
- # Refer to http://stackoverflow.com/a/9951851/35070
- encoding = preferredencoding()
- else:
- encoding = sys.getfilesystemencoding()
- if encoding is None:
- encoding = 'utf-8'
- return s.encode(encoding, 'ignore')
+ # Pass '' directly to use Unicode APIs on Windows 2000 and up
+ # (Detecting Windows NT 4 is tricky because 'major >= 4' would
+ # match Windows 9x series as well. Besides, NT 4 is obsolete.)
+ if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
+ return s
+
+ return s.encode(get_subprocess_encoding(), 'ignore')
+
+
+def decodeFilename(b, for_subprocess=False):
+
+ if sys.version_info >= (3, 0):
+ return b
+
+ if not isinstance(b, bytes):
+ return b
+
+ return b.decode(get_subprocess_encoding(), 'ignore')
def encodeArgument(s):
return encodeFilename(s, True)
+def decodeArgument(b):
+ return decodeFilename(b, True)
+
+
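# Editor's sketch (not part of the diff): on Python 3 these helpers pass
# values through unchanged; on Python 2 they round-trip through the
# subprocess encoding (lossy only if the locale cannot represent a char):
def _demo_filename_codec():
    name = 'video.mp4'
    assert decodeFilename(encodeFilename(name, for_subprocess=True), for_subprocess=True) == name
    assert decodeArgument(encodeArgument(name)) == name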
def decodeOption(optval):
if optval is None:
return optval
return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
+def bug_reports_message():
+ if ytdl_is_updateable():
+ update_cmd = 'type youtube-dl -U to update'
+ else:
+ update_cmd = 'see https://yt-dl.org/update on how to update'
+ msg = '; please report this issue on https://yt-dl.org/bug .'
+ msg += ' Make sure you are using the latest version; %s.' % update_cmd
+ msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
+ return msg
+
+
class ExtractorError(Exception):
"""Error during info extraction."""
if cause:
msg += ' (caused by %r)' % cause
if not expected:
- if ytdl_is_updateable():
- update_cmd = 'type youtube-dl -U to update'
- else:
- update_cmd = 'see https://yt-dl.org/update on how to update'
- msg += '; please report this issue on https://yt-dl.org/bug .'
- msg += ' Make sure you are using the latest version; %s.' % update_cmd
- msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
+ msg += bug_reports_message()
super(ExtractorError, self).__init__(msg)
self.traceback = tb
# Replace commas
date_str = date_str.replace(',', ' ')
# %z (UTC offset) is only supported in python>=3.2
- date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
+ if not re.match(r'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str):
+ date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
# Remove AM/PM + timezone
date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
]
if day_first:
format_expressions.extend([
+ '%d-%m-%Y',
'%d.%m.%Y',
'%d/%m/%Y',
'%d/%m/%y',
])
else:
format_expressions.extend([
+ '%m-%d-%Y',
'%m.%d.%Y',
'%m/%d/%Y',
'%m/%d/%y',
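# Editor's illustration (not part of the diff), assuming these expressions
# feed unified_strdate(date_str, day_first=...): the same numeric date
# parses differently depending on day_first, e.g.
#     unified_strdate('02-03-2015', day_first=True)   # -> '20150302' (2 March)
#     unified_strdate('02-03-2015', day_first=False)  # -> '20150203' (3 February)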
return ' '.join(quoted_args)
-def takewhile_inclusive(pred, seq):
- """ Like itertools.takewhile, but include the latest evaluated element
- (the first element so that Not pred(e)) """
- for e in seq:
- yield e
- if not pred(e):
- return
-
-
def smuggle_url(url, data):
""" Pass additional data in a URL for internal use. """
return int(float(num_str) * mult)
-def get_term_width():
- columns = compat_getenv('COLUMNS', None)
- if columns:
- return int(columns)
-
- try:
- sp = subprocess.Popen(
- ['stty', 'size'],
- stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- out, err = sp.communicate()
- return int(out.split()[1])
- except:
- pass
- return None
-
-
def month_by_name(name):
""" Return the number of a month by (locale-independently) English name """
return res
-def prepend_extension(filename, ext):
+def prepend_extension(filename, ext, expected_real_ext=None):
name, real_ext = os.path.splitext(filename)
- return '{0}.{1}{2}'.format(name, ext, real_ext)
+ return (
+ '{0}.{1}{2}'.format(name, ext, real_ext)
+ if not expected_real_ext or real_ext[1:] == expected_real_ext
+ else '{0}.{1}'.format(filename, ext))
+
+
+def replace_extension(filename, ext, expected_real_ext=None):
+ name, real_ext = os.path.splitext(filename)
+ return '{0}.{1}'.format(
+ name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
+ ext)
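# Editor's sketch (not part of the diff) of the expected_real_ext guard:
# when the actual suffix does not match the expectation, the original
# filename is kept intact rather than split:
def _demo_extension_helpers():
    assert prepend_extension('video.mp4', 'temp') == 'video.temp.mp4'
    assert prepend_extension('video.unknown_video', 'temp', 'mp4') == 'video.unknown_video.temp'
    assert replace_extension('video.mkv', 'mp3', 'mkv') == 'video.mp3'
    assert replace_extension('video.unknown_video', 'mp3', 'mp4') == 'video.unknown_video.mp3'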
def check_executable(exe, args=[]):
or False if the executable is not present """
try:
out, _ = subprocess.Popen(
- [exe] + args,
+ [encodeArgument(exe)] + args,
stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
except OSError:
return False
s)
+def lowercase_escape(s):
+ unicode_escape = codecs.getdecoder('unicode_escape')
+ return re.sub(
+ r'\\u[0-9a-fA-F]{4}',
+ lambda m: unicode_escape(m.group(0))[0],
+ s)
+
+
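# Editor's sketch (not part of the diff): only literal \uXXXX sequences are
# decoded; other backslash escapes pass through untouched:
def _demo_lowercase_escape():
    assert lowercase_escape('\\u00e9tude') == '\u00e9tude'
    assert lowercase_escape('a\\nb') == 'a\\nb'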
def escape_rfc3986(s):
"""Escape non-ASCII characters as suggested by RFC 3986"""
if sys.version_info < (3, 0) and isinstance(s, compat_str):
'(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
[a-zA-Z_][.a-zA-Z_0-9]*
''', fix_kv, code)
- res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
+ res = re.sub(r',(\s*[\]}])', lambda m: m.group(1), res)
return res
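# Editor's illustration (not part of the diff): widening the trailing-comma
# pattern from r',(\s*\])' to r',(\s*[\]}])' strips dangling commas in
# objects as well as arrays, e.g.
#     js_to_json('[1, 2, 3,]')  # -> '[1, 2, 3]'
#     js_to_json('{"a": 1,}')   # -> '{"a": 1}'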
video_title = info_dict.get('title', info_dict.get('id', 'video'))
return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
return _match_func
+
+
+def parse_dfxp_time_expr(time_expr):
+ if not time_expr:
+ return 0.0
+
+ mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
+ if mobj:
+ return float(mobj.group('time_offset'))
+
+ mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:\.\d+)?)$', time_expr)
+ if mobj:
+ return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3))
+
+
+def srt_subtitles_timecode(seconds):
+ return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
+
+
+def dfxp2srt(dfxp_data):
+ _x = functools.partial(xpath_with_ns, ns_map={'ttml': 'http://www.w3.org/ns/ttml'})
+
+ def parse_node(node):
+ str_or_empty = functools.partial(str_or_none, default='')
+
+ out = str_or_empty(node.text)
+
+ for child in node:
+ if child.tag == _x('ttml:br'):
+ out += '\n' + str_or_empty(child.tail)
+ elif child.tag == _x('ttml:span'):
+ out += str_or_empty(parse_node(child))
+ else:
+ out += str_or_empty(xml.etree.ElementTree.tostring(child))
+
+ return out
+
+ dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
+ out = []
+ paras = dfxp.findall(_x('.//ttml:p'))
+
+ for para, index in zip(paras, itertools.count(1)):
+ begin_time = parse_dfxp_time_expr(para.attrib['begin'])
+ end_time = parse_dfxp_time_expr(para.attrib.get('end'))
+ if not end_time:
+ end_time = begin_time + parse_dfxp_time_expr(para.attrib['dur'])
+ out.append('%d\n%s --> %s\n%s\n\n' % (
+ index,
+ srt_subtitles_timecode(begin_time),
+ srt_subtitles_timecode(end_time),
+ parse_node(para)))
+
+ return ''.join(out)
+
+
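# Editor's sketch (not part of the diff): a minimal end-to-end conversion;
# the DFXP snippet below is a made-up document for illustration:
def _demo_dfxp2srt():
    dfxp = (
        '<tt xmlns="http://www.w3.org/ns/ttml">'
        '<body><div>'
        '<p begin="0" end="1.5">Hello<br/>world</p>'
        '</div></body></tt>')
    assert dfxp2srt(dfxp) == '1\n00:00:00,000 --> 00:00:01,500\nHello\nworld\n\n'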
+class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
+ def __init__(self, proxies=None):
+ # Set default handlers
+ for type in ('http', 'https'):
+ setattr(self, '%s_open' % type,
+ lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
+ meth(r, proxy, type))
+ return compat_urllib_request.ProxyHandler.__init__(self, proxies)
+
+ def proxy_open(self, req, proxy, type):
+ req_proxy = req.headers.get('Ytdl-request-proxy')
+ if req_proxy is not None:
+ proxy = req_proxy
+ del req.headers['Ytdl-request-proxy']
+
+ if proxy == '__noproxy__':
+ return None # No Proxy
+ return compat_urllib_request.ProxyHandler.proxy_open(
+ self, req, proxy, type)
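# Editor's sketch (not part of the diff): a per-request proxy is selected by
# smuggling it into the request headers; the proxy hosts below are made up,
# and '__noproxy__' (the default) bypasses proxying entirely:
def _demo_per_request_proxy():
    handler = PerRequestProxyHandler({'http': 'http://default-proxy.example:3128'})
    opener = compat_urllib_request.build_opener(handler)
    req = compat_urllib_request.Request('http://example.com/')
    req.add_header('Ytdl-request-proxy', 'http://per-request-proxy.example:3128')
    # opener.open(req) would now be routed through per-request-proxy.example
    # instead of default-proxy.example.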
from __future__ import unicode_literals
-__version__ = '2015.02.28'
+__version__ = '2015.05.15'