--- /dev/null
+NAME
+====
+
+youtube-dl
+
+SYNOPSIS
+========
+
+youtube-dl OPTIONS URL [URL...]
+
+DESCRIPTION
+===========
+
+youtube-dl is a small command-line program to download videos from
+YouTube.com and a few more sites. It requires the Python interpreter,
+version 2.6, 2.7, or 3.3+, and it is not platform specific. It should
+work on your Unix box, on Windows or on Mac OS X. It is released to the
+public domain, which means you can modify it, redistribute it or use it
+however you like.
+
+OPTIONS
+=======
+
+ -h, --help print this help text and exit
+ --version print program version and exit
+ -U, --update update this program to latest version
+ -i, --ignore-errors continue on download errors
+ -r, --rate-limit LIMIT maximum download rate (e.g. 50k or 44.6m)
+ -R, --retries RETRIES number of retries (default is 10)
+ --buffer-size SIZE size of download buffer (e.g. 1024 or 16k)
+ (default is 1024)
+ --no-resize-buffer do not automatically adjust the buffer size. By
+ default, the buffer size is automatically resized
+ from an initial value of SIZE.
+ --dump-user-agent display the current browser identification
+ --user-agent UA specify a custom user agent
+ --referer REF specify a custom referer, use if the video access
+ is restricted to one domain
+ --list-extractors List all supported extractors and the URLs they
+ would handle
+ --proxy URL Use the specified HTTP/HTTPS proxy
+ --no-check-certificate Suppress HTTPS certificate validation.
+
+Video Selection:
+----------------
+
+ --playlist-start NUMBER playlist video to start at (default is 1)
+ --playlist-end NUMBER playlist video to end at (default is last)
+ --match-title REGEX download only matching titles (regex or caseless
+ sub-string)
+ --reject-title REGEX skip download for matching titles (regex or
+ caseless sub-string)
+ --max-downloads NUMBER Abort after downloading NUMBER files
+ --min-filesize SIZE Do not download any videos smaller than SIZE
+ (e.g. 50k or 44.6m)
+ --max-filesize SIZE Do not download any videos larger than SIZE (e.g.
+ 50k or 44.6m)
+ --date DATE download only videos uploaded in this date
+ --datebefore DATE download only videos uploaded before this date
+ --dateafter DATE download only videos uploaded after this date
+
+Filesystem Options:
+-------------------
+
+ -t, --title use title in file name (default)
+ --id use only video ID in file name
+ -l, --literal [deprecated] alias of --title
+ -A, --auto-number number downloaded files starting from 00000
+ -o, --output TEMPLATE output filename template. Use %(title)s to get
+ the title, %(uploader)s for the uploader name,
+ %(uploader_id)s for the uploader nickname if
+ different, %(autonumber)s to get an automatically
+ incremented number, %(ext)s for the filename
+ extension, %(upload_date)s for the upload date
+ (YYYYMMDD), %(extractor)s for the provider
+ (youtube, metacafe, etc), %(id)s for the video id
+ , %(playlist)s for the playlist the video is in,
+ %(playlist_index)s for the position in the
+ playlist and %% for a literal percent. Use - to
+ output to stdout. Can also be used to download to
+ a different directory, for example with -o '/my/d
+ ownloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
+ --autonumber-size NUMBER Specifies the number of digits in %(autonumber)s
+ when it is present in output filename template or
+ --autonumber option is given
+ --restrict-filenames Restrict filenames to only ASCII characters, and
+ avoid "&" and spaces in filenames
+ -a, --batch-file FILE file containing URLs to download ('-' for stdin)
+ -w, --no-overwrites do not overwrite files
+ -c, --continue resume partially downloaded files
+ --no-continue do not resume partially downloaded files (restart
+ from beginning)
+ --cookies FILE file to read cookies from and dump cookie jar in
+ --no-part do not use .part files
+ --no-mtime do not use the Last-modified header to set the
+ file modification time
+ --write-description write video description to a .description file
+ --write-info-json write video metadata to a .info.json file
+ --write-thumbnail write thumbnail image to disk
+
+Verbosity / Simulation Options:
+-------------------------------
+
+ -q, --quiet activates quiet mode
+ -s, --simulate do not download the video and do not write
+ anything to disk
+ --skip-download do not download the video
+ -g, --get-url simulate, quiet but print URL
+ -e, --get-title simulate, quiet but print title
+ --get-id simulate, quiet but print id
+ --get-thumbnail simulate, quiet but print thumbnail URL
+ --get-description simulate, quiet but print video description
+ --get-filename simulate, quiet but print output filename
+ --get-format simulate, quiet but print output format
+ --newline output progress bar as new lines
+ --no-progress do not print progress bar
+ --console-title display progress in console titlebar
+ -v, --verbose print various debugging information
+    --dump-intermediate-pages print downloaded pages to debug problems (very
+                              verbose)
+
+Video Format Options:
+---------------------
+
+    -f, --format FORMAT              video format code, specify the order of
+                                     preference using slashes: "-f 22/17/18"
+ --all-formats download all available video formats
+ --prefer-free-formats prefer free video formats unless a specific one
+ is requested
+ --max-quality FORMAT highest quality format to download
+ -F, --list-formats list all available formats (currently youtube
+ only)
+ --write-sub write subtitle file (currently youtube only)
+ --only-sub [deprecated] alias of --skip-download
+ --all-subs downloads all the available subtitles of the
+ video (currently youtube only)
+ --list-subs lists all available subtitles for the video
+ (currently youtube only)
+ --sub-format LANG subtitle format [srt/sbv] (default=srt)
+ (currently youtube only)
+ --sub-lang LANG language of the subtitles to download (optional)
+ use IETF language tags like 'en'
+
+Authentication Options:
+-----------------------
+
+ -u, --username USERNAME account username
+ -p, --password PASSWORD account password
+ -n, --netrc use .netrc authentication data
+
+Post-processing Options:
+------------------------
+
+ -x, --extract-audio convert video files to audio-only files (requires
+ ffmpeg or avconv and ffprobe or avprobe)
+ --audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", "opus", or
+ "wav"; best by default
+ --audio-quality QUALITY ffmpeg/avconv audio quality specification, insert
+ a value between 0 (better) and 9 (worse) for VBR
+ or a specific bitrate like 128K (default 5)
+ --recode-video FORMAT Encode the video to another format if necessary
+ (currently supported: mp4|flv|ogg|webm)
+ -k, --keep-video keeps the video file on disk after the post-
+ processing; the video is erased by default
+ --no-post-overwrites do not overwrite post-processed files; the post-
+ processed files are overwritten by default
+
+CONFIGURATION
+=============
+
+You can configure youtube-dl by placing default arguments (such as
+--extract-audio --no-mtime to always extract the audio and not copy the
+mtime) into /etc/youtube-dl.conf and/or ~/.config/youtube-dl.conf.
+
+OUTPUT TEMPLATE
+===============
+
+The -o option allows users to indicate a template for the output file
+names. The basic usage is not to set any template arguments when
+downloading a single file, like in
+youtube-dl -o funny_video.flv "http://some/video". However, it may
+contain special sequences that will be replaced when downloading each
+video. The special sequences have the format %(NAME)s. To clarify, that
+is a percent symbol followed by a name in parenthesis, followed by a
+lowercase S. Allowed names are:
+
+- id: The sequence will be replaced by the video identifier.
+- url: The sequence will be replaced by the video URL.
+- uploader: The sequence will be replaced by the nickname of the
+ person who uploaded the video.
+- upload_date: The sequence will be replaced by the upload date in
+ YYYYMMDD format.
+- title: The sequence will be replaced by the video title.
+- ext: The sequence will be replaced by the appropriate extension
+ (like flv or mp4).
+- epoch: The sequence will be replaced by the Unix epoch when creating
+ the file.
+- autonumber: The sequence will be replaced by a five-digit number
+ that will be increased with each download, starting at zero.
+- playlist: The name or the id of the playlist that contains the
+ video.
+- playlist_index: The index of the video in the playlist, a five-digit
+ number.
+
+The current default template is %(id)s.%(ext)s, but that will be
+switched to %(title)s-%(id)s.%(ext)s (which can be requested with -t at
+the moment).
+
+In some cases, you don't want special characters such as 中, spaces, or
+&, such as when transferring the downloaded filename to a Windows system
+or the filename through an 8bit-unsafe channel. In these cases, add the
+--restrict-filenames flag to get a shorter title:
+
+ $ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc
+ youtube-dl test video ''_ä↭𝕐.mp4 # All kinds of weird characters
+ $ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
+ youtube-dl_test_video_.mp4 # A simple file name
+
+VIDEO SELECTION
+===============
+
+Videos can be filtered by their upload date using the options --date,
+--datebefore or --dateafter, they accept dates in two formats:
+
+- Absolute dates: Dates in the format YYYYMMDD.
+- Relative dates: Dates in the format
+ (now|today)[+-][0-9](day|week|month|year)(s)?
+
+Examples:
+
+ $ youtube-dl --dateafter now-6months #will only download the videos uploaded in the last 6 months
+    $ youtube-dl --date 19700101 #will only download the videos uploaded on January 1, 1970
+ $ youtube-dl --dateafter 20000101 --datebefore 20100101 #will only download the videos uploaded between 2000 and 2010
+
+FAQ
+===
+
+Can you please put the -b option back?
+
+Most people asking this question are not aware that youtube-dl now
+defaults to downloading the highest available quality as reported by
+YouTube, which will be 1080p or 720p in some cases, so you no longer
+need the -b option. For some specific videos, maybe YouTube does not
+report them to be available in a specific high quality format you're
+interested in. In that case, simply request it with the -f option and
+youtube-dl will try to download it.
+
+I get HTTP error 402 when trying to download a video. What's this?
+
+Apparently YouTube requires you to pass a CAPTCHA test if you download
+too much. We're considering providing a way to let you solve the
+CAPTCHA, but at the moment, your best course of action is pointing a
+webbrowser to the youtube URL, solving the CAPTCHA, and restarting
+youtube-dl.
+
+I have downloaded a video but how can I play it?
+
+Once the video is fully downloaded, use any video player, such as vlc or
+mplayer.
+
+The links provided by youtube-dl -g are not working anymore
+
+The URLs youtube-dl outputs require the downloader to have the correct
+cookies. Use the --cookies option to write the required cookies into a
+file, and advise your downloader to read cookies from that file. Some
+sites also require a common user agent to be used, use --dump-user-agent
+to see the one in use by youtube-dl.
+
+ERROR: no fmt_url_map or conn information found in video info
+
+youtube has switched to a new video info format in July 2011 which is
+not supported by old versions of youtube-dl. You can update youtube-dl
+with sudo youtube-dl --update.
+
+ERROR: unable to download video
+
+youtube requires an additional signature since September 2012 which is
+not supported by old versions of youtube-dl. You can update youtube-dl
+with sudo youtube-dl --update.
+
+SyntaxError: Non-ASCII character
+
+The error
+
+ File "youtube-dl", line 2
+ SyntaxError: Non-ASCII character '\x93' ...
+
+means you're using an outdated version of Python. Please update to
+Python 2.6 or 2.7.
+
+What is this binary file? Where has the code gone?
+
+Since June 2012 (#342) youtube-dl is packed as an executable zipfile,
+simply unzip it (might need renaming to youtube-dl.zip first on some
+systems) or clone the git repository, as laid out above. If you modify
+the code, you can run it by executing the __main__.py file. To recompile
+the executable, run make youtube-dl.
+
+The exe throws a Runtime error from Visual C++
+
+To run the exe you need to install first the Microsoft Visual C++ 2008
+Redistributable Package.
+
+COPYRIGHT
+=========
+
+youtube-dl is released into the public domain by the copyright holders.
+
+This README file was originally written by Daniel Bolton
+(https://github.com/dbbolton) and is likewise released into the public
+domain.
+
+BUGS
+====
+
+Bugs and suggestions should be reported at:
+https://github.com/rg3/youtube-dl/issues
+
+Please include:
+
+- Your exact command line, like
+ youtube-dl -t "http://www.youtube.com/watch?v=uHlDtZ6Oc3s&feature=channel_video_title".
+ A common mistake is not to escape the &. Putting URLs in quotes
+ should solve this problem.
+- If possible re-run the command with --verbose, and include the full
+ output, it is really helpful to us.
+- The output of youtube-dl --version
+- The output of python --version
+- The name and version of your Operating System ("Ubuntu 11.04 x64" or
+ "Windows 7 x64" is usually enough).
+
+For discussions, join us in the irc channel #youtube-dl on freenode.
"name": "Dailymotion",
"md5": "392c4b85a60a90dc4792da41ce3144eb",
"url": "http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech",
- "file": "x33vw9.mp4"
+ "file": "x33vw9.mp4",
+ "info_dict": {
+ "uploader": "Alex and Van .",
+ "title": "Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
+ }
},
{
"name": "Metacafe",
"add_ie": ["Youtube"],
"url": "http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/",
- "file": "_aUehQsCQtM.flv"
+ "file": "_aUehQsCQtM.flv",
+ "info_dict": {
+ "upload_date": "20090102",
+ "title": "The Electric Company | \"Short I\" | PBS KIDS GO!",
+ "description": "md5:2439a8ef6d5a70e380c22f5ad323e5a8",
+ "uploader": "PBS",
+ "uploader_id": "PBS"
+ }
},
{
"name": "BlipTV",
"md5": "b2d849efcf7ee18917e4b4d9ff37cafe",
"url": "http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352",
- "file": "5779306.m4v"
+ "file": "5779306.m4v",
+ "info_dict": {
+ "upload_date": "20111205",
+ "description": "md5:9bc31f227219cde65e47eeec8d2dc596",
+ "uploader": "Comic Book Resources - CBR TV",
+ "title": "CBR EXCLUSIVE: \"Gotham City Imposters\" Bats VS Jokerz Short 3"
+ }
},
{
"name": "XVideos",
"md5": "1d0c835822f0a71a7bf011855db929d0",
"url": "http://www.xvideos.com/video939581/funny_porns_by_s_-1",
- "file": "939581.flv"
+ "file": "939581.flv",
+ "info_dict": {
+ "title": "Funny Porns By >>>>S<<<<<< -1"
+ }
},
{
"name": "YouPorn",
"md5": "c37ddbaaa39058c76a7e86c6813423c1",
"url": "http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/",
- "file": "505835.mp4"
+ "file": "505835.mp4",
+ "info_dict": {
+ "upload_date": "20101221",
+ "description": "Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?",
+ "uploader": "Ask Dan And Jennifer",
+ "title": "Sex Ed: Is It Safe To Masturbate Daily?"
+ }
},
{
"name": "Pornotube",
"md5": "374dd6dcedd24234453b295209aa69b6",
"url": "http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing",
- "file": "1689755.flv"
+ "file": "1689755.flv",
+ "info_dict": {
+ "upload_date": "20090708",
+ "title": "Marilyn-Monroe-Bathing"
+ }
},
{
"name": "YouJizz",
"md5": "07e15fa469ba384c7693fd246905547c",
"url": "http://www.youjizz.com/videos/zeichentrick-1-2189178.html",
- "file": "2189178.flv"
+ "file": "2189178.flv",
+ "info_dict": {
+ "title": "Zeichentrick 1"
+ }
},
{
"name": "Vimeo",
"name": "Soundcloud",
"md5": "ebef0a451b909710ed1d7787dddbf0d7",
"url": "http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy",
- "file": "62986583.mp3"
+ "file": "62986583.mp3",
+ "info_dict": {
+ "upload_date": "20121011",
+ "description": "No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd",
+ "uploader": "E.T. ExTerrestrial Music",
+ "title": "Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
+ }
},
{
"name": "StanfordOpenClassroom",
"md5": "544a9468546059d4e80d76265b0443b8",
"url": "http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100",
- "file": "PracticalUnix_intro-environment.mp4"
+ "file": "PracticalUnix_intro-environment.mp4",
+ "info_dict": {
+ "title": "Intro Environment"
+ }
},
{
"name": "XNXX",
"md5": "0831677e2b4761795f68d417e0b7b445",
"url": "http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_",
- "file": "1135332.flv"
+ "file": "1135332.flv",
+ "info_dict": {
+ "title": "lida » Naked Funny Actress (5)"
+ }
},
{
"name": "Youku",
"url": "http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html",
"file": "XNDgyMDQ2NTQw_part00.flv",
"md5": "ffe3f2e435663dc2d1eea34faeff5b5b",
- "params": { "test": false }
+ "params": { "test": false },
+ "info_dict": {
+ "title": "youtube-dl test video \"'/\\ä↭𝕐"
+ }
},
{
"name": "NBA",
"url": "http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html",
"file": "0021200253-okc-bkn-recap.nba.mp4",
- "md5": "c0edcfc37607344e2ff8f13c378c88a4"
+ "md5": "c0edcfc37607344e2ff8f13c378c88a4",
+ "info_dict": {
+ "description": "Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.",
+ "title": "Thunder vs. Nets"
+ }
},
{
"name": "JustinTV",
"url": "http://www.twitch.tv/thegamedevhub/b/296128360",
"file": "296128360.flv",
- "md5": "ecaa8a790c22a40770901460af191c9a"
+ "md5": "ecaa8a790c22a40770901460af191c9a",
+ "info_dict": {
+ "upload_date": "20110927",
+ "uploader_id": 25114803,
+ "uploader": "thegamedevhub",
+ "title": "Beginner Series - Scripting With Python Pt.1"
+ }
},
{
"name": "MyVideo",
"url": "http://www.myvideo.de/watch/8229274/bowling_fail_or_win",
"file": "8229274.flv",
- "md5": "2d2753e8130479ba2cb7e0a37002053e"
+ "md5": "2d2753e8130479ba2cb7e0a37002053e",
+ "info_dict": {
+ "title": "bowling-fail-or-win"
+ }
},
{
"name": "Escapist",
"url": "http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate",
"file": "6618-Breaking-Down-Baldurs-Gate.mp4",
- "md5": "c6793dbda81388f4264c1ba18684a74d"
+ "md5": "c6793dbda81388f4264c1ba18684a74d",
+ "info_dict": {
+ "description": "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
+ "uploader": "the-escapist-presents",
+ "title": "Breaking Down Baldur's Gate"
+ }
},
{
"name": "GooglePlus",
"url": "https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH",
- "file": "ZButuJc6CtH.flv"
+ "file": "ZButuJc6CtH.flv",
+ "info_dict": {
+ "upload_date": "20120613",
+ "uploader": "井上ヨシマサ",
+ "title": "嘆きの天使 降臨"
+ }
},
{
"name": "FunnyOrDie",
"url": "http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version",
"file": "0732f586d7.mp4",
- "md5": "f647e9e90064b53b6e046e75d0241fbd"
+ "md5": "f647e9e90064b53b6e046e75d0241fbd",
+ "info_dict": {
+ "description": "Lyrics changed to match the video. Spoken cameo by Obscurus Lupa (from ThatGuyWithTheGlasses.com). Based on a concept by Dustin McLean (DustFilms.com). Performed, edited, and written by David A. Scott.",
+ "title": "Heart-Shaped Box: Literal Video Version"
+ }
},
{
"name": "Steam",
"url": "http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",
"file": "12-jan-pythonthings.mp4",
"info_dict": {
+ "description": "Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.",
"title": "A Few of My Favorite [Python] Things"
},
"params": {
"file": "422212.mp4",
"md5": "4e2f5cb088a83cd8cdb7756132f9739d",
"info_dict": {
- "title": "thedailyshow-kristen-stewart part 1"
+ "upload_date": "20121214",
+ "description": "Kristen Stewart",
+ "uploader": "thedailyshow",
+ "title": "thedailyshow-kristen-stewart part 1"
}
},
{
"file": "11885679.m4a",
"md5": "d30b5b5f74217410f4689605c35d1fd7",
"info_dict": {
- "title": "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad"
+ "title": "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad",
+ "uploader_id": "ytdl"
}
},
{
"file": "11885680.m4a",
"md5": "4eb0a669317cd725f6bbd336a29f923a",
"info_dict": {
- "title": "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad"
+ "title": "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad",
+ "uploader_id": "ytdl"
}
},
{
"file": "11885682.m4a",
"md5": "1893e872e263a2705558d1d319ad19e8",
"info_dict": {
- "title": "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad"
+ "title": "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad",
+ "uploader_id": "ytdl"
}
},
{
"file": "11885683.m4a",
"md5": "b673c46f47a216ab1741ae8836af5899",
"info_dict": {
- "title": "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad"
+ "title": "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad",
+ "uploader_id": "ytdl"
}
},
{
"file": "11885684.m4a",
"md5": "1d74534e95df54986da7f5abf7d842b7",
"info_dict": {
- "title": "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad"
+ "title": "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad",
+ "uploader_id": "ytdl"
}
},
{
"file": "11885685.m4a",
"md5": "f081f47af8f6ae782ed131d38b9cd1c0",
"info_dict": {
- "title": "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad"
+ "title": "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad",
+ "uploader_id": "ytdl"
}
}
]
"file": "NODfbab.mp4",
"md5": "9b0636f8c0f7614afa4ea5e4c6e57e83",
"info_dict": {
+ "uploader": "ytdl",
"title": "test chars: \"'/\\ä<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de ."
}
-
},
{
"name": "TED",
"url": "http://www.ted.com/talks/dan_dennett_on_our_consciousness.html",
"file": "102.mp4",
- "md5": "7bc087e71d16f18f9b8ab9fa62a8a031",
+ "md5": "8cd9dfa41ee000ce658fd48fb5d89a61",
"info_dict": {
"title": "Dan Dennett: The illusion of consciousness",
- "thumbnail": "http://images.ted.com/images/ted/488_389x292.jpg"
+ "description": "md5:c6fa72e6eedbd938c9caf6b2702f5922"
}
},
{
"file": "11741.mp4",
"md5": "0b49f4844a068f8b33f4b7c88405862b",
"info_dict": {
- "title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2"
+ "description": "Wer kann in die Fußstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?",
+ "title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2"
}
},
{
"name": "Generic",
"url": "http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html",
"file": "13601338388002.mp4",
- "md5": "85b90ccc9d73b4acd9138d3af4c27f89"
+ "md5": "85b90ccc9d73b4acd9138d3af4c27f89",
+ "info_dict": {
+ "uploader": "www.hodiho.fr",
+ "title": "Régis plante sa Jeep"
+ }
},
{
"name": "Spiegel",
"file": "wshh6a7q1ny0G34ZwuIO.mp4",
"md5": "9d04de741161603bf7071bbf4e883186",
"info_dict": {
- "title": "Video: KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick! "
+ "title": "Video: KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!"
}
},
{
},
{
"name": "Tumblr",
- "url": "http://birthdayproject2012.tumblr.com/post/17258355236/a-sample-video-from-leeann-if-you-need-an-idea",
- "file": "17258355236.mp4",
- "md5": "7c6a514d691b034ccf8567999e9e88a3",
+ "url": "http://resigno.tumblr.com/post/53364321212/e-de-extrema-importancia-que-esse-video-seja",
+ "file": "53364321212.mp4",
+ "md5": "0716d3dd51baf68a28b40fdf1251494e",
"info_dict": {
- "title": "Calling all Pris! - A sample video from LeeAnn. (If you need an idea..."
+ "title": "Rafael Lemos | Tumblr"
}
},
{
"file":"30510138.mp3",
"md5":"f9136bf103901728f29e419d2c70f55d",
"info_dict": {
- "title":"D-D-Dance"
+ "upload_date": "20111213",
+ "description": "The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
+ "uploader": "The Royal Concept",
+ "title": "D-D-Dance"
}
},
{
"file":"47127625.mp3",
"md5":"09b6758a018470570f8fd423c9453dd8",
"info_dict": {
- "title":"The Royal Concept - Gimme Twice"
+ "upload_date": "20120521",
+ "description": "The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
+ "uploader": "The Royal Concept",
+ "title": "The Royal Concept - Gimme Twice"
}
},
{
"file":"47127627.mp3",
"md5":"154abd4e418cea19c3b901f1e1306d9c",
"info_dict": {
- "title":"Goldrushed"
+ "upload_date": "20120521",
+ "uploader": "The Royal Concept",
+ "title": "Goldrushed"
}
},
{
"file":"47127629.mp3",
"md5":"2f5471edc79ad3f33a683153e96a79c1",
"info_dict": {
- "title":"In the End"
+ "upload_date": "20120521",
+ "description": "The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
+ "uploader": "The Royal Concept",
+ "title": "In the End"
}
},
{
"file":"47127631.mp3",
"md5":"f9ba87aa940af7213f98949254f1c6e2",
"info_dict": {
- "title":"Knocked Up"
+ "upload_date": "20120521",
+ "description": "The Royal Concept from Stockholm\r\nFilip / David / Povel / Magnus\r\nwww.theroyalconceptband.com",
+ "uploader": "The Royal Concept",
+ "title": "Knocked Up"
}
},
{
"file":"75206121.mp3",
"md5":"f9d1fe9406717e302980c30de4af9353",
"info_dict": {
- "title":"World On Fire"
+ "upload_date": "20130116",
+ "description": "The unreleased track World on Fire premiered on the CW's hit show Arrow (8pm/7pm central). \r\nAs a gift to our fans we would like to offer you a free download of the track! ",
+ "uploader": "The Royal Concept",
+ "title": "World On Fire"
}
}
]
"url": "http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0",
"file": "zpsc0c3b9fa.mp4",
"md5": "7dabfb92b0a31f6c16cebc0f8e60ff99",
- "info_dict":{
- "title":"Tired of Link Building? Try BacklinkMyDomain.com!"
+ "info_dict": {
+ "upload_date": "20130504",
+ "uploader": "rachaneronas",
+ "title": "Tired of Link Building? Try BacklinkMyDomain.com!"
}
},
{
},
{
"name": "Yahoo",
- "url": "http://screen.yahoo.com/obama-celebrates-iraq-victory-27592561.html",
- "file": "27592561.flv",
- "md5": "c6179bed843512823fd284fa2e7f012d",
+ "url": "http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html",
+ "file": "214727115.flv",
+ "md5": "2e717f169c1be93d84d3794a00d4a325",
"info_dict": {
- "title": "Obama Celebrates Iraq Victory"
+ "title": "Julian Smith & Travis Legg Watch Julian Smith"
},
"skip": "Requires rtmpdump"
},
"title": "Louis C.K. Interview Pt. 1 11/3/11",
"description": "Louis C.K. got starstruck by George W. Bush, so what? Part one."
}
+ },
+ {
+ "name": "XHamster",
+ "url": "http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html",
+ "file": "1509445.flv",
+ "md5": "9f48e0e8d58e3076bb236ff412ab62fa",
+ "info_dict": {
+ "upload_date": "20121014",
+ "uploader_id": "Ruseful2011",
+ "title": "FemaleAgent Shy beauty takes the bait"
+ }
+ },
+ {
+ "name": "Hypem",
+ "url": "http://hypem.com/track/1v6ga/BODYWORK+-+TAME",
+ "file": "1v6ga.mp3",
+ "md5": "b9cc91b5af8995e9f0c1cee04c575828",
+ "info_dict":{
+ "title":"Tame"
+ }
+ },
+ {
+ "name": "Vbox7",
+ "url": "http://vbox7.com/play:249bb972c2",
+ "file": "249bb972c2.flv",
+ "md5": "9c70d6d956f888bdc08c124acc120cfe",
+ "info_dict":{
+ "title":"Смях! Чудо - чист за секунди - Скрита камера"
+ }
+ },
+ {
+ "name": "Gametrailers",
+ "url": "http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer",
+ "file": "zbvr8i.flv",
+ "md5": "c3edbc995ab4081976e16779bd96a878",
+ "info_dict": {
+ "title": "E3 2013: Debut Trailer"
+ },
+ "skip": "Requires rtmpdump"
}
]
--- /dev/null
+.TH YOUTUBE\-DL 1 ""
+.SH NAME
+.PP
+youtube\-dl
+.SH SYNOPSIS
+.PP
+\f[B]youtube\-dl\f[] OPTIONS (#options) URL [URL...]
+.SH DESCRIPTION
+.PP
+\f[B]youtube\-dl\f[] is a small command\-line program to download videos
+from YouTube.com and a few more sites.
+It requires the Python interpreter, version 2.6, 2.7, or 3.3+, and it is
+not platform specific.
+It should work on your Unix box, on Windows or on Mac OS X.
+It is released to the public domain, which means you can modify it,
+redistribute it or use it however you like.
+.SH OPTIONS
+.IP
+.nf
+\f[C]
+\-h,\ \-\-help\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ print\ this\ help\ text\ and\ exit
+\-\-version\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ print\ program\ version\ and\ exit
+\-U,\ \-\-update\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ update\ this\ program\ to\ latest\ version
+\-i,\ \-\-ignore\-errors\ \ \ \ \ \ \ \ continue\ on\ download\ errors
+\-r,\ \-\-rate\-limit\ LIMIT\ \ \ \ \ maximum\ download\ rate\ (e.g.\ 50k\ or\ 44.6m)
+\-R,\ \-\-retries\ RETRIES\ \ \ \ \ \ number\ of\ retries\ (default\ is\ 10)
+\-\-buffer\-size\ SIZE\ \ \ \ \ \ \ \ \ size\ of\ download\ buffer\ (e.g.\ 1024\ or\ 16k)
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (default\ is\ 1024)
+\-\-no\-resize\-buffer\ \ \ \ \ \ \ \ \ do\ not\ automatically\ adjust\ the\ buffer\ size.\ By
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ default,\ the\ buffer\ size\ is\ automatically\ resized
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ from\ an\ initial\ value\ of\ SIZE.
+\-\-dump\-user\-agent\ \ \ \ \ \ \ \ \ \ display\ the\ current\ browser\ identification
+\-\-user\-agent\ UA\ \ \ \ \ \ \ \ \ \ \ \ specify\ a\ custom\ user\ agent
+\-\-referer\ REF\ \ \ \ \ \ \ \ \ \ \ \ \ \ specify\ a\ custom\ referer,\ use\ if\ the\ video\ access
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ is\ restricted\ to\ one\ domain
+\-\-list\-extractors\ \ \ \ \ \ \ \ \ \ List\ all\ supported\ extractors\ and\ the\ URLs\ they
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ would\ handle
+\-\-proxy\ URL\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Use\ the\ specified\ HTTP/HTTPS\ proxy
+\-\-no\-check\-certificate\ \ \ \ \ Suppress\ HTTPS\ certificate\ validation.
+\f[]
+.fi
+.SS Video Selection:
+.IP
+.nf
+\f[C]
+\-\-playlist\-start\ NUMBER\ \ \ \ playlist\ video\ to\ start\ at\ (default\ is\ 1)
+\-\-playlist\-end\ NUMBER\ \ \ \ \ \ playlist\ video\ to\ end\ at\ (default\ is\ last)
+\-\-match\-title\ REGEX\ \ \ \ \ \ \ \ download\ only\ matching\ titles\ (regex\ or\ caseless
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ sub\-string)
+\-\-reject\-title\ REGEX\ \ \ \ \ \ \ skip\ download\ for\ matching\ titles\ (regex\ or
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ caseless\ sub\-string)
+\-\-max\-downloads\ NUMBER\ \ \ \ \ Abort\ after\ downloading\ NUMBER\ files
+\-\-min\-filesize\ SIZE\ \ \ \ \ \ \ \ Do\ not\ download\ any\ videos\ smaller\ than\ SIZE
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (e.g.\ 50k\ or\ 44.6m)
+\-\-max\-filesize\ SIZE\ \ \ \ \ \ \ \ Do\ not\ download\ any\ videos\ larger\ than\ SIZE\ (e.g.
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 50k\ or\ 44.6m)
+\-\-date\ DATE\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ download\ only\ videos\ uploaded\ in\ this\ date
+\-\-datebefore\ DATE\ \ \ \ \ \ \ \ \ \ download\ only\ videos\ uploaded\ before\ this\ date
+\-\-dateafter\ DATE\ \ \ \ \ \ \ \ \ \ \ download\ only\ videos\ uploaded\ after\ this\ date
+\f[]
+.fi
+.SS Filesystem Options:
+.IP
+.nf
+\f[C]
+\-t,\ \-\-title\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ title\ in\ file\ name\ (default)
+\-\-id\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ only\ video\ ID\ in\ file\ name
+\-l,\ \-\-literal\ \ \ \ \ \ \ \ \ \ \ \ \ \ [deprecated]\ alias\ of\ \-\-title
+\-A,\ \-\-auto\-number\ \ \ \ \ \ \ \ \ \ number\ downloaded\ files\ starting\ from\ 00000
+\-o,\ \-\-output\ TEMPLATE\ \ \ \ \ \ output\ filename\ template.\ Use\ %(title)s\ to\ get
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ the\ title,\ %(uploader)s\ for\ the\ uploader\ name,
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(uploader_id)s\ for\ the\ uploader\ nickname\ if
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ different,\ %(autonumber)s\ to\ get\ an\ automatically
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ incremented\ number,\ %(ext)s\ for\ the\ filename
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ extension,\ %(upload_date)s\ for\ the\ upload\ date
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (YYYYMMDD),\ %(extractor)s\ for\ the\ provider
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (youtube,\ metacafe,\ etc),\ %(id)s\ for\ the\ video\ id
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ ,\ %(playlist)s\ for\ the\ playlist\ the\ video\ is\ in,
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(playlist_index)s\ for\ the\ position\ in\ the
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ playlist\ and\ %%\ for\ a\ literal\ percent.\ Use\ \-\ to
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ output\ to\ stdout.\ Can\ also\ be\ used\ to\ download\ to
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ a\ different\ directory,\ for\ example\ with\ \-o\ \[aq]/my/d
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ ownloads/%(uploader)s/%(title)s\-%(id)s.%(ext)s\[aq]\ .
+\-\-autonumber\-size\ NUMBER\ \ \ Specifies\ the\ number\ of\ digits\ in\ %(autonumber)s
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ when\ it\ is\ present\ in\ output\ filename\ template\ or
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \-\-autonumber\ option\ is\ given
+\-\-restrict\-filenames\ \ \ \ \ \ \ Restrict\ filenames\ to\ only\ ASCII\ characters,\ and
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ avoid\ "&"\ and\ spaces\ in\ filenames
+\-a,\ \-\-batch\-file\ FILE\ \ \ \ \ \ file\ containing\ URLs\ to\ download\ (\[aq]\-\[aq]\ for\ stdin)
+\-w,\ \-\-no\-overwrites\ \ \ \ \ \ \ \ do\ not\ overwrite\ files
+\-c,\ \-\-continue\ \ \ \ \ \ \ \ \ \ \ \ \ resume\ partially\ downloaded\ files
+\-\-no\-continue\ \ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ resume\ partially\ downloaded\ files\ (restart
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ from\ beginning)
+\-\-cookies\ FILE\ \ \ \ \ \ \ \ \ \ \ \ \ file\ to\ read\ cookies\ from\ and\ dump\ cookie\ jar\ in
+\-\-no\-part\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ use\ .part\ files
+\-\-no\-mtime\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ use\ the\ Last\-modified\ header\ to\ set\ the
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ file\ modification\ time
+\-\-write\-description\ \ \ \ \ \ \ \ write\ video\ description\ to\ a\ .description\ file
+\-\-write\-info\-json\ \ \ \ \ \ \ \ \ \ write\ video\ metadata\ to\ a\ .info.json\ file
+\-\-write\-thumbnail\ \ \ \ \ \ \ \ \ \ write\ thumbnail\ image\ to\ disk
+\f[]
+.fi
+.SS Verbosity / Simulation Options:
+.IP
+.nf
+\f[C]
+\-q,\ \-\-quiet\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ activates\ quiet\ mode
+\-s,\ \-\-simulate\ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ download\ the\ video\ and\ do\ not\ write
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ anything\ to\ disk
+\-\-skip\-download\ \ \ \ \ \ \ \ \ \ \ \ do\ not\ download\ the\ video
+\-g,\ \-\-get\-url\ \ \ \ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ URL
+\-e,\ \-\-get\-title\ \ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ title
+\-\-get\-id\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ id
+\-\-get\-thumbnail\ \ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ thumbnail\ URL
+\-\-get\-description\ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ video\ description
+\-\-get\-filename\ \ \ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ output\ filename
+\-\-get\-format\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ output\ format
+\-\-newline\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ output\ progress\ bar\ as\ new\ lines
+\-\-no\-progress\ \ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ print\ progress\ bar
+\-\-console\-title\ \ \ \ \ \ \ \ \ \ \ \ display\ progress\ in\ console\ titlebar
+\-v,\ \-\-verbose\ \ \ \ \ \ \ \ \ \ \ \ \ \ print\ various\ debugging\ information
+\-\-dump\-intermediate\-pages\ \ print\ downloaded\ pages\ to\ debug\ problems\ (very
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ verbose)
+\f[]
+.fi
+.SS Video Format Options:
+.IP
+.nf
+\f[C]
+\-f,\ \-\-format\ FORMAT\ \ \ \ \ \ \ \ video\ format\ code,\ specify\ the\ order\ of
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ preference\ using\ slashes:\ "\-f\ 22/17/18"
+\-\-all\-formats\ \ \ \ \ \ \ \ \ \ \ \ \ \ download\ all\ available\ video\ formats
+\-\-prefer\-free\-formats\ \ \ \ \ \ prefer\ free\ video\ formats\ unless\ a\ specific\ one
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ is\ requested
+\-\-max\-quality\ FORMAT\ \ \ \ \ \ \ highest\ quality\ format\ to\ download
+\-F,\ \-\-list\-formats\ \ \ \ \ \ \ \ \ list\ all\ available\ formats\ (currently\ youtube
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ only)
+\-\-write\-sub\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ write\ subtitle\ file\ (currently\ youtube\ only)
+\-\-only\-sub\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ [deprecated]\ alias\ of\ \-\-skip\-download
+\-\-all\-subs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ downloads\ all\ the\ available\ subtitles\ of\ the
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ video\ (currently\ youtube\ only)
+\-\-list\-subs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ lists\ all\ available\ subtitles\ for\ the\ video
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (currently\ youtube\ only)
+\-\-sub\-format\ LANG\ \ \ \ \ \ \ \ \ \ subtitle\ format\ [srt/sbv]\ (default=srt)
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (currently\ youtube\ only)
+\-\-sub\-lang\ LANG\ \ \ \ \ \ \ \ \ \ \ \ language\ of\ the\ subtitles\ to\ download\ (optional)
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ IETF\ language\ tags\ like\ \[aq]en\[aq]
+\f[]
+.fi
+.SS Authentication Options:
+.IP
+.nf
+\f[C]
+\-u,\ \-\-username\ USERNAME\ \ \ \ account\ username
+\-p,\ \-\-password\ PASSWORD\ \ \ \ account\ password
+\-n,\ \-\-netrc\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ .netrc\ authentication\ data
+\f[]
+.fi
+.SS Post\-processing Options:
+.IP
+.nf
+\f[C]
+\-x,\ \-\-extract\-audio\ \ \ \ \ \ \ \ convert\ video\ files\ to\ audio\-only\ files\ (requires
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ ffmpeg\ or\ avconv\ and\ ffprobe\ or\ avprobe)
+\-\-audio\-format\ FORMAT\ \ \ \ \ \ "best",\ "aac",\ "vorbis",\ "mp3",\ "m4a",\ "opus",\ or
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ "wav";\ best\ by\ default
+\-\-audio\-quality\ QUALITY\ \ \ \ ffmpeg/avconv\ audio\ quality\ specification,\ insert
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ a\ value\ between\ 0\ (better)\ and\ 9\ (worse)\ for\ VBR
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ or\ a\ specific\ bitrate\ like\ 128K\ (default\ 5)
+\-\-recode\-video\ FORMAT\ \ \ \ \ \ Encode\ the\ video\ to\ another\ format\ if\ necessary
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (currently\ supported:\ mp4|flv|ogg|webm)
+\-k,\ \-\-keep\-video\ \ \ \ \ \ \ \ \ \ \ keeps\ the\ video\ file\ on\ disk\ after\ the\ post\-
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ processing;\ the\ video\ is\ erased\ by\ default
+\-\-no\-post\-overwrites\ \ \ \ \ \ \ do\ not\ overwrite\ post\-processed\ files;\ the\ post\-
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ processed\ files\ are\ overwritten\ by\ default
+\f[]
+.fi
+.SH CONFIGURATION
+.PP
+You can configure youtube\-dl by placing default arguments (such as
+\f[C]\-\-extract\-audio\ \-\-no\-mtime\f[] to always extract the audio
+and not copy the mtime) into \f[C]/etc/youtube\-dl.conf\f[] and/or
+\f[C]~/.config/youtube\-dl.conf\f[].
+.SH OUTPUT TEMPLATE
+.PP
+The \f[C]\-o\f[] option allows users to indicate a template for the
+output file names.
+The basic usage is not to set any template arguments when downloading a
+single file, like in
+\f[C]youtube\-dl\ \-o\ funny_video.flv\ "http://some/video"\f[].
+However, it may contain special sequences that will be replaced when
+downloading each video.
+The special sequences have the format \f[C]%(NAME)s\f[].
+To clarify, that is a percent symbol followed by a name in parenthesis,
+followed by a lowercase S.
+Allowed names are:
+.IP \[bu] 2
+\f[C]id\f[]: The sequence will be replaced by the video identifier.
+.IP \[bu] 2
+\f[C]url\f[]: The sequence will be replaced by the video URL.
+.IP \[bu] 2
+\f[C]uploader\f[]: The sequence will be replaced by the nickname of the
+person who uploaded the video.
+.IP \[bu] 2
+\f[C]upload_date\f[]: The sequence will be replaced by the upload date
+in YYYYMMDD format.
+.IP \[bu] 2
+\f[C]title\f[]: The sequence will be replaced by the video title.
+.IP \[bu] 2
+\f[C]ext\f[]: The sequence will be replaced by the appropriate extension
+(like flv or mp4).
+.IP \[bu] 2
+\f[C]epoch\f[]: The sequence will be replaced by the Unix epoch when
+creating the file.
+.IP \[bu] 2
+\f[C]autonumber\f[]: The sequence will be replaced by a five\-digit
+number that will be increased with each download, starting at zero.
+.IP \[bu] 2
+\f[C]playlist\f[]: The name or the id of the playlist that contains the
+video.
+.IP \[bu] 2
+\f[C]playlist_index\f[]: The index of the video in the playlist, a
+five\-digit number.
+.PP
+The current default template is \f[C]%(id)s.%(ext)s\f[], but that will
+be switched to \f[C]%(title)s\-%(id)s.%(ext)s\f[] (which can be
+requested with \f[C]\-t\f[] at the moment).
+.PP
+In some cases, you don\[aq]t want special characters such as 中, spaces,
+or &, such as when transferring the downloaded filename to a Windows
+system or the filename through an 8bit\-unsafe channel.
+In these cases, add the \f[C]\-\-restrict\-filenames\f[] flag to get a
+shorter title:
+.IP
+.nf
+\f[C]
+$\ youtube\-dl\ \-\-get\-filename\ \-o\ "%(title)s.%(ext)s"\ BaW_jenozKc
+youtube\-dl\ test\ video\ \[aq]\[aq]_ä↭𝕐.mp4\ \ \ \ #\ All\ kinds\ of\ weird\ characters
+$\ youtube\-dl\ \-\-get\-filename\ \-o\ "%(title)s.%(ext)s"\ BaW_jenozKc\ \-\-restrict\-filenames
+youtube\-dl_test_video_.mp4\ \ \ \ \ \ \ \ \ \ #\ A\ simple\ file\ name
+\f[]
+.fi
+.SH VIDEO SELECTION
+.PP
+Videos can be filtered by their upload date using the options
+\f[C]\-\-date\f[], \f[C]\-\-datebefore\f[] or \f[C]\-\-dateafter\f[],
+they accept dates in two formats:
+.IP \[bu] 2
+Absolute dates: Dates in the format \f[C]YYYYMMDD\f[].
+.IP \[bu] 2
+Relative dates: Dates in the format
+\f[C](now|today)[+\-][0\-9](day|week|month|year)(s)?\f[]
+.PP
+Examples:
+.IP
+.nf
+\f[C]
+$\ youtube\-dl\ \-\-dateafter\ now\-6months\ #will\ only\ download\ the\ videos\ uploaded\ in\ the\ last\ 6\ months
+$\ youtube\-dl\ \-\-date\ 19700101\ #will\ only\ download\ the\ videos\ uploaded\ in\ January\ 1,\ 1970
+$\ youtube\-dl\ \-\-dateafter\ 20000101\ \-\-datebefore\ 20100101\ #will\ only\ download\ the\ videos\ uploaded\ between\ 2000\ and\ 2010
+\f[]
+.fi
+.SH FAQ
+.SS Can you please put the \-b option back?
+.PP
+Most people asking this question are not aware that youtube\-dl now
+defaults to downloading the highest available quality as reported by
+YouTube, which will be 1080p or 720p in some cases, so you no longer
+need the \-b option.
+For some specific videos, maybe YouTube does not report them to be
+available in a specific high quality format you\[aq]re interested
+in.
+In that case, simply request it with the \-f option and youtube\-dl will
+try to download it.
+.SS I get HTTP error 402 when trying to download a video. What\[aq]s
+this?
+.PP
+Apparently YouTube requires you to pass a CAPTCHA test if you download
+too much.
+We\[aq]re considering providing a way to let you solve the
+CAPTCHA (https://github.com/rg3/youtube-dl/issues/154), but at the
+moment, your best course of action is pointing a webbrowser to the
+youtube URL, solving the CAPTCHA, and restart youtube\-dl.
+.SS I have downloaded a video but how can I play it?
+.PP
+Once the video is fully downloaded, use any video player, such as
+vlc (http://www.videolan.org) or mplayer (http://www.mplayerhq.hu/).
+.SS The links provided by youtube\-dl \-g are not working anymore
+.PP
+The URLs youtube\-dl outputs require the downloader to have the correct
+cookies.
+Use the \f[C]\-\-cookies\f[] option to write the required cookies into a
+file, and advise your downloader to read cookies from that file.
+Some sites also require a common user agent to be used, use
+\f[C]\-\-dump\-user\-agent\f[] to see the one in use by youtube\-dl.
+.SS ERROR: no fmt_url_map or conn information found in video info
+.PP
+youtube has switched to a new video info format in July 2011 which is
+not supported by old versions of youtube\-dl.
+You can update youtube\-dl with \f[C]sudo\ youtube\-dl\ \-\-update\f[].
+.SS ERROR: unable to download video
+.PP
+youtube requires an additional signature since September 2012 which is
+not supported by old versions of youtube\-dl.
+You can update youtube\-dl with \f[C]sudo\ youtube\-dl\ \-\-update\f[].
+.SS SyntaxError: Non\-ASCII character
+.PP
+The error
+.IP
+.nf
+\f[C]
+File\ "youtube\-dl",\ line\ 2
+SyntaxError:\ Non\-ASCII\ character\ \[aq]\\x93\[aq]\ ...
+\f[]
+.fi
+.PP
+means you\[aq]re using an outdated version of Python.
+Please update to Python 2.6 or 2.7.
+.SS What is this binary file? Where has the code gone?
+.PP
+Since June 2012 (#342) youtube\-dl is packed as an executable zipfile,
+simply unzip it (might need renaming to \f[C]youtube\-dl.zip\f[] first
+on some systems) or clone the git repository, as laid out above.
+If you modify the code, you can run it by executing the
+\f[C]__main__.py\f[] file.
+To recompile the executable, run \f[C]make\ youtube\-dl\f[].
+.SS The exe throws a \f[I]Runtime error from Visual C++\f[]
+.PP
+To run the exe you need to install first the Microsoft Visual C++ 2008
+Redistributable
+Package (http://www.microsoft.com/en-us/download/details.aspx?id=29).
+.SH COPYRIGHT
+.PP
+youtube\-dl is released into the public domain by the copyright holders.
+.PP
+This README file was originally written by Daniel Bolton
+(<https://github.com/dbbolton>) and is likewise released into the public
+domain.
+.SH BUGS
+.PP
+Bugs and suggestions should be reported at:
+<https://github.com/rg3/youtube-dl/issues>
+.PP
+Please include:
+.IP \[bu] 2
+Your exact command line, like
+\f[C]youtube\-dl\ \-t\ "http://www.youtube.com/watch?v=uHlDtZ6Oc3s&feature=channel_video_title"\f[].
+A common mistake is not to escape the \f[C]&\f[].
+Putting URLs in quotes should solve this problem.
+.IP \[bu] 2
+If possible re\-run the command with \f[C]\-\-verbose\f[], and include
+the full output, it is really helpful to us.
+.IP \[bu] 2
+The output of \f[C]youtube\-dl\ \-\-version\f[]
+.IP \[bu] 2
+The output of \f[C]python\ \-\-version\f[]
+.IP \[bu] 2
+The name and version of your Operating System ("Ubuntu 11.04 x64" or
+"Windows 7 x64" is usually enough).
+.PP
+For discussions, join us in the irc channel #youtube\-dl on freenode.
video_info['title'] = playlist_title
return video_info
def _search_regex(self, pattern, string, name, default=None, fatal=True, flags=0):
    """
    Perform a regex search on the given string, using a single or a list of
    patterns returning the first matching group.
    In case of failure return a default value or raise a WARNING or a
    ExtractorError, depending on fatal, specifying the field name.

    Args:
        pattern: a single pattern (string or compiled regex) or a list of
            patterns that are tried in order until one matches.
        string: the text to search in.
        name: human-readable field name, used in warning/error messages.
        default: value returned when nothing matches (None disables it).
        fatal: when True and nothing matches (and no default is given),
            raise ExtractorError; when False, report a warning and
            return None.
        flags: flags forwarded to re.search.

    Returns:
        The first non-None matching group, the default, or None.
    """
    if isinstance(pattern, (str, compat_str, compiled_regex_type)):
        mobj = re.search(pattern, string, flags)
    else:
        # Try each pattern in turn. Initialize mobj first so an empty
        # pattern list cannot raise UnboundLocalError below.
        mobj = None
        for p in pattern:
            mobj = re.search(p, string, flags)
            if mobj:
                break

    if sys.stderr.isatty() and os.name != 'nt':
        # Colorize the field name (blue) when writing to a terminal;
        # Windows consoles do not understand ANSI escape codes.
        _name = u'\033[0;34m%s\033[0m' % name
    else:
        _name = name

    if mobj:
        # return the first matching group
        return next(g for g in mobj.groups() if g is not None)
    elif default is not None:
        return default
    elif fatal:
        raise ExtractorError(u'Unable to extract %s' % _name)
    else:
        self._downloader.report_warning(u'unable to extract %s; '
            u'please report this issue on GitHub.' % _name)
        return None
+
def _html_search_regex(self, pattern, string, name, default=None, fatal=True, flags=0):
    """
    Like _search_regex, but strips HTML tags and unescapes entities.

    Delegates matching (and the default/fatal handling) to _search_regex;
    a successful match is post-processed with clean_html before being
    returned, while a falsy result is passed through unchanged.
    """
    match = self._search_regex(pattern, string, name, default, fatal, flags)
    return clean_html(match).strip() if match else match
+
class SearchInfoExtractor(InfoExtractor):
"""
Base class for paged search queries extractors.
return (u'Did not fetch video subtitles', None, None)
return (None, sub_lang, sub)
def _request_automatic_caption(self, video_id, webpage):
    """We need the webpage for getting the captions url, pass it as an
    argument to speed up the process."""
    sub_lang = self._downloader.params.get('subtitleslang')
    sub_format = self._downloader.params.get('subtitlesformat')
    self.to_screen(u'%s: Looking for automatic captions' % video_id)
    # Single failure value returned on every error path.
    failure = [(u'Couldn\'t find automatic captions for "%s"' % sub_lang, None, None)]
    config_mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
    if config_mobj is None:
        return failure
    player_config = json.loads(config_mobj.group(1))
    try:
        player_args = player_config[u'args']
        base_url = player_args[u'ttsurl']
        query = compat_urllib_parse.urlencode({
            'lang': 'en',
            'tlang': sub_lang,
            'fmt': sub_format,
            'ts': player_args[u'timestamp'],
            'kind': 'asr',
        })
        sub = self._download_webpage(base_url + '&' + query, video_id,
                                     u'Downloading automatic captions')
        return [(None, sub_lang, sub)]
    except KeyError:
        # The player config carries no automatic-caption information.
        return failure
+
def _extract_subtitle(self, video_id):
"""
Return a list with a tuple:
if video_subtitles:
(sub_error, sub_lang, sub) = video_subtitles[0]
if sub_error:
- self._downloader.report_error(sub_error)
+ # We try with the automatic captions
+ video_subtitles = self._request_automatic_caption(video_id, video_webpage)
+ (sub_error_auto, sub_lang, sub) = video_subtitles[0]
+ if sub is not None:
+ pass
+ else:
+ # We report the original error
+ self._downloader.report_error(sub_error)
if self._downloader.params.get('allsubtitles', False):
video_subtitles = self._extract_all_subtitles(video_id)
video_title = unescapeHTML(mobj.group('title'))
video_uploader = None
- mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>', webpage)
- if mobj is None:
- # lookin for official user
- mobj_official = re.search(r'<span rel="author"[^>]+?>([^<]+?)</span>', webpage)
- if mobj_official is None:
- self._downloader.report_warning(u'unable to extract uploader nickname')
- else:
- video_uploader = mobj_official.group(1)
- else:
- video_uploader = mobj.group(1)
+ video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
+ # Looking for official user
+ r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
+ webpage, 'video uploader')
video_upload_date = None
mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
}]
# We try looking in other parts of the webpage
- mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
- if mobj is None:
- raise ExtractorError(u'Unable to extract media URL')
- mediaURL = compat_urllib_parse.unquote(mobj.group(1))
-
- video_url = mediaURL
+ video_url = self._search_regex(r'<link rel="video_src" href=".*\?file=([^"]+)" />',
+ webpage, u'video URL')
mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
if mobj is None:
raise ExtractorError(u'Unable to extract title')
video_title = mobj.group(1).decode('utf-8')
-
video_uploader = mobj.group(2).decode('utf-8')
return [{
"""Information extractor for vimeo.com."""
# _VALID_URL matches Vimeo URLs
- _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)'
+ _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)'
IE_NAME = u'vimeo'
def _real_extract(self, url, new_video=True):
video_id = mobj.group('id')
if not mobj.group('proto'):
url = 'https://' + url
- if mobj.group('direct_link'):
+ if mobj.group('direct_link') or mobj.group('pro'):
url = 'https://vimeo.com/' + video_id
# Retrieve video webpage to extract further information
# Extract uploader and uploader_id
video_uploader = config["video"]["owner"]["name"]
- video_uploader_id = config["video"]["owner"]["url"].split('/')[-1]
+ video_uploader_id = config["video"]["owner"]["url"].split('/')[-1] if config["video"]["owner"]["url"] else None
# Extract video thumbnail
video_thumbnail = config["video"]["thumbnail"]
if mobj is None:
# Broaden the search a little bit: JWPlayer JS loader
mobj = re.search(r'[^A-Za-z0-9]?file:\s*["\'](http[^\'"&]*)', webpage)
+ if mobj is None:
+ # Try to find twitter cards info
+ mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
# Site Name | Video Title
# Video Title - Tagline | Site Name
# and so on and so forth; it's just not practical
- mobj = re.search(r'<title>(.*)</title>', webpage)
- if mobj is None:
- raise ExtractorError(u'Unable to extract title')
- video_title = mobj.group(1)
+ video_title = self._html_search_regex(r'<title>(.*)</title>',
+ webpage, u'video title')
# video uploader is domain name
- mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
- if mobj is None:
- raise ExtractorError(u'Unable to extract title')
- video_uploader = mobj.group(1)
+ video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
+ url, u'video uploader')
return [{
'id': video_id,
def report_download_page(self, query, pagenum):
"""Report attempt to download search page with given number."""
- query = query.decode(preferredencoding())
self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
def _get_n_results(self, query, n):
|
((?:PL|EC|UU)[0-9A-Za-z-_]{10,})
)"""
- _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json'
+ _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
_MAX_RESULTS = 50
IE_NAME = u'youtube:playlist'
file_extension = os.path.splitext(file_url)[1][1:]
# Search for file title
- mobj = re.search(r'<b title="(.*?)">', webpage)
- if mobj is None:
- raise ExtractorError(u'Unable to extract title')
- file_title = mobj.group(1).decode('utf-8')
+ file_title = self._search_regex(r'<b title="(.*?)">', webpage, u'title')
return [{
'id': file_id.decode('utf-8'),
video_duration = int(video_data['video_duration'])
thumbnail = video_data['thumbnail_src']
- m = re.search('<h2 class="uiHeaderTitle">([^<]+)</h2>', webpage)
- if not m:
- raise ExtractorError(u'Cannot find title in webpage')
- video_title = unescapeHTML(m.group(1))
+ video_title = self._html_search_regex('<h2 class="uiHeaderTitle">([^<]+)</h2>',
+ webpage, u'title')
info = {
'id': video_id,
class BlipTVIE(InfoExtractor):
"""Information extractor for blip.tv"""
- _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv(/.+)$'
+ _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(.+)$'
_URL_EXT = r'^.*\.([a-z0-9]+)$'
IE_NAME = u'blip.tv'
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
+ # See https://github.com/rg3/youtube-dl/issues/857
+ api_mobj = re.match(r'http://a\.blip\.tv/api\.swf#(?P<video_id>[\d\w]+)', url)
+ if api_mobj is not None:
+ url = 'http://blip.tv/play/g_%s' % api_mobj.group('video_id')
urlp = compat_urllib_parse_urlparse(url)
if urlp.path.startswith('/play/'):
request = compat_urllib_request.Request(url)
self.report_extraction(video_id)
video_url = mobj.group(1) + '.flv'
- mobj = re.search('<title>([^<]+)</title>', webpage)
- if mobj is None:
- raise ExtractorError(u'Unable to extract title')
- video_title = mobj.group(1)
+ video_title = self._html_search_regex('<title>([^<]+)</title>',
+ webpage, u'title')
- mobj = re.search('[.](.+?)$', video_url)
- if mobj is None:
- raise ExtractorError(u'Unable to extract extention')
- video_ext = mobj.group(1)
+ video_ext = self._search_regex('[.](.+?)$', video_url, u'extension')
return [{
'id': video_id,
# extracting infos
self.report_extraction(video_id)
+ video_url = None
mobj = re.search('connectionurl=\'(.*?)\'', dec_data)
- if mobj is None:
- raise ExtractorError(u'unable to extract rtmpurl')
- video_rtmpurl = compat_urllib_parse.unquote(mobj.group(1))
- if 'myvideo2flash' in video_rtmpurl:
- self._downloader.report_warning(u'forcing RTMPT ...')
- video_rtmpurl = video_rtmpurl.replace('rtmpe://', 'rtmpt://')
-
- # extract non rtmp videos
- if (video_rtmpurl is None) or (video_rtmpurl == ''):
+ if mobj:
+ video_url = compat_urllib_parse.unquote(mobj.group(1))
+ if 'myvideo2flash' in video_url:
+ self._downloader.report_warning(u'forcing RTMPT ...')
+ video_url = video_url.replace('rtmpe://', 'rtmpt://')
+
+ if not video_url:
+ # extract non rtmp videos
mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
if mobj is None:
raise ExtractorError(u'unable to extract url')
- video_rtmpurl = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2))
+ video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2))
- mobj = re.search('source=\'(.*?)\'', dec_data)
- if mobj is None:
- raise ExtractorError(u'unable to extract swfobj')
- video_file = compat_urllib_parse.unquote(mobj.group(1))
+ video_file = self._search_regex('source=\'(.*?)\'', dec_data, u'video file')
+ video_file = compat_urllib_parse.unquote(video_file)
if not video_file.endswith('f4m'):
ppath, prefix = video_file.split('.')
video_filepath + video_file
).replace('.f4m', '.m3u8')
- mobj = re.search('swfobject.embedSWF\(\'(.+?)\'', webpage)
- if mobj is None:
- raise ExtractorError(u'unable to extract swfobj')
- video_swfobj = compat_urllib_parse.unquote(mobj.group(1))
+ video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, u'swfobj')
+ video_swfobj = compat_urllib_parse.unquote(video_swfobj)
- mobj = re.search("<h1(?: class='globalHd')?>(.*?)</h1>", webpage)
- if mobj is None:
- raise ExtractorError(u'unable to extract title')
- video_title = mobj.group(1)
+ video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
+ webpage, u'title')
return [{
'id': video_id,
- 'url': video_rtmpurl,
- 'tc_url': video_rtmpurl,
+ 'url': video_url,
+ 'tc_url': video_url,
'uploader': None,
'upload_date': None,
'title': video_title,
'player_url': video_swfobj,
}]
+
class ComedyCentralIE(InfoExtractor):
"""Information extractor for The Daily Show and Colbert Report """
showName = mobj.group('showname')
videoId = mobj.group('episode')
- self.report_extraction(showName)
- webPage = self._download_webpage(url, showName)
+ self.report_extraction(videoId)
+ webpage = self._download_webpage(url, videoId)
+
+ videoDesc = self._html_search_regex('<meta name="description" content="([^"]*)"',
+ webpage, u'description', fatal=False)
+
+ imgUrl = self._html_search_regex('<meta property="og:image" content="([^"]*)"',
+ webpage, u'thumbnail', fatal=False)
+
+ playerUrl = self._html_search_regex('<meta property="og:video" content="([^"]*)"',
+ webpage, u'player url')
+
+ title = self._html_search_regex('<meta name="title" content="([^"]*)"',
+ webpage, u'player url').split(' : ')[-1]
- descMatch = re.search('<meta name="description" content="([^"]*)"', webPage)
- description = unescapeHTML(descMatch.group(1))
- imgMatch = re.search('<meta property="og:image" content="([^"]*)"', webPage)
- imgUrl = unescapeHTML(imgMatch.group(1))
- playerUrlMatch = re.search('<meta property="og:video" content="([^"]*)"', webPage)
- playerUrl = unescapeHTML(playerUrlMatch.group(1))
- configUrlMatch = re.search('config=(.*)$', playerUrl)
- configUrl = compat_urllib_parse.unquote(configUrlMatch.group(1))
+ configUrl = self._search_regex('config=(.*)$', playerUrl, u'config url')
+ configUrl = compat_urllib_parse.unquote(configUrl)
- configJSON = self._download_webpage(configUrl, showName,
+ configJSON = self._download_webpage(configUrl, videoId,
u'Downloading configuration',
u'unable to download configuration')
'url': videoUrl,
'uploader': showName,
'upload_date': None,
- 'title': showName,
+ 'title': title,
'ext': 'mp4',
'thumbnail': imgUrl,
- 'description': description,
+ 'description': videoDesc,
'player_url': playerUrl,
}
self.report_extraction(video_id)
-
# Extract video URL
- mobj = re.search(r'flv_url=(.+?)&', webpage)
- if mobj is None:
- raise ExtractorError(u'Unable to extract video url')
- video_url = compat_urllib_parse.unquote(mobj.group(1))
-
+ video_url = compat_urllib_parse.unquote(self._search_regex(r'flv_url=(.+?)&',
+ webpage, u'video URL'))
# Extract title
- mobj = re.search(r'<title>(.*?)\s+-\s+XVID', webpage)
- if mobj is None:
- raise ExtractorError(u'Unable to extract video title')
- video_title = mobj.group(1)
-
+ video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XVID',
+ webpage, u'title')
# Extract video thumbnail
- mobj = re.search(r'http://(?:img.*?\.)xvideos.com/videos/thumbs/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9]+/([a-fA-F0-9.]+jpg)', webpage)
- if mobj is None:
- raise ExtractorError(u'Unable to extract video thumbnail')
- video_thumbnail = mobj.group(0)
+ video_thumbnail = self._search_regex(r'http://(?:img.*?\.)xvideos.com/videos/thumbs/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9]+/([a-fA-F0-9.]+jpg)',
+ webpage, u'thumbnail', fatal=False)
info = {
'id': video_id,
video_url = 'rtmpe://video.infoq.com/cfx/st/' + real_id
# Extract title
- mobj = re.search(r'contentTitle = "(.*?)";', webpage)
- if mobj is None:
- raise ExtractorError(u'Unable to extract video title')
- video_title = mobj.group(1)
+ video_title = self._search_regex(r'contentTitle = "(.*?)";',
+ webpage, u'title')
# Extract description
- video_description = u'No description available.'
- mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', webpage)
- if mobj is not None:
- video_description = mobj.group(1)
+ video_description = self._html_search_regex(r'<meta name="description" content="(.*)"(?:\s*/)?>',
+ webpage, u'description', fatal=False)
video_filename = video_url.split('/')[-1]
video_id, extension = video_filename.split('.')
note='Downloading course info page',
errnote='Unable to download course info page')
- m = re.search('<h1>([^<]+)</h1>', coursepage)
- if m:
- info['title'] = unescapeHTML(m.group(1))
- else:
- info['title'] = info['id']
+ info['title'] = self._html_search_regex('<h1>([^<]+)</h1>', coursepage, 'title', default=info['id'])
- m = re.search('<description>([^<]+)</description>', coursepage)
- if m:
- info['description'] = unescapeHTML(m.group(1))
+ info['description'] = self._html_search_regex('<description>([^<]+)</description>',
+ coursepage, u'description', fatal=False)
links = orderedSet(re.findall('<a href="(VideoPage.php\?[^"]+)">', coursepage))
info['list'] = [
webpage = self._download_webpage(url, video_id)
- mobj = re.search(r'<meta name="mtv_vt" content="([^"]+)"/>', webpage)
- if mobj is None:
- raise ExtractorError(u'Unable to extract song name')
- song_name = unescapeHTML(mobj.group(1).decode('iso-8859-1'))
- mobj = re.search(r'<meta name="mtv_an" content="([^"]+)"/>', webpage)
- if mobj is None:
- raise ExtractorError(u'Unable to extract performer')
- performer = unescapeHTML(mobj.group(1).decode('iso-8859-1'))
- video_title = performer + ' - ' + song_name
+ song_name = self._html_search_regex(r'<meta name="mtv_vt" content="([^"]+)"/>',
+ webpage, u'song name', fatal=False)
- mobj = re.search(r'<meta name="mtvn_uri" content="([^"]+)"/>', webpage)
- if mobj is None:
- raise ExtractorError(u'Unable to mtvn_uri')
- mtvn_uri = mobj.group(1)
+ video_title = self._html_search_regex(r'<meta name="mtv_an" content="([^"]+)"/>',
+ webpage, u'title')
- mobj = re.search(r'MTVN.Player.defaultPlaylistId = ([0-9]+);', webpage)
- if mobj is None:
- raise ExtractorError(u'Unable to extract content id')
- content_id = mobj.group(1)
+ mtvn_uri = self._html_search_regex(r'<meta name="mtvn_uri" content="([^"]+)"/>',
+ webpage, u'mtvn_uri', fatal=False)
+
+ content_id = self._search_regex(r'MTVN.Player.defaultPlaylistId = ([0-9]+);',
+ webpage, u'content id', fatal=False)
videogen_url = 'http://www.mtv.com/player/includes/mediaGen.jhtml?uri=' + mtvn_uri + '&id=' + content_id + '&vid=' + video_id + '&ref=www.mtvn.com&viewUri=' + mtvn_uri
self.report_extraction(video_id)
# Get webpage content
webpage = self._download_webpage(url, video_id)
- result = re.search(self.VIDEO_URL_RE, webpage)
- if result is None:
- raise ExtractorError(u'Unable to extract video url')
- video_url = compat_urllib_parse.unquote(result.group(1))
+ video_url = self._search_regex(self.VIDEO_URL_RE,
+ webpage, u'video URL')
+ video_url = compat_urllib_parse.unquote(video_url)
- result = re.search(self.VIDEO_TITLE_RE, webpage)
- if result is None:
- raise ExtractorError(u'Unable to extract video title')
- video_title = result.group(1)
+ video_title = self._html_search_regex(self.VIDEO_TITLE_RE,
+ webpage, u'title')
- result = re.search(self.VIDEO_THUMB_RE, webpage)
- if result is None:
- raise ExtractorError(u'Unable to extract video thumbnail')
- video_thumbnail = result.group(1)
+ video_thumbnail = self._search_regex(self.VIDEO_THUMB_RE,
+ webpage, u'thumbnail', fatal=False)
return [{
'id': video_id,
_VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)'
IE_NAME = u'plus.google'
- def report_extract_entry(self, url):
- """Report downloading extry"""
- self.to_screen(u'Downloading entry: %s' % url)
-
- def report_date(self, upload_date):
- """Report downloading extry"""
- self.to_screen(u'Entry date: %s' % upload_date)
-
- def report_uploader(self, uploader):
- """Report downloading extry"""
- self.to_screen(u'Uploader: %s' % uploader)
-
- def report_title(self, video_title):
- """Report downloading extry"""
- self.to_screen(u'Title: %s' % video_title)
-
- def report_extract_vid_page(self, video_page):
- """Report information extraction."""
- self.to_screen(u'Extracting video page: %s' % video_page)
-
def _real_extract(self, url):
# Extract id from URL
mobj = re.match(self._VALID_URL, url)
video_extension = 'flv'
# Step 1, Retrieve post webpage to extract further information
- self.report_extract_entry(post_url)
webpage = self._download_webpage(post_url, video_id, u'Downloading entry webpage')
+ self.report_extraction(video_id)
+
# Extract update date
- upload_date = None
- pattern = 'title="Timestamp">(.*?)</a>'
- mobj = re.search(pattern, webpage)
- if mobj:
- upload_date = mobj.group(1)
+ upload_date = self._html_search_regex('title="Timestamp">(.*?)</a>',
+ webpage, u'upload date', fatal=False)
+ if upload_date:
# Convert timestring to a format suitable for filename
upload_date = datetime.datetime.strptime(upload_date, "%Y-%m-%d")
upload_date = upload_date.strftime('%Y%m%d')
- self.report_date(upload_date)
# Extract uploader
- uploader = None
- pattern = r'rel\="author".*?>(.*?)</a>'
- mobj = re.search(pattern, webpage)
- if mobj:
- uploader = mobj.group(1)
- self.report_uploader(uploader)
+ uploader = self._html_search_regex(r'rel\="author".*?>(.*?)</a>',
+ webpage, u'uploader', fatal=False)
# Extract title
# Get the first line for title
- video_title = u'NA'
- pattern = r'<meta name\=\"Description\" content\=\"(.*?)[\n<"]'
- mobj = re.search(pattern, webpage)
- if mobj:
- video_title = mobj.group(1)
- self.report_title(video_title)
+ video_title = self._html_search_regex(r'<meta name\=\"Description\" content\=\"(.*?)[\n<"]',
+ webpage, 'title', default=u'NA')
# Step 2, Stimulate clicking the image box to launch video
- pattern = '"(https\://plus\.google\.com/photos/.*?)",,"image/jpeg","video"\]'
- mobj = re.search(pattern, webpage)
- if mobj is None:
- raise ExtractorError(u'Unable to extract video page URL')
-
- video_page = mobj.group(1)
+ video_page = self._search_regex('"(https\://plus\.google\.com/photos/.*?)",,"image/jpeg","video"\]',
+ webpage, u'video page URL')
webpage = self._download_webpage(video_page, video_id, u'Downloading video page')
- self.report_extract_vid_page(video_page)
-
# Extract video links on video page
"""Extract video links of all sizes"""
}]
class NBAIE(InfoExtractor):
- _VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*)(\?.*)?$'
+ _VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
IE_NAME = u'nba'
def _real_extract(self, url):
raise ExtractorError(u'Invalid URL: %s' % url)
video_id = mobj.group(1)
- if video_id.endswith('/index.html'):
- video_id = video_id[:-len('/index.html')]
webpage = self._download_webpage(url, video_id)
video_url = u'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
- def _findProp(rexp, default=None):
- m = re.search(rexp, webpage)
- if m:
- return unescapeHTML(m.group(1))
- else:
- return default
shortened_video_id = video_id.rpartition('/')[2]
- title = _findProp(r'<meta property="og:title" content="(.*?)"', shortened_video_id).replace('NBA.com: ', '')
+ title = self._html_search_regex(r'<meta property="og:title" content="(.*?)"',
+ webpage, 'title', default=shortened_video_id).replace('NBA.com: ', '')
+
+ # The upload date isn't present in the HTML that nba.com returns to us
+ # uploader_date = self._html_search_regex(r'<b>Date:</b> (.*?)</div>', webpage, 'upload_date', fatal=False)
+
+ description = self._html_search_regex(r'<meta name="description" (?:content|value)="(.*?)" />', webpage, 'description', fatal=False)
+
info = {
'id': shortened_video_id,
'url': video_url,
'ext': 'mp4',
'title': title,
- 'uploader_date': _findProp(r'<b>Date:</b> (.*?)</div>'),
- 'description': _findProp(r'<div class="description">(.*?)</h1>'),
+ # 'uploader_date': uploader_date,
+ 'description': description,
}
return [info]
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
- m = re.search(r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"', webpage, re.DOTALL)
- if not m:
- raise ExtractorError(u'Unable to find video information')
- video_url = unescapeHTML(m.group('url'))
+ video_url = self._html_search_regex(r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"',
+ webpage, u'video URL', flags=re.DOTALL)
- m = re.search(r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>", webpage, flags=re.DOTALL)
- if not m:
- m = re.search(r'<title>(?P<title>[^<]+?)</title>', webpage)
- if not m:
- raise ExtractorError(u'Cannot find video title')
- title = clean_html(m.group('title'))
+ title = self._html_search_regex((r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>",
+ r'<title>(?P<title>[^<]+?)</title>'), webpage, 'title', flags=re.DOTALL)
- m = re.search(r'<meta property="og:description" content="(?P<desc>.*?)"', webpage)
- if m:
- desc = unescapeHTML(m.group('desc'))
- else:
- desc = None
+ video_description = self._html_search_regex(r'<meta property="og:description" content="(?P<desc>.*?)"',
+ webpage, u'description', fatal=False, flags=re.DOTALL)
info = {
'id': video_id,
'url': video_url,
'ext': 'mp4',
'title': title,
- 'description': desc,
+ 'description': video_description,
}
return [info]
(?P<gameID>\d+)/?
(?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID
"""
+ _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/'
+ _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'
@classmethod
def suitable(cls, url):
def _real_extract(self, url):
m = re.match(self._VALID_URL, url, re.VERBOSE)
gameID = m.group('gameID')
- videourl = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970' % gameID
- self.report_age_confirmation()
+
+ videourl = self._VIDEO_PAGE_TEMPLATE % gameID
webpage = self._download_webpage(videourl, gameID)
- game_title = re.search(r'<h2 class="pageheader">(?P<game_title>.*?)</h2>', webpage).group('game_title')
-
+
+ if re.search('<h2>Please enter your birth date to continue:</h2>', webpage) is not None:
+ videourl = self._AGECHECK_TEMPLATE % gameID
+ self.report_age_confirmation()
+ webpage = self._download_webpage(videourl, gameID)
+
+ self.report_extraction(gameID)
+ game_title = self._html_search_regex(r'<h2 class="pageheader">(.*?)</h2>',
+ webpage, 'game title')
+
urlRE = r"'movie_(?P<videoID>\d+)': \{\s*FILENAME: \"(?P<videoURL>[\w:/\.\?=]+)\"(,\s*MOVIE_NAME: \"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\},"
mweb = re.finditer(urlRE, webpage)
namesRE = r'<span class="title">(?P<videoName>.+?)</span>'
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
video_id = m.group('videoID')
+
video_url = u'http://tcdn.ustream.tv/video/%s' % video_id
webpage = self._download_webpage(url, video_id)
+
self.report_extraction(video_id)
- try:
- m = re.search(r'data-title="(?P<title>.+)"',webpage)
- title = m.group('title')
- m = re.search(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
- webpage, re.DOTALL)
- uploader = unescapeHTML(m.group('uploader').strip())
- m = re.search(r'<link rel="image_src" href="(?P<thumb>.*?)"', webpage)
- thumb = m.group('thumb')
- except AttributeError:
- raise ExtractorError(u'Unable to extract info')
+
+ video_title = self._html_search_regex(r'data-title="(?P<title>.+)"',
+ webpage, u'title')
+
+ uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
+ webpage, u'uploader', fatal=False, flags=re.DOTALL)
+
+ thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"',
+ webpage, u'thumbnail', fatal=False)
+
info = {
- 'id':video_id,
- 'url':video_url,
+ 'id': video_id,
+ 'url': video_url,
'ext': 'flv',
- 'title': title,
+ 'title': video_title,
'uploader': uploader,
- 'thumbnail': thumb,
- }
+ 'thumbnail': thumbnail,
+ }
return info
class WorldStarHipHopIE(InfoExtractor):
IE_NAME = u'WorldStarHipHop'
def _real_extract(self, url):
- _src_url = r'so\.addVariable\("file","(.*?)"\)'
-
m = re.match(self._VALID_URL, url)
video_id = m.group('id')
- webpage_src = self._download_webpage(url, video_id)
+ webpage_src = self._download_webpage(url, video_id)
- mobj = re.search(_src_url, webpage_src)
+ video_url = self._search_regex(r'so\.addVariable\("file","(.*?)"\)',
+ webpage_src, u'video URL')
- if mobj is not None:
- video_url = mobj.group(1)
- if 'mp4' in video_url:
- ext = 'mp4'
- else:
- ext = 'flv'
+ if 'mp4' in video_url:
+ ext = 'mp4'
else:
- raise ExtractorError(u'Cannot find video url for %s' % video_id)
+ ext = 'flv'
- mobj = re.search(r"<title>(.*)</title>", webpage_src)
+ video_title = self._html_search_regex(r"<title>(.*)</title>",
+ webpage_src, u'title')
- if mobj is None:
- raise ExtractorError(u'Cannot determine title')
- title = mobj.group(1)
-
- mobj = re.search(r'rel="image_src" href="(.*)" />', webpage_src)
# Getting thumbnail and if not thumbnail sets correct title for WSHH candy video.
- if mobj is not None:
- thumbnail = mobj.group(1)
- else:
+ thumbnail = self._html_search_regex(r'rel="image_src" href="(.*)" />',
+ webpage_src, u'thumbnail', fatal=False)
+
+ if not thumbnail:
_title = r"""candytitles.*>(.*)</span>"""
mobj = re.search(_title, webpage_src)
if mobj is not None:
- title = mobj.group(1)
- thumbnail = None
+ video_title = mobj.group(1)
results = [{
'id': video_id,
'url' : video_url,
- 'title' : title,
+ 'title' : video_title,
'thumbnail' : thumbnail,
'ext' : ext,
}]
video_id = m.group('videoID')
webpage = self._download_webpage(url, video_id)
- m = re.search(r'<script>window.gon = {.*?};gon\.show=(.+?);</script>', webpage)
- if not m:
- raise ExtractorError(u'Cannot find metadata')
- json_data = m.group(1)
+
+ json_data = self._search_regex(r'window\.gon.*?gon\.show=(.+?);$',
+ webpage, u'json data', flags=re.MULTILINE)
try:
data = json.loads(json_data)
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
-
video_id = mobj.group('videoid')
req = compat_urllib_request.Request(url)
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)
- # Get the video title
- result = re.search(r'<h1.*?>(?P<title>.*)</h1>', webpage)
- if result is None:
- raise ExtractorError(u'Unable to extract video title')
- video_title = result.group('title').strip()
-
- # Get the video date
- result = re.search(r'Date:</label>(?P<date>.*) </li>', webpage)
- if result is None:
- self._downloader.report_warning(u'unable to extract video date')
- upload_date = None
- else:
- upload_date = unified_strdate(result.group('date').strip())
+ # Get JSON parameters
+ json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters')
+ try:
+ params = json.loads(json_params)
+ except:
+ raise ExtractorError(u'Invalid JSON')
- # Get the video uploader
- result = re.search(r'Submitted:</label>(?P<uploader>.*)</li>', webpage)
- if result is None:
- self._downloader.report_warning(u'unable to extract uploader')
- video_uploader = None
- else:
- video_uploader = result.group('uploader').strip()
- video_uploader = clean_html( video_uploader )
+ self.report_extraction(video_id)
+ try:
+ video_title = params['title']
+ upload_date = unified_strdate(params['release_date_f'])
+ video_description = params['description']
+ video_uploader = params['submitted_by']
+ thumbnail = params['thumbnails'][0]['image']
+ except KeyError:
+ raise ExtractorError('Missing JSON parameter: ' + sys.exc_info()[1])
# Get all of the formats available
DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
- result = re.search(DOWNLOAD_LIST_RE, webpage)
- if result is None:
- raise ExtractorError(u'Unable to extract download list')
- download_list_html = result.group('download_list').strip()
+ download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
+ webpage, u'download list').strip()
# Get all of the links from the page
LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
size = format[0]
bitrate = format[1]
format = "-".join( format )
- title = u'%s-%s-%s' % (video_title, size, bitrate)
+ # title = u'%s-%s-%s' % (video_title, size, bitrate)
formats.append({
'id': video_id,
'url': video_url,
'uploader': video_uploader,
'upload_date': upload_date,
- 'title': title,
+ 'title': video_title,
'ext': extension,
'format': format,
- 'thumbnail': None,
- 'description': None,
- 'player_url': None
+ 'thumbnail': thumbnail,
+ 'description': video_description
})
if self._downloader.params.get('listformats', None):
# Get the video URL
VIDEO_URL_RE = r'url: "(?P<url>http://video[0-9].pornotube.com/.+\.flv)",'
- result = re.search(VIDEO_URL_RE, webpage)
- if result is None:
- raise ExtractorError(u'Unable to extract video url')
- video_url = compat_urllib_parse.unquote(result.group('url'))
+ video_url = self._search_regex(VIDEO_URL_RE, webpage, u'video url')
+ video_url = compat_urllib_parse.unquote(video_url)
#Get the uploaded date
VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
- result = re.search(VIDEO_UPLOADED_RE, webpage)
- if result is None:
- raise ExtractorError(u'Unable to extract video title')
- upload_date = unified_strdate(result.group('date'))
+ upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, u'upload date', fatal=False)
+ if upload_date: upload_date = unified_strdate(upload_date)
info = {'id': video_id,
'url': video_url,
webpage = self._download_webpage(url, video_id)
# Get the video title
- result = re.search(r'<title>(?P<title>.*)</title>', webpage)
- if result is None:
- raise ExtractorError(u'ERROR: unable to extract video title')
- video_title = result.group('title').strip()
+ video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>',
+ webpage, u'title').strip()
# Get the embed page
result = re.search(r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', webpage)
webpage = self._download_webpage(embed_page_url, video_id)
# Get the video URL
- result = re.search(r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);', webpage)
- if result is None:
- raise ExtractorError(u'ERROR: unable to extract video url')
- video_url = result.group('source')
+ video_url = self._search_regex(r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);',
+ webpage, u'video URL')
info = {'id': video_id,
'url': video_url,
webpage = self._download_webpage(url, playlist_id)
- m = re.search(r"PAGE.mix = (.*?);\n", webpage, flags=re.DOTALL)
- if not m:
- raise ExtractorError(u'Cannot find trax information')
- json_like = m.group(1)
+ json_like = self._search_regex(r"PAGE.mix = (.*?);\n", webpage, u'trax information', flags=re.DOTALL)
data = json.loads(json_like)
session = str(random.randint(0, 1000000000))
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
video_id = m.group('videoID')
+
video_url = u'http://cdn.keek.com/keek/video/%s' % video_id
thumbnail = u'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
webpage = self._download_webpage(url, video_id)
- m = re.search(r'<meta property="og:title" content="(?P<title>.*?)"', webpage)
- title = unescapeHTML(m.group('title'))
- m = re.search(r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>', webpage)
- uploader = clean_html(m.group('uploader'))
+
+ video_title = self._html_search_regex(r'<meta property="og:title" content="(?P<title>.*?)"',
+ webpage, u'title')
+
+ uploader = self._html_search_regex(r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>',
+ webpage, u'uploader', fatal=False)
+
info = {
'id': video_id,
'url': video_url,
'ext': 'mp4',
- 'title': title,
+ 'title': video_title,
'thumbnail': thumbnail,
'uploader': uploader
}
self.to_screen(u'Getting info of playlist %s: "%s"' % (playlist_id,name))
return [self._playlist_videos_info(url,name,playlist_id)]
- def _talk_video_link(self,mediaSlug):
- '''Returns the video link for that mediaSlug'''
- return 'http://download.ted.com/talks/%s.mp4' % mediaSlug
-
def _playlist_videos_info(self,url,name,playlist_id=0):
'''Returns the videos of the playlist'''
video_RE=r'''
m_videos=re.finditer(video_RE,webpage,re.VERBOSE)
m_names=re.finditer(video_name_RE,webpage)
- playlist_RE = r'div class="headline">(\s*?)<h1>(\s*?)<span>(?P<playlist_title>.*?)</span>'
- m_playlist = re.search(playlist_RE, webpage)
- playlist_title = m_playlist.group('playlist_title')
+ playlist_title = self._html_search_regex(r'div class="headline">\s*?<h1>\s*?<span>(.*?)</span>',
+ webpage, 'playlist title')
playlist_entries = []
for m_video, m_name in zip(m_videos,m_names):
def _talk_info(self, url, video_id=0):
"""Return the video for the talk in the url"""
- m=re.match(self._VALID_URL, url,re.VERBOSE)
- videoName=m.group('name')
- webpage=self._download_webpage(url, video_id, 'Downloading \"%s\" page' % videoName)
+ m = re.match(self._VALID_URL, url,re.VERBOSE)
+ video_name = m.group('name')
+ webpage = self._download_webpage(url, video_id, 'Downloading \"%s\" page' % video_name)
+ self.report_extraction(video_name)
# If the url includes the language we get the title translated
- title_RE=r'<span id="altHeadline" >(?P<title>.*)</span>'
- title=re.search(title_RE, webpage).group('title')
- info_RE=r'''<script\ type="text/javascript">var\ talkDetails\ =(.*?)
- "id":(?P<videoID>[\d]+).*?
- "mediaSlug":"(?P<mediaSlug>[\w\d]+?)"'''
- thumb_RE=r'</span>[\s.]*</div>[\s.]*<img src="(?P<thumbnail>.*?)"'
- thumb_match=re.search(thumb_RE,webpage)
- info_match=re.search(info_RE,webpage,re.VERBOSE)
- video_id=info_match.group('videoID')
- mediaSlug=info_match.group('mediaSlug')
- video_url=self._talk_video_link(mediaSlug)
+ title = self._html_search_regex(r'<span id="altHeadline" >(?P<title>.*)</span>',
+ webpage, 'title')
+ json_data = self._search_regex(r'<script.*?>var talkDetails = ({.*?})</script>',
+ webpage, 'json data')
+ info = json.loads(json_data)
+ desc = self._html_search_regex(r'<div class="talk-intro">.*?<p.*?>(.*?)</p>',
+ webpage, 'description', flags = re.DOTALL)
+
+ thumbnail = self._search_regex(r'</span>[\s.]*</div>[\s.]*<img src="(.*?)"',
+ webpage, 'thumbnail')
info = {
- 'id': video_id,
- 'url': video_url,
+ 'id': info['id'],
+ 'url': info['htmlStreams'][-1]['file'],
'ext': 'mp4',
'title': title,
- 'thumbnail': thumb_match.group('thumbnail')
+ 'thumbnail': thumbnail,
+ 'description': desc,
}
return info
video_id = m.group('videoID')
webpage = self._download_webpage(url, video_id)
- m = re.search(r'<div class="spVideoTitle">(.*?)</div>', webpage)
- if not m:
- raise ExtractorError(u'Cannot find title')
- video_title = unescapeHTML(m.group(1))
+
+ video_title = self._html_search_regex(r'<div class="module-title">(.*?)</div>',
+ webpage, u'title')
xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
xml_code = self._download_webpage(xml_url, video_id,
webpage = self._download_webpage(url, video_id)
- m = re.search(r'file: "(.*?)",', webpage)
- if not m:
- raise ExtractorError(u'Unable to find video url')
- video_url = m.group(1)
+ video_url = self._search_regex(r'file: "(.*?)",',
+ webpage, u'video URL')
- m = re.search(r'<meta property="og:title" content="(?P<title>.*?)"', webpage)
- if not m:
- raise ExtractorError(u'Cannot find video title')
- title = unescapeHTML(m.group('title')).replace('LiveLeak.com -', '').strip()
+ video_title = self._html_search_regex(r'<meta property="og:title" content="(?P<title>.*?)"',
+ webpage, u'title').replace('LiveLeak.com -', '').strip()
- m = re.search(r'<meta property="og:description" content="(?P<desc>.*?)"', webpage)
- if m:
- desc = unescapeHTML(m.group('desc'))
- else:
- desc = None
+ video_description = self._html_search_regex(r'<meta property="og:description" content="(?P<desc>.*?)"',
+ webpage, u'description', fatal=False)
- m = re.search(r'By:.*?(\w+)</a>', webpage)
- if m:
- uploader = clean_html(m.group(1))
- else:
- uploader = None
+ video_uploader = self._html_search_regex(r'By:.*?(\w+)</a>',
+ webpage, u'uploader', fatal=False)
info = {
'id': video_id,
'url': video_url,
'ext': 'mp4',
- 'title': title,
- 'description': desc,
- 'uploader': uploader
+ 'title': video_title,
+ 'description': video_description,
+ 'uploader': video_uploader
}
return [info]
info["url"] = stream["video_url"]
return [info]
+class ZDFIE(InfoExtractor):
+    """Extractor for videos from the ZDF Mediathek (www.zdf.de)."""
+    _VALID_URL = r'^http://www\.zdf\.de\/ZDFmediathek\/(.*beitrag\/video\/)(?P<video_id>[^/\?]+)(?:\?.*)?'
+    _TITLE = r'<h1(?: class="beitragHeadline")?>(?P<title>.*)</h1>'
+    _MEDIA_STREAM = r'<a href="(?P<video_url>.+(?P<media_type>.streaming).+/zdf/(?P<quality>[^\/]+)/[^"]*)".+class="play".+>'
+    _MMS_STREAM = r'href="(?P<video_url>mms://[^"]*)"'
+    _RTSP_STREAM = r'(?P<video_url>rtsp://[^"]*.mp4)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            raise ExtractorError(u'Invalid URL: %s' % url)
+        video_id = mobj.group('video_id')
+
+        html = self._download_webpage(url, video_id)
+        streams = [m.groupdict() for m in re.finditer(self._MEDIA_STREAM, html)]
+        # re.finditer always yields a (possibly empty) list here, so test for
+        # emptiness rather than None.
+        if not streams:
+            raise ExtractorError(u'No media url found.')
+
+        # s['media_type'] == 'wstreaming' -> use 'Windows Media Player' and mms url
+        # s['media_type'] == 'hstreaming' -> use 'Quicktime' and rtsp url
+        # choose first/default media type and highest quality for now
+        # Initialize so that a miss in both loops raises ExtractorError below
+        # instead of a NameError.
+        stream_ = None
+        for s in streams:  # find 300 - dsl1000mbit
+            if s['quality'] == '300' and s['media_type'] == 'wstreaming':
+                stream_ = s
+                break
+        for s in streams:  # find veryhigh - dsl2000mbit (preferred, so checked last)
+            if s['quality'] == 'veryhigh' and s['media_type'] == 'wstreaming':  # 'hstreaming' - rtsp is not working
+                stream_ = s
+                break
+        if stream_ is None:
+            raise ExtractorError(u'No stream found.')
+
+        media_link = self._download_webpage(stream_['video_url'], video_id, 'Get stream URL')
+
+        self.report_extraction(video_id)
+        mobj = re.search(self._TITLE, html)
+        if mobj is None:
+            raise ExtractorError(u'Cannot extract title')
+        title = unescapeHTML(mobj.group('title'))
+
+        # Prefer the mms:// URL; fall back to rtsp:// if none is present.
+        mobj = re.search(self._MMS_STREAM, media_link)
+        if mobj is None:
+            mobj = re.search(self._RTSP_STREAM, media_link)
+        if mobj is None:
+            raise ExtractorError(u'Cannot extract mms:// or rtsp:// URL')
+        mms_url = mobj.group('video_url')
+
+        # Derive the file extension from the last dot in the stream URL.
+        mobj = re.search('(.*)[.](?P<ext>[^.]+)', mms_url)
+        if mobj is None:
+            raise ExtractorError(u'Cannot extract extension')
+        ext = mobj.group('ext')
+
+        return [{'id': video_id,
+                 'url': mms_url,
+                 'title': title,
+                 'ext': ext
+                 }]
+
+
class TumblrIE(InfoExtractor):
_VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)/(.*?)'
re_video = r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (blog, video_id)
video = re.search(re_video, webpage)
if video is None:
- self.to_screen("No video found")
- return []
+ raise ExtractorError(u'Unable to extract video')
video_url = video.group('video_url')
ext = video.group('ext')
- re_thumb = r'posters(.*?)\[\\x22(?P<thumb>.*?)\\x22' # We pick the first poster
- thumb = re.search(re_thumb, webpage).group('thumb').replace('\\', '')
+ video_thumbnail = self._search_regex(r'posters(.*?)\[\\x22(?P<thumb>.*?)\\x22',
+ webpage, u'thumbnail', fatal=False) # We pick the first poster
+ if video_thumbnail: video_thumbnail = video_thumbnail.replace('\\', '')
# The only place where you can get a title, it's not complete,
# but searching in other places doesn't work for all videos
- re_title = r'<title>(?P<title>.*?)</title>'
- title = unescapeHTML(re.search(re_title, webpage, re.DOTALL).group('title'))
+ video_title = self._html_search_regex(r'<title>(?P<title>.*?)</title>',
+ webpage, u'title', flags=re.DOTALL)
return [{'id': video_id,
'url': video_url,
- 'title': title,
- 'thumbnail': thumb,
+ 'title': video_title,
+ 'thumbnail': video_thumbnail,
'ext': ext
}]
# We get the link to the free download page
m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
if m_download is None:
- raise ExtractorError(u'No free songs founded')
+ raise ExtractorError(u'No free songs found')
download_link = m_download.group(1)
id = re.search(r'var TralbumData = {(.*?)id: (?P<id>\d*?)$',
track_info = {'id':id,
'title' : info[u'title'],
- 'ext' : 'mp3',
- 'url' : final_url,
+ 'ext' : 'mp3',
+ 'url' : final_url,
'thumbnail' : info[u'thumb_url'],
- 'uploader' : info[u'artist']
+ 'uploader' : info[u'artist']
}
return [track_info]
video_id = mobj.group('id')
video_extension = 'mp4'
webpage = self._download_webpage(url, video_id)
+
self.report_extraction(video_id)
- mobj = re.search(r'<source src="'+'(.+)'+'" type="video/mp4">',webpage)
- if mobj is None:
- raise ExtractorError(u'Unable to extract media URL')
+ video_url = self._html_search_regex(r'<source src="(.+?)" type="video/mp4">',
+ webpage, u'video URL')
- video_url = mobj.group(1)
- mobj = re.search('<h1 class="videoTitle slidePanelMovable">(.+)</h1>',webpage)
- if mobj is None:
- raise ExtractorError(u'Unable to extract title')
- video_title = mobj.group(1)
+ video_title = self._html_search_regex('<h1 class="videoTitle slidePanelMovable">(.+?)</h1>',
+ webpage, u'title')
return [{
'id': video_id,
video_extension = 'mp4'
webpage = self._download_webpage(mrss_url, video_id)
- mobj = re.search(r'<media:player url="(?P<mp4url>http://mp4.ina.fr/[^"]+\.mp4)', webpage)
- if mobj is None:
- raise ExtractorError(u'Unable to extract media URL')
- video_url = mobj.group(1)
+ self.report_extraction(video_id)
- mobj = re.search(r'<title><!\[CDATA\[(?P<titre>.*?)]]></title>', webpage)
- if mobj is None:
- raise ExtractorError(u'Unable to extract title')
- video_title = mobj.group(1)
+ video_url = self._html_search_regex(r'<media:player url="(?P<mp4url>http://mp4.ina.fr/[^"]+\.mp4)',
+ webpage, u'video URL')
+
+ video_title = self._search_regex(r'<title><!\[CDATA\[(?P<titre>.*?)]]></title>',
+ webpage, u'title')
return [{
'id': video_id,
self.report_extraction(video_id)
- mobj = re.search(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)"', webpage)
- if mobj is None:
- raise ExtractorError(u'Unable to extract video URL')
- video_url = mobj.group(1)
+ video_url = self._search_regex(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)',
+ webpage, u'video URL')
- mobj = re.search(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') property=\'og:title\'', webpage)
- if mobj is None:
- raise ExtractorError(u'Unable to extract title')
- video_title = mobj.group(1) or mobj.group(2)
+ video_title = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') property=\'og:title\'',
+ webpage, u'title')
- mobj = re.search(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'', webpage)
- if mobj is None:
- self._downloader.report_warning(u'unable to extract description')
- video_description = None
- else:
- video_description = mobj.group(1) or mobj.group(2)
+ video_description = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'',
+ webpage, u'description', fatal=False)
- mobj = re.search(r'<meta content=\'(.+?)\' property=\'og:image\'', webpage)
- if mobj is None:
- raise ExtractorError(u'Unable to extract thumbnail')
- thumbnail = mobj.group(1)
+ thumbnail = self._html_search_regex(r'<meta content=\'(.+?)\' property=\'og:image\'',
+ webpage, u'thumbnail', fatal=False)
return [{
'id': video_id,
_VALID_URL = r'(?:https?://)?(?:www\.)?vine\.co/v/(?P<id>\w+)'
def _real_extract(self, url):
-
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
self.report_extraction(video_id)
- mobj = re.search(r'<meta property="twitter:player:stream" content="(.+?)"', webpage)
- if mobj is None:
- raise ExtractorError(u'Unable to extract video URL')
- video_url = mobj.group(1)
+ video_url = self._html_search_regex(r'<meta property="twitter:player:stream" content="(.+?)"',
+ webpage, u'video URL')
- mobj = re.search(r'<meta property="og:title" content="(.+?)"', webpage)
- if mobj is None:
- raise ExtractorError(u'Unable to extract title')
- video_title = mobj.group(1)
+ video_title = self._html_search_regex(r'<meta property="og:title" content="(.+?)"',
+ webpage, u'title')
- mobj = re.search(r'<meta property="og:image" content="(.+?)(\?.*?)?"', webpage)
- if mobj is None:
- raise ExtractorError(u'Unable to extract thumbnail')
- thumbnail = mobj.group(1)
+ thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.+?)(\?.*?)?"',
+ webpage, u'thumbnail', fatal=False)
- mobj = re.search(r'<div class="user">.*?<h2>(.+?)</h2>', webpage, re.DOTALL)
- if mobj is None:
- raise ExtractorError(u'Unable to extract uploader')
- uploader = mobj.group(1)
+ uploader = self._html_search_regex(r'<div class="user">.*?<h2>(.+?)</h2>',
+ webpage, u'uploader', fatal=False, flags=re.DOTALL)
return [{
'id': video_id,
webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
webpage = self._download_webpage(webpage_url, video_id)
- mobj = re.search(r"photo_secret: '(\w+)'", webpage)
- if mobj is None:
- raise ExtractorError(u'Unable to extract video secret')
- secret = mobj.group(1)
+ secret = self._search_regex(r"photo_secret: '(\w+)'", webpage, u'secret')
first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')
- mobj = re.search(r'<Item id="id">(\d+-\d+)</Item>', first_xml)
- if mobj is None:
- raise ExtractorError(u'Unable to extract node_id')
- node_id = mobj.group(1)
+ node_id = self._html_search_regex(r'<Item id="id">(\d+-\d+)</Item>',
+ first_xml, u'node_id')
second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')
raise ExtractorError(u'Unable to extract video url')
video_url = mobj.group(1) + unescapeHTML(mobj.group(2))
- mobj = re.search(r'<meta property="og:title" content=(?:"([^"]+)"|\'([^\']+)\')', webpage)
- if mobj is None:
- raise ExtractorError(u'Unable to extract title')
- video_title = mobj.group(1) or mobj.group(2)
+ video_title = self._html_search_regex(r'<meta property="og:title" content=(?:"([^"]+)"|\'([^\']+)\')',
+ webpage, u'video title')
- mobj = re.search(r'<meta property="og:description" content=(?:"([^"]+)"|\'([^\']+)\')', webpage)
- if mobj is None:
- self._downloader.report_warning(u'unable to extract description')
- video_description = None
- else:
- video_description = mobj.group(1) or mobj.group(2)
+ video_description = self._html_search_regex(r'<meta property="og:description" content=(?:"([^"]+)"|\'([^\']+)\')',
+ webpage, u'description', fatal=False)
- mobj = re.search(r'<meta property="og:image" content=(?:"([^"]+)"|\'([^\']+)\')', webpage)
- if mobj is None:
- raise ExtractorError(u'Unable to extract thumbnail')
- thumbnail = mobj.group(1) or mobj.group(2)
+ thumbnail = self._html_search_regex(r'<meta property="og:image" content=(?:"([^"]+)"|\'([^\']+)\')',
+ webpage, u'thumbnail', fatal=False)
return [{
'id': video_id,
url_title = mobj.group('url_title')
webpage = self._download_webpage(url, url_title)
- mobj = re.search(r'<article class="video" data-id="(\d+?)"', webpage)
- video_id = mobj.group(1)
+ video_id = self._html_search_regex(r'<article class="video" data-id="(\d+?)"',
+ webpage, u'video id')
self.report_extraction(video_id)
- mobj = re.search(r'<meta property="og:title" content="(.+?)"', webpage)
- if mobj is None:
- raise ExtractorError(u'Unable to extract title')
- video_title = mobj.group(1)
+ video_title = self._html_search_regex(r'<meta property="og:title" content="(.+?)"',
+ webpage, u'title')
- mobj = re.search(r'<meta property="og:image" content="(.+?)"', webpage)
- if mobj is None:
- raise ExtractorError(u'Unable to extract thumbnail')
- thumbnail = mobj.group(1)
+ thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.+?)"',
+ webpage, u'thumbnail', fatal=False)
- mobj = re.search(r'<meta property="og:description" content="(.*?)"', webpage)
- if mobj is None:
- raise ExtractorError(u'Unable to extract description')
- description = mobj.group(1)
+ video_description = self._html_search_regex(r'<meta property="og:description" content="(.*?)"',
+ webpage, u'description', fatal=False)
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
data = self._download_webpage(data_url, video_id, 'Downloading data webpage')
- mobj = re.search(r'<file type="high".*?>(.*?)</file>', data)
- if mobj is None:
- raise ExtractorError(u'Unable to extract video url')
- video_url = mobj.group(1)
+
+ video_url = self._html_search_regex(r'<file type="high".*?>(.*?)</file>',
+ data, u'video URL')
return [{
'id': video_id,
'ext': 'mp4',
'title': video_title,
'thumbnail': thumbnail,
- 'description': description,
+ 'description': video_description,
+ }]
+
+class XHamsterIE(InfoExtractor):
+    """Information Extractor for xHamster"""
+    _VALID_URL = r'(?:http://)?(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'
+
+    def _real_extract(self,url):
+        mobj = re.match(self._VALID_URL, url)
+
+        video_id = mobj.group('id')
+        mrss_url = 'http://xhamster.com/movies/%s/.html' % video_id
+        webpage = self._download_webpage(mrss_url, video_id)
+
+        mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)  # player config: server + file
+        if mobj is None:
+            raise ExtractorError(u'Unable to extract media URL')
+        if len(mobj.group('server')) == 0:
+            video_url = compat_urllib_parse.unquote(mobj.group('file'))  # empty server: 'file' is a urlencoded direct URL
+        else:
+            video_url = mobj.group('server')+'/key='+mobj.group('file')
+        video_extension = video_url.split('.')[-1]
+
+        video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>',
+            webpage, u'title')
+
+        # Can't see the description anywhere in the UI
+        # video_description = self._html_search_regex(r'<span>Description: </span>(?P<description>[^<]+)',
+        #     webpage, u'description', fatal=False)
+        # if video_description: video_description = unescapeHTML(video_description)
+
+        mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)  # date lives in a tooltip 'hint' attribute
+        if mobj:
+            video_upload_date = mobj.group('upload_date_Y')+mobj.group('upload_date_m')+mobj.group('upload_date_d')
+        else:
+            video_upload_date = None
+            self._downloader.report_warning(u'Unable to extract upload date')
+
+        video_uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
+            webpage, u'uploader id', default=u'anonymous')
+
+        video_thumbnail = self._search_regex(r'\'image\':\'(?P<thumbnail>[^\']+)\'',
+            webpage, u'thumbnail', fatal=False)
+
+        return [{
+            'id':       video_id,
+            'url':      video_url,
+            'ext':      video_extension,
+            'title':    video_title,
+            # 'description': video_description,
+            'upload_date': video_upload_date,
+            'uploader_id': video_uploader_id,
+            'thumbnail': video_thumbnail
+        }]
+
+class HypemIE(InfoExtractor):
+    """Information Extractor for hypem"""
+    _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            raise ExtractorError(u'Invalid URL: %s' % url)
+        track_id = mobj.group(1)
+
+        data = { 'ax': 1, 'ts': time.time() }  # query params the page expects; 'ts' is a cache-busting timestamp
+        data_encoded = compat_urllib_parse.urlencode(data)
+        complete_url = url + "?" + data_encoded
+        request = compat_urllib_request.Request(complete_url)
+        response, urlh = self._download_webpage_handle(request, track_id, u'Downloading webpage with the url')
+        cookie = urlh.headers.get('Set-Cookie', '')  # session cookie is required by the /serve endpoint below
+
+        self.report_extraction(track_id)
+
+        html_tracks = self._html_search_regex(r'<script type="application/json" id="displayList-data">(.*?)</script>',
+            response, u'tracks', flags=re.MULTILINE|re.DOTALL).strip()
+        try:
+            track_list = json.loads(html_tracks)
+            track = track_list[u'tracks'][0]  # first track of the page's embedded playlist JSON
+        except ValueError:
+            raise ExtractorError(u'Hypemachine contained invalid JSON.')
+
+        key = track[u"key"]
+        track_id = track[u"id"]
+        artist = track[u"artist"]
+        title = track[u"song"]
+
+        serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key))
+        request = compat_urllib_request.Request(serve_url, "" , {'Content-Type': 'application/json'})  # empty body forces a POST
+        request.add_header('cookie', cookie)
+        song_data_json = self._download_webpage(request, track_id, u'Downloading metadata')
+        try:
+            song_data = json.loads(song_data_json)
+        except ValueError:
+            raise ExtractorError(u'Hypemachine contained invalid JSON.')
+        final_url = song_data[u"url"]  # the actual media URL resolved by the serve endpoint
+
+        return [{
+            'id':       track_id,
+            'url':      final_url,
+            'ext':      "mp3",
+            'title':    title,
+            'artist':   artist,
+        }]
+
+class Vbox7IE(InfoExtractor):
+    """Information Extractor for Vbox7"""
+    _VALID_URL = r'(?:http://)?(?:www\.)?vbox7\.com/play:([^/]+)'
+
+    def _real_extract(self,url):
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            raise ExtractorError(u'Invalid URL: %s' % url)
+        video_id = mobj.group(1)
+
+        redirect_page, urlh = self._download_webpage_handle(url, video_id)
+        new_location = self._search_regex(r'window\.location = \'(.*)\';', redirect_page, u'redirect location')  # page redirects via inline JS
+        redirect_url = urlh.geturl() + new_location
+        webpage = self._download_webpage(redirect_url, video_id, u'Downloading redirect page')
+
+        title = self._html_search_regex(r'<title>(.*)</title>',
+            webpage, u'title').split('/')[0].strip()  # keep only the part before the first '/'
+
+        ext = "flv"
+        info_url = "http://vbox7.com/play/magare.do"
+        data = compat_urllib_parse.urlencode({'as3':'1','vid':video_id})
+        info_request = compat_urllib_request.Request(info_url, data)
+        info_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+        info_response = self._download_webpage(info_request, video_id, u'Downloading info webpage')
+        if info_response is None:
+            raise ExtractorError(u'Unable to extract the media url')
+        (final_url, thumbnail_url) = map(lambda x: x.split('=')[1], info_response.split('&'))  # assumes exactly two '&'-separated key=value pairs — TODO confirm response shape
+
+        return [{
+            'id':        video_id,
+            'url':       final_url,
+            'ext':       ext,
+            'title':     title,
+            'thumbnail': thumbnail_url,
}]
+class GametrailersIE(InfoExtractor):
+    _VALID_URL = r'http://www\.gametrailers\.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            raise ExtractorError(u'Invalid URL: %s' % url)
+        video_id = mobj.group('id')
+        video_type = mobj.group('type')
+        webpage = self._download_webpage(url, video_id)
+        if video_type == 'full-episodes':
+            mgid_re = r'data-video="(?P<mgid>mgid:.*?)"'
+        else:
+            mgid_re = r'data-contentId=\'(?P<mgid>mgid:.*?)\''
+        mgid = self._search_regex(mgid_re, webpage, u'mgid')
+        data = compat_urllib_parse.urlencode({'uri': mgid, 'acceptMethods': 'fms'})
+
+        info_page = self._download_webpage('http://www.gametrailers.com/feeds/mrss?' + data,
+                                           video_id, u'Downloading video info')
+        links_webpage = self._download_webpage('http://www.gametrailers.com/feeds/mediagen/?' + data,
+                                               video_id, u'Downloading video urls info')
+
+        self.report_extraction(video_id)
+        info_re = r'''<title><!\[CDATA\[(?P<title>.*?)\]\]></title>.*
+                      <description><!\[CDATA\[(?P<description>.*?)\]\]></description>.*
+                      <image>.*
+                        <url>(?P<thumb>.*?)</url>.*
+                      </image>'''
+
+        m_info = re.search(info_re, info_page, re.VERBOSE|re.DOTALL)
+        if m_info is None:
+            raise ExtractorError(u'Unable to extract video info')
+        video_title = m_info.group('title')
+        video_description = m_info.group('description')
+        video_thumb = m_info.group('thumb')
+
+        m_urls = list(re.finditer(r'<src>(?P<url>.*)</src>', links_webpage))
+        if m_urls is None or len(m_urls) == 0:
+            raise ExtractorError(u'Unable to extract video url')
+        # They are sorted from worst to best quality
+        video_url = m_urls[-1].group('url')
+
+        return {'url':         video_url,
+                'id':          video_id,
+                'title':       video_title,
+                # Videos are actually flv not mp4
+                'ext':         'flv',
+                'thumbnail':   video_thumb,
+                'description': video_description,
+                }
+
def gen_extractors():
""" Return a list of an instance of every supported extractor.
The order does matter; the first extractor matched is the one handling the URL.
YahooSearchIE(),
DepositFilesIE(),
FacebookIE(),
- BlipTVUserIE(),
BlipTVIE(),
+ BlipTVUserIE(),
VimeoIE(),
MyVideoIE(),
ComedyCentralIE(),
SpiegelIE(),
LiveLeakIE(),
ARDIE(),
+ ZDFIE(),
TumblrIE(),
BandcampIE(),
RedTubeIE(),
VineIE(),
FlickrIE(),
TeamcocoIE(),
+ XHamsterIE(),
+ HypemIE(),
+ Vbox7IE(),
+ GametrailersIE(),
GenericIE()
]