]> Raphaël G. Git Repositories - youtubedl/commitdiff
Imported Upstream version 2013.06.21
authorRogério Brito <rbrito@ime.usp.br>
Fri, 21 Jun 2013 00:12:51 +0000 (21:12 -0300)
committerRogério Brito <rbrito@ime.usp.br>
Fri, 21 Jun 2013 00:12:51 +0000 (21:12 -0300)
19 files changed:
.gitignore [deleted file]
.travis.yml [deleted file]
LATEST_VERSION [deleted file]
README.txt [new file with mode: 0644]
devscripts/release.sh
test/test_all_urls.py
test/test_download.py
test/test_youtube_lists.py
test/test_youtube_subtitles.py
test/tests.json
youtube-dl
youtube-dl.1 [new file with mode: 0644]
youtube-dl.bash-completion [new file with mode: 0644]
youtube-dl.exe [deleted file]
youtube_dl/FileDownloader.py
youtube_dl/InfoExtractors.py
youtube_dl/__init__.py
youtube_dl/utils.py
youtube_dl/version.py

diff --git a/.gitignore b/.gitignore
deleted file mode 100644 (file)
index ca4e8f3..0000000
+++ /dev/null
@@ -1,20 +0,0 @@
-*.pyc
-*.pyo
-*~
-*.DS_Store
-wine-py2exe/
-py2exe.log
-*.kate-swp
-build/
-dist/
-MANIFEST
-README.txt
-youtube-dl.1
-youtube-dl.bash-completion
-youtube-dl
-youtube-dl.exe
-youtube-dl.tar.gz
-.coverage
-cover/
-updates_key.pem
-*.egg-info
\ No newline at end of file
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644 (file)
index 7f1fa8a..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-language: python
-python:
-  - "2.6"
-  - "2.7"
-  - "3.3"
-script: nosetests test --verbose
-notifications:
-  email:
-    - filippo.valsorda@gmail.com
-    - phihag@phihag.de
-    - jaime.marquinez.ferrandiz+travis@gmail.com
-#  irc:
-#    channels:
-#      - "irc.freenode.org#youtube-dl"
-#    skip_join: true
diff --git a/LATEST_VERSION b/LATEST_VERSION
deleted file mode 100644 (file)
index a334573..0000000
+++ /dev/null
@@ -1 +0,0 @@
-2012.12.99
diff --git a/README.txt b/README.txt
new file mode 100644 (file)
index 0000000..c5e2ce3
--- /dev/null
@@ -0,0 +1,332 @@
+NAME
+====
+
+youtube-dl
+
+SYNOPSIS
+========
+
+youtube-dl OPTIONS URL [URL...]
+
+DESCRIPTION
+===========
+
+youtube-dl is a small command-line program to download videos from
+YouTube.com and a few more sites. It requires the Python interpreter,
+version 2.6, 2.7, or 3.3+, and it is not platform specific. It should
+work on your Unix box, on Windows or on Mac OS X. It is released to the
+public domain, which means you can modify it, redistribute it or use it
+however you like.
+
+OPTIONS
+=======
+
+    -h, --help                 print this help text and exit
+    --version                  print program version and exit
+    -U, --update               update this program to latest version
+    -i, --ignore-errors        continue on download errors
+    -r, --rate-limit LIMIT     maximum download rate (e.g. 50k or 44.6m)
+    -R, --retries RETRIES      number of retries (default is 10)
+    --buffer-size SIZE         size of download buffer (e.g. 1024 or 16k)
+                               (default is 1024)
+    --no-resize-buffer         do not automatically adjust the buffer size. By
+                               default, the buffer size is automatically resized
+                               from an initial value of SIZE.
+    --dump-user-agent          display the current browser identification
+    --user-agent UA            specify a custom user agent
+    --referer REF              specify a custom referer, use if the video access
+                               is restricted to one domain
+    --list-extractors          List all supported extractors and the URLs they
+                               would handle
+    --proxy URL                Use the specified HTTP/HTTPS proxy
+    --no-check-certificate     Suppress HTTPS certificate validation.
+
+Video Selection:
+----------------
+
+    --playlist-start NUMBER    playlist video to start at (default is 1)
+    --playlist-end NUMBER      playlist video to end at (default is last)
+    --match-title REGEX        download only matching titles (regex or caseless
+                               sub-string)
+    --reject-title REGEX       skip download for matching titles (regex or
+                               caseless sub-string)
+    --max-downloads NUMBER     Abort after downloading NUMBER files
+    --min-filesize SIZE        Do not download any videos smaller than SIZE
+                               (e.g. 50k or 44.6m)
+    --max-filesize SIZE        Do not download any videos larger than SIZE (e.g.
+                               50k or 44.6m)
+    --date DATE                download only videos uploaded in this date
+    --datebefore DATE          download only videos uploaded before this date
+    --dateafter DATE           download only videos uploaded after this date
+
+Filesystem Options:
+-------------------
+
+    -t, --title                use title in file name (default)
+    --id                       use only video ID in file name
+    -l, --literal              [deprecated] alias of --title
+    -A, --auto-number          number downloaded files starting from 00000
+    -o, --output TEMPLATE      output filename template. Use %(title)s to get
+                               the title, %(uploader)s for the uploader name,
+                               %(uploader_id)s for the uploader nickname if
+                               different, %(autonumber)s to get an automatically
+                               incremented number, %(ext)s for the filename
+                               extension, %(upload_date)s for the upload date
+                               (YYYYMMDD), %(extractor)s for the provider
+                               (youtube, metacafe, etc), %(id)s for the video id
+                               , %(playlist)s for the playlist the video is in,
+                               %(playlist_index)s for the position in the
+                               playlist and %% for a literal percent. Use - to
+                               output to stdout. Can also be used to download to
+                               a different directory, for example with -o '/my/d
+                               ownloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
+    --autonumber-size NUMBER   Specifies the number of digits in %(autonumber)s
+                               when it is present in output filename template or
+                               --autonumber option is given
+    --restrict-filenames       Restrict filenames to only ASCII characters, and
+                               avoid "&" and spaces in filenames
+    -a, --batch-file FILE      file containing URLs to download ('-' for stdin)
+    -w, --no-overwrites        do not overwrite files
+    -c, --continue             resume partially downloaded files
+    --no-continue              do not resume partially downloaded files (restart
+                               from beginning)
+    --cookies FILE             file to read cookies from and dump cookie jar in
+    --no-part                  do not use .part files
+    --no-mtime                 do not use the Last-modified header to set the
+                               file modification time
+    --write-description        write video description to a .description file
+    --write-info-json          write video metadata to a .info.json file
+    --write-thumbnail          write thumbnail image to disk
+
+Verbosity / Simulation Options:
+-------------------------------
+
+    -q, --quiet                activates quiet mode
+    -s, --simulate             do not download the video and do not write
+                               anything to disk
+    --skip-download            do not download the video
+    -g, --get-url              simulate, quiet but print URL
+    -e, --get-title            simulate, quiet but print title
+    --get-id                   simulate, quiet but print id
+    --get-thumbnail            simulate, quiet but print thumbnail URL
+    --get-description          simulate, quiet but print video description
+    --get-filename             simulate, quiet but print output filename
+    --get-format               simulate, quiet but print output format
+    --newline                  output progress bar as new lines
+    --no-progress              do not print progress bar
+    --console-title            display progress in console titlebar
+    -v, --verbose              print various debugging information
+    --dump-intermediate-pages  print downloaded pages to debug problems (very
+                               verbose)
+
+Video Format Options:
+---------------------
+
+    -f, --format FORMAT        video format code, specify the order of
+                               preference using slashes: "-f 22/17/18"
+    --all-formats              download all available video formats
+    --prefer-free-formats      prefer free video formats unless a specific one
+                               is requested
+    --max-quality FORMAT       highest quality format to download
+    -F, --list-formats         list all available formats (currently youtube
+                               only)
+    --write-sub                write subtitle file (currently youtube only)
+    --only-sub                 [deprecated] alias of --skip-download
+    --all-subs                 downloads all the available subtitles of the
+                               video (currently youtube only)
+    --list-subs                lists all available subtitles for the video
+                               (currently youtube only)
+    --sub-format LANG          subtitle format [srt/sbv] (default=srt)
+                               (currently youtube only)
+    --sub-lang LANG            language of the subtitles to download (optional)
+                               use IETF language tags like 'en'
+
+Authentication Options:
+-----------------------
+
+    -u, --username USERNAME    account username
+    -p, --password PASSWORD    account password
+    -n, --netrc                use .netrc authentication data
+
+Post-processing Options:
+------------------------
+
+    -x, --extract-audio        convert video files to audio-only files (requires
+                               ffmpeg or avconv and ffprobe or avprobe)
+    --audio-format FORMAT      "best", "aac", "vorbis", "mp3", "m4a", "opus", or
+                               "wav"; best by default
+    --audio-quality QUALITY    ffmpeg/avconv audio quality specification, insert
+                               a value between 0 (better) and 9 (worse) for VBR
+                               or a specific bitrate like 128K (default 5)
+    --recode-video FORMAT      Encode the video to another format if necessary
+                               (currently supported: mp4|flv|ogg|webm)
+    -k, --keep-video           keeps the video file on disk after the post-
+                               processing; the video is erased by default
+    --no-post-overwrites       do not overwrite post-processed files; the post-
+                               processed files are overwritten by default
+
+CONFIGURATION
+=============
+
+You can configure youtube-dl by placing default arguments (such as
+--extract-audio --no-mtime to always extract the audio and not copy the
+mtime) into /etc/youtube-dl.conf and/or ~/.config/youtube-dl.conf.
+
+OUTPUT TEMPLATE
+===============
+
+The -o option allows users to indicate a template for the output file
+names. The basic usage is not to set any template arguments when
+downloading a single file, like in
+youtube-dl -o funny_video.flv "http://some/video". However, it may
+contain special sequences that will be replaced when downloading each
+video. The special sequences have the format %(NAME)s. To clarify, that
+is a percent symbol followed by a name in parenthesis, followed by a
+lowercase S. Allowed names are:
+
+-   id: The sequence will be replaced by the video identifier.
+-   url: The sequence will be replaced by the video URL.
+-   uploader: The sequence will be replaced by the nickname of the
+    person who uploaded the video.
+-   upload_date: The sequence will be replaced by the upload date in
+    YYYYMMDD format.
+-   title: The sequence will be replaced by the video title.
+-   ext: The sequence will be replaced by the appropriate extension
+    (like flv or mp4).
+-   epoch: The sequence will be replaced by the Unix epoch when creating
+    the file.
+-   autonumber: The sequence will be replaced by a five-digit number
+    that will be increased with each download, starting at zero.
+-   playlist: The name or the id of the playlist that contains the
+    video.
+-   playlist_index: The index of the video in the playlist, a five-digit
+    number.
+
+The current default template is %(id)s.%(ext)s, but that will be
+switched to %(title)s-%(id)s.%(ext)s (which can be requested with -t at
+the moment).
+
+In some cases, you don't want special characters such as 中, spaces, or
+&, such as when transferring the downloaded filename to a Windows system
+or the filename through an 8bit-unsafe channel. In these cases, add the
+--restrict-filenames flag to get a shorter title:
+
+    $ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc
+    youtube-dl test video ''_ä↭𝕐.mp4    # All kinds of weird characters
+    $ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
+    youtube-dl_test_video_.mp4          # A simple file name
+
+VIDEO SELECTION
+===============
+
+Videos can be filtered by their upload date using the options --date,
+--datebefore or --dateafter, they accept dates in two formats:
+
+-   Absolute dates: Dates in the format YYYYMMDD.
+-   Relative dates: Dates in the format
+    (now|today)[+-][0-9](day|week|month|year)(s)?
+
+Examples:
+
+    $ youtube-dl --dateafter now-6months #will only download the videos uploaded in the last 6 months
+    $ youtube-dl --date 19700101 #will only download the videos uploaded in January 1, 1970
+    $ youtube-dl --dateafter 20000101 --datebefore 20100101 #will only download the videos uploaded between 2000 and 2010
+
+FAQ
+===
+
+Can you please put the -b option back?
+
+Most people asking this question are not aware that youtube-dl now
+defaults to downloading the highest available quality as reported by
+YouTube, which will be 1080p or 720p in some cases, so you no longer
+need the -b option. For some specific videos, maybe YouTube does not
+report them to be available in a specific high quality format you're
+interested in. In that case, simply request it with the -f option and
+youtube-dl will try to download it.
+
+I get HTTP error 402 when trying to download a video. What's this?
+
+Apparently YouTube requires you to pass a CAPTCHA test if you download
+too much. We're considering to provide a way to let you solve the
+CAPTCHA, but at the moment, your best course of action is pointing a
+webbrowser to the youtube URL, solving the CAPTCHA, and restart
+youtube-dl.
+
+I have downloaded a video but how can I play it?
+
+Once the video is fully downloaded, use any video player, such as vlc or
+mplayer.
+
+The links provided by youtube-dl -g are not working anymore
+
+The URLs youtube-dl outputs require the downloader to have the correct
+cookies. Use the --cookies option to write the required cookies into a
+file, and advise your downloader to read cookies from that file. Some
+sites also require a common user agent to be used, use --dump-user-agent
+to see the one in use by youtube-dl.
+
+ERROR: no fmt_url_map or conn information found in video info
+
+youtube has switched to a new video info format in July 2011 which is
+not supported by old versions of youtube-dl. You can update youtube-dl
+with sudo youtube-dl --update.
+
+ERROR: unable to download video
+
+youtube requires an additional signature since September 2012 which is
+not supported by old versions of youtube-dl. You can update youtube-dl
+with sudo youtube-dl --update.
+
+SyntaxError: Non-ASCII character
+
+The error
+
+    File "youtube-dl", line 2
+    SyntaxError: Non-ASCII character '\x93' ...
+
+means you're using an outdated version of Python. Please update to
+Python 2.6 or 2.7.
+
+What is this binary file? Where has the code gone?
+
+Since June 2012 (#342) youtube-dl is packed as an executable zipfile,
+simply unzip it (might need renaming to youtube-dl.zip first on some
+systems) or clone the git repository, as laid out above. If you modify
+the code, you can run it by executing the __main__.py file. To recompile
+the executable, run make youtube-dl.
+
+The exe throws a Runtime error from Visual C++
+
+To run the exe you need to install first the Microsoft Visual C++ 2008
+Redistributable Package.
+
+COPYRIGHT
+=========
+
+youtube-dl is released into the public domain by the copyright holders.
+
+This README file was originally written by Daniel Bolton
+(https://github.com/dbbolton) and is likewise released into the public
+domain.
+
+BUGS
+====
+
+Bugs and suggestions should be reported at:
+https://github.com/rg3/youtube-dl/issues
+
+Please include:
+
+-   Your exact command line, like
+    youtube-dl -t "http://www.youtube.com/watch?v=uHlDtZ6Oc3s&feature=channel_video_title".
+    A common mistake is not to escape the &. Putting URLs in quotes
+    should solve this problem.
+-   If possible re-run the command with --verbose, and include the full
+    output, it is really helpful to us.
+-   The output of youtube-dl --version
+-   The output of python --version
+-   The name and version of your Operating System ("Ubuntu 11.04 x64" or
+    "Windows 7 x64" is usually enough).
+
+For discussions, join us in the irc channel #youtube-dl on freenode.
index b2a91f817922853ee502624fbd621f535cfa935e..b8efdab4781901fb096e35dcf5cd351457e75299 100755 (executable)
@@ -22,7 +22,7 @@ if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit
 
 /bin/echo -e "\n### First of all, testing..."
 make cleanall
 
 /bin/echo -e "\n### First of all, testing..."
 make cleanall
-nosetests --with-coverage --cover-package=youtube_dl --cover-html test --stop || exit 1
+nosetests --verbose --with-coverage --cover-package=youtube_dl --cover-html test --stop || exit 1
 
 /bin/echo -e "\n### Changing version in version.py..."
 sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
 
 /bin/echo -e "\n### Changing version in version.py..."
 sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
index a403601220a6de38d5d3ab5ff8063d743b433d59..dd67286a79a6071317a4a6cc8bb6b4497a1c171a 100644 (file)
@@ -7,7 +7,7 @@ import unittest
 import os
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 import os
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from youtube_dl.InfoExtractors import YoutubeIE, YoutubePlaylistIE, YoutubeChannelIE
+from youtube_dl.InfoExtractors import YoutubeIE, YoutubePlaylistIE, YoutubeChannelIE, JustinTVIE
 
 class TestAllURLsMatching(unittest.TestCase):
     def test_youtube_playlist_matching(self):
 
 class TestAllURLsMatching(unittest.TestCase):
     def test_youtube_playlist_matching(self):
@@ -29,6 +29,22 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec'))
         self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM/videos'))
 
         self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec'))
         self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM/videos'))
 
+    def test_justin_tv_channelid_matching(self):
+        self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
+        self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv"))
+        self.assertTrue(JustinTVIE.suitable(u"www.justin.tv/vanillatv"))
+        self.assertTrue(JustinTVIE.suitable(u"www.twitch.tv/vanillatv"))
+        self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv"))
+        self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv"))
+        self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv/"))
+        self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/"))
+
+    def test_justintv_videoid_matching(self):
+        self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/b/328087483"))
+
+    def test_justin_tv_chapterid_matching(self):
+        self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361"))
+
     def test_youtube_extract(self):
         self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
         self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
     def test_youtube_extract(self):
         self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
         self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
index 3eca333f269748cabb1f09b62a11f70ace25b3ac..577bcdbf2d7eeac987aa2566cd4cda128d07a9ba 100644 (file)
@@ -7,8 +7,8 @@ import os
 import json
 import unittest
 import sys
 import json
 import unittest
 import sys
-import hashlib
 import socket
 import socket
+import binascii
 
 # Allow direct execution
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 # Allow direct execution
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -38,11 +38,16 @@ def _try_rm(filename):
         if ose.errno != errno.ENOENT:
             raise
 
         if ose.errno != errno.ENOENT:
             raise
 
+md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
+
 class FileDownloader(youtube_dl.FileDownloader):
     def __init__(self, *args, **kwargs):
         self.to_stderr = self.to_screen
         self.processed_info_dicts = []
         return youtube_dl.FileDownloader.__init__(self, *args, **kwargs)
 class FileDownloader(youtube_dl.FileDownloader):
     def __init__(self, *args, **kwargs):
         self.to_stderr = self.to_screen
         self.processed_info_dicts = []
         return youtube_dl.FileDownloader.__init__(self, *args, **kwargs)
+    def report_warning(self, message):
+        # Don't accept warnings during tests
+        raise ExtractorError(message)
     def process_info(self, info_dict):
         self.processed_info_dicts.append(info_dict)
         return youtube_dl.FileDownloader.process_info(self, info_dict)
     def process_info(self, info_dict):
         self.processed_info_dicts.append(info_dict)
         return youtube_dl.FileDownloader.process_info(self, info_dict)
@@ -121,7 +126,21 @@ def generator(test_case):
                 with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof:
                     info_dict = json.load(infof)
                 for (info_field, value) in tc.get('info_dict', {}).items():
                 with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof:
                     info_dict = json.load(infof)
                 for (info_field, value) in tc.get('info_dict', {}).items():
-                    self.assertEqual(value, info_dict.get(info_field))
+                    if isinstance(value, compat_str) and value.startswith('md5:'):
+                        self.assertEqual(value, 'md5:' + md5(info_dict.get(info_field)))
+                    else:
+                        self.assertEqual(value, info_dict.get(info_field))
+
+                # If checkable fields are missing from the test case, print the info_dict
+                test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
+                    for key, value in info_dict.items()
+                    if value and key in ('title', 'description', 'uploader', 'upload_date', 'uploader_id', 'location'))
+                if not all(key in tc.get('info_dict', {}).keys() for key in test_info_dict.keys()):
+                    sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=2) + u'\n')
+
+                # Check for the presence of mandatory fields
+                for key in ('id', 'url', 'title', 'ext'):
+                    self.assertTrue(key in info_dict.keys() and info_dict[key])
         finally:
             for tc in test_cases:
                 _try_rm(tc['file'])
         finally:
             for tc in test_cases:
                 _try_rm(tc['file'])
index 78657b51ca43b1776596e612266cb0e8d9a07ebd..e8b49ff8ebe3bd3f74f796888cdc7f27b1ecc386 100644 (file)
@@ -53,8 +53,7 @@ class TestYoutubeLists(unittest.TestCase):
         dl = FakeDownloader()
         ie = YoutubePlaylistIE(dl)
         result = ie.extract('PLBB231211A4F62143')[0]
         dl = FakeDownloader()
         ie = YoutubePlaylistIE(dl)
         result = ie.extract('PLBB231211A4F62143')[0]
-        self.assertEqual(result['title'], 'Team Fortress 2')
-        self.assertTrue(len(result['entries']) > 40)
+        self.assertTrue(len(result['entries']) > 25)
 
     def test_youtube_playlist_long(self):
         dl = FakeDownloader()
 
     def test_youtube_playlist_long(self):
         dl = FakeDownloader()
@@ -105,5 +104,11 @@ class TestYoutubeLists(unittest.TestCase):
         result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')[0]
         self.assertTrue(len(result['entries']) >= 320)
 
         result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')[0]
         self.assertTrue(len(result['entries']) >= 320)
 
+    def test_youtube_safe_search(self):
+        dl = FakeDownloader()
+        ie = YoutubePlaylistIE(dl)
+        result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')[0]
+        self.assertEqual(len(result['entries']), 2)
+
 if __name__ == '__main__':
     unittest.main()
 if __name__ == '__main__':
     unittest.main()
index a123e6d72df077efc9228e75a5455d3f80624e00..c80c90cbed53f5b5a5fbbbc5f8c7f3e9cb07586b 100644 (file)
@@ -28,7 +28,9 @@ compat_urllib_request.install_opener(opener)
 class FakeDownloader(FileDownloader):
     def __init__(self):
         self.result = []
 class FakeDownloader(FileDownloader):
     def __init__(self):
         self.result = []
-        self.params = parameters
+        # Different instances of the downloader can't share the same dictionary
+        # some test set the "sublang" parameter, which would break the md5 checks.
+        self.params = dict(parameters)
     def to_screen(self, s):
         print(s)
     def trouble(self, s, tb=None):
     def to_screen(self, s):
         print(s)
     def trouble(self, s, tb=None):
@@ -96,6 +98,14 @@ class TestYoutubeSubtitles(unittest.TestCase):
         IE = YoutubeIE(DL)
         info_dict = IE.extract('QRS8MkLhQmM')
         self.assertEqual(info_dict, None)
         IE = YoutubeIE(DL)
         info_dict = IE.extract('QRS8MkLhQmM')
         self.assertEqual(info_dict, None)
+    def test_youtube_automatic_captions(self):
+        DL = FakeDownloader()
+        DL.params['writesubtitles'] = True
+        DL.params['subtitleslang'] = 'it'
+        IE = YoutubeIE(DL)
+        info_dict = IE.extract('8YoUxe5ncPo')
+        sub = info_dict[0]['subtitles'][0]
+        self.assertTrue(sub[2] is not None)
 
 if __name__ == '__main__':
     unittest.main()
 
 if __name__ == '__main__':
     unittest.main()
index f57ebf1c9a8d3f9149786b9caeabf93585d9a4ee..3e0db297db59e9676879e40ec2033c77342a92b8 100644 (file)
     "name": "Dailymotion",
     "md5":  "392c4b85a60a90dc4792da41ce3144eb",
     "url":  "http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech",
     "name": "Dailymotion",
     "md5":  "392c4b85a60a90dc4792da41ce3144eb",
     "url":  "http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech",
-    "file":  "x33vw9.mp4"
+    "file":  "x33vw9.mp4",
+    "info_dict": {
+      "uploader": "Alex and Van .",
+      "title": "Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
+    }
   },
   {
     "name": "Metacafe",
     "add_ie": ["Youtube"],
     "url":  "http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/",
   },
   {
     "name": "Metacafe",
     "add_ie": ["Youtube"],
     "url":  "http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/",
-    "file":  "_aUehQsCQtM.flv"
+    "file":  "_aUehQsCQtM.flv",
+    "info_dict": {
+      "upload_date": "20090102",
+      "title": "The Electric Company | \"Short I\" | PBS KIDS GO!",
+      "description": "md5:2439a8ef6d5a70e380c22f5ad323e5a8",
+      "uploader": "PBS",
+      "uploader_id": "PBS"
+    }
   },
   {
     "name": "BlipTV",
     "md5":  "b2d849efcf7ee18917e4b4d9ff37cafe",
     "url":  "http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352",
   },
   {
     "name": "BlipTV",
     "md5":  "b2d849efcf7ee18917e4b4d9ff37cafe",
     "url":  "http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352",
-    "file":  "5779306.m4v"
+    "file":  "5779306.m4v",
+    "info_dict": {
+      "upload_date": "20111205",
+      "description": "md5:9bc31f227219cde65e47eeec8d2dc596",
+      "uploader": "Comic Book Resources - CBR TV",
+      "title": "CBR EXCLUSIVE: \"Gotham City Imposters\" Bats VS Jokerz Short 3"
+    }
   },
   {
     "name": "XVideos",
     "md5":  "1d0c835822f0a71a7bf011855db929d0",
     "url":  "http://www.xvideos.com/video939581/funny_porns_by_s_-1",
   },
   {
     "name": "XVideos",
     "md5":  "1d0c835822f0a71a7bf011855db929d0",
     "url":  "http://www.xvideos.com/video939581/funny_porns_by_s_-1",
-    "file":  "939581.flv"
+    "file":  "939581.flv",
+    "info_dict": {
+      "title": "Funny Porns By >>>>S<<<<<< -1"
+    }
   },
   {
     "name": "YouPorn",
     "md5": "c37ddbaaa39058c76a7e86c6813423c1",
     "url": "http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/",
   },
   {
     "name": "YouPorn",
     "md5": "c37ddbaaa39058c76a7e86c6813423c1",
     "url": "http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/",
-    "file": "505835.mp4"
+    "file": "505835.mp4",
+    "info_dict": {
+      "upload_date": "20101221",
+      "description": "Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?",
+      "uploader": "Ask Dan And Jennifer",
+      "title": "Sex Ed: Is It Safe To Masturbate Daily?"
+    }
   },
   {
     "name": "Pornotube",
     "md5": "374dd6dcedd24234453b295209aa69b6",
     "url": "http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing",
   },
   {
     "name": "Pornotube",
     "md5": "374dd6dcedd24234453b295209aa69b6",
     "url": "http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing",
-    "file": "1689755.flv"
+    "file": "1689755.flv",
+    "info_dict": {
+      "upload_date": "20090708",
+      "title": "Marilyn-Monroe-Bathing"
+    }
   },
   {
     "name": "YouJizz",
     "md5": "07e15fa469ba384c7693fd246905547c",
     "url": "http://www.youjizz.com/videos/zeichentrick-1-2189178.html",
   },
   {
     "name": "YouJizz",
     "md5": "07e15fa469ba384c7693fd246905547c",
     "url": "http://www.youjizz.com/videos/zeichentrick-1-2189178.html",
-    "file": "2189178.flv"
+    "file": "2189178.flv",
+    "info_dict": {
+      "title": "Zeichentrick 1"
+    }
   },
   {
     "name": "Vimeo",
   },
   {
     "name": "Vimeo",
     "name": "Soundcloud",
     "md5":  "ebef0a451b909710ed1d7787dddbf0d7",
     "url":  "http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy",
     "name": "Soundcloud",
     "md5":  "ebef0a451b909710ed1d7787dddbf0d7",
     "url":  "http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy",
-    "file":  "62986583.mp3"
+    "file":  "62986583.mp3",
+    "info_dict": {
+      "upload_date": "20121011",
+      "description": "No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd",
+      "uploader": "E.T. ExTerrestrial Music",
+      "title": "Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
+    }
   },
   {
     "name": "StanfordOpenClassroom",
     "md5":  "544a9468546059d4e80d76265b0443b8",
     "url":  "http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100",
   },
   {
     "name": "StanfordOpenClassroom",
     "md5":  "544a9468546059d4e80d76265b0443b8",
     "url":  "http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100",
-    "file":  "PracticalUnix_intro-environment.mp4"
+    "file":  "PracticalUnix_intro-environment.mp4",
+    "info_dict": {
+      "title": "Intro Environment"
+    }
   },
   {
     "name": "XNXX",
     "md5":  "0831677e2b4761795f68d417e0b7b445",
     "url":  "http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_",
   },
   {
     "name": "XNXX",
     "md5":  "0831677e2b4761795f68d417e0b7b445",
     "url":  "http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_",
-    "file":  "1135332.flv"
+    "file":  "1135332.flv",
+    "info_dict": {
+      "title": "lida » Naked Funny Actress  (5)"
+    }
   },
   {
     "name": "Youku",
     "url": "http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html",
     "file": "XNDgyMDQ2NTQw_part00.flv",
     "md5": "ffe3f2e435663dc2d1eea34faeff5b5b",
   },
   {
     "name": "Youku",
     "url": "http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html",
     "file": "XNDgyMDQ2NTQw_part00.flv",
     "md5": "ffe3f2e435663dc2d1eea34faeff5b5b",
-    "params": { "test": false }
+    "params": { "test": false },
+    "info_dict": {
+      "title": "youtube-dl test video \"'/\\ä↭𝕐"
+    }
   },
   {
     "name": "NBA",
     "url": "http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html",
     "file": "0021200253-okc-bkn-recap.nba.mp4",
   },
   {
     "name": "NBA",
     "url": "http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html",
     "file": "0021200253-okc-bkn-recap.nba.mp4",
-    "md5": "c0edcfc37607344e2ff8f13c378c88a4"
+    "md5": "c0edcfc37607344e2ff8f13c378c88a4",
+    "info_dict": {
+      "description": "Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.",
+      "title": "Thunder vs. Nets"
+    }
   },
   {
     "name": "JustinTV",
     "url": "http://www.twitch.tv/thegamedevhub/b/296128360",
     "file": "296128360.flv",
   },
   {
     "name": "JustinTV",
     "url": "http://www.twitch.tv/thegamedevhub/b/296128360",
     "file": "296128360.flv",
-    "md5": "ecaa8a790c22a40770901460af191c9a"
+    "md5": "ecaa8a790c22a40770901460af191c9a",
+    "info_dict": {
+      "upload_date": "20110927",
+      "uploader_id": 25114803,
+      "uploader": "thegamedevhub",
+      "title": "Beginner Series - Scripting With Python Pt.1"
+    }
   },
   {
     "name": "MyVideo",
     "url": "http://www.myvideo.de/watch/8229274/bowling_fail_or_win",
     "file": "8229274.flv",
   },
   {
     "name": "MyVideo",
     "url": "http://www.myvideo.de/watch/8229274/bowling_fail_or_win",
     "file": "8229274.flv",
-    "md5": "2d2753e8130479ba2cb7e0a37002053e"
+    "md5": "2d2753e8130479ba2cb7e0a37002053e",
+    "info_dict": {
+      "title": "bowling-fail-or-win"
+    }
   },
   {
     "name": "Escapist",
     "url": "http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate",
     "file": "6618-Breaking-Down-Baldurs-Gate.mp4",
   },
   {
     "name": "Escapist",
     "url": "http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate",
     "file": "6618-Breaking-Down-Baldurs-Gate.mp4",
-    "md5": "c6793dbda81388f4264c1ba18684a74d"
+    "md5": "c6793dbda81388f4264c1ba18684a74d",
+    "info_dict": {
+      "description": "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
+      "uploader": "the-escapist-presents",
+      "title": "Breaking Down Baldur's Gate"
+    }
   },
   {
     "name": "GooglePlus",
     "url": "https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH",
   },
   {
     "name": "GooglePlus",
     "url": "https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH",
-    "file": "ZButuJc6CtH.flv"
+    "file": "ZButuJc6CtH.flv",
+    "info_dict": {
+      "upload_date": "20120613",
+      "uploader": "井上ヨシマサ",
+      "title": "嘆きの天使 降臨"
+    }
   },
   {
     "name": "FunnyOrDie",
     "url": "http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version",
     "file": "0732f586d7.mp4",
   },
   {
     "name": "FunnyOrDie",
     "url": "http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version",
     "file": "0732f586d7.mp4",
-    "md5": "f647e9e90064b53b6e046e75d0241fbd"
+    "md5": "f647e9e90064b53b6e046e75d0241fbd",
+    "info_dict": {
+      "description": "Lyrics changed to match the video. Spoken cameo by Obscurus Lupa (from ThatGuyWithTheGlasses.com). Based on a concept by Dustin McLean (DustFilms.com). Performed, edited, and written by David A. Scott.",
+      "title": "Heart-Shaped Box: Literal Video Version"
+    }
   },
   {
     "name": "Steam",
   },
   {
     "name": "Steam",
     "url": "http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",
     "file": "12-jan-pythonthings.mp4",
     "info_dict": {
     "url": "http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",
     "file": "12-jan-pythonthings.mp4",
     "info_dict": {
+      "description": "Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.",
       "title": "A Few of My Favorite [Python] Things"
     },
     "params": {
       "title": "A Few of My Favorite [Python] Things"
     },
     "params": {
     "file": "422212.mp4",
     "md5": "4e2f5cb088a83cd8cdb7756132f9739d",
     "info_dict": {
     "file": "422212.mp4",
     "md5": "4e2f5cb088a83cd8cdb7756132f9739d",
     "info_dict": {
-        "title": "thedailyshow-kristen-stewart part 1"
+      "upload_date": "20121214",
+      "description": "Kristen Stewart",
+      "uploader": "thedailyshow",
+      "title": "thedailyshow-kristen-stewart part 1"
     }
   },
   {
     }
   },
   {
         "file": "11885679.m4a",
         "md5": "d30b5b5f74217410f4689605c35d1fd7",
         "info_dict": {
         "file": "11885679.m4a",
         "md5": "d30b5b5f74217410f4689605c35d1fd7",
         "info_dict": {
-          "title": "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad"
+          "title": "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad",
+          "uploader_id": "ytdl"
         }
       },
       {
         "file": "11885680.m4a",
         "md5": "4eb0a669317cd725f6bbd336a29f923a",
         "info_dict": {
         }
       },
       {
         "file": "11885680.m4a",
         "md5": "4eb0a669317cd725f6bbd336a29f923a",
         "info_dict": {
-          "title": "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad"
+          "title": "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad",
+          "uploader_id": "ytdl"
         }
       },
       {
         "file": "11885682.m4a",
         "md5": "1893e872e263a2705558d1d319ad19e8",
         "info_dict": {
         }
       },
       {
         "file": "11885682.m4a",
         "md5": "1893e872e263a2705558d1d319ad19e8",
         "info_dict": {
-          "title": "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad"
+          "title": "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad",
+          "uploader_id": "ytdl"
         }
       },
       {
         "file": "11885683.m4a",
         "md5": "b673c46f47a216ab1741ae8836af5899",
         "info_dict": {
         }
       },
       {
         "file": "11885683.m4a",
         "md5": "b673c46f47a216ab1741ae8836af5899",
         "info_dict": {
-          "title": "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad"
+          "title": "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad",
+          "uploader_id": "ytdl"
         }
       },
       {
         "file": "11885684.m4a",
         "md5": "1d74534e95df54986da7f5abf7d842b7",
         "info_dict": {
         }
       },
       {
         "file": "11885684.m4a",
         "md5": "1d74534e95df54986da7f5abf7d842b7",
         "info_dict": {
-          "title": "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad"
+          "title": "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad",
+          "uploader_id": "ytdl"
         }
       },
       {
         "file": "11885685.m4a",
         "md5": "f081f47af8f6ae782ed131d38b9cd1c0",
         "info_dict": {
         }
       },
       {
         "file": "11885685.m4a",
         "md5": "f081f47af8f6ae782ed131d38b9cd1c0",
         "info_dict": {
-          "title": "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad"
+          "title": "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad",
+          "uploader_id": "ytdl"
         }
       }
     ]
         }
       }
     ]
     "file": "NODfbab.mp4",
     "md5": "9b0636f8c0f7614afa4ea5e4c6e57e83",
     "info_dict": {
     "file": "NODfbab.mp4",
     "md5": "9b0636f8c0f7614afa4ea5e4c6e57e83",
     "info_dict": {
+      "uploader": "ytdl",
       "title": "test chars: \"'/\\ä<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de ."
     }
       "title": "test chars: \"'/\\ä<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de ."
     }
-
   },
   {
     "name": "TED",
     "url": "http://www.ted.com/talks/dan_dennett_on_our_consciousness.html",
     "file": "102.mp4",
   },
   {
     "name": "TED",
     "url": "http://www.ted.com/talks/dan_dennett_on_our_consciousness.html",
     "file": "102.mp4",
-    "md5": "7bc087e71d16f18f9b8ab9fa62a8a031",
+    "md5": "8cd9dfa41ee000ce658fd48fb5d89a61",
     "info_dict": {
         "title": "Dan Dennett: The illusion of consciousness",
     "info_dict": {
         "title": "Dan Dennett: The illusion of consciousness",
-        "thumbnail": "http://images.ted.com/images/ted/488_389x292.jpg"
+        "description": "md5:c6fa72e6eedbd938c9caf6b2702f5922"
     }
   },
   {
     }
   },
   {
     "file": "11741.mp4",
     "md5": "0b49f4844a068f8b33f4b7c88405862b",
     "info_dict": {
     "file": "11741.mp4",
     "md5": "0b49f4844a068f8b33f4b7c88405862b",
     "info_dict": {
-        "title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2"
+      "description": "Wer kann in die Fußstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?",
+      "title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2"
     }
   },
   {
     "name": "Generic",
     "url": "http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html",
     "file": "13601338388002.mp4",
     }
   },
   {
     "name": "Generic",
     "url": "http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html",
     "file": "13601338388002.mp4",
-    "md5": "85b90ccc9d73b4acd9138d3af4c27f89"
+    "md5": "85b90ccc9d73b4acd9138d3af4c27f89",
+    "info_dict": {
+      "uploader": "www.hodiho.fr",
+      "title": "Régis plante sa Jeep"
+    }
   },
   {
     "name": "Spiegel",
   },
   {
     "name": "Spiegel",
     "file": "wshh6a7q1ny0G34ZwuIO.mp4",
     "md5": "9d04de741161603bf7071bbf4e883186",
     "info_dict": {
     "file": "wshh6a7q1ny0G34ZwuIO.mp4",
     "md5": "9d04de741161603bf7071bbf4e883186",
     "info_dict": {
-        "title": "Video: KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick! "
+        "title": "Video: KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!"
     }
   },
   {
     }
   },
   {
   },
   {
     "name": "Tumblr",
   },
   {
     "name": "Tumblr",
-    "url": "http://birthdayproject2012.tumblr.com/post/17258355236/a-sample-video-from-leeann-if-you-need-an-idea",
-    "file": "17258355236.mp4",
-    "md5": "7c6a514d691b034ccf8567999e9e88a3",
+    "url": "http://resigno.tumblr.com/post/53364321212/e-de-extrema-importancia-que-esse-video-seja",
+    "file": "53364321212.mp4",
+    "md5": "0716d3dd51baf68a28b40fdf1251494e",
     "info_dict": {
     "info_dict": {
-        "title": "Calling all Pris! - A sample video from LeeAnn. (If you need an idea..."
+        "title": "Rafael Lemos | Tumblr"
     }
   },
   {
     }
   },
   {
         "file":"30510138.mp3",
         "md5":"f9136bf103901728f29e419d2c70f55d",
         "info_dict": {
         "file":"30510138.mp3",
         "md5":"f9136bf103901728f29e419d2c70f55d",
         "info_dict": {
-          "title":"D-D-Dance"
+          "upload_date": "20111213",
+          "description": "The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
+          "uploader": "The Royal Concept",
+          "title": "D-D-Dance"
         }
       },
       {
         "file":"47127625.mp3",
         "md5":"09b6758a018470570f8fd423c9453dd8",
         "info_dict": {
         }
       },
       {
         "file":"47127625.mp3",
         "md5":"09b6758a018470570f8fd423c9453dd8",
         "info_dict": {
-          "title":"The Royal Concept - Gimme Twice"
+          "upload_date": "20120521",
+          "description": "The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
+          "uploader": "The Royal Concept",
+          "title": "The Royal Concept - Gimme Twice"
         }
       },
       {
         "file":"47127627.mp3",
         "md5":"154abd4e418cea19c3b901f1e1306d9c",
         "info_dict": {
         }
       },
       {
         "file":"47127627.mp3",
         "md5":"154abd4e418cea19c3b901f1e1306d9c",
         "info_dict": {
-          "title":"Goldrushed"
+          "upload_date": "20120521",
+          "uploader": "The Royal Concept",
+          "title": "Goldrushed"
         }
       },
       {
         "file":"47127629.mp3",
         "md5":"2f5471edc79ad3f33a683153e96a79c1",
         "info_dict": {
         }
       },
       {
         "file":"47127629.mp3",
         "md5":"2f5471edc79ad3f33a683153e96a79c1",
         "info_dict": {
-          "title":"In the End"
+          "upload_date": "20120521",
+          "description": "The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
+          "uploader": "The Royal Concept",
+          "title": "In the End"
         }
       },
       {
         "file":"47127631.mp3",
         "md5":"f9ba87aa940af7213f98949254f1c6e2",
         "info_dict": {
         }
       },
       {
         "file":"47127631.mp3",
         "md5":"f9ba87aa940af7213f98949254f1c6e2",
         "info_dict": {
-          "title":"Knocked Up"
+          "upload_date": "20120521",
+          "description": "The Royal Concept from Stockholm\r\nFilip / David / Povel / Magnus\r\nwww.theroyalconceptband.com",
+          "uploader": "The Royal Concept",
+          "title": "Knocked Up"
         }
       },
       {
         "file":"75206121.mp3",
         "md5":"f9d1fe9406717e302980c30de4af9353",
         "info_dict": {
         }
       },
       {
         "file":"75206121.mp3",
         "md5":"f9d1fe9406717e302980c30de4af9353",
         "info_dict": {
-          "title":"World On Fire"
+          "upload_date": "20130116",
+          "description": "The unreleased track World on Fire premiered on the CW's hit show Arrow (8pm/7pm central).  \r\nAs a gift to our fans we would like to offer you a free download of the track!  ",
+          "uploader": "The Royal Concept",
+          "title": "World On Fire"
         }
       }
     ]
         }
       }
     ]
     "url": "http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0",
     "file": "zpsc0c3b9fa.mp4",
     "md5": "7dabfb92b0a31f6c16cebc0f8e60ff99",
     "url": "http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0",
     "file": "zpsc0c3b9fa.mp4",
     "md5": "7dabfb92b0a31f6c16cebc0f8e60ff99",
-    "info_dict":{
-      "title":"Tired of Link Building? Try BacklinkMyDomain.com!"
+    "info_dict": {
+      "upload_date": "20130504",
+      "uploader": "rachaneronas",
+      "title": "Tired of Link Building? Try BacklinkMyDomain.com!"
     }
   },
   {
     }
   },
   {
   },
   {
     "name": "Yahoo",
   },
   {
     "name": "Yahoo",
-    "url": "http://screen.yahoo.com/obama-celebrates-iraq-victory-27592561.html",
-    "file": "27592561.flv",
-    "md5": "c6179bed843512823fd284fa2e7f012d",
+    "url": "http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html",
+    "file": "214727115.flv",
+    "md5": "2e717f169c1be93d84d3794a00d4a325",
     "info_dict": {
     "info_dict": {
-        "title": "Obama Celebrates Iraq Victory"
+        "title": "Julian Smith & Travis Legg Watch Julian Smith"
     },
     "skip": "Requires rtmpdump"
   },
     },
     "skip": "Requires rtmpdump"
   },
       "title": "Louis C.K. Interview Pt. 1 11/3/11",
       "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one."
     }
       "title": "Louis C.K. Interview Pt. 1 11/3/11",
       "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one."
     }
+  },
+  {
+    "name": "XHamster",
+    "url": "http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html",
+    "file": "1509445.flv",
+    "md5": "9f48e0e8d58e3076bb236ff412ab62fa",
+    "info_dict": {
+      "upload_date": "20121014",
+      "uploader_id": "Ruseful2011",
+      "title": "FemaleAgent Shy beauty takes the bait"
+    }
+  },
+  {
+    "name": "Hypem",
+    "url": "http://hypem.com/track/1v6ga/BODYWORK+-+TAME",
+    "file": "1v6ga.mp3",
+    "md5": "b9cc91b5af8995e9f0c1cee04c575828",
+    "info_dict":{
+      "title":"Tame"
+    }
+  },
+  {
+    "name": "Vbox7",
+    "url": "http://vbox7.com/play:249bb972c2",
+    "file": "249bb972c2.flv",
+    "md5": "9c70d6d956f888bdc08c124acc120cfe",
+    "info_dict":{
+      "title":"Смях! Чудо - чист за секунди - Скрита камера"
+    }
+  },
+  {
+    "name": "Gametrailers",
+    "url": "http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer",
+    "file": "zbvr8i.flv",
+    "md5": "c3edbc995ab4081976e16779bd96a878",
+    "info_dict": {
+        "title": "E3 2013: Debut Trailer"
+    },
+    "skip": "Requires rtmpdump"
   }
 ]
   }
 ]
index e3eb8774caa258ddbaa51323938fbd36c4fc170d..ea6d889ba2d69a7a9c989380e97ccd92186b8fb2 100755 (executable)
Binary files a/youtube-dl and b/youtube-dl differ
diff --git a/youtube-dl.1 b/youtube-dl.1
new file mode 100644 (file)
index 0000000..28e8311
--- /dev/null
@@ -0,0 +1,361 @@
+.TH YOUTUBE\-DL 1 "" 
+.SH NAME
+.PP
+youtube\-dl
+.SH SYNOPSIS
+.PP
+\f[B]youtube\-dl\f[] OPTIONS (#options) URL [URL...]
+.SH DESCRIPTION
+.PP
+\f[B]youtube\-dl\f[] is a small command\-line program to download videos
+from YouTube.com and a few more sites.
+It requires the Python interpreter, version 2.6, 2.7, or 3.3+, and it is
+not platform specific.
+It should work on your Unix box, on Windows or on Mac OS X.
+It is released to the public domain, which means you can modify it,
+redistribute it or use it however you like.
+.SH OPTIONS
+.IP
+.nf
+\f[C]
+\-h,\ \-\-help\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ print\ this\ help\ text\ and\ exit
+\-\-version\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ print\ program\ version\ and\ exit
+\-U,\ \-\-update\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ update\ this\ program\ to\ latest\ version
+\-i,\ \-\-ignore\-errors\ \ \ \ \ \ \ \ continue\ on\ download\ errors
+\-r,\ \-\-rate\-limit\ LIMIT\ \ \ \ \ maximum\ download\ rate\ (e.g.\ 50k\ or\ 44.6m)
+\-R,\ \-\-retries\ RETRIES\ \ \ \ \ \ number\ of\ retries\ (default\ is\ 10)
+\-\-buffer\-size\ SIZE\ \ \ \ \ \ \ \ \ size\ of\ download\ buffer\ (e.g.\ 1024\ or\ 16k)
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (default\ is\ 1024)
+\-\-no\-resize\-buffer\ \ \ \ \ \ \ \ \ do\ not\ automatically\ adjust\ the\ buffer\ size.\ By
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ default,\ the\ buffer\ size\ is\ automatically\ resized
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ from\ an\ initial\ value\ of\ SIZE.
+\-\-dump\-user\-agent\ \ \ \ \ \ \ \ \ \ display\ the\ current\ browser\ identification
+\-\-user\-agent\ UA\ \ \ \ \ \ \ \ \ \ \ \ specify\ a\ custom\ user\ agent
+\-\-referer\ REF\ \ \ \ \ \ \ \ \ \ \ \ \ \ specify\ a\ custom\ referer,\ use\ if\ the\ video\ access
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ is\ restricted\ to\ one\ domain
+\-\-list\-extractors\ \ \ \ \ \ \ \ \ \ List\ all\ supported\ extractors\ and\ the\ URLs\ they
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ would\ handle
+\-\-proxy\ URL\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Use\ the\ specified\ HTTP/HTTPS\ proxy
+\-\-no\-check\-certificate\ \ \ \ \ Suppress\ HTTPS\ certificate\ validation.
+\f[]
+.fi
+.SS Video Selection:
+.IP
+.nf
+\f[C]
+\-\-playlist\-start\ NUMBER\ \ \ \ playlist\ video\ to\ start\ at\ (default\ is\ 1)
+\-\-playlist\-end\ NUMBER\ \ \ \ \ \ playlist\ video\ to\ end\ at\ (default\ is\ last)
+\-\-match\-title\ REGEX\ \ \ \ \ \ \ \ download\ only\ matching\ titles\ (regex\ or\ caseless
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ sub\-string)
+\-\-reject\-title\ REGEX\ \ \ \ \ \ \ skip\ download\ for\ matching\ titles\ (regex\ or
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ caseless\ sub\-string)
+\-\-max\-downloads\ NUMBER\ \ \ \ \ Abort\ after\ downloading\ NUMBER\ files
+\-\-min\-filesize\ SIZE\ \ \ \ \ \ \ \ Do\ not\ download\ any\ videos\ smaller\ than\ SIZE
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (e.g.\ 50k\ or\ 44.6m)
+\-\-max\-filesize\ SIZE\ \ \ \ \ \ \ \ Do\ not\ download\ any\ videos\ larger\ than\ SIZE\ (e.g.
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 50k\ or\ 44.6m)
+\-\-date\ DATE\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ download\ only\ videos\ uploaded\ in\ this\ date
+\-\-datebefore\ DATE\ \ \ \ \ \ \ \ \ \ download\ only\ videos\ uploaded\ before\ this\ date
+\-\-dateafter\ DATE\ \ \ \ \ \ \ \ \ \ \ download\ only\ videos\ uploaded\ after\ this\ date
+\f[]
+.fi
+.SS Filesystem Options:
+.IP
+.nf
+\f[C]
+\-t,\ \-\-title\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ title\ in\ file\ name\ (default)
+\-\-id\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ only\ video\ ID\ in\ file\ name
+\-l,\ \-\-literal\ \ \ \ \ \ \ \ \ \ \ \ \ \ [deprecated]\ alias\ of\ \-\-title
+\-A,\ \-\-auto\-number\ \ \ \ \ \ \ \ \ \ number\ downloaded\ files\ starting\ from\ 00000
+\-o,\ \-\-output\ TEMPLATE\ \ \ \ \ \ output\ filename\ template.\ Use\ %(title)s\ to\ get
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ the\ title,\ %(uploader)s\ for\ the\ uploader\ name,
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(uploader_id)s\ for\ the\ uploader\ nickname\ if
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ different,\ %(autonumber)s\ to\ get\ an\ automatically
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ incremented\ number,\ %(ext)s\ for\ the\ filename
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ extension,\ %(upload_date)s\ for\ the\ upload\ date
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (YYYYMMDD),\ %(extractor)s\ for\ the\ provider
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (youtube,\ metacafe,\ etc),\ %(id)s\ for\ the\ video\ id
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ ,\ %(playlist)s\ for\ the\ playlist\ the\ video\ is\ in,
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(playlist_index)s\ for\ the\ position\ in\ the
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ playlist\ and\ %%\ for\ a\ literal\ percent.\ Use\ \-\ to
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ output\ to\ stdout.\ Can\ also\ be\ used\ to\ download\ to
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ a\ different\ directory,\ for\ example\ with\ \-o\ \[aq]/my/d
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ ownloads/%(uploader)s/%(title)s\-%(id)s.%(ext)s\[aq]\ .
+\-\-autonumber\-size\ NUMBER\ \ \ Specifies\ the\ number\ of\ digits\ in\ %(autonumber)s
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ when\ it\ is\ present\ in\ output\ filename\ template\ or
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \-\-autonumber\ option\ is\ given
+\-\-restrict\-filenames\ \ \ \ \ \ \ Restrict\ filenames\ to\ only\ ASCII\ characters,\ and
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ avoid\ "&"\ and\ spaces\ in\ filenames
+\-a,\ \-\-batch\-file\ FILE\ \ \ \ \ \ file\ containing\ URLs\ to\ download\ (\[aq]\-\[aq]\ for\ stdin)
+\-w,\ \-\-no\-overwrites\ \ \ \ \ \ \ \ do\ not\ overwrite\ files
+\-c,\ \-\-continue\ \ \ \ \ \ \ \ \ \ \ \ \ resume\ partially\ downloaded\ files
+\-\-no\-continue\ \ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ resume\ partially\ downloaded\ files\ (restart
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ from\ beginning)
+\-\-cookies\ FILE\ \ \ \ \ \ \ \ \ \ \ \ \ file\ to\ read\ cookies\ from\ and\ dump\ cookie\ jar\ in
+\-\-no\-part\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ use\ .part\ files
+\-\-no\-mtime\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ use\ the\ Last\-modified\ header\ to\ set\ the
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ file\ modification\ time
+\-\-write\-description\ \ \ \ \ \ \ \ write\ video\ description\ to\ a\ .description\ file
+\-\-write\-info\-json\ \ \ \ \ \ \ \ \ \ write\ video\ metadata\ to\ a\ .info.json\ file
+\-\-write\-thumbnail\ \ \ \ \ \ \ \ \ \ write\ thumbnail\ image\ to\ disk
+\f[]
+.fi
+.SS Verbosity / Simulation Options:
+.IP
+.nf
+\f[C]
+\-q,\ \-\-quiet\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ activates\ quiet\ mode
+\-s,\ \-\-simulate\ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ download\ the\ video\ and\ do\ not\ write
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ anything\ to\ disk
+\-\-skip\-download\ \ \ \ \ \ \ \ \ \ \ \ do\ not\ download\ the\ video
+\-g,\ \-\-get\-url\ \ \ \ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ URL
+\-e,\ \-\-get\-title\ \ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ title
+\-\-get\-id\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ id
+\-\-get\-thumbnail\ \ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ thumbnail\ URL
+\-\-get\-description\ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ video\ description
+\-\-get\-filename\ \ \ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ output\ filename
+\-\-get\-format\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ simulate,\ quiet\ but\ print\ output\ format
+\-\-newline\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ output\ progress\ bar\ as\ new\ lines
+\-\-no\-progress\ \ \ \ \ \ \ \ \ \ \ \ \ \ do\ not\ print\ progress\ bar
+\-\-console\-title\ \ \ \ \ \ \ \ \ \ \ \ display\ progress\ in\ console\ titlebar
+\-v,\ \-\-verbose\ \ \ \ \ \ \ \ \ \ \ \ \ \ print\ various\ debugging\ information
+\-\-dump\-intermediate\-pages\ \ print\ downloaded\ pages\ to\ debug\ problems(very
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ verbose)
+\f[]
+.fi
+.SS Video Format Options:
+.IP
+.nf
+\f[C]
+\-f,\ \-\-format\ FORMAT\ \ \ \ \ \ \ \ video\ format\ code,\ specifiy\ the\ order\ of
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ preference\ using\ slashes:\ "\-f\ 22/17/18"
+\-\-all\-formats\ \ \ \ \ \ \ \ \ \ \ \ \ \ download\ all\ available\ video\ formats
+\-\-prefer\-free\-formats\ \ \ \ \ \ prefer\ free\ video\ formats\ unless\ a\ specific\ one
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ is\ requested
+\-\-max\-quality\ FORMAT\ \ \ \ \ \ \ highest\ quality\ format\ to\ download
+\-F,\ \-\-list\-formats\ \ \ \ \ \ \ \ \ list\ all\ available\ formats\ (currently\ youtube
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ only)
+\-\-write\-sub\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ write\ subtitle\ file\ (currently\ youtube\ only)
+\-\-only\-sub\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ [deprecated]\ alias\ of\ \-\-skip\-download
+\-\-all\-subs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ downloads\ all\ the\ available\ subtitles\ of\ the
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ video\ (currently\ youtube\ only)
+\-\-list\-subs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ lists\ all\ available\ subtitles\ for\ the\ video
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (currently\ youtube\ only)
+\-\-sub\-format\ LANG\ \ \ \ \ \ \ \ \ \ subtitle\ format\ [srt/sbv]\ (default=srt)
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (currently\ youtube\ only)
+\-\-sub\-lang\ LANG\ \ \ \ \ \ \ \ \ \ \ \ language\ of\ the\ subtitles\ to\ download\ (optional)
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ IETF\ language\ tags\ like\ \[aq]en\[aq]
+\f[]
+.fi
+.SS Authentication Options:
+.IP
+.nf
+\f[C]
+\-u,\ \-\-username\ USERNAME\ \ \ \ account\ username
+\-p,\ \-\-password\ PASSWORD\ \ \ \ account\ password
+\-n,\ \-\-netrc\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ .netrc\ authentication\ data
+\f[]
+.fi
+.SS Post\-processing Options:
+.IP
+.nf
+\f[C]
+\-x,\ \-\-extract\-audio\ \ \ \ \ \ \ \ convert\ video\ files\ to\ audio\-only\ files\ (requires
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ ffmpeg\ or\ avconv\ and\ ffprobe\ or\ avprobe)
+\-\-audio\-format\ FORMAT\ \ \ \ \ \ "best",\ "aac",\ "vorbis",\ "mp3",\ "m4a",\ "opus",\ or
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ "wav";\ best\ by\ default
+\-\-audio\-quality\ QUALITY\ \ \ \ ffmpeg/avconv\ audio\ quality\ specification,\ insert
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ a\ value\ between\ 0\ (better)\ and\ 9\ (worse)\ for\ VBR
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ or\ a\ specific\ bitrate\ like\ 128K\ (default\ 5)
+\-\-recode\-video\ FORMAT\ \ \ \ \ \ Encode\ the\ video\ to\ another\ format\ if\ necessary
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (currently\ supported:\ mp4|flv|ogg|webm)
+\-k,\ \-\-keep\-video\ \ \ \ \ \ \ \ \ \ \ keeps\ the\ video\ file\ on\ disk\ after\ the\ post\-
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ processing;\ the\ video\ is\ erased\ by\ default
+\-\-no\-post\-overwrites\ \ \ \ \ \ \ do\ not\ overwrite\ post\-processed\ files;\ the\ post\-
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ processed\ files\ are\ overwritten\ by\ default
+\f[]
+.fi
+.SH CONFIGURATION
+.PP
+You can configure youtube\-dl by placing default arguments (such as
+\f[C]\-\-extract\-audio\ \-\-no\-mtime\f[] to always extract the audio
+and not copy the mtime) into \f[C]/etc/youtube\-dl.conf\f[] and/or
+\f[C]~/.config/youtube\-dl.conf\f[].
+.SH OUTPUT TEMPLATE
+.PP
+The \f[C]\-o\f[] option allows users to indicate a template for the
+output file names.
+The basic usage is not to set any template arguments when downloading a
+single file, like in
+\f[C]youtube\-dl\ \-o\ funny_video.flv\ "http://some/video"\f[].
+However, it may contain special sequences that will be replaced when
+downloading each video.
+The special sequences have the format \f[C]%(NAME)s\f[].
+To clarify, that is a percent symbol followed by a name in parenthesis,
+followed by a lowercase S.
+Allowed names are:
+.IP \[bu] 2
+\f[C]id\f[]: The sequence will be replaced by the video identifier.
+.IP \[bu] 2
+\f[C]url\f[]: The sequence will be replaced by the video URL.
+.IP \[bu] 2
+\f[C]uploader\f[]: The sequence will be replaced by the nickname of the
+person who uploaded the video.
+.IP \[bu] 2
+\f[C]upload_date\f[]: The sequence will be replaced by the upload date
+in YYYYMMDD format.
+.IP \[bu] 2
+\f[C]title\f[]: The sequence will be replaced by the video title.
+.IP \[bu] 2
+\f[C]ext\f[]: The sequence will be replaced by the appropriate extension
+(like flv or mp4).
+.IP \[bu] 2
+\f[C]epoch\f[]: The sequence will be replaced by the Unix epoch when
+creating the file.
+.IP \[bu] 2
+\f[C]autonumber\f[]: The sequence will be replaced by a five\-digit
+number that will be increased with each download, starting at zero.
+.IP \[bu] 2
+\f[C]playlist\f[]: The name or the id of the playlist that contains the
+video.
+.IP \[bu] 2
+\f[C]playlist_index\f[]: The index of the video in the playlist, a
+five\-digit number.
+.PP
+The current default template is \f[C]%(id)s.%(ext)s\f[], but that will
+be switchted to \f[C]%(title)s\-%(id)s.%(ext)s\f[] (which can be
+requested with \f[C]\-t\f[] at the moment).
+.PP
+In some cases, you don\[aq]t want special characters such as 中, spaces,
+or &, such as when transferring the downloaded filename to a Windows
+system or the filename through an 8bit\-unsafe channel.
+In these cases, add the \f[C]\-\-restrict\-filenames\f[] flag to get a
+shorter title:
+.IP
+.nf
+\f[C]
+$\ youtube\-dl\ \-\-get\-filename\ \-o\ "%(title)s.%(ext)s"\ BaW_jenozKc
+youtube\-dl\ test\ video\ \[aq]\[aq]_ä↭𝕐.mp4\ \ \ \ #\ All\ kinds\ of\ weird\ characters
+$\ youtube\-dl\ \-\-get\-filename\ \-o\ "%(title)s.%(ext)s"\ BaW_jenozKc\ \-\-restrict\-filenames
+youtube\-dl_test_video_.mp4\ \ \ \ \ \ \ \ \ \ #\ A\ simple\ file\ name
+\f[]
+.fi
+.SH VIDEO SELECTION
+.PP
+Videos can be filtered by their upload date using the options
+\f[C]\-\-date\f[], \f[C]\-\-datebefore\f[] or \f[C]\-\-dateafter\f[],
+they accept dates in two formats:
+.IP \[bu] 2
+Absolute dates: Dates in the format \f[C]YYYYMMDD\f[].
+.IP \[bu] 2
+Relative dates: Dates in the format
+\f[C](now|today)[+\-][0\-9](day|week|month|year)(s)?\f[]
+.PP
+Examples:
+.IP
+.nf
+\f[C]
+$\ youtube\-dl\ \-\-dateafter\ now\-6months\ #will\ only\ download\ the\ videos\ uploaded\ in\ the\ last\ 6\ months
+$\ youtube\-dl\ \-\-date\ 19700101\ #will\ only\ download\ the\ videos\ uploaded\ in\ January\ 1,\ 1970
+$\ youtube\-dl\ \-\-dateafter\ 20000101\ \-\-datebefore\ 20100101\ #will\ only\ download\ the\ videos\ uploaded\ between\ 2000\ and\ 2010
+\f[]
+.fi
+.SH FAQ
+.SS Can you please put the \-b option back?
+.PP
+Most people asking this question are not aware that youtube\-dl now
+defaults to downloading the highest available quality as reported by
+YouTube, which will be 1080p or 720p in some cases, so you no longer
+need the \-b option.
+For some specific videos, maybe YouTube does not report them to be
+available in a specific high quality format you\[aq]\[aq]re interested
+in.
+In that case, simply request it with the \-f option and youtube\-dl will
+try to download it.
+.SS I get HTTP error 402 when trying to download a video. What\[aq]s
+this?
+.PP
+Apparently YouTube requires you to pass a CAPTCHA test if you download
+too much.
+We\[aq]\[aq]re considering to provide a way to let you solve the
+CAPTCHA (https://github.com/rg3/youtube-dl/issues/154), but at the
+moment, your best course of action is pointing a webbrowser to the
+youtube URL, solving the CAPTCHA, and restart youtube\-dl.
+.SS I have downloaded a video but how can I play it?
+.PP
+Once the video is fully downloaded, use any video player, such as
+vlc (http://www.videolan.org) or mplayer (http://www.mplayerhq.hu/).
+.SS The links provided by youtube\-dl \-g are not working anymore
+.PP
+The URLs youtube\-dl outputs require the downloader to have the correct
+cookies.
+Use the \f[C]\-\-cookies\f[] option to write the required cookies into a
+file, and advise your downloader to read cookies from that file.
+Some sites also require a common user agent to be used, use
+\f[C]\-\-dump\-user\-agent\f[] to see the one in use by youtube\-dl.
+.SS ERROR: no fmt_url_map or conn information found in video info
+.PP
+youtube has switched to a new video info format in July 2011 which is
+not supported by old versions of youtube\-dl.
+You can update youtube\-dl with \f[C]sudo\ youtube\-dl\ \-\-update\f[].
+.SS ERROR: unable to download video
+.PP
+youtube requires an additional signature since September 2012 which is
+not supported by old versions of youtube\-dl.
+You can update youtube\-dl with \f[C]sudo\ youtube\-dl\ \-\-update\f[].
+.SS SyntaxError: Non\-ASCII character
+.PP
+The error
+.IP
+.nf
+\f[C]
+File\ "youtube\-dl",\ line\ 2
+SyntaxError:\ Non\-ASCII\ character\ \[aq]\\x93\[aq]\ ...
+\f[]
+.fi
+.PP
+means you\[aq]re using an outdated version of Python.
+Please update to Python 2.6 or 2.7.
+.SS What is this binary file? Where has the code gone?
+.PP
+Since June 2012 (#342) youtube\-dl is packed as an executable zipfile,
+simply unzip it (might need renaming to \f[C]youtube\-dl.zip\f[] first
+on some systems) or clone the git repository, as laid out above.
+If you modify the code, you can run it by executing the
+\f[C]__main__.py\f[] file.
+To recompile the executable, run \f[C]make\ youtube\-dl\f[].
+.SS The exe throws a \f[I]Runtime error from Visual C++\f[]
+.PP
+To run the exe you need to install first the Microsoft Visual C++ 2008
+Redistributable
+Package (http://www.microsoft.com/en-us/download/details.aspx?id=29).
+.SH COPYRIGHT
+.PP
+youtube\-dl is released into the public domain by the copyright holders.
+.PP
+This README file was originally written by Daniel Bolton
+(<https://github.com/dbbolton>) and is likewise released into the public
+domain.
+.SH BUGS
+.PP
+Bugs and suggestions should be reported at:
+<https://github.com/rg3/youtube-dl/issues>
+.PP
+Please include:
+.IP \[bu] 2
+Your exact command line, like
+\f[C]youtube\-dl\ \-t\ "http://www.youtube.com/watch?v=uHlDtZ6Oc3s&feature=channel_video_title"\f[].
+A common mistake is not to escape the \f[C]&\f[].
+Putting URLs in quotes should solve this problem.
+.IP \[bu] 2
+If possible re\-run the command with \f[C]\-\-verbose\f[], and include
+the full output, it is really helpful to us.
+.IP \[bu] 2
+The output of \f[C]youtube\-dl\ \-\-version\f[]
+.IP \[bu] 2
+The output of \f[C]python\ \-\-version\f[]
+.IP \[bu] 2
+The name and version of your Operating System ("Ubuntu 11.04 x64" or
+"Windows 7 x64" is usually enough).
+.PP
+For discussions, join us in the irc channel #youtube\-dl on freenode.
diff --git a/youtube-dl.bash-completion b/youtube-dl.bash-completion
new file mode 100644 (file)
index 0000000..e4f73e3
--- /dev/null
@@ -0,0 +1,14 @@
+__youtube-dl()
+{
+    local cur prev opts
+    COMPREPLY=()
+    cur="${COMP_WORDS[COMP_CWORD]}"
+    opts="--help --version --update --ignore-errors --rate-limit --retries --buffer-size --no-resize-buffer --dump-user-agent --user-agent --referer --list-extractors --proxy --no-check-certificate --test --playlist-start --playlist-end --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --title --id --literal --auto-number --output --autonumber-size --restrict-filenames --batch-file --no-overwrites --continue --no-continue --cookies --no-part --no-mtime --write-description --write-info-json --write-thumbnail --quiet --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-filename --get-format --newline --no-progress --console-title --verbose --dump-intermediate-pages --format --all-formats --prefer-free-formats --max-quality --list-formats --write-sub --only-sub --all-subs --list-subs --sub-format --sub-lang --username --password --netrc --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites"
+
+    if [[ ${cur} == * ]] ; then
+        COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
+        return 0
+    fi
+}
+
+complete -F __youtube-dl youtube-dl
diff --git a/youtube-dl.exe b/youtube-dl.exe
deleted file mode 100644 (file)
index 45eee04..0000000
Binary files a/youtube-dl.exe and /dev/null differ
index 49f3a871261d3816ec0b537ae60644691dc52f19..f4ce48046f08b21d66fcdecd4e1979cedebfd5dc 100644 (file)
@@ -322,6 +322,9 @@ class FileDownloader(object):
         filetime = timeconvert(timestr)
         if filetime is None:
             return filetime
         filetime = timeconvert(timestr)
         if filetime is None:
             return filetime
+        # Ignore obviously invalid dates
+        if filetime == 0:
+            return
         try:
             os.utime(filename, (time.time(), filetime))
         except:
         try:
             os.utime(filename, (time.time(), filetime))
         except:
@@ -539,6 +542,11 @@ class FileDownloader(object):
                          'playlist': playlist, 
                          'playlist_index': i + playliststart,
                          }
                          'playlist': playlist, 
                          'playlist_index': i + playliststart,
                          }
+                if not 'extractor' in entry:
+                    # We set the extractor, if it's an url it will be set then to
+                    # the new extractor, but if it's already a video we must make
+                    # sure it's present: see issue #877
+                    entry['extractor'] = ie_result['extractor']
                 entry_result = self.process_ie_result(entry,
                                                       download=download,
                                                       extra_info=extra)
                 entry_result = self.process_ie_result(entry,
                                                       download=download,
                                                       extra_info=extra)
@@ -758,21 +766,21 @@ class FileDownloader(object):
         except (OSError, IOError):
             self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
             return False
         except (OSError, IOError):
             self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
             return False
+        verbosity_option = '--verbose' if self.params.get('verbose', False) else '--quiet'
 
         # Download using rtmpdump. rtmpdump returns exit code 2 when
         # the connection was interrumpted and resuming appears to be
         # possible. This is part of rtmpdump's normal usage, AFAIK.
 
         # Download using rtmpdump. rtmpdump returns exit code 2 when
         # the connection was interrumpted and resuming appears to be
         # possible. This is part of rtmpdump's normal usage, AFAIK.
-        basic_args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename]
-        if self.params.get('verbose', False): basic_args[1] = '-v'
+        basic_args = ['rtmpdump', verbosity_option, '-r', url, '-o', tmpfilename]
         if player_url is not None:
         if player_url is not None:
-            basic_args += ['-W', player_url]
+            basic_args += ['--swfVfy', player_url]
         if page_url is not None:
             basic_args += ['--pageUrl', page_url]
         if play_path is not None:
         if page_url is not None:
             basic_args += ['--pageUrl', page_url]
         if play_path is not None:
-            basic_args += ['-y', play_path]
+            basic_args += ['--playpath', play_path]
         if tc_url is not None:
             basic_args += ['--tcUrl', url]
         if tc_url is not None:
             basic_args += ['--tcUrl', url]
-        args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
+        args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
         if self.params.get('verbose', False):
             try:
                 import pipes
         if self.params.get('verbose', False):
             try:
                 import pipes
@@ -810,6 +818,37 @@ class FileDownloader(object):
             self.report_error(u'rtmpdump exited with code %d' % retval)
             return False
 
             self.report_error(u'rtmpdump exited with code %d' % retval)
             return False
 
+    def _download_with_mplayer(self, filename, url):
+        self.report_destination(filename)
+        tmpfilename = self.temp_name(filename)
+
+        args = ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url]
+        # Check for mplayer first
+        try:
+            subprocess.call(['mplayer', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
+        except (OSError, IOError):
+            self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0] )
+            return False
+
+        # Download using mplayer. 
+        retval = subprocess.call(args)
+        if retval == 0:
+            fsize = os.path.getsize(encodeFilename(tmpfilename))
+            self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
+            self.try_rename(tmpfilename, filename)
+            self._hook_progress({
+                'downloaded_bytes': fsize,
+                'total_bytes': fsize,
+                'filename': filename,
+                'status': 'finished',
+            })
+            return True
+        else:
+            self.to_stderr(u"\n")
+            self.report_error(u'mplayer exited with code %d' % retval)
+            return False
+
+
     def _do_download(self, filename, info_dict):
         url = info_dict['url']
 
     def _do_download(self, filename, info_dict):
         url = info_dict['url']
 
@@ -830,6 +869,10 @@ class FileDownloader(object):
                                                 info_dict.get('play_path', None),
                                                 info_dict.get('tc_url', None))
 
                                                 info_dict.get('play_path', None),
                                                 info_dict.get('tc_url', None))
 
+        # Attempt to download using mplayer
+        if url.startswith('mms') or url.startswith('rtsp'):
+            return self._download_with_mplayer(filename, url)
+
         tmpfilename = self.temp_name(filename)
         stream = None
 
         tmpfilename = self.temp_name(filename)
         stream = None
 
index d318b4b0340c5912c5232ad4df663a0d86b3b247..17e0f83232856270d4e90be6bf18148a4db13d18 100755 (executable)
@@ -191,6 +191,47 @@ class InfoExtractor(object):
             video_info['title'] = playlist_title
         return video_info
 
             video_info['title'] = playlist_title
         return video_info
 
+    def _search_regex(self, pattern, string, name, default=None, fatal=True, flags=0):
+        """
+        Perform a regex search on the given string, using a single or a list of
+        patterns returning the first matching group.
+        In case of failure return a default value or raise a WARNING or a
+        ExtractorError, depending on fatal, specifying the field name.
+        """
+        if isinstance(pattern, (str, compat_str, compiled_regex_type)):
+            mobj = re.search(pattern, string, flags)
+        else:
+            for p in pattern:
+                mobj = re.search(p, string, flags)
+                if mobj: break
+
+        if sys.stderr.isatty() and os.name != 'nt':
+            _name = u'\033[0;34m%s\033[0m' % name
+        else:
+            _name = name
+
+        if mobj:
+            # return the first matching group
+            return next(g for g in mobj.groups() if g is not None)
+        elif default is not None:
+            return default
+        elif fatal:
+            raise ExtractorError(u'Unable to extract %s' % _name)
+        else:
+            self._downloader.report_warning(u'unable to extract %s; '
+                u'please report this issue on GitHub.' % _name)
+            return None
+
+    def _html_search_regex(self, pattern, string, name, default=None, fatal=True, flags=0):
+        """
+        Like _search_regex, but strips HTML tags and unescapes entities.
+        """
+        res = self._search_regex(pattern, string, name, default, fatal, flags)
+        if res:
+            return clean_html(res).strip()
+        else:
+            return res
+
 class SearchInfoExtractor(InfoExtractor):
     """
     Base class for paged search queries extractors.
 class SearchInfoExtractor(InfoExtractor):
     """
     Base class for paged search queries extractors.
@@ -376,6 +417,34 @@ class YoutubeIE(InfoExtractor):
             return (u'Did not fetch video subtitles', None, None)
         return (None, sub_lang, sub)
 
             return (u'Did not fetch video subtitles', None, None)
         return (None, sub_lang, sub)
 
+    def _request_automatic_caption(self, video_id, webpage):
+        """We need the webpage for getting the captions url, pass it as an
+           argument to speed up the process."""
+        sub_lang = self._downloader.params.get('subtitleslang')
+        sub_format = self._downloader.params.get('subtitlesformat')
+        self.to_screen(u'%s: Looking for automatic captions' % video_id)
+        mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
+        err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang
+        if mobj is None:
+            return [(err_msg, None, None)]
+        player_config = json.loads(mobj.group(1))
+        try:
+            args = player_config[u'args']
+            caption_url = args[u'ttsurl']
+            timestamp = args[u'timestamp']
+            params = compat_urllib_parse.urlencode({
+                'lang': 'en',
+                'tlang': sub_lang,
+                'fmt': sub_format,
+                'ts': timestamp,
+                'kind': 'asr',
+            })
+            subtitles_url = caption_url + '&' + params
+            sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions')
+            return [(None, sub_lang, sub)]
+        except KeyError:
+            return [(err_msg, None, None)]
+
     def _extract_subtitle(self, video_id):
         """
         Return a list with a tuple:
     def _extract_subtitle(self, video_id):
         """
         Return a list with a tuple:
@@ -623,7 +692,14 @@ class YoutubeIE(InfoExtractor):
             if video_subtitles:
                 (sub_error, sub_lang, sub) = video_subtitles[0]
                 if sub_error:
             if video_subtitles:
                 (sub_error, sub_lang, sub) = video_subtitles[0]
                 if sub_error:
-                    self._downloader.report_error(sub_error)
+                    # We try with the automatic captions
+                    video_subtitles = self._request_automatic_caption(video_id, video_webpage)
+                    (sub_error_auto, sub_lang, sub) = video_subtitles[0]
+                    if sub is not None:
+                        pass
+                    else:
+                        # We report the original error
+                        self._downloader.report_error(sub_error)
 
         if self._downloader.params.get('allsubtitles', False):
             video_subtitles = self._extract_all_subtitles(video_id)
 
         if self._downloader.params.get('allsubtitles', False):
             video_subtitles = self._extract_all_subtitles(video_id)
@@ -864,16 +940,10 @@ class DailymotionIE(InfoExtractor):
         video_title = unescapeHTML(mobj.group('title'))
 
         video_uploader = None
         video_title = unescapeHTML(mobj.group('title'))
 
         video_uploader = None
-        mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>', webpage)
-        if mobj is None:
-            # lookin for official user
-            mobj_official = re.search(r'<span rel="author"[^>]+?>([^<]+?)</span>', webpage)
-            if mobj_official is None:
-                self._downloader.report_warning(u'unable to extract uploader nickname')
-            else:
-                video_uploader = mobj_official.group(1)
-        else:
-            video_uploader = mobj.group(1)
+        video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
+                                             # Looking for official user
+                                             r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
+                                            webpage, 'video uploader')
 
         video_upload_date = None
         mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
 
         video_upload_date = None
         mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
@@ -929,18 +999,13 @@ class PhotobucketIE(InfoExtractor):
             }]
 
         # We try looking in other parts of the webpage
             }]
 
         # We try looking in other parts of the webpage
-        mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract media URL')
-        mediaURL = compat_urllib_parse.unquote(mobj.group(1))
-
-        video_url = mediaURL
+        video_url = self._search_regex(r'<link rel="video_src" href=".*\?file=([^"]+)" />',
+            webpage, u'video URL')
 
         mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
         if mobj is None:
             raise ExtractorError(u'Unable to extract title')
         video_title = mobj.group(1).decode('utf-8')
 
         mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
         if mobj is None:
             raise ExtractorError(u'Unable to extract title')
         video_title = mobj.group(1).decode('utf-8')
-
         video_uploader = mobj.group(2).decode('utf-8')
 
         return [{
         video_uploader = mobj.group(2).decode('utf-8')
 
         return [{
@@ -1025,7 +1090,7 @@ class VimeoIE(InfoExtractor):
     """Information extractor for vimeo.com."""
 
     # _VALID_URL matches Vimeo URLs
     """Information extractor for vimeo.com."""
 
     # _VALID_URL matches Vimeo URLs
-    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)'
+    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)'
     IE_NAME = u'vimeo'
 
     def _real_extract(self, url, new_video=True):
     IE_NAME = u'vimeo'
 
     def _real_extract(self, url, new_video=True):
@@ -1037,7 +1102,7 @@ class VimeoIE(InfoExtractor):
         video_id = mobj.group('id')
         if not mobj.group('proto'):
             url = 'https://' + url
         video_id = mobj.group('id')
         if not mobj.group('proto'):
             url = 'https://' + url
-        if mobj.group('direct_link'):
+        if mobj.group('direct_link') or mobj.group('pro'):
             url = 'https://vimeo.com/' + video_id
 
         # Retrieve video webpage to extract further information
             url = 'https://vimeo.com/' + video_id
 
         # Retrieve video webpage to extract further information
@@ -1064,7 +1129,7 @@ class VimeoIE(InfoExtractor):
 
         # Extract uploader and uploader_id
         video_uploader = config["video"]["owner"]["name"]
 
         # Extract uploader and uploader_id
         video_uploader = config["video"]["owner"]["name"]
-        video_uploader_id = config["video"]["owner"]["url"].split('/')[-1]
+        video_uploader_id = config["video"]["owner"]["url"].split('/')[-1] if config["video"]["owner"]["url"] else None
 
         # Extract video thumbnail
         video_thumbnail = config["video"]["thumbnail"]
 
         # Extract video thumbnail
         video_thumbnail = config["video"]["thumbnail"]
@@ -1338,6 +1403,9 @@ class GenericIE(InfoExtractor):
         if mobj is None:
             # Broaden the search a little bit: JWPlayer JS loader
             mobj = re.search(r'[^A-Za-z0-9]?file:\s*["\'](http[^\'"&]*)', webpage)
         if mobj is None:
             # Broaden the search a little bit: JWPlayer JS loader
             mobj = re.search(r'[^A-Za-z0-9]?file:\s*["\'](http[^\'"&]*)', webpage)
+        if mobj is None:
+            # Try to find twitter cards info
+            mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
         if mobj is None:
             raise ExtractorError(u'Invalid URL: %s' % url)
 
         if mobj is None:
             raise ExtractorError(u'Invalid URL: %s' % url)
 
@@ -1359,16 +1427,12 @@ class GenericIE(InfoExtractor):
         #   Site Name | Video Title
         #   Video Title - Tagline | Site Name
         # and so on and so forth; it's just not practical
         #   Site Name | Video Title
         #   Video Title - Tagline | Site Name
         # and so on and so forth; it's just not practical
-        mobj = re.search(r'<title>(.*)</title>', webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract title')
-        video_title = mobj.group(1)
+        video_title = self._html_search_regex(r'<title>(.*)</title>',
+            webpage, u'video title')
 
         # video uploader is domain name
 
         # video uploader is domain name
-        mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract title')
-        video_uploader = mobj.group(1)
+        video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
+            url, u'video uploader')
 
         return [{
             'id':       video_id,
 
         return [{
             'id':       video_id,
@@ -1389,7 +1453,6 @@ class YoutubeSearchIE(SearchInfoExtractor):
 
     def report_download_page(self, query, pagenum):
         """Report attempt to download search page with given number."""
 
     def report_download_page(self, query, pagenum):
         """Report attempt to download search page with given number."""
-        query = query.decode(preferredencoding())
         self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
 
     def _get_n_results(self, query, n):
         self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
 
     def _get_n_results(self, query, n):
@@ -1507,7 +1570,7 @@ class YoutubePlaylistIE(InfoExtractor):
                      |
                         ((?:PL|EC|UU)[0-9A-Za-z-_]{10,})
                      )"""
                      |
                         ((?:PL|EC|UU)[0-9A-Za-z-_]{10,})
                      )"""
-    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json'
+    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
     _MAX_RESULTS = 50
     IE_NAME = u'youtube:playlist'
 
     _MAX_RESULTS = 50
     IE_NAME = u'youtube:playlist'
 
@@ -1768,10 +1831,7 @@ class DepositFilesIE(InfoExtractor):
         file_extension = os.path.splitext(file_url)[1][1:]
 
         # Search for file title
         file_extension = os.path.splitext(file_url)[1][1:]
 
         # Search for file title
-        mobj = re.search(r'<b title="(.*?)">', webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract title')
-        file_title = mobj.group(1).decode('utf-8')
+        file_title = self._search_regex(r'<b title="(.*?)">', webpage, u'title')
 
         return [{
             'id':       file_id.decode('utf-8'),
 
         return [{
             'id':       file_id.decode('utf-8'),
@@ -1865,10 +1925,8 @@ class FacebookIE(InfoExtractor):
         video_duration = int(video_data['video_duration'])
         thumbnail = video_data['thumbnail_src']
 
         video_duration = int(video_data['video_duration'])
         thumbnail = video_data['thumbnail_src']
 
-        m = re.search('<h2 class="uiHeaderTitle">([^<]+)</h2>', webpage)
-        if not m:
-            raise ExtractorError(u'Cannot find title in webpage')
-        video_title = unescapeHTML(m.group(1))
+        video_title = self._html_search_regex('<h2 class="uiHeaderTitle">([^<]+)</h2>',
+            webpage, u'title')
 
         info = {
             'id': video_id,
 
         info = {
             'id': video_id,
@@ -1884,7 +1942,7 @@ class FacebookIE(InfoExtractor):
 class BlipTVIE(InfoExtractor):
     """Information extractor for blip.tv"""
 
 class BlipTVIE(InfoExtractor):
     """Information extractor for blip.tv"""
 
-    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv(/.+)$'
+    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(.+)$'
     _URL_EXT = r'^.*\.([a-z0-9]+)$'
     IE_NAME = u'blip.tv'
 
     _URL_EXT = r'^.*\.([a-z0-9]+)$'
     IE_NAME = u'blip.tv'
 
@@ -1897,6 +1955,10 @@ class BlipTVIE(InfoExtractor):
         if mobj is None:
             raise ExtractorError(u'Invalid URL: %s' % url)
 
         if mobj is None:
             raise ExtractorError(u'Invalid URL: %s' % url)
 
+        # See https://github.com/rg3/youtube-dl/issues/857
+        api_mobj = re.match(r'http://a\.blip\.tv/api\.swf#(?P<video_id>[\d\w]+)', url)
+        if api_mobj is not None:
+            url = 'http://blip.tv/play/g_%s' % api_mobj.group('video_id')
         urlp = compat_urllib_parse_urlparse(url)
         if urlp.path.startswith('/play/'):
             request = compat_urllib_request.Request(url)
         urlp = compat_urllib_parse_urlparse(url)
         if urlp.path.startswith('/play/'):
             request = compat_urllib_request.Request(url)
@@ -2026,15 +2088,10 @@ class MyVideoIE(InfoExtractor):
             self.report_extraction(video_id)
             video_url = mobj.group(1) + '.flv'
 
             self.report_extraction(video_id)
             video_url = mobj.group(1) + '.flv'
 
-            mobj = re.search('<title>([^<]+)</title>', webpage)
-            if mobj is None:
-                raise ExtractorError(u'Unable to extract title')
-            video_title = mobj.group(1)
+            video_title = self._html_search_regex('<title>([^<]+)</title>',
+                webpage, u'title')
 
 
-            mobj = re.search('[.](.+?)$', video_url)
-            if mobj is None:
-                raise ExtractorError(u'Unable to extract extention')
-            video_ext = mobj.group(1)
+            video_ext = self._search_regex('[.](.+?)$', video_url, u'extension')
 
             return [{
                 'id':       video_id,
 
             return [{
                 'id':       video_id,
@@ -2082,25 +2139,23 @@ class MyVideoIE(InfoExtractor):
         # extracting infos
         self.report_extraction(video_id)
 
         # extracting infos
         self.report_extraction(video_id)
 
+        video_url = None
         mobj = re.search('connectionurl=\'(.*?)\'', dec_data)
         mobj = re.search('connectionurl=\'(.*?)\'', dec_data)
-        if mobj is None:
-            raise ExtractorError(u'unable to extract rtmpurl')
-        video_rtmpurl = compat_urllib_parse.unquote(mobj.group(1))
-        if 'myvideo2flash' in video_rtmpurl:
-            self._downloader.report_warning(u'forcing RTMPT ...')
-            video_rtmpurl = video_rtmpurl.replace('rtmpe://', 'rtmpt://')
-
-        # extract non rtmp videos
-        if (video_rtmpurl is None) or (video_rtmpurl == ''):
+        if mobj:
+            video_url = compat_urllib_parse.unquote(mobj.group(1))
+            if 'myvideo2flash' in video_url:
+                self._downloader.report_warning(u'forcing RTMPT ...')
+                video_url = video_url.replace('rtmpe://', 'rtmpt://')
+
+        if not video_url:
+            # extract non rtmp videos
             mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
             if mobj is None:
                 raise ExtractorError(u'unable to extract url')
             mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
             if mobj is None:
                 raise ExtractorError(u'unable to extract url')
-            video_rtmpurl = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2))
+            video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2))
 
 
-        mobj = re.search('source=\'(.*?)\'', dec_data)
-        if mobj is None:
-            raise ExtractorError(u'unable to extract swfobj')
-        video_file     = compat_urllib_parse.unquote(mobj.group(1))
+        video_file = self._search_regex('source=\'(.*?)\'', dec_data, u'video file')
+        video_file = compat_urllib_parse.unquote(video_file)
 
         if not video_file.endswith('f4m'):
             ppath, prefix = video_file.split('.')
 
         if not video_file.endswith('f4m'):
             ppath, prefix = video_file.split('.')
@@ -2112,20 +2167,16 @@ class MyVideoIE(InfoExtractor):
                 video_filepath + video_file
             ).replace('.f4m', '.m3u8')
 
                 video_filepath + video_file
             ).replace('.f4m', '.m3u8')
 
-        mobj = re.search('swfobject.embedSWF\(\'(.+?)\'', webpage)
-        if mobj is None:
-            raise ExtractorError(u'unable to extract swfobj')
-        video_swfobj = compat_urllib_parse.unquote(mobj.group(1))
+        video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, u'swfobj')
+        video_swfobj = compat_urllib_parse.unquote(video_swfobj)
 
 
-        mobj = re.search("<h1(?: class='globalHd')?>(.*?)</h1>", webpage)
-        if mobj is None:
-            raise ExtractorError(u'unable to extract title')
-        video_title = mobj.group(1)
+        video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
+            webpage, u'title')
 
         return [{
             'id':                 video_id,
 
         return [{
             'id':                 video_id,
-            'url':                video_rtmpurl,
-            'tc_url':             video_rtmpurl,
+            'url':                video_url,
+            'tc_url':             video_url,
             'uploader':           None,
             'upload_date':        None,
             'title':              video_title,
             'uploader':           None,
             'upload_date':        None,
             'title':              video_title,
@@ -2136,6 +2187,7 @@ class MyVideoIE(InfoExtractor):
             'player_url':         video_swfobj,
         }]
 
             'player_url':         video_swfobj,
         }]
 
+
 class ComedyCentralIE(InfoExtractor):
     """Information extractor for The Daily Show and Colbert Report """
 
 class ComedyCentralIE(InfoExtractor):
     """Information extractor for The Daily Show and Colbert Report """
 
@@ -2317,19 +2369,25 @@ class EscapistIE(InfoExtractor):
         showName = mobj.group('showname')
         videoId = mobj.group('episode')
 
         showName = mobj.group('showname')
         videoId = mobj.group('episode')
 
-        self.report_extraction(showName)
-        webPage = self._download_webpage(url, showName)
+        self.report_extraction(videoId)
+        webpage = self._download_webpage(url, videoId)
+
+        videoDesc = self._html_search_regex('<meta name="description" content="([^"]*)"',
+            webpage, u'description', fatal=False)
+
+        imgUrl = self._html_search_regex('<meta property="og:image" content="([^"]*)"',
+            webpage, u'thumbnail', fatal=False)
+
+        playerUrl = self._html_search_regex('<meta property="og:video" content="([^"]*)"',
+            webpage, u'player url')
+
+        title = self._html_search_regex('<meta name="title" content="([^"]*)"',
+            webpage, u'player url').split(' : ')[-1]
 
 
-        descMatch = re.search('<meta name="description" content="([^"]*)"', webPage)
-        description = unescapeHTML(descMatch.group(1))
-        imgMatch = re.search('<meta property="og:image" content="([^"]*)"', webPage)
-        imgUrl = unescapeHTML(imgMatch.group(1))
-        playerUrlMatch = re.search('<meta property="og:video" content="([^"]*)"', webPage)
-        playerUrl = unescapeHTML(playerUrlMatch.group(1))
-        configUrlMatch = re.search('config=(.*)$', playerUrl)
-        configUrl = compat_urllib_parse.unquote(configUrlMatch.group(1))
+        configUrl = self._search_regex('config=(.*)$', playerUrl, u'config url')
+        configUrl = compat_urllib_parse.unquote(configUrl)
 
 
-        configJSON = self._download_webpage(configUrl, showName,
+        configJSON = self._download_webpage(configUrl, videoId,
                                             u'Downloading configuration',
                                             u'unable to download configuration')
 
                                             u'Downloading configuration',
                                             u'unable to download configuration')
 
@@ -2349,10 +2407,10 @@ class EscapistIE(InfoExtractor):
             'url': videoUrl,
             'uploader': showName,
             'upload_date': None,
             'url': videoUrl,
             'uploader': showName,
             'upload_date': None,
-            'title': showName,
+            'title': title,
             'ext': 'mp4',
             'thumbnail': imgUrl,
             'ext': 'mp4',
             'thumbnail': imgUrl,
-            'description': description,
+            'description': videoDesc,
             'player_url': playerUrl,
         }
 
             'player_url': playerUrl,
         }
 
@@ -2437,26 +2495,17 @@ class XVideosIE(InfoExtractor):
 
         self.report_extraction(video_id)
 
 
         self.report_extraction(video_id)
 
-
         # Extract video URL
         # Extract video URL
-        mobj = re.search(r'flv_url=(.+?)&', webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract video url')
-        video_url = compat_urllib_parse.unquote(mobj.group(1))
-
+        video_url = compat_urllib_parse.unquote(self._search_regex(r'flv_url=(.+?)&',
+            webpage, u'video URL'))
 
         # Extract title
 
         # Extract title
-        mobj = re.search(r'<title>(.*?)\s+-\s+XVID', webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract video title')
-        video_title = mobj.group(1)
-
+        video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XVID',
+            webpage, u'title')
 
         # Extract video thumbnail
 
         # Extract video thumbnail
-        mobj = re.search(r'http://(?:img.*?\.)xvideos.com/videos/thumbs/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9]+/([a-fA-F0-9.]+jpg)', webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract video thumbnail')
-        video_thumbnail = mobj.group(0)
+        video_thumbnail = self._search_regex(r'http://(?:img.*?\.)xvideos.com/videos/thumbs/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9]+/([a-fA-F0-9.]+jpg)',
+            webpage, u'thumbnail', fatal=False)
 
         info = {
             'id': video_id,
 
         info = {
             'id': video_id,
@@ -2613,16 +2662,12 @@ class InfoQIE(InfoExtractor):
         video_url = 'rtmpe://video.infoq.com/cfx/st/' + real_id
 
         # Extract title
         video_url = 'rtmpe://video.infoq.com/cfx/st/' + real_id
 
         # Extract title
-        mobj = re.search(r'contentTitle = "(.*?)";', webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract video title')
-        video_title = mobj.group(1)
+        video_title = self._search_regex(r'contentTitle = "(.*?)";',
+            webpage, u'title')
 
         # Extract description
 
         # Extract description
-        video_description = u'No description available.'
-        mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', webpage)
-        if mobj is not None:
-            video_description = mobj.group(1)
+        video_description = self._html_search_regex(r'<meta name="description" content="(.*)"(?:\s*/)?>',
+            webpage, u'description', fatal=False)
 
         video_filename = video_url.split('/')[-1]
         video_id, extension = video_filename.split('.')
 
         video_filename = video_url.split('/')[-1]
         video_id, extension = video_filename.split('.')
@@ -2793,15 +2838,10 @@ class StanfordOpenClassroomIE(InfoExtractor):
                                         note='Downloading course info page',
                                         errnote='Unable to download course info page')
 
                                         note='Downloading course info page',
                                         errnote='Unable to download course info page')
 
-            m = re.search('<h1>([^<]+)</h1>', coursepage)
-            if m:
-                info['title'] = unescapeHTML(m.group(1))
-            else:
-                info['title'] = info['id']
+            info['title'] = self._html_search_regex('<h1>([^<]+)</h1>', coursepage, 'title', default=info['id'])
 
 
-            m = re.search('<description>([^<]+)</description>', coursepage)
-            if m:
-                info['description'] = unescapeHTML(m.group(1))
+            info['description'] = self._html_search_regex('<description>([^<]+)</description>',
+                coursepage, u'description', fatal=False)
 
             links = orderedSet(re.findall('<a href="(VideoPage.php\?[^"]+)">', coursepage))
             info['list'] = [
 
             links = orderedSet(re.findall('<a href="(VideoPage.php\?[^"]+)">', coursepage))
             info['list'] = [
@@ -2862,25 +2902,17 @@ class MTVIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
 
         webpage = self._download_webpage(url, video_id)
 
-        mobj = re.search(r'<meta name="mtv_vt" content="([^"]+)"/>', webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract song name')
-        song_name = unescapeHTML(mobj.group(1).decode('iso-8859-1'))
-        mobj = re.search(r'<meta name="mtv_an" content="([^"]+)"/>', webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract performer')
-        performer = unescapeHTML(mobj.group(1).decode('iso-8859-1'))
-        video_title = performer + ' - ' + song_name
+        song_name = self._html_search_regex(r'<meta name="mtv_vt" content="([^"]+)"/>',
+            webpage, u'song name', fatal=False)
 
 
-        mobj = re.search(r'<meta name="mtvn_uri" content="([^"]+)"/>', webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to mtvn_uri')
-        mtvn_uri = mobj.group(1)
+        video_title = self._html_search_regex(r'<meta name="mtv_an" content="([^"]+)"/>',
+            webpage, u'title')
 
 
-        mobj = re.search(r'MTVN.Player.defaultPlaylistId = ([0-9]+);', webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract content id')
-        content_id = mobj.group(1)
+        mtvn_uri = self._html_search_regex(r'<meta name="mtvn_uri" content="([^"]+)"/>',
+            webpage, u'mtvn_uri', fatal=False)
+
+        content_id = self._search_regex(r'MTVN.Player.defaultPlaylistId = ([0-9]+);',
+            webpage, u'content id', fatal=False)
 
         videogen_url = 'http://www.mtv.com/player/includes/mediaGen.jhtml?uri=' + mtvn_uri + '&id=' + content_id + '&vid=' + video_id + '&ref=www.mtvn.com&viewUri=' + mtvn_uri
         self.report_extraction(video_id)
 
         videogen_url = 'http://www.mtv.com/player/includes/mediaGen.jhtml?uri=' + mtvn_uri + '&id=' + content_id + '&vid=' + video_id + '&ref=www.mtvn.com&viewUri=' + mtvn_uri
         self.report_extraction(video_id)
@@ -3028,20 +3060,15 @@ class XNXXIE(InfoExtractor):
         # Get webpage content
         webpage = self._download_webpage(url, video_id)
 
         # Get webpage content
         webpage = self._download_webpage(url, video_id)
 
-        result = re.search(self.VIDEO_URL_RE, webpage)
-        if result is None:
-            raise ExtractorError(u'Unable to extract video url')
-        video_url = compat_urllib_parse.unquote(result.group(1))
+        video_url = self._search_regex(self.VIDEO_URL_RE,
+            webpage, u'video URL')
+        video_url = compat_urllib_parse.unquote(video_url)
 
 
-        result = re.search(self.VIDEO_TITLE_RE, webpage)
-        if result is None:
-            raise ExtractorError(u'Unable to extract video title')
-        video_title = result.group(1)
+        video_title = self._html_search_regex(self.VIDEO_TITLE_RE,
+            webpage, u'title')
 
 
-        result = re.search(self.VIDEO_THUMB_RE, webpage)
-        if result is None:
-            raise ExtractorError(u'Unable to extract video thumbnail')
-        video_thumbnail = result.group(1)
+        video_thumbnail = self._search_regex(self.VIDEO_THUMB_RE,
+            webpage, u'thumbnail', fatal=False)
 
         return [{
             'id': video_id,
 
         return [{
             'id': video_id,
@@ -3061,26 +3088,6 @@ class GooglePlusIE(InfoExtractor):
     _VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)'
     IE_NAME = u'plus.google'
 
     _VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)'
     IE_NAME = u'plus.google'
 
-    def report_extract_entry(self, url):
-        """Report downloading extry"""
-        self.to_screen(u'Downloading entry: %s' % url)
-
-    def report_date(self, upload_date):
-        """Report downloading extry"""
-        self.to_screen(u'Entry date: %s' % upload_date)
-
-    def report_uploader(self, uploader):
-        """Report downloading extry"""
-        self.to_screen(u'Uploader: %s' % uploader)
-
-    def report_title(self, video_title):
-        """Report downloading extry"""
-        self.to_screen(u'Title: %s' % video_title)
-
-    def report_extract_vid_page(self, video_page):
-        """Report information extraction."""
-        self.to_screen(u'Extracting video page: %s' % video_page)
-
     def _real_extract(self, url):
         # Extract id from URL
         mobj = re.match(self._VALID_URL, url)
     def _real_extract(self, url):
         # Extract id from URL
         mobj = re.match(self._VALID_URL, url)
@@ -3093,47 +3100,31 @@ class GooglePlusIE(InfoExtractor):
         video_extension = 'flv'
 
         # Step 1, Retrieve post webpage to extract further information
         video_extension = 'flv'
 
         # Step 1, Retrieve post webpage to extract further information
-        self.report_extract_entry(post_url)
         webpage = self._download_webpage(post_url, video_id, u'Downloading entry webpage')
 
         webpage = self._download_webpage(post_url, video_id, u'Downloading entry webpage')
 
+        self.report_extraction(video_id)
+
         # Extract update date
         # Extract update date
-        upload_date = None
-        pattern = 'title="Timestamp">(.*?)</a>'
-        mobj = re.search(pattern, webpage)
-        if mobj:
-            upload_date = mobj.group(1)
+        upload_date = self._html_search_regex('title="Timestamp">(.*?)</a>',
+            webpage, u'upload date', fatal=False)
+        if upload_date:
             # Convert timestring to a format suitable for filename
             upload_date = datetime.datetime.strptime(upload_date, "%Y-%m-%d")
             upload_date = upload_date.strftime('%Y%m%d')
             # Convert timestring to a format suitable for filename
             upload_date = datetime.datetime.strptime(upload_date, "%Y-%m-%d")
             upload_date = upload_date.strftime('%Y%m%d')
-        self.report_date(upload_date)
 
         # Extract uploader
 
         # Extract uploader
-        uploader = None
-        pattern = r'rel\="author".*?>(.*?)</a>'
-        mobj = re.search(pattern, webpage)
-        if mobj:
-            uploader = mobj.group(1)
-        self.report_uploader(uploader)
+        uploader = self._html_search_regex(r'rel\="author".*?>(.*?)</a>',
+            webpage, u'uploader', fatal=False)
 
         # Extract title
         # Get the first line for title
 
         # Extract title
         # Get the first line for title
-        video_title = u'NA'
-        pattern = r'<meta name\=\"Description\" content\=\"(.*?)[\n<"]'
-        mobj = re.search(pattern, webpage)
-        if mobj:
-            video_title = mobj.group(1)
-        self.report_title(video_title)
+        video_title = self._html_search_regex(r'<meta name\=\"Description\" content\=\"(.*?)[\n<"]',
+            webpage, 'title', default=u'NA')
 
         # Step 2, Stimulate clicking the image box to launch video
 
         # Step 2, Stimulate clicking the image box to launch video
-        pattern = '"(https\://plus\.google\.com/photos/.*?)",,"image/jpeg","video"\]'
-        mobj = re.search(pattern, webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract video page URL')
-
-        video_page = mobj.group(1)
+        video_page = self._search_regex('"(https\://plus\.google\.com/photos/.*?)",,"image/jpeg","video"\]',
+            webpage, u'video page URL')
         webpage = self._download_webpage(video_page, video_id, u'Downloading video page')
         webpage = self._download_webpage(video_page, video_id, u'Downloading video page')
-        self.report_extract_vid_page(video_page)
-
 
         # Extract video links on video page
         """Extract video links of all sizes"""
 
         # Extract video links on video page
         """Extract video links of all sizes"""
@@ -3166,7 +3157,7 @@ class GooglePlusIE(InfoExtractor):
         }]
 
 class NBAIE(InfoExtractor):
         }]
 
 class NBAIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*)(\?.*)?$'
+    _VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
     IE_NAME = u'nba'
 
     def _real_extract(self, url):
     IE_NAME = u'nba'
 
     def _real_extract(self, url):
@@ -3175,28 +3166,27 @@ class NBAIE(InfoExtractor):
             raise ExtractorError(u'Invalid URL: %s' % url)
 
         video_id = mobj.group(1)
             raise ExtractorError(u'Invalid URL: %s' % url)
 
         video_id = mobj.group(1)
-        if video_id.endswith('/index.html'):
-            video_id = video_id[:-len('/index.html')]
 
         webpage = self._download_webpage(url, video_id)
 
         video_url = u'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
 
         webpage = self._download_webpage(url, video_id)
 
         video_url = u'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
-        def _findProp(rexp, default=None):
-            m = re.search(rexp, webpage)
-            if m:
-                return unescapeHTML(m.group(1))
-            else:
-                return default
 
         shortened_video_id = video_id.rpartition('/')[2]
 
         shortened_video_id = video_id.rpartition('/')[2]
-        title = _findProp(r'<meta property="og:title" content="(.*?)"', shortened_video_id).replace('NBA.com: ', '')
+        title = self._html_search_regex(r'<meta property="og:title" content="(.*?)"',
+            webpage, 'title', default=shortened_video_id).replace('NBA.com: ', '')
+
+        # It isn't there in the HTML it returns to us
+        # uploader_date = self._html_search_regex(r'<b>Date:</b> (.*?)</div>', webpage, 'upload_date', fatal=False)
+
+        description = self._html_search_regex(r'<meta name="description" (?:content|value)="(.*?)" />', webpage, 'description', fatal=False)
+
         info = {
             'id': shortened_video_id,
             'url': video_url,
             'ext': 'mp4',
             'title': title,
         info = {
             'id': shortened_video_id,
             'url': video_url,
             'ext': 'mp4',
             'title': title,
-            'uploader_date': _findProp(r'<b>Date:</b> (.*?)</div>'),
-            'description': _findProp(r'<div class="description">(.*?)</h1>'),
+            # 'uploader_date': uploader_date,
+            'description': description,
         }
         return [info]
 
         }
         return [info]
 
@@ -3344,30 +3334,21 @@ class FunnyOrDieIE(InfoExtractor):
         video_id = mobj.group('id')
         webpage = self._download_webpage(url, video_id)
 
         video_id = mobj.group('id')
         webpage = self._download_webpage(url, video_id)
 
-        m = re.search(r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"', webpage, re.DOTALL)
-        if not m:
-            raise ExtractorError(u'Unable to find video information')
-        video_url = unescapeHTML(m.group('url'))
+        video_url = self._html_search_regex(r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"',
+            webpage, u'video URL', flags=re.DOTALL)
 
 
-        m = re.search(r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>", webpage, flags=re.DOTALL)
-        if not m:
-            m = re.search(r'<title>(?P<title>[^<]+?)</title>', webpage)
-            if not m:
-                raise ExtractorError(u'Cannot find video title')
-        title = clean_html(m.group('title'))
+        title = self._html_search_regex((r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>",
+            r'<title>(?P<title>[^<]+?)</title>'), webpage, 'title', flags=re.DOTALL)
 
 
-        m = re.search(r'<meta property="og:description" content="(?P<desc>.*?)"', webpage)
-        if m:
-            desc = unescapeHTML(m.group('desc'))
-        else:
-            desc = None
+        video_description = self._html_search_regex(r'<meta property="og:description" content="(?P<desc>.*?)"',
+            webpage, u'description', fatal=False, flags=re.DOTALL)
 
         info = {
             'id': video_id,
             'url': video_url,
             'ext': 'mp4',
             'title': title,
 
         info = {
             'id': video_id,
             'url': video_url,
             'ext': 'mp4',
             'title': title,
-            'description': desc,
+            'description': video_description,
         }
         return [info]
 
         }
         return [info]
 
@@ -3378,6 +3359,8 @@ class SteamIE(InfoExtractor):
                 (?P<gameID>\d+)/?
                 (?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID
                 """
                 (?P<gameID>\d+)/?
                 (?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID
                 """
+    _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/'
+    _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'
 
     @classmethod
     def suitable(cls, url):
 
     @classmethod
     def suitable(cls, url):
@@ -3387,11 +3370,19 @@ class SteamIE(InfoExtractor):
     def _real_extract(self, url):
         m = re.match(self._VALID_URL, url, re.VERBOSE)
         gameID = m.group('gameID')
     def _real_extract(self, url):
         m = re.match(self._VALID_URL, url, re.VERBOSE)
         gameID = m.group('gameID')
-        videourl = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970' % gameID
-        self.report_age_confirmation()
+
+        videourl = self._VIDEO_PAGE_TEMPLATE % gameID
         webpage = self._download_webpage(videourl, gameID)
         webpage = self._download_webpage(videourl, gameID)
-        game_title = re.search(r'<h2 class="pageheader">(?P<game_title>.*?)</h2>', webpage).group('game_title')
-        
+
+        if re.search('<h2>Please enter your birth date to continue:</h2>', webpage) is not None:
+            videourl = self._AGECHECK_TEMPLATE % gameID
+            self.report_age_confirmation()
+            webpage = self._download_webpage(videourl, gameID)
+
+        self.report_extraction(gameID)
+        game_title = self._html_search_regex(r'<h2 class="pageheader">(.*?)</h2>',
+                                             webpage, 'game title')
+
         urlRE = r"'movie_(?P<videoID>\d+)': \{\s*FILENAME: \"(?P<videoURL>[\w:/\.\?=]+)\"(,\s*MOVIE_NAME: \"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\},"
         mweb = re.finditer(urlRE, webpage)
         namesRE = r'<span class="title">(?P<videoName>.+?)</span>'
         urlRE = r"'movie_(?P<videoID>\d+)': \{\s*FILENAME: \"(?P<videoURL>[\w:/\.\?=]+)\"(,\s*MOVIE_NAME: \"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\},"
         mweb = re.finditer(urlRE, webpage)
         namesRE = r'<span class="title">(?P<videoName>.+?)</span>'
@@ -3423,27 +3414,29 @@ class UstreamIE(InfoExtractor):
     def _real_extract(self, url):
         m = re.match(self._VALID_URL, url)
         video_id = m.group('videoID')
     def _real_extract(self, url):
         m = re.match(self._VALID_URL, url)
         video_id = m.group('videoID')
+
         video_url = u'http://tcdn.ustream.tv/video/%s' % video_id
         webpage = self._download_webpage(url, video_id)
         video_url = u'http://tcdn.ustream.tv/video/%s' % video_id
         webpage = self._download_webpage(url, video_id)
+
         self.report_extraction(video_id)
         self.report_extraction(video_id)
-        try:
-            m = re.search(r'data-title="(?P<title>.+)"',webpage)
-            title = m.group('title')
-            m = re.search(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
-                          webpage, re.DOTALL)
-            uploader = unescapeHTML(m.group('uploader').strip())
-            m = re.search(r'<link rel="image_src" href="(?P<thumb>.*?)"', webpage)
-            thumb = m.group('thumb')
-        except AttributeError:
-            raise ExtractorError(u'Unable to extract info')
+
+        video_title = self._html_search_regex(r'data-title="(?P<title>.+)"',
+            webpage, u'title')
+
+        uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
+            webpage, u'uploader', fatal=False, flags=re.DOTALL)
+
+        thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"',
+            webpage, u'thumbnail', fatal=False)
+
         info = {
         info = {
-                'id':video_id,
-                'url':video_url,
+                'id': video_id,
+                'url': video_url,
                 'ext': 'flv',
                 'ext': 'flv',
-                'title': title,
+                'title': video_title,
                 'uploader': uploader,
                 'uploader': uploader,
-                'thumbnail': thumb,
-                  }
+                'thumbnail': thumbnail,
+               }
         return info
 
 class WorldStarHipHopIE(InfoExtractor):
         return info
 
 class WorldStarHipHopIE(InfoExtractor):
@@ -3451,45 +3444,36 @@ class WorldStarHipHopIE(InfoExtractor):
     IE_NAME = u'WorldStarHipHop'
 
     def _real_extract(self, url):
     IE_NAME = u'WorldStarHipHop'
 
     def _real_extract(self, url):
-        _src_url = r'so\.addVariable\("file","(.*?)"\)'
-
         m = re.match(self._VALID_URL, url)
         video_id = m.group('id')
 
         m = re.match(self._VALID_URL, url)
         video_id = m.group('id')
 
-        webpage_src = self._download_webpage(url, video_id) 
+        webpage_src = self._download_webpage(url, video_id)
 
 
-        mobj = re.search(_src_url, webpage_src)
+        video_url = self._search_regex(r'so\.addVariable\("file","(.*?)"\)',
+            webpage_src, u'video URL')
 
 
-        if mobj is not None:
-            video_url = mobj.group(1)
-            if 'mp4' in video_url:
-                ext = 'mp4'
-            else:
-                ext = 'flv'
+        if 'mp4' in video_url:
+            ext = 'mp4'
         else:
         else:
-            raise ExtractorError(u'Cannot find video url for %s' % video_id)
+            ext = 'flv'
 
 
-        mobj = re.search(r"<title>(.*)</title>", webpage_src)
+        video_title = self._html_search_regex(r"<title>(.*)</title>",
+            webpage_src, u'title')
 
 
-        if mobj is None:
-            raise ExtractorError(u'Cannot determine title')
-        title = mobj.group(1)
-
-        mobj = re.search(r'rel="image_src" href="(.*)" />', webpage_src)
         # Getting thumbnail and if not thumbnail sets correct title for WSHH candy video.
         # Getting thumbnail and if not thumbnail sets correct title for WSHH candy video.
-        if mobj is not None:
-            thumbnail = mobj.group(1)
-        else:
+        thumbnail = self._html_search_regex(r'rel="image_src" href="(.*)" />',
+            webpage_src, u'thumbnail', fatal=False)
+
+        if not thumbnail:
             _title = r"""candytitles.*>(.*)</span>"""
             mobj = re.search(_title, webpage_src)
             if mobj is not None:
             _title = r"""candytitles.*>(.*)</span>"""
             mobj = re.search(_title, webpage_src)
             if mobj is not None:
-                title = mobj.group(1)
-            thumbnail = None
+                video_title = mobj.group(1)
 
         results = [{
                     'id': video_id,
                     'url' : video_url,
 
         results = [{
                     'id': video_id,
                     'url' : video_url,
-                    'title' : title,
+                    'title' : video_title,
                     'thumbnail' : thumbnail,
                     'ext' : ext,
                     }]
                     'thumbnail' : thumbnail,
                     'ext' : ext,
                     }]
@@ -3503,10 +3487,9 @@ class RBMARadioIE(InfoExtractor):
         video_id = m.group('videoID')
 
         webpage = self._download_webpage(url, video_id)
         video_id = m.group('videoID')
 
         webpage = self._download_webpage(url, video_id)
-        m = re.search(r'<script>window.gon = {.*?};gon\.show=(.+?);</script>', webpage)
-        if not m:
-            raise ExtractorError(u'Cannot find metadata')
-        json_data = m.group(1)
+
+        json_data = self._search_regex(r'window\.gon.*?gon\.show=(.+?);$',
+            webpage, u'json data', flags=re.MULTILINE)
 
         try:
             data = json.loads(json_data)
 
         try:
             data = json.loads(json_data)
@@ -3553,42 +3536,33 @@ class YouPornIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         if mobj is None:
             raise ExtractorError(u'Invalid URL: %s' % url)
         mobj = re.match(self._VALID_URL, url)
         if mobj is None:
             raise ExtractorError(u'Invalid URL: %s' % url)
-
         video_id = mobj.group('videoid')
 
         req = compat_urllib_request.Request(url)
         req.add_header('Cookie', 'age_verified=1')
         webpage = self._download_webpage(req, video_id)
 
         video_id = mobj.group('videoid')
 
         req = compat_urllib_request.Request(url)
         req.add_header('Cookie', 'age_verified=1')
         webpage = self._download_webpage(req, video_id)
 
-        # Get the video title
-        result = re.search(r'<h1.*?>(?P<title>.*)</h1>', webpage)
-        if result is None:
-            raise ExtractorError(u'Unable to extract video title')
-        video_title = result.group('title').strip()
-
-        # Get the video date
-        result = re.search(r'Date:</label>(?P<date>.*) </li>', webpage)
-        if result is None:
-            self._downloader.report_warning(u'unable to extract video date')
-            upload_date = None
-        else:
-            upload_date = unified_strdate(result.group('date').strip())
+        # Get JSON parameters
+        json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters')
+        try:
+            params = json.loads(json_params)
+        except:
+            raise ExtractorError(u'Invalid JSON')
 
 
-        # Get the video uploader
-        result = re.search(r'Submitted:</label>(?P<uploader>.*)</li>', webpage)
-        if result is None:
-            self._downloader.report_warning(u'unable to extract uploader')
-            video_uploader = None
-        else:
-            video_uploader = result.group('uploader').strip()
-            video_uploader = clean_html( video_uploader )
+        self.report_extraction(video_id)
+        try:
+            video_title = params['title']
+            upload_date = unified_strdate(params['release_date_f'])
+            video_description = params['description']
+            video_uploader = params['submitted_by']
+            thumbnail = params['thumbnails'][0]['image']
+        except KeyError:
+            raise ExtractorError('Missing JSON parameter: ' + sys.exc_info()[1])
 
         # Get all of the formats available
         DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
 
         # Get all of the formats available
         DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
-        result = re.search(DOWNLOAD_LIST_RE, webpage)
-        if result is None:
-            raise ExtractorError(u'Unable to extract download list')
-        download_list_html = result.group('download_list').strip()
+        download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
+            webpage, u'download list').strip()
 
         # Get all of the links from the page
         LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
 
         # Get all of the links from the page
         LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
@@ -3612,19 +3586,18 @@ class YouPornIE(InfoExtractor):
             size = format[0]
             bitrate = format[1]
             format = "-".join( format )
             size = format[0]
             bitrate = format[1]
             format = "-".join( format )
-            title = u'%s-%s-%s' % (video_title, size, bitrate)
+            title = u'%s-%s-%s' % (video_title, size, bitrate)
 
             formats.append({
                 'id': video_id,
                 'url': video_url,
                 'uploader': video_uploader,
                 'upload_date': upload_date,
 
             formats.append({
                 'id': video_id,
                 'url': video_url,
                 'uploader': video_uploader,
                 'upload_date': upload_date,
-                'title': title,
+                'title': video_title,
                 'ext': extension,
                 'format': format,
                 'ext': extension,
                 'format': format,
-                'thumbnail': None,
-                'description': None,
-                'player_url': None
+                'thumbnail': thumbnail,
+                'description': video_description
             })
 
         if self._downloader.params.get('listformats', None):
             })
 
         if self._downloader.params.get('listformats', None):
@@ -3665,17 +3638,13 @@ class PornotubeIE(InfoExtractor):
 
         # Get the video URL
         VIDEO_URL_RE = r'url: "(?P<url>http://video[0-9].pornotube.com/.+\.flv)",'
 
         # Get the video URL
         VIDEO_URL_RE = r'url: "(?P<url>http://video[0-9].pornotube.com/.+\.flv)",'
-        result = re.search(VIDEO_URL_RE, webpage)
-        if result is None:
-            raise ExtractorError(u'Unable to extract video url')
-        video_url = compat_urllib_parse.unquote(result.group('url'))
+        video_url = self._search_regex(VIDEO_URL_RE, webpage, u'video url')
+        video_url = compat_urllib_parse.unquote(video_url)
 
         #Get the uploaded date
         VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
 
         #Get the uploaded date
         VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
-        result = re.search(VIDEO_UPLOADED_RE, webpage)
-        if result is None:
-            raise ExtractorError(u'Unable to extract video title')
-        upload_date = unified_strdate(result.group('date'))
+        upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, u'upload date', fatal=False)
+        if upload_date: upload_date = unified_strdate(upload_date)
 
         info = {'id': video_id,
                 'url': video_url,
 
         info = {'id': video_id,
                 'url': video_url,
@@ -3702,10 +3671,8 @@ class YouJizzIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         # Get the video title
         webpage = self._download_webpage(url, video_id)
 
         # Get the video title
-        result = re.search(r'<title>(?P<title>.*)</title>', webpage)
-        if result is None:
-            raise ExtractorError(u'ERROR: unable to extract video title')
-        video_title = result.group('title').strip()
+        video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>',
+            webpage, u'title').strip()
 
         # Get the embed page
         result = re.search(r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', webpage)
 
         # Get the embed page
         result = re.search(r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', webpage)
@@ -3718,10 +3685,8 @@ class YouJizzIE(InfoExtractor):
         webpage = self._download_webpage(embed_page_url, video_id)
 
         # Get the video URL
         webpage = self._download_webpage(embed_page_url, video_id)
 
         # Get the video URL
-        result = re.search(r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);', webpage)
-        if result is None:
-            raise ExtractorError(u'ERROR: unable to extract video url')
-        video_url = result.group('source')
+        video_url = self._search_regex(r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);',
+            webpage, u'video URL')
 
         info = {'id': video_id,
                 'url': video_url,
 
         info = {'id': video_id,
                 'url': video_url,
@@ -3744,10 +3709,7 @@ class EightTracksIE(InfoExtractor):
 
         webpage = self._download_webpage(url, playlist_id)
 
 
         webpage = self._download_webpage(url, playlist_id)
 
-        m = re.search(r"PAGE.mix = (.*?);\n", webpage, flags=re.DOTALL)
-        if not m:
-            raise ExtractorError(u'Cannot find trax information')
-        json_like = m.group(1)
+        json_like = self._search_regex(r"PAGE.mix = (.*?);\n", webpage, u'trax information', flags=re.DOTALL)
         data = json.loads(json_like)
 
         session = str(random.randint(0, 1000000000))
         data = json.loads(json_like)
 
         session = str(random.randint(0, 1000000000))
@@ -3783,18 +3745,22 @@ class KeekIE(InfoExtractor):
     def _real_extract(self, url):
         m = re.match(self._VALID_URL, url)
         video_id = m.group('videoID')
     def _real_extract(self, url):
         m = re.match(self._VALID_URL, url)
         video_id = m.group('videoID')
+
         video_url = u'http://cdn.keek.com/keek/video/%s' % video_id
         thumbnail = u'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
         webpage = self._download_webpage(url, video_id)
         video_url = u'http://cdn.keek.com/keek/video/%s' % video_id
         thumbnail = u'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
         webpage = self._download_webpage(url, video_id)
-        m = re.search(r'<meta property="og:title" content="(?P<title>.*?)"', webpage)
-        title = unescapeHTML(m.group('title'))
-        m = re.search(r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>', webpage)
-        uploader = clean_html(m.group('uploader'))
+
+        video_title = self._html_search_regex(r'<meta property="og:title" content="(?P<title>.*?)"',
+            webpage, u'title')
+
+        uploader = self._html_search_regex(r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>',
+            webpage, u'uploader', fatal=False)
+
         info = {
                 'id': video_id,
                 'url': video_url,
                 'ext': 'mp4',
         info = {
                 'id': video_id,
                 'url': video_url,
                 'ext': 'mp4',
-                'title': title,
+                'title': video_title,
                 'thumbnail': thumbnail,
                 'uploader': uploader
         }
                 'thumbnail': thumbnail,
                 'uploader': uploader
         }
@@ -3826,10 +3792,6 @@ class TEDIE(InfoExtractor):
             self.to_screen(u'Getting info of playlist %s: "%s"' % (playlist_id,name))
             return [self._playlist_videos_info(url,name,playlist_id)]
 
             self.to_screen(u'Getting info of playlist %s: "%s"' % (playlist_id,name))
             return [self._playlist_videos_info(url,name,playlist_id)]
 
-    def _talk_video_link(self,mediaSlug):
-        '''Returns the video link for that mediaSlug'''
-        return 'http://download.ted.com/talks/%s.mp4' % mediaSlug
-
     def _playlist_videos_info(self,url,name,playlist_id=0):
         '''Returns the videos of the playlist'''
         video_RE=r'''
     def _playlist_videos_info(self,url,name,playlist_id=0):
         '''Returns the videos of the playlist'''
         video_RE=r'''
@@ -3842,9 +3804,8 @@ class TEDIE(InfoExtractor):
         m_videos=re.finditer(video_RE,webpage,re.VERBOSE)
         m_names=re.finditer(video_name_RE,webpage)
 
         m_videos=re.finditer(video_RE,webpage,re.VERBOSE)
         m_names=re.finditer(video_name_RE,webpage)
 
-        playlist_RE = r'div class="headline">(\s*?)<h1>(\s*?)<span>(?P<playlist_title>.*?)</span>'
-        m_playlist = re.search(playlist_RE, webpage)
-        playlist_title = m_playlist.group('playlist_title')
+        playlist_title = self._html_search_regex(r'div class="headline">\s*?<h1>\s*?<span>(.*?)</span>',
+                                                 webpage, 'playlist title')
 
         playlist_entries = []
         for m_video, m_name in zip(m_videos,m_names):
 
         playlist_entries = []
         for m_video, m_name in zip(m_videos,m_names):
@@ -3855,27 +3816,28 @@ class TEDIE(InfoExtractor):
 
     def _talk_info(self, url, video_id=0):
         """Return the video for the talk in the url"""
 
     def _talk_info(self, url, video_id=0):
         """Return the video for the talk in the url"""
-        m=re.match(self._VALID_URL, url,re.VERBOSE)
-        videoName=m.group('name')
-        webpage=self._download_webpage(url, video_id, 'Downloading \"%s\" page' % videoName)
+        m = re.match(self._VALID_URL, url,re.VERBOSE)
+        video_name = m.group('name')
+        webpage = self._download_webpage(url, video_id, 'Downloading \"%s\" page' % video_name)
+        self.report_extraction(video_name)
         # If the url includes the language we get the title translated
         # If the url includes the language we get the title translated
-        title_RE=r'<span id="altHeadline" >(?P<title>.*)</span>'
-        title=re.search(title_RE, webpage).group('title')
-        info_RE=r'''<script\ type="text/javascript">var\ talkDetails\ =(.*?)
-                        "id":(?P<videoID>[\d]+).*?
-                        "mediaSlug":"(?P<mediaSlug>[\w\d]+?)"'''
-        thumb_RE=r'</span>[\s.]*</div>[\s.]*<img src="(?P<thumbnail>.*?)"'
-        thumb_match=re.search(thumb_RE,webpage)
-        info_match=re.search(info_RE,webpage,re.VERBOSE)
-        video_id=info_match.group('videoID')
-        mediaSlug=info_match.group('mediaSlug')
-        video_url=self._talk_video_link(mediaSlug)
+        title = self._html_search_regex(r'<span id="altHeadline" >(?P<title>.*)</span>',
+                                        webpage, 'title')
+        json_data = self._search_regex(r'<script.*?>var talkDetails = ({.*?})</script>',
+                                    webpage, 'json data')
+        info = json.loads(json_data)
+        desc = self._html_search_regex(r'<div class="talk-intro">.*?<p.*?>(.*?)</p>',
+                                       webpage, 'description', flags = re.DOTALL)
+        
+        thumbnail = self._search_regex(r'</span>[\s.]*</div>[\s.]*<img src="(.*?)"',
+                                       webpage, 'thumbnail')
         info = {
         info = {
-                'id': video_id,
-                'url': video_url,
+                'id': info['id'],
+                'url': info['htmlStreams'][-1]['file'],
                 'ext': 'mp4',
                 'title': title,
                 'ext': 'mp4',
                 'title': title,
-                'thumbnail': thumb_match.group('thumbnail')
+                'thumbnail': thumbnail,
+                'description': desc,
                 }
         return info
 
                 }
         return info
 
@@ -3941,10 +3903,9 @@ class SpiegelIE(InfoExtractor):
         video_id = m.group('videoID')
 
         webpage = self._download_webpage(url, video_id)
         video_id = m.group('videoID')
 
         webpage = self._download_webpage(url, video_id)
-        m = re.search(r'<div class="spVideoTitle">(.*?)</div>', webpage)
-        if not m:
-            raise ExtractorError(u'Cannot find title')
-        video_title = unescapeHTML(m.group(1))
+
+        video_title = self._html_search_regex(r'<div class="module-title">(.*?)</div>',
+            webpage, u'title')
 
         xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
         xml_code = self._download_webpage(xml_url, video_id,
 
         xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
         xml_code = self._download_webpage(xml_url, video_id,
@@ -3980,35 +3941,25 @@ class LiveLeakIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
 
         webpage = self._download_webpage(url, video_id)
 
-        m = re.search(r'file: "(.*?)",', webpage)
-        if not m:
-            raise ExtractorError(u'Unable to find video url')
-        video_url = m.group(1)
+        video_url = self._search_regex(r'file: "(.*?)",',
+            webpage, u'video URL')
 
 
-        m = re.search(r'<meta property="og:title" content="(?P<title>.*?)"', webpage)
-        if not m:
-            raise ExtractorError(u'Cannot find video title')
-        title = unescapeHTML(m.group('title')).replace('LiveLeak.com -', '').strip()
+        video_title = self._html_search_regex(r'<meta property="og:title" content="(?P<title>.*?)"',
+            webpage, u'title').replace('LiveLeak.com -', '').strip()
 
 
-        m = re.search(r'<meta property="og:description" content="(?P<desc>.*?)"', webpage)
-        if m:
-            desc = unescapeHTML(m.group('desc'))
-        else:
-            desc = None
+        video_description = self._html_search_regex(r'<meta property="og:description" content="(?P<desc>.*?)"',
+            webpage, u'description', fatal=False)
 
 
-        m = re.search(r'By:.*?(\w+)</a>', webpage)
-        if m:
-            uploader = clean_html(m.group(1))
-        else:
-            uploader = None
+        video_uploader = self._html_search_regex(r'By:.*?(\w+)</a>',
+            webpage, u'uploader', fatal=False)
 
         info = {
             'id':  video_id,
             'url': video_url,
             'ext': 'mp4',
 
         info = {
             'id':  video_id,
             'url': video_url,
             'ext': 'mp4',
-            'title': title,
-            'description': desc,
-            'uploader': uploader
+            'title': video_title,
+            'description': video_description,
+            'uploader': video_uploader
         }
 
         return [info]
         }
 
         return [info]
@@ -4052,6 +4003,64 @@ class ARDIE(InfoExtractor):
             info["url"] = stream["video_url"]
         return [info]
 
             info["url"] = stream["video_url"]
         return [info]
 
+class ZDFIE(InfoExtractor):
+    _VALID_URL = r'^http://www\.zdf\.de\/ZDFmediathek\/(.*beitrag\/video\/)(?P<video_id>[^/\?]+)(?:\?.*)?'
+    _TITLE = r'<h1(?: class="beitragHeadline")?>(?P<title>.*)</h1>'
+    _MEDIA_STREAM = r'<a href="(?P<video_url>.+(?P<media_type>.streaming).+/zdf/(?P<quality>[^\/]+)/[^"]*)".+class="play".+>'
+    _MMS_STREAM = r'href="(?P<video_url>mms://[^"]*)"'
+    _RTSP_STREAM = r'(?P<video_url>rtsp://[^"]*.mp4)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            raise ExtractorError(u'Invalid URL: %s' % url)
+        video_id = mobj.group('video_id')
+
+        html = self._download_webpage(url, video_id)
+        streams = [m.groupdict() for m in re.finditer(self._MEDIA_STREAM, html)]
+        if streams is None:
+            raise ExtractorError(u'No media url found.')
+
+        # s['media_type'] == 'wstreaming' -> use 'Windows Media Player' and mms url
+        # s['media_type'] == 'hstreaming' -> use 'Quicktime' and rtsp url
+        # choose first/default media type and highest quality for now
+        for s in streams:        #find 300 - dsl1000mbit
+            if s['quality'] == '300' and s['media_type'] == 'wstreaming':
+                stream_=s
+                break
+        for s in streams:        #find veryhigh - dsl2000mbit
+            if s['quality'] == 'veryhigh' and s['media_type'] == 'wstreaming': # 'hstreaming' - rtsp is not working
+                stream_=s
+                break
+        if stream_ is None:
+            raise ExtractorError(u'No stream found.')
+
+        media_link = self._download_webpage(stream_['video_url'], video_id,'Get stream URL')
+
+        self.report_extraction(video_id)
+        mobj = re.search(self._TITLE, html)
+        if mobj is None:
+            raise ExtractorError(u'Cannot extract title')
+        title = unescapeHTML(mobj.group('title'))
+
+        mobj = re.search(self._MMS_STREAM, media_link)
+        if mobj is None:
+            mobj = re.search(self._RTSP_STREAM, media_link)
+            if mobj is None:
+                raise ExtractorError(u'Cannot extract mms:// or rtsp:// URL')
+        mms_url = mobj.group('video_url')
+
+        mobj = re.search('(.*)[.](?P<ext>[^.]+)', mms_url)
+        if mobj is None:
+            raise ExtractorError(u'Cannot extract extention')
+        ext = mobj.group('ext')
+
+        return [{'id': video_id,
+                 'url': mms_url,
+                 'title': title,
+                 'ext': ext
+                 }]
+
 class TumblrIE(InfoExtractor):
     _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)/(.*?)'
 
 class TumblrIE(InfoExtractor):
     _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)/(.*?)'
 
@@ -4066,23 +4075,23 @@ class TumblrIE(InfoExtractor):
         re_video = r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (blog, video_id)
         video = re.search(re_video, webpage)
         if video is None:
         re_video = r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (blog, video_id)
         video = re.search(re_video, webpage)
         if video is None:
-            self.to_screen("No video found")
-            return []
+           raise ExtractorError(u'Unable to extract video')
         video_url = video.group('video_url')
         ext = video.group('ext')
 
         video_url = video.group('video_url')
         ext = video.group('ext')
 
-        re_thumb = r'posters(.*?)\[\\x22(?P<thumb>.*?)\\x22'  # We pick the first poster
-        thumb = re.search(re_thumb, webpage).group('thumb').replace('\\', '')
+        video_thumbnail = self._search_regex(r'posters(.*?)\[\\x22(?P<thumb>.*?)\\x22',
+            webpage, u'thumbnail', fatal=False)  # We pick the first poster
+        if video_thumbnail: video_thumbnail = video_thumbnail.replace('\\', '')
 
         # The only place where you can get a title, it's not complete,
         # but searching in other places doesn't work for all videos
 
         # The only place where you can get a title, it's not complete,
         # but searching in other places doesn't work for all videos
-        re_title = r'<title>(?P<title>.*?)</title>'
-        title = unescapeHTML(re.search(re_title, webpage, re.DOTALL).group('title'))
+        video_title = self._html_search_regex(r'<title>(?P<title>.*?)</title>',
+            webpage, u'title', flags=re.DOTALL)
 
         return [{'id': video_id,
                  'url': video_url,
 
         return [{'id': video_id,
                  'url': video_url,
-                 'title': title,
-                 'thumbnail': thumb,
+                 'title': video_title,
+                 'thumbnail': video_thumbnail,
                  'ext': ext
                  }]
 
                  'ext': ext
                  }]
 
@@ -4096,7 +4105,7 @@ class BandcampIE(InfoExtractor):
         # We get the link to the free download page
         m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
         if m_download is None:
         # We get the link to the free download page
         m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
         if m_download is None:
-            raise ExtractorError(u'No free songs founded')
+            raise ExtractorError(u'No free songs found')
 
         download_link = m_download.group(1)
         id = re.search(r'var TralbumData = {(.*?)id: (?P<id>\d*?)$', 
 
         download_link = m_download.group(1)
         id = re.search(r'var TralbumData = {(.*?)id: (?P<id>\d*?)$', 
@@ -4124,10 +4133,10 @@ class BandcampIE(InfoExtractor):
 
         track_info = {'id':id,
                       'title' : info[u'title'],
 
         track_info = {'id':id,
                       'title' : info[u'title'],
-                      'ext' : 'mp3',
-                      'url' : final_url,
+                      'ext' :   'mp3',
+                      'url' :   final_url,
                       'thumbnail' : info[u'thumb_url'],
                       'thumbnail' : info[u'thumb_url'],
-                      'uploader' : info[u'artist']
+                      'uploader' :  info[u'artist']
                       }
 
         return [track_info]
                       }
 
         return [track_info]
@@ -4144,17 +4153,14 @@ class RedTubeIE(InfoExtractor):
         video_id = mobj.group('id')
         video_extension = 'mp4'        
         webpage = self._download_webpage(url, video_id)
         video_id = mobj.group('id')
         video_extension = 'mp4'        
         webpage = self._download_webpage(url, video_id)
+
         self.report_extraction(video_id)
         self.report_extraction(video_id)
-        mobj = re.search(r'<source src="'+'(.+)'+'" type="video/mp4">',webpage)
 
 
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract media URL')
+        video_url = self._html_search_regex(r'<source src="(.+?)" type="video/mp4">',
+            webpage, u'video URL')
 
 
-        video_url = mobj.group(1)
-        mobj = re.search('<h1 class="videoTitle slidePanelMovable">(.+)</h1>',webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract title')
-        video_title = mobj.group(1)
+        video_title = self._html_search_regex('<h1 class="videoTitle slidePanelMovable">(.+?)</h1>',
+            webpage, u'title')
 
         return [{
             'id':       video_id,
 
         return [{
             'id':       video_id,
@@ -4175,15 +4181,13 @@ class InaIE(InfoExtractor):
         video_extension = 'mp4'
         webpage = self._download_webpage(mrss_url, video_id)
 
         video_extension = 'mp4'
         webpage = self._download_webpage(mrss_url, video_id)
 
-        mobj = re.search(r'<media:player url="(?P<mp4url>http://mp4.ina.fr/[^"]+\.mp4)', webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract media URL')
-        video_url = mobj.group(1)
+        self.report_extraction(video_id)
 
 
-        mobj = re.search(r'<title><!\[CDATA\[(?P<titre>.*?)]]></title>', webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract title')
-        video_title = mobj.group(1)
+        video_url = self._html_search_regex(r'<media:player url="(?P<mp4url>http://mp4.ina.fr/[^"]+\.mp4)',
+            webpage, u'video URL')
+
+        video_title = self._search_regex(r'<title><!\[CDATA\[(?P<titre>.*?)]]></title>',
+            webpage, u'title')
 
         return [{
             'id':       video_id,
 
         return [{
             'id':       video_id,
@@ -4205,27 +4209,17 @@ class HowcastIE(InfoExtractor):
 
         self.report_extraction(video_id)
 
 
         self.report_extraction(video_id)
 
-        mobj = re.search(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)"', webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract video URL')
-        video_url = mobj.group(1)
+        video_url = self._search_regex(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)',
+            webpage, u'video URL')
 
 
-        mobj = re.search(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') property=\'og:title\'', webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract title')
-        video_title = mobj.group(1) or mobj.group(2)
+        video_title = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') property=\'og:title\'',
+            webpage, u'title')
 
 
-        mobj = re.search(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'', webpage)
-        if mobj is None:
-            self._downloader.report_warning(u'unable to extract description')
-            video_description = None
-        else:
-            video_description = mobj.group(1) or mobj.group(2)
+        video_description = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'',
+            webpage, u'description', fatal=False)
 
 
-        mobj = re.search(r'<meta content=\'(.+?)\' property=\'og:image\'', webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract thumbnail')
-        thumbnail = mobj.group(1)
+        thumbnail = self._html_search_regex(r'<meta content=\'(.+?)\' property=\'og:image\'',
+            webpage, u'thumbnail', fatal=False)
 
         return [{
             'id':       video_id,
 
         return [{
             'id':       video_id,
@@ -4241,7 +4235,6 @@ class VineIE(InfoExtractor):
     _VALID_URL = r'(?:https?://)?(?:www\.)?vine\.co/v/(?P<id>\w+)'
 
     def _real_extract(self, url):
     _VALID_URL = r'(?:https?://)?(?:www\.)?vine\.co/v/(?P<id>\w+)'
 
     def _real_extract(self, url):
-
         mobj = re.match(self._VALID_URL, url)
 
         video_id = mobj.group('id')
         mobj = re.match(self._VALID_URL, url)
 
         video_id = mobj.group('id')
@@ -4250,25 +4243,17 @@ class VineIE(InfoExtractor):
 
         self.report_extraction(video_id)
 
 
         self.report_extraction(video_id)
 
-        mobj = re.search(r'<meta property="twitter:player:stream" content="(.+?)"', webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract video URL')
-        video_url = mobj.group(1)
+        video_url = self._html_search_regex(r'<meta property="twitter:player:stream" content="(.+?)"',
+            webpage, u'video URL')
 
 
-        mobj = re.search(r'<meta property="og:title" content="(.+?)"', webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract title')
-        video_title = mobj.group(1)
+        video_title = self._html_search_regex(r'<meta property="og:title" content="(.+?)"',
+            webpage, u'title')
 
 
-        mobj = re.search(r'<meta property="og:image" content="(.+?)(\?.*?)?"', webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract thumbnail')
-        thumbnail = mobj.group(1)
+        thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.+?)(\?.*?)?"',
+            webpage, u'thumbnail', fatal=False)
 
 
-        mobj = re.search(r'<div class="user">.*?<h2>(.+?)</h2>', webpage, re.DOTALL)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract uploader')
-        uploader = mobj.group(1)
+        uploader = self._html_search_regex(r'<div class="user">.*?<h2>(.+?)</h2>',
+            webpage, u'uploader', fatal=False, flags=re.DOTALL)
 
         return [{
             'id':        video_id,
 
         return [{
             'id':        video_id,
@@ -4291,18 +4276,13 @@ class FlickrIE(InfoExtractor):
         webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
         webpage = self._download_webpage(webpage_url, video_id)
 
         webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
         webpage = self._download_webpage(webpage_url, video_id)
 
-        mobj = re.search(r"photo_secret: '(\w+)'", webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract video secret')
-        secret = mobj.group(1)
+        secret = self._search_regex(r"photo_secret: '(\w+)'", webpage, u'secret')
 
         first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
         first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')
 
 
         first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
         first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')
 
-        mobj = re.search(r'<Item id="id">(\d+-\d+)</Item>', first_xml)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract node_id')
-        node_id = mobj.group(1)
+        node_id = self._html_search_regex(r'<Item id="id">(\d+-\d+)</Item>',
+            first_xml, u'node_id')
 
         second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
         second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')
 
         second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
         second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')
@@ -4314,22 +4294,14 @@ class FlickrIE(InfoExtractor):
             raise ExtractorError(u'Unable to extract video url')
         video_url = mobj.group(1) + unescapeHTML(mobj.group(2))
 
             raise ExtractorError(u'Unable to extract video url')
         video_url = mobj.group(1) + unescapeHTML(mobj.group(2))
 
-        mobj = re.search(r'<meta property="og:title" content=(?:"([^"]+)"|\'([^\']+)\')', webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract title')
-        video_title = mobj.group(1) or mobj.group(2)
+        video_title = self._html_search_regex(r'<meta property="og:title" content=(?:"([^"]+)"|\'([^\']+)\')',
+            webpage, u'video title')
 
 
-        mobj = re.search(r'<meta property="og:description" content=(?:"([^"]+)"|\'([^\']+)\')', webpage)
-        if mobj is None:
-            self._downloader.report_warning(u'unable to extract description')
-            video_description = None
-        else:
-            video_description = mobj.group(1) or mobj.group(2)
+        video_description = self._html_search_regex(r'<meta property="og:description" content=(?:"([^"]+)"|\'([^\']+)\')',
+            webpage, u'description', fatal=False)
 
 
-        mobj = re.search(r'<meta property="og:image" content=(?:"([^"]+)"|\'([^\']+)\')', webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract thumbnail')
-        thumbnail = mobj.group(1) or mobj.group(2)
+        thumbnail = self._html_search_regex(r'<meta property="og:image" content=(?:"([^"]+)"|\'([^\']+)\')',
+            webpage, u'thumbnail', fatal=False)
 
         return [{
             'id':          video_id,
 
         return [{
             'id':          video_id,
@@ -4351,32 +4323,25 @@ class TeamcocoIE(InfoExtractor):
         url_title = mobj.group('url_title')
         webpage = self._download_webpage(url, url_title)
 
         url_title = mobj.group('url_title')
         webpage = self._download_webpage(url, url_title)
 
-        mobj = re.search(r'<article class="video" data-id="(\d+?)"', webpage)
-        video_id = mobj.group(1)
+        video_id = self._html_search_regex(r'<article class="video" data-id="(\d+?)"',
+            webpage, u'video id')
 
         self.report_extraction(video_id)
 
 
         self.report_extraction(video_id)
 
-        mobj = re.search(r'<meta property="og:title" content="(.+?)"', webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract title')
-        video_title = mobj.group(1)
+        video_title = self._html_search_regex(r'<meta property="og:title" content="(.+?)"',
+            webpage, u'title')
 
 
-        mobj = re.search(r'<meta property="og:image" content="(.+?)"', webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract thumbnail')
-        thumbnail = mobj.group(1)
+        thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.+?)"',
+            webpage, u'thumbnail', fatal=False)
 
 
-        mobj = re.search(r'<meta property="og:description" content="(.*?)"', webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract description')
-        description = mobj.group(1)
+        video_description = self._html_search_regex(r'<meta property="og:description" content="(.*?)"',
+            webpage, u'description', fatal=False)
 
         data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
         data = self._download_webpage(data_url, video_id, 'Downloading data webpage')
 
         data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
         data = self._download_webpage(data_url, video_id, 'Downloading data webpage')
-        mobj = re.search(r'<file type="high".*?>(.*?)</file>', data)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract video url')
-        video_url = mobj.group(1)
+
+        video_url = self._html_search_regex(r'<file type="high".*?>(.*?)</file>',
+            data, u'video URL')
 
         return [{
             'id':          video_id,
 
         return [{
             'id':          video_id,
@@ -4384,9 +4349,198 @@ class TeamcocoIE(InfoExtractor):
             'ext':         'mp4',
             'title':       video_title,
             'thumbnail':   thumbnail,
             'ext':         'mp4',
             'title':       video_title,
             'thumbnail':   thumbnail,
-            'description': description,
+            'description': video_description,
+        }]
+
+class XHamsterIE(InfoExtractor):
+    """Information Extractor for xHamster"""
+    _VALID_URL = r'(?:http://)?(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'
+
+    def _real_extract(self,url):
+        mobj = re.match(self._VALID_URL, url)
+
+        video_id = mobj.group('id')
+        mrss_url = 'http://xhamster.com/movies/%s/.html' % video_id
+        webpage = self._download_webpage(mrss_url, video_id)
+
+        mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
+        if mobj is None:
+            raise ExtractorError(u'Unable to extract media URL')
+        if len(mobj.group('server')) == 0:
+            video_url = compat_urllib_parse.unquote(mobj.group('file'))
+        else:
+            video_url = mobj.group('server')+'/key='+mobj.group('file')
+        video_extension = video_url.split('.')[-1]
+
+        video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>',
+            webpage, u'title')
+
+        # Can't see the description anywhere in the UI
+        # video_description = self._html_search_regex(r'<span>Description: </span>(?P<description>[^<]+)',
+        #     webpage, u'description', fatal=False)
+        # if video_description: video_description = unescapeHTML(video_description)
+
+        mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
+        if mobj:
+            video_upload_date = mobj.group('upload_date_Y')+mobj.group('upload_date_m')+mobj.group('upload_date_d')
+        else:
+            video_upload_date = None
+            self._downloader.report_warning(u'Unable to extract upload date')
+
+        video_uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
+            webpage, u'uploader id', default=u'anonymous')
+
+        video_thumbnail = self._search_regex(r'\'image\':\'(?P<thumbnail>[^\']+)\'',
+            webpage, u'thumbnail', fatal=False)
+
+        return [{
+            'id':       video_id,
+            'url':      video_url,
+            'ext':      video_extension,
+            'title':    video_title,
+            # 'description': video_description,
+            'upload_date': video_upload_date,
+            'uploader_id': video_uploader_id,
+            'thumbnail': video_thumbnail
+        }]
+
+class HypemIE(InfoExtractor):
+    """Information Extractor for hypem"""
+    _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            raise ExtractorError(u'Invalid URL: %s' % url)
+        track_id = mobj.group(1)
+
+        data = { 'ax': 1, 'ts': time.time() }
+        data_encoded = compat_urllib_parse.urlencode(data)
+        complete_url = url + "?" + data_encoded
+        request = compat_urllib_request.Request(complete_url)
+        response, urlh = self._download_webpage_handle(request, track_id, u'Downloading webpage with the url')
+        cookie = urlh.headers.get('Set-Cookie', '')
+
+        self.report_extraction(track_id)
+
+        html_tracks = self._html_search_regex(r'<script type="application/json" id="displayList-data">(.*?)</script>',
+            response, u'tracks', flags=re.MULTILINE|re.DOTALL).strip()
+        try:
+            track_list = json.loads(html_tracks)
+            track = track_list[u'tracks'][0]
+        except ValueError:
+            raise ExtractorError(u'Hypemachine contained invalid JSON.')
+
+        key = track[u"key"]
+        track_id = track[u"id"]
+        artist = track[u"artist"]
+        title = track[u"song"]
+
+        serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key))
+        request = compat_urllib_request.Request(serve_url, "" , {'Content-Type': 'application/json'})
+        request.add_header('cookie', cookie)
+        song_data_json = self._download_webpage(request, track_id, u'Downloading metadata')
+        try:
+            song_data = json.loads(song_data_json)
+        except ValueError:
+            raise ExtractorError(u'Hypemachine contained invalid JSON.')
+        final_url = song_data[u"url"]
+
+        return [{
+            'id':       track_id,
+            'url':      final_url,
+            'ext':      "mp3",
+            'title':    title,
+            'artist':   artist,
+        }]
+
+class Vbox7IE(InfoExtractor):
+    """Information Extractor for Vbox7"""
+    _VALID_URL = r'(?:http://)?(?:www\.)?vbox7\.com/play:([^/]+)'
+
+    def _real_extract(self,url):
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            raise ExtractorError(u'Invalid URL: %s' % url)
+        video_id = mobj.group(1)
+
+        redirect_page, urlh = self._download_webpage_handle(url, video_id)
+        new_location = self._search_regex(r'window\.location = \'(.*)\';', redirect_page, u'redirect location')
+        redirect_url = urlh.geturl() + new_location
+        webpage = self._download_webpage(redirect_url, video_id, u'Downloading redirect page')
+
+        title = self._html_search_regex(r'<title>(.*)</title>',
+            webpage, u'title').split('/')[0].strip()
+
+        ext = "flv"
+        info_url = "http://vbox7.com/play/magare.do"
+        data = compat_urllib_parse.urlencode({'as3':'1','vid':video_id})
+        info_request = compat_urllib_request.Request(info_url, data)
+        info_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+        info_response = self._download_webpage(info_request, video_id, u'Downloading info webpage')
+        if info_response is None:
+            raise ExtractorError(u'Unable to extract the media url')
+        (final_url, thumbnail_url) = map(lambda x: x.split('=')[1], info_response.split('&'))
+
+        return [{
+            'id':        video_id,
+            'url':       final_url,
+            'ext':       ext,
+            'title':     title,
+            'thumbnail': thumbnail_url,
         }]
 
         }]
 
+class GametrailersIE(InfoExtractor):
+    _VALID_URL = r'http://www.gametrailers.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            raise ExtractorError(u'Invalid URL: %s' % url)
+        video_id = mobj.group('id')
+        video_type = mobj.group('type')
+        webpage = self._download_webpage(url, video_id)
+        if video_type == 'full-episodes':
+            mgid_re = r'data-video="(?P<mgid>mgid:.*?)"'
+        else:
+            mgid_re = r'data-contentId=\'(?P<mgid>mgid:.*?)\''
+        mgid = self._search_regex(mgid_re, webpage, u'mgid')
+        data = compat_urllib_parse.urlencode({'uri': mgid, 'acceptMethods': 'fms'})
+
+        info_page = self._download_webpage('http://www.gametrailers.com/feeds/mrss?' + data,
+                                           video_id, u'Downloading video info')
+        links_webpage = self._download_webpage('http://www.gametrailers.com/feeds/mediagen/?' + data,
+                                               video_id, u'Downloading video urls info')
+
+        self.report_extraction(video_id)
+        info_re = r'''<title><!\[CDATA\[(?P<title>.*?)\]\]></title>.*
+                      <description><!\[CDATA\[(?P<description>.*?)\]\]></description>.*
+                      <image>.*
+                        <url>(?P<thumb>.*?)</url>.*
+                      </image>'''
+
+        m_info = re.search(info_re, info_page, re.VERBOSE|re.DOTALL)
+        if m_info is None:
+            raise ExtractorError(u'Unable to extract video info')
+        video_title = m_info.group('title')
+        video_description = m_info.group('description')
+        video_thumb = m_info.group('thumb')
+
+        m_urls = list(re.finditer(r'<src>(?P<url>.*)</src>', links_webpage))
+        if m_urls is None or len(m_urls) == 0:
+            raise ExtractorError(u'Unable to extract video url')
+        # They are sorted from worst to best quality
+        video_url = m_urls[-1].group('url')
+
+        return {'url':         video_url,
+                'id':          video_id,
+                'title':       video_title,
+                # Videos are actually flv not mp4
+                'ext':         'flv',
+                'thumbnail':   video_thumb,
+                'description': video_description,
+                }
+
 def gen_extractors():
     """ Return a list of an instance of every supported extractor.
     The order does matter; the first extractor matched is the one handling the URL.
 def gen_extractors():
     """ Return a list of an instance of every supported extractor.
     The order does matter; the first extractor matched is the one handling the URL.
@@ -4405,8 +4559,8 @@ def gen_extractors():
         YahooSearchIE(),
         DepositFilesIE(),
         FacebookIE(),
         YahooSearchIE(),
         DepositFilesIE(),
         FacebookIE(),
-        BlipTVUserIE(),
         BlipTVIE(),
         BlipTVIE(),
+        BlipTVUserIE(),
         VimeoIE(),
         MyVideoIE(),
         ComedyCentralIE(),
         VimeoIE(),
         MyVideoIE(),
         ComedyCentralIE(),
@@ -4440,6 +4594,7 @@ def gen_extractors():
         SpiegelIE(),
         LiveLeakIE(),
         ARDIE(),
         SpiegelIE(),
         LiveLeakIE(),
         ARDIE(),
+        ZDFIE(),
         TumblrIE(),
         BandcampIE(),
         RedTubeIE(),
         TumblrIE(),
         BandcampIE(),
         RedTubeIE(),
@@ -4448,6 +4603,10 @@ def gen_extractors():
         VineIE(),
         FlickrIE(),
         TeamcocoIE(),
         VineIE(),
         FlickrIE(),
         TeamcocoIE(),
+        XHamsterIE(),
+        HypemIE(),
+        Vbox7IE(),
+        GametrailersIE(),
         GenericIE()
     ]
 
         GenericIE()
     ]
 
index 308c48fe6e6c408efa10b1ea156759f7b829a0cd..9279ce7769967b9b2df69a1d1c88d6b0701b6beb 100644 (file)
@@ -392,8 +392,11 @@ def _real_main(argv=None):
 
     # General configuration
     cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
 
     # General configuration
     cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
-    if opts.proxy:
-        proxies = {'http': opts.proxy, 'https': opts.proxy}
+    if opts.proxy is not None:
+        if opts.proxy == '':
+            proxies = {}
+        else:
+            proxies = {'http': opts.proxy, 'https': opts.proxy}
     else:
         proxies = compat_urllib_request.getproxies()
         # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
     else:
         proxies = compat_urllib_request.getproxies()
         # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
index 63d9d0ae58dd36fafa835fe29b1d07332b5fabbc..66ae41e319e39d7c75597a31034315f996f00797 100644 (file)
@@ -12,7 +12,7 @@ import sys
 import traceback
 import zlib
 import email.utils
 import traceback
 import zlib
 import email.utils
-import json
+import socket
 import datetime
 
 try:
 import datetime
 
 try:
@@ -154,6 +154,9 @@ def compat_ord(c):
     if type(c) is int: return c
     else: return ord(c)
 
     if type(c) is int: return c
     else: return ord(c)
 
+# This is not clearly defined otherwise
+compiled_regex_type = type(re.compile(''))
+
 std_headers = {
     'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
 std_headers = {
     'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
@@ -469,7 +472,11 @@ class ExtractorError(Exception):
     """Error during info extraction."""
     def __init__(self, msg, tb=None):
         """ tb, if given, is the original traceback (so that it can be printed out). """
     """Error during info extraction."""
     def __init__(self, msg, tb=None):
         """ tb, if given, is the original traceback (so that it can be printed out). """
+
+        if not sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
+            msg = msg + u'; please report this issue on GitHub.'
         super(ExtractorError, self).__init__(msg)
         super(ExtractorError, self).__init__(msg)
+
         self.traceback = tb
         self.exc_info = sys.exc_info()  # preserve original exception
 
         self.traceback = tb
         self.exc_info = sys.exc_info()  # preserve original exception
 
index 1cda7fa743dc17c67dc4d6a63efc28bfbd05ebfd..7c6757efe1c6c468b88c991f261c570518a599b9 100644 (file)
@@ -1,2 +1,2 @@
 
 
-__version__ = '2013.05.23'
+__version__ = '2013.06.21'