]> Raphaël G. Git Repositories - youtubedl/commitdiff
Imported Upstream version 2014.12.01
authorRogério Brito <rbrito@ime.usp.br>
Tue, 2 Dec 2014 23:06:28 +0000 (21:06 -0200)
committerRogério Brito <rbrito@ime.usp.br>
Tue, 2 Dec 2014 23:06:28 +0000 (21:06 -0200)
241 files changed:
README.md
README.txt
devscripts/bash-completion.py
devscripts/buildserver.py
devscripts/check-porn.py
devscripts/fish-completion.py
devscripts/gh-pages/add-version.py
devscripts/gh-pages/generate-download.py
devscripts/gh-pages/sign-versions.py
devscripts/gh-pages/update-copyright.py
devscripts/gh-pages/update-feed.py
devscripts/gh-pages/update-sites.py
devscripts/make_readme.py
devscripts/prepare_manpage.py
devscripts/transition_helper.py [deleted file]
devscripts/transition_helper_exe/setup.py [deleted file]
devscripts/transition_helper_exe/youtube-dl.py [deleted file]
devscripts/zsh-completion.py
setup.py
test/helper.py
test/test_YoutubeDL.py
test/test_all_urls.py
test/test_download.py
test/test_subtitles.py
test/test_unicode_literals.py
test/test_utils.py
test/test_write_annotations.py
test/test_write_info_json.py
test/test_youtube_lists.py
youtube-dl
youtube-dl.1
youtube-dl.fish
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/__main__.py
youtube_dl/aes.py
youtube_dl/compat.py
youtube_dl/downloader/__init__.py
youtube_dl/downloader/common.py
youtube_dl/downloader/f4m.py
youtube_dl/downloader/hls.py
youtube_dl/downloader/http.py
youtube_dl/downloader/mplayer.py
youtube_dl/downloader/rtmp.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/academicearth.py
youtube_dl/extractor/addanime.py
youtube_dl/extractor/adultswim.py
youtube_dl/extractor/aparat.py
youtube_dl/extractor/appletrailers.py
youtube_dl/extractor/ard.py
youtube_dl/extractor/arte.py
youtube_dl/extractor/audiomack.py
youtube_dl/extractor/bambuser.py
youtube_dl/extractor/bandcamp.py
youtube_dl/extractor/bbccouk.py
youtube_dl/extractor/beeg.py
youtube_dl/extractor/bild.py
youtube_dl/extractor/bliptv.py
youtube_dl/extractor/bpb.py [new file with mode: 0644]
youtube_dl/extractor/buzzfeed.py [new file with mode: 0644]
youtube_dl/extractor/canalplus.py
youtube_dl/extractor/cbs.py
youtube_dl/extractor/cbsnews.py
youtube_dl/extractor/ceskatelevize.py
youtube_dl/extractor/channel9.py
youtube_dl/extractor/cinemassacre.py
youtube_dl/extractor/clipfish.py
youtube_dl/extractor/clipsyndicate.py
youtube_dl/extractor/cnn.py
youtube_dl/extractor/collegehumor.py
youtube_dl/extractor/common.py
youtube_dl/extractor/cracked.py
youtube_dl/extractor/crunchyroll.py
youtube_dl/extractor/dailymotion.py
youtube_dl/extractor/defense.py
youtube_dl/extractor/discovery.py
youtube_dl/extractor/dotsub.py
youtube_dl/extractor/dropbox.py
youtube_dl/extractor/ehow.py
youtube_dl/extractor/eighttracks.py
youtube_dl/extractor/facebook.py
youtube_dl/extractor/fc2.py
youtube_dl/extractor/firsttv.py
youtube_dl/extractor/fivemin.py
youtube_dl/extractor/fktv.py
youtube_dl/extractor/flickr.py
youtube_dl/extractor/fourtube.py
youtube_dl/extractor/francetv.py
youtube_dl/extractor/gamekings.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/globo.py
youtube_dl/extractor/gorillavid.py
youtube_dl/extractor/hornbunny.py
youtube_dl/extractor/hotnewhiphop.py
youtube_dl/extractor/howcast.py
youtube_dl/extractor/ign.py
youtube_dl/extractor/imdb.py
youtube_dl/extractor/instagram.py
youtube_dl/extractor/internetvideoarchive.py
youtube_dl/extractor/iprima.py
youtube_dl/extractor/ivi.py
youtube_dl/extractor/jadorecettepub.py
youtube_dl/extractor/jeuxvideo.py
youtube_dl/extractor/jukebox.py
youtube_dl/extractor/kankan.py
youtube_dl/extractor/kickstarter.py
youtube_dl/extractor/kontrtube.py
youtube_dl/extractor/ku6.py
youtube_dl/extractor/laola1tv.py
youtube_dl/extractor/lifenews.py
youtube_dl/extractor/liveleak.py
youtube_dl/extractor/lynda.py
youtube_dl/extractor/m6.py
youtube_dl/extractor/malemotion.py
youtube_dl/extractor/mdr.py
youtube_dl/extractor/metacafe.py
youtube_dl/extractor/metacritic.py
youtube_dl/extractor/mojvideo.py
youtube_dl/extractor/moniker.py
youtube_dl/extractor/mooshare.py
youtube_dl/extractor/motherless.py
youtube_dl/extractor/moviezine.py
youtube_dl/extractor/movshare.py
youtube_dl/extractor/mpora.py
youtube_dl/extractor/mtv.py
youtube_dl/extractor/muenchentv.py
youtube_dl/extractor/musicplayon.py
youtube_dl/extractor/muzu.py
youtube_dl/extractor/myspace.py
youtube_dl/extractor/myvideo.py
youtube_dl/extractor/naver.py
youtube_dl/extractor/nba.py
youtube_dl/extractor/ndr.py
youtube_dl/extractor/newgrounds.py
youtube_dl/extractor/newstube.py
youtube_dl/extractor/nfb.py
youtube_dl/extractor/nhl.py
youtube_dl/extractor/niconico.py
youtube_dl/extractor/ninegag.py
youtube_dl/extractor/noco.py
youtube_dl/extractor/normalboots.py
youtube_dl/extractor/novamov.py
youtube_dl/extractor/nowvideo.py
youtube_dl/extractor/ntv.py
youtube_dl/extractor/nuvid.py
youtube_dl/extractor/nytimes.py
youtube_dl/extractor/ooyala.py
youtube_dl/extractor/orf.py
youtube_dl/extractor/photobucket.py
youtube_dl/extractor/playvid.py
youtube_dl/extractor/podomatic.py
youtube_dl/extractor/pornhub.py
youtube_dl/extractor/pornotube.py
youtube_dl/extractor/prosiebensat1.py
youtube_dl/extractor/rai.py
youtube_dl/extractor/rbmaradio.py
youtube_dl/extractor/redtube.py
youtube_dl/extractor/ringtv.py
youtube_dl/extractor/rtlnl.py
youtube_dl/extractor/rtlnow.py
youtube_dl/extractor/rtve.py
youtube_dl/extractor/rutv.py
youtube_dl/extractor/sbs.py
youtube_dl/extractor/scivee.py
youtube_dl/extractor/screencast.py
youtube_dl/extractor/servingsys.py
youtube_dl/extractor/shared.py
youtube_dl/extractor/sina.py
youtube_dl/extractor/slideshare.py
youtube_dl/extractor/slutload.py
youtube_dl/extractor/smotri.py
youtube_dl/extractor/sohu.py
youtube_dl/extractor/soundcloud.py
youtube_dl/extractor/space.py
youtube_dl/extractor/spiegeltv.py
youtube_dl/extractor/sport5.py
youtube_dl/extractor/sportdeutschland.py
youtube_dl/extractor/stanfordoc.py
youtube_dl/extractor/subtitles.py
youtube_dl/extractor/swrmediathek.py
youtube_dl/extractor/tass.py [new file with mode: 0644]
youtube_dl/extractor/teachertube.py
youtube_dl/extractor/teamcoco.py
youtube_dl/extractor/ted.py
youtube_dl/extractor/telecinco.py
youtube_dl/extractor/tf1.py
youtube_dl/extractor/theplatform.py
youtube_dl/extractor/thisav.py
youtube_dl/extractor/tinypic.py
youtube_dl/extractor/tlc.py
youtube_dl/extractor/tmz.py [new file with mode: 0644]
youtube_dl/extractor/tnaflix.py
youtube_dl/extractor/traileraddict.py
youtube_dl/extractor/trilulilu.py
youtube_dl/extractor/tudou.py
youtube_dl/extractor/tumblr.py
youtube_dl/extractor/tunein.py [new file with mode: 0644]
youtube_dl/extractor/tvigle.py
youtube_dl/extractor/tvp.py
youtube_dl/extractor/udemy.py
youtube_dl/extractor/ustream.py
youtube_dl/extractor/vbox7.py
youtube_dl/extractor/veehd.py
youtube_dl/extractor/vesti.py
youtube_dl/extractor/vevo.py
youtube_dl/extractor/vgtv.py
youtube_dl/extractor/vice.py
youtube_dl/extractor/videobam.py
youtube_dl/extractor/videofyme.py
youtube_dl/extractor/videopremium.py
youtube_dl/extractor/videott.py
youtube_dl/extractor/videoweed.py
youtube_dl/extractor/vidzi.py
youtube_dl/extractor/vimeo.py
youtube_dl/extractor/vk.py
youtube_dl/extractor/vrt.py
youtube_dl/extractor/wdr.py
youtube_dl/extractor/weibo.py
youtube_dl/extractor/worldstarhiphop.py
youtube_dl/extractor/xbef.py
youtube_dl/extractor/xhamster.py
youtube_dl/extractor/xminus.py [new file with mode: 0644]
youtube_dl/extractor/xnxx.py
youtube_dl/extractor/xtube.py
youtube_dl/extractor/yahoo.py
youtube_dl/extractor/ynet.py
youtube_dl/extractor/youku.py
youtube_dl/extractor/youporn.py
youtube_dl/extractor/youtube.py
youtube_dl/extractor/zingmp3.py [new file with mode: 0644]
youtube_dl/options.py
youtube_dl/postprocessor/__init__.py
youtube_dl/postprocessor/common.py
youtube_dl/postprocessor/execafterdownload.py
youtube_dl/postprocessor/ffmpeg.py
youtube_dl/postprocessor/xattrpp.py
youtube_dl/swfinterp.py
youtube_dl/update.py
youtube_dl/utils.py
youtube_dl/version.py

index c284db4955130065504aabe046ac4c0526917ba4..d6e7ff902c86021db324cb3b9d06d441b8673cad 100644 (file)
--- a/README.md
+++ b/README.md
@@ -30,7 +30,7 @@ Alternatively, refer to the developer instructions below for how to check out an
 # DESCRIPTION
 **youtube-dl** is a small command-line program to download videos from
 YouTube.com and a few more sites. It requires the Python interpreter, version
-2.6, 2.7, or 3.3+, and it is not platform specific. It should work on
+2.6, 2.7, or 3.2+, and it is not platform specific. It should work on
 your Unix box, on Windows or on Mac OS X. It is released to the public domain,
 which means you can modify it, redistribute it or use it however you like.
 
@@ -93,7 +93,8 @@ which means you can modify it, redistribute it or use it however you like.
                                      COUNT views
     --max-views COUNT                Do not download any videos with more than
                                      COUNT views
-    --no-playlist                    download only the currently playing video
+    --no-playlist                    If the URL refers to a video and a
+                                     playlist, download only the video.
     --age-limit YEARS                download only videos suitable for the given
                                      age
     --download-archive FILE          Download only videos not listed in the
@@ -492,14 +493,15 @@ If you want to add support for a new site, you can follow this quick list (assum
 
         def _real_extract(self, url):
             video_id = self._match_id(url)
+            webpage = self._download_webpage(url, video_id)
 
             # TODO more code goes here, for example ...
-            webpage = self._download_webpage(url, video_id)
             title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
 
             return {
                 'id': video_id,
                 'title': title,
+                'description': self._og_search_description(webpage),
                 # TODO more properties (see youtube_dl/extractor/common.py)
             }
     ```
index 3ba6f53d508a59e82b0046557c3b08f7374068eb..601fe9ae25a8edd9d06492e3610b101651225ef6 100644 (file)
@@ -39,7 +39,7 @@ DESCRIPTION
 
 youtube-dl is a small command-line program to download videos from
 YouTube.com and a few more sites. It requires the Python interpreter,
-version 2.6, 2.7, or 3.3+, and it is not platform specific. It should
+version 2.6, 2.7, or 3.2+, and it is not platform specific. It should
 work on your Unix box, on Windows or on Mac OS X. It is released to the
 public domain, which means you can modify it, redistribute it or use it
 however you like.
@@ -107,7 +107,8 @@ Video Selection:
                                      COUNT views
     --max-views COUNT                Do not download any videos with more than
                                      COUNT views
-    --no-playlist                    download only the currently playing video
+    --no-playlist                    If the URL refers to a video and a
+                                     playlist, download only the video.
     --age-limit YEARS                download only videos suitable for the given
                                      age
     --download-archive FILE          Download only videos not listed in the
@@ -605,14 +606,15 @@ list (assuming your service is called yourextractor):
 
         def _real_extract(self, url):
             video_id = self._match_id(url)
+            webpage = self._download_webpage(url, video_id)
 
             # TODO more code goes here, for example ...
-            webpage = self._download_webpage(url, video_id)
             title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
 
             return {
                 'id': video_id,
                 'title': title,
+                'description': self._og_search_description(webpage),
                 # TODO more properties (see youtube_dl/extractor/common.py)
             }
     ```
index 49287724d6b8d1cb6215aaadf150b8a441582993..cd26cc0895d033af03541f48815e8dad23f5161d 100755 (executable)
@@ -1,4 +1,6 @@
 #!/usr/bin/env python
+from __future__ import unicode_literals
+
 import os
 from os.path import dirname as dirn
 import sys
@@ -9,16 +11,17 @@ import youtube_dl
 BASH_COMPLETION_FILE = "youtube-dl.bash-completion"
 BASH_COMPLETION_TEMPLATE = "devscripts/bash-completion.in"
 
+
 def build_completion(opt_parser):
     opts_flag = []
     for group in opt_parser.option_groups:
         for option in group.option_list:
-            #for every long flag
+            # for every long flag
             opts_flag.append(option.get_opt_string())
     with open(BASH_COMPLETION_TEMPLATE) as f:
         template = f.read()
     with open(BASH_COMPLETION_FILE, "w") as f:
-        #just using the special char
+        # just using the special char
         filled_template = template.replace("{{flags}}", " ".join(opts_flag))
         f.write(filled_template)
 
index e0c3cc83ed5dd73544b383d983aca0281e86408f..7c2f49f8bb63bbe2b47efca151129a7e6b49674d 100644 (file)
@@ -142,7 +142,7 @@ def win_service_set_status(handle, status_code):
 
 def win_service_main(service_name, real_main, argc, argv_raw):
     try:
-        #args = [argv_raw[i].value for i in range(argc)]
+        # args = [argv_raw[i].value for i in range(argc)]
         stop_event = threading.Event()
         handler = HandlerEx(functools.partial(stop_event, win_service_handler))
         h = advapi32.RegisterServiceCtrlHandlerExW(service_name, handler, None)
@@ -233,6 +233,7 @@ def rmtree(path):
 
 #==============================================================================
 
+
 class BuildError(Exception):
     def __init__(self, output, code=500):
         self.output = output
@@ -369,7 +370,7 @@ class Builder(PythonBuilder, GITBuilder, YoutubeDLBuilder, DownloadBuilder, Clea
 
 
 class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
-    actionDict = { 'build': Builder, 'download': Builder } # They're the same, no more caching.
+    actionDict = {'build': Builder, 'download': Builder}  # They're the same, no more caching.
 
     def do_GET(self):
         path = urlparse.urlparse(self.path)
index 86aa37b5fb687acc91f29753f1bf8ba0db7b8e94..216282712c1b38b96c049f74a6cfe8a0fcd30806 100644 (file)
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+from __future__ import unicode_literals
 
 """
 This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check
index f4aaf0201f5a43341a0dfa1de3eb1087a293485b..c2f2387980d0867adc3cea4c3e14047fdaf1aa28 100755 (executable)
@@ -23,13 +23,13 @@ EXTRA_ARGS = {
     'batch-file': ['--require-parameter'],
 }
 
+
 def build_completion(opt_parser):
     commands = []
 
     for group in opt_parser.option_groups:
         for option in group.option_list:
             long_option = option.get_opt_string().strip('-')
-            help_msg = shell_quote([option.help])
             complete_cmd = ['complete', '--command', 'youtube-dl', '--long-option', long_option]
             if option._short_opts:
                 complete_cmd += ['--short-option', option._short_opts[0].strip('-')]
index 35865b2f30f9526f4a05b4bad594f078f5a56443..867ea0048fb88f1ca1382f11b1c60b17110d4fc8 100755 (executable)
@@ -1,4 +1,5 @@
 #!/usr/bin/env python3
+from __future__ import unicode_literals
 
 import json
 import sys
index 55912e12c4e25e400aa68cf8685d2e523d0bd253..392e3ba21ab86070f2df164362fbd177b750c726 100755 (executable)
@@ -1,8 +1,7 @@
 #!/usr/bin/env python3
+from __future__ import unicode_literals
+
 import hashlib
-import shutil
-import subprocess
-import tempfile
 import urllib.request
 import json
 
index 8a824df56fe7677868ca9b03aa24de8d4b6eba76..fa389c35872c3f2b937ef6a875b46bc4dc0d04f8 100755 (executable)
@@ -1,4 +1,5 @@
 #!/usr/bin/env python3
+from __future__ import unicode_literals, with_statement
 
 import rsa
 import json
@@ -11,22 +12,23 @@ except NameError:
 
 versions_info = json.load(open('update/versions.json'))
 if 'signature' in versions_info:
-       del versions_info['signature']
+    del versions_info['signature']
 
 print('Enter the PKCS1 private key, followed by a blank line:')
 privkey = b''
 while True:
-       try:
-               line = input()
-       except EOFError:
-               break
-       if line == '':
-               break
-       privkey += line.encode('ascii') + b'\n'
+    try:
+        line = input()
+    except EOFError:
+        break
+    if line == '':
+        break
+    privkey += line.encode('ascii') + b'\n'
 privkey = rsa.PrivateKey.load_pkcs1(privkey)
 
 signature = hexlify(rsa.pkcs1.sign(json.dumps(versions_info, sort_keys=True).encode('utf-8'), privkey, 'SHA-256')).decode()
 print('signature: ' + signature)
 
 versions_info['signature'] = signature
-json.dump(versions_info, open('update/versions.json', 'w'), indent=4, sort_keys=True)
\ No newline at end of file
+with open('update/versions.json', 'w') as versionsf:
+    json.dump(versions_info, versionsf, indent=4, sort_keys=True)
index 12c2a91949135d72edf53e41d15ae532690bc6bf..3663c8afef278f132518ed9c0286bdfe34d028a7 100755 (executable)
@@ -1,11 +1,11 @@
 #!/usr/bin/env python
 # coding: utf-8
 
-from __future__ import with_statement
+from __future__ import with_statement, unicode_literals
 
 import datetime
 import glob
-import io # For Python 2 compatibilty
+import io  # For Python 2 compatibilty
 import os
 import re
 
@@ -13,7 +13,7 @@ year = str(datetime.datetime.now().year)
 for fn in glob.glob('*.html*'):
     with io.open(fn, encoding='utf-8') as f:
         content = f.read()
-    newc = re.sub(u'(?P<copyright>Copyright © 2006-)(?P<year>[0-9]{4})', u'Copyright © 2006-' + year, content)
+    newc = re.sub(r'(?P<copyright>Copyright © 2006-)(?P<year>[0-9]{4})', 'Copyright © 2006-' + year, content)
     if content != newc:
         tmpFn = fn + '.part'
         with io.open(tmpFn, 'wt', encoding='utf-8') as outf:
index 0ba15ae0f7c83a4eb2ac6b2b56aeb72d55b3a951..e93eb60fb8af28a72b7aceb5d62d1c2e16b69bc5 100755 (executable)
@@ -1,4 +1,5 @@
 #!/usr/bin/env python3
+from __future__ import unicode_literals
 
 import datetime
 import io
@@ -73,4 +74,3 @@ atom_template = atom_template.replace('@ENTRIES@', entries_str)
 
 with io.open('update/releases.atom', 'w', encoding='utf-8') as atom_file:
     atom_file.write(atom_template)
-
index 153e15c8ab674f44e3681e5440c9372634b272d8..f0f0481c781ab40f8386de5c87a99c6468e00708 100755 (executable)
@@ -1,4 +1,5 @@
 #!/usr/bin/env python3
+from __future__ import unicode_literals
 
 import sys
 import os
@@ -9,6 +10,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(
 
 import youtube_dl
 
+
 def main():
     with open('supportedsites.html.in', 'r', encoding='utf-8') as tmplf:
         template = tmplf.read()
@@ -21,7 +23,7 @@ def main():
             continue
         elif ie_desc is not None:
             ie_html += ': {}'.format(ie.IE_DESC)
-        if ie.working() == False:
+        if not ie.working():
             ie_html += ' (Currently broken)'
         ie_htmls.append('<li>{}</li>'.format(ie_html))
 
index 70fa942dd12f7a75ee71b1fc9668e615cda0d747..8fbce07967c177217f5d39162d9f0958f1d41bf5 100755 (executable)
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import io
 import sys
 import re
index d9c857015c520fcd5b6dd46ecf3663e167a458df..f66bebfea6de2cec60a10f943380b9247e5c3d60 100644 (file)
@@ -1,3 +1,4 @@
+from __future__ import unicode_literals
 
 import io
 import os.path
diff --git a/devscripts/transition_helper.py b/devscripts/transition_helper.py
deleted file mode 100644 (file)
index d5ca2d4..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/usr/bin/env python
-
-import sys, os
-
-try:
-    import urllib.request as compat_urllib_request
-except ImportError: # Python 2
-    import urllib2 as compat_urllib_request
-
-sys.stderr.write(u'Hi! We changed distribution method and now youtube-dl needs to update itself one more time.\n')
-sys.stderr.write(u'This will only happen once. Simply press enter to go on. Sorry for the trouble!\n')
-sys.stderr.write(u'The new location of the binaries is https://github.com/rg3/youtube-dl/downloads, not the git repository.\n\n')
-
-try:
-       raw_input()
-except NameError: # Python 3
-       input()
-
-filename = sys.argv[0]
-
-API_URL = "https://api.github.com/repos/rg3/youtube-dl/downloads"
-BIN_URL = "https://github.com/downloads/rg3/youtube-dl/youtube-dl"
-
-if not os.access(filename, os.W_OK):
-    sys.exit('ERROR: no write permissions on %s' % filename)
-
-try:
-    urlh = compat_urllib_request.urlopen(BIN_URL)
-    newcontent = urlh.read()
-    urlh.close()
-except (IOError, OSError) as err:
-    sys.exit('ERROR: unable to download latest version')
-
-try:
-    with open(filename, 'wb') as outf:
-        outf.write(newcontent)
-except (IOError, OSError) as err:
-    sys.exit('ERROR: unable to overwrite current version')
-
-sys.stderr.write(u'Done! Now you can run youtube-dl.\n')
diff --git a/devscripts/transition_helper_exe/setup.py b/devscripts/transition_helper_exe/setup.py
deleted file mode 100644 (file)
index aaf5c29..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-from distutils.core import setup
-import py2exe
-
-py2exe_options = {
-    "bundle_files": 1,
-    "compressed": 1,
-    "optimize": 2,
-    "dist_dir": '.',
-    "dll_excludes": ['w9xpopen.exe']
-}
-
-setup(console=['youtube-dl.py'], options={ "py2exe": py2exe_options }, zipfile=None)
\ No newline at end of file
diff --git a/devscripts/transition_helper_exe/youtube-dl.py b/devscripts/transition_helper_exe/youtube-dl.py
deleted file mode 100644 (file)
index 6297dfd..0000000
+++ /dev/null
@@ -1,102 +0,0 @@
-#!/usr/bin/env python
-
-import sys, os
-import urllib2
-import json, hashlib
-
-def rsa_verify(message, signature, key):
-    from struct import pack
-    from hashlib import sha256
-    from sys import version_info
-    def b(x):
-        if version_info[0] == 2: return x
-        else: return x.encode('latin1')
-    assert(type(message) == type(b('')))
-    block_size = 0
-    n = key[0]
-    while n:
-        block_size += 1
-        n >>= 8
-    signature = pow(int(signature, 16), key[1], key[0])
-    raw_bytes = []
-    while signature:
-        raw_bytes.insert(0, pack("B", signature & 0xFF))
-        signature >>= 8
-    signature = (block_size - len(raw_bytes)) * b('\x00') + b('').join(raw_bytes)
-    if signature[0:2] != b('\x00\x01'): return False
-    signature = signature[2:]
-    if not b('\x00') in signature: return False
-    signature = signature[signature.index(b('\x00'))+1:]
-    if not signature.startswith(b('\x30\x31\x30\x0D\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01\x05\x00\x04\x20')): return False
-    signature = signature[19:]
-    if signature != sha256(message).digest(): return False
-    return True
-
-sys.stderr.write(u'Hi! We changed distribution method and now youtube-dl needs to update itself one more time.\n')
-sys.stderr.write(u'This will only happen once. Simply press enter to go on. Sorry for the trouble!\n')
-sys.stderr.write(u'From now on, get the binaries from http://rg3.github.com/youtube-dl/download.html, not from the git repository.\n\n')
-
-raw_input()
-
-filename = sys.argv[0]
-
-UPDATE_URL = "http://rg3.github.io/youtube-dl/update/"
-VERSION_URL = UPDATE_URL + 'LATEST_VERSION'
-JSON_URL = UPDATE_URL + 'versions.json'
-UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
-
-if not os.access(filename, os.W_OK):
-    sys.exit('ERROR: no write permissions on %s' % filename)
-
-exe = os.path.abspath(filename)
-directory = os.path.dirname(exe)
-if not os.access(directory, os.W_OK):
-    sys.exit('ERROR: no write permissions on %s' % directory)
-
-try:
-    versions_info = urllib2.urlopen(JSON_URL).read().decode('utf-8')
-    versions_info = json.loads(versions_info)
-except:
-    sys.exit(u'ERROR: can\'t obtain versions info. Please try again later.')
-if not 'signature' in versions_info:
-    sys.exit(u'ERROR: the versions file is not signed or corrupted. Aborting.')
-signature = versions_info['signature']
-del versions_info['signature']
-if not rsa_verify(json.dumps(versions_info, sort_keys=True), signature, UPDATES_RSA_KEY):
-    sys.exit(u'ERROR: the versions file signature is invalid. Aborting.')
-
-version = versions_info['versions'][versions_info['latest']]
-
-try:
-    urlh = urllib2.urlopen(version['exe'][0])
-    newcontent = urlh.read()
-    urlh.close()
-except (IOError, OSError) as err:
-    sys.exit('ERROR: unable to download latest version')
-
-newcontent_hash = hashlib.sha256(newcontent).hexdigest()
-if newcontent_hash != version['exe'][1]:
-    sys.exit(u'ERROR: the downloaded file hash does not match. Aborting.')
-
-try:
-    with open(exe + '.new', 'wb') as outf:
-        outf.write(newcontent)
-except (IOError, OSError) as err:
-    sys.exit(u'ERROR: unable to write the new version')
-
-try:
-    bat = os.path.join(directory, 'youtube-dl-updater.bat')
-    b = open(bat, 'w')
-    b.write("""
-echo Updating youtube-dl...
-ping 127.0.0.1 -n 5 -w 1000 > NUL
-move /Y "%s.new" "%s"
-del "%s"
-    \n""" %(exe, exe, bat))
-    b.close()
-
-    os.startfile(bat)
-except (IOError, OSError) as err:
-    sys.exit('ERROR: unable to overwrite current version')
-
-sys.stderr.write(u'Done! Now you can run youtube-dl.\n')
index e8d71928a53b4ab26a0bc710be66a76e7b4a7f40..f200f2c80aef4da29999ddba75bc1b17e3048a54 100755 (executable)
@@ -1,4 +1,6 @@
 #!/usr/bin/env python
+from __future__ import unicode_literals
+
 import os
 from os.path import dirname as dirn
 import sys
index cf6b92b0f7e61b504dfdc16b6b04568fd073982b..4686260e0bbd25adf39e683bdae1b475e267b0b8 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,6 @@
 from __future__ import print_function
 
 import os.path
-import pkg_resources
 import warnings
 import sys
 
@@ -103,7 +102,9 @@ setup(
         "Programming Language :: Python :: 2.6",
         "Programming Language :: Python :: 2.7",
         "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.3"
+        "Programming Language :: Python :: 3.2",
+        "Programming Language :: Python :: 3.3",
+        "Programming Language :: Python :: 3.4",
     ],
 
     **params
index 8be37a183abcedc80945d5bb663ed000dc33f738..9a7f0746ec4ca02ed9f197395b8e48a1fffff38a 100644 (file)
@@ -59,7 +59,7 @@ class FakeYDL(YoutubeDL):
         params = get_params(override=override)
         super(FakeYDL, self).__init__(params, auto_init=False)
         self.result = []
-        
+
     def to_screen(self, s, skip_eol=None):
         print(s)
 
@@ -72,8 +72,10 @@ class FakeYDL(YoutubeDL):
     def expect_warning(self, regex):
         # Silence an expected warning matching a regex
         old_report_warning = self.report_warning
+
         def report_warning(self, message):
-            if re.match(regex, message): return
+            if re.match(regex, message):
+                return
             old_report_warning(message)
         self.report_warning = types.MethodType(report_warning, self)
 
@@ -114,14 +116,14 @@ def expect_info_dict(self, expected_dict, got_dict):
         elif isinstance(expected, type):
             got = got_dict.get(info_field)
             self.assertTrue(isinstance(got, expected),
-                'Expected type %r for field %s, but got value %r of type %r' % (expected, info_field, got, type(got)))
+                            'Expected type %r for field %s, but got value %r of type %r' % (expected, info_field, got, type(got)))
         else:
             if isinstance(expected, compat_str) and expected.startswith('md5:'):
                 got = 'md5:' + md5(got_dict.get(info_field))
             else:
                 got = got_dict.get(info_field)
             self.assertEqual(expected, got,
-                'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
+                             'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
 
     # Check for the presence of mandatory fields
     if got_dict.get('_type') != 'playlist':
@@ -133,13 +135,13 @@ def expect_info_dict(self, expected_dict, got_dict):
 
     # Are checkable fields missing from the test case definition?
     test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
-        for key, value in got_dict.items()
-        if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
+                          for key, value in got_dict.items()
+                          if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
     missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
     if missing_keys:
         def _repr(v):
             if isinstance(v, compat_str):
-                return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'")
+                return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n')
             else:
                 return repr(v)
         info_dict_str = ''.join(
index ab61e19768e4454f061ab9f832cc70c968440d85..f8e4f930ebe6d5aefdbf128909b1be720ec046f3 100644 (file)
@@ -266,6 +266,7 @@ class TestFormatSelection(unittest.TestCase):
             'ext': 'mp4',
             'width': None,
         }
+
         def fname(templ):
             ydl = YoutubeDL({'outtmpl': templ})
             return ydl.prepare_filename(info)
index 965e5d8a5859886937e80d85a97ae39396378d26..bd4fe17bf2c0f37b4f9ac2b291a8f7d664f74534 100644 (file)
@@ -32,19 +32,19 @@ class TestAllURLsMatching(unittest.TestCase):
     def test_youtube_playlist_matching(self):
         assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
         assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
-        assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') #585
+        assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q')  # 585
         assertPlaylist('PL63F0C78739B09958')
         assertPlaylist('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
         assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
         assertPlaylist('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
-        assertPlaylist('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668
+        assertPlaylist('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')  # 668
         self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
         # Top tracks
         assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101')
 
     def test_youtube_matching(self):
         self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M'))
-        self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
+        self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012'))  # 668
         self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
         self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
         self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
index 12cfb5cbe49dde5468b8b773f85cfbbb331d8eb8..a009aa475442ae588405a99f432af6feade92836 100644 (file)
@@ -40,18 +40,22 @@ from youtube_dl.extractor import get_info_extractor
 
 RETRIES = 3
 
+
 class YoutubeDL(youtube_dl.YoutubeDL):
     def __init__(self, *args, **kwargs):
         self.to_stderr = self.to_screen
         self.processed_info_dicts = []
         super(YoutubeDL, self).__init__(*args, **kwargs)
+
     def report_warning(self, message):
         # Don't accept warnings during tests
         raise ExtractorError(message)
+
     def process_info(self, info_dict):
         self.processed_info_dicts.append(info_dict)
         return super(YoutubeDL, self).process_info(info_dict)
 
+
 def _file_md5(fn):
     with open(fn, 'rb') as f:
         return hashlib.md5(f.read()).hexdigest()
@@ -61,10 +65,13 @@ defs = gettestcases()
 
 class TestDownload(unittest.TestCase):
     maxDiff = None
+
     def setUp(self):
         self.defs = defs
 
-### Dynamically generate tests
+# Dynamically generate tests
+
+
 def generator(test_case):
 
     def test_template(self):
@@ -90,7 +97,7 @@ def generator(test_case):
             return
         for other_ie in other_ies:
             if not other_ie.working():
-                print_skipping(u'test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
+                print_skipping('test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
                 return
 
         params = get_params(test_case.get('params', {}))
@@ -101,6 +108,7 @@ def generator(test_case):
         ydl = YoutubeDL(params, auto_init=False)
         ydl.add_default_info_extractors()
         finished_hook_called = set()
+
         def _hook(status):
             if status['status'] == 'finished':
                 finished_hook_called.add(status['filename'])
@@ -111,6 +119,7 @@ def generator(test_case):
             return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))
 
         res_dict = None
+
         def try_rm_tcs_files(tcs=None):
             if tcs is None:
                 tcs = test_cases
@@ -134,7 +143,7 @@ def generator(test_case):
                         raise
 
                     if try_num == RETRIES:
-                        report_warning(u'Failed due to network errors, skipping...')
+                        report_warning('Failed due to network errors, skipping...')
                         return
 
                     print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num))
@@ -206,7 +215,7 @@ def generator(test_case):
 
     return test_template
 
-### And add them to TestDownload
+# And add them to TestDownload
 for n, test_case in enumerate(defs):
     test_method = generator(test_case)
     tname = 'test_' + str(test_case['name'])
index 94e3290db6966b6f44159c0893d16713c0c5c207..7c4cd8218e218a6d3319747705c2893c4fbf4cd2 100644 (file)
@@ -23,6 +23,7 @@ from youtube_dl.extractor import (
 class BaseTestSubtitles(unittest.TestCase):
     url = None
     IE = None
+
     def setUp(self):
         self.DL = FakeYDL()
         self.ie = self.IE(self.DL)
@@ -237,7 +238,7 @@ class TestVimeoSubtitles(BaseTestSubtitles):
     def test_subtitles(self):
         self.DL.params['writesubtitles'] = True
         subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
+        self.assertEqual(md5(subtitles['en']), '26399116d23ae3cf2c087cea94bc43b4')
 
     def test_subtitles_lang(self):
         self.DL.params['writesubtitles'] = True
index a4ba7bad03b85f7c7e69ded7b435898c7b17e11e..2cc431b0be5c58db1aaf9ac2de8e495090089f65 100644 (file)
@@ -9,14 +9,13 @@ rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 
 IGNORED_FILES = [
     'setup.py',  # http://bugs.python.org/issue13943
+    'conf.py',
+    'buildserver.py',
 ]
 
 
 class TestUnicodeLiterals(unittest.TestCase):
     def test_all_files(self):
-        print('Skipping this test (not yet fully implemented)')
-        return
-
         for dirpath, _, filenames in os.walk(rootDir):
             for basename in filenames:
                 if not basename.endswith('.py'):
@@ -30,10 +29,10 @@ class TestUnicodeLiterals(unittest.TestCase):
 
                 if "'" not in code and '"' not in code:
                     continue
-                imps = 'from __future__ import unicode_literals'
-                self.assertTrue(
-                    imps in code,
-                    ' %s  missing in %s' % (imps, fn))
+                self.assertRegexpMatches(
+                    code,
+                    r'(?:#.*\n*)?from __future__ import (?:[a-z_]+,\s*)*unicode_literals',
+                    'unicode_literals import  missing in %s' % fn)
 
                 m = re.search(r'(?<=\s)u[\'"](?!\)|,|$)', code)
                 if m is not None:
index 0fa8731473317577f6ef7534d9ad5ead2064e32b..baa3a215657026245bf93960c53374bc3abdd61b 100644 (file)
@@ -45,8 +45,9 @@ from youtube_dl.utils import (
     escape_rfc3986,
     escape_url,
     js_to_json,
-    get_filesystem_encoding,
     intlist_to_bytes,
+    args_to_str,
+    parse_filesize,
 )
 
 
@@ -119,16 +120,16 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
         self.assertEqual(orderedSet([]), [])
         self.assertEqual(orderedSet([1]), [1])
-        #keep the list ordered
+        # keep the list ordered
         self.assertEqual(orderedSet([135, 1, 1, 1]), [135, 1])
 
     def test_unescape_html(self):
         self.assertEqual(unescapeHTML('%20;'), '%20;')
         self.assertEqual(
             unescapeHTML('&eacute;'), 'é')
-        
+
     def test_daterange(self):
-        _20century = DateRange("19000101","20000101")
+        _20century = DateRange("19000101", "20000101")
         self.assertFalse("17890714" in _20century)
         _ac = DateRange("00010101")
         self.assertTrue("19690721" in _ac)
@@ -170,7 +171,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3')
 
     def test_smuggle_url(self):
-        data = {u"ö": u"ö", u"abc": [3]}
+        data = {"ö": "ö", "abc": [3]}
         url = 'https://foo.bar/baz?x=y#a'
         smug_url = smuggle_url(url, data)
         unsmug_url, unsmug_data = unsmuggle_url(smug_url)
@@ -361,5 +362,20 @@ class TestUtil(unittest.TestCase):
             intlist_to_bytes([0, 1, 127, 128, 255]),
             b'\x00\x01\x7f\x80\xff')
 
+    def test_args_to_str(self):
+        self.assertEqual(
+            args_to_str(['foo', 'ba/r', '-baz', '2 be', '']),
+            'foo ba/r -baz \'2 be\' \'\''
+        )
+
+    def test_parse_filesize(self):
+        self.assertEqual(parse_filesize(None), None)
+        self.assertEqual(parse_filesize(''), None)
+        self.assertEqual(parse_filesize('91 B'), 91)
+        self.assertEqual(parse_filesize('foobar'), None)
+        self.assertEqual(parse_filesize('2 MiB'), 2097152)
+        self.assertEqual(parse_filesize('5 GB'), 5000000000)
+        self.assertEqual(parse_filesize('1.2Tb'), 1200000000000)
+
 if __name__ == '__main__':
     unittest.main()
index eac53b285ab6740b368f278784aced9625abb9a6..780636c7730d396c381fd45fd6d4e8126d1c9fe2 100644 (file)
@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 # coding: utf-8
+from __future__ import unicode_literals
 
 # Allow direct execution
 import os
@@ -31,19 +32,18 @@ params = get_params({
 })
 
 
-
 TEST_ID = 'gr51aVj-mLg'
 ANNOTATIONS_FILE = TEST_ID + '.flv.annotations.xml'
 EXPECTED_ANNOTATIONS = ['Speech bubble', 'Note', 'Title', 'Spotlight', 'Label']
 
+
 class TestAnnotations(unittest.TestCase):
     def setUp(self):
         # Clear old files
         self.tearDown()
 
-
     def test_info_json(self):
-        expected = list(EXPECTED_ANNOTATIONS) #Two annotations could have the same text.
+        expected = list(EXPECTED_ANNOTATIONS)  # Two annotations could have the same text.
         ie = youtube_dl.extractor.YoutubeIE()
         ydl = YoutubeDL(params)
         ydl.add_info_extractor(ie)
@@ -51,7 +51,7 @@ class TestAnnotations(unittest.TestCase):
         self.assertTrue(os.path.exists(ANNOTATIONS_FILE))
         annoxml = None
         with io.open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as annof:
-                annoxml = xml.etree.ElementTree.parse(annof)
+            annoxml = xml.etree.ElementTree.parse(annof)
         self.assertTrue(annoxml is not None, 'Failed to parse annotations XML')
         root = annoxml.getroot()
         self.assertEqual(root.tag, 'document')
@@ -59,18 +59,17 @@ class TestAnnotations(unittest.TestCase):
         self.assertEqual(annotationsTag.tag, 'annotations')
         annotations = annotationsTag.findall('annotation')
 
-        #Not all the annotations have TEXT children and the annotations are returned unsorted.
+        # Not all the annotations have TEXT children and the annotations are returned unsorted.
         for a in annotations:
-                self.assertEqual(a.tag, 'annotation')
-                if a.get('type') == 'text':
-                        textTag = a.find('TEXT')
-                        text = textTag.text
-                        self.assertTrue(text in expected) #assertIn only added in python 2.7
-                        #remove the first occurance, there could be more than one annotation with the same text
-                        expected.remove(text)
-        #We should have seen (and removed) all the expected annotation texts.
+            self.assertEqual(a.tag, 'annotation')
+            if a.get('type') == 'text':
+                textTag = a.find('TEXT')
+                text = textTag.text
+                self.assertTrue(text in expected)  # assertIn only added in python 2.7
+                remove the first occurance, there could be more than one annotation with the same text
+                expected.remove(text)
+        # We should have seen (and removed) all the expected annotation texts.
         self.assertEqual(len(expected), 0, 'Not all expected annotations were found.')
-        
 
     def tearDown(self):
         try_rm(ANNOTATIONS_FILE)
index 90426a559551571a7769c6137a76270a4ad47b1c..0396ef26283f58c05ee261b0cf23773d462ad103 100644 (file)
@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 # coding: utf-8
+from __future__ import unicode_literals
 
 # Allow direct execution
 import os
@@ -32,7 +33,7 @@ params = get_params({
 TEST_ID = 'BaW_jenozKc'
 INFO_JSON_FILE = TEST_ID + '.info.json'
 DESCRIPTION_FILE = TEST_ID + '.mp4.description'
-EXPECTED_DESCRIPTION = u'''test chars:  "'/\ä↭𝕐
+EXPECTED_DESCRIPTION = '''test chars:  "'/\ä↭𝕐
 test URL: https://github.com/rg3/youtube-dl/issues/1892
 
 This is a test video for youtube-dl.
@@ -53,11 +54,11 @@ class TestInfoJSON(unittest.TestCase):
         self.assertTrue(os.path.exists(INFO_JSON_FILE))
         with io.open(INFO_JSON_FILE, 'r', encoding='utf-8') as jsonf:
             jd = json.load(jsonf)
-        self.assertEqual(jd['upload_date'], u'20121002')
+        self.assertEqual(jd['upload_date'], '20121002')
         self.assertEqual(jd['description'], EXPECTED_DESCRIPTION)
         self.assertEqual(jd['id'], TEST_ID)
         self.assertEqual(jd['extractor'], 'youtube')
-        self.assertEqual(jd['title'], u'''youtube-dl test video "'/\ä↭𝕐''')
+        self.assertEqual(jd['title'], '''youtube-dl test video "'/\ä↭𝕐''')
         self.assertEqual(jd['uploader'], 'Philipp Hagemeister')
 
         self.assertTrue(os.path.exists(DESCRIPTION_FILE))
index 410f9edc297036d7aeb59250c3241dc10bb7cf2d..c889b6f15c40f5ea91a4dd4ea5a86bb8c62c830d 100644 (file)
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+from __future__ import unicode_literals
 
 # Allow direct execution
 import os
@@ -12,10 +13,6 @@ from test.helper import FakeYDL
 from youtube_dl.extractor import (
     YoutubePlaylistIE,
     YoutubeIE,
-    YoutubeChannelIE,
-    YoutubeShowIE,
-    YoutubeTopListIE,
-    YoutubeSearchURLIE,
 )
 
 
@@ -31,7 +28,7 @@ class TestYoutubeLists(unittest.TestCase):
         result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
         self.assertEqual(result['_type'], 'url')
         self.assertEqual(YoutubeIE().extract_id(result['url']), 'FXxLjLQi3Fg')
-    
+
     def test_youtube_course(self):
         dl = FakeYDL()
         ie = YoutubePlaylistIE(dl)
index 9cb5e7a3f5b888fd8caab7455ce4f99958a94b24..49ec0137f18bc03474bde30d7c2948f257dbfa95 100755 (executable)
Binary files a/youtube-dl and b/youtube-dl differ
index 2bcdc0107a17fdd0f2218986593950e98da0058f..08e2ae3dede2310e10a880875dbbfa77d77a642a 100644 (file)
@@ -9,7 +9,7 @@ youtube\-dl \- download videos from youtube.com or other video platforms
 .PP
 \f[B]youtube\-dl\f[] is a small command\-line program to download videos
 from YouTube.com and a few more sites.
-It requires the Python interpreter, version 2.6, 2.7, or 3.3+, and it is
+It requires the Python interpreter, version 2.6, 2.7, or 3.2+, and it is
 not platform specific.
 It should work on your Unix box, on Windows or on Mac OS X.
 It is released to the public domain, which means you can modify it,
@@ -80,7 +80,8 @@ redistribute it or use it however you like.
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ COUNT\ views
 \-\-max\-views\ COUNT\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ download\ any\ videos\ with\ more\ than
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ COUNT\ views
-\-\-no\-playlist\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ download\ only\ the\ currently\ playing\ video
+\-\-no\-playlist\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ If\ the\ URL\ refers\ to\ a\ video\ and\ a
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ playlist,\ download\ only\ the\ video.
 \-\-age\-limit\ YEARS\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ download\ only\ videos\ suitable\ for\ the\ given
 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ age
 \-\-download\-archive\ FILE\ \ \ \ \ \ \ \ \ \ Download\ only\ videos\ not\ listed\ in\ the
@@ -647,14 +648,15 @@ class\ YourExtractorIE(InfoExtractor):
 
 \ \ \ \ def\ _real_extract(self,\ url):
 \ \ \ \ \ \ \ \ video_id\ =\ self._match_id(url)
+\ \ \ \ \ \ \ \ webpage\ =\ self._download_webpage(url,\ video_id)
 
 \ \ \ \ \ \ \ \ #\ TODO\ more\ code\ goes\ here,\ for\ example\ ...
-\ \ \ \ \ \ \ \ webpage\ =\ self._download_webpage(url,\ video_id)
 \ \ \ \ \ \ \ \ title\ =\ self._html_search_regex(r\[aq]<h1>(.*?)</h1>\[aq],\ webpage,\ \[aq]title\[aq])
 
 \ \ \ \ \ \ \ \ return\ {
 \ \ \ \ \ \ \ \ \ \ \ \ \[aq]id\[aq]:\ video_id,
 \ \ \ \ \ \ \ \ \ \ \ \ \[aq]title\[aq]:\ title,
+\ \ \ \ \ \ \ \ \ \ \ \ \[aq]description\[aq]:\ self._og_search_description(webpage),
 \ \ \ \ \ \ \ \ \ \ \ \ #\ TODO\ more\ properties\ (see\ youtube_dl/extractor/common.py)
 \ \ \ \ \ \ \ \ }
 \f[]
index 461fd1b5391c25a153e21afad473c9a036058292..923fd60f4bc63eeacaf0e693635d412a76ca6352 100644 (file)
@@ -24,7 +24,7 @@ complete --command youtube-dl --long-option datebefore --description 'download o
 complete --command youtube-dl --long-option dateafter --description 'download only videos uploaded on or after this date (i.e. inclusive)'
 complete --command youtube-dl --long-option min-views --description 'Do not download any videos with less than COUNT views'
 complete --command youtube-dl --long-option max-views --description 'Do not download any videos with more than COUNT views'
-complete --command youtube-dl --long-option no-playlist --description 'download only the currently playing video'
+complete --command youtube-dl --long-option no-playlist --description 'If the URL refers to a video and a playlist, download only the video.'
 complete --command youtube-dl --long-option age-limit --description 'download only videos suitable for the given age'
 complete --command youtube-dl --long-option download-archive --description 'Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.' --require-parameter
 complete --command youtube-dl --long-option include-ads --description 'Download advertisements as well (experimental)'
index fde026fbf695781ac22d936c87c0216e4c12cfd9..21c7c298a830dd8b4a1b4651419506aaa8c8a8bc 100755 (executable)
@@ -60,6 +60,7 @@ from .utils import (
     write_string,
     YoutubeDLHandler,
     prepend_extension,
+    args_to_str,
 )
 from .cache import Cache
 from .extractor import get_info_extractor, gen_extractors
@@ -253,6 +254,22 @@ class YoutubeDL(object):
             self.print_debug_header()
             self.add_default_info_extractors()
 
+    def warn_if_short_id(self, argv):
+        # short YouTube ID starting with dash?
+        idxs = [
+            i for i, a in enumerate(argv)
+            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
+        if idxs:
+            correct_argv = (
+                ['youtube-dl'] +
+                [a for i, a in enumerate(argv) if i not in idxs] +
+                ['--'] + [argv[i] for i in idxs]
+            )
+            self.report_warning(
+                'Long argument string detected. '
+                'Use -- to separate parameters and URLs, like this:\n%s\n' %
+                args_to_str(correct_argv))
+
     def add_info_extractor(self, ie):
         """Add an InfoExtractor object to the end of the list."""
         self._ies.append(ie)
@@ -297,7 +314,7 @@ class YoutubeDL(object):
         self._output_process.stdin.write((message + '\n').encode('utf-8'))
         self._output_process.stdin.flush()
         res = ''.join(self._output_channel.readline().decode('utf-8')
-                       for _ in range(line_count))
+                      for _ in range(line_count))
         return res[:-len('\n')]
 
     def to_screen(self, message, skip_eol=False):
@@ -534,7 +551,7 @@ class YoutubeDL(object):
 
             try:
                 ie_result = ie.extract(url)
-                if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
+                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                     break
                 if isinstance(ie_result, list):
                     # Backwards compatibility: old IE result format
@@ -547,7 +564,7 @@ class YoutubeDL(object):
                     return self.process_ie_result(ie_result, download, extra_info)
                 else:
                     return ie_result
-            except ExtractorError as de: # An error we somewhat expected
+            except ExtractorError as de:  # An error we somewhat expected
                 self.report_error(compat_str(de), de.format_traceback())
                 break
             except MaxDownloadsReached:
@@ -682,14 +699,17 @@ class YoutubeDL(object):
             self.report_warning(
                 'Extractor %s returned a compat_list result. '
                 'It needs to be updated.' % ie_result.get('extractor'))
+
             def _fixup(r):
-                self.add_extra_info(r,
+                self.add_extra_info(
+                    r,
                     {
                         'extractor': ie_result['extractor'],
                         'webpage_url': ie_result['webpage_url'],
                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
                         'extractor_key': ie_result['extractor_key'],
-                    })
+                    }
+                )
                 return r
             ie_result['entries'] = [
                 self.process_ie_result(_fixup(r), download, extra_info)
@@ -839,14 +859,14 @@ class YoutubeDL(object):
                         # Two formats have been requested like '137+139'
                         format_1, format_2 = rf.split('+')
                         formats_info = (self.select_format(format_1, formats),
-                            self.select_format(format_2, formats))
+                                        self.select_format(format_2, formats))
                         if all(formats_info):
                             # The first format must contain the video and the
                             # second the audio
                             if formats_info[0].get('vcodec') == 'none':
                                 self.report_error('The first format must '
-                                    'contain the video, try using '
-                                    '"-f %s+%s"' % (format_2, format_1))
+                                                  'contain the video, try using '
+                                                  '"-f %s+%s"' % (format_2, format_1))
                                 return
                             selected_format = {
                                 'requested_formats': formats_info,
@@ -992,7 +1012,7 @@ class YoutubeDL(object):
                     else:
                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
-                                subfile.write(sub)
+                            subfile.write(sub)
                 except (OSError, IOError):
                     self.report_error('Cannot write subtitles file ' + sub_filename)
                     return
@@ -1024,10 +1044,10 @@ class YoutubeDL(object):
                         with open(thumb_filename, 'wb') as thumbf:
                             shutil.copyfileobj(uf, thumbf)
                         self.to_screen('[%s] %s: Writing thumbnail to: %s' %
-                            (info_dict['extractor'], info_dict['id'], thumb_filename))
+                                       (info_dict['extractor'], info_dict['id'], thumb_filename))
                     except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                         self.report_warning('Unable to download thumbnail "%s": %s' %
-                            (info_dict['thumbnail'], compat_str(err)))
+                                            (info_dict['thumbnail'], compat_str(err)))
 
         if not self.params.get('skip_download', False):
             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
@@ -1048,8 +1068,8 @@ class YoutubeDL(object):
                         if not merger._executable:
                             postprocessors = []
                             self.report_warning('You have requested multiple '
-                                'formats but ffmpeg or avconv are not installed.'
-                                ' The formats won\'t be merged')
+                                                'formats but ffmpeg or avconv are not installed.'
+                                                ' The formats won\'t be merged')
                         else:
                             postprocessors = [merger]
                         for f in info_dict['requested_formats']:
@@ -1093,7 +1113,7 @@ class YoutubeDL(object):
 
         for url in url_list:
             try:
-                #It also downloads the videos
+                # It also downloads the videos
                 res = self.extract_info(url)
             except UnavailableVideoError:
                 self.report_error('unable to download video')
index c1323b4f35e532851cecad6931fb9f58aba0d85e..77b3384a05fa45d6d4cada65decdb5588ade9100 100644 (file)
@@ -76,10 +76,10 @@ def _real_main(argv=None):
     if opts.headers is not None:
         for h in opts.headers:
             if h.find(':', 1) < 0:
-                parser.error('wrong header formatting, it should be key:value, not "%s"'%h)
+                parser.error('wrong header formatting, it should be key:value, not "%s"' % h)
             key, value = h.split(':', 2)
             if opts.verbose:
-                write_string('[debug] Adding header from command line option %s:%s\n'%(key, value))
+                write_string('[debug] Adding header from command line option %s:%s\n' % (key, value))
             std_headers[key] = value
 
     # Dump user agent
@@ -128,7 +128,6 @@ def _real_main(argv=None):
             compat_print(desc)
         sys.exit(0)
 
-
     # Conflicting, missing and erroneous options
     if opts.usenetrc and (opts.username is not None or opts.password is not None):
         parser.error('using .netrc conflicts with giving username/password')
@@ -190,21 +189,21 @@ def _real_main(argv=None):
 
     # --all-sub automatically sets --write-sub if --write-auto-sub is not given
     # this was the old behaviour if only --all-sub was given.
-    if opts.allsubtitles and (opts.writeautomaticsub == False):
+    if opts.allsubtitles and not opts.writeautomaticsub:
         opts.writesubtitles = True
 
     if sys.version_info < (3,):
         # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
         if opts.outtmpl is not None:
             opts.outtmpl = opts.outtmpl.decode(preferredencoding())
-    outtmpl =((opts.outtmpl is not None and opts.outtmpl)
-            or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s')
-            or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s')
-            or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s')
-            or (opts.usetitle and '%(title)s-%(id)s.%(ext)s')
-            or (opts.useid and '%(id)s.%(ext)s')
-            or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s')
-            or DEFAULT_OUTTMPL)
+    outtmpl = ((opts.outtmpl is not None and opts.outtmpl)
+               or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s')
+               or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s')
+               or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s')
+               or (opts.usetitle and '%(title)s-%(id)s.%(ext)s')
+               or (opts.useid and '%(id)s.%(ext)s')
+               or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s')
+               or DEFAULT_OUTTMPL)
     if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
         parser.error('Cannot download a video and extract audio into the same'
                      ' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
@@ -317,7 +316,6 @@ def _real_main(argv=None):
                 ydl.add_post_processor(FFmpegAudioFixPP())
             ydl.add_post_processor(AtomicParsleyPP())
 
-
         # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
         # So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
         if opts.exec_cmd:
@@ -334,11 +332,12 @@ def _real_main(argv=None):
 
         # Maybe do nothing
         if (len(all_urls) < 1) and (opts.load_info_filename is None):
-            if not (opts.update_self or opts.rm_cachedir):
-                parser.error('you must provide at least one URL')
-            else:
+            if opts.update_self or opts.rm_cachedir:
                 sys.exit()
 
+            ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
+            parser.error('you must provide at least one URL')
+
         try:
             if opts.load_info_filename is not None:
                 retcode = ydl.download_with_info_file(opts.load_info_filename)
index 3fe29c91f416e0d6c957ed750d3f0f69950dc9c0..65a0f891c5998cd49c7e1a98dbca75b6c926ccb8 100755 (executable)
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+from __future__ import unicode_literals
 
 # Execute with
 # $ python youtube_dl/__main__.py (2.6+)
index e9c5e21521d66baa177986e8ca878e3fc1a75461..5efd0f836bcf2375b065be008a36e5b8a54d2e69 100644 (file)
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 __all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
 
 import base64
@@ -7,10 +9,11 @@ from .utils import bytes_to_intlist, intlist_to_bytes
 
 BLOCK_SIZE_BYTES = 16
 
+
 def aes_ctr_decrypt(data, key, counter):
     """
     Decrypt with aes in counter mode
-    
+
     @param {int[]} data        cipher
     @param {int[]} key         16/24/32-Byte cipher key
     @param {instance} counter  Instance whose next_value function (@returns {int[]}  16-Byte block)
@@ -19,23 +22,24 @@ def aes_ctr_decrypt(data, key, counter):
     """
     expanded_key = key_expansion(key)
     block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
-    
-    decrypted_data=[]
+
+    decrypted_data = []
     for i in range(block_count):
         counter_block = counter.next_value()
-        block = data[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES]
-        block += [0]*(BLOCK_SIZE_BYTES - len(block))
-        
+        block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
+        block += [0] * (BLOCK_SIZE_BYTES - len(block))
+
         cipher_counter_block = aes_encrypt(counter_block, expanded_key)
         decrypted_data += xor(block, cipher_counter_block)
     decrypted_data = decrypted_data[:len(data)]
-    
+
     return decrypted_data
 
+
 def aes_cbc_decrypt(data, key, iv):
     """
     Decrypt with aes in CBC mode
-    
+
     @param {int[]} data        cipher
     @param {int[]} key         16/24/32-Byte cipher key
     @param {int[]} iv          16-Byte IV
@@ -43,94 +47,98 @@ def aes_cbc_decrypt(data, key, iv):
     """
     expanded_key = key_expansion(key)
     block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
-    
-    decrypted_data=[]
+
+    decrypted_data = []
     previous_cipher_block = iv
     for i in range(block_count):
-        block = data[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES]
-        block += [0]*(BLOCK_SIZE_BYTES - len(block))
-        
+        block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
+        block += [0] * (BLOCK_SIZE_BYTES - len(block))
+
         decrypted_block = aes_decrypt(block, expanded_key)
         decrypted_data += xor(decrypted_block, previous_cipher_block)
         previous_cipher_block = block
     decrypted_data = decrypted_data[:len(data)]
-    
+
     return decrypted_data
 
+
 def key_expansion(data):
     """
     Generate key schedule
-    
+
     @param {int[]} data  16/24/32-Byte cipher key
-    @returns {int[]}     176/208/240-Byte expanded key 
+    @returns {int[]}     176/208/240-Byte expanded key
     """
-    data = data[:] # copy
+    data = data[:]  # copy
     rcon_iteration = 1
     key_size_bytes = len(data)
     expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES
-    
+
     while len(data) < expanded_key_size_bytes:
         temp = data[-4:]
         temp = key_schedule_core(temp, rcon_iteration)
         rcon_iteration += 1
-        data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
-        
+        data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
+
         for _ in range(3):
             temp = data[-4:]
-            data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
-        
+            data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
+
         if key_size_bytes == 32:
             temp = data[-4:]
             temp = sub_bytes(temp)
-            data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
-        
-        for _ in range(3 if key_size_bytes == 32  else 2 if key_size_bytes == 24 else 0):
+            data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
+
+        for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0):
             temp = data[-4:]
-            data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
+            data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
     data = data[:expanded_key_size_bytes]
-    
+
     return data
 
+
 def aes_encrypt(data, expanded_key):
     """
     Encrypt one block with aes
-    
+
     @param {int[]} data          16-Byte state
-    @param {int[]} expanded_key  176/208/240-Byte expanded key 
+    @param {int[]} expanded_key  176/208/240-Byte expanded key
     @returns {int[]}             16-Byte cipher
     """
     rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
 
     data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
-    for i in range(1, rounds+1):
+    for i in range(1, rounds + 1):
         data = sub_bytes(data)
         data = shift_rows(data)
         if i != rounds:
             data = mix_columns(data)
-        data = xor(data, expanded_key[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES])
+        data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES])
 
     return data
 
+
 def aes_decrypt(data, expanded_key):
     """
     Decrypt one block with aes
-    
+
     @param {int[]} data          16-Byte cipher
     @param {int[]} expanded_key  176/208/240-Byte expanded key
     @returns {int[]}             16-Byte state
     """
     rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
-    
+
     for i in range(rounds, 0, -1):
-        data = xor(data, expanded_key[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES])
+        data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES])
         if i != rounds:
             data = mix_columns_inv(data)
         data = shift_rows_inv(data)
         data = sub_bytes_inv(data)
     data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
-    
+
     return data
 
+
 def aes_decrypt_text(data, password, key_size_bytes):
     """
     Decrypt text
@@ -138,33 +146,34 @@ def aes_decrypt_text(data, password, key_size_bytes):
     - The cipher key is retrieved by encrypting the first 16 Byte of 'password'
       with the first 'key_size_bytes' Bytes from 'password' (if necessary filled with 0's)
     - Mode of operation is 'counter'
-    
+
     @param {str} data                    Base64 encoded string
     @param {str,unicode} password        Password (will be encoded with utf-8)
     @param {int} key_size_bytes          Possible values: 16 for 128-Bit, 24 for 192-Bit or 32 for 256-Bit
     @returns {str}                       Decrypted data
     """
     NONCE_LENGTH_BYTES = 8
-    
+
     data = bytes_to_intlist(base64.b64decode(data))
     password = bytes_to_intlist(password.encode('utf-8'))
-    
-    key = password[:key_size_bytes] + [0]*(key_size_bytes - len(password))
+
+    key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password))
     key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES)
-    
+
     nonce = data[:NONCE_LENGTH_BYTES]
     cipher = data[NONCE_LENGTH_BYTES:]
-    
+
     class Counter:
-        __value = nonce + [0]*(BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)
+        __value = nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)
+
         def next_value(self):
             temp = self.__value
             self.__value = inc(self.__value)
             return temp
-    
+
     decrypted_data = aes_ctr_decrypt(cipher, key, Counter())
     plaintext = intlist_to_bytes(decrypted_data)
-    
+
     return plaintext
 
 RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
@@ -200,14 +209,14 @@ SBOX_INV = (0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x
             0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
             0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
             0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d)
-MIX_COLUMN_MATRIX = ((0x2,0x3,0x1,0x1),
-                     (0x1,0x2,0x3,0x1),
-                     (0x1,0x1,0x2,0x3),
-                     (0x3,0x1,0x1,0x2))
-MIX_COLUMN_MATRIX_INV = ((0xE,0xB,0xD,0x9),
-                         (0x9,0xE,0xB,0xD),
-                         (0xD,0x9,0xE,0xB),
-                         (0xB,0xD,0x9,0xE))
+MIX_COLUMN_MATRIX = ((0x2, 0x3, 0x1, 0x1),
+                     (0x1, 0x2, 0x3, 0x1),
+                     (0x1, 0x1, 0x2, 0x3),
+                     (0x3, 0x1, 0x1, 0x2))
+MIX_COLUMN_MATRIX_INV = ((0xE, 0xB, 0xD, 0x9),
+                         (0x9, 0xE, 0xB, 0xD),
+                         (0xD, 0x9, 0xE, 0xB),
+                         (0xB, 0xD, 0x9, 0xE))
 RIJNDAEL_EXP_TABLE = (0x01, 0x03, 0x05, 0x0F, 0x11, 0x33, 0x55, 0xFF, 0x1A, 0x2E, 0x72, 0x96, 0xA1, 0xF8, 0x13, 0x35,
                       0x5F, 0xE1, 0x38, 0x48, 0xD8, 0x73, 0x95, 0xA4, 0xF7, 0x02, 0x06, 0x0A, 0x1E, 0x22, 0x66, 0xAA,
                       0xE5, 0x34, 0x5C, 0xE4, 0x37, 0x59, 0xEB, 0x26, 0x6A, 0xBE, 0xD9, 0x70, 0x90, 0xAB, 0xE6, 0x31,
@@ -241,30 +250,37 @@ RIJNDAEL_LOG_TABLE = (0x00, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, 0x4b, 0xc7
                       0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5,
                       0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07)
 
+
 def sub_bytes(data):
     return [SBOX[x] for x in data]
 
+
 def sub_bytes_inv(data):
     return [SBOX_INV[x] for x in data]
 
+
 def rotate(data):
     return data[1:] + [data[0]]
 
+
 def key_schedule_core(data, rcon_iteration):
     data = rotate(data)
     data = sub_bytes(data)
     data[0] = data[0] ^ RCON[rcon_iteration]
-    
+
     return data
 
+
 def xor(data1, data2):
-    return [x^y for x, y in zip(data1, data2)]
+    return [x ^ y for x, y in zip(data1, data2)]
+
 
 def rijndael_mul(a, b):
-    if(a==0 or b==0):
+    if(a == 0 or b == 0):
         return 0
     return RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]) % 0xFF]
 
+
 def mix_column(data, matrix):
     data_mixed = []
     for row in range(4):
@@ -275,33 +291,38 @@ def mix_column(data, matrix):
         data_mixed.append(mixed)
     return data_mixed
 
+
 def mix_columns(data, matrix=MIX_COLUMN_MATRIX):
     data_mixed = []
     for i in range(4):
-        column = data[i*4 : (i+1)*4]
+        column = data[i * 4: (i + 1) * 4]
         data_mixed += mix_column(column, matrix)
     return data_mixed
 
+
 def mix_columns_inv(data):
     return mix_columns(data, MIX_COLUMN_MATRIX_INV)
 
+
 def shift_rows(data):
     data_shifted = []
     for column in range(4):
         for row in range(4):
-            data_shifted.append( data[((column + row) & 0b11) * 4 + row] )
+            data_shifted.append(data[((column + row) & 0b11) * 4 + row])
     return data_shifted
 
+
 def shift_rows_inv(data):
     data_shifted = []
     for column in range(4):
         for row in range(4):
-            data_shifted.append( data[((column - row) & 0b11) * 4 + row] )
+            data_shifted.append(data[((column - row) & 0b11) * 4 + row])
     return data_shifted
 
+
 def inc(data):
-    data = data[:] # copy
-    for i in range(len(data)-1,-1,-1):
+    data = data[:]  # copy
+    for i in range(len(data) - 1, -1, -1):
         if data[i] == 255:
             data[i] = 0
         else:
index 9d33a8ec5fbd80a75a5d382e8621f6dc616c7aa0..27596687d0d2c354990e6112027e054865c1c79c 100644 (file)
@@ -3,53 +3,54 @@ from __future__ import unicode_literals
 import getpass
 import optparse
 import os
+import re
 import subprocess
 import sys
 
 
 try:
     import urllib.request as compat_urllib_request
-except ImportError: # Python 2
+except ImportError:  # Python 2
     import urllib2 as compat_urllib_request
 
 try:
     import urllib.error as compat_urllib_error
-except ImportError: # Python 2
+except ImportError:  # Python 2
     import urllib2 as compat_urllib_error
 
 try:
     import urllib.parse as compat_urllib_parse
-except ImportError: # Python 2
+except ImportError:  # Python 2
     import urllib as compat_urllib_parse
 
 try:
     from urllib.parse import urlparse as compat_urllib_parse_urlparse
-except ImportError: # Python 2
+except ImportError:  # Python 2
     from urlparse import urlparse as compat_urllib_parse_urlparse
 
 try:
     import urllib.parse as compat_urlparse
-except ImportError: # Python 2
+except ImportError:  # Python 2
     import urlparse as compat_urlparse
 
 try:
     import http.cookiejar as compat_cookiejar
-except ImportError: # Python 2
+except ImportError:  # Python 2
     import cookielib as compat_cookiejar
 
 try:
     import html.entities as compat_html_entities
-except ImportError: # Python 2
+except ImportError:  # Python 2
     import htmlentitydefs as compat_html_entities
 
 try:
     import html.parser as compat_html_parser
-except ImportError: # Python 2
+except ImportError:  # Python 2
     import HTMLParser as compat_html_parser
 
 try:
     import http.client as compat_http_client
-except ImportError: # Python 2
+except ImportError:  # Python 2
     import httplib as compat_http_client
 
 try:
@@ -110,12 +111,12 @@ except ImportError:
 
 try:
     from urllib.parse import parse_qs as compat_parse_qs
-except ImportError: # Python 2
+except ImportError:  # Python 2
     # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
     # Python 2's version is apparently totally broken
 
     def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
-                encoding='utf-8', errors='replace'):
+                   encoding='utf-8', errors='replace'):
         qs, _coerce_result = qs, unicode
         pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
         r = []
@@ -144,10 +145,10 @@ except ImportError: # Python 2
         return r
 
     def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
-                encoding='utf-8', errors='replace'):
+                        encoding='utf-8', errors='replace'):
         parsed_result = {}
         pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
-                        encoding=encoding, errors=errors)
+                           encoding=encoding, errors=errors)
         for name, value in pairs:
             if name in parsed_result:
                 parsed_result[name].append(value)
@@ -156,12 +157,12 @@ except ImportError: # Python 2
         return parsed_result
 
 try:
-    compat_str = unicode # Python 2
+    compat_str = unicode  # Python 2
 except NameError:
     compat_str = str
 
 try:
-    compat_chr = unichr # Python 2
+    compat_chr = unichr  # Python 2
 except NameError:
     compat_chr = chr
 
@@ -174,12 +175,17 @@ try:
     from shlex import quote as shlex_quote
 except ImportError:  # Python < 3.3
     def shlex_quote(s):
-        return "'" + s.replace("'", "'\"'\"'") + "'"
+        if re.match(r'^[-_\w./]+$', s):
+            return s
+        else:
+            return "'" + s.replace("'", "'\"'\"'") + "'"
 
 
 def compat_ord(c):
-    if type(c) is int: return c
-    else: return ord(c)
+    if type(c) is int:
+        return c
+    else:
+        return ord(c)
 
 
 if sys.version_info >= (3, 0):
@@ -250,7 +256,7 @@ else:
                     drive = ''
                 userhome = os.path.join(drive, compat_getenv('HOMEPATH'))
 
-            if i != 1: #~user
+            if i != 1:  # ~user
                 userhome = os.path.join(os.path.dirname(userhome), path[1:i])
 
             return userhome + path[i:]
@@ -264,7 +270,7 @@ if sys.version_info < (3, 0):
         print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
 else:
     def compat_print(s):
-        assert type(s) == type(u'')
+        assert isinstance(s, compat_str)
         print(s)
 
 
index 3f941596edd83edda99917b57187485941133e8f..31e28df58e828f8c5434390ff18a63358215924a 100644 (file)
@@ -30,3 +30,8 @@ def get_suitable_downloader(info_dict):
         return F4mFD
     else:
         return HttpFD
+
+__all__ = [
+    'get_suitable_downloader',
+    'FileDownloader',
+]
index 7c33004b17b1612942187cc80bedbe0fef985d68..c0af50c59182ad560a23264738d027d4ad3c8697 100644 (file)
@@ -81,7 +81,7 @@ class FileDownloader(object):
         if total is None:
             return None
         dif = now - start
-        if current == 0 or dif < 0.001: # One millisecond
+        if current == 0 or dif < 0.001:  # One millisecond
             return None
         rate = float(current) / dif
         return int((float(total) - float(current)) / rate)
@@ -95,7 +95,7 @@ class FileDownloader(object):
     @staticmethod
     def calc_speed(start, now, bytes):
         dif = now - start
-        if bytes == 0 or dif < 0.001: # One millisecond
+        if bytes == 0 or dif < 0.001:  # One millisecond
             return None
         return float(bytes) / dif
 
@@ -108,7 +108,7 @@ class FileDownloader(object):
     @staticmethod
     def best_block_size(elapsed_time, bytes):
         new_min = max(bytes / 2.0, 1.0)
-        new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
+        new_max = min(max(bytes * 2.0, 1.0), 4194304)  # Do not surpass 4 MB
         if elapsed_time < 0.001:
             return int(new_max)
         rate = bytes / elapsed_time
index b607f6485ae7b0a064e55634ef8ca33937cd857d..7cd22c504e463ad2551692728bd3933e8bcf20ab 100644 (file)
@@ -55,7 +55,7 @@ class FlvReader(io.BytesIO):
         if size == 1:
             real_size = self.read_unsigned_long_long()
             header_end = 16
-        return real_size, box_type, self.read(real_size-header_end)
+        return real_size, box_type, self.read(real_size - header_end)
 
     def read_asrt(self):
         # version
@@ -180,7 +180,7 @@ def build_fragments_list(boot_info):
     n_frags = segment_run_entry[1]
     fragment_run_entry_table = boot_info['fragments'][0]['fragments']
     first_frag_number = fragment_run_entry_table[0]['first']
-    for (i, frag_number) in zip(range(1, n_frags+1), itertools.count(first_frag_number)):
+    for (i, frag_number) in zip(range(1, n_frags + 1), itertools.count(first_frag_number)):
         res.append((1, frag_number))
     return res
 
@@ -225,13 +225,15 @@ class F4mFD(FileDownloader):
         self.to_screen('[download] Downloading f4m manifest')
         manifest = self.ydl.urlopen(man_url).read()
         self.report_destination(filename)
-        http_dl = HttpQuietDownloader(self.ydl,
+        http_dl = HttpQuietDownloader(
+            self.ydl,
             {
                 'continuedl': True,
                 'quiet': True,
                 'noprogress': True,
                 'test': self.params.get('test', False),
-            })
+            }
+        )
 
         doc = etree.fromstring(manifest)
         formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
@@ -277,7 +279,7 @@ class F4mFD(FileDownloader):
         def frag_progress_hook(status):
             frag_total_bytes = status.get('total_bytes', 0)
             estimated_size = (state['downloaded_bytes'] +
-                (total_frags - state['frag_counter']) * frag_total_bytes)
+                              (total_frags - state['frag_counter']) * frag_total_bytes)
             if status['status'] == 'finished':
                 state['downloaded_bytes'] += frag_total_bytes
                 state['frag_counter'] += 1
@@ -287,13 +289,13 @@ class F4mFD(FileDownloader):
                 frag_downloaded_bytes = status['downloaded_bytes']
                 byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes
                 frag_progress = self.calc_percent(frag_downloaded_bytes,
-                    frag_total_bytes)
+                                                  frag_total_bytes)
                 progress = self.calc_percent(state['frag_counter'], total_frags)
                 progress += frag_progress / float(total_frags)
 
             eta = self.calc_eta(start, time.time(), estimated_size, byte_counter)
             self.report_progress(progress, format_bytes(estimated_size),
-                status.get('speed'), eta)
+                                 status.get('speed'), eta)
         http_dl.add_progress_hook(frag_progress_hook)
 
         frags_filenames = []
index 68eafa403df4ad157feb13174584786205b41bea..954beffd50e51db43ce203931d31ad2fbeee95dc 100644 (file)
@@ -28,14 +28,14 @@ class HlsFD(FileDownloader):
             if check_executable(program, ['-version']):
                 break
         else:
-            self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
+            self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
             return False
         cmd = [program] + args
 
         retval = subprocess.call(cmd)
         if retval == 0:
             fsize = os.path.getsize(encodeFilename(tmpfilename))
-            self.to_screen(u'\r[%s] %s bytes' % (cmd[0], fsize))
+            self.to_screen('\r[%s] %s bytes' % (cmd[0], fsize))
             self.try_rename(tmpfilename, filename)
             self._hook_progress({
                 'downloaded_bytes': fsize,
@@ -45,8 +45,8 @@ class HlsFD(FileDownloader):
             })
             return True
         else:
-            self.to_stderr(u"\n")
-            self.report_error(u'%s exited with code %d' % (program, retval))
+            self.to_stderr('\n')
+            self.report_error('%s exited with code %d' % (program, retval))
             return False
 
 
@@ -101,4 +101,3 @@ class NativeHlsFD(FileDownloader):
         })
         self.try_rename(tmpfilename, filename)
         return True
-
index f62555ce0e33353f5eac848e3956b263f9d43bcb..8491cee8aa2769e8465176411a92ded85b07ad13 100644 (file)
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import os
 import time
 
@@ -106,7 +108,7 @@ class HttpFD(FileDownloader):
                 self.report_retry(count, retries)
 
         if count > retries:
-            self.report_error(u'giving up after %s retries' % retries)
+            self.report_error('giving up after %s retries' % retries)
             return False
 
         data_len = data.info().get('Content-length', None)
@@ -124,10 +126,10 @@ class HttpFD(FileDownloader):
             min_data_len = self.params.get("min_filesize", None)
             max_data_len = self.params.get("max_filesize", None)
             if min_data_len is not None and data_len < min_data_len:
-                self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
+                self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
                 return False
             if max_data_len is not None and data_len > max_data_len:
-                self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
+                self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
                 return False
 
         data_len_str = format_bytes(data_len)
@@ -151,13 +153,13 @@ class HttpFD(FileDownloader):
                     filename = self.undo_temp_name(tmpfilename)
                     self.report_destination(filename)
                 except (OSError, IOError) as err:
-                    self.report_error(u'unable to open for writing: %s' % str(err))
+                    self.report_error('unable to open for writing: %s' % str(err))
                     return False
             try:
                 stream.write(data_block)
             except (IOError, OSError) as err:
-                self.to_stderr(u"\n")
-                self.report_error(u'unable to write data: %s' % str(err))
+                self.to_stderr('\n')
+                self.report_error('unable to write data: %s' % str(err))
                 return False
             if not self.params.get('noresizebuffer', False):
                 block_size = self.best_block_size(after - before, len(data_block))
@@ -188,10 +190,10 @@ class HttpFD(FileDownloader):
             self.slow_down(start, byte_counter - resume_len)
 
         if stream is None:
-            self.to_stderr(u"\n")
-            self.report_error(u'Did not get any data blocks')
+            self.to_stderr('\n')
+            self.report_error('Did not get any data blocks')
             return False
-        if tmpfilename != u'-':
+        if tmpfilename != '-':
             stream.close()
         self.report_finish(data_len_str, (time.time() - start))
         if data_len is not None and byte_counter != data_len:
index 4de7f15f4ee3dd11b51afbb9c0e1e221b12d0784..c53195da0c9471d55a61b53b1041e05ee209697e 100644 (file)
@@ -1,7 +1,10 @@
+from __future__ import unicode_literals
+
 import os
 import subprocess
 
 from .common import FileDownloader
+from ..compat import compat_subprocess_get_DEVNULL
 from ..utils import (
     encodeFilename,
 )
@@ -13,19 +16,23 @@ class MplayerFD(FileDownloader):
         self.report_destination(filename)
         tmpfilename = self.temp_name(filename)
 
-        args = ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url]
+        args = [
+            'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',
+            '-dumpstream', '-dumpfile', tmpfilename, url]
         # Check for mplayer first
         try:
-            subprocess.call(['mplayer', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
+            subprocess.call(
+                ['mplayer', '-h'],
+                stdout=compat_subprocess_get_DEVNULL(), stderr=subprocess.STDOUT)
         except (OSError, IOError):
-            self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0])
+            self.report_error('MMS or RTSP download detected but "%s" could not be run' % args[0])
             return False
 
         # Download using mplayer.
         retval = subprocess.call(args)
         if retval == 0:
             fsize = os.path.getsize(encodeFilename(tmpfilename))
-            self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
+            self.to_screen('\r[%s] %s bytes' % (args[0], fsize))
             self.try_rename(tmpfilename, filename)
             self._hook_progress({
                 'downloaded_bytes': fsize,
@@ -35,6 +42,6 @@ class MplayerFD(FileDownloader):
             })
             return True
         else:
-            self.to_stderr(u"\n")
-            self.report_error(u'mplayer exited with code %d' % retval)
+            self.to_stderr('\n')
+            self.report_error('mplayer exited with code %d' % retval)
             return False
index 17d9631faa54613dcd8c910231a428cad2389c60..58ae2005c014eb4aafbd68e087d5018d26d02a21 100644 (file)
@@ -46,13 +46,13 @@ class RtmpFD(FileDownloader):
                     continue
                 mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
                 if mobj:
-                    downloaded_data_len = int(float(mobj.group(1))*1024)
+                    downloaded_data_len = int(float(mobj.group(1)) * 1024)
                     percent = float(mobj.group(2))
                     if not resume_percent:
                         resume_percent = percent
                         resume_downloaded_data_len = downloaded_data_len
-                    eta = self.calc_eta(start, time.time(), 100-resume_percent, percent-resume_percent)
-                    speed = self.calc_speed(start, time.time(), downloaded_data_len-resume_downloaded_data_len)
+                    eta = self.calc_eta(start, time.time(), 100 - resume_percent, percent - resume_percent)
+                    speed = self.calc_speed(start, time.time(), downloaded_data_len - resume_downloaded_data_len)
                     data_len = None
                     if percent > 0:
                         data_len = int(downloaded_data_len * 100 / percent)
@@ -72,7 +72,7 @@ class RtmpFD(FileDownloader):
                     # no percent for live streams
                     mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
                     if mobj:
-                        downloaded_data_len = int(float(mobj.group(1))*1024)
+                        downloaded_data_len = int(float(mobj.group(1)) * 1024)
                         time_now = time.time()
                         speed = self.calc_speed(start, time_now, downloaded_data_len)
                         self.report_progress_live_stream(downloaded_data_len, speed, time_now - start)
@@ -88,7 +88,7 @@ class RtmpFD(FileDownloader):
                         if not cursor_in_new_line:
                             self.to_screen('')
                         cursor_in_new_line = True
-                        self.to_screen('[rtmpdump] '+line)
+                        self.to_screen('[rtmpdump] ' + line)
             proc.wait()
             if not cursor_in_new_line:
                 self.to_screen('')
@@ -180,7 +180,7 @@ class RtmpFD(FileDownloader):
         while (retval == RD_INCOMPLETE or retval == RD_FAILED) and not test and not live:
             prevsize = os.path.getsize(encodeFilename(tmpfilename))
             self.to_screen('[rtmpdump] %s bytes' % prevsize)
-            time.sleep(5.0) # This seems to be needed
+            time.sleep(5.0)  # This seems to be needed
             retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == RD_FAILED])
             cursize = os.path.getsize(encodeFilename(tmpfilename))
             if prevsize == cursize and retval == RD_FAILED:
index 7497a97f5c7ece071b0ddc5427dc3ed746d1421d..0339d13860ab81d1a38a3dc624f2336b42e4a6a5 100644 (file)
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 from .abc import ABCIE
 from .academicearth import AcademicEarthCourseIE
 from .addanime import AddAnimeIE
@@ -32,9 +34,11 @@ from .bilibili import BiliBiliIE
 from .blinkx import BlinkxIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .bloomberg import BloombergIE
+from .bpb import BpbIE
 from .br import BRIE
 from .breakcom import BreakIE
 from .brightcove import BrightcoveIE
+from .buzzfeed import BuzzFeedIE
 from .byutv import BYUtvIE
 from .c56 import C56IE
 from .canal13cl import Canal13clIE
@@ -238,7 +242,7 @@ from .muenchentv import MuenchenTVIE
 from .musicplayon import MusicPlayOnIE
 from .musicvault import MusicVaultIE
 from .muzu import MuzuTVIE
-from .myspace import MySpaceIE
+from .myspace import MySpaceIE, MySpaceAlbumIE
 from .myspass import MySpassIE
 from .myvideo import MyVideoIE
 from .naver import NaverIE
@@ -372,6 +376,7 @@ from .syfy import SyfyIE
 from .sztvhu import SztvHuIE
 from .tagesschau import TagesschauIE
 from .tapely import TapelyIE
+from .tass import TassIE
 from .teachertube import (
     TeacherTubeIE,
     TeacherTubeUserIE,
@@ -392,6 +397,7 @@ from .thesixtyone import TheSixtyOneIE
 from .thisav import ThisAVIE
 from .tinypic import TinyPicIE
 from .tlc import TlcIE, TlcDeIE
+from .tmz import TMZIE
 from .tnaflix import TNAFlixIE
 from .thvideo import (
     THVideoIE,
@@ -405,6 +411,7 @@ from .trutube import TruTubeIE
 from .tube8 import Tube8IE
 from .tudou import TudouIE
 from .tumblr import TumblrIE
+from .tunein import TuneInIE
 from .turbo import TurboIE
 from .tutv import TutvIE
 from .tvigle import TvigleIE
@@ -454,7 +461,10 @@ from .vine import (
     VineUserIE,
 )
 from .viki import VikiIE
-from .vk import VKIE
+from .vk import (
+    VKIE,
+    VKUserVideosIE,
+)
 from .vodlocker import VodlockerIE
 from .vporn import VpornIE
 from .vrt import VRTIE
@@ -478,6 +488,7 @@ from .wrzuta import WrzutaIE
 from .xbef import XBefIE
 from .xboxclips import XboxClipsIE
 from .xhamster import XHamsterIE
+from .xminus import XMinusIE
 from .xnxx import XNXXIE
 from .xvideos import XVideosIE
 from .xtube import XTubeUserIE, XTubeIE
@@ -508,6 +519,10 @@ from .youtube import (
     YoutubeWatchLaterIE,
 )
 from .zdf import ZDFIE
+from .zingmp3 import (
+    ZingMp3SongIE,
+    ZingMp3AlbumIE,
+)
 
 _ALL_CLASSES = [
     klass
@@ -526,4 +541,4 @@ def gen_extractors():
 
 def get_info_extractor(ie_name):
     """Returns the info extractor class with the given ie_name"""
-    return globals()[ie_name+'IE']
+    return globals()[ie_name + 'IE']
index c983ef0f519c303f880f471d357db4b60657ef17..47313fba8735902f964c0cd39992f9962e0f47fb 100644 (file)
@@ -1,4 +1,5 @@
 from __future__ import unicode_literals
+
 import re
 
 from .common import InfoExtractor
@@ -18,15 +19,14 @@ class AcademicEarthCourseIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        m = re.match(self._VALID_URL, url)
-        playlist_id = m.group('id')
+        playlist_id = self._match_id(url)
 
         webpage = self._download_webpage(url, playlist_id)
         title = self._html_search_regex(
-            r'<h1 class="playlist-name"[^>]*?>(.*?)</h1>', webpage, u'title')
+            r'<h1 class="playlist-name"[^>]*?>(.*?)</h1>', webpage, 'title')
         description = self._html_search_regex(
             r'<p class="excerpt"[^>]*?>(.*?)</p>',
-            webpage, u'description', fatal=False)
+            webpage, 'description', fatal=False)
         urls = re.findall(
             r'<li class="lecture-preview">\s*?<a target="_blank" href="([^"]+)">',
             webpage)
index 11f149f9e418588e144fe836ef65ad6b8e5a288d..203936e54a3797ae37535022ad02757a925f24d7 100644 (file)
@@ -15,8 +15,7 @@ from ..utils import (
 
 
 class AddAnimeIE(InfoExtractor):
-
-    _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
+    _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<id>[\w_]+)(?:.*)'
     _TEST = {
         'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
         'md5': '72954ea10bc979ab5e2eb288b21425a0',
@@ -29,9 +28,9 @@ class AddAnimeIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
+        video_id = self._match_id(url)
+
         try:
-            mobj = re.match(self._VALID_URL, url)
-            video_id = mobj.group('video_id')
             webpage = self._download_webpage(url, video_id)
         except ExtractorError as ee:
             if not isinstance(ee.cause, compat_HTTPError) or \
@@ -49,7 +48,7 @@ class AddAnimeIE(InfoExtractor):
                 r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);',
                 redir_webpage)
             if av is None:
-                raise ExtractorError(u'Cannot find redirect math task')
+                raise ExtractorError('Cannot find redirect math task')
             av_res = int(av.group(1)) + int(av.group(2)) * int(av.group(3))
 
             parsed_url = compat_urllib_parse_urlparse(url)
index b4b40f2d4f21432f6b12a883513ae00827af00e5..0d05cbb4b16b470aaa1a82d318c07f323ccd1bf7 100644 (file)
@@ -5,6 +5,7 @@ import re
 
 from .common import InfoExtractor
 
+
 class AdultSwimIE(InfoExtractor):
     _VALID_URL = r'https?://video\.adultswim\.com/(?P<path>.+?)(?:\.html)?(?:\?.*)?(?:#.*)?$'
     _TEST = {
index 74860882628017c5ab7a44f22bd9b05286ad556e..15006336faacb0c7f6ab9c24263726776866dbb6 100644 (file)
@@ -1,5 +1,4 @@
-#coding: utf-8
-
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -26,8 +25,7 @@ class AparatIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        m = re.match(self._VALID_URL, url)
-        video_id = m.group('id')
+        video_id = self._match_id(url)
 
         # Note: There is an easier-to-parse configuration at
         # http://www.aparat.com/video/video/config/videohash/%video_id
@@ -40,15 +38,15 @@ class AparatIE(InfoExtractor):
         for i, video_url in enumerate(video_urls):
             req = HEADRequest(video_url)
             res = self._request_webpage(
-                req, video_id, note=u'Testing video URL %d' % i, errnote=False)
+                req, video_id, note='Testing video URL %d' % i, errnote=False)
             if res:
                 break
         else:
-            raise ExtractorError(u'No working video URLs found')
+            raise ExtractorError('No working video URLs found')
 
-        title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, u'title')
+        title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title')
         thumbnail = self._search_regex(
-            r'\s+image:\s*"([^"]+)"', webpage, u'thumbnail', fatal=False)
+            r'\s+image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
 
         return {
             'id': video_id,
index 4359b88d1b7057944beb126eb8a1c82dbb818758..0c01fa1a13ffa6fbfbfe7b7fb2283d5ed4f8b70f 100644 (file)
@@ -70,15 +70,17 @@ class AppleTrailersIE(InfoExtractor):
         uploader_id = mobj.group('company')
 
         playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
+
         def fix_html(s):
             s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
             s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s)
             # The ' in the onClick attributes are not escaped, it couldn't be parsed
             # like: http://trailers.apple.com/trailers/wb/gravity/
+
             def _clean_json(m):
                 return 'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
             s = re.sub(self._JSON_RE, _clean_json, s)
-            s = '<html>' + s + u'</html>'
+            s = '<html>%s</html>' % s
             return s
         doc = self._download_xml(playlist_url, movie, transform_source=fix_html)
 
@@ -86,7 +88,7 @@ class AppleTrailersIE(InfoExtractor):
         for li in doc.findall('./div/ul/li'):
             on_click = li.find('.//a').attrib['onClick']
             trailer_info_json = self._search_regex(self._JSON_RE,
-                on_click, 'trailer info')
+                                                   on_click, 'trailer info')
             trailer_info = json.loads(trailer_info_json)
             title = trailer_info['title']
             video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
index 630b1faa999617c79d160a636052e2acd6d5debd..967bd865c53229e7ff38997ea9a7f4a6ab19f92d 100644 (file)
@@ -192,4 +192,3 @@ class ARDIE(InfoExtractor):
             'upload_date': upload_date,
             'thumbnail': thumbnail,
         }
-
index 3a57ce52785a7d4950ec694b71fb9bdff69d1e55..219631b9b0dfa690a37e09a7b3473566543370e2 100644 (file)
@@ -13,7 +13,7 @@ from ..utils import (
     qualities,
 )
 
-# There are different sources of video in arte.tv, the extraction process 
+# There are different sources of video in arte.tv, the extraction process
 # is different for each one. The videos usually expire in 7 days, so we can't
 # add tests.
 
index 6232d2cd041b7db63b4880f5d983980af65f8a65..eeeec768fe302a508008e095f138690477594553 100644 (file)
@@ -12,17 +12,17 @@ class AudiomackIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?audiomack\.com/song/(?P<id>[\w/-]+)'
     IE_NAME = 'audiomack'
     _TESTS = [
-        #hosted on audiomack
+        # hosted on audiomack
         {
             'url': 'http://www.audiomack.com/song/roosh-williams/extraordinary',
             'info_dict':
             {
-                'id' : 'roosh-williams/extraordinary',
+                'id': 'roosh-williams/extraordinary',
                 'ext': 'mp3',
                 'title': 'Roosh Williams - Extraordinary'
             }
         },
-        #hosted on soundcloud via audiomack
+        # hosted on soundcloud via audiomack
         {
             'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare',
             'file': '172419696.mp3',
@@ -49,7 +49,7 @@ class AudiomackIE(InfoExtractor):
             raise ExtractorError("Unable to deduce api url of song")
         realurl = api_response["url"]
 
-        #Audiomack wraps a lot of soundcloud tracks in their branded wrapper
+        # Audiomack wraps a lot of soundcloud tracks in their branded wrapper
         # - if so, pass the work off to the soundcloud extractor
         if SoundcloudIE.suitable(realurl):
             return {'_type': 'url', 'url': realurl, 'ie_key': 'Soundcloud'}
index de5d4faf3b920ddb0f2231f05311b9858dc5ef86..1ca0b7cf2bf78717fd45a11d17e3cce7e5191b9b 100644 (file)
@@ -18,7 +18,7 @@ class BambuserIE(InfoExtractor):
     _TEST = {
         'url': 'http://bambuser.com/v/4050584',
         # MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388
-        #u'md5': 'fba8f7693e48fd4e8641b3fd5539a641',
+        # 'md5': 'fba8f7693e48fd4e8641b3fd5539a641',
         'info_dict': {
             'id': '4050584',
             'ext': 'flv',
@@ -38,7 +38,7 @@ class BambuserIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
         info_url = ('http://player-c.api.bambuser.com/getVideo.json?'
-            '&api_key=%s&vid=%s' % (self._API_KEY, video_id))
+                    '&api_key=%s&vid=%s' % (self._API_KEY, video_id))
         info_json = self._download_webpage(info_url, video_id)
         info = json.loads(info_json)['result']
 
@@ -73,10 +73,11 @@ class BambuserChannelIE(InfoExtractor):
         urls = []
         last_id = ''
         for i in itertools.count(1):
-            req_url = ('http://bambuser.com/xhr-api/index.php?username={user}'
+            req_url = (
+                'http://bambuser.com/xhr-api/index.php?username={user}'
                 '&sort=created&access_mode=0%2C1%2C2&limit={count}'
                 '&method=broadcast&format=json&vid_older_than={last}'
-                ).format(user=user, count=self._STEP, last=last_id)
+            ).format(user=user, count=self._STEP, last=last_id)
             req = compat_urllib_request.Request(req_url)
             # Without setting this header, we wouldn't get any result
             req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
index 1b8da43cae83253531e6f542af4ad0b22e588502..acddbc8f1d19ebc48b721b3867b98fc30af3c133 100644 (file)
@@ -83,12 +83,12 @@ class BandcampIE(InfoExtractor):
         initial_url = mp3_info['url']
         re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$'
         m_url = re.match(re_url, initial_url)
-        #We build the url we will use to get the final track url
+        # We build the url we will use to get the final track url
         # This url is build in Bandcamp in the script download_bunde_*.js
         request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts'))
         final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url')
         # If we could correctly generate the .rand field the url would be
-        #in the "download_url" key
+        # in the "download_url" key
         final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1)
 
         return {
index 75e608f99de4ff3cc14234ab370f370b1ae83940..beb6cfc8ae88a3c40ac2b4ee7f6b0ae2c6ddfc87 100644 (file)
@@ -1,9 +1,11 @@
 from __future__ import unicode_literals
 
 import re
+import xml.etree.ElementTree
 
 from .subtitles import SubtitlesInfoExtractor
 from ..utils import ExtractorError
+from ..compat import compat_HTTPError
 
 
 class BBCCoUkIE(SubtitlesInfoExtractor):
@@ -55,7 +57,22 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
                 'skip_download': True,
             },
             'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
-        }
+        },
+        {
+            'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
+            'info_dict': {
+                'id': 'b03k3pb7',
+                'ext': 'flv',
+                'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
+                'description': '2. Invasion',
+                'duration': 3600,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+            'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
+        },
     ]
 
     def _extract_asx_playlist(self, connection, programme_id):
@@ -102,6 +119,10 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
         return playlist.findall('./{http://bbc.co.uk/2008/emp/playlist}item')
 
     def _extract_medias(self, media_selection):
+        error = media_selection.find('./{http://bbc.co.uk/2008/mp/mediaselection}error')
+        if error is not None:
+            raise ExtractorError(
+                '%s returned error: %s' % (self.IE_NAME, error.get('id')), expected=True)
         return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media')
 
     def _extract_connections(self, media):
@@ -158,54 +179,73 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
             subtitles[lang] = srt
         return subtitles
 
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        group_id = mobj.group('id')
-
-        webpage = self._download_webpage(url, group_id, 'Downloading video page')
-        if re.search(r'id="emp-error" class="notinuk">', webpage):
-            raise ExtractorError('Currently BBC iPlayer TV programmes are available to play in the UK only',
-                expected=True)
-
-        playlist = self._download_xml('http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id,
-            'Downloading playlist XML')
-
-        no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
-        if no_items is not None:
-            reason = no_items.get('reason')
-            if reason == 'preAvailability':
-                msg = 'Episode %s is not yet available' % group_id
-            elif reason == 'postAvailability':
-                msg = 'Episode %s is no longer available' % group_id
+    def _download_media_selector(self, programme_id):
+        try:
+            media_selection = self._download_xml(
+                'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id,
+                programme_id, 'Downloading media selection XML')
+        except ExtractorError as ee:
+            if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
+                media_selection = xml.etree.ElementTree.fromstring(ee.cause.read().encode('utf-8'))
             else:
-                msg = 'Episode %s is not available: %s' % (group_id, reason)
-            raise ExtractorError(msg, expected=True)
+                raise
 
         formats = []
         subtitles = None
 
-        for item in self._extract_items(playlist):
-            kind = item.get('kind')
-            if kind != 'programme' and kind != 'radioProgramme':
-                continue
-            title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
-            description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
+        for media in self._extract_medias(media_selection):
+            kind = media.get('kind')
+            if kind == 'audio':
+                formats.extend(self._extract_audio(media, programme_id))
+            elif kind == 'video':
+                formats.extend(self._extract_video(media, programme_id))
+            elif kind == 'captions':
+                subtitles = self._extract_captions(media, programme_id)
 
-            programme_id = item.get('identifier')
-            duration = int(item.get('duration'))
+        return formats, subtitles
 
-            media_selection = self._download_xml(
-                'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s'  % programme_id,
-                programme_id, 'Downloading media selection XML')
+    def _real_extract(self, url):
+        group_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, group_id, 'Downloading video page')
 
-            for media in self._extract_medias(media_selection):
-                kind = media.get('kind')
-                if kind == 'audio':
-                    formats.extend(self._extract_audio(media, programme_id))
-                elif kind == 'video':
-                    formats.extend(self._extract_video(media, programme_id))
-                elif kind == 'captions':
-                    subtitles = self._extract_captions(media, programme_id)
+        programme_id = self._search_regex(
+            r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False)
+        if programme_id:
+            player = self._download_json(
+                'http://www.bbc.co.uk/iplayer/episode/%s.json' % group_id,
+                group_id)['jsConf']['player']
+            title = player['title']
+            description = player['subtitle']
+            duration = player['duration']
+            formats, subtitles = self._download_media_selector(programme_id)
+        else:
+            playlist = self._download_xml(
+                'http://www.bbc.co.uk/iplayer/playlist/%s' % group_id,
+                group_id, 'Downloading playlist XML')
+
+            no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
+            if no_items is not None:
+                reason = no_items.get('reason')
+                if reason == 'preAvailability':
+                    msg = 'Episode %s is not yet available' % group_id
+                elif reason == 'postAvailability':
+                    msg = 'Episode %s is no longer available' % group_id
+                elif reason == 'noMedia':
+                    msg = 'Episode %s is not currently available' % group_id
+                else:
+                    msg = 'Episode %s is not available: %s' % (group_id, reason)
+                raise ExtractorError(msg, expected=True)
+
+            for item in self._extract_items(playlist):
+                kind = item.get('kind')
+                if kind != 'programme' and kind != 'radioProgramme':
+                    continue
+                title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
+                description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
+                programme_id = item.get('identifier')
+                duration = int(item.get('duration'))
+                formats, subtitles = self._download_media_selector(programme_id)
 
         if self._downloader.params.get('listsubtitles', False):
             self._list_available_subtitles(programme_id, subtitles)
@@ -220,4 +260,4 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
             'duration': duration,
             'formats': formats,
             'subtitles': subtitles,
-        }
\ No newline at end of file
+        }
index 314e37f8bea9d44bc5ffe1545b17d02cc7d13fe2..4e79fea8f0346d8aca19bc0182fd087a78779809 100644 (file)
@@ -40,7 +40,7 @@ class BeegIE(InfoExtractor):
 
         title = self._html_search_regex(
             r'<title>([^<]+)\s*-\s*beeg\.?</title>', webpage, 'title')
-        
+
         description = self._html_search_regex(
             r'<meta name="description" content="([^"]*)"',
             webpage, 'description', fatal=False)
index 0269d1174fd5c289ec49b593637b123d8bd536bf..77b562d99625a30035a38d14e15c6927e8b007e9 100644 (file)
@@ -1,4 +1,4 @@
-#coding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
index f2b02643de162ff1547bef47a62dca55f2a405a3..da47f27bdd6702d3927f3fde72fc0ebe064df53a 100644 (file)
@@ -64,6 +64,20 @@ class BlipTVIE(SubtitlesInfoExtractor):
                 'uploader': 'redvsblue',
                 'uploader_id': '792887',
             }
+        },
+        {
+            'url': 'http://blip.tv/play/gbk766dkj4Yn',
+            'md5': 'fe0a33f022d49399a241e84a8ea8b8e3',
+            'info_dict': {
+                'id': '1749452',
+                'ext': 'mp4',
+                'upload_date': '20090208',
+                'description': 'Witness the first appearance of the Nostalgia Critic character, as Doug reviews the movie Transformers.',
+                'title': 'Nostalgia Critic: Transformers',
+                'timestamp': 1234068723,
+                'uploader': 'NostalgiaCritic',
+                'uploader_id': '246467',
+            }
         }
     ]
 
@@ -74,11 +88,13 @@ class BlipTVIE(SubtitlesInfoExtractor):
         # See https://github.com/rg3/youtube-dl/issues/857 and
         # https://github.com/rg3/youtube-dl/issues/4197
         if lookup_id:
-            info_page = self._download_webpage(
-                'http://blip.tv/play/%s.x?p=1' % lookup_id, lookup_id, 'Resolving lookup id')
-            video_id = self._search_regex(r'config\.id\s*=\s*"([0-9]+)', info_page, 'video_id')
-        else:
-            video_id = mobj.group('id')
+            urlh = self._request_webpage(
+                'http://blip.tv/play/%s' % lookup_id, lookup_id, 'Resolving lookup id')
+            url = compat_urlparse.urlparse(urlh.geturl())
+            qs = compat_urlparse.parse_qs(url.query)
+            mobj = re.match(self._VALID_URL, qs['file'][0])
+
+        video_id = mobj.group('id')
 
         rss = self._download_xml('http://blip.tv/rss/flash/%s' % video_id, video_id, 'Downloading video RSS')
 
@@ -114,7 +130,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
             msg = self._download_webpage(
                 url + '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url',
                 video_id, 'Resolving URL for %s' % role)
-            real_url = compat_urlparse.parse_qs(msg)['message'][0]
+            real_url = compat_urlparse.parse_qs(msg.strip())['message'][0]
 
             media_type = media_content.get('type')
             if media_type == 'text/srt' or url.endswith('.srt'):
diff --git a/youtube_dl/extractor/bpb.py b/youtube_dl/extractor/bpb.py
new file mode 100644 (file)
index 0000000..510813f
--- /dev/null
@@ -0,0 +1,37 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class BpbIE(InfoExtractor):
+    IE_DESC = 'Bundeszentrale für politische Bildung'
+    _VALID_URL = r'http://www\.bpb\.de/mediathek/(?P<id>[0-9]+)/'
+
+    _TEST = {
+        'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
+        'md5': '0792086e8e2bfbac9cdf27835d5f2093',
+        'info_dict': {
+            'id': '297',
+            'ext': 'mp4',
+            'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR',
+            'description': 'Joachim Gauck, erster Beauftragter für die Stasi-Unterlagen, spricht auf dem Geschichtsforum über die friedliche Revolution 1989 und eine "gewisse Traurigkeit" im Umgang mit der DDR-Vergangenheit.'
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_regex(
+            r'<h2 class="white">(.*?)</h2>', webpage, 'title')
+        video_url = self._html_search_regex(
+            r'(http://film\.bpb\.de/player/dokument_[0-9]+\.mp4)',
+            webpage, 'video URL')
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'description': self._og_search_description(webpage),
+        }
diff --git a/youtube_dl/extractor/buzzfeed.py b/youtube_dl/extractor/buzzfeed.py
new file mode 100644 (file)
index 0000000..a40a1bb
--- /dev/null
@@ -0,0 +1,74 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+
+
+class BuzzFeedIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?buzzfeed\.com/[^?#]*?/(?P<id>[^?#]+)'
+    _TESTS = [{
+        'url': 'http://www.buzzfeed.com/abagg/this-angry-ram-destroys-a-punching-bag-like-a-boss?utm_term=4ldqpia',
+        'info_dict': {
+            'id': 'this-angry-ram-destroys-a-punching-bag-like-a-boss',
+            'title': 'This Angry Ram Destroys A Punching Bag Like A Boss',
+            'description': 'Rambro!',
+        },
+        'playlist': [{
+            'info_dict': {
+                'id': 'aVCR29aE_OQ',
+                'ext': 'mp4',
+                'upload_date': '20141024',
+                'uploader_id': 'Buddhanz1',
+                'description': 'He likes to stay in shape with his heavy bag, he wont stop until its on the ground\n\nFollow Angry Ram on Facebook for regular updates -\nhttps://www.facebook.com/pages/Angry-Ram/1436897249899558?ref=hl',
+                'uploader': 'Buddhanz',
+                'title': 'Angry Ram destroys a punching bag',
+            }
+        }]
+    }, {
+        'url': 'http://www.buzzfeed.com/sheridanwatson/look-at-this-cute-dog-omg?utm_term=4ldqpia',
+        'params': {
+            'skip_download': True,  # Got enough YouTube download tests
+        },
+        'info_dict': {
+            'description': 'Munchkin the Teddy Bear is back !',
+            'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill',
+        },
+        'playlist': [{
+            'info_dict': {
+                'id': 'mVmBL8B-In0',
+                'ext': 'mp4',
+                'upload_date': '20141124',
+                'uploader_id': 'CindysMunchkin',
+                'description': '© 2014 Munchkin the Shih Tzu\nAll rights reserved\nFacebook: http://facebook.com/MunchkintheShihTzu',
+                'uploader': 'Munchkin the Shih Tzu',
+                'title': 'Munchkin the Teddy Bear gets her exercise',
+            },
+        }]
+    }]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+        webpage = self._download_webpage(url, playlist_id)
+
+        all_buckets = re.findall(
+            r'(?s)<div class="video-embed[^"]*"..*?rel:bf_bucket_data=\'([^\']+)\'',
+            webpage)
+
+        entries = []
+        for bd_json in all_buckets:
+            bd = json.loads(bd_json)
+            video = bd.get('video') or bd.get('progload_video')
+            if not video:
+                continue
+            entries.append(self.url_result(video['url']))
+
+        return {
+            '_type': 'playlist',
+            'id': playlist_id,
+            'title': self._og_search_title(webpage),
+            'description': self._og_search_description(webpage),
+            'entries': entries,
+        }
index dddeaa59c4427470fb1511990f443fd38fe804cf..9873728df6f3bb1adbfddc1959aa5e7e70241f5b 100644 (file)
@@ -112,4 +112,4 @@ class CanalplusIE(InfoExtractor):
             'like_count': int(infos.find('NB_LIKES').text),
             'comment_count': int(infos.find('NB_COMMENTS').text),
             'formats': formats,
-        }
\ No newline at end of file
+        }
index db48dc24fa2a4698ac0dc8a28033b8cca51d3d44..e43756ec69b1d7f1872e45cc6901b41752ec6ef6 100644 (file)
@@ -45,4 +45,4 @@ class CBSIE(InfoExtractor):
         real_id = self._search_regex(
             r"video\.settings\.pid\s*=\s*'([^']+)';",
             webpage, 'real video ID')
-        return self.url_result(u'theplatform:%s' % real_id)
+        return self.url_result('theplatform:%s' % real_id)
index 0bce7937f830c1bc8178b88f95d619693edf7ae3..7e47960ab08f3ffba7a1e596b34c2fbfc7fd6c59 100644 (file)
@@ -84,4 +84,4 @@ class CBSNewsIE(InfoExtractor):
             'thumbnail': thumbnail,
             'duration': duration,
             'formats': formats,
-        }
\ No newline at end of file
+        }
index 90a3dddb9b79a875c57942a8dcb58951aa973090..97feb6704075831fb8b5ef95a547428ecc57ec3f 100644 (file)
@@ -92,7 +92,7 @@ class CeskaTelevizeIE(InfoExtractor):
         req.add_header('Referer', url)
 
         playlist = self._download_xml(req, video_id)
-        
+
         formats = []
         for i in playlist.find('smilRoot/body'):
             if 'AD' not in i.attrib['id']:
index 16d800512a092e3d9d064cf2ac7800a830af64ec..2a05813f8d600540f66015a75ce4df647096d3fc 100644 (file)
@@ -5,6 +5,7 @@ import re
 from .common import InfoExtractor
 from ..utils import ExtractorError
 
+
 class Channel9IE(InfoExtractor):
     '''
     Common extractor for channel9.msdn.com.
@@ -31,7 +32,7 @@ class Channel9IE(InfoExtractor):
                 'session_code': 'KOS002',
                 'session_day': 'Day 1',
                 'session_room': 'Arena 1A',
-                'session_speakers': [ 'Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug', 'Mads Kristensen' ],
+                'session_speakers': ['Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug', 'Mads Kristensen'],
             },
         },
         {
@@ -44,7 +45,7 @@ class Channel9IE(InfoExtractor):
                 'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
                 'duration': 1540,
                 'thumbnail': 'http://video.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg',
-                'authors': [ 'Mike Wilmot' ],
+                'authors': ['Mike Wilmot'],
             },
         }
     ]
@@ -83,7 +84,7 @@ class Channel9IE(InfoExtractor):
             'format_id': x.group('quality'),
             'format_note': x.group('note'),
             'format': '%s (%s)' % (x.group('quality'), x.group('note')),
-            'filesize': self._restore_bytes(x.group('filesize')), # File size is approximate
+            'filesize': self._restore_bytes(x.group('filesize')),  # File size is approximate
             'preference': self._known_formats.index(x.group('quality')),
             'vcodec': 'none' if x.group('note') == 'Audio only' else None,
         } for x in list(re.finditer(FORMAT_REGEX, html)) if x.group('quality') in self._known_formats]
@@ -187,32 +188,33 @@ class Channel9IE(InfoExtractor):
         view_count = self._extract_view_count(html)
         comment_count = self._extract_comment_count(html)
 
-        common = {'_type': 'video',
-                  'id': content_path,
-                  'description': description,
-                  'thumbnail': thumbnail,
-                  'duration': duration,
-                  'avg_rating': avg_rating,
-                  'rating_count': rating_count,
-                  'view_count': view_count,
-                  'comment_count': comment_count,
-                }
+        common = {
+            '_type': 'video',
+            'id': content_path,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'avg_rating': avg_rating,
+            'rating_count': rating_count,
+            'view_count': view_count,
+            'comment_count': comment_count,
+        }
 
         result = []
 
         if slides is not None:
             d = common.copy()
-            d.update({ 'title': title + '-Slides', 'url': slides })
+            d.update({'title': title + '-Slides', 'url': slides})
             result.append(d)
 
         if zip_ is not None:
             d = common.copy()
-            d.update({ 'title': title + '-Zip', 'url': zip_ })
+            d.update({'title': title + '-Zip', 'url': zip_})
             result.append(d)
 
         if len(formats) > 0:
             d = common.copy()
-            d.update({ 'title': title, 'formats': formats })
+            d.update({'title': title, 'formats': formats})
             result.append(d)
 
         return result
@@ -270,5 +272,5 @@ class Channel9IE(InfoExtractor):
             else:
                 raise ExtractorError('Unexpected WT.entryid %s' % page_type, expected=True)
 
-        else: # Assuming list
+        else:  # Assuming list
             return self._extract_list(content_path)
index 31fe906b448669ae94c185546a5402314b975224..b7fa73c3bfc8f8c290d899662ce0cb102ac86670 100644 (file)
@@ -77,7 +77,7 @@ class CinemassacreIE(InfoExtractor):
         if videolist_url:
             videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML')
             formats = []
-            baseurl = vidurl[:vidurl.rfind('/')+1]
+            baseurl = vidurl[:vidurl.rfind('/') + 1]
             for video in videolist.findall('.//video'):
                 src = video.get('src')
                 if not src:
index 669919a2cc9039ffb91ae052b96d5531665341e0..a5c3cb7c6253776062fb5834d0fe4c121dfb9c99 100644 (file)
@@ -24,7 +24,7 @@ class ClipfishIE(InfoExtractor):
             'title': 'FIFA 14 - E3 2013 Trailer',
             'duration': 82,
         },
-        u'skip': 'Blocked in the US'
+        'skip': 'Blocked in the US'
     }
 
     def _real_extract(self, url):
@@ -34,7 +34,7 @@ class ClipfishIE(InfoExtractor):
         info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
                     (video_id, int(time.time())))
         doc = self._download_xml(
-            info_url, video_id, note=u'Downloading info page')
+            info_url, video_id, note='Downloading info page')
         title = doc.find('title').text
         video_url = doc.find('filename').text
         if video_url is None:
index 02a1667fa3fbf7cbe1a822db7b82f9c087864249..d07d544eaf7742bb782a8b367a09561f14c44a4e 100644 (file)
@@ -39,6 +39,7 @@ class ClipsyndicateIE(InfoExtractor):
             transform_source=fix_xml_ampersands)
 
         track_doc = pdoc.find('trackList/track')
+
         def find_param(name):
             node = find_xpath_attr(track_doc, './/param', 'name', name)
             if node is not None:
index 3826ce7e1ee29bde4d640cb62bf2c127a83eb7bc..81142ee419d45b9df9f75bdc152ab87e1317650f 100644 (file)
@@ -25,8 +25,7 @@ class CNNIE(InfoExtractor):
             'duration': 135,
             'upload_date': '20130609',
         },
-    },
-    {
+    }, {
         "url": "http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29",
         "md5": "b5cc60c60a3477d185af8f19a2a26f4e",
         "info_dict": {
index 6f866e7fcee7f401362b24d69db2285e22cfa6a4..002b240378299bf18000b2ee92c76ae062115126 100644 (file)
@@ -10,47 +10,46 @@ from ..utils import int_or_none
 class CollegeHumorIE(InfoExtractor):
     _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'
 
-    _TESTS = [{
-        'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
-        'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd',
-        'info_dict': {
-            'id': '6902724',
-            'ext': 'mp4',
-            'title': 'Comic-Con Cosplay Catastrophe',
-            'description': "Fans get creative this year at San Diego.  Too creative.  And yes, that's really Joss Whedon.",
-            'age_limit': 13,
-            'duration': 187,
+    _TESTS = [
+        {
+            'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
+            'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd',
+            'info_dict': {
+                'id': '6902724',
+                'ext': 'mp4',
+                'title': 'Comic-Con Cosplay Catastrophe',
+                'description': "Fans get creative this year at San Diego.  Too creative.  And yes, that's really Joss Whedon.",
+                'age_limit': 13,
+                'duration': 187,
+            },
+        }, {
+            'url': 'http://www.collegehumor.com/video/3505939/font-conference',
+            'md5': '72fa701d8ef38664a4dbb9e2ab721816',
+            'info_dict': {
+                'id': '3505939',
+                'ext': 'mp4',
+                'title': 'Font Conference',
+                'description': "This video wasn't long enough, so we made it double-spaced.",
+                'age_limit': 10,
+                'duration': 179,
+            },
+        }, {
+            # embedded youtube video
+            'url': 'http://www.collegehumor.com/embed/6950306',
+            'info_dict': {
+                'id': 'Z-bao9fg6Yc',
+                'ext': 'mp4',
+                'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!',
+                'uploader': 'Mark Dice',
+                'uploader_id': 'MarkDice',
+                'description': 'md5:62c3dab9351fac7bb44b53b69511d87f',
+                'upload_date': '20140127',
+            },
+            'params': {
+                'skip_download': True,
+            },
+            'add_ie': ['Youtube'],
         },
-    },
-    {
-        'url': 'http://www.collegehumor.com/video/3505939/font-conference',
-        'md5': '72fa701d8ef38664a4dbb9e2ab721816',
-        'info_dict': {
-            'id': '3505939',
-            'ext': 'mp4',
-            'title': 'Font Conference',
-            'description': "This video wasn't long enough, so we made it double-spaced.",
-            'age_limit': 10,
-            'duration': 179,
-        },
-    },
-    # embedded youtube video
-    {
-        'url': 'http://www.collegehumor.com/embed/6950306',
-        'info_dict': {
-            'id': 'Z-bao9fg6Yc',
-            'ext': 'mp4',
-            'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!',
-            'uploader': 'Mark Dice',
-            'uploader_id': 'MarkDice',
-            'description': 'md5:62c3dab9351fac7bb44b53b69511d87f',
-            'upload_date': '20140127',
-        },
-        'params': {
-            'skip_download': True,
-        },
-        'add_ie': ['Youtube'],
-    },
     ]
 
     def _real_extract(self, url):
index 93a5a3d57bf027cce5f6293c65f219c5d2cfe51d..e80a2dad0b2e12c5e9d14a486feb0b600de66823 100644 (file)
@@ -13,6 +13,7 @@ import time
 import xml.etree.ElementTree
 
 from ..compat import (
+    compat_cookiejar,
     compat_http_client,
     compat_urllib_error,
     compat_urllib_parse_urlparse,
@@ -296,9 +297,11 @@ class InfoExtractor(object):
         content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal)
         return (content, urlh)
 
-    def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True):
+    def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None):
         content_type = urlh.headers.get('Content-Type', '')
         webpage_bytes = urlh.read()
+        if prefix is not None:
+            webpage_bytes = prefix + webpage_bytes
         m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
         if m:
             encoding = m.group(1)
@@ -423,17 +426,18 @@ class InfoExtractor(object):
         """Report attempt to log in."""
         self.to_screen('Logging in')
 
-    #Methods for following #608
+    # Methods for following #608
     @staticmethod
     def url_result(url, ie=None, video_id=None):
         """Returns a url that points to a page that should be processed"""
-        #TODO: ie should be the class used for getting the info
+        # TODO: ie should be the class used for getting the info
         video_info = {'_type': 'url',
                       'url': url,
                       'ie_key': ie}
         if video_id is not None:
             video_info['id'] = video_id
         return video_info
+
     @staticmethod
     def playlist_result(entries, playlist_id=None, playlist_title=None):
         """Returns a playlist"""
@@ -477,7 +481,7 @@ class InfoExtractor(object):
             raise RegexNotFoundError('Unable to extract %s' % _name)
         else:
             self._downloader.report_warning('unable to extract %s; '
-                'please report this issue on http://yt-dl.org/bug' % _name)
+                                            'please report this issue on http://yt-dl.org/bug' % _name)
             return None
 
     def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
@@ -517,7 +521,7 @@ class InfoExtractor(object):
                     raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
             except (IOError, netrc.NetrcParseError) as err:
                 self._downloader.report_warning('parsing .netrc: %s' % compat_str(err))
-        
+
         return (username, password)
 
     def _get_tfa_info(self):
@@ -611,7 +615,7 @@ class InfoExtractor(object):
 
     def _twitter_search_player(self, html):
         return self._html_search_meta('twitter:player', html,
-            'twitter card player')
+                                      'twitter card player')
 
     def _sort_formats(self, formats):
         if not formats:
@@ -814,6 +818,11 @@ class InfoExtractor(object):
                 self._downloader.report_warning(msg)
         return res
 
+    def _set_cookie(self, domain, name, value, expire_time=None):
+        cookie = compat_cookiejar.Cookie(0, name, value, None, None, domain, None,
+            None, '/', True, False, expire_time, '', None, None, None)
+        self._downloader.cookiejar.set_cookie(cookie)
+
 
 class SearchInfoExtractor(InfoExtractor):
     """
index 74b880ffce9b0a8dbf7ab273063253a2743dbbdb..cf763ee7e03019adc5f957060b0f45e52e532084 100644 (file)
@@ -54,7 +54,7 @@ class CrackedIE(InfoExtractor):
 
         return {
             'id': video_id,
-            'url':video_url,
+            'url': video_url,
             'title': title,
             'description': description,
             'timestamp': timestamp,
@@ -62,4 +62,4 @@ class CrackedIE(InfoExtractor):
             'comment_count': comment_count,
             'height': height,
             'width': width,
-        }
\ No newline at end of file
+        }
index fe1324fe3cc047b87e605d2724e3bab015b3ed28..d7e2b841e10856cadf0526fe8ff6d4c280dc0dae 100644 (file)
@@ -69,11 +69,9 @@ class CrunchyrollIE(SubtitlesInfoExtractor):
         login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
         self._download_webpage(login_request, None, False, 'Wrong login info')
 
-
     def _real_initialize(self):
         self._login()
 
-
     def _decrypt_subtitles(self, data, iv, id):
         data = bytes_to_intlist(data)
         iv = bytes_to_intlist(iv)
@@ -99,8 +97,10 @@ class CrunchyrollIE(SubtitlesInfoExtractor):
             return shaHash + [0] * 12
 
         key = obfuscate_key(id)
+
         class Counter:
             __value = iv
+
             def next_value(self):
                 temp = self.__value
                 self.__value = inc(self.__value)
@@ -183,7 +183,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
 
         return output
 
-    def _real_extract(self,url):
+    def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('video_id')
 
@@ -226,10 +226,10 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
         formats = []
         for fmt in re.findall(r'\?p([0-9]{3,4})=1', webpage):
             stream_quality, stream_format = self._FORMAT_IDS[fmt]
-            video_format = fmt+'p'
+            video_format = fmt + 'p'
             streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/')
             # urlencode doesn't work!
-            streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality='+stream_quality+'&media%5Fid='+stream_id+'&video%5Fformat='+stream_format
+            streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality=' + stream_quality + '&media%5Fid=' + stream_id + '&video%5Fformat=' + stream_format
             streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
             streamdata_req.add_header('Content-Length', str(len(streamdata_req.data)))
             streamdata = self._download_xml(
@@ -248,8 +248,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
         subtitles = {}
         sub_format = self._downloader.params.get('subtitlesformat', 'srt')
         for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
-            sub_page = self._download_webpage('http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id='+sub_id,\
-                                              video_id, note='Downloading subtitles for '+sub_name)
+            sub_page = self._download_webpage(
+                'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
+                video_id, note='Downloading subtitles for ' + sub_name)
             id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
             iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
             data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
@@ -274,14 +275,14 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
             return
 
         return {
-            'id':          video_id,
-            'title':       video_title,
+            'id': video_id,
+            'title': video_title,
             'description': video_description,
-            'thumbnail':   video_thumbnail,
-            'uploader':    video_uploader,
+            'thumbnail': video_thumbnail,
+            'uploader': video_uploader,
             'upload_date': video_upload_date,
-            'subtitles':   subtitles,
-            'formats':     formats,
+            'subtitles': subtitles,
+            'formats': formats,
         }
 
 
index fd4bc75b2a91a1ad115a9152bf85debce7880431..936c13cd60b0ec44f376818a7e17cb9ccf4d1384 100644 (file)
@@ -1,4 +1,4 @@
-#coding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -18,6 +18,7 @@ from ..utils import (
     unescapeHTML,
 )
 
+
 class DailymotionBaseInfoExtractor(InfoExtractor):
     @staticmethod
     def _build_request(url):
@@ -27,6 +28,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
         request.add_header('Cookie', 'ff=off')
         return request
 
+
 class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
     """Information Extractor for Dailymotion"""
 
@@ -112,7 +114,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
         embed_page = self._download_webpage(embed_url, video_id,
                                             'Downloading embed page')
         info = self._search_regex(r'var info = ({.*?}),$', embed_page,
-            'video info', flags=re.MULTILINE)
+                                  'video info', flags=re.MULTILINE)
         info = json.loads(info)
         if info.get('error') is not None:
             msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
@@ -206,7 +208,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
             if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
                 break
         return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
-                   for video_id in orderedSet(video_ids)]
+                for video_id in orderedSet(video_ids)]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
index c5529f8d455963e7f7ff6ad798f27493a0579410..5e50c63d9aca7d2642239ccf32a5cedd91b05174 100644 (file)
@@ -9,7 +9,7 @@ from .common import InfoExtractor
 class DefenseGouvFrIE(InfoExtractor):
     IE_NAME = 'defense.gouv.fr'
     _VALID_URL = (r'http://.*?\.defense\.gouv\.fr/layout/set/'
-        r'ligthboxvideo/base-de-medias/webtv/(.*)')
+                  r'ligthboxvideo/base-de-medias/webtv/(.*)')
 
     _TEST = {
         'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1',
@@ -26,13 +26,13 @@ class DefenseGouvFrIE(InfoExtractor):
         video_id = self._search_regex(
             r"flashvars.pvg_id=\"(\d+)\";",
             webpage, 'ID')
-        
+
         json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
-            + video_id)
+                    + video_id)
         info = self._download_webpage(json_url, title,
-                                                  'Downloading JSON config')
+                                      'Downloading JSON config')
         video_url = json.loads(info)['renditions'][0]['url']
-        
+
         return {'id': video_id,
                 'ext': 'mp4',
                 'url': video_url,
index 554df673506a88cada08b9db8300cd15d301087d..52c2d7ddf99873779b7f3223b0acfe4563e2b5d9 100644 (file)
@@ -16,9 +16,9 @@ class DiscoveryIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'MythBusters: Mission Impossible Outtakes',
             'description': ('Watch Jamie Hyneman and Adam Savage practice being'
-                ' each other -- to the point of confusing Jamie\'s dog -- and '
-                'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s'
-                ' back.'),
+                            ' each other -- to the point of confusing Jamie\'s dog -- and '
+                            'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s'
+                            ' back.'),
             'duration': 156,
         },
     }
@@ -29,7 +29,7 @@ class DiscoveryIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         video_list_json = self._search_regex(r'var videoListJSON = ({.*?});',
-            webpage, 'video list', flags=re.DOTALL)
+                                             webpage, 'video list', flags=re.DOTALL)
         video_list = json.loads(video_list_json)
         info = video_list['clips'][0]
         formats = []
index 5ae0ad5b65cdf12a896a573db78e23494864fd23..638bb33cd81b53fdc2c4bbc31f300a098fb57652 100644 (file)
@@ -27,7 +27,7 @@ class DotsubIE(InfoExtractor):
         video_id = mobj.group('id')
         info_url = "https://dotsub.com/api/media/%s/metadata" % video_id
         info = self._download_json(info_url, video_id)
-        date = time.gmtime(info['dateCreated']/1000) # The timestamp is in miliseconds
+        date = time.gmtime(info['dateCreated'] / 1000)  # The timestamp is in miliseconds
 
         return {
             'id': video_id,
index aefca848a25a7c4c10a0a821bf3454efbe6c1117..14b6c00b0bd1c4d3a306b5513477e2bb6c4cd52d 100644 (file)
@@ -11,18 +11,18 @@ from ..utils import url_basename
 
 class DropboxIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?dropbox[.]com/sh?/(?P<id>[a-zA-Z0-9]{15})/.*'
-    _TESTS = [{
-        'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',
-        'info_dict': {
-            'id': 'nelirfsxnmcfbfh',
-            'ext': 'mp4',
-            'title': 'youtube-dl test video \'ä"BaW_jenozKc'
-        }
-    },
-    {
-        'url': 'https://www.dropbox.com/sh/662glsejgzoj9sr/AAByil3FGH9KFNZ13e08eSa1a/Pregame%20Ceremony%20Program%20PA%2020140518.m4v',
-        'only_matching': True,
-    },
+    _TESTS = [
+        {
+            'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',
+            'info_dict': {
+                'id': 'nelirfsxnmcfbfh',
+                'ext': 'mp4',
+                'title': 'youtube-dl test video \'ä"BaW_jenozKc'
+            }
+        }, {
+            'url': 'https://www.dropbox.com/sh/662glsejgzoj9sr/AAByil3FGH9KFNZ13e08eSa1a/Pregame%20Ceremony%20Program%20PA%2020140518.m4v',
+            'only_matching': True,
+        },
     ]
 
     def _real_extract(self, url):
index f8f49a013503cc853c2bf79e345b360af3db7fee..b766e17f26a9e79d654d4b160fa8f98f5f21503f 100644 (file)
@@ -28,7 +28,7 @@ class EHowIE(InfoExtractor):
         video_id = mobj.group('id')
         webpage = self._download_webpage(url, video_id)
         video_url = self._search_regex(r'(?:file|source)=(http[^\'"&]*)',
-            webpage, 'video URL')
+                                       webpage, 'video URL')
         final_url = compat_urllib_parse.unquote(video_url)
         uploader = self._html_search_meta('uploader', webpage)
         title = self._og_search_title(webpage).replace(' | eHow', '')
index c1b4c729ef5888da0bdcebf692cbddad30dee4df..f4c1e2a72bf74821afd476dee86b806c0dbb56c7 100644 (file)
@@ -125,7 +125,7 @@ class EightTracksIE(InfoExtractor):
             info = {
                 'id': compat_str(track_data['id']),
                 'url': track_data['track_file_stream_url'],
-                'title': track_data['performer'] + u' - ' + track_data['name'],
+                'title': track_data['performer'] + ' - ' + track_data['name'],
                 'raw_title': track_data['name'],
                 'uploader_id': data['user']['login'],
                 'ext': 'm4a',
index 10480356324d2b1d3558e570b02646990321bb27..2139f68aa3cb16facdc45b5fd9e014621e1c6674 100644 (file)
@@ -60,8 +60,8 @@ class FacebookIE(InfoExtractor):
         login_page_req = compat_urllib_request.Request(self._LOGIN_URL)
         login_page_req.add_header('Cookie', 'locale=en_US')
         login_page = self._download_webpage(login_page_req, None,
-            note='Downloading login page',
-            errnote='Unable to download login page')
+                                            note='Downloading login page',
+                                            errnote='Unable to download login page')
         lsd = self._search_regex(
             r'<input type="hidden" name="lsd" value="([^"]*)"',
             login_page, 'lsd')
@@ -77,12 +77,12 @@ class FacebookIE(InfoExtractor):
             'legacy_return': '1',
             'timezone': '-60',
             'trynum': '1',
-            }
+        }
         request = compat_urllib_request.Request(self._LOGIN_URL, urlencode_postdata(login_form))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')
         try:
             login_results = self._download_webpage(request, None,
-                note='Logging in', errnote='unable to fetch login page')
+                                                   note='Logging in', errnote='unable to fetch login page')
             if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
                 self._downloader.report_warning('unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
                 return
@@ -96,7 +96,7 @@ class FacebookIE(InfoExtractor):
             check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
             check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
             check_response = self._download_webpage(check_req, None,
-                note='Confirming login')
+                                                    note='Confirming login')
             if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
                 self._downloader.report_warning('Unable to confirm login, you have to login in your brower and authorize the login.')
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
index c663a0f81d08650b24616d2c3c2daef262c95aa2..6f5d23559b78dfb621bbf6d819612d20b96fd597 100644 (file)
@@ -40,7 +40,7 @@ class FC2IE(InfoExtractor):
 
         info_url = (
             "http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&".
-            format(video_id, mimi, compat_urllib_request.quote(refer, safe='').replace('.','%2E')))
+            format(video_id, mimi, compat_urllib_request.quote(refer, safe='').replace('.', '%2E')))
 
         info_webpage = self._download_webpage(
             info_url, video_id, note='Downloading info page')
index c2e987ff72d7847d6c6ec955943763ea9f231e92..08ceee4ed7d5e8b96b81e7d8b9b823a5ea18e120 100644 (file)
@@ -44,9 +44,9 @@ class FirstTVIE(InfoExtractor):
         duration = self._og_search_property('video:duration', webpage, 'video duration', fatal=False)
 
         like_count = self._html_search_regex(r'title="Понравилось".*?/></label> \[(\d+)\]',
-            webpage, 'like count', fatal=False)
+                                             webpage, 'like count', fatal=False)
         dislike_count = self._html_search_regex(r'title="Не понравилось".*?/></label> \[(\d+)\]',
-            webpage, 'dislike count', fatal=False)
+                                                webpage, 'dislike count', fatal=False)
 
         return {
             'id': video_id,
@@ -57,4 +57,4 @@ class FirstTVIE(InfoExtractor):
             'duration': int_or_none(duration),
             'like_count': int_or_none(like_count),
             'dislike_count': int_or_none(dislike_count),
-        }
\ No newline at end of file
+        }
index 3a50bab5c9bd04c9d176a88be080033368fe46c7..f9c127ce67bd7edefd22e7e7953ecce57e888d15 100644 (file)
@@ -50,7 +50,7 @@ class FiveMinIE(InfoExtractor):
         video_id = mobj.group('id')
         embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id
         embed_page = self._download_webpage(embed_url, video_id,
-            'Downloading embed page')
+                                            'Downloading embed page')
         sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
         query = compat_urllib_parse.urlencode({
             'func': 'GetResults',
index 21b89142c118de83e4d5be6856ac9a3b3f3232c8..d09d1c13a70cffb725329f69368f37359d7f7a08 100644 (file)
@@ -32,9 +32,9 @@ class FKTVIE(InfoExtractor):
         server = random.randint(2, 4)
         video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%d.jpg' % episode
         start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%d/Start' % episode,
-            episode)
+                                               episode)
         playlist = self._search_regex(r'playlist = (\[.*?\]);', start_webpage,
-            'playlist', flags=re.DOTALL)
+                                      'playlist', flags=re.DOTALL)
         files = json.loads(re.sub('{[^{}]*?}', '{}', playlist))
         # TODO: return a single multipart video
         videos = []
index e09982e88b913676a2f8c75946e79c82502bf650..0c858b6544b919b1b569b4c4102447631298046e 100644 (file)
@@ -17,8 +17,8 @@ class FlickrIE(InfoExtractor):
         'info_dict': {
             'id': '5645318632',
             'ext': 'mp4',
-            "description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.", 
-            "uploader_id": "forestwander-nature-pictures", 
+            "description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.",
+            "uploader_id": "forestwander-nature-pictures",
             "title": "Dark Hollow Waterfalls"
         }
     }
@@ -37,7 +37,7 @@ class FlickrIE(InfoExtractor):
         first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')
 
         node_id = self._html_search_regex(r'<Item id="id">(\d+-\d+)</Item>',
-            first_xml, 'node_id')
+                                          first_xml, 'node_id')
 
         second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
         second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')
index 7d56b9be93a0332e70381c3a46b748c6d39e5b6a..b22ce2acb5d1beae29d1b298b4ef63c8e7639e5f 100644 (file)
@@ -55,7 +55,7 @@ class FourTubeIE(InfoExtractor):
         description = self._html_search_meta('description', webpage, 'description')
         if description:
             upload_date = self._search_regex(r'Published Date: (\d{2} [a-zA-Z]{3} \d{4})', description, 'upload date',
-                fatal=False)
+                                             fatal=False)
             if upload_date:
                 upload_date = unified_strdate(upload_date)
             view_count = self._search_regex(r'Views: ([\d,\.]+)', description, 'view count', fatal=False)
@@ -65,9 +65,9 @@ class FourTubeIE(InfoExtractor):
 
         token_url = "http://tkn.4tube.com/{0}/desktop/{1}".format(media_id, "+".join(sources))
         headers = {
-                b'Content-Type': b'application/x-www-form-urlencoded',
-                b'Origin': b'http://www.4tube.com',
-                }
+            b'Content-Type': b'application/x-www-form-urlencoded',
+            b'Origin': b'http://www.4tube.com',
+        }
         token_req = compat_urllib_request.Request(token_url, b'{}', headers)
         tokens = self._download_json(token_req, video_id)
 
@@ -76,7 +76,7 @@ class FourTubeIE(InfoExtractor):
             'format_id': format + 'p',
             'resolution': format + 'p',
             'quality': int(format),
-            } for format in sources]
+        } for format in sources]
 
         self._sort_formats(formats)
 
@@ -92,4 +92,4 @@ class FourTubeIE(InfoExtractor):
             'duration': duration,
             'age_limit': 18,
             'webpage_url': webpage_url,
-        }
\ No newline at end of file
+        }
index 35d7d15e1b6726dd0301e52e1e671c73a76f8d77..e0420a48f8cacb5661b6882f797c09755de4df46 100644 (file)
@@ -26,6 +26,19 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
         if info.get('status') == 'NOK':
             raise ExtractorError(
                 '%s returned error: %s' % (self.IE_NAME, info['message']), expected=True)
+        allowed_countries = info['videos'][0].get('geoblocage')
+        if allowed_countries:
+            georestricted = True
+            geo_info = self._download_json(
+                'http://geo.francetv.fr/ws/edgescape.json', video_id,
+                'Downloading geo restriction info')
+            country = geo_info['reponse']['geo_info']['country_code']
+            if country not in allowed_countries:
+                raise ExtractorError(
+                    'The video is not available from your location',
+                    expected=True)
+        else:
+            georestricted = False
 
         formats = []
         for video in info['videos']:
@@ -36,6 +49,10 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
                 continue
             format_id = video['format']
             if video_url.endswith('.f4m'):
+                if georestricted:
+                    # See https://github.com/rg3/youtube-dl/issues/3963
+                    # m3u8 urls work fine
+                    continue
                 video_url_parsed = compat_urllib_parse_urlparse(video_url)
                 f4m_url = self._download_webpage(
                     'http://hdfauth.francetv.fr/esi/urltokengen2.html?url=%s' % video_url_parsed.path,
@@ -234,7 +251,7 @@ class GenerationQuoiIE(InfoExtractor):
         info_json = self._download_webpage(info_url, name)
         info = json.loads(info_json)
         return self.url_result('http://www.dailymotion.com/video/%s' % info['id'],
-            ie='Dailymotion')
+                               ie='Dailymotion')
 
 
 class CultureboxIE(FranceTVBaseInfoExtractor):
index 11fee3d31e88833b8074a1b59cff885eeffa46d3..cf8e90d7dbe9483efffe7894bedbe437746c5da1 100644 (file)
@@ -11,7 +11,7 @@ class GamekingsIE(InfoExtractor):
         'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
         # MD5 is flaky, seems to change regularly
         # 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3',
-        u'info_dict': {
+        'info_dict': {
             'id': '20130811',
             'ext': 'mp4',
             'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review',
index c7a824c29b15187becb0bb9758ee94348f2309da..328301de396e5dd289b139808754ef20e1af652b 100644 (file)
@@ -445,6 +445,30 @@ class GenericIE(InfoExtractor):
                 'title': 'Rosetta #CometLanding webcast HL 10',
             }
         },
+        # LazyYT
+        {
+            'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
+            'info_dict': {
+                'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
+            },
+            'playlist_mincount': 2,
+        },
+        # Direct link with incorrect MIME type
+        {
+            'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
+            'md5': '4ccbebe5f36706d85221f204d7eb5913',
+            'info_dict': {
+                'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
+                'id': '5_Lennart_Poettering_-_Systemd',
+                'ext': 'webm',
+                'title': '5_Lennart_Poettering_-_Systemd',
+                'upload_date': '20141120',
+            },
+            'expected_warnings': [
+                'URL could be a direct video link, returning it as such.'
+            ]
+        }
+
     ]
 
     def report_following_redirect(self, new_url):
@@ -537,9 +561,9 @@ class GenericIE(InfoExtractor):
 
             if default_search in ('error', 'fixup_error'):
                 raise ExtractorError(
-                    ('%r is not a valid URL. '
-                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
-                    % (url, url), expected=True)
+                    '%r is not a valid URL. '
+                    'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
+                    % (url, url), expected=True)
             else:
                 if ':' not in default_search:
                     default_search += ':'
@@ -598,10 +622,28 @@ class GenericIE(InfoExtractor):
         if not self._downloader.params.get('test', False) and not is_intentional:
             self._downloader.report_warning('Falling back on generic information extractor.')
 
-        if full_response:
-            webpage = self._webpage_read_content(full_response, url, video_id)
-        else:
-            webpage = self._download_webpage(url, video_id)
+        if not full_response:
+            full_response = self._request_webpage(url, video_id)
+
+        # Maybe it's a direct link to a video?
+        # Be careful not to download the whole thing!
+        first_bytes = full_response.read(512)
+        if not re.match(r'^\s*<', first_bytes.decode('utf-8', 'replace')):
+            self._downloader.report_warning(
+                'URL could be a direct video link, returning it as such.')
+            upload_date = unified_strdate(
+                head_response.headers.get('Last-Modified'))
+            return {
+                'id': video_id,
+                'title': os.path.splitext(url_basename(url))[0],
+                'direct': True,
+                'url': url,
+                'upload_date': upload_date,
+            }
+
+        webpage = self._webpage_read_content(
+            full_response, url, video_id, prefix=first_bytes)
+
         self.report_extraction(video_id)
 
         # Is it an RSS feed?
@@ -702,6 +744,12 @@ class GenericIE(InfoExtractor):
             return _playlist_from_matches(
                 matches, lambda m: unescapeHTML(m[1]))
 
+        # Look for lazyYT YouTube embed
+        matches = re.findall(
+            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
+        if matches:
+            return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
+
         # Look for embedded Dailymotion player
         matches = re.findall(
             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
@@ -733,7 +781,7 @@ class GenericIE(InfoExtractor):
                 'title': video_title,
                 'id': video_id,
             }
-            
+
         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
         if match:
             return {
@@ -748,7 +796,7 @@ class GenericIE(InfoExtractor):
         # Look for embedded blip.tv player
         mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
         if mobj:
-            return self.url_result('http://blip.tv/a/a-'+mobj.group(1), 'BlipTV')
+            return self.url_result('http://blip.tv/a/a-' + mobj.group(1), 'BlipTV')
         mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage)
         if mobj:
             return self.url_result(mobj.group(1), 'BlipTV')
@@ -784,7 +832,7 @@ class GenericIE(InfoExtractor):
 
         # Look for Ooyala videos
         mobj = (re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
-             re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage))
+                re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage))
         if mobj is not None:
             return OoyalaIE._build_url_result(mobj.group('ec'))
 
@@ -1025,4 +1073,3 @@ class GenericIE(InfoExtractor):
                 '_type': 'playlist',
                 'entries': entries,
             }
-
index 66ca37918ad4e8133a013cb806adb8a4f10c4720..6949a57c70dd9b378c4879dad8afd4f3b18e558a 100644 (file)
@@ -397,4 +397,4 @@ class GloboIE(InfoExtractor):
             'uploader_id': uploader_id,
             'like_count': like_count,
             'formats': formats
-        }
\ No newline at end of file
+        }
index e21e57510d346b3425a37aa12b3bd9c65668bb83..1ac1da8569e20b84c2baf38011488d6304a090cb 100644 (file)
@@ -9,14 +9,15 @@ from ..utils import (
     determine_ext,
     compat_urllib_parse,
     compat_urllib_request,
+    int_or_none,
 )
 
 
 class GorillaVidIE(InfoExtractor):
-    IE_DESC = 'GorillaVid.in, daclips.in and movpod.in'
+    IE_DESC = 'GorillaVid.in, daclips.in, movpod.in and fastvideo.in'
     _VALID_URL = r'''(?x)
         https?://(?P<host>(?:www\.)?
-            (?:daclips\.in|gorillavid\.in|movpod\.in))/
+            (?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in))/
         (?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?
     '''
 
@@ -49,6 +50,16 @@ class GorillaVidIE(InfoExtractor):
             'title': 'Micro Pig piglets ready on 16th July 2009-bG0PdrCdxUc',
             'thumbnail': 're:http://.*\.jpg',
         }
+    }, {
+        # video with countdown timeout
+        'url': 'http://fastvideo.in/1qmdn1lmsmbw',
+        'md5': '8b87ec3f6564a3108a0e8e66594842ba',
+        'info_dict': {
+            'id': '1qmdn1lmsmbw',
+            'ext': 'mp4',
+            'title': 'Man of Steel - Trailer',
+            'thumbnail': 're:http://.*\.jpg',
+        },
     }, {
         'url': 'http://movpod.in/0wguyyxi1yca',
         'only_matching': True,
@@ -69,8 +80,14 @@ class GorillaVidIE(InfoExtractor):
             (?:id="[^"]+"\s+)?
             value="([^"]*)"
             ''', webpage))
-        
+
         if fields['op'] == 'download1':
+            countdown = int_or_none(self._search_regex(
+                r'<span id="countdown_str">(?:[Ww]ait)?\s*<span id="cxc">(\d+)</span>\s*(?:seconds?)?</span>',
+                webpage, 'countdown', default=None))
+            if countdown:
+                self._sleep(countdown, video_id)
+
             post = compat_urllib_parse.urlencode(fields)
 
             req = compat_urllib_request.Request(url, post)
@@ -78,9 +95,13 @@ class GorillaVidIE(InfoExtractor):
 
             webpage = self._download_webpage(req, video_id, 'Downloading video page')
 
-        title = self._search_regex(r'style="z-index: [0-9]+;">([^<]+)</span>', webpage, 'title')
-        video_url = self._search_regex(r'file\s*:\s*\'(http[^\']+)\',', webpage, 'file url')
-        thumbnail = self._search_regex(r'image\s*:\s*\'(http[^\']+)\',', webpage, 'thumbnail', fatal=False)
+        title = self._search_regex(
+            r'style="z-index: [0-9]+;">([^<]+)</span>',
+            webpage, 'title', default=None) or self._og_search_title(webpage)
+        video_url = self._search_regex(
+            r'file\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'file url')
+        thumbnail = self._search_regex(
+            r'image\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'thumbnail', fatal=False)
 
         formats = [{
             'format_id': 'sd',
index 7e7714438ce9099e4d7f8d6efe9f1204dc6f2690..5b6efb27eedfe0097cc47d96f3f287ab1858e9e8 100644 (file)
@@ -37,7 +37,7 @@ class HornBunnyIE(InfoExtractor):
         webpage2 = self._download_webpage(redirect_url, video_id)
         video_url = self._html_search_regex(
             r'flvMask:(.*?);', webpage2, 'video_url')
-        
+
         duration = parse_duration(self._search_regex(
             r'<strong>Runtime:</strong>\s*([0-9:]+)</div>',
             webpage, 'duration', fatal=False))
index 80b48b1b3605a18fa11547ab890897b6b47ccaa5..651784b73940032fd65c9675043f045b0bcf4ff2 100644 (file)
@@ -1,12 +1,13 @@
 from __future__ import unicode_literals
 
-import re
 import base64
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
+)
+from ..utils import (
     ExtractorError,
     HEADRequest,
 )
@@ -16,25 +17,24 @@ class HotNewHipHopIE(InfoExtractor):
     _VALID_URL = r'http://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html'
     _TEST = {
         'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html',
-        'file': '1435540.mp3',
         'md5': '2c2cd2f76ef11a9b3b581e8b232f3d96',
         'info_dict': {
+            'id': '1435540',
+            'ext': 'mp3',
             'title': 'Freddie Gibbs - Lay It Down'
         }
     }
 
     def _real_extract(self, url):
-        m = re.match(self._VALID_URL, url)
-        video_id = m.group('id')
-
-        webpage_src = self._download_webpage(url, video_id)
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
 
         video_url_base64 = self._search_regex(
-            r'data-path="(.*?)"', webpage_src, u'video URL', fatal=False)
+            r'data-path="(.*?)"', webpage, 'video URL', default=None)
 
         if video_url_base64 is None:
             video_url = self._search_regex(
-                r'"contentUrl" content="(.*?)"', webpage_src, u'video URL')
+                r'"contentUrl" content="(.*?)"', webpage, 'content URL')
             return self.url_result(video_url, ie='Youtube')
 
         reqdata = compat_urllib_parse.urlencode([
@@ -59,11 +59,11 @@ class HotNewHipHopIE(InfoExtractor):
         if video_url.endswith('.html'):
             raise ExtractorError('Redirect failed')
 
-        video_title = self._og_search_title(webpage_src).strip()
+        video_title = self._og_search_title(webpage).strip()
 
         return {
             'id': video_id,
             'url': video_url,
             'title': video_title,
-            'thumbnail': self._og_search_thumbnail(webpage_src),
+            'thumbnail': self._og_search_thumbnail(webpage),
         }
index 6ae04782c1aabb27ee6973819810f1e6f763b8b0..3f7d6666c0810e545c0f285dcf70689806c4dac7 100644 (file)
@@ -13,7 +13,7 @@ class HowcastIE(InfoExtractor):
         'info_dict': {
             'id': '390161',
             'ext': 'mp4',
-            'description': 'The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here\'s the proper way to tie a square knot.', 
+            'description': 'The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here\'s the proper way to tie a square knot.',
             'title': 'How to Tie a Square Knot Properly',
         }
     }
@@ -27,10 +27,10 @@ class HowcastIE(InfoExtractor):
         self.report_extraction(video_id)
 
         video_url = self._search_regex(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)',
-            webpage, 'video URL')
+                                       webpage, 'video URL')
 
         video_description = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'',
-            webpage, 'description', fatal=False)
+                                                    webpage, 'description', fatal=False)
 
         return {
             'id': video_id,
index c80185b535b8d9853adf886fb8b5582284d12a03..3db668cd0297ea0ff3c0168c2b3f5db1491a0db4 100644 (file)
@@ -63,8 +63,10 @@ class IGNIE(InfoExtractor):
                 'id': '078fdd005f6d3c02f63d795faa1b984f',
                 'ext': 'mp4',
                 'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
-                'description': 'Giant skeletons, bloody hunts, and captivating'
-                    ' natural beauty take our breath away.',
+                'description': (
+                    'Giant skeletons, bloody hunts, and captivating'
+                    ' natural beauty take our breath away.'
+                ),
             },
         },
     ]
@@ -99,7 +101,7 @@ class IGNIE(InfoExtractor):
         video_id = self._find_video_id(webpage)
         result = self._get_video_info(video_id)
         description = self._html_search_regex(self._DESCRIPTION_RE,
-            webpage, 'video description', flags=re.DOTALL)
+                                              webpage, 'video description', flags=re.DOTALL)
         result['description'] = description
         return result
 
index 6108ed5526ba37f2e5b8d14b604974ca3c3f8273..f2c1c10f5c1dec44129ea4cd7d4cff69c7c07206 100644 (file)
@@ -71,7 +71,7 @@ class ImdbListIE(InfoExtractor):
         },
         'playlist_count': 7,
     }
-    
+
     def _real_extract(self, url):
         list_id = self._match_id(url)
         webpage = self._download_webpage(url, list_id)
index 5109f26ce860edc0675eaba6350e0ab820e7fe27..b020e2621a5cc3c8d7ef6a1bc2cb6aaea989f779 100644 (file)
@@ -27,9 +27,9 @@ class InstagramIE(InfoExtractor):
         video_id = mobj.group('id')
         webpage = self._download_webpage(url, video_id)
         uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
-            webpage, 'uploader id', fatal=False)
+                                         webpage, 'uploader id', fatal=False)
         desc = self._search_regex(r'"caption":"(.*?)"', webpage, 'description',
-            fatal=False)
+                                  fatal=False)
 
         return {
             'id': video_id,
index 53f9a5f7587bcf36d9d4a63f6cfa36d90496dd28..1e47991874ecf6afa75181f99b6bf98a8dd60916 100644 (file)
@@ -32,7 +32,7 @@ class InternetVideoArchiveIE(InfoExtractor):
     def _clean_query(query):
         NEEDED_ARGS = ['publishedid', 'customerid']
         query_dic = compat_urlparse.parse_qs(query)
-        cleaned_dic = dict((k,v[0]) for (k,v) in query_dic.items() if k in NEEDED_ARGS)
+        cleaned_dic = dict((k, v[0]) for (k, v) in query_dic.items() if k in NEEDED_ARGS)
         # Other player ids return m3u8 urls
         cleaned_dic['playerid'] = '247'
         cleaned_dic['videokbrate'] = '100000'
@@ -45,22 +45,26 @@ class InternetVideoArchiveIE(InfoExtractor):
         url = self._build_url(query)
 
         flashconfiguration = self._download_xml(url, video_id,
-            'Downloading flash configuration')
+                                                'Downloading flash configuration')
         file_url = flashconfiguration.find('file').text
         file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
         # Replace some of the parameters in the query to get the best quality
         # and http links (no m3u8 manifests)
         file_url = re.sub(r'(?<=\?)(.+)$',
-            lambda m: self._clean_query(m.group()),
-            file_url)
+                          lambda m: self._clean_query(m.group()),
+                          file_url)
         info = self._download_xml(file_url, video_id,
-            'Downloading video info')
+                                  'Downloading video info')
         item = info.find('channel/item')
 
         def _bp(p):
-            return xpath_with_ns(p,
-                {'media': 'http://search.yahoo.com/mrss/',
-                'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'})
+            return xpath_with_ns(
+                p,
+                {
+                    'media': 'http://search.yahoo.com/mrss/',
+                    'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats',
+                }
+            )
         formats = []
         for content in item.findall(_bp('media:group/media:content')):
             attr = content.attrib
index d1defd363c5fe9c86330236f53b8aa21bfe65a38..4247d6391fa25f674449d9d8ac44b428c7c387e0 100644 (file)
@@ -54,7 +54,7 @@ class IPrimaIE(InfoExtractor):
 
         player_url = (
             'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' %
-            (floor(random()*1073741824), floor(random()*1073741824))
+            (floor(random() * 1073741824), floor(random() * 1073741824))
         )
 
         req = compat_urllib_request.Request(player_url)
index 75b543b7cf8ed443bb98f3cd5c492e1c629c28a3..f0fba1adb7dba9c4c09717132731d98ff0e5ffd3 100644 (file)
@@ -43,7 +43,7 @@ class IviIE(InfoExtractor):
                 'thumbnail': 'http://thumbs.ivi.ru/f15.vcp.digitalaccess.ru/contents/8/4/0068dc0677041f3336b7c2baad8fc0.jpg',
             },
             'skip': 'Only works from Russia',
-         }
+        }
     ]
 
     # Sorted by quality
@@ -102,7 +102,7 @@ class IviIE(InfoExtractor):
         compilation = result['compilation']
         title = result['title']
 
-        title = '%s - %s' % (compilation, title) if compilation is not None else title  
+        title = '%s - %s' % (compilation, title) if compilation is not None else title
 
         previews = result['preview']
         previews.sort(key=lambda fmt: self._known_thumbnails.index(fmt['content_format']))
@@ -152,17 +152,17 @@ class IviCompilationIE(InfoExtractor):
         compilation_id = mobj.group('compilationid')
         season_id = mobj.group('seasonid')
 
-        if season_id is not None: # Season link
+        if season_id is not None:  # Season link
             season_page = self._download_webpage(url, compilation_id, 'Downloading season %s web page' % season_id)
             playlist_id = '%s/season%s' % (compilation_id, season_id)
             playlist_title = self._html_search_meta('title', season_page, 'title')
             entries = self._extract_entries(season_page, compilation_id)
-        else: # Compilation link            
+        else:  # Compilation link
             compilation_page = self._download_webpage(url, compilation_id, 'Downloading compilation web page')
             playlist_id = compilation_id
             playlist_title = self._html_search_meta('title', compilation_page, 'title')
             seasons = re.findall(r'<a href="/watch/%s/season(\d+)">[^<]+</a>' % compilation_id, compilation_page)
-            if len(seasons) == 0: # No seasons in this compilation
+            if len(seasons) == 0:  # No seasons in this compilation
                 entries = self._extract_entries(compilation_page, compilation_id)
             else:
                 entries = []
@@ -172,4 +172,4 @@ class IviCompilationIE(InfoExtractor):
                         compilation_id, 'Downloading season %s web page' % season_id)
                     entries.extend(self._extract_entries(season_page, compilation_id))
 
-        return self.playlist_result(entries, playlist_id, playlist_title)
\ No newline at end of file
+        return self.playlist_result(entries, playlist_id, playlist_title)
index ace08769bd7671619db448696a016d08fd453e67..063e86de46c896c94be505ae916fd6f3fbdedc02 100644 (file)
@@ -45,4 +45,3 @@ class JadoreCettePubIE(InfoExtractor):
             'title': title,
             'description': description,
         }
-
index 1881659665b415714d1315f6d52bc93dbe324a2c..8094cc2e487f2880a66178d0a07c97a2ef9432f5 100644 (file)
@@ -29,7 +29,7 @@ class JeuxVideoIE(InfoExtractor):
         xml_link = self._html_search_regex(
             r'<param name="flashvars" value="config=(.*?)" />',
             webpage, 'config URL')
-        
+
         video_id = self._search_regex(
             r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
             xml_link, 'video ID')
@@ -38,7 +38,7 @@ class JeuxVideoIE(InfoExtractor):
             xml_link, title, 'Downloading XML config')
         info_json = config.find('format.json').text
         info = json.loads(info_json)['versions'][0]
-        
+
         video_url = 'http://video720.jeuxvideo.com/' + info['file']
 
         return {
index 5aa32bf092d8bfae17fd302fe399acf5d5264164..da8068efcba914a14788bc7b39872b8b5cd01c7c 100644 (file)
@@ -36,7 +36,7 @@ class JukeboxIE(InfoExtractor):
 
         try:
             video_url = self._search_regex(r'"config":{"file":"(?P<video_url>http:[^"]+\?mdtk=[0-9]+)"',
-                iframe_html, 'video url')
+                                           iframe_html, 'video url')
             video_url = unescapeHTML(video_url).replace('\/', '/')
         except RegexNotFoundError:
             youtube_url = self._search_regex(
@@ -47,9 +47,9 @@ class JukeboxIE(InfoExtractor):
             return self.url_result(youtube_url, ie='Youtube')
 
         title = self._html_search_regex(r'<h1 class="inline">([^<]+)</h1>',
-            html, 'title')
+                                        html, 'title')
         artist = self._html_search_regex(r'<span id="infos_article_artist">([^<]+)</span>',
-            html, 'artist')
+                                         html, 'artist')
 
         return {
             'id': video_id,
index 23103b163fea1ed6a27cb44dadcf231b478edcdb..dbfe4cc03fd8c569ed2f05d5ae9c86c36bb9e278 100644 (file)
@@ -10,7 +10,7 @@ _md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
 
 class KankanIE(InfoExtractor):
     _VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml'
-    
+
     _TEST = {
         'url': 'http://yinyue.kankan.com/vod/48/48863.shtml',
         'file': '48863.flv',
index 827091e601c9c896c206a25a545ad39d61d0f658..7d4b57056509383fdc082a68c1650f38dc258763 100644 (file)
@@ -13,8 +13,10 @@ class KickStarterIE(InfoExtractor):
             'id': '1404461844',
             'ext': 'mp4',
             'title': 'Intersection: The Story of Josh Grant by Kyle Cowling',
-            'description': 'A unique motocross documentary that examines the '
-                'life and mind of one of sports most elite athletes: Josh Grant.',
+            'description': (
+                'A unique motocross documentary that examines the '
+                'life and mind of one of sports most elite athletes: Josh Grant.'
+            ),
         },
     }, {
         'note': 'Embedded video (not using the native kickstarter video service)',
index 8a73ecfa0631d2e63c0d11ca30889c63ae9a9d6d..41fd62009ac16100c1e6bb776028020cd12e9ff2 100644 (file)
@@ -63,4 +63,4 @@ class KontrTubeIE(InfoExtractor):
             'duration': duration,
             'view_count': int_or_none(view_count),
             'comment_count': int_or_none(comment_count),
-        }
\ No newline at end of file
+        }
index 89013e5223ade02697f96691ab5219da3a6b8c0c..a602980a141f3f8ccce026eaddc8b383e7894352 100644 (file)
@@ -30,4 +30,3 @@ class Ku6IE(InfoExtractor):
             'title': title,
             'url': downloadUrl
         }
-
index 102e29f7a8d03e5b03c920a72b48d3d121ea2fb6..2fd3b4699d288809974ead7519e2bc5ddf9bcc68 100644 (file)
@@ -75,4 +75,3 @@ class Laola1TvIE(InfoExtractor):
             'categories': categories,
             'ext': 'mp4',
         }
-
index 8d9491f233bf578bc9274a18fe022a1538effad6..1dfe7f77f4ccdefa0b076f71f6467644e465cb52 100644 (file)
@@ -52,7 +52,7 @@ class LifeNewsIE(InfoExtractor):
             r'<div class=\'comments\'>\s*<span class=\'counter\'>(\d+)</span>', webpage, 'comment count', fatal=False)
 
         upload_date = self._html_search_regex(
-            r'<time datetime=\'([^\']+)\'>', webpage, 'upload date',fatal=False)
+            r'<time datetime=\'([^\']+)\'>', webpage, 'upload date', fatal=False)
         if upload_date is not None:
             upload_date = unified_strdate(upload_date)
 
@@ -71,4 +71,4 @@ class LifeNewsIE(InfoExtractor):
         if len(videos) == 1:
             return make_entry(video_id, videos[0])
         else:
-            return [make_entry(video_id, media, video_number+1) for video_number, media in enumerate(videos)]
\ No newline at end of file
+            return [make_entry(video_id, media, video_number + 1) for video_number, media in enumerate(videos)]
index 8e50e8f79adee2e21c7801da13da60897fcb61ec..b04be1e8cfda94addca26a1d1e3731ce61519dc1 100644 (file)
@@ -19,8 +19,7 @@ class LiveLeakIE(InfoExtractor):
             'uploader': 'ljfriel2',
             'title': 'Most unlucky car accident'
         }
-    },
-    {
+    }, {
         'url': 'http://www.liveleak.com/view?i=f93_1390833151',
         'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
         'info_dict': {
@@ -30,8 +29,7 @@ class LiveLeakIE(InfoExtractor):
             'uploader': 'ARD_Stinkt',
             'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
         }
-    },
-    {
+    }, {
         'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
         'md5': '42c6d97d54f1db107958760788c5f48f',
         'info_dict': {
index 33f34f4e9bdda2aa034dd4f46ef3299478f181ec..2160d6cb08ae5b71584ffdc0d76982e2a9bdf0c0 100644 (file)
@@ -45,7 +45,7 @@ class LyndaIE(SubtitlesInfoExtractor):
         video_id = mobj.group(1)
 
         page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, video_id,
-            'Downloading video JSON')
+                                      'Downloading video JSON')
         video_json = json.loads(page)
 
         if 'Status' in video_json:
@@ -109,7 +109,7 @@ class LyndaIE(SubtitlesInfoExtractor):
             'password': password,
             'remember': 'false',
             'stayPut': 'false'
-        }        
+        }
         request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
         login_page = self._download_webpage(request, None, 'Logging in as %s' % username)
 
@@ -117,7 +117,7 @@ class LyndaIE(SubtitlesInfoExtractor):
         m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page)
         if m is not None:
             response = m.group('json')
-            response_json = json.loads(response)            
+            response_json = json.loads(response)
             state = response_json['state']
 
             if state == 'notlogged':
@@ -187,7 +187,7 @@ class LyndaCourseIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         course_path = mobj.group('coursepath')
         course_id = mobj.group('courseid')
-        
+
         page = self._download_webpage('http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
                                       course_id, 'Downloading course JSON')
         course_json = json.loads(page)
@@ -221,4 +221,4 @@ class LyndaCourseIE(InfoExtractor):
 
         course_title = course_json['Title']
 
-        return self.playlist_result(entries, course_id, course_title)
\ No newline at end of file
+        return self.playlist_result(entries, course_id, course_title)
index 1a26b5d572852eab85449af0a6ffc2fd7c98aeda..7e025831b51d611f00e248bda637b4ae8f35efb6 100644 (file)
@@ -27,7 +27,7 @@ class M6IE(InfoExtractor):
         video_id = mobj.group('id')
 
         rss = self._download_xml('http://ws.m6.fr/v1/video/info/m6/bonus/%s' % video_id, video_id,
-            'Downloading video RSS')
+                                 'Downloading video RSS')
 
         title = rss.find('./channel/item/title').text
         description = rss.find('./channel/item/description').text
@@ -53,4 +53,4 @@ class M6IE(InfoExtractor):
             'duration': duration,
             'view_count': view_count,
             'formats': formats,
-        }
\ No newline at end of file
+        }
index 8c1966ab25e5215ab96286822d50507aa25ea58d..1abf6e4f85d52cbe4d257280cf26277e715dbdb1 100644 (file)
@@ -7,6 +7,7 @@ from ..utils import (
     compat_urllib_parse,
 )
 
+
 class MalemotionIE(InfoExtractor):
     _VALID_URL = r'^(?:https?://)?malemotion\.com/video/(.+?)\.(?P<id>.+?)(#|$)'
     _TEST = {
index 1b8c4a32edf5d269b1e9bb9db366487ef2fba981..5fdd19027db3ccad0265601b8d88452a0eaac525 100644 (file)
@@ -7,7 +7,7 @@ from .common import InfoExtractor
 
 class MDRIE(InfoExtractor):
     _VALID_URL = r'^(?P<domain>https?://(?:www\.)?mdr\.de)/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)(?:_|\.html)'
-    
+
     # No tests, MDR regularily deletes its videos
     _TEST = {
         'url': 'http://www.mdr.de/fakt/video189002.html',
index 1a896b536dd813a561cea8f870258bf73519e00b..858c1c0c31f4c08c3068a62983781129288dc3b8 100644 (file)
@@ -22,7 +22,7 @@ class MetacafeIE(InfoExtractor):
         # Youtube video
         {
             'add_ie': ['Youtube'],
-            'url':  'http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/',
+            'url': 'http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/',
             'info_dict': {
                 'id': '_aUehQsCQtM',
                 'ext': 'mp4',
@@ -219,8 +219,8 @@ class MetacafeIE(InfoExtractor):
         description = self._og_search_description(webpage)
         thumbnail = self._og_search_thumbnail(webpage)
         video_uploader = self._html_search_regex(
-                r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
-                webpage, 'uploader nickname', fatal=False)
+            r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
+            webpage, 'uploader nickname', fatal=False)
         duration = int_or_none(
             self._html_search_meta('video:duration', webpage))
 
index 07f072924a6dadb2838230fd29a6a830ff99bb64..e30320569805aedaa6694ae54f9086909593f7a4 100644 (file)
@@ -28,7 +28,7 @@ class MetacriticIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
         # The xml is not well formatted, there are raw '&'
         info = self._download_xml('http://www.metacritic.com/video_data?video=' + video_id,
-            video_id, 'Downloading info xml', transform_source=fix_xml_ampersands)
+                                  video_id, 'Downloading info xml', transform_source=fix_xml_ampersands)
 
         clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id)
         formats = []
@@ -44,7 +44,7 @@ class MetacriticIE(InfoExtractor):
         self._sort_formats(formats)
 
         description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>',
-            webpage, 'description', flags=re.DOTALL)
+                                              webpage, 'description', flags=re.DOTALL)
 
         return {
             'id': video_id,
index 90b460d65c2ca90e252dba6ecc3adcb93f86ab1e..0ba435dc5597219e9b569c441e58e3a196e1bfbe 100644 (file)
@@ -55,4 +55,4 @@ class MojvideoIE(InfoExtractor):
             'title': title,
             'thumbnail': thumbnail,
             'duration': duration,
-        }
\ No newline at end of file
+        }
index 79bb2ca5935cb7ad888081540696e256c31faa98..1c4f589cce1605b17e099d47c050097fef1ad0a9 100644 (file)
@@ -54,7 +54,7 @@ class MonikerIE(InfoExtractor):
 
         title = os.path.splitext(data['fname'])[0]
 
-        #Could be several links with different quality
+        # Could be several links with different quality
         links = re.findall(r'"file" : "?(.+?)",', webpage)
         # Assume the links are ordered in quality
         formats = [{
index 7d21ea18f1bec57a83a49478d43f000b8039041f..34a4bec3a0d1fb91208acce4684a4078188b847d 100644 (file)
@@ -49,7 +49,7 @@ class MooshareIE(InfoExtractor):
         page = self._download_webpage(url, video_id, 'Downloading page')
 
         if re.search(r'>Video Not Found or Deleted<', page) is not None:
-            raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
+            raise ExtractorError('Video %s does not exist' % video_id, expected=True)
 
         hash_key = self._html_search_regex(r'<input type="hidden" name="hash" value="([^"]+)">', page, 'hash')
         title = self._html_search_regex(r'(?m)<div class="blockTitle">\s*<h2>Watch ([^<]+)</h2>', page, 'title')
@@ -111,4 +111,4 @@ class MooshareIE(InfoExtractor):
             'thumbnail': thumbnail,
             'duration': duration,
             'formats': formats,
-        }
\ No newline at end of file
+        }
index 3621ff99e76da1bffabda1a81f6181fd4a6ed61c..97d5da626a7a5d2555ac3107eb89d1a4fd11b510 100644 (file)
@@ -72,7 +72,7 @@ class MotherlessIE(InfoExtractor):
         like_count = str_to_int(self._html_search_regex(
             r'<strong>Favorited</strong>\s+([^<]+)<',
             webpage, 'like count', fatal=False))
+
         upload_date = self._html_search_regex(
             r'<strong>Uploaded</strong>\s+([^<]+)<', webpage, 'upload date')
         if 'Ago' in upload_date:
index 43146180ad2b272104cb2234de3f4482e7c330fb..f130b75c416ad3fe2e8d4ac3221799d1eb4aa1b9 100644 (file)
@@ -27,7 +27,7 @@ class MoviezineIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
         jsplayer = self._download_webpage('http://www.moviezine.se/api/player.js?video=%s' % video_id, video_id, 'Downloading js api player')
 
-        formats =[{
+        formats = [{
             'format_id': 'sd',
             'url': self._html_search_regex(r'file: "(.+?)",', jsplayer, 'file'),
             'quality': 0,
index 4191cf7a0c808034edf37aa07fa8338174a8d7e3..6101063f2ef9695dd86bbabedec134a4387a9332 100644 (file)
@@ -24,4 +24,4 @@ class MovShareIE(NovaMovIE):
             'title': 'dissapeared image',
             'description': 'optical illusion  dissapeared image  magic illusion',
         }
-    }
\ No newline at end of file
+    }
index 387935d4db784641377b72f5be9ec5e7649f5908..88c9501cd4e34492003a1fe67923d1d84a9e2d2b 100644 (file)
@@ -44,7 +44,7 @@ class MporaIE(InfoExtractor):
                     r'_([0-9]+)\.[a-zA-Z0-9]+$', src['src'],
                     False, default=None)
                 vcodec = src['type'].partition('/')[2]
-                
+
                 formats.append({
                     'format_id': encoding_id + '-' + vcodec,
                     'url': src['src'],
index 5f0f476b690579dbbc0aeb03171982944270409f..b482d6d4dfb09416bc46ef43ac21dff8bf2d5744 100644 (file)
@@ -53,23 +53,23 @@ class MTVServicesInfoExtractor(InfoExtractor):
         # Otherwise we get a webpage that would execute some javascript
         req.add_header('Youtubedl-user-agent', 'curl/7')
         webpage = self._download_webpage(req, mtvn_id,
-            'Downloading mobile page')
+                                         'Downloading mobile page')
         metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url'))
         req = HEADRequest(metrics_url)
         response = self._request_webpage(req, mtvn_id, 'Resolving url')
         url = response.geturl()
         # Transform the url to get the best quality:
         url = re.sub(r'.+pxE=mp4', 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=0+_pxK=18639+_pxE=mp4', url, 1)
-        return [{'url': url,'ext': 'mp4'}]
+        return [{'url': url, 'ext': 'mp4'}]
 
     def _extract_video_formats(self, mdoc, mtvn_id):
         if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4)$', mdoc.find('.//src').text) is not None:
             if mtvn_id is not None and self._MOBILE_TEMPLATE is not None:
                 self.to_screen('The normal version is not available from your '
-                    'country, trying with the mobile version')
+                               'country, trying with the mobile version')
                 return self._extract_mobile_video_formats(mtvn_id)
             raise ExtractorError('This video is not available from your country.',
-                expected=True)
+                                 expected=True)
 
         formats = []
         for rendition in mdoc.findall('.//rendition'):
@@ -98,7 +98,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
             mediagen_url += '&acceptMethods=fms'
 
         mediagen_doc = self._download_xml(mediagen_url, video_id,
-            'Downloading video urls')
+                                          'Downloading video urls')
 
         description_node = itemdoc.find('description')
         if description_node is not None:
@@ -126,7 +126,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
         # This a short id that's used in the webpage urls
         mtvn_id = None
         mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category',
-                'scheme', 'urn:mtvn:id')
+                                       'scheme', 'urn:mtvn:id')
         if mtvn_id_node is not None:
             mtvn_id = mtvn_id_node.text
 
@@ -164,7 +164,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
         if mgid is None or ':' not in mgid:
             mgid = self._search_regex(
                 [r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
-                webpage, u'mgid')
+                webpage, 'mgid')
         return self._get_videos_info(mgid)
 
 
@@ -188,7 +188,7 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
         video_id = self._id_from_uri(uri)
         site_id = uri.replace(video_id, '')
         config_url = ('http://media.mtvnservices.com/pmt/e1/players/{0}/'
-            'context4/context5/config.xml'.format(site_id))
+                      'context4/context5/config.xml'.format(site_id))
         config_doc = self._download_xml(config_url, video_id)
         feed_node = config_doc.find('.//feed')
         feed_url = feed_node.text.strip().split('?')[0]
@@ -240,15 +240,15 @@ class MTVIE(MTVServicesInfoExtractor):
         uri = mobj.groupdict().get('mgid')
         if uri is None:
             webpage = self._download_webpage(url, video_id)
-    
+
             # Some videos come from Vevo.com
             m_vevo = re.search(r'isVevoVideo = true;.*?vevoVideoId = "(.*?)";',
                                webpage, re.DOTALL)
             if m_vevo:
-                vevo_id = m_vevo.group(1);
+                vevo_id = m_vevo.group(1)
                 self.to_screen('Vevo video detected: %s' % vevo_id)
                 return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
-    
+
             uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, 'uri')
         return self._get_videos_info(uri)
 
index c7f6beb9c703de0434a8ab4e9a41df3df16c2d81..b4e8ad17e9003940e5753349e30b4857c4c8aa6a 100644 (file)
@@ -73,4 +73,3 @@ class MuenchenTVIE(InfoExtractor):
             'is_live': True,
             'thumbnail': thumbnail,
         }
-
index 42d7a82a5dcc50fe0f84c48cb96fdf4b4a794c37..50d92b50ae5ec2fa49e45cc64aea7f08cc21ccea 100644 (file)
@@ -72,4 +72,4 @@ class MusicPlayOnIE(InfoExtractor):
             'duration': int_or_none(duration),
             'view_count': int_or_none(view_count),
             'formats': formats,
-        }
\ No newline at end of file
+        }
index 1772b7f9ae43c2eaef57a15a5b3df5d9e7244213..1e9cf8de9174e086dd7c19525a7dc94025075683 100644 (file)
@@ -1,64 +1,65 @@
-import re
-import json
+from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
-    determine_ext,
 )
 
 
 class MuzuTVIE(InfoExtractor):
     _VALID_URL = r'https?://www\.muzu\.tv/(.+?)/(.+?)/(?P<id>\d+)'
-    IE_NAME = u'muzu.tv'
+    IE_NAME = 'muzu.tv'
 
     _TEST = {
-        u'url': u'http://www.muzu.tv/defected/marcashken-featuring-sos-cat-walk-original-mix-music-video/1981454/',
-        u'file': u'1981454.mp4',
-        u'md5': u'98f8b2c7bc50578d6a0364fff2bfb000',
-        u'info_dict': {
-            u'title': u'Cat Walk (Original Mix)',
-            u'description': u'md5:90e868994de201b2570e4e5854e19420',
-            u'uploader': u'MarcAshken featuring SOS',
+        'url': 'http://www.muzu.tv/defected/marcashken-featuring-sos-cat-walk-original-mix-music-video/1981454/',
+        'md5': '98f8b2c7bc50578d6a0364fff2bfb000',
+        'info_dict': {
+            'id': '1981454',
+            'ext': 'mp4',
+            'title': 'Cat Walk (Original Mix)',
+            'description': 'md5:90e868994de201b2570e4e5854e19420',
+            'uploader': 'MarcAshken featuring SOS',
         },
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
-        info_data = compat_urllib_parse.urlencode({'format': 'json',
-                                                   'url': url,
-                                                   })
-        video_info_page = self._download_webpage('http://www.muzu.tv/api/oembed/?%s' % info_data,
-                                                 video_id, u'Downloading video info')
-        info = json.loads(video_info_page)
+        info_data = compat_urllib_parse.urlencode({
+            'format': 'json',
+            'url': url,
+        })
+        info = self._download_json(
+            'http://www.muzu.tv/api/oembed/?%s' % info_data,
+            video_id, 'Downloading video info')
 
-        player_info_page = self._download_webpage('http://player.muzu.tv/player/playerInit?ai=%s' % video_id,
-                                                  video_id, u'Downloading player info')
-        video_info = json.loads(player_info_page)['videos'][0]
-        for quality in ['1080' , '720', '480', '360']:
+        player_info = self._download_json(
+            'http://player.muzu.tv/player/playerInit?ai=%s' % video_id,
+            video_id, 'Downloading player info')
+        video_info = player_info['videos'][0]
+        for quality in ['1080', '720', '480', '360']:
             if video_info.get('v%s' % quality):
                 break
 
-        data = compat_urllib_parse.urlencode({'ai': video_id,
-                                              # Even if each time you watch a video the hash changes,
-                                              # it seems to work for different videos, and it will work
-                                              # even if you use any non empty string as a hash
-                                              'viewhash': 'VBNff6djeV4HV5TRPW5kOHub2k',
-                                              'device': 'web',
-                                              'qv': quality,
-                                              })
-        video_url_page = self._download_webpage('http://player.muzu.tv/player/requestVideo?%s' % data,
-                                                video_id, u'Downloading video url')
-        video_url_info = json.loads(video_url_page)
+        data = compat_urllib_parse.urlencode({
+            'ai': video_id,
+            # Even if each time you watch a video the hash changes,
+            # it seems to work for different videos, and it will work
+            # even if you use any non empty string as a hash
+            'viewhash': 'VBNff6djeV4HV5TRPW5kOHub2k',
+            'device': 'web',
+            'qv': quality,
+        })
+        video_url_info = self._download_json(
+            'http://player.muzu.tv/player/requestVideo?%s' % data,
+            video_id, 'Downloading video url')
         video_url = video_url_info['url']
 
-        return {'id': video_id,
-                'title': info['title'],
-                'url': video_url,
-                'ext': determine_ext(video_url),
-                'thumbnail': info['thumbnail_url'],
-                'description': info['description'],
-                'uploader': info['author_name'],
-                }
+        return {
+            'id': video_id,
+            'title': info['title'],
+            'url': video_url,
+            'thumbnail': info['thumbnail_url'],
+            'description': info['description'],
+            'uploader': info['author_name'],
+        }
index c16939f5437cf55b6d3e51e9d51ab45d1d12392f..e626146705cfcc961ebcffad93c889b4a4f90e62 100644 (file)
@@ -1,12 +1,14 @@
+# encoding: utf-8
 from __future__ import unicode_literals
 
 import re
 import json
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_str,
 )
+from ..utils import ExtractorError
 
 
 class MySpaceIE(InfoExtractor):
@@ -14,33 +16,58 @@ class MySpaceIE(InfoExtractor):
 
     _TESTS = [
         {
-            'url': 'https://myspace.com/coldplay/video/viva-la-vida/100008689',
+            'url': 'https://myspace.com/fiveminutestothestage/video/little-big-town/109594919',
             'info_dict': {
-                'id': '100008689',
+                'id': '109594919',
                 'ext': 'flv',
-                'title': 'Viva La Vida',
-                'description': 'The official Viva La Vida video, directed by Hype Williams',
-                'uploader': 'Coldplay',
-                'uploader_id': 'coldplay',
+                'title': 'Little Big Town',
+                'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.',
+                'uploader': 'Five Minutes to the Stage',
+                'uploader_id': 'fiveminutestothestage',
             },
             'params': {
                 # rtmp download
                 'skip_download': True,
             },
         },
-        # song
+        # songs
         {
-            'url': 'https://myspace.com/spiderbags/music/song/darkness-in-my-heart-39008454-27041242',
+            'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681',
             'info_dict': {
-                'id': '39008454',
+                'id': '93388656',
                 'ext': 'flv',
-                'title': 'Darkness In My Heart',
-                'uploader_id': 'spiderbags',
+                'title': 'Of weakened soul...',
+                'uploader': 'Killsorrow',
+                'uploader_id': 'killsorrow',
             },
             'params': {
                 # rtmp download
                 'skip_download': True,
             },
+        }, {
+            'add_ie': ['Vevo'],
+            'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041',
+            'info_dict': {
+                'id': 'USZM20600099',
+                'ext': 'mp4',
+                'title': 'Animal I Have Become',
+                'uploader': 'Three Days Grace',
+                'timestamp': int,
+                'upload_date': '20060502',
+            },
+            'skip': 'VEVO is only available in some countries',
+        }, {
+            'add_ie': ['Youtube'],
+            'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426',
+            'info_dict': {
+                'id': 'ypWvQgnJrSU',
+                'ext': 'mp4',
+                'title': 'Starset - First Light',
+                'description': 'md5:2d5db6c9d11d527683bcda818d332414',
+                'uploader': 'Jacob Soren',
+                'uploader_id': 'SorenPromotions',
+                'upload_date': '20140725',
+            }
         },
     ]
 
@@ -48,22 +75,46 @@ class MySpaceIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
         webpage = self._download_webpage(url, video_id)
+        player_url = self._search_regex(
+            r'playerSwf":"([^"?]*)', webpage, 'player URL')
 
         if mobj.group('mediatype').startswith('music/song'):
             # songs don't store any useful info in the 'context' variable
+            song_data = self._search_regex(
+                r'''<button.*data-song-id=(["\'])%s\1.*''' % video_id,
+                webpage, 'song_data', default=None, group=0)
+            if song_data is None:
+                # some songs in an album are not playable
+                self.report_warning(
+                    '%s: No downloadable song on this page' % video_id)
+                return
             def search_data(name):
-                return self._search_regex(r'data-%s="(.*?)"' % name, webpage,
-                    name)
+                return self._search_regex(
+                    r'''data-%s=([\'"])(?P<data>.*?)\1''' % name,
+                    song_data, name, default='', group='data')
             streamUrl = search_data('stream-url')
+            if not streamUrl:
+                vevo_id = search_data('vevo-id')
+                youtube_id = search_data('youtube-id')
+                if vevo_id:
+                    self.to_screen('Vevo video detected: %s' % vevo_id)
+                    return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
+                elif youtube_id:
+                    self.to_screen('Youtube video detected: %s' % youtube_id)
+                    return self.url_result(youtube_id, ie='Youtube')
+                else:
+                    raise ExtractorError(
+                        'Found song but don\'t know how to download it')
             info = {
                 'id': video_id,
                 'title': self._og_search_title(webpage),
+                'uploader': search_data('artist-name'),
                 'uploader_id': search_data('artist-username'),
                 'thumbnail': self._og_search_thumbnail(webpage),
             }
         else:
-            context = json.loads(self._search_regex(r'context = ({.*?});', webpage,
-                u'context'))
+            context = json.loads(self._search_regex(
+                r'context = ({.*?});', webpage, 'context'))
             video = context['video']
             streamUrl = video['streamUrl']
             info = {
@@ -79,6 +130,50 @@ class MySpaceIE(InfoExtractor):
         info.update({
             'url': rtmp_url,
             'play_path': play_path,
+            'player_url': player_url,
             'ext': 'flv',
         })
         return info
+
+
+class MySpaceAlbumIE(InfoExtractor):
+    IE_NAME = 'MySpace:album'
+    _VALID_URL = r'https?://myspace\.com/([^/]+)/music/album/(?P<title>.*-)(?P<id>\d+)'
+
+    _TESTS = [{
+        'url': 'https://myspace.com/starset2/music/album/transmissions-19455773',
+        'info_dict': {
+            'title': 'Transmissions',
+            'id': '19455773',
+        },
+        'playlist_count': 14,
+        'skip': 'this album is only available in some countries',
+    }, {
+        'url': 'https://myspace.com/killsorrow/music/album/the-demo-18596029',
+        'info_dict': {
+            'title': 'The Demo',
+            'id': '18596029',
+        },
+        'playlist_count': 5,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        playlist_id = mobj.group('id')
+        display_id = mobj.group('title') + playlist_id
+        webpage = self._download_webpage(url, display_id)
+        tracks_paths = re.findall(r'"music:song" content="(.*?)"', webpage)
+        if not tracks_paths:
+            raise ExtractorError(
+                '%s: No songs found, try using proxy' % display_id,
+                expected=True)
+        entries = [
+            self.url_result(t_path, ie=MySpaceIE.ie_key())
+            for t_path in tracks_paths]
+        return {
+            '_type': 'playlist',
+            'id': playlist_id,
+            'display_id': display_id,
+            'title': self._og_search_title(webpage),
+            'entries': entries,
+        }
index a89153985984a6d36e88c8290124203c20c4abb2..5e754fcffb6403cbd359b9b358ad3879ef279f53 100644 (file)
@@ -33,7 +33,7 @@ class MyVideoIE(InfoExtractor):
     # Original Code from: https://github.com/dersphere/plugin.video.myvideo_de.git
     # Released into the Public Domain by Tristan Fischer on 2013-05-19
     # https://github.com/rg3/youtube-dl/pull/842
-    def __rc4crypt(self,data, key):
+    def __rc4crypt(self, data, key):
         x = 0
         box = list(range(256))
         for i in list(range(256)):
@@ -49,17 +49,17 @@ class MyVideoIE(InfoExtractor):
             out += chr(compat_ord(char) ^ box[(box[x] + box[y]) % 256])
         return out
 
-    def __md5(self,s):
+    def __md5(self, s):
         return hashlib.md5(s).hexdigest().encode()
 
-    def _real_extract(self,url):
+    def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
 
         GK = (
-          b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
-          b'TW1FMU5tVTBNR05pWkRaa05XRXhNVFJoWVRVd1ptSXhaVEV3'
-          b'TnpsbA0KTVRkbU1tSTRNdz09'
+            b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
+            b'TW1FMU5tVTBNR05pWkRaa05XRXhNVFJoWVRVd1ptSXhaVEV3'
+            b'TnpsbA0KTVRkbU1tSTRNdz09'
         )
 
         # Get video webpage
@@ -72,7 +72,7 @@ class MyVideoIE(InfoExtractor):
             video_url = mobj.group(1) + '.flv'
 
             video_title = self._html_search_regex('<title>([^<]+)</title>',
-                webpage, 'title')
+                                                  webpage, 'title')
 
             return {
                 'id': video_id,
@@ -162,7 +162,7 @@ class MyVideoIE(InfoExtractor):
         video_swfobj = compat_urllib_parse.unquote(video_swfobj)
 
         video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
-            webpage, 'title')
+                                              webpage, 'title')
 
         return {
             'id': video_id,
@@ -173,4 +173,3 @@ class MyVideoIE(InfoExtractor):
             'play_path': video_playpath,
             'player_url': video_swfobj,
         }
-
index cb1af9ecc134a8b8487dd53a32da357e44240ea9..fbe34defd868694d44f4371825322a41b39019a3 100644 (file)
@@ -30,7 +30,7 @@ class NaverIE(InfoExtractor):
         video_id = mobj.group(1)
         webpage = self._download_webpage(url, video_id)
         m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
-            webpage)
+                         webpage)
         if m_id is None:
             m_error = re.search(
                 r'(?s)<div class="nation_error">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>',
@@ -40,7 +40,7 @@ class NaverIE(InfoExtractor):
             raise ExtractorError('couldn\'t extract vid and key')
         vid = m_id.group(1)
         key = m_id.group(2)
-        query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key,})
+        query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key, })
         query_urls = compat_urllib_parse.urlencode({
             'masterVid': vid,
             'protocol': 'p2p',
@@ -65,7 +65,7 @@ class NaverIE(InfoExtractor):
             if domain.startswith('rtmp'):
                 f.update({
                     'ext': 'flv',
-                    'rtmp_protocol': '1', # rtmpt
+                    'rtmp_protocol': '1',  # rtmpt
                 })
             formats.append(f)
         self._sort_formats(formats)
index 78e650b2d01a87d3772a1f40459171bd7cce5cf4..f69fe0925ee4d5d68699f09d84a568ce34f787ec 100644 (file)
@@ -39,7 +39,6 @@ class NBAIE(InfoExtractor):
         duration = parse_duration(
             self._html_search_meta('duration', webpage, 'duration', fatal=False))
 
-
         return {
             'id': shortened_video_id,
             'url': video_url,
index c31ff8160851f824042804a06d905f85d3217416..f49c666909a270ad18e36a2d1177ef681adc3121 100644 (file)
@@ -91,4 +91,4 @@ class NDRIE(InfoExtractor):
             'thumbnail': thumbnail,
             'duration': duration,
             'formats': formats,
-        }
\ No newline at end of file
+        }
index 2e72e8915aab601b6916fd18e4f64090a613986e..cd117b04edeff88d90842f2ed8e15a8c43bde714 100644 (file)
@@ -23,12 +23,12 @@ class NewgroundsIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         music_id = mobj.group('id')
         webpage = self._download_webpage(url, music_id)
-        
+
         title = self._html_search_regex(
             r',"name":"([^"]+)",', webpage, 'music title')
         uploader = self._html_search_regex(
             r',"artist":"([^"]+)",', webpage, 'music uploader')
-        
+
         music_url_json_string = self._html_search_regex(
             r'({"url":"[^"]+"),', webpage, 'music url') + '}'
         music_url_json = json.loads(music_url_json_string)
index 551bd4d7a511c51f3d5809cf488d7e454b6ed6b5..85fcad06b51dc9ce87bdd563043c92a126cc8eea 100644 (file)
@@ -89,4 +89,4 @@ class NewstubeIE(InfoExtractor):
             'thumbnail': thumbnail,
             'duration': duration,
             'formats': formats,
-        }
\ No newline at end of file
+        }
index ba7b77a467e8e7ba969fb9d4555e47fad6ed7196..7ce1d481d0a73218598bf24fee964ca8ec65956d 100644 (file)
@@ -38,12 +38,12 @@ class NFBIE(InfoExtractor):
         page = self._download_webpage('https://www.nfb.ca/film/%s' % video_id, video_id, 'Downloading film page')
 
         uploader_id = self._html_search_regex(r'<a class="director-link" href="/explore-all-directors/([^/]+)/"',
-            page, 'director id', fatal=False)
+                                              page, 'director id', fatal=False)
         uploader = self._html_search_regex(r'<em class="director-name" itemprop="name">([^<]+)</em>',
-            page, 'director name', fatal=False)
+                                           page, 'director name', fatal=False)
 
         request = compat_urllib_request.Request('https://www.nfb.ca/film/%s/player_config' % video_id,
-            compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii'))
+                                                compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii'))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')
         request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf')
 
@@ -93,4 +93,4 @@ class NFBIE(InfoExtractor):
             'uploader': uploader,
             'uploader_id': uploader_id,
             'formats': formats,
-        }
\ No newline at end of file
+        }
index 82af6e33098eaa9018c6f58b93b4e8b4c9cff399..bdcf7e23953870e61a523a11b4daaf10cfc3ab75 100644 (file)
@@ -4,9 +4,11 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urlparse,
     compat_urllib_parse,
+)
+from ..utils import (
     unified_strdate,
 )
 
@@ -31,7 +33,7 @@ class NHLBaseInfoExtractor(InfoExtractor):
                 path_url, video_id, 'Downloading final video url')
             video_url = path_doc.find('path').text
         else:
-           video_url = initial_video_url
+            video_url = initial_video_url
 
         join = compat_urlparse.urljoin
         return {
@@ -122,10 +124,10 @@ class NHLVideocenterIE(NHLBaseInfoExtractor):
         response = self._download_webpage(request_url, playlist_title)
         response = self._fix_json(response)
         if not response.strip():
-            self._downloader.report_warning(u'Got an empty reponse, trying '
+            self._downloader.report_warning('Got an empty reponse, trying '
                                             'adding the "newvideos" parameter')
             response = self._download_webpage(request_url + '&newvideos=true',
-                playlist_title)
+                                              playlist_title)
             response = self._fix_json(response)
         videos = json.loads(response)
 
index 3b5784e8f5cb5ae8014c4f51d2d451d989b87860..1d9c1a096403e7e7b4f3835f31dbee31812594c9 100644 (file)
@@ -111,7 +111,7 @@ class NiconicoIE(InfoExtractor):
 
         if 'deleted=' in flv_info_webpage:
             raise ExtractorError('The video has been deleted.',
-                expected=True)
+                                 expected=True)
         video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
 
         # Start extracting information
@@ -170,13 +170,13 @@ class NiconicoPlaylistIE(InfoExtractor):
         webpage = self._download_webpage(url, list_id)
 
         entries_json = self._search_regex(r'Mylist\.preload\(\d+, (\[.*\])\);',
-            webpage, 'entries')
+                                          webpage, 'entries')
         entries = json.loads(entries_json)
         entries = [{
             '_type': 'url',
             'ie_key': NiconicoIE.ie_key(),
             'url': ('http://www.nicovideo.jp/watch/%s' %
-                entry['item_data']['video_id']),
+                    entry['item_data']['video_id']),
         } for entry in entries]
 
         return {
index 33daa0dec327dea3f691f5dccab5b6b312d79e4f..16a02ad7939082627ffb9edd41d7bf6e62fd1f6d 100644 (file)
@@ -27,8 +27,7 @@ class NineGagIE(InfoExtractor):
             "thumbnail": "re:^https?://",
         },
         'add_ie': ['Youtube']
-    },
-    {
+    }, {
         'url': 'http://9gag.tv/p/KklwM/alternate-banned-opening-scene-of-gravity?ref=fsidebar',
         'info_dict': {
             'id': 'KklwM',
index 7f1bc6377a42e99d853a38dd406a60dda929e49f..7d2ff7b9a149d0284da1d52a26f4c0a85bf0f6ce 100644 (file)
@@ -20,6 +20,7 @@ class NocoIE(InfoExtractor):
     _VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
     _LOGIN_URL = 'http://noco.tv/do.php'
     _API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s'
+    _SUB_LANG_TEMPLATE = '&sub_lang=%s'
     _NETRC_MACHINE = 'noco'
 
     _TEST = {
@@ -60,10 +61,12 @@ class NocoIE(InfoExtractor):
         if 'erreur' in login:
             raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
 
-    def _call_api(self, path, video_id, note):
+    def _call_api(self, path, video_id, note, sub_lang=None):
         ts = compat_str(int(time.time() * 1000))
         tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest()
         url = self._API_URL_TEMPLATE % (path, ts, tk)
+        if sub_lang:
+            url += self._SUB_LANG_TEMPLATE % sub_lang
 
         resp = self._download_json(url, video_id, note)
 
@@ -91,31 +94,34 @@ class NocoIE(InfoExtractor):
 
         formats = []
 
-        for format_id, fmt in medias['fr']['video_list']['none']['quality_list'].items():
-
-            video = self._call_api(
-                'shows/%s/video/%s/fr' % (video_id, format_id.lower()),
-                video_id, 'Downloading %s video JSON' % format_id)
-
-            file_url = video['file']
-            if not file_url:
-                continue
-
-            if file_url in ['forbidden', 'not found']:
-                popmessage = video['popmessage']
-                self._raise_error(popmessage['title'], popmessage['message'])
-
-            formats.append({
-                'url': file_url,
-                'format_id': format_id,
-                'width': fmt['res_width'],
-                'height': fmt['res_lines'],
-                'abr': fmt['audiobitrate'],
-                'vbr': fmt['videobitrate'],
-                'filesize': fmt['filesize'],
-                'format_note': qualities[format_id]['quality_name'],
-                'preference': qualities[format_id]['priority'],
-            })
+        for lang, lang_dict in medias['fr']['video_list'].items():
+            for format_id, fmt in lang_dict['quality_list'].items():
+                format_id_extended = '%s-%s' % (lang, format_id) if lang != 'none' else format_id
+
+                video = self._call_api(
+                    'shows/%s/video/%s/fr' % (video_id, format_id.lower()),
+                    video_id, 'Downloading %s video JSON' % format_id_extended,
+                    lang if lang != 'none' else None)
+
+                file_url = video['file']
+                if not file_url:
+                    continue
+
+                if file_url in ['forbidden', 'not found']:
+                    popmessage = video['popmessage']
+                    self._raise_error(popmessage['title'], popmessage['message'])
+
+                formats.append({
+                    'url': file_url,
+                    'format_id': format_id_extended,
+                    'width': fmt['res_width'],
+                    'height': fmt['res_lines'],
+                    'abr': fmt['audiobitrate'],
+                    'vbr': fmt['videobitrate'],
+                    'filesize': fmt['filesize'],
+                    'format_note': qualities[format_id]['quality_name'],
+                    'preference': qualities[format_id]['priority'],
+                })
 
         self._sort_formats(formats)
 
@@ -163,4 +169,4 @@ class NocoIE(InfoExtractor):
             'uploader_id': uploader_id,
             'duration': duration,
             'formats': formats,
-        }
\ No newline at end of file
+        }
index 25e71a56e196d9cf7f9d2423c47293b01e46cd24..3d35b11ac81286a359a06620d0e08e8fae18043b 100644 (file)
@@ -31,9 +31,9 @@ class NormalbootsIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
         video_uploader = self._html_search_regex(r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
-            webpage, 'uploader')
+                                                 webpage, 'uploader')
         raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
-            webpage, 'date')
+                                                  webpage, 'date')
         video_upload_date = unified_strdate(raw_upload_date)
 
         player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url')
index 2e7ab1e4f9ce23c422fddf478b23b1497aac02ae..38d05e46604a859247c0b155625ee41f5b556b36 100644 (file)
@@ -66,4 +66,4 @@ class NovaMovIE(InfoExtractor):
             'url': video_url,
             'title': title,
             'description': description
-        }
\ No newline at end of file
+        }
index bfba184184c09bfd429698229efc1375c334e617..ecb38de2d3d969b96145b0b191054e2b34f1801e 100644 (file)
@@ -25,4 +25,4 @@ class NowVideoIE(NovaMovIE):
             'title': 'youtubedl test video _BaW_jenozKc.mp4',
             'description': 'Description',
         }
-    }
\ No newline at end of file
+    }
index ed60314eca4f918392da7fe2637e47ed3cdf5ee9..13c8d79cd8ac6346dbe4e9810bc8bf0b20825dcc 100644 (file)
@@ -145,4 +145,4 @@ class NTVIE(InfoExtractor):
             'duration': duration,
             'view_count': view_count,
             'formats': formats,
-        }
\ No newline at end of file
+        }
index 58ec81f91115b9df146f7570f5ef508f86a35fde..449c8a6a3e86c410daaafbf80878161693242e27 100644 (file)
@@ -71,4 +71,4 @@ class NuvidIE(InfoExtractor):
             'upload_date': upload_date,
             'age_limit': 18,
             'formats': formats,
-        }
\ No newline at end of file
+        }
index 7bf105d38178a5d59339d81262649b40fa57a46e..56e1cad3b0021431721b59df2162feaf7e0c357b 100644 (file)
@@ -74,4 +74,4 @@ class NYTimesIE(InfoExtractor):
             'duration': duration,
             'formats': formats,
             'thumbnails': thumbnails,
-        }
\ No newline at end of file
+        }
index 2044e107eba9808bbde802e8468bf6b009841fb8..f17a528583bf7296906431bdc1a0e3a3cbd6c71d 100644 (file)
@@ -43,7 +43,7 @@ class OoyalaIE(InfoExtractor):
     @classmethod
     def _build_url_result(cls, embed_code):
         return cls.url_result(cls._url_for_embed_code(embed_code),
-            ie=cls.ie_key())
+                              ie=cls.ie_key())
 
     def _extract_result(self, info, more_info):
         return {
@@ -97,4 +97,3 @@ class OoyalaIE(InfoExtractor):
             }
         else:
             return self._extract_result(videos_info[0], videos_more_info)
-        
index 011e6be13e63562dad8def87ea264a7e1b6783af..572a234ad8c2514e5704d936fb98a19035662f40 100644 (file)
@@ -178,4 +178,4 @@ class ORFFM4IE(InfoExtractor):
             'title': data['title'],
             'description': data['subtitle'],
             'entries': entries
-        }
\ No newline at end of file
+        }
index 8aa69c46eb75e9ccfe6fab5b7bff2c9a5778009e..b4389e0b6feaf0726a4805bec674b77cd38e295b 100644 (file)
@@ -31,7 +31,7 @@ class PhotobucketIE(InfoExtractor):
         # Extract URL, uploader, and title from webpage
         self.report_extraction(video_id)
         info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);',
-            webpage, 'info json')
+                                       webpage, 'info json')
         info = json.loads(info_json)
         url = compat_urllib_parse.unquote(self._html_search_regex(r'file=(.+\.mp4)', info['linkcodes']['html'], 'url'))
         return {
index b1322f13f8b62a4618b1f857dadf0829b9752238..cd3905acb0fcd0ef9fe08beec2bda41fd2f94f70 100644 (file)
@@ -4,6 +4,8 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
+    ExtractorError,
+    clean_html,
     compat_urllib_parse,
 )
 
@@ -28,6 +30,11 @@ class PlayvidIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
+        m_error = re.search(
+            r'<div class="block-error">\s*<div class="heading">\s*<div>(?P<msg>.+?)</div>\s*</div>', webpage)
+        if m_error:
+            raise ExtractorError(clean_html(m_error.group('msg')), expected=True)
+
         video_title = None
         duration = None
         video_thumbnail = None
index ffafd23800f49de2bc7987f3d63c9b5ab2104b58..f20946a2bd0616d8d90cebf360570a3f0bc40e94 100644 (file)
@@ -6,6 +6,7 @@ import re
 from .common import InfoExtractor
 from ..utils import int_or_none
 
+
 class PodomaticIE(InfoExtractor):
     IE_NAME = 'podomatic'
     _VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)'
index 618e8f5dd18deff8ffe3f907ce2f97d1256ba204..2ca15b717ec5dd9a36b6aa2bfc7e9019148c0493 100644 (file)
@@ -56,7 +56,7 @@ class PornHubIE(InfoExtractor):
         comment_count = self._extract_count(
             r'All comments \(<var class="videoCommentCount">([\d,\.]+)</var>', webpage, 'comment')
 
-        video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
+        video_urls = list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
         if webpage.find('"encrypted":true') != -1:
             password = compat_urllib_parse.unquote_plus(self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
             video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))
index 04bd3d9793c0424c6dded7d727e1f8cac629377c..5253aa3d30062ec7937c5e5f48d85998923c6e8f 100644 (file)
@@ -38,7 +38,7 @@ class PornotubeIE(InfoExtractor):
         video_url = self._search_regex(VIDEO_URL_RE, webpage, 'video url')
         video_url = compat_urllib_parse.unquote(video_url)
 
-        #Get the uploaded date
+        # Get the uploaded date
         VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
         upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, 'upload date', fatal=False)
         if upload_date:
index 619496de7a57f9ab297b708bbffb3005c85e0dd8..32d747ede0188a7347637aa1ee8075161ec5c1f8 100644 (file)
@@ -280,4 +280,4 @@ class ProSiebenSat1IE(InfoExtractor):
             'upload_date': upload_date,
             'duration': duration,
             'formats': formats,
-        }
\ No newline at end of file
+        }
index ba3dd707f8b5d38363dacce62e9fe684265c6a96..2d39ecfe4faa537cb86f156df930dc2b19241a0b 100644 (file)
@@ -119,4 +119,4 @@ class RaiIE(SubtitlesInfoExtractor):
             if captions.endswith(STL_EXT):
                 captions = captions[:-len(STL_EXT)] + SRT_EXT
             subtitles['it'] = 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions)
-        return subtitles
\ No newline at end of file
+        return subtitles
index 2c53ed2e1147a50248a4294c838ceef67688a356..0f8f3ebde0999e8599eaa86516dd2b52524c9b40 100644 (file)
@@ -33,7 +33,7 @@ class RBMARadioIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         json_data = self._search_regex(r'window\.gon.*?gon\.show=(.+?);$',
-            webpage, 'json data', flags=re.MULTILINE)
+                                       webpage, 'json data', flags=re.MULTILINE)
 
         try:
             data = json.loads(json_data)
index d1e12dd8d5a6699ba3caa8d041c8bc039e996fc7..846b76c81528431c0faf8ea3fc9bbd6b017db099 100644 (file)
@@ -1,7 +1,5 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 
 
@@ -9,32 +7,23 @@ class RedTubeIE(InfoExtractor):
     _VALID_URL = r'http://(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://www.redtube.com/66418',
-        'file': '66418.mp4',
-        # md5 varies from time to time, as in
-        # https://travis-ci.org/rg3/youtube-dl/jobs/14052463#L295
-        #'md5': u'7b8c22b5e7098a3e1c09709df1126d2d',
         'info_dict': {
+            'id': '66418',
+            'ext': 'mp4',
             "title": "Sucked on a toilet",
             "age_limit": 18,
         }
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-
-        video_id = mobj.group('id')
-        video_extension = 'mp4'
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        self.report_extraction(video_id)
-
         video_url = self._html_search_regex(
-            r'<source src="(.+?)" type="video/mp4">', webpage, u'video URL')
-
+            r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
         video_title = self._html_search_regex(
             r'<h1 class="videoTitle[^"]*">(.+?)</h1>',
-            webpage, u'title')
-
+            webpage, 'title')
         video_thumbnail = self._og_search_thumbnail(webpage)
 
         # No self-labeling, but they describe themselves as
@@ -44,7 +33,7 @@ class RedTubeIE(InfoExtractor):
         return {
             'id': video_id,
             'url': video_url,
-            'ext': video_extension,
+            'ext': 'mp4',
             'title': video_title,
             'thumbnail': video_thumbnail,
             'age_limit': age_limit,
index 9fbdb9fcbbbc8fc5ef1847a2df8e8f7f929d3b75..59dc137cc225889feb9428dd70f42a91451a951d 100644 (file)
@@ -41,4 +41,3 @@ class RingTVIE(InfoExtractor):
             'thumbnail': thumbnail_url,
             'description': description,
         }
-
index 4a188e5d46304ab8059d646cea667edc86cf7cb4..d029b0ec525fc2e1186aba7e1c3dadf9bed981b0 100644 (file)
@@ -44,7 +44,7 @@ class RtlXlIE(InfoExtractor):
 
         formats = self._extract_m3u8_formats(m3u8_url, uuid, ext='mp4')
 
-        video_urlpart = videopath.split('/flash/')[1][:-4]
+        video_urlpart = videopath.split('/flash/')[1][:-5]
         PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4'
 
         formats.extend([
index 1a41cbe409acc666a30a056c4d07c63afbadd863..285c3c4bebf8ec7c2cd793d2b40739222c18a5ca 100644 (file)
@@ -122,7 +122,7 @@ class RTLnowIE(InfoExtractor):
         playerdata = self._download_xml(playerdata_url, video_id, 'Downloading player data XML')
 
         videoinfo = playerdata.find('./playlist/videoinfo')
-        
+
         formats = []
         for filename in videoinfo.findall('filename'):
             mobj = re.search(r'(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>.+)', filename.text)
@@ -153,4 +153,4 @@ class RTLnowIE(InfoExtractor):
             'upload_date': upload_date,
             'duration': duration,
             'formats': formats,
-        }
\ No newline at end of file
+        }
index 4dd35a47b35b5341139aa2d7f27886b52ddad5ce..0ce22d60c7fa995980e3f70159583f156672d76b 100644 (file)
@@ -54,7 +54,6 @@ def _decrypt_url(png):
     return url
 
 
-
 class RTVEALaCartaIE(InfoExtractor):
     IE_NAME = 'rtve.es:alacarta'
     IE_DESC = 'RTVE a la carta'
index f737b4e5fad8cd86e28b094abcb4226927083d2a..a73e6f331fc02a8977863a412227681b3838b91a 100644 (file)
@@ -191,4 +191,4 @@ class RUTVIE(InfoExtractor):
             'view_count': view_count,
             'duration': duration,
             'formats': formats,
-        }
\ No newline at end of file
+        }
index 409f8540a0b2e2ef9db1ab3c5746d7779a2e5db3..b8775c2f99f4a105ae35f1b04a919e64c987df0f 100644 (file)
@@ -27,8 +27,7 @@ class SBSIE(InfoExtractor):
             'thumbnail': 're:http://.*\.jpg',
         },
         'add_ies': ['generic'],
-    },
-    {
+    }, {
         'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed',
         'only_matching': True,
     }]
index 55a481cc0ed41fa0bef33f3af1bce941fa7659a1..3bf93c870b2bc30c3baf9567a64d06171558f06b 100644 (file)
@@ -53,4 +53,4 @@ class SciVeeIE(InfoExtractor):
             'description': description,
             'thumbnail': 'http://www.scivee.tv/assets/videothumb/%s' % video_id,
             'formats': formats,
-        }
\ No newline at end of file
+        }
index 306869e6af109cf6971a72df14f47fe99a7b6885..c145f6fc72f1b9eed8a5089dce48dfdd5f106a79 100644 (file)
@@ -96,7 +96,7 @@ class ScreencastIE(InfoExtractor):
         if title is None:
             title = self._html_search_regex(
                 [r'<b>Title:</b> ([^<]*)</div>',
-                r'class="tabSeperator">></span><span class="tabText">(.*?)<'],
+                 r'class="tabSeperator">></span><span class="tabText">(.*?)<'],
                 webpage, 'title')
         thumbnail = self._og_search_thumbnail(webpage)
         description = self._og_search_description(webpage, default=None)
index 1dc551d5c7f274b717369952824050812c21fee3..16dc3736b48bfb15a94b98713beef4757446b642 100644 (file)
@@ -67,5 +67,3 @@ class ServingSysIE(InfoExtractor):
             'title': title,
             'entries': entries,
         }
-
\ No newline at end of file
index badba2ac61ca5f46db325e5064f6e75425c60cbb..fdc31603a709676713f7ba87325f4b2ba6b47f3e 100644 (file)
@@ -54,4 +54,4 @@ class SharedIE(InfoExtractor):
             'filesize': filesize,
             'title': title,
             'thumbnail': thumbnail,
-        }
\ No newline at end of file
+        }
index 2909ef18b51a5ac6dadc4eec33bd05522443da8c..5eadbb7eaea263b8a37307fbdcc01c3e54c5eaa2 100644 (file)
@@ -46,7 +46,7 @@ class SinaIE(InfoExtractor):
     def _extract_video(self, video_id):
         data = compat_urllib_parse.urlencode({'vid': video_id})
         url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data,
-            video_id, 'Downloading video url')
+                                     video_id, 'Downloading video url')
         image_page = self._download_webpage(
             'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data,
             video_id, 'Downloading thumbnail info')
index 53c3c9220374737b88dc516ec810ecb6865b74f7..5864b9936cca2e4d0ba3a0fa217884c21f897ed7 100644 (file)
@@ -39,7 +39,7 @@ class SlideshareIE(InfoExtractor):
         ext = info['jsplayer']['video_extension']
         video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
         description = self._html_search_regex(
-            r'<p\s+(?:style="[^"]*"\s+)?class="description.*?"[^>]*>(.*?)</p>', webpage,
+            r'<p\s+(?:style="[^"]*"\s+)?class=".*?description.*?"[^>]*>(.*?)</p>', webpage,
             'description', fatal=False)
 
         return {
index e6e7d086503a04a3fda862f601e109003169b9d7..3df71304dafc9c9e353923f6769c88e1fcf8c5ff 100644 (file)
@@ -26,7 +26,7 @@ class SlutloadIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         video_title = self._html_search_regex(r'<h1><strong>([^<]+)</strong>',
-            webpage, 'title').strip()
+                                              webpage, 'title').strip()
 
         video_url = self._html_search_regex(
             r'(?s)<div id="vidPlayer"\s+data-url="([^"]+)"',
index 9bd5defa7ac5e171904eb681015e9bcf1661acb9..0751efc6111c96ca1c089c66183429f9bde6147c 100644 (file)
@@ -1,7 +1,6 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 
-import os.path
 import re
 import json
 import hashlib
@@ -12,15 +11,15 @@ from ..utils import (
     compat_urllib_parse,
     compat_urllib_request,
     ExtractorError,
-    url_basename,
     int_or_none,
+    unified_strdate,
 )
 
 
 class SmotriIE(InfoExtractor):
     IE_DESC = 'Smotri.com'
     IE_NAME = 'smotri'
-    _VALID_URL = r'^https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
+    _VALID_URL = r'^https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<id>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
     _NETRC_MACHINE = 'smotri'
 
     _TESTS = [
@@ -35,7 +34,6 @@ class SmotriIE(InfoExtractor):
                 'uploader': 'rbc2008',
                 'uploader_id': 'rbc08',
                 'upload_date': '20131118',
-                'description': 'катастрофа с камер видеонаблюдения, видео катастрофа с камер видеонаблюдения',
                 'thumbnail': 'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg',
             },
         },
@@ -50,7 +48,6 @@ class SmotriIE(InfoExtractor):
                 'uploader': 'Support Photofile@photofile',
                 'uploader_id': 'support-photofile',
                 'upload_date': '20070704',
-                'description': 'test, видео test',
                 'thumbnail': 'http://frame4.loadup.ru/03/ed/57591.2.3.jpg',
             },
         },
@@ -66,7 +63,6 @@ class SmotriIE(InfoExtractor):
                 'uploader_id': 'timoxa40',
                 'upload_date': '20100404',
                 'thumbnail': 'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg',
-                'description': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1, видео TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
             },
             'params': {
                 'videopassword': 'qwerty',
@@ -85,7 +81,6 @@ class SmotriIE(InfoExtractor):
                 'upload_date': '20101001',
                 'thumbnail': 'http://frame3.loadup.ru/75/75/1540889.1.3.jpg',
                 'age_limit': 18,
-                'description': 'этот ролик не покажут по ТВ, видео этот ролик не покажут по ТВ',
             },
             'params': {
                 'videopassword': '333'
@@ -102,17 +97,11 @@ class SmotriIE(InfoExtractor):
                 'uploader': 'HannahL',
                 'uploader_id': 'lisaha95',
                 'upload_date': '20090331',
-                'description': 'Shakira - Don\'t Bother, видео Shakira - Don\'t Bother',
                 'thumbnail': 'http://frame8.loadup.ru/44/0b/918809.7.3.jpg',
             },
         },
     ]
 
-    _SUCCESS = 0
-    _PASSWORD_NOT_VERIFIED = 1
-    _PASSWORD_DETECTED = 2
-    _VIDEO_NOT_FOUND = 3
-
     @classmethod
     def _extract_url(cls, webpage):
         mobj = re.search(
@@ -137,44 +126,44 @@ class SmotriIE(InfoExtractor):
         return self._html_search_meta(name, html, display_name)
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('videoid')
-        real_video_id = mobj.group('realvideoid')
+        video_id = self._match_id(url)
+
+        video_form = {
+            'ticket': video_id,
+            'video_url': '1',
+            'frame_url': '1',
+            'devid': 'LoadupFlashPlayer',
+            'getvideoinfo': '1',
+        }
 
-        # Download video JSON data
-        video_json_url = 'http://smotri.com/vt.php?id=%s' % real_video_id
-        video_json_page = self._download_webpage(video_json_url, video_id, 'Downloading video JSON')
-        video_json = json.loads(video_json_page)
+        request = compat_urllib_request.Request(
+            'http://smotri.com/video/view/url/bot/', compat_urllib_parse.urlencode(video_form))
+        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
 
-        status = video_json['status']
-        if status == self._VIDEO_NOT_FOUND:
+        video = self._download_json(request, video_id, 'Downloading video JSON')
+
+        if video.get('_moderate_no') or not video.get('moderated'):
+            raise ExtractorError('Video %s has not been approved by moderator' % video_id, expected=True)
+
+        if video.get('error'):
             raise ExtractorError('Video %s does not exist' % video_id, expected=True)
-        elif status == self._PASSWORD_DETECTED: # The video is protected by a password, retry with
-                                                # video-password set
-            video_password = self._downloader.params.get('videopassword', None)
-            if not video_password:
-                raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
-            video_json_url += '&md5pass=%s' % hashlib.md5(video_password.encode('utf-8')).hexdigest()
-            video_json_page = self._download_webpage(video_json_url, video_id, 'Downloading video JSON (video-password set)')
-            video_json = json.loads(video_json_page)
-            status = video_json['status']
-            if status == self._PASSWORD_NOT_VERIFIED:
-                raise ExtractorError('Video password is invalid', expected=True)
-
-        if status != self._SUCCESS:
-            raise ExtractorError('Unexpected status value %s' % status)
-
-        # Extract the URL of the video
-        video_url = video_json['file_data']
+
+        video_url = video.get('_vidURL') or video.get('_vidURL_mp4')
+        title = video['title']
+        thumbnail = video['_imgURL']
+        upload_date = unified_strdate(video['added'])
+        uploader = video['userNick']
+        uploader_id = video['userLogin']
+        duration = int_or_none(video['duration'])
 
         # Video JSON does not provide enough meta data
         # We will extract some from the video web page instead
-        video_page_url = 'http://smotri.com/video/view/?id=%s' % video_id
-        video_page = self._download_webpage(video_page_url, video_id, 'Downloading video page')
+        webpage_url = 'http://smotri.com/video/view/?id=%s' % video_id
+        webpage = self._download_webpage(webpage_url, video_id, 'Downloading video page')
 
         # Warning if video is unavailable
         warning = self._html_search_regex(
-            r'<div class="videoUnModer">(.*?)</div>', video_page,
+            r'<div class="videoUnModer">(.*?)</div>', webpage,
             'warning message', default=None)
         if warning is not None:
             self._downloader.report_warning(
@@ -182,84 +171,32 @@ class SmotriIE(InfoExtractor):
                 (video_id, warning))
 
         # Adult content
-        if re.search('EroConfirmText">', video_page) is not None:
+        if re.search('EroConfirmText">', webpage) is not None:
             self.report_age_confirmation()
             confirm_string = self._html_search_regex(
                 r'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id,
-                video_page, 'confirm string')
-            confirm_url = video_page_url + '&confirm=%s' % confirm_string
-            video_page = self._download_webpage(confirm_url, video_id, 'Downloading video page (age confirmed)')
+                webpage, 'confirm string')
+            confirm_url = webpage_url + '&confirm=%s' % confirm_string
+            webpage = self._download_webpage(confirm_url, video_id, 'Downloading video page (age confirmed)')
             adult_content = True
         else:
             adult_content = False
 
-        # Extract the rest of meta data
-        video_title = self._search_meta('name', video_page, 'title')
-        if not video_title:
-            video_title = os.path.splitext(url_basename(video_url))[0]
-
-        video_description = self._search_meta('description', video_page)
-        END_TEXT = ' на сайте Smotri.com'
-        if video_description and video_description.endswith(END_TEXT):
-            video_description = video_description[:-len(END_TEXT)]
-        START_TEXT = 'Смотреть онлайн ролик '
-        if video_description and video_description.startswith(START_TEXT):
-            video_description = video_description[len(START_TEXT):]
-        video_thumbnail = self._search_meta('thumbnail', video_page)
-
-        upload_date_str = self._search_meta('uploadDate', video_page, 'upload date')
-        if upload_date_str:
-            upload_date_m = re.search(r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str)
-            video_upload_date = (
-                (
-                    upload_date_m.group('year') +
-                    upload_date_m.group('month') +
-                    upload_date_m.group('day')
-                )
-                if upload_date_m else None
-            )
-        else:
-            video_upload_date = None
-
-        duration_str = self._search_meta('duration', video_page)
-        if duration_str:
-            duration_m = re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str)
-            video_duration = (
-                (
-                    (int(duration_m.group('hours')) * 60 * 60) +
-                    (int(duration_m.group('minutes')) * 60) +
-                    int(duration_m.group('seconds'))
-                )
-                if duration_m else None
-            )
-        else:
-            video_duration = None
-
-        video_uploader = self._html_search_regex(
-            '<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>',
-            video_page, 'uploader', fatal=False, flags=re.MULTILINE|re.DOTALL)
-
-        video_uploader_id = self._html_search_regex(
-            '<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info\\(.*?\'([^\']+)\'\\);">',
-            video_page, 'uploader id', fatal=False, flags=re.MULTILINE|re.DOTALL)
-
-        video_view_count = self._html_search_regex(
+        view_count = self._html_search_regex(
             'Общее количество просмотров.*?<span class="Number">(\\d+)</span>',
-            video_page, 'view count', fatal=False, flags=re.MULTILINE|re.DOTALL)
+            webpage, 'view count', fatal=False, flags=re.MULTILINE | re.DOTALL)
 
         return {
             'id': video_id,
             'url': video_url,
-            'title': video_title,
-            'thumbnail': video_thumbnail,
-            'description': video_description,
-            'uploader': video_uploader,
-            'upload_date': video_upload_date,
-            'uploader_id': video_uploader_id,
-            'duration': video_duration,
-            'view_count': int_or_none(video_view_count),
+            'title': title,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'upload_date': upload_date,
+            'uploader_id': uploader_id,
+            'duration': duration,
+            'view_count': int_or_none(view_count),
             'age_limit': 18 if adult_content else 0,
-            'video_page_url': video_page_url
         }
 
 
@@ -275,7 +212,7 @@ class SmotriCommunityIE(InfoExtractor):
         },
         'playlist_mincount': 4,
     }
-    
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         community_id = mobj.group('communityid')
@@ -345,7 +282,7 @@ class SmotriBroadcastIE(InfoExtractor):
             (username, password) = self._get_login_info()
             if username is None:
                 raise ExtractorError('Erotic broadcasts allowed only for registered users, '
-                    'use --username and --password options to provide account credentials.', expected=True)
+                                     'use --username and --password options to provide account credentials.', expected=True)
 
             login_form = {
                 'login-hint53': '1',
index bebcafb62bea3fe2e180818256475262177f902f..07f514a46246657206ae3fd31e19b1d2932e6f15 100644 (file)
@@ -1,4 +1,5 @@
 # encoding: utf-8
+from __future__ import unicode_literals
 
 import json
 import re
@@ -11,13 +12,14 @@ class SohuIE(InfoExtractor):
     _VALID_URL = r'https?://(?P<mytv>my\.)?tv\.sohu\.com/.+?/(?(mytv)|n)(?P<id>\d+)\.shtml.*?'
 
     _TEST = {
-        u'url': u'http://tv.sohu.com/20130724/n382479172.shtml#super',
-        u'file': u'382479172.mp4',
-        u'md5': u'bde8d9a6ffd82c63a1eefaef4eeefec7',
-        u'info_dict': {
-            u'title': u'MV:Far East Movement《The Illest》',
+        'url': 'http://tv.sohu.com/20130724/n382479172.shtml#super',
+        'md5': 'bde8d9a6ffd82c63a1eefaef4eeefec7',
+        'info_dict': {
+            'id': '382479172',
+            'ext': 'mp4',
+            'title': 'MV:Far East Movement《The Illest》',
         },
-        u'skip': u'Only available from China',
+        'skip': 'Only available from China',
     }
 
     def _real_extract(self, url):
@@ -26,11 +28,11 @@ class SohuIE(InfoExtractor):
             if mytv:
                 base_data_url = 'http://my.tv.sohu.com/play/videonew.do?vid='
             else:
-                base_data_url = u'http://hot.vrs.sohu.com/vrs_flash.action?vid='
+                base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
             data_url = base_data_url + str(vid_id)
             data_json = self._download_webpage(
                 data_url, video_id,
-                note=u'Downloading JSON data for ' + str(vid_id))
+                note='Downloading JSON data for ' + str(vid_id))
             return json.loads(data_json)
 
         mobj = re.match(self._VALID_URL, url)
@@ -39,11 +41,11 @@ class SohuIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
         raw_title = self._html_search_regex(r'(?s)<title>(.+?)</title>',
-                                            webpage, u'video title')
+                                            webpage, 'video title')
         title = raw_title.partition('-')[0].strip()
 
         vid = self._html_search_regex(r'var vid ?= ?["\'](\d+)["\']', webpage,
-                                      u'video path')
+                                      'video path')
         data = _fetch_data(vid, mytv)
 
         QUALITIES = ('ori', 'super', 'high', 'nor')
@@ -51,7 +53,7 @@ class SohuIE(InfoExtractor):
                    for q in QUALITIES
                    if data['data'][q + 'Vid'] != 0]
         if not vid_ids:
-            raise ExtractorError(u'No formats available for this video')
+            raise ExtractorError('No formats available for this video')
 
         # For now, we just pick the highest available quality
         vid_id = vid_ids[-1]
@@ -69,7 +71,7 @@ class SohuIE(InfoExtractor):
                         (allot, prot, clipsURL[i], su[i]))
             part_str = self._download_webpage(
                 part_url, video_id,
-                note=u'Downloading part %d of %d' % (i+1, part_count))
+                note='Downloading part %d of %d' % (i + 1, part_count))
 
             part_info = part_str.split('|')
             video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])
index 54256e1a29f02ad75dfdf828e7b4819455294a14..3c1d058db9cf546bb94a299d9f1badd79fab12d3 100644 (file)
@@ -159,7 +159,7 @@ class SoundcloudIE(InfoExtractor):
 
         # We have to retrieve the url
         streams_url = ('http://api.soundcloud.com/i1/tracks/{0}/streams?'
-            'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token))
+                       'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token))
         format_dict = self._download_json(
             streams_url,
             track_id, 'Downloading track url')
@@ -224,14 +224,14 @@ class SoundcloudIE(InfoExtractor):
             # extract uploader (which is in the url)
             uploader = mobj.group('uploader')
             # extract simple title (uploader + slug of song title)
-            slug_title =  mobj.group('title')
+            slug_title = mobj.group('title')
             token = mobj.group('token')
             full_title = resolve_title = '%s/%s' % (uploader, slug_title)
             if token:
                 resolve_title += '/%s' % token
-    
+
             self.report_resolve(full_title)
-    
+
             url = 'http://soundcloud.com/%s' % resolve_title
             info_json_url = self._resolv_url(url)
         info = self._download_json(info_json_url, full_title, 'Downloading info JSON')
@@ -371,7 +371,7 @@ class SoundcloudPlaylistIE(SoundcloudIE):
 
         entries = [
             self._extract_info_dict(t, quiet=True, secret_token=token)
-                for t in data['tracks']]
+            for t in data['tracks']]
 
         return {
             '_type': 'playlist',
index d34aefeaa24a2b8b307005e9164ad9349b638e88..c2d0d36a6935c40553419621678ce8987c4f2dbd 100644 (file)
@@ -33,5 +33,6 @@ class SpaceIE(InfoExtractor):
             # Other videos works fine with the info from the object
             brightcove_url = BrightcoveIE._extract_brightcove_url(webpage)
         if brightcove_url is None:
-            raise ExtractorError(u'The webpage does not contain a video', expected=True)
+            raise ExtractorError(
+                'The webpage does not contain a video', expected=True)
         return self.url_result(brightcove_url, BrightcoveIE.ie_key())
index ef5e7c08bb8ac6f9c90b656b863fbf2bd1cca99a..98cf92d89a1151edfd11b8f15a86eeaa6a83178d 100644 (file)
@@ -77,4 +77,4 @@ class SpiegeltvIE(InfoExtractor):
             'description': description,
             'duration': duration,
             'thumbnails': thumbnails
-        }
\ No newline at end of file
+        }
index 3f680bfc6322d4f4190351e5fae9ca7c178accfd..dfe50ed4585b0fe876b8a300edd00a453ae4b690 100644 (file)
@@ -89,4 +89,4 @@ class Sport5IE(InfoExtractor):
             'duration': duration,
             'categories': categories,
             'formats': formats,
-        }
\ No newline at end of file
+        }
index abb82778325fd74f55a2ae1ce00f8f98316ad0a1..057ef5251dc6855c32a8df5abed6917556d190e9 100644 (file)
@@ -93,4 +93,3 @@ class SportDeutschlandIE(InfoExtractor):
             'rtmp_live': asset.get('live'),
             'timestamp': parse_iso8601(asset.get('date')),
         }
-
index 5feb4ff83f9c4d2def9196666bde4fcc2a3a9101..4a3d8bb8f267b588c59e2f16b208955a70d362d9 100644 (file)
@@ -82,7 +82,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
 
             rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
             rootpage = self._download_webpage(rootURL, info['id'],
-                errnote='Unable to download course info page')
+                                              errnote='Unable to download course info page')
 
             links = orderedSet(re.findall('<a href="(CoursePage.php\?[^"]+)">', rootpage))
             info['entries'] = [self.url_result(
index db33745c14472f7f3e7749978585f0b2b2c53af2..59a51268d25ed4b0718c67ae91a5e8f07fcfe42c 100644 (file)
@@ -1,7 +1,8 @@
+from __future__ import unicode_literals
 from .common import InfoExtractor
 
+from ..compat import compat_str
 from ..utils import (
-    compat_str,
     ExtractorError,
 )
 
@@ -17,10 +18,10 @@ class SubtitlesInfoExtractor(InfoExtractor):
         sub_lang_list = self._get_available_subtitles(video_id, webpage)
         auto_captions_list = self._get_available_automatic_caption(video_id, webpage)
         sub_lang = ",".join(list(sub_lang_list.keys()))
-        self.to_screen(u'%s: Available subtitles for video: %s' %
+        self.to_screen('%s: Available subtitles for video: %s' %
                        (video_id, sub_lang))
         auto_lang = ",".join(auto_captions_list.keys())
-        self.to_screen(u'%s: Available automatic captions for video: %s' %
+        self.to_screen('%s: Available automatic captions for video: %s' %
                        (video_id, auto_lang))
 
     def extract_subtitles(self, video_id, webpage):
@@ -50,8 +51,8 @@ class SubtitlesInfoExtractor(InfoExtractor):
 
             sub_lang_list = {}
             for sub_lang in requested_langs:
-                if not sub_lang in available_subs_list:
-                    self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
+                if sub_lang not in available_subs_list:
+                    self._downloader.report_warning('no closed captions found in the specified language "%s"' % sub_lang)
                     continue
                 sub_lang_list[sub_lang] = available_subs_list[sub_lang]
 
@@ -70,10 +71,10 @@ class SubtitlesInfoExtractor(InfoExtractor):
         try:
             sub = self._download_subtitle_url(sub_lang, url)
         except ExtractorError as err:
-            self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
+            self._downloader.report_warning('unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
             return
         if not sub:
-            self._downloader.report_warning(u'Did not fetch video subtitles')
+            self._downloader.report_warning('Did not fetch video subtitles')
             return
         return sub
 
@@ -94,5 +95,5 @@ class SubtitlesInfoExtractor(InfoExtractor):
         Must be redefined by the subclasses that support automatic captions,
         otherwise it will return {}
         """
-        self._downloader.report_warning(u'Automatic Captions not supported by this server')
+        self._downloader.report_warning('Automatic Captions not supported by this server')
         return {}
index bf430d870125a78788d364b0ea615983ae170948..58073eefeffc0f3ebc244a6087cad36662940228 100644 (file)
@@ -80,7 +80,7 @@ class SWRMediathekIE(InfoExtractor):
 
             if media_type == 'Video':
                 fmt.update({
-                    'format_note': ['144p', '288p', '544p', '720p'][quality-1],
+                    'format_note': ['144p', '288p', '544p', '720p'][quality - 1],
                     'vcodec': codec,
                 })
             elif media_type == 'Audio':
diff --git a/youtube_dl/extractor/tass.py b/youtube_dl/extractor/tass.py
new file mode 100644 (file)
index 0000000..c4ef707
--- /dev/null
@@ -0,0 +1,62 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    js_to_json,
+    qualities,
+)
+
+
+class TassIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:tass\.ru|itar-tass\.com)/[^/]+/(?P<id>\d+)'
+    _TESTS = [
+        {
+            'url': 'http://tass.ru/obschestvo/1586870',
+            'md5': '3b4cdd011bc59174596b6145cda474a4',
+            'info_dict': {
+                'id': '1586870',
+                'ext': 'mp4',
+                'title': 'Посетителям московского зоопарка показали красную панду',
+                'description': 'Приехавшую из Дублина Зейну можно увидеть в павильоне "Кошки тропиков"',
+                'thumbnail': 're:^https?://.*\.jpg$',
+            },
+        },
+        {
+            'url': 'http://itar-tass.com/obschestvo/1600009',
+            'only_matching': True,
+        },
+    ]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        sources = json.loads(js_to_json(self._search_regex(
+            r'(?s)sources\s*:\s*(\[.+?\])', webpage, 'sources')))
+
+        quality = qualities(['sd', 'hd'])
+
+        formats = []
+        for source in sources:
+            video_url = source.get('file')
+            if not video_url or not video_url.startswith('http') or not video_url.endswith('.mp4'):
+                continue
+            label = source.get('label')
+            formats.append({
+                'url': video_url,
+                'format_id': label,
+                'quality': quality(label),
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': self._og_search_title(webpage),
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'formats': formats,
+        }
index 8a95fd6563999f1f59808e2b2090ede9ee312f7a..6c3445d792206395b7a36d016b8a42ad255ea9cc 100644 (file)
@@ -121,7 +121,7 @@ class TeacherTubeUserIE(InfoExtractor):
         urls = []
         webpage = self._download_webpage(url, user_id)
         urls.extend(re.findall(self._MEDIA_RE, webpage))
-        
+
         pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[:-1]
         for p in pages:
             more = 'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, p)
index fa796ce72126610cda53db5378d926b44d72e526..5fa67eb8d4441d62c1591289551171cdbcbcf45b 100644 (file)
@@ -8,24 +8,23 @@ from .common import InfoExtractor
 class TeamcocoIE(InfoExtractor):
     _VALID_URL = r'http://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)'
     _TESTS = [
-    {
-        'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
-        'file': '80187.mp4',
-        'md5': '3f7746aa0dc86de18df7539903d399ea',
-        'info_dict': {
-            'title': 'Conan Becomes A Mary Kay Beauty Consultant',
-            'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'
+        {
+            'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
+            'file': '80187.mp4',
+            'md5': '3f7746aa0dc86de18df7539903d399ea',
+            'info_dict': {
+                'title': 'Conan Becomes A Mary Kay Beauty Consultant',
+                'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'
+            }
+        }, {
+            'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
+            'file': '19705.mp4',
+            'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
+            'info_dict': {
+                "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
+                "title": "Louis C.K. Interview Pt. 1 11/3/11"
+            }
         }
-    },
-    {
-        'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
-        'file': '19705.mp4',
-        'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
-        'info_dict': {
-            "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
-            "title": "Louis C.K. Interview Pt. 1 11/3/11"
-        }
-    }
     ]
 
     def _real_extract(self, url):
@@ -33,7 +32,7 @@ class TeamcocoIE(InfoExtractor):
 
         display_id = mobj.group('display_id')
         webpage = self._download_webpage(url, display_id)
-        
+
         video_id = mobj.group("video_id")
         if not video_id:
             video_id = self._html_search_regex(
index 8550380779168a80b95e526f8921059e2eddf8f4..f8a87afdaf4d27c59b4b29491569b243331b2322 100644 (file)
@@ -33,9 +33,9 @@ class TEDIE(SubtitlesInfoExtractor):
             'ext': 'mp4',
             'title': 'The illusion of consciousness',
             'description': ('Philosopher Dan Dennett makes a compelling '
-                'argument that not only don\'t we understand our own '
-                'consciousness, but that half the time our brains are '
-                'actively fooling us.'),
+                            'argument that not only don\'t we understand our own '
+                            'consciousness, but that half the time our brains are '
+                            'actively fooling us.'),
             'uploader': 'Dan Dennett',
             'width': 854,
             'duration': 1308,
@@ -93,7 +93,7 @@ class TEDIE(SubtitlesInfoExtractor):
 
     def _extract_info(self, webpage):
         info_json = self._search_regex(r'q\("\w+.init",({.+})\)</script>',
-            webpage, 'info json')
+                                       webpage, 'info json')
         return json.loads(info_json)
 
     def _real_extract(self, url):
@@ -113,7 +113,7 @@ class TEDIE(SubtitlesInfoExtractor):
         '''Returns the videos of the playlist'''
 
         webpage = self._download_webpage(url, name,
-            'Downloading playlist webpage')
+                                         'Downloading playlist webpage')
         info = self._extract_info(webpage)
         playlist_info = info['playlist']
 
index db9788c1814b9dea530af23cfc53e3206a117d11..2a2fff5e18e4219f8db404bb0803778055570dfc 100644 (file)
@@ -1,4 +1,4 @@
-#coding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .mitele import MiTeleIE
index fdae17b1b817efd2a7666d44cc2cc38de1ccfa22..6e61cc9e2ecf621b19fe13924456970b091bce1c 100644 (file)
@@ -30,7 +30,7 @@ class TF1IE(InfoExtractor):
         embed_url = self._html_search_regex(
             r'"(https://www.wat.tv/embedframe/.*?)"', webpage, 'embed url')
         embed_page = self._download_webpage(embed_url, video_id,
-            'Downloading embed player page')
+                                            'Downloading embed player page')
         wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')
         wat_info = self._download_json(
             'http://www.wat.tv/interface/contentv3/%s' % wat_id, video_id)
index a04925633b4bbdea7864bb5f1619e5c846f80b38..e2653d62dc8c288ce8e58e5bfda52793aef7cfaf 100644 (file)
@@ -35,19 +35,19 @@ class ThePlatformIE(InfoExtractor):
             'skip_download': True,
         },
     }
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
         if mobj.group('config'):
-            config_url = url+ '&form=json'
+            config_url = url + '&form=json'
             config_url = config_url.replace('swf/', 'config/')
             config_url = config_url.replace('onsite/', 'onsite/config/')
             config = self._download_json(config_url, video_id, 'Downloading config')
             smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'
         else:
             smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
-                'format=smil&mbr=true'.format(video_id))
-
+                        'format=smil&mbr=true'.format(video_id))
 
         meta = self._download_xml(smil_url, video_id)
         try:
@@ -118,5 +118,5 @@ class ThePlatformIE(InfoExtractor):
             'formats': formats,
             'description': info['description'],
             'thumbnail': info['defaultThumbnailUrl'],
-            'duration': info['duration']//1000,
+            'duration': info['duration'] // 1000,
         }
index bfb9d2fc9f3cef23466806519c6ac0d226efa945..7f323c938762f6ec1337b6dbf6b9c64ec2993dd8 100644 (file)
@@ -1,4 +1,4 @@
-#coding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -36,12 +36,12 @@ class ThisAVIE(InfoExtractor):
             r': <a href="http://www.thisav.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>',
             webpage, 'uploader id', fatal=False)
         ext = determine_ext(video_url)
-        
+
         return {
-            'id':          video_id,
-            'url':         video_url,
-            'uploader':    uploader,
+            'id': video_id,
+            'url': video_url,
+            'uploader': uploader,
             'uploader_id': uploader_id,
-            'title':       title,
-            'ext':         ext,
+            'title': title,
+            'ext': ext,
         }
index a4aa25f661223301b9d16c7ac87b6c502aa0e0ff..4fe89dbe516f8e25eb1f84239bc9cbc9f26bd648 100644 (file)
@@ -26,9 +26,9 @@ class TinyPicIE(InfoExtractor):
         video_id = mobj.group('id')
 
         webpage = self._download_webpage(url, video_id, 'Downloading page')
-        
+
         mobj = re.search(r'(?m)fo\.addVariable\("file",\s"(?P<fileid>[\da-z]+)"\);\n'
-            '\s+fo\.addVariable\("s",\s"(?P<serverid>\d+)"\);', webpage)
+                         '\s+fo\.addVariable\("s",\s"(?P<serverid>\d+)"\);', webpage)
         if mobj is None:
             raise ExtractorError('Video %s does not exist' % video_id, expected=True)
 
@@ -47,4 +47,4 @@ class TinyPicIE(InfoExtractor):
             'url': video_url,
             'thumbnail': thumbnail,
             'title': title
-        }
\ No newline at end of file
+        }
index d848ee1863252dbc155652c997210476c42479e4..66d159e99f6b15c01d53017b24b0a70b57470bd3 100644 (file)
@@ -36,9 +36,10 @@ class TlcDeIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Breaking Amish: Die Welt da draußen',
             'uploader': 'Discovery Networks - Germany',
-            'description': 'Vier Amische und eine Mennonitin wagen in New York'
+            'description': (
+                'Vier Amische und eine Mennonitin wagen in New York'
                 '  den Sprung in ein komplett anderes Leben. Begleitet sie auf'
-                ' ihrem spannenden Weg.',
+                ' ihrem spannenden Weg.'),
         },
     }
 
diff --git a/youtube_dl/extractor/tmz.py b/youtube_dl/extractor/tmz.py
new file mode 100644 (file)
index 0000000..827aa08
--- /dev/null
@@ -0,0 +1,32 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class TMZIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?tmz\.com/videos/(?P<id>[^/]+)/?'
+    _TEST = {
+        'url': 'http://www.tmz.com/videos/0_okj015ty/',
+        'md5': '791204e3bf790b1426cb2db0706184c0',
+        'info_dict': {
+            'id': '0_okj015ty',
+            'url': 'http://tmz.vo.llnwd.net/o28/2014-03/13/0_okj015ty_0_rt8ro3si_2.mp4',
+            'ext': 'mp4',
+            'title': 'Kim Kardashian\'s Boobs Unlock a Mystery!',
+            'description': 'Did Kim Kardasain try to one-up Khloe by one-upping Kylie???  Or is she just showing off her amazing boobs?',
+            'thumbnail': 'http://cdnbakmi.kaltura.com/p/591531/sp/59153100/thumbnail/entry_id/0_okj015ty/version/100002/acv/182/width/640',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        return {
+            'id': video_id,
+            'url': self._html_search_meta('VideoURL', webpage, fatal=True),
+            'title': self._og_search_title(webpage),
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._html_search_meta('ThumbURL', webpage),
+        }
index 4956f857766eb6ea24638355c327742556f0e55f..0ecd695f85e6a40929501fb6adc40bace8784d42 100644 (file)
@@ -71,7 +71,7 @@ class TNAFlixIE(InfoExtractor):
                 fmt['height'] = int(m.group(1))
             formats.append(fmt)
         self._sort_formats(formats)
-        
+
         return {
             'id': video_id,
             'display_id': display_id,
index 11407428b56cb779ee9411ee1ccc7736fa2fb718..1c53a3fd09459f31fdf188dc852141ef000af4a6 100644 (file)
@@ -25,7 +25,7 @@ class TrailerAddictIE(InfoExtractor):
         webpage = self._download_webpage(url, name)
 
         title = self._search_regex(r'<title>(.+?)</title>',
-                webpage, 'video title').replace(' - Trailer Addict','')
+                                   webpage, 'video title').replace(' - Trailer Addict', '')
         view_count_str = self._search_regex(
             r'<span class="views_n">([0-9,.]+)</span>',
             webpage, 'view count', fatal=False)
@@ -43,12 +43,12 @@ class TrailerAddictIE(InfoExtractor):
             fvar = "fvar"
 
         info_url = "http://www.traileraddict.com/%s.php?tid=%s" % (fvar, str(video_id))
-        info_webpage = self._download_webpage(info_url, video_id , "Downloading the info webpage")
+        info_webpage = self._download_webpage(info_url, video_id, "Downloading the info webpage")
 
         final_url = self._search_regex(r'&fileurl=(.+)',
-                info_webpage, 'Download url').replace('%3F','?')
+                                       info_webpage, 'Download url').replace('%3F', '?')
         thumbnail_url = self._search_regex(r'&image=(.+?)&',
-                info_webpage, 'thumbnail url')
+                                           info_webpage, 'thumbnail url')
 
         description = self._html_search_regex(
             r'(?s)<div class="synopsis">.*?<div class="movie_label_info"[^>]*>(.*?)</div>',
index d64aaa41f690956b08211ed4fe07e1bc27267641..220a05b7b493fb728f3cd3c6dab74208a8f587eb 100644 (file)
@@ -1,28 +1,28 @@
+from __future__ import unicode_literals
+
 import json
-import re
 
 from .common import InfoExtractor
 
 
 class TriluliluIE(InfoExtractor):
-    _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?trilulilu\.ro/video-(?P<category>[^/]+)/(?P<video_id>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?trilulilu\.ro/video-[^/]+/(?P<id>[^/]+)'
     _TEST = {
-        u"url": u"http://www.trilulilu.ro/video-animatie/big-buck-bunny-1",
-        u'file': u"big-buck-bunny-1.mp4",
-        u'info_dict': {
-            u"title": u"Big Buck Bunny",
-            u"description": u":) pentru copilul din noi",
+        'url': 'http://www.trilulilu.ro/video-animatie/big-buck-bunny-1',
+        'info_dict': {
+            'id': 'big-buck-bunny-1',
+            'ext': 'mp4',
+            'title': 'Big Buck Bunny',
+            'description': ':) pentru copilul din noi',
         },
         # Server ignores Range headers (--test)
-        u"params": {
-            u"skip_download": True
+        'params': {
+            'skip_download': True
         }
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('video_id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         title = self._og_search_title(webpage)
@@ -30,20 +30,20 @@ class TriluliluIE(InfoExtractor):
         description = self._og_search_description(webpage)
 
         log_str = self._search_regex(
-            r'block_flash_vars[ ]=[ ]({[^}]+})', webpage, u'log info')
+            r'block_flash_vars[ ]=[ ]({[^}]+})', webpage, 'log info')
         log = json.loads(log_str)
 
-        format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/'
-                      u'video-formats2' % log)
+        format_url = ('http://fs%(server)s.trilulilu.ro/%(hash)s/'
+                      'video-formats2' % log)
         format_doc = self._download_xml(
             format_url, video_id,
-            note=u'Downloading formats',
-            errnote=u'Error while downloading formats')
+            note='Downloading formats',
+            errnote='Error while downloading formats')
+
         video_url_template = (
-            u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
-            u'&source=site&hash=%(hash)s&username=%(userid)s&'
-            u'key=ministhebest&format=%%s&sig=&exp=' %
+            'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
+            '&source=site&hash=%(hash)s&username=%(userid)s&'
+            'key=ministhebest&format=%%s&sig=&exp=' %
             log)
         formats = [
             {
@@ -63,4 +63,3 @@ class TriluliluIE(InfoExtractor):
             'description': description,
             'thumbnail': thumbnail,
         }
-
index dcd823d0838dca23b27298cbf05ad47cc4261637..161e47624b383dfe76ea1e2ea6ec73395a97db53 100644 (file)
@@ -36,12 +36,12 @@ class TudouIE(InfoExtractor):
         'skip': 'Only works from China'
     }]
 
-    def _url_for_id(self, id, quality = None):
-        info_url = "http://v2.tudou.com/f?id="+str(id)
+    def _url_for_id(self, id, quality=None):
+        info_url = "http://v2.tudou.com/f?id=" + str(id)
         if quality:
             info_url += '&hd' + quality
         webpage = self._download_webpage(info_url, id, "Opening the info webpage")
-        final_url = self._html_search_regex('>(.+?)</f>',webpage, 'video url')
+        final_url = self._html_search_regex('>(.+?)</f>', webpage, 'video url')
         return final_url
 
     def _real_extract(self, url):
@@ -73,7 +73,7 @@ class TudouIE(InfoExtractor):
         result = []
         len_parts = len(parts)
         if len_parts > 1:
-            self.to_screen(u'%s: found %s parts' % (video_id, len_parts))
+            self.to_screen('%s: found %s parts' % (video_id, len_parts))
         for part in parts:
             part_id = part['k']
             final_url = self._url_for_id(part_id, quality)
index 40c53ff17ec1e6eb8a05bcf683d55d00f2455101..2a1ae5a717cf7b2af16bf5a1ce3ef7494e28a7a6 100644 (file)
@@ -43,7 +43,7 @@ class TumblrIE(InfoExtractor):
             webpage, 'iframe url')
         iframe = self._download_webpage(iframe_url, video_id)
         video_url = self._search_regex(r'<source src="([^"]+)"',
-            iframe, 'video url')
+                                       iframe, 'video url')
 
         # The only place where you can get a title, it's not complete,
         # but searching in other places doesn't work for all videos
diff --git a/youtube_dl/extractor/tunein.py b/youtube_dl/extractor/tunein.py
new file mode 100644 (file)
index 0000000..4ce5aee
--- /dev/null
@@ -0,0 +1,99 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class TuneInIE(InfoExtractor):
+    _VALID_URL = r'''(?x)https?://(?:www\.)?
+    (?:
+        tunein\.com/
+        (?:
+            radio/.*?-s|
+            station/.*?StationId\=
+        )(?P<id>[0-9]+)
+        |tun\.in/(?P<redirect_id>[A-Za-z0-9]+)
+    )
+    '''
+    _API_URL_TEMPLATE = 'http://tunein.com/tuner/tune/?stationId={0:}&tuneType=Station'
+
+    _INFO_DICT = {
+        'id': '34682',
+        'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2',
+        'ext': 'AAC',
+        'thumbnail': 're:^https?://.*\.png$',
+        'location': 'Tacoma, WA',
+    }
+    _TESTS = [
+        {
+            'url': 'http://tunein.com/radio/Jazz24-885-s34682/',
+            'info_dict': _INFO_DICT,
+            'params': {
+                'skip_download': True,  # live stream
+            },
+        },
+        {  # test redirection
+            'url': 'http://tun.in/ser7s',
+            'info_dict': _INFO_DICT,
+            'params': {
+                'skip_download': True,  # live stream
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        redirect_id = mobj.group('redirect_id')
+        if redirect_id:
+            # The server doesn't support HEAD requests
+            urlh = self._request_webpage(
+                url, redirect_id, note='Downloading redirect page')
+            url = urlh.geturl()
+            self.to_screen('Following redirect: %s' % url)
+            mobj = re.match(self._VALID_URL, url)
+        station_id = mobj.group('id')
+
+        station_info = self._download_json(
+            self._API_URL_TEMPLATE.format(station_id),
+            station_id, note='Downloading station JSON')
+
+        title = station_info['Title']
+        thumbnail = station_info.get('Logo')
+        location = station_info.get('Location')
+        streams_url = station_info.get('StreamUrl')
+        if not streams_url:
+            raise ExtractorError('No downloadable streams found',
+                                 expected=True)
+        stream_data = self._download_webpage(
+            streams_url, station_id, note='Downloading stream data')
+        streams = json.loads(self._search_regex(
+            r'\((.*)\);', stream_data, 'stream info'))['Streams']
+
+        is_live = None
+        formats = []
+        for stream in streams:
+            if stream.get('Type') == 'Live':
+                is_live = True
+            formats.append({
+                'abr': stream.get('Bandwidth'),
+                'ext': stream.get('MediaType'),
+                'acodec': stream.get('MediaType'),
+                'vcodec': 'none',
+                'url': stream.get('Url'),
+                # Sometimes streams with the highest quality do not exist
+                'preference': stream.get('Reliability'),
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': station_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': thumbnail,
+            'location': location,
+            'is_live': is_live,
+        }
index 27962b5fe146dd16e85f46e341725b4e30bf24e1..d81d1d1a67cef49d4f612f08bfb5b7b7002b51fd 100644 (file)
@@ -84,4 +84,4 @@ class TvigleIE(InfoExtractor):
             'duration': duration,
             'age_limit': age_limit,
             'formats': formats,
-        }
\ No newline at end of file
+        }
index bfed9dd042bf3e170f1af394954d64dbdadea0ac..a645800057fc6dc88850885cba7737243c95574e 100644 (file)
@@ -1,40 +1,35 @@
-import json
-import re
+from __future__ import unicode_literals
 
 from .common import InfoExtractor
 
 
 class TvpIE(InfoExtractor):
-    IE_NAME = u'tvp.pl'
+    IE_NAME = 'tvp.pl'
     _VALID_URL = r'https?://www\.tvp\.pl/.*?wideo/(?P<date>\d+)/(?P<id>\d+)'
 
     _TEST = {
-        u'url': u'http://www.tvp.pl/warszawa/magazyny/campusnews/wideo/31102013/12878238',
-        u'md5': u'148408967a6a468953c0a75cbdaf0d7a',
-        u'file': u'12878238.wmv',
-        u'info_dict': {
-            u'title': u'31.10.2013 - Odcinek 2',
-            u'description': u'31.10.2013 - Odcinek 2',
+        'url': 'http://www.tvp.pl/warszawa/magazyny/campusnews/wideo/31102013/12878238',
+        'md5': '148408967a6a468953c0a75cbdaf0d7a',
+        'info_dict': {
+            'id': '12878238',
+            'ext': 'wmv',
+            'title': '31.10.2013 - Odcinek 2',
+            'description': '31.10.2013 - Odcinek 2',
         },
-        u'skip': u'Download has to use same server IP as extraction. Therefore, a good (load-balancing) DNS resolver will make the download fail.'
+        'skip': 'Download has to use same server IP as extraction. Therefore, a good (load-balancing) DNS resolver will make the download fail.'
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
         json_url = 'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s' % video_id
-        json_params = self._download_webpage(
-            json_url, video_id, u"Downloading video metadata")
-
-        params = json.loads(json_params)
-        self.report_extraction(video_id)
+        params = self._download_json(
+            json_url, video_id, "Downloading video metadata")
         video_url = params['video_url']
 
-        title = self._og_search_title(webpage, fatal=True)
         return {
             'id': video_id,
-            'title': title,
+            'title': self._og_search_title(webpage),
             'ext': 'wmv',
             'url': video_url,
             'description': self._og_search_description(webpage),
index 054f427252341306edd7698e6d150bcb619b64fc..0e4d386a8ba32387f6f9025e633efb4c1ee59700 100644 (file)
@@ -40,8 +40,24 @@ class UdemyIE(InfoExtractor):
                 error_str += ' - %s' % error_data.get('formErrors')
             raise ExtractorError(error_str, expected=True)
 
-    def _download_json(self, url, video_id, note='Downloading JSON metadata'):
-        response = super(UdemyIE, self)._download_json(url, video_id, note)
+    def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'):
+        headers = {
+            'X-Udemy-Snail-Case': 'true',
+            'X-Requested-With': 'XMLHttpRequest',
+        }
+        for cookie in self._downloader.cookiejar:
+            if cookie.name == 'client_id':
+                headers['X-Udemy-Client-Id'] = cookie.value
+            elif cookie.name == 'access_token':
+                headers['X-Udemy-Bearer-Token'] = cookie.value
+
+        if isinstance(url_or_request, compat_urllib_request.Request):
+            for header, value in headers.items():
+                url_or_request.add_header(header, value)
+        else:
+            url_or_request = compat_urllib_request.Request(url_or_request, headers=headers)
+
+        response = super(UdemyIE, self)._download_json(url_or_request, video_id, note)
         self._handle_error(response)
         return response
 
@@ -62,7 +78,9 @@ class UdemyIE(InfoExtractor):
         if login_popup == '<div class="run-command close-popup redirect" data-url="https://www.udemy.com/"></div>':
             return
 
-        csrf = self._html_search_regex(r'<input type="hidden" name="csrf" value="(.+?)"', login_popup, 'csrf token')
+        csrf = self._html_search_regex(
+            r'<input type="hidden" name="csrf" value="(.+?)"',
+            login_popup, 'csrf token')
 
         login_form = {
             'email': username,
@@ -71,42 +89,52 @@ class UdemyIE(InfoExtractor):
             'displayType': 'json',
             'isSubmitted': '1',
         }
-        request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
-        response = self._download_json(request, None, 'Logging in as %s' % username)
+        request = compat_urllib_request.Request(
+            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
+        response = self._download_json(
+            request, None, 'Logging in as %s' % username)
 
         if 'returnUrl' not in response:
             raise ExtractorError('Unable to log in')
 
+
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         lecture_id = mobj.group('id')
 
         lecture = self._download_json(
-            'https://www.udemy.com/api-1.1/lectures/%s' % lecture_id, lecture_id, 'Downloading lecture JSON')
+            'https://www.udemy.com/api-1.1/lectures/%s' % lecture_id,
+            lecture_id, 'Downloading lecture JSON')
 
-        if lecture['assetType'] != 'Video':
-            raise ExtractorError('Lecture %s is not a video' % lecture_id, expected=True)
+        asset_type = lecture.get('assetType') or lecture.get('asset_type')
+        if asset_type != 'Video':
+            raise ExtractorError(
+                'Lecture %s is not a video' % lecture_id, expected=True)
 
         asset = lecture['asset']
 
-        stream_url = asset['streamUrl']
+        stream_url = asset.get('streamUrl') or asset.get('stream_url')
         mobj = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', stream_url)
         if mobj:
             return self.url_result(mobj.group(1), 'Youtube')
 
         video_id = asset['id']
-        thumbnail = asset['thumbnailUrl']
+        thumbnail = asset.get('thumbnailUrl') or asset.get('thumbnail_url')
         duration = asset['data']['duration']
 
-        download_url = asset['downloadUrl']
+        download_url = asset.get('downloadUrl') or asset.get('download_url')
+
+        video = download_url.get('Video') or download_url.get('video')
+        video_480p = download_url.get('Video480p') or download_url.get('video_480p')
 
         formats = [
             {
-                'url': download_url['Video480p'][0],
+                'url': video_480p[0],
                 'format_id': '360p',
             },
             {
-                'url': download_url['Video'][0],
+                'url': video[0],
                 'format_id': '720p',
             },
         ]
@@ -140,25 +168,29 @@ class UdemyCourseIE(UdemyIE):
         course_path = mobj.group('coursepath')
 
         response = self._download_json(
-            'https://www.udemy.com/api-1.1/courses/%s' % course_path, course_path, 'Downloading course JSON')
+            'https://www.udemy.com/api-1.1/courses/%s' % course_path,
+            course_path, 'Downloading course JSON')
 
         course_id = int(response['id'])
         course_title = response['title']
 
         webpage = self._download_webpage(
-            'https://www.udemy.com/course/subscribe/?courseId=%s' % course_id, course_id, 'Enrolling in the course')
+            'https://www.udemy.com/course/subscribe/?courseId=%s' % course_id,
+            course_id, 'Enrolling in the course')
 
         if self._SUCCESSFULLY_ENROLLED in webpage:
             self.to_screen('%s: Successfully enrolled in' % course_id)
         elif self._ALREADY_ENROLLED in webpage:
             self.to_screen('%s: Already enrolled in' % course_id)
 
-        response = self._download_json('https://www.udemy.com/api-1.1/courses/%s/curriculum' % course_id,
+        response = self._download_json(
+            'https://www.udemy.com/api-1.1/courses/%s/curriculum' % course_id,
             course_id, 'Downloading course curriculum')
 
         entries = [
-            self.url_result('https://www.udemy.com/%s/#/lecture/%s' % (course_path, asset['id']), 'Udemy')
-            for asset in response if asset.get('assetType') == 'Video'
+            self.url_result(
+                'https://www.udemy.com/%s/#/lecture/%s' % (course_path, asset['id']), 'Udemy')
+            for asset in response if asset.get('assetType') or asset.get('asset_type') == 'Video'
         ]
 
-        return self.playlist_result(entries, course_id, course_title)
\ No newline at end of file
+        return self.playlist_result(entries, course_id, course_title)
index 875450908eb15856643dfbd7b085b107f5e1ca1e..53dc3a496ff65edf044137540080d9190ad8d72b 100644 (file)
@@ -45,13 +45,13 @@ class UstreamIE(InfoExtractor):
         self.report_extraction(video_id)
 
         video_title = self._html_search_regex(r'data-title="(?P<title>.+)"',
-            webpage, 'title')
+                                              webpage, 'title')
 
         uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
-            webpage, 'uploader', fatal=False, flags=re.DOTALL)
+                                           webpage, 'uploader', fatal=False, flags=re.DOTALL)
 
         thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"',
-            webpage, 'thumbnail', fatal=False)
+                                            webpage, 'thumbnail', fatal=False)
 
         return {
             'id': video_id,
index ebd64f0f54df23fca1d243e32dc8071fcfcbfb1d..455b6d9da62f221cf0854655f707d5963546840f 100644 (file)
@@ -30,13 +30,13 @@ class Vbox7IE(InfoExtractor):
 
         redirect_page, urlh = self._download_webpage_handle(url, video_id)
         new_location = self._search_regex(r'window\.location = \'(.*)\';',
-            redirect_page, 'redirect location')
+                                          redirect_page, 'redirect location')
         redirect_url = urlh.geturl() + new_location
         webpage = self._download_webpage(redirect_url, video_id,
-            'Downloading redirect page')
+                                         'Downloading redirect page')
 
         title = self._html_search_regex(r'<title>(.*)</title>',
-            webpage, 'title').split('/')[0].strip()
+                                        webpage, 'title').split('/')[0].strip()
 
         info_url = "http://vbox7.com/play/magare.do"
         data = compat_urllib_parse.urlencode({'as3': '1', 'vid': video_id})
index 77b1f91ce3636cbb8f805f60ff06d95852cf4940..94647d1c8c88a18cfb6abcba2ec5ed8e71e9c4b6 100644 (file)
@@ -48,11 +48,11 @@ class VeeHDIE(InfoExtractor):
         video_url = compat_urlparse.unquote(config['clip']['url'])
         title = clean_html(get_element_by_id('videoName', webpage).rpartition('|')[0])
         uploader_id = self._html_search_regex(r'<a href="/profile/\d+">(.+?)</a>',
-            webpage, 'uploader')
+                                              webpage, 'uploader')
         thumbnail = self._search_regex(r'<img id="veehdpreview" src="(.+?)"',
-            webpage, 'thumbnail')
+                                       webpage, 'thumbnail')
         description = self._html_search_regex(r'<td class="infodropdown".*?<div>(.*?)<ul',
-            webpage, 'description', flags=re.DOTALL)
+                                              webpage, 'description', flags=re.DOTALL)
 
         return {
             '_type': 'video',
index 27f9acb670b1b10ffa2ee0220f62dc411e49d8d7..a0c59a2e0e1cb8fca2e0e3eb3ec2e4edce2918bb 100644 (file)
@@ -112,10 +112,10 @@ class VestiIE(InfoExtractor):
         if mobj:
             video_id = mobj.group('id')
             page = self._download_webpage('http://www.vesti.ru/only_video.html?vid=%s' % video_id, video_id,
-                'Downloading video page')
+                                          'Downloading video page')
 
         rutv_url = RUTVIE._extract_url(page)
         if rutv_url:
             return self.url_result(rutv_url, 'RUTV')
 
-        raise ExtractorError('No video found', expected=True)
\ No newline at end of file
+        raise ExtractorError('No video found', expected=True)
index 5b1a3ec787ac6c99c0db4d37d5ff2c33c6950ef6..c912c3cbe7ae42b221816b50ec6a97139cb13d55 100644 (file)
@@ -13,7 +13,7 @@ from ..utils import (
 class VevoIE(InfoExtractor):
     """
     Accepts urls from vevo.com or in the format 'vevo:{id}'
-    (currently used by MTVIE)
+    (currently used by MTVIE and MySpaceIE)
     """
     _VALID_URL = r'''(?x)
         (?:https?://www\.vevo\.com/watch/(?:[^/]+/(?:[^/]+/)?)?|
index d3fa70e0e932418a4a39874ee9b89298185ae61d..70578a4cc6866524e2c52d574976579a8ad238e5 100644 (file)
@@ -114,4 +114,4 @@ class VGTVIE(InfoExtractor):
             'duration': float_or_none(data['duration'], 1000),
             'view_count': data['displays'],
             'formats': formats,
-        }
\ No newline at end of file
+        }
index f11ca8217ff42e404e8a50faf40c18dfe8c5be5d..71f520fb525a5bef424061be1ff881408b762624 100644 (file)
@@ -35,4 +35,3 @@ class ViceIE(InfoExtractor):
         except ExtractorError:
             raise ExtractorError('The page doesn\'t contain a video', expected=True)
         return self.url_result(ooyala_url, ie='Ooyala')
-
index fed95ef71120a7137b47d953dcb830e6ede59f23..0eb3d9414ea339e0854949732f40ab9975ce8500 100644 (file)
@@ -78,4 +78,4 @@ class VideoBamIE(InfoExtractor):
             'view_count': view_count,
             'formats': formats,
             'age_limit': 18,
-        }
\ No newline at end of file
+        }
index f75169041b4f958b9f345daba99a4a1ba575cf4e..94f9e9be94f9a420fd0207339085cbc35aba6805 100644 (file)
@@ -1,46 +1,50 @@
-import re
+from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from ..utils import (
     find_xpath_attr,
-    determine_ext,
+    int_or_none,
 )
 
+
 class VideofyMeIE(InfoExtractor):
-    _VALID_URL = r'https?://(www\.videofy\.me/.+?|p\.videofy\.me/v)/(?P<id>\d+)(&|#|$)'
-    IE_NAME = u'videofy.me'
+    _VALID_URL = r'https?://(?:www\.videofy\.me/.+?|p\.videofy\.me/v)/(?P<id>\d+)(&|#|$)'
+    IE_NAME = 'videofy.me'
 
     _TEST = {
-        u'url': u'http://www.videofy.me/thisisvideofyme/1100701',
-        u'file':  u'1100701.mp4',
-        u'md5': u'c77d700bdc16ae2e9f3c26019bd96143',
-        u'info_dict': {
-            u'title': u'This is VideofyMe',
-            u'description': None,
-            u'uploader': u'VideofyMe',
-            u'uploader_id': u'thisisvideofyme',
+        'url': 'http://www.videofy.me/thisisvideofyme/1100701',
+        'md5': 'c77d700bdc16ae2e9f3c26019bd96143',
+        'info_dict': {
+            'id': '1100701',
+            'ext': 'mp4',
+            'title': 'This is VideofyMe',
+            'description': None,
+            'uploader': 'VideofyMe',
+            'uploader_id': 'thisisvideofyme',
+            'view_count': int,
         },
-        
+
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         config = self._download_xml('http://sunshine.videofy.me/?videoId=%s' % video_id,
-                                            video_id)
+                                    video_id)
         video = config.find('video')
         sources = video.find('sources')
-        url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key) 
-            for key in ['on', 'av', 'off']] if node is not None)
+        url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key)
+                                          for key in ['on', 'av', 'off']] if node is not None)
         video_url = url_node.find('url').text
+        view_count = int_or_none(self._search_regex(
+            r'([0-9]+)', video.find('views').text, 'view count', fatal=False))
 
-        return {'id': video_id,
-                'title': video.find('title').text,
-                'url': video_url,
-                'ext': determine_ext(video_url),
-                'thumbnail': video.find('thumb').text,
-                'description': video.find('description').text,
-                'uploader': config.find('blog/name').text,
-                'uploader_id': video.find('identifier').text,
-                'view_count': re.search(r'\d+', video.find('views').text).group(),
-                }
+        return {
+            'id': video_id,
+            'title': video.find('title').text,
+            'url': video_url,
+            'thumbnail': video.find('thumb').text,
+            'description': video.find('description').text,
+            'uploader': config.find('blog/name').text,
+            'uploader_id': video.find('identifier').text,
+            'view_count': view_count,
+        }
index 65463c73324ca83ab87b45bc33d569c3fe881163..3176e3b9dda8580ca4c693343508367d12b25751 100644 (file)
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re
 import random
 
@@ -5,23 +7,22 @@ from .common import InfoExtractor
 
 
 class VideoPremiumIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)?(?:www\.)?videopremium\.(?:tv|me)/(?P<id>\w+)(?:/.*)?'
+    _VALID_URL = r'https?://(?:www\.)?videopremium\.(?:tv|me)/(?P<id>\w+)(?:/.*)?'
     _TEST = {
-        u'url': u'http://videopremium.tv/4w7oadjsf156',
-        u'file': u'4w7oadjsf156.f4v',
-        u'info_dict': {
-            u"title": u"youtube-dl_test_video____a_________-BaW_jenozKc.mp4.mp4"
+        'url': 'http://videopremium.tv/4w7oadjsf156',
+        'info_dict': {
+            'id': '4w7oadjsf156',
+            'ext': 'f4v',
+            'title': 'youtube-dl_test_video____a_________-BaW_jenozKc.mp4.mp4'
         },
-        u'params': {
-            u'skip_download': True,
+        'params': {
+            'skip_download': True,
         },
-        u'skip': u'Test file has been deleted.',
+        'skip': 'Test file has been deleted.',
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         webpage_url = 'http://videopremium.tv/' + video_id
         webpage = self._download_webpage(webpage_url, video_id)
 
@@ -29,17 +30,17 @@ class VideoPremiumIE(InfoExtractor):
             # Download again, we need a cookie
             webpage = self._download_webpage(
                 webpage_url, video_id,
-                note=u'Downloading webpage again (with cookie)')
+                note='Downloading webpage again (with cookie)')
 
         video_title = self._html_search_regex(
-            r'<h2(?:.*?)>\s*(.+?)\s*<', webpage, u'video title')
+            r'<h2(?:.*?)>\s*(.+?)\s*<', webpage, 'video title')
 
         return {
-            'id':          video_id,
-            'url':         "rtmp://e%d.md.iplay.md/play" % random.randint(1, 16),
-            'play_path':   "mp4:%s.f4v" % video_id,
-            'page_url':    "http://videopremium.tv/" + video_id,
-            'player_url':  "http://videopremium.tv/uplayer/uppod.swf",
-            'ext':         'f4v',
-            'title':       video_title,
+            'id': video_id,
+            'url': "rtmp://e%d.md.iplay.md/play" % random.randint(1, 16),
+            'play_path': "mp4:%s.f4v" % video_id,
+            'page_url': "http://videopremium.tv/" + video_id,
+            'player_url': "http://videopremium.tv/uplayer/uppod.swf",
+            'ext': 'f4v',
+            'title': video_title,
         }
index a647807d01f8b3e5a2ed3eab99acc545533327d4..1f938838cc9247e9e80c8ce32911cb40954c1830 100644 (file)
@@ -58,4 +58,4 @@ class VideoTtIE(InfoExtractor):
             'like_count': int_or_none(video['liked']),
             'dislike_count': int_or_none(video['disliked']),
             'formats': formats,
-        }
\ No newline at end of file
+        }
index 4a08ddd4390fedb6fc0944b6f3f0034e72c8bc03..ca2e50935def482fb3d3f7f5e7594d24b3dba2f1 100644 (file)
@@ -23,4 +23,4 @@ class VideoWeedIE(NovaMovIE):
             'title': 'optical illusion  dissapeared image magic illusion',
             'description': ''
         },
-    }
\ No newline at end of file
+    }
index 669979e13660b55d8c6f426aaab2b888007b8c5b..08a5a7b8ddbace34da0a4c6bb751562eb51a95ba 100644 (file)
@@ -1,4 +1,4 @@
-#coding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
@@ -18,16 +18,15 @@ class VidziIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        
+
         webpage = self._download_webpage(url, video_id)
         video_url = self._html_search_regex(
             r'{\s*file\s*:\s*"([^"]+)"\s*}', webpage, 'video url')
         title = self._html_search_regex(
             r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')
-        
+
         return {
             'id': video_id,
             'title': title,
             'url': video_url,
         }
-        
\ No newline at end of file
index c744d4f041b4eddb21a9f9b9aa8f3924ca578f03..06b0bed41e68401a8667cbabdca0d9796ea8ca3d 100644 (file)
@@ -260,7 +260,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
                 else:
                     config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
                 config = self._search_regex(config_re, webpage, 'info section',
-                    flags=re.DOTALL)
+                                            flags=re.DOTALL)
                 config = json.loads(config)
         except Exception as e:
             if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
index 36cd7e52e7a492207a3b47934d327f4845ab1d62..ca6b0d5b3369c53b7e06715ea5a56fd338d71e41 100644 (file)
@@ -11,12 +11,13 @@ from ..utils import (
     compat_urllib_parse,
     compat_str,
     unescapeHTML,
-)
+    unified_strdate,
+    orderedSet)
 
 
 class VKIE(InfoExtractor):
     IE_NAME = 'vk.com'
-    _VALID_URL = r'https?://(?:m\.)?vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:.+?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))'
+    _VALID_URL = r'https?://(?:m\.)?vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:.+?\?.*?z=)?video(?P<videoid>[^s].*?)(?:\?|%2F|$))'
     _NETRC_MACHINE = 'vk'
 
     _TESTS = [
@@ -29,17 +30,19 @@ class VKIE(InfoExtractor):
                 'title': 'ProtivoGunz - Хуёвая песня',
                 'uploader': 're:Noize MC.*',
                 'duration': 195,
+                'upload_date': '20120212',
             },
         },
         {
-            'url': 'http://vk.com/video4643923_163339118',
-            'md5': 'f79bccb5cd182b1f43502ca5685b2b36',
+            'url': 'http://vk.com/video205387401_165548505',
+            'md5': '6c0aeb2e90396ba97035b9cbde548700',
             'info_dict': {
-                'id': '163339118',
+                'id': '165548505',
                 'ext': 'mp4',
-                'uploader': 'Elya Iskhakova',
-                'title': 'Dream Theater - Hollow Years Live at Budokan 720*',
-                'duration': 558,
+                'uploader': 'Tom Cruise',
+                'title': 'No name',
+                'duration': 9,
+                'upload_date': '20130721'
             }
         },
         {
@@ -52,9 +55,12 @@ class VKIE(InfoExtractor):
                 'uploader': 'Vladimir Gavrin',
                 'title': 'Lin Dan',
                 'duration': 101,
+                'upload_date': '20120730',
             }
         },
         {
+            # VIDEO NOW REMOVED
+            # please update if you find a video whose URL follows the same pattern
             'url': 'http://vk.com/video-8871596_164049491',
             'md5': 'a590bcaf3d543576c9bd162812387666',
             'note': 'Only available for registered users',
@@ -64,18 +70,7 @@ class VKIE(InfoExtractor):
                 'uploader': 'Триллеры',
                 'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]',
                 'duration': 8352,
-            },
-            'skip': 'Requires vk account credentials',
-        },
-        {
-            'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
-            'md5': 'd82c22e449f036282d1d3f7f4d276869',
-            'info_dict': {
-                'id': '166094326',
-                'ext': 'mp4',
-                'uploader': 'Киномания - лучшее из мира кино',
-                'title': 'Запах женщины (1992)',
-                'duration': 9392,
+                'upload_date': '20121218'
             },
             'skip': 'Requires vk account credentials',
         },
@@ -88,6 +83,7 @@ class VKIE(InfoExtractor):
                 'uploader': 'Киномания - лучшее из мира кино',
                 'title': ' ',
                 'duration': 7291,
+                'upload_date': '20140328',
             },
             'skip': 'Requires vk account credentials',
         },
@@ -100,9 +96,15 @@ class VKIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'Книга Илая',
                 'duration': 6771,
+                'upload_date': '20140626',
             },
             'skip': 'Only works from Russia',
         },
+        {
+            # removed video, just testing that we match the pattern
+            'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
+            'only_matching': True,
+        },
     ]
 
     def _login(self):
@@ -119,7 +121,7 @@ class VKIE(InfoExtractor):
         }
 
         request = compat_urllib_request.Request('https://login.vk.com/?act=login',
-            compat_urllib_parse.urlencode(login_form).encode('utf-8'))
+                                                compat_urllib_parse.urlencode(login_form).encode('utf-8'))
         login_page = self._download_webpage(request, None, note='Logging in as %s' % username)
 
         if re.search(r'onLoginFailed', login_page):
@@ -140,12 +142,14 @@ class VKIE(InfoExtractor):
 
         ERRORS = {
             r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<':
-                'Video %s has been removed from public access due to rightholder complaint.',
+            'Video %s has been removed from public access due to rightholder complaint.',
+
             r'<!>Please log in or <':
-                'Video %s is only available for registered users, '
-                'use --username and --password options to provide account credentials.',
-            '<!>Unknown error':
-                'Video %s does not exist.'
+            'Video %s is only available for registered users, '
+            'use --username and --password options to provide account credentials.',
+
+            r'<!>Unknown error':
+            'Video %s does not exist.'
         }
 
         for error_re, error_msg in ERRORS.items():
@@ -169,6 +173,13 @@ class VKIE(InfoExtractor):
         data_json = self._search_regex(r'var vars = ({.*?});', info_page, 'vars')
         data = json.loads(data_json)
 
+        # Extract upload date
+        upload_date = None
+        mobj = re.search(r'id="mv_date_wrap".*?Added ([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page)
+        if mobj is not None:
+            mobj.group(1) + ' ' + mobj.group(2)
+            upload_date = unified_strdate(mobj.group(1) + ' ' + mobj.group(2))
+
         formats = [{
             'format_id': k,
             'url': v,
@@ -183,5 +194,28 @@ class VKIE(InfoExtractor):
             'title': unescapeHTML(data['md_title']),
             'thumbnail': data.get('jpg'),
             'uploader': data.get('md_author'),
-            'duration': data.get('duration')
+            'duration': data.get('duration'),
+            'upload_date': upload_date,
         }
+
+
+class VKUserVideosIE(InfoExtractor):
+    IE_NAME = 'vk.com:user-videos'
+    IE_DESC = 'vk.com:All of a user\'s videos'
+    _VALID_URL = r'https?://vk\.com/videos(?P<id>[0-9]+)(?:m\?.*)?'
+    _TEMPLATE_URL = 'https://vk.com/videos'
+    _TEST = {
+        'url': 'http://vk.com/videos205387401',
+        'playlist_mincount': 4,
+    }
+
+    def _real_extract(self, url):
+        page_id = self._match_id(url)
+        page = self._download_webpage(url, page_id)
+        video_ids = orderedSet(
+            m.group(1) for m in re.finditer(r'href="/video([0-9_]+)"', page))
+        url_entries = [
+            self.url_result(
+                'http://vk.com/video' + video_id, 'VK', video_id=video_id)
+            for video_id in video_ids]
+        return self.playlist_result(url_entries, page_id)
index 57ef8dc300355f1c49b2be0a2ee4e4fa27b4e4ca..bbd3bbf7bad98c787c0840ed0f302198ebb7932a 100644 (file)
@@ -36,7 +36,7 @@ class VRTIE(InfoExtractor):
                 'timestamp': 1413835980.560,
                 'upload_date': '20141020',
                 'duration': 3238,
-            }  
+            }
         },
         # cobra.be
         {
@@ -92,4 +92,4 @@ class VRTIE(InfoExtractor):
             'timestamp': timestamp,
             'duration': duration,
             'formats': formats,
-        }
\ No newline at end of file
+        }
index 54d37da618960d46d977ff4b97682ca0c7b2a99c..93a6e64542c71be1f05dac1e351d5c57ecd28ff7 100644 (file)
@@ -224,4 +224,4 @@ class WDRMausIE(InfoExtractor):
             'upload_date': upload_date,
         }
 
-# TODO test _1
\ No newline at end of file
+# TODO test _1
index b24297a409911c79433cca404dc94206009aefe5..20bb039d38dd9c62ff6c38135f67f9aa41f484aa 100644 (file)
@@ -41,7 +41,7 @@ class WeiboIE(InfoExtractor):
         videos_urls = sorted(videos_urls, key=lambda u: 'video.sina.com' in u)
         player_url = videos_urls[-1]
         m_sina = re.match(r'https?://video\.sina\.com\.cn/v/b/(\d+)-\d+\.html',
-            player_url)
+                          player_url)
         if m_sina is not None:
             self.to_screen('Sina video detected')
             sina_id = m_sina.group(1)
index bda3870db9f16e12c721c361696656df8be8a1b3..d5c26a032bcf28a9c8ae79e1d083d67ed29b2726 100644 (file)
@@ -51,4 +51,3 @@ class WorldStarHipHopIE(InfoExtractor):
             'title': video_title,
             'thumbnail': thumbnail,
         }
-
index 71bd7c463595a549786d3156f33f114b97b54226..1b4e883652667f2c2109d34014154c71fd443196 100644 (file)
@@ -47,4 +47,3 @@ class XBefIE(InfoExtractor):
             'thumbnail': thumbnail,
             'age_limit': 18,
         }
-
index 4e8fbde8d6bbb072e7fc3475288c6c2e93360993..6b37bcbc959a8e8b83fee052da18728ca9a9c298 100644 (file)
@@ -42,7 +42,7 @@ class XHamsterIE(InfoExtractor):
         }
     ]
 
-    def _real_extract(self,url):
+    def _real_extract(self, url):
         def extract_video_url(webpage):
             mp4 = re.search(r'<video\s+.*?file="([^"]+)".*?>', webpage)
             if mp4 is None:
@@ -67,17 +67,17 @@ class XHamsterIE(InfoExtractor):
         description = mobj.group(1) if mobj else None
 
         upload_date = self._html_search_regex(r'hint=\'(\d{4}-\d{2}-\d{2}) \d{2}:\d{2}:\d{2} [A-Z]{3,4}\'',
-            webpage, 'upload date', fatal=False)
+                                              webpage, 'upload date', fatal=False)
         if upload_date:
             upload_date = unified_strdate(upload_date)
 
         uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
-            webpage, 'uploader id', default='anonymous')
+                                              webpage, 'uploader id', default='anonymous')
 
         thumbnail = self._html_search_regex(r'<video\s+.*?poster="([^"]+)".*?>', webpage, 'thumbnail', fatal=False)
 
         duration = parse_duration(self._html_search_regex(r'<span>Runtime:</span> (\d+:\d+)</div>',
-            webpage, 'duration', fatal=False))
+                                                          webpage, 'duration', fatal=False))
 
         view_count = self._html_search_regex(r'<span>Views:</span> ([^<]+)</div>', webpage, 'view count', fatal=False)
         if view_count:
diff --git a/youtube_dl/extractor/xminus.py b/youtube_dl/extractor/xminus.py
new file mode 100644 (file)
index 0000000..f7e2e8a
--- /dev/null
@@ -0,0 +1,67 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_chr,
+    compat_ord,
+)
+from ..utils import (
+    int_or_none,
+    parse_filesize,
+)
+
+
+class XMinusIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?x-minus\.org/track/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://x-minus.org/track/4542/%D0%BF%D0%B5%D1%81%D0%B5%D0%BD%D0%BA%D0%B0-%D1%88%D0%BE%D1%84%D0%B5%D1%80%D0%B0.html',
+        'md5': '401a15f2d2dcf6d592cb95528d72a2a8',
+        'info_dict': {
+            'id': '4542',
+            'ext': 'mp3',
+            'title': 'Леонид Агутин-Песенка шофера',
+            'duration': 156,
+            'tbr': 320,
+            'filesize_approx': 5900000,
+            'view_count': int,
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        artist = self._html_search_regex(
+            r'minus_track\.artist="(.+?)"', webpage, 'artist')
+        title = artist + '-' + self._html_search_regex(
+            r'minus_track\.title="(.+?)"', webpage, 'title')
+        duration = int_or_none(self._html_search_regex(
+            r'minus_track\.dur_sec=\'([0-9]*?)\'',
+            webpage, 'duration', fatal=False))
+        filesize_approx = parse_filesize(self._html_search_regex(
+            r'<div class="filesize[^"]*"></div>\s*([0-9.]+\s*[a-zA-Z][bB])',
+            webpage, 'approximate filesize', fatal=False))
+        tbr = int_or_none(self._html_search_regex(
+            r'<div class="quality[^"]*"></div>\s*([0-9]+)\s*kbps',
+            webpage, 'bitrate', fatal=False))
+        view_count = int_or_none(self._html_search_regex(
+            r'<div class="quality.*?► ([0-9]+)',
+            webpage, 'view count', fatal=False))
+
+        enc_token = self._html_search_regex(
+            r'minus_track\.tkn="(.+?)"', webpage, 'enc_token')
+        token = ''.join(
+            c if pos == 3 else compat_chr(compat_ord(c) - 1)
+            for pos, c in enumerate(reversed(enc_token)))
+        video_url = 'http://x-minus.org/dwlf/%s/%s.mp3' % (video_id, token)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+            'duration': duration,
+            'filesize_approx': filesize_approx,
+            'tbr': tbr,
+            'view_count': view_count,
+        }
index 7a73b243080406b29b6c4a17d50a3e4d1ac023cb..53ed7ef5a6ea95826d0324bac53a71bea4913fa4 100644 (file)
@@ -30,14 +30,14 @@ class XNXXIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         video_url = self._search_regex(r'flv_url=(.*?)&amp;',
-            webpage, 'video URL')
+                                       webpage, 'video URL')
         video_url = compat_urllib_parse.unquote(video_url)
 
         video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XNXX.COM',
-            webpage, 'title')
+                                              webpage, 'title')
 
         video_thumbnail = self._search_regex(r'url_bigthumb=(.*?)&amp;',
-            webpage, 'thumbnail', fatal=False)
+                                             webpage, 'thumbnail', fatal=False)
 
         return {
             'id': video_id,
index c3bb9b2cfb916ad37febd435bc2c67916627edf7..38448e7c0fbfe3641cc364a2707a97910ab16cf8 100644 (file)
@@ -97,7 +97,7 @@ class XTubeUserIE(InfoExtractor):
             url, username, note='Retrieving profile page')
 
         video_count = int(self._search_regex(
-            r'<strong>%s\'s Videos \(([0-9]+)\)</strong>'%username, profile_page,
+            r'<strong>%s\'s Videos \(([0-9]+)\)</strong>' % username, profile_page,
             'video count'))
 
         PAGE_SIZE = 25
index 117f0856a261e8f520bd6ac593ca75f843f6c7a7..0fdb122436d9b7e158a706cd1048630e252d5666 100644 (file)
@@ -229,7 +229,7 @@ class YahooSearchIE(SearchInfoExtractor):
         for pagenum in itertools.count(0):
             result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
             info = self._download_json(result_url, query,
-                note='Downloading results page '+str(pagenum+1))
+                                       note='Downloading results page ' + str(pagenum + 1))
             m = info['m']
             results = info['results']
 
index 9cd7989cc553b58043425cc5b852b258ee4834aa..7b621a9e32b3cc4521a2988f00e32252cb17bd65 100644 (file)
@@ -47,4 +47,4 @@ class YnetIE(InfoExtractor):
             'title': title,
             'formats': self._extract_f4m_formats(f4m_url, video_id),
             'thumbnail': self._og_search_thumbnail(webpage),
-        }
\ No newline at end of file
+        }
index 48d47a24556666df12f3a1ea4c0d1f1d6a51a06b..97b98bbe88715f644da6bec1709d697af2c8e0e0 100644 (file)
@@ -35,21 +35,21 @@ class YoukuIE(InfoExtractor):
 
     def _gen_sid(self):
         nowTime = int(time.time() * 1000)
-        random1 = random.randint(1000,1998)
-        random2 = random.randint(1000,9999)
+        random1 = random.randint(1000, 1998)
+        random2 = random.randint(1000, 9999)
 
-        return "%d%d%d" %(nowTime,random1,random2)
+        return "%d%d%d" % (nowTime, random1, random2)
 
     def _get_file_ID_mix_string(self, seed):
         mixed = []
         source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890")
         seed = float(seed)
         for i in range(len(source)):
-            seed  =  (seed * 211 + 30031) % 65536
-            index  =  math.floor(seed / 65536 * len(source))
+            seed = (seed * 211 + 30031) % 65536
+            index = math.floor(seed / 65536 * len(source))
             mixed.append(source[int(index)])
             source.remove(source[int(index)])
-        #return ''.join(mixed)
+        # return ''.join(mixed)
         return mixed
 
     def _get_file_id(self, fileId, seed):
@@ -74,7 +74,7 @@ class YoukuIE(InfoExtractor):
             # -8 means blocked outside China.
             error = config['data'][0].get('error')  # Chinese and English, separated by newline.
             raise ExtractorError(error or 'Server reported error %i' % error_code,
-                expected=True)
+                                 expected=True)
 
         video_title = config['data'][0]['title']
         seed = config['data'][0]['seed']
@@ -100,12 +100,12 @@ class YoukuIE(InfoExtractor):
         keys = [s['k'] for s in config['data'][0]['segs'][format]]
         # segs is usually a dictionary, but an empty *list* if an error occured.
 
-        files_info=[]
+        files_info = []
         sid = self._gen_sid()
         fileid = self._get_file_id(fileid, seed)
 
-        #column 8,9 of fileid represent the segment number
-        #fileid[7:9] should be changed
+        # column 8,9 of fileid represent the segment number
+        # fileid[7:9] should be changed
         for index, key in enumerate(keys):
             temp_fileid = '%s%02X%s' % (fileid[0:8], index, fileid[10:])
             download_url = 'http://k.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key)
index 7bfda45e76e0d4ca3b6bfd6c3a8ec9f38de453e1..8123928be982dbaccb98638ea0b915007ebc7066 100644 (file)
@@ -49,7 +49,7 @@ class YouPornIE(InfoExtractor):
         try:
             params = json.loads(json_params)
         except:
-            raise ExtractorError(u'Invalid JSON')
+            raise ExtractorError('Invalid JSON')
 
         self.report_extraction(video_id)
         try:
@@ -64,7 +64,7 @@ class YouPornIE(InfoExtractor):
         # Get all of the links from the page
         DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
         download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
-            webpage, 'download list').strip()
+                                                webpage, 'download list').strip()
         LINK_RE = r'<a href="([^"]+)">'
         links = re.findall(LINK_RE, download_list_html)
 
@@ -73,7 +73,7 @@ class YouPornIE(InfoExtractor):
         for encrypted_link in encrypted_links:
             link = aes_decrypt_text(encrypted_link, video_title, 32).decode('utf-8')
             links.append(link)
-        
+
         formats = []
         for link in links:
             # A link looks like this:
@@ -103,8 +103,8 @@ class YouPornIE(InfoExtractor):
         self._sort_formats(formats)
 
         if not formats:
-            raise ExtractorError(u'ERROR: no known formats available for video')
-        
+            raise ExtractorError('ERROR: no known formats available for video')
+
         return {
             'id': video_id,
             'uploader': video_uploader,
index 0cb837afcc7d448ec6ae5a8b3496642cc7f956c0..2642ecfffbadeff3c7c184d52bf119328d5bbf60 100644 (file)
@@ -7,6 +7,7 @@ import itertools
 import json
 import os.path
 import re
+import time
 import traceback
 
 from .common import InfoExtractor, SearchInfoExtractor
@@ -33,21 +34,19 @@ from ..utils import (
     uppercase_escape,
 )
 
+
 class YoutubeBaseInfoExtractor(InfoExtractor):
     """Provide base functions for Youtube extractors"""
     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
     _TWOFACTOR_URL = 'https://accounts.google.com/SecondFactor'
-    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
-    _AGE_URL = 'https://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
     _NETRC_MACHINE = 'youtube'
     # If True it will raise an error if no login info is provided
     _LOGIN_REQUIRED = False
 
     def _set_language(self):
-        return bool(self._download_webpage(
-            self._LANG_URL, None,
-            note='Setting language', errnote='unable to set language',
-            fatal=False))
+        self._set_cookie('.youtube.com', 'PREF', 'f1=50000000&hl=en',
+            # YouTube sets the expire time to about two months
+            expire_time=time.time() + 60*24*3600)
 
     def _login(self):
         """
@@ -76,30 +75,30 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
 
         # Log in
         login_form_strs = {
-                'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
-                'Email': username,
-                'GALX': galx,
-                'Passwd': password,
-
-                'PersistentCookie': 'yes',
-                '_utf8': '霱',
-                'bgresponse': 'js_disabled',
-                'checkConnection': '',
-                'checkedDomains': 'youtube',
-                'dnConn': '',
-                'pstMsg': '0',
-                'rmShown': '1',
-                'secTok': '',
-                'signIn': 'Sign in',
-                'timeStmp': '',
-                'service': 'youtube',
-                'uilel': '3',
-                'hl': 'en_US',
+            'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
+            'Email': username,
+            'GALX': galx,
+            'Passwd': password,
+
+            'PersistentCookie': 'yes',
+            '_utf8': '霱',
+            'bgresponse': 'js_disabled',
+            'checkConnection': '',
+            'checkedDomains': 'youtube',
+            'dnConn': '',
+            'pstMsg': '0',
+            'rmShown': '1',
+            'secTok': '',
+            'signIn': 'Sign in',
+            'timeStmp': '',
+            'service': 'youtube',
+            'uilel': '3',
+            'hl': 'en_US',
         }
 
         # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
         # chokes on unicode
-        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
+        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
         login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
 
         req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
@@ -149,7 +148,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
                 'service': 'youtube',
                 'hl': 'en_US',
             }
-            tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in tfa_form_strs.items())
+            tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in tfa_form_strs.items())
             tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii')
 
             tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
@@ -175,28 +174,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
             return False
         return True
 
-    def _confirm_age(self):
-        age_form = {
-            'next_url': '/',
-            'action_confirm': 'Confirm',
-        }
-        req = compat_urllib_request.Request(self._AGE_URL,
-            compat_urllib_parse.urlencode(age_form).encode('ascii'))
-
-        self._download_webpage(
-            req, None,
-            note='Confirming age', errnote='Unable to confirm age',
-            fatal=False)
-
     def _real_initialize(self):
         if self._downloader is None:
             return
-        if self._get_login_info()[0] is not None:
-            if not self._set_language():
-                return
+        self._set_language()
         if not self._login():
             return
-        self._confirm_age()
 
 
 class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
@@ -302,6 +285,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
+        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'VP9'},
 
         # Dash webm audio
         '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
@@ -395,8 +379,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             'info_dict': {
                 'id': 'IB3lcPjvWLA',
                 'ext': 'm4a',
-                'title': 'Afrojack - The Spark ft. Spree Wilson',
-                'description': 'md5:9717375db5a9a3992be4668bbf3bc0a8',
+                'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
+                'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
                 'uploader': 'AfrojackVEVO',
                 'uploader_id': 'AfrojackVEVO',
                 'upload_date': '20131011',
@@ -418,7 +402,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 'title': 'Burning Everyone\'s Koran',
                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
             }
-        }
+        },
+        # Normal age-gate video (No vevo, embed allowed)
+        {
+            'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
+            'info_dict': {
+                'id': 'HtVdAasjOgU',
+                'ext': 'mp4',
+                'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
+                'description': 'md5:eca57043abae25130f58f655ad9a7771',
+                'uploader': 'The Witcher',
+                'uploader_id': 'WitcherGame',
+                'upload_date': '20140605',
+            },
+        },
     ]
 
     def __init__(self, *args, **kwargs):
@@ -491,7 +488,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         def gen_sig_code(idxs):
             def _genslice(start, end, step):
                 starts = '' if start == 0 else str(start)
-                ends = (':%d' % (end+step)) if end + step >= 0 else ':'
+                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                 steps = '' if step == 1 else (':%d' % step)
                 return 's[%s%s%s]' % (starts, ends, steps)
 
@@ -529,7 +526,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
     def _parse_sig_js(self, jscode):
         funcname = self._search_regex(
             r'\.sig\|\|([a-zA-Z0-9]+)\(', jscode,
-             'Initial JS player signature function name')
+            'Initial JS player signature function name')
 
         jsi = JSInterpreter(jscode)
         initial_function = jsi.extract_function(funcname)
@@ -606,9 +603,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             return {}
         player_config = json.loads(mobj.group(1))
         try:
-            args = player_config[u'args']
-            caption_url = args[u'ttsurl']
-            timestamp = args[u'timestamp']
+            args = player_config['args']
+            caption_url = args['ttsurl']
+            timestamp = args['timestamp']
             # We get the available subtitles
             list_params = compat_urllib_parse.urlencode({
                 'type': 'list',
@@ -618,7 +615,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             list_url = caption_url + '&' + list_params
             caption_list = self._download_xml(list_url, video_id)
             original_lang_node = caption_list.find('track')
-            if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
+            if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr':
                 self._downloader.report_warning('Video doesn\'t have automatic captions')
                 return {}
             original_lang = original_lang_node.attrib['lang_code']
@@ -651,10 +648,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
     def _extract_from_m3u8(self, manifest_url, video_id):
         url_map = {}
+
         def _get_urls(_manifest):
             lines = _manifest.split('\n')
             urls = filter(lambda l: l and not l.startswith('#'),
-                            lines)
+                          lines)
             return urls
         manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
         formats_urls = _get_urls(manifest)
@@ -680,16 +678,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
         # Get video webpage
         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
-        pref_cookies = [
-            c for c in self._downloader.cookiejar
-            if c.domain == '.youtube.com' and c.name == 'PREF']
-        for pc in pref_cookies:
-            if 'hl=' in pc.value:
-                pc.value = re.sub(r'hl=[^&]+', 'hl=en', pc.value)
-            else:
-                if pc.value:
-                    pc.value += '&'
-                pc.value += 'hl=en'
         video_webpage = self._download_webpage(url, video_id)
 
         # Attempt to extract SWF player URL
@@ -700,7 +688,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             player_url = None
 
         # Get video info
-        self.report_video_info_webpage_download(video_id)
         if re.search(r'player-age-gate-content">', video_webpage) is not None:
             age_gate = True
             # We simulate the access to the video from www.youtube.com/v/{video_id}
@@ -719,15 +706,30 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             video_info = compat_parse_qs(video_info_webpage)
         else:
             age_gate = False
-            for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
-                video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
+            try:
+                # Try looking directly into the video webpage
+                mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
+                if not mobj:
+                    raise ValueError('Could not find ytplayer.config')  # caught below
+                json_code = uppercase_escape(mobj.group(1))
+                ytplayer_config = json.loads(json_code)
+                args = ytplayer_config['args']
+                # Convert to the same format returned by compat_parse_qs
+                video_info = dict((k, [v]) for k, v in args.items())
+                if 'url_encoded_fmt_stream_map' not in args:
+                    raise ValueError('No stream_map present')  # caught below
+            except ValueError:
+                # We fallback to the get_video_info pages (used by the embed page)
+                self.report_video_info_webpage_download(video_id)
+                for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
+                    video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                         % (video_id, el_type))
-                video_info_webpage = self._download_webpage(video_info_url, video_id,
-                                        note=False,
-                                        errnote='unable to download video info webpage')
-                video_info = compat_parse_qs(video_info_webpage)
-                if 'token' in video_info:
-                    break
+                    video_info_webpage = self._download_webpage(video_info_url,
+                        video_id, note=False,
+                        errnote='unable to download video info webpage')
+                    video_info = compat_parse_qs(video_info_webpage)
+                    if 'token' in video_info:
+                        break
         if 'token' not in video_info:
             if 'reason' in video_info:
                 raise ExtractorError(
@@ -850,33 +852,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         # annotations
         video_annotations = None
         if self._downloader.params.get('writeannotations', False):
-                video_annotations = self._extract_annotations(video_id)
-
-        # Decide which formats to download
-        try:
-            mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
-            if not mobj:
-                raise ValueError('Could not find vevo ID')
-            json_code = uppercase_escape(mobj.group(1))
-            ytplayer_config = json.loads(json_code)
-            args = ytplayer_config['args']
-            # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
-            # this signatures are encrypted
-            if 'url_encoded_fmt_stream_map' not in args:
-                raise ValueError('No stream_map present')  # caught below
-            re_signature = re.compile(r'[&,]s=')
-            m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
-            if m_s is not None:
-                self.to_screen('%s: Encrypted signatures detected.' % video_id)
-                video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
-            m_s = re_signature.search(args.get('adaptive_fmts', ''))
-            if m_s is not None:
-                if 'adaptive_fmts' in video_info:
-                    video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts']
-                else:
-                    video_info['adaptive_fmts'] = [args['adaptive_fmts']]
-        except ValueError:
-            pass
+            video_annotations = self._extract_annotations(video_id)
 
         def _map_to_format_list(urlmap):
             formats = []
@@ -900,7 +876,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 'player_url': player_url,
             }]
         elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
-            encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0]
+            encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
             if 'rtmpe%3Dyes' in encoded_url_map:
                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
             url_map = {}
@@ -946,7 +922,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
                         parts_sizes = self._signature_cache_id(encrypted_sig)
                         self.to_screen('{%s} signature length %s, %s' %
-                            (format_id, parts_sizes, player_desc))
+                                       (format_id, parts_sizes, player_desc))
 
                     signature = self._decrypt_signature(
                         encrypted_sig, video_id, player_url, age_gate)
@@ -970,10 +946,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 # However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage.
                 # Luckily, it seems, this case uses some kind of default signature (len == 86), so the
                 # combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here.
-                if age_gate:
-                    dash_manifest_url = video_info.get('dashmpd')[0]
-                else:
-                    dash_manifest_url = ytplayer_config['args']['dashmpd']
+                dash_manifest_url = video_info.get('dashmpd')[0]
+
                 def decrypt_sig(mobj):
                     s = mobj.group(1)
                     dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
@@ -1014,25 +988,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         self._sort_formats(formats)
 
         return {
-            'id':           video_id,
-            'uploader':     video_uploader,
-            'uploader_id':  video_uploader_id,
-            'upload_date':  upload_date,
-            'title':        video_title,
-            'thumbnail':    video_thumbnail,
-            'description':  video_description,
-            'categories':   video_categories,
-            'subtitles':    video_subtitles,
-            'duration':     video_duration,
-            'age_limit':    18 if age_gate else 0,
-            'annotations':  video_annotations,
+            'id': video_id,
+            'uploader': video_uploader,
+            'uploader_id': video_uploader_id,
+            'upload_date': upload_date,
+            'title': video_title,
+            'thumbnail': video_thumbnail,
+            'description': video_description,
+            'categories': video_categories,
+            'subtitles': video_subtitles,
+            'duration': video_duration,
+            'age_limit': 18 if age_gate else 0,
+            'annotations': video_annotations,
             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
-            'view_count':   view_count,
+            'view_count': view_count,
             'like_count': like_count,
             'dislike_count': dislike_count,
-            'formats':      formats,
+            'formats': formats,
         }
 
+
 class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
     IE_DESC = 'YouTube.com playlists'
     _VALID_URL = r"""(?x)(?:
@@ -1046,7 +1021,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                         )
                         (
                             (?:PL|LL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
-                            # Top tracks, they can also include dots 
+                            # Top tracks, they can also include dots
                             |(?:MC)[\w\.]*
                         )
                         .*
@@ -1163,7 +1138,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
             return self._extract_mix(playlist_id)
         if playlist_id.startswith('TL'):
             raise ExtractorError('For downloading YouTube.com top lists, use '
-                'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
+                                 'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
 
         url = self._TEMPLATE_URL % playlist_id
         page = self._download_webpage(url, playlist_id)
@@ -1208,7 +1183,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
 class YoutubeTopListIE(YoutubePlaylistIE):
     IE_NAME = 'youtube:toplist'
     IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"'
-        ' (Example: "yttoplist:music:Top Tracks")')
+               ' (Example: "yttoplist:music:Top Tracks")')
     _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
     _TESTS = [{
         'url': 'yttoplist:music:Trending',
@@ -1230,7 +1205,7 @@ class YoutubeTopListIE(YoutubePlaylistIE):
                 <span[^>]*>.*?%s.*?</span>''' % re.escape(query),
             channel_page, 'list')
         url = compat_urlparse.urljoin('https://www.youtube.com/', link)
-        
+
         video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
         ids = []
         # sometimes the webpage doesn't contain the videos
@@ -1298,7 +1273,7 @@ class YoutubeChannelIE(InfoExtractor):
 
                 ids_in_page = self.extract_videos_from_page(page['content_html'])
                 video_ids.extend(ids_in_page)
-    
+
                 if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                     break
 
@@ -1333,8 +1308,10 @@ class YoutubeUserIE(InfoExtractor):
         # Don't return True if the url can be extracted with other youtube
         # extractor, the regex would is too permissive and it would match.
         other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
-        if any(ie.suitable(url) for ie in other_ies): return False
-        else: return super(YoutubeUserIE, cls).suitable(url)
+        if any(ie.suitable(url) for ie in other_ies):
+            return False
+        else:
+            return super(YoutubeUserIE, cls).suitable(url)
 
     def _real_extract(self, url):
         # Extract username
@@ -1540,8 +1517,8 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
         paging = 0
         for i in itertools.count(1):
             info = self._download_json(self._FEED_TEMPLATE % paging,
-                                          '%s feed' % self._FEED_NAME,
-                                          'Downloading page %s' % i)
+                                       '%s feed' % self._FEED_NAME,
+                                       'Downloading page %s' % i)
             feed_html = info.get('feed_html') or info.get('content_html')
             load_more_widget_html = info.get('load_more_widget_html') or feed_html
             m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
@@ -1557,29 +1534,33 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
             paging = mobj.group('paging')
         return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
 
+
 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
-    IE_DESC = 'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
+    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
     _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
     _FEED_NAME = 'recommended'
     _PLAYLIST_TITLE = 'Youtube Recommended videos'
 
+
 class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
-    IE_DESC = 'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
+    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
     _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
     _FEED_NAME = 'watch_later'
     _PLAYLIST_TITLE = 'Youtube Watch Later'
     _PERSONAL_FEED = True
 
+
 class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
-    IE_DESC = 'Youtube watch history, "ythistory" keyword (requires authentication)'
+    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
     _VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory'
     _FEED_NAME = 'history'
     _PERSONAL_FEED = True
     _PLAYLIST_TITLE = 'Youtube Watch History'
 
+
 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
     IE_NAME = 'youtube:favorites'
-    IE_DESC = 'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
+    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
     _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
     _LOGIN_REQUIRED = True
 
diff --git a/youtube_dl/extractor/zingmp3.py b/youtube_dl/extractor/zingmp3.py
new file mode 100644 (file)
index 0000000..1afbe68
--- /dev/null
@@ -0,0 +1,107 @@
+# coding=utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class ZingMp3BaseInfoExtractor(InfoExtractor):
+
+    @staticmethod
+    def _extract_item(item):
+        title = item.find('./title').text.strip()
+        source = item.find('./source').text
+        extension = item.attrib['type']
+        thumbnail = item.find('./backimage').text
+
+        return {
+            'title': title,
+            'url': source,
+            'ext': extension,
+            'thumbnail': thumbnail,
+        }
+
+    def _extract_player_xml(self, player_xml_url, id, playlist_title=None):
+        player_xml = self._download_xml(player_xml_url, id, 'Downloading Player XML')
+        items = player_xml.findall('./item')
+
+        if len(items) == 1:
+            # one single song
+            data = self._extract_item(items[0])
+            data['id'] = id
+
+            return data
+        else:
+            # playlist of songs
+            entries = []
+
+            for i, item in enumerate(items, 1):
+                entry = self._extract_item(item)
+                entry['id'] = '%s-%d' % (id, i)
+                entries.append(entry)
+
+            return {
+                '_type': 'playlist',
+                'id': id,
+                'title': playlist_title,
+                'entries': entries,
+            }
+
+
+class ZingMp3SongIE(ZingMp3BaseInfoExtractor):
+    _VALID_URL = r'https?://mp3\.zing\.vn/bai-hat/(?P<slug>[^/]+)/(?P<song_id>\w+)\.html'
+    _TESTS = [{
+        'url': 'http://mp3.zing.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html',
+        'md5': 'ead7ae13693b3205cbc89536a077daed',
+        'info_dict': {
+            'id': 'ZWZB9WAB',
+            'title': 'Xa Mãi Xa',
+            'ext': 'mp3',
+            'thumbnail': 're:^https?://.*\.jpg$',
+        },
+    }]
+    IE_NAME = 'zingmp3:song'
+    IE_DESC = 'mp3.zing.vn songs'
+
+    def _real_extract(self, url):
+        matched = re.match(self._VALID_URL, url)
+        slug = matched.group('slug')
+        song_id = matched.group('song_id')
+
+        webpage = self._download_webpage(
+            'http://mp3.zing.vn/bai-hat/%s/%s.html' % (slug, song_id), song_id)
+
+        player_xml_url = self._search_regex(
+            r'&amp;xmlURL=(?P<xml_url>[^&]+)&', webpage, 'player xml url')
+
+        return self._extract_player_xml(player_xml_url, song_id)
+
+
+class ZingMp3AlbumIE(ZingMp3BaseInfoExtractor):
+    _VALID_URL = r'https?://mp3\.zing\.vn/album/(?P<slug>[^/]+)/(?P<album_id>\w+)\.html'
+    _TESTS = [{
+        'url': 'http://mp3.zing.vn/album/Lau-Dai-Tinh-Ai-Bang-Kieu-Minh-Tuyet/ZWZBWDAF.html',
+        'info_dict': {
+            '_type': 'playlist',
+            'id': 'ZWZBWDAF',
+            'title': 'Lâu Đài Tình Ái - Bằng Kiều ft. Minh Tuyết | Album 320 lossless',
+        },
+        'playlist_count': 10,
+    }]
+    IE_NAME = 'zingmp3:album'
+    IE_DESC = 'mp3.zing.vn albums'
+
+    def _real_extract(self, url):
+        matched = re.match(self._VALID_URL, url)
+        slug = matched.group('slug')
+        album_id = matched.group('album_id')
+
+        webpage = self._download_webpage(
+            'http://mp3.zing.vn/album/%s/%s.html' % (slug, album_id), album_id)
+        player_xml_url = self._search_regex(
+            r'&amp;xmlURL=(?P<xml_url>[^&]+)&', webpage, 'player xml url')
+
+        return self._extract_player_xml(
+            player_xml_url, album_id,
+            playlist_title=self._og_search_title(webpage))
index 16aa6ffca1c8f1e3199d0b2d570307ec5cf7be91..2e8c715084616a565472fb8700a8d995cb8c6d04 100644 (file)
@@ -222,7 +222,7 @@ def parseOpts(overrideArguments=None):
     selection.add_option(
         '--no-playlist',
         action='store_true', dest='noplaylist', default=False,
-        help='download only the currently playing video')
+        help='If the URL refers to a video and a playlist, download only the video.')
     selection.add_option(
         '--age-limit',
         metavar='YEARS', dest='age_limit', default=None, type=int,
@@ -262,7 +262,8 @@ def parseOpts(overrideArguments=None):
     video_format.add_option(
         '-f', '--format',
         action='store', dest='format', metavar='FORMAT', default=None,
-        help='video format code, specify the order of preference using'
+        help=(
+            'video format code, specify the order of preference using'
             ' slashes: -f 22/17/18 .  -f mp4 , -f m4a and  -f flv  are also'
             ' supported. You can also use the special names "best",'
             ' "bestvideo", "bestaudio", "worst", "worstvideo" and'
@@ -271,7 +272,7 @@ def parseOpts(overrideArguments=None):
             ' -f  136/137/mp4/bestvideo,140/m4a/bestaudio.'
             ' You can merge the video and audio of two formats into a single'
             ' file using -f <video-format>+<audio-format> (requires ffmpeg or'
-            ' avconv), for example -f bestvideo+bestaudio.')
+            ' avconv), for example -f bestvideo+bestaudio.'))
     video_format.add_option(
         '--all-formats',
         action='store_const', dest='format', const='all',
@@ -621,7 +622,7 @@ def parseOpts(overrideArguments=None):
     postproc.add_option(
         '--exec',
         metavar='CMD', dest='exec_cmd',
-        help='Execute a command on the file after downloading, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'' )
+        help='Execute a command on the file after downloading, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'')
 
     parser.add_option_group(general)
     parser.add_option_group(selection)
index 6ac67cbaed2517d6c908634619c81e0a5bcf36d8..fb367ebe4474063a279fcd096b21461d11deafe8 100644 (file)
@@ -1,3 +1,4 @@
+from __future__ import unicode_literals
 
 from .atomicparsley import AtomicParsleyPP
 from .ffmpeg import (
index 788f94d021fa71648c3c5f521f8344f94d981e61..e54ae678da17bef5c5848bc7165d42d5eec912a4 100644 (file)
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 from ..utils import PostProcessingError
 
 
index baf1b1945126776c3ad64dd72d65b9223ab845a0..09db43611a7c288e77c2cacaf96f266c541b2bbc 100644 (file)
@@ -26,4 +26,3 @@ class ExecAfterDownloadPP(PostProcessor):
                 'Command returned error code %d' % retCode)
 
         return None, information  # by default, keep file and do nothing
-
index f3f2743c078db157c187114760fbf65f6587e662..9303b8378b8065d84ed2d064ab76aacb463ef6bf 100644 (file)
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import os
 import subprocess
 import sys
@@ -33,12 +35,12 @@ class FFmpegPostProcessor(PostProcessor):
 
     def check_version(self):
         if not self._executable:
-            raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')
+            raise FFmpegPostProcessorError('ffmpeg or avconv not found. Please install one.')
 
         REQUIRED_VERSION = '1.0'
         if is_outdated_version(
                 self._versions[self._executable], REQUIRED_VERSION):
-            warning = u'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
+            warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
                 self._executable, self._executable, REQUIRED_VERSION)
             if self._downloader:
                 self._downloader.report_warning(warning)
@@ -84,7 +86,7 @@ class FFmpegPostProcessor(PostProcessor):
                [encodeFilename(self._ffmpeg_filename_argument(out_path), True)])
 
         if self._downloader.params.get('verbose', False):
-            self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))
+            self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd))
         p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         stdout, stderr = p.communicate()
         if p.returncode != 0:
@@ -100,8 +102,8 @@ class FFmpegPostProcessor(PostProcessor):
 
     def _ffmpeg_filename_argument(self, fn):
         # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
-        if fn.startswith(u'-'):
-            return u'./' + fn
+        if fn.startswith('-'):
+            return './' + fn
         return fn
 
 
@@ -117,7 +119,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
     def get_audio_codec(self, path):
 
         if not self._probe_executable:
-            raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.')
+            raise PostProcessingError('ffprobe or avprobe not found. Please install one.')
         try:
             cmd = [
                 self._probe_executable,
@@ -153,7 +155,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
 
         filecodec = self.get_audio_codec(path)
         if filecodec is None:
-            raise PostProcessingError(u'WARNING: unable to obtain file audio codec with ffprobe')
+            raise PostProcessingError('WARNING: unable to obtain file audio codec with ffprobe')
 
         uses_avconv = self._uses_avconv()
         more_opts = []
@@ -202,7 +204,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
                 extension = 'wav'
                 more_opts += ['-f', 'wav']
 
-        prefix, sep, ext = path.rpartition(u'.') # not os.path.splitext, since the latter does not work on unicode in all setups
+        prefix, sep, ext = path.rpartition('.')  # not os.path.splitext, since the latter does not work on unicode in all setups
         new_path = prefix + sep + extension
 
         # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
@@ -211,16 +213,16 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
 
         try:
             if self._nopostoverwrites and os.path.exists(encodeFilename(new_path)):
-                self._downloader.to_screen(u'[youtube] Post-process file %s exists, skipping' % new_path)
+                self._downloader.to_screen('[youtube] Post-process file %s exists, skipping' % new_path)
             else:
-                self._downloader.to_screen(u'[' + self._executable + '] Destination: ' + new_path)
+                self._downloader.to_screen('[' + self._executable + '] Destination: ' + new_path)
                 self.run_ffmpeg(path, new_path, acodec, more_opts)
         except:
-            etype,e,tb = sys.exc_info()
+            etype, e, tb = sys.exc_info()
             if isinstance(e, AudioConversionError):
-                msg = u'audio conversion failed: ' + e.msg
+                msg = 'audio conversion failed: ' + e.msg
             else:
-                msg = u'error running ' + self._executable
+                msg = 'error running ' + self._executable
             raise PostProcessingError(msg)
 
         # Try to update the date time for extracted audio file.
@@ -228,30 +230,30 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
             try:
                 os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
             except:
-                self._downloader.report_warning(u'Cannot update utime of audio file')
+                self._downloader.report_warning('Cannot update utime of audio file')
 
         information['filepath'] = new_path
-        return self._nopostoverwrites,information
+        return self._nopostoverwrites, information
 
 
 class FFmpegVideoConvertor(FFmpegPostProcessor):
-    def __init__(self, downloader=None,preferedformat=None):
+    def __init__(self, downloader=None, preferedformat=None):
         super(FFmpegVideoConvertor, self).__init__(downloader)
-        self._preferedformat=preferedformat
+        self._preferedformat = preferedformat
 
     def run(self, information):
         path = information['filepath']
-        prefix, sep, ext = path.rpartition(u'.')
+        prefix, sep, ext = path.rpartition('.')
         outpath = prefix + sep + self._preferedformat
         if information['ext'] == self._preferedformat:
-            self._downloader.to_screen(u'[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat))
-            return True,information
-        self._downloader.to_screen(u'['+'ffmpeg'+'] Converting video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) +outpath)
+            self._downloader.to_screen('[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat))
+            return True, information
+        self._downloader.to_screen('[' + 'ffmpeg' + '] Converting video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) + outpath)
         self.run_ffmpeg(path, outpath, [])
         information['filepath'] = outpath
         information['format'] = self._preferedformat
         information['ext'] = self._preferedformat
-        return False,information
+        return False, information
 
 
 class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
@@ -453,11 +455,11 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
         return cls._lang_map.get(code[:2])
 
     def run(self, information):
-        if information['ext'] != u'mp4':
-            self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files')
+        if information['ext'] != 'mp4':
+            self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 files')
             return True, information
         if not information.get('subtitles'):
-            self._downloader.to_screen(u'[ffmpeg] There aren\'t any subtitles to embed') 
+            self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to embed')
             return True, information
 
         sub_langs = [key for key in information['subtitles']]
@@ -466,14 +468,14 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
 
         opts = ['-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy']
         for (i, lang) in enumerate(sub_langs):
-            opts.extend(['-map', '%d:0' % (i+1), '-c:s:%d' % i, 'mov_text'])
+            opts.extend(['-map', '%d:0' % (i + 1), '-c:s:%d' % i, 'mov_text'])
             lang_code = self._conver_lang_code(lang)
             if lang_code is not None:
                 opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
         opts.extend(['-f', 'mp4'])
 
-        temp_filename = filename + u'.temp'
-        self._downloader.to_screen(u'[ffmpeg] Embedding subtitles in \'%s\'' % filename)
+        temp_filename = filename + '.temp'
+        self._downloader.to_screen('[ffmpeg] Embedding subtitles in \'%s\'' % filename)
         self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
         os.remove(encodeFilename(filename))
         os.rename(encodeFilename(temp_filename), encodeFilename(filename))
@@ -494,13 +496,13 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
             metadata['artist'] = info['uploader_id']
 
         if not metadata:
-            self._downloader.to_screen(u'[ffmpeg] There isn\'t any metadata to add')
+            self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add')
             return True, info
 
         filename = info['filepath']
         temp_filename = prepend_extension(filename, 'temp')
 
-        if info['ext'] == u'm4a':
+        if info['ext'] == 'm4a':
             options = ['-vn', '-acodec', 'copy']
         else:
             options = ['-c', 'copy']
@@ -508,7 +510,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
         for (name, value) in metadata.items():
             options.extend(['-metadata', '%s=%s' % (name, value)])
 
-        self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename)
+        self._downloader.to_screen('[ffmpeg] Adding metadata to \'%s\'' % filename)
         self.run_ffmpeg(filename, temp_filename, options)
         os.remove(encodeFilename(filename))
         os.rename(encodeFilename(temp_filename), encodeFilename(filename))
@@ -519,7 +521,7 @@ class FFmpegMergerPP(FFmpegPostProcessor):
     def run(self, info):
         filename = info['filepath']
         args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0', '-shortest']
-        self._downloader.to_screen(u'[ffmpeg] Merging formats into "%s"' % filename)
+        self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % filename)
         self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args)
         return True, info
 
@@ -530,7 +532,7 @@ class FFmpegAudioFixPP(FFmpegPostProcessor):
         temp_filename = prepend_extension(filename, 'temp')
 
         options = ['-vn', '-acodec', 'copy']
-        self._downloader.to_screen(u'[ffmpeg] Fixing audio file "%s"' % filename)
+        self._downloader.to_screen('[ffmpeg] Fixing audio file "%s"' % filename)
         self.run_ffmpeg(filename, temp_filename, options)
 
         os.remove(encodeFilename(filename))
index b5cae41c8cd061a494a722c1db4261e3eca597b8..f6c63fe97545d86947ef1ef4bf2d70e9ea7144be 100644 (file)
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import os
 import subprocess
 import sys
@@ -108,4 +110,3 @@ class XAttrMetadataPP(PostProcessor):
         except (subprocess.CalledProcessError, OSError):
             self._downloader.report_error("This filesystem doesn't support extended attributes. (You may have to enable them in your /etc/fstab)")
             return False, info
-
index 99220940f8efde29180a22f035b9ceaeee46a11a..2bd264b306f8dc8f53bf6e0b9dec97ecd6e85cb4 100644 (file)
@@ -827,4 +827,3 @@ class SWFInterpreter(object):
 
         avm_class.method_pyfunctions[func_name] = resfunc
         return resfunc
-
index 27308376130b97cc9cf6059b1f9dd4a64753ef42..4c07a558e7ad2f2db422ed2a0124df49efc6b09c 100644 (file)
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import io
 import json
 import traceback
@@ -7,20 +9,18 @@ import subprocess
 import sys
 from zipimport import zipimporter
 
-from .utils import (
+from .compat import (
     compat_str,
     compat_urllib_request,
 )
 from .version import __version__
 
+
 def rsa_verify(message, signature, key):
     from struct import pack
     from hashlib import sha256
-    from sys import version_info
-    def b(x):
-        if version_info[0] == 2: return x
-        else: return x.encode('latin1')
-    assert(type(message) == type(b('')))
+
+    assert isinstance(message, bytes)
     block_size = 0
     n = key[0]
     while n:
@@ -31,14 +31,18 @@ def rsa_verify(message, signature, key):
     while signature:
         raw_bytes.insert(0, pack("B", signature & 0xFF))
         signature >>= 8
-    signature = (block_size - len(raw_bytes)) * b('\x00') + b('').join(raw_bytes)
-    if signature[0:2] != b('\x00\x01'): return False
+    signature = (block_size - len(raw_bytes)) * b'\x00' + b''.join(raw_bytes)
+    if signature[0:2] != b'\x00\x01':
+        return False
     signature = signature[2:]
-    if not b('\x00') in signature: return False
-    signature = signature[signature.index(b('\x00'))+1:]
-    if not signature.startswith(b('\x30\x31\x30\x0D\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01\x05\x00\x04\x20')): return False
+    if b'\x00' not in signature:
+        return False
+    signature = signature[signature.index(b'\x00') + 1:]
+    if not signature.startswith(b'\x30\x31\x30\x0D\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01\x05\x00\x04\x20'):
+        return False
     signature = signature[19:]
-    if signature != sha256(message).digest(): return False
+    if signature != sha256(message).digest():
+        return False
     return True
 
 
@@ -51,18 +55,19 @@ def update_self(to_screen, verbose):
     UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
 
     if not isinstance(globals().get('__loader__'), zipimporter) and not hasattr(sys, "frozen"):
-        to_screen(u'It looks like you installed youtube-dl with a package manager, pip, setup.py or a tarball. Please use that to update.')
+        to_screen('It looks like you installed youtube-dl with a package manager, pip, setup.py or a tarball. Please use that to update.')
         return
 
     # Check if there is a new version
     try:
         newversion = compat_urllib_request.urlopen(VERSION_URL).read().decode('utf-8').strip()
     except:
-        if verbose: to_screen(compat_str(traceback.format_exc()))
-        to_screen(u'ERROR: can\'t find the current version. Please try again later.')
+        if verbose:
+            to_screen(compat_str(traceback.format_exc()))
+        to_screen('ERROR: can\'t find the current version. Please try again later.')
         return
     if newversion == __version__:
-        to_screen(u'youtube-dl is up-to-date (' + __version__ + ')')
+        to_screen('youtube-dl is up-to-date (' + __version__ + ')')
         return
 
     # Download and check versions info
@@ -70,16 +75,17 @@ def update_self(to_screen, verbose):
         versions_info = compat_urllib_request.urlopen(JSON_URL).read().decode('utf-8')
         versions_info = json.loads(versions_info)
     except:
-        if verbose: to_screen(compat_str(traceback.format_exc()))
-        to_screen(u'ERROR: can\'t obtain versions info. Please try again later.')
+        if verbose:
+            to_screen(compat_str(traceback.format_exc()))
+        to_screen('ERROR: can\'t obtain versions info. Please try again later.')
         return
     if not 'signature' in versions_info:
-        to_screen(u'ERROR: the versions file is not signed or corrupted. Aborting.')
+        to_screen('ERROR: the versions file is not signed or corrupted. Aborting.')
         return
     signature = versions_info['signature']
     del versions_info['signature']
     if not rsa_verify(json.dumps(versions_info, sort_keys=True).encode('utf-8'), signature, UPDATES_RSA_KEY):
-        to_screen(u'ERROR: the versions file signature is invalid. Aborting.')
+        to_screen('ERROR: the versions file signature is invalid. Aborting.')
         return
 
     version_id = versions_info['latest']
@@ -87,10 +93,10 @@ def update_self(to_screen, verbose):
     def version_tuple(version_str):
         return tuple(map(int, version_str.split('.')))
     if version_tuple(__version__) >= version_tuple(version_id):
-        to_screen(u'youtube-dl is up to date (%s)' % __version__)
+        to_screen('youtube-dl is up to date (%s)' % __version__)
         return
 
-    to_screen(u'Updating to version ' + version_id + ' ...')
+    to_screen('Updating to version ' + version_id + ' ...')
     version = versions_info['versions'][version_id]
 
     print_notes(to_screen, versions_info['versions'])
@@ -98,11 +104,11 @@ def update_self(to_screen, verbose):
     filename = sys.argv[0]
     # Py2EXE: Filename could be different
     if hasattr(sys, "frozen") and not os.path.isfile(filename):
-        if os.path.isfile(filename + u'.exe'):
-            filename += u'.exe'
+        if os.path.isfile(filename + '.exe'):
+            filename += '.exe'
 
     if not os.access(filename, os.W_OK):
-        to_screen(u'ERROR: no write permissions on %s' % filename)
+        to_screen('ERROR: no write permissions on %s' % filename)
         return
 
     # Py2EXE
@@ -110,7 +116,7 @@ def update_self(to_screen, verbose):
         exe = os.path.abspath(filename)
         directory = os.path.dirname(exe)
         if not os.access(directory, os.W_OK):
-            to_screen(u'ERROR: no write permissions on %s' % directory)
+            to_screen('ERROR: no write permissions on %s' % directory)
             return
 
         try:
@@ -118,40 +124,43 @@ def update_self(to_screen, verbose):
             newcontent = urlh.read()
             urlh.close()
         except (IOError, OSError):
-            if verbose: to_screen(compat_str(traceback.format_exc()))
-            to_screen(u'ERROR: unable to download latest version')
+            if verbose:
+                to_screen(compat_str(traceback.format_exc()))
+            to_screen('ERROR: unable to download latest version')
             return
 
         newcontent_hash = hashlib.sha256(newcontent).hexdigest()
         if newcontent_hash != version['exe'][1]:
-            to_screen(u'ERROR: the downloaded file hash does not match. Aborting.')
+            to_screen('ERROR: the downloaded file hash does not match. Aborting.')
             return
 
         try:
             with open(exe + '.new', 'wb') as outf:
                 outf.write(newcontent)
         except (IOError, OSError):
-            if verbose: to_screen(compat_str(traceback.format_exc()))
-            to_screen(u'ERROR: unable to write the new version')
+            if verbose:
+                to_screen(compat_str(traceback.format_exc()))
+            to_screen('ERROR: unable to write the new version')
             return
 
         try:
             bat = os.path.join(directory, 'youtube-dl-updater.bat')
             with io.open(bat, 'w') as batfile:
-                batfile.write(u"""
+                batfile.write('''
 @echo off
 echo Waiting for file handle to be closed ...
 ping 127.0.0.1 -n 5 -w 1000 > NUL
 move /Y "%s.new" "%s" > NUL
 echo Updated youtube-dl to version %s.
 start /b "" cmd /c del "%%~f0"&exit /b"
-                \n""" % (exe, exe, version_id))
+                \n''' % (exe, exe, version_id))
 
             subprocess.Popen([bat])  # Continues to run in the background
             return  # Do not show premature success messages
         except (IOError, OSError):
-            if verbose: to_screen(compat_str(traceback.format_exc()))
-            to_screen(u'ERROR: unable to overwrite current version')
+            if verbose:
+                to_screen(compat_str(traceback.format_exc()))
+            to_screen('ERROR: unable to overwrite current version')
             return
 
     # Zip unix package
@@ -161,35 +170,39 @@ start /b "" cmd /c del "%%~f0"&exit /b"
             newcontent = urlh.read()
             urlh.close()
         except (IOError, OSError):
-            if verbose: to_screen(compat_str(traceback.format_exc()))
-            to_screen(u'ERROR: unable to download latest version')
+            if verbose:
+                to_screen(compat_str(traceback.format_exc()))
+            to_screen('ERROR: unable to download latest version')
             return
 
         newcontent_hash = hashlib.sha256(newcontent).hexdigest()
         if newcontent_hash != version['bin'][1]:
-            to_screen(u'ERROR: the downloaded file hash does not match. Aborting.')
+            to_screen('ERROR: the downloaded file hash does not match. Aborting.')
             return
 
         try:
             with open(filename, 'wb') as outf:
                 outf.write(newcontent)
         except (IOError, OSError):
-            if verbose: to_screen(compat_str(traceback.format_exc()))
-            to_screen(u'ERROR: unable to overwrite current version')
+            if verbose:
+                to_screen(compat_str(traceback.format_exc()))
+            to_screen('ERROR: unable to overwrite current version')
             return
 
-    to_screen(u'Updated youtube-dl. Restart youtube-dl to use the new version.')
+    to_screen('Updated youtube-dl. Restart youtube-dl to use the new version.')
+
 
 def get_notes(versions, fromVersion):
     notes = []
-    for v,vdata in sorted(versions.items()):
+    for v, vdata in sorted(versions.items()):
         if v > fromVersion:
             notes.extend(vdata.get('notes', []))
     return notes
 
+
 def print_notes(to_screen, versions, fromVersion=__version__):
     notes = get_notes(versions, fromVersion)
     if notes:
-        to_screen(u'PLEASE NOTE:')
+        to_screen('PLEASE NOTE:')
         for note in notes:
             to_screen(note)
index 5be7cf99200c57639af17b1dc89f9a861658f331..4d3cbac74aaebdbe0b314690b7dea07e2e2371e2 100644 (file)
@@ -41,6 +41,7 @@ from .compat import (
     compat_urllib_parse_urlparse,
     compat_urllib_request,
     compat_urlparse,
+    shlex_quote,
 )
 
 
@@ -55,6 +56,7 @@ std_headers = {
     'Accept-Language': 'en-us,en;q=0.5',
 }
 
+
 def preferredencoding():
     """Get preferred encoding.
 
@@ -129,7 +131,7 @@ if sys.version_info >= (2, 7):
         """ Find the xpath xpath[@key=val] """
         assert re.match(r'^[a-zA-Z-]+$', key)
         assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
-        expr = xpath + u"[@%s='%s']" % (key, val)
+        expr = xpath + "[@%s='%s']" % (key, val)
         return node.find(expr)
 else:
     def find_xpath_attr(node, xpath, key, val):
@@ -145,6 +147,8 @@ else:
 
 # On python2.6 the xml.etree.ElementTree.Element methods don't support
 # the namespace parameter
+
+
 def xpath_with_ns(path, ns_map):
     components = [c.split(':') for c in path.split('/')]
     replaced = []
@@ -236,9 +240,9 @@ def sanitize_open(filename, open_mode):
 
         # In case of error, try to remove win32 forbidden chars
         alt_filename = os.path.join(
-                        re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
-                        for path_part in os.path.split(filename)
-                       )
+            re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
+            for path_part in os.path.split(filename)
+        )
         if alt_filename == filename:
             raise
         else:
@@ -255,6 +259,7 @@ def timeconvert(timestr):
         timestamp = email.utils.mktime_tz(timetuple)
     return timestamp
 
+
 def sanitize_filename(s, restricted=False, is_id=False):
     """Sanitizes a string so it could be used as part of a filename.
     If restricted is set, use a stricter subset of allowed characters.
@@ -287,6 +292,7 @@ def sanitize_filename(s, restricted=False, is_id=False):
             result = '_'
     return result
 
+
 def orderedSet(iterable):
     """ Remove all duplicates from the input iterable """
     res = []
@@ -371,6 +377,7 @@ def decodeOption(optval):
     assert isinstance(optval, compat_str)
     return optval
 
+
 def formatSeconds(secs):
     if secs > 3600:
         return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
@@ -423,6 +430,7 @@ def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
 
 class ExtractorError(Exception):
     """Error during info extraction."""
+
     def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
         """ tb, if given, is the original traceback (so that it can be printed out).
         If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
@@ -467,6 +475,7 @@ class DownloadError(Exception):
     configured to continue on errors. They will contain the appropriate
     error message.
     """
+
     def __init__(self, msg, exc_info=None):
         """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
         super(DownloadError, self).__init__(msg)
@@ -488,9 +497,11 @@ class PostProcessingError(Exception):
     This exception may be raised by PostProcessor's .run() method to
     indicate an error in the postprocessing task.
     """
+
     def __init__(self, msg):
         self.msg = msg
 
+
 class MaxDownloadsReached(Exception):
     """ --max-downloads limit has been reached. """
     pass
@@ -520,6 +531,7 @@ class ContentTooShortError(Exception):
         self.downloaded = downloaded
         self.expected = expected
 
+
 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
     """Handler for HTTP requests and responses.
 
@@ -639,7 +651,7 @@ def unified_strdate(date_str):
         return None
 
     upload_date = None
-    #Replace commas
+    # Replace commas
     date_str = date_str.replace(',', ' ')
     # %z (UTC offset) is only supported in python>=3.2
     date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
@@ -680,6 +692,7 @@ def unified_strdate(date_str):
             upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
     return upload_date
 
+
 def determine_ext(url, default_ext='unknown_video'):
     if url is None:
         return default_ext
@@ -689,9 +702,11 @@ def determine_ext(url, default_ext='unknown_video'):
     else:
         return default_ext
 
+
 def subtitles_filename(filename, sub_lang, sub_format):
     return filename.rsplit('.', 1)[0] + '.' + sub_lang + '.' + sub_format
 
+
 def date_from_str(date_str):
     """
     Return a datetime object from a string in the format YYYYMMDD or
@@ -706,7 +721,7 @@ def date_from_str(date_str):
         if sign == '-':
             time = -time
         unit = match.group('unit')
-        #A bad aproximation?
+        # A bad aproximation?
         if unit == 'month':
             unit = 'day'
             time *= 30
@@ -717,7 +732,8 @@ def date_from_str(date_str):
         delta = datetime.timedelta(**{unit: time})
         return today + delta
     return datetime.datetime.strptime(date_str, "%Y%m%d").date()
-    
+
+
 def hyphenate_date(date_str):
     """
     Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
@@ -727,8 +743,10 @@ def hyphenate_date(date_str):
     else:
         return date_str
 
+
 class DateRange(object):
     """Represents a time interval between two dates"""
+
     def __init__(self, start=None, end=None):
         """start and end must be strings in the format accepted by date"""
         if start is not None:
@@ -741,17 +759,20 @@ class DateRange(object):
             self.end = datetime.datetime.max.date()
         if self.start > self.end:
             raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
+
     @classmethod
     def day(cls, day):
         """Returns a range that only contains the given day"""
-        return cls(day,day)
+        return cls(day, day)
+
     def __contains__(self, date):
         """Check if the date is in the range"""
         if not isinstance(date, datetime.date):
             date = date_from_str(date)
         return self.start <= date <= self.end
+
     def __str__(self):
-        return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
+        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
 
 
 def platform_name():
@@ -1025,6 +1046,57 @@ def format_bytes(bytes):
     return '%.2f%s' % (converted, suffix)
 
 
+def parse_filesize(s):
+    if s is None:
+        return None
+
+    # The lower-case forms are of course incorrect and inofficial,
+    # but we support those too
+    _UNIT_TABLE = {
+        'B': 1,
+        'b': 1,
+        'KiB': 1024,
+        'KB': 1000,
+        'kB': 1024,
+        'Kb': 1000,
+        'MiB': 1024 ** 2,
+        'MB': 1000 ** 2,
+        'mB': 1024 ** 2,
+        'Mb': 1000 ** 2,
+        'GiB': 1024 ** 3,
+        'GB': 1000 ** 3,
+        'gB': 1024 ** 3,
+        'Gb': 1000 ** 3,
+        'TiB': 1024 ** 4,
+        'TB': 1000 ** 4,
+        'tB': 1024 ** 4,
+        'Tb': 1000 ** 4,
+        'PiB': 1024 ** 5,
+        'PB': 1000 ** 5,
+        'pB': 1024 ** 5,
+        'Pb': 1000 ** 5,
+        'EiB': 1024 ** 6,
+        'EB': 1000 ** 6,
+        'eB': 1024 ** 6,
+        'Eb': 1000 ** 6,
+        'ZiB': 1024 ** 7,
+        'ZB': 1000 ** 7,
+        'zB': 1024 ** 7,
+        'Zb': 1000 ** 7,
+        'YiB': 1024 ** 8,
+        'YB': 1000 ** 8,
+        'yB': 1024 ** 8,
+        'Yb': 1000 ** 8,
+    }
+
+    units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE)
+    m = re.match(r'(?P<num>[0-9]+(?:\.[0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
+    if not m:
+        return None
+
+    return int(float(m.group('num')) * _UNIT_TABLE[m.group('unit')])
+
+
 def get_term_width():
     columns = compat_getenv('COLUMNS', None)
     if columns:
@@ -1149,7 +1221,7 @@ def parse_duration(s):
 
 
 def prepend_extension(filename, ext):
-    name, real_ext = os.path.splitext(filename) 
+    name, real_ext = os.path.splitext(filename)
     return '{0}.{1}{2}'.format(name, ext, real_ext)
 
 
@@ -1433,3 +1505,8 @@ def ytdl_is_updateable():
     from zipimport import zipimporter
 
     return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
+
+
+def args_to_str(args):
+    # Get a short string representation for a subprocess command
+    return ' '.join(shlex_quote(a) for a in args)
index 6be5d07c481ae9e4aff6b247a1b5083a2564be54..847a47c458c011237aafd82fb70a8b6225d25ceb 100644 (file)
@@ -1,2 +1,3 @@
+from __future__ import unicode_literals
 
-__version__ = '2014.11.23'
+__version__ = '2014.12.01'