# DESCRIPTION
**youtube-dl** is a small command-line program to download videos from
YouTube.com and a few more sites. It requires the Python interpreter, version
-2.6, 2.7, or 3.3+, and it is not platform specific. It should work on
+2.6, 2.7, or 3.2+, and it is not platform specific. It should work on
your Unix box, on Windows or on Mac OS X. It is released to the public domain,
which means you can modify it, redistribute it or use it however you like.
COUNT views
--max-views COUNT Do not download any videos with more than
COUNT views
- --no-playlist download only the currently playing video
+ --no-playlist If the URL refers to a video and a
+ playlist, download only the video.
--age-limit YEARS download only videos suitable for the given
age
--download-archive FILE Download only videos not listed in the
def _real_extract(self, url):
video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
# TODO more code goes here, for example ...
- webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
return {
'id': video_id,
'title': title,
+ 'description': self._og_search_description(webpage),
# TODO more properties (see youtube_dl/extractor/common.py)
}
```
youtube-dl is a small command-line program to download videos from
YouTube.com and a few more sites. It requires the Python interpreter,
-version 2.6, 2.7, or 3.3+, and it is not platform specific. It should
+version 2.6, 2.7, or 3.2+, and it is not platform specific. It should
work on your Unix box, on Windows or on Mac OS X. It is released to the
public domain, which means you can modify it, redistribute it or use it
however you like.
COUNT views
--max-views COUNT Do not download any videos with more than
COUNT views
- --no-playlist download only the currently playing video
+ --no-playlist If the URL refers to a video and a
+ playlist, download only the video.
--age-limit YEARS download only videos suitable for the given
age
--download-archive FILE Download only videos not listed in the
def _real_extract(self, url):
video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
# TODO more code goes here, for example ...
- webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
return {
'id': video_id,
'title': title,
+ 'description': self._og_search_description(webpage),
# TODO more properties (see youtube_dl/extractor/common.py)
}
```
#!/usr/bin/env python
+from __future__ import unicode_literals
+
import os
from os.path import dirname as dirn
import sys
BASH_COMPLETION_FILE = "youtube-dl.bash-completion"
BASH_COMPLETION_TEMPLATE = "devscripts/bash-completion.in"
+
def build_completion(opt_parser):
opts_flag = []
for group in opt_parser.option_groups:
for option in group.option_list:
- #for every long flag
+ # for every long flag
opts_flag.append(option.get_opt_string())
with open(BASH_COMPLETION_TEMPLATE) as f:
template = f.read()
with open(BASH_COMPLETION_FILE, "w") as f:
- #just using the special char
+ # just using the special char
filled_template = template.replace("{{flags}}", " ".join(opts_flag))
f.write(filled_template)
def win_service_main(service_name, real_main, argc, argv_raw):
try:
- #args = [argv_raw[i].value for i in range(argc)]
+ # args = [argv_raw[i].value for i in range(argc)]
stop_event = threading.Event()
handler = HandlerEx(functools.partial(stop_event, win_service_handler))
h = advapi32.RegisterServiceCtrlHandlerExW(service_name, handler, None)
#==============================================================================
+
class BuildError(Exception):
def __init__(self, output, code=500):
self.output = output
class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
- actionDict = { 'build': Builder, 'download': Builder } # They're the same, no more caching.
+ actionDict = {'build': Builder, 'download': Builder} # They're the same, no more caching.
def do_GET(self):
path = urlparse.urlparse(self.path)
#!/usr/bin/env python
+from __future__ import unicode_literals
"""
This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check
'batch-file': ['--require-parameter'],
}
+
def build_completion(opt_parser):
commands = []
for group in opt_parser.option_groups:
for option in group.option_list:
long_option = option.get_opt_string().strip('-')
- help_msg = shell_quote([option.help])
complete_cmd = ['complete', '--command', 'youtube-dl', '--long-option', long_option]
if option._short_opts:
complete_cmd += ['--short-option', option._short_opts[0].strip('-')]
#!/usr/bin/env python3
+from __future__ import unicode_literals
import json
import sys
#!/usr/bin/env python3
+from __future__ import unicode_literals
+
import hashlib
-import shutil
-import subprocess
-import tempfile
import urllib.request
import json
#!/usr/bin/env python3
+from __future__ import unicode_literals, with_statement
import rsa
import json
versions_info = json.load(open('update/versions.json'))
if 'signature' in versions_info:
- del versions_info['signature']
+ del versions_info['signature']
print('Enter the PKCS1 private key, followed by a blank line:')
privkey = b''
while True:
- try:
- line = input()
- except EOFError:
- break
- if line == '':
- break
- privkey += line.encode('ascii') + b'\n'
+ try:
+ line = input()
+ except EOFError:
+ break
+ if line == '':
+ break
+ privkey += line.encode('ascii') + b'\n'
privkey = rsa.PrivateKey.load_pkcs1(privkey)
signature = hexlify(rsa.pkcs1.sign(json.dumps(versions_info, sort_keys=True).encode('utf-8'), privkey, 'SHA-256')).decode()
print('signature: ' + signature)
versions_info['signature'] = signature
-json.dump(versions_info, open('update/versions.json', 'w'), indent=4, sort_keys=True)
\ No newline at end of file
+with open('update/versions.json', 'w') as versionsf:
+ json.dump(versions_info, versionsf, indent=4, sort_keys=True)
#!/usr/bin/env python
# coding: utf-8
-from __future__ import with_statement
+from __future__ import with_statement, unicode_literals
import datetime
import glob
-import io # For Python 2 compatibilty
+import io # For Python 2 compatibility
import os
import re
for fn in glob.glob('*.html*'):
with io.open(fn, encoding='utf-8') as f:
content = f.read()
- newc = re.sub(u'(?P<copyright>Copyright © 2006-)(?P<year>[0-9]{4})', u'Copyright © 2006-' + year, content)
+ newc = re.sub(r'(?P<copyright>Copyright © 2006-)(?P<year>[0-9]{4})', 'Copyright © 2006-' + year, content)
if content != newc:
tmpFn = fn + '.part'
with io.open(tmpFn, 'wt', encoding='utf-8') as outf:
#!/usr/bin/env python3
+from __future__ import unicode_literals
import datetime
import io
with io.open('update/releases.atom', 'w', encoding='utf-8') as atom_file:
atom_file.write(atom_template)
-
#!/usr/bin/env python3
+from __future__ import unicode_literals
import sys
import os
import youtube_dl
+
def main():
with open('supportedsites.html.in', 'r', encoding='utf-8') as tmplf:
template = tmplf.read()
continue
elif ie_desc is not None:
ie_html += ': {}'.format(ie.IE_DESC)
- if ie.working() == False:
+ if not ie.working():
ie_html += ' (Currently broken)'
ie_htmls.append('<li>{}</li>'.format(ie_html))
+from __future__ import unicode_literals
+
import io
import sys
import re
+from __future__ import unicode_literals
import io
import os.path
+++ /dev/null
-#!/usr/bin/env python
-
-import sys, os
-
-try:
- import urllib.request as compat_urllib_request
-except ImportError: # Python 2
- import urllib2 as compat_urllib_request
-
-sys.stderr.write(u'Hi! We changed distribution method and now youtube-dl needs to update itself one more time.\n')
-sys.stderr.write(u'This will only happen once. Simply press enter to go on. Sorry for the trouble!\n')
-sys.stderr.write(u'The new location of the binaries is https://github.com/rg3/youtube-dl/downloads, not the git repository.\n\n')
-
-try:
- raw_input()
-except NameError: # Python 3
- input()
-
-filename = sys.argv[0]
-
-API_URL = "https://api.github.com/repos/rg3/youtube-dl/downloads"
-BIN_URL = "https://github.com/downloads/rg3/youtube-dl/youtube-dl"
-
-if not os.access(filename, os.W_OK):
- sys.exit('ERROR: no write permissions on %s' % filename)
-
-try:
- urlh = compat_urllib_request.urlopen(BIN_URL)
- newcontent = urlh.read()
- urlh.close()
-except (IOError, OSError) as err:
- sys.exit('ERROR: unable to download latest version')
-
-try:
- with open(filename, 'wb') as outf:
- outf.write(newcontent)
-except (IOError, OSError) as err:
- sys.exit('ERROR: unable to overwrite current version')
-
-sys.stderr.write(u'Done! Now you can run youtube-dl.\n')
+++ /dev/null
-from distutils.core import setup
-import py2exe
-
-py2exe_options = {
- "bundle_files": 1,
- "compressed": 1,
- "optimize": 2,
- "dist_dir": '.',
- "dll_excludes": ['w9xpopen.exe']
-}
-
-setup(console=['youtube-dl.py'], options={ "py2exe": py2exe_options }, zipfile=None)
\ No newline at end of file
+++ /dev/null
-#!/usr/bin/env python
-
-import sys, os
-import urllib2
-import json, hashlib
-
-def rsa_verify(message, signature, key):
- from struct import pack
- from hashlib import sha256
- from sys import version_info
- def b(x):
- if version_info[0] == 2: return x
- else: return x.encode('latin1')
- assert(type(message) == type(b('')))
- block_size = 0
- n = key[0]
- while n:
- block_size += 1
- n >>= 8
- signature = pow(int(signature, 16), key[1], key[0])
- raw_bytes = []
- while signature:
- raw_bytes.insert(0, pack("B", signature & 0xFF))
- signature >>= 8
- signature = (block_size - len(raw_bytes)) * b('\x00') + b('').join(raw_bytes)
- if signature[0:2] != b('\x00\x01'): return False
- signature = signature[2:]
- if not b('\x00') in signature: return False
- signature = signature[signature.index(b('\x00'))+1:]
- if not signature.startswith(b('\x30\x31\x30\x0D\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01\x05\x00\x04\x20')): return False
- signature = signature[19:]
- if signature != sha256(message).digest(): return False
- return True
-
-sys.stderr.write(u'Hi! We changed distribution method and now youtube-dl needs to update itself one more time.\n')
-sys.stderr.write(u'This will only happen once. Simply press enter to go on. Sorry for the trouble!\n')
-sys.stderr.write(u'From now on, get the binaries from http://rg3.github.com/youtube-dl/download.html, not from the git repository.\n\n')
-
-raw_input()
-
-filename = sys.argv[0]
-
-UPDATE_URL = "http://rg3.github.io/youtube-dl/update/"
-VERSION_URL = UPDATE_URL + 'LATEST_VERSION'
-JSON_URL = UPDATE_URL + 'versions.json'
-UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
-
-if not os.access(filename, os.W_OK):
- sys.exit('ERROR: no write permissions on %s' % filename)
-
-exe = os.path.abspath(filename)
-directory = os.path.dirname(exe)
-if not os.access(directory, os.W_OK):
- sys.exit('ERROR: no write permissions on %s' % directory)
-
-try:
- versions_info = urllib2.urlopen(JSON_URL).read().decode('utf-8')
- versions_info = json.loads(versions_info)
-except:
- sys.exit(u'ERROR: can\'t obtain versions info. Please try again later.')
-if not 'signature' in versions_info:
- sys.exit(u'ERROR: the versions file is not signed or corrupted. Aborting.')
-signature = versions_info['signature']
-del versions_info['signature']
-if not rsa_verify(json.dumps(versions_info, sort_keys=True), signature, UPDATES_RSA_KEY):
- sys.exit(u'ERROR: the versions file signature is invalid. Aborting.')
-
-version = versions_info['versions'][versions_info['latest']]
-
-try:
- urlh = urllib2.urlopen(version['exe'][0])
- newcontent = urlh.read()
- urlh.close()
-except (IOError, OSError) as err:
- sys.exit('ERROR: unable to download latest version')
-
-newcontent_hash = hashlib.sha256(newcontent).hexdigest()
-if newcontent_hash != version['exe'][1]:
- sys.exit(u'ERROR: the downloaded file hash does not match. Aborting.')
-
-try:
- with open(exe + '.new', 'wb') as outf:
- outf.write(newcontent)
-except (IOError, OSError) as err:
- sys.exit(u'ERROR: unable to write the new version')
-
-try:
- bat = os.path.join(directory, 'youtube-dl-updater.bat')
- b = open(bat, 'w')
- b.write("""
-echo Updating youtube-dl...
-ping 127.0.0.1 -n 5 -w 1000 > NUL
-move /Y "%s.new" "%s"
-del "%s"
- \n""" %(exe, exe, bat))
- b.close()
-
- os.startfile(bat)
-except (IOError, OSError) as err:
- sys.exit('ERROR: unable to overwrite current version')
-
-sys.stderr.write(u'Done! Now you can run youtube-dl.\n')
#!/usr/bin/env python
+from __future__ import unicode_literals
+
import os
from os.path import dirname as dirn
import sys
from __future__ import print_function
import os.path
-import pkg_resources
import warnings
import sys
"Programming Language :: Python :: 2.6",
"Programming Language :: Python :: 2.7",
"Programming Language :: Python :: 3",
- "Programming Language :: Python :: 3.3"
+ "Programming Language :: Python :: 3.2",
+ "Programming Language :: Python :: 3.3",
+ "Programming Language :: Python :: 3.4",
],
**params
params = get_params(override=override)
super(FakeYDL, self).__init__(params, auto_init=False)
self.result = []
-
+
def to_screen(self, s, skip_eol=None):
print(s)
def expect_warning(self, regex):
# Silence an expected warning matching a regex
old_report_warning = self.report_warning
+
def report_warning(self, message):
- if re.match(regex, message): return
+ if re.match(regex, message):
+ return
old_report_warning(message)
self.report_warning = types.MethodType(report_warning, self)
elif isinstance(expected, type):
got = got_dict.get(info_field)
self.assertTrue(isinstance(got, expected),
- 'Expected type %r for field %s, but got value %r of type %r' % (expected, info_field, got, type(got)))
+ 'Expected type %r for field %s, but got value %r of type %r' % (expected, info_field, got, type(got)))
else:
if isinstance(expected, compat_str) and expected.startswith('md5:'):
got = 'md5:' + md5(got_dict.get(info_field))
else:
got = got_dict.get(info_field)
self.assertEqual(expected, got,
- 'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
+ 'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
# Check for the presence of mandatory fields
if got_dict.get('_type') != 'playlist':
# Are checkable fields missing from the test case definition?
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
- for key, value in got_dict.items()
- if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
+ for key, value in got_dict.items()
+ if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
if missing_keys:
def _repr(v):
if isinstance(v, compat_str):
- return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'")
+ return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n')
else:
return repr(v)
info_dict_str = ''.join(
'ext': 'mp4',
'width': None,
}
+
def fname(templ):
ydl = YoutubeDL({'outtmpl': templ})
return ydl.prepare_filename(info)
def test_youtube_playlist_matching(self):
assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
- assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') #585
+ assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') # 585
assertPlaylist('PL63F0C78739B09958')
assertPlaylist('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
assertPlaylist('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
- assertPlaylist('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668
+ assertPlaylist('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668
self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
# Top tracks
assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101')
def test_youtube_matching(self):
self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M'))
- self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
+ self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) # 668
self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
RETRIES = 3
+
class YoutubeDL(youtube_dl.YoutubeDL):
def __init__(self, *args, **kwargs):
self.to_stderr = self.to_screen
self.processed_info_dicts = []
super(YoutubeDL, self).__init__(*args, **kwargs)
+
def report_warning(self, message):
# Don't accept warnings during tests
raise ExtractorError(message)
+
def process_info(self, info_dict):
self.processed_info_dicts.append(info_dict)
return super(YoutubeDL, self).process_info(info_dict)
+
def _file_md5(fn):
with open(fn, 'rb') as f:
return hashlib.md5(f.read()).hexdigest()
class TestDownload(unittest.TestCase):
maxDiff = None
+
def setUp(self):
self.defs = defs
-### Dynamically generate tests
+# Dynamically generate tests
+
+
def generator(test_case):
def test_template(self):
return
for other_ie in other_ies:
if not other_ie.working():
- print_skipping(u'test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
+ print_skipping('test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
return
params = get_params(test_case.get('params', {}))
ydl = YoutubeDL(params, auto_init=False)
ydl.add_default_info_extractors()
finished_hook_called = set()
+
def _hook(status):
if status['status'] == 'finished':
finished_hook_called.add(status['filename'])
return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))
res_dict = None
+
def try_rm_tcs_files(tcs=None):
if tcs is None:
tcs = test_cases
raise
if try_num == RETRIES:
- report_warning(u'Failed due to network errors, skipping...')
+ report_warning('Failed due to network errors, skipping...')
return
print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num))
return test_template
-### And add them to TestDownload
+# And add them to TestDownload
for n, test_case in enumerate(defs):
test_method = generator(test_case)
tname = 'test_' + str(test_case['name'])
class BaseTestSubtitles(unittest.TestCase):
url = None
IE = None
+
def setUp(self):
self.DL = FakeYDL()
self.ie = self.IE(self.DL)
def test_subtitles(self):
self.DL.params['writesubtitles'] = True
subtitles = self.getSubtitles()
- self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
+ self.assertEqual(md5(subtitles['en']), '26399116d23ae3cf2c087cea94bc43b4')
def test_subtitles_lang(self):
self.DL.params['writesubtitles'] = True
IGNORED_FILES = [
'setup.py', # http://bugs.python.org/issue13943
+ 'conf.py',
+ 'buildserver.py',
]
class TestUnicodeLiterals(unittest.TestCase):
def test_all_files(self):
- print('Skipping this test (not yet fully implemented)')
- return
-
for dirpath, _, filenames in os.walk(rootDir):
for basename in filenames:
if not basename.endswith('.py'):
if "'" not in code and '"' not in code:
continue
- imps = 'from __future__ import unicode_literals'
- self.assertTrue(
- imps in code,
- ' %s missing in %s' % (imps, fn))
+ self.assertRegexpMatches(
+ code,
+ r'(?:#.*\n*)?from __future__ import (?:[a-z_]+,\s*)*unicode_literals',
+ 'unicode_literals import missing in %s' % fn)
m = re.search(r'(?<=\s)u[\'"](?!\)|,|$)', code)
if m is not None:
escape_rfc3986,
escape_url,
js_to_json,
- get_filesystem_encoding,
intlist_to_bytes,
+ args_to_str,
+ parse_filesize,
)
self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
self.assertEqual(orderedSet([]), [])
self.assertEqual(orderedSet([1]), [1])
- #keep the list ordered
+ # keep the list ordered
self.assertEqual(orderedSet([135, 1, 1, 1]), [135, 1])
def test_unescape_html(self):
self.assertEqual(unescapeHTML('%20;'), '%20;')
self.assertEqual(
unescapeHTML('é'), 'é')
-
+
def test_daterange(self):
- _20century = DateRange("19000101","20000101")
+ _20century = DateRange("19000101", "20000101")
self.assertFalse("17890714" in _20century)
_ac = DateRange("00010101")
self.assertTrue("19690721" in _ac)
self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3')
def test_smuggle_url(self):
- data = {u"ö": u"ö", u"abc": [3]}
+ data = {"ö": "ö", "abc": [3]}
url = 'https://foo.bar/baz?x=y#a'
smug_url = smuggle_url(url, data)
unsmug_url, unsmug_data = unsmuggle_url(smug_url)
intlist_to_bytes([0, 1, 127, 128, 255]),
b'\x00\x01\x7f\x80\xff')
+ def test_args_to_str(self):
+ self.assertEqual(
+ args_to_str(['foo', 'ba/r', '-baz', '2 be', '']),
+ 'foo ba/r -baz \'2 be\' \'\''
+ )
+
+ def test_parse_filesize(self):
+ self.assertEqual(parse_filesize(None), None)
+ self.assertEqual(parse_filesize(''), None)
+ self.assertEqual(parse_filesize('91 B'), 91)
+ self.assertEqual(parse_filesize('foobar'), None)
+ self.assertEqual(parse_filesize('2 MiB'), 2097152)
+ self.assertEqual(parse_filesize('5 GB'), 5000000000)
+ self.assertEqual(parse_filesize('1.2Tb'), 1200000000000)
+
if __name__ == '__main__':
unittest.main()
#!/usr/bin/env python
# coding: utf-8
+from __future__ import unicode_literals
# Allow direct execution
import os
})
-
TEST_ID = 'gr51aVj-mLg'
ANNOTATIONS_FILE = TEST_ID + '.flv.annotations.xml'
EXPECTED_ANNOTATIONS = ['Speech bubble', 'Note', 'Title', 'Spotlight', 'Label']
+
class TestAnnotations(unittest.TestCase):
def setUp(self):
# Clear old files
self.tearDown()
-
def test_info_json(self):
- expected = list(EXPECTED_ANNOTATIONS) #Two annotations could have the same text.
+ expected = list(EXPECTED_ANNOTATIONS) # Two annotations could have the same text.
ie = youtube_dl.extractor.YoutubeIE()
ydl = YoutubeDL(params)
ydl.add_info_extractor(ie)
self.assertTrue(os.path.exists(ANNOTATIONS_FILE))
annoxml = None
with io.open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as annof:
- annoxml = xml.etree.ElementTree.parse(annof)
+ annoxml = xml.etree.ElementTree.parse(annof)
self.assertTrue(annoxml is not None, 'Failed to parse annotations XML')
root = annoxml.getroot()
self.assertEqual(root.tag, 'document')
self.assertEqual(annotationsTag.tag, 'annotations')
annotations = annotationsTag.findall('annotation')
- #Not all the annotations have TEXT children and the annotations are returned unsorted.
+ # Not all the annotations have TEXT children and the annotations are returned unsorted.
for a in annotations:
- self.assertEqual(a.tag, 'annotation')
- if a.get('type') == 'text':
- textTag = a.find('TEXT')
- text = textTag.text
- self.assertTrue(text in expected) #assertIn only added in python 2.7
- #remove the first occurance, there could be more than one annotation with the same text
- expected.remove(text)
- #We should have seen (and removed) all the expected annotation texts.
+ self.assertEqual(a.tag, 'annotation')
+ if a.get('type') == 'text':
+ textTag = a.find('TEXT')
+ text = textTag.text
+ self.assertTrue(text in expected) # assertIn only added in python 2.7
+ # remove the first occurrence, there could be more than one annotation with the same text
+ expected.remove(text)
+ # We should have seen (and removed) all the expected annotation texts.
self.assertEqual(len(expected), 0, 'Not all expected annotations were found.')
-
def tearDown(self):
try_rm(ANNOTATIONS_FILE)
#!/usr/bin/env python
# coding: utf-8
+from __future__ import unicode_literals
# Allow direct execution
import os
TEST_ID = 'BaW_jenozKc'
INFO_JSON_FILE = TEST_ID + '.info.json'
DESCRIPTION_FILE = TEST_ID + '.mp4.description'
-EXPECTED_DESCRIPTION = u'''test chars: "'/\ä↭𝕐
+EXPECTED_DESCRIPTION = '''test chars: "'/\ä↭𝕐
test URL: https://github.com/rg3/youtube-dl/issues/1892
This is a test video for youtube-dl.
self.assertTrue(os.path.exists(INFO_JSON_FILE))
with io.open(INFO_JSON_FILE, 'r', encoding='utf-8') as jsonf:
jd = json.load(jsonf)
- self.assertEqual(jd['upload_date'], u'20121002')
+ self.assertEqual(jd['upload_date'], '20121002')
self.assertEqual(jd['description'], EXPECTED_DESCRIPTION)
self.assertEqual(jd['id'], TEST_ID)
self.assertEqual(jd['extractor'], 'youtube')
- self.assertEqual(jd['title'], u'''youtube-dl test video "'/\ä↭𝕐''')
+ self.assertEqual(jd['title'], '''youtube-dl test video "'/\ä↭𝕐''')
self.assertEqual(jd['uploader'], 'Philipp Hagemeister')
self.assertTrue(os.path.exists(DESCRIPTION_FILE))
#!/usr/bin/env python
+from __future__ import unicode_literals
# Allow direct execution
import os
from youtube_dl.extractor import (
YoutubePlaylistIE,
YoutubeIE,
- YoutubeChannelIE,
- YoutubeShowIE,
- YoutubeTopListIE,
- YoutubeSearchURLIE,
)
result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
self.assertEqual(result['_type'], 'url')
self.assertEqual(YoutubeIE().extract_id(result['url']), 'FXxLjLQi3Fg')
-
+
def test_youtube_course(self):
dl = FakeYDL()
ie = YoutubePlaylistIE(dl)
.PP
\f[B]youtube\-dl\f[] is a small command\-line program to download videos
from YouTube.com and a few more sites.
-It requires the Python interpreter, version 2.6, 2.7, or 3.3+, and it is
+It requires the Python interpreter, version 2.6, 2.7, or 3.2+, and it is
not platform specific.
It should work on your Unix box, on Windows or on Mac OS X.
It is released to the public domain, which means you can modify it,
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ COUNT\ views
\-\-max\-views\ COUNT\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ download\ any\ videos\ with\ more\ than
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ COUNT\ views
-\-\-no\-playlist\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ download\ only\ the\ currently\ playing\ video
+\-\-no\-playlist\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ If\ the\ URL\ refers\ to\ a\ video\ and\ a
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ playlist,\ download\ only\ the\ video.
\-\-age\-limit\ YEARS\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ download\ only\ videos\ suitable\ for\ the\ given
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ age
\-\-download\-archive\ FILE\ \ \ \ \ \ \ \ \ \ Download\ only\ videos\ not\ listed\ in\ the
\ \ \ \ def\ _real_extract(self,\ url):
\ \ \ \ \ \ \ \ video_id\ =\ self._match_id(url)
+\ \ \ \ \ \ \ \ webpage\ =\ self._download_webpage(url,\ video_id)
\ \ \ \ \ \ \ \ #\ TODO\ more\ code\ goes\ here,\ for\ example\ ...
-\ \ \ \ \ \ \ \ webpage\ =\ self._download_webpage(url,\ video_id)
\ \ \ \ \ \ \ \ title\ =\ self._html_search_regex(r\[aq]<h1>(.*?)</h1>\[aq],\ webpage,\ \[aq]title\[aq])
\ \ \ \ \ \ \ \ return\ {
\ \ \ \ \ \ \ \ \ \ \ \ \[aq]id\[aq]:\ video_id,
\ \ \ \ \ \ \ \ \ \ \ \ \[aq]title\[aq]:\ title,
+\ \ \ \ \ \ \ \ \ \ \ \ \[aq]description\[aq]:\ self._og_search_description(webpage),
\ \ \ \ \ \ \ \ \ \ \ \ #\ TODO\ more\ properties\ (see\ youtube_dl/extractor/common.py)
\ \ \ \ \ \ \ \ }
\f[]
complete --command youtube-dl --long-option dateafter --description 'download only videos uploaded on or after this date (i.e. inclusive)'
complete --command youtube-dl --long-option min-views --description 'Do not download any videos with less than COUNT views'
complete --command youtube-dl --long-option max-views --description 'Do not download any videos with more than COUNT views'
-complete --command youtube-dl --long-option no-playlist --description 'download only the currently playing video'
+complete --command youtube-dl --long-option no-playlist --description 'If the URL refers to a video and a playlist, download only the video.'
complete --command youtube-dl --long-option age-limit --description 'download only videos suitable for the given age'
complete --command youtube-dl --long-option download-archive --description 'Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.' --require-parameter
complete --command youtube-dl --long-option include-ads --description 'Download advertisements as well (experimental)'
write_string,
YoutubeDLHandler,
prepend_extension,
+ args_to_str,
)
from .cache import Cache
from .extractor import get_info_extractor, gen_extractors
self.print_debug_header()
self.add_default_info_extractors()
+ def warn_if_short_id(self, argv):
+ # short YouTube ID starting with dash?
+ idxs = [
+ i for i, a in enumerate(argv)
+ if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
+ if idxs:
+ correct_argv = (
+ ['youtube-dl'] +
+ [a for i, a in enumerate(argv) if i not in idxs] +
+ ['--'] + [argv[i] for i in idxs]
+ )
+ self.report_warning(
+ 'Long argument string detected. '
+ 'Use -- to separate parameters and URLs, like this:\n%s\n' %
+ args_to_str(correct_argv))
+
def add_info_extractor(self, ie):
"""Add an InfoExtractor object to the end of the list."""
self._ies.append(ie)
self._output_process.stdin.write((message + '\n').encode('utf-8'))
self._output_process.stdin.flush()
res = ''.join(self._output_channel.readline().decode('utf-8')
- for _ in range(line_count))
+ for _ in range(line_count))
return res[:-len('\n')]
def to_screen(self, message, skip_eol=False):
try:
ie_result = ie.extract(url)
- if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
+ if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
break
if isinstance(ie_result, list):
# Backwards compatibility: old IE result format
return self.process_ie_result(ie_result, download, extra_info)
else:
return ie_result
- except ExtractorError as de: # An error we somewhat expected
+ except ExtractorError as de: # An error we somewhat expected
self.report_error(compat_str(de), de.format_traceback())
break
except MaxDownloadsReached:
self.report_warning(
'Extractor %s returned a compat_list result. '
'It needs to be updated.' % ie_result.get('extractor'))
+
def _fixup(r):
- self.add_extra_info(r,
+ self.add_extra_info(
+ r,
{
'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'],
'webpage_url_basename': url_basename(ie_result['webpage_url']),
'extractor_key': ie_result['extractor_key'],
- })
+ }
+ )
return r
ie_result['entries'] = [
self.process_ie_result(_fixup(r), download, extra_info)
# Two formats have been requested like '137+139'
format_1, format_2 = rf.split('+')
formats_info = (self.select_format(format_1, formats),
- self.select_format(format_2, formats))
+ self.select_format(format_2, formats))
if all(formats_info):
# The first format must contain the video and the
# second the audio
if formats_info[0].get('vcodec') == 'none':
self.report_error('The first format must '
- 'contain the video, try using '
- '"-f %s+%s"' % (format_2, format_1))
+ 'contain the video, try using '
+ '"-f %s+%s"' % (format_2, format_1))
return
selected_format = {
'requested_formats': formats_info,
else:
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
- subfile.write(sub)
+ subfile.write(sub)
except (OSError, IOError):
self.report_error('Cannot write subtitles file ' + sub_filename)
return
with open(thumb_filename, 'wb') as thumbf:
shutil.copyfileobj(uf, thumbf)
self.to_screen('[%s] %s: Writing thumbnail to: %s' %
- (info_dict['extractor'], info_dict['id'], thumb_filename))
+ (info_dict['extractor'], info_dict['id'], thumb_filename))
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_warning('Unable to download thumbnail "%s": %s' %
- (info_dict['thumbnail'], compat_str(err)))
+ (info_dict['thumbnail'], compat_str(err)))
if not self.params.get('skip_download', False):
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
if not merger._executable:
postprocessors = []
self.report_warning('You have requested multiple '
- 'formats but ffmpeg or avconv are not installed.'
- ' The formats won\'t be merged')
+ 'formats but ffmpeg or avconv are not installed.'
+ ' The formats won\'t be merged')
else:
postprocessors = [merger]
for f in info_dict['requested_formats']:
for url in url_list:
try:
- #It also downloads the videos
+ # It also downloads the videos
res = self.extract_info(url)
except UnavailableVideoError:
self.report_error('unable to download video')
if opts.headers is not None:
for h in opts.headers:
if h.find(':', 1) < 0:
- parser.error('wrong header formatting, it should be key:value, not "%s"'%h)
+ parser.error('wrong header formatting, it should be key:value, not "%s"' % h)
key, value = h.split(':', 2)
if opts.verbose:
- write_string('[debug] Adding header from command line option %s:%s\n'%(key, value))
+ write_string('[debug] Adding header from command line option %s:%s\n' % (key, value))
std_headers[key] = value
# Dump user agent
compat_print(desc)
sys.exit(0)
-
# Conflicting, missing and erroneous options
if opts.usenetrc and (opts.username is not None or opts.password is not None):
parser.error('using .netrc conflicts with giving username/password')
# --all-sub automatically sets --write-sub if --write-auto-sub is not given
# this was the old behaviour if only --all-sub was given.
- if opts.allsubtitles and (opts.writeautomaticsub == False):
+ if opts.allsubtitles and not opts.writeautomaticsub:
opts.writesubtitles = True
if sys.version_info < (3,):
# In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
if opts.outtmpl is not None:
opts.outtmpl = opts.outtmpl.decode(preferredencoding())
- outtmpl =((opts.outtmpl is not None and opts.outtmpl)
- or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s')
- or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s')
- or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s')
- or (opts.usetitle and '%(title)s-%(id)s.%(ext)s')
- or (opts.useid and '%(id)s.%(ext)s')
- or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s')
- or DEFAULT_OUTTMPL)
+ outtmpl = ((opts.outtmpl is not None and opts.outtmpl)
+ or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s')
+ or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s')
+ or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s')
+ or (opts.usetitle and '%(title)s-%(id)s.%(ext)s')
+ or (opts.useid and '%(id)s.%(ext)s')
+ or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s')
+ or DEFAULT_OUTTMPL)
if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
parser.error('Cannot download a video and extract audio into the same'
' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
ydl.add_post_processor(FFmpegAudioFixPP())
ydl.add_post_processor(AtomicParsleyPP())
-
# Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
# So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
if opts.exec_cmd:
# Maybe do nothing
if (len(all_urls) < 1) and (opts.load_info_filename is None):
- if not (opts.update_self or opts.rm_cachedir):
- parser.error('you must provide at least one URL')
- else:
+ if opts.update_self or opts.rm_cachedir:
sys.exit()
+ ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
+ parser.error('you must provide at least one URL')
+
try:
if opts.load_info_filename is not None:
retcode = ydl.download_with_info_file(opts.load_info_filename)
#!/usr/bin/env python
+from __future__ import unicode_literals
# Execute with
# $ python youtube_dl/__main__.py (2.6+)
+from __future__ import unicode_literals
+
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
import base64
BLOCK_SIZE_BYTES = 16
+
def aes_ctr_decrypt(data, key, counter):
    """
    Decrypt with aes in counter mode

    Each counter block is encrypted with the expanded key and XORed onto
    the matching cipher block; the result is cut to the input length.

    @param {int[]} data        cipher
    @param {int[]} key         16/24/32-Byte cipher key
    @param {instance} counter  Instance whose next_value function returns
                               the {int[]} 16-Byte block for the next step
    @returns {int[]}           decrypted data
    """
    round_keys = key_expansion(key)
    total_len = len(data)
    n_blocks = int(ceil(float(total_len) / BLOCK_SIZE_BYTES))

    plain = []
    for start in range(0, n_blocks * BLOCK_SIZE_BYTES, BLOCK_SIZE_BYTES):
        counter_block = counter.next_value()
        chunk = data[start:start + BLOCK_SIZE_BYTES]
        # Zero-pad a short final block; the padding is cut off again below
        chunk += [0] * (BLOCK_SIZE_BYTES - len(chunk))

        keystream = aes_encrypt(counter_block, round_keys)
        plain += xor(chunk, keystream)

    return plain[:total_len]
+
def aes_cbc_decrypt(data, key, iv):
    """
    Decrypt with aes in CBC mode

    Every block is decrypted and then XORed with the previous cipher
    block (the IV for the first one); the result is cut to the input length.

    @param {int[]} data        cipher
    @param {int[]} key         16/24/32-Byte cipher key
    @param {int[]} iv          16-Byte IV
    @returns {int[]}           decrypted data
    """
    round_keys = key_expansion(key)
    total_len = len(data)
    n_blocks = int(ceil(float(total_len) / BLOCK_SIZE_BYTES))

    plain = []
    chain_block = iv
    for start in range(0, n_blocks * BLOCK_SIZE_BYTES, BLOCK_SIZE_BYTES):
        chunk = data[start:start + BLOCK_SIZE_BYTES]
        # Zero-pad a short final block; the padding is cut off again below
        chunk += [0] * (BLOCK_SIZE_BYTES - len(chunk))

        plain += xor(aes_decrypt(chunk, round_keys), chain_block)
        chain_block = chunk

    return plain[:total_len]
+
def key_expansion(data):
    """
    Generate key schedule

    The input key is copied, never modified. Words of 4 bytes are appended
    until the schedule is long enough, then it is cut to its exact size.

    @param {int[]} data  16/24/32-Byte cipher key
    @returns {int[]}     176/208/240-Byte expanded key
    """
    schedule = data[:]  # copy
    key_len = len(schedule)
    target_len = (key_len // 4 + 7) * BLOCK_SIZE_BYTES
    # Number of plain words closing each iteration, by key size
    trailing_words = 3 if key_len == 32 else 2 if key_len == 24 else 0
    rcon_iteration = 1

    while len(schedule) < target_len:
        # First word of the iteration goes through the schedule core
        word = key_schedule_core(schedule[-4:], rcon_iteration)
        rcon_iteration += 1
        schedule += xor(word, schedule[-key_len: 4 - key_len])

        for _ in range(3):
            word = schedule[-4:]
            schedule += xor(word, schedule[-key_len: 4 - key_len])

        if key_len == 32:
            # 256-bit keys get one extra S-box substituted word
            word = sub_bytes(schedule[-4:])
            schedule += xor(word, schedule[-key_len: 4 - key_len])

        for _ in range(trailing_words):
            word = schedule[-4:]
            schedule += xor(word, schedule[-key_len: 4 - key_len])

    return schedule[:target_len]
+
def aes_encrypt(data, expanded_key):
    """
    Encrypt one block with aes

    @param {int[]} data          16-Byte state
    @param {int[]} expanded_key  176/208/240-Byte expanded key
    @returns {int[]}             16-Byte cipher
    """
    last_round = len(expanded_key) // BLOCK_SIZE_BYTES - 1

    state = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
    for round_no in range(1, last_round + 1):
        state = shift_rows(sub_bytes(state))
        # The final round has no column mixing
        if round_no != last_round:
            state = mix_columns(state)
        offset = round_no * BLOCK_SIZE_BYTES
        state = xor(state, expanded_key[offset: offset + BLOCK_SIZE_BYTES])

    return state
+
def aes_decrypt(data, expanded_key):
    """
    Decrypt one block with aes

    @param {int[]} data          16-Byte cipher
    @param {int[]} expanded_key  176/208/240-Byte expanded key
    @returns {int[]}             16-Byte state
    """
    last_round = len(expanded_key) // BLOCK_SIZE_BYTES - 1

    state = data
    for round_no in range(last_round, 0, -1):
        offset = round_no * BLOCK_SIZE_BYTES
        state = xor(state, expanded_key[offset: offset + BLOCK_SIZE_BYTES])
        # The first round undone here had no column mixing when encrypting
        if round_no != last_round:
            state = mix_columns_inv(state)
        state = sub_bytes_inv(shift_rows_inv(state))

    return xor(state, expanded_key[:BLOCK_SIZE_BYTES])
+
def aes_decrypt_text(data, password, key_size_bytes):
    """
    Decrypt text
    - The first 8 Bytes of decoded 'data' are the 8 high Bytes of the counter
    - The cipher key is retrieved by encrypting the first 16 Byte of 'password'
      with the first 'key_size_bytes' Bytes from 'password' (if necessary filled with 0's)
    - Mode of operation is 'counter'

    @param {str} data                    Base64 encoded string
    @param {str,unicode} password        Password (will be encoded with utf-8)
    @param {int} key_size_bytes          Possible values: 16 for 128-Bit, 24 for 192-Bit or 32 for 256-Bit
    @returns {str}                       Decrypted data
    """
    NONCE_LENGTH_BYTES = 8

    raw = bytes_to_intlist(base64.b64decode(data))
    password_bytes = bytes_to_intlist(password.encode('utf-8'))

    # Password cut or zero-filled to the key size, then encrypted with itself
    seed_key = password_bytes[:key_size_bytes] + [0] * (key_size_bytes - len(password_bytes))
    key = aes_encrypt(seed_key[:BLOCK_SIZE_BYTES], key_expansion(seed_key)) * (key_size_bytes // BLOCK_SIZE_BYTES)

    nonce = raw[:NONCE_LENGTH_BYTES]
    cipher = raw[NONCE_LENGTH_BYTES:]

    class Counter:
        """Hands out successive counter blocks, starting at nonce + zeros."""

        def __init__(self, first_block):
            self._current = first_block

        def next_value(self):
            current = self._current
            self._current = inc(current)
            return current

    first_block = nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)
    decrypted = aes_ctr_decrypt(cipher, key, Counter(first_block))

    return intlist_to_bytes(decrypted)
RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d)
-MIX_COLUMN_MATRIX = ((0x2,0x3,0x1,0x1),
- (0x1,0x2,0x3,0x1),
- (0x1,0x1,0x2,0x3),
- (0x3,0x1,0x1,0x2))
-MIX_COLUMN_MATRIX_INV = ((0xE,0xB,0xD,0x9),
- (0x9,0xE,0xB,0xD),
- (0xD,0x9,0xE,0xB),
- (0xB,0xD,0x9,0xE))
+MIX_COLUMN_MATRIX = ((0x2, 0x3, 0x1, 0x1),
+ (0x1, 0x2, 0x3, 0x1),
+ (0x1, 0x1, 0x2, 0x3),
+ (0x3, 0x1, 0x1, 0x2))
+MIX_COLUMN_MATRIX_INV = ((0xE, 0xB, 0xD, 0x9),
+ (0x9, 0xE, 0xB, 0xD),
+ (0xD, 0x9, 0xE, 0xB),
+ (0xB, 0xD, 0x9, 0xE))
RIJNDAEL_EXP_TABLE = (0x01, 0x03, 0x05, 0x0F, 0x11, 0x33, 0x55, 0xFF, 0x1A, 0x2E, 0x72, 0x96, 0xA1, 0xF8, 0x13, 0x35,
0x5F, 0xE1, 0x38, 0x48, 0xD8, 0x73, 0x95, 0xA4, 0xF7, 0x02, 0x06, 0x0A, 0x1E, 0x22, 0x66, 0xAA,
0xE5, 0x34, 0x5C, 0xE4, 0x37, 0x59, 0xEB, 0x26, 0x6A, 0xBE, 0xD9, 0x70, 0x90, 0xAB, 0xE6, 0x31,
0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5,
0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07)
+
def sub_bytes(data):
    """Return a new list with every value of data looked up in SBOX."""
    substituted = []
    for value in data:
        substituted.append(SBOX[value])
    return substituted
+
def sub_bytes_inv(data):
    """Return a new list with every value of data looked up in SBOX_INV."""
    substituted = []
    for value in data:
        substituted.append(SBOX_INV[value])
    return substituted
+
def rotate(data):
    """Return a new list with the first element of data moved to the end."""
    rest = data[1:]
    first = data[0]
    return rest + [first]
+
def key_schedule_core(data, rcon_iteration):
    """Rotate data, substitute it via the S-box and XOR RCON into its first value.

    The RCON entry is selected by rcon_iteration. The result is a new list.
    """
    word = sub_bytes(rotate(data))
    word[0] = word[0] ^ RCON[rcon_iteration]

    return word
+
def xor(data1, data2):
    """Return the element-wise XOR of two sequences.

    The result is as long as the shorter input.
    """
    combined = []
    for left, right in zip(data1, data2):
        combined.append(left ^ right)
    return combined
+
def rijndael_mul(a, b):
    """Multiply a and b using the Rijndael log/exp lookup tables.

    A zero operand gives 0 directly; otherwise the two table logarithms
    are added modulo 0xFF and mapped back through the exp table.
    """
    if a == 0:
        return 0
    if b == 0:
        return 0
    log_sum = RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]
    return RIJNDAEL_EXP_TABLE[log_sum % 0xFF]
+
def mix_column(data, matrix):
data_mixed = []
for row in range(4):
data_mixed.append(mixed)
return data_mixed
+
def mix_columns(data, matrix=MIX_COLUMN_MATRIX):
    """Apply mix_column with the given matrix to each of the four 4-value columns of data."""
    mixed_state = []
    for start in (0, 4, 8, 12):
        mixed_state.extend(mix_column(data[start: start + 4], matrix))
    return mixed_state
+
def mix_columns_inv(data):
    """Run mix_columns on data with MIX_COLUMN_MATRIX_INV as the matrix."""
    inverse_matrix = MIX_COLUMN_MATRIX_INV
    return mix_columns(data, inverse_matrix)
+
def shift_rows(data):
    """Return the 16 values of data reordered by the row shift.

    Output position column * 4 + row takes its value from the column that
    lies `row` places further on, wrapping around after the fourth.
    """
    return [
        data[((column + row) & 0b11) * 4 + row]
        for column in range(4)
        for row in range(4)
    ]
+
def shift_rows_inv(data):
    """Return the 16 values of data reordered by the inverse row shift.

    Output position column * 4 + row takes its value from the column that
    lies `row` places back, wrapping around before the first.
    """
    return [
        data[((column - row) & 0b11) * 4 + row]
        for column in range(4)
        for row in range(4)
    ]
+
def inc(data):
- data = data[:] # copy
- for i in range(len(data)-1,-1,-1):
+ data = data[:] # copy
+ for i in range(len(data) - 1, -1, -1):
if data[i] == 255:
data[i] = 0
else:
import getpass
import optparse
import os
+import re
import subprocess
import sys
try:
import urllib.request as compat_urllib_request
-except ImportError: # Python 2
+except ImportError: # Python 2
import urllib2 as compat_urllib_request
try:
import urllib.error as compat_urllib_error
-except ImportError: # Python 2
+except ImportError: # Python 2
import urllib2 as compat_urllib_error
try:
import urllib.parse as compat_urllib_parse
-except ImportError: # Python 2
+except ImportError: # Python 2
import urllib as compat_urllib_parse
try:
from urllib.parse import urlparse as compat_urllib_parse_urlparse
-except ImportError: # Python 2
+except ImportError: # Python 2
from urlparse import urlparse as compat_urllib_parse_urlparse
try:
import urllib.parse as compat_urlparse
-except ImportError: # Python 2
+except ImportError: # Python 2
import urlparse as compat_urlparse
try:
import http.cookiejar as compat_cookiejar
-except ImportError: # Python 2
+except ImportError: # Python 2
import cookielib as compat_cookiejar
try:
import html.entities as compat_html_entities
-except ImportError: # Python 2
+except ImportError: # Python 2
import htmlentitydefs as compat_html_entities
try:
import html.parser as compat_html_parser
-except ImportError: # Python 2
+except ImportError: # Python 2
import HTMLParser as compat_html_parser
try:
import http.client as compat_http_client
-except ImportError: # Python 2
+except ImportError: # Python 2
import httplib as compat_http_client
try:
try:
from urllib.parse import parse_qs as compat_parse_qs
-except ImportError: # Python 2
+except ImportError: # Python 2
# HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
# Python 2's version is apparently totally broken
def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
- encoding='utf-8', errors='replace'):
+ encoding='utf-8', errors='replace'):
qs, _coerce_result = qs, unicode
pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
r = []
return r
def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
- encoding='utf-8', errors='replace'):
+ encoding='utf-8', errors='replace'):
parsed_result = {}
pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
- encoding=encoding, errors=errors)
+ encoding=encoding, errors=errors)
for name, value in pairs:
if name in parsed_result:
parsed_result[name].append(value)
return parsed_result
try:
- compat_str = unicode # Python 2
+ compat_str = unicode # Python 2
except NameError:
compat_str = str
try:
- compat_chr = unichr # Python 2
+ compat_chr = unichr # Python 2
except NameError:
compat_chr = chr
from shlex import quote as shlex_quote
except ImportError: # Python < 3.3
def shlex_quote(s):
    """Return s quoted for use as a single shell argument.

    Strings made up only of word characters, '-', '_', '.' and '/' are
    returned unchanged. Anything else, including the empty string, is
    wrapped in single quotes with embedded single quotes escaped.
    """
    # \Z rather than $: $ also matches just before a trailing newline, which
    # would let a string such as 'abc\n' through unquoted.
    if re.match(r'^[-_\w./]+\Z', s):
        return s
    return "'" + s.replace("'", "'\"'\"'") + "'"
def compat_ord(c):
    """Return c unchanged if it is exactly an int, otherwise ord(c)."""
    return c if type(c) is int else ord(c)
if sys.version_info >= (3, 0):
drive = ''
userhome = os.path.join(drive, compat_getenv('HOMEPATH'))
- if i != 1: #~user
+ if i != 1: # ~user
userhome = os.path.join(os.path.dirname(userhome), path[1:i])
return userhome + path[i:]
print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
else:
def compat_print(s):
- assert type(s) == type(u'')
+ assert isinstance(s, compat_str)
print(s)
return F4mFD
else:
return HttpFD
+
+__all__ = [
+ 'get_suitable_downloader',
+ 'FileDownloader',
+]
if total is None:
return None
dif = now - start
- if current == 0 or dif < 0.001: # One millisecond
+ if current == 0 or dif < 0.001: # One millisecond
return None
rate = float(current) / dif
return int((float(total) - float(current)) / rate)
@staticmethod
def calc_speed(start, now, bytes):
    """Return the average rate in bytes per time unit, or None.

    None is returned when nothing has been transferred yet or when less
    than one millisecond lies between start and now.
    """
    if bytes == 0:
        return None
    elapsed = now - start
    if elapsed < 0.001:  # One millisecond
        return None
    return float(bytes) / elapsed
@staticmethod
def best_block_size(elapsed_time, bytes):
new_min = max(bytes / 2.0, 1.0)
- new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
+ new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
if elapsed_time < 0.001:
return int(new_max)
rate = bytes / elapsed_time
if size == 1:
real_size = self.read_unsigned_long_long()
header_end = 16
- return real_size, box_type, self.read(real_size-header_end)
+ return real_size, box_type, self.read(real_size - header_end)
def read_asrt(self):
# version
n_frags = segment_run_entry[1]
fragment_run_entry_table = boot_info['fragments'][0]['fragments']
first_frag_number = fragment_run_entry_table[0]['first']
- for (i, frag_number) in zip(range(1, n_frags+1), itertools.count(first_frag_number)):
+ for (i, frag_number) in zip(range(1, n_frags + 1), itertools.count(first_frag_number)):
res.append((1, frag_number))
return res
self.to_screen('[download] Downloading f4m manifest')
manifest = self.ydl.urlopen(man_url).read()
self.report_destination(filename)
- http_dl = HttpQuietDownloader(self.ydl,
+ http_dl = HttpQuietDownloader(
+ self.ydl,
{
'continuedl': True,
'quiet': True,
'noprogress': True,
'test': self.params.get('test', False),
- })
+ }
+ )
doc = etree.fromstring(manifest)
formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
def frag_progress_hook(status):
frag_total_bytes = status.get('total_bytes', 0)
estimated_size = (state['downloaded_bytes'] +
- (total_frags - state['frag_counter']) * frag_total_bytes)
+ (total_frags - state['frag_counter']) * frag_total_bytes)
if status['status'] == 'finished':
state['downloaded_bytes'] += frag_total_bytes
state['frag_counter'] += 1
frag_downloaded_bytes = status['downloaded_bytes']
byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes
frag_progress = self.calc_percent(frag_downloaded_bytes,
- frag_total_bytes)
+ frag_total_bytes)
progress = self.calc_percent(state['frag_counter'], total_frags)
progress += frag_progress / float(total_frags)
eta = self.calc_eta(start, time.time(), estimated_size, byte_counter)
self.report_progress(progress, format_bytes(estimated_size),
- status.get('speed'), eta)
+ status.get('speed'), eta)
http_dl.add_progress_hook(frag_progress_hook)
frags_filenames = []
if check_executable(program, ['-version']):
break
else:
- self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
+ self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
return False
cmd = [program] + args
retval = subprocess.call(cmd)
if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename))
- self.to_screen(u'\r[%s] %s bytes' % (cmd[0], fsize))
+ self.to_screen('\r[%s] %s bytes' % (cmd[0], fsize))
self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': fsize,
})
return True
else:
- self.to_stderr(u"\n")
- self.report_error(u'%s exited with code %d' % (program, retval))
+ self.to_stderr('\n')
+ self.report_error('%s exited with code %d' % (program, retval))
return False
})
self.try_rename(tmpfilename, filename)
return True
-
+from __future__ import unicode_literals
+
import os
import time
self.report_retry(count, retries)
if count > retries:
- self.report_error(u'giving up after %s retries' % retries)
+ self.report_error('giving up after %s retries' % retries)
return False
data_len = data.info().get('Content-length', None)
min_data_len = self.params.get("min_filesize", None)
max_data_len = self.params.get("max_filesize", None)
if min_data_len is not None and data_len < min_data_len:
- self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
+ self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
return False
if max_data_len is not None and data_len > max_data_len:
- self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
+ self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
return False
data_len_str = format_bytes(data_len)
filename = self.undo_temp_name(tmpfilename)
self.report_destination(filename)
except (OSError, IOError) as err:
- self.report_error(u'unable to open for writing: %s' % str(err))
+ self.report_error('unable to open for writing: %s' % str(err))
return False
try:
stream.write(data_block)
except (IOError, OSError) as err:
- self.to_stderr(u"\n")
- self.report_error(u'unable to write data: %s' % str(err))
+ self.to_stderr('\n')
+ self.report_error('unable to write data: %s' % str(err))
return False
if not self.params.get('noresizebuffer', False):
block_size = self.best_block_size(after - before, len(data_block))
self.slow_down(start, byte_counter - resume_len)
if stream is None:
- self.to_stderr(u"\n")
- self.report_error(u'Did not get any data blocks')
+ self.to_stderr('\n')
+ self.report_error('Did not get any data blocks')
return False
- if tmpfilename != u'-':
+ if tmpfilename != '-':
stream.close()
self.report_finish(data_len_str, (time.time() - start))
if data_len is not None and byte_counter != data_len:
+from __future__ import unicode_literals
+
import os
import subprocess
from .common import FileDownloader
+from ..compat import compat_subprocess_get_DEVNULL
from ..utils import (
encodeFilename,
)
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
- args = ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url]
+ args = [
+ 'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',
+ '-dumpstream', '-dumpfile', tmpfilename, url]
# Check for mplayer first
try:
- subprocess.call(['mplayer', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
+ subprocess.call(
+ ['mplayer', '-h'],
+ stdout=compat_subprocess_get_DEVNULL(), stderr=subprocess.STDOUT)
except (OSError, IOError):
- self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0])
+ self.report_error('MMS or RTSP download detected but "%s" could not be run' % args[0])
return False
# Download using mplayer.
retval = subprocess.call(args)
if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename))
- self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
+ self.to_screen('\r[%s] %s bytes' % (args[0], fsize))
self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': fsize,
})
return True
else:
- self.to_stderr(u"\n")
- self.report_error(u'mplayer exited with code %d' % retval)
+ self.to_stderr('\n')
+ self.report_error('mplayer exited with code %d' % retval)
return False
continue
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
if mobj:
- downloaded_data_len = int(float(mobj.group(1))*1024)
+ downloaded_data_len = int(float(mobj.group(1)) * 1024)
percent = float(mobj.group(2))
if not resume_percent:
resume_percent = percent
resume_downloaded_data_len = downloaded_data_len
- eta = self.calc_eta(start, time.time(), 100-resume_percent, percent-resume_percent)
- speed = self.calc_speed(start, time.time(), downloaded_data_len-resume_downloaded_data_len)
+ eta = self.calc_eta(start, time.time(), 100 - resume_percent, percent - resume_percent)
+ speed = self.calc_speed(start, time.time(), downloaded_data_len - resume_downloaded_data_len)
data_len = None
if percent > 0:
data_len = int(downloaded_data_len * 100 / percent)
# no percent for live streams
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
if mobj:
- downloaded_data_len = int(float(mobj.group(1))*1024)
+ downloaded_data_len = int(float(mobj.group(1)) * 1024)
time_now = time.time()
speed = self.calc_speed(start, time_now, downloaded_data_len)
self.report_progress_live_stream(downloaded_data_len, speed, time_now - start)
if not cursor_in_new_line:
self.to_screen('')
cursor_in_new_line = True
- self.to_screen('[rtmpdump] '+line)
+ self.to_screen('[rtmpdump] ' + line)
proc.wait()
if not cursor_in_new_line:
self.to_screen('')
while (retval == RD_INCOMPLETE or retval == RD_FAILED) and not test and not live:
prevsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen('[rtmpdump] %s bytes' % prevsize)
- time.sleep(5.0) # This seems to be needed
+ time.sleep(5.0) # This seems to be needed
retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == RD_FAILED])
cursize = os.path.getsize(encodeFilename(tmpfilename))
if prevsize == cursize and retval == RD_FAILED:
+from __future__ import unicode_literals
+
from .abc import ABCIE
from .academicearth import AcademicEarthCourseIE
from .addanime import AddAnimeIE
from .blinkx import BlinkxIE
from .bliptv import BlipTVIE, BlipTVUserIE
from .bloomberg import BloombergIE
+from .bpb import BpbIE
from .br import BRIE
from .breakcom import BreakIE
from .brightcove import BrightcoveIE
+from .buzzfeed import BuzzFeedIE
from .byutv import BYUtvIE
from .c56 import C56IE
from .canal13cl import Canal13clIE
from .musicplayon import MusicPlayOnIE
from .musicvault import MusicVaultIE
from .muzu import MuzuTVIE
-from .myspace import MySpaceIE
+from .myspace import MySpaceIE, MySpaceAlbumIE
from .myspass import MySpassIE
from .myvideo import MyVideoIE
from .naver import NaverIE
from .sztvhu import SztvHuIE
from .tagesschau import TagesschauIE
from .tapely import TapelyIE
+from .tass import TassIE
from .teachertube import (
TeacherTubeIE,
TeacherTubeUserIE,
from .thisav import ThisAVIE
from .tinypic import TinyPicIE
from .tlc import TlcIE, TlcDeIE
+from .tmz import TMZIE
from .tnaflix import TNAFlixIE
from .thvideo import (
THVideoIE,
from .tube8 import Tube8IE
from .tudou import TudouIE
from .tumblr import TumblrIE
+from .tunein import TuneInIE
from .turbo import TurboIE
from .tutv import TutvIE
from .tvigle import TvigleIE
VineUserIE,
)
from .viki import VikiIE
-from .vk import VKIE
+from .vk import (
+ VKIE,
+ VKUserVideosIE,
+)
from .vodlocker import VodlockerIE
from .vporn import VpornIE
from .vrt import VRTIE
from .xbef import XBefIE
from .xboxclips import XboxClipsIE
from .xhamster import XHamsterIE
+from .xminus import XMinusIE
from .xnxx import XNXXIE
from .xvideos import XVideosIE
from .xtube import XTubeUserIE, XTubeIE
YoutubeWatchLaterIE,
)
from .zdf import ZDFIE
+from .zingmp3 import (
+ ZingMp3SongIE,
+ ZingMp3AlbumIE,
+)
_ALL_CLASSES = [
klass
def get_info_extractor(ie_name):
    """Look up the module-level name ie_name + 'IE' and return it.

    Raises KeyError if this module defines no such name.
    """
    class_name = ie_name + 'IE'
    return globals()[class_name]
from __future__ import unicode_literals
+
import re
from .common import InfoExtractor
}
def _real_extract(self, url):
- m = re.match(self._VALID_URL, url)
- playlist_id = m.group('id')
+ playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
title = self._html_search_regex(
- r'<h1 class="playlist-name"[^>]*?>(.*?)</h1>', webpage, u'title')
+ r'<h1 class="playlist-name"[^>]*?>(.*?)</h1>', webpage, 'title')
description = self._html_search_regex(
r'<p class="excerpt"[^>]*?>(.*?)</p>',
- webpage, u'description', fatal=False)
+ webpage, 'description', fatal=False)
urls = re.findall(
r'<li class="lecture-preview">\s*?<a target="_blank" href="([^"]+)">',
webpage)
class AddAnimeIE(InfoExtractor):
-
- _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
+ _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<id>[\w_]+)(?:.*)'
_TEST = {
'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
'md5': '72954ea10bc979ab5e2eb288b21425a0',
}
def _real_extract(self, url):
+ video_id = self._match_id(url)
+
try:
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('video_id')
webpage = self._download_webpage(url, video_id)
except ExtractorError as ee:
if not isinstance(ee.cause, compat_HTTPError) or \
r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);',
redir_webpage)
if av is None:
- raise ExtractorError(u'Cannot find redirect math task')
+ raise ExtractorError('Cannot find redirect math task')
av_res = int(av.group(1)) + int(av.group(2)) * int(av.group(3))
parsed_url = compat_urllib_parse_urlparse(url)
from .common import InfoExtractor
+
class AdultSwimIE(InfoExtractor):
_VALID_URL = r'https?://video\.adultswim\.com/(?P<path>.+?)(?:\.html)?(?:\?.*)?(?:#.*)?$'
_TEST = {
-#coding: utf-8
-
+# coding: utf-8
from __future__ import unicode_literals
import re
}
def _real_extract(self, url):
- m = re.match(self._VALID_URL, url)
- video_id = m.group('id')
+ video_id = self._match_id(url)
# Note: There is an easier-to-parse configuration at
# http://www.aparat.com/video/video/config/videohash/%video_id
for i, video_url in enumerate(video_urls):
req = HEADRequest(video_url)
res = self._request_webpage(
- req, video_id, note=u'Testing video URL %d' % i, errnote=False)
+ req, video_id, note='Testing video URL %d' % i, errnote=False)
if res:
break
else:
- raise ExtractorError(u'No working video URLs found')
+ raise ExtractorError('No working video URLs found')
- title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, u'title')
+ title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title')
thumbnail = self._search_regex(
- r'\s+image:\s*"([^"]+)"', webpage, u'thumbnail', fatal=False)
+ r'\s+image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
return {
'id': video_id,
uploader_id = mobj.group('company')
playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
+
def fix_html(s):
+ # Clean up the downloaded playlist fragment before it is parsed as XML:
+ # drop <script> elements, self-close <img> tags, escape the stray
+ # apostrophes inside the onClick JSON and wrap everything in one
+ # <html> root element.
s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s)
# The ' in the onClick attributes are not escaped, it couldn't be parsed
# like: http://trailers.apple.com/trailers/wb/gravity/
+
def _clean_json(m):
+ # Replace raw apostrophes in the matched JSON with the &#39; entity
+ # so that the surrounding attribute value stays well-formed.
return 'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
s = re.sub(self._JSON_RE, _clean_json, s)
- s = '<html>' + s + u'</html>'
+ s = '<html>%s</html>' % s
return s
doc = self._download_xml(playlist_url, movie, transform_source=fix_html)
for li in doc.findall('./div/ul/li'):
on_click = li.find('.//a').attrib['onClick']
trailer_info_json = self._search_regex(self._JSON_RE,
- on_click, 'trailer info')
+ on_click, 'trailer info')
trailer_info = json.loads(trailer_info_json)
title = trailer_info['title']
video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
'upload_date': upload_date,
'thumbnail': thumbnail,
}
-
qualities,
)
-# There are different sources of video in arte.tv, the extraction process
+# There are different sources of video in arte.tv, the extraction process
# is different for each one. The videos usually expire in 7 days, so we can't
# add tests.
_VALID_URL = r'https?://(?:www\.)?audiomack\.com/song/(?P<id>[\w/-]+)'
IE_NAME = 'audiomack'
_TESTS = [
- #hosted on audiomack
+ # hosted on audiomack
{
'url': 'http://www.audiomack.com/song/roosh-williams/extraordinary',
'info_dict':
{
- 'id' : 'roosh-williams/extraordinary',
+ 'id': 'roosh-williams/extraordinary',
'ext': 'mp3',
'title': 'Roosh Williams - Extraordinary'
}
},
- #hosted on soundcloud via audiomack
+ # hosted on soundcloud via audiomack
{
'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare',
'file': '172419696.mp3',
raise ExtractorError("Unable to deduce api url of song")
realurl = api_response["url"]
- #Audiomack wraps a lot of soundcloud tracks in their branded wrapper
+ # Audiomack wraps a lot of soundcloud tracks in their branded wrapper
# - if so, pass the work off to the soundcloud extractor
if SoundcloudIE.suitable(realurl):
return {'_type': 'url', 'url': realurl, 'ie_key': 'Soundcloud'}
_TEST = {
'url': 'http://bambuser.com/v/4050584',
# MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388
- #u'md5': 'fba8f7693e48fd4e8641b3fd5539a641',
+ # 'md5': 'fba8f7693e48fd4e8641b3fd5539a641',
'info_dict': {
'id': '4050584',
'ext': 'flv',
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
info_url = ('http://player-c.api.bambuser.com/getVideo.json?'
- '&api_key=%s&vid=%s' % (self._API_KEY, video_id))
+ '&api_key=%s&vid=%s' % (self._API_KEY, video_id))
info_json = self._download_webpage(info_url, video_id)
info = json.loads(info_json)['result']
urls = []
last_id = ''
for i in itertools.count(1):
- req_url = ('http://bambuser.com/xhr-api/index.php?username={user}'
+ req_url = (
+ 'http://bambuser.com/xhr-api/index.php?username={user}'
'&sort=created&access_mode=0%2C1%2C2&limit={count}'
'&method=broadcast&format=json&vid_older_than={last}'
- ).format(user=user, count=self._STEP, last=last_id)
+ ).format(user=user, count=self._STEP, last=last_id)
req = compat_urllib_request.Request(req_url)
# Without setting this header, we wouldn't get any result
req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
initial_url = mp3_info['url']
re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$'
m_url = re.match(re_url, initial_url)
- #We build the url we will use to get the final track url
+ # We build the url we will use to get the final track url
# This url is built in Bandcamp in the script download_bunde_*.js
request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts'))
final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url')
# If we could correctly generate the .rand field the url would be
- #in the "download_url" key
+ # in the "download_url" key
final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1)
return {
from __future__ import unicode_literals
import re
+import xml.etree.ElementTree
from .subtitles import SubtitlesInfoExtractor
from ..utils import ExtractorError
+from ..compat import compat_HTTPError
class BBCCoUkIE(SubtitlesInfoExtractor):
'skip_download': True,
},
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
- }
+ },
+ {
+ 'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
+ 'info_dict': {
+ 'id': 'b03k3pb7',
+ 'ext': 'flv',
+ 'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
+ 'description': '2. Invasion',
+ 'duration': 3600,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
+ },
]
def _extract_asx_playlist(self, connection, programme_id):
return playlist.findall('./{http://bbc.co.uk/2008/emp/playlist}item')
def _extract_medias(self, media_selection):
+ # Return the <media> children of a media selection element.
+ # If the element contains an <error> child, raise an expected
+ # ExtractorError naming this extractor and the error's "id" attribute.
+ error = media_selection.find('./{http://bbc.co.uk/2008/mp/mediaselection}error')
+ if error is not None:
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, error.get('id')), expected=True)
return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media')
def _extract_connections(self, media):
subtitles[lang] = srt
return subtitles
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- group_id = mobj.group('id')
-
- webpage = self._download_webpage(url, group_id, 'Downloading video page')
- if re.search(r'id="emp-error" class="notinuk">', webpage):
- raise ExtractorError('Currently BBC iPlayer TV programmes are available to play in the UK only',
- expected=True)
-
- playlist = self._download_xml('http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id,
- 'Downloading playlist XML')
-
- no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
- if no_items is not None:
- reason = no_items.get('reason')
- if reason == 'preAvailability':
- msg = 'Episode %s is not yet available' % group_id
- elif reason == 'postAvailability':
- msg = 'Episode %s is no longer available' % group_id
+ def _download_media_selector(self, programme_id):
+ # Download the media selection XML for programme_id and return a
+ # (formats, subtitles) tuple built from its audio, video and
+ # captions <media> entries. Errors other than HTTP 403 are re-raised.
+ try:
+ media_selection = self._download_xml(
+ 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id,
+ programme_id, 'Downloading media selection XML')
+ except ExtractorError as ee:
+ if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
+ # The body of a 403 response is parsed as XML so that
+ # _extract_medias can report its <error> element. read() yields
+ # bytes, which have no encode() on Python 3; ElementTree parses
+ # the raw bytes directly.
+ media_selection = xml.etree.ElementTree.fromstring(ee.cause.read())
else:
- msg = 'Episode %s is not available: %s' % (group_id, reason)
- raise ExtractorError(msg, expected=True)
+ raise
formats = []
subtitles = None
- for item in self._extract_items(playlist):
- kind = item.get('kind')
- if kind != 'programme' and kind != 'radioProgramme':
- continue
- title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
- description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
+ for media in self._extract_medias(media_selection):
+ kind = media.get('kind')
+ if kind == 'audio':
+ formats.extend(self._extract_audio(media, programme_id))
+ elif kind == 'video':
+ formats.extend(self._extract_video(media, programme_id))
+ elif kind == 'captions':
+ subtitles = self._extract_captions(media, programme_id)
- programme_id = item.get('identifier')
- duration = int(item.get('duration'))
+ return formats, subtitles
- media_selection = self._download_xml(
- 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id,
- programme_id, 'Downloading media selection XML')
+ def _real_extract(self, url):
+ group_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, group_id, 'Downloading video page')
- for media in self._extract_medias(media_selection):
- kind = media.get('kind')
- if kind == 'audio':
- formats.extend(self._extract_audio(media, programme_id))
- elif kind == 'video':
- formats.extend(self._extract_video(media, programme_id))
- elif kind == 'captions':
- subtitles = self._extract_captions(media, programme_id)
+ programme_id = self._search_regex(
+ r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False)
+ if programme_id:
+ player = self._download_json(
+ 'http://www.bbc.co.uk/iplayer/episode/%s.json' % group_id,
+ group_id)['jsConf']['player']
+ title = player['title']
+ description = player['subtitle']
+ duration = player['duration']
+ formats, subtitles = self._download_media_selector(programme_id)
+ else:
+ playlist = self._download_xml(
+ 'http://www.bbc.co.uk/iplayer/playlist/%s' % group_id,
+ group_id, 'Downloading playlist XML')
+
+ no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
+ if no_items is not None:
+ reason = no_items.get('reason')
+ if reason == 'preAvailability':
+ msg = 'Episode %s is not yet available' % group_id
+ elif reason == 'postAvailability':
+ msg = 'Episode %s is no longer available' % group_id
+ elif reason == 'noMedia':
+ msg = 'Episode %s is not currently available' % group_id
+ else:
+ msg = 'Episode %s is not available: %s' % (group_id, reason)
+ raise ExtractorError(msg, expected=True)
+
+ for item in self._extract_items(playlist):
+ kind = item.get('kind')
+ if kind != 'programme' and kind != 'radioProgramme':
+ continue
+ title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
+ description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
+ programme_id = item.get('identifier')
+ duration = int(item.get('duration'))
+ formats, subtitles = self._download_media_selector(programme_id)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(programme_id, subtitles)
'duration': duration,
'formats': formats,
'subtitles': subtitles,
- }
\ No newline at end of file
+ }
title = self._html_search_regex(
r'<title>([^<]+)\s*-\s*beeg\.?</title>', webpage, 'title')
-
+
description = self._html_search_regex(
r'<meta name="description" content="([^"]*)"',
webpage, 'description', fatal=False)
-#coding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
'uploader': 'redvsblue',
'uploader_id': '792887',
}
+ },
+ {
+ 'url': 'http://blip.tv/play/gbk766dkj4Yn',
+ 'md5': 'fe0a33f022d49399a241e84a8ea8b8e3',
+ 'info_dict': {
+ 'id': '1749452',
+ 'ext': 'mp4',
+ 'upload_date': '20090208',
+ 'description': 'Witness the first appearance of the Nostalgia Critic character, as Doug reviews the movie Transformers.',
+ 'title': 'Nostalgia Critic: Transformers',
+ 'timestamp': 1234068723,
+ 'uploader': 'NostalgiaCritic',
+ 'uploader_id': '246467',
+ }
}
]
# See https://github.com/rg3/youtube-dl/issues/857 and
# https://github.com/rg3/youtube-dl/issues/4197
if lookup_id:
- info_page = self._download_webpage(
- 'http://blip.tv/play/%s.x?p=1' % lookup_id, lookup_id, 'Resolving lookup id')
- video_id = self._search_regex(r'config\.id\s*=\s*"([0-9]+)', info_page, 'video_id')
- else:
- video_id = mobj.group('id')
+ urlh = self._request_webpage(
+ 'http://blip.tv/play/%s' % lookup_id, lookup_id, 'Resolving lookup id')
+ url = compat_urlparse.urlparse(urlh.geturl())
+ qs = compat_urlparse.parse_qs(url.query)
+ mobj = re.match(self._VALID_URL, qs['file'][0])
+
+ video_id = mobj.group('id')
rss = self._download_xml('http://blip.tv/rss/flash/%s' % video_id, video_id, 'Downloading video RSS')
msg = self._download_webpage(
url + '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url',
video_id, 'Resolving URL for %s' % role)
- real_url = compat_urlparse.parse_qs(msg)['message'][0]
+ real_url = compat_urlparse.parse_qs(msg.strip())['message'][0]
media_type = media_content.get('type')
if media_type == 'text/srt' or url.endswith('.srt'):
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class BpbIE(InfoExtractor):
+ # Extractor for www.bpb.de Mediathek pages; the numeric path component
+ # captured by _VALID_URL is used as the video id.
+ IE_DESC = 'Bundeszentrale für politische Bildung'
+ _VALID_URL = r'http://www\.bpb\.de/mediathek/(?P<id>[0-9]+)/'
+
+ _TEST = {
+ 'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
+ 'md5': '0792086e8e2bfbac9cdf27835d5f2093',
+ 'info_dict': {
+ 'id': '297',
+ 'ext': 'mp4',
+ 'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR',
+ 'description': 'Joachim Gauck, erster Beauftragter für die Stasi-Unterlagen, spricht auf dem Geschichtsforum über die friedliche Revolution 1989 und eine "gewisse Traurigkeit" im Umgang mit der DDR-Vergangenheit.'
+ }
+ }
+
+ def _real_extract(self, url):
+ # Download the page and scrape everything from its HTML: the title
+ # from the <h2 class="white"> heading and the media URL from the first
+ # film.bpb.de/player/dokument_<n>.mp4 link; the description comes
+ # from _og_search_description.
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(
+ r'<h2 class="white">(.*?)</h2>', webpage, 'title')
+ video_url = self._html_search_regex(
+ r'(http://film\.bpb\.de/player/dokument_[0-9]+\.mp4)',
+ webpage, 'video URL')
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': self._og_search_description(webpage),
+ }
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+
+
+class BuzzFeedIE(InfoExtractor):
+ # Extractor for buzzfeed.com article pages. It returns a playlist whose
+ # entries are the videos embedded in the article.
+ # The id group is whatever follows the first path segment, up to the
+ # query string or fragment.
+ _VALID_URL = r'https?://(?:www\.)?buzzfeed\.com/[^?#]*?/(?P<id>[^?#]+)'
+ _TESTS = [{
+ 'url': 'http://www.buzzfeed.com/abagg/this-angry-ram-destroys-a-punching-bag-like-a-boss?utm_term=4ldqpia',
+ 'info_dict': {
+ 'id': 'this-angry-ram-destroys-a-punching-bag-like-a-boss',
+ 'title': 'This Angry Ram Destroys A Punching Bag Like A Boss',
+ 'description': 'Rambro!',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'aVCR29aE_OQ',
+ 'ext': 'mp4',
+ 'upload_date': '20141024',
+ 'uploader_id': 'Buddhanz1',
+ 'description': 'He likes to stay in shape with his heavy bag, he wont stop until its on the ground\n\nFollow Angry Ram on Facebook for regular updates -\nhttps://www.facebook.com/pages/Angry-Ram/1436897249899558?ref=hl',
+ 'uploader': 'Buddhanz',
+ 'title': 'Angry Ram destroys a punching bag',
+ }
+ }]
+ }, {
+ 'url': 'http://www.buzzfeed.com/sheridanwatson/look-at-this-cute-dog-omg?utm_term=4ldqpia',
+ 'params': {
+ 'skip_download': True, # Got enough YouTube download tests
+ },
+ 'info_dict': {
+ 'description': 'Munchkin the Teddy Bear is back !',
+ 'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'mVmBL8B-In0',
+ 'ext': 'mp4',
+ 'upload_date': '20141124',
+ 'uploader_id': 'CindysMunchkin',
+ 'description': '© 2014 Munchkin the Shih Tzu\nAll rights reserved\nFacebook: http://facebook.com/MunchkintheShihTzu',
+ 'uploader': 'Munchkin the Shih Tzu',
+ 'title': 'Munchkin the Teddy Bear gets her exercise',
+ },
+ }]
+ }]
+
+ def _real_extract(self, url):
+ # Build a playlist from every "video-embed" div on the page.
+ playlist_id = self._match_id(url)
+ webpage = self._download_webpage(url, playlist_id)
+
+ # Each embed carries its metadata as JSON in a single-quoted
+ # rel:bf_bucket_data attribute.
+ all_buckets = re.findall(
+ r'(?s)<div class="video-embed[^"]*"..*?rel:bf_bucket_data=\'([^\']+)\'',
+ webpage)
+
+ entries = []
+ for bd_json in all_buckets:
+ bd = json.loads(bd_json)
+ # Buckets with neither a 'video' nor a 'progload_video' entry are skipped.
+ video = bd.get('video') or bd.get('progload_video')
+ if not video:
+ continue
+ # Hand the embedded URL to url_result so another extractor handles it.
+ entries.append(self.url_result(video['url']))
+
+ return {
+ '_type': 'playlist',
+ 'id': playlist_id,
+ 'title': self._og_search_title(webpage),
+ 'description': self._og_search_description(webpage),
+ 'entries': entries,
+ }
'like_count': int(infos.find('NB_LIKES').text),
'comment_count': int(infos.find('NB_COMMENTS').text),
'formats': formats,
- }
\ No newline at end of file
+ }
real_id = self._search_regex(
r"video\.settings\.pid\s*=\s*'([^']+)';",
webpage, 'real video ID')
- return self.url_result(u'theplatform:%s' % real_id)
+ return self.url_result('theplatform:%s' % real_id)
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
- }
\ No newline at end of file
+ }
req.add_header('Referer', url)
playlist = self._download_xml(req, video_id)
-
+
formats = []
for i in playlist.find('smilRoot/body'):
if 'AD' not in i.attrib['id']:
from .common import InfoExtractor
from ..utils import ExtractorError
+
class Channel9IE(InfoExtractor):
'''
Common extractor for channel9.msdn.com.
'session_code': 'KOS002',
'session_day': 'Day 1',
'session_room': 'Arena 1A',
- 'session_speakers': [ 'Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug', 'Mads Kristensen' ],
+ 'session_speakers': ['Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug', 'Mads Kristensen'],
},
},
{
'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
'duration': 1540,
'thumbnail': 'http://video.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg',
- 'authors': [ 'Mike Wilmot' ],
+ 'authors': ['Mike Wilmot'],
},
}
]
'format_id': x.group('quality'),
'format_note': x.group('note'),
'format': '%s (%s)' % (x.group('quality'), x.group('note')),
- 'filesize': self._restore_bytes(x.group('filesize')), # File size is approximate
+ 'filesize': self._restore_bytes(x.group('filesize')), # File size is approximate
'preference': self._known_formats.index(x.group('quality')),
'vcodec': 'none' if x.group('note') == 'Audio only' else None,
} for x in list(re.finditer(FORMAT_REGEX, html)) if x.group('quality') in self._known_formats]
view_count = self._extract_view_count(html)
comment_count = self._extract_comment_count(html)
- common = {'_type': 'video',
- 'id': content_path,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'avg_rating': avg_rating,
- 'rating_count': rating_count,
- 'view_count': view_count,
- 'comment_count': comment_count,
- }
+ common = {
+ '_type': 'video',
+ 'id': content_path,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'avg_rating': avg_rating,
+ 'rating_count': rating_count,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ }
result = []
if slides is not None:
d = common.copy()
- d.update({ 'title': title + '-Slides', 'url': slides })
+ d.update({'title': title + '-Slides', 'url': slides})
result.append(d)
if zip_ is not None:
d = common.copy()
- d.update({ 'title': title + '-Zip', 'url': zip_ })
+ d.update({'title': title + '-Zip', 'url': zip_})
result.append(d)
if len(formats) > 0:
d = common.copy()
- d.update({ 'title': title, 'formats': formats })
+ d.update({'title': title, 'formats': formats})
result.append(d)
return result
else:
raise ExtractorError('Unexpected WT.entryid %s' % page_type, expected=True)
- else: # Assuming list
+ else: # Assuming list
return self._extract_list(content_path)
if videolist_url:
videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML')
formats = []
- baseurl = vidurl[:vidurl.rfind('/')+1]
+ baseurl = vidurl[:vidurl.rfind('/') + 1]
for video in videolist.findall('.//video'):
src = video.get('src')
if not src:
'title': 'FIFA 14 - E3 2013 Trailer',
'duration': 82,
},
- u'skip': 'Blocked in the US'
+ 'skip': 'Blocked in the US'
}
def _real_extract(self, url):
info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
(video_id, int(time.time())))
doc = self._download_xml(
- info_url, video_id, note=u'Downloading info page')
+ info_url, video_id, note='Downloading info page')
title = doc.find('title').text
video_url = doc.find('filename').text
if video_url is None:
transform_source=fix_xml_ampersands)
track_doc = pdoc.find('trackList/track')
+
def find_param(name):
node = find_xpath_attr(track_doc, './/param', 'name', name)
if node is not None:
'duration': 135,
'upload_date': '20130609',
},
- },
- {
+ }, {
"url": "http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29",
"md5": "b5cc60c60a3477d185af8f19a2a26f4e",
"info_dict": {
class CollegeHumorIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'
- _TESTS = [{
- 'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
- 'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd',
- 'info_dict': {
- 'id': '6902724',
- 'ext': 'mp4',
- 'title': 'Comic-Con Cosplay Catastrophe',
- 'description': "Fans get creative this year at San Diego. Too creative. And yes, that's really Joss Whedon.",
- 'age_limit': 13,
- 'duration': 187,
+ _TESTS = [
+ {
+ 'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
+ 'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd',
+ 'info_dict': {
+ 'id': '6902724',
+ 'ext': 'mp4',
+ 'title': 'Comic-Con Cosplay Catastrophe',
+ 'description': "Fans get creative this year at San Diego. Too creative. And yes, that's really Joss Whedon.",
+ 'age_limit': 13,
+ 'duration': 187,
+ },
+ }, {
+ 'url': 'http://www.collegehumor.com/video/3505939/font-conference',
+ 'md5': '72fa701d8ef38664a4dbb9e2ab721816',
+ 'info_dict': {
+ 'id': '3505939',
+ 'ext': 'mp4',
+ 'title': 'Font Conference',
+ 'description': "This video wasn't long enough, so we made it double-spaced.",
+ 'age_limit': 10,
+ 'duration': 179,
+ },
+ }, {
+ # embedded youtube video
+ 'url': 'http://www.collegehumor.com/embed/6950306',
+ 'info_dict': {
+ 'id': 'Z-bao9fg6Yc',
+ 'ext': 'mp4',
+ 'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!',
+ 'uploader': 'Mark Dice',
+ 'uploader_id': 'MarkDice',
+ 'description': 'md5:62c3dab9351fac7bb44b53b69511d87f',
+ 'upload_date': '20140127',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Youtube'],
},
- },
- {
- 'url': 'http://www.collegehumor.com/video/3505939/font-conference',
- 'md5': '72fa701d8ef38664a4dbb9e2ab721816',
- 'info_dict': {
- 'id': '3505939',
- 'ext': 'mp4',
- 'title': 'Font Conference',
- 'description': "This video wasn't long enough, so we made it double-spaced.",
- 'age_limit': 10,
- 'duration': 179,
- },
- },
- # embedded youtube video
- {
- 'url': 'http://www.collegehumor.com/embed/6950306',
- 'info_dict': {
- 'id': 'Z-bao9fg6Yc',
- 'ext': 'mp4',
- 'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!',
- 'uploader': 'Mark Dice',
- 'uploader_id': 'MarkDice',
- 'description': 'md5:62c3dab9351fac7bb44b53b69511d87f',
- 'upload_date': '20140127',
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': ['Youtube'],
- },
]
def _real_extract(self, url):
import xml.etree.ElementTree
from ..compat import (
+ compat_cookiejar,
compat_http_client,
compat_urllib_error,
compat_urllib_parse_urlparse,
content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal)
return (content, urlh)
- def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True):
+ def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None):
content_type = urlh.headers.get('Content-Type', '')
webpage_bytes = urlh.read()
+ if prefix is not None:
+ webpage_bytes = prefix + webpage_bytes
m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
if m:
encoding = m.group(1)
"""Report attempt to log in."""
self.to_screen('Logging in')
- #Methods for following #608
+ # Methods for following #608
@staticmethod
def url_result(url, ie=None, video_id=None):
"""Returns a url that points to a page that should be processed"""
+ # The result is a dict with '_type' set to 'url', the target 'url' and
+ # 'ie_key' set to ie (None by default); an 'id' entry is added only
+ # when video_id is given.
- #TODO: ie should be the class used for getting the info
+ # TODO: ie should be the class used for getting the info
video_info = {'_type': 'url',
'url': url,
'ie_key': ie}
if video_id is not None:
video_info['id'] = video_id
return video_info
+
@staticmethod
def playlist_result(entries, playlist_id=None, playlist_title=None):
"""Returns a playlist"""
raise RegexNotFoundError('Unable to extract %s' % _name)
else:
self._downloader.report_warning('unable to extract %s; '
- 'please report this issue on http://yt-dl.org/bug' % _name)
+ 'please report this issue on http://yt-dl.org/bug' % _name)
return None
def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
except (IOError, netrc.NetrcParseError) as err:
self._downloader.report_warning('parsing .netrc: %s' % compat_str(err))
-
+
return (username, password)
def _get_tfa_info(self):
def _twitter_search_player(self, html):
+ # Look up the 'twitter:player' meta entry in html via _html_search_meta
+ # and return its result.
return self._html_search_meta('twitter:player', html,
- 'twitter card player')
+ 'twitter card player')
def _sort_formats(self, formats):
if not formats:
self._downloader.report_warning(msg)
return res
+ def _set_cookie(self, domain, name, value, expire_time=None):
+ # Store a cookie called name with the given value for domain in the
+ # downloader's cookie jar.
+ # NOTE(review): assuming compat_cookiejar.Cookie follows the stdlib
+ # Cookie constructor order, the positional arguments mean: version 0,
+ # no port, path '/', not secure, expiry expire_time (None by default).
+ cookie = compat_cookiejar.Cookie(0, name, value, None, None, domain, None,
+ None, '/', True, False, expire_time, '', None, None, None)
+ self._downloader.cookiejar.set_cookie(cookie)
+
class SearchInfoExtractor(InfoExtractor):
"""
return {
'id': video_id,
- 'url':video_url,
+ 'url': video_url,
'title': title,
'description': description,
'timestamp': timestamp,
'comment_count': comment_count,
'height': height,
'width': width,
- }
\ No newline at end of file
+ }
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
self._download_webpage(login_request, None, False, 'Wrong login info')
-
def _real_initialize(self):
self._login()
-
def _decrypt_subtitles(self, data, iv, id):
data = bytes_to_intlist(data)
iv = bytes_to_intlist(iv)
return shaHash + [0] * 12
key = obfuscate_key(id)
+
class Counter:
__value = iv
+
def next_value(self):
temp = self.__value
self.__value = inc(self.__value)
return output
- def _real_extract(self,url):
+ def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('video_id')
formats = []
for fmt in re.findall(r'\?p([0-9]{3,4})=1', webpage):
stream_quality, stream_format = self._FORMAT_IDS[fmt]
- video_format = fmt+'p'
+ video_format = fmt + 'p'
streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/')
# urlencode doesn't work!
- streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality='+stream_quality+'&media%5Fid='+stream_id+'&video%5Fformat='+stream_format
+ streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality=' + stream_quality + '&media%5Fid=' + stream_id + '&video%5Fformat=' + stream_format
streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
streamdata_req.add_header('Content-Length', str(len(streamdata_req.data)))
streamdata = self._download_xml(
subtitles = {}
sub_format = self._downloader.params.get('subtitlesformat', 'srt')
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
- sub_page = self._download_webpage('http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id='+sub_id,\
- video_id, note='Downloading subtitles for '+sub_name)
+ sub_page = self._download_webpage(
+ 'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
+ video_id, note='Downloading subtitles for ' + sub_name)
id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
return
return {
- 'id': video_id,
- 'title': video_title,
+ 'id': video_id,
+ 'title': video_title,
'description': video_description,
- 'thumbnail': video_thumbnail,
- 'uploader': video_uploader,
+ 'thumbnail': video_thumbnail,
+ 'uploader': video_uploader,
'upload_date': video_upload_date,
- 'subtitles': subtitles,
- 'formats': formats,
+ 'subtitles': subtitles,
+ 'formats': formats,
}
-#coding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
unescapeHTML,
)
+
class DailymotionBaseInfoExtractor(InfoExtractor):
@staticmethod
def _build_request(url):
request.add_header('Cookie', 'ff=off')
return request
+
class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
"""Information Extractor for Dailymotion"""
embed_page = self._download_webpage(embed_url, video_id,
'Downloading embed page')
info = self._search_regex(r'var info = ({.*?}),$', embed_page,
- 'video info', flags=re.MULTILINE)
+ 'video info', flags=re.MULTILINE)
info = json.loads(info)
if info.get('error') is not None:
msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
break
return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
- for video_id in orderedSet(video_ids)]
+ for video_id in orderedSet(video_ids)]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
class DefenseGouvFrIE(InfoExtractor):
IE_NAME = 'defense.gouv.fr'
_VALID_URL = (r'http://.*?\.defense\.gouv\.fr/layout/set/'
- r'ligthboxvideo/base-de-medias/webtv/(.*)')
+ r'ligthboxvideo/base-de-medias/webtv/(.*)')
_TEST = {
'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1',
video_id = self._search_regex(
r"flashvars.pvg_id=\"(\d+)\";",
webpage, 'ID')
-
+
json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
- + video_id)
+ + video_id)
info = self._download_webpage(json_url, title,
- 'Downloading JSON config')
+ 'Downloading JSON config')
video_url = json.loads(info)['renditions'][0]['url']
-
+
return {'id': video_id,
'ext': 'mp4',
'url': video_url,
'ext': 'mp4',
'title': 'MythBusters: Mission Impossible Outtakes',
'description': ('Watch Jamie Hyneman and Adam Savage practice being'
- ' each other -- to the point of confusing Jamie\'s dog -- and '
- 'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s'
- ' back.'),
+ ' each other -- to the point of confusing Jamie\'s dog -- and '
+ 'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s'
+ ' back.'),
'duration': 156,
},
}
webpage = self._download_webpage(url, video_id)
video_list_json = self._search_regex(r'var videoListJSON = ({.*?});',
- webpage, 'video list', flags=re.DOTALL)
+ webpage, 'video list', flags=re.DOTALL)
video_list = json.loads(video_list_json)
info = video_list['clips'][0]
formats = []
video_id = mobj.group('id')
info_url = "https://dotsub.com/api/media/%s/metadata" % video_id
info = self._download_json(info_url, video_id)
- date = time.gmtime(info['dateCreated']/1000) # The timestamp is in miliseconds
+ date = time.gmtime(info['dateCreated'] / 1000) # The timestamp is in miliseconds
return {
'id': video_id,
class DropboxIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dropbox[.]com/sh?/(?P<id>[a-zA-Z0-9]{15})/.*'
- _TESTS = [{
- 'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',
- 'info_dict': {
- 'id': 'nelirfsxnmcfbfh',
- 'ext': 'mp4',
- 'title': 'youtube-dl test video \'ä"BaW_jenozKc'
- }
- },
- {
- 'url': 'https://www.dropbox.com/sh/662glsejgzoj9sr/AAByil3FGH9KFNZ13e08eSa1a/Pregame%20Ceremony%20Program%20PA%2020140518.m4v',
- 'only_matching': True,
- },
+ _TESTS = [
+ {
+ 'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',
+ 'info_dict': {
+ 'id': 'nelirfsxnmcfbfh',
+ 'ext': 'mp4',
+ 'title': 'youtube-dl test video \'ä"BaW_jenozKc'
+ }
+ }, {
+ 'url': 'https://www.dropbox.com/sh/662glsejgzoj9sr/AAByil3FGH9KFNZ13e08eSa1a/Pregame%20Ceremony%20Program%20PA%2020140518.m4v',
+ 'only_matching': True,
+ },
]
def _real_extract(self, url):
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(r'(?:file|source)=(http[^\'"&]*)',
- webpage, 'video URL')
+ webpage, 'video URL')
final_url = compat_urllib_parse.unquote(video_url)
uploader = self._html_search_meta('uploader', webpage)
title = self._og_search_title(webpage).replace(' | eHow', '')
info = {
'id': compat_str(track_data['id']),
'url': track_data['track_file_stream_url'],
- 'title': track_data['performer'] + u' - ' + track_data['name'],
+ 'title': track_data['performer'] + ' - ' + track_data['name'],
'raw_title': track_data['name'],
'uploader_id': data['user']['login'],
'ext': 'm4a',
login_page_req = compat_urllib_request.Request(self._LOGIN_URL)
login_page_req.add_header('Cookie', 'locale=en_US')
login_page = self._download_webpage(login_page_req, None,
- note='Downloading login page',
- errnote='Unable to download login page')
+ note='Downloading login page',
+ errnote='Unable to download login page')
lsd = self._search_regex(
r'<input type="hidden" name="lsd" value="([^"]*)"',
login_page, 'lsd')
'legacy_return': '1',
'timezone': '-60',
'trynum': '1',
- }
+ }
request = compat_urllib_request.Request(self._LOGIN_URL, urlencode_postdata(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
try:
login_results = self._download_webpage(request, None,
- note='Logging in', errnote='unable to fetch login page')
+ note='Logging in', errnote='unable to fetch login page')
if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
self._downloader.report_warning('unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
return
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
check_response = self._download_webpage(check_req, None,
- note='Confirming login')
+ note='Confirming login')
if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
self._downloader.report_warning('Unable to confirm login, you have to login in your brower and authorize the login.')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
info_url = (
"http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&".
- format(video_id, mimi, compat_urllib_request.quote(refer, safe='').replace('.','%2E')))
+ format(video_id, mimi, compat_urllib_request.quote(refer, safe='').replace('.', '%2E')))
info_webpage = self._download_webpage(
info_url, video_id, note='Downloading info page')
duration = self._og_search_property('video:duration', webpage, 'video duration', fatal=False)
like_count = self._html_search_regex(r'title="Понравилось".*?/></label> \[(\d+)\]',
- webpage, 'like count', fatal=False)
+ webpage, 'like count', fatal=False)
dislike_count = self._html_search_regex(r'title="Не понравилось".*?/></label> \[(\d+)\]',
- webpage, 'dislike count', fatal=False)
+ webpage, 'dislike count', fatal=False)
return {
'id': video_id,
'duration': int_or_none(duration),
'like_count': int_or_none(like_count),
'dislike_count': int_or_none(dislike_count),
- }
\ No newline at end of file
+ }
video_id = mobj.group('id')
embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id
embed_page = self._download_webpage(embed_url, video_id,
- 'Downloading embed page')
+ 'Downloading embed page')
sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
query = compat_urllib_parse.urlencode({
'func': 'GetResults',
server = random.randint(2, 4)
video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%d.jpg' % episode
start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%d/Start' % episode,
- episode)
+ episode)
playlist = self._search_regex(r'playlist = (\[.*?\]);', start_webpage,
- 'playlist', flags=re.DOTALL)
+ 'playlist', flags=re.DOTALL)
files = json.loads(re.sub('{[^{}]*?}', '{}', playlist))
# TODO: return a single multipart video
videos = []
'info_dict': {
'id': '5645318632',
'ext': 'mp4',
- "description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.",
- "uploader_id": "forestwander-nature-pictures",
+ "description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.",
+ "uploader_id": "forestwander-nature-pictures",
"title": "Dark Hollow Waterfalls"
}
}
first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')
node_id = self._html_search_regex(r'<Item id="id">(\d+-\d+)</Item>',
- first_xml, 'node_id')
+ first_xml, 'node_id')
second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')
description = self._html_search_meta('description', webpage, 'description')
if description:
upload_date = self._search_regex(r'Published Date: (\d{2} [a-zA-Z]{3} \d{4})', description, 'upload date',
- fatal=False)
+ fatal=False)
if upload_date:
upload_date = unified_strdate(upload_date)
view_count = self._search_regex(r'Views: ([\d,\.]+)', description, 'view count', fatal=False)
token_url = "http://tkn.4tube.com/{0}/desktop/{1}".format(media_id, "+".join(sources))
headers = {
- b'Content-Type': b'application/x-www-form-urlencoded',
- b'Origin': b'http://www.4tube.com',
- }
+ b'Content-Type': b'application/x-www-form-urlencoded',
+ b'Origin': b'http://www.4tube.com',
+ }
token_req = compat_urllib_request.Request(token_url, b'{}', headers)
tokens = self._download_json(token_req, video_id)
'format_id': format + 'p',
'resolution': format + 'p',
'quality': int(format),
- } for format in sources]
+ } for format in sources]
self._sort_formats(formats)
'duration': duration,
'age_limit': 18,
'webpage_url': webpage_url,
- }
\ No newline at end of file
+ }
if info.get('status') == 'NOK':
raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, info['message']), expected=True)
+ allowed_countries = info['videos'][0].get('geoblocage')
+ if allowed_countries:
+ georestricted = True
+ geo_info = self._download_json(
+ 'http://geo.francetv.fr/ws/edgescape.json', video_id,
+ 'Downloading geo restriction info')
+ country = geo_info['reponse']['geo_info']['country_code']
+ if country not in allowed_countries:
+ raise ExtractorError(
+ 'The video is not available from your location',
+ expected=True)
+ else:
+ georestricted = False
formats = []
for video in info['videos']:
continue
format_id = video['format']
if video_url.endswith('.f4m'):
+ if georestricted:
+ # See https://github.com/rg3/youtube-dl/issues/3963
+ # m3u8 urls work fine
+ continue
video_url_parsed = compat_urllib_parse_urlparse(video_url)
f4m_url = self._download_webpage(
'http://hdfauth.francetv.fr/esi/urltokengen2.html?url=%s' % video_url_parsed.path,
info_json = self._download_webpage(info_url, name)
info = json.loads(info_json)
return self.url_result('http://www.dailymotion.com/video/%s' % info['id'],
- ie='Dailymotion')
+ ie='Dailymotion')
class CultureboxIE(FranceTVBaseInfoExtractor):
'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
# MD5 is flaky, seems to change regularly
# 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3',
- u'info_dict': {
+ 'info_dict': {
'id': '20130811',
'ext': 'mp4',
'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review',
'title': 'Rosetta #CometLanding webcast HL 10',
}
},
+ # LazyYT
+ {
+ 'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
+ 'info_dict': {
+ 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
+ },
+ 'playlist_mincount': 2,
+ },
+ # Direct link with incorrect MIME type
+ {
+ 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
+ 'md5': '4ccbebe5f36706d85221f204d7eb5913',
+ 'info_dict': {
+ 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
+ 'id': '5_Lennart_Poettering_-_Systemd',
+ 'ext': 'webm',
+ 'title': '5_Lennart_Poettering_-_Systemd',
+ 'upload_date': '20141120',
+ },
+ 'expected_warnings': [
+ 'URL could be a direct video link, returning it as such.'
+ ]
+ }
+
]
def report_following_redirect(self, new_url):
if default_search in ('error', 'fixup_error'):
raise ExtractorError(
- ('%r is not a valid URL. '
- 'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
- ) % (url, url), expected=True)
+ '%r is not a valid URL. '
+ 'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
+ % (url, url), expected=True)
else:
if ':' not in default_search:
default_search += ':'
if not self._downloader.params.get('test', False) and not is_intentional:
self._downloader.report_warning('Falling back on generic information extractor.')
- if full_response:
- webpage = self._webpage_read_content(full_response, url, video_id)
- else:
- webpage = self._download_webpage(url, video_id)
+ if not full_response:
+ full_response = self._request_webpage(url, video_id)
+
+ # Maybe it's a direct link to a video?
+ # Be careful not to download the whole thing!
+ first_bytes = full_response.read(512)
+ if not re.match(r'^\s*<', first_bytes.decode('utf-8', 'replace')):
+ self._downloader.report_warning(
+ 'URL could be a direct video link, returning it as such.')
+ upload_date = unified_strdate(
+ head_response.headers.get('Last-Modified'))
+ return {
+ 'id': video_id,
+ 'title': os.path.splitext(url_basename(url))[0],
+ 'direct': True,
+ 'url': url,
+ 'upload_date': upload_date,
+ }
+
+ webpage = self._webpage_read_content(
+ full_response, url, video_id, prefix=first_bytes)
+
self.report_extraction(video_id)
# Is it an RSS feed?
return _playlist_from_matches(
matches, lambda m: unescapeHTML(m[1]))
+ # Look for lazyYT YouTube embed
+ matches = re.findall(
+ r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
+ if matches:
+ return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
+
# Look for embedded Dailymotion player
matches = re.findall(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
'title': video_title,
'id': video_id,
}
-
+
match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
if match:
return {
# Look for embedded blip.tv player
mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
if mobj:
- return self.url_result('http://blip.tv/a/a-'+mobj.group(1), 'BlipTV')
+ return self.url_result('http://blip.tv/a/a-' + mobj.group(1), 'BlipTV')
mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage)
if mobj:
return self.url_result(mobj.group(1), 'BlipTV')
# Look for Ooyala videos
mobj = (re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
- re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage))
+ re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage))
if mobj is not None:
return OoyalaIE._build_url_result(mobj.group('ec'))
'_type': 'playlist',
'entries': entries,
}
-
'uploader_id': uploader_id,
'like_count': like_count,
'formats': formats
- }
\ No newline at end of file
+ }
determine_ext,
compat_urllib_parse,
compat_urllib_request,
+ int_or_none,
)
class GorillaVidIE(InfoExtractor):
- IE_DESC = 'GorillaVid.in, daclips.in and movpod.in'
+ IE_DESC = 'GorillaVid.in, daclips.in, movpod.in and fastvideo.in'
_VALID_URL = r'''(?x)
https?://(?P<host>(?:www\.)?
- (?:daclips\.in|gorillavid\.in|movpod\.in))/
+ (?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in))/
(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?
'''
'title': 'Micro Pig piglets ready on 16th July 2009-bG0PdrCdxUc',
'thumbnail': 're:http://.*\.jpg',
}
+ }, {
+ # video with countdown timeout
+ 'url': 'http://fastvideo.in/1qmdn1lmsmbw',
+ 'md5': '8b87ec3f6564a3108a0e8e66594842ba',
+ 'info_dict': {
+ 'id': '1qmdn1lmsmbw',
+ 'ext': 'mp4',
+ 'title': 'Man of Steel - Trailer',
+ 'thumbnail': 're:http://.*\.jpg',
+ },
}, {
'url': 'http://movpod.in/0wguyyxi1yca',
'only_matching': True,
(?:id="[^"]+"\s+)?
value="([^"]*)"
''', webpage))
-
+
if fields['op'] == 'download1':
+ countdown = int_or_none(self._search_regex(
+ r'<span id="countdown_str">(?:[Ww]ait)?\s*<span id="cxc">(\d+)</span>\s*(?:seconds?)?</span>',
+ webpage, 'countdown', default=None))
+ if countdown:
+ self._sleep(countdown, video_id)
+
post = compat_urllib_parse.urlencode(fields)
req = compat_urllib_request.Request(url, post)
webpage = self._download_webpage(req, video_id, 'Downloading video page')
- title = self._search_regex(r'style="z-index: [0-9]+;">([^<]+)</span>', webpage, 'title')
- video_url = self._search_regex(r'file\s*:\s*\'(http[^\']+)\',', webpage, 'file url')
- thumbnail = self._search_regex(r'image\s*:\s*\'(http[^\']+)\',', webpage, 'thumbnail', fatal=False)
+ title = self._search_regex(
+ r'style="z-index: [0-9]+;">([^<]+)</span>',
+ webpage, 'title', default=None) or self._og_search_title(webpage)
+ video_url = self._search_regex(
+ r'file\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'file url')
+ thumbnail = self._search_regex(
+ r'image\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'thumbnail', fatal=False)
formats = [{
'format_id': 'sd',
webpage2 = self._download_webpage(redirect_url, video_id)
video_url = self._html_search_regex(
r'flvMask:(.*?);', webpage2, 'video_url')
-
+
duration = parse_duration(self._search_regex(
r'<strong>Runtime:</strong>\s*([0-9:]+)</div>',
webpage, 'duration', fatal=False))
from __future__ import unicode_literals
-import re
import base64
from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
compat_urllib_parse,
compat_urllib_request,
+)
+from ..utils import (
ExtractorError,
HEADRequest,
)
_VALID_URL = r'http://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html'
_TEST = {
'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html',
- 'file': '1435540.mp3',
'md5': '2c2cd2f76ef11a9b3b581e8b232f3d96',
'info_dict': {
+ 'id': '1435540',
+ 'ext': 'mp3',
'title': 'Freddie Gibbs - Lay It Down'
}
}
def _real_extract(self, url):
- m = re.match(self._VALID_URL, url)
- video_id = m.group('id')
-
- webpage_src = self._download_webpage(url, video_id)
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
video_url_base64 = self._search_regex(
- r'data-path="(.*?)"', webpage_src, u'video URL', fatal=False)
+ r'data-path="(.*?)"', webpage, 'video URL', default=None)
if video_url_base64 is None:
video_url = self._search_regex(
- r'"contentUrl" content="(.*?)"', webpage_src, u'video URL')
+ r'"contentUrl" content="(.*?)"', webpage, 'content URL')
return self.url_result(video_url, ie='Youtube')
reqdata = compat_urllib_parse.urlencode([
if video_url.endswith('.html'):
raise ExtractorError('Redirect failed')
- video_title = self._og_search_title(webpage_src).strip()
+ video_title = self._og_search_title(webpage).strip()
return {
'id': video_id,
'url': video_url,
'title': video_title,
- 'thumbnail': self._og_search_thumbnail(webpage_src),
+ 'thumbnail': self._og_search_thumbnail(webpage),
}
'info_dict': {
'id': '390161',
'ext': 'mp4',
- 'description': 'The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here\'s the proper way to tie a square knot.',
+ 'description': 'The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here\'s the proper way to tie a square knot.',
'title': 'How to Tie a Square Knot Properly',
}
}
self.report_extraction(video_id)
video_url = self._search_regex(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)',
- webpage, 'video URL')
+ webpage, 'video URL')
video_description = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'',
- webpage, 'description', fatal=False)
+ webpage, 'description', fatal=False)
return {
'id': video_id,
'id': '078fdd005f6d3c02f63d795faa1b984f',
'ext': 'mp4',
'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
- 'description': 'Giant skeletons, bloody hunts, and captivating'
- ' natural beauty take our breath away.',
+ 'description': (
+ 'Giant skeletons, bloody hunts, and captivating'
+ ' natural beauty take our breath away.'
+ ),
},
},
]
video_id = self._find_video_id(webpage)
result = self._get_video_info(video_id)
description = self._html_search_regex(self._DESCRIPTION_RE,
- webpage, 'video description', flags=re.DOTALL)
+ webpage, 'video description', flags=re.DOTALL)
result['description'] = description
return result
},
'playlist_count': 7,
}
-
+
def _real_extract(self, url):
list_id = self._match_id(url)
webpage = self._download_webpage(url, list_id)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
- webpage, 'uploader id', fatal=False)
+ webpage, 'uploader id', fatal=False)
desc = self._search_regex(r'"caption":"(.*?)"', webpage, 'description',
- fatal=False)
+ fatal=False)
return {
'id': video_id,
def _clean_query(query):
NEEDED_ARGS = ['publishedid', 'customerid']
query_dic = compat_urlparse.parse_qs(query)
- cleaned_dic = dict((k,v[0]) for (k,v) in query_dic.items() if k in NEEDED_ARGS)
+ cleaned_dic = dict((k, v[0]) for (k, v) in query_dic.items() if k in NEEDED_ARGS)
# Other player ids return m3u8 urls
cleaned_dic['playerid'] = '247'
cleaned_dic['videokbrate'] = '100000'
url = self._build_url(query)
flashconfiguration = self._download_xml(url, video_id,
- 'Downloading flash configuration')
+ 'Downloading flash configuration')
file_url = flashconfiguration.find('file').text
file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
# Replace some of the parameters in the query to get the best quality
# and http links (no m3u8 manifests)
file_url = re.sub(r'(?<=\?)(.+)$',
- lambda m: self._clean_query(m.group()),
- file_url)
+ lambda m: self._clean_query(m.group()),
+ file_url)
info = self._download_xml(file_url, video_id,
- 'Downloading video info')
+ 'Downloading video info')
item = info.find('channel/item')
def _bp(p):
- return xpath_with_ns(p,
- {'media': 'http://search.yahoo.com/mrss/',
- 'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'})
+ return xpath_with_ns(
+ p,
+ {
+ 'media': 'http://search.yahoo.com/mrss/',
+ 'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats',
+ }
+ )
formats = []
for content in item.findall(_bp('media:group/media:content')):
attr = content.attrib
player_url = (
'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' %
- (floor(random()*1073741824), floor(random()*1073741824))
+ (floor(random() * 1073741824), floor(random() * 1073741824))
)
req = compat_urllib_request.Request(player_url)
'thumbnail': 'http://thumbs.ivi.ru/f15.vcp.digitalaccess.ru/contents/8/4/0068dc0677041f3336b7c2baad8fc0.jpg',
},
'skip': 'Only works from Russia',
- }
+ }
]
# Sorted by quality
compilation = result['compilation']
title = result['title']
- title = '%s - %s' % (compilation, title) if compilation is not None else title
+ title = '%s - %s' % (compilation, title) if compilation is not None else title
previews = result['preview']
previews.sort(key=lambda fmt: self._known_thumbnails.index(fmt['content_format']))
compilation_id = mobj.group('compilationid')
season_id = mobj.group('seasonid')
- if season_id is not None: # Season link
+ if season_id is not None: # Season link
season_page = self._download_webpage(url, compilation_id, 'Downloading season %s web page' % season_id)
playlist_id = '%s/season%s' % (compilation_id, season_id)
playlist_title = self._html_search_meta('title', season_page, 'title')
entries = self._extract_entries(season_page, compilation_id)
- else: # Compilation link
+ else: # Compilation link
compilation_page = self._download_webpage(url, compilation_id, 'Downloading compilation web page')
playlist_id = compilation_id
playlist_title = self._html_search_meta('title', compilation_page, 'title')
seasons = re.findall(r'<a href="/watch/%s/season(\d+)">[^<]+</a>' % compilation_id, compilation_page)
- if len(seasons) == 0: # No seasons in this compilation
+ if len(seasons) == 0: # No seasons in this compilation
entries = self._extract_entries(compilation_page, compilation_id)
else:
entries = []
compilation_id, 'Downloading season %s web page' % season_id)
entries.extend(self._extract_entries(season_page, compilation_id))
- return self.playlist_result(entries, playlist_id, playlist_title)
\ No newline at end of file
+ return self.playlist_result(entries, playlist_id, playlist_title)
'title': title,
'description': description,
}
-
xml_link = self._html_search_regex(
r'<param name="flashvars" value="config=(.*?)" />',
webpage, 'config URL')
-
+
video_id = self._search_regex(
r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
xml_link, 'video ID')
xml_link, title, 'Downloading XML config')
info_json = config.find('format.json').text
info = json.loads(info_json)['versions'][0]
-
+
video_url = 'http://video720.jeuxvideo.com/' + info['file']
return {
try:
video_url = self._search_regex(r'"config":{"file":"(?P<video_url>http:[^"]+\?mdtk=[0-9]+)"',
- iframe_html, 'video url')
+ iframe_html, 'video url')
video_url = unescapeHTML(video_url).replace('\/', '/')
except RegexNotFoundError:
youtube_url = self._search_regex(
return self.url_result(youtube_url, ie='Youtube')
title = self._html_search_regex(r'<h1 class="inline">([^<]+)</h1>',
- html, 'title')
+ html, 'title')
artist = self._html_search_regex(r'<span id="infos_article_artist">([^<]+)</span>',
- html, 'artist')
+ html, 'artist')
return {
'id': video_id,
class KankanIE(InfoExtractor):
_VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml'
-
+
_TEST = {
'url': 'http://yinyue.kankan.com/vod/48/48863.shtml',
'file': '48863.flv',
'id': '1404461844',
'ext': 'mp4',
'title': 'Intersection: The Story of Josh Grant by Kyle Cowling',
- 'description': 'A unique motocross documentary that examines the '
- 'life and mind of one of sports most elite athletes: Josh Grant.',
+ 'description': (
+ 'A unique motocross documentary that examines the '
+ 'life and mind of one of sports most elite athletes: Josh Grant.'
+ ),
},
}, {
'note': 'Embedded video (not using the native kickstarter video service)',
'duration': duration,
'view_count': int_or_none(view_count),
'comment_count': int_or_none(comment_count),
- }
\ No newline at end of file
+ }
'title': title,
'url': downloadUrl
}
-
'categories': categories,
'ext': 'mp4',
}
-
r'<div class=\'comments\'>\s*<span class=\'counter\'>(\d+)</span>', webpage, 'comment count', fatal=False)
upload_date = self._html_search_regex(
- r'<time datetime=\'([^\']+)\'>', webpage, 'upload date',fatal=False)
+ r'<time datetime=\'([^\']+)\'>', webpage, 'upload date', fatal=False)
if upload_date is not None:
upload_date = unified_strdate(upload_date)
if len(videos) == 1:
return make_entry(video_id, videos[0])
else:
- return [make_entry(video_id, media, video_number+1) for video_number, media in enumerate(videos)]
\ No newline at end of file
+ return [make_entry(video_id, media, video_number + 1) for video_number, media in enumerate(videos)]
'uploader': 'ljfriel2',
'title': 'Most unlucky car accident'
}
- },
- {
+ }, {
'url': 'http://www.liveleak.com/view?i=f93_1390833151',
'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
'info_dict': {
'uploader': 'ARD_Stinkt',
'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
}
- },
- {
+ }, {
'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
'md5': '42c6d97d54f1db107958760788c5f48f',
'info_dict': {
video_id = mobj.group(1)
page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, video_id,
- 'Downloading video JSON')
+ 'Downloading video JSON')
video_json = json.loads(page)
if 'Status' in video_json:
'password': password,
'remember': 'false',
'stayPut': 'false'
- }
+ }
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
login_page = self._download_webpage(request, None, 'Logging in as %s' % username)
m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page)
if m is not None:
response = m.group('json')
- response_json = json.loads(response)
+ response_json = json.loads(response)
state = response_json['state']
if state == 'notlogged':
mobj = re.match(self._VALID_URL, url)
course_path = mobj.group('coursepath')
course_id = mobj.group('courseid')
-
+
page = self._download_webpage('http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
course_id, 'Downloading course JSON')
course_json = json.loads(page)
course_title = course_json['Title']
- return self.playlist_result(entries, course_id, course_title)
\ No newline at end of file
+ return self.playlist_result(entries, course_id, course_title)
video_id = mobj.group('id')
rss = self._download_xml('http://ws.m6.fr/v1/video/info/m6/bonus/%s' % video_id, video_id,
- 'Downloading video RSS')
+ 'Downloading video RSS')
title = rss.find('./channel/item/title').text
description = rss.find('./channel/item/description').text
'duration': duration,
'view_count': view_count,
'formats': formats,
- }
\ No newline at end of file
+ }
compat_urllib_parse,
)
+
class MalemotionIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?malemotion\.com/video/(.+?)\.(?P<id>.+?)(#|$)'
_TEST = {
class MDRIE(InfoExtractor):
_VALID_URL = r'^(?P<domain>https?://(?:www\.)?mdr\.de)/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)(?:_|\.html)'
-
+
# No tests, MDR regularily deletes its videos
_TEST = {
'url': 'http://www.mdr.de/fakt/video189002.html',
# Youtube video
{
'add_ie': ['Youtube'],
- 'url': 'http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/',
+ 'url': 'http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/',
'info_dict': {
'id': '_aUehQsCQtM',
'ext': 'mp4',
description = self._og_search_description(webpage)
thumbnail = self._og_search_thumbnail(webpage)
video_uploader = self._html_search_regex(
- r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
- webpage, 'uploader nickname', fatal=False)
+ r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
+ webpage, 'uploader nickname', fatal=False)
duration = int_or_none(
self._html_search_meta('video:duration', webpage))
webpage = self._download_webpage(url, video_id)
# The xml is not well formatted, there are raw '&'
info = self._download_xml('http://www.metacritic.com/video_data?video=' + video_id,
- video_id, 'Downloading info xml', transform_source=fix_xml_ampersands)
+ video_id, 'Downloading info xml', transform_source=fix_xml_ampersands)
clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id)
formats = []
self._sort_formats(formats)
description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>',
- webpage, 'description', flags=re.DOTALL)
+ webpage, 'description', flags=re.DOTALL)
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'duration': duration,
- }
\ No newline at end of file
+ }
title = os.path.splitext(data['fname'])[0]
- #Could be several links with different quality
+ # Could be several links with different quality
links = re.findall(r'"file" : "?(.+?)",', webpage)
# Assume the links are ordered in quality
formats = [{
page = self._download_webpage(url, video_id, 'Downloading page')
if re.search(r'>Video Not Found or Deleted<', page) is not None:
- raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
+ raise ExtractorError('Video %s does not exist' % video_id, expected=True)
hash_key = self._html_search_regex(r'<input type="hidden" name="hash" value="([^"]+)">', page, 'hash')
title = self._html_search_regex(r'(?m)<div class="blockTitle">\s*<h2>Watch ([^<]+)</h2>', page, 'title')
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
- }
\ No newline at end of file
+ }
like_count = str_to_int(self._html_search_regex(
r'<strong>Favorited</strong>\s+([^<]+)<',
webpage, 'like count', fatal=False))
-
+
upload_date = self._html_search_regex(
r'<strong>Uploaded</strong>\s+([^<]+)<', webpage, 'upload date')
if 'Ago' in upload_date:
webpage = self._download_webpage(url, video_id)
jsplayer = self._download_webpage('http://www.moviezine.se/api/player.js?video=%s' % video_id, video_id, 'Downloading js api player')
- formats =[{
+ formats = [{
'format_id': 'sd',
'url': self._html_search_regex(r'file: "(.+?)",', jsplayer, 'file'),
'quality': 0,
'title': 'dissapeared image',
'description': 'optical illusion dissapeared image magic illusion',
}
- }
\ No newline at end of file
+ }
r'_([0-9]+)\.[a-zA-Z0-9]+$', src['src'],
False, default=None)
vcodec = src['type'].partition('/')[2]
-
+
formats.append({
'format_id': encoding_id + '-' + vcodec,
'url': src['src'],
# Otherwise we get a webpage that would execute some javascript
req.add_header('Youtubedl-user-agent', 'curl/7')
webpage = self._download_webpage(req, mtvn_id,
- 'Downloading mobile page')
+ 'Downloading mobile page')
metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url'))
req = HEADRequest(metrics_url)
response = self._request_webpage(req, mtvn_id, 'Resolving url')
url = response.geturl()
# Transform the url to get the best quality:
url = re.sub(r'.+pxE=mp4', 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=0+_pxK=18639+_pxE=mp4', url, 1)
- return [{'url': url,'ext': 'mp4'}]
+ return [{'url': url, 'ext': 'mp4'}]
def _extract_video_formats(self, mdoc, mtvn_id):
if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4)$', mdoc.find('.//src').text) is not None:
if mtvn_id is not None and self._MOBILE_TEMPLATE is not None:
self.to_screen('The normal version is not available from your '
- 'country, trying with the mobile version')
+ 'country, trying with the mobile version')
return self._extract_mobile_video_formats(mtvn_id)
raise ExtractorError('This video is not available from your country.',
- expected=True)
+ expected=True)
formats = []
for rendition in mdoc.findall('.//rendition'):
mediagen_url += '&acceptMethods=fms'
mediagen_doc = self._download_xml(mediagen_url, video_id,
- 'Downloading video urls')
+ 'Downloading video urls')
description_node = itemdoc.find('description')
if description_node is not None:
# This a short id that's used in the webpage urls
mtvn_id = None
mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category',
- 'scheme', 'urn:mtvn:id')
+ 'scheme', 'urn:mtvn:id')
if mtvn_id_node is not None:
mtvn_id = mtvn_id_node.text
if mgid is None or ':' not in mgid:
mgid = self._search_regex(
[r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
- webpage, u'mgid')
+ webpage, 'mgid')
return self._get_videos_info(mgid)
video_id = self._id_from_uri(uri)
site_id = uri.replace(video_id, '')
config_url = ('http://media.mtvnservices.com/pmt/e1/players/{0}/'
- 'context4/context5/config.xml'.format(site_id))
+ 'context4/context5/config.xml'.format(site_id))
config_doc = self._download_xml(config_url, video_id)
feed_node = config_doc.find('.//feed')
feed_url = feed_node.text.strip().split('?')[0]
uri = mobj.groupdict().get('mgid')
if uri is None:
webpage = self._download_webpage(url, video_id)
-
+
# Some videos come from Vevo.com
m_vevo = re.search(r'isVevoVideo = true;.*?vevoVideoId = "(.*?)";',
webpage, re.DOTALL)
if m_vevo:
- vevo_id = m_vevo.group(1);
+ vevo_id = m_vevo.group(1)
self.to_screen('Vevo video detected: %s' % vevo_id)
return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
-
+
uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, 'uri')
return self._get_videos_info(uri)
'is_live': True,
'thumbnail': thumbnail,
}
-
'duration': int_or_none(duration),
'view_count': int_or_none(view_count),
'formats': formats,
- }
\ No newline at end of file
+ }
-import re
-import json
+from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
compat_urllib_parse,
- determine_ext,
)
class MuzuTVIE(InfoExtractor):
_VALID_URL = r'https?://www\.muzu\.tv/(.+?)/(.+?)/(?P<id>\d+)'
- IE_NAME = u'muzu.tv'
+ IE_NAME = 'muzu.tv'
_TEST = {
- u'url': u'http://www.muzu.tv/defected/marcashken-featuring-sos-cat-walk-original-mix-music-video/1981454/',
- u'file': u'1981454.mp4',
- u'md5': u'98f8b2c7bc50578d6a0364fff2bfb000',
- u'info_dict': {
- u'title': u'Cat Walk (Original Mix)',
- u'description': u'md5:90e868994de201b2570e4e5854e19420',
- u'uploader': u'MarcAshken featuring SOS',
+ 'url': 'http://www.muzu.tv/defected/marcashken-featuring-sos-cat-walk-original-mix-music-video/1981454/',
+ 'md5': '98f8b2c7bc50578d6a0364fff2bfb000',
+ 'info_dict': {
+ 'id': '1981454',
+ 'ext': 'mp4',
+ 'title': 'Cat Walk (Original Mix)',
+ 'description': 'md5:90e868994de201b2570e4e5854e19420',
+ 'uploader': 'MarcAshken featuring SOS',
},
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
- info_data = compat_urllib_parse.urlencode({'format': 'json',
- 'url': url,
- })
- video_info_page = self._download_webpage('http://www.muzu.tv/api/oembed/?%s' % info_data,
- video_id, u'Downloading video info')
- info = json.loads(video_info_page)
+ info_data = compat_urllib_parse.urlencode({
+ 'format': 'json',
+ 'url': url,
+ })
+ info = self._download_json(
+ 'http://www.muzu.tv/api/oembed/?%s' % info_data,
+ video_id, 'Downloading video info')
- player_info_page = self._download_webpage('http://player.muzu.tv/player/playerInit?ai=%s' % video_id,
- video_id, u'Downloading player info')
- video_info = json.loads(player_info_page)['videos'][0]
- for quality in ['1080' , '720', '480', '360']:
+ player_info = self._download_json(
+ 'http://player.muzu.tv/player/playerInit?ai=%s' % video_id,
+ video_id, 'Downloading player info')
+ video_info = player_info['videos'][0]
+ for quality in ['1080', '720', '480', '360']:
if video_info.get('v%s' % quality):
break
- data = compat_urllib_parse.urlencode({'ai': video_id,
- # Even if each time you watch a video the hash changes,
- # it seems to work for different videos, and it will work
- # even if you use any non empty string as a hash
- 'viewhash': 'VBNff6djeV4HV5TRPW5kOHub2k',
- 'device': 'web',
- 'qv': quality,
- })
- video_url_page = self._download_webpage('http://player.muzu.tv/player/requestVideo?%s' % data,
- video_id, u'Downloading video url')
- video_url_info = json.loads(video_url_page)
+ data = compat_urllib_parse.urlencode({
+ 'ai': video_id,
+ # Even if each time you watch a video the hash changes,
+ # it seems to work for different videos, and it will work
+ # even if you use any non empty string as a hash
+ 'viewhash': 'VBNff6djeV4HV5TRPW5kOHub2k',
+ 'device': 'web',
+ 'qv': quality,
+ })
+ video_url_info = self._download_json(
+ 'http://player.muzu.tv/player/requestVideo?%s' % data,
+ video_id, 'Downloading video url')
video_url = video_url_info['url']
- return {'id': video_id,
- 'title': info['title'],
- 'url': video_url,
- 'ext': determine_ext(video_url),
- 'thumbnail': info['thumbnail_url'],
- 'description': info['description'],
- 'uploader': info['author_name'],
- }
+ return {
+ 'id': video_id,
+ 'title': info['title'],
+ 'url': video_url,
+ 'thumbnail': info['thumbnail_url'],
+ 'description': info['description'],
+ 'uploader': info['author_name'],
+ }
+# encoding: utf-8
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
compat_str,
)
+from ..utils import ExtractorError
class MySpaceIE(InfoExtractor):
_TESTS = [
{
- 'url': 'https://myspace.com/coldplay/video/viva-la-vida/100008689',
+ 'url': 'https://myspace.com/fiveminutestothestage/video/little-big-town/109594919',
'info_dict': {
- 'id': '100008689',
+ 'id': '109594919',
'ext': 'flv',
- 'title': 'Viva La Vida',
- 'description': 'The official Viva La Vida video, directed by Hype Williams',
- 'uploader': 'Coldplay',
- 'uploader_id': 'coldplay',
+ 'title': 'Little Big Town',
+ 'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.',
+ 'uploader': 'Five Minutes to the Stage',
+ 'uploader_id': 'fiveminutestothestage',
},
'params': {
# rtmp download
'skip_download': True,
},
},
- # song
+ # songs
{
- 'url': 'https://myspace.com/spiderbags/music/song/darkness-in-my-heart-39008454-27041242',
+ 'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681',
'info_dict': {
- 'id': '39008454',
+ 'id': '93388656',
'ext': 'flv',
- 'title': 'Darkness In My Heart',
- 'uploader_id': 'spiderbags',
+ 'title': 'Of weakened soul...',
+ 'uploader': 'Killsorrow',
+ 'uploader_id': 'killsorrow',
},
'params': {
# rtmp download
'skip_download': True,
},
+ }, {
+ 'add_ie': ['Vevo'],
+ 'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041',
+ 'info_dict': {
+ 'id': 'USZM20600099',
+ 'ext': 'mp4',
+ 'title': 'Animal I Have Become',
+ 'uploader': 'Three Days Grace',
+ 'timestamp': int,
+ 'upload_date': '20060502',
+ },
+ 'skip': 'VEVO is only available in some countries',
+ }, {
+ 'add_ie': ['Youtube'],
+ 'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426',
+ 'info_dict': {
+ 'id': 'ypWvQgnJrSU',
+ 'ext': 'mp4',
+ 'title': 'Starset - First Light',
+ 'description': 'md5:2d5db6c9d11d527683bcda818d332414',
+ 'uploader': 'Jacob Soren',
+ 'uploader_id': 'SorenPromotions',
+ 'upload_date': '20140725',
+ }
},
]
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
+ player_url = self._search_regex(
+ r'playerSwf":"([^"?]*)', webpage, 'player URL')
if mobj.group('mediatype').startswith('music/song'):
# songs don't store any useful info in the 'context' variable
+ song_data = self._search_regex(
+ r'''<button.*data-song-id=(["\'])%s\1.*''' % video_id,
+ webpage, 'song_data', default=None, group=0)
+ if song_data is None:
+ # some songs in an album are not playable
+ self.report_warning(
+ '%s: No downloadable song on this page' % video_id)
+ return
def search_data(name):
- return self._search_regex(r'data-%s="(.*?)"' % name, webpage,
- name)
+ return self._search_regex(
+ r'''data-%s=([\'"])(?P<data>.*?)\1''' % name,
+ song_data, name, default='', group='data')
streamUrl = search_data('stream-url')
+ if not streamUrl:
+ vevo_id = search_data('vevo-id')
+ youtube_id = search_data('youtube-id')
+ if vevo_id:
+ self.to_screen('Vevo video detected: %s' % vevo_id)
+ return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
+ elif youtube_id:
+ self.to_screen('Youtube video detected: %s' % youtube_id)
+ return self.url_result(youtube_id, ie='Youtube')
+ else:
+ raise ExtractorError(
+ 'Found song but don\'t know how to download it')
info = {
'id': video_id,
'title': self._og_search_title(webpage),
+ 'uploader': search_data('artist-name'),
'uploader_id': search_data('artist-username'),
'thumbnail': self._og_search_thumbnail(webpage),
}
else:
- context = json.loads(self._search_regex(r'context = ({.*?});', webpage,
- u'context'))
+ context = json.loads(self._search_regex(
+ r'context = ({.*?});', webpage, 'context'))
video = context['video']
streamUrl = video['streamUrl']
info = {
info.update({
'url': rtmp_url,
'play_path': play_path,
+ 'player_url': player_url,
'ext': 'flv',
})
return info
+
+
+class MySpaceAlbumIE(InfoExtractor):
+ IE_NAME = 'MySpace:album'
+ _VALID_URL = r'https?://myspace\.com/([^/]+)/music/album/(?P<title>.*-)(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://myspace.com/starset2/music/album/transmissions-19455773',
+ 'info_dict': {
+ 'title': 'Transmissions',
+ 'id': '19455773',
+ },
+ 'playlist_count': 14,
+ 'skip': 'this album is only available in some countries',
+ }, {
+ 'url': 'https://myspace.com/killsorrow/music/album/the-demo-18596029',
+ 'info_dict': {
+ 'title': 'The Demo',
+ 'id': '18596029',
+ },
+ 'playlist_count': 5,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ playlist_id = mobj.group('id')
+ display_id = mobj.group('title') + playlist_id
+ webpage = self._download_webpage(url, display_id)
+ tracks_paths = re.findall(r'"music:song" content="(.*?)"', webpage)
+ if not tracks_paths:
+ raise ExtractorError(
+ '%s: No songs found, try using proxy' % display_id,
+ expected=True)
+ entries = [
+ self.url_result(t_path, ie=MySpaceIE.ie_key())
+ for t_path in tracks_paths]
+ return {
+ '_type': 'playlist',
+ 'id': playlist_id,
+ 'display_id': display_id,
+ 'title': self._og_search_title(webpage),
+ 'entries': entries,
+ }
# Original Code from: https://github.com/dersphere/plugin.video.myvideo_de.git
# Released into the Public Domain by Tristan Fischer on 2013-05-19
# https://github.com/rg3/youtube-dl/pull/842
- def __rc4crypt(self,data, key):
+ def __rc4crypt(self, data, key):
x = 0
box = list(range(256))
for i in list(range(256)):
out += chr(compat_ord(char) ^ box[(box[x] + box[y]) % 256])
return out
- def __md5(self,s):
+ def __md5(self, s):
return hashlib.md5(s).hexdigest().encode()
- def _real_extract(self,url):
+ def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
GK = (
- b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
- b'TW1FMU5tVTBNR05pWkRaa05XRXhNVFJoWVRVd1ptSXhaVEV3'
- b'TnpsbA0KTVRkbU1tSTRNdz09'
+ b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
+ b'TW1FMU5tVTBNR05pWkRaa05XRXhNVFJoWVRVd1ptSXhaVEV3'
+ b'TnpsbA0KTVRkbU1tSTRNdz09'
)
# Get video webpage
video_url = mobj.group(1) + '.flv'
video_title = self._html_search_regex('<title>([^<]+)</title>',
- webpage, 'title')
+ webpage, 'title')
return {
'id': video_id,
video_swfobj = compat_urllib_parse.unquote(video_swfobj)
video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
- webpage, 'title')
+ webpage, 'title')
return {
'id': video_id,
'play_path': video_playpath,
'player_url': video_swfobj,
}
-
video_id = mobj.group(1)
webpage = self._download_webpage(url, video_id)
m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
- webpage)
+ webpage)
if m_id is None:
m_error = re.search(
r'(?s)<div class="nation_error">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>',
raise ExtractorError('couldn\'t extract vid and key')
vid = m_id.group(1)
key = m_id.group(2)
- query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key,})
+ query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key, })
query_urls = compat_urllib_parse.urlencode({
'masterVid': vid,
'protocol': 'p2p',
if domain.startswith('rtmp'):
f.update({
'ext': 'flv',
- 'rtmp_protocol': '1', # rtmpt
+ 'rtmp_protocol': '1', # rtmpt
})
formats.append(f)
self._sort_formats(formats)
duration = parse_duration(
self._html_search_meta('duration', webpage, 'duration', fatal=False))
-
return {
'id': shortened_video_id,
'url': video_url,
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
- }
\ No newline at end of file
+ }
mobj = re.match(self._VALID_URL, url)
music_id = mobj.group('id')
webpage = self._download_webpage(url, music_id)
-
+
title = self._html_search_regex(
r',"name":"([^"]+)",', webpage, 'music title')
uploader = self._html_search_regex(
r',"artist":"([^"]+)",', webpage, 'music uploader')
-
+
music_url_json_string = self._html_search_regex(
r'({"url":"[^"]+"),', webpage, 'music url') + '}'
music_url_json = json.loads(music_url_json_string)
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
- }
\ No newline at end of file
+ }
page = self._download_webpage('https://www.nfb.ca/film/%s' % video_id, video_id, 'Downloading film page')
uploader_id = self._html_search_regex(r'<a class="director-link" href="/explore-all-directors/([^/]+)/"',
- page, 'director id', fatal=False)
+ page, 'director id', fatal=False)
uploader = self._html_search_regex(r'<em class="director-name" itemprop="name">([^<]+)</em>',
- page, 'director name', fatal=False)
+ page, 'director name', fatal=False)
request = compat_urllib_request.Request('https://www.nfb.ca/film/%s/player_config' % video_id,
- compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii'))
+ compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii'))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf')
'uploader': uploader,
'uploader_id': uploader_id,
'formats': formats,
- }
\ No newline at end of file
+ }
import json
from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
compat_urlparse,
compat_urllib_parse,
+)
+from ..utils import (
unified_strdate,
)
path_url, video_id, 'Downloading final video url')
video_url = path_doc.find('path').text
else:
- video_url = initial_video_url
+ video_url = initial_video_url
join = compat_urlparse.urljoin
return {
response = self._download_webpage(request_url, playlist_title)
response = self._fix_json(response)
if not response.strip():
- self._downloader.report_warning(u'Got an empty reponse, trying '
+ self._downloader.report_warning('Got an empty reponse, trying '
'adding the "newvideos" parameter')
response = self._download_webpage(request_url + '&newvideos=true',
- playlist_title)
+ playlist_title)
response = self._fix_json(response)
videos = json.loads(response)
if 'deleted=' in flv_info_webpage:
raise ExtractorError('The video has been deleted.',
- expected=True)
+ expected=True)
video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
# Start extracting information
webpage = self._download_webpage(url, list_id)
entries_json = self._search_regex(r'Mylist\.preload\(\d+, (\[.*\])\);',
- webpage, 'entries')
+ webpage, 'entries')
entries = json.loads(entries_json)
entries = [{
'_type': 'url',
'ie_key': NiconicoIE.ie_key(),
'url': ('http://www.nicovideo.jp/watch/%s' %
- entry['item_data']['video_id']),
+ entry['item_data']['video_id']),
} for entry in entries]
return {
"thumbnail": "re:^https?://",
},
'add_ie': ['Youtube']
- },
- {
+ }, {
'url': 'http://9gag.tv/p/KklwM/alternate-banned-opening-scene-of-gravity?ref=fsidebar',
'info_dict': {
'id': 'KklwM',
_VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
_LOGIN_URL = 'http://noco.tv/do.php'
_API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s'
+ _SUB_LANG_TEMPLATE = '&sub_lang=%s'
_NETRC_MACHINE = 'noco'
_TEST = {
if 'erreur' in login:
raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
- def _call_api(self, path, video_id, note):
+ def _call_api(self, path, video_id, note, sub_lang=None):
ts = compat_str(int(time.time() * 1000))
tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest()
url = self._API_URL_TEMPLATE % (path, ts, tk)
+ if sub_lang:
+ url += self._SUB_LANG_TEMPLATE % sub_lang
resp = self._download_json(url, video_id, note)
formats = []
- for format_id, fmt in medias['fr']['video_list']['none']['quality_list'].items():
-
- video = self._call_api(
- 'shows/%s/video/%s/fr' % (video_id, format_id.lower()),
- video_id, 'Downloading %s video JSON' % format_id)
-
- file_url = video['file']
- if not file_url:
- continue
-
- if file_url in ['forbidden', 'not found']:
- popmessage = video['popmessage']
- self._raise_error(popmessage['title'], popmessage['message'])
-
- formats.append({
- 'url': file_url,
- 'format_id': format_id,
- 'width': fmt['res_width'],
- 'height': fmt['res_lines'],
- 'abr': fmt['audiobitrate'],
- 'vbr': fmt['videobitrate'],
- 'filesize': fmt['filesize'],
- 'format_note': qualities[format_id]['quality_name'],
- 'preference': qualities[format_id]['priority'],
- })
+ for lang, lang_dict in medias['fr']['video_list'].items():
+ for format_id, fmt in lang_dict['quality_list'].items():
+ format_id_extended = '%s-%s' % (lang, format_id) if lang != 'none' else format_id
+
+ video = self._call_api(
+ 'shows/%s/video/%s/fr' % (video_id, format_id.lower()),
+ video_id, 'Downloading %s video JSON' % format_id_extended,
+ lang if lang != 'none' else None)
+
+ file_url = video['file']
+ if not file_url:
+ continue
+
+ if file_url in ['forbidden', 'not found']:
+ popmessage = video['popmessage']
+ self._raise_error(popmessage['title'], popmessage['message'])
+
+ formats.append({
+ 'url': file_url,
+ 'format_id': format_id_extended,
+ 'width': fmt['res_width'],
+ 'height': fmt['res_lines'],
+ 'abr': fmt['audiobitrate'],
+ 'vbr': fmt['videobitrate'],
+ 'filesize': fmt['filesize'],
+ 'format_note': qualities[format_id]['quality_name'],
+ 'preference': qualities[format_id]['priority'],
+ })
self._sort_formats(formats)
'uploader_id': uploader_id,
'duration': duration,
'formats': formats,
- }
\ No newline at end of file
+ }
webpage = self._download_webpage(url, video_id)
video_uploader = self._html_search_regex(r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
- webpage, 'uploader')
+ webpage, 'uploader')
raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
- webpage, 'date')
+ webpage, 'date')
video_upload_date = unified_strdate(raw_upload_date)
player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url')
'url': video_url,
'title': title,
'description': description
- }
\ No newline at end of file
+ }
'title': 'youtubedl test video _BaW_jenozKc.mp4',
'description': 'Description',
}
- }
\ No newline at end of file
+ }
'duration': duration,
'view_count': view_count,
'formats': formats,
- }
\ No newline at end of file
+ }
'upload_date': upload_date,
'age_limit': 18,
'formats': formats,
- }
\ No newline at end of file
+ }
'duration': duration,
'formats': formats,
'thumbnails': thumbnails,
- }
\ No newline at end of file
+ }
@classmethod
def _build_url_result(cls, embed_code):
return cls.url_result(cls._url_for_embed_code(embed_code),
- ie=cls.ie_key())
+ ie=cls.ie_key())
def _extract_result(self, info, more_info):
return {
}
else:
return self._extract_result(videos_info[0], videos_more_info)
-
'title': data['title'],
'description': data['subtitle'],
'entries': entries
- }
\ No newline at end of file
+ }
# Extract URL, uploader, and title from webpage
self.report_extraction(video_id)
info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);',
- webpage, 'info json')
+ webpage, 'info json')
info = json.loads(info_json)
url = compat_urllib_parse.unquote(self._html_search_regex(r'file=(.+\.mp4)', info['linkcodes']['html'], 'url'))
return {
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
+ clean_html,
compat_urllib_parse,
)
webpage = self._download_webpage(url, video_id)
+ m_error = re.search(
+ r'<div class="block-error">\s*<div class="heading">\s*<div>(?P<msg>.+?)</div>\s*</div>', webpage)
+ if m_error:
+ raise ExtractorError(clean_html(m_error.group('msg')), expected=True)
+
video_title = None
duration = None
video_thumbnail = None
from .common import InfoExtractor
from ..utils import int_or_none
+
class PodomaticIE(InfoExtractor):
IE_NAME = 'podomatic'
_VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)'
comment_count = self._extract_count(
r'All comments \(<var class="videoCommentCount">([\d,\.]+)</var>', webpage, 'comment')
- video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
+ video_urls = list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
if webpage.find('"encrypted":true') != -1:
password = compat_urllib_parse.unquote_plus(self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))
video_url = self._search_regex(VIDEO_URL_RE, webpage, 'video url')
video_url = compat_urllib_parse.unquote(video_url)
- #Get the uploaded date
+ # Get the uploaded date
VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, 'upload date', fatal=False)
if upload_date:
'upload_date': upload_date,
'duration': duration,
'formats': formats,
- }
\ No newline at end of file
+ }
if captions.endswith(STL_EXT):
captions = captions[:-len(STL_EXT)] + SRT_EXT
subtitles['it'] = 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions)
- return subtitles
\ No newline at end of file
+ return subtitles
webpage = self._download_webpage(url, video_id)
json_data = self._search_regex(r'window\.gon.*?gon\.show=(.+?);$',
- webpage, 'json data', flags=re.MULTILINE)
+ webpage, 'json data', flags=re.MULTILINE)
try:
data = json.loads(json_data)
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
_VALID_URL = r'http://(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.redtube.com/66418',
- 'file': '66418.mp4',
- # md5 varies from time to time, as in
- # https://travis-ci.org/rg3/youtube-dl/jobs/14052463#L295
- #'md5': u'7b8c22b5e7098a3e1c09709df1126d2d',
'info_dict': {
+ 'id': '66418',
+ 'ext': 'mp4',
"title": "Sucked on a toilet",
"age_limit": 18,
}
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
-
- video_id = mobj.group('id')
- video_extension = 'mp4'
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- self.report_extraction(video_id)
-
video_url = self._html_search_regex(
- r'<source src="(.+?)" type="video/mp4">', webpage, u'video URL')
-
+ r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
video_title = self._html_search_regex(
r'<h1 class="videoTitle[^"]*">(.+?)</h1>',
- webpage, u'title')
-
+ webpage, 'title')
video_thumbnail = self._og_search_thumbnail(webpage)
# No self-labeling, but they describe themselves as
return {
'id': video_id,
'url': video_url,
- 'ext': video_extension,
+ 'ext': 'mp4',
'title': video_title,
'thumbnail': video_thumbnail,
'age_limit': age_limit,
'thumbnail': thumbnail_url,
'description': description,
}
-
formats = self._extract_m3u8_formats(m3u8_url, uuid, ext='mp4')
- video_urlpart = videopath.split('/flash/')[1][:-4]
+ video_urlpart = videopath.split('/flash/')[1][:-5]
PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4'
formats.extend([
playerdata = self._download_xml(playerdata_url, video_id, 'Downloading player data XML')
videoinfo = playerdata.find('./playlist/videoinfo')
-
+
formats = []
for filename in videoinfo.findall('filename'):
mobj = re.search(r'(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>.+)', filename.text)
'upload_date': upload_date,
'duration': duration,
'formats': formats,
- }
\ No newline at end of file
+ }
return url
-
class RTVEALaCartaIE(InfoExtractor):
IE_NAME = 'rtve.es:alacarta'
IE_DESC = 'RTVE a la carta'
'view_count': view_count,
'duration': duration,
'formats': formats,
- }
\ No newline at end of file
+ }
'thumbnail': 're:http://.*\.jpg',
},
'add_ies': ['generic'],
- },
- {
+ }, {
'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed',
'only_matching': True,
}]
'description': description,
'thumbnail': 'http://www.scivee.tv/assets/videothumb/%s' % video_id,
'formats': formats,
- }
\ No newline at end of file
+ }
if title is None:
title = self._html_search_regex(
[r'<b>Title:</b> ([^<]*)</div>',
- r'class="tabSeperator">></span><span class="tabText">(.*?)<'],
+ r'class="tabSeperator">></span><span class="tabText">(.*?)<'],
webpage, 'title')
thumbnail = self._og_search_thumbnail(webpage)
description = self._og_search_description(webpage, default=None)
'title': title,
'entries': entries,
}
-
-
\ No newline at end of file
'filesize': filesize,
'title': title,
'thumbnail': thumbnail,
- }
\ No newline at end of file
+ }
def _extract_video(self, video_id):
data = compat_urllib_parse.urlencode({'vid': video_id})
url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data,
- video_id, 'Downloading video url')
+ video_id, 'Downloading video url')
image_page = self._download_webpage(
'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data,
video_id, 'Downloading thumbnail info')
ext = info['jsplayer']['video_extension']
video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
description = self._html_search_regex(
- r'<p\s+(?:style="[^"]*"\s+)?class="description.*?"[^>]*>(.*?)</p>', webpage,
+ r'<p\s+(?:style="[^"]*"\s+)?class=".*?description.*?"[^>]*>(.*?)</p>', webpage,
'description', fatal=False)
return {
webpage = self._download_webpage(url, video_id)
video_title = self._html_search_regex(r'<h1><strong>([^<]+)</strong>',
- webpage, 'title').strip()
+ webpage, 'title').strip()
video_url = self._html_search_regex(
r'(?s)<div id="vidPlayer"\s+data-url="([^"]+)"',
# encoding: utf-8
from __future__ import unicode_literals
-import os.path
import re
import json
import hashlib
compat_urllib_parse,
compat_urllib_request,
ExtractorError,
- url_basename,
int_or_none,
+ unified_strdate,
)
class SmotriIE(InfoExtractor):
IE_DESC = 'Smotri.com'
IE_NAME = 'smotri'
- _VALID_URL = r'^https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
+ _VALID_URL = r'^https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<id>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
_NETRC_MACHINE = 'smotri'
_TESTS = [
'uploader': 'rbc2008',
'uploader_id': 'rbc08',
'upload_date': '20131118',
- 'description': 'катастрофа с камер видеонаблюдения, видео катастрофа с камер видеонаблюдения',
'thumbnail': 'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg',
},
},
'uploader': 'Support Photofile@photofile',
'uploader_id': 'support-photofile',
'upload_date': '20070704',
- 'description': 'test, видео test',
'thumbnail': 'http://frame4.loadup.ru/03/ed/57591.2.3.jpg',
},
},
'uploader_id': 'timoxa40',
'upload_date': '20100404',
'thumbnail': 'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg',
- 'description': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1, видео TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
},
'params': {
'videopassword': 'qwerty',
'upload_date': '20101001',
'thumbnail': 'http://frame3.loadup.ru/75/75/1540889.1.3.jpg',
'age_limit': 18,
- 'description': 'этот ролик не покажут по ТВ, видео этот ролик не покажут по ТВ',
},
'params': {
'videopassword': '333'
'uploader': 'HannahL',
'uploader_id': 'lisaha95',
'upload_date': '20090331',
- 'description': 'Shakira - Don\'t Bother, видео Shakira - Don\'t Bother',
'thumbnail': 'http://frame8.loadup.ru/44/0b/918809.7.3.jpg',
},
},
]
- _SUCCESS = 0
- _PASSWORD_NOT_VERIFIED = 1
- _PASSWORD_DETECTED = 2
- _VIDEO_NOT_FOUND = 3
-
@classmethod
def _extract_url(cls, webpage):
mobj = re.search(
return self._html_search_meta(name, html, display_name)
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('videoid')
- real_video_id = mobj.group('realvideoid')
+ video_id = self._match_id(url)
+
+ video_form = {
+ 'ticket': video_id,
+ 'video_url': '1',
+ 'frame_url': '1',
+ 'devid': 'LoadupFlashPlayer',
+ 'getvideoinfo': '1',
+ }
- # Download video JSON data
- video_json_url = 'http://smotri.com/vt.php?id=%s' % real_video_id
- video_json_page = self._download_webpage(video_json_url, video_id, 'Downloading video JSON')
- video_json = json.loads(video_json_page)
+ request = compat_urllib_request.Request(
+ 'http://smotri.com/video/view/url/bot/', compat_urllib_parse.urlencode(video_form))
+ request.add_header('Content-Type', 'application/x-www-form-urlencoded')
- status = video_json['status']
- if status == self._VIDEO_NOT_FOUND:
+ video = self._download_json(request, video_id, 'Downloading video JSON')
+
+ if video.get('_moderate_no') or not video.get('moderated'):
+ raise ExtractorError('Video %s has not been approved by moderator' % video_id, expected=True)
+
+ if video.get('error'):
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
- elif status == self._PASSWORD_DETECTED: # The video is protected by a password, retry with
- # video-password set
- video_password = self._downloader.params.get('videopassword', None)
- if not video_password:
- raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
- video_json_url += '&md5pass=%s' % hashlib.md5(video_password.encode('utf-8')).hexdigest()
- video_json_page = self._download_webpage(video_json_url, video_id, 'Downloading video JSON (video-password set)')
- video_json = json.loads(video_json_page)
- status = video_json['status']
- if status == self._PASSWORD_NOT_VERIFIED:
- raise ExtractorError('Video password is invalid', expected=True)
-
- if status != self._SUCCESS:
- raise ExtractorError('Unexpected status value %s' % status)
-
- # Extract the URL of the video
- video_url = video_json['file_data']
+
+ video_url = video.get('_vidURL') or video.get('_vidURL_mp4')
+ title = video['title']
+ thumbnail = video['_imgURL']
+ upload_date = unified_strdate(video['added'])
+ uploader = video['userNick']
+ uploader_id = video['userLogin']
+ duration = int_or_none(video['duration'])
# Video JSON does not provide enough meta data
# We will extract some from the video web page instead
- video_page_url = 'http://smotri.com/video/view/?id=%s' % video_id
- video_page = self._download_webpage(video_page_url, video_id, 'Downloading video page')
+ webpage_url = 'http://smotri.com/video/view/?id=%s' % video_id
+ webpage = self._download_webpage(webpage_url, video_id, 'Downloading video page')
# Warning if video is unavailable
warning = self._html_search_regex(
- r'<div class="videoUnModer">(.*?)</div>', video_page,
+ r'<div class="videoUnModer">(.*?)</div>', webpage,
'warning message', default=None)
if warning is not None:
self._downloader.report_warning(
(video_id, warning))
# Adult content
- if re.search('EroConfirmText">', video_page) is not None:
+ if re.search('EroConfirmText">', webpage) is not None:
self.report_age_confirmation()
confirm_string = self._html_search_regex(
r'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id,
- video_page, 'confirm string')
- confirm_url = video_page_url + '&confirm=%s' % confirm_string
- video_page = self._download_webpage(confirm_url, video_id, 'Downloading video page (age confirmed)')
+ webpage, 'confirm string')
+ confirm_url = webpage_url + '&confirm=%s' % confirm_string
+ webpage = self._download_webpage(confirm_url, video_id, 'Downloading video page (age confirmed)')
adult_content = True
else:
adult_content = False
- # Extract the rest of meta data
- video_title = self._search_meta('name', video_page, 'title')
- if not video_title:
- video_title = os.path.splitext(url_basename(video_url))[0]
-
- video_description = self._search_meta('description', video_page)
- END_TEXT = ' на сайте Smotri.com'
- if video_description and video_description.endswith(END_TEXT):
- video_description = video_description[:-len(END_TEXT)]
- START_TEXT = 'Смотреть онлайн ролик '
- if video_description and video_description.startswith(START_TEXT):
- video_description = video_description[len(START_TEXT):]
- video_thumbnail = self._search_meta('thumbnail', video_page)
-
- upload_date_str = self._search_meta('uploadDate', video_page, 'upload date')
- if upload_date_str:
- upload_date_m = re.search(r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str)
- video_upload_date = (
- (
- upload_date_m.group('year') +
- upload_date_m.group('month') +
- upload_date_m.group('day')
- )
- if upload_date_m else None
- )
- else:
- video_upload_date = None
-
- duration_str = self._search_meta('duration', video_page)
- if duration_str:
- duration_m = re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str)
- video_duration = (
- (
- (int(duration_m.group('hours')) * 60 * 60) +
- (int(duration_m.group('minutes')) * 60) +
- int(duration_m.group('seconds'))
- )
- if duration_m else None
- )
- else:
- video_duration = None
-
- video_uploader = self._html_search_regex(
- '<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>',
- video_page, 'uploader', fatal=False, flags=re.MULTILINE|re.DOTALL)
-
- video_uploader_id = self._html_search_regex(
- '<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info\\(.*?\'([^\']+)\'\\);">',
- video_page, 'uploader id', fatal=False, flags=re.MULTILINE|re.DOTALL)
-
- video_view_count = self._html_search_regex(
+ view_count = self._html_search_regex(
'Общее количество просмотров.*?<span class="Number">(\\d+)</span>',
- video_page, 'view count', fatal=False, flags=re.MULTILINE|re.DOTALL)
+ webpage, 'view count', fatal=False, flags=re.MULTILINE | re.DOTALL)
return {
'id': video_id,
'url': video_url,
- 'title': video_title,
- 'thumbnail': video_thumbnail,
- 'description': video_description,
- 'uploader': video_uploader,
- 'upload_date': video_upload_date,
- 'uploader_id': video_uploader_id,
- 'duration': video_duration,
- 'view_count': int_or_none(video_view_count),
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'upload_date': upload_date,
+ 'uploader_id': uploader_id,
+ 'duration': duration,
+ 'view_count': int_or_none(view_count),
'age_limit': 18 if adult_content else 0,
- 'video_page_url': video_page_url
}
},
'playlist_mincount': 4,
}
-
+
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
community_id = mobj.group('communityid')
(username, password) = self._get_login_info()
if username is None:
raise ExtractorError('Erotic broadcasts allowed only for registered users, '
- 'use --username and --password options to provide account credentials.', expected=True)
+ 'use --username and --password options to provide account credentials.', expected=True)
login_form = {
'login-hint53': '1',
# encoding: utf-8
+from __future__ import unicode_literals
import json
import re
_VALID_URL = r'https?://(?P<mytv>my\.)?tv\.sohu\.com/.+?/(?(mytv)|n)(?P<id>\d+)\.shtml.*?'
_TEST = {
- u'url': u'http://tv.sohu.com/20130724/n382479172.shtml#super',
- u'file': u'382479172.mp4',
- u'md5': u'bde8d9a6ffd82c63a1eefaef4eeefec7',
- u'info_dict': {
- u'title': u'MV:Far East Movement《The Illest》',
+ 'url': 'http://tv.sohu.com/20130724/n382479172.shtml#super',
+ 'md5': 'bde8d9a6ffd82c63a1eefaef4eeefec7',
+ 'info_dict': {
+ 'id': '382479172',
+ 'ext': 'mp4',
+ 'title': 'MV:Far East Movement《The Illest》',
},
- u'skip': u'Only available from China',
+ 'skip': 'Only available from China',
}
def _real_extract(self, url):
if mytv:
base_data_url = 'http://my.tv.sohu.com/play/videonew.do?vid='
else:
- base_data_url = u'http://hot.vrs.sohu.com/vrs_flash.action?vid='
+ base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
data_url = base_data_url + str(vid_id)
data_json = self._download_webpage(
data_url, video_id,
- note=u'Downloading JSON data for ' + str(vid_id))
+ note='Downloading JSON data for ' + str(vid_id))
return json.loads(data_json)
mobj = re.match(self._VALID_URL, url)
webpage = self._download_webpage(url, video_id)
raw_title = self._html_search_regex(r'(?s)<title>(.+?)</title>',
- webpage, u'video title')
+ webpage, 'video title')
title = raw_title.partition('-')[0].strip()
vid = self._html_search_regex(r'var vid ?= ?["\'](\d+)["\']', webpage,
- u'video path')
+ 'video path')
data = _fetch_data(vid, mytv)
QUALITIES = ('ori', 'super', 'high', 'nor')
for q in QUALITIES
if data['data'][q + 'Vid'] != 0]
if not vid_ids:
- raise ExtractorError(u'No formats available for this video')
+ raise ExtractorError('No formats available for this video')
# For now, we just pick the highest available quality
vid_id = vid_ids[-1]
(allot, prot, clipsURL[i], su[i]))
part_str = self._download_webpage(
part_url, video_id,
- note=u'Downloading part %d of %d' % (i+1, part_count))
+ note='Downloading part %d of %d' % (i + 1, part_count))
part_info = part_str.split('|')
video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])
# We have to retrieve the url
streams_url = ('http://api.soundcloud.com/i1/tracks/{0}/streams?'
- 'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token))
+ 'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token))
format_dict = self._download_json(
streams_url,
track_id, 'Downloading track url')
# extract uploader (which is in the url)
uploader = mobj.group('uploader')
# extract simple title (uploader + slug of song title)
- slug_title = mobj.group('title')
+ slug_title = mobj.group('title')
token = mobj.group('token')
full_title = resolve_title = '%s/%s' % (uploader, slug_title)
if token:
resolve_title += '/%s' % token
-
+
self.report_resolve(full_title)
-
+
url = 'http://soundcloud.com/%s' % resolve_title
info_json_url = self._resolv_url(url)
info = self._download_json(info_json_url, full_title, 'Downloading info JSON')
entries = [
self._extract_info_dict(t, quiet=True, secret_token=token)
- for t in data['tracks']]
+ for t in data['tracks']]
return {
'_type': 'playlist',
# Other videos works fine with the info from the object
brightcove_url = BrightcoveIE._extract_brightcove_url(webpage)
if brightcove_url is None:
- raise ExtractorError(u'The webpage does not contain a video', expected=True)
+ raise ExtractorError(
+ 'The webpage does not contain a video', expected=True)
return self.url_result(brightcove_url, BrightcoveIE.ie_key())
'description': description,
'duration': duration,
'thumbnails': thumbnails
- }
\ No newline at end of file
+ }
'duration': duration,
'categories': categories,
'formats': formats,
- }
\ No newline at end of file
+ }
'rtmp_live': asset.get('live'),
'timestamp': parse_iso8601(asset.get('date')),
}
-
rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
rootpage = self._download_webpage(rootURL, info['id'],
- errnote='Unable to download course info page')
+ errnote='Unable to download course info page')
links = orderedSet(re.findall('<a href="(CoursePage.php\?[^"]+)">', rootpage))
info['entries'] = [self.url_result(
+from __future__ import unicode_literals
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
- compat_str,
ExtractorError,
)
sub_lang_list = self._get_available_subtitles(video_id, webpage)
auto_captions_list = self._get_available_automatic_caption(video_id, webpage)
sub_lang = ",".join(list(sub_lang_list.keys()))
- self.to_screen(u'%s: Available subtitles for video: %s' %
+ self.to_screen('%s: Available subtitles for video: %s' %
(video_id, sub_lang))
auto_lang = ",".join(auto_captions_list.keys())
- self.to_screen(u'%s: Available automatic captions for video: %s' %
+ self.to_screen('%s: Available automatic captions for video: %s' %
(video_id, auto_lang))
def extract_subtitles(self, video_id, webpage):
sub_lang_list = {}
for sub_lang in requested_langs:
- if not sub_lang in available_subs_list:
- self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
+ if sub_lang not in available_subs_list:
+ self._downloader.report_warning('no closed captions found in the specified language "%s"' % sub_lang)
continue
sub_lang_list[sub_lang] = available_subs_list[sub_lang]
try:
sub = self._download_subtitle_url(sub_lang, url)
except ExtractorError as err:
- self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
+ self._downloader.report_warning('unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
return
if not sub:
- self._downloader.report_warning(u'Did not fetch video subtitles')
+ self._downloader.report_warning('Did not fetch video subtitles')
return
return sub
Must be redefined by the subclasses that support automatic captions,
otherwise it will return {}
"""
- self._downloader.report_warning(u'Automatic Captions not supported by this server')
+ self._downloader.report_warning('Automatic Captions not supported by this server')
return {}
if media_type == 'Video':
fmt.update({
- 'format_note': ['144p', '288p', '544p', '720p'][quality-1],
+ 'format_note': ['144p', '288p', '544p', '720p'][quality - 1],
'vcodec': codec,
})
elif media_type == 'Audio':
--- /dev/null
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ js_to_json,
+ qualities,
+)
+
+
+class TassIE(InfoExtractor):
+    """Information extractor for video pages on tass.ru and itar-tass.com."""
+
+    # The digits following the first path segment are captured as the id.
+    _VALID_URL = r'https?://(?:tass\.ru|itar-tass\.com)/[^/]+/(?P<id>\d+)'
+    _TESTS = [
+        {
+            'url': 'http://tass.ru/obschestvo/1586870',
+            'md5': '3b4cdd011bc59174596b6145cda474a4',
+            'info_dict': {
+                'id': '1586870',
+                'ext': 'mp4',
+                'title': 'Посетителям московского зоопарка показали красную панду',
+                'description': 'Приехавшую из Дублина Зейну можно увидеть в павильоне "Кошки тропиков"',
+                # Raw literal: '\.' is not a valid escape sequence in a
+                # normal string and triggers a warning on newer Pythons.
+                'thumbnail': r're:^https?://.*\.jpg$',
+            },
+        },
+        {
+            'url': 'http://itar-tass.com/obschestvo/1600009',
+            'only_matching': True,
+        },
+    ]
+
+    def _real_extract(self, url):
+        """Download the page at ``url`` and return its info dict.
+
+        Formats come from a JavaScript ``sources: [...]`` array found in the
+        page; title, description and thumbnail come from the Open Graph
+        helpers of the base class.
+        """
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        # The array is a JavaScript literal, so it is converted to JSON
+        # before being parsed.
+        sources = json.loads(js_to_json(self._search_regex(
+            r'(?s)sources\s*:\s*(\[.+?\])', webpage, 'sources')))
+
+        # NOTE(review): presumably ranks 'hd' above 'sd' -- confirm against
+        # utils.qualities.
+        quality = qualities(['sd', 'hd'])
+
+        formats = []
+        for source in sources:
+            video_url = source.get('file')
+            # Keep only entries whose file starts with 'http' and ends
+            # with '.mp4'; everything else is skipped.
+            if not video_url or not video_url.startswith('http') or not video_url.endswith('.mp4'):
+                continue
+            label = source.get('label')
+            formats.append({
+                'url': video_url,
+                'format_id': label,
+                'quality': quality(label),
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': self._og_search_title(webpage),
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'formats': formats,
+        }
urls = []
webpage = self._download_webpage(url, user_id)
urls.extend(re.findall(self._MEDIA_RE, webpage))
-
+
pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[:-1]
for p in pages:
more = 'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, p)
class TeamcocoIE(InfoExtractor):
_VALID_URL = r'http://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)'
_TESTS = [
- {
- 'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
- 'file': '80187.mp4',
- 'md5': '3f7746aa0dc86de18df7539903d399ea',
- 'info_dict': {
- 'title': 'Conan Becomes A Mary Kay Beauty Consultant',
- 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'
+ {
+ 'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
+ 'file': '80187.mp4',
+ 'md5': '3f7746aa0dc86de18df7539903d399ea',
+ 'info_dict': {
+ 'title': 'Conan Becomes A Mary Kay Beauty Consultant',
+ 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'
+ }
+ }, {
+ 'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
+ 'file': '19705.mp4',
+ 'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
+ 'info_dict': {
+ "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
+ "title": "Louis C.K. Interview Pt. 1 11/3/11"
+ }
}
- },
- {
- 'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
- 'file': '19705.mp4',
- 'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
- 'info_dict': {
- "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
- "title": "Louis C.K. Interview Pt. 1 11/3/11"
- }
- }
]
def _real_extract(self, url):
display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id)
-
+
video_id = mobj.group("video_id")
if not video_id:
video_id = self._html_search_regex(
'ext': 'mp4',
'title': 'The illusion of consciousness',
'description': ('Philosopher Dan Dennett makes a compelling '
- 'argument that not only don\'t we understand our own '
- 'consciousness, but that half the time our brains are '
- 'actively fooling us.'),
+ 'argument that not only don\'t we understand our own '
+ 'consciousness, but that half the time our brains are '
+ 'actively fooling us.'),
'uploader': 'Dan Dennett',
'width': 854,
'duration': 1308,
def _extract_info(self, webpage):
+ # Pull the object passed as the second argument of q("<name>.init", {...})
+ # out of the page and return it parsed as JSON.
info_json = self._search_regex(r'q\("\w+.init",({.+})\)</script>',
- webpage, 'info json')
+ webpage, 'info json')
return json.loads(info_json)
def _real_extract(self, url):
'''Returns the videos of the playlist'''
webpage = self._download_webpage(url, name,
- 'Downloading playlist webpage')
+ 'Downloading playlist webpage')
info = self._extract_info(webpage)
playlist_info = info['playlist']
-#coding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
from .mitele import MiTeleIE
embed_url = self._html_search_regex(
r'"(https://www.wat.tv/embedframe/.*?)"', webpage, 'embed url')
embed_page = self._download_webpage(embed_url, video_id,
- 'Downloading embed player page')
+ 'Downloading embed player page')
wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')
wat_info = self._download_json(
'http://www.wat.tv/interface/contentv3/%s' % wat_id, video_id)
'skip_download': True,
},
}
+
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
if mobj.group('config'):
- config_url = url+ '&form=json'
+ config_url = url + '&form=json'
config_url = config_url.replace('swf/', 'config/')
config_url = config_url.replace('onsite/', 'onsite/config/')
config = self._download_json(config_url, video_id, 'Downloading config')
smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'
else:
smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
- 'format=smil&mbr=true'.format(video_id))
-
+ 'format=smil&mbr=true'.format(video_id))
meta = self._download_xml(smil_url, video_id)
try:
'formats': formats,
'description': info['description'],
'thumbnail': info['defaultThumbnailUrl'],
- 'duration': info['duration']//1000,
+ 'duration': info['duration'] // 1000,
}
-#coding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
r': <a href="http://www.thisav.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>',
webpage, 'uploader id', fatal=False)
ext = determine_ext(video_url)
-
+
return {
- 'id': video_id,
- 'url': video_url,
- 'uploader': uploader,
+ 'id': video_id,
+ 'url': video_url,
+ 'uploader': uploader,
'uploader_id': uploader_id,
- 'title': title,
- 'ext': ext,
+ 'title': title,
+ 'ext': ext,
}
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id, 'Downloading page')
-
+
mobj = re.search(r'(?m)fo\.addVariable\("file",\s"(?P<fileid>[\da-z]+)"\);\n'
- '\s+fo\.addVariable\("s",\s"(?P<serverid>\d+)"\);', webpage)
+ '\s+fo\.addVariable\("s",\s"(?P<serverid>\d+)"\);', webpage)
if mobj is None:
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
'url': video_url,
'thumbnail': thumbnail,
'title': title
- }
\ No newline at end of file
+ }
'ext': 'mp4',
'title': 'Breaking Amish: Die Welt da draußen',
'uploader': 'Discovery Networks - Germany',
- 'description': 'Vier Amische und eine Mennonitin wagen in New York'
+ 'description': (
+ 'Vier Amische und eine Mennonitin wagen in New York'
' den Sprung in ein komplett anderes Leben. Begleitet sie auf'
- ' ihrem spannenden Weg.',
+ ' ihrem spannenden Weg.'),
},
}
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class TMZIE(InfoExtractor):
+    """Information extractor for tmz.com video pages."""
+
+    _VALID_URL = r'https?://(?:www\.)?tmz\.com/videos/(?P<id>[^/]+)/?'
+    _TEST = {
+        'url': 'http://www.tmz.com/videos/0_okj015ty/',
+        'md5': '791204e3bf790b1426cb2db0706184c0',
+        'info_dict': {
+            'id': '0_okj015ty',
+            'url': 'http://tmz.vo.llnwd.net/o28/2014-03/13/0_okj015ty_0_rt8ro3si_2.mp4',
+            'ext': 'mp4',
+            'title': 'Kim Kardashian\'s Boobs Unlock a Mystery!',
+            'description': 'Did Kim Kardasain try to one-up Khloe by one-upping Kylie??? Or is she just showing off her amazing boobs?',
+            'thumbnail': 'http://cdnbakmi.kaltura.com/p/591531/sp/59153100/thumbnail/entry_id/0_okj015ty/version/100002/acv/182/width/640',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the video page at ``url``.
+
+        The media URL and thumbnail are read from the page's ``VideoURL``
+        and ``ThumbURL`` meta tags; a missing ``VideoURL`` is fatal.
+        """
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        # Looked up in the same order as the keys of the returned dict.
+        media_url = self._html_search_meta('VideoURL', webpage, fatal=True)
+        page_title = self._og_search_title(webpage)
+        page_description = self._og_search_description(webpage)
+        thumb_url = self._html_search_meta('ThumbURL', webpage)
+
+        info = {
+            'id': video_id,
+            'url': media_url,
+            'title': page_title,
+            'description': page_description,
+            'thumbnail': thumb_url,
+        }
+        return info
fmt['height'] = int(m.group(1))
formats.append(fmt)
self._sort_formats(formats)
-
+
return {
'id': video_id,
'display_id': display_id,
webpage = self._download_webpage(url, name)
title = self._search_regex(r'<title>(.+?)</title>',
- webpage, 'video title').replace(' - Trailer Addict','')
+ webpage, 'video title').replace(' - Trailer Addict', '')
view_count_str = self._search_regex(
r'<span class="views_n">([0-9,.]+)</span>',
webpage, 'view count', fatal=False)
fvar = "fvar"
info_url = "http://www.traileraddict.com/%s.php?tid=%s" % (fvar, str(video_id))
- info_webpage = self._download_webpage(info_url, video_id , "Downloading the info webpage")
+ info_webpage = self._download_webpage(info_url, video_id, "Downloading the info webpage")
final_url = self._search_regex(r'&fileurl=(.+)',
- info_webpage, 'Download url').replace('%3F','?')
+ info_webpage, 'Download url').replace('%3F', '?')
thumbnail_url = self._search_regex(r'&image=(.+?)&',
- info_webpage, 'thumbnail url')
+ info_webpage, 'thumbnail url')
description = self._html_search_regex(
r'(?s)<div class="synopsis">.*?<div class="movie_label_info"[^>]*>(.*?)</div>',
+from __future__ import unicode_literals
+
import json
-import re
from .common import InfoExtractor
class TriluliluIE(InfoExtractor):
- _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?trilulilu\.ro/video-(?P<category>[^/]+)/(?P<video_id>[^/]+)'
+ _VALID_URL = r'https?://(?:www\.)?trilulilu\.ro/video-[^/]+/(?P<id>[^/]+)'
_TEST = {
- u"url": u"http://www.trilulilu.ro/video-animatie/big-buck-bunny-1",
- u'file': u"big-buck-bunny-1.mp4",
- u'info_dict': {
- u"title": u"Big Buck Bunny",
- u"description": u":) pentru copilul din noi",
+ 'url': 'http://www.trilulilu.ro/video-animatie/big-buck-bunny-1',
+ 'info_dict': {
+ 'id': 'big-buck-bunny-1',
+ 'ext': 'mp4',
+ 'title': 'Big Buck Bunny',
+ 'description': ':) pentru copilul din noi',
},
# Server ignores Range headers (--test)
- u"params": {
- u"skip_download": True
+ 'params': {
+ 'skip_download': True
}
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('video_id')
-
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._og_search_title(webpage)
description = self._og_search_description(webpage)
log_str = self._search_regex(
- r'block_flash_vars[ ]=[ ]({[^}]+})', webpage, u'log info')
+ r'block_flash_vars[ ]=[ ]({[^}]+})', webpage, 'log info')
log = json.loads(log_str)
- format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/'
- u'video-formats2' % log)
+ format_url = ('http://fs%(server)s.trilulilu.ro/%(hash)s/'
+ 'video-formats2' % log)
format_doc = self._download_xml(
format_url, video_id,
- note=u'Downloading formats',
- errnote=u'Error while downloading formats')
-
+ note='Downloading formats',
+ errnote='Error while downloading formats')
+
video_url_template = (
- u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
- u'&source=site&hash=%(hash)s&username=%(userid)s&'
- u'key=ministhebest&format=%%s&sig=&exp=' %
+ 'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
+ '&source=site&hash=%(hash)s&username=%(userid)s&'
+ 'key=ministhebest&format=%%s&sig=&exp=' %
log)
formats = [
{
'description': description,
'thumbnail': thumbnail,
}
-
'skip': 'Only works from China'
}]
- def _url_for_id(self, id, quality = None):
- info_url = "http://v2.tudou.com/f?id="+str(id)
+ def _url_for_id(self, id, quality=None):
+ # Download the info page for `id` (with '&hd<quality>' appended when a
+ # quality is given) and return the text found between '>' and '</f>'.
+ info_url = "http://v2.tudou.com/f?id=" + str(id)
if quality:
info_url += '&hd' + quality
webpage = self._download_webpage(info_url, id, "Opening the info webpage")
- final_url = self._html_search_regex('>(.+?)</f>',webpage, 'video url')
+ final_url = self._html_search_regex('>(.+?)</f>', webpage, 'video url')
return final_url
def _real_extract(self, url):
result = []
len_parts = len(parts)
if len_parts > 1:
- self.to_screen(u'%s: found %s parts' % (video_id, len_parts))
+ self.to_screen('%s: found %s parts' % (video_id, len_parts))
for part in parts:
part_id = part['k']
final_url = self._url_for_id(part_id, quality)
webpage, 'iframe url')
iframe = self._download_webpage(iframe_url, video_id)
video_url = self._search_regex(r'<source src="([^"]+)"',
- iframe, 'video url')
+ iframe, 'video url')
# The only place where you can get a title, it's not complete,
# but searching in other places doesn't work for all videos
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class TuneInIE(InfoExtractor):
+    """Information extractor for tunein.com stations and tun.in short links."""
+
+    # Either a tunein.com station URL (captures ``id``) or a tun.in short
+    # link (captures ``redirect_id``), never both.
+    _VALID_URL = r'''(?x)https?://(?:www\.)?
+    (?:
+        tunein\.com/
+        (?:
+            radio/.*?-s|
+            station/.*?StationId\=
+        )(?P<id>[0-9]+)
+        |tun\.in/(?P<redirect_id>[A-Za-z0-9]+)
+    )
+    '''
+    _API_URL_TEMPLATE = 'http://tunein.com/tuner/tune/?stationId={0:}&tuneType=Station'
+
+    # Expected result shared by both test cases below.
+    _INFO_DICT = {
+        'id': '34682',
+        'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2',
+        'ext': 'AAC',
+        # Raw literal: '\.' is not a valid escape sequence in a normal
+        # string and triggers a warning on newer Pythons.
+        'thumbnail': r're:^https?://.*\.png$',
+        'location': 'Tacoma, WA',
+    }
+    _TESTS = [
+        {
+            'url': 'http://tunein.com/radio/Jazz24-885-s34682/',
+            'info_dict': _INFO_DICT,
+            'params': {
+                'skip_download': True,  # live stream
+            },
+        },
+        {  # test redirection
+            'url': 'http://tun.in/ser7s',
+            'info_dict': _INFO_DICT,
+            'params': {
+                'skip_download': True,  # live stream
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        """Resolve ``url`` to a station and return its info dict.
+
+        Short links are followed first so that the station id can be read
+        from the final URL. Raises ExtractorError (expected) when the
+        station JSON carries no ``StreamUrl``.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        redirect_id = mobj.group('redirect_id')
+        if redirect_id:
+            # The server doesn't support HEAD requests
+            urlh = self._request_webpage(
+                url, redirect_id, note='Downloading redirect page')
+            url = urlh.geturl()
+            self.to_screen('Following redirect: %s' % url)
+            # Re-match against the redirect target to obtain the station id.
+            mobj = re.match(self._VALID_URL, url)
+        station_id = mobj.group('id')
+
+        station_info = self._download_json(
+            self._API_URL_TEMPLATE.format(station_id),
+            station_id, note='Downloading station JSON')
+
+        title = station_info['Title']
+        thumbnail = station_info.get('Logo')
+        location = station_info.get('Location')
+        streams_url = station_info.get('StreamUrl')
+        if not streams_url:
+            raise ExtractorError('No downloadable streams found',
+                                 expected=True)
+        stream_data = self._download_webpage(
+            streams_url, station_id, note='Downloading stream data')
+        # The payload is JSON wrapped in "(...);"; only the parenthesized
+        # part is parsed.
+        streams = json.loads(self._search_regex(
+            r'\((.*)\);', stream_data, 'stream info'))['Streams']
+
+        # Stays None unless at least one stream is marked as 'Live'.
+        is_live = None
+        formats = []
+        for stream in streams:
+            if stream.get('Type') == 'Live':
+                is_live = True
+            formats.append({
+                'abr': stream.get('Bandwidth'),
+                'ext': stream.get('MediaType'),
+                'acodec': stream.get('MediaType'),
+                'vcodec': 'none',
+                'url': stream.get('Url'),
+                # Sometimes streams with the highest quality do not exist
+                'preference': stream.get('Reliability'),
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': station_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': thumbnail,
+            'location': location,
+            'is_live': is_live,
+        }
'duration': duration,
'age_limit': age_limit,
'formats': formats,
- }
\ No newline at end of file
+ }
-import json
-import re
+from __future__ import unicode_literals
from .common import InfoExtractor
class TvpIE(InfoExtractor):
- IE_NAME = u'tvp.pl'
+ IE_NAME = 'tvp.pl'
_VALID_URL = r'https?://www\.tvp\.pl/.*?wideo/(?P<date>\d+)/(?P<id>\d+)'
_TEST = {
- u'url': u'http://www.tvp.pl/warszawa/magazyny/campusnews/wideo/31102013/12878238',
- u'md5': u'148408967a6a468953c0a75cbdaf0d7a',
- u'file': u'12878238.wmv',
- u'info_dict': {
- u'title': u'31.10.2013 - Odcinek 2',
- u'description': u'31.10.2013 - Odcinek 2',
+ 'url': 'http://www.tvp.pl/warszawa/magazyny/campusnews/wideo/31102013/12878238',
+ 'md5': '148408967a6a468953c0a75cbdaf0d7a',
+ 'info_dict': {
+ 'id': '12878238',
+ 'ext': 'wmv',
+ 'title': '31.10.2013 - Odcinek 2',
+ 'description': '31.10.2013 - Odcinek 2',
},
- u'skip': u'Download has to use same server IP as extraction. Therefore, a good (load-balancing) DNS resolver will make the download fail.'
+ 'skip': 'Download has to use same server IP as extraction. Therefore, a good (load-balancing) DNS resolver will make the download fail.'
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
json_url = 'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s' % video_id
- json_params = self._download_webpage(
- json_url, video_id, u"Downloading video metadata")
-
- params = json.loads(json_params)
- self.report_extraction(video_id)
+ params = self._download_json(
+ json_url, video_id, "Downloading video metadata")
video_url = params['video_url']
- title = self._og_search_title(webpage, fatal=True)
return {
'id': video_id,
- 'title': title,
+ 'title': self._og_search_title(webpage),
'ext': 'wmv',
'url': video_url,
'description': self._og_search_description(webpage),
error_str += ' - %s' % error_data.get('formErrors')
raise ExtractorError(error_str, expected=True)
- def _download_json(self, url, video_id, note='Downloading JSON metadata'):
- response = super(UdemyIE, self)._download_json(url, video_id, note)
+ def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'):
+ # Wrapper around the parent _download_json: attaches the Udemy request
+ # headers, then runs the response through self._handle_error before
+ # returning it. url_or_request may be a URL or a
+ # compat_urllib_request.Request.
+ headers = {
+ 'X-Udemy-Snail-Case': 'true',
+ 'X-Requested-With': 'XMLHttpRequest',
+ }
+ # Mirror the client_id / access_token cookies into request headers.
+ for cookie in self._downloader.cookiejar:
+ if cookie.name == 'client_id':
+ headers['X-Udemy-Client-Id'] = cookie.value
+ elif cookie.name == 'access_token':
+ headers['X-Udemy-Bearer-Token'] = cookie.value
+
+ # An existing Request gets the headers added to it; anything else is
+ # wrapped in a new Request carrying them.
+ if isinstance(url_or_request, compat_urllib_request.Request):
+ for header, value in headers.items():
+ url_or_request.add_header(header, value)
+ else:
+ url_or_request = compat_urllib_request.Request(url_or_request, headers=headers)
+
+ response = super(UdemyIE, self)._download_json(url_or_request, video_id, note)
self._handle_error(response)
return response
if login_popup == '<div class="run-command close-popup redirect" data-url="https://www.udemy.com/"></div>':
return
- csrf = self._html_search_regex(r'<input type="hidden" name="csrf" value="(.+?)"', login_popup, 'csrf token')
+ csrf = self._html_search_regex(
+ r'<input type="hidden" name="csrf" value="(.+?)"',
+ login_popup, 'csrf token')
login_form = {
'email': username,
'displayType': 'json',
'isSubmitted': '1',
}
- request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
- response = self._download_json(request, None, 'Logging in as %s' % username)
+ request = compat_urllib_request.Request(
+ self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
+ response = self._download_json(
+ request, None, 'Logging in as %s' % username)
if 'returnUrl' not in response:
raise ExtractorError('Unable to log in')
+
+
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
lecture_id = mobj.group('id')
lecture = self._download_json(
- 'https://www.udemy.com/api-1.1/lectures/%s' % lecture_id, lecture_id, 'Downloading lecture JSON')
+ 'https://www.udemy.com/api-1.1/lectures/%s' % lecture_id,
+ lecture_id, 'Downloading lecture JSON')
- if lecture['assetType'] != 'Video':
- raise ExtractorError('Lecture %s is not a video' % lecture_id, expected=True)
+ asset_type = lecture.get('assetType') or lecture.get('asset_type')
+ if asset_type != 'Video':
+ raise ExtractorError(
+ 'Lecture %s is not a video' % lecture_id, expected=True)
asset = lecture['asset']
- stream_url = asset['streamUrl']
+ stream_url = asset.get('streamUrl') or asset.get('stream_url')
mobj = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', stream_url)
if mobj:
return self.url_result(mobj.group(1), 'Youtube')
video_id = asset['id']
- thumbnail = asset['thumbnailUrl']
+ thumbnail = asset.get('thumbnailUrl') or asset.get('thumbnail_url')
duration = asset['data']['duration']
- download_url = asset['downloadUrl']
+ download_url = asset.get('downloadUrl') or asset.get('download_url')
+
+ video = download_url.get('Video') or download_url.get('video')
+ video_480p = download_url.get('Video480p') or download_url.get('video_480p')
formats = [
{
- 'url': download_url['Video480p'][0],
+ 'url': video_480p[0],
'format_id': '360p',
},
{
- 'url': download_url['Video'][0],
+ 'url': video[0],
'format_id': '720p',
},
]
course_path = mobj.group('coursepath')
response = self._download_json(
- 'https://www.udemy.com/api-1.1/courses/%s' % course_path, course_path, 'Downloading course JSON')
+ 'https://www.udemy.com/api-1.1/courses/%s' % course_path,
+ course_path, 'Downloading course JSON')
course_id = int(response['id'])
course_title = response['title']
webpage = self._download_webpage(
- 'https://www.udemy.com/course/subscribe/?courseId=%s' % course_id, course_id, 'Enrolling in the course')
+ 'https://www.udemy.com/course/subscribe/?courseId=%s' % course_id,
+ course_id, 'Enrolling in the course')
if self._SUCCESSFULLY_ENROLLED in webpage:
self.to_screen('%s: Successfully enrolled in' % course_id)
elif self._ALREADY_ENROLLED in webpage:
self.to_screen('%s: Already enrolled in' % course_id)
- response = self._download_json('https://www.udemy.com/api-1.1/courses/%s/curriculum' % course_id,
+ response = self._download_json(
+ 'https://www.udemy.com/api-1.1/courses/%s/curriculum' % course_id,
course_id, 'Downloading course curriculum')
entries = [
- self.url_result('https://www.udemy.com/%s/#/lecture/%s' % (course_path, asset['id']), 'Udemy')
- for asset in response if asset.get('assetType') == 'Video'
+ self.url_result(
+ 'https://www.udemy.com/%s/#/lecture/%s' % (course_path, asset['id']), 'Udemy')
+ # The asset type may be keyed as 'assetType' or 'asset_type'. The
+ # parentheses are required: '==' binds tighter than 'or', so without
+ # them any asset with a truthy 'assetType' would be accepted.
+ for asset in response if (asset.get('assetType') or asset.get('asset_type')) == 'Video'
]
- return self.playlist_result(entries, course_id, course_title)
\ No newline at end of file
+ return self.playlist_result(entries, course_id, course_title)
self.report_extraction(video_id)
video_title = self._html_search_regex(r'data-title="(?P<title>.+)"',
- webpage, 'title')
+ webpage, 'title')
uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
- webpage, 'uploader', fatal=False, flags=re.DOTALL)
+ webpage, 'uploader', fatal=False, flags=re.DOTALL)
thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"',
- webpage, 'thumbnail', fatal=False)
+ webpage, 'thumbnail', fatal=False)
return {
'id': video_id,
redirect_page, urlh = self._download_webpage_handle(url, video_id)
new_location = self._search_regex(r'window\.location = \'(.*)\';',
- redirect_page, 'redirect location')
+ redirect_page, 'redirect location')
redirect_url = urlh.geturl() + new_location
webpage = self._download_webpage(redirect_url, video_id,
- 'Downloading redirect page')
+ 'Downloading redirect page')
title = self._html_search_regex(r'<title>(.*)</title>',
- webpage, 'title').split('/')[0].strip()
+ webpage, 'title').split('/')[0].strip()
info_url = "http://vbox7.com/play/magare.do"
data = compat_urllib_parse.urlencode({'as3': '1', 'vid': video_id})
video_url = compat_urlparse.unquote(config['clip']['url'])
title = clean_html(get_element_by_id('videoName', webpage).rpartition('|')[0])
uploader_id = self._html_search_regex(r'<a href="/profile/\d+">(.+?)</a>',
- webpage, 'uploader')
+ webpage, 'uploader')
thumbnail = self._search_regex(r'<img id="veehdpreview" src="(.+?)"',
- webpage, 'thumbnail')
+ webpage, 'thumbnail')
description = self._html_search_regex(r'<td class="infodropdown".*?<div>(.*?)<ul',
- webpage, 'description', flags=re.DOTALL)
+ webpage, 'description', flags=re.DOTALL)
return {
'_type': 'video',
if mobj:
video_id = mobj.group('id')
page = self._download_webpage('http://www.vesti.ru/only_video.html?vid=%s' % video_id, video_id,
- 'Downloading video page')
+ 'Downloading video page')
rutv_url = RUTVIE._extract_url(page)
if rutv_url:
return self.url_result(rutv_url, 'RUTV')
- raise ExtractorError('No video found', expected=True)
\ No newline at end of file
+ raise ExtractorError('No video found', expected=True)
class VevoIE(InfoExtractor):
"""
Accepts urls from vevo.com or in the format 'vevo:{id}'
- (currently used by MTVIE)
+ (currently used by MTVIE and MySpaceIE)
"""
_VALID_URL = r'''(?x)
(?:https?://www\.vevo\.com/watch/(?:[^/]+/(?:[^/]+/)?)?|
'duration': float_or_none(data['duration'], 1000),
'view_count': data['displays'],
'formats': formats,
- }
\ No newline at end of file
+ }
except ExtractorError:
raise ExtractorError('The page doesn\'t contain a video', expected=True)
return self.url_result(ooyala_url, ie='Ooyala')
-
'view_count': view_count,
'formats': formats,
'age_limit': 18,
- }
\ No newline at end of file
+ }
-import re
+from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
find_xpath_attr,
- determine_ext,
+ int_or_none,
)
+
class VideofyMeIE(InfoExtractor):
- _VALID_URL = r'https?://(www\.videofy\.me/.+?|p\.videofy\.me/v)/(?P<id>\d+)(&|#|$)'
- IE_NAME = u'videofy.me'
+ _VALID_URL = r'https?://(?:www\.videofy\.me/.+?|p\.videofy\.me/v)/(?P<id>\d+)(&|#|$)'
+ IE_NAME = 'videofy.me'
_TEST = {
- u'url': u'http://www.videofy.me/thisisvideofyme/1100701',
- u'file': u'1100701.mp4',
- u'md5': u'c77d700bdc16ae2e9f3c26019bd96143',
- u'info_dict': {
- u'title': u'This is VideofyMe',
- u'description': None,
- u'uploader': u'VideofyMe',
- u'uploader_id': u'thisisvideofyme',
+ 'url': 'http://www.videofy.me/thisisvideofyme/1100701',
+ 'md5': 'c77d700bdc16ae2e9f3c26019bd96143',
+ 'info_dict': {
+ 'id': '1100701',
+ 'ext': 'mp4',
+ 'title': 'This is VideofyMe',
+ 'description': None,
+ 'uploader': 'VideofyMe',
+ 'uploader_id': 'thisisvideofyme',
+ 'view_count': int,
},
-
+
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
config = self._download_xml('http://sunshine.videofy.me/?videoId=%s' % video_id,
- video_id)
+ video_id)
video = config.find('video')
sources = video.find('sources')
- url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key)
- for key in ['on', 'av', 'off']] if node is not None)
+ url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key)
+ for key in ['on', 'av', 'off']] if node is not None)
video_url = url_node.find('url').text
+ view_count = int_or_none(self._search_regex(
+ r'([0-9]+)', video.find('views').text, 'view count', fatal=False))
- return {'id': video_id,
- 'title': video.find('title').text,
- 'url': video_url,
- 'ext': determine_ext(video_url),
- 'thumbnail': video.find('thumb').text,
- 'description': video.find('description').text,
- 'uploader': config.find('blog/name').text,
- 'uploader_id': video.find('identifier').text,
- 'view_count': re.search(r'\d+', video.find('views').text).group(),
- }
+ return {
+ 'id': video_id,
+ 'title': video.find('title').text,
+ 'url': video_url,
+ 'thumbnail': video.find('thumb').text,
+ 'description': video.find('description').text,
+ 'uploader': config.find('blog/name').text,
+ 'uploader_id': video.find('identifier').text,
+ 'view_count': view_count,
+ }
+from __future__ import unicode_literals
+
import re
import random
class VideoPremiumIE(InfoExtractor):
- _VALID_URL = r'(?:https?://)?(?:www\.)?videopremium\.(?:tv|me)/(?P<id>\w+)(?:/.*)?'
+ _VALID_URL = r'https?://(?:www\.)?videopremium\.(?:tv|me)/(?P<id>\w+)(?:/.*)?'
_TEST = {
- u'url': u'http://videopremium.tv/4w7oadjsf156',
- u'file': u'4w7oadjsf156.f4v',
- u'info_dict': {
- u"title": u"youtube-dl_test_video____a_________-BaW_jenozKc.mp4.mp4"
+ 'url': 'http://videopremium.tv/4w7oadjsf156',
+ 'info_dict': {
+ 'id': '4w7oadjsf156',
+ 'ext': 'f4v',
+ 'title': 'youtube-dl_test_video____a_________-BaW_jenozKc.mp4.mp4'
},
- u'params': {
- u'skip_download': True,
+ 'params': {
+ 'skip_download': True,
},
- u'skip': u'Test file has been deleted.',
+ 'skip': 'Test file has been deleted.',
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
-
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
webpage_url = 'http://videopremium.tv/' + video_id
webpage = self._download_webpage(webpage_url, video_id)
# Download again, we need a cookie
webpage = self._download_webpage(
webpage_url, video_id,
- note=u'Downloading webpage again (with cookie)')
+ note='Downloading webpage again (with cookie)')
video_title = self._html_search_regex(
- r'<h2(?:.*?)>\s*(.+?)\s*<', webpage, u'video title')
+ r'<h2(?:.*?)>\s*(.+?)\s*<', webpage, 'video title')
return {
- 'id': video_id,
- 'url': "rtmp://e%d.md.iplay.md/play" % random.randint(1, 16),
- 'play_path': "mp4:%s.f4v" % video_id,
- 'page_url': "http://videopremium.tv/" + video_id,
- 'player_url': "http://videopremium.tv/uplayer/uppod.swf",
- 'ext': 'f4v',
- 'title': video_title,
+ 'id': video_id,
+ 'url': "rtmp://e%d.md.iplay.md/play" % random.randint(1, 16),
+ 'play_path': "mp4:%s.f4v" % video_id,
+ 'page_url': "http://videopremium.tv/" + video_id,
+ 'player_url': "http://videopremium.tv/uplayer/uppod.swf",
+ 'ext': 'f4v',
+ 'title': video_title,
}
'like_count': int_or_none(video['liked']),
'dislike_count': int_or_none(video['disliked']),
'formats': formats,
- }
\ No newline at end of file
+ }
'title': 'optical illusion dissapeared image magic illusion',
'description': ''
},
- }
\ No newline at end of file
+ }
-#coding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
def _real_extract(self, url):
video_id = self._match_id(url)
-
+
webpage = self._download_webpage(url, video_id)
video_url = self._html_search_regex(
r'{\s*file\s*:\s*"([^"]+)"\s*}', webpage, 'video url')
title = self._html_search_regex(
r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')
-
+
return {
'id': video_id,
'title': title,
'url': video_url,
}
-
\ No newline at end of file
else:
config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
config = self._search_regex(config_re, webpage, 'info section',
- flags=re.DOTALL)
+ flags=re.DOTALL)
config = json.loads(config)
except Exception as e:
if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
compat_urllib_parse,
compat_str,
unescapeHTML,
-)
+ unified_strdate,
+ orderedSet)
class VKIE(InfoExtractor):
IE_NAME = 'vk.com'
- _VALID_URL = r'https?://(?:m\.)?vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:.+?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))'
+ _VALID_URL = r'https?://(?:m\.)?vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:.+?\?.*?z=)?video(?P<videoid>[^s].*?)(?:\?|%2F|$))'
_NETRC_MACHINE = 'vk'
_TESTS = [
'title': 'ProtivoGunz - Хуёвая песня',
'uploader': 're:Noize MC.*',
'duration': 195,
+ 'upload_date': '20120212',
},
},
{
- 'url': 'http://vk.com/video4643923_163339118',
- 'md5': 'f79bccb5cd182b1f43502ca5685b2b36',
+ 'url': 'http://vk.com/video205387401_165548505',
+ 'md5': '6c0aeb2e90396ba97035b9cbde548700',
'info_dict': {
- 'id': '163339118',
+ 'id': '165548505',
'ext': 'mp4',
- 'uploader': 'Elya Iskhakova',
- 'title': 'Dream Theater - Hollow Years Live at Budokan 720*',
- 'duration': 558,
+ 'uploader': 'Tom Cruise',
+ 'title': 'No name',
+ 'duration': 9,
+ 'upload_date': '20130721'
}
},
{
'uploader': 'Vladimir Gavrin',
'title': 'Lin Dan',
'duration': 101,
+ 'upload_date': '20120730',
}
},
{
+ # VIDEO NOW REMOVED
+ # please update if you find a video whose URL follows the same pattern
'url': 'http://vk.com/video-8871596_164049491',
'md5': 'a590bcaf3d543576c9bd162812387666',
'note': 'Only available for registered users',
'uploader': 'Триллеры',
'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]',
'duration': 8352,
- },
- 'skip': 'Requires vk account credentials',
- },
- {
- 'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
- 'md5': 'd82c22e449f036282d1d3f7f4d276869',
- 'info_dict': {
- 'id': '166094326',
- 'ext': 'mp4',
- 'uploader': 'Киномания - лучшее из мира кино',
- 'title': 'Запах женщины (1992)',
- 'duration': 9392,
+ 'upload_date': '20121218'
},
'skip': 'Requires vk account credentials',
},
'uploader': 'Киномания - лучшее из мира кино',
'title': ' ',
'duration': 7291,
+ 'upload_date': '20140328',
},
'skip': 'Requires vk account credentials',
},
'ext': 'mp4',
'title': 'Книга Илая',
'duration': 6771,
+ 'upload_date': '20140626',
},
'skip': 'Only works from Russia',
},
+ {
+ # removed video, just testing that we match the pattern
+ 'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
+ 'only_matching': True,
+ },
]
def _login(self):
}
request = compat_urllib_request.Request('https://login.vk.com/?act=login',
- compat_urllib_parse.urlencode(login_form).encode('utf-8'))
+ compat_urllib_parse.urlencode(login_form).encode('utf-8'))
login_page = self._download_webpage(request, None, note='Logging in as %s' % username)
if re.search(r'onLoginFailed', login_page):
ERRORS = {
r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<':
- 'Video %s has been removed from public access due to rightholder complaint.',
+ 'Video %s has been removed from public access due to rightholder complaint.',
+
r'<!>Please log in or <':
- 'Video %s is only available for registered users, '
- 'use --username and --password options to provide account credentials.',
- '<!>Unknown error':
- 'Video %s does not exist.'
+ 'Video %s is only available for registered users, '
+ 'use --username and --password options to provide account credentials.',
+
+ r'<!>Unknown error':
+ 'Video %s does not exist.'
}
for error_re, error_msg in ERRORS.items():
data_json = self._search_regex(r'var vars = ({.*?});', info_page, 'vars')
data = json.loads(data_json)
+ # Extract upload date: look for the text "Added <word> <number>, <number> at"
+ # following the mv_date_wrap element of the info page.
+ upload_date = None
+ mobj = re.search(r'id="mv_date_wrap".*?Added ([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page)
+ if mobj is not None:
+     # Re-join the two captured parts with a space (dropping the comma)
+     # and let unified_strdate convert the result.
+     upload_date = unified_strdate(mobj.group(1) + ' ' + mobj.group(2))
+
formats = [{
'format_id': k,
'url': v,
'title': unescapeHTML(data['md_title']),
'thumbnail': data.get('jpg'),
'uploader': data.get('md_author'),
- 'duration': data.get('duration')
+ 'duration': data.get('duration'),
+ 'upload_date': upload_date,
}
+
+
+class VKUserVideosIE(InfoExtractor):
+    """Playlist extractor for the video listing page of a vk.com user.
+
+    Every ``/video<id>`` link found on the page becomes one playlist entry
+    that is delegated to the 'VK' extractor.
+    """
+
+    IE_NAME = 'vk.com:user-videos'
+    IE_DESC = 'vk.com:All of a user\'s videos'
+    _VALID_URL = r'https?://vk\.com/videos(?P<id>[0-9]+)(?:m\?.*)?'
+    _TEMPLATE_URL = 'https://vk.com/videos'
+    _TEST = {
+        'url': 'http://vk.com/videos205387401',
+        'playlist_mincount': 4,
+    }
+
+    def _real_extract(self, url):
+        """Return a playlist result built from the video links on *url*."""
+        user_id = self._match_id(url)
+        listing = self._download_webpage(url, user_id)
+
+        # With a single capture group, findall yields the captured ids.
+        linked_ids = re.findall(r'href="/video([0-9_]+)"', listing)
+
+        entries = []
+        # presumably orderedSet drops repeated ids while keeping page
+        # order -- confirm against utils.orderedSet
+        for linked_id in orderedSet(linked_ids):
+            entries.append(self.url_result(
+                'http://vk.com/video' + linked_id, 'VK', video_id=linked_id))
+
+        return self.playlist_result(entries, user_id)
'timestamp': 1413835980.560,
'upload_date': '20141020',
'duration': 3238,
- }
+ }
},
# cobra.be
{
'timestamp': timestamp,
'duration': duration,
'formats': formats,
- }
\ No newline at end of file
+ }
'upload_date': upload_date,
}
-# TODO test _1
\ No newline at end of file
+# TODO test _1
videos_urls = sorted(videos_urls, key=lambda u: 'video.sina.com' in u)
player_url = videos_urls[-1]
m_sina = re.match(r'https?://video\.sina\.com\.cn/v/b/(\d+)-\d+\.html',
- player_url)
+ player_url)
if m_sina is not None:
self.to_screen('Sina video detected')
sina_id = m_sina.group(1)
'title': video_title,
'thumbnail': thumbnail,
}
-
'thumbnail': thumbnail,
'age_limit': 18,
}
-
}
]
- def _real_extract(self,url):
+ def _real_extract(self, url):
def extract_video_url(webpage):
mp4 = re.search(r'<video\s+.*?file="([^"]+)".*?>', webpage)
if mp4 is None:
description = mobj.group(1) if mobj else None
upload_date = self._html_search_regex(r'hint=\'(\d{4}-\d{2}-\d{2}) \d{2}:\d{2}:\d{2} [A-Z]{3,4}\'',
- webpage, 'upload date', fatal=False)
+ webpage, 'upload date', fatal=False)
if upload_date:
upload_date = unified_strdate(upload_date)
uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
- webpage, 'uploader id', default='anonymous')
+ webpage, 'uploader id', default='anonymous')
thumbnail = self._html_search_regex(r'<video\s+.*?poster="([^"]+)".*?>', webpage, 'thumbnail', fatal=False)
duration = parse_duration(self._html_search_regex(r'<span>Runtime:</span> (\d+:\d+)</div>',
- webpage, 'duration', fatal=False))
+ webpage, 'duration', fatal=False))
view_count = self._html_search_regex(r'<span>Views:</span> ([^<]+)</div>', webpage, 'view count', fatal=False)
if view_count:
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_chr,
+ compat_ord,
+)
+from ..utils import (
+ int_or_none,
+ parse_filesize,
+)
+
+
+class XMinusIE(InfoExtractor):
+    """Information extractor for audio tracks on x-minus.org.
+
+    Metadata is scraped from the ``minus_track.*`` assignments and the
+    quality/filesize markup of the track page; the mp3 URL is built from
+    the track id and a de-obfuscated token.
+    """
+
+    _VALID_URL = r'https?://(?:www\.)?x-minus\.org/track/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://x-minus.org/track/4542/%D0%BF%D0%B5%D1%81%D0%B5%D0%BD%D0%BA%D0%B0-%D1%88%D0%BE%D1%84%D0%B5%D1%80%D0%B0.html',
+        'md5': '401a15f2d2dcf6d592cb95528d72a2a8',
+        'info_dict': {
+            'id': '4542',
+            'ext': 'mp3',
+            'title': 'Леонид Агутин-Песенка шофера',
+            'duration': 156,
+            'tbr': 320,
+            'filesize_approx': 5900000,
+            'view_count': int,
+        }
+    }
+
+    @staticmethod
+    def _decode_token(enc_token):
+        """Return the download token hidden in ``minus_track.tkn``.
+
+        The page value is read back to front; every character is replaced
+        by the one with the preceding code point, except the character at
+        index 3 of the reversed string, which is kept unchanged.
+        """
+        return ''.join(
+            c if pos == 3 else compat_chr(compat_ord(c) - 1)
+            for pos, c in enumerate(reversed(enc_token)))
+
+    def _real_extract(self, url):
+        """Download the track page for *url* and return its info dict.
+
+        Artist, title and token are mandatory (the default fatal search);
+        duration, file size, bitrate and view count are optional and end
+        up as None when their pattern is not found.
+        """
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        artist = self._html_search_regex(
+            r'minus_track\.artist="(.+?)"', webpage, 'artist')
+        title = artist + '-' + self._html_search_regex(
+            r'minus_track\.title="(.+?)"', webpage, 'title')
+        # Require at least one digit: an empty dur_sec='' must count as
+        # "not found" (tolerated by fatal=False) instead of handing an
+        # empty string to int_or_none.
+        duration = int_or_none(self._html_search_regex(
+            r'minus_track\.dur_sec=\'([0-9]+)\'',
+            webpage, 'duration', fatal=False))
+        filesize_approx = parse_filesize(self._html_search_regex(
+            r'<div class="filesize[^"]*"></div>\s*([0-9.]+\s*[a-zA-Z][bB])',
+            webpage, 'approximate filesize', fatal=False))
+        tbr = int_or_none(self._html_search_regex(
+            r'<div class="quality[^"]*"></div>\s*([0-9]+)\s*kbps',
+            webpage, 'bitrate', fatal=False))
+        view_count = int_or_none(self._html_search_regex(
+            r'<div class="quality.*?► ([0-9]+)',
+            webpage, 'view count', fatal=False))
+
+        enc_token = self._html_search_regex(
+            r'minus_track\.tkn="(.+?)"', webpage, 'enc_token')
+        token = self._decode_token(enc_token)
+        video_url = 'http://x-minus.org/dwlf/%s/%s.mp3' % (video_id, token)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+            'duration': duration,
+            'filesize_approx': filesize_approx,
+            'tbr': tbr,
+            'view_count': view_count,
+        }
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(r'flv_url=(.*?)&',
- webpage, 'video URL')
+ webpage, 'video URL')
video_url = compat_urllib_parse.unquote(video_url)
video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XNXX.COM',
- webpage, 'title')
+ webpage, 'title')
video_thumbnail = self._search_regex(r'url_bigthumb=(.*?)&',
- webpage, 'thumbnail', fatal=False)
+ webpage, 'thumbnail', fatal=False)
return {
'id': video_id,
url, username, note='Retrieving profile page')
video_count = int(self._search_regex(
- r'<strong>%s\'s Videos \(([0-9]+)\)</strong>'%username, profile_page,
+ r'<strong>%s\'s Videos \(([0-9]+)\)</strong>' % username, profile_page,
'video count'))
PAGE_SIZE = 25
for pagenum in itertools.count(0):
result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
info = self._download_json(result_url, query,
- note='Downloading results page '+str(pagenum+1))
+ note='Downloading results page ' + str(pagenum + 1))
m = info['m']
results = info['results']
'title': title,
'formats': self._extract_f4m_formats(f4m_url, video_id),
'thumbnail': self._og_search_thumbnail(webpage),
- }
\ No newline at end of file
+ }
def _gen_sid(self):
nowTime = int(time.time() * 1000)
- random1 = random.randint(1000,1998)
- random2 = random.randint(1000,9999)
+ random1 = random.randint(1000, 1998)
+ random2 = random.randint(1000, 9999)
- return "%d%d%d" %(nowTime,random1,random2)
+ return "%d%d%d" % (nowTime, random1, random2)
def _get_file_ID_mix_string(self, seed):
mixed = []
source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890")
seed = float(seed)
for i in range(len(source)):
- seed = (seed * 211 + 30031) % 65536
- index = math.floor(seed / 65536 * len(source))
+ seed = (seed * 211 + 30031) % 65536
+ index = math.floor(seed / 65536 * len(source))
mixed.append(source[int(index)])
source.remove(source[int(index)])
- #return ''.join(mixed)
+ # return ''.join(mixed)
return mixed
def _get_file_id(self, fileId, seed):
# -8 means blocked outside China.
error = config['data'][0].get('error') # Chinese and English, separated by newline.
raise ExtractorError(error or 'Server reported error %i' % error_code,
- expected=True)
+ expected=True)
video_title = config['data'][0]['title']
seed = config['data'][0]['seed']
keys = [s['k'] for s in config['data'][0]['segs'][format]]
# segs is usually a dictionary, but an empty *list* if an error occured.
- files_info=[]
+ files_info = []
sid = self._gen_sid()
fileid = self._get_file_id(fileid, seed)
- #column 8,9 of fileid represent the segment number
- #fileid[7:9] should be changed
+ # column 8,9 of fileid represent the segment number
+ # fileid[7:9] should be changed
for index, key in enumerate(keys):
temp_fileid = '%s%02X%s' % (fileid[0:8], index, fileid[10:])
download_url = 'http://k.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key)
try:
params = json.loads(json_params)
except:
- raise ExtractorError(u'Invalid JSON')
+ raise ExtractorError('Invalid JSON')
self.report_extraction(video_id)
try:
# Get all of the links from the page
DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
- webpage, 'download list').strip()
+ webpage, 'download list').strip()
LINK_RE = r'<a href="([^"]+)">'
links = re.findall(LINK_RE, download_list_html)
for encrypted_link in encrypted_links:
link = aes_decrypt_text(encrypted_link, video_title, 32).decode('utf-8')
links.append(link)
-
+
formats = []
for link in links:
# A link looks like this:
self._sort_formats(formats)
if not formats:
- raise ExtractorError(u'ERROR: no known formats available for video')
-
+ raise ExtractorError('ERROR: no known formats available for video')
+
return {
'id': video_id,
'uploader': video_uploader,
import json
import os.path
import re
+import time
import traceback
from .common import InfoExtractor, SearchInfoExtractor
uppercase_escape,
)
+
class YoutubeBaseInfoExtractor(InfoExtractor):
"""Provide base functions for Youtube extractors"""
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
_TWOFACTOR_URL = 'https://accounts.google.com/SecondFactor'
- _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
- _AGE_URL = 'https://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
_NETRC_MACHINE = 'youtube'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False
def _set_language(self):
- return bool(self._download_webpage(
- self._LANG_URL, None,
- note='Setting language', errnote='unable to set language',
- fatal=False))
+ self._set_cookie('.youtube.com', 'PREF', 'f1=50000000&hl=en',
+ # YouTube sets the expire time to about two months
+ expire_time=time.time() + 60*24*3600)
def _login(self):
"""
# Log in
login_form_strs = {
- 'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
- 'Email': username,
- 'GALX': galx,
- 'Passwd': password,
-
- 'PersistentCookie': 'yes',
- '_utf8': '霱',
- 'bgresponse': 'js_disabled',
- 'checkConnection': '',
- 'checkedDomains': 'youtube',
- 'dnConn': '',
- 'pstMsg': '0',
- 'rmShown': '1',
- 'secTok': '',
- 'signIn': 'Sign in',
- 'timeStmp': '',
- 'service': 'youtube',
- 'uilel': '3',
- 'hl': 'en_US',
+ 'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
+ 'Email': username,
+ 'GALX': galx,
+ 'Passwd': password,
+
+ 'PersistentCookie': 'yes',
+ '_utf8': '霱',
+ 'bgresponse': 'js_disabled',
+ 'checkConnection': '',
+ 'checkedDomains': 'youtube',
+ 'dnConn': '',
+ 'pstMsg': '0',
+ 'rmShown': '1',
+ 'secTok': '',
+ 'signIn': 'Sign in',
+ 'timeStmp': '',
+ 'service': 'youtube',
+ 'uilel': '3',
+ 'hl': 'en_US',
}
# Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
# chokes on unicode
- login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
+ login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
'service': 'youtube',
'hl': 'en_US',
}
- tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in tfa_form_strs.items())
+ tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in tfa_form_strs.items())
tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii')
tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
return False
return True
- def _confirm_age(self):
- age_form = {
- 'next_url': '/',
- 'action_confirm': 'Confirm',
- }
- req = compat_urllib_request.Request(self._AGE_URL,
- compat_urllib_parse.urlencode(age_form).encode('ascii'))
-
- self._download_webpage(
- req, None,
- note='Confirming age', errnote='Unable to confirm age',
- fatal=False)
-
def _real_initialize(self):
if self._downloader is None:
return
- if self._get_login_info()[0] is not None:
- if not self._set_language():
- return
+ self._set_language()
if not self._login():
return
- self._confirm_age()
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
+ '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'VP9'},
# Dash webm audio
'171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
'info_dict': {
'id': 'IB3lcPjvWLA',
'ext': 'm4a',
- 'title': 'Afrojack - The Spark ft. Spree Wilson',
- 'description': 'md5:9717375db5a9a3992be4668bbf3bc0a8',
+ 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
+ 'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
'uploader': 'AfrojackVEVO',
'uploader_id': 'AfrojackVEVO',
'upload_date': '20131011',
'title': 'Burning Everyone\'s Koran',
'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
}
- }
+ },
+ # Normal age-gate video (No vevo, embed allowed)
+ {
+ 'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
+ 'info_dict': {
+ 'id': 'HtVdAasjOgU',
+ 'ext': 'mp4',
+ 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
+ 'description': 'md5:eca57043abae25130f58f655ad9a7771',
+ 'uploader': 'The Witcher',
+ 'uploader_id': 'WitcherGame',
+ 'upload_date': '20140605',
+ },
+ },
]
def __init__(self, *args, **kwargs):
def gen_sig_code(idxs):
def _genslice(start, end, step):
starts = '' if start == 0 else str(start)
- ends = (':%d' % (end+step)) if end + step >= 0 else ':'
+ ends = (':%d' % (end + step)) if end + step >= 0 else ':'
steps = '' if step == 1 else (':%d' % step)
return 's[%s%s%s]' % (starts, ends, steps)
def _parse_sig_js(self, jscode):
funcname = self._search_regex(
r'\.sig\|\|([a-zA-Z0-9]+)\(', jscode,
- 'Initial JS player signature function name')
+ 'Initial JS player signature function name')
jsi = JSInterpreter(jscode)
initial_function = jsi.extract_function(funcname)
return {}
player_config = json.loads(mobj.group(1))
try:
- args = player_config[u'args']
- caption_url = args[u'ttsurl']
- timestamp = args[u'timestamp']
+ args = player_config['args']
+ caption_url = args['ttsurl']
+ timestamp = args['timestamp']
# We get the available subtitles
list_params = compat_urllib_parse.urlencode({
'type': 'list',
list_url = caption_url + '&' + list_params
caption_list = self._download_xml(list_url, video_id)
original_lang_node = caption_list.find('track')
- if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
+ if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr':
self._downloader.report_warning('Video doesn\'t have automatic captions')
return {}
original_lang = original_lang_node.attrib['lang_code']
def _extract_from_m3u8(self, manifest_url, video_id):
url_map = {}
+
def _get_urls(_manifest):
lines = _manifest.split('\n')
urls = filter(lambda l: l and not l.startswith('#'),
- lines)
+ lines)
return urls
manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
formats_urls = _get_urls(manifest)
# Get video webpage
url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
- pref_cookies = [
- c for c in self._downloader.cookiejar
- if c.domain == '.youtube.com' and c.name == 'PREF']
- for pc in pref_cookies:
- if 'hl=' in pc.value:
- pc.value = re.sub(r'hl=[^&]+', 'hl=en', pc.value)
- else:
- if pc.value:
- pc.value += '&'
- pc.value += 'hl=en'
video_webpage = self._download_webpage(url, video_id)
# Attempt to extract SWF player URL
player_url = None
# Get video info
- self.report_video_info_webpage_download(video_id)
if re.search(r'player-age-gate-content">', video_webpage) is not None:
age_gate = True
# We simulate the access to the video from www.youtube.com/v/{video_id}
video_info = compat_parse_qs(video_info_webpage)
else:
age_gate = False
- for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
- video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
+ try:
+ # Try looking directly into the video webpage
+ mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
+ if not mobj:
+ raise ValueError('Could not find ytplayer.config') # caught below
+ json_code = uppercase_escape(mobj.group(1))
+ ytplayer_config = json.loads(json_code)
+ args = ytplayer_config['args']
+ # Convert to the same format returned by compat_parse_qs
+ video_info = dict((k, [v]) for k, v in args.items())
+ if 'url_encoded_fmt_stream_map' not in args:
+ raise ValueError('No stream_map present') # caught below
+ except ValueError:
+ # We fallback to the get_video_info pages (used by the embed page)
+ self.report_video_info_webpage_download(video_id)
+ for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
+ video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
% (video_id, el_type))
- video_info_webpage = self._download_webpage(video_info_url, video_id,
- note=False,
- errnote='unable to download video info webpage')
- video_info = compat_parse_qs(video_info_webpage)
- if 'token' in video_info:
- break
+ video_info_webpage = self._download_webpage(video_info_url,
+ video_id, note=False,
+ errnote='unable to download video info webpage')
+ video_info = compat_parse_qs(video_info_webpage)
+ if 'token' in video_info:
+ break
if 'token' not in video_info:
if 'reason' in video_info:
raise ExtractorError(
# annotations
video_annotations = None
if self._downloader.params.get('writeannotations', False):
- video_annotations = self._extract_annotations(video_id)
-
- # Decide which formats to download
- try:
- mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
- if not mobj:
- raise ValueError('Could not find vevo ID')
- json_code = uppercase_escape(mobj.group(1))
- ytplayer_config = json.loads(json_code)
- args = ytplayer_config['args']
- # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
- # this signatures are encrypted
- if 'url_encoded_fmt_stream_map' not in args:
- raise ValueError('No stream_map present') # caught below
- re_signature = re.compile(r'[&,]s=')
- m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
- if m_s is not None:
- self.to_screen('%s: Encrypted signatures detected.' % video_id)
- video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
- m_s = re_signature.search(args.get('adaptive_fmts', ''))
- if m_s is not None:
- if 'adaptive_fmts' in video_info:
- video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts']
- else:
- video_info['adaptive_fmts'] = [args['adaptive_fmts']]
- except ValueError:
- pass
+ video_annotations = self._extract_annotations(video_id)
def _map_to_format_list(urlmap):
formats = []
'player_url': player_url,
}]
elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
- encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0]
+ encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
if 'rtmpe%3Dyes' in encoded_url_map:
raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
url_map = {}
parts_sizes = self._signature_cache_id(encrypted_sig)
self.to_screen('{%s} signature length %s, %s' %
- (format_id, parts_sizes, player_desc))
+ (format_id, parts_sizes, player_desc))
signature = self._decrypt_signature(
encrypted_sig, video_id, player_url, age_gate)
# However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage.
# Luckily, it seems, this case uses some kind of default signature (len == 86), so the
# combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here.
- if age_gate:
- dash_manifest_url = video_info.get('dashmpd')[0]
- else:
- dash_manifest_url = ytplayer_config['args']['dashmpd']
+ dash_manifest_url = video_info.get('dashmpd')[0]
+
def decrypt_sig(mobj):
s = mobj.group(1)
dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
self._sort_formats(formats)
return {
- 'id': video_id,
- 'uploader': video_uploader,
- 'uploader_id': video_uploader_id,
- 'upload_date': upload_date,
- 'title': video_title,
- 'thumbnail': video_thumbnail,
- 'description': video_description,
- 'categories': video_categories,
- 'subtitles': video_subtitles,
- 'duration': video_duration,
- 'age_limit': 18 if age_gate else 0,
- 'annotations': video_annotations,
+ 'id': video_id,
+ 'uploader': video_uploader,
+ 'uploader_id': video_uploader_id,
+ 'upload_date': upload_date,
+ 'title': video_title,
+ 'thumbnail': video_thumbnail,
+ 'description': video_description,
+ 'categories': video_categories,
+ 'subtitles': video_subtitles,
+ 'duration': video_duration,
+ 'age_limit': 18 if age_gate else 0,
+ 'annotations': video_annotations,
'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
- 'view_count': view_count,
+ 'view_count': view_count,
'like_count': like_count,
'dislike_count': dislike_count,
- 'formats': formats,
+ 'formats': formats,
}
+
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
IE_DESC = 'YouTube.com playlists'
_VALID_URL = r"""(?x)(?:
)
(
(?:PL|LL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
- # Top tracks, they can also include dots
+ # Top tracks, they can also include dots
|(?:MC)[\w\.]*
)
.*
return self._extract_mix(playlist_id)
if playlist_id.startswith('TL'):
raise ExtractorError('For downloading YouTube.com top lists, use '
- 'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
+ 'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
url = self._TEMPLATE_URL % playlist_id
page = self._download_webpage(url, playlist_id)
class YoutubeTopListIE(YoutubePlaylistIE):
IE_NAME = 'youtube:toplist'
IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"'
- ' (Example: "yttoplist:music:Top Tracks")')
+ ' (Example: "yttoplist:music:Top Tracks")')
_VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
_TESTS = [{
'url': 'yttoplist:music:Trending',
<span[^>]*>.*?%s.*?</span>''' % re.escape(query),
channel_page, 'list')
url = compat_urlparse.urljoin('https://www.youtube.com/', link)
-
+
video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
ids = []
# sometimes the webpage doesn't contain the videos
ids_in_page = self.extract_videos_from_page(page['content_html'])
video_ids.extend(ids_in_page)
-
+
if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
break
# Don't return True if the url can be extracted with other youtube
# extractor, the regex would is too permissive and it would match.
other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
- if any(ie.suitable(url) for ie in other_ies): return False
- else: return super(YoutubeUserIE, cls).suitable(url)
+ if any(ie.suitable(url) for ie in other_ies):
+ return False
+ else:
+ return super(YoutubeUserIE, cls).suitable(url)
def _real_extract(self, url):
# Extract username
paging = 0
for i in itertools.count(1):
info = self._download_json(self._FEED_TEMPLATE % paging,
- '%s feed' % self._FEED_NAME,
- 'Downloading page %s' % i)
+ '%s feed' % self._FEED_NAME,
+ 'Downloading page %s' % i)
feed_html = info.get('feed_html') or info.get('content_html')
load_more_widget_html = info.get('load_more_widget_html') or feed_html
m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
paging = mobj.group('paging')
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
+
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
- IE_DESC = 'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
+ IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
_VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
_FEED_NAME = 'recommended'
_PLAYLIST_TITLE = 'Youtube Recommended videos'
+
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
- IE_DESC = 'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
+ IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
_VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
_FEED_NAME = 'watch_later'
_PLAYLIST_TITLE = 'Youtube Watch Later'
_PERSONAL_FEED = True
+
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
- IE_DESC = 'Youtube watch history, "ythistory" keyword (requires authentication)'
+ IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
_VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory'
_FEED_NAME = 'history'
_PERSONAL_FEED = True
_PLAYLIST_TITLE = 'Youtube Watch History'
+
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
IE_NAME = 'youtube:favorites'
- IE_DESC = 'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
+ IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
_VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
_LOGIN_REQUIRED = True
--- /dev/null
+# coding=utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class ZingMp3BaseInfoExtractor(InfoExtractor):
+    """Shared helpers that turn the mp3.zing.vn player XML into info dicts."""
+
+    @staticmethod
+    def _extract_item(item):
+        """Build an info dict (title, url, ext, thumbnail) from one <item> element."""
+        return {
+            'title': item.find('./title').text.strip(),
+            'url': item.find('./source').text,
+            'ext': item.attrib['type'],
+            'thumbnail': item.find('./backimage').text,
+        }
+
+    def _extract_player_xml(self, player_xml_url, id, playlist_title=None):
+        """Download the player XML and convert its <item> children.
+
+        Exactly one item yields that item's info dict with 'id' set to `id`.
+        Any other count yields a playlist dict titled `playlist_title` whose
+        entries get the ids '<id>-1', '<id>-2', ...
+        """
+        xml_root = self._download_xml(player_xml_url, id, 'Downloading Player XML')
+        song_nodes = xml_root.findall('./item')
+
+        if len(song_nodes) != 1:
+            # playlist of songs
+            tracks = []
+            for position, node in enumerate(song_nodes, 1):
+                track = self._extract_item(node)
+                track['id'] = '%s-%d' % (id, position)
+                tracks.append(track)
+
+            return {
+                '_type': 'playlist',
+                'id': id,
+                'title': playlist_title,
+                'entries': tracks,
+            }
+
+        # one single song
+        song = self._extract_item(song_nodes[0])
+        song['id'] = id
+        return song
+
+
+class ZingMp3SongIE(ZingMp3BaseInfoExtractor):
+    """Extractor for single-song pages on mp3.zing.vn."""
+
+    _VALID_URL = r'https?://mp3\.zing\.vn/bai-hat/(?P<slug>[^/]+)/(?P<song_id>\w+)\.html'
+    _TESTS = [{
+        'url': 'http://mp3.zing.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html',
+        'md5': 'ead7ae13693b3205cbc89536a077daed',
+        'info_dict': {
+            'id': 'ZWZB9WAB',
+            'title': 'Xa Mãi Xa',
+            'ext': 'mp3',
+            # Raw string: '\.' is not a valid escape in a plain literal.
+            'thumbnail': r're:^https?://.*\.jpg$',
+        },
+    }]
+    IE_NAME = 'zingmp3:song'
+    IE_DESC = 'mp3.zing.vn songs'
+
+    def _real_extract(self, url):
+        """Fetch the song page, locate its player XML URL and extract from it."""
+        matched = re.match(self._VALID_URL, url)
+        slug = matched.group('slug')
+        song_id = matched.group('song_id')
+
+        # The page is always requested from the canonical http:// address
+        # rebuilt from the matched parts, not from the URL as given.
+        webpage = self._download_webpage(
+            'http://mp3.zing.vn/bai-hat/%s/%s.html' % (slug, song_id), song_id)
+
+        player_xml_url = self._search_regex(
+            r'&xmlURL=(?P<xml_url>[^&]+)&', webpage, 'player xml url')
+
+        return self._extract_player_xml(player_xml_url, song_id)
+
+
+class ZingMp3AlbumIE(ZingMp3BaseInfoExtractor):
+    """Extractor for album pages on mp3.zing.vn."""
+
+    IE_NAME = 'zingmp3:album'
+    IE_DESC = 'mp3.zing.vn albums'
+    _VALID_URL = r'https?://mp3\.zing\.vn/album/(?P<slug>[^/]+)/(?P<album_id>\w+)\.html'
+    _TESTS = [{
+        'url': 'http://mp3.zing.vn/album/Lau-Dai-Tinh-Ai-Bang-Kieu-Minh-Tuyet/ZWZBWDAF.html',
+        'info_dict': {
+            '_type': 'playlist',
+            'id': 'ZWZBWDAF',
+            'title': 'Lâu Đài Tình Ái - Bằng Kiều ft. Minh Tuyết | Album 320 lossless',
+        },
+        'playlist_count': 10,
+    }]
+
+    def _real_extract(self, url):
+        """Fetch the album page and extract its player XML, titled via _og_search_title."""
+        mobj = re.match(self._VALID_URL, url)
+        slug, album_id = mobj.group('slug', 'album_id')
+
+        album_url = 'http://mp3.zing.vn/album/%s/%s.html' % (slug, album_id)
+        webpage = self._download_webpage(album_url, album_id)
+
+        xml_url = self._search_regex(
+            r'&xmlURL=(?P<xml_url>[^&]+)&', webpage, 'player xml url')
+        album_title = self._og_search_title(webpage)
+
+        return self._extract_player_xml(
+            xml_url, album_id, playlist_title=album_title)
selection.add_option(
'--no-playlist',
action='store_true', dest='noplaylist', default=False,
- help='download only the currently playing video')
+ help='If the URL refers to a video and a playlist, download only the video.')
selection.add_option(
'--age-limit',
metavar='YEARS', dest='age_limit', default=None, type=int,
video_format.add_option(
'-f', '--format',
action='store', dest='format', metavar='FORMAT', default=None,
- help='video format code, specify the order of preference using'
+ help=(
+ 'video format code, specify the order of preference using'
' slashes: -f 22/17/18 . -f mp4 , -f m4a and -f flv are also'
' supported. You can also use the special names "best",'
' "bestvideo", "bestaudio", "worst", "worstvideo" and'
' -f 136/137/mp4/bestvideo,140/m4a/bestaudio.'
' You can merge the video and audio of two formats into a single'
' file using -f <video-format>+<audio-format> (requires ffmpeg or'
- ' avconv), for example -f bestvideo+bestaudio.')
+ ' avconv), for example -f bestvideo+bestaudio.'))
video_format.add_option(
'--all-formats',
action='store_const', dest='format', const='all',
postproc.add_option(
'--exec',
metavar='CMD', dest='exec_cmd',
- help='Execute a command on the file after downloading, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'' )
+ help='Execute a command on the file after downloading, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'')
parser.add_option_group(general)
parser.add_option_group(selection)
+from __future__ import unicode_literals
from .atomicparsley import AtomicParsleyPP
from .ffmpeg import (
+from __future__ import unicode_literals
+
from ..utils import PostProcessingError
'Command returned error code %d' % retCode)
return None, information # by default, keep file and do nothing
-
+from __future__ import unicode_literals
+
import os
import subprocess
import sys
def check_version(self):
if not self._executable:
- raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')
+ raise FFmpegPostProcessorError('ffmpeg or avconv not found. Please install one.')
REQUIRED_VERSION = '1.0'
if is_outdated_version(
self._versions[self._executable], REQUIRED_VERSION):
- warning = u'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
+ warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
self._executable, self._executable, REQUIRED_VERSION)
if self._downloader:
self._downloader.report_warning(warning)
[encodeFilename(self._ffmpeg_filename_argument(out_path), True)])
if self._downloader.params.get('verbose', False):
- self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))
+ self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd))
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = p.communicate()
if p.returncode != 0:
def _ffmpeg_filename_argument(self, fn):
# ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
- if fn.startswith(u'-'):
- return u'./' + fn
+ if fn.startswith('-'):
+ return './' + fn
return fn
def get_audio_codec(self, path):
if not self._probe_executable:
- raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.')
+ raise PostProcessingError('ffprobe or avprobe not found. Please install one.')
try:
cmd = [
self._probe_executable,
filecodec = self.get_audio_codec(path)
if filecodec is None:
- raise PostProcessingError(u'WARNING: unable to obtain file audio codec with ffprobe')
+ raise PostProcessingError('WARNING: unable to obtain file audio codec with ffprobe')
uses_avconv = self._uses_avconv()
more_opts = []
extension = 'wav'
more_opts += ['-f', 'wav']
- prefix, sep, ext = path.rpartition(u'.') # not os.path.splitext, since the latter does not work on unicode in all setups
+ prefix, sep, ext = path.rpartition('.') # not os.path.splitext, since the latter does not work on unicode in all setups
new_path = prefix + sep + extension
# If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
try:
if self._nopostoverwrites and os.path.exists(encodeFilename(new_path)):
- self._downloader.to_screen(u'[youtube] Post-process file %s exists, skipping' % new_path)
+ self._downloader.to_screen('[youtube] Post-process file %s exists, skipping' % new_path)
else:
- self._downloader.to_screen(u'[' + self._executable + '] Destination: ' + new_path)
+ self._downloader.to_screen('[' + self._executable + '] Destination: ' + new_path)
self.run_ffmpeg(path, new_path, acodec, more_opts)
except:
- etype,e,tb = sys.exc_info()
+ etype, e, tb = sys.exc_info()
if isinstance(e, AudioConversionError):
- msg = u'audio conversion failed: ' + e.msg
+ msg = 'audio conversion failed: ' + e.msg
else:
- msg = u'error running ' + self._executable
+ msg = 'error running ' + self._executable
raise PostProcessingError(msg)
# Try to update the date time for extracted audio file.
try:
os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
except:
- self._downloader.report_warning(u'Cannot update utime of audio file')
+ self._downloader.report_warning('Cannot update utime of audio file')
information['filepath'] = new_path
- return self._nopostoverwrites,information
+ return self._nopostoverwrites, information
class FFmpegVideoConvertor(FFmpegPostProcessor):
- def __init__(self, downloader=None,preferedformat=None):
+ def __init__(self, downloader=None, preferedformat=None):
super(FFmpegVideoConvertor, self).__init__(downloader)
- self._preferedformat=preferedformat
+ self._preferedformat = preferedformat
def run(self, information):
path = information['filepath']
- prefix, sep, ext = path.rpartition(u'.')
+ prefix, sep, ext = path.rpartition('.')
outpath = prefix + sep + self._preferedformat
if information['ext'] == self._preferedformat:
- self._downloader.to_screen(u'[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat))
- return True,information
- self._downloader.to_screen(u'['+'ffmpeg'+'] Converting video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) +outpath)
+ self._downloader.to_screen('[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat))
+ return True, information
+ self._downloader.to_screen('[' + 'ffmpeg' + '] Converting video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) + outpath)
self.run_ffmpeg(path, outpath, [])
information['filepath'] = outpath
information['format'] = self._preferedformat
information['ext'] = self._preferedformat
- return False,information
+ return False, information
class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
return cls._lang_map.get(code[:2])
def run(self, information):
- if information['ext'] != u'mp4':
- self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files')
+ if information['ext'] != 'mp4':
+ self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 files')
return True, information
if not information.get('subtitles'):
- self._downloader.to_screen(u'[ffmpeg] There aren\'t any subtitles to embed')
+ self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to embed')
return True, information
sub_langs = [key for key in information['subtitles']]
opts = ['-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy']
for (i, lang) in enumerate(sub_langs):
- opts.extend(['-map', '%d:0' % (i+1), '-c:s:%d' % i, 'mov_text'])
+ opts.extend(['-map', '%d:0' % (i + 1), '-c:s:%d' % i, 'mov_text'])
lang_code = self._conver_lang_code(lang)
if lang_code is not None:
opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
opts.extend(['-f', 'mp4'])
- temp_filename = filename + u'.temp'
- self._downloader.to_screen(u'[ffmpeg] Embedding subtitles in \'%s\'' % filename)
+ temp_filename = filename + '.temp'
+ self._downloader.to_screen('[ffmpeg] Embedding subtitles in \'%s\'' % filename)
self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
os.remove(encodeFilename(filename))
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
metadata['artist'] = info['uploader_id']
if not metadata:
- self._downloader.to_screen(u'[ffmpeg] There isn\'t any metadata to add')
+ self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add')
return True, info
filename = info['filepath']
temp_filename = prepend_extension(filename, 'temp')
- if info['ext'] == u'm4a':
+ if info['ext'] == 'm4a':
options = ['-vn', '-acodec', 'copy']
else:
options = ['-c', 'copy']
for (name, value) in metadata.items():
options.extend(['-metadata', '%s=%s' % (name, value)])
- self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename)
+ self._downloader.to_screen('[ffmpeg] Adding metadata to \'%s\'' % filename)
self.run_ffmpeg(filename, temp_filename, options)
os.remove(encodeFilename(filename))
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
def run(self, info):
filename = info['filepath']
args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0', '-shortest']
- self._downloader.to_screen(u'[ffmpeg] Merging formats into "%s"' % filename)
+ self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % filename)
self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args)
return True, info
temp_filename = prepend_extension(filename, 'temp')
options = ['-vn', '-acodec', 'copy']
- self._downloader.to_screen(u'[ffmpeg] Fixing audio file "%s"' % filename)
+ self._downloader.to_screen('[ffmpeg] Fixing audio file "%s"' % filename)
self.run_ffmpeg(filename, temp_filename, options)
os.remove(encodeFilename(filename))
+from __future__ import unicode_literals
+
import os
import subprocess
import sys
except (subprocess.CalledProcessError, OSError):
self._downloader.report_error("This filesystem doesn't support extended attributes. (You may have to enable them in your /etc/fstab)")
return False, info
-
avm_class.method_pyfunctions[func_name] = resfunc
return resfunc
-
+from __future__ import unicode_literals
+
import io
import json
import traceback
import sys
from zipimport import zipimporter
-from .utils import (
+from .compat import (
compat_str,
compat_urllib_request,
)
from .version import __version__
+
def rsa_verify(message, signature, key):
from struct import pack
from hashlib import sha256
- from sys import version_info
- def b(x):
- if version_info[0] == 2: return x
- else: return x.encode('latin1')
- assert(type(message) == type(b('')))
+
+ assert isinstance(message, bytes)
block_size = 0
n = key[0]
while n:
while signature:
raw_bytes.insert(0, pack("B", signature & 0xFF))
signature >>= 8
- signature = (block_size - len(raw_bytes)) * b('\x00') + b('').join(raw_bytes)
- if signature[0:2] != b('\x00\x01'): return False
+ signature = (block_size - len(raw_bytes)) * b'\x00' + b''.join(raw_bytes)
+ if signature[0:2] != b'\x00\x01':
+ return False
signature = signature[2:]
- if not b('\x00') in signature: return False
- signature = signature[signature.index(b('\x00'))+1:]
- if not signature.startswith(b('\x30\x31\x30\x0D\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01\x05\x00\x04\x20')): return False
+ if b'\x00' not in signature:
+ return False
+ signature = signature[signature.index(b'\x00') + 1:]
+ if not signature.startswith(b'\x30\x31\x30\x0D\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01\x05\x00\x04\x20'):
+ return False
signature = signature[19:]
- if signature != sha256(message).digest(): return False
+ if signature != sha256(message).digest():
+ return False
return True
UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
if not isinstance(globals().get('__loader__'), zipimporter) and not hasattr(sys, "frozen"):
- to_screen(u'It looks like you installed youtube-dl with a package manager, pip, setup.py or a tarball. Please use that to update.')
+ to_screen('It looks like you installed youtube-dl with a package manager, pip, setup.py or a tarball. Please use that to update.')
return
# Check if there is a new version
try:
newversion = compat_urllib_request.urlopen(VERSION_URL).read().decode('utf-8').strip()
except:
- if verbose: to_screen(compat_str(traceback.format_exc()))
- to_screen(u'ERROR: can\'t find the current version. Please try again later.')
+ if verbose:
+ to_screen(compat_str(traceback.format_exc()))
+ to_screen('ERROR: can\'t find the current version. Please try again later.')
return
if newversion == __version__:
- to_screen(u'youtube-dl is up-to-date (' + __version__ + ')')
+ to_screen('youtube-dl is up-to-date (' + __version__ + ')')
return
# Download and check versions info
versions_info = compat_urllib_request.urlopen(JSON_URL).read().decode('utf-8')
versions_info = json.loads(versions_info)
except:
- if verbose: to_screen(compat_str(traceback.format_exc()))
- to_screen(u'ERROR: can\'t obtain versions info. Please try again later.')
+ if verbose:
+ to_screen(compat_str(traceback.format_exc()))
+ to_screen('ERROR: can\'t obtain versions info. Please try again later.')
return
if not 'signature' in versions_info:
- to_screen(u'ERROR: the versions file is not signed or corrupted. Aborting.')
+ to_screen('ERROR: the versions file is not signed or corrupted. Aborting.')
return
signature = versions_info['signature']
del versions_info['signature']
if not rsa_verify(json.dumps(versions_info, sort_keys=True).encode('utf-8'), signature, UPDATES_RSA_KEY):
- to_screen(u'ERROR: the versions file signature is invalid. Aborting.')
+ to_screen('ERROR: the versions file signature is invalid. Aborting.')
return
version_id = versions_info['latest']
def version_tuple(version_str):
return tuple(map(int, version_str.split('.')))
if version_tuple(__version__) >= version_tuple(version_id):
- to_screen(u'youtube-dl is up to date (%s)' % __version__)
+ to_screen('youtube-dl is up to date (%s)' % __version__)
return
- to_screen(u'Updating to version ' + version_id + ' ...')
+ to_screen('Updating to version ' + version_id + ' ...')
version = versions_info['versions'][version_id]
print_notes(to_screen, versions_info['versions'])
filename = sys.argv[0]
# Py2EXE: Filename could be different
if hasattr(sys, "frozen") and not os.path.isfile(filename):
- if os.path.isfile(filename + u'.exe'):
- filename += u'.exe'
+ if os.path.isfile(filename + '.exe'):
+ filename += '.exe'
if not os.access(filename, os.W_OK):
- to_screen(u'ERROR: no write permissions on %s' % filename)
+ to_screen('ERROR: no write permissions on %s' % filename)
return
# Py2EXE
exe = os.path.abspath(filename)
directory = os.path.dirname(exe)
if not os.access(directory, os.W_OK):
- to_screen(u'ERROR: no write permissions on %s' % directory)
+ to_screen('ERROR: no write permissions on %s' % directory)
return
try:
newcontent = urlh.read()
urlh.close()
except (IOError, OSError):
- if verbose: to_screen(compat_str(traceback.format_exc()))
- to_screen(u'ERROR: unable to download latest version')
+ if verbose:
+ to_screen(compat_str(traceback.format_exc()))
+ to_screen('ERROR: unable to download latest version')
return
newcontent_hash = hashlib.sha256(newcontent).hexdigest()
if newcontent_hash != version['exe'][1]:
- to_screen(u'ERROR: the downloaded file hash does not match. Aborting.')
+ to_screen('ERROR: the downloaded file hash does not match. Aborting.')
return
try:
with open(exe + '.new', 'wb') as outf:
outf.write(newcontent)
except (IOError, OSError):
- if verbose: to_screen(compat_str(traceback.format_exc()))
- to_screen(u'ERROR: unable to write the new version')
+ if verbose:
+ to_screen(compat_str(traceback.format_exc()))
+ to_screen('ERROR: unable to write the new version')
return
try:
bat = os.path.join(directory, 'youtube-dl-updater.bat')
with io.open(bat, 'w') as batfile:
- batfile.write(u"""
+ batfile.write('''
@echo off
echo Waiting for file handle to be closed ...
ping 127.0.0.1 -n 5 -w 1000 > NUL
move /Y "%s.new" "%s" > NUL
echo Updated youtube-dl to version %s.
start /b "" cmd /c del "%%~f0"&exit /b"
- \n""" % (exe, exe, version_id))
+ \n''' % (exe, exe, version_id))
subprocess.Popen([bat]) # Continues to run in the background
return # Do not show premature success messages
except (IOError, OSError):
- if verbose: to_screen(compat_str(traceback.format_exc()))
- to_screen(u'ERROR: unable to overwrite current version')
+ if verbose:
+ to_screen(compat_str(traceback.format_exc()))
+ to_screen('ERROR: unable to overwrite current version')
return
# Zip unix package
newcontent = urlh.read()
urlh.close()
except (IOError, OSError):
- if verbose: to_screen(compat_str(traceback.format_exc()))
- to_screen(u'ERROR: unable to download latest version')
+ if verbose:
+ to_screen(compat_str(traceback.format_exc()))
+ to_screen('ERROR: unable to download latest version')
return
newcontent_hash = hashlib.sha256(newcontent).hexdigest()
if newcontent_hash != version['bin'][1]:
- to_screen(u'ERROR: the downloaded file hash does not match. Aborting.')
+ to_screen('ERROR: the downloaded file hash does not match. Aborting.')
return
try:
with open(filename, 'wb') as outf:
outf.write(newcontent)
except (IOError, OSError):
- if verbose: to_screen(compat_str(traceback.format_exc()))
- to_screen(u'ERROR: unable to overwrite current version')
+ if verbose:
+ to_screen(compat_str(traceback.format_exc()))
+ to_screen('ERROR: unable to overwrite current version')
return
- to_screen(u'Updated youtube-dl. Restart youtube-dl to use the new version.')
+ to_screen('Updated youtube-dl. Restart youtube-dl to use the new version.')
+
def get_notes(versions, fromVersion):
notes = []
- for v,vdata in sorted(versions.items()):
+ for v, vdata in sorted(versions.items()):
if v > fromVersion:
notes.extend(vdata.get('notes', []))
return notes
+
def print_notes(to_screen, versions, fromVersion=__version__):
notes = get_notes(versions, fromVersion)
if notes:
- to_screen(u'PLEASE NOTE:')
+ to_screen('PLEASE NOTE:')
for note in notes:
to_screen(note)
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urlparse,
+ shlex_quote,
)
'Accept-Language': 'en-us,en;q=0.5',
}
+
def preferredencoding():
"""Get preferred encoding.
""" Find the xpath xpath[@key=val] """
assert re.match(r'^[a-zA-Z-]+$', key)
assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
- expr = xpath + u"[@%s='%s']" % (key, val)
+ expr = xpath + "[@%s='%s']" % (key, val)
return node.find(expr)
else:
def find_xpath_attr(node, xpath, key, val):
# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter
+
+
def xpath_with_ns(path, ns_map):
components = [c.split(':') for c in path.split('/')]
replaced = []
# In case of error, try to remove win32 forbidden chars
alt_filename = os.path.join(
- re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
- for path_part in os.path.split(filename)
- )
+ re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
+ for path_part in os.path.split(filename)
+ )
if alt_filename == filename:
raise
else:
timestamp = email.utils.mktime_tz(timetuple)
return timestamp
+
def sanitize_filename(s, restricted=False, is_id=False):
"""Sanitizes a string so it could be used as part of a filename.
If restricted is set, use a stricter subset of allowed characters.
result = '_'
return result
+
def orderedSet(iterable):
""" Remove all duplicates from the input iterable """
res = []
assert isinstance(optval, compat_str)
return optval
+
def formatSeconds(secs):
if secs > 3600:
return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
class ExtractorError(Exception):
"""Error during info extraction."""
+
def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
""" tb, if given, is the original traceback (so that it can be printed out).
If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
configured to continue on errors. They will contain the appropriate
error message.
"""
+
def __init__(self, msg, exc_info=None):
""" exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
super(DownloadError, self).__init__(msg)
This exception may be raised by PostProcessor's .run() method to
indicate an error in the postprocessing task.
"""
+
def __init__(self, msg):
self.msg = msg
+
class MaxDownloadsReached(Exception):
""" --max-downloads limit has been reached. """
pass
self.downloaded = downloaded
self.expected = expected
+
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
"""Handler for HTTP requests and responses.
return None
upload_date = None
- #Replace commas
+ # Replace commas
date_str = date_str.replace(',', ' ')
# %z (UTC offset) is only supported in python>=3.2
date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
return upload_date
+
def determine_ext(url, default_ext='unknown_video'):
if url is None:
return default_ext
else:
return default_ext
+
def subtitles_filename(filename, sub_lang, sub_format):
return filename.rsplit('.', 1)[0] + '.' + sub_lang + '.' + sub_format
+
def date_from_str(date_str):
"""
Return a datetime object from a string in the format YYYYMMDD or
if sign == '-':
time = -time
unit = match.group('unit')
- #A bad aproximation?
+ # A bad approximation?
if unit == 'month':
unit = 'day'
time *= 30
delta = datetime.timedelta(**{unit: time})
return today + delta
return datetime.datetime.strptime(date_str, "%Y%m%d").date()
-
+
+
def hyphenate_date(date_str):
"""
Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
else:
return date_str
+
class DateRange(object):
"""Represents a time interval between two dates"""
+
def __init__(self, start=None, end=None):
"""start and end must be strings in the format accepted by date"""
if start is not None:
self.end = datetime.datetime.max.date()
if self.start > self.end:
raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
+
@classmethod
def day(cls, day):
"""Returns a range that only contains the given day"""
- return cls(day,day)
+ return cls(day, day)
+
def __contains__(self, date):
"""Check if the date is in the range"""
if not isinstance(date, datetime.date):
date = date_from_str(date)
return self.start <= date <= self.end
+
def __str__(self):
- return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
+ return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
def platform_name():
return '%.2f%s' % (converted, suffix)
+def parse_filesize(s):
+    """Parse a human-readable size such as '1.5 KiB' into a number of bytes.
+
+    Returns None when s is None or does not start with '<number><unit>'
+    (optional whitespace in between). The result is truncated to an int.
+    """
+    if s is None:
+        return None
+
+    # 'XiB' is binary and 'XB' is decimal. The lower-case variants
+    # ('xB' binary, 'Xb' decimal) are incorrect and unofficial,
+    # but we support those too.
+    multipliers = {'B': 1, 'b': 1}
+    for power, prefix in enumerate('KMGTPEZY', 1):
+        binary, decimal = 1024 ** power, 1000 ** power
+        multipliers[prefix + 'iB'] = binary
+        multipliers[prefix + 'B'] = decimal
+        multipliers[prefix.lower() + 'B'] = binary
+        multipliers[prefix + 'b'] = decimal
+
+    # No unit is a prefix of another, so the alternation order is irrelevant.
+    unit_alternatives = '|'.join(map(re.escape, multipliers))
+    mobj = re.match(
+        r'(?P<num>[0-9]+(?:\.[0-9]*)?)\s*(?P<unit>%s)' % unit_alternatives, s)
+    if mobj is None:
+        return None
+
+    return int(float(mobj.group('num')) * multipliers[mobj.group('unit')])
+
+
def get_term_width():
columns = compat_getenv('COLUMNS', None)
if columns:
def prepend_extension(filename, ext):
- name, real_ext = os.path.splitext(filename)
+ name, real_ext = os.path.splitext(filename)
return '{0}.{1}{2}'.format(name, ext, real_ext)
from zipimport import zipimporter
return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
+
+
+def args_to_str(args):
+    """Return a short string for a subprocess command: each argument passed
+    through shlex_quote, joined by single spaces."""
+    return ' '.join(map(shlex_quote, args))
+from __future__ import unicode_literals
-__version__ = '2014.11.23'
+__version__ = '2014.12.01'