-F, --list-formats list all available formats (currently youtube
only)
--write-sub write subtitle file (currently youtube only)
+ --write-auto-sub write automatic subtitle file (currently youtube
+ only)
--only-sub [deprecated] alias of --skip-download
--all-subs downloads all the available subtitles of the
video (currently youtube only)
--list-subs lists all available subtitles for the video
(currently youtube only)
- --sub-format FORMAT subtitle format [srt/sbv] (default=srt)
+ --sub-format FORMAT subtitle format [srt/sbv/vtt] (default=srt)
(currently youtube only)
--sub-lang LANG language of the subtitles to download (optional)
use IETF language tags like 'en'
-F, --list-formats list all available formats (currently youtube
only)
--write-sub write subtitle file (currently youtube only)
+ --write-auto-sub write automatic subtitle file (currently youtube
+ only)
--only-sub [deprecated] alias of --skip-download
--all-subs downloads all the available subtitles of the
video (currently youtube only)
--list-subs lists all available subtitles for the video
(currently youtube only)
- --sub-format FORMAT subtitle format [srt/sbv] (default=srt)
+ --sub-format FORMAT subtitle format [srt/sbv/vtt] (default=srt)
(currently youtube only)
--sub-lang LANG language of the subtitles to download (optional)
use IETF language tags like 'en'
--- /dev/null
+#!/usr/bin/env python
+
+# Generate youtube signature algorithm from test cases
+
+import sys
+
+# Fixture table: each entry is a pair (wrong, right). gen() receives the pair
+# and find_matching() looks up, for every character of `right`, its position
+# in `wrong`; len(wrong) becomes the length tested by the generated branch.
+tests = [
+ ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
+ "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"),
+ ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
+ "!?;:|}][{=+-_)(*&^$#@/MNBVCXZASqFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"),
+ ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
+ "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"),
+ ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
+ "{>/?;}[.=+-_)(*&^%$#@!MqBVCXZASDFwHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"),
+ ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
+ "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"),
+ ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
+ "D.>/?;}[{=+_)(*&^%$#!MNBVCXeAS<FGHJKLPOIUYTREWZ0987654321mnbvcxzasdfghjklpoiuytrQ"),
+ ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
+ "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"),
+]
+
+def find_matching(wrong, right):
+    """Describe how ``right`` can be assembled from pieces of ``wrong``.
+
+    Every character of ``right`` is looked up in ``wrong`` with ``str.index``
+    (so the first occurrence wins) and the list of positions is handed to
+    ``compress``.
+
+    Returns the generator produced by ``compress``: index/slice expressions
+    on ``s`` such as ``s[3]`` or ``s[4:9]``, in output order.
+    Raises ValueError if a character of ``right`` does not occur in ``wrong``.
+    """
+    idxs = [wrong.index(c) for c in right]
+    # The former second ``return`` (one ``s[i]`` per character) could never
+    # run after this one and has been dropped.
+    return compress(idxs)
+
+def compress(idxs):
+    """Yield index/slice expressions on ``s`` that cover ``idxs`` in order.
+
+    Runs of consecutive positions (difference +1 or -1) are merged into one
+    slice expression, everything else is emitted as a single ``s[N]``.
+
+    idxs -- list of non-negative integer positions.
+    Yields strings such as ``s[:3]``, ``s[4]`` or ``s[7:5:-1]``. An empty
+    ``idxs`` yields nothing.
+    """
+    def _genslice(start, end, step):
+        # ``end`` is the last position of the run (inclusive), so the slice
+        # stop lies one step beyond it.
+        starts = '' if start == 0 else str(start)
+        stop = end + step
+        # A descending run that reaches position 0 would get stop == -1,
+        # which a slice interprets as "the last element" and which makes
+        # the slice empty; leave the stop out in that case.
+        ends = ':' if stop < 0 else ':%d' % stop
+        steps = '' if step == 1 else (':%d' % step)
+        return 's[%s%s%s]' % (starts, ends, steps)
+
+    if not idxs:
+        return
+
+    step = None
+    for i, prev in zip(idxs[1:], idxs[:-1]):
+        if step is not None:
+            if i - prev == step:
+                continue
+            # The run ended at ``prev``; ``i`` is dealt with as ``prev`` of
+            # the next pair (or as the last element after the loop).
+            yield _genslice(start, prev, step)
+            step = None
+            continue
+        if i - prev in [-1, 1]:
+            step = i - prev
+            start = prev
+        else:
+            yield 's[%d]' % prev
+
+    # The last element is never emitted inside the loop. Taking it from the
+    # list (instead of the loop variable) also covers one-element input.
+    last = idxs[-1]
+    if step is None:
+        yield 's[%d]' % last
+    else:
+        yield _genslice(start, last, step)
+
+def _assert_compress(inp, exp):
+    """Check that compress(inp) produces exactly the list ``exp``.
+
+    On a mismatch both lists are printed before the assertion fails.
+    """
+    actual = list(compress(inp))
+    if actual != exp:
+        print('Got %r, expected %r' % (actual, exp))
+    assert actual == exp
+
+
+# Self-checks, executed every time the module is loaded.
+_assert_compress([0,2,4,6], ['s[0]', 's[2]', 's[4]', 's[6]'])
+_assert_compress([0,1,2,4,6,7], ['s[:3]', 's[4]', 's[6:8]'])
+_assert_compress([8,0,1,2,4,7,6,9], ['s[8]', 's[:3]', 's[4]', 's[7:5:-1]', 's[9]'])
+
+def gen(wrong, right, indent):
+    """Return the source text of one ``if len(s) == N:`` branch.
+
+    N is ``len(wrong)``; the returned expression is the ' + '-joined output
+    of find_matching(wrong, right). ``indent`` is inserted in front of the
+    ``return`` line.
+    """
+    pieces = find_matching(wrong, right)
+    expression = ' + '.join(pieces)
+    template = 'if len(s) == %d:\n%s return %s\n'
+    return template % (len(wrong), indent, expression)
+
+def genall(tests):
+    """Return one if/elif chain with a branch for every (wrong, right) pair.
+
+    The branches produced by gen() are glued together with the indentation
+    followed by 'el', which turns every ``if`` after the first into ``elif``.
+    """
+    indent = ' ' * 8
+    branches = [gen(wrong, right, indent) for wrong, right in tests]
+    separator = indent + 'el'
+    return indent + separator.join(branches)
+
+def main():
+    """Print the code generated from the module-level ``tests`` table."""
+    generated = genall(tests)
+    print(generated)
+
+if __name__ == '__main__':
+    main()
--- /dev/null
+import io
+import json
+import os.path
+
+from youtube_dl import YoutubeDL, YoutubeDLHandler
+from youtube_dl.utils import (
+ compat_cookiejar,
+ compat_urllib_request,
+)
+
+# General configuration (from __init__, not very elegant...)
+# Builds an opener from a proxy handler, a cookie processor backed by a fresh
+# CookieJar and a YoutubeDLHandler, then registers it with install_opener().
+# This happens as a side effect of importing this module.
+jar = compat_cookiejar.CookieJar()
+cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
+proxy_handler = compat_urllib_request.ProxyHandler()
+opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
+compat_urllib_request.install_opener(opener)
+
+# parameters.json is looked up in the directory that contains this file and
+# decoded as UTF-8; the parsed content is kept in the module-level `parameters`.
+PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")
+with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
+ parameters = json.load(pf)
+
+class FakeYDL(YoutubeDL):
+    """YoutubeDL replacement for the tests.
+
+    It records what it is asked to download in ``self.result`` instead of
+    downloading anything. The base class ``__init__`` is not called.
+    """
+
+    def __init__(self):
+        self.result = []
+        # Each instance works on its own copy of the parameters: some tests
+        # set the "sublang" parameter, and sharing one dictionary between
+        # downloaders would break the md5 checks.
+        self.params = dict(parameters)
+
+    def to_screen(self, s):
+        """Write the message ``s`` to standard output."""
+        print(s)
+
+    def trouble(self, s, tb=None):
+        """Raise an Exception carrying the message ``s``; ``tb`` is ignored."""
+        raise Exception(s)
+
+    def download(self, x):
+        """Append ``x`` to ``self.result`` instead of downloading it."""
+        self.result.append(x)
\ No newline at end of file
from youtube_dl.extractor import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE, YoutubeChannelIE
from youtube_dl.utils import *
-from youtube_dl import YoutubeDL
-PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")
-with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
- parameters = json.load(pf)
-
-# General configuration (from __init__, not very elegant...)
-jar = compat_cookiejar.CookieJar()
-cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
-proxy_handler = compat_urllib_request.ProxyHandler()
-opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
-compat_urllib_request.install_opener(opener)
-
-class FakeYDL(YoutubeDL):
- def __init__(self):
- self.result = []
- self.params = parameters
- def to_screen(self, s):
- print(s)
- def trouble(self, s, tb=None):
- raise Exception(s)
- def extract_info(self, url):
- self.result.append(url)
- return url
+from helper import FakeYDL
class TestYoutubeLists(unittest.TestCase):
def assertIsPlaylist(self,info):
--- /dev/null
+#!/usr/bin/env python
+
+import unittest
+import sys
+
+# Allow direct execution
+import os
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.extractor.youtube import YoutubeIE
+from helper import FakeYDL
+
+# Bound _decrypt_signature of a YoutubeIE built on the fake downloader; every
+# test below calls it directly.
+sig = YoutubeIE(FakeYDL())._decrypt_signature
+
+class TestYoutubeSig(unittest.TestCase):
+ """Checks _decrypt_signature against fixed (wrong, right) sample pairs.
+
+ `wrong` is the input passed to sig() and `right` the value it must return.
+ """
+ def test_43_43(self):
+ # Input made of two 43-character parts separated by a dot.
+ wrong = '5AEEAE0EC39677BC65FD9021CCD115F1F2DBD5A59E4.C0B243A3E2DED6769199AF3461781E75122AE135135'
+ right = '931EA22157E1871643FA9519676DED253A342B0C.4E95A5DBD2F1F511DCC1209DF56CB77693CE0EAE'
+ self.assertEqual(sig(wrong), right)
+
+ # NOTE(review): the number in the following test names appears to be the
+ # length of `wrong` -- confirm before relying on it.
+ def test_88(self):
+ wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<"
+ right = "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"
+ self.assertEqual(sig(wrong), right)
+
+ def test_87(self):
+ wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<"
+ right = "!?;:|}][{=+-_)(*&^$#@/MNBVCXZASqFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"
+ self.assertEqual(sig(wrong), right)
+
+ def test_86(self):
+ wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"
+ right = "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"
+ self.assertEqual(sig(wrong), right)
+
+ def test_85(self):
+ wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<"
+ right = "{>/?;}[.=+-_)(*&^%$#@!MqBVCXZASDFwHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"
+ self.assertEqual(sig(wrong), right)
+
+ def test_84(self):
+ wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<"
+ right = "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"
+ self.assertEqual(sig(wrong), right)
+
+ def test_83(self):
+ wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<"
+ right = "D.>/?;}[{=+_)(*&^%$#!MNBVCXeAS<FGHJKLPOIUYTREWZ0987654321mnbvcxzasdfghjklpoiuytrQ"
+ self.assertEqual(sig(wrong), right)
+
+ def test_82(self):
+ wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<"
+ right = "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"
+ self.assertEqual(sig(wrong), right)
+
+# Allow running this test file directly as a script.
+if __name__ == '__main__':
+ unittest.main()
from youtube_dl.extractor import YoutubeIE
from youtube_dl.utils import *
-from youtube_dl import YoutubeDL
-
-PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")
-with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
- parameters = json.load(pf)
-
-# General configuration (from __init__, not very elegant...)
-jar = compat_cookiejar.CookieJar()
-cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
-proxy_handler = compat_urllib_request.ProxyHandler()
-opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
-compat_urllib_request.install_opener(opener)
-
-class FakeYDL(YoutubeDL):
- def __init__(self):
- self.result = []
- # Different instances of the downloader can't share the same dictionary
- # some test set the "sublang" parameter, which would break the md5 checks.
- self.params = dict(parameters)
- def to_screen(self, s):
- print(s)
- def trouble(self, s, tb=None):
- raise Exception(s)
- def download(self, x):
- self.result.append(x)
+from helper import FakeYDL
md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
info_dict = IE.extract('QRS8MkLhQmM')
subtitles = info_dict[0]['subtitles']
self.assertEqual(len(subtitles), 13)
- def test_youtube_subtitles_format(self):
+ def test_youtube_subtitles_sbv_format(self):
DL = FakeYDL()
DL.params['writesubtitles'] = True
DL.params['subtitlesformat'] = 'sbv'
info_dict = IE.extract('QRS8MkLhQmM')
sub = info_dict[0]['subtitles'][0]
self.assertEqual(md5(sub[2]), '13aeaa0c245a8bed9a451cb643e3ad8b')
+ def test_youtube_subtitles_vtt_format(self):
+ """Extract QRS8MkLhQmM with subtitlesformat='vtt' and compare the md5 of the first subtitle entry's third field."""
+ DL = FakeYDL()
+ DL.params['writesubtitles'] = True
+ DL.params['subtitlesformat'] = 'vtt'
+ IE = YoutubeIE(DL)
+ info_dict = IE.extract('QRS8MkLhQmM')
+ sub = info_dict[0]['subtitles'][0]
+ self.assertEqual(md5(sub[2]), '356cdc577fde0c6783b9b822e7206ff7')
def test_youtube_list_subtitles(self):
DL = FakeYDL()
DL.params['listsubtitles'] = True
self.assertEqual(info_dict, None)
def test_youtube_automatic_captions(self):
DL = FakeYDL()
- DL.params['writesubtitles'] = True
+ DL.params['writeautomaticsub'] = True
DL.params['subtitleslang'] = 'it'
IE = YoutubeIE(DL)
info_dict = IE.extract('8YoUxe5ncPo')
"info_dict": {
"title": "卡马乔国足开大脚长传冲吊集锦"
}
+ },
+ {
+ "name": "CSpan",
+ "url": "http://www.c-spanvideo.org/program/HolderonV",
+ "file": "315139.flv",
+ "md5": "74a623266956f69e4df0068ab6c80fe4",
+ "info_dict": {
+ "title": "Attorney General Eric Holder on Voting Rights Act Decision"
+ },
+ "skip": "Requires rtmpdump"
+ },
+ {
+ "name": "Wimp",
+ "url": "http://www.wimp.com/deerfence/",
+ "file": "deerfence.flv",
+ "md5": "8b215e2e0168c6081a1cf84b2846a2b5",
+ "info_dict": {
+ "title": "Watch Till End: Herd of deer jump over a fence."
+ }
}
]
\-F,\ \-\-list\-formats\ \ \ \ \ \ \ \ \ list\ all\ available\ formats\ (currently\ youtube
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ only)
\-\-write\-sub\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ write\ subtitle\ file\ (currently\ youtube\ only)
+\-\-write\-auto\-sub\ \ \ \ \ \ \ \ \ \ \ write\ automatic\ subtitle\ file\ (currently\ youtube
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ only)
\-\-only\-sub\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ [deprecated]\ alias\ of\ \-\-skip\-download
\-\-all\-subs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ downloads\ all\ the\ available\ subtitles\ of\ the
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ video\ (currently\ youtube\ only)
\-\-list\-subs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ lists\ all\ available\ subtitles\ for\ the\ video
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (currently\ youtube\ only)
-\-\-sub\-format\ FORMAT\ \ \ \ \ \ \ \ subtitle\ format\ [srt/sbv]\ (default=srt)
+\-\-sub\-format\ FORMAT\ \ \ \ \ \ \ \ subtitle\ format\ [srt/sbv/vtt]\ (default=srt)
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (currently\ youtube\ only)
\-\-sub\-lang\ LANG\ \ \ \ \ \ \ \ \ \ \ \ language\ of\ the\ subtitles\ to\ download\ (optional)
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ IETF\ language\ tags\ like\ \[aq]en\[aq]
local cur prev opts
COMPREPLY=()
cur="${COMP_WORDS[COMP_CWORD]}"
- opts="--help --version --update --ignore-errors --rate-limit --retries --buffer-size --no-resize-buffer --dump-user-agent --user-agent --referer --list-extractors --proxy --no-check-certificate --test --playlist-start --playlist-end --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --title --id --literal --auto-number --output --autonumber-size --restrict-filenames --batch-file --no-overwrites --continue --no-continue --cookies --no-part --no-mtime --write-description --write-info-json --write-thumbnail --quiet --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-filename --get-format --newline --no-progress --console-title --verbose --dump-intermediate-pages --format --all-formats --prefer-free-formats --max-quality --list-formats --write-sub --only-sub --all-subs --list-subs --sub-format --sub-lang --username --password --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites"
+ opts="--help --version --update --ignore-errors --rate-limit --retries --buffer-size --no-resize-buffer --dump-user-agent --user-agent --referer --list-extractors --proxy --no-check-certificate --test --playlist-start --playlist-end --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --title --id --literal --auto-number --output --autonumber-size --restrict-filenames --batch-file --no-overwrites --continue --no-continue --cookies --no-part --no-mtime --write-description --write-info-json --write-thumbnail --quiet --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-filename --get-format --newline --no-progress --console-title --verbose --dump-intermediate-pages --format --all-formats --prefer-free-formats --max-quality --list-formats --write-sub --write-auto-sub --only-sub --all-subs --list-subs --sub-format --sub-lang --username --password --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites"
if [[ ${cur} == * ]] ; then
COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
self.ydl.report_warning(*args, **kargs)
def report_error(self, *args, **kargs):
- self.ydl.error(*args, **kargs)
+ self.ydl.report_error(*args, **kargs)
def slow_down(self, start_time, byte_counter):
"""Sleep if the download speed is over the rate limit."""
writeinfojson: Write the video description to a .info.json file
writethumbnail: Write the thumbnail image to a file
writesubtitles: Write the video subtitles to a file
+ writeautomaticsub: Write the automatic subtitles to a file
allsubtitles: Downloads all the subtitles of the video
listsubtitles: Lists all available subtitles for the video
- subtitlesformat: Subtitle format [sbv/srt] (default=srt)
+ subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
subtitleslang: Language of the subtitles to download
keepvideo: Keep the video file after post-processing
daterange: A DateRange object, download only if the upload_date is in the range.
self.report_error(u'Cannot write description file ' + descfn)
return
- if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
+ if (self.params.get('writesubtitles', False) or self.params.get('writeautomaticsub')) and 'subtitles' in info_dict and info_dict['subtitles']:
# subtitles download errors are already managed as troubles in relevant IE
# that way it will silently go on when used with unsupporting IE
subtitle = info_dict['subtitles'][0]
video_format.add_option('--write-sub', '--write-srt',
action='store_true', dest='writesubtitles',
help='write subtitle file (currently youtube only)', default=False)
+ video_format.add_option('--write-auto-sub', '--write-automatic-sub',
+ action='store_true', dest='writeautomaticsub',
+ help='write automatic subtitle file (currently youtube only)', default=False)
video_format.add_option('--only-sub',
action='store_true', dest='skip_download',
help='[deprecated] alias of --skip-download', default=False)
help='lists all available subtitles for the video (currently youtube only)', default=False)
video_format.add_option('--sub-format',
action='store', dest='subtitlesformat', metavar='FORMAT',
- help='subtitle format [srt/sbv] (default=srt) (currently youtube only)', default='srt')
+ help='subtitle format [srt/sbv/vtt] (default=srt) (currently youtube only)', default='srt')
video_format.add_option('--sub-lang', '--srt-lang',
action='store', dest='subtitleslang', metavar='LANG',
help='language of the subtitles to download (optional) use IETF language tags like \'en\'')
'writeinfojson': opts.writeinfojson,
'writethumbnail': opts.writethumbnail,
'writesubtitles': opts.writesubtitles,
+ 'writeautomaticsub': opts.writeautomaticsub,
'allsubtitles': opts.allsubtitles,
'listsubtitles': opts.listsubtitles,
'subtitlesformat': opts.subtitlesformat,
from .breakcom import BreakIE
from .collegehumor import CollegeHumorIE
from .comedycentral import ComedyCentralIE
+from .cspan import CSpanIE
from .dailymotion import DailymotionIE
from .depositfiles import DepositFilesIE
from .eighttracks import EightTracksIE
from .vevo import VevoIE
from .vimeo import VimeoIE
from .vine import VineIE
+from .wimp import WimpIE
from .worldstarhiphop import WorldStarHipHopIE
from .xhamster import XHamsterIE
from .xnxx import XNXXIE
VevoIE(),
JukeboxIE(),
TudouIE(),
+ CSpanIE(),
+ WimpIE(),
GenericIE()
]
import re
-import socket
+import json
from .common import InfoExtractor
from ..utils import (
- compat_http_client,
- compat_str,
- compat_urllib_error,
+ # This is used by the not implemented extractLiveStream method
compat_urllib_parse,
- compat_urllib_request,
ExtractorError,
unified_strdate,
)
class ArteTvIE(InfoExtractor):
- """arte.tv information extractor."""
-
- _VALID_URL = r'(?:http://)?videos\.arte\.tv/(?:fr|de)/videos/.*'
+ _VALID_URL = r'(?:http://)?www\.arte.tv/guide/(?:fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
_LIVE_URL = r'index-[0-9]+\.html$'
IE_NAME = u'arte.tv'
- def fetch_webpage(self, url):
- request = compat_urllib_request.Request(url)
- try:
- self.report_download_webpage(url)
- webpage = compat_urllib_request.urlopen(request).read()
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- raise ExtractorError(u'Unable to retrieve video webpage: %s' % compat_str(err))
- except ValueError as err:
- raise ExtractorError(u'Invalid URL: %s' % url)
- return webpage
-
- def grep_webpage(self, url, regex, regexFlags, matchTuples):
- page = self.fetch_webpage(url)
- mobj = re.search(regex, page, regexFlags)
- info = {}
-
- if mobj is None:
- raise ExtractorError(u'Invalid URL: %s' % url)
-
- for (i, key, err) in matchTuples:
- if mobj.group(i) is None:
- raise ExtractorError(err)
- else:
- info[key] = mobj.group(i)
-
- return info
-
# TODO implement Live Stream
# def extractLiveStream(self, url):
# video_lang = url.split('/')[-4]
# )
# video_url = u'%s/%s' % (info.get('url'), info.get('path'))
- def extractPlus7Stream(self, url):
- video_lang = url.split('/')[-3]
- info = self.grep_webpage(
- url,
- r'param name="movie".*?videorefFileUrl=(http[^\'"&]*)',
- 0,
- [
- (1, 'url', u'Invalid URL: %s' % url)
- ]
- )
- next_url = compat_urllib_parse.unquote(info.get('url'))
- info = self.grep_webpage(
- next_url,
- r'<video lang="%s" ref="(http[^\'"&]*)' % video_lang,
- 0,
- [
- (1, 'url', u'Could not find <video> tag: %s' % url)
- ]
- )
- next_url = compat_urllib_parse.unquote(info.get('url'))
-
- info = self.grep_webpage(
- next_url,
- r'<video id="(.*?)".*?>.*?' +
- '<name>(.*?)</name>.*?' +
- '<dateVideo>(.*?)</dateVideo>.*?' +
- '<url quality="hd">(.*?)</url>',
- re.DOTALL,
- [
- (1, 'id', u'could not extract video id: %s' % url),
- (2, 'title', u'could not extract video title: %s' % url),
- (3, 'date', u'could not extract video date: %s' % url),
- (4, 'url', u'could not extract video url: %s' % url)
- ]
- )
-
- return {
- 'id': info.get('id'),
- 'url': compat_urllib_parse.unquote(info.get('url')),
- 'uploader': u'arte.tv',
- 'upload_date': unified_strdate(info.get('date')),
- 'title': info.get('title').decode('utf-8'),
- 'ext': u'mp4',
- 'format': u'NA',
- 'player_url': None,
- }
-
def _real_extract(self, url):
- video_id = url.split('/')[-1]
- self.report_extraction(video_id)
+ mobj = re.match(self._VALID_URL, url)
+ name = mobj.group('name')
+ # This is not a real id, it can be for example AJT for the news
+ # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
+ video_id = mobj.group('id')
if re.search(self._LIVE_URL, video_id) is not None:
raise ExtractorError(u'Arte live streams are not yet supported, sorry')
# self.extractLiveStream(url)
# return
+
+ webpage = self._download_webpage(url, video_id)
+ json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url')
+
+ json_info = self._download_webpage(json_url, video_id, 'Downloading info json')
+ self.report_extraction(video_id)
+ info = json.loads(json_info)
+ player_info = info['videoJsonPlayer']
+
+ info_dict = {'id': player_info['VID'],
+ 'title': player_info['VTI'],
+ 'description': player_info['VDE'],
+ 'upload_date': unified_strdate(player_info['VDA'].split(' ')[0]),
+ 'thumbnail': player_info['programImage'],
+ }
+
+ formats = player_info['VSR'].values()
+ # We order the formats by quality
+ formats = sorted(formats, key=lambda f: int(f['height']))
+ # Pick the best quality
+ format_info = formats[-1]
+ if format_info['mediaType'] == u'rtmp':
+ info_dict['url'] = format_info['streamer']
+ info_dict['play_path'] = 'mp4:' + format_info['url']
+ info_dict['ext'] = 'mp4'
else:
- info = self.extractPlus7Stream(url)
+ info_dict['url'] = format_info['url']
+ info_dict['ext'] = 'mp4'
- return [info]
+ return info_dict
'ext': 'mp4',
'format': format,
'thumbnail': None,
- 'description': officialTitle,
+ 'description': compat_str(officialTitle),
}
results.append(info)
--- /dev/null
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ compat_urllib_parse,
+)
+
+class CSpanIE(InfoExtractor):
+    """Information extractor for program pages on www.c-spanvideo.org."""
+
+    # The dots of the host name are escaped so that they only match a
+    # literal '.', not an arbitrary character.
+    _VALID_URL = r'http://www\.c-spanvideo\.org/program/(.*)'
+
+    def _real_extract(self, url):
+        """Return the info dictionary for the program page at ``url``.
+
+        The program id is read from the page, then the flashXml.php service
+        is queried for the title, the stream URL and the rtmp play path.
+        Description and thumbnail come from the page's meta tags.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        prog_name = mobj.group(1)
+        webpage = self._download_webpage(url, prog_name)
+        video_id = self._search_regex(r'programid=(.*?)&', webpage, 'video id')
+        data = compat_urllib_parse.urlencode({'programid': video_id,
+                                              'dynamic':'1'})
+        info_url = 'http://www.c-spanvideo.org/common/services/flashXml.php?' + data
+        video_info = self._download_webpage(info_url, video_id, u'Downloading video info')
+
+        self.report_extraction(video_id)
+
+        title = self._html_search_regex(r'<string name="title">(.*?)</string>',
+                                        video_info, 'title')
+        description = self._html_search_regex(r'<meta (?:property="og:|name=")description" content="(.*?)"',
+                                              webpage, 'description',
+                                              flags=re.MULTILINE|re.DOTALL)
+        thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.*?)"',
+                                            webpage, 'thumbnail')
+
+        # Kept in its own name so that the ``url`` argument is not overwritten.
+        video_url = self._search_regex(r'<string name="URL">(.*?)</string>',
+                                       video_info, 'video url')
+        # The service returns a template; fill in its placeholders.
+        video_url = video_url.replace('$(protocol)', 'rtmp').replace('$(port)', '443')
+        path = self._search_regex(r'<string name="path">(.*?)</string>',
+                                  video_info, 'rtmp play path')
+
+        return {'id': video_id,
+                'title': title,
+                'ext': 'flv',
+                'url': video_url,
+                'play_path': path,
+                'description': description,
+                'thumbnail': thumbnail,
+                }
from .common import InfoExtractor
from ..utils import (
- unified_strdate,
ExtractorError,
)
--- /dev/null
+import re
+import base64
+
+from .common import InfoExtractor
+
+
+class WimpIE(InfoExtractor):
+    """Information extractor for wimp.com video pages."""
+
+    _VALID_URL = r'(?:http://)?(?:www\.)?wimp\.com/([^/]+)/'
+
+    def _real_extract(self, url):
+        """Return a one-element list holding the info dictionary for ``url``."""
+        video_id = re.match(self._VALID_URL, url).group(1)
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._search_regex(
+            r'<meta name="description" content="(.+?)" />', webpage, 'video title')
+        thumbnail_url = self._search_regex(
+            r'<meta property="og\:image" content="(.+?)" />', webpage, 'video thumbnail')
+
+        # The media URL is found inside the base64-encoded googleCode value.
+        encoded = self._search_regex("googleCode = '(.*?)'", webpage, 'file url')
+        decoded = base64.b64decode(encoded).decode('ascii')
+        final_url = self._search_regex('","(.*?)"', decoded, 'final video url')
+
+        # The extension is whatever follows the last dot of the media URL.
+        info = {
+            'id': video_id,
+            'url': final_url,
+            'ext': final_url.rpartition(u'.')[2],
+            'title': title,
+            'thumbnail': thumbnail_url,
+        }
+        return [info]
+
def _decrypt_signature(self, s):
"""Turn the encrypted s field into a working signature"""
- (a,b) = s.split('.')
- if len(a) != 43 or len(b) != 43:
- raise ExtractorError(u'Unable to decrypt signature, subkeys lengths %d.%d not supported; retrying might work' % (len(a), len(b)))
- if self._downloader.params.get('verbose'):
- self.to_screen('encrypted signature length %d.%d' % (len(a), len(b)))
- b = ''.join([b[:8],a[0],b[9:18],b[-4],b[19:39], b[18]])[0:40]
- a = a[-40:]
- s_dec = '.'.join((a,b))[::-1]
- return s_dec
+
+ if len(s) == 88:
+ return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
+ elif len(s) == 87:
+ return s[62] + s[82:62:-1] + s[83] + s[61:52:-1] + s[0] + s[51:2:-1]
+ elif len(s) == 86:
+ return s[2:63] + s[82] + s[64:82] + s[63]
+ elif len(s) == 85:
+ return s[76] + s[82:76:-1] + s[83] + s[75:60:-1] + s[0] + s[59:50:-1] + s[1] + s[49:2:-1]
+ elif len(s) == 84:
+ return s[83:36:-1] + s[2] + s[35:26:-1] + s[3] + s[25:3:-1] + s[26]
+ elif len(s) == 83:
+ return s[52] + s[81:55:-1] + s[2] + s[54:52:-1] + s[82] + s[51:36:-1] + s[55] + s[35:2:-1] + s[36]
+ elif len(s) == 82:
+ return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34]
+
+ else:
+ raise ExtractorError(u'Unable to decrypt signature, subkeys length %d not supported; retrying might work' % (len(s)))
def _get_available_subtitles(self, video_id):
self.report_video_subtitles_download(video_id)
if video_subtitles:
(sub_error, sub_lang, sub) = video_subtitles[0]
if sub_error:
- # We try with the automatic captions
- video_subtitles = self._request_automatic_caption(video_id, video_webpage)
- (sub_error_auto, sub_lang, sub) = video_subtitles[0]
- if sub is not None:
- pass
- else:
- # We report the original error
- self._downloader.report_warning(sub_error)
+ self._downloader.report_warning(sub_error)
+
+ if self._downloader.params.get('writeautomaticsub', False):
+ video_subtitles = self._request_automatic_caption(video_id, video_webpage)
+ (sub_error, sub_lang, sub) = video_subtitles[0]
+ if sub_error:
+ self._downloader.report_warning(sub_error)
if self._downloader.params.get('allsubtitles', False):
video_subtitles = self._extract_all_subtitles(video_id)
if 'sig' in url_data:
url += '&signature=' + url_data['sig'][0]
elif 's' in url_data:
+ if self._downloader.params.get('verbose'):
+ s = url_data['s'][0]
+ player = self._search_regex(r'html5player-(.+?)\.js', video_webpage,
+ 'html5 player', fatal=False)
+ self.to_screen('encrypted signature length %d (%d.%d), itag %s, html5 player %s' %
+ (len(s), len(s.split('.')[0]), len(s.split('.')[1]), url_data['itag'][0], player))
signature = self._decrypt_signature(url_data['s'][0])
url += '&signature=' + signature
if 'ratebypass' not in url:
-__version__ = '2013.06.33'
+__version__ = '2013.06.34'