Kay Bouché
Yang Hongbo
Lei Wang
+Petr Novák
+Leonardo Taccari
+Martin Weinelt
+version 2018.03.14
+
+Extractors
+* [soundcloud] Update client id (#15866)
++ [tennistv] Add support for tennistv.com
++ [line] Add support for tv.line.me (#9427)
+* [xnxx] Fix extraction (#15817)
+* [njpwworld] Fix authentication (#15815)
+
+
+version 2018.03.10
+
+Core
+* [downloader/hls] Skip uplynk ad fragments (#15748)
+
+Extractors
+* [pornhub] Don't override session cookies (#15697)
++ [raywenderlich] Add support for videos.raywenderlich.com (#15251)
+* [funk] Fix extraction and rework extractors (#15792)
+* [nexx] Restore reverse engineered approach
++ [heise] Add support for kaltura embeds (#14961, #15728)
++ [tvnow] Extract series metadata (#15774)
+* [ruutu] Continue formats extraction on NOT-USED URLs (#15775)
+* [vrtnu] Use redirect URL for building video JSON URL (#15767, #15769)
+* [vimeo] Modernize login code and improve error messaging
+* [archiveorg] Fix extraction (#15770, #15772)
++ [hidive] Add support for hidive.com (#15494)
+* [afreecatv] Detect deleted videos
+* [afreecatv] Fix extraction (#15755)
+* [vice] Fix extraction and rework extractors (#11101, #13019, #13622, #13778)
++ [vidzi] Add support for vidzi.si (#15751)
+* [npo] Fix typo
+
+
+version 2018.03.03
+
+Core
++ [utils] Add parse_resolution
+* Revert respect --prefer-insecure while updating
+
+Extractors
++ [yapfiles] Add support for yapfiles.ru (#15726, #11085)
+* [spankbang] Fix formats extraction (#15727)
+* [adn] Fix extraction (#15716)
++ [toggle] Extract DASH and ISM formats (#15721)
++ [nickelodeon] Add support for nickelodeon.com.tr (#15706)
+* [npo] Validate and filter format URLs (#15709)
+
+
+version 2018.02.26
+
+Extractors
+* [udemy] Use custom User-Agent (#15571)
+
+
+version 2018.02.25
+
+Core
+* [postprocessor/embedthumbnail] Skip embedding when there aren't any
+ thumbnails (#12573)
+* [extractor/common] Improve jwplayer subtitles extraction (#15695)
+
+Extractors
++ [vidlii] Add support for vidlii.com (#14472, #14512, #14779)
++ [streamango] Capture and output error messages
+* [streamango] Fix extraction (#14160, #14256)
++ [telequebec] Add support for emissions (#14649, #14655)
++ [telequebec:live] Add support for live streams (#15688)
++ [mailru:music] Add support for mail.ru/music (#15618)
+* [aenetworks] Switch to akamai HLS formats (#15612)
+* [ytsearch] Fix flat title extraction (#11260, #15681)
+
+
+version 2018.02.22
+
+Core
++ [utils] Fixup some common URL typos in sanitize_url (#15649)
+* Respect --prefer-insecure while updating (#15497)
+
+Extractors
+* [vidio] Fix HLS URL extraction (#15675)
++ [nexx] Add support for arc.nexx.cloud URLs
+* [nexx] Switch to arc API (#15652)
+* [redtube] Fix duration extraction (#15659)
++ [sonyliv] Respect referrer (#15648)
++ [brightcove:new] Use referrer for formats' HTTP headers
++ [cbc] Add support for olympics.cbc.ca (#15535)
++ [fusion] Add support for fusion.tv (#15628)
+* [npo] Improve quality metadata extraction
+* [npo] Relax URL regular expression (#14987, #14994)
++ [npo] Capture and output error message
++ [pornhub] Add support for channels (#15613)
+* [youtube] Handle shared URLs with generic extractor (#14303)
+
+
+version 2018.02.11
+
+Core
++ [YoutubeDL] Add support for filesize_approx in format selector (#15550)
+
+Extractors
++ [francetv] Add support for live streams (#13689)
++ [francetv] Add support for zouzous.fr and ludo.fr (#10454, #13087, #13103,
+ #15012)
+* [francetv] Separate main extractor and rework others to delegate to it
+* [francetv] Improve manifest URL signing (#15536)
++ [francetv] Sign m3u8 manifest URLs (#15565)
++ [veoh] Add support for embed URLs (#15561)
+* [afreecatv] Fix extraction (#15556)
+* [periscope] Use accessVideoPublic endpoint (#15554)
+* [discovery] Fix auth request (#15542)
++ [6play] Extract subtitles (#15541)
+* [newgrounds] Fix metadata extraction (#15531)
++ [nbc] Add support for stream.nbcolympics.com (#10295)
+* [dvtv] Fix live streams extraction (#15442)
+
+
+version 2018.02.08
+
+Extractors
++ [myvi] Extend URL regular expression
++ [myvi:embed] Add support for myvi.tv embeds (#15521)
++ [prosiebensat1] Extend URL regular expression (#15520)
+* [pokemon] Relax URL regular expression and extend title extraction (#15518)
++ [gameinformer] Use geo verification headers
+* [la7] Fix extraction (#15501, #15502)
+* [gameinformer] Fix brightcove id extraction (#15416)
++ [afreecatv] Pass referrer to video info request (#15507)
++ [telebruxelles] Add support for live streams
+* [telebruxelles] Relax URL regular expression
+* [telebruxelles] Fix extraction (#15504)
+* [extractor/common] Respect secure schemes in _extract_wowza_formats
+
+
+version 2018.02.04
+
+Core
+* [downloader/http] Randomize HTTP chunk size
++ [downloader/http] Add ability to pass downloader options via info dict
+* [downloader/http] Fix 302 infinite loops by not reusing requests
++ Document http_chunk_size
+
+Extractors
++ [brightcove] Pass embed page URL as referrer (#15486)
++ [youtube] Enforce using chunked HTTP downloading for DASH formats
+
+
+version 2018.02.03
+
+Core
++ Introduce --http-chunk-size for chunk-based HTTP downloading
++ Add support for IronPython
+* [downloader/ism] Fix Python 3.2 support
+
+Extractors
+* [redbulltv] Fix extraction (#15481)
+* [redtube] Fix metadata extraction (#15472)
+* [pladform] Respect platform id and extract HLS formats (#15468)
+- [rtlnl] Remove progressive formats (#15459)
+* [6play] Do not modify asset URLs with a token (#15248)
+* [nationalgeographic] Relax URL regular expression
+* [dplay] Relax URL regular expression (#15458)
+* [cbsinteractive] Fix data extraction (#15451)
++ [amcnetworks] Add support for sundancetv.com (#9260)
+
+
version 2018.01.27
Core
size. By default, the buffer size is
automatically resized from an initial value
of SIZE.
+ --http-chunk-size SIZE Size of a chunk for chunk-based HTTP
+ downloading (e.g. 10485760 or 10M) (default
+ is disabled). May be useful for bypassing
+ bandwidth throttling imposed by a webserver
+ (experimental)
--playlist-reverse Download playlist videos in reverse order
--playlist-random Download playlist videos in random order
--xattr-set-filesize Set file xattribute ytdl.filesize with
size. By default, the buffer size is
automatically resized from an initial value
of SIZE.
+ --http-chunk-size SIZE Size of a chunk for chunk-based HTTP
+ downloading (e.g. 10485760 or 10M) (default
+ is disabled). May be useful for bypassing
+ bandwidth throttling imposed by a webserver
+ (experimental)
--playlist-reverse Download playlist videos in reverse order
--playlist-random Download playlist videos in random order
--xattr-set-filesize Set file xattribute ytdl.filesize with
- **CarambaTVPage**
- **CartoonNetwork**
- **cbc.ca**
+ - **cbc.ca:olympics**
- **cbc.ca:player**
- **cbc.ca:watch**
- **cbc.ca:watch:video**
- **CSpan**: C-SPAN
- **CtsNews**: 華視新聞
- **CTVNews**
- - **culturebox.francetvinfo.fr**
+ - **Culturebox**
- **CultureUnplugged**
- **curiositystream**
- **curiositystream:collection**
- **FranceTV**
- **FranceTVEmbed**
- **francetvinfo.fr**
+ - **FranceTVJeunesse**
+ - **FranceTVSite**
- **Freesound**
- **freespeech.org**
- **FreshLive**
- **Funimation**
- - **Funk**
+ - **FunkChannel**
+ - **FunkMix**
- **FunnyOrDie**
- **Fusion**
- **Fux**
- **HentaiStigma**
- **hetklokhuis**
- **hgtv.com:show**
+ - **HiDive**
- **HistoricFilms**
- **history:topic**: History.com Topic
- **hitbox**
- **limelight**
- **limelight:channel**
- **limelight:channel_list**
+ - **LineTV**
- **LiTV**
- **LiveLeak**
- **LiveLeakEmbed**
- **m6**
- **macgamestore**: MacGameStore trailers
- **mailru**: Видео@Mail.Ru
+ - **mailru:music**: Музыка@Mail.Ru
+ - **mailru:music:search**: Музыка@Mail.Ru
- **MakersChannel**
- **MakerTV**
- **mangomolo:live**
- **MySpass**
- **Myvi**
- **MyVidster**
+ - **MyviEmbed**
- **n-tv.de**
- **natgeo**
- **natgeo:episodeguide**
- **NBA**
- **NBC**
- **NBCNews**
- - **NBCOlympics**
+ - **nbcolympics**
+ - **nbcolympics:stream**
- **NBCSports**
- **NBCSportsVPlayer**
- **ndr**: NDR.de - Norddeutscher Rundfunk
- **RaiPlay**
- **RaiPlayLive**
- **RaiPlayPlaylist**
+ - **RayWenderlich**
- **RBMARadio**
- **RDS**: RDS.ca
- **RedBullTV**
- **Telegraaf**
- **TeleMB**
- **TeleQuebec**
+ - **TeleQuebecEmission**
+ - **TeleQuebecLive**
- **TeleTask**
- **Telewebion**
+ - **TennisTV**
- **TF1**
- **TFO**
- **TheIntercept**
- **vice**
- **vice:article**
- **vice:show**
- - **Viceland**
- **Vidbit**
- **Viddler**
- **Videa**
- **VideoPress**
- **videoweed**: VideoWeed
- **Vidio**
+ - **VidLii**
- **vidme**
- **vidme:user**
- **vidme:user:likes**
- **yandexmusic:album**: Яндекс.Музыка - Альбом
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
- **yandexmusic:track**: Яндекс.Музыка - Трек
+ - **YapFiles**
- **YesJapan**
- **yinyuetai:video**: 音悦Tai
- **Ynet**
[flake8]
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git
-ignore = E402,E501,E731
+ignore = E402,E501,E731,E741
--- /dev/null
+#!/usr/bin/env python
+# coding: utf-8
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import re
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import try_rm
+from youtube_dl import YoutubeDL
+from youtube_dl.compat import compat_http_server
+from youtube_dl.downloader.http import HttpFD
+from youtube_dl.utils import encodeFilename
+import ssl
+import threading
+
+TEST_DIR = os.path.dirname(os.path.abspath(__file__))
+
+
+def http_server_port(httpd):
+ """Return the port the given server's listening socket is bound to.
+
+ The value is element 1 of the socket's getsockname() result.
+ """
+ if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
+ # In Jython SSLSocket is not a subclass of socket.socket
+ sock = httpd.socket.sock
+ else:
+ sock = httpd.socket
+ return sock.getsockname()[1]
+
+
+TEST_SIZE = 10 * 1024
+
+
+class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+ def log_message(self, format, *args):
+ pass
+
+ def send_content_range(self, total=None):
+ range_header = self.headers.get('Range')
+ start = end = None
+ if range_header:
+ mobj = re.search(r'^bytes=(\d+)-(\d+)', range_header)
+ if mobj:
+ start = int(mobj.group(1))
+ end = int(mobj.group(2))
+ valid_range = start is not None and end is not None
+ if valid_range:
+ content_range = 'bytes %d-%d' % (start, end)
+ if total:
+ content_range += '/%d' % total
+ self.send_header('Content-Range', content_range)
+ return (end - start + 1) if valid_range else total
+
+ def serve(self, range=True, content_length=True):
+ self.send_response(200)
+ self.send_header('Content-Type', 'video/mp4')
+ size = TEST_SIZE
+ if range:
+ size = self.send_content_range(TEST_SIZE)
+ if content_length:
+ self.send_header('Content-Length', size)
+ self.end_headers()
+ self.wfile.write(b'#' * size)
+
+ def do_GET(self):
+ if self.path == '/regular':
+ self.serve()
+ elif self.path == '/no-content-length':
+ self.serve(content_length=False)
+ elif self.path == '/no-range':
+ self.serve(range=False)
+ elif self.path == '/no-range-no-content-length':
+ self.serve(range=False, content_length=False)
+ else:
+ assert False
+
+
+class FakeLogger(object):
+ """Logger stub whose debug, warning and error methods discard the message."""
+ def debug(self, msg):
+ pass
+
+ def warning(self, msg):
+ pass
+
+ def error(self, msg):
+ pass
+
+
+class TestHttpFD(unittest.TestCase):
+ def setUp(self):
+ self.httpd = compat_http_server.HTTPServer(
+ ('127.0.0.1', 0), HTTPTestRequestHandler)
+ self.port = http_server_port(self.httpd)
+ self.server_thread = threading.Thread(target=self.httpd.serve_forever)
+ self.server_thread.daemon = True
+ self.server_thread.start()
+
+ def download(self, params, ep):
+ params['logger'] = FakeLogger()
+ ydl = YoutubeDL(params)
+ downloader = HttpFD(ydl, params)
+ filename = 'testfile.mp4'
+ try_rm(encodeFilename(filename))
+ self.assertTrue(downloader.real_download(filename, {
+ 'url': 'http://127.0.0.1:%d/%s' % (self.port, ep),
+ }))
+ self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE)
+ try_rm(encodeFilename(filename))
+
+ def download_all(self, params):
+ for ep in ('regular', 'no-content-length', 'no-range', 'no-range-no-content-length'):
+ self.download(params, ep)
+
+ def test_regular(self):
+ self.download_all({})
+
+ def test_chunked(self):
+ self.download_all({
+ 'http_chunk_size': 1000,
+ })
+
+
+if __name__ == '__main__':
+ unittest.main()
self.end_headers()
return
- new_url = 'http://localhost:%d/中文.html' % http_server_port(self.server)
+ new_url = 'http://127.0.0.1:%d/中文.html' % http_server_port(self.server)
self.send_response(302)
self.send_header(b'Location', new_url.encode('utf-8'))
self.end_headers()
class TestHTTP(unittest.TestCase):
def setUp(self):
self.httpd = compat_http_server.HTTPServer(
- ('localhost', 0), HTTPTestRequestHandler)
+ ('127.0.0.1', 0), HTTPTestRequestHandler)
self.port = http_server_port(self.httpd)
self.server_thread = threading.Thread(target=self.httpd.serve_forever)
self.server_thread.daemon = True
return
ydl = YoutubeDL({'logger': FakeLogger()})
- r = ydl.extract_info('http://localhost:%d/302' % self.port)
- self.assertEqual(r['entries'][0]['url'], 'http://localhost:%d/vid.mp4' % self.port)
+ r = ydl.extract_info('http://127.0.0.1:%d/302' % self.port)
+ self.assertEqual(r['entries'][0]['url'], 'http://127.0.0.1:%d/vid.mp4' % self.port)
class TestHTTPS(unittest.TestCase):
def setUp(self):
certfn = os.path.join(TEST_DIR, 'testcert.pem')
self.httpd = compat_http_server.HTTPServer(
- ('localhost', 0), HTTPTestRequestHandler)
+ ('127.0.0.1', 0), HTTPTestRequestHandler)
self.httpd.socket = ssl.wrap_socket(
self.httpd.socket, certfile=certfn, server_side=True)
self.port = http_server_port(self.httpd)
ydl = YoutubeDL({'logger': FakeLogger()})
self.assertRaises(
Exception,
- ydl.extract_info, 'https://localhost:%d/video.html' % self.port)
+ ydl.extract_info, 'https://127.0.0.1:%d/video.html' % self.port)
ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True})
- r = ydl.extract_info('https://localhost:%d/video.html' % self.port)
- self.assertEqual(r['entries'][0]['url'], 'https://localhost:%d/vid.mp4' % self.port)
+ r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port)
+ self.assertEqual(r['entries'][0]['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port)
def _build_proxy_handler(name):
class TestProxy(unittest.TestCase):
def setUp(self):
self.proxy = compat_http_server.HTTPServer(
- ('localhost', 0), _build_proxy_handler('normal'))
+ ('127.0.0.1', 0), _build_proxy_handler('normal'))
self.port = http_server_port(self.proxy)
self.proxy_thread = threading.Thread(target=self.proxy.serve_forever)
self.proxy_thread.daemon = True
self.proxy_thread.start()
self.geo_proxy = compat_http_server.HTTPServer(
- ('localhost', 0), _build_proxy_handler('geo'))
+ ('127.0.0.1', 0), _build_proxy_handler('geo'))
self.geo_port = http_server_port(self.geo_proxy)
self.geo_proxy_thread = threading.Thread(target=self.geo_proxy.serve_forever)
self.geo_proxy_thread.daemon = True
self.geo_proxy_thread.start()
def test_proxy(self):
- geo_proxy = 'localhost:{0}'.format(self.geo_port)
+ geo_proxy = '127.0.0.1:{0}'.format(self.geo_port)
ydl = YoutubeDL({
- 'proxy': 'localhost:{0}'.format(self.port),
+ 'proxy': '127.0.0.1:{0}'.format(self.port),
'geo_verification_proxy': geo_proxy,
})
url = 'http://foo.com/bar'
def test_proxy_with_idn(self):
ydl = YoutubeDL({
- 'proxy': 'localhost:{0}'.format(self.port),
+ 'proxy': '127.0.0.1:{0}'.format(self.port),
})
url = 'http://中文.tw/'
response = ydl.urlopen(url).read().decode('utf-8')
parse_filesize,
parse_count,
parse_iso8601,
+ parse_resolution,
pkcs1pad,
read_batch_urls,
sanitize_filename,
sanitize_path,
+ sanitize_url,
expand_path,
prepend_extension,
replace_extension,
self.assertEqual(sanitize_path('./abc'), 'abc')
self.assertEqual(sanitize_path('./../abc'), '..\\abc')
+ def test_sanitize_url(self):
+ # Scheme-less '//' URLs get 'http:', the 'httpss' and 'rmtps' scheme
+ # typos are corrected, and an already valid URL comes back unchanged.
+ self.assertEqual(sanitize_url('//foo.bar'), 'http://foo.bar')
+ self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar')
+ self.assertEqual(sanitize_url('rmtps://foo.bar'), 'rtmps://foo.bar')
+ self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar')
+
def test_expand_path(self):
def env(var):
return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var)
self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361)
self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140)
+ self.assertEqual(unified_timestamp('2018-03-14T08:32:43.1493874+00:00'), 1521016363)
def test_determine_ext(self):
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
self.assertEqual(parse_count('1.1kk '), 1100000)
self.assertEqual(parse_count('1.1kk views'), 1100000)
+ def test_parse_resolution(self):
+ # Covers: None/empty input -> {}, WIDTHxHEIGHT written with 'x' or '×'
+ # (with or without surrounding spaces), '<height>p', and the 4k/8K
+ # shorthands, which yield a height only.
+ self.assertEqual(parse_resolution(None), {})
+ self.assertEqual(parse_resolution(''), {})
+ self.assertEqual(parse_resolution('1920x1080'), {'width': 1920, 'height': 1080})
+ self.assertEqual(parse_resolution('1920×1080'), {'width': 1920, 'height': 1080})
+ self.assertEqual(parse_resolution('1920 x 1080'), {'width': 1920, 'height': 1080})
+ self.assertEqual(parse_resolution('720p'), {'height': 720})
+ self.assertEqual(parse_resolution('4k'), {'height': 2160})
+ self.assertEqual(parse_resolution('8K'), {'height': 4320})
+
def test_version_tuple(self):
self.assertEqual(version_tuple('1'), (1,))
self.assertEqual(version_tuple('10.23.344'), (10, 23, 344))
.RS
.RE
.TP
+.B \-\-http\-chunk\-size \f[I]SIZE\f[]
+Size of a chunk for chunk\-based HTTP downloading (e.g.
+10485760 or 10M) (default is disabled).
+May be useful for bypassing bandwidth throttling imposed by a webserver
+(experimental)
+.RS
+.RE
+.TP
.B \-\-playlist\-reverse
Download playlist videos in reverse order
.RS
COMPREPLY=()
cur="${COMP_WORDS[COMP_CWORD]}"
prev="${COMP_WORDS[COMP_CWORD-1]}"
- opts="--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --force-generic-extractor --default-search --ignore-config --config-location --flat-playlist --mark-watched --no-mark-watched --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --geo-verification-proxy --cn-verification-proxy --geo-bypass --no-geo-bypass --geo-bypass-country --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --limit-rate --retries --fragment-retries --skip-unavailable-fragments --abort-on-unavailable-fragment --keep-fragments --buffer-size --no-resize-buffer --test --playlist-reverse --playlist-random --xattr-set-filesize --hls-prefer-native --hls-prefer-ffmpeg --hls-use-mpegts --external-downloader --external-downloader-args --batch-file --id --output --autonumber-size --autonumber-start --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info-json --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --max-sleep-interval --format --all-formats --prefer-free-formats --list-formats --youtube-include-dash-manifest 
--youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --ap-mso --ap-username --ap-password --ap-list-mso --extract-audio --audio-format --audio-quality --recode-video --postprocessor-args --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --metadata-from-title --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subs"
+ opts="--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --force-generic-extractor --default-search --ignore-config --config-location --flat-playlist --mark-watched --no-mark-watched --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --geo-verification-proxy --cn-verification-proxy --geo-bypass --no-geo-bypass --geo-bypass-country --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --limit-rate --retries --fragment-retries --skip-unavailable-fragments --abort-on-unavailable-fragment --keep-fragments --buffer-size --no-resize-buffer --http-chunk-size --test --playlist-reverse --playlist-random --xattr-set-filesize --hls-prefer-native --hls-prefer-ffmpeg --hls-use-mpegts --external-downloader --external-downloader-args --batch-file --id --output --autonumber-size --autonumber-start --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info-json --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --max-sleep-interval --format --all-formats --prefer-free-formats --list-formats --youtube-include-dash-manifest 
--youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --ap-mso --ap-username --ap-password --ap-list-mso --extract-audio --audio-format --audio-quality --recode-video --postprocessor-args --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --metadata-from-title --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subs"
keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
fileopts="-a|--batch-file|--download-archive|--cookies|--load-info"
diropts="--cache-dir"
complete --command youtube-dl --long-option keep-fragments --description 'Keep downloaded fragments on disk after downloading is finished; fragments are erased by default'
complete --command youtube-dl --long-option buffer-size --description 'Size of download buffer (e.g. 1024 or 16K) (default is %default)'
complete --command youtube-dl --long-option no-resize-buffer --description 'Do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.'
+complete --command youtube-dl --long-option http-chunk-size --description 'Size of a chunk for chunk-based HTTP downloading (e.g. 10485760 or 10M) (default is disabled). May be useful for bypassing bandwidth throttling imposed by a webserver (experimental)'
complete --command youtube-dl --long-option test
complete --command youtube-dl --long-option playlist-reverse --description 'Download playlist videos in reverse order'
complete --command youtube-dl --long-option playlist-random --description 'Download playlist videos in random order'
elif [[ ${prev} == "--recode-video" ]]; then
_arguments '*: :(mp4 flv ogg webm mkv)'
else
- _arguments '*: :(--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --force-generic-extractor --default-search --ignore-config --config-location --flat-playlist --mark-watched --no-mark-watched --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --geo-verification-proxy --cn-verification-proxy --geo-bypass --no-geo-bypass --geo-bypass-country --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --limit-rate --retries --fragment-retries --skip-unavailable-fragments --abort-on-unavailable-fragment --keep-fragments --buffer-size --no-resize-buffer --test --playlist-reverse --playlist-random --xattr-set-filesize --hls-prefer-native --hls-prefer-ffmpeg --hls-use-mpegts --external-downloader --external-downloader-args --batch-file --id --output --autonumber-size --autonumber-start --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info-json --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --max-sleep-interval --format --all-formats --prefer-free-formats --list-formats --youtube-include-dash-manifest 
--youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --ap-mso --ap-username --ap-password --ap-list-mso --extract-audio --audio-format --audio-quality --recode-video --postprocessor-args --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --metadata-from-title --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subs)'
+ _arguments '*: :(--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --force-generic-extractor --default-search --ignore-config --config-location --flat-playlist --mark-watched --no-mark-watched --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --geo-verification-proxy --cn-verification-proxy --geo-bypass --no-geo-bypass --geo-bypass-country --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --limit-rate --retries --fragment-retries --skip-unavailable-fragments --abort-on-unavailable-fragment --keep-fragments --buffer-size --no-resize-buffer --http-chunk-size --test --playlist-reverse --playlist-random --xattr-set-filesize --hls-prefer-native --hls-prefer-ffmpeg --hls-use-mpegts --external-downloader --external-downloader-args --batch-file --id --output --autonumber-size --autonumber-start --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info-json --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --max-sleep-interval --format --all-formats --prefer-free-formats --list-formats 
--youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --ap-mso --ap-username --ap-password --ap-list-mso --extract-audio --audio-format --audio-quality --recode-video --postprocessor-args --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --metadata-from-title --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subs)'
fi
;;
esac
the downloader (see youtube_dl/downloader/common.py):
nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
noresizebuffer, retries, continuedl, noprogress, consoletitle,
- xattr_set_filesize, external_downloader_args, hls_use_mpegts.
+ xattr_set_filesize, external_downloader_args, hls_use_mpegts,
+ http_chunk_size.
The following options are used by the post processors:
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
'!=': operator.ne,
}
operator_rex = re.compile(r'''(?x)\s*
- (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
+ (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
$
if numeric_buffersize is None:
parser.error('invalid buffer size specified')
opts.buffersize = numeric_buffersize
+ if opts.http_chunk_size is not None:
+ numeric_chunksize = FileDownloader.parse_bytes(opts.http_chunk_size)
+ if not numeric_chunksize:
+ parser.error('invalid http chunk size specified')
+ opts.http_chunk_size = numeric_chunksize
if opts.playliststart <= 0:
raise ValueError('Playlist start must be positive')
if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
'keep_fragments': opts.keep_fragments,
'buffersize': opts.buffersize,
'noresizebuffer': opts.noresizebuffer,
+ 'http_chunk_size': opts.http_chunk_size,
'continuedl': opts.continue_dl,
'noprogress': opts.noprogress,
'progress_with_newline': opts.progress_with_newline,
if isinstance(spec, compat_str):
spec = spec.encode('ascii')
return struct.unpack(spec, *args)
+
+ class compat_Struct(struct.Struct):
+ # struct.Struct variant that also accepts a text (compat_str) format:
+ # such a format is encoded to ASCII before struct.Struct receives it.
+ def __init__(self, fmt):
+ if isinstance(fmt, compat_str):
+ fmt = fmt.encode('ascii')
+ super(compat_Struct, self).__init__(fmt)
else:
compat_struct_pack = struct.pack
compat_struct_unpack = struct.unpack
+ if platform.python_implementation() == 'IronPython' and sys.version_info < (2, 7, 8):
+ class compat_Struct(struct.Struct):
+ # Variant selected by the enclosing check (IronPython older than
+ # 2.7.8): unpack() always hands struct.Struct a buffer object,
+ # wrapping the input first when it is not one already.
+ def unpack(self, string):
+ if not isinstance(string, buffer): # noqa: F821
+ string = buffer(string) # noqa: F821
+ return super(compat_Struct, self).unpack(string)
+ else:
+ compat_Struct = struct.Struct
+
try:
from future_builtins import zip as compat_zip
'compat_HTMLParseError',
'compat_HTMLParser',
'compat_HTTPError',
+ 'compat_Struct',
'compat_b64decode',
'compat_basestring',
'compat_chr',
external_downloader_args: A list of additional command-line arguments for the
external downloader.
hls_use_mpegts: Use the mpegts container for HLS videos.
+ http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be
+ useful for bypassing bandwidth throttling imposed by
+ a webserver (experimental)
Subclasses of this one must re-define the real_download method.
"""
fd.add_progress_hook(ph)
return fd.real_download(filename, info_dict)
- def anvato_ad(s):
- return s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
+        def is_ad_fragment(s):
+            # A playlist line marks an ad fragment when it is either an Anvato
+            # segment info tag containing 'type=ad' or an uplynk segment tag
+            # ending in ',ad'
+            anvato_ad = s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
+            uplynk_ad = s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad')
+            return anvato_ad or uplynk_ad
media_frags = 0
ad_frags = 0
if not line:
continue
if line.startswith('#'):
- if anvato_ad(line):
+ if is_ad_fragment(line):
ad_frags += 1
ad_frag_next = True
continue
'start': sub_range_start,
'end': sub_range_start + int(splitted_byte_range[0]),
}
- elif anvato_ad(line):
+ elif is_ad_fragment(line):
ad_frag_next = True
self._finish_frag_download(ctx)
import os
import socket
import time
+import random
import re
from .common import FileDownloader
-from ..compat import compat_urllib_error
+from ..compat import (
+ compat_str,
+ compat_urllib_error,
+)
from ..utils import (
ContentTooShortError,
encodeFilename,
+ int_or_none,
sanitize_open,
sanitized_Request,
write_xattr,
add_headers = info_dict.get('http_headers')
if add_headers:
headers.update(add_headers)
- basic_request = sanitized_Request(url, None, headers)
- request = sanitized_Request(url, None, headers)
is_test = self.params.get('test', False)
-
- if is_test:
- request.add_header('Range', 'bytes=0-%s' % str(self._TEST_FILE_SIZE - 1))
+ chunk_size = self._TEST_FILE_SIZE if is_test else (
+ info_dict.get('downloader_options', {}).get('http_chunk_size') or
+ self.params.get('http_chunk_size') or 0)
ctx.open_mode = 'wb'
ctx.resume_len = 0
+ ctx.data_len = None
+ ctx.block_size = self.params.get('buffersize', 1024)
+ ctx.start_time = time.time()
+ ctx.chunk_size = None
if self.params.get('continuedl', True):
# Establish possible resume length
if os.path.isfile(encodeFilename(ctx.tmpfilename)):
- ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
+ ctx.resume_len = os.path.getsize(
+ encodeFilename(ctx.tmpfilename))
+
+ ctx.is_resume = ctx.resume_len > 0
count = 0
retries = self.params.get('retries', 0)
def __init__(self, source_error):
self.source_error = source_error
+ class NextFragment(Exception):  # control-flow signal: raised after a chunk when more data remains, caught by the retry loop to continue
+ pass
+
+        def set_range(req, start, end):
+            # Add a Range header to req: 'bytes=<start>-<end>', or the
+            # open-ended 'bytes=<start>-' when no end offset is given
+            range_header = 'bytes=%d-' % start
+            # Compare with None explicitly so that an end offset of 0 (a
+            # request for the single first byte) is not mistaken for "no end"
+            if end is not None:
+                range_header += compat_str(end)
+            req.add_header('Range', range_header)
+
def establish_connection():
- if ctx.resume_len != 0:
- self.report_resuming_byte(ctx.resume_len)
- request.add_header('Range', 'bytes=%d-' % ctx.resume_len)
+ ctx.chunk_size = (random.randint(int(chunk_size * 0.95), chunk_size)
+ if not is_test and chunk_size else chunk_size)
+ if ctx.resume_len > 0:
+ range_start = ctx.resume_len
+ if ctx.is_resume:
+ self.report_resuming_byte(ctx.resume_len)
ctx.open_mode = 'ab'
+ elif ctx.chunk_size > 0:
+ range_start = 0
+ else:
+ range_start = None
+ ctx.is_resume = False
+ range_end = range_start + ctx.chunk_size - 1 if ctx.chunk_size else None
+ if range_end and ctx.data_len is not None and range_end >= ctx.data_len:
+ range_end = ctx.data_len - 1
+ has_range = range_start is not None
+ ctx.has_range = has_range
+ request = sanitized_Request(url, None, headers)
+ if has_range:
+ set_range(request, range_start, range_end)
# Establish connection
try:
ctx.data = self.ydl.urlopen(request)
# that don't support resuming and serve a whole file with no Content-Range
# set in response despite of requested Range (see
# https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799)
- if ctx.resume_len > 0:
+ if has_range:
content_range = ctx.data.headers.get('Content-Range')
if content_range:
- content_range_m = re.search(r'bytes (\d+)-', content_range)
+ content_range_m = re.search(r'bytes (\d+)-(\d+)?(?:/(\d+))?', content_range)
# Content-Range is present and matches requested Range, resume is possible
- if content_range_m and ctx.resume_len == int(content_range_m.group(1)):
- return
+ if content_range_m:
+ if range_start == int(content_range_m.group(1)):
+ content_range_end = int_or_none(content_range_m.group(2))
+ content_len = int_or_none(content_range_m.group(3))
+ accept_content_len = (
+ # Non-chunked download
+ not ctx.chunk_size or
+ # Chunked download and requested piece or
+ # its part is promised to be served
+ content_range_end == range_end or
+ content_len < range_end)
+ if accept_content_len:
+ ctx.data_len = content_len
+ return
# Content-Range is either not present or invalid. Assuming remote webserver is
# trying to send the whole file, resume is not possible, so wiping the local file
# and performing entire redownload
self.report_unable_to_resume()
ctx.resume_len = 0
ctx.open_mode = 'wb'
+ ctx.data_len = int_or_none(ctx.data.info().get('Content-length', None))
return
except (compat_urllib_error.HTTPError, ) as err:
- if (err.code < 500 or err.code >= 600) and err.code != 416:
- # Unexpected HTTP error
- raise
- elif err.code == 416:
+ if err.code == 416:
# Unable to resume (requested range not satisfiable)
try:
# Open the connection again without the range header
- ctx.data = self.ydl.urlopen(basic_request)
+ ctx.data = self.ydl.urlopen(
+ sanitized_Request(url, None, headers))
content_length = ctx.data.info()['Content-Length']
except (compat_urllib_error.HTTPError, ) as err:
if err.code < 500 or err.code >= 600:
ctx.resume_len = 0
ctx.open_mode = 'wb'
return
+ elif err.code < 500 or err.code >= 600:
+ # Unexpected HTTP error
+ raise
raise RetryDownload(err)
except socket.error as err:
if err.errno != errno.ECONNRESET:
return False
byte_counter = 0 + ctx.resume_len
- block_size = self.params.get('buffersize', 1024)
+ block_size = ctx.block_size
start = time.time()
# measure time over whole while-loop, so slow_down() and best_block_size() work together properly
# Progress message
speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
- if data_len is None:
+ if ctx.data_len is None:
eta = None
else:
- eta = self.calc_eta(start, time.time(), data_len - ctx.resume_len, byte_counter - ctx.resume_len)
+ eta = self.calc_eta(start, time.time(), ctx.data_len - ctx.resume_len, byte_counter - ctx.resume_len)
self._hook_progress({
'status': 'downloading',
'downloaded_bytes': byte_counter,
- 'total_bytes': data_len,
+ 'total_bytes': ctx.data_len,
'tmpfilename': ctx.tmpfilename,
'filename': ctx.filename,
'eta': eta,
'speed': speed,
- 'elapsed': now - start,
+ 'elapsed': now - ctx.start_time,
})
if is_test and byte_counter == data_len:
break
+ if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len:
+ ctx.resume_len = byte_counter
+ # ctx.block_size = block_size
+ raise NextFragment()
+
if ctx.stream is None:
self.to_stderr('\n')
self.report_error('Did not get any data blocks')
'total_bytes': byte_counter,
'filename': ctx.filename,
'status': 'finished',
- 'elapsed': time.time() - start,
+ 'elapsed': time.time() - ctx.start_time,
})
return True
if count <= retries:
self.report_retry(e.source_error, count, retries)
continue
+ except NextFragment:
+ continue
except SucceedDownload:
return True
from __future__ import unicode_literals
import time
-import struct
import binascii
import io
from .fragment import FragmentFD
-from ..compat import compat_urllib_error
+from ..compat import (
+ compat_Struct,
+ compat_urllib_error,
+)
-u8 = struct.Struct(b'>B')
-u88 = struct.Struct(b'>Bx')
-u16 = struct.Struct(b'>H')
-u1616 = struct.Struct(b'>Hxx')
-u32 = struct.Struct(b'>I')
-u64 = struct.Struct(b'>Q')
+u8 = compat_Struct('>B')
+u88 = compat_Struct('>Bx')
+u16 = compat_Struct('>H')
+u1616 = compat_Struct('>Hxx')
+u32 = compat_Struct('>I')
+u64 = compat_Struct('>Q')
-s88 = struct.Struct(b'>bx')
-s16 = struct.Struct(b'>h')
-s1616 = struct.Struct(b'>hxx')
-s32 = struct.Struct(b'>i')
+s88 = compat_Struct('>bx')
+s16 = compat_Struct('>h')
+s1616 = compat_Struct('>hxx')
+s32 = compat_Struct('>i')
unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000)
sample_entry_payload += u16.pack(0x18) # depth
sample_entry_payload += s16.pack(-1) # pre defined
- codec_private_data = binascii.unhexlify(params['codec_private_data'])
+ codec_private_data = binascii.unhexlify(params['codec_private_data'].encode('utf-8'))
if fourcc in ('H264', 'AVC1'):
sps, pps = codec_private_data.split(u32.pack(1))[1:]
avcc_payload = u8.pack(1) # configuration version
_TESTS = [{
'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
'info_dict': {
- 'id': '10498713',
+ 'id': '10505354',
'ext': 'flv',
'display_id': 'dramatic-video-rare-death-job-america',
'title': 'Occupational Hazards',
}, {
'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
'info_dict': {
- 'id': '39125818',
+ 'id': '38897857',
'ext': 'mp4',
'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016',
'title': 'Justin Timberlake Drops Hints For Secret Single',
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
- bytes_to_intlist(b'\x1b\xe0\x29\x61\x38\x94\x24\x00\x12\xbd\xc5\x80\xac\xce\xbe\xb0'),
+ bytes_to_intlist(b'\xc8\x6e\x06\xbc\xbe\xc6\x49\xf5\x88\x0d\xc8\x47\xc4\x27\x0c\x60'),
bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
))
subtitles_json = self._parse_json(
options = player_config.get('options') or {}
metas = options.get('metas') or {}
- title = metas.get('title') or video_info['title']
links = player_config.get('links') or {}
+ sub_path = player_config.get('subtitles')
error = None
if not links:
- links_url = player_config['linksurl']
+ links_url = player_config.get('linksurl') or options['videoUrl']
links_data = self._download_json(urljoin(
self._BASE_URL, links_url), video_id)
links = links_data.get('links') or {}
+ metas = metas or links_data.get('meta') or {}
+ sub_path = sub_path or links_data.get('subtitles')
error = links_data.get('error')
+ title = metas.get('title') or video_info['title']
formats = []
for format_id, qualities in links.items():
'description': strip_or_none(metas.get('summary') or video_info.get('resume')),
'thumbnail': video_info.get('image'),
'formats': formats,
- 'subtitles': self.extract_subtitles(player_config.get('subtitles'), video_id),
+ 'subtitles': self.extract_subtitles(sub_path, video_id),
'episode': metas.get('subtitle') or video_info.get('videoTitle'),
'series': video_info.get('playlistTitle'),
}
query = {
'mbr': 'true',
- 'assetTypes': 'high_video_s3'
+ 'assetTypes': 'high_video_ak',
+ 'switch': 'hls_high_ak',
}
video_id = self._html_search_meta('aetn:VideoID', webpage)
media_url = self._search_regex(
def _real_extract(self, url):
video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ if re.search(r'alert\(["\']This video has been deleted', webpage):
+ raise ExtractorError(
+ 'Video %s has been deleted' % video_id, expected=True)
+
+ station_id = self._search_regex(
+ r'nStationNo\s*=\s*(\d+)', webpage, 'station')
+ bbs_id = self._search_regex(
+ r'nBbsNo\s*=\s*(\d+)', webpage, 'bbs')
+ video_id = self._search_regex(
+ r'nTitleNo\s*=\s*(\d+)', webpage, 'title', default=video_id)
+
video_xml = self._download_xml(
'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
- video_id, query={
+ video_id, headers={
+ 'Referer': 'http://vod.afreecatv.com/embed.php',
+ }, query={
'nTitleNo': video_id,
+ 'nStationNo': station_id,
+ 'nBbsNo': bbs_id,
'partialView': 'SKIP_ADULT',
})
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, flag), expected=True)
- video_element = video_xml.findall(compat_xpath('./track/video'))[1]
+ video_element = video_xml.findall(compat_xpath('./track/video'))[-1]
if video_element is None or video_element.text is None:
- raise ExtractorError('Specified AfreecaTV video does not exist',
- expected=True)
+ raise ExtractorError(
+ 'Video %s video does not exist' % video_id, expected=True)
video_url = video_element.text.strip()
class AMCNetworksIE(ThePlatformIE):
- _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies|shows(?:/[^/]+)+)/(?P<id>[^/?#]+)'
+ _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?:movies|shows(?:/[^/]+)+)/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1',
'md5': '',
}, {
'url': 'http://www.wetv.com/shows/la-hair/videos/season-05/episode-09-episode-9-2/episode-9-sneak-peek-3',
'only_matching': True,
+ }, {
+ 'url': 'https://www.sundancetv.com/shows/riviera/full-episodes/season-1/episode-01-episode-1',
+ 'only_matching': True,
}]
def _real_extract(self, url):
webpage = self._download_webpage(
'http://archive.org/embed/' + video_id, video_id)
jwplayer_playlist = self._parse_json(self._search_regex(
- r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\);",
+ r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\)",
webpage, 'jwplayer playlist'), video_id)
info = self._parse_jwplayer_data(
{'playlist': jwplayer_playlist}, video_id, base_url=url)
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
_TESTS = [{
- 'url': 'http://www.ardmediathek.de/tv/Dokumentation-und-Reportage/Ich-liebe-das-Leben-trotzdem/rbb-Fernsehen/Video?documentId=29582122&bcastId=3822114',
+ # available till 26.07.2022
+ 'url': 'http://www.ardmediathek.de/tv/S%C3%9CDLICHT/Was-ist-die-Kunst-der-Zukunft-liebe-Ann/BR-Fernsehen/Video?bcastId=34633636&documentId=44726822',
'info_dict': {
- 'id': '29582122',
+ 'id': '44726822',
'ext': 'mp4',
- 'title': 'Ich liebe das Leben trotzdem',
- 'description': 'md5:45e4c225c72b27993314b31a84a5261c',
- 'duration': 4557,
+ 'title': 'Was ist die Kunst der Zukunft, liebe Anna McCarthy?',
+ 'description': 'md5:4ada28b3e3b5df01647310e41f3a62f5',
+ 'duration': 1740,
},
'params': {
# m3u8 download
'skip_download': True,
- },
- 'skip': 'HTTP Error 404: Not Found',
- }, {
- 'url': 'http://www.ardmediathek.de/tv/Tatort/Tatort-Scheinwelten-H%C3%B6rfassung-Video/Das-Erste/Video?documentId=29522730&bcastId=602916',
- 'md5': 'f4d98b10759ac06c0072bbcd1f0b9e3e',
- 'info_dict': {
- 'id': '29522730',
- 'ext': 'mp4',
- 'title': 'Tatort: Scheinwelten - Hörfassung (Video tgl. ab 20 Uhr)',
- 'description': 'md5:196392e79876d0ac94c94e8cdb2875f1',
- 'duration': 5252,
- },
- 'skip': 'HTTP Error 404: Not Found',
+ }
}, {
# audio
'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
- 'md5': '219d94d8980b4f538c7fcb0865eb7f2c',
- 'info_dict': {
- 'id': '28488308',
- 'ext': 'mp3',
- 'title': 'Tod eines Fußballers',
- 'description': 'md5:f6e39f3461f0e1f54bfa48c8875c86ef',
- 'duration': 3240,
- },
- 'skip': 'HTTP Error 404: Not Found',
+ 'only_matching': True,
}, {
'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
'only_matching': True,
}, {
# audio
'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
- 'md5': '4e8f00631aac0395fee17368ac0e9867',
- 'info_dict': {
- 'id': '30796318',
- 'ext': 'mp3',
- 'title': 'Vor dem Fest',
- 'description': 'md5:c0c1c8048514deaed2a73b3a60eecacb',
- 'duration': 3287,
- },
- 'skip': 'Video is no longer available',
+ 'only_matching': True,
}]
def _extract_media_info(self, media_info_url, webpage, video_id):
class ARDIE(InfoExtractor):
_VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
- _TEST = {
- 'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
- 'md5': 'd216c3a86493f9322545e045ddc3eb35',
+ _TESTS = [{
+ # available till 14.02.2019
+ 'url': 'http://www.daserste.de/information/talk/maischberger/videos/das-groko-drama-zerlegen-sich-die-volksparteien-video-102.html',
+ 'md5': '8e4ec85f31be7c7fc08a26cdbc5a1f49',
'info_dict': {
- 'display_id': 'die-story-im-ersten-mission-unter-falscher-flagge',
- 'id': '100',
+ 'display_id': 'das-groko-drama-zerlegen-sich-die-volksparteien-video',
+ 'id': '102',
'ext': 'mp4',
- 'duration': 2600,
- 'title': 'Die Story im Ersten: Mission unter falscher Flagge',
- 'upload_date': '20140804',
+ 'duration': 4435.0,
+ 'title': 'Das GroKo-Drama: Zerlegen sich die Volksparteien?',
+ 'upload_date': '20180214',
'thumbnail': r're:^https?://.*\.jpg$',
},
- 'skip': 'HTTP Error 404: Not Found',
- }
+ }, {
+ 'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
return entries
- def _parse_brightcove_metadata(self, json_data, video_id):
+ def _parse_brightcove_metadata(self, json_data, video_id, headers={}):
title = json_data['name'].strip()
formats = []
self._sort_formats(formats)
+ for f in formats:
+ f.setdefault('http_headers', {}).update(headers)
+
subtitles = {}
for text_track in json_data.get('text_tracks', []):
if text_track.get('src'):
webpage, 'policy key', group='pk')
api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id)
- try:
- json_data = self._download_json(api_url, video_id, headers={
- 'Accept': 'application/json;pk=%s' % policy_key
+ headers = {
+ 'Accept': 'application/json;pk=%s' % policy_key,
+ }
+ referrer = smuggled_data.get('referrer')
+ if referrer:
+ headers.update({
+ 'Referer': referrer,
+ 'Origin': re.search(r'https?://[^/]+', referrer).group(0),
})
+ try:
+ json_data = self._download_json(api_url, video_id, headers=headers)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
'tveToken': tve_token,
})
- return self._parse_brightcove_metadata(json_data, video_id)
+ return self._parse_brightcove_metadata(
+ json_data, video_id, headers=headers)
def _real_extract(self, url):
display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
+ webpage, urlh = self._download_webpage_handle(url, display_id)
title = self._html_search_regex(
r'(?ms)<h1 class="content__heading">(.+?)</h1>',
webpage, 'release_date', default=None))
# If there's a ? or a # in the URL, remove them and everything after
- clean_url = url.split('?')[0].split('#')[0].strip('/')
+ clean_url = urlh.geturl().split('?')[0].split('#')[0].strip('/')
securevideo_url = clean_url + '.mssecurevideo.json'
try:
# coding: utf-8
from __future__ import unicode_literals
+import json
import re
from .common import InfoExtractor
xpath_element,
xpath_with_ns,
find_xpath_attr,
+ parse_duration,
parse_iso8601,
parse_age_limit,
int_or_none,
video_id = self._match_id(url)
rss = self._call_api('web/browse/' + video_id, video_id)
return self._parse_rss_feed(rss)
+
+
+class CBCOlympicsIE(InfoExtractor):
+    # Extractor for olympics.cbc.ca video pages. The 'videoId' value taken
+    # from the page's hidden inputs selects an XML descriptor; each of its
+    # video sources is tokenized separately and turned into formats.
+    IE_NAME = 'cbc.ca:olympics'
+    _VALID_URL = r'https?://olympics\.cbc\.ca/video/[^/]+/(?P<id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://olympics.cbc.ca/video/whats-on-tv/olympic-morning-featuring-the-opening-ceremony/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        video_id = self._hidden_inputs(webpage)['videoId']
+        video_doc = self._download_xml(
+            'https://olympics.cbc.ca/videodata/%s.xml' % video_id, video_id)
+        title = xpath_text(video_doc, 'title', fatal=True)
+        is_live = xpath_text(video_doc, 'kind') == 'Live'
+        if is_live:
+            title = self._live_title(title)
+
+        formats = []
+        for video_source in video_doc.findall('videoSources/videoSource'):
+            uri = xpath_text(video_source, 'uri')
+            if not uri:
+                continue
+            # The request is non-fatal so that a single source that cannot
+            # be tokenized does not abort the whole extraction
+            tokenize = self._download_json(
+                'https://olympics.cbc.ca/api/api-akamai/tokenize',
+                video_id, data=json.dumps({
+                    'VideoSource': uri,
+                }).encode(), headers={
+                    'Content-Type': 'application/json',
+                    'Referer': url,
+                    # d3.VideoPlayer._init in https://olympics.cbc.ca/components/script/base.js
+                    'Cookie': '_dvp=TK:C0ObxjerU',  # AKAMAI CDN cookie
+                }, fatal=False)
+            if not isinstance(tokenize, dict):
+                continue
+            # Likewise, a response without a usable ContentUrl only
+            # disqualifies this source instead of raising KeyError
+            content_url = tokenize.get('ContentUrl')
+            if not content_url:
+                continue
+            video_source_format = video_source.get('format')
+            if video_source_format == 'IIS':
+                formats.extend(self._extract_ism_formats(
+                    content_url, video_id, ism_id=video_source_format, fatal=False))
+            else:
+                formats.extend(self._extract_m3u8_formats(
+                    content_url, video_id, 'mp4',
+                    'm3u8' if is_live else 'm3u8_native',
+                    m3u8_id=video_source_format, fatal=False))
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': xpath_text(video_doc, 'description'),
+            'thumbnail': xpath_text(video_doc, 'thumbnailUrl'),
+            'duration': parse_duration(xpath_text(video_doc, 'duration')),
+            'formats': formats,
+            'is_live': is_live,
+        }
webpage = self._download_webpage(url, display_id)
data_json = self._html_search_regex(
- r"data-(?:cnet|zdnet)-video(?:-uvp(?:js)?)?-options='([^']+)'",
+ r"data(?:-(?:cnet|zdnet))?-video(?:-(?:uvp(?:js)?|player))?-options='([^']+)'",
webpage, 'data json')
data = self._parse_json(data_json, display_id)
- vdata = data.get('video') or data['videos'][0]
+ vdata = data.get('video') or (data.get('videos') or data.get('playlist'))[0]
video_id = vdata['mpxRefId']
width : height ratio as float.
* no_resume The server does not support resuming the
(HTTP or RTMP) download. Boolean.
+ * downloader_options A dictionary of downloader options as
+ described in FileDownloader
url: Final video URL.
ext: Video filename extension.
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
query = compat_urlparse.urlparse(url).query
url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
- url_base = self._search_regex(
- r'(?:(?:https?|rtmp|rtsp):)?(//[^?]+)', url, 'format url')
- http_base_url = '%s:%s' % ('http', url_base)
+ mobj = re.search(
+ r'(?:(?:http|rtmp|rtsp)(?P<s>s)?:)?(?P<url>//[^?]+)', url)
+ url_base = mobj.group('url')
+ http_base_url = '%s%s:%s' % ('http', mobj.group('s') or '', url_base)
formats = []
def manifest_url(manifest):
for track in tracks:
if not isinstance(track, dict):
continue
- if track.get('kind') != 'captions':
+ track_kind = track.get('kind')
+ if not track_kind or not isinstance(track_kind, compat_str):
+ continue
+ if track_kind.lower() not in ('captions', 'subtitles'):
continue
track_url = urljoin(base_url, track.get('file'))
if not track_url:
import string
from .discoverygo import DiscoveryGoBaseIE
+from ..compat import compat_str
from ..utils import (
ExtractorError,
- update_url_query,
+ try_get,
)
from ..compat import compat_HTTPError
class DiscoveryIE(DiscoveryGoBaseIE):
- _VALID_URL = r'''(?x)https?://(?:www\.)?(?:
+ _VALID_URL = r'''(?x)https?://(?:www\.)?(?P<site>
discovery|
investigationdiscovery|
discoverylife|
_GEO_BYPASS = False
def _real_extract(self, url):
- path, display_id = re.match(self._VALID_URL, url).groups()
+ site, path, display_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, display_id)
react_data = self._parse_json(self._search_regex(
video_id = video['id']
access_token = self._download_json(
- 'https://www.discovery.com/anonymous', display_id, query={
- 'authLink': update_url_query(
- 'https://login.discovery.com/v1/oauth2/authorize', {
- 'client_id': react_data['application']['apiClientId'],
- 'redirect_uri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html',
- 'response_type': 'anonymous',
- 'state': 'nonce,' + ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
- })
+ 'https://www.%s.com/anonymous' % site, display_id, query={
+ 'authRel': 'authorization',
+ 'client_id': try_get(
+ react_data, lambda x: x['application']['apiClientId'],
+ compat_str) or '3020a40c2356a645b4b4',
+ 'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
+ 'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site,
})['access_token']
try:
class DPlayIE(InfoExtractor):
- _VALID_URL = r'https?://(?P<domain>www\.(?P<host>dplay\.(?P<country>dk|se|no)))/(?:videoer/)?(?P<id>[^/]+/[^/?#]+)'
+ _VALID_URL = r'https?://(?P<domain>www\.(?P<host>dplay\.(?P<country>dk|se|no)))/(?:video(?:er|s)/)?(?P<id>[^/]+/[^/?#]+)'
_TESTS = [{
# non geo restricted, via secure api, unsigned download hls URL
'skip_download': True,
},
}, {
- # geo restricted, bypassable via X-Forwarded-For
+
'url': 'https://www.dplay.dk/videoer/singleliv/season-5-episode-3',
'only_matching': True,
+ }, {
+ 'url': 'https://www.dplay.se/videos/sofias-anglar/sofias-anglar-1001',
+ 'only_matching': True,
}]
def _real_extract(self, url):
}, {
'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/',
'info_dict': {
- 'title': 'DVTV 16. 12. 2014: útok Talibanu, boj o kliniku, uprchlíci',
+ 'title': r're:^DVTV 16\. 12\. 2014: útok Talibanu, boj o kliniku, uprchlíci',
'id': '973eb3bc854e11e498be002590604f2e',
},
'playlist': [{
}, {
'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/',
'only_matching': True,
+ }, {
+ 'url': 'https://video.aktualne.cz/dvtv/babis-a-zeman-nesou-vinu-za-to-ze-nemame-jasno-v-tom-kdo-bud/r~026afb54fad711e79704ac1f6b220ee8/',
+ 'md5': '87defe16681b1429c91f7a74809823c6',
+ 'info_dict': {
+ 'id': 'f5ae72f6fad611e794dbac1f6b220ee8',
+ 'ext': 'mp4',
+ 'title': 'Babiš a Zeman nesou vinu za to, že nemáme jasno v tom, kdo bude vládnout, říká Pekarová Adamová',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
}]
- def _parse_video_metadata(self, js, video_id):
+ def _parse_video_metadata(self, js, video_id, live_js=None):
data = self._parse_json(js, video_id, transform_source=js_to_json)
+ if live_js:
+ data.update(self._parse_json(
+ live_js, video_id, transform_source=js_to_json))
title = unescapeHTML(data['title'])
webpage = self._download_webpage(url, video_id)
+ # live content
+ live_item = self._search_regex(
+ r'(?s)embedData[0-9a-f]{32}\.asset\.liveStarter\s*=\s*(\{.+?\});',
+ webpage, 'video', default=None)
+
# single video
item = self._search_regex(
r'(?s)embedData[0-9a-f]{32}\[["\']asset["\']\]\s*=\s*(\{.+?\});',
- webpage, 'video', default=None, fatal=False)
+ webpage, 'video', default=None)
if item:
- return self._parse_video_metadata(item, video_id)
+ return self._parse_video_metadata(item, video_id, live_item)
# playlist
items = re.findall(
CBCPlayerIE,
CBCWatchVideoIE,
CBCWatchIE,
+ CBCOlympicsIE,
)
from .cbs import CBSIE
from .cbslocal import CBSLocalIE
from .franceinter import FranceInterIE
from .francetv import (
FranceTVIE,
+ FranceTVSiteIE,
FranceTVEmbedIE,
FranceTVInfoIE,
+ FranceTVJeunesseIE,
GenerationWhatIE,
CultureboxIE,
)
from .freespeech import FreespeechIE
from .freshlive import FreshLiveIE
from .funimation import FunimationIE
-from .funk import FunkIE
+from .funk import (
+ FunkMixIE,
+ FunkChannelIE,
+)
from .funnyordie import FunnyOrDieIE
from .fusion import FusionIE
from .fxnetworks import FXNetworksIE
from .helsinki import HelsinkiIE
from .hentaistigma import HentaiStigmaIE
from .hgtv import HGTVComShowIE
+from .hidive import HiDiveIE
from .historicfilms import HistoricFilmsIE
from .hitbox import HitboxIE, HitboxLiveIE
from .hitrecord import HitRecordIE
LimelightChannelIE,
LimelightChannelListIE,
)
+from .line import LineTVIE
from .litv import LiTVIE
from .liveleak import (
LiveLeakIE,
)
from .m6 import M6IE
from .macgamestore import MacGameStoreIE
-from .mailru import MailRuIE
+from .mailru import (
+ MailRuIE,
+ MailRuMusicIE,
+ MailRuMusicSearchIE,
+)
from .makerschannel import MakersChannelIE
from .makertv import MakerTVIE
from .mangomolo import (
from .mwave import MwaveIE, MwaveMeetGreetIE
from .myspace import MySpaceIE, MySpaceAlbumIE
from .myspass import MySpassIE
-from .myvi import MyviIE
+from .myvi import (
+ MyviIE,
+ MyviEmbedIE,
+)
from .myvidster import MyVidsterIE
from .nationalgeographic import (
NationalGeographicVideoIE,
NBCIE,
NBCNewsIE,
NBCOlympicsIE,
+ NBCOlympicsStreamIE,
NBCSportsIE,
NBCSportsVPlayerIE,
)
RaiPlayPlaylistIE,
RaiIE,
)
+from .raywenderlich import RayWenderlichIE
from .rbmaradio import RBMARadioIE
from .rds import RDSIE
from .redbulltv import RedBullTVIE
from .telecinco import TelecincoIE
from .telegraaf import TelegraafIE
from .telemb import TeleMBIE
-from .telequebec import TeleQuebecIE
+from .telequebec import (
+ TeleQuebecIE,
+ TeleQuebecEmissionIE,
+ TeleQuebecLiveIE,
+)
from .teletask import TeleTaskIE
from .telewebion import TelewebionIE
+from .tennistv import TennisTVIE
from .testurl import TestURLIE
from .tf1 import TF1IE
from .tfo import TFOIE
ViceArticleIE,
ViceShowIE,
)
-from .viceland import VicelandIE
from .vidbit import VidbitIE
from .viddler import ViddlerIE
from .videa import VideaIE
from .videopremium import VideoPremiumIE
from .videopress import VideoPressIE
from .vidio import VidioIE
+from .vidlii import VidLiiIE
from .vidme import (
VidmeIE,
VidmeUserIE,
YandexMusicPlaylistIE,
)
from .yandexdisk import YandexDiskIE
+from .yapfiles import YapFilesIE
from .yesjapan import YesJapanIE
from .yinyuetai import YinYueTaiIE
from .ynet import YnetIE
import re
from .common import InfoExtractor
-from ..compat import compat_urlparse
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
from ..utils import (
clean_html,
+ determine_ext,
ExtractorError,
int_or_none,
parse_duration,
- determine_ext,
+ try_get,
)
from .dailymotion import DailymotionIE
class FranceTVBaseInfoExtractor(InfoExtractor):
+    def _make_url_result(self, video_or_full_id, catalog=None):
+        # Build a url_result for FranceTVIE from either a bare video id or a
+        # full 'id@catalog' id; the catalog argument is appended only when
+        # the given id does not already contain an '@' part
+        already_qualified = '@' in video_or_full_id
+        target = 'francetv:%s' % video_or_full_id
+        if catalog and not already_qualified:
+            target = '%s@%s' % (target, catalog)
+        bare_video_id = video_or_full_id.split('@')[0]
+        return self.url_result(
+            target, ie=FranceTVIE.ie_key(), video_id=bare_video_id)
+
+
+class FranceTVIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ https?://
+ sivideo\.webservices\.francetelevisions\.fr/tools/getInfosOeuvre/v2/\?
+ .*?\bidDiffusion=[^&]+|
+ (?:
+ https?://videos\.francetv\.fr/video/|
+ francetv:
+ )
+ (?P<id>[^@]+)(?:@(?P<catalog>.+))?
+ )
+ '''
+
+ _TESTS = [{
+ # without catalog
+ 'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=162311093&callback=_jsonp_loader_callback_request_0',
+ 'md5': 'c2248a8de38c4e65ea8fae7b5df2d84f',
+ 'info_dict': {
+ 'id': '162311093',
+ 'ext': 'mp4',
+ 'title': '13h15, le dimanche... - Les mystères de Jésus',
+ 'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
+ 'timestamp': 1502623500,
+ 'upload_date': '20170813',
+ },
+ }, {
+ # with catalog
+ 'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=NI_1004933&catalogue=Zouzous&callback=_jsonp_loader_callback_request_4',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videos.francetv.fr/video/NI_657393@Regions',
+ 'only_matching': True,
+ }, {
+ 'url': 'francetv:162311093',
+ 'only_matching': True,
+ }, {
+ 'url': 'francetv:NI_1004933@Zouzous',
+ 'only_matching': True,
+ }, {
+ 'url': 'francetv:NI_983319@Info-web',
+ 'only_matching': True,
+ }, {
+ 'url': 'francetv:NI_983319',
+ 'only_matching': True,
+ }, {
+ 'url': 'francetv:NI_657393@Regions',
+ 'only_matching': True,
+ }, {
+ # france-3 live
+ 'url': 'francetv:SIM_France3',
+ 'only_matching': True,
+ }]
+
def _extract_video(self, video_id, catalogue=None):
+ # Videos are identified by idDiffusion so catalogue part is optional.
+ # However when provided, some extra formats may be returned so we pass
+ # it if available.
info = self._download_json(
'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/',
video_id, 'Downloading video JSON', query={
if info.get('status') == 'NOK':
raise ExtractorError(
- '%s returned error: %s' % (self.IE_NAME, info['message']), expected=True)
+ '%s returned error: %s' % (self.IE_NAME, info['message']),
+ expected=True)
allowed_countries = info['videos'][0].get('geoblocage')
if allowed_countries:
georestricted = True
else:
georestricted = False
+ def sign(manifest_url, manifest_id):
+ # Try each signing host in order and return the first usable signed URL
+ # for manifest_url; manifest_id is only used in the progress note.
+ for host in ('hdfauthftv-a.akamaihd.net', 'hdfauth.francetv.fr'):
+ # fatal=False is passed, so presumably a failed request yields a falsy
+ # result instead of raising -- confirm against _download_webpage.
+ signed_url = self._download_webpage(
+ 'https://%s/esi/TA' % host, video_id,
+ 'Downloading signed %s manifest URL' % manifest_id,
+ fatal=False, query={
+ 'url': manifest_url,
+ })
+ # Accept the response only when it is a non-empty string starting with
+ # 'http://', 'https://' or '//'.
+ if (signed_url and isinstance(signed_url, compat_str) and
+ re.search(r'^(?:https?:)?//', signed_url)):
+ return signed_url
+ # No host returned a usable URL: fall back to the unsigned manifest URL.
+ return manifest_url
+
+ is_live = None
+
formats = []
for video in info['videos']:
if video['statut'] != 'ONLINE':
video_url = video['url']
if not video_url:
continue
+ if is_live is None:
+ is_live = (try_get(
+ video, lambda x: x['plages_ouverture'][0]['direct'],
+ bool) is True) or '/live.francetv.fr/' in video_url
format_id = video['format']
ext = determine_ext(video_url)
if ext == 'f4m':
# See https://github.com/rg3/youtube-dl/issues/3963
# m3u8 urls work fine
continue
- f4m_url = self._download_webpage(
- 'http://hdfauth.francetv.fr/esi/TA?url=%s' % video_url,
- video_id, 'Downloading f4m manifest token', fatal=False)
- if f4m_url:
- formats.extend(self._extract_f4m_formats(
- f4m_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44',
- video_id, f4m_id=format_id, fatal=False))
+ formats.extend(self._extract_f4m_formats(
+ sign(video_url, format_id) + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44',
+ video_id, f4m_id=format_id, fatal=False))
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
- video_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id=format_id, fatal=False))
+ sign(video_url, format_id), video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id=format_id,
+ fatal=False))
elif video_url.startswith('rtmp'):
formats.append({
'url': video_url,
return {
'id': video_id,
- 'title': title,
+ 'title': self._live_title(title) if is_live else title,
'description': clean_html(info['synopsis']),
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']),
'timestamp': int_or_none(info['diffusion']['timestamp']),
+ 'is_live': is_live,
'formats': formats,
'subtitles': subtitles,
}
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ catalog = mobj.group('catalog')
-class FranceTVIE(FranceTVBaseInfoExtractor):
+ if not video_id:
+ qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ video_id = qs.get('idDiffusion', [None])[0]
+ catalog = qs.get('catalogue', [None])[0]
+ if not video_id:
+ raise ExtractorError('Invalid URL', expected=True)
+
+ return self._extract_video(video_id, catalog)
+
+
+class FranceTVSiteIE(FranceTVBaseInfoExtractor):
_VALID_URL = r'https?://(?:(?:www\.)?france\.tv|mobile\.france\.tv)/(?:[^/]+/)*(?P<id>[^/]+)\.html'
_TESTS = [{
'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
'info_dict': {
- 'id': '157550144',
+ 'id': '162311093',
'ext': 'mp4',
'title': '13h15, le dimanche... - Les mystères de Jésus',
'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
- 'timestamp': 1494156300,
- 'upload_date': '20170507',
+ 'timestamp': 1502623500,
+ 'upload_date': '20170813',
},
'params': {
- # m3u8 downloads
'skip_download': True,
},
+ 'add_ie': [FranceTVIE.ie_key()],
}, {
# france3
'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html',
}, {
'url': 'https://www.france.tv/142749-rouge-sang.html',
'only_matching': True,
+ }, {
+ # france-3 live
+ 'url': 'https://www.france.tv/france-3/direct.html',
+ 'only_matching': True,
}]
def _real_extract(self, url):
video_id, catalogue = self._html_search_regex(
r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"',
webpage, 'video ID').split('@')
- return self._extract_video(video_id, catalogue)
+
+ return self._make_url_result(video_id, catalogue)
class FranceTVEmbedIE(FranceTVBaseInfoExtractor):
_VALID_URL = r'https?://embed\.francetv\.fr/*\?.*?\bue=(?P<id>[^&]+)'
- _TEST = {
+ _TESTS = [{
'url': 'http://embed.francetv.fr/?ue=7fd581a2ccf59d2fc5719c5c13cf6961',
'info_dict': {
'id': 'NI_983319',
'timestamp': 1493981780,
'duration': 16,
},
- }
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [FranceTVIE.ie_key()],
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
'http://api-embed.webservices.francetelevisions.fr/key/%s' % video_id,
video_id)
- return self._extract_video(video['video_id'], video.get('catalog'))
+ return self._make_url_result(video['video_id'], video.get('catalog'))
class FranceTVInfoIE(FranceTVBaseInfoExtractor):
IE_NAME = 'francetvinfo.fr'
- _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<title>[^/?#&.]+)'
+ _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&.]+)'
_TESTS = [{
'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
},
},
'params': {
- # m3u8 downloads
'skip_download': True,
},
+ 'add_ie': [FranceTVIE.ie_key()],
}, {
'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html',
- 'info_dict': {
- 'id': 'EV_20019',
- 'ext': 'mp4',
- 'title': 'Débat des candidats à la Commission européenne',
- 'description': 'Débat des candidats à la Commission européenne',
- },
- 'params': {
- 'skip_download': 'HLS (reqires ffmpeg)'
- },
- 'skip': 'Ce direct est terminé et sera disponible en rattrapage dans quelques minutes.',
+ 'only_matching': True,
}, {
'url': 'http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html',
- 'md5': 'f485bda6e185e7d15dbc69b72bae993e',
- 'info_dict': {
- 'id': 'NI_173343',
- 'ext': 'mp4',
- 'title': 'Les entreprises familiales : le secret de la réussite',
- 'thumbnail': r're:^https?://.*\.jpe?g$',
- 'timestamp': 1433273139,
- 'upload_date': '20150602',
- },
- 'params': {
- # m3u8 downloads
- 'skip_download': True,
- },
+ 'only_matching': True,
}, {
'url': 'http://france3-regions.francetvinfo.fr/bretagne/cotes-d-armor/thalassa-echappee-breizh-ce-venredi-dans-les-cotes-d-armor-954961.html',
- 'md5': 'f485bda6e185e7d15dbc69b72bae993e',
- 'info_dict': {
- 'id': 'NI_657393',
- 'ext': 'mp4',
- 'title': 'Olivier Monthus, réalisateur de "Bretagne, le choix de l’Armor"',
- 'description': 'md5:a3264114c9d29aeca11ced113c37b16c',
- 'thumbnail': r're:^https?://.*\.jpe?g$',
- 'timestamp': 1458300695,
- 'upload_date': '20160318',
- },
- 'params': {
- 'skip_download': True,
- },
+ 'only_matching': True,
}, {
# Dailymotion embed
'url': 'http://www.francetvinfo.fr/politique/notre-dame-des-landes/video-sur-france-inter-cecile-duflot-denonce-le-regard-meprisant-de-patrick-cohen_1520091.html',
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- page_title = mobj.group('title')
- webpage = self._download_webpage(url, page_title)
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
dailymotion_urls = DailymotionIE._extract_urls(webpage)
if dailymotion_urls:
(r'id-video=([^@]+@[^"]+)',
r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"'),
webpage, 'video id').split('@')
- return self._extract_video(video_id, catalogue)
+
+ return self._make_url_result(video_id, catalogue)
class GenerationWhatIE(InfoExtractor):
IE_NAME = 'france2.fr:generation-what'
- _VALID_URL = r'https?://generation-what\.francetv\.fr/[^/]+/video/(?P<id>[^/?#]+)'
+ _VALID_URL = r'https?://generation-what\.francetv\.fr/[^/]+/video/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://generation-what.francetv.fr/portrait/video/present-arms',
'uploader_id': 'UCHH9p1eetWCgt4kXBYCb3_w',
'upload_date': '20160411',
},
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Youtube'],
}, {
'url': 'http://generation-what.francetv.fr/europe/video/present-arms',
'only_matching': True,
def _real_extract(self, url):
display_id = self._match_id(url)
+
webpage = self._download_webpage(url, display_id)
+
youtube_id = self._search_regex(
r"window\.videoURL\s*=\s*'([0-9A-Za-z_-]{11})';",
webpage, 'youtube id')
- return self.url_result(youtube_id, 'Youtube', youtube_id)
+
+ return self.url_result(youtube_id, ie='Youtube', video_id=youtube_id)
class CultureboxIE(FranceTVBaseInfoExtractor):
- IE_NAME = 'culturebox.francetvinfo.fr'
- _VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
+ _VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&]+)'
- _TEST = {
- 'url': 'http://culturebox.francetvinfo.fr/live/musique/musique-classique/le-livre-vermeil-de-montserrat-a-la-cathedrale-delne-214511',
- 'md5': '9b88dc156781c4dbebd4c3e066e0b1d6',
+ _TESTS = [{
+ 'url': 'https://culturebox.francetvinfo.fr/opera-classique/musique-classique/c-est-baroque/concerts/cantates-bwv-4-106-et-131-de-bach-par-raphael-pichon-57-268689',
'info_dict': {
- 'id': 'EV_50111',
- 'ext': 'flv',
- 'title': "Le Livre Vermeil de Montserrat à la Cathédrale d'Elne",
- 'description': 'md5:f8a4ad202e8fe533e2c493cc12e739d9',
- 'upload_date': '20150320',
- 'timestamp': 1426892400,
- 'duration': 2760.9,
+ 'id': 'EV_134885',
+ 'ext': 'mp4',
+ 'title': 'Cantates BWV 4, 106 et 131 de Bach par Raphaël Pichon 5/7',
+ 'description': 'md5:19c44af004b88219f4daa50fa9a351d4',
+ 'upload_date': '20180206',
+ 'timestamp': 1517945220,
+ 'duration': 5981,
},
- }
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [FranceTVIE.ie_key()],
+ }]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- name = mobj.group('name')
+ display_id = self._match_id(url)
- webpage = self._download_webpage(url, name)
+ webpage = self._download_webpage(url, display_id)
if ">Ce live n'est plus disponible en replay<" in webpage:
- raise ExtractorError('Video %s is not available' % name, expected=True)
+ raise ExtractorError(
+ 'Video %s is not available' % display_id, expected=True)
video_id, catalogue = self._search_regex(
r'["\'>]https?://videos\.francetv\.fr/video/([^@]+@.+?)["\'<]',
webpage, 'video id').split('@')
- return self._extract_video(video_id, catalogue)
+ return self._make_url_result(video_id, catalogue)
+
+
+class FranceTVJeunesseIE(FranceTVBaseInfoExtractor):
+ # Playlist extractor for zouzous.fr / ludo.fr '/heros/<id>' pages.
+ # The 'url' group captures the page URL up to and including the id, so any
+ # query string or fragment is left out of it.
+ _VALID_URL = r'(?P<url>https?://(?:www\.)?(?:zouzous|ludo)\.fr/heros/(?P<id>[^/?#&]+))'
+
+ _TESTS = [{
+ 'url': 'https://www.zouzous.fr/heros/simon',
+ 'info_dict': {
+ 'id': 'simon',
+ },
+ 'playlist_count': 9,
+ }, {
+ 'url': 'https://www.ludo.fr/heros/ninjago',
+ 'info_dict': {
+ 'id': 'ninjago',
+ },
+ 'playlist_count': 10,
+ }, {
+ 'url': 'https://www.zouzous.fr/heros/simon?abc',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ # Returns a playlist whose entries are delegated via _make_url_result.
+ mobj = re.match(self._VALID_URL, url)
+ playlist_id = mobj.group('id')
+
+ # The playlist JSON is fetched from '<matched page URL>/playlist'.
+ playlist = self._download_json(
+ '%s/%s' % (mobj.group('url'), 'playlist'), playlist_id)
+
+ # A missing or falsy 'count' is reported as an expected error.
+ if not playlist.get('count'):
+ raise ExtractorError(
+ '%s is not available' % playlist_id, expected=True)
+
+ entries = []
+ # 'items' is read by subscript, so it must be present in the response.
+ for item in playlist['items']:
+ identity = item.get('identity')
+ # Only items with a non-empty string 'identity' become entries; no
+ # catalog is passed along.
+ if identity and isinstance(identity, compat_str):
+ entries.append(self._make_url_result(identity))
+
+ return self.playlist_result(entries, playlist_id)
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from .nexx import NexxIE
-from ..utils import extract_attributes
+from ..utils import int_or_none
+
+
+class FunkBaseIE(InfoExtractor):
+ # Base class providing the result-building helper used by FunkMixIE and
+ # FunkChannelIE.
+ def _make_url_result(self, video):
+ # Return a 'url_transparent' result that points at
+ # 'nexx:741:<sourceId>' (handled by NexxIE) and carries metadata taken
+ # from the video dict.
+ # 'sourceId' is read by subscript and must be present; the remaining
+ # fields are read with .get() and may be absent.
+ return {
+ '_type': 'url_transparent',
+ 'url': 'nexx:741:%s' % video['sourceId'],
+ 'ie_key': NexxIE.ie_key(),
+ 'id': video['sourceId'],
+ 'title': video.get('title'),
+ 'description': video.get('description'),
+ 'duration': int_or_none(video.get('duration')),
+ 'season_number': int_or_none(video.get('seasonNr')),
+ 'episode_number': int_or_none(video.get('episodeNr')),
+ }
+
+
+class FunkMixIE(FunkBaseIE):
+ # Extractor for funk.net '/mix/<id>/<alias>' URLs.
+ _VALID_URL = r'https?://(?:www\.)?funk\.net/mix/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/die-realste-kifferdoku-aller-zeiten',
+ 'md5': '8edf617c2f2b7c9847dfda313f199009',
+ 'info_dict': {
+ 'id': '123748',
+ 'ext': 'mp4',
+ 'title': '"Die realste Kifferdoku aller Zeiten"',
+ 'description': 'md5:c97160f5bafa8d47ec8e2e461012aa9d',
+ 'timestamp': 1490274721,
+ 'upload_date': '20170323',
+ },
+ }]
+
+ def _real_extract(self, url):
+ # Resolve the mix id and video alias from the URL to a video entry in
+ # the curated lists API, then delegate via _make_url_result.
+ mobj = re.match(self._VALID_URL, url)
+ mix_id = mobj.group('id')
+ alias = mobj.group('alias')
+
+ # A single request with size=100 is made using a hard-coded
+ # authorization header value; 'result' and 'lists' are read by
+ # subscript and must be present in the response.
+ lists = self._download_json(
+ 'https://www.funk.net/api/v3.1/curation/curatedLists/',
+ mix_id, headers={
+ 'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbC12Mi4wIiwic2NvcGUiOiJzdGF0aWMtY29udGVudC1hcGksY3VyYXRpb24tc2VydmljZSxzZWFyY2gtYXBpIn0.SGCC1IXHLtZYoo8PvRKlU2gXH1su8YSu47sB3S4iXBI',
+ 'Referer': url,
+ }, query={
+ 'size': 100,
+ })['result']['lists']
+
+ # First list whose 'entityId' or 'alias' equals mix_id.
+ # NOTE(review): next() is called without a default here and below, so a
+ # missing match raises StopIteration rather than an ExtractorError.
+ metas = next(
+ l for l in lists
+ if mix_id in (l.get('entityId'), l.get('alias')))['videoMetas']
+ # First meta in that list whose 'alias' equals the alias from the URL.
+ video = next(
+ meta['videoDataDelegate']
+ for meta in metas if meta.get('alias') == alias)
+
+ return self._make_url_result(video)
-class FunkIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?funk\.net/(?:mix|channel)/(?:[^/]+/)*(?P<id>[^?/#]+)'
+class FunkChannelIE(FunkBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?funk\.net/channel/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)'
_TESTS = [{
- 'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/0/59d517e741dca10001252574/',
- 'md5': '4d40974481fa3475f8bccfd20c5361f8',
+ 'url': 'https://www.funk.net/channel/ba/die-lustigsten-instrumente-aus-dem-internet-teil-2',
'info_dict': {
- 'id': '716599',
+ 'id': '1155821',
'ext': 'mp4',
- 'title': 'Neue Rechte Welle',
- 'description': 'md5:a30a53f740ffb6bfd535314c2cc5fb69',
- 'timestamp': 1501337639,
- 'upload_date': '20170729',
+ 'title': 'Die LUSTIGSTEN INSTRUMENTE aus dem Internet - Teil 2',
+ 'description': 'md5:a691d0413ef4835588c5b03ded670c1f',
+ 'timestamp': 1514507395,
+ 'upload_date': '20171229',
},
'params': {
- 'format': 'bestvideo',
'skip_download': True,
},
}, {
- 'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/0/59d52049999264000182e79d/',
+ 'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/mein-erster-job-lovemilla-folge-1/lovemilla/',
'only_matching': True,
}]
def _real_extract(self, url):
- video_id = self._match_id(url)
+ mobj = re.match(self._VALID_URL, url)
+ channel_id = mobj.group('id')
+ alias = mobj.group('alias')
- webpage = self._download_webpage(url, video_id)
+ results = self._download_json(
+ 'https://www.funk.net/api/v3.0/content/videos/filter', channel_id,
+ headers={
+ 'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbCIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxzZWFyY2gtYXBpIn0.q4Y2xZG8PFHai24-4Pjx2gym9RmJejtmK6lMXP5wAgc',
+ 'Referer': url,
+ }, query={
+ 'channelId': channel_id,
+ 'size': 100,
+ })['result']
- domain_id = NexxIE._extract_domain_id(webpage) or '741'
- nexx_id = extract_attributes(self._search_regex(
- r'(<div[^>]id=["\']mediaplayer-funk[^>]+>)',
- webpage, 'media player'))['data-id']
+ video = next(r for r in results if r.get('alias') == alias)
- return self.url_result(
- 'nexx:%s:%s' % (domain_id, nexx_id), ie=NexxIE.ie_key(),
- video_id=nexx_id)
+ return self._make_url_result(video)
class FusionIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?fusion\.net/video/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?fusion\.(?:net|tv)/video/(?P<id>\d+)'
_TESTS = [{
- 'url': 'http://fusion.net/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/',
+ 'url': 'http://fusion.tv/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/',
'info_dict': {
'id': 'ZpcWNoMTE6x6uVIIWYpHh0qQDjxBuq5P',
'ext': 'mp4',
},
'add_ie': ['Ooyala'],
}, {
- 'url': 'http://fusion.net/video/201781',
+ 'url': 'http://fusion.tv/video/201781',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
- brightcove_id = self._search_regex(r"getVideo\('[^']+video_id=(\d+)", webpage, 'brightcove id')
- return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
+ webpage = self._download_webpage(
+ url, display_id, headers=self.geo_verification_headers())
+ brightcove_id = self._search_regex(
+ [r'<[^>]+\bid=["\']bc_(\d+)', r"getVideo\('[^']+video_id=(\d+)"],
+ webpage, 'brightcove id')
+ return self.url_result(
+ self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew',
+ brightcove_id)
from .vshare import VShareIE
from .mediasite import MediasiteIE
from .springboardplatform import SpringboardPlatformIE
+from .yapfiles import YapFilesIE
+from .vice import ViceIE
class GenericIE(InfoExtractor):
'skip_download': True,
},
'add_ie': [SpringboardPlatformIE.ie_key()],
+ },
+ {
+ 'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
+ 'info_dict': {
+ 'id': 'uPDB5I9wfp8',
+ 'ext': 'webm',
+ 'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
+ 'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
+ 'upload_date': '20160219',
+ 'uploader': 'Pocoyo - Português (BR)',
+ 'uploader_id': 'PocoyoBrazil',
+ },
+ 'add_ie': [YoutubeIE.ie_key()],
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://www.yapfiles.ru/show/1872528/690b05d3054d2dbe1e69523aa21bb3b1.mp4.html',
+ 'info_dict': {
+ 'id': 'vMDE4NzI1Mjgt690b',
+ 'ext': 'mp4',
+ 'title': 'Котята',
+ },
+ 'add_ie': [YapFilesIE.ie_key()],
+ 'params': {
+ 'skip_download': True,
+ },
}
# {
# # TODO: find another test
# Look for Brightcove New Studio embeds
bc_urls = BrightcoveNewIE._extract_urls(self, webpage)
if bc_urls:
- return self.playlist_from_matches(bc_urls, video_id, video_title, ie='BrightcoveNew')
+ return self.playlist_from_matches(
+ bc_urls, video_id, video_title,
+ getter=lambda x: smuggle_url(x, {'referrer': url}),
+ ie='BrightcoveNew')
# Look for Nexx embeds
nexx_urls = NexxIE._extract_urls(webpage)
springboardplatform_urls, video_id, video_title,
ie=SpringboardPlatformIE.ie_key())
+ yapfiles_urls = YapFilesIE._extract_urls(webpage)
+ if yapfiles_urls:
+ return self.playlist_from_matches(
+ yapfiles_urls, video_id, video_title, ie=YapFilesIE.ie_key())
+
+ vice_urls = ViceIE._extract_urls(webpage)
+ if vice_urls:
+ return self.playlist_from_matches(
+ vice_urls, video_id, video_title, ie=ViceIE.ie_key())
+
def merge_dicts(dict1, dict2):
merged = {}
for k, v in dict1.items():
from __future__ import unicode_literals
from .common import InfoExtractor
+from .kaltura import KalturaIE
from .youtube import YoutubeIE
from ..utils import (
determine_ext,
int_or_none,
parse_iso8601,
+ smuggle_url,
xpath_text,
)
'params': {
'skip_download': True,
},
+ }, {
+ 'url': 'https://www.heise.de/video/artikel/nachgehakt-Wie-sichert-das-c-t-Tool-Restric-tor-Windows-10-ab-3700244.html',
+ 'md5': '4b58058b46625bdbd841fc2804df95fc',
+ 'info_dict': {
+ 'id': '1_ntrmio2s',
+ 'timestamp': 1512470717,
+ 'upload_date': '20171205',
+ 'ext': 'mp4',
+ 'title': 'ct10 nachgehakt hos restrictor',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
}, {
'url': 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html',
'only_matching': True,
if yt_urls:
return self.playlist_from_matches(yt_urls, video_id, title, ie=YoutubeIE.ie_key())
+ kaltura_url = KalturaIE._extract_url(webpage)
+ if kaltura_url:
+ return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
+
container_id = self._search_regex(
r'<div class="videoplayerjw"[^>]+data-container="([0-9]+)"',
webpage, 'container ID')
+
sequenz_id = self._search_regex(
r'<div class="videoplayerjw"[^>]+data-sequenz="([0-9]+)"',
webpage, 'sequenz ID')
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ urlencode_postdata,
+)
+
+
+class HiDiveIE(InfoExtractor):
+ # Extractor for hidive.com '/stream/<title>/<key>' URLs.
+ _VALID_URL = r'https?://(?:www\.)?hidive\.com/stream/(?P<title>[^/]+)/(?P<key>[^/?#&]+)'
+ # Using X-Forwarded-For results in 403 HTTP error for HLS fragments,
+ # so disabling geo bypass completely
+ _GEO_BYPASS = False
+
+ _TESTS = [{
+ 'url': 'https://www.hidive.com/stream/the-comic-artist-and-his-assistants/s01e001',
+ 'info_dict': {
+ 'id': 'the-comic-artist-and-his-assistants/s01e001',
+ 'ext': 'mp4',
+ 'title': 'the-comic-artist-and-his-assistants/s01e001',
+ 'series': 'the-comic-artist-and-his-assistants',
+ 'season_number': 1,
+ 'episode_number': 1,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ # Returns an info dict with HLS formats and subtitles collected from
+ # the play settings of the requested title/key.
+ mobj = re.match(self._VALID_URL, url)
+ title, key = mobj.group('title', 'key')
+ # The video id is the composite '<title>/<key>'.
+ video_id = '%s/%s' % (title, key)
+
+ # Title and Key are passed as url-encoded request data.
+ settings = self._download_json(
+ 'https://www.hidive.com/play/settings', video_id,
+ data=urlencode_postdata({
+ 'Title': title,
+ 'Key': key,
+ }))
+
+ restriction = settings.get('restrictionReason')
+ if restriction == 'RegionRestricted':
+ self.raise_geo_restricted()
+
+ # Any other non-empty reason, except the literal string 'None', is
+ # surfaced as an expected error.
+ if restriction and restriction != 'None':
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, restriction), expected=True)
+
+ formats = []
+ subtitles = {}
+ # 'renditions' is read by subscript and must be present in settings.
+ for rendition_id, rendition in settings['renditions'].items():
+ # Renditions without a dict 'bitrates' or a string 'hls' URL are
+ # skipped entirely, including any subtitles they might list.
+ bitrates = rendition.get('bitrates')
+ if not isinstance(bitrates, dict):
+ continue
+ m3u8_url = bitrates.get('hls')
+ if not isinstance(m3u8_url, compat_str):
+ continue
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='%s-hls' % rendition_id, fatal=False))
+ cc_files = rendition.get('ccFiles')
+ if not isinstance(cc_files, list):
+ continue
+ for cc_file in cc_files:
+ # Each entry must be a list of at least three items: index 0 is
+ # used as the language and index 2 as the subtitle URL.
+ if not isinstance(cc_file, list) or len(cc_file) < 3:
+ continue
+ cc_lang = cc_file[0]
+ cc_url = cc_file[2]
+ if not isinstance(cc_lang, compat_str) or not isinstance(
+ cc_url, compat_str):
+ continue
+ subtitles.setdefault(cc_lang, []).append({
+ 'url': cc_url,
+ })
+
+ # Season and episode numbers are parsed from the key (the test URL uses
+ # 's01e001'); both are None when the pattern is not found.
+ season_number = int_or_none(self._search_regex(
+ r's(\d+)', key, 'season number', default=None))
+ episode_number = int_or_none(self._search_regex(
+ r'e(\d+)', key, 'episode number', default=None))
+
+ # The composite id doubles as the title; the URL's title part is
+ # reported as the series.
+ return {
+ 'id': video_id,
+ 'title': video_id,
+ 'subtitles': subtitles,
+ 'formats': formats,
+ 'series': title,
+ 'season_number': season_number,
+ 'episode_number': episode_number,
+ }
webpage = self._download_webpage(url, video_id)
player_data = self._parse_json(
- self._search_regex(r'videoLa7\(({[^;]+})\);', webpage, 'player data'),
+ self._search_regex(
+ [r'(?s)videoParams\s*=\s*({.+?});', r'videoLa7\(({[^;]+})\);'],
+ webpage, 'player data'),
video_id, transform_source=js_to_json)
return {
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import js_to_json
+
+
+class LineTVIE(InfoExtractor):
+ # Extractor for tv.line.me '/v/<digits>_<name>-ep<N>-<M>' video pages.
+ _VALID_URL = r'https?://tv\.line\.me/v/(?P<id>\d+)_[^/]+-(?P<segment>ep\d+-\d+)'
+
+ _TESTS = [{
+ 'url': 'https://tv.line.me/v/793123_goodbye-mrblack-ep1-1/list/69246',
+ 'info_dict': {
+ 'id': '793123_ep1-1',
+ 'ext': 'mp4',
+ 'title': 'Goodbye Mr.Black | EP.1-1',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 998.509,
+ 'view_count': int,
+ },
+ }, {
+ 'url': 'https://tv.line.me/v/2587507_%E6%B4%BE%E9%81%A3%E5%A5%B3%E9%86%ABx-ep1-02/list/185245',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ # Returns an info dict combining HLS formats from the first stream with
+ # the HTTP variants listed under videos.list.
+ # The two regex groups (numeric id and 'epN-M' segment) form the id.
+ series_id, segment = re.match(self._VALID_URL, url).groups()
+ video_id = '%s_%s' % (series_id, segment)
+
+ webpage = self._download_webpage(url, video_id)
+
+ # Player parameters come from the object literal passed to
+ # naver.WebPlayer(...) in the page, converted with js_to_json.
+ player_params = self._parse_json(self._search_regex(
+ r'naver\.WebPlayer\(({[^}]+})\)', webpage, 'player parameters'),
+ video_id, transform_source=js_to_json)
+
+ video_info = self._download_json(
+ 'https://global-nvapis.line.me/linetv/rmcnmv/vod_play_videoInfo.json',
+ video_id, query={
+ 'videoId': player_params['videoId'],
+ 'key': player_params['key'],
+ })
+
+ # Only the first stream entry is used; its key value is sent as a
+ # '__gda__' query parameter on the manifest URL.
+ stream = video_info['streams'][0]
+ extra_query = '?__gda__=' + stream['key']['value']
+ formats = self._extract_m3u8_formats(
+ stream['source'] + extra_query, video_id, ext='mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls')
+
+ # The same query string is appended to every extracted HLS format URL.
+ for a_format in formats:
+ a_format['url'] += extra_query
+
+ duration = None
+ # HTTP variants: 'bitrate' (audio/video) and 'source' are read by
+ # subscript and must be present on each listed video.
+ for video in video_info.get('videos', {}).get('list', []):
+ encoding_option = video.get('encodingOption', {})
+ abr = video['bitrate']['audio']
+ vbr = video['bitrate']['video']
+ tbr = abr + vbr
+ formats.append({
+ 'url': video['source'],
+ 'format_id': 'http-%d' % int(tbr),
+ 'height': encoding_option.get('height'),
+ 'width': encoding_option.get('width'),
+ 'abr': abr,
+ 'vbr': vbr,
+ 'filesize': video.get('size'),
+ })
+ # Duration is taken from the first listed video that reports one.
+ if video.get('duration') and duration is None:
+ duration = video['duration']
+
+ self._sort_formats(formats)
+
+ # After sorting, a first format without a width is marked as having no
+ # video codec.
+ if not formats[0].get('width'):
+ formats[0]['vcodec'] = 'none'
+
+ title = self._og_search_title(webpage)
+
+ # like_count requires an additional API request https://tv.line.me/api/likeit/getCount
+
+ # extra_query[1:] drops the leading '?' before it is passed on as
+ # 'extra_param_to_segment_url'.
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'extra_param_to_segment_url': extra_query[1:],
+ 'duration': duration,
+ 'thumbnails': [{'url': thumbnail['source']}
+ for thumbnail in video_info.get('thumbnails', {}).get('list', [])],
+ 'view_count': video_info.get('meta', {}).get('count'),
+ }
# coding: utf-8
from __future__ import unicode_literals
+import itertools
+import json
import re
from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
from ..utils import (
int_or_none,
+ parse_duration,
remove_end,
+ try_get,
)
'view_count': view_count,
'formats': formats,
}
+
+
+class MailRuMusicSearchBaseIE(InfoExtractor):
+ # Base class holding the search request and track parsing helpers used by
+ # MailRuMusicIE and MailRuMusicSearchIE.
+ def _search(self, query, url, audio_id, limit=100, offset=0):
+ # Run one 'music.search' request and return the first dict element of
+ # the JSON response.
+ # query: search text; url: sent as Referer; audio_id: id passed to
+ # _download_json; limit/offset: paging values, sent both as arg_limit /
+ # arg_offset and inside the JSON-encoded arg_search_params.
+ # The progress note shows the page number as offset // limit + 1.
+ search = self._download_json(
+ 'https://my.mail.ru/cgi-bin/my/ajax', audio_id,
+ 'Downloading songs JSON page %d' % (offset // limit + 1),
+ headers={
+ 'Referer': url,
+ 'X-Requested-With': 'XMLHttpRequest',
+ }, query={
+ 'xemail': '',
+ 'ajax_call': '1',
+ 'func_name': 'music.search',
+ 'mna': '',
+ 'mnb': '',
+ 'arg_query': query,
+ 'arg_extended': '1',
+ 'arg_search_params': json.dumps({
+ 'music': {
+ 'limit': limit,
+ 'offset': offset,
+ },
+ }),
+ 'arg_limit': limit,
+ 'arg_offset': offset,
+ })
+ # next() has no default: StopIteration is raised when the response
+ # contains no dict element.
+ return next(e for e in search if isinstance(e, dict))
+
+ @staticmethod
+ def _extract_track(t, fatal=True):
+ # Convert one track dict from the search response into an info dict.
+ # With fatal=True 'URL' and 'File' are read by subscript (a missing key
+ # raises); with fatal=False they are read with .get(). In both modes a
+ # falsy URL or File makes the function return None.
+ audio_url = t['URL'] if fatal else t.get('URL')
+ if not audio_url:
+ return
+
+ audio_id = t['File'] if fatal else t.get('File')
+ if not audio_id:
+ return
+
+ # Each optional field falls back to an alternative key when the first
+ # one is missing or falsy.
+ thumbnail = t.get('AlbumCoverURL') or t.get('FiledAlbumCover')
+ uploader = t.get('OwnerName') or t.get('OwnerName_Text_HTML')
+ uploader_id = t.get('UploaderID')
+ duration = int_or_none(t.get('DurationInSeconds')) or parse_duration(
+ t.get('Duration') or t.get('DurationStr'))
+ view_count = int_or_none(t.get('PlayCount') or t.get('PlayCount_hr'))
+
+ track = t.get('Name') or t.get('Name_Text_HTML')
+ artist = t.get('Author') or t.get('Author_Text_HTML')
+
+ # Title is '<artist> - <track>' when both are known, the track name
+ # alone without an artist, and the audio id when there is no track name.
+ if track:
+ title = '%s - %s' % (artist, track) if artist else track
+ else:
+ title = audio_id
+
+ # 'vcodec' is fixed to 'none' and the extractor key to MailRuMusicIE.
+ return {
+ 'extractor_key': MailRuMusicIE.ie_key(),
+ 'id': audio_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'vcodec': 'none',
+ 'abr': int_or_none(t.get('BitRate')),
+ 'track': track,
+ 'artist': artist,
+ 'album': t.get('Album'),
+ 'url': audio_url,
+ }
+
+
+class MailRuMusicIE(MailRuMusicSearchBaseIE):
+ # Extractor for single my.mail.ru '/music/songs/<slug>-<hex id>' pages.
+ IE_NAME = 'mailru:music'
+ IE_DESC = 'Музыка@Mail.Ru'
+ _VALID_URL = r'https?://my\.mail\.ru/music/songs/[^/?#&]+-(?P<id>[\da-f]+)'
+ _TESTS = [{
+ 'url': 'https://my.mail.ru/music/songs/%D0%BC8%D0%BB8%D1%82%D1%85-l-a-h-luciferian-aesthetics-of-herrschaft-single-2017-4e31f7125d0dfaef505d947642366893',
+ 'md5': '0f8c22ef8c5d665b13ac709e63025610',
+ 'info_dict': {
+ 'id': '4e31f7125d0dfaef505d947642366893',
+ 'ext': 'mp3',
+ 'title': 'L.A.H. (Luciferian Aesthetics of Herrschaft) single, 2017 - М8Л8ТХ',
+ 'uploader': 'Игорь Мудрый',
+ 'uploader_id': '1459196328',
+ 'duration': 280,
+ 'view_count': int,
+ 'vcodec': 'none',
+ 'abr': 320,
+ 'track': 'L.A.H. (Luciferian Aesthetics of Herrschaft) single, 2017',
+ 'artist': 'М8Л8ТХ',
+ },
+ }]
+
+ def _real_extract(self, url):
+ # Find the track by searching for the page's og:title and picking the
+ # result whose 'File' equals the id from the URL.
+ audio_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, audio_id)
+
+ title = self._og_search_title(webpage)
+ # Only the first result page (default limit and offset) is searched.
+ music_data = self._search(title, url, audio_id)['MusicData']
+ # next() has no default: StopIteration is raised when no result matches.
+ t = next(t for t in music_data if t.get('File') == audio_id)
+
+ info = self._extract_track(t)
+ # The page title replaces the title built by _extract_track.
+ info['title'] = title
+ return info
+
+
+class MailRuMusicSearchIE(MailRuMusicSearchBaseIE):
+ # Playlist extractor for my.mail.ru '/music/search/<query>' URLs.
+ IE_NAME = 'mailru:music:search'
+ IE_DESC = 'Музыка@Mail.Ru'
+ _VALID_URL = r'https?://my\.mail\.ru/music/search/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://my.mail.ru/music/search/black%20shadow',
+ 'info_dict': {
+ 'id': 'black shadow',
+ },
+ 'playlist_mincount': 532,
+ }]
+
+ def _real_extract(self, url):
+ # Page through the search results and return every parsable track as a
+ # playlist whose id is the unquoted query.
+ query = compat_urllib_parse_unquote(self._match_id(url))
+
+ entries = []
+
+ LIMIT = 100
+ offset = 0
+
+ # Unbounded counter: the loop ends only through the breaks below.
+ for _ in itertools.count(1):
+ search = self._search(query, url, query, LIMIT, offset)
+
+ # Stop when the page has no 'MusicData' list or the list is empty.
+ music_data = search.get('MusicData')
+ if not music_data or not isinstance(music_data, list):
+ break
+
+ # fatal=False: tracks lacking a URL or File are skipped, not fatal.
+ for t in music_data:
+ track = self._extract_track(t, fatal=False)
+ if track:
+ entries.append(track)
+
+ total = try_get(
+ search, lambda x: x['Results']['music']['Total'], int)
+
+ # When an integer total is reported, stop once the offset just
+ # requested exceeds it.
+ # NOTE(review): the check runs before the offset is advanced, so
+ # paging normally ends on an empty page rather than on this test.
+ if total is not None:
+ if offset > total:
+ break
+
+ offset += LIMIT
+
+ return self.playlist_result(entries, query)
import re
+from .common import InfoExtractor
from .vimple import SprutoBaseIE
class MyviIE(SprutoBaseIE):
_VALID_URL = r'''(?x)
- https?://
- myvi\.(?:ru/player|tv)/
- (?:
+ (?:
+ https?://
+ (?:www\.)?
+ myvi\.
(?:
- embed/html|
- flash|
- api/Video/Get
- )/|
- content/preloader\.swf\?.*\bid=
- )
- (?P<id>[\da-zA-Z_-]+)
+ (?:ru/player|tv)/
+ (?:
+ (?:
+ embed/html|
+ flash|
+ api/Video/Get
+ )/|
+ content/preloader\.swf\?.*\bid=
+ )|
+ ru/watch/
+ )|
+ myvi:
+ )
+ (?P<id>[\da-zA-Z_-]+)
'''
_TESTS = [{
'url': 'http://myvi.ru/player/embed/html/oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wObeRTZaCATzucDQIDph8hQU0',
}, {
'url': 'http://myvi.ru/player/flash/ocp2qZrHI-eZnHKQBK4cZV60hslH8LALnk0uBfKsB-Q4WnY26SeGoYPi8HWHxu0O30',
'only_matching': True,
+ }, {
+ 'url': 'https://www.myvi.ru/watch/YwbqszQynUaHPn_s82sx0Q2',
+ 'only_matching': True,
+ }, {
+ 'url': 'myvi:YwbqszQynUaHPn_s82sx0Q2',
+ 'only_matching': True,
}]
@classmethod
'http://myvi.ru/player/api/Video/Get/%s?sig' % video_id, video_id)['sprutoData']
return self._extract_spruto(spruto, video_id)
+
+
+class MyviEmbedIE(InfoExtractor):
+    # Resolves myvi.tv embed/watch URLs to the underlying myvi video id and
+    # hands the actual extraction over to MyviIE via a "myvi:<id>" URL.
+    _VALID_URL = r'https?://(?:www\.)?myvi\.tv/(?:[^?]+\?.*?\bv=|embed/)(?P<id>[\da-z]+)'
+    _TESTS = [{
+        'url': 'https://www.myvi.tv/embed/ccdqic3wgkqwpb36x9sxg43t4r',
+        'info_dict': {
+            'id': 'b3ea0663-3234-469d-873e-7fecf36b31d1',
+            'ext': 'mp4',
+            'title': 'Твоя (original song).mp4',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 277,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://www.myvi.tv/idmi6o?v=ccdqic3wgkqwpb36x9sxg43t4r#watch',
+        'only_matching': True,
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        # URLs that MyviIE already accepts are left to MyviIE.
+        if MyviIE.suitable(url):
+            return False
+        return super(MyviEmbedIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        # Always fetch the canonical embed page, whichever URL form matched.
+        embed_url = 'https://www.myvi.tv/embed/%s' % video_id
+        embed_page = self._download_webpage(embed_url, video_id)
+
+        # The id MyviIE understands is taken from the "v=" parameter inside
+        # the CreatePlayer(...) call found in the embed page.
+        myvi_id = self._search_regex(
+            r'CreatePlayer\s*\(\s*["\'].*?\bv=([\da-zA-Z_]+)',
+            embed_page, 'video id')
+
+        target_url = 'myvi:%s' % myvi_id
+        return self.url_result(target_url, ie=MyviIE.ie_key())
class NationalGeographicIE(ThePlatformIE, AdobePassIE):
IE_NAME = 'natgeo'
- _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?[^/]+/(?:videos|episodes)/(?P<id>[^/?]+)'
+ _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:(?:wild/)?[^/]+/)?(?:videos|episodes)/(?P<id>[^/?]+)'
_TESTS = [
{
{
'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/episodes/the-power-of-miracles/',
'only_matching': True,
+ },
+ {
+ 'url': 'http://channel.nationalgeographic.com/videos/treasures-rediscovered/',
+ 'only_matching': True,
}
]
from __future__ import unicode_literals
import re
+import base64
from .common import InfoExtractor
from .theplatform import ThePlatformIE
class NBCOlympicsIE(InfoExtractor):
+ IE_NAME = 'nbcolympics'
_VALID_URL = r'https?://www\.nbcolympics\.com/video/(?P<id>[a-z-]+)'
_TEST = {
'ie_key': ThePlatformIE.ie_key(),
'display_id': display_id,
}
+
+
+class NBCOlympicsStreamIE(AdobePassIE):
+    # Extractor for live streams on stream.nbcolympics.com. The m3u8 URL is
+    # obtained from an Adobe signing endpoint using a media token returned
+    # by _extract_mvpd_auth.
+    IE_NAME = 'nbcolympics:stream'
+    _VALID_URL = r'https?://stream\.nbcolympics\.com/(?P<id>[0-9a-z-]+)'
+    _TEST = {
+        'url': 'http://stream.nbcolympics.com/2018-winter-olympics-nbcsn-evening-feb-8',
+        'info_dict': {
+            'id': '203493',
+            'ext': 'mp4',
+            'title': 're:Curling, Alpine, Luge [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }
+    # Filled with (data kind, pid), e.g. ('event_config', pid).
+    _DATA_URL_TEMPLATE = 'http://stream.nbcolympics.com/data/%s_%s.json'
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        page = self._download_webpage(url, display_id)
+
+        pid = self._search_regex(r'pid\s*=\s*(\d+);', page, 'pid')
+        # The captured resource text may still contain the literal
+        # "' + pid + '" concatenation; substitute the real pid for it.
+        raw_resource = self._search_regex(
+            r"resource\s*=\s*'(.+)';", page, 'resource')
+        resource = raw_resource.replace("' + pid + '", pid)
+
+        config_url = self._DATA_URL_TEMPLATE % ('event_config', pid)
+        event_config = self._download_json(config_url, pid)['eventConfig']
+        title = self._live_title(event_config['eventTitle'])
+
+        sources_url = self._DATA_URL_TEMPLATE % ('live_sources', pid)
+        sources = self._download_json(sources_url, pid)
+        # Only the first listed video source is used.
+        source_url = sources['videoSources'][0]['sourceUrl']
+
+        requestor_id = event_config.get('requestorId', 'NBCOlympics')
+        media_token = self._extract_mvpd_auth(
+            url, pid, requestor_id, resource)
+
+        # The signing endpoint takes the token and resource base64-encoded;
+        # its response body is used as the m3u8 URL.
+        sign_query = {
+            'cdn': 'akamai',
+            'mediaToken': base64.b64encode(media_token.encode()),
+            'resource': base64.b64encode(resource.encode()),
+            'url': source_url,
+        }
+        signed_url = self._download_webpage(
+            'http://sp.auth.adobe.com/tvs/v1/sign', pid, query=sign_query)
+        formats = self._extract_m3u8_formats(signed_url, pid, 'mp4')
+        self._sort_formats(formats)
+
+        info = {
+            'id': pid,
+            'display_id': display_id,
+            'title': title,
+            'formats': formats,
+            'is_live': True,
+        }
+        return info
self._check_formats(formats, media_id)
self._sort_formats(formats)
- uploader = self._search_regex(
- r'(?:Author|Writer)\s*<a[^>]+>([^<]+)', webpage, 'uploader',
+ uploader = self._html_search_regex(
+ (r'(?s)<h4[^>]*>(.+?)</h4>.*?<em>\s*Author\s*</em>',
+ r'(?:Author|Writer)\s*<a[^>]+>([^<]+)'), webpage, 'uploader',
fatal=False)
- timestamp = unified_timestamp(self._search_regex(
- r'<dt>Uploaded</dt>\s*<dd>([^<]+)', webpage, 'timestamp',
+ timestamp = unified_timestamp(self._html_search_regex(
+ (r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+</dd>\s*<dd>[^<]+)',
+ r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+)'), webpage, 'timestamp',
default=None))
duration = parse_duration(self._search_regex(
- r'<dd>Song\s*</dd><dd>.+?</dd><dd>([^<]+)', webpage, 'duration',
- default=None))
+ r'(?s)<dd>\s*Song\s*</dd>\s*<dd>.+?</dd>\s*<dd>([^<]+)', webpage,
+ 'duration', default=None))
filesize_approx = parse_filesize(self._html_search_regex(
- r'<dd>Song\s*</dd><dd>(.+?)</dd>', webpage, 'filesize',
+ r'(?s)<dd>\s*Song\s*</dd>\s*<dd>(.+?)</dd>', webpage, 'filesize',
default=None))
if len(formats) == 1:
formats[0]['filesize_approx'] = filesize_approx
_VALID_URL = r'''(?x)
(?:
https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P<domain_id>\d+)/videos/byid/|
- nexx:(?P<domain_id_s>\d+):
+ nexx:(?:(?P<domain_id_s>\d+):)?|
+ https?://arc\.nexx\.cloud/api/video/
)
(?P<id>\d+)
'''
'params': {
'skip_download': True,
},
+ }, {
+ # does not work via arc
+ 'url': 'nexx:741:1269984',
+ 'md5': 'c714b5b238b2958dc8d5642addba6886',
+ 'info_dict': {
+ 'id': '1269984',
+ 'ext': 'mp4',
+ 'title': '1 TAG ohne KLO... wortwörtlich! 😑',
+ 'alt_title': '1 TAG ohne KLO... wortwörtlich! 😑',
+ 'description': 'md5:4604539793c49eda9443ab5c5b1d612f',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 607,
+ 'timestamp': 1518614955,
+ 'upload_date': '20180214',
+ },
}, {
'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907',
'only_matching': True,
}, {
'url': 'nexx:748:128907',
'only_matching': True,
+ }, {
+ 'url': 'nexx:128907',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://arc.nexx.cloud/api/video/128907.json',
+ 'only_matching': True,
}]
@staticmethod
domain_id = mobj.group('domain_id') or mobj.group('domain_id_s')
video_id = mobj.group('id')
- # Reverse engineered from JS code (see getDeviceID function)
- device_id = '%d:%d:%d%d' % (
- random.randint(1, 4), int(time.time()),
- random.randint(1e4, 99999), random.randint(1, 9))
-
- result = self._call_api(domain_id, 'session/init', video_id, data={
- 'nxp_devh': device_id,
- 'nxp_userh': '',
- 'precid': '0',
- 'playlicense': '0',
- 'screenx': '1920',
- 'screeny': '1080',
- 'playerversion': '6.0.00',
- 'gateway': 'html5',
- 'adGateway': '',
- 'explicitlanguage': 'en-US',
- 'addTextTemplates': '1',
- 'addDomainData': '1',
- 'addAdModel': '1',
- }, headers={
- 'X-Request-Enable-Auth-Fallback': '1',
- })
-
- cid = result['general']['cid']
-
- # As described in [1] X-Request-Token generation algorithm is
- # as follows:
- # md5( operation + domain_id + domain_secret )
- # where domain_secret is a static value that will be given by nexx.tv
- # as per [1]. Here is how this "secret" is generated (reversed
- # from _play.api.init function, search for clienttoken). So it's
- # actually not static and not that much of a secret.
- # 1. https://nexxtvstorage.blob.core.windows.net/files/201610/27.pdf
- secret = result['device']['clienttoken'][int(device_id[0]):]
- secret = secret[0:len(secret) - int(device_id[-1])]
-
- op = 'byid'
-
- # Reversed from JS code for _play.api.call function (search for
- # X-Request-Token)
- request_token = hashlib.md5(
- ''.join((op, domain_id, secret)).encode('utf-8')).hexdigest()
-
- video = self._call_api(
- domain_id, 'videos/%s/%s' % (op, video_id), video_id, data={
- 'additionalfields': 'language,channel,actors,studio,licenseby,slug,subtitle,teaser,description',
- 'addInteractionOptions': '1',
- 'addStatusDetails': '1',
- 'addStreamDetails': '1',
- 'addCaptions': '1',
- 'addScenes': '1',
- 'addHotSpots': '1',
- 'addBumpers': '1',
- 'captionFormat': 'data',
+ video = None
+
+ response = self._download_json(
+ 'https://arc.nexx.cloud/api/video/%s.json' % video_id,
+ video_id, fatal=False)
+ if response and isinstance(response, dict):
+ result = response.get('result')
+ if result and isinstance(result, dict):
+ video = result
+
+ # not all videos work via arc, e.g. nexx:741:1269984
+ if not video:
+ # Reverse engineered from JS code (see getDeviceID function)
+ device_id = '%d:%d:%d%d' % (
+ random.randint(1, 4), int(time.time()),
+ random.randint(1e4, 99999), random.randint(1, 9))
+
+ result = self._call_api(domain_id, 'session/init', video_id, data={
+ 'nxp_devh': device_id,
+ 'nxp_userh': '',
+ 'precid': '0',
+ 'playlicense': '0',
+ 'screenx': '1920',
+ 'screeny': '1080',
+ 'playerversion': '6.0.00',
+ 'gateway': 'html5',
+ 'adGateway': '',
+ 'explicitlanguage': 'en-US',
+ 'addTextTemplates': '1',
+ 'addDomainData': '1',
+ 'addAdModel': '1',
}, headers={
- 'X-Request-CID': cid,
- 'X-Request-Token': request_token,
+ 'X-Request-Enable-Auth-Fallback': '1',
})
+ cid = result['general']['cid']
+
+ # As described in [1] X-Request-Token generation algorithm is
+ # as follows:
+ # md5( operation + domain_id + domain_secret )
+ # where domain_secret is a static value that will be given by nexx.tv
+ # as per [1]. Here is how this "secret" is generated (reversed
+ # from _play.api.init function, search for clienttoken). So it's
+ # actually not static and not that much of a secret.
+ # 1. https://nexxtvstorage.blob.core.windows.net/files/201610/27.pdf
+ secret = result['device']['clienttoken'][int(device_id[0]):]
+ secret = secret[0:len(secret) - int(device_id[-1])]
+
+ op = 'byid'
+
+ # Reversed from JS code for _play.api.call function (search for
+ # X-Request-Token)
+ request_token = hashlib.md5(
+ ''.join((op, domain_id, secret)).encode('utf-8')).hexdigest()
+
+ video = self._call_api(
+ domain_id, 'videos/%s/%s' % (op, video_id), video_id, data={
+ 'additionalfields': 'language,channel,actors,studio,licenseby,slug,subtitle,teaser,description',
+ 'addInteractionOptions': '1',
+ 'addStatusDetails': '1',
+ 'addStreamDetails': '1',
+ 'addCaptions': '1',
+ 'addScenes': '1',
+ 'addHotSpots': '1',
+ 'addBumpers': '1',
+ 'captionFormat': 'data',
+ }, headers={
+ 'X-Request-CID': cid,
+ 'X-Request-Token': request_token,
+ })
+
general = video['general']
title = general['title']
class NickRuIE(MTVServicesInfoExtractor):
IE_NAME = 'nickelodeonru'
- _VALID_URL = r'https?://(?:www\.)nickelodeon\.(?:ru|fr|es|pt|ro|hu)/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _VALID_URL = r'https?://(?:www\.)nickelodeon\.(?:ru|fr|es|pt|ro|hu|com\.tr)/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.nickelodeon.ru/shows/henrydanger/videos/episodes/3-sezon-15-seriya-licenziya-na-polyot/pmomfb#playlist/7airc6',
'only_matching': True,
}, {
'url': 'http://www.nickelodeon.hu/musorok/spongyabob-kockanadrag/videok/episodes/buborekfujas-az-elszakadt-nadrag/q57iob#playlist/k6te4y',
'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.com.tr/programlar/sunger-bob/videolar/kayip-yatak/mgqbjy',
+ 'only_matching': True,
}]
def _real_extract(self, url):
_TESTS = [{
'url': 'http://9gag.com/tv/p/Kk2X5/people-are-awesome-2013-is-absolutely-awesome',
'info_dict': {
- 'id': 'Kk2X5',
+ 'id': 'kXzwOKyGlSA',
'ext': 'mp4',
'description': 'This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)',
'title': '\"People Are Awesome 2013\" Is Absolutely Awesome',
webpage, urlh = self._download_webpage_handle(
'https://njpwworld.com/auth/login', None,
note='Logging in', errnote='Unable to login',
- data=urlencode_postdata({'login_id': username, 'pw': password}))
+ data=urlencode_postdata({'login_id': username, 'pw': password}),
+ headers={'Referer': 'https://njpwworld.com/auth'})
# /auth/login will return 302 for successful logins
if urlh.geturl() == 'https://njpwworld.com/auth/login':
self.report_warning('unable to login')
determine_ext,
ExtractorError,
fix_xml_ampersands,
+ int_or_none,
orderedSet,
parse_duration,
qualities,
npo\.nl/(?!(?:live|radio)/)(?:[^/]+/){2}|
ntr\.nl/(?:[^/]+/){2,}|
omroepwnl\.nl/video/fragment/[^/]+__|
- (?:zapp|npo3)\.nl/(?:[^/]+/){2}
+ (?:zapp|npo3)\.nl/(?:[^/]+/){2,}
)
)
(?P<id>[^/?#]+)
}, {
'url': 'http://www.npo.nl/radio-gaga/13-06-2017/BNN_101383373',
'only_matching': True,
+ }, {
+ 'url': 'https://www.zapp.nl/1803-skelterlab/instructie-video-s/740-instructievideo-s/POMS_AT_11736927',
+ 'only_matching': True,
}]
def _real_extract(self, url):
transform_source=strip_jsonp,
)
+ error = metadata.get('error')
+ if error:
+ raise ExtractorError(error, expected=True)
+
# For some videos actual video id (prid) is different (e.g. for
# http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698
# video id is POMS_WNL_853698 but prid is POW_00996502)
formats = []
urls = set()
- quality = qualities(['adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std'])
+ def is_legal_url(format_url):
+ return format_url and format_url not in urls and re.match(
+ r'^(?:https?:)?//', format_url)
+
+ QUALITY_LABELS = ('Laag', 'Normaal', 'Hoog')
+ QUALITY_FORMATS = ('adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std')
+
+ quality_from_label = qualities(QUALITY_LABELS)
+ quality_from_format_id = qualities(QUALITY_FORMATS)
items = self._download_json(
'http://ida.omroep.nl/app.php/%s' % video_id, video_id,
'Downloading formats JSON', query={
})['items'][0]
for num, item in enumerate(items):
item_url = item.get('url')
- if not item_url or item_url in urls:
+ if not is_legal_url(item_url):
continue
urls.add(item_url)
format_id = self._search_regex(
r'video/ida/([^/]+)', item_url, 'format id',
default=None)
+ item_label = item.get('label')
+
def add_format_url(format_url):
+ width = int_or_none(self._search_regex(
+ r'(\d+)[xX]\d+', format_url, 'width', default=None))
+ height = int_or_none(self._search_regex(
+ r'\d+[xX](\d+)', format_url, 'height', default=None))
+ if item_label in QUALITY_LABELS:
+ quality = quality_from_label(item_label)
+ f_id = item_label
+ elif item_label in QUALITY_FORMATS:
+ quality = quality_from_format_id(format_id)
+ f_id = format_id
+ else:
+ quality, f_id = [None] * 2
formats.append({
'url': format_url,
- 'format_id': format_id,
- 'quality': quality(format_id),
+ 'format_id': f_id,
+ 'width': width,
+ 'height': height,
+ 'quality': quality,
})
# Example: http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706
stream_info = self._download_json(
item_url + '&type=json', video_id,
'Downloading %s stream JSON'
- % item.get('label') or item.get('format') or format_id or num)
+ % item_label or item.get('format') or format_id or num)
except ExtractorError as ee:
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
error = (self._parse_json(
if not is_live:
for num, stream in enumerate(metadata.get('streams', [])):
stream_url = stream.get('url')
- if not stream_url or stream_url in urls:
+ if not is_legal_url(stream_url):
continue
urls.add(stream_url)
# smooth streaming is not supported
def _real_extract(self, url):
token = self._match_id(url)
- broadcast_data = self._call_api(
- 'getBroadcastPublic', {'broadcast_id': token}, token)
- broadcast = broadcast_data['broadcast']
- status = broadcast['status']
+ stream = self._call_api(
+ 'accessVideoPublic', {'broadcast_id': token}, token)
- user = broadcast_data.get('user', {})
+ broadcast = stream['broadcast']
+ title = broadcast['status']
- uploader = broadcast.get('user_display_name') or user.get('display_name')
- uploader_id = (broadcast.get('username') or user.get('username') or
- broadcast.get('user_id') or user.get('id'))
+ uploader = broadcast.get('user_display_name') or broadcast.get('username')
+ uploader_id = (broadcast.get('user_id') or broadcast.get('username'))
- title = '%s - %s' % (uploader, status) if uploader else status
+ title = '%s - %s' % (uploader, title) if uploader else title
state = broadcast.get('state').lower()
if state == 'running':
title = self._live_title(title)
'url': broadcast[image],
} for image in ('image_url', 'image_url_small') if broadcast.get(image)]
- stream = self._call_api(
- 'getAccessPublic', {'broadcast_id': token}, token)
-
video_urls = set()
formats = []
for format_id in ('replay', 'rtmp', 'hls', 'https_hls', 'lhls', 'lhlsweb'):
import re
from .common import InfoExtractor
+from ..compat import compat_urlparse
from ..utils import (
+ determine_ext,
ExtractorError,
int_or_none,
xpath_text,
(?P<id>\d+)
'''
_TESTS = [{
- # http://muz-tv.ru/kinozal/view/7400/
- 'url': 'http://out.pladform.ru/player?pl=24822&videoid=100183293',
- 'md5': '61f37b575dd27f1bb2e1854777fe31f4',
+ 'url': 'https://out.pladform.ru/player?pl=64471&videoid=3777899&vk_puid15=0&vk_puid34=0',
+ 'md5': '53362fac3a27352da20fa2803cc5cd6f',
'info_dict': {
- 'id': '100183293',
+ 'id': '3777899',
'ext': 'mp4',
- 'title': 'ТайнÑ\8b пеÑ\80евала Ð\94Ñ\8fÑ\82лова â\80¢ 1 Ñ\81еÑ\80иÑ\8f 2 Ñ\87аÑ\81Ñ\82Ñ\8c',
- 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
+ 'title': 'СТУÐ\94Ð\98Я СÐ\9eЮÐ\97 â\80¢ ШоÑ\83 СÑ\82Ñ\83диÑ\8f СоÑ\8eз, 24 вÑ\8bпÑ\83Ñ\81к (01.02.2018) Ð\9dÑ\83Ñ\80лан СабÑ\83Ñ\80ов и Слава Ð\9aомиÑ\81Ñ\81аÑ\80енко',
+ 'description': 'md5:05140e8bf1b7e2d46e7ba140be57fd95',
'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 694,
- 'age_limit': 0,
+ 'duration': 3190,
},
}, {
'url': 'http://static.pladform.ru/player.swf?pl=21469&videoid=100183293&vkcid=0',
def _real_extract(self, url):
video_id = self._match_id(url)
+ qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ pl = qs.get('pl', ['1'])[0]
+
video = self._download_xml(
- 'http://out.pladform.ru/getVideo?pl=1&videoid=%s' % video_id,
- video_id)
+ 'http://out.pladform.ru/getVideo', video_id, query={
+ 'pl': pl,
+ 'videoid': video_id,
+ })
- if video.tag == 'error':
+ def fail(text):
raise ExtractorError(
- '%s returned error: %s' % (self.IE_NAME, video.text),
+ '%s returned error: %s' % (self.IE_NAME, text),
expected=True)
+ if video.tag == 'error':
+ fail(video.text)
+
quality = qualities(('ld', 'sd', 'hd'))
- formats = [{
- 'url': src.text,
- 'format_id': src.get('quality'),
- 'quality': quality(src.get('quality')),
- } for src in video.findall('./src')]
+ formats = []
+ for src in video.findall('./src'):
+ if src is None:
+ continue
+ format_url = src.text
+ if not format_url:
+ continue
+ if src.get('type') == 'hls' or determine_ext(format_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'url': src.text,
+ 'format_id': src.get('quality'),
+ 'quality': quality(src.get('quality')),
+ })
+
+ if not formats:
+ error = xpath_text(video, './cap', 'error', default=None)
+ if error:
+ fail(error)
+
self._sort_formats(formats)
webpage = self._download_webpage(
class PokemonIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/[^/]+/\d+_\d+-(?P<display_id>[^/?#]+))'
+ _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/(?:[^/]+/)+(?P<display_id>[^/?#&]+))'
_TESTS = [{
- 'url': 'http://www.pokemon.com/us/pokemon-episodes/19_01-from-a-to-z/?play=true',
- 'md5': '9fb209ae3a569aac25de0f5afc4ee08f',
+ 'url': 'https://www.pokemon.com/us/pokemon-episodes/20_30-the-ol-raise-and-switch/',
+ 'md5': '2fe8eaec69768b25ef898cda9c43062e',
'info_dict': {
- 'id': 'd0436c00c3ce4071ac6cee8130ac54a1',
+ 'id': 'afe22e30f01c41f49d4f1d9eab5cd9a4',
'ext': 'mp4',
- 'title': 'From A to Z!',
- 'description': 'Bonnie makes a new friend, Ash runs into an old friend, and a terrifying premonition begins to unfold!',
- 'timestamp': 1460478136,
- 'upload_date': '20160412',
+ 'title': 'The Ol’ Raise and Switch!',
+ 'description': 'md5:7db77f7107f98ba88401d3adc80ff7af',
+ 'timestamp': 1511824728,
+ 'upload_date': '20171127',
+ },
+ 'add_id': ['LimelightMedia'],
+ }, {
+ # no data-video-title
+ 'url': 'https://www.pokemon.com/us/pokemon-episodes/pokemon-movies/pokemon-the-rise-of-darkrai-2008',
+ 'info_dict': {
+ 'id': '99f3bae270bf4e5097274817239ce9c8',
+ 'ext': 'mp4',
+ 'title': 'Pokémon: The Rise of Darkrai',
+ 'description': 'md5:ea8fbbf942e1e497d54b19025dd57d9d',
+ 'timestamp': 1417778347,
+ 'upload_date': '20141205',
+ },
+ 'add_id': ['LimelightMedia'],
+ 'params': {
+ 'skip_download': True,
},
- 'add_id': ['LimelightMedia']
}, {
'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2',
'only_matching': True,
r'(<[^>]+data-video-id="%s"[^>]*>)' % (video_id if video_id else '[a-z0-9]{32}'),
webpage, 'video data element'))
video_id = video_data['data-video-id']
- title = video_data['data-video-title']
+ title = video_data.get('data-video-title') or self._html_search_meta(
+ 'pkm-title', webpage, ' title', default=None) or self._search_regex(
+ r'<h1[^>]+\bclass=["\']us-title[^>]+>([^<]+)', webpage, 'title')
return {
'_type': 'url_transparent',
'id': video_id,
def _real_extract(self, url):
video_id = self._match_id(url)
+ self._set_cookie('pornhub.com', 'age_verified', '1')
+
def dl_webpage(platform):
+ self._set_cookie('pornhub.com', 'platform', platform)
return self._download_webpage(
'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id,
- video_id, headers={
- 'Cookie': 'age_verified=1; platform=%s' % platform,
- })
+ video_id)
webpage = dl_webpage('pc')
class PornHubUserVideosIE(PornHubPlaylistBaseIE):
- _VALID_URL = r'https?://(?:www\.)?pornhub\.com/users/(?P<id>[^/]+)/videos'
+ _VALID_URL = r'https?://(?:www\.)?pornhub\.com/(?:user|channel)s/(?P<id>[^/]+)/videos'
_TESTS = [{
'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
'info_dict': {
}, {
'url': 'http://www.pornhub.com/users/rushandlia/videos',
'only_matching': True,
+ }, {
+ # default sorting as Top Rated Videos
+ 'url': 'https://www.pornhub.com/channels/povd/videos',
+ 'info_dict': {
+ 'id': 'povd',
+ },
+ 'playlist_mincount': 293,
+ }, {
+ # Top Rated Videos
+ 'url': 'https://www.pornhub.com/channels/povd/videos?o=ra',
+ 'only_matching': True,
+ }, {
+ # Most Recent Videos
+ 'url': 'https://www.pornhub.com/channels/povd/videos?o=da',
+ 'only_matching': True,
+ }, {
+ # Most Viewed Videos
+ 'url': 'https://www.pornhub.com/channels/povd/videos?o=vi',
+ 'only_matching': True,
}]
def _real_extract(self, url):
https?://
(?:www\.)?
(?:
+ (?:beta\.)?
(?:
prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv|advopedia
)\.(?:de|at|ch)|
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .vimeo import VimeoIE
+from ..utils import (
+ extract_attributes,
+ ExtractorError,
+ smuggle_url,
+ unsmuggle_url,
+ urljoin,
+)
+
+
+class RayWenderlichIE(InfoExtractor):
+    # Extractor for videos.raywenderlich.com lesson pages. A lesson URL is
+    # resolved either to the single Vimeo video embedded in that lesson or,
+    # unless --no-playlist is given, to a playlist of all lessons linked
+    # from the page.
+    _VALID_URL = r'https?://videos\.raywenderlich\.com/courses/(?P<course_id>[^/]+)/lessons/(?P<id>\d+)'
+
+    _TESTS = [{
+        'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
+        'info_dict': {
+            'id': '248377018',
+            'ext': 'mp4',
+            'title': 'Testing In iOS Episode 1: Introduction',
+            'duration': 133,
+            'uploader': 'Ray Wenderlich',
+            'uploader_id': 'user3304672',
+        },
+        'params': {
+            'noplaylist': True,
+            'skip_download': True,
+        },
+        'add_ie': [VimeoIE.ie_key()],
+        'expected_warnings': ['HTTP Error 403: Forbidden'],
+    }, {
+        'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
+        'info_dict': {
+            'title': 'Testing in iOS',
+            'id': '105-testing-in-ios',
+        },
+        'params': {
+            'noplaylist': False,
+        },
+        'playlist_count': 29,
+    }]
+
+    def _real_extract(self, url):
+        # Playlist entries created below carry a smuggled 'force_video' flag
+        # so that they resolve to a single video instead of a playlist again.
+        url, smuggled_data = unsmuggle_url(url, {})
+
+        mobj = re.match(self._VALID_URL, url)
+        course_id, lesson_id = mobj.group('course_id', 'id')
+        video_id = '%s/%s' % (course_id, lesson_id)
+
+        webpage = self._download_webpage(url, video_id)
+
+        no_playlist = self._downloader.params.get('noplaylist')
+        if no_playlist or smuggled_data.get('force_video', False):
+            if no_playlist:
+                self.to_screen(
+                    'Downloading just video %s because of --no-playlist'
+                    % video_id)
+            if '>Subscribe to unlock' in webpage:
+                raise ExtractorError(
+                    'This content is only available for subscribers',
+                    expected=True)
+            vimeo_id = self._search_regex(
+                r'data-vimeo-id=["\'](\d+)', webpage, 'video id')
+            # The lesson URL is passed along as referrer for the Vimeo player.
+            return self.url_result(
+                VimeoIE._smuggle_referrer(
+                    'https://player.vimeo.com/video/%s' % vimeo_id, url),
+                ie=VimeoIE.ie_key(), video_id=vimeo_id)
+
+        self.to_screen(
+            'Downloading playlist %s - add --no-playlist to just download video'
+            % course_id)
+
+        # Start from the requested lesson and add every lesson linked from
+        # the page; the set removes duplicates.
+        lesson_ids = set((lesson_id, ))
+        for lesson in re.findall(
+                r'(<a[^>]+\bclass=["\']lesson-link[^>]+>)', webpage):
+            attrs = extract_attributes(lesson)
+            if not attrs:
+                continue
+            lesson_url = attrs.get('href')
+            if not lesson_url:
+                continue
+            linked_lesson_id = self._search_regex(
+                r'/lessons/(\d+)', lesson_url, 'lesson id', default=None)
+            if not linked_lesson_id:
+                continue
+            lesson_ids.add(linked_lesson_id)
+
+        entries = []
+        # Lesson ids are digit strings (see the regexes above), so they are
+        # ordered numerically; plain string sorting would put '10' before
+        # '2' and scramble the playlist order.
+        for entry_lesson_id in sorted(lesson_ids, key=int):
+            entries.append(self.url_result(
+                smuggle_url(
+                    urljoin(url, entry_lesson_id), {'force_video': True}),
+                ie=RayWenderlichIE.ie_key()))
+
+        title = self._search_regex(
+            r'class=["\']course-title[^>]+>([^<]+)', webpage, 'course title',
+            default=None)
+
+        return self.playlist_result(entries, course_id, title)
from ..compat import compat_HTTPError
from ..utils import (
float_or_none,
- int_or_none,
- try_get,
- # unified_timestamp,
ExtractorError,
)
class RedBullTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?redbull\.tv/(?:video|film|live)/(?:AP-\w+/segment/)?(?P<id>AP-\w+)'
+ _VALID_URL = r'https?://(?:www\.)?redbull\.tv/video/(?P<id>AP-\w+)'
_TESTS = [{
# film
- 'url': 'https://www.redbull.tv/video/AP-1Q756YYX51W11/abc-of-wrc',
+ 'url': 'https://www.redbull.tv/video/AP-1Q6XCDTAN1W11',
'md5': 'fb0445b98aa4394e504b413d98031d1f',
'info_dict': {
- 'id': 'AP-1Q756YYX51W11',
+ 'id': 'AP-1Q6XCDTAN1W11',
'ext': 'mp4',
- 'title': 'ABC of...WRC',
+ 'title': 'ABC of... WRC - ABC of... S1E6',
'description': 'md5:5c7ed8f4015c8492ecf64b6ab31e7d31',
'duration': 1582.04,
- # 'timestamp': 1488405786,
- # 'upload_date': '20170301',
},
}, {
# episode
- 'url': 'https://www.redbull.tv/video/AP-1PMT5JCWH1W11/grime?playlist=shows:shows-playall:web',
+ 'url': 'https://www.redbull.tv/video/AP-1PMHKJFCW1W11',
'info_dict': {
- 'id': 'AP-1PMT5JCWH1W11',
+ 'id': 'AP-1PMHKJFCW1W11',
'ext': 'mp4',
- 'title': 'Grime - Hashtags S2 E4',
- 'description': 'md5:334b741c8c1ce65be057eab6773c1cf5',
+ 'title': 'Grime - Hashtags S2E4',
+ 'description': 'md5:b5f522b89b72e1e23216e5018810bb25',
'duration': 904.6,
- # 'timestamp': 1487290093,
- # 'upload_date': '20170217',
- 'series': 'Hashtags',
- 'season_number': 2,
- 'episode_number': 4,
},
'params': {
'skip_download': True,
},
- }, {
- # segment
- 'url': 'https://www.redbull.tv/live/AP-1R5DX49XS1W11/segment/AP-1QSAQJ6V52111/semi-finals',
- 'info_dict': {
- 'id': 'AP-1QSAQJ6V52111',
- 'ext': 'mp4',
- 'title': 'Semi Finals - Vans Park Series Pro Tour',
- 'description': 'md5:306a2783cdafa9e65e39aa62f514fd97',
- 'duration': 11791.991,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'https://www.redbull.tv/film/AP-1MSKKF5T92111/in-motion',
- 'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
session = self._download_json(
- 'https://api-v2.redbull.tv/session', video_id,
+ 'https://api.redbull.tv/v3/session', video_id,
note='Downloading access token', query={
- 'build': '4.370.0',
'category': 'personal_computer',
- 'os_version': '1.0',
'os_family': 'http',
})
if session.get('code') == 'error':
raise ExtractorError('%s said: %s' % (
self.IE_NAME, session['message']))
- auth = '%s %s' % (session.get('token_type', 'Bearer'), session['access_token'])
+ token = session['token']
try:
- info = self._download_json(
- 'https://api-v2.redbull.tv/content/%s' % video_id,
+ video = self._download_json(
+ 'https://api.redbull.tv/v3/products/' + video_id,
video_id, note='Downloading video information',
- headers={'Authorization': auth}
+ headers={'Authorization': token}
)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
error_message = self._parse_json(
- e.cause.read().decode(), video_id)['message']
+ e.cause.read().decode(), video_id)['error']
raise ExtractorError('%s said: %s' % (
self.IE_NAME, error_message), expected=True)
raise
- video = info['video_product']
-
- title = info['title'].strip()
+ title = video['title'].strip()
formats = self._extract_m3u8_formats(
- video['url'], video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls')
+ 'https://dms.redbull.tv/v3/%s/%s/playlist.m3u8' % (video_id, token),
+ video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
self._sort_formats(formats)
subtitles = {}
- for _, captions in (try_get(
- video, lambda x: x['attachments']['captions'],
- dict) or {}).items():
- if not captions or not isinstance(captions, list):
- continue
- for caption in captions:
- caption_url = caption.get('url')
- if not caption_url:
- continue
- ext = caption.get('format')
- if ext == 'xml':
- ext = 'ttml'
- subtitles.setdefault(caption.get('lang') or 'en', []).append({
- 'url': caption_url,
- 'ext': ext,
- })
+ for resource in video.get('resources', []):
+ if resource.startswith('closed_caption_'):
+ splitted_resource = resource.split('_')
+ if splitted_resource[2]:
+ subtitles.setdefault('en', []).append({
+ 'url': 'https://resources.redbull.tv/%s/%s' % (video_id, resource),
+ 'ext': splitted_resource[2],
+ })
- subheading = info.get('subheading')
+ subheading = video.get('subheading')
if subheading:
title += ' - %s' % subheading
return {
'id': video_id,
'title': title,
- 'description': info.get('long_description') or info.get(
+ 'description': video.get('long_description') or video.get(
'short_description'),
'duration': float_or_none(video.get('duration'), scale=1000),
- # 'timestamp': unified_timestamp(info.get('published')),
- 'series': info.get('show_title'),
- 'season_number': int_or_none(info.get('season_number')),
- 'episode_number': int_or_none(info.get('episode_number')),
'formats': formats,
'subtitles': subtitles,
}
_TEST = {
# from https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/
'url': 'https://v.redd.it/zv89llsvexdz',
- 'md5': '655d06ace653ea3b87bccfb1b27ec99d',
+ 'md5': '0a070c53eba7ec4534d95a5a1259e253',
'info_dict': {
'id': 'zv89llsvexdz',
'ext': 'mp4',
_VALID_URL = r'https?://(?:(?:www\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://www.redtube.com/66418',
- 'md5': '7b8c22b5e7098a3e1c09709df1126d2d',
+ 'md5': 'fc08071233725f26b8f014dba9590005',
'info_dict': {
'id': '66418',
'ext': 'mp4',
'title': 'Sucked on a toilet',
- 'upload_date': '20120831',
+ 'upload_date': '20110811',
'duration': 596,
'view_count': int,
'age_limit': 18,
raise ExtractorError('Video %s has been removed' % video_id, expected=True)
title = self._html_search_regex(
- (r'<h1 class="videoTitle[^"]*">(?P<title>.+?)</h1>',
- r'videoTitle\s*:\s*(["\'])(?P<title>)\1'),
- webpage, 'title', group='title')
+ (r'<h(\d)[^>]+class="(?:video_title_text|videoTitle)[^"]*">(?P<title>(?:(?!\1).)+)</h\1>',
+ r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',),
+ webpage, 'title', group='title',
+ default=None) or self._og_search_title(webpage)
formats = []
sources = self._parse_json(
thumbnail = self._og_search_thumbnail(webpage)
upload_date = unified_strdate(self._search_regex(
- r'<span[^>]+class="added-time"[^>]*>ADDED ([^<]+)<',
+ r'<span[^>]+>ADDED ([^<]+)<',
webpage, 'upload date', fatal=False))
- duration = int_or_none(self._search_regex(
- r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None))
+ duration = int_or_none(self._og_search_property(
+ 'video:duration', webpage, default=None) or self._search_regex(
+ r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None))
view_count = str_to_int(self._search_regex(
- r'<span[^>]*>VIEWS</span></td>\s*<td>([\d,.]+)',
+ (r'<div[^>]*>Views</div>\s*<div[^>]*>\s*([\d,.]+)',
+ r'<span[^>]*>VIEWS</span>\s*</td>\s*<td>\s*([\d,.]+)'),
webpage, 'view count', fatal=False))
# No self-labeling, but they describe themselves as
meta = info.get('meta', {})
- # m3u8 streams are encrypted and may not be handled properly by older ffmpeg/avconv.
- # To workaround this previously adaptive -> flash trick was used to obtain
- # unencrypted m3u8 streams (see https://github.com/rg3/youtube-dl/issues/4118)
- # and bypass georestrictions as well.
- # Currently, unencrypted m3u8 playlists are (intentionally?) invalid and therefore
- # unusable albeit can be fixed by simple string replacement (see
- # https://github.com/rg3/youtube-dl/pull/6337)
- # Since recent ffmpeg and avconv handle encrypted streams just fine encrypted
- # streams are used now.
videopath = material['videopath']
m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath
formats = self._extract_m3u8_formats(
m3u8_url, uuid, 'mp4', m3u8_id='hls', fatal=False)
-
- video_urlpart = videopath.split('/adaptive/')[1][:-5]
- PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4'
-
- PG_FORMATS = (
- ('a2t', 512, 288),
- ('a3t', 704, 400),
- ('nettv', 1280, 720),
- )
-
- def pg_format(format_id, width, height):
- return {
- 'url': PG_URL_TEMPLATE % (format_id, video_urlpart),
- 'format_id': 'pg-%s' % format_id,
- 'protocol': 'http',
- 'width': width,
- 'height': height,
- }
-
- if not formats:
- formats = [pg_format(*pg_tuple) for pg_tuple in PG_FORMATS]
- else:
- pg_formats = []
- for format_id, width, height in PG_FORMATS:
- try:
- # Find hls format with the same width and height corresponding
- # to progressive format and copy metadata from it.
- f = next(f for f in formats if f.get('height') == height)
- # hls formats may have invalid width
- f['width'] = width
- f_copy = f.copy()
- f_copy.update(pg_format(format_id, width, height))
- pg_formats.append(f_copy)
- except StopIteration:
- # Missing hls format does mean that no progressive format with
- # such width and height exists either.
- pass
- formats.extend(pg_formats)
self._sort_formats(formats)
thumbnails = []
'age_limit': 0,
},
},
+ # Episode where <SourceFile> is "NOT-USED", but has other
+ # downloadable sources available.
+ {
+ 'url': 'http://www.ruutu.fi/video/3193728',
+ 'only_matching': True,
+ },
]
def _real_extract(self, url):
video_url = child.text
if (not video_url or video_url in processed_urls or
any(p in video_url for p in ('NOT_USED', 'NOT-USED'))):
- return
+ continue
processed_urls.append(video_url)
ext = determine_ext(video_url)
if ext == 'm3u8':
webpage = self._download_webpage(url, article_id)
info = self._search_json_ld(webpage, article_id, default={})
- print(info)
title = info.get('title') or self._og_search_title(webpage, fatal=False)
description = info.get('description') or self._og_search_description(webpage)
import re
from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import (
+ compat_parse_qs,
+ compat_str,
+ compat_urllib_parse_urlparse,
+)
from ..utils import (
determine_ext,
int_or_none,
urls = []
quality_key = qualities(['lq', 'sd', 'hq', 'hd'])
formats = []
+ subtitles = {}
for asset in clip_data['assets']:
asset_url = asset.get('full_physical_path')
protocol = asset.get('protocol')
urls.append(asset_url)
container = asset.get('video_container')
ext = determine_ext(asset_url)
+ if protocol == 'http_subtitle' or ext == 'vtt':
+ subtitles.setdefault('fr', []).append({'url': asset_url})
+ continue
if container == 'm3u8' or ext == 'm3u8':
- if protocol == 'usp':
+ if protocol == 'usp' and not compat_parse_qs(compat_urllib_parse_urlparse(asset_url).query).get('token', [None])[0]:
asset_url = re.sub(r'/([^/]+)\.ism/[^/]*\.m3u8', r'/\1.ism/\1.m3u8', asset_url)
formats.extend(self._extract_m3u8_formats(
asset_url, video_id, 'mp4', 'm3u8_native',
'duration': int_or_none(clip_data.get('duration')),
'series': get(lambda x: x['program']['title']),
'formats': formats,
+ 'subtitles': subtitles,
}
def _real_extract(self, url):
brightcove_id = self._match_id(url)
return self.url_result(
- smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {'geo_countries': ['IN']}),
+ smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {
+ 'geo_countries': ['IN'],
+ 'referrer': url,
+ }),
'BrightcoveNew', brightcove_id)
},
]
- _CLIENT_ID = 'DQskPX1pntALRzMp4HSxya3Mc0AO66Ro'
- _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
+ _CLIENT_ID = 'LvWovRaJZlWCHql0bISuum8Bd2KX79mb'
@staticmethod
def _extract_urls(webpage):
import re
from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+ ExtractorError,
+ parse_duration,
+ parse_resolution,
+ str_to_int,
+)
class SpankBangIE(InfoExtractor):
'id': '3vvn',
'ext': 'mp4',
'title': 'fantasy solo',
- 'description': 'Watch fantasy solo free HD porn video - 05 minutes - Babe,Masturbation,Solo,Toy - dillion harper masturbates on a bed free adult movies sexy clips.',
+ 'description': 'dillion harper masturbates on a bed',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'silly2587',
'age_limit': 18,
# mobile page
'url': 'http://m.spankbang.com/1o2de/video/can+t+remember+her+name',
'only_matching': True,
+ }, {
+ # 4k
+ 'url': 'https://spankbang.com/1vwqx/video/jade+kush+solo+4k',
+ 'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+ webpage = self._download_webpage(url, video_id, headers={
+ 'Cookie': 'country=US'
+ })
if re.search(r'<[^>]+\bid=["\']video_removed', webpage):
raise ExtractorError(
'Video %s is not available' % video_id, expected=True)
- stream_key = self._html_search_regex(
- r'''var\s+stream_key\s*=\s*['"](.+?)['"]''',
- webpage, 'stream key')
-
- formats = [{
- 'url': 'http://spankbang.com/_%s/%s/title/%sp__mp4' % (video_id, stream_key, height),
- 'ext': 'mp4',
- 'format_id': '%sp' % height,
- 'height': int(height),
- } for height in re.findall(r'<(?:span|li|p)[^>]+[qb]_(\d+)p', webpage)]
- self._check_formats(formats, video_id)
+ formats = []
+ for mobj in re.finditer(
+ r'stream_url_(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2',
+ webpage):
+ format_id, format_url = mobj.group('id', 'url')
+ f = parse_resolution(format_id)
+ f.update({
+ 'url': format_url,
+ 'format_id': format_id,
+ })
+ formats.append(f)
self._sort_formats(formats)
title = self._html_search_regex(
r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title')
- description = self._og_search_description(webpage)
+ description = self._search_regex(
+ r'<div[^>]+\bclass=["\']bottom[^>]+>\s*<p>[^<]*</p>\s*<p>([^<]+)',
+ webpage, 'description', fatal=False)
thumbnail = self._og_search_thumbnail(webpage)
uploader = self._search_regex(
r'class="user"[^>]*><img[^>]+>([^<]+)',
webpage, 'uploader', default=None)
+ duration = parse_duration(self._search_regex(
+ r'<div[^>]+\bclass=["\']right_side[^>]+>\s*<span>([^<]+)',
+ webpage, 'duration', fatal=False))
+ view_count = str_to_int(self._search_regex(
+ r'([\d,.]+)\s+plays', webpage, 'view count', fatal=False))
age_limit = self._rta_search(webpage)
'description': description,
'thumbnail': thumbnail,
'uploader': uploader,
+ 'duration': duration,
+ 'view_count': view_count,
'formats': formats,
'age_limit': age_limit,
}
import re
from .common import InfoExtractor
+from ..compat import compat_chr
from ..utils import (
determine_ext,
+ ExtractorError,
int_or_none,
js_to_json,
)
'params': {
'skip_download': True,
},
+ 'skip': 'gone',
}, {
'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4',
'only_matching': True,
}]
def _real_extract(self, url):
+        def decrypt_src(encoded, val):
+            """Decode an obfuscated source string.
+
+            ``encoded`` uses a base64-like encoding over the custom ALPHABET
+            below; characters outside [A-Za-z0-9+/=] are dropped first.
+            Each group of 4 symbols yields up to 3 characters, and only the
+            first character of a group is XORed with ``val``.
+
+            A trailing group shorter than 4 symbols is ignored rather than
+            raising IndexError.
+            """
+            ALPHABET = '=/+9876543210zyxwvutsrqponmlkjihgfedcbaZYXWVUTSRQPONMLKJIHGFEDCBA'
+            encoded = re.sub(r'[^A-Za-z0-9+/=]', '', encoded)
+            chars = []
+            # Walk complete 4-symbol groups only
+            for i in range(0, len(encoded) - 3, 4):
+                sm = [ALPHABET.index(c) for c in encoded[i:i + 4]]
+                chars.append(compat_chr(((sm[0] << 0x2) | (sm[1] >> 0x4)) ^ val))
+                # A symbol value of 0x40 means no character is emitted for
+                # this position (presumably the padding marker)
+                if sm[2] != 0x40:
+                    chars.append(compat_chr(((sm[1] & 0xf) << 0x4) | (sm[2] >> 0x2)))
+                if sm[3] != 0x40:
+                    chars.append(compat_chr(((sm[2] & 0x3) << 0x6) | sm[3]))
+            return ''.join(chars)
+
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
formats = []
for format_ in re.findall(r'({[^}]*\bsrc\s*:\s*[^}]*})', webpage):
+ mobj = re.search(r'(src\s*:\s*[^(]+\(([^)]*)\)[\s,]*)', format_)
+ if mobj is None:
+ continue
+
+ format_ = format_.replace(mobj.group(0), '')
+
video = self._parse_json(
- format_, video_id, transform_source=js_to_json, fatal=False)
- if not video:
+ format_, video_id, transform_source=js_to_json,
+ fatal=False) or {}
+
+ mobj = re.search(
+ r'([\'"])(?P<src>(?:(?!\1).)+)\1\s*,\s*(?P<val>\d+)',
+ mobj.group(1))
+ if mobj is None:
continue
- src = video.get('src')
+
+ src = decrypt_src(mobj.group('src'), int_or_none(mobj.group('val')))
if not src:
continue
+
ext = determine_ext(src, default_ext=None)
if video.get('type') == 'application/dash+xml' or ext == 'mpd':
formats.extend(self._extract_mpd_formats(
'height': int_or_none(video.get('height')),
'tbr': int_or_none(video.get('bitrate')),
})
+
+ if not formats:
+ error = self._search_regex(
+ r'<p[^>]+\bclass=["\']lead[^>]+>(.+?)</p>', webpage,
+ 'error', default=None)
+ if not error and '>Sorry' in webpage:
+ error = 'Video %s is not available' % video_id
+ if error:
+ raise ExtractorError(error, expected=True)
+
self._sort_formats(formats)
return {
class TeleBruxellesIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?(?:telebruxelles|bx1)\.be/(news|sport|dernier-jt|emission)/?(?P<id>[^/#?]+)'
+ _VALID_URL = r'https?://(?:www\.)?(?:telebruxelles|bx1)\.be/(?:[^/]+/)*(?P<id>[^/#?]+)'
_TESTS = [{
'url': 'http://bx1.be/news/que-risque-lauteur-dune-fausse-alerte-a-la-bombe/',
'md5': 'a2a67a5b1c3e8c9d33109b902f474fd9',
}, {
'url': 'http://bx1.be/emission/bxenf1-gastronomie/',
'only_matching': True,
+ }, {
+ 'url': 'https://bx1.be/berchem-sainte-agathe/personnel-carrefour-de-berchem-sainte-agathe-inquiet/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://bx1.be/dernier-jt/',
+ 'only_matching': True,
+ }, {
+ # live stream
+ 'url': 'https://bx1.be/lives/direct-tv/',
+ 'only_matching': True,
}]
def _real_extract(self, url):
webpage = self._download_webpage(url, display_id)
article_id = self._html_search_regex(
- r"<article id=\"post-(\d+)\"", webpage, 'article ID', default=None)
+ r'<article[^>]+\bid=["\']post-(\d+)', webpage, 'article ID', default=None)
title = self._html_search_regex(
- r'<h1 class=\"entry-title\">(.*?)</h1>', webpage, 'title')
+ r'<h1[^>]*>(.+?)</h1>', webpage, 'title',
+ default=None) or self._og_search_title(webpage)
description = self._og_search_description(webpage, default=None)
rtmp_url = self._html_search_regex(
- r'file\s*:\s*"(rtmp://[^/]+/vod/mp4:"\s*\+\s*"[^"]+"\s*\+\s*".mp4)"',
+ r'file["\']?\s*:\s*"(r(?:tm|mt)ps?://[^/]+/(?:vod/mp4:"\s*\+\s*"[^"]+"\s*\+\s*"\.mp4|stream/live))"',
webpage, 'RTMP url')
+ # Yes, they have a typo in scheme name for live stream URLs (e.g.
+ # https://bx1.be/lives/direct-tv/)
+ rtmp_url = re.sub(r'^rmtp', 'rtmp', rtmp_url)
rtmp_url = re.sub(r'"\s*\+\s*"', '', rtmp_url)
formats = self._extract_wowza_formats(rtmp_url, article_id or display_id)
self._sort_formats(formats)
+ is_live = 'stream/live' in rtmp_url
+
return {
'id': article_id or display_id,
'display_id': display_id,
- 'title': title,
+ 'title': self._live_title(title) if is_live else title,
'description': description,
'formats': formats,
+ 'is_live': is_live,
}
)
-class TeleQuebecIE(InfoExtractor):
+class TeleQuebecBaseIE(InfoExtractor):
+    @staticmethod
+    def _limelight_result(media_id):
+        """Build a url_transparent result delegating to LimelightMedia.
+
+        The limelight:media: URL is smuggled with geo_countries set to CA.
+        """
+        limelight_url = smuggle_url(
+            'limelight:media:' + media_id, {'geo_countries': ['CA']})
+        return {
+            '_type': 'url_transparent',
+            'url': limelight_url,
+            'ie_key': 'LimelightMedia',
+        }
+
+
+class TeleQuebecIE(TeleQuebecBaseIE):
_VALID_URL = r'https?://zonevideo\.telequebec\.tv/media/(?P<id>\d+)'
_TESTS = [{
- 'url': 'http://zonevideo.telequebec.tv/media/20984/le-couronnement-de-new-york/couronnement-de-new-york',
- 'md5': 'fe95a0957e5707b1b01f5013e725c90f',
+ # available till 01.01.2023
+ 'url': 'http://zonevideo.telequebec.tv/media/37578/un-petit-choc-et-puis-repart/un-chef-a-la-cabane',
'info_dict': {
- 'id': '20984',
+ 'id': '577116881b4b439084e6b1cf4ef8b1b3',
'ext': 'mp4',
- 'title': 'Le couronnement de New York',
- 'description': 'md5:f5b3d27a689ec6c1486132b2d687d432',
- 'upload_date': '20170201',
- 'timestamp': 1485972222,
- }
+ 'title': 'Un petit choc et puis repart!',
+ 'description': 'md5:b04a7e6b3f74e32d7b294cffe8658374',
+ 'upload_date': '20180222',
+ 'timestamp': 1519326631,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
}, {
# no description
'url': 'http://zonevideo.telequebec.tv/media/30261',
def _real_extract(self, url):
media_id = self._match_id(url)
+
media_data = self._download_json(
'https://mnmedias.api.telequebec.tv/api/v2/media/' + media_id,
media_id)['media']
- return {
- '_type': 'url_transparent',
- 'id': media_id,
- 'url': smuggle_url(
- 'limelight:media:' + media_data['streamInfo']['sourceId'],
- {'geo_countries': ['CA']}),
- 'title': media_data['title'],
+
+ info = self._limelight_result(media_data['streamInfo']['sourceId'])
+ info.update({
+ 'title': media_data.get('title'),
'description': try_get(
media_data, lambda x: x['descriptions'][0]['text'], compat_str),
'duration': int_or_none(
media_data.get('durationInMilliseconds'), 1000),
- 'ie_key': 'LimelightMedia',
+ })
+ return info
+
+
+class TeleQuebecEmissionIE(TeleQuebecBaseIE):
+    # Verbose regex: matches <sub>.telequebec.tv/emissions/<id> as well as
+    # (www.)telequebec.tv/<id>
+    _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ [^/]+\.telequebec\.tv/emissions/|
+ (?:www\.)?telequebec\.tv/
+ )
+ (?P<id>[^?#&]+)
+ '''
+    _TESTS = [{
+        'url': 'http://lindicemcsween.telequebec.tv/emissions/100430013/des-soins-esthetiques-a-377-d-interets-annuels-ca-vous-tente',
+        'info_dict': {
+            'id': '66648a6aef914fe3badda25e81a4d50a',
+            'ext': 'mp4',
+            'title': "Des soins esthétiques à 377 % d'intérêts annuels, ça vous tente?",
+            'description': 'md5:369e0d55d0083f1fc9b71ffb640ea014',
+            'upload_date': '20171024',
+            'timestamp': 1508862118,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://bancpublic.telequebec.tv/emissions/emission-49/31986/jeunes-meres-sous-pression',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.telequebec.tv/masha-et-michka/epi059masha-et-michka-3-053-078',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.telequebec.tv/documentaire/bebes-sur-mesure/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Find the Limelight media id in the page and delegate to Limelight."""
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        # The id is the 32-character token following "limelight_" in mediaUID
+        limelight_id = self._search_regex(
+            r'mediaUID\s*:\s*["\'][Ll]imelight_(?P<id>[a-z0-9]{32})', webpage,
+            'limelight id')
+
+        result = self._limelight_result(limelight_id)
+        # Both fields are optional (default=None), taken from OpenGraph tags
+        result['title'] = self._og_search_title(webpage, default=None)
+        result['description'] = self._og_search_description(
+            webpage, default=None)
+        return result
+
+
+class TeleQuebecLiveIE(InfoExtractor):
+    _VALID_URL = r'https?://zonevideo\.telequebec\.tv/(?P<id>endirect)'
+    _TEST = {
+        'url': 'http://zonevideo.telequebec.tv/endirect/',
+        'info_dict': {
+            'id': 'endirect',
+            'ext': 'mp4',
+            'title': 're:^Télé-Québec - En direct [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'is_live': True,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        """Extract the live HLS stream, preferring the URL in the player JS."""
+        video_id = self._match_id(url)
+
+        # Non-fatal download: a failure falls through to the fallback URL
+        player_js = self._download_webpage(
+            'https://player.telequebec.tv/Tq_VideoPlayer.js', video_id,
+            fatal=False)
+        m3u8_url = self._search_regex(
+            r'm3U8Url\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', player_js,
+            'm3u8 url', default=None, group='url') if player_js else None
+        # Hard-coded manifest used when the player script yields no URL
+        m3u8_url = m3u8_url or 'https://teleqmmd.mmdlive.lldns.net/teleqmmd/f386e3b206814e1f8c8c1c71c0f8e748/manifest.m3u8'
+
+        formats = self._extract_m3u8_formats(
+            m3u8_url, video_id, 'mp4', m3u8_id='hls')
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': self._live_title('Télé-Québec - En direct'),
+            'is_live': True,
+            'formats': formats,
         }
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+
+from ..utils import (
+ ExtractorError,
+ unified_timestamp,
+)
+
+
+class TennisTVIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?tennistv\.com/videos/(?P<id>[-a-z0-9]+)'
+    _TEST = {
+        'url': 'https://www.tennistv.com/videos/indian-wells-2018-verdasco-fritz',
+        'info_dict': {
+            'id': 'indian-wells-2018-verdasco-fritz',
+            'ext': 'mp4',
+            'title': 'Fernando Verdasco v Taylor Fritz',
+            'description': 're:^After his stunning victory.{174}$',
+            'thumbnail': 'https://atp-prod.akamaized.net/api/images/v1/images/112831/landscape/1242/0',
+            'timestamp': 1521017381,
+            'upload_date': '20180314',
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'skip': 'Requires email and password of a subscribed account',
+    }
+    _NETRC_MACHINE = 'tennistv'
+
+    def _login(self):
+        """Log in and store the session token in self._session_token.
+
+        Raises ExtractorError (expected) when no credentials are configured
+        or when the login API reports an error code.
+        """
+        (username, password) = self._get_login_info()
+        if not username or not password:
+            raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
+
+        login_form = {
+            'Email': username,
+            'Password': password,
+        }
+        login_json = json.dumps(login_form).encode('utf-8')
+        headers = {
+            'content-type': 'application/json',
+            'Referer': 'https://www.tennistv.com/login',
+            'Origin': 'https://www.tennistv.com',
+        }
+
+        login_result = self._download_json(
+            'https://www.tennistv.com/api/users/v1/login', None,
+            note='Logging in',
+            errnote='Login failed (wrong password?)',
+            headers=headers,
+            data=login_json)
+
+        # Do not assume the response always carries an 'error' object
+        error = login_result.get('error') or {}
+        if error.get('errorCode'):
+            # A rejected login is a user-facing condition, not a bug
+            raise ExtractorError(
+                'Login failed, %s said: %r' % (self.IE_NAME, error.get('errorMessage')),
+                expected=True)
+
+        if login_result.get('entitlement') != 'SUBSCRIBED':
+            self.report_warning('%s may not be subscribed to %s.' % (username, self.IE_NAME))
+
+        self._session_token = login_result['sessionToken']
+
+    def _real_initialize(self):
+        self._login()
+
+    def _real_extract(self, url):
+        """Resolve the HLS manifest through the entitlement API and collect
+        metadata from the channels API."""
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        internal_id = self._search_regex(r'video=([0-9]+)', webpage, 'internal video id')
+
+        headers = {
+            'Origin': 'https://www.tennistv.com',
+            'authorization': 'ATP %s' % self._session_token,
+            'content-type': 'application/json',
+            'Referer': url,
+        }
+        check_data = {
+            'videoID': internal_id,
+            'VideoUrlType': 'HLSV3',
+        }
+        check_json = json.dumps(check_data).encode('utf-8')
+        check_result = self._download_json(
+            'https://www.tennistv.com/api/users/v1/entitlementchecknondiva',
+            video_id, note='Checking video authorization', headers=headers, data=check_json)
+        formats = self._extract_m3u8_formats(check_result['contentUrl'], video_id, ext='mp4')
+        self._sort_formats(formats)
+
+        vdata_url = 'https://www.tennistv.com/api/channels/v1/de/none/video/%s' % video_id
+        vdata = self._download_json(vdata_url, video_id)
+
+        # The title is mandatory; everything else is optional and must not
+        # abort extraction when it is missing from the API response
+        title = vdata['video']['title']
+        video = vdata.get('video') or {}
+        display_text = vdata.get('displayText') or {}
+        seo = vdata.get('seo') or {}
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': display_text.get('description'),
+            'formats': formats,
+            'thumbnail': video.get('thumbnailUrl'),
+            'timestamp': unified_timestamp(vdata.get('timestamp')),
+            'series': vdata.get('tour'),
+            'season': display_text.get('venue'),
+            'episode': seo.get('round'),
+        }
formats = []
for video_file in info.get('Files', []):
video_url, vid_format = video_file.get('URL'), video_file.get('Format')
- if not video_url or not vid_format:
+ if not video_url or video_url == 'NA' or not vid_format:
continue
ext = determine_ext(video_url)
vid_format = vid_format.replace(' ', '')
note='Downloading %s m3u8 information' % vid_format,
errnote='Failed to download %s m3u8 information' % vid_format,
fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ video_url, video_id, mpd_id=vid_format,
+ note='Downloading %s MPD manifest' % vid_format,
+ errnote='Failed to download %s MPD manifest' % vid_format,
+ fatal=False))
+ elif ext == 'ism':
+ formats.extend(self._extract_ism_formats(
+ video_url, video_id, ism_id=vid_format,
+ note='Downloading %s ISM manifest' % vid_format,
+ errnote='Failed to download %s ISM manifest' % vid_format,
+ fatal=False))
elif ext in ('mp4', 'wvm'):
# wvm are drm-protected files
formats.append({
from ..compat import compat_str
from ..utils import (
ExtractorError,
+ int_or_none,
parse_iso8601,
parse_duration,
update_url_query,
class TVNowBaseIE(InfoExtractor):
_VIDEO_FIELDS = (
'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
- 'broadcastStartDate', 'isDrm', 'duration', 'manifest.dashclear',
- 'format.defaultImage169Format', 'format.defaultImage169Logo')
+ 'broadcastStartDate', 'isDrm', 'duration', 'season', 'episode',
+ 'manifest.dashclear', 'format.title', 'format.defaultImage169Format',
+ 'format.defaultImage169Logo')
def _call_api(self, path, video_id, query):
return self._download_json(
'thumbnail': thumbnail,
'timestamp': timestamp,
'duration': duration,
+ 'series': f.get('title'),
+ 'season_number': int_or_none(info.get('season')),
+ 'episode_number': int_or_none(info.get('episode')),
+ 'episode': title,
'formats': formats,
}
_VALID_URL = r'https?://(?:www\.)?tvnow\.(?:de|at|ch)/(?:rtl(?:2|plus)?|nitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/(?:(?:list/[^/]+|jahr/\d{4}/\d{1,2})/)?(?P<id>[^/]+)/(?:player|preview)'
_TESTS = [{
- # rtl
- 'url': 'https://www.tvnow.de/rtl/alarm-fuer-cobra-11/freier-fall/player?return=/rtl',
+ 'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player',
'info_dict': {
- 'id': '385314',
- 'display_id': 'alarm-fuer-cobra-11/freier-fall',
+ 'id': '331082',
+ 'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3',
'ext': 'mp4',
- 'title': 'Freier Fall',
- 'description': 'md5:8c2d8f727261adf7e0dc18366124ca02',
+ 'title': 'Der neue Porsche 911 GT 3',
+ 'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
'thumbnail': r're:^https?://.*\.jpg$',
- 'timestamp': 1512677700,
- 'upload_date': '20171207',
- 'duration': 2862.0,
+ 'timestamp': 1495994400,
+ 'upload_date': '20170528',
+ 'duration': 5283,
+ 'series': 'GRIP - Das Motormagazin',
+ 'season_number': 14,
+ 'episode_number': 405,
+ 'episode': 'Der neue Porsche 911 GT 3',
},
}, {
# rtl2
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
+ compat_kwargs,
compat_str,
compat_urllib_request,
compat_urlparse,
error_str += ' - %s' % error_data.get('formErrors')
raise ExtractorError(error_str, expected=True)
+    def _download_webpage(self, *args, **kwargs):
+        """Download a webpage, always sending a fixed Safari User-Agent.
+
+        Accepts the same arguments as the parent implementation. The
+        caller's ``headers`` mapping is copied, never modified in place,
+        and ``headers=None`` is treated as no headers.
+        """
+        headers = dict(kwargs.get('headers') or {})
+        headers['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4'
+        kwargs['headers'] = headers
+        return super(UdemyIE, self)._download_webpage(
+            *args, **compat_kwargs(kwargs))
+
def _download_json(self, url_or_request, *args, **kwargs):
headers = {
'X-Udemy-Snail-Case': 'true',
class VeohIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/(?P<id>(?:v|e|yapi-)[\da-zA-Z]+)'
+ _VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|embed|iphone/#_Watch)/(?P<id>(?:v|e|yapi-)[\da-zA-Z]+)'
_TESTS = [{
'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
'uploader': 'LUMOback',
'description': 'At LUMOback, we believe straight backs are stronger. The LUMOback Posture & Movement Sensor: It gently vibrates when you slouch, inspiring improved posture and mobility. Use the app to track your data and improve your posture over time. ',
},
+ }, {
+ 'url': 'http://www.veoh.com/embed/v56314296nk7Zdmz3',
+ 'only_matching': True,
}, {
'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage',
'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa',
import time
import hashlib
import json
+import random
from .adobepass import AdobePassIE
from .youtube import YoutubeIE
from .common import InfoExtractor
-from ..compat import compat_HTTPError
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+)
from ..utils import (
+ ExtractorError,
int_or_none,
parse_age_limit,
str_or_none,
- parse_duration,
- ExtractorError,
- extract_attributes,
+ try_get,
)
-class ViceBaseIE(AdobePassIE):
- def _extract_preplay_video(self, url, locale, webpage):
- watch_hub_data = extract_attributes(self._search_regex(
- r'(?s)(<watch-hub\s*.+?</watch-hub>)', webpage, 'watch hub'))
- video_id = watch_hub_data['vms-id']
- title = watch_hub_data['video-title']
+class ViceIE(AdobePassIE):
+ IE_NAME = 'vice'
+ _VALID_URL = r'https?://(?:(?:video|vms)\.vice|(?:www\.)?viceland)\.com/(?P<locale>[^/]+)/(?:video/[^/]+|embed)/(?P<id>[\da-f]+)'
+ _TESTS = [{
+ 'url': 'https://video.vice.com/en_us/video/pet-cremator/58c69e38a55424f1227dc3f7',
+ 'info_dict': {
+ 'id': '5e647f0125e145c9aef2069412c0cbde',
+ 'ext': 'mp4',
+ 'title': '10 Questions You Always Wanted To Ask: Pet Cremator',
+ 'description': 'md5:fe856caacf61fe0e74fab15ce2b07ca5',
+ 'uploader': 'vice',
+ 'uploader_id': '57a204088cb727dec794c67b',
+ 'timestamp': 1489664942,
+ 'upload_date': '20170316',
+ 'age_limit': 14,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'add_ie': ['UplynkPreplay'],
+ }, {
+ # geo restricted to US
+ 'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56',
+ 'info_dict': {
+ 'id': '930c0ad1f47141cc955087eecaddb0e2',
+ 'ext': 'mp4',
+ 'uploader': 'waypoint',
+ 'title': 'The Signal From Tölva',
+ 'description': 'md5:3927e3c79f9e8094606a2b3c5b5e55d5',
+ 'uploader_id': '57f7d621e05ca860fa9ccaf9',
+ 'timestamp': 1477941983,
+ 'upload_date': '20161031',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'add_ie': ['UplynkPreplay'],
+ }, {
+ 'url': 'https://video.vice.com/alps/video/ulfs-wien-beruchtigste-grafitti-crew-part-1/581b12b60a0e1f4c0fb6ea2f',
+ 'info_dict': {
+ 'id': '581b12b60a0e1f4c0fb6ea2f',
+ 'ext': 'mp4',
+ 'title': 'ULFs - Wien berüchtigste Grafitti Crew - Part 1',
+ 'description': '<p>Zwischen Hinterzimmer-Tattoos und U-Bahnschächten erzählen uns die Ulfs, wie es ist, "süchtig nach Sachbeschädigung" zu sein.</p>',
+ 'uploader': 'VICE',
+ 'uploader_id': '57a204088cb727dec794c67b',
+ 'timestamp': 1485368119,
+ 'upload_date': '20170125',
+ 'age_limit': 14,
+ },
+ 'params': {
+ # AES-encrypted m3u8
+ 'skip_download': True,
+ 'proxy': '127.0.0.1:8118',
+ },
+ 'add_ie': ['UplynkPreplay'],
+ }, {
+ 'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.vice.com/en_us/embed/57f41d3556a0a80f54726060',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://vms.vice.com/en_us/video/preplay/58c69e38a55424f1227dc3f7',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.viceland.com/en_us/video/thursday-march-1-2018/5a8f2d7ff1cdb332dd446ec1',
+ 'only_matching': True,
+ }]
+ _PREPLAY_HOST = 'vms.vice'
+
+    @staticmethod
+    def _extract_urls(webpage):
+        """Return the src URLs of all video.vice.com embed iframes in webpage."""
+        embed_re = r'<iframe\b[^>]+\bsrc=["\']((?:https?:)?//video\.vice\.com/[^/]+/embed/[\da-f]+)'
+        return re.findall(embed_re, webpage)
+
+    @staticmethod
+    def _extract_url(webpage):
+        """Return the first embed URL found in webpage, or None if there is none."""
+        for embed_url in ViceIE._extract_urls(webpage):
+            return embed_url
+        return None
+
+ def _real_extract(self, url):
+ locale, video_id = re.match(self._VALID_URL, url).groups()
+
+ webpage = self._download_webpage(
+ 'https://video.vice.com/%s/embed/%s' % (locale, video_id),
+ video_id)
+
+ video = self._parse_json(
+ self._search_regex(
+ r'PREFETCH_DATA\s*=\s*({.+?})\s*;\s*\n', webpage,
+ 'app state'), video_id)['video']
+ video_id = video.get('vms_id') or video.get('id') or video_id
+ title = video['title']
+ is_locked = video.get('locked')
+ rating = video.get('rating')
+ thumbnail = video.get('thumbnail_url')
+ duration = int_or_none(video.get('duration'))
+ series = try_get(
+ video, lambda x: x['episode']['season']['show']['title'],
+ compat_str)
+ episode_number = try_get(
+ video, lambda x: x['episode']['episode_number'])
+ season_number = try_get(
+ video, lambda x: x['episode']['season']['season_number'])
+ uploader = None
query = {}
- is_locked = watch_hub_data.get('video-locked') == '1'
if is_locked:
resource = self._get_mvpd_resource(
- 'VICELAND', title, video_id,
- watch_hub_data.get('video-rating'))
+ 'VICELAND', title, video_id, rating)
query['tvetoken'] = self._extract_mvpd_auth(
url, video_id, 'VICELAND', resource)
# signature generation algorithm is reverse engineered from signatureGenerator in
# webpack:///../shared/~/vice-player/dist/js/vice-player.js in
# https://www.viceland.com/assets/common/js/web.vendor.bundle.js
- exp = int(time.time()) + 14400
+ # new JS is located here https://vice-web-statics-cdn.vice.com/vice-player/player-embed.js
+ exp = int(time.time()) + 1440
+
query.update({
'exp': exp,
'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(),
+ '_ad_blocked': None,
+ '_ad_unit': '',
+ '_debug': '',
+ 'platform': 'desktop',
+ 'rn': random.randint(10000, 100000),
+ 'fbprebidtoken': '',
})
try:
host = 'www.viceland' if is_locked else self._PREPLAY_HOST
preplay = self._download_json(
- 'https://%s.com/%s/preplay/%s' % (host, locale, video_id),
+ 'https://%s.com/%s/video/preplay/%s' % (host, locale, video_id),
video_id, query=query)
except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401):
error = json.loads(e.cause.read().decode())
+ error_message = error.get('error_description') or error['details']
raise ExtractorError('%s said: %s' % (
- self.IE_NAME, error['details']), expected=True)
+ self.IE_NAME, error_message), expected=True)
raise
video_data = preplay['video']
'id': video_id,
'title': title,
'description': base.get('body') or base.get('display_body'),
- 'thumbnail': watch_hub_data.get('cover-image') or watch_hub_data.get('thumbnail'),
- 'duration': int_or_none(video_data.get('video_duration')) or parse_duration(watch_hub_data.get('video-duration')),
+ 'thumbnail': thumbnail,
+ 'duration': int_or_none(video_data.get('video_duration')) or duration,
'timestamp': int_or_none(video_data.get('created_at'), 1000),
'age_limit': parse_age_limit(video_data.get('video_rating')),
- 'series': video_data.get('show_title') or watch_hub_data.get('show-title'),
- 'episode_number': int_or_none(episode.get('episode_number') or watch_hub_data.get('episode')),
+ 'series': video_data.get('show_title') or series,
+ 'episode_number': int_or_none(episode.get('episode_number') or episode_number),
'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')),
- 'season_number': int_or_none(watch_hub_data.get('season')),
+ 'season_number': int_or_none(season_number),
'season_id': str_or_none(episode.get('season_id')),
- 'uploader': channel.get('base', {}).get('title') or watch_hub_data.get('channel-title'),
+ 'uploader': channel.get('base', {}).get('title') or channel.get('name') or uploader,
'uploader_id': str_or_none(channel.get('id')),
'subtitles': subtitles,
'ie_key': 'UplynkPreplay',
}
-class ViceIE(ViceBaseIE):
- IE_NAME = 'vice'
- _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:(?P<locale>[^/]+)/)?videos?/(?P<id>[^/?#&]+)'
-
- _TESTS = [{
- 'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
- 'md5': '7d3ae2f9ba5f196cdd9f9efd43657ac2',
- 'info_dict': {
- 'id': 'N2bzkydjraWDGwnt8jAttCF6Y0PDv4Zj',
- 'ext': 'flv',
- 'title': 'Monkey Labs of Holland',
- 'description': 'md5:92b3c7dcbfe477f772dd4afa496c9149',
- },
- 'add_ie': ['Ooyala'],
- }, {
- 'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56',
- 'info_dict': {
- 'id': '5816510690b70e6c5fd39a56',
- 'ext': 'mp4',
- 'uploader': 'Waypoint',
- 'title': 'The Signal From Tölva',
- 'description': 'md5:3927e3c79f9e8094606a2b3c5b5e55d5',
- 'uploader_id': '57f7d621e05ca860fa9ccaf9',
- 'timestamp': 1477941983,
- 'upload_date': '20161031',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- 'add_ie': ['UplynkPreplay'],
- }, {
- 'url': 'https://video.vice.com/alps/video/ulfs-wien-beruchtigste-grafitti-crew-part-1/581b12b60a0e1f4c0fb6ea2f',
- 'info_dict': {
- 'id': '581b12b60a0e1f4c0fb6ea2f',
- 'ext': 'mp4',
- 'title': 'ULFs - Wien berüchtigste Grafitti Crew - Part 1',
- 'description': '<p>Zwischen Hinterzimmer-Tattoos und U-Bahnschächten erzählen uns die Ulfs, wie es ist, "süchtig nach Sachbeschädigung" zu sein.</p>',
- 'uploader': 'VICE',
- 'uploader_id': '57a204088cb727dec794c67b',
- 'timestamp': 1485368119,
- 'upload_date': '20170125',
- 'age_limit': 14,
- },
- 'params': {
- # AES-encrypted m3u8
- 'skip_download': True,
- },
- 'add_ie': ['UplynkPreplay'],
- }, {
- 'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4',
- 'only_matching': True,
- }]
- _PREPLAY_HOST = 'video.vice'
-
- def _real_extract(self, url):
- locale, video_id = re.match(self._VALID_URL, url).groups()
- webpage, urlh = self._download_webpage_handle(url, video_id)
- embed_code = self._search_regex(
- r'embedCode=([^&\'"]+)', webpage,
- 'ooyala embed code', default=None)
- if embed_code:
- return self.url_result('ooyala:%s' % embed_code, 'Ooyala')
- youtube_id = self._search_regex(
- r'data-youtube-id="([^"]+)"', webpage, 'youtube id', default=None)
- if youtube_id:
- return self.url_result(youtube_id, 'Youtube')
- return self._extract_preplay_video(urlh.geturl(), locale, webpage)
-
-
class ViceShowIE(InfoExtractor):
IE_NAME = 'vice:show'
_VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?show/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah',
'info_dict': {
- 'id': '58dc0a3dee202d2a0ccfcbd8',
+ 'id': '41eae2a47b174a1398357cec55f1f6fc',
'ext': 'mp4',
'title': 'Mormon War on Porn ',
- 'description': 'md5:ad396a2481e7f8afb5ed486878421090',
- 'uploader': 'VICE',
- 'uploader_id': '57a204088cb727dec794c693',
- 'timestamp': 1489160690,
- 'upload_date': '20170310',
+ 'description': 'md5:6394a8398506581d0346b9ab89093fef',
+ 'uploader': 'vice',
+ 'uploader_id': '57a204088cb727dec794c67b',
+ 'timestamp': 1491883129,
+ 'upload_date': '20170411',
+ 'age_limit': 17,
},
'params': {
# AES-encrypted m3u8
'add_ie': ['UplynkPreplay'],
}, {
'url': 'https://www.vice.com/en_us/article/how-to-hack-a-car',
- 'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
+ 'md5': '7fe8ebc4fa3323efafc127b82bd821d9',
'info_dict': {
'id': '3jstaBeXgAs',
'ext': 'mp4',
'title': 'How to Hack a Car: Phreaked Out (Episode 2)',
'description': 'md5:ee95453f7ff495db8efe14ae8bf56f30',
- 'uploader_id': 'MotherboardTV',
'uploader': 'Motherboard',
+ 'uploader_id': 'MotherboardTV',
'upload_date': '20140529',
},
'add_ie': ['Youtube'],
+ }, {
+ 'url': 'https://www.vice.com/en_us/article/znm9dx/karley-sciortino-slutever-reloaded',
+ 'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
+ 'info_dict': {
+ 'id': 'e2ed435eb67e43efb66e6ef9a6930a88',
+ 'ext': 'mp4',
+ 'title': "Making The World's First Male Sex Doll",
+ 'description': 'md5:916078ef0e032d76343116208b6cc2c4',
+ 'uploader': 'vice',
+ 'uploader_id': '57a204088cb727dec794c67b',
+ 'timestamp': 1476919911,
+ 'upload_date': '20161019',
+ 'age_limit': 17,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [ViceIE.ie_key()],
}, {
'url': 'https://www.vice.com/en_us/article/cowboy-capitalists-part-1',
'only_matching': True,
webpage = self._download_webpage(url, display_id)
prefetch_data = self._parse_json(self._search_regex(
- r'window\.__PREFETCH_DATA\s*=\s*({.*});',
- webpage, 'prefetch data'), display_id)
+ r'__APP_STATE\s*=\s*({.+?})(?:\s*\|\|\s*{}\s*)?;\s*\n',
+ webpage, 'app state'), display_id)['pageData']
body = prefetch_data['body']
def _url_res(video_url, ie_key):
'ie_key': ie_key,
}
+ vice_url = ViceIE._extract_url(webpage)
+ if vice_url:
+ return _url_res(vice_url, ViceIE.ie_key())
+
embed_code = self._search_regex(
r'embedCode=([^&\'"]+)', body,
'ooyala embed code', default=None)
+++ /dev/null
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .vice import ViceBaseIE
-
-
-class VicelandIE(ViceBaseIE):
- _VALID_URL = r'https?://(?:www\.)?viceland\.com/(?P<locale>[^/]+)/video/[^/]+/(?P<id>[a-f0-9]+)'
- _TEST = {
- 'url': 'https://www.viceland.com/en_us/video/trapped/588a70d0dba8a16007de7316',
- 'info_dict': {
- 'id': '588a70d0dba8a16007de7316',
- 'ext': 'mp4',
- 'title': 'TRAPPED (Series Trailer)',
- 'description': 'md5:7a8e95c2b6cd86461502a2845e581ccf',
- 'age_limit': 14,
- 'timestamp': 1485474122,
- 'upload_date': '20170126',
- 'uploader_id': '57a204098cb727dec794c6a3',
- 'uploader': 'Viceland',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- 'add_ie': ['UplynkPreplay'],
- 'skip': '404',
- }
- _PREPLAY_HOST = 'www.viceland'
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- locale = mobj.group('locale')
- webpage = self._download_webpage(url, video_id)
- return self._extract_preplay_video(url, locale, webpage)
thumbnail = clip.get('image')
m3u8_url = m3u8_url or self._search_regex(
- r'data(?:-vjs)?-clip-hls-url=(["\'])(?P<url>(?!\1).+)\1',
- webpage, 'hls url')
+ r'data(?:-vjs)?-clip-hls-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
+ webpage, 'hls url', group='url')
formats = self._extract_m3u8_formats(
m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native')
self._sort_formats(formats)
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ get_element_by_id,
+ int_or_none,
+ strip_or_none,
+ unified_strdate,
+ urljoin,
+)
+
+
+class VidLiiIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?vidlii\.com/(?:watch|embed)\?.*?\bv=(?P<id>[0-9A-Za-z_-]{11})'
+ _TESTS = [{
+ 'url': 'https://www.vidlii.com/watch?v=tJluaH4BJ3v',
+ 'md5': '9bf7d1e005dfa909b6efb0a1ff5175e2',
+ 'info_dict': {
+ 'id': 'tJluaH4BJ3v',
+ 'ext': 'mp4',
+ 'title': 'Vidlii is against me',
+ 'description': 'md5:fa3f119287a2bfb922623b52b1856145',
+ 'thumbnail': 're:https://.*.jpg',
+ 'uploader': 'APPle5auc31995',
+ 'uploader_url': 'https://www.vidlii.com/user/APPle5auc31995',
+ 'upload_date': '20171107',
+ 'duration': 212,
+ 'view_count': int,
+ 'comment_count': int,
+ 'average_rating': float,
+ 'categories': ['News & Politics'],
+ 'tags': ['Vidlii', 'Jan', 'Videogames'],
+ }
+ }, {
+ 'url': 'https://www.vidlii.com/embed?v=tJluaH4BJ3v&a=0',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'https://www.vidlii.com/watch?v=%s' % video_id, video_id)
+
+ video_url = self._search_regex(
+ r'src\s*:\s*(["\'])(?P<url>(?:https?://)?(?:(?!\1).)+)\1', webpage,
+ 'video url', group='url')
+
+ title = self._search_regex(
+ (r'<h1>([^<]+)</h1>', r'<title>([^<]+) - VidLii<'), webpage,
+ 'title')
+
+ description = self._html_search_meta(
+ ('description', 'twitter:description'), webpage,
+ default=None) or strip_or_none(
+ get_element_by_id('des_text', webpage))
+
+ thumbnail = self._html_search_meta(
+ 'twitter:image', webpage, default=None)
+ if not thumbnail:
+ thumbnail_path = self._search_regex(
+ r'img\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+ 'thumbnail', fatal=False, group='url')
+ if thumbnail_path:
+ thumbnail = urljoin(url, thumbnail_path)
+
+ uploader = self._search_regex(
+ r'<div[^>]+class=["\']wt_person[^>]+>\s*<a[^>]+\bhref=["\']/user/[^>]+>([^<]+)',
+ webpage, 'uploader', fatal=False)
+ uploader_url = 'https://www.vidlii.com/user/%s' % uploader if uploader else None
+
+ upload_date = unified_strdate(self._html_search_meta(
+ 'datePublished', webpage, default=None) or self._search_regex(
+ r'<date>([^<]+)', webpage, 'upload date', fatal=False))
+
+ duration = int_or_none(self._html_search_meta(
+ 'video:duration', webpage, 'duration',
+ default=None) or self._search_regex(
+ r'duration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
+
+ view_count = int_or_none(self._search_regex(
+ (r'<strong>(\d+)</strong> views',
+ r'Views\s*:\s*<strong>(\d+)</strong>'),
+ webpage, 'view count', fatal=False))
+
+ comment_count = int_or_none(self._search_regex(
+ (r'<span[^>]+id=["\']cmt_num[^>]+>(\d+)',
+ r'Comments\s*:\s*<strong>(\d+)'),
+ webpage, 'comment count', fatal=False))
+
+ average_rating = float_or_none(self._search_regex(
+ r'rating\s*:\s*([\d.]+)', webpage, 'average rating', fatal=False))
+
+ category = self._html_search_regex(
+ r'<div>Category\s*:\s*</div>\s*<div>\s*<a[^>]+>([^<]+)', webpage,
+ 'category', fatal=False)
+ categories = [category] if category else None
+
+ tags = [
+ strip_or_none(tag)
+ for tag in re.findall(
+ r'<a[^>]+\bhref=["\']/results\?.*?q=[^>]*>([^<]+)',
+ webpage) if strip_or_none(tag)
+ ] or None
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'uploader_url': uploader_url,
+ 'upload_date': upload_date,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'average_rating': average_rating,
+ 'categories': categories,
+ 'tags': tags,
+ }
class VidziIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?vidzi\.(?:tv|cc)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
+ _VALID_URL = r'https?://(?:www\.)?vidzi\.(?:tv|cc|si)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
_TESTS = [{
'url': 'http://vidzi.tv/cghql9yq6emu.html',
'md5': '4f16c71ca0c8c8635ab6932b5f3f1660',
}, {
'url': 'http://vidzi.cc/cghql9yq6emu.html',
'only_matching': True,
+ }, {
+ 'url': 'https://vidzi.si/rph9gztxj1et.html',
+ 'only_matching': True,
}]
def _real_extract(self, url):
if self._LOGIN_REQUIRED:
raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
return
- self.report_login()
- webpage = self._download_webpage(self._LOGIN_URL, None, False)
+ webpage = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login page')
token, vuid = self._extract_xsrft_and_vuid(webpage)
- data = urlencode_postdata({
+ data = {
'action': 'login',
'email': username,
'password': password,
'service': 'vimeo',
'token': token,
- })
- login_request = sanitized_Request(self._LOGIN_URL, data)
- login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
- login_request.add_header('Referer', self._LOGIN_URL)
+ }
self._set_vimeo_cookie('vuid', vuid)
- self._download_webpage(login_request, None, False, 'Wrong login info')
+ try:
+ self._download_webpage(
+ self._LOGIN_URL, None, 'Logging in',
+ data=urlencode_postdata(data), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ 'Referer': self._LOGIN_URL,
+ })
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 418:
+ raise ExtractorError(
+ 'Unable to log in: bad username or password',
+ expected=True)
+ raise ExtractorError('Unable to log in')
def _verify_video_password(self, url, video_id, webpage):
password = self._downloader.params.get('videopassword')
'id': '56015672',
'ext': 'mp4',
'title': "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
- 'description': 'md5:2d3305bad981a06ff79f027f19865021',
+ 'description': 'md5:509a9ad5c9bf97c60faee9203aca4479',
'timestamp': 1355990239,
'upload_date': '20121220',
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user7108434',
_TESTS = [
{
'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
- 'md5': '0deae91935c54e00003c2a00646315f0',
+ 'md5': '7babad3b85ea2e91948005b1b8b0cb84',
'info_dict': {
'id': '162222515',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'ProtivoGunz - Хуёвая песня',
'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
'duration': 195,
'uploader': 'Ruseful2011',
'duration': 893,
'age_limit': 18,
- 'categories': ['Fake Hub', 'Amateur', 'MILFs', 'POV', 'Boss', 'Office', 'Oral', 'Reality', 'Sexy'],
+ 'categories': ['Fake Hub', 'Amateur', 'MILFs', 'POV', 'Beauti', 'Beauties', 'Beautiful', 'Boss', 'Office', 'Oral', 'Reality', 'Sexy', 'Taking'],
},
}, {
'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ NO_DEFAULT,
+ str_to_int,
+)
class XNXXIE(InfoExtractor):
_VALID_URL = r'https?://(?:video|www)\.xnxx\.com/video-?(?P<id>[0-9a-z]+)/'
_TESTS = [{
'url': 'http://www.xnxx.com/video-55awb78/skyrim_test_video',
- 'md5': 'ef7ecee5af78f8b03dca2cf31341d3a0',
+ 'md5': '7583e96c15c0f21e9da3453d9920fbba',
'info_dict': {
'id': '55awb78',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Skyrim Test Video',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 469,
+ 'view_count': int,
'age_limit': 18,
},
}, {
def _real_extract(self, url):
video_id = self._match_id(url)
+
webpage = self._download_webpage(url, video_id)
- video_url = self._search_regex(r'flv_url=(.*?)&',
- webpage, 'video URL')
- video_url = compat_urllib_parse_unquote(video_url)
+ def get(meta, default=NO_DEFAULT, fatal=True):
+ return self._search_regex(
+ r'set%s\s*\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % meta,
+ webpage, meta, default=default, fatal=fatal, group='value')
+
+ title = self._og_search_title(
+ webpage, default=None) or get('VideoTitle')
- video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XNXX.COM',
- webpage, 'title')
+ formats = []
+ for mobj in re.finditer(
+ r'setVideo(?:Url(?P<id>Low|High)|HLS)\s*\(\s*(?P<q>["\'])(?P<url>(?:https?:)?//.+?)(?P=q)', webpage):
+ format_url = mobj.group('url')
+ if determine_ext(format_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ preference=1, m3u8_id='hls', fatal=False))
+ else:
+ format_id = mobj.group('id')
+ if format_id:
+ format_id = format_id.lower()
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ 'quality': -1 if format_id == 'low' else 0,
+ })
+ self._sort_formats(formats)
- video_thumbnail = self._search_regex(r'url_bigthumb=(.*?)&',
- webpage, 'thumbnail', fatal=False)
+ thumbnail = self._og_search_thumbnail(webpage, default=None) or get(
+ 'ThumbUrl', fatal=False) or get('ThumbUrl169', fatal=False)
+ duration = int_or_none(self._og_search_property('duration', webpage))
+ view_count = str_to_int(self._search_regex(
+ r'id=["\']nb-views-number[^>]+>([\d,.]+)', webpage, 'view count',
+ default=None))
return {
'id': video_id,
- 'url': video_url,
- 'title': video_title,
- 'ext': 'flv',
- 'thumbnail': video_thumbnail,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'view_count': view_count,
'age_limit': 18,
+ 'formats': formats,
}
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ qualities,
+ unescapeHTML,
+)
+
+
+class YapFilesIE(InfoExtractor):
+ _YAPFILES_URL = r'//(?:(?:www|api)\.)?yapfiles\.ru/get_player/*\?.*?\bv=(?P<id>\w+)'
+ _VALID_URL = r'https?:%s' % _YAPFILES_URL
+ _TESTS = [{
+ # with hd
+ 'url': 'http://www.yapfiles.ru/get_player/?v=vMDE1NjcyNDUt0413',
+ 'md5': '2db19e2bfa2450568868548a1aa1956c',
+ 'info_dict': {
+ 'id': 'vMDE1NjcyNDUt0413',
+ 'ext': 'mp4',
+ 'title': 'Самый худший пароль WIFI',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 72,
+ },
+ }, {
+ # without hd
+ 'url': 'https://api.yapfiles.ru/get_player/?uid=video_player_1872528&plroll=1&adv=1&v=vMDE4NzI1Mjgt690b',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [unescapeHTML(mobj.group('url')) for mobj in re.finditer(
+ r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?%s.*?)\1'
+ % YapFilesIE._YAPFILES_URL, webpage)]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id, fatal=False)
+
+ player_url = None
+ query = {}
+ if webpage:
+ player_url = self._search_regex(
+ r'player\.init\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+ 'player url', default=None, group='url')
+
+ if not player_url:
+ player_url = 'http://api.yapfiles.ru/load/%s/' % video_id
+ query = {
+ 'md5': 'ded5f369be61b8ae5f88e2eeb2f3caff',
+ 'type': 'json',
+ 'ref': url,
+ }
+
+ player = self._download_json(
+ player_url, video_id, query=query)['player']
+
+ playlist_url = player['playlist']
+ title = player['title']
+ thumbnail = player.get('poster')
+
+ if title == 'Ролик удален' or 'deleted.jpg' in (thumbnail or ''):
+ raise ExtractorError(
+ 'Video %s has been removed' % video_id, expected=True)
+
+ playlist = self._download_json(
+ playlist_url, video_id)['player']['main']
+
+ hd_height = int_or_none(player.get('hd'))
+
+ QUALITIES = ('sd', 'hd')
+ quality_key = qualities(QUALITIES)
+ formats = []
+ for format_id in QUALITIES:
+ is_hd = format_id == 'hd'
+ format_url = playlist.get(
+ 'file%s' % ('_hd' if is_hd else ''))
+ if not format_url or not isinstance(format_url, compat_str):
+ continue
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ 'quality': quality_key(format_id),
+ 'height': hd_height if is_hd else None,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': int_or_none(player.get('length')),
+ 'formats': formats,
+ }
break
if codecs:
dct.update(parse_codecs(codecs))
+ if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
+ dct['downloader_options'] = {
+ # Youtube throttles chunks >~10M
+ 'http_chunk_size': 10485760,
+ }
formats.append(dct)
elif video_info.get('hlsvp'):
manifest_url = video_info['hlsvp'][0]
class YoutubeUserIE(YoutubeChannelIE):
IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
- _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
+ _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
_TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
IE_NAME = 'youtube:user'
}]
-class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
+class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
+ _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
+
+
+class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
IE_DESC = 'YouTube.com searches'
# there doesn't appear to be a real limit, for example if you search for
# 'python' you get more than 8.000.000 results
raise ExtractorError(
'[youtube] No video results', expected=True)
- new_videos = self._ids_to_results(orderedSet(re.findall(
- r'href="/watch\?v=(.{11})', html_content)))
+ new_videos = list(self._process_page(html_content))
videos += new_videos
if not new_videos or len(videos) > limit:
break
_EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
-class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor):
+class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
IE_DESC = 'YouTube.com search URLs'
IE_NAME = 'youtube:search_url'
_VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
- _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
_TESTS = [{
'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
'playlist_mincount': 5,
_QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh')
_TESTS = [{
- 'url': 'https://www.zdf.de/service-und-hilfe/die-neue-zdf-mediathek/zdfmediathek-trailer-100.html',
+ 'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',
'info_dict': {
- 'id': 'zdfmediathek-trailer-100',
+ 'id': 'die-magie-der-farben-von-koenigspurpur-und-jeansblau-100',
'ext': 'mp4',
- 'title': 'Die neue ZDFmediathek',
- 'description': 'md5:3003d36487fb9a5ea2d1ff60beb55e8d',
- 'duration': 30,
- 'timestamp': 1477627200,
- 'upload_date': '20161028',
- }
+ 'title': 'Die Magie der Farben (2/2)',
+ 'description': 'md5:a89da10c928c6235401066b60a6d5c1a',
+ 'duration': 2615,
+ 'timestamp': 1465021200,
+ 'upload_date': '20160604',
+ },
+ }, {
+ 'url': 'https://www.zdf.de/service-und-hilfe/die-neue-zdf-mediathek/zdfmediathek-trailer-100.html',
+ 'only_matching': True,
}, {
'url': 'https://www.zdf.de/filme/taunuskrimi/die-lebenden-und-die-toten-1---ein-taunuskrimi-100.html',
'only_matching': True,
'--no-resize-buffer',
action='store_true', dest='noresizebuffer', default=False,
help='Do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.')
+ downloader.add_option(
+ '--http-chunk-size',
+ dest='http_chunk_size', metavar='SIZE', default=None,
+ help='Size of a chunk for chunk-based HTTP downloading (e.g. 10485760 or 10M) (default is disabled). '
+ 'May be useful for bypassing bandwidth throttling imposed by a webserver (experimental)')
downloader.add_option(
'--test',
action='store_true', dest='test', default=False,
temp_filename = prepend_extension(filename, 'temp')
if not info.get('thumbnails'):
- raise EmbedThumbnailPPError('Thumbnail was not found. Nothing to do.')
+ self._downloader.to_screen('[embedthumbnail] There aren\'t any thumbnails to embed')
+ return [], info
thumbnail_filename = info['thumbnails'][-1]['filename']
compiled_regex_type = type(re.compile(''))
std_headers = {
- 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/47.0 (Chrome)',
+ 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0 (Chrome)',
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate',
return os.path.join(*sanitized_path)
-# Prepend protocol-less URLs with `http:` scheme in order to mitigate the number of
-# unwanted failures due to missing protocol
def sanitize_url(url):
- return 'http:%s' % url if url.startswith('//') else url
+ # Prepend protocol-less URLs with `http:` scheme in order to mitigate
+ # the number of unwanted failures due to missing protocol
+ if url.startswith('//'):
+ return 'http:%s' % url
+ # Fix some common typos seen so far
+ COMMON_TYPOS = (
+ # https://github.com/rg3/youtube-dl/issues/15649
+ (r'^httpss://', r'https://'),
+ # https://bx1.be/lives/direct-tv/
+ (r'^rmtp([es]?)://', r'rtmp\1://'),
+ )
+ for mistake, fixup in COMMON_TYPOS:
+ if re.match(mistake, url):
+ return re.sub(mistake, fixup, url)
+ return url
def sanitized_Request(url, *args, **kwargs):
# expected HTTP responses to meet HTTP/1.0 or later (see also
# https://github.com/rg3/youtube-dl/issues/6727)
if sys.version_info < (3, 0):
- kwargs[b'strict'] = True
- hc = http_class(*args, **kwargs)
+ kwargs['strict'] = True
+ hc = http_class(*args, **compat_kwargs(kwargs))
source_address = ydl_handler._params.get('source_address')
if source_address is not None:
sa = (source_address, 0)
if m:
date_str = date_str[:-len(m.group('tz'))]
+ # Python only supports microseconds, so remove nanoseconds
+ m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
+ if m:
+ date_str = m.group(1)
+
for expression in date_formats(day_first):
try:
dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
return lookup_unit_table(_UNIT_TABLE, s)
+def parse_resolution(s):
+ if s is None:
+ return {}
+
+ mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
+ if mobj:
+ return {
+ 'width': int(mobj.group('w')),
+ 'height': int(mobj.group('h')),
+ }
+
+ mobj = re.search(r'\b(\d+)[pPiI]\b', s)
+ if mobj:
+ return {'height': int(mobj.group(1))}
+
+ mobj = re.search(r'\b([48])[kK]\b', s)
+ if mobj:
+ return {'height': int(mobj.group(1)) * 540}
+
+ return {}
+
+
def month_by_name(name, lang='en'):
""" Return the number of a month by (locale-independently) English name """
from __future__ import unicode_literals
-__version__ = '2018.01.27'
+__version__ = '2018.03.14'