Petr Novák
Leonardo Taccari
Martin Weinelt
+Surya Oktafendri
+TingPing
+Alexandre Macabies
+version 2018.04.25
+
+Core
+* [utils] Fix match_str for boolean meta fields
++ [Makefile] Add support for pandoc 2 and disable smart extension (#16251)
+* [YoutubeDL] Fix typo in media extension compatibility checker (#16215)
+
+Extractors
++ [openload] Recognize IPv6 stream URLs (#16136, #16137, #16205, #16246,
+ #16250)
++ [twitch] Extract is_live according to status (#16259)
+* [pornflip] Relax URL regular expression (#16258)
+- [etonline] Remove extractor (#16256)
+* [breakcom] Fix extraction (#16254)
++ [youtube] Add ability to authenticate with cookies
+* [youtube:feed] Implement lazy playlist extraction (#10184)
++ [svt] Add support for TV channel live streams (#15279, #15809)
+* [ccma] Fix video extraction (#15931)
+* [rentv] Fix extraction (#15227)
++ [nick] Add support for nickjr.nl (#16230)
+* [extremetube] Fix metadata extraction
++ [keezmovies] Add support for generic embeds (#16134, #16154)
+* [nexx] Extract new azure URLs (#16223)
+* [cbssports] Fix extraction (#16217)
+* [kaltura] Improve embeds detection (#16201)
+* [instagram:user] Fix extraction (#16119)
+* [cbs] Skip DRM asset types (#16104)
+
+
+version 2018.04.16
+
+Extractors
+* [smotri:broadcast] Fix extraction (#16180)
++ [picarto] Add support for picarto.tv (#6205, #12514, #15276, #15551)
+* [vine:user] Fix extraction (#15514, #16190)
+* [pornhub] Relax URL regular expression (#16165)
+* [cbc:watch] Re-acquire device token when expired (#16160)
++ [fxnetworks] Add support for https theplatform URLs (#16125, #16157)
++ [instagram:user] Add request signing (#16119)
++ [twitch] Add support for mobile URLs (#16146)
+
+
+version 2018.04.09
+
+Core
+* [YoutubeDL] Do not save/restore console title while simulate (#16103)
+* [extractor/common] Relax JSON-LD context check (#16006)
+
+Extractors
++ [generic] Add support for tube8 embeds
++ [generic] Add support for share-videos.se embeds (#16089, #16115)
+* [odnoklassniki] Extend URL regular expression (#16081)
+* [steam] Bypass mature content check (#16113)
++ [acast] Extract more metadata
+* [acast] Fix extraction (#16118)
+* [instagram:user] Fix extraction (#16119)
+* [drtuber] Fix title extraction (#16107, #16108)
+* [liveleak] Extend URL regular expression (#16117)
++ [openload] Add support for oload.xyz
+* [openload] Relax stream URL regular expression
+* [openload] Fix extraction (#16099)
++ [svtplay:series] Add support for season URLs
++ [svtplay:series] Add support for series (#11130, #16059)
+
+
+version 2018.04.03
+
+Extractors
++ [tvnow] Add support for shows (#15837)
+* [dramafever] Fix authentication (#16067)
+* [afreecatv] Use partial view only when necessary (#14450)
++ [afreecatv] Add support for authentication (#14450)
++ [nationalgeographic] Add support for new URL schema (#16001, #16054)
+* [xvideos] Fix thumbnail extraction (#15978, #15979)
+* [medialaan] Fix vod id (#16038)
++ [openload] Add support for oload.site (#16039)
+* [naver] Fix extraction (#16029)
+* [dramafever] Partially switch to API v5 (#16026)
+* [abc:iview] Unescape title and series meta fields (#15994)
+* [videa] Extend URL regular expression (#16003)
+
+
+version 2018.03.26.1
+
+Core
++ [downloader/external] Add elapsed time to progress hook (#10876)
+* [downloader/external,fragment] Fix download finalization when writing file
+ to stdout (#10809, #10876, #15799)
+
+Extractors
+* [vrv] Fix extraction on python2 (#15928)
+* [afreecatv] Update referrer (#15947)
++ [24video] Add support for 24video.sexy (#15973)
+* [crackle] Bypass geo restriction
+* [crackle] Fix extraction (#15969)
++ [lenta] Add support for lenta.ru (#15953)
++ [instagram:user] Add pagination (#15934)
+* [youku] Update ccode (#15939)
+* [libsyn] Adapt to new page structure
+
+
+version 2018.03.20
+
+Core
+* [extractor/common] Improve thumbnail extraction for HTML5 entries
+* Generalize XML manifest processing code and improve XSPF parsing
++ [extractor/common] Add _download_xml_handle
++ [extractor/common] Add support for relative URIs in _parse_xspf (#15794)
+
+Extractors
++ [7plus] Extract series metadata (#15862, #15906)
+* [9now] Bypass geo restriction (#15920)
+* [cbs] Skip unavailable assets (#13490, #13506, #15776)
++ [canalc2] Add support for HTML5 videos (#15916, #15919)
++ [ceskatelevize] Add support for iframe embeds (#15918)
++ [prosiebensat1] Add support for galileo.tv (#15894)
++ [generic] Add support for xfileshare embeds (#15879)
+* [bilibili] Switch to v2 playurl API
+* [bilibili] Fix and improve extraction (#15048, #15430, #15622, #15863)
+* [heise] Improve extraction (#15496, #15784, #15026)
+* [instagram] Fix user videos extraction (#15858)
+
+
version 2018.03.14
Extractors
# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi)
+# set markdown input format to "markdown-smart" for pandoc version 2 and to "markdown" for pandoc prior to version 2
+MARKDOWN = $(shell if [ `pandoc -v | head -n1 | cut -d" " -f2 | head -c1` = "2" ]; then echo markdown-smart; else echo markdown; fi)
+
install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
install -d $(DESTDIR)$(BINDIR)
install -m 755 youtube-dl $(DESTDIR)$(BINDIR)
$(PYTHON) devscripts/make_supportedsites.py docs/supportedsites.md
README.txt: README.md
- pandoc -f markdown -t plain README.md -o README.txt
+ pandoc -f $(MARKDOWN) -t plain README.md -o README.txt
youtube-dl.1: README.md
$(PYTHON) devscripts/prepare_manpage.py youtube-dl.1.temp.md
- pandoc -s -f markdown -t man youtube-dl.1.temp.md -o youtube-dl.1
+ pandoc -s -f $(MARKDOWN) -t man youtube-dl.1.temp.md -o youtube-dl.1
rm -f youtube-dl.1.temp.md
youtube-dl.bash-completion: youtube_dl/*.py youtube_dl/*/*.py devscripts/bash-completion.in
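
The MARKDOWN detection above is needed because pandoc 2 turned smart
punctuation into an input-format extension (disabled by appending
"-smart" to the reader name), whereas pandoc 1.x reads plain "markdown".
A rough Python equivalent of the shell check, shown only to illustrate
the logic (assumes pandoc is on PATH):

    import subprocess

    # First line of `pandoc -v` looks like "pandoc 2.1.3"; compare the
    # first character of the version, as the Makefile pipeline does.
    first_line = subprocess.check_output(['pandoc', '-v']).decode('utf-8').splitlines()[0]
    major = first_line.split(' ')[1][0]
    markdown = 'markdown-smart' if major == '2' else 'markdown'
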
## Filesystem Options:
-a, --batch-file FILE File containing URLs to download ('-' for
- stdin)
+ stdin), one URL per line. Lines starting
+ with '#', ';' or ']' are considered as
+ comments and ignored.
--id Use only video ID in file name
-o, --output TEMPLATE Output filename template, see the "OUTPUT
TEMPLATE" for all the info
Filesystem Options:
-a, --batch-file FILE File containing URLs to download ('-' for
- stdin)
+ stdin), one URL per line. Lines starting
+ with '#', ';' or ']' are considered as
+ comments and ignored.
--id Use only video ID in file name
-o, --output TEMPLATE Output filename template, see the "OUTPUT
TEMPLATE" for all the info
1. Fork this repository
2. Check out the source code with:
- git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git
+ git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git
3. Start a new git branch with
- cd youtube-dl
- git checkout -b yourextractor
+ cd youtube-dl
+ git checkout -b yourextractor
4. Start with this simple template and save it to
youtube_dl/extractor/yourextractor.py:
9. When the tests pass, add the new files and commit them and push the
result, like this:
- $ git add youtube_dl/extractor/extractors.py
- $ git add youtube_dl/extractor/yourextractor.py
- $ git commit -m '[yourextractor] Add new extractor'
- $ git push origin yourextractor
+ $ git add youtube_dl/extractor/extractors.py
+ $ git add youtube_dl/extractor/yourextractor.py
+ $ git commit -m '[yourextractor] Add new extractor'
+ $ git push origin yourextractor
10. Finally, create a pull request. We'll then review and merge it.
- **ESPN**
- **ESPNArticle**
- **EsriVideo**
- - **ETOnline**
- **Europa**
- **EveryonesMixtape**
- **ExpoTV**
- **Lecture2Go**
- **LEGO**
- **Lemonde**
+ - **Lenta**
- **LePlaylist**
- **LetvCloud**: 乐视云
- **Libsyn**
- **PhilharmonieDeParis**: Philharmonie de Paris
- **phoenix.de**
- **Photobucket**
+ - **Picarto**
+ - **PicartoVod**
- **Piksel**
- **Pinkbike**
- **Pladform**
- **SunPorno**
- **SVT**
- **SVTPlay**: SVT Play and Öppet arkiv
+ - **SVTSeries**
- **SWRMediathek**
- **Syfy**
- **SztvHu**
- **TVNoe**
- **TVNow**
- **TVNowList**
+ - **TVNowShow**
- **tvp**: Telewizja Polska
- **tvp:embed**: Telewizja Polska
- **tvp:series**
self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None)
+ def test_parse_xspf(self):
+ _TEST_CASES = [
+ (
+ 'foo_xspf',
+ 'https://example.org/src/foo_xspf.xspf',
+ [{
+ 'id': 'foo_xspf',
+ 'title': 'Pandemonium',
+ 'description': 'Visit http://bigbrother404.bandcamp.com',
+ 'duration': 202.416,
+ 'formats': [{
+ 'manifest_url': 'https://example.org/src/foo_xspf.xspf',
+ 'url': 'https://example.org/src/cd1/track%201.mp3',
+ }],
+ }, {
+ 'id': 'foo_xspf',
+ 'title': 'Final Cartridge (Nichico Twelve Remix)',
+ 'description': 'Visit http://bigbrother404.bandcamp.com',
+ 'duration': 255.857,
+ 'formats': [{
+ 'manifest_url': 'https://example.org/src/foo_xspf.xspf',
+ 'url': 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3',
+ }],
+ }, {
+ 'id': 'foo_xspf',
+ 'title': 'Rebuilding Nightingale',
+ 'description': 'Visit http://bigbrother404.bandcamp.com',
+ 'duration': 287.915,
+ 'formats': [{
+ 'manifest_url': 'https://example.org/src/foo_xspf.xspf',
+ 'url': 'https://example.org/src/track3.mp3',
+ }, {
+ 'manifest_url': 'https://example.org/src/foo_xspf.xspf',
+ 'url': 'https://example.com/track3.mp3',
+ }]
+ }]
+ ),
+ ]
+
+ for xspf_file, xspf_url, expected_entries in _TEST_CASES:
+ with io.open('./test/testdata/xspf/%s.xspf' % xspf_file,
+ mode='r', encoding='utf-8') as f:
+ entries = self.ie._parse_xspf(
+ compat_etree_fromstring(f.read().encode('utf-8')),
+ xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url)
+ expect_value(self, entries, expected_entries, None)
+ for i in range(len(entries)):
+ expect_dict(self, entries[i], expected_entries[i])
+
if __name__ == '__main__':
unittest.main()
class TestMTVSubtitles(BaseTestSubtitles):
- url = 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother'
+ url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans'
IE = ComedyCentralIE
def getInfoDict(self):
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['en']))
- self.assertEqual(md5(subtitles['en']), 'b9f6ca22a6acf597ec76f61749765e65')
+ self.assertEqual(md5(subtitles['en']), '78206b8d8a0cfa9da64dc026eea48961')
class TestNRKSubtitles(BaseTestSubtitles):
self.assertFalse(match_str(
'like_count > 100 & dislike_count <? 50 & description',
{'like_count': 190, 'dislike_count': 10}))
+ self.assertTrue(match_str('is_live', {'is_live': True}))
+ self.assertFalse(match_str('is_live', {'is_live': False}))
+ self.assertFalse(match_str('is_live', {'is_live': None}))
+ self.assertFalse(match_str('is_live', {}))
+ self.assertFalse(match_str('!is_live', {'is_live': True}))
+ self.assertTrue(match_str('!is_live', {'is_live': False}))
+ self.assertTrue(match_str('!is_live', {'is_live': None}))
+ self.assertTrue(match_str('!is_live', {}))
+ self.assertTrue(match_str('title', {'title': 'abc'}))
+ self.assertTrue(match_str('title', {'title': ''}))
+ self.assertFalse(match_str('!title', {'title': 'abc'}))
+ self.assertFalse(match_str('!title', {'title': ''}))
def test_parse_dfxp_time_expr(self):
self.assertEqual(parse_dfxp_time_expr(None), None)
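
The new assertions above pin down the fixed match_str semantics for
boolean meta fields: a bare field name now requires a truthy value
instead of mere key presence, and !field matches falsy or missing
values. A quick usage sketch (match_str is the real helper from
youtube_dl.utils; the info dicts are made up):

    from youtube_dl.utils import match_str

    assert match_str('is_live', {'is_live': True})
    assert not match_str('is_live', {'is_live': False})  # matched before the fix
    assert match_str('!is_live', {})  # a missing field counts as falsy
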
dl = FakeYDL()
dl.params['extract_flat'] = True
ie = YoutubePlaylistIE(dl)
- result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
+ result = ie.extract('https://www.youtube.com/playlist?list=PL-KKIb8rvtMSrAO9YFbeM6UQrAqoFTUWv')
self.assertIsPlaylist(result)
for entry in result['entries']:
self.assertTrue(entry.get('title'))
--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>
+<playlist version="1" xmlns="http://xspf.org/ns/0/">
+ <date>2018-03-09T18:01:43Z</date>
+ <trackList>
+ <track>
+ <location>cd1/track%201.mp3</location>
+ <title>Pandemonium</title>
+ <creator>Foilverb</creator>
+ <annotation>Visit http://bigbrother404.bandcamp.com</annotation>
+ <album>Pandemonium EP</album>
+ <trackNum>1</trackNum>
+ <duration>202416</duration>
+ </track>
+ <track>
+ <location>../%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3</location>
+ <title>Final Cartridge (Nichico Twelve Remix)</title>
+ <annotation>Visit http://bigbrother404.bandcamp.com</annotation>
+ <creator>Foilverb</creator>
+ <album>Pandemonium EP</album>
+ <trackNum>2</trackNum>
+ <duration>255857</duration>
+ </track>
+ <track>
+ <location>track3.mp3</location>
+ <location>https://example.com/track3.mp3</location>
+ <title>Rebuilding Nightingale</title>
+ <annotation>Visit http://bigbrother404.bandcamp.com</annotation>
+ <creator>Foilverb</creator>
+ <album>Pandemonium EP</album>
+ <trackNum>3</trackNum>
+ <duration>287915</duration>
+ </track>
+ </trackList>
+</playlist>
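
The relative <location> values in this fixture are what the new
test_parse_xspf test exercises: locations are now resolved against the
playlist URL, which yields the expected format URLs in the test case
above. For illustration (urljoin here is youtube_dl.utils.urljoin; the
resolution follows standard URL semantics):

    from youtube_dl.utils import urljoin

    base = 'https://example.org/src/foo_xspf.xspf'
    urljoin(base, 'cd1/track%201.mp3')
    # -> 'https://example.org/src/cd1/track%201.mp3'
    urljoin(base, '../%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3')
    # -> 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3'
    # Absolute locations (https://example.com/track3.mp3) pass through unchanged.
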
+.\" Automatically generated by Pandoc 2.1.3
+.\"
.TH "YOUTUBE\-DL" "1" "" "" ""
+.hy
.SH NAME
.PP
youtube\-dl \- download videos from youtube.com or other video platforms
.SS Filesystem Options:
.TP
.B \-a, \-\-batch\-file \f[I]FILE\f[]
-File containing URLs to download (\[aq]\-\[aq] for stdin)
+File containing URLs to download (\[aq]\-\[aq] for stdin), one URL per
+line.
+Lines starting with \[aq]#\[aq], \[aq];\[aq] or \[aq]]\[aq] are
+considered as comments and ignored.
.RS
.RE
.TP
.PP
To activate authentication with the \f[C]\&.netrc\f[] file you should
pass \f[C]\-\-netrc\f[] to youtube\-dl or place it in the configuration
-file (#configuration).
+file.
.PP
On Windows you may also need to setup the \f[C]%HOME%\f[] environment
variable manually.
The \f[C]\-o\f[] option allows users to indicate a template for the
output file names.
.PP
-\f[B]tl;dr:\f[] navigate me to examples (#output-template-examples).
+\f[B]tl;dr:\f[] navigate me to examples.
.PP
The basic usage is not to set any template arguments when downloading a
single file, like in
an expression that describes format or formats you would like to
download.
.PP
-\f[B]tl;dr:\f[] navigate me to examples (#format-selection-examples).
+\f[B]tl;dr:\f[] navigate me to examples.
.PP
The simplest case is requesting a specific format, for example with
\f[C]\-f\ 22\f[] you can download the format with format code equal to
youtube\-dl 2015.04.26), i.e.
you want to download the best available quality media served as a single
file, you should explicitly specify your choice with \f[C]\-f\ best\f[].
-You may want to add it to the configuration file (#configuration) in
-order not to type it every time you run youtube\-dl.
+You may want to add it to the configuration file in order not to type it
+every time you run youtube\-dl.
.SS Format selection examples
.PP
Note that on Windows you may need to use double quotes instead of
.PP
YouTube has switched to a new video info format in July 2011 which is
not supported by old versions of youtube\-dl.
-See above (#how-do-i-update-youtube-dl) for how to update youtube\-dl.
+See above for how to update youtube\-dl.
.SS ERROR: unable to download video
.PP
YouTube requires an additional signature since September 2012 which is
not supported by old versions of youtube\-dl.
-See above (#how-do-i-update-youtube-dl) for how to update youtube\-dl.
+See above for how to update youtube\-dl.
.SS Video URL contains an ampersand and I\[aq]m getting some strange
output \f[C][1]\ 2839\f[] or
\f[C]\[aq]v\[aq]\ is\ not\ recognized\ as\ an\ internal\ or\ external\ command\f[]
.PP
In February 2015, the new YouTube player contained a character sequence
in a string that was misinterpreted by old versions of youtube\-dl.
-See above (#how-do-i-update-youtube-dl) for how to update youtube\-dl.
+See above for how to update youtube\-dl.
.SS HTTP Error 429: Too Many Requests or 402: Payment Required
.PP
These two error codes indicate that the service is blocking your IP
address because of overuse.
Contact the service and ask them to unblock your IP address, or \- if
you have acquired a whitelisted IP address already \- use the
-\f[C]\-\-proxy\f[] or \f[C]\-\-source\-address\f[]
-options (#network-options) to select another IP address.
+\f[C]\-\-proxy\f[] or \f[C]\-\-source\-address\f[] options to select
+another IP address.
.SS SyntaxError: Non\-ASCII character
.PP
The error
matter what directory you\[aq]re in.
.SS How do I put downloads into a specific folder?
.PP
-Use the \f[C]\-o\f[] to specify an output template (#output-template),
-for example \f[C]\-o\ "/home/user/videos/%(title)s\-%(id)s.%(ext)s"\f[].
+Use the \f[C]\-o\f[] option to specify an output template, for example
+\f[C]\-o\ "/home/user/videos/%(title)s\-%(id)s.%(ext)s"\f[].
If you want this for all of your downloads, put the option into your
-configuration file (#configuration).
+configuration file.
.SS How do I download a video starting with a \f[C]\-\f[]?
.PP
Either prepend \f[C]https://www.youtube.com/watch?v=\f[] or separate the
Unfortunately, the youtube\-dl project has grown too large to use
personal email as an effective communication channel.
.PP
-Please read the bug reporting instructions (#bugs) below.
+Please read the bug reporting instructions below.
A lot of bugs lack all the necessary information.
If you can, offer proxy, VPN, or shell access to the youtube\-dl
developers.
.SH Why do I need to go through that much red tape when filing bugs?
.PP
Before we had the issue template, despite our extensive bug reporting
-instructions (#bugs), about 80% of the issue reports we got were
-useless, for instance because people used ancient versions hundreds of
-releases old, because of simple syntactic errors (not in youtube\-dl but
-in general shell usage), because the problem was already reported
-multiple times before, because people did not actually read an error
-message, even if it said "please install ffmpeg", because people did not
-mention the URL they were trying to download and many more simple,
-easy\-to\-avoid problems, many of whom were totally unrelated to
-youtube\-dl.
+instructions, about 80% of the issue reports we got were useless, for
+instance because people used ancient versions hundreds of releases old,
+because of simple syntactic errors (not in youtube\-dl but in general
+shell usage), because the problem was already reported multiple times
+before, because people did not actually read an error message, even if
+it said "please install ffmpeg", because people did not mention the URL
+they were trying to download and many more simple, easy\-to\-avoid
+problems, many of which were totally unrelated to youtube\-dl.
.PP
youtube\-dl is an open\-source project manned by too few volunteers, so
we\[aq]d rather spend time fixing bugs where we are certain none of
\f[]
.fi
.PP
-See item 6 of new extractor tutorial (#adding-support-for-a-new-site)
-for how to run extractor specific test cases.
+See item 6 of new extractor tutorial for how to run extractor specific
+test cases.
.PP
If you want to create a build of youtube\-dl yourself, you\[aq]ll need
.IP \[bu] 2
.IP
.nf
\f[C]
-git\ clone\ git\@github.com:YOUR_GITHUB_USERNAME/youtube\-dl.git
+\ git\ clone\ git\@github.com:YOUR_GITHUB_USERNAME/youtube\-dl.git
\f[]
.fi
.RE
.IP
.nf
\f[C]
-cd\ youtube\-dl
-git\ checkout\ \-b\ yourextractor
+\ cd\ youtube\-dl
+\ git\ checkout\ \-b\ yourextractor
\f[]
.fi
.RE
return (https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252).
Add tests and code for as many as you want.
.IP " 8." 4
-Make sure your code follows youtube\-dl coding
-conventions (#youtube-dl-coding-conventions) and check the code with
-flake8 (https://pypi.python.org/pypi/flake8).
+Make sure your code follows youtube\-dl coding conventions and check the
+code with flake8 (https://pypi.python.org/pypi/flake8).
Also make sure your code works under all
Python (https://www.python.org/) versions claimed supported by
youtube\-dl, namely 2.6, 2.7, and 3.2+.
.IP
.nf
\f[C]
-$\ git\ add\ youtube_dl/extractor/extractors.py
-$\ git\ add\ youtube_dl/extractor/yourextractor.py
-$\ git\ commit\ \-m\ \[aq][yourextractor]\ Add\ new\ extractor\[aq]
-$\ git\ push\ origin\ yourextractor
+\ $\ git\ add\ youtube_dl/extractor/extractors.py
+\ $\ git\ add\ youtube_dl/extractor/yourextractor.py
+\ $\ git\ commit\ \-m\ \[aq][yourextractor]\ Add\ new\ extractor\[aq]
+\ $\ git\ push\ origin\ yourextractor
\f[]
.fi
.RE
(webchat (https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
.PP
\f[B]Please include the full output of youtube\-dl when run with
-\f[C]\-v\f[]\f[], i.e.
+\f[BC]\-v\f[B]\f[], i.e.
\f[B]add\f[] \f[C]\-v\f[] flag to \f[B]your command line\f[], copy the
\f[B]whole\f[] output and post it in the issue body wrapped in ``` for
better formatting.
complete --command youtube-dl --long-option hls-use-mpegts --description 'Use the mpegts container for HLS videos, allowing to play the video while downloading (some players may not be able to play it)'
complete --command youtube-dl --long-option external-downloader --description 'Use the specified external downloader. Currently supports aria2c,avconv,axel,curl,ffmpeg,httpie,wget'
complete --command youtube-dl --long-option external-downloader-args --description 'Give these arguments to the external downloader'
-complete --command youtube-dl --long-option batch-file --short-option a --description 'File containing URLs to download ('"'"'-'"'"' for stdin)' --require-parameter
+complete --command youtube-dl --long-option batch-file --short-option a --description 'File containing URLs to download ('"'"'-'"'"' for stdin), one URL per line. Lines starting with '"'"'#'"'"', '"'"';'"'"' or '"'"']'"'"' are considered as comments and ignored.' --require-parameter
complete --command youtube-dl --long-option id --description 'Use only video ID in file name'
complete --command youtube-dl --long-option output --short-option o --description 'Output filename template, see the "OUTPUT TEMPLATE" for all the info'
complete --command youtube-dl --long-option autonumber-size
def save_console_title(self):
if not self.params.get('consoletitle', False):
return
+ if self.params.get('simulate', False):
+ return
if compat_os_name != 'nt' and 'TERM' in os.environ:
# Save the title on stack
self._write_string('\033[22;0t', self._screen_file)
def restore_console_title(self):
if not self.params.get('consoletitle', False):
return
+ if self.params.get('simulate', False):
+ return
if compat_os_name != 'nt' and 'TERM' in os.environ:
# Restore the title from stack
self._write_string('\033[23;0t', self._screen_file)
def compatible_formats(formats):
video, audio = formats
# Check extension
- video_ext, audio_ext = audio.get('ext'), video.get('ext')
+ video_ext, audio_ext = video.get('ext'), audio.get('ext')
if video_ext and audio_ext:
COMPATIBLE_EXTS = (
('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
if self.params.get('noprogress', False):
self.to_screen('[download] Download completed')
else:
- s['_total_bytes_str'] = format_bytes(s['total_bytes'])
+ msg_template = '100%%'
+ if s.get('total_bytes') is not None:
+ s['_total_bytes_str'] = format_bytes(s['total_bytes'])
+ msg_template += ' of %(_total_bytes_str)s'
if s.get('elapsed') is not None:
s['_elapsed_str'] = self.format_seconds(s['elapsed'])
- msg_template = '100%% of %(_total_bytes_str)s in %(_elapsed_str)s'
- else:
- msg_template = '100%% of %(_total_bytes_str)s'
+ msg_template += ' in %(_elapsed_str)s'
self._report_progress_status(
msg_template % s, is_last_line=True)
from __future__ import unicode_literals
import os.path
+import re
import subprocess
import sys
-import re
+import time
from .common import FileDownloader
from ..compat import (
tmpfilename = self.temp_name(filename)
try:
+ started = time.time()
retval = self._call_downloader(tmpfilename, info_dict)
except KeyboardInterrupt:
if not info_dict.get('is_live'):
self.to_screen('[%s] Interrupted by user' % self.get_basename())
if retval == 0:
- fsize = os.path.getsize(encodeFilename(tmpfilename))
- self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
- self.try_rename(tmpfilename, filename)
- self._hook_progress({
- 'downloaded_bytes': fsize,
- 'total_bytes': fsize,
+ status = {
'filename': filename,
'status': 'finished',
- })
+ 'elapsed': time.time() - started,
+ }
+ if filename != '-':
+ fsize = os.path.getsize(encodeFilename(tmpfilename))
+ self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
+ self.try_rename(tmpfilename, filename)
+ status.update({
+ 'downloaded_bytes': fsize,
+ 'total_bytes': fsize,
+ })
+ self._hook_progress(status)
return True
else:
self.to_stderr('\n')
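
With this change the status dict passed to progress hooks by external
downloaders gains 'elapsed', and the byte counters are only set when the
output is not streamed to stdout. A minimal sketch of a hook consuming
it (progress_hooks is the standard YoutubeDL option; everything else is
illustrative):

    import youtube_dl

    def finished_hook(d):
        if d['status'] == 'finished':
            # downloaded_bytes/total_bytes may be absent when the
            # external downloader wrote to stdout ('-')
            print('done in %.1fs (%s bytes)' % (
                d.get('elapsed', 0), d.get('downloaded_bytes', 'unknown')))

    ydl = youtube_dl.YoutubeDL({'progress_hooks': [finished_hook]})
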
if os.path.isfile(ytdl_filename):
os.remove(ytdl_filename)
elapsed = time.time() - ctx['started']
- self.try_rename(ctx['tmpfilename'], ctx['filename'])
- fsize = os.path.getsize(encodeFilename(ctx['filename']))
+
+ if ctx['tmpfilename'] == '-':
+ downloaded_bytes = ctx['complete_frags_downloaded_bytes']
+ else:
+ self.try_rename(ctx['tmpfilename'], ctx['filename'])
+ downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
self._hook_progress({
- 'downloaded_bytes': fsize,
- 'total_bytes': fsize,
+ 'downloaded_bytes': downloaded_bytes,
+ 'total_bytes': downloaded_bytes,
'filename': ctx['filename'],
'status': 'finished',
'elapsed': elapsed,
int_or_none,
parse_iso8601,
try_get,
+ unescapeHTML,
update_url_query,
)
# ABC iview programs are normally available for 14 days only.
_TESTS = [{
- 'url': 'http://iview.abc.net.au/programs/call-the-midwife/ZW0898A003S00',
+ 'url': 'https://iview.abc.net.au/programs/ben-and-hollys-little-kingdom/ZY9247A021S00',
'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
'info_dict': {
- 'id': 'ZW0898A003S00',
+ 'id': 'ZY9247A021S00',
'ext': 'mp4',
- 'title': 'Series 5 Ep 3',
- 'description': 'md5:e0ef7d4f92055b86c4f33611f180ed79',
- 'upload_date': '20171228',
- 'uploader_id': 'abc1',
- 'timestamp': 1514499187,
+ 'title': "Gaston's Visit",
+ 'series': "Ben And Holly's Little Kingdom",
+ 'description': 'md5:18db170ad71cf161e006a4c688e33155',
+ 'upload_date': '20180318',
+ 'uploader_id': 'abc4kids',
+ 'timestamp': 1521400959,
},
'params': {
'skip_download': True,
return {
'id': video_id,
- 'title': title,
+ 'title': unescapeHTML(title),
'description': self._html_search_meta(['og:description', 'twitter:description'], webpage),
'thumbnail': self._html_search_meta(['og:image', 'twitter:image:src'], webpage),
'duration': int_or_none(video_params.get('eventDuration')),
'timestamp': parse_iso8601(video_params.get('pubDate'), ' '),
- 'series': video_params.get('seriesTitle'),
+ 'series': unescapeHTML(video_params.get('seriesTitle')),
'series_id': video_params.get('seriesHouseNumber') or video_id[:7],
'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage, default=None)),
'episode': self._html_search_meta('episode_title', webpage, default=None),
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
+ float_or_none,
int_or_none,
+ try_get,
unified_timestamp,
OnDemandPagedList,
)
'id': '57de3baa-4bb0-487e-9418-2692c1277a34',
'ext': 'mp3',
'title': '"Where Are You?": Taipei 101, Taiwan',
+ 'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e',
'timestamp': 1196172000,
'upload_date': '20071127',
- 'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e',
'duration': 211,
+ 'creator': 'Concierge',
+ 'series': 'Condé Nast Traveler Podcast',
+ 'episode': '"Where Are You?": Taipei 101, Taiwan',
}
}, {
# test with multiple blings
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
- 'md5': 'e87d5b8516cd04c0d81b6ee1caca28d0',
+ 'md5': 'a02393c74f3bdb1801c3ec2695577ce0',
'info_dict': {
'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
'ext': 'mp3',
'title': '2. Raggarmordet - Röster ur det förflutna',
+ 'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4',
'timestamp': 1477346700,
'upload_date': '20161024',
- 'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4',
- 'duration': 2766,
+ 'duration': 2766.602563,
+ 'creator': 'Anton Berg & Martin Johnson',
+ 'series': 'Spår',
+ 'episode': '2. Raggarmordet - Röster ur det förflutna',
}
}]
def _real_extract(self, url):
channel, display_id = re.match(self._VALID_URL, url).groups()
+ s = self._download_json(
+ 'https://play-api.acast.com/stitch/%s/%s' % (channel, display_id),
+ display_id)['result']
+ media_url = s['url']
cast_data = self._download_json(
- 'https://play-api.acast.com/splash/%s/%s' % (channel, display_id), display_id)
- e = cast_data['result']['episode']
+ 'https://play-api.acast.com/splash/%s/%s' % (channel, display_id),
+ display_id)['result']
+ e = cast_data['episode']
+ title = e['name']
return {
'id': compat_str(e['id']),
'display_id': display_id,
- 'url': e['mediaUrl'],
- 'title': e['name'],
- 'description': e.get('description'),
+ 'url': media_url,
+ 'title': title,
+ 'description': e.get('description') or e.get('summary'),
'thumbnail': e.get('image'),
'timestamp': unified_timestamp(e.get('publishingDate')),
- 'duration': int_or_none(e.get('duration')),
+ 'duration': float_or_none(s.get('duration') or e.get('duration')),
+ 'filesize': int_or_none(e.get('contentLength')),
+ 'creator': try_get(cast_data, lambda x: x['show']['author'], compat_str),
+ 'series': try_get(cast_data, lambda x: x['show']['name'], compat_str),
+ 'season_number': int_or_none(e.get('seasonNumber')),
+ 'episode': title,
+ 'episode_number': int_or_none(e.get('episodeNumber')),
}
determine_ext,
ExtractorError,
int_or_none,
+ urlencode_postdata,
xpath_text,
)
)
(?P<id>\d+)
'''
+ _NETRC_MACHINE = 'afreecatv'
_TESTS = [{
'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
'skip_download': True,
},
}, {
- # adult video
- 'url': 'http://vod.afreecatv.com/PLAYER/STATION/26542731',
+ # PARTIAL_ADULT
+ 'url': 'http://vod.afreecatv.com/PLAYER/STATION/32028439',
'info_dict': {
- 'id': '20171001_F1AE1711_196617479_1',
+ 'id': '20180327_27901457_202289533_1',
'ext': 'mp4',
- 'title': '[생]서아 초심 찾기 방송 (part 1)',
+ 'title': '[생]빨개요♥ (part 1)',
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
- 'uploader': 'BJ서아',
+ 'uploader': '[SA]서아',
'uploader_id': 'bjdyrksu',
- 'upload_date': '20171001',
- 'duration': 3600,
- 'age_limit': 18,
+ 'upload_date': '20180327',
+ 'duration': 3601,
},
'params': {
'skip_download': True,
},
+ 'expected_warnings': ['adult content'],
}, {
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
'only_matching': True,
video_key['part'] = int(m.group('part'))
return video_key
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ login_form = {
+ 'szWork': 'login',
+ 'szType': 'json',
+ 'szUid': username,
+ 'szPassword': password,
+ 'isSaveId': 'false',
+ 'szScriptVar': 'oLoginRet',
+ 'szAction': '',
+ }
+
+ response = self._download_json(
+ 'https://login.afreecatv.com/app/LoginAction.php', None,
+ 'Logging in', data=urlencode_postdata(login_form))
+
+ _ERRORS = {
+ -4: 'Your account has been suspended due to a violation of our terms and policies.',
+ -5: 'https://member.afreecatv.com/app/user_delete_progress.php',
+ -6: 'https://login.afreecatv.com/membership/changeMember.php',
+ -8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
+ -9: 'https://member.afreecatv.com/app/pop_login_block.php',
+ -11: 'https://login.afreecatv.com/afreeca/second_login.php',
+ -12: 'https://member.afreecatv.com/app/user_security.php',
+ 0: 'The username does not exist or you have entered the wrong password.',
+ -1: 'The username does not exist or you have entered the wrong password.',
+ -3: 'You have entered your username/password incorrectly.',
+ -7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.',
+ -10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.',
+ -32008: 'You have failed to log in. Please contact our Help Center.',
+ }
+
+ result = int_or_none(response.get('RESULT'))
+ if result != 1:
+ error = _ERRORS.get(result, 'You have failed to log in.')
+ raise ExtractorError(
+ 'Unable to login: %s said: %s' % (self.IE_NAME, error),
+ expected=True)
+
def _real_extract(self, url):
video_id = self._match_id(url)
video_id = self._search_regex(
r'nTitleNo\s*=\s*(\d+)', webpage, 'title', default=video_id)
- video_xml = self._download_xml(
- 'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
- video_id, headers={
- 'Referer': 'http://vod.afreecatv.com/embed.php',
- }, query={
+ partial_view = False
+ for _ in range(2):
+ query = {
'nTitleNo': video_id,
'nStationNo': station_id,
'nBbsNo': bbs_id,
- 'partialView': 'SKIP_ADULT',
- })
+ }
+ if partial_view:
+ query['partialView'] = 'SKIP_ADULT'
+ video_xml = self._download_xml(
+ 'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
+ video_id, 'Downloading video info XML%s'
+ % (' (skipping adult)' if partial_view else ''),
+                    headers={
+ 'Referer': url,
+ }, query=query)
- flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
- if flag and flag != 'SUCCEED':
+ flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
+ if flag and flag == 'SUCCEED':
+ break
+ if flag == 'PARTIAL_ADULT':
+ self._downloader.report_warning(
+ 'In accordance with local laws and regulations, underage users are restricted from watching adult content. '
+ 'Only content suitable for all ages will be downloaded. '
+ 'Provide account credentials if you wish to download restricted content.')
+ partial_view = True
+ continue
+ elif flag == 'ADULT':
+ error = 'Only users older than 19 are able to watch this video. Provide account credentials to download this content.'
+ else:
+ error = flag
raise ExtractorError(
- '%s said: %s' % (self.IE_NAME, flag), expected=True)
+ '%s said: %s' % (self.IE_NAME, error), expected=True)
+ else:
+ raise ExtractorError('Unable to download video info')
video_element = video_xml.findall(compat_xpath('./track/video'))[-1]
if video_element is None or video_element.text is None:
_TESTS = [{
'url': 'http://www.bilibili.tv/video/av1074402/',
- 'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e',
+ 'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
'info_dict': {
'id': '1074402',
- 'ext': 'mp4',
+ 'ext': 'flv',
'title': '【金坷垃】金泡沫',
'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
- 'duration': 308.315,
- 'timestamp': 1398012660,
+ 'duration': 308.067,
+ 'timestamp': 1398012678,
'upload_date': '20140420',
'thumbnail': r're:^https?://.+\.jpg',
'uploader': '菊子桑',
'url': 'http://www.bilibili.com/video/av8903802/',
'info_dict': {
'id': '8903802',
- 'ext': 'mp4',
'title': '阿滴英文|英文歌分享#6 "Closer',
'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
- 'uploader': '阿滴英文',
- 'uploader_id': '65880958',
- 'timestamp': 1488382620,
- 'upload_date': '20170301',
- },
- 'params': {
- 'skip_download': True, # Test metadata only
},
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '8903802_part1',
+ 'ext': 'flv',
+ 'title': '阿滴英文|英文歌分享#6 "Closer',
+ 'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
+ 'uploader': '阿滴英文',
+ 'uploader_id': '65880958',
+ 'timestamp': 1488382634,
+ 'upload_date': '20170301',
+ },
+ 'params': {
+ 'skip_download': True, # Test metadata only
+ },
+ }, {
+ 'info_dict': {
+ 'id': '8903802_part2',
+ 'ext': 'flv',
+ 'title': '阿滴英文|英文歌分享#6 "Closer',
+ 'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
+ 'uploader': '阿滴英文',
+ 'uploader_id': '65880958',
+ 'timestamp': 1488382634,
+ 'upload_date': '20170301',
+ },
+ 'params': {
+ 'skip_download': True, # Test metadata only
+ },
+ }]
}]
_APP_KEY = '84956560bc028eb7'
webpage = self._download_webpage(url, video_id)
if 'anime/' not in url:
- cid = compat_parse_qs(self._search_regex(
+ cid = self._search_regex(
+ r'cid(?:["\']:|=)(\d+)', webpage, 'cid',
+ default=None
+ ) or compat_parse_qs(self._search_regex(
[r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
+ r'EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)',
r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
webpage, 'player parameters'))['cid'][0]
else:
self._report_error(js)
cid = js['result']['cid']
- payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid)
- sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()
-
headers = {
'Referer': url
}
headers.update(self.geo_verification_headers())
- video_info = self._download_json(
- 'http://interface.bilibili.com/playurl?%s&sign=%s' % (payload, sign),
- video_id, note='Downloading video info page',
- headers=headers)
-
- if 'durl' not in video_info:
- self._report_error(video_info)
-
entries = []
- for idx, durl in enumerate(video_info['durl']):
- formats = [{
- 'url': durl['url'],
- 'filesize': int_or_none(durl['size']),
- }]
- for backup_url in durl.get('backup_url', []):
- formats.append({
- 'url': backup_url,
- # backup URLs have lower priorities
- 'preference': -2 if 'hd.mp4' in backup_url else -3,
+ RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4')
+ for num, rendition in enumerate(RENDITIONS, start=1):
+ payload = 'appkey=%s&cid=%s&otype=json&%s' % (self._APP_KEY, cid, rendition)
+ sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()
+
+ video_info = self._download_json(
+ 'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign),
+ video_id, note='Downloading video info page',
+ headers=headers, fatal=num == len(RENDITIONS))
+
+ if not video_info:
+ continue
+
+ if 'durl' not in video_info:
+ if num < len(RENDITIONS):
+ continue
+ self._report_error(video_info)
+
+ for idx, durl in enumerate(video_info['durl']):
+ formats = [{
+ 'url': durl['url'],
+ 'filesize': int_or_none(durl['size']),
+ }]
+ for backup_url in durl.get('backup_url', []):
+ formats.append({
+ 'url': backup_url,
+ # backup URLs have lower priorities
+ 'preference': -2 if 'hd.mp4' in backup_url else -3,
+ })
+
+ for a_format in formats:
+ a_format.setdefault('http_headers', {}).update({
+ 'Referer': url,
+ })
+
+ self._sort_formats(formats)
+
+ entries.append({
+ 'id': '%s_part%s' % (video_id, idx),
+ 'duration': float_or_none(durl.get('length'), 1000),
+ 'formats': formats,
})
+ break
- for a_format in formats:
- a_format.setdefault('http_headers', {}).update({
- 'Referer': url,
- })
-
- self._sort_formats(formats)
-
- entries.append({
- 'id': '%s_part%s' % (video_id, idx),
- 'duration': float_or_none(durl.get('length'), 1000),
- 'formats': formats,
- })
-
- title = self._html_search_regex('<h1[^>]*>([^<]+)</h1>', webpage, 'title')
+ title = self._html_search_regex(
+            (r'<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
+             r'(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
+ group='title')
description = self._html_search_meta('description', webpage)
timestamp = unified_timestamp(self._html_search_regex(
- r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', default=None))
+ r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time',
+ default=None) or self._html_search_meta(
+ 'uploadDate', webpage, 'timestamp', default=None))
thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage)
# TODO 'view_count' requires deobfuscating Javascript
}
uploader_mobj = re.search(
- r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"',
+ r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]*>(?P<name>[^<]+)',
webpage)
if uploader_mobj:
info.update({
'uploader': uploader_mobj.group('name'),
'uploader_id': uploader_mobj.group('id'),
})
+ if not info.get('uploader'):
+ info['uploader'] = self._html_search_meta(
+ 'author', webpage, 'uploader', default=None)
for entry in entries:
entry.update(info)
import re
from .common import InfoExtractor
+from .youtube import YoutubeIE
from ..compat import compat_str
-from ..utils import (
- int_or_none,
- parse_age_limit,
-)
+from ..utils import int_or_none
class BreakIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?(?P<site>break|screenjunkies)\.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)'
+ _VALID_URL = r'https?://(?:www\.)?break\.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)'
_TESTS = [{
'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056',
'info_dict': {
'ext': 'mp4',
'title': 'When Girls Act Like D-Bags',
'age_limit': 13,
- }
- }, {
- 'url': 'http://www.screenjunkies.com/video/best-quentin-tarantino-movie-2841915',
- 'md5': '5c2b686bec3d43de42bde9ec047536b0',
- 'info_dict': {
- 'id': '2841915',
- 'display_id': 'best-quentin-tarantino-movie',
- 'ext': 'mp4',
- 'title': 'Best Quentin Tarantino Movie',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'duration': 3671,
- 'age_limit': 13,
- 'tags': list,
- },
- }, {
- 'url': 'http://www.screenjunkies.com/video/honest-trailers-the-dark-knight',
- 'info_dict': {
- 'id': '2348808',
- 'display_id': 'honest-trailers-the-dark-knight',
- 'ext': 'mp4',
- 'title': 'Honest Trailers - The Dark Knight',
- 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
- 'age_limit': 10,
- 'tags': list,
},
}, {
- # requires subscription but worked around
- 'url': 'http://www.screenjunkies.com/video/knocking-dead-ep-1-the-show-so-far-3003285',
+ # youtube embed
+ 'url': 'http://www.break.com/video/someone-forgot-boat-brakes-work',
'info_dict': {
- 'id': '3003285',
- 'display_id': 'knocking-dead-ep-1-the-show-so-far',
+ 'id': 'RrrDLdeL2HQ',
'ext': 'mp4',
- 'title': 'State of The Dead Recap: Knocking Dead Pilot',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'duration': 3307,
- 'age_limit': 13,
- 'tags': list,
+ 'title': 'Whale Watching Boat Crashing Into San Diego Dock',
+ 'description': 'md5:afc1b2772f0a8468be51dd80eb021069',
+ 'upload_date': '20160331',
+ 'uploader': 'Steve Holden',
+ 'uploader_id': 'sdholden07',
},
+ 'params': {
+ 'skip_download': True,
+ }
}, {
'url': 'http://www.break.com/video/ugc/baby-flex-2773063',
'only_matching': True,
}]
- _DEFAULT_BITRATES = (48, 150, 320, 496, 864, 2240, 3264)
-
def _real_extract(self, url):
- site, display_id, video_id = re.match(self._VALID_URL, url).groups()
+ display_id, video_id = re.match(self._VALID_URL, url).groups()
- if not video_id:
- webpage = self._download_webpage(url, display_id)
- video_id = self._search_regex(
- (r'src=["\']/embed/(\d+)', r'data-video-content-id=["\'](\d+)'),
- webpage, 'video id')
+ webpage = self._download_webpage(url, display_id)
- webpage = self._download_webpage(
- 'http://www.%s.com/embed/%s' % (site, video_id),
- display_id, 'Downloading video embed page')
- embed_vars = self._parse_json(
+ youtube_url = YoutubeIE._extract_url(webpage)
+ if youtube_url:
+ return self.url_result(youtube_url, ie=YoutubeIE.ie_key())
+
+ content = self._parse_json(
self._search_regex(
- r'(?s)embedVars\s*=\s*({.+?})\s*</script>', webpage, 'embed vars'),
+ r'(?s)content["\']\s*:\s*(\[.+?\])\s*[,\n]', webpage,
+ 'content'),
display_id)
- youtube_id = embed_vars.get('youtubeId')
- if youtube_id:
- return self.url_result(youtube_id, 'Youtube')
-
- title = embed_vars['contentName']
-
formats = []
- bitrates = []
- for f in embed_vars.get('media', []):
- if not f.get('uri') or f.get('mediaPurpose') != 'play':
+ for video in content:
+ video_url = video.get('url')
+ if not video_url or not isinstance(video_url, compat_str):
continue
- bitrate = int_or_none(f.get('bitRate'))
- if bitrate:
- bitrates.append(bitrate)
+ bitrate = int_or_none(self._search_regex(
+ r'(\d+)_kbps', video_url, 'tbr', default=None))
formats.append({
- 'url': f['uri'],
+ 'url': video_url,
'format_id': 'http-%d' % bitrate if bitrate else 'http',
- 'width': int_or_none(f.get('width')),
- 'height': int_or_none(f.get('height')),
'tbr': bitrate,
- 'format': 'mp4',
})
+ self._sort_formats(formats)
- if not bitrates:
- # When subscriptionLevel > 0, i.e. plus subscription is required
- # media list will be empty. However, hds and hls uris are still
- # available. We can grab them assuming bitrates to be default.
- bitrates = self._DEFAULT_BITRATES
-
- auth_token = embed_vars.get('AuthToken')
+ title = self._search_regex(
+ (r'title["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ r'<h1[^>]*>(?P<value>[^<]+)'), webpage, 'title', group='value')
- def construct_manifest_url(base_url, ext):
- pieces = [base_url]
- pieces.extend([compat_str(b) for b in bitrates])
- pieces.append('_kbps.mp4.%s?%s' % (ext, auth_token))
- return ','.join(pieces)
+ def get(key, name):
+ return int_or_none(self._search_regex(
+ r'%s["\']\s*:\s*["\'](\d+)' % key, webpage, name,
+ default=None))
- if bitrates and auth_token:
- hds_url = embed_vars.get('hdsUri')
- if hds_url:
- formats.extend(self._extract_f4m_formats(
- construct_manifest_url(hds_url, 'f4m'),
- display_id, f4m_id='hds', fatal=False))
- hls_url = embed_vars.get('hlsUri')
- if hls_url:
- formats.extend(self._extract_m3u8_formats(
- construct_manifest_url(hls_url, 'm3u8'),
- display_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
- self._sort_formats(formats)
+ age_limit = get('ratings', 'age limit')
+ video_id = video_id or get('pid', 'video id') or display_id
return {
'id': video_id,
'display_id': display_id,
'title': title,
- 'thumbnail': embed_vars.get('thumbUri'),
- 'duration': int_or_none(embed_vars.get('videoLengthInSeconds')) or None,
- 'age_limit': parse_age_limit(embed_vars.get('audienceRating')),
- 'tags': embed_vars.get('tags', '').split(','),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'age_limit': age_limit,
'formats': formats,
}
webpage = self._download_webpage(
'http://www.canalc2.tv/video/%s' % video_id, video_id)
+ title = self._html_search_regex(
+ r'(?s)class="[^"]*col_description[^"]*">.*?<h3>(.+?)</h3>',
+ webpage, 'title')
+
formats = []
for _, video_url in re.findall(r'file\s*=\s*(["\'])(.+?)\1', webpage):
if video_url.startswith('rtmp://'):
'url': video_url,
'format_id': 'http',
})
- self._sort_formats(formats)
- title = self._html_search_regex(
- r'(?s)class="[^"]*col_description[^"]*">.*?<h3>(.*?)</h3>', webpage, 'title')
- duration = parse_duration(self._search_regex(
- r'id=["\']video_duree["\'][^>]*>([^<]+)',
- webpage, 'duration', fatal=False))
+ if formats:
+ info = {
+ 'formats': formats,
+ }
+ else:
+ info = self._parse_html5_media_entries(url, webpage, url)[0]
+
+ self._sort_formats(info['formats'])
- return {
+ info.update({
'id': video_id,
'title': title,
- 'duration': duration,
- 'formats': formats,
- }
+ 'duration': parse_duration(self._search_regex(
+ r'id=["\']video_duree["\'][^>]*>([^<]+)',
+ webpage, 'duration', fatal=False)),
+ })
+ return info
import re
from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import (
+ compat_str,
+ compat_HTTPError,
+)
from ..utils import (
js_to_json,
smuggle_url,
def _call_api(self, path, video_id):
url = path if path.startswith('http') else self._API_BASE_URL + path
- result = self._download_xml(url, video_id, headers={
- 'X-Clearleap-DeviceId': self._device_id,
- 'X-Clearleap-DeviceToken': self._device_token,
- })
+ for _ in range(2):
+ try:
+ result = self._download_xml(url, video_id, headers={
+ 'X-Clearleap-DeviceId': self._device_id,
+ 'X-Clearleap-DeviceToken': self._device_token,
+ })
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ # Device token has expired, re-acquiring device token
+ self._register_device()
+ continue
+ raise
error_message = xpath_text(result, 'userMessage') or xpath_text(result, 'systemMessage')
if error_message:
raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message))
return result
def _real_initialize(self):
- if not self._device_id or not self._device_token:
- device = self._downloader.cache.load('cbcwatch', 'device') or {}
- self._device_id, self._device_token = device.get('id'), device.get('token')
- if not self._device_id or not self._device_token:
- result = self._download_xml(
- self._API_BASE_URL + 'device/register',
- None, data=b'<device><type>web</type></device>')
- self._device_id = xpath_text(result, 'deviceId', fatal=True)
- self._device_token = xpath_text(result, 'deviceToken', fatal=True)
- self._downloader.cache.store(
- 'cbcwatch', 'device', {
- 'id': self._device_id,
- 'token': self._device_token,
- })
+ if self._valid_device_token():
+ return
+ device = self._downloader.cache.load('cbcwatch', 'device') or {}
+ self._device_id, self._device_token = device.get('id'), device.get('token')
+ if self._valid_device_token():
+ return
+ self._register_device()
+
+ def _valid_device_token(self):
+ return self._device_id and self._device_token
+
+ def _register_device(self):
+ self._device_id = self._device_token = None
+ result = self._download_xml(
+ self._API_BASE_URL + 'device/register',
+ None, 'Acquiring device token',
+ data=b'<device><type>web</type></device>')
+ self._device_id = xpath_text(result, 'deviceId', fatal=True)
+ self._device_token = xpath_text(result, 'deviceToken', fatal=True)
+ self._downloader.cache.store(
+ 'cbcwatch', 'device', {
+ 'id': self._device_id,
+ 'token': self._device_token,
+ })
def _parse_rss_feed(self, rss):
channel = xpath_element(rss, 'channel', fatal=True)
from .theplatform import ThePlatformFeedIE
from ..utils import (
+ ExtractorError,
int_or_none,
find_xpath_attr,
xpath_element,
asset_types = []
subtitles = {}
formats = []
+ last_e = None
for item in items_data.findall('.//item'):
asset_type = xpath_text(item, 'assetType')
- if not asset_type or asset_type in asset_types:
+ if not asset_type or asset_type in asset_types or asset_type in ('HLS_FPS', 'DASH_CENC'):
continue
asset_types.append(asset_type)
query = {
query['formats'] = 'MPEG4,M3U'
elif asset_type in ('RTMP', 'WIFI', '3G'):
query['formats'] = 'MPEG4,FLV'
- tp_formats, tp_subtitles = self._extract_theplatform_smil(
- update_url_query(tp_release_url, query), content_id,
- 'Downloading %s SMIL data' % asset_type)
+ try:
+ tp_formats, tp_subtitles = self._extract_theplatform_smil(
+ update_url_query(tp_release_url, query), content_id,
+ 'Downloading %s SMIL data' % asset_type)
+ except ExtractorError as e:
+ last_e = e
+ continue
formats.extend(tp_formats)
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
+ if last_e and not formats:
+ raise last_e
self._sort_formats(formats)
info = self._extract_theplatform_metadata(tp_path, content_id)
class CBSSportsIE(CBSBaseIE):
- _VALID_URL = r'https?://(?:www\.)?cbssports\.com/video/player/[^/]+/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/(?:video|news)/(?P<id>[^/?#&]+)'
_TESTS = [{
- 'url': 'http://www.cbssports.com/video/player/videos/708337219968/0/ben-simmons-the-next-lebron?-not-so-fast',
+ 'url': 'https://www.cbssports.com/nba/video/donovan-mitchell-flashes-star-potential-in-game-2-victory-over-thunder/',
'info_dict': {
- 'id': '708337219968',
+ 'id': '1214315075735',
'ext': 'mp4',
- 'title': 'Ben Simmons the next LeBron? Not so fast',
- 'description': 'md5:854294f627921baba1f4b9a990d87197',
- 'timestamp': 1466293740,
- 'upload_date': '20160618',
+ 'title': 'Donovan Mitchell flashes star potential in Game 2 victory over Thunder',
+ 'description': 'md5:df6f48622612c2d6bd2e295ddef58def',
+ 'timestamp': 1524111457,
+ 'upload_date': '20180419',
'uploader': 'CBSI-NEW',
},
'params': {
# m3u8 download
'skip_download': True,
}
+ }, {
+ 'url': 'https://www.cbssports.com/nba/news/nba-playoffs-2018-watch-76ers-vs-heat-game-3-series-schedule-tv-channel-online-stream/',
+ 'only_matching': True,
}]
def _extract_video_info(self, filter_query, video_id):
return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id)
def _real_extract(self, url):
- video_id = self._match_id(url)
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ video_id = self._search_regex(
+ [r'(?:=|%26)pcid%3D(\d+)', r'embedVideo(?:Container)?_(\d+)'],
+ webpage, 'video id')
return self._extract_video_info('byId=%s' % video_id, video_id)
import re
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
+ clean_html,
int_or_none,
parse_duration,
parse_iso8601,
- clean_html,
+ parse_resolution,
)
def _real_extract(self, url):
media_type, media_id = re.match(self._VALID_URL, url).groups()
- media_data = {}
- formats = []
- profiles = ['pc'] if media_type == 'audio' else ['mobil', 'pc']
- for i, profile in enumerate(profiles):
- md = self._download_json('http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
+
+ media = self._download_json(
+ 'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
'media': media_type,
'idint': media_id,
- 'profile': profile,
- }, fatal=False)
- if md:
- media_data = md
- media_url = media_data.get('media', {}).get('url')
- if media_url:
- formats.append({
- 'format_id': profile,
- 'url': media_url,
- 'quality': i,
- })
+ })
+
+ formats = []
+ media_url = media['media']['url']
+ if isinstance(media_url, list):
+ for format_ in media_url:
+ format_url = format_.get('file')
+ if not format_url or not isinstance(format_url, compat_str):
+ continue
+ label = format_.get('label')
+ f = parse_resolution(label)
+ f.update({
+ 'url': format_url,
+ 'format_id': label,
+ })
+ formats.append(f)
+ else:
+ formats.append({
+ 'url': media_url,
+ 'vcodec': 'none' if media_type == 'audio' else None,
+ })
self._sort_formats(formats)
- informacio = media_data['informacio']
+ informacio = media['informacio']
title = informacio['titol']
durada = informacio.get('durada', {})
duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text'))
timestamp = parse_iso8601(informacio.get('data_emissio', {}).get('utc'))
subtitles = {}
- subtitols = media_data.get('subtitols', {})
+ subtitols = media.get('subtitols', {})
if subtitols:
sub_url = subtitols.get('url')
if sub_url:
})
thumbnails = []
- imatges = media_data.get('imatges', {})
+ imatges = media.get('imatges', {})
if imatges:
thumbnail_url = imatges.get('url')
if thumbnail_url:
float_or_none,
sanitized_Request,
unescapeHTML,
+ update_url_query,
urlencode_postdata,
USER_AGENTS,
)
# m3u8 download
'skip_download': True,
},
+ }, {
+ # iframe embed
+ 'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
+ 'only_matching': True,
}]
def _real_extract(self, url):
webpage = self._download_webpage(url, video_id)
- data_url = unescapeHTML(self._search_regex(
- r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
- webpage, 'iframe player url', group='url'))
+ data_url = update_url_query(unescapeHTML(self._search_regex(
+ (r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
+ r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'),
+ webpage, 'iframe player url', group='url')), query={
+ 'autoStart': 'true',
+ })
return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key())
content, _ = res
return content
+ def _download_xml_handle(
+ self, url_or_request, video_id, note='Downloading XML',
+ errnote='Unable to download XML', transform_source=None,
+ fatal=True, encoding=None, data=None, headers={}, query={}):
+ """Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle)"""
+ res = self._download_webpage_handle(
+ url_or_request, video_id, note, errnote, fatal=fatal,
+ encoding=encoding, data=data, headers=headers, query=query)
+ if res is False:
+ return res
+ xml_string, urlh = res
+ return self._parse_xml(
+ xml_string, video_id, transform_source=transform_source,
+ fatal=fatal), urlh
+
def _download_xml(self, url_or_request, video_id,
note='Downloading XML', errnote='Unable to download XML',
transform_source=None, fatal=True, encoding=None,
data=None, headers={}, query={}):
"""Return the xml as an xml.etree.ElementTree.Element"""
- xml_string = self._download_webpage(
- url_or_request, video_id, note, errnote, fatal=fatal,
- encoding=encoding, data=data, headers=headers, query=query)
- if xml_string is False:
- return xml_string
- return self._parse_xml(
- xml_string, video_id, transform_source=transform_source,
- fatal=fatal)
+ res = self._download_xml_handle(
+ url_or_request, video_id, note=note, errnote=errnote,
+ transform_source=transform_source, fatal=fatal, encoding=encoding,
+ data=data, headers=headers, query=query)
+ return res if res is False else res[0]
def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True):
if transform_source:
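
A sketch of how extractor code can consume the new _download_xml_handle
helper: it returns the parsed document together with the URL handle, so
the final post-redirect URL can serve as a base for relative references,
exactly as the _extract_mpd_formats change below does. Illustrative
only; manifest_url is a placeholder and base_url is the existing helper
from youtube_dl.utils:

    res = self._download_xml_handle(manifest_url, video_id, fatal=False)
    if res is not False:
        doc, urlh = res
        # resolve relative references against the redirected URL
        manifest_base = base_url(urlh.geturl())
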
})
for e in json_ld:
- if e.get('@context') == 'http://schema.org':
+ if isinstance(e.get('@context'), compat_str) and re.match(r'^https?://schema.org/?$', e.get('@context')):
item_type = e.get('@type')
if expected_type is not None and expected_type != item_type:
return info
})
return subtitles
- def _extract_xspf_playlist(self, playlist_url, playlist_id, fatal=True):
+ def _extract_xspf_playlist(self, xspf_url, playlist_id, fatal=True):
xspf = self._download_xml(
- playlist_url, playlist_id, 'Downloading xpsf playlist',
+ xspf_url, playlist_id, 'Downloading xspf playlist',
'Unable to download xspf manifest', fatal=fatal)
if xspf is False:
return []
- return self._parse_xspf(xspf, playlist_id)
+ return self._parse_xspf(
+ xspf, playlist_id, xspf_url=xspf_url,
+ xspf_base_url=base_url(xspf_url))
- def _parse_xspf(self, playlist, playlist_id):
+ def _parse_xspf(self, xspf_doc, playlist_id, xspf_url=None, xspf_base_url=None):
NS_MAP = {
'xspf': 'http://xspf.org/ns/0/',
's1': 'http://static.streamone.nl/player/ns/0',
}
entries = []
- for track in playlist.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):
+ for track in xspf_doc.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):
title = xpath_text(
track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id)
description = xpath_text(
duration = float_or_none(
xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000)
- formats = [{
- 'url': location.text,
- 'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
- 'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
- 'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
- } for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))]
+ formats = []
+ for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP)):
+ format_url = urljoin(xspf_base_url, location.text)
+ if not format_url:
+ continue
+ formats.append({
+ 'url': format_url,
+ 'manifest_url': xspf_url,
+ 'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
+ 'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
+ 'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
+ })
self._sort_formats(formats)
entries.append({
return entries
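The new xspf_base_url argument exists because <location> entries may be relative and must be resolved against the playlist URL. A stdlib equivalent on hypothetical URLs (youtube-dl's urljoin additionally returns None for invalid input, which the `if not format_url: continue` guard above relies on):

from urllib.parse import urljoin

xspf_base = 'https://example.com/media/'  # base_url(xspf_url)
print(urljoin(xspf_base, 'streams/clip_720.mp4'))
# -> https://example.com/media/streams/clip_720.mp4
print(urljoin(xspf_base, 'https://cdn.example.com/clip.mp4'))
# absolute locations pass through unchanged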
def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}):
- res = self._download_webpage_handle(
+ res = self._download_xml_handle(
mpd_url, video_id,
note=note or 'Downloading MPD manifest',
errnote=errnote or 'Failed to download MPD manifest',
fatal=fatal)
if res is False:
return []
- mpd, urlh = res
+ mpd_doc, urlh = res
mpd_base_url = base_url(urlh.geturl())
return self._parse_mpd_formats(
- compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url,
+ mpd_doc, mpd_id=mpd_id, mpd_base_url=mpd_base_url,
formats_dict=formats_dict, mpd_url=mpd_url)
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None):
return formats
def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True):
- res = self._download_webpage_handle(
+ res = self._download_xml_handle(
ism_url, video_id,
note=note or 'Downloading ISM manifest',
errnote=errnote or 'Failed to download ISM manifest',
fatal=fatal)
if res is False:
return []
- ism, urlh = res
+ ism_doc, urlh = res
- return self._parse_ism_formats(
- compat_etree_fromstring(ism.encode('utf-8')), urlh.geturl(), ism_id)
+ return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id)
def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
"""
return formats
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None):
- def absolute_url(video_url):
- return compat_urlparse.urljoin(base_url, video_url)
+ def absolute_url(item_url):
+ return urljoin(base_url, item_url)
def parse_content_type(content_type):
if not content_type:
if src:
_, formats = _media_formats(src, media_type)
media_info['formats'].extend(formats)
- media_info['thumbnail'] = media_attributes.get('poster')
+ media_info['thumbnail'] = absolute_url(media_attributes.get('poster'))
if media_content:
for source_tag in re.findall(r'<source[^>]+>', media_content):
source_attributes = extract_attributes(source_tag)
# coding: utf-8
from __future__ import unicode_literals, division
+import re
+
from .common import InfoExtractor
-from ..utils import int_or_none
+from ..compat import (
+ compat_str,
+ compat_HTTPError,
+)
+from ..utils import (
+ determine_ext,
+ float_or_none,
+ int_or_none,
+ parse_age_limit,
+ parse_duration,
+ ExtractorError
+)
class CrackleIE(InfoExtractor):
- _GEO_COUNTRIES = ['US']
_VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
_TEST = {
- 'url': 'http://www.crackle.com/comedians-in-cars-getting-coffee/2498934',
+ # geo restricted to CA
+ 'url': 'https://www.crackle.com/andromeda/2502343',
'info_dict': {
- 'id': '2498934',
+ 'id': '2502343',
'ext': 'mp4',
- 'title': 'Everybody Respects A Bloody Nose',
- 'description': 'Jerry is kaffeeklatsching in L.A. with funnyman J.B. Smoove (Saturday Night Live, Real Husbands of Hollywood). They’re headed for brew at 10 Speed Coffee in a 1964 Studebaker Avanti.',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'duration': 906,
- 'series': 'Comedians In Cars Getting Coffee',
- 'season_number': 8,
- 'episode_number': 4,
- 'subtitles': {
- 'en-US': [
- {'ext': 'vtt'},
- {'ext': 'tt'},
- ]
- },
+ 'title': 'Under The Night',
+ 'description': 'md5:d2b8ca816579ae8a7bf28bfff8cefc8a',
+ 'duration': 2583,
+ 'view_count': int,
+ 'average_rating': 0,
+ 'age_limit': 14,
+ 'genre': 'Action, Sci-Fi',
+ 'creator': 'Allan Kroeker',
+ 'artist': 'Keith Hamilton Cobb, Kevin Sorbo, Lisa Ryder, Lexa Doig, Robert Hewitt Wolfe',
+ 'release_year': 2000,
+ 'series': 'Andromeda',
+ 'episode': 'Under The Night',
+ 'season_number': 1,
+ 'episode_number': 1,
},
'params': {
# m3u8 download
'skip_download': True,
}
}
- _THUMBNAIL_RES = [
- (120, 90),
- (208, 156),
- (220, 124),
- (220, 220),
- (240, 180),
- (250, 141),
- (315, 236),
- (320, 180),
- (360, 203),
- (400, 300),
- (421, 316),
- (460, 330),
- (460, 460),
- (462, 260),
- (480, 270),
- (587, 330),
- (640, 480),
- (700, 330),
- (700, 394),
- (854, 480),
- (1024, 1024),
- (1920, 1080),
- ]
-
- # extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx
- _MEDIA_FILE_SLOTS = {
- 'c544.flv': {
- 'width': 544,
- 'height': 306,
- },
- '360p.mp4': {
- 'width': 640,
- 'height': 360,
- },
- '480p.mp4': {
- 'width': 852,
- 'height': 478,
- },
- '480p_1mbps.mp4': {
- 'width': 852,
- 'height': 478,
- },
- }
-
def _real_extract(self, url):
video_id = self._match_id(url)
- config_doc = self._download_xml(
- 'http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx?site=16',
- video_id, 'Downloading config')
-
- item = self._download_xml(
- 'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id,
- video_id, headers=self.geo_verification_headers()).find('i')
- title = item.attrib['t']
-
- subtitles = {}
- formats = self._extract_m3u8_formats(
- 'http://content.uplynk.com/ext/%s/%s.m3u8' % (config_doc.attrib['strUplynkOwnerId'], video_id),
- video_id, 'mp4', m3u8_id='hls', fatal=None)
- thumbnails = []
- path = item.attrib.get('p')
- if path:
- for width, height in self._THUMBNAIL_RES:
- res = '%dx%d' % (width, height)
- thumbnails.append({
- 'id': res,
- 'url': 'http://images-us-am.crackle.com/%stnl_%s.jpg' % (path, res),
- 'width': width,
- 'height': height,
- 'resolution': res,
- })
- http_base_url = 'http://ahttp.crackle.com/' + path
- for mfs_path, mfs_info in self._MEDIA_FILE_SLOTS.items():
- formats.append({
- 'url': http_base_url + mfs_path,
- 'format_id': 'http-' + mfs_path.split('.')[0],
- 'width': mfs_info['width'],
- 'height': mfs_info['height'],
- })
- for cc in item.findall('cc'):
- locale = cc.attrib.get('l')
- v = cc.attrib.get('v')
- if locale and v:
- if locale not in subtitles:
- subtitles[locale] = []
- for url_ext, ext in (('vtt', 'vtt'), ('xml', 'tt')):
- subtitles.setdefault(locale, []).append({
- 'url': '%s/%s%s_%s.%s' % (config_doc.attrib['strSubtitleServer'], path, locale, v, url_ext),
- 'ext': ext,
- })
- self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': item.attrib.get('d'),
- 'duration': int(item.attrib.get('r'), 16) / 1000 if item.attrib.get('r') else None,
- 'series': item.attrib.get('sn'),
- 'season_number': int_or_none(item.attrib.get('se')),
- 'episode_number': int_or_none(item.attrib.get('ep')),
- 'thumbnails': thumbnails,
- 'subtitles': subtitles,
- 'formats': formats,
- }
+ country_code = self._downloader.params.get('geo_bypass_country', None)
+ countries = [country_code] if country_code else (
+ 'US', 'AU', 'CA', 'AS', 'FM', 'GU', 'MP', 'PR', 'PW', 'MH', 'VI')
+
+ last_e = None
+
+ for country in countries:
+ try:
+ media = self._download_json(
+ 'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s'
+ % (video_id, country), video_id,
+ 'Downloading media JSON as %s' % country,
+ 'Unable to download media JSON', query={
+ 'disableProtocols': 'true',
+ 'format': 'json'
+ })
+ except ExtractorError as e:
+ # 401 means geo restriction, try the next country
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ last_e = e
+ continue
+ raise
+
+ media_urls = media.get('MediaURLs')
+ if not media_urls or not isinstance(media_urls, list):
+ continue
+
+ title = media['Title']
+
+ formats = []
+ for e in media_urls:
+ if e.get('UseDRM') is True:
+ continue
+ format_url = e.get('Path')
+ if not format_url or not isinstance(format_url, compat_str):
+ continue
+ ext = determine_ext(format_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ format_url, video_id, mpd_id='dash', fatal=False))
+ self._sort_formats(formats)
+
+ description = media.get('Description')
+ duration = int_or_none(media.get(
+ 'DurationInSeconds')) or parse_duration(media.get('Duration'))
+ view_count = int_or_none(media.get('CountViews'))
+ average_rating = float_or_none(media.get('UserRating'))
+ age_limit = parse_age_limit(media.get('Rating'))
+ genre = media.get('Genre')
+ release_year = int_or_none(media.get('ReleaseYear'))
+ creator = media.get('Directors')
+ artist = media.get('Cast')
+
+ if media.get('MediaTypeDisplayValue') == 'Full Episode':
+ series = media.get('ShowName')
+ episode = title
+ season_number = int_or_none(media.get('Season'))
+ episode_number = int_or_none(media.get('Episode'))
+ else:
+ series = episode = season_number = episode_number = None
+
+ subtitles = {}
+ cc_files = media.get('ClosedCaptionFiles')
+ if isinstance(cc_files, list):
+ for cc_file in cc_files:
+ if not isinstance(cc_file, dict):
+ continue
+ cc_url = cc_file.get('Path')
+ if not cc_url or not isinstance(cc_url, compat_str):
+ continue
+ lang = cc_file.get('Locale') or 'en'
+ subtitles.setdefault(lang, []).append({'url': cc_url})
+
+ thumbnails = []
+ images = media.get('Images')
+ if isinstance(images, dict):
+ for image_key, image_url in images.items():
+ mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key)
+ if not mobj:
+ continue
+ thumbnails.append({
+ 'url': image_url,
+ 'width': int(mobj.group(1)),
+ 'height': int(mobj.group(2)),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'average_rating': average_rating,
+ 'age_limit': age_limit,
+ 'genre': genre,
+ 'creator': creator,
+ 'artist': artist,
+ 'release_year': release_year,
+ 'series': series,
+ 'episode': episode,
+ 'season_number': season_number,
+ 'episode_number': episode_number,
+ 'thumbnails': thumbnails,
+ 'subtitles': subtitles,
+ 'formats': formats,
+ }
+
+ if last_e:
+ raise last_e
+ raise ExtractorError('Unable to extract media info')
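The geo fallback above in miniature: try each country, remember the last geo error, and re-raise only if every region fails. fetch_media is a hypothetical stand-in for the media JSON request:

class GeoRestrictedError(Exception):
    pass

def fetch_media(country):
    if country != 'CA':
        raise GeoRestrictedError('HTTP 401 as %s' % country)
    return {'Title': 'Under The Night'}

last_e = None
for country in ('US', 'AU', 'CA'):
    try:
        media = fetch_media(country)
    except GeoRestrictedError as e:
        last_e = e  # remember the failure and try the next region
        continue
    print(media['Title'])
    break
else:
    raise last_e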
from __future__ import unicode_literals
import itertools
+import json
-from .amp import AMPIE
+from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
+ compat_str,
compat_urlparse,
)
from ..utils import (
- ExtractorError,
clean_html,
+ ExtractorError,
int_or_none,
- remove_end,
- sanitized_Request,
- urlencode_postdata
+ parse_age_limit,
+ parse_duration,
+ unified_timestamp,
)
-class DramaFeverBaseIE(AMPIE):
- _LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
+class DramaFeverBaseIE(InfoExtractor):
_NETRC_MACHINE = 'dramafever'
- _GEO_COUNTRIES = ['US', 'CA']
_CONSUMER_SECRET = 'DA59dtVXYLxajktV'
'consumer secret', default=self._CONSUMER_SECRET)
def _real_initialize(self):
- self._login()
self._consumer_secret = self._get_consumer_secret()
+ self._login()
def _login(self):
(username, password) = self._get_login_info()
'password': password,
}
- request = sanitized_Request(
- self._LOGIN_URL, urlencode_postdata(login_form))
- response = self._download_webpage(
- request, None, 'Logging in')
+ try:
+ response = self._download_json(
+ 'https://www.dramafever.com/api/users/login', None, 'Logging in',
+ data=json.dumps(login_form).encode('utf-8'), headers={
+ 'x-consumer-key': self._consumer_secret,
+ })
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code in (403, 404):
+ response = self._parse_json(
+ e.cause.read().decode('utf-8'), None)
+ else:
+ raise
- if all(logout_pattern not in response
- for logout_pattern in ['href="/accounts/logout/"', '>Log out<']):
- error = self._html_search_regex(
- r'(?s)<h\d[^>]+\bclass="hidden-xs prompt"[^>]*>(.+?)</h\d',
- response, 'error message', default=None)
- if error:
- raise ExtractorError('Unable to login: %s' % error, expected=True)
- raise ExtractorError('Unable to log in')
+ # Successful login
+ if response.get('result') or response.get('guid') or response.get('user_guid'):
+ return
+
+ errors = response.get('errors')
+ if errors and isinstance(errors, list):
+ error = errors[0]
+ message = error.get('message') or error['reason']
+ raise ExtractorError('Unable to login: %s' % message, expected=True)
+ raise ExtractorError('Unable to log in')
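A sketch of the error handling used in the login above: on a 403/404 the API still returns a JSON body describing the failure, so the error body is parsed rather than discarded. Standard-library version against a hypothetical endpoint:

import json
from urllib.error import HTTPError
from urllib.request import Request, urlopen

def post_login(url, payload, consumer_key):
    req = Request(url, data=json.dumps(payload).encode('utf-8'),
                  headers={'x-consumer-key': consumer_key})
    try:
        return json.loads(urlopen(req).read().decode('utf-8'))
    except HTTPError as e:
        if e.code in (403, 404):
            # login failures are reported as JSON in the error body
            return json.loads(e.read().decode('utf-8'))
        raise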
class DramaFeverIE(DramaFeverBaseIE):
IE_NAME = 'dramafever'
_VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)'
_TESTS = [{
- 'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/',
+ 'url': 'https://www.dramafever.com/drama/4274/1/Heirs/',
'info_dict': {
- 'id': '4512.1',
- 'ext': 'flv',
- 'title': 'Cooking with Shin',
- 'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0',
+ 'id': '4274.1',
+ 'ext': 'wvm',
+ 'title': 'Heirs - Episode 1',
+ 'description': 'md5:362a24ba18209f6276e032a651c50bc2',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 3783,
+ 'timestamp': 1381354993,
+ 'upload_date': '20131009',
+ 'series': 'Heirs',
+ 'season_number': 1,
'episode': 'Episode 1',
'episode_number': 1,
- 'thumbnail': r're:^https?://.*\.jpg',
- 'timestamp': 1404336058,
- 'upload_date': '20140702',
- 'duration': 344,
},
'params': {
# m3u8 download
'only_matching': True,
}]
+ def _call_api(self, path, video_id, note, fatal=False):
+ return self._download_json(
+ 'https://www.dramafever.com/api/5/' + path,
+ video_id, note=note, headers={
+ 'x-consumer-key': self._consumer_secret,
+ }, fatal=fatal)
+
+ def _get_subtitles(self, video_id):
+ subtitles = {}
+ subs = self._call_api(
+ 'video/%s/subtitles/webvtt/' % video_id, video_id,
+ 'Downloading subtitles JSON', fatal=False)
+ if not subs or not isinstance(subs, list):
+ return subtitles
+ for sub in subs:
+ if not isinstance(sub, dict):
+ continue
+ sub_url = sub.get('url')
+ if not sub_url or not isinstance(sub_url, compat_str):
+ continue
+ subtitles.setdefault(
+ sub.get('code') or sub.get('language') or 'en', []).append({
+ 'url': sub_url
+ })
+ return subtitles
+
def _real_extract(self, url):
video_id = self._match_id(url).replace('/', '.')
- try:
- info = self._extract_feed_info(
- 'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id)
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError):
- self.raise_geo_restricted(
- msg='Currently unavailable in your country',
- countries=self._GEO_COUNTRIES)
- raise
+ series_id, episode_number = video_id.split('.')
- # title is postfixed with video id for some reason, removing
- if info.get('title'):
- info['title'] = remove_end(info['title'], video_id).strip()
+ video = self._call_api(
+ 'series/%s/episodes/%s/' % (series_id, episode_number), video_id,
+ 'Downloading video JSON')
- series_id, episode_number = video_id.split('.')
- episode_info = self._download_json(
- # We only need a single episode info, so restricting page size to one episode
- # and dealing with page number as with episode number
- r'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_number=%s&page_size=1'
- % (self._consumer_secret, series_id, episode_number),
- video_id, 'Downloading episode info JSON', fatal=False)
- if episode_info:
- value = episode_info.get('value')
- if isinstance(value, list):
- for v in value:
- if v.get('type') == 'Episode':
- subfile = v.get('subfile') or v.get('new_subfile')
- if subfile and subfile != 'http://www.dramafever.com/st/':
- info.setdefault('subtitles', {}).setdefault('English', []).append({
- 'ext': 'srt',
- 'url': subfile,
- })
- episode_number = int_or_none(v.get('number'))
- episode_fallback = 'Episode'
- if episode_number:
- episode_fallback += ' %d' % episode_number
- info['episode'] = v.get('title') or episode_fallback
- info['episode_number'] = episode_number
- break
-
- return info
+ formats = []
+ download_assets = video.get('download_assets')
+ if download_assets and isinstance(download_assets, dict):
+ for format_id, format_dict in download_assets.items():
+ if not isinstance(format_dict, dict):
+ continue
+ format_url = format_dict.get('url')
+ if not format_url or not isinstance(format_url, compat_str):
+ continue
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ 'filesize': int_or_none(format_dict.get('filesize')),
+ })
+
+ stream = self._call_api(
+ 'video/%s/stream/' % video_id, video_id, 'Downloading stream JSON',
+ fatal=False)
+ if stream:
+ stream_url = stream.get('stream_url')
+ if stream_url:
+ formats.extend(self._extract_m3u8_formats(
+ stream_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ self._sort_formats(formats)
+
+ title = video.get('title') or 'Episode %s' % episode_number
+ description = video.get('description')
+ thumbnail = video.get('thumbnail')
+ timestamp = unified_timestamp(video.get('release_date'))
+ duration = parse_duration(video.get('duration'))
+ age_limit = parse_age_limit(video.get('tv_rating'))
+ series = video.get('series_title')
+ season_number = int_or_none(video.get('season'))
+
+ if series:
+ title = '%s - %s' % (series, title)
+
+ subtitles = self.extract_subtitles(video_id)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'age_limit': age_limit,
+ 'series': series,
+ 'season_number': season_number,
+ 'episode_number': int_or_none(episode_number),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
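The subtitle grouping used above, isolated: dict.setdefault collects multiple tracks per language without pre-initialising keys (the track list here is made up):

subtitles = {}
tracks = [('en', 'https://example.com/a.vtt'),
          ('en', 'https://example.com/b.vtt'),
          ('ko', 'https://example.com/c.vtt')]
for lang, sub_url in tracks:
    subtitles.setdefault(lang, []).append({'url': sub_url})
print(subtitles['en'])  # both English tracks, no KeyError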
class DramaFeverSeriesIE(DramaFeverBaseIE):
self._sort_formats(formats)
title = self._html_search_regex(
- (r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<',
+ (r'<h1[^>]+class=["\']title[^>]+>([^<]+)',
+ r'<title>([^<]+)\s*@\s+DrTuber',
+ r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<',
r'<p[^>]+class="title_substrate">([^<]+)</p>',
r'<title>([^<]+) - \d+'),
webpage, 'title')
+++ /dev/null
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-
-
-class ETOnlineIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?etonline\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
- _TESTS = [{
- 'url': 'http://www.etonline.com/tv/211130_dove_cameron_liv_and_maddie_emotional_episode_series_finale/',
- 'info_dict': {
- 'id': '211130_dove_cameron_liv_and_maddie_emotional_episode_series_finale',
- 'title': 'md5:a21ec7d3872ed98335cbd2a046f34ee6',
- 'description': 'md5:8b94484063f463cca709617c79618ccd',
- },
- 'playlist_count': 2,
- }, {
- 'url': 'http://www.etonline.com/media/video/here_are_the_stars_who_love_bringing_their_moms_as_dates_to_the_oscars-211359/',
- 'only_matching': True,
- }]
- BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1242911076001/default_default/index.html?videoId=ref:%s'
-
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
-
- webpage = self._download_webpage(url, playlist_id)
-
- entries = [
- self.url_result(
- self.BRIGHTCOVE_URL_TEMPLATE % video_id, 'BrightcoveNew', video_id)
- for video_id in re.findall(
- r'site\.brightcove\s*\([^,]+,\s*["\'](title_\d+)', webpage)]
-
- return self.playlist_result(
- entries, playlist_id,
- self._og_search_title(webpage, fatal=False),
- self._og_search_description(webpage))
FiveThirtyEightIE,
)
from .esri import EsriVideoIE
-from .etonline import ETOnlineIE
from .europa import EuropaIE
from .everyonesmixtape import EveryonesMixtapeIE
from .expotv import ExpoTVIE
)
from .learnr import LearnrIE
from .lecture2go import Lecture2GoIE
-from .lego import LEGOIE
-from .lemonde import LemondeIE
from .leeco import (
LeIE,
LePlaylistIE,
LetvCloudIE,
)
+from .lego import LEGOIE
+from .lemonde import LemondeIE
+from .lenta import LentaIE
from .libraryofcongress import LibraryOfCongressIE
from .libsyn import LibsynIE
from .lifenews import (
from .philharmoniedeparis import PhilharmonieDeParisIE
from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE
+from .picarto import (
+ PicartoIE,
+ PicartoVodIE,
+)
from .piksel import PikselIE
from .pinkbike import PinkbikeIE
from .pladform import PladformIE
from .svt import (
SVTIE,
SVTPlayIE,
+ SVTSeriesIE,
)
from .swrmediathek import SWRMediathekIE
from .syfy import SyfyIE
from .tvnow import (
TVNowIE,
TVNowListIE,
+ TVNowShowIE,
)
from .tvp import (
TVPEmbedIE,
_VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?P<id>[^/#?&]+)'
_TESTS = [{
'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
- 'md5': '1fb9228f5e3332ec8c057d6ac36f33e0',
+ 'md5': '92feaafa4b58e82f261e5419f39c60cb',
'info_dict': {
'id': 'music-video-14-british-euro-brit-european-cumshots-swallow-652431',
'ext': 'mp4',
'title': 'Music Video 14 british euro brit european cumshots swallow',
- 'uploader': 'unknown',
+ 'uploader': 'anonim',
'view_count': int,
'age_limit': 18,
}
r'<h1[^>]+title="([^"]+)"[^>]*>', webpage, 'title')
uploader = self._html_search_regex(
- r'Uploaded by:\s*</strong>\s*(.+?)\s*</div>',
+ r'Uploaded by:\s*</[^>]+>\s*<a[^>]+>(.+?)</a>',
webpage, 'uploader', fatal=False)
view_count = str_to_int(self._search_regex(
- r'Views:\s*</strong>\s*<span>([\d,\.]+)</span>',
+ r'Views:\s*</[^>]+>\s*<[^>]+>([\d,\.]+)</',
webpage, 'view count', fatal=False))
info.update({
if 'The content you are trying to access is not available in your region.' in webpage:
self.raise_geo_restricted()
video_data = extract_attributes(self._search_regex(
- r'(<a.+?rel="http://link\.theplatform\.com/s/.+?</a>)', webpage, 'video data'))
+ r'(<a.+?rel="https?://link\.theplatform\.com/s/.+?</a>)', webpage, 'video data'))
player_type = self._search_regex(r'playerType\s*=\s*[\'"]([^\'"]+)', webpage, 'player type', default=None)
release_url = video_data['rel']
title = video_data['data-title']
from .tnaflix import TNAFlixNetworkEmbedIE
from .drtuber import DrTuberIE
from .redtube import RedTubeIE
+from .tube8 import Tube8IE
from .vimeo import VimeoIE
from .dailymotion import DailymotionIE
from .dailymail import DailyMailIE
from .springboardplatform import SpringboardPlatformIE
from .yapfiles import YapFilesIE
from .vice import ViceIE
+from .xfileshare import XFileShareIE
class GenericIE(InfoExtractor):
'title': '35871',
'timestamp': 1355743100,
'upload_date': '20121217',
- 'uploader_id': 'batchUser',
+ 'uploader_id': 'cplapp@learn360.com',
},
'add_ie': ['Kaltura'],
},
},
'add_ie': ['Kaltura'],
},
- # EaglePlatform embed (generic URL)
{
- 'url': 'http://lenta.ru/news/2015/03/06/navalny/',
- # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
+ # meta twitter:player
+ 'url': 'http://thechive.com/2017/12/08/all-i-want-for-christmas-is-more-twerk/',
'info_dict': {
- 'id': '227304',
+ 'id': '0_01b42zps',
'ext': 'mp4',
- 'title': 'Навальный вышел на свободу',
- 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 87,
- 'view_count': int,
- 'age_limit': 0,
+ 'title': 'Main Twerk (Video)',
+ 'upload_date': '20171208',
+ 'uploader_id': 'sebastian.salinas@thechive.com',
+ 'timestamp': 1512713057,
},
'params': {
'skip_download': True,
},
+ 'add_ie': ['Kaltura'],
},
# referrer protected EaglePlatform embed
{
'params': {
'skip_download': True,
},
- }
+ },
+ {
+ 'url': 'http://share-videos.se/auto/video/83645793?uid=13',
+ 'md5': 'b68d276de422ab07ee1d49388103f457',
+ 'info_dict': {
+ 'id': '83645793',
+ 'title': 'Lock up and get excited',
+ 'ext': 'mp4'
+ },
+ 'skip': 'TODO: fix nested playlists processing in tests',
+ },
# {
# # TODO: find another test
# # http://schema.org/VideoObject
self._sort_formats(smil['formats'])
return smil
elif doc.tag == '{http://xspf.org/ns/0/}playlist':
- return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
+ return self.playlist_result(
+ self._parse_xspf(
+ doc, video_id, xspf_url=url,
+ xspf_base_url=compat_str(full_response.geturl())),
+ video_id)
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
info_dict['formats'] = self._parse_mpd_formats(
doc,
if redtube_urls:
return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key())
+ # Look for embedded Tube8 player
+ tube8_urls = Tube8IE._extract_urls(webpage)
+ if tube8_urls:
+ return self.playlist_from_matches(tube8_urls, video_id, video_title, ie=Tube8IE.ie_key())
+
# Look for embedded Tvigle player
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
return self.playlist_from_matches(
vice_urls, video_id, video_title, ie=ViceIE.ie_key())
+ xfileshare_urls = XFileShareIE._extract_urls(webpage)
+ if xfileshare_urls:
+ return self.playlist_from_matches(
+ xfileshare_urls, video_id, video_title, ie=XFileShareIE.ie_key())
+
+ sharevideos_urls = [mobj.group('url') for mobj in re.finditer(
+ r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
+ webpage)]
+ if sharevideos_urls:
+ return self.playlist_from_matches(
+ sharevideos_urls, video_id, video_title)
+
def merge_dicts(dict1, dict2):
merged = {}
for k, v in dict1.items():
from ..utils import (
determine_ext,
int_or_none,
+ NO_DEFAULT,
parse_iso8601,
smuggle_url,
xpath_text,
class HeiseIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?heise\.de/(?:[^/]+/)+[^/]+-(?P<id>[0-9]+)\.html'
_TESTS = [{
+ # kaltura embed
'url': 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html',
- 'md5': 'ffed432483e922e88545ad9f2f15d30e',
'info_dict': {
- 'id': '2404147',
+ 'id': '1_kkrq94sm',
'ext': 'mp4',
'title': "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone",
- 'format_id': 'mp4_720p',
- 'timestamp': 1411812600,
- 'upload_date': '20140927',
+ 'timestamp': 1512734959,
+ 'upload_date': '20171208',
'description': 'md5:c934cbfb326c669c2bcabcbe3d3fcd20',
- 'thumbnail': r're:^https?://.*/gallery/$',
- }
+ },
+ 'params': {
+ 'skip_download': True,
+ },
}, {
# YouTube embed
'url': 'http://www.heise.de/newsticker/meldung/Netflix-In-20-Jahren-vom-Videoverleih-zum-TV-Revolutionaer-3814130.html',
},
}, {
'url': 'https://www.heise.de/video/artikel/nachgehakt-Wie-sichert-das-c-t-Tool-Restric-tor-Windows-10-ab-3700244.html',
- 'md5': '4b58058b46625bdbd841fc2804df95fc',
'info_dict': {
'id': '1_ntrmio2s',
+ 'ext': 'mp4',
+ 'title': "nachgehakt: Wie sichert das c't-Tool Restric'tor Windows 10 ab?",
+ 'description': 'md5:47e8ffb6c46d85c92c310a512d6db271',
'timestamp': 1512470717,
'upload_date': '20171205',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.heise.de/ct/artikel/c-t-uplink-20-8-Staubsaugerroboter-Xiaomi-Vacuum-2-AR-Brille-Meta-2-und-Android-rooten-3959893.html',
+ 'info_dict': {
+ 'id': '1_59mk80sf',
'ext': 'mp4',
- 'title': 'ct10 nachgehakt hos restrictor',
+ 'title': "c't uplink 20.8: Staubsaugerroboter Xiaomi Vacuum 2, AR-Brille Meta 2 und Android rooten",
+ 'description': 'md5:f50fe044d3371ec73a8f79fcebd74afc',
+ 'timestamp': 1517567237,
+ 'upload_date': '20180202',
},
'params': {
'skip_download': True,
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- title = self._html_search_meta('fulltitle', webpage, default=None)
- if not title or title == "c't":
- title = self._search_regex(
- r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"',
- webpage, 'title')
+ def extract_title(default=NO_DEFAULT):
+ title = self._html_search_meta(
+ ('fulltitle', 'title'), webpage, default=None)
+ if not title or title == "c't":
+ title = self._search_regex(
+ r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"',
+ webpage, 'title', default=None)
+ if not title:
+ title = self._html_search_regex(
+ r'<h1[^>]+\bclass=["\']article_page_title[^>]+>(.+?)<',
+ webpage, 'title', default=default)
+ return title
- yt_urls = YoutubeIE._extract_urls(webpage)
- if yt_urls:
- return self.playlist_from_matches(yt_urls, video_id, title, ie=YoutubeIE.ie_key())
+ title = extract_title(default=None)
+ description = self._og_search_description(
+ webpage, default=None) or self._html_search_meta(
+ 'description', webpage)
kaltura_url = KalturaIE._extract_url(webpage)
if kaltura_url:
- return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
+ return {
+ '_type': 'url_transparent',
+ 'url': smuggle_url(kaltura_url, {'source_url': url}),
+ 'ie_key': KalturaIE.ie_key(),
+ 'title': title,
+ 'description': description,
+ }
+
+ yt_urls = YoutubeIE._extract_urls(webpage)
+ if yt_urls:
+ return self.playlist_from_matches(
+ yt_urls, video_id, title, ie=YoutubeIE.ie_key())
+
+ title = extract_title()
container_id = self._search_regex(
r'<div class="videoplayerjw"[^>]+data-container="([0-9]+)"',
})
self._sort_formats(formats)
- description = self._og_search_description(
- webpage, default=None) or self._html_search_meta(
- 'description', webpage)
-
return {
'id': video_id,
'title': title,
from __future__ import unicode_literals
import itertools
+import hashlib
+import json
import re
from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import (
+ compat_str,
+ compat_HTTPError,
+)
from ..utils import (
+ ExtractorError,
get_element_by_attribute,
int_or_none,
lowercase_escape,
+ std_headers,
try_get,
)
}
}
- def _entries(self, uploader_id):
- query = {
- '__a': 1,
- }
+ _gis_tmpl = None
- def get_count(kind):
+ def _entries(self, data):
+ def get_count(suffix):
return int_or_none(try_get(
- node, lambda x: x['%ss' % kind]['count']))
-
- for page_num in itertools.count(1):
- page = self._download_json(
- 'https://instagram.com/%s/' % uploader_id, uploader_id,
- note='Downloading page %d' % page_num,
- fatal=False, query=query)
- if not page:
- break
+ node, lambda x: x['edge_media_' + suffix]['count']))
- nodes = try_get(page, lambda x: x['user']['media']['nodes'], list)
- if not nodes:
- break
+ uploader_id = data['entry_data']['ProfilePage'][0]['graphql']['user']['id']
+ csrf_token = data['config']['csrf_token']
+ rhx_gis = data.get('rhx_gis') or '3c7ca9dcefcf966d11dacf1f151335e8'
- max_id = None
+ self._set_cookie('instagram.com', 'ig_pr', '1')
- for node in nodes:
- node_id = node.get('id')
- if node_id:
- max_id = node_id
+ cursor = ''
+ for page_num in itertools.count(1):
+ variables = json.dumps({
+ 'id': uploader_id,
+ 'first': 12,
+ 'after': cursor,
+ })
+
+ if self._gis_tmpl:
+ gis_tmpls = [self._gis_tmpl]
+ else:
+ gis_tmpls = [
+ '%s' % rhx_gis,
+ '',
+ '%s:%s' % (rhx_gis, csrf_token),
+ '%s:%s:%s' % (rhx_gis, csrf_token, std_headers['User-Agent']),
+ ]
+
+ for gis_tmpl in gis_tmpls:
+ try:
+ media = self._download_json(
+ 'https://www.instagram.com/graphql/query/', uploader_id,
+ 'Downloading JSON page %d' % page_num, headers={
+ 'X-Requested-With': 'XMLHttpRequest',
+ 'X-Instagram-GIS': hashlib.md5(
+ ('%s:%s' % (gis_tmpl, variables)).encode('utf-8')).hexdigest(),
+ }, query={
+ 'query_hash': '42323d64886122307be10013ad2dcc44',
+ 'variables': variables,
+ })['data']['user']['edge_owner_to_timeline_media']
+ self._gis_tmpl = gis_tmpl
+ break
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ if gis_tmpl != gis_tmpls[-1]:
+ continue
+ raise
+
+ edges = media.get('edges')
+ if not edges or not isinstance(edges, list):
+ break
+ for edge in edges:
+ node = edge.get('node')
+ if not node or not isinstance(node, dict):
+ continue
if node.get('__typename') != 'GraphVideo' and node.get('is_video') is not True:
continue
- video_id = node.get('code')
+ video_id = node.get('shortcode')
if not video_id:
continue
ie=InstagramIE.ie_key(), video_id=video_id)
description = try_get(
- node, [lambda x: x['caption'], lambda x: x['text']['id']],
+ node, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
compat_str)
thumbnail = node.get('thumbnail_src') or node.get('display_src')
- timestamp = int_or_none(node.get('date'))
+ timestamp = int_or_none(node.get('taken_at_timestamp'))
- comment_count = get_count('comment')
- like_count = get_count('like')
- view_count = int_or_none(node.get('video_views'))
+ comment_count = get_count('to_comment')
+ like_count = get_count('preview_like')
+ view_count = int_or_none(node.get('video_view_count'))
info.update({
'description': description,
yield info
- if not max_id:
+ page_info = media.get('page_info')
+ if not page_info or not isinstance(page_info, dict):
+ break
+
+ has_next_page = page_info.get('has_next_page')
+ if not has_next_page:
break
- query['max_id'] = max_id
+ cursor = page_info.get('end_cursor')
+ if not cursor or not isinstance(cursor, compat_str):
+ break
def _real_extract(self, url):
- uploader_id = self._match_id(url)
+ username = self._match_id(url)
+
+ webpage = self._download_webpage(url, username)
+
+ data = self._parse_json(
+ self._search_regex(
+ r'sharedData\s*=\s*({.+?})\s*;\s*[<\n]', webpage, 'data'),
+ username)
+
return self.playlist_result(
- self._entries(uploader_id), uploader_id, uploader_id)
+ self._entries(data), username, username)
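How the X-Instagram-GIS header above is computed, in isolation: an MD5 over '<gis template>:<variables JSON>'. All values below are illustrative only; rhx_gis is a per-session value in practice:

import hashlib
import json

rhx_gis = '3c7ca9dcefcf966d11dacf1f151335e8'
variables = json.dumps({
    'id': '25025320',  # hypothetical numeric user id
    'first': 12,
    'after': '',
})
signature = hashlib.md5(
    ('%s:%s' % (rhx_gis, variables)).encode('utf-8')).hexdigest()
print(signature)  # sent as the X-Instagram-GIS request header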
''', webpage) or
re.search(
r'''(?xs)
- <iframe[^>]+src=(?P<q1>["'])
- (?:https?:)?//(?:www\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
+ <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])
+ (?:https?:)?//(?:(?:www|cdnapi)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
(?:(?!(?P=q1)).)*
- [?&]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
+ [?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
(?P=q1)
''', webpage)
)
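The widened Kaltura pattern above now also catches <meta content=...> embeds; a quick check against a synthetic tag (partner and uiconf ids are illustrative):

import re

KALTURA_EMBED_RE = r'''(?xs)
    <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])
    (?:https?:)?//(?:(?:www|cdnapi)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
    (?:(?!(?P=q1)).)*
    [?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
    (?P=q1)
'''
tag = ('<meta property="og:video" content="https://cdnapi.kaltura.com'
       '/p/1750922/sp/175092200/embedIframeJs/uiconf_id/35030021'
       '?entry_id=0_01b42zps">')
m = re.search(KALTURA_EMBED_RE, tag)
print(m.group('partner_id'), m.group('id'))  # 1750922 0_01b42zps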
class KeezMoviesIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/(?:(?P<display_id>[^/]+)-)?(?P<id>\d+)'
_TESTS = [{
- 'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
- 'md5': '1c1e75d22ffa53320f45eeb07bc4cdc0',
+ 'url': 'https://www.keezmovies.com/video/arab-wife-want-it-so-bad-i-see-she-thirsty-and-has-tiny-money-18070681',
+ 'md5': '2ac69cdb882055f71d82db4311732a1a',
'info_dict': {
- 'id': '1214711',
- 'display_id': 'petite-asian-lady-mai-playing-in-bathtub',
+ 'id': '18070681',
+ 'display_id': 'arab-wife-want-it-so-bad-i-see-she-thirsty-and-has-tiny-money',
'ext': 'mp4',
- 'title': 'Petite Asian Lady Mai Playing In Bathtub',
- 'thumbnail': r're:^https?://.*\.jpg$',
+ 'title': 'Arab wife want it so bad I see she thirsty and has tiny money.',
+ 'thumbnail': None,
'view_count': int,
'age_limit': 18,
}
}, {
- 'url': 'http://www.keezmovies.com/video/1214711',
+ 'url': 'http://www.keezmovies.com/video/18070681',
'only_matching': True,
}]
- def _extract_info(self, url):
+ def _extract_info(self, url, fatal=True):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
display_id = (mobj.group('display_id')
encrypted = False
def extract_format(format_url, height=None):
- if not isinstance(format_url, compat_str) or not format_url.startswith('http'):
+ if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//')):
return
if format_url in format_urls:
return
raise ExtractorError(
'Video %s is no longer available' % video_id, expected=True)
- self._sort_formats(formats)
+ try:
+ self._sort_formats(formats)
+ except ExtractorError:
+ if fatal:
+ raise
if not title:
title = self._html_search_regex(
}
def _real_extract(self, url):
- webpage, info = self._extract_info(url)
+ webpage, info = self._extract_info(url, fatal=False)
+ if not info['formats']:
+ return self.url_result(url, 'Generic')
info['view_count'] = str_to_int(self._search_regex(
r'<b>([\d,.]+)</b> Views?', webpage, 'view count', fatal=False))
return info
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class LentaIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?lenta\.ru/[^/]+/\d+/\d+/\d+/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://lenta.ru/news/2018/03/22/savshenko_go/',
+ 'info_dict': {
+ 'id': '964400',
+ 'ext': 'mp4',
+ 'title': 'Надежду Савченко задержали',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 61,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # EaglePlatform iframe embed
+ 'url': 'http://lenta.ru/news/2015/03/06/navalny/',
+ 'info_dict': {
+ 'id': '227304',
+ 'ext': 'mp4',
+ 'title': 'Навальный вышел на свободу',
+ 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 87,
+ 'view_count': int,
+ 'age_limit': 0,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ video_id = self._search_regex(
+ r'vid\s*:\s*["\']?(\d+)', webpage, 'eagleplatform id',
+ default=None)
+ if video_id:
+ return self.url_result(
+ 'eagleplatform:lentaru.media.eagleplatform.com:%s' % video_id,
+ ie='EaglePlatform', video_id=video_id)
+
+ return self.url_result(url, ie='Generic')
# coding: utf-8
from __future__ import unicode_literals
+import json
import re
from .common import InfoExtractor
-from ..utils import unified_strdate
+from ..utils import (
+ parse_duration,
+ unified_strdate,
+)
class LibsynIE(InfoExtractor):
_VALID_URL = r'(?P<mainurl>https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+))'
_TESTS = [{
- 'url': 'http://html5-player.libsyn.com/embed/episode/id/3377616/',
- 'md5': '443360ee1b58007bc3dcf09b41d093bb',
+ 'url': 'http://html5-player.libsyn.com/embed/episode/id/6385796/',
+ 'md5': '2a55e75496c790cdeb058e7e6c087746',
'info_dict': {
- 'id': '3377616',
+ 'id': '6385796',
'ext': 'mp3',
- 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
- 'description': 'md5:601cb790edd05908957dae8aaa866465',
- 'upload_date': '20150220',
+ 'title': "Champion Minded - Developing a Growth Mindset",
+ 'description': 'In this episode, Allistair talks about the importance of developing a growth mindset, not only in sports, but in life too.',
+ 'upload_date': '20180320',
'thumbnail': 're:^https?://.*',
},
}, {
url = m.group('mainurl')
webpage = self._download_webpage(url, video_id)
- formats = [{
- 'url': media_url,
- } for media_url in set(re.findall(r'var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage))]
-
podcast_title = self._search_regex(
- r'<h2>([^<]+)</h2>', webpage, 'podcast title', default=None)
+ r'<h3>([^<]+)</h3>', webpage, 'podcast title', default=None)
+ if podcast_title:
+ podcast_title = podcast_title.strip()
episode_title = self._search_regex(
- r'(?:<div class="episode-title">|<h3>)([^<]+)</', webpage, 'episode title')
+ r'(?:<div class="episode-title">|<h4>)([^<]+)</', webpage, 'episode title')
+ if episode_title:
+ episode_title = episode_title.strip()
title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title
description = self._html_search_regex(
- r'<div id="info_text_body">(.+?)</div>', webpage,
+ r'<p\s+id="info_text_body">(.+?)</p>', webpage,
'description', default=None)
- thumbnail = self._search_regex(
- r'<img[^>]+class="info-show-icon"[^>]+src="([^"]+)"',
- webpage, 'thumbnail', fatal=False)
+ if description:
+ # Replace non-breaking spaces with normal ones and strip surrounding whitespace
+ description = description.replace('\u00A0', ' ').strip()
release_date = unified_strdate(self._search_regex(
r'<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False))
+ data_json = self._search_regex(r'var\s+playlistItem\s*=\s*(\{.*?\});\n', webpage, 'JSON data block')
+ data = json.loads(data_json)
+
+ formats = [{
+ 'url': data['media_url'],
+ 'format_id': 'main',
+ }, {
+ 'url': data['media_url_libsyn'],
+ 'format_id': 'libsyn',
+ }]
+ thumbnail = data.get('thumbnail_url')
+ duration = parse_duration(data.get('duration'))
+
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'upload_date': release_date,
+ 'duration': duration,
'formats': formats,
}
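The playlistItem block parsed above, reduced to a standalone example; the page fragment is made up but mirrors the keys the extractor reads:

import json
import re

webpage = ('var playlistItem = {"media_url": "https://traffic.libsyn.com/e.mp3",'
           ' "media_url_libsyn": "https://html5-player.libsyn.com/e.mp3",'
           ' "duration": "0:29:14"};\n')
data = json.loads(re.search(
    r'var\s+playlistItem\s*=\s*(\{.*?\});\n', webpage).group(1))
print(data['media_url'], data['duration'])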
class LiveLeakIE(InfoExtractor):
- _VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<id>[\w_]+)(?:.*)'
+ _VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?.*?\b[it]=(?P<id>[\w_]+)'
_TESTS = [{
'url': 'http://www.liveleak.com/view?i=757_1364311680',
'md5': '0813c2430bea7a46bf13acf3406992f4',
'title': 'Fuel Depot in China Explosion caught on video',
},
'playlist_count': 3,
+ }, {
+ 'url': 'https://www.liveleak.com/view?t=HvHi_1523016227',
+ 'only_matching': True,
}]
@staticmethod
vod_id = config.get('vodId') or self._search_regex(
(r'\\"vodId\\"\s*:\s*\\"(.+?)\\"',
+ r'"vodId"\s*:\s*"(.+?)"',
r'<[^>]+id=["\']vod-(\d+)'),
webpage, 'video_id', default=None)
_VALID_URL = r'https?://(?:www\.)?mofosex\.com/videos/(?P<id>\d+)/(?P<display_id>[^/?#&.]+)\.html'
_TESTS = [{
'url': 'http://www.mofosex.com/videos/318131/amateur-teen-playing-and-masturbating-318131.html',
- 'md5': '39a15853632b7b2e5679f92f69b78e91',
+ 'md5': '558fcdafbb63a87c019218d6e49daf8a',
'info_dict': {
'id': '318131',
'display_id': 'amateur-teen-playing-and-masturbating-318131',
class NationalGeographicIE(ThePlatformIE, AdobePassIE):
IE_NAME = 'natgeo'
- _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:(?:wild/)?[^/]+/)?(?:videos|episodes)/(?P<id>[^/?]+)'
+ _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:(?:(?:wild/)?[^/]+/)?(?:videos|episodes)|u)/(?P<id>[^/?]+)'
_TESTS = [
{
- 'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/videos/uncovering-a-universal-knowledge/',
+ 'url': 'http://channel.nationalgeographic.com/u/kdi9Ld0PN2molUUIMSBGxoeDhD729KRjQcnxtetilWPMevo8ZwUBIDuPR0Q3D2LVaTsk0MPRkRWDB8ZhqWVeyoxfsZZm36yRp1j-zPfsHEyI_EgAeFY/',
'md5': '518c9aa655686cf81493af5cc21e2a04',
'info_dict': {
'id': 'vKInpacll2pC',
'add_ie': ['ThePlatform'],
},
{
- 'url': 'http://channel.nationalgeographic.com/wild/destination-wild/videos/the-stunning-red-bird-of-paradise/',
+ 'url': 'http://channel.nationalgeographic.com/u/kdvOstqYaBY-vSBPyYgAZRUL4sWUJ5XUUPEhc7ISyBHqoIO4_dzfY3K6EjHIC0hmFXoQ7Cpzm6RkET7S3oMlm6CFnrQwSUwo/',
'md5': 'c4912f656b4cbe58f3e000c489360989',
'info_dict': {
'id': 'Pok5lWCkiEFA',
{
'url': 'http://channel.nationalgeographic.com/videos/treasures-rediscovered/',
'only_matching': True,
+ },
+ {
+ 'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/videos/uncovering-a-universal-knowledge/',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://channel.nationalgeographic.com/wild/destination-wild/videos/the-stunning-red-bird-of-paradise/',
+ 'only_matching': True,
}
]
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- m_id = re.search(r'var rmcPlayer = new nhn\.rmcnmv\.RMCVideoPlayer\("(.+?)", "(.+?)"',
- webpage)
- if m_id is None:
+ vid = self._search_regex(
+ r'videoId["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
+ 'video id', default=None, group='value')
+ in_key = self._search_regex(
+ r'inKey["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
+ 'key', default=None, group='value')
+
+ if not vid or not in_key:
error = self._html_search_regex(
r'(?s)<div class="(?:nation_error|nation_box|error_box)">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>',
webpage, 'error', default=None)
raise ExtractorError(error, expected=True)
raise ExtractorError('couldn\'t extract vid and key')
video_data = self._download_json(
- 'http://play.rmcnmv.naver.com/vod/play/v2.0/' + m_id.group(1),
+ 'http://play.rmcnmv.naver.com/vod/play/v2.0/' + vid,
video_id, query={
- 'key': m_id.group(2),
+ 'key': in_key,
})
meta = video_data['meta']
title = meta['subject']
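The quote-agnostic videoId/inKey scrape above on a synthetic fragment; the (?!\1) lookahead stops the value at whichever quote character opened it:

import re

fragment = '"videoId": "ABCD1234", "inKey": "V123abcDEF"'
VALUE_RE = r'%s["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1'
vid = re.search(VALUE_RE % 'videoId', fragment).group('value')
in_key = re.search(VALUE_RE % 'inKey', fragment).group('value')
print(vid, in_key)  # ABCD1234 V123abcDEF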
azure_locator = stream_data['azureLocator']
- AZURE_URL = 'http://nx%s%02d.akamaized.net/'
-
- def get_cdn_shield_base(shield_type='', prefix='-p'):
+ def get_cdn_shield_base(shield_type='', static=False):
for secure in ('', 's'):
cdn_shield = stream_data.get('cdnShield%sHTTP%s' % (shield_type, secure.upper()))
if cdn_shield:
return 'http%s://%s' % (secure, cdn_shield)
else:
- return AZURE_URL % (prefix, int(stream_data['azureAccount'].replace('nexxplayplus', '')))
+ if 'fb' in stream_data['azureAccount']:
+ prefix = 'df' if static else 'f'
+ else:
+ prefix = 'd' if static else 'p'
+ account = int(stream_data['azureAccount'].replace('nexxplayplus', '').replace('nexxplayfb', ''))
+ return 'http://nx-%s%02d.akamaized.net/' % (prefix, account)
azure_stream_base = get_cdn_shield_base()
is_ml = ',' in language
formats.extend(self._extract_ism_formats(
azure_manifest_url % '', video_id, ism_id='%s-mss' % cdn, fatal=False))
- azure_progressive_base = get_cdn_shield_base('Prog', '-d')
+ azure_progressive_base = get_cdn_shield_base('Prog', True)
azure_file_distribution = stream_data.get('azureFileDistribution')
if azure_file_distribution:
fds = azure_file_distribution.split(',')
class NickBrIE(MTVServicesInfoExtractor):
IE_NAME = 'nickelodeon:br'
- _VALID_URL = r'https?://(?P<domain>(?:www\.)?nickjr|mundonick\.uol)\.com\.br/(?:programas/)?[^/]+/videos/(?:episodios/)?(?P<id>[^/?#.]+)'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?P<domain>(?:www\.)?nickjr|mundonick\.uol)\.com\.br|
+ (?:www\.)?nickjr\.nl
+ )
+ /(?:programas/)?[^/]+/videos/(?:episodios/)?(?P<id>[^/?\#.]+)
+ '''
_TESTS = [{
'url': 'http://www.nickjr.com.br/patrulha-canina/videos/210-labirinto-de-pipoca/',
'only_matching': True,
}, {
'url': 'http://mundonick.uol.com.br/programas/the-loud-house/videos/muitas-irmas/7ljo9j',
'only_matching': True,
+ }, {
+ 'url': 'http://www.nickjr.nl/paw-patrol/videos/311-ge-wol-dig-om-terug-te-zijn/',
+ 'only_matching': True,
}]
def _real_extract(self, url):
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
+ ExtractorError,
int_or_none,
float_or_none,
- ExtractorError,
+ smuggle_url,
)
class NineNowIE(InfoExtractor):
IE_NAME = '9now.com.au'
_VALID_URL = r'https?://(?:www\.)?9now\.com\.au/(?:[^/]+/){2}(?P<id>[^/?#]+)'
+ _GEO_COUNTRIES = ['AU']
_TESTS = [{
# clip
'url': 'https://www.9now.com.au/afl-footy-show/2016/clip-ciql02091000g0hp5oktrnytc',
return {
'_type': 'url_transparent',
- 'url': self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
+ 'url': smuggle_url(
+ self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
+ {'geo_countries': self._GEO_COUNTRIES}),
'id': video_id,
'title': title,
'description': common_data.get('description'),
class OdnoklassnikiIE(InfoExtractor):
- _VALID_URL = r'https?://(?:(?:www|m|mobile)\.)?(?:odnoklassniki|ok)\.ru/(?:video(?:embed)?|web-api/video/moviePlayer|live)/(?P<id>[\d-]+)'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:(?:www|m|mobile)\.)?
+ (?:odnoklassniki|ok)\.ru/
+ (?:
+ video(?:embed)?/|
+ web-api/video/moviePlayer/|
+ live/|
+ dk\?.*?st\.mvId=
+ )
+ (?P<id>[\d-]+)
+ '''
_TESTS = [{
# metadata in JSON
'url': 'http://ok.ru/video/20079905452',
}, {
'url': 'https://www.ok.ru/live/484531969818',
'only_matching': True,
+ }, {
+ 'url': 'https://m.ok.ru/dk?st.cmd=movieLayer&st.discId=863789452017&st.retLoc=friend&st.rtu=%2Fdk%3Fst.cmd%3DfriendMovies%26st.mode%3Down%26st.mrkId%3D%257B%2522uploadedMovieMarker%2522%253A%257B%2522marker%2522%253A%25221519410114503%2522%252C%2522hasMore%2522%253Atrue%257D%252C%2522sharedMovieMarker%2522%253A%257B%2522marker%2522%253Anull%252C%2522hasMore%2522%253Afalse%257D%257D%26st.friendId%3D561722190321%26st.frwd%3Don%26_prevCmd%3DfriendMovies%26tkn%3D7257&st.discType=MOVIE&st.mvId=863789452017&_prevCmd=friendMovies&tkn=3648#lst#',
+ 'only_matching': True,
}]
def _real_extract(self, url):
class OpenloadIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
+ _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
_TESTS = [{
'url': 'https://openload.co/f/kUEfGclsU9o',
}, {
'url': 'https://oload.stream/f/KnG-kKZdcfY',
'only_matching': True,
+ }, {
+ 'url': 'https://oload.xyz/f/WwRBpzW8Wtk',
+ 'only_matching': True,
}]
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
decoded_id = (get_element_by_id('streamurl', webpage) or
get_element_by_id('streamuri', webpage) or
- get_element_by_id('streamurj', webpage))
-
- if not decoded_id:
- raise ExtractorError('Can\'t find stream URL', video_id=video_id)
+ get_element_by_id('streamurj', webpage) or
+ self._search_regex(
+ (r'>\s*([\w-]+~\d{10,}~\d+\.\d+\.0\.0~[\w-]+)\s*<',
+ r'>\s*([\w~-]+~\d+\.\d+\.\d+\.\d+~[\w~-]+)',
+ r'>\s*([\w-]+~\d{10,}~(?:[a-f\d]+:){2}:~[\w-]+)\s*<',
+ r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)\s*<',
+ r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage,
+ 'stream URL'))
video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import time
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ js_to_json,
+ try_get,
+ update_url_query,
+ urlencode_postdata,
+)
+
+
+class PicartoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
+ _TEST = {
+ 'url': 'https://picarto.tv/Setz',
+ 'info_dict': {
+ 'id': 'Setz',
+ 'ext': 'mp4',
+ 'title': 're:^Setz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'timestamp': int,
+ 'is_live': True
+ },
+ 'skip': 'Stream is offline',
+ }
+
+ @classmethod
+ def suitable(cls, url):
+ return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+ stream_page = self._download_webpage(url, channel_id)
+
+ if '>This channel does not exist' in stream_page:
+ raise ExtractorError(
+ 'Channel %s does not exist' % channel_id, expected=True)
+
+ player = self._parse_json(
+ self._search_regex(
+ r'(?s)playerSettings\[\d+\]\s*=\s*(\{.+?\}\s*\n)', stream_page,
+ 'player settings'),
+ channel_id, transform_source=js_to_json)
+
+ if player.get('online') is False:
+ raise ExtractorError('Stream is offline', expected=True)
+
+ cdn_data = self._download_json(
+ 'https://picarto.tv/process/channel', channel_id,
+ data=urlencode_postdata({'loadbalancinginfo': channel_id}),
+ note='Downloading load balancing info')
+
+ def get_event(key):
+ return try_get(player, lambda x: x['event'][key], compat_str) or ''
+
+ params = {
+ 'token': player.get('token') or '',
+ 'ticket': get_event('ticket'),
+ 'con': int(time.time() * 1000),
+ 'type': get_event('type'),
+ 'scope': get_event('scope'),
+ }
+
+ prefered_edge = cdn_data.get('preferedEdge')
+ default_tech = player.get('defaultTech')
+
+ formats = []
+
+ for edge in cdn_data['edges']:
+ edge_ep = edge.get('ep')
+ if not edge_ep or not isinstance(edge_ep, compat_str):
+ continue
+ edge_id = edge.get('id')
+ for tech in cdn_data['techs']:
+ tech_label = tech.get('label')
+ tech_type = tech.get('type')
+ preference = 0
+ if edge_id == prefered_edge:
+ preference += 1
+ if tech_type == default_tech:
+ preference += 1
+ format_id = []
+ if edge_id:
+ format_id.append(edge_id)
+ if tech_type == 'application/x-mpegurl' or tech_label == 'HLS':
+ format_id.append('hls')
+ formats.extend(self._extract_m3u8_formats(
+ update_url_query(
+ 'https://%s/hls/%s/index.m3u8'
+ % (edge_ep, channel_id), params),
+ channel_id, 'mp4', preference=preference,
+ m3u8_id='-'.join(format_id), fatal=False))
+ continue
+ elif tech_type == 'video/mp4' or tech_label == 'MP4':
+ format_id.append('mp4')
+ formats.append({
+ 'url': update_url_query(
+ 'https://%s/mp4/%s.mp4' % (edge_ep, channel_id),
+ params),
+ 'format_id': '-'.join(format_id),
+ 'preference': preference,
+ })
+ else:
+ # rtmp format does not seem to work
+ continue
+ self._sort_formats(formats)
+
+ mature = player.get('mature')
+ if mature is None:
+ age_limit = None
+ else:
+ age_limit = 18 if mature is True else 0
+
+ return {
+ 'id': channel_id,
+ 'title': self._live_title(channel_id),
+ 'is_live': True,
+ 'thumbnail': player.get('vodThumb'),
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
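The edge/tech preference scoring above, extracted: a candidate format gets +1 for the load balancer's preferred edge and +1 for the player's default tech, so the best pairing sorts first:

def format_preference(edge_id, tech_type, preferred_edge, default_tech):
    score = 0
    if edge_id == preferred_edge:
        score += 1
    if tech_type == default_tech:
        score += 1
    return score

print(format_preference('edge3', 'application/x-mpegurl',
                        'edge3', 'application/x-mpegurl'))  # 2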
+
+
+class PicartoVodIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?picarto\.tv/videopopout/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://picarto.tv/videopopout/ArtofZod_2017.12.12.00.13.23.flv',
+ 'md5': '3ab45ba4352c52ee841a28fb73f2d9ca',
+ 'info_dict': {
+ 'id': 'ArtofZod_2017.12.12.00.13.23.flv',
+ 'ext': 'mp4',
+ 'title': 'ArtofZod_2017.12.12.00.13.23.flv',
+ 'thumbnail': r're:^https?://.*\.jpg'
+ },
+ }, {
+ 'url': 'https://picarto.tv/videopopout/Plague',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ vod_info = self._parse_json(
+ self._search_regex(
+ r'(?s)#vod-player["\']\s*,\s*(\{.+?\})\s*\)', webpage,
+ 'vod player'),
+ video_id, transform_source=js_to_json)
+
+ formats = self._extract_m3u8_formats(
+ vod_info['vod'], video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': video_id,
+ 'thumbnail': vod_info.get('vodThumb'),
+ 'formats': formats,
+ }
class PornFlipIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[0-9A-Za-z-]{11})'
+ _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.pornflip.com/v/wz7DfNhMmep',
'md5': '98c46639849145ae1fd77af532a9278c',
}, {
'url': 'https://www.pornflip.com/embed/EkRD6-vS2-s',
'only_matching': True,
+ }, {
+ 'url': 'https://www.pornflip.com/v/NG9q6Pb_iK8',
+ 'only_matching': True,
}]
def _real_extract(self, url):
_VALID_URL = r'''(?x)
https?://
(?:
- (?:[a-z]+\.)?pornhub\.com/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
+ (?:[^/]+\.)?pornhub\.com/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
(?:www\.)?thumbzilla\.com/video/
)
(?P<id>[\da-z]+)
class PornHubPlaylistIE(PornHubPlaylistBaseIE):
- _VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/playlist/(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.pornhub.com/playlist/4667351',
'info_dict': {
'title': 'Nataly Hot',
},
'playlist_mincount': 2,
+ }, {
+ 'url': 'https://de.pornhub.com/playlist/4667351',
+ 'only_matching': True,
}]
class PornHubUserVideosIE(PornHubPlaylistBaseIE):
- _VALID_URL = r'https?://(?:www\.)?pornhub\.com/(?:user|channel)s/(?P<id>[^/]+)/videos'
+ _VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/(?:user|channel)s/(?P<id>[^/]+)/videos'
_TESTS = [{
'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
'info_dict': {
# Most Viewed Videos
'url': 'https://www.pornhub.com/channels/povd/videos?o=vi',
'only_matching': True,
+ }, {
+ 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
+ 'only_matching': True,
}]
def _real_extract(self, url):
(?:
prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv|advopedia
)\.(?:de|at|ch)|
- ran\.de|fem\.com|advopedia\.de
+ ran\.de|fem\.com|advopedia\.de|galileo\.tv/video
)
/(?P<id>.+)
'''
'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge',
'only_matching': True,
},
+ {
+ # geo restricted to Germany
+ 'url': 'https://www.galileo.tv/video/diese-emojis-werden-oft-missverstanden',
+ 'only_matching': True,
+ },
{
'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel',
'only_matching': True,
r'"clip_id"\s*:\s+"(\d+)"',
r'clipid: "(\d+)"',
r'clip[iI]d=(\d+)',
- r'clip[iI]d\s*=\s*["\'](\d+)',
+ r'clip[iI][dD]\s*=\s*["\'](\d+)',
r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
r'proMamsId"\s*:\s*"(\d+)',
r'proMamsId"\s*:\s*"(\d+)',
from .common import InfoExtractor
from ..compat import compat_str
+from ..utils import (
+ determine_ext,
+ int_or_none,
+)
class RENTVIE(InfoExtractor):
'info_dict': {
'id': '118577',
'ext': 'mp4',
- 'title': 'Документальный спецпроект: "Промывка мозгов. Технологии XXI века"'
+ 'title': 'Документальный спецпроект: "Промывка мозгов. Технологии XXI века"',
+ 'timestamp': 1472230800,
+ 'upload_date': '20160826',
}
}, {
'url': 'http://ren.tv/player/118577',
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage('http://ren.tv/player/' + video_id, video_id)
- jw_config = self._parse_json(self._search_regex(
- r'config\s*=\s*({.+});', webpage, 'jw config'), video_id)
- return self._parse_jwplayer_data(jw_config, video_id, m3u8_id='hls')
+ config = self._parse_json(self._search_regex(
+ r'config\s*=\s*({.+})\s*;', webpage, 'config'), video_id)
+ title = config['title']
+ formats = []
+ for video in config['src']:
+ src = video.get('src')
+ if not src or not isinstance(src, compat_str):
+ continue
+ ext = determine_ext(src)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ src, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'url': src,
+ })
+ self._sort_formats(formats)
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': config.get('description'),
+ 'thumbnail': config.get('image'),
+ 'duration': int_or_none(config.get('duration')),
+ 'timestamp': int_or_none(config.get('date')),
+ 'formats': formats,
+ }
class RENTVArticleIE(InfoExtractor):
import re
from .brightcove import BrightcoveNewIE
-from ..utils import update_url_query
+from ..compat import compat_str
+from ..utils import (
+ try_get,
+ update_url_query,
+)
class SevenPlusIE(BrightcoveNewIE):
IE_NAME = '7plus'
_VALID_URL = r'https?://(?:www\.)?7plus\.com\.au/(?P<path>[^?]+\?.*?\bepisode-id=(?P<id>[^&#]+))'
_TESTS = [{
- 'url': 'https://7plus.com.au/BEAT?episode-id=BEAT-001',
+ 'url': 'https://7plus.com.au/MTYS?episode-id=MTYS7-003',
'info_dict': {
- 'id': 'BEAT-001',
+ 'id': 'MTYS7-003',
'ext': 'mp4',
- 'title': 'S1 E1 - Help / Lucy In The Sky With Diamonds',
- 'description': 'md5:37718bea20a8eedaca7f7361af566131',
+ 'title': 'S7 E3 - Wind Surf',
+ 'description': 'md5:29c6a69f21accda7601278f81b46483d',
'uploader_id': '5303576322001',
- 'upload_date': '20171031',
- 'timestamp': 1509440068,
+ 'upload_date': '20171201',
+ 'timestamp': 1512106377,
+ 'series': 'Mighty Ships',
+ 'season_number': 7,
+ 'episode_number': 3,
+ 'episode': 'Wind Surf',
},
'params': {
'format': 'bestvideo',
value = item.get(src_key)
if value:
info[dst_key] = value
+ info['series'] = try_get(
+ item, lambda x: x['seriesLogo']['name'], compat_str)
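+ # 7plus titles follow the pattern 'S<season> E<episode> - <name>'
+ # (e.g. 'S7 E3 - Wind Surf'), so season/episode metadata can be parsed
+ # straight out of the title.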
+ mobj = re.search(r'^S(\d+)\s+E(\d+)\s+-\s+(.+)$', info['title'])
+ if mobj:
+ info.update({
+ 'season_number': int(mobj.group(1)),
+ 'episode_number': int(mobj.group(2)),
+ 'episode': mobj.group(3),
+ })
return info
IE_DESC = 'Smotri.com broadcasts'
IE_NAME = 'smotri:broadcast'
_VALID_URL = r'https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<id>[^/]+))/?.*'
+ _NETRC_MACHINE = 'smotri'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
adult_content = False
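+ # Newer broadcast pages expose the ticket in a data-user-file attribute;
+ # the legacy addFlashVar('file', ...) pattern is kept as a fallback.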
ticket = self._html_search_regex(
- r"window\.broadcast_control\.addFlashVar\('file'\s*,\s*'([^']+)'\)",
- broadcast_page, 'broadcast ticket')
+ (r'data-user-file=(["\'])(?P<ticket>(?!\1).+)\1',
+ r"window\.broadcast_control\.addFlashVar\('file'\s*,\s*'(?P<ticket>[^']+)'\)"),
+ broadcast_page, 'broadcast ticket', group='ticket')
- url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket
+ broadcast_url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket
broadcast_password = self._downloader.params.get('videopassword')
if broadcast_password:
- url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest()
+ broadcast_url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest()
broadcast_json_page = self._download_webpage(
- url, broadcast_id, 'Downloading broadcast JSON')
+ broadcast_url, broadcast_id, 'Downloading broadcast JSON')
try:
broadcast_json = json.loads(broadcast_json_page)
gameID = m.group('gameID')
playlist_id = gameID
videourl = self._VIDEO_PAGE_TEMPLATE % playlist_id
+
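+ # Pre-set the mature_content cookie so Steam serves the video page
+ # directly instead of the birth-date interstitial checked below.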
+ self._set_cookie('steampowered.com', 'mature_content', '1')
+
webpage = self._download_webpage(videourl, playlist_id)
if re.search('<h2>Please enter your birth date to continue:</h2>', webpage) is not None:
import re
from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_str,
+ compat_urllib_parse_urlparse,
+)
from ..utils import (
determine_ext,
dict_get,
int_or_none,
try_get,
+ urljoin,
)
_GEO_COUNTRIES = ['SE']
def _extract_video(self, video_info, video_id):
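+ # Live HLS playlists never end, which the native HLS downloader cannot
+ # handle, so live streams fall back to the ffmpeg-based 'm3u8' protocol.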
+ is_live = dict_get(video_info, ('live', 'simulcast'), default=False)
+ m3u8_protocol = 'm3u8' if is_live else 'm3u8_native'
formats = []
for vr in video_info['videoReferences']:
player_type = vr.get('playerType') or vr.get('format')
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
vurl, video_id,
- ext='mp4', entry_protocol='m3u8_native',
+ ext='mp4', entry_protocol=m3u8_protocol,
m3u8_id=player_type, fatal=False))
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
'season_number': season_number,
'episode': episode,
'episode_number': episode_number,
+ 'is_live': is_live,
}
return info_dict
-class SVTPlayIE(SVTBaseIE):
+class SVTPlayBaseIE(SVTBaseIE):
+ _SVTPLAY_RE = r'root\s*\[\s*(["\'])_*svtplay\1\s*\]\s*=\s*(?P<json>{.+?})\s*;\s*\n'
+
+
+class SVTPlayIE(SVTPlayBaseIE):
IE_DESC = 'SVT Play and Öppet arkiv'
- _VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp)/(?P<id>[0-9]+)'
+ _VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2',
'md5': '2b6704fe4a28801e1a098bbf3c5ac611',
}, {
'url': 'http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg',
'only_matching': True,
+ }, {
+ 'url': 'https://www.svtplay.se/kanaler/svt1',
+ 'only_matching': True,
}]
def _real_extract(self, url):
data = self._parse_json(
self._search_regex(
- r'root\["__svtplay"\]\s*=\s*([^;]+);',
- webpage, 'embedded data', default='{}'),
+ self._SVTPLAY_RE, webpage, 'embedded data', default='{}',
+ group='json'),
video_id, fatal=False)
thumbnail = self._og_search_thumbnail(webpage)
+ def adjust_title(info):
+ if info['is_live']:
+ info['title'] = self._live_title(info['title'])
+
if data:
video_info = try_get(
data, lambda x: x['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'],
'title': data['context']['dispatcher']['stores']['MetaStore']['title'],
'thumbnail': thumbnail,
})
+ adjust_title(info_dict)
return info_dict
video_id = self._search_regex(
info_dict['title'] = re.sub(
r'\s*\|\s*.+?$', '',
info_dict.get('episode') or self._og_search_title(webpage))
+ adjust_title(info_dict)
return info_dict
+
+
+class SVTSeriesIE(SVTPlayBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?svtplay\.se/(?P<id>[^/?&#]+)'
+ _TESTS = [{
+ 'url': 'https://www.svtplay.se/rederiet',
+ 'info_dict': {
+ 'id': 'rederiet',
+ 'title': 'Rederiet',
+ 'description': 'md5:505d491a58f4fcf6eb418ecab947e69e',
+ },
+ 'playlist_mincount': 318,
+ }, {
+ 'url': 'https://www.svtplay.se/rederiet?tab=sasong2',
+ 'info_dict': {
+ 'id': 'rederiet-sasong2',
+ 'title': 'Rederiet - Säsong 2',
+ 'description': 'md5:505d491a58f4fcf6eb418ecab947e69e',
+ },
+ 'playlist_count': 12,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTSeriesIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ series_id = self._match_id(url)
+
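+ # A '?tab=<slug>' query selects a single season tab on the series page;
+ # fold the slug into the playlist id so whole-series and per-season
+ # playlists get distinct ids.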
+ qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ season_slug = qs.get('tab', [None])[0]
+
+ if season_slug:
+ series_id += '-%s' % season_slug
+
+ webpage = self._download_webpage(
+ url, series_id, 'Downloading series page')
+
+ root = self._parse_json(
+ self._search_regex(
+ self._SVTPLAY_RE, webpage, 'content', group='json'),
+ series_id)
+
+ season_name = None
+
+ entries = []
+ for season in root['relatedVideoContent']['relatedVideosAccordion']:
+ if not isinstance(season, dict):
+ continue
+ if season_slug:
+ if season.get('slug') != season_slug:
+ continue
+ season_name = season.get('name')
+ videos = season.get('videos')
+ if not isinstance(videos, list):
+ continue
+ for video in videos:
+ content_url = video.get('contentUrl')
+ if not content_url or not isinstance(content_url, compat_str):
+ continue
+ entries.append(
+ self.url_result(
+ urljoin(url, content_url),
+ ie=SVTPlayIE.ie_key(),
+ video_title=video.get('title')
+ ))
+
+ metadata = root.get('metaData')
+ if not isinstance(metadata, dict):
+ metadata = {}
+
+ title = metadata.get('title')
+ season_name = season_name or season_slug
+
+ if title and season_name:
+ title = '%s - %s' % (title, season_name)
+ elif season_slug:
+ title = season_slug
+
+ return self.playlist_result(
+ entries, series_id, title, metadata.get('description'))
'only_matching': True,
}]
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?tube8\.com/embed/(?:[^/]+/)+\d+)',
+ webpage)
+
def _real_extract(self, url):
webpage, info = self._extract_info(url)
int_or_none,
parse_iso8601,
parse_duration,
+ try_get,
update_url_query,
)
duration = parse_duration(info.get('duration'))
f = info.get('format', {})
+
+ thumbnails = [{
+ 'url': 'https://aistvnow-a.akamaihd.net/tvnow/movie/%s' % video_id,
+ }]
thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
+ if thumbnail:
+ thumbnails.append({
+ 'url': thumbnail,
+ })
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
- 'thumbnail': thumbnail,
+ 'thumbnails': thumbnails,
'timestamp': timestamp,
'duration': duration,
'series': f.get('title'),
class TVNowIE(TVNowBaseIE):
- _VALID_URL = r'https?://(?:www\.)?tvnow\.(?:de|at|ch)/(?:rtl(?:2|plus)?|nitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/(?:(?:list/[^/]+|jahr/\d{4}/\d{1,2})/)?(?P<id>[^/]+)/(?:player|preview)'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?tvnow\.(?:de|at|ch)/[^/]+/
+ (?P<show_id>[^/]+)/
+ (?!(?:list|jahr)(?:/|$))(?P<id>[^/?\#&]+)
+ '''
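+ # The negative lookahead above keeps /list/... and /jahr/... URLs out of
+ # this extractor; those are playlists handled by TVNowListIE.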
_TESTS = [{
'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player',
}, {
# rtl2
'url': 'https://www.tvnow.de/rtl2/armes-deutschland/episode-0008/player',
- 'only_matching': 'True',
+ 'only_matching': True,
}, {
# rtlnitro
'url': 'https://www.tvnow.de/nitro/alarm-fuer-cobra-11-die-autobahnpolizei/auf-eigene-faust-pilot/player',
- 'only_matching': 'True',
+ 'only_matching': True,
}, {
# superrtl
'url': 'https://www.tvnow.de/superrtl/die-lustigsten-schlamassel-der-welt/u-a-ketchup-effekt/player',
- 'only_matching': 'True',
+ 'only_matching': True,
}, {
# ntv
'url': 'https://www.tvnow.de/ntv/startup-news/goetter-in-weiss/player',
- 'only_matching': 'True',
+ 'only_matching': True,
}, {
# vox
'url': 'https://www.tvnow.de/vox/auto-mobil/neues-vom-automobilmarkt-2017-11-19-17-00-00/player',
- 'only_matching': 'True',
+ 'only_matching': True,
}, {
# rtlplus
'url': 'https://www.tvnow.de/rtlplus/op-ruft-dr-bruckner/die-vernaehte-frau/player',
- 'only_matching': 'True',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3',
+ 'only_matching': True,
}]
def _real_extract(self, url):
return self._extract_video(info, display_id)
-class TVNowListIE(TVNowBaseIE):
- _VALID_URL = r'(?P<base_url>https?://(?:www\.)?tvnow\.(?:de|at|ch)/(?:rtl(?:2|plus)?|nitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/)list/(?P<id>[^?/#&]+)$'
+class TVNowListBaseIE(TVNowBaseIE):
+ _SHOW_VALID_URL = r'''(?x)
+ (?P<base_url>
+ https?://
+ (?:www\.)?tvnow\.(?:de|at|ch)/[^/]+/
+ (?P<show_id>[^/]+)
+ )
+ '''
+
+ def _extract_list_info(self, display_id, show_id):
+ fields = list(self._SHOW_FIELDS)
+ fields.extend('formatTabs.%s' % field for field in self._SEASON_FIELDS)
+ fields.extend(
+ 'formatTabs.formatTabPages.container.movies.%s' % field
+ for field in self._VIDEO_FIELDS)
+ return self._call_api(
+ 'formats/seo', display_id, query={
+ 'fields': ','.join(fields),
+ 'name': show_id + '.php'
+ })
+
+
+class TVNowListIE(TVNowListBaseIE):
+ _VALID_URL = r'%s/(?:list|jahr)/(?P<id>[^?\#&]+)' % TVNowListBaseIE._SHOW_VALID_URL
_SHOW_FIELDS = ('title', )
_SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
'title': '30 Minuten Deutschland - Aktuell',
},
'playlist_mincount': 1,
+ }, {
+ 'url': 'https://www.tvnow.de/vox/ab-ins-beet/list/staffel-14',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/2018/3',
+ 'only_matching': True,
}]
+ @classmethod
+ def suitable(cls, url):
+ return (False if TVNowIE.suitable(url)
+ else super(TVNowListIE, cls).suitable(url))
+
def _real_extract(self, url):
base_url, show_id, season_id = re.match(self._VALID_URL, url).groups()
- fields = []
- fields.extend(self._SHOW_FIELDS)
- fields.extend('formatTabs.%s' % field for field in self._SEASON_FIELDS)
- fields.extend(
- 'formatTabs.formatTabPages.container.movies.%s' % field
- for field in self._VIDEO_FIELDS)
-
- list_info = self._call_api(
- 'formats/seo', season_id, query={
- 'fields': ','.join(fields),
- 'name': show_id + '.php'
- })
+ list_info = self._extract_list_info(season_id, show_id)
season = next(
season for season in list_info['formatTabs']['items']
if season.get('seoheadline') == season_id)
- title = '%s - %s' % (list_info['title'], season['headline'])
+ title = list_info.get('title')
+ headline = season.get('headline')
+ if title and headline:
+ title = '%s - %s' % (title, headline)
+ else:
+ title = headline or title
entries = []
for container in season['formatTabPages']['items']:
- for info in ((container.get('container') or {}).get('movies') or {}).get('items') or []:
+ items = try_get(
+ container, lambda x: x['container']['movies']['items'],
+ list) or []
+ for info in items:
seo_url = info.get('seoUrl')
if not seo_url:
continue
+ video_id = info.get('id')
entries.append(self.url_result(
- base_url + seo_url + '/player', 'TVNow', info.get('id')))
+ '%s/%s/player' % (base_url, seo_url), TVNowIE.ie_key(),
+ compat_str(video_id) if video_id else None))
return self.playlist_result(
entries, compat_str(season.get('id') or season_id), title)
+
+
+class TVNowShowIE(TVNowListBaseIE):
+ _VALID_URL = TVNowListBaseIE._SHOW_VALID_URL
+
+ _SHOW_FIELDS = ('id', 'title', )
+ _SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
+ _VIDEO_FIELDS = ()
+
+ _TESTS = [{
+ 'url': 'https://www.tvnow.at/vox/ab-ins-beet',
+ 'info_dict': {
+ 'id': 'ab-ins-beet',
+ 'title': 'Ab ins Beet!',
+ },
+ 'playlist_mincount': 7,
+ }, {
+ 'url': 'https://www.tvnow.at/vox/ab-ins-beet/list',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return (False if TVNowIE.suitable(url) or TVNowListIE.suitable(url)
+ else super(TVNowShowIE, cls).suitable(url))
+
+ def _real_extract(self, url):
+ base_url, show_id = re.match(self._VALID_URL, url).groups()
+
+ list_info = self._extract_list_info(show_id, show_id)
+
+ entries = []
+ for season_info in list_info['formatTabs']['items']:
+ season_url = season_info.get('seoheadline')
+ if not season_url:
+ continue
+ season_id = season_info.get('id')
+ entries.append(self.url_result(
+ '%s/list/%s' % (base_url, season_url), TVNowListIE.ie_key(),
+ compat_str(season_id) if season_id else None,
+ season_info.get('headline')))
+
+ return self.playlist_result(entries, show_id, list_info.get('title'))
class TwentyFourVideoIE(InfoExtractor):
IE_NAME = '24video'
- _VALID_URL = r'https?://(?P<host>(?:www\.)?24video\.(?:net|me|xxx|sex|tube|adult))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
+ _VALID_URL = r'https?://(?P<host>(?:www\.)?24video\.(?:net|me|xxx|sexy?|tube|adult))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.24video.net/video/view/1044982',
class TwitchBaseIE(InfoExtractor):
- _VALID_URL_BASE = r'https?://(?:(?:www|go)\.)?twitch\.tv'
+ _VALID_URL_BASE = r'https?://(?:(?:www|go|m)\.)?twitch\.tv'
_API_BASE = 'https://api.twitch.tv'
_USHER_BASE = 'https://usher.ttvnw.net'
return self.playlist_result(entries, info['id'], info['title'])
def _extract_info(self, info):
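+ # The videos API reports status 'recording' while a broadcast is still
+ # ongoing and 'recorded' once it has finished; anything else leaves
+ # liveness undetermined.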
+ status = info.get('status')
+ if status == 'recording':
+ is_live = True
+ elif status == 'recorded':
+ is_live = False
+ else:
+ is_live = None
return {
'id': info['_id'],
'title': info.get('title') or 'Untitled Broadcast',
'uploader_id': info.get('channel', {}).get('name'),
'timestamp': parse_iso8601(info.get('recorded_at')),
'view_count': int_or_none(info.get('views')),
+ 'is_live': is_live,
}
def _real_extract(self, url):
_VALID_URL = r'''(?x)
https?://
(?:
- (?:(?:www|go)\.)?twitch\.tv/(?:[^/]+/v|videos)/|
+ (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v|videos)/|
player\.twitch\.tv/\?.*?\bvideo=v
)
(?P<id>\d+)
}, {
'url': 'https://www.twitch.tv/videos/6528877',
'only_matching': True,
+ }, {
+ 'url': 'https://m.twitch.tv/beagsandjam/v/247478721',
+ 'only_matching': True,
}]
def _real_extract(self, url):
_VALID_URL = r'%s/(?P<id>[^/]+)/profile/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
_PLAYLIST_TYPE = 'profile'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.twitch.tv/vanillatv/profile',
'info_dict': {
'id': 'vanillatv',
'title': 'VanillaTV',
},
'playlist_mincount': 412,
- }
+ }, {
+ 'url': 'http://m.twitch.tv/vanillatv/profile',
+ 'only_matching': True,
+ }]
class TwitchVideosBaseIE(TwitchPlaylistBaseIE):
_PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive,upload,highlight'
_PLAYLIST_TYPE = 'all videos'
- _TEST = {
+ _TESTS = [{
'url': 'https://www.twitch.tv/spamfish/videos/all',
'info_dict': {
'id': 'spamfish',
'title': 'Spamfish',
},
'playlist_mincount': 869,
- }
+ }, {
+ 'url': 'https://m.twitch.tv/spamfish/videos/all',
+ 'only_matching': True,
+ }]
class TwitchUploadsIE(TwitchVideosBaseIE):
_PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'upload'
_PLAYLIST_TYPE = 'uploads'
- _TEST = {
+ _TESTS = [{
'url': 'https://www.twitch.tv/spamfish/videos/uploads',
'info_dict': {
'id': 'spamfish',
'title': 'Spamfish',
},
'playlist_mincount': 0,
- }
+ }, {
+ 'url': 'https://m.twitch.tv/spamfish/videos/uploads',
+ 'only_matching': True,
+ }]
class TwitchPastBroadcastsIE(TwitchVideosBaseIE):
_PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive'
_PLAYLIST_TYPE = 'past broadcasts'
- _TEST = {
+ _TESTS = [{
'url': 'https://www.twitch.tv/spamfish/videos/past-broadcasts',
'info_dict': {
'id': 'spamfish',
'title': 'Spamfish',
},
'playlist_mincount': 0,
- }
+ }, {
+ 'url': 'https://m.twitch.tv/spamfish/videos/past-broadcasts',
+ 'only_matching': True,
+ }]
class TwitchHighlightsIE(TwitchVideosBaseIE):
_PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'highlight'
_PLAYLIST_TYPE = 'highlights'
- _TEST = {
+ _TESTS = [{
'url': 'https://www.twitch.tv/spamfish/videos/highlights',
'info_dict': {
'id': 'spamfish',
'title': 'Spamfish',
},
'playlist_mincount': 805,
- }
+ }, {
+ 'url': 'https://m.twitch.tv/spamfish/videos/highlights',
+ 'only_matching': True,
+ }]
class TwitchStreamIE(TwitchBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:
- (?:(?:www|go)\.)?twitch\.tv/|
+ (?:(?:www|go|m)\.)?twitch\.tv/|
player\.twitch\.tv/\?.*?\bchannel=
)
(?P<id>[^/#?]+)
}, {
'url': 'https://go.twitch.tv/food',
'only_matching': True,
+ }, {
+ 'url': 'https://m.twitch.tv/food',
+ 'only_matching': True,
}]
@classmethod
class VideaIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
- videa\.hu/
+ videa(?:kid)?\.hu/
(?:
videok/(?:[^/]+/)*[^?#&]+-|
player\?.*?\bv=|
'id': '8YfIAjxwWGwT8HVQ',
'ext': 'mp4',
'title': 'Az őrült kígyász 285 kígyót enged szabadon',
- 'thumbnail': 'http://videa.hu/static/still/1.4.1.1007274.1204470.3',
+ 'thumbnail': r're:^https?://.*',
'duration': 21,
},
}, {
}, {
'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
'only_matching': True,
+ }, {
+ 'url': 'https://videakid.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://videakid.hu/player?v=8YfIAjxwWGwT8HVQ',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://videakid.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
+ 'only_matching': True,
}]
@staticmethod
from __future__ import unicode_literals
import re
-import itertools
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
determine_ext,
int_or_none,
class VineUserIE(InfoExtractor):
IE_NAME = 'vine:user'
- _VALID_URL = r'(?:https?://)?vine\.co/(?P<u>u/)?(?P<user>[^/]+)/?(\?.*)?$'
+ _VALID_URL = r'https?://vine\.co/(?P<u>u/)?(?P<user>[^/]+)'
_VINE_BASE_URL = 'https://vine.co/'
- _TESTS = [
- {
- 'url': 'https://vine.co/Visa',
- 'info_dict': {
- 'id': 'Visa',
- },
- 'playlist_mincount': 46,
- },
- {
- 'url': 'https://vine.co/u/941705360593584128',
- 'only_matching': True,
+ _TESTS = [{
+ 'url': 'https://vine.co/itsruthb',
+ 'info_dict': {
+ 'id': 'itsruthb',
+ 'title': 'Ruth B',
+ 'description': '| Instagram/Twitter: itsruthb | still a lost boy from neverland',
},
- ]
+ 'playlist_mincount': 611,
+ }, {
+ 'url': 'https://vine.co/u/942914934646415360',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if VineIE.suitable(url) else super(VineUserIE, cls).suitable(url)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
profile_data = self._download_json(
profile_url, user, note='Downloading user profile data')
- user_id = profile_data['data']['userId']
- timeline_data = []
- for pagenum in itertools.count(1):
- timeline_url = '%sapi/timelines/users/%s?page=%s&size=100' % (
- self._VINE_BASE_URL, user_id, pagenum)
- timeline_page = self._download_json(
- timeline_url, user, note='Downloading page %d' % pagenum)
- timeline_data.extend(timeline_page['data']['records'])
- if timeline_page['data']['nextPage'] is None:
- break
-
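+ # Vine is discontinued; per-user post lists now come from the static
+ # JSON archive at archive.vine.co, keyed by numeric user id.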
+ data = profile_data['data']
+ user_id = data.get('userId') or data['userIdStr']
+ profile = self._download_json(
+ 'https://archive.vine.co/profiles/%s.json' % user_id, user_id)
entries = [
- self.url_result(e['permalinkUrl'], 'Vine') for e in timeline_data]
- return self.playlist_result(entries, user)
+ self.url_result(
+ 'https://vine.co/v/%s' % post_id, ie='Vine', video_id=post_id)
+ for post_id in profile['posts']
+ if post_id and isinstance(post_id, compat_str)]
+ return self.playlist_result(
+ entries, user, profile.get('username'), profile.get('description'))
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_urlencode,
- compat_urlparse,
+ compat_urllib_parse,
)
from ..utils import (
float_or_none,
data = json.dumps(data).encode()
headers['Content-Type'] = 'application/json'
method = 'POST' if data else 'GET'
- base_string = '&'.join([method, compat_urlparse.quote(base_url, ''), compat_urlparse.quote(encoded_query, '')])
+ base_string = '&'.join([method, compat_urllib_parse.quote(base_url, ''), compat_urllib_parse.quote(encoded_query, '')])
oauth_signature = base64.b64encode(hmac.new(
(self._API_PARAMS['oAuthSecret'] + '&').encode('ascii'),
base_string.encode(), hashlib.sha1).digest()).decode()
- encoded_query += '&oauth_signature=' + compat_urlparse.quote(oauth_signature, '')
+ encoded_query += '&oauth_signature=' + compat_urllib_parse.quote(oauth_signature, '')
return self._download_json(
'?'.join([base_url, encoded_query]), video_id,
note='Downloading %s JSON metadata' % note, headers=headers, data=data)
'only_matching': True
}]
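+ # Each _SITES entry pairs a host pattern with a display name; the host
+ # patterns are collapsed into one alternation so generic pages can be
+ # scanned for embeds of any supported site.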
+ @staticmethod
+ def _extract_urls(webpage):
+ return [
+ mobj.group('url')
+ for mobj in re.finditer(
+ r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1'
+ % '|'.join(list(zip(*XFileShareIE._SITES))[0]),
+ webpage)]
+
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
group='title') or self._og_search_title(webpage)
thumbnail = self._search_regex(
- r'url_bigthumb=(.+?)&', webpage, 'thumbnail', fatal=False)
+ (r'setThumbUrl\(\s*(["\'])(?P<thumbnail>(?:(?!\1).)+)\1',
+ r'url_bigthumb=(?P<thumbnail>.+?)&'),
+ webpage, 'thumbnail', fatal=False, group='thumbnail')
duration = int_or_none(self._og_search_property(
'duration', webpage, default=None)) or parse_duration(
self._search_regex(
# request basic data
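+ # Youku appears to rotate the accepted ccode value server-side from time
+ # to time; requests carrying a stale code are rejected, hence the bump.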
basic_data_params = {
'vid': video_id,
- 'ccode': '0507',
+ 'ccode': '0590',
'client_ip': '192.168.1.1',
'utid': cna,
'client_ts': time.time() / 1000,
(username, password) = self._get_login_info()
# No authentication to be performed
if username is None:
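+ # A cookie jar passed via --cookies can already carry a logged-in
+ # session, so only insist on username/password when no cookie file was
+ # supplied either.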
- if self._LOGIN_REQUIRED:
+ if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
return True
def _real_initialize(self):
self._login()
- def _real_extract(self, url):
- page = self._download_webpage(
- 'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)
-
+ def _entries(self, page):
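+ # Yield results page by page instead of materializing the full id list
+ # first, so the playlist can be processed lazily.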
# The extraction process is the same as for playlists, but the regex
# for the video ids doesn't contain an index
ids = []
# 'recommended' feed has infinite 'load more' and each new portion spins
# the same videos in (sometimes) slightly different order, so we'll check
# for unicity and break when portion has no new videos
- new_ids = filter(lambda video_id: video_id not in ids, orderedSet(matches))
+ new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))
if not new_ids:
break
ids.extend(new_ids)
+ for entry in self._ids_to_results(new_ids):
+ yield entry
+
mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
if not mobj:
break
content_html = more['content_html']
more_widget_html = more['load_more_widget_html']
+ def _real_extract(self, url):
+ page = self._download_webpage(
+ 'https://www.youtube.com/feed/%s' % self._FEED_NAME,
+ self._PLAYLIST_TITLE)
return self.playlist_result(
- self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)
+ self._entries(page), playlist_title=self._PLAYLIST_TITLE)
class YoutubeWatchLaterIE(YoutubePlaylistIE):
filesystem.add_option(
'-a', '--batch-file',
dest='batchfile', metavar='FILE',
- help='File containing URLs to download (\'-\' for stdin)')
+ help="File containing URLs to download ('-' for stdin), one URL per line. "
+ "Lines starting with '#', ';' or ']' are considered as comments and ignored.")
filesystem.add_option(
'--id', default=False,
action='store_true', dest='useid', help='Use only video ID in file name')
return op(actual_value, comparison_value)
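+ # For boolean meta fields (e.g. is_live) a bare presence test is not
+ # meaningful: is_live=False must satisfy '!is_live' rather than
+ # 'is_live', so booleans are tested by truth value instead of by
+ # None-ness.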
UNARY_OPERATORS = {
- '': lambda v: v is not None,
- '!': lambda v: v is None,
+ '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
+ '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
}
operator_rex = re.compile(r'''(?x)\s*
(?P<op>%s)\s*(?P<key>[a-z_]+)
from __future__ import unicode_literals
-__version__ = '2018.03.14'
+__version__ = '2018.04.25'