all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
clean:
- rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
+ rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
+ find -name "*.pyc" -delete
PREFIX ?= /usr/local
BINDIR ?= $(PREFIX)/bin
ot: offlinetest
offlinetest: codetest
- nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations --exclude test_youtube_lists
+ nosetests --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py
tar: youtube-dl.tar.gz
on Windows)
--flat-playlist Do not extract the videos of a playlist,
only list them.
+ --no-color Do not emit color codes in output.
## Network Options:
--proxy URL Use the specified HTTP/HTTPS proxy. Pass in
COUNT views
--max-views COUNT Do not download any videos with more than
COUNT views
+ --match-filter FILTER (Experimental) Generic video filter.
+ Specify any key (see help for -o for a list
+ of available keys) to match if the key is
+ present, !key to check if the key is not
+ present, key > NUMBER (like "comment_count >
+ 12", also works with >=, <, <=, !=, =) to
+ compare against a number, and & to require
+ multiple matches. Values which are not
+ known are excluded unless you put a
+ question mark (?) after the operator. For
+ example, to only match videos that have
+ been liked more than 100 times and disliked
+ less than 50 times (or the dislike
+ functionality is not available at the given
+ service), but which also have a description,
+ use --match-filter "like_count > 100 &
+ dislike_count <? 50 & description" .
--no-playlist If the URL refers to a video and a
playlist, download only the video.
+ --yes-playlist If the URL refers to a video and a
+ playlist, download the playlist.
--age-limit YEARS download only videos suitable for the given
age
--download-archive FILE Download only videos not listed in the
--playlist-reverse Download playlist videos in reverse order
--xattr-set-filesize (experimental) set file xattribute
ytdl.filesize with expected filesize
+ --hls-prefer-native (experimental) Use the native HLS
+ downloader instead of ffmpeg.
--external-downloader COMMAND (experimental) Use the specified external
downloader. Currently supports
aria2c,curl,wget
video results by putting a condition in
brackets, as in -f "best[height=720]" (or
-f "[filesize>10M]"). This works for
- filesize, height, width, tbr, abr, vbr, and
- fps and the comparisons <, <=, >, >=, =, !=
- . Formats for which the value is not known
- are excluded unless you put a question mark
- (?) after the operator. You can combine
- format filters, so -f "[height <=?
- 720][tbr>500]" selects up to 720p videos
- (or videos where the height is not known)
- with a bitrate of at least 500 KBit/s. By
- default, youtube-dl will pick the best
- quality. Use commas to download multiple
- audio formats, such as -f
+ filesize, height, width, tbr, abr, vbr,
+ asr, and fps and the comparisons <, <=, >,
+ >=, =, != and for ext, acodec, vcodec,
+ container, and protocol and the comparisons
+ =, != . Formats for which the value is not
+ known are excluded unless you put a
+ question mark (?) after the operator. You
+ can combine format filters, so -f "[height
+ <=? 720][tbr>500]" selects up to 720p
+ videos (or videos where the height is not
+ known) with a bitrate of at least 500
+ KBit/s. By default, youtube-dl will pick
+ the best quality. Use commas to download
+ multiple audio formats, such as -f
136/137/mp4/bestvideo,140/m4a/bestaudio.
You can merge the video and audio of two
formats into a single file using -f <video-
--all-subs downloads all the available subtitles of
the video
--list-subs lists all available subtitles for the video
- --sub-format FORMAT subtitle format (default=srt) ([sbv/vtt]
- youtube only)
+ --sub-format FORMAT subtitle format, accepts formats
+ preference, for example: "ass/srt/best"
--sub-lang LANGS languages of the subtitles to download
(optional) separated by commas, use IETF
language tags like 'en,pt'
postprocessors (default)
--prefer-ffmpeg Prefer ffmpeg over avconv for running the
postprocessors
+ --ffmpeg-location PATH Location of the ffmpeg/avconv binary;
+ either the path to the binary or its
+ containing directory.
--exec CMD Execute a command on the file after
downloading, similar to find's -exec
syntax. Example: --exec 'adb push {}
/sdcard/Music/ && rm {}'
+ --convert-subtitles FORMAT Convert the subtitles to another format
+ (currently supported: srt|ass|vtt)
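
The new --match-filter option above is backed by match_str in youtube_dl.utils (the new tests in test/test_utils.py below pin down its exact semantics). A minimal sketch of evaluating a filter against a video's metadata dict, mirroring the example from the help text:

    from youtube_dl.utils import match_str

    # hypothetical metadata, as an extractor would report it
    info = {'like_count': 190, 'description': 'foo'}

    # dislike_count is unknown, but the '?' after the operator lets it pass
    print(match_str('like_count > 100 & dislike_count <? 50 & description', info))  # True
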
# CONFIGURATION
### ERROR: no fmt_url_map or conn information found in video info
-youtube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`.
+YouTube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
### ERROR: unable to download video ###
-youtube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`.
+YouTube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
+
+### ExtractorError: Could not find JS function u'OF'
+
+In February 2015, the new YouTube player contained a character sequence in a string that was misinterpreted by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
### SyntaxError: Non-ASCII character ###
### How can I detect whether a given URL is supported by youtube-dl?
-For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/v/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
+For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports a URL of a service in that list as unsupported. In that case, simply report a bug.
It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
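
What can be checked is whether a dedicated (non-generic) extractor claims the URL. A short sketch relying only on the public gen_extractors helper (the function name here is ours):

    import youtube_dl.extractor

    def has_dedicated_extractor(url):
        # True if any extractor other than the generic one accepts the URL
        return any(ie.suitable(url) and ie.IE_NAME != 'generic'
                   for ie in youtube_dl.extractor.gen_extractors())

    print(has_dedicated_extractor('https://www.youtube.com/watch?v=BaW_jenozKc'))  # True
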
on Windows)
--flat-playlist Do not extract the videos of a playlist,
only list them.
+ --no-color Do not emit color codes in output.
Network Options:
----------------
COUNT views
--max-views COUNT Do not download any videos with more than
COUNT views
+ --match-filter FILTER (Experimental) Generic video filter.
+ Specify any key (see help for -o for a list
+ of available keys) to match if the key is
+ present, !key to check if the key is not
+ present, key > NUMBER (like "comment_count >
+ 12", also works with >=, <, <=, !=, =) to
+ compare against a number, and & to require
+ multiple matches. Values which are not
+ known are excluded unless you put a
+ question mark (?) after the operator. For
+ example, to only match videos that have
+ been liked more than 100 times and disliked
+ less than 50 times (or the dislike
+ functionality is not available at the given
+ service), but which also have a description,
+ use --match-filter "like_count > 100 &
+ dislike_count <? 50 & description" .
--no-playlist If the URL refers to a video and a
playlist, download only the video.
+ --yes-playlist If the URL refers to a video and a
+ playlist, download the playlist.
--age-limit YEARS download only videos suitable for the given
age
--download-archive FILE Download only videos not listed in the
--playlist-reverse Download playlist videos in reverse order
--xattr-set-filesize (experimental) set file xattribute
ytdl.filesize with expected filesize
+ --hls-prefer-native (experimental) Use the native HLS
+ downloader instead of ffmpeg.
--external-downloader COMMAND (experimental) Use the specified external
downloader. Currently supports
aria2c,curl,wget
video results by putting a condition in
brackets, as in -f "best[height=720]" (or
-f "[filesize>10M]"). This works for
- filesize, height, width, tbr, abr, vbr, and
- fps and the comparisons <, <=, >, >=, =, !=
- . Formats for which the value is not known
- are excluded unless you put a question mark
- (?) after the operator. You can combine
- format filters, so -f "[height <=?
- 720][tbr>500]" selects up to 720p videos
- (or videos where the height is not known)
- with a bitrate of at least 500 KBit/s. By
- default, youtube-dl will pick the best
- quality. Use commas to download multiple
- audio formats, such as -f
+ filesize, height, width, tbr, abr, vbr,
+ asr, and fps and the comparisons <, <=, >,
+ >=, =, != and for ext, acodec, vcodec,
+ container, and protocol and the comparisons
+ =, != . Formats for which the value is not
+ known are excluded unless you put a
+ question mark (?) after the operator. You
+ can combine format filters, so -f "[height
+ <=? 720][tbr>500]" selects up to 720p
+ videos (or videos where the height is not
+ known) with a bitrate of at least 500
+ KBit/s. By default, youtube-dl will pick
+ the best quality. Use commas to download
+ multiple audio formats, such as -f
136/137/mp4/bestvideo,140/m4a/bestaudio.
You can merge the video and audio of two
formats into a single file using -f <video-
--all-subs downloads all the available subtitles of
the video
--list-subs lists all available subtitles for the video
- --sub-format FORMAT subtitle format (default=srt) ([sbv/vtt]
- youtube only)
+ --sub-format FORMAT subtitle format, accepts formats
+ preference, for example: "ass/srt/best"
--sub-lang LANGS languages of the subtitles to download
(optional) separated by commas, use IETF
language tags like 'en,pt'
postprocessors (default)
--prefer-ffmpeg Prefer ffmpeg over avconv for running the
postprocessors
+ --ffmpeg-location PATH Location of the ffmpeg/avconv binary;
+ either the path to the binary or its
+ containing directory.
--exec CMD Execute a command on the file after
downloading, similar to find's -exec
syntax. Example: --exec 'adb push {}
/sdcard/Music/ && rm {}'
+ --convert-subtitles FORMAT Convert the subtitles to another format
+ (currently supported: srt|ass|vtt)
CONFIGURATION
=============
ERROR: no fmt_url_map or conn information found in video info
-youtube has switched to a new video info format in July 2011 which is
-not supported by old versions of youtube-dl. You can update youtube-dl
-with sudo youtube-dl --update.
+YouTube has switched to a new video info format in July 2011 which is
+not supported by old versions of youtube-dl. See above for how to update
+youtube-dl.
ERROR: unable to download video
-youtube requires an additional signature since September 2012 which is
-not supported by old versions of youtube-dl. You can update youtube-dl
-with sudo youtube-dl --update.
+YouTube requires an additional signature since September 2012 which is
+not supported by old versions of youtube-dl. See above for how to update
+youtube-dl.
+
+ExtractorError: Could not find JS function u'OF'
+
+In February 2015, the new YouTube player contained a character sequence
+in a string that was misinterpreted by old versions of youtube-dl. See
+above for how to update youtube-dl.
SyntaxError: Non-ASCII character
For one, have a look at the list of supported sites. Note that it can
sometimes happen that the site changes its URL scheme (say, from
-http://example.com/v/1234567 to http://example.com/v/1234567 ) and
+http://example.com/video/1234567 to http://example.com/v/1234567 ) and
youtube-dl reports an URL of a service in that list as unsupported. In
that case, simply report a bug.
RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST)
- if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict']
- or test['info_dict']['age_limit'] != 18):
+ if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] or
+ test['info_dict']['age_limit'] != 18):
print('\nPotential missing age_limit check: {0}'.format(test['name']))
- elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict']
- and test['info_dict']['age_limit'] == 18):
+ elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] and
+ test['info_dict']['age_limit'] == 18):
print('\nPotential false negative: {0}'.format(test['name']))
else:
# Supported sites
+ - **1tv**: Первый канал
- **1up.com**
- **220.ro**
- **24video**
- **AddAnime**
- **AdobeTV**
- **AdultSwim**
+ - **Aftenposten**
- **Aftonbladet**
+ - **AirMozilla**
- **AlJazeera**
- **Allocine**
- **AlphaPorno**
- **Brightcove**
- **BuzzFeed**
- **BYUtv**
+ - **Camdemy**
+ - **CamdemyFolder**
- **Canal13cl**
- **canalc2.tv**
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
- **CBS**
- **CBSNews**: CBS News
+ - **CBSSports**
- **CeskaTelevize**
- **channel9**: Channel 9
- **Chilloutzone**
+ - **chirbit**
+ - **chirbit:profile**
- **Cinchcast**
- **Cinemassacre**
- **clipfish**
- **EllenTV**
- **EllenTV:clips**
- **ElPais**: El País
+ - **Embedly**
- **EMPFlix**
- **Engadget**
- **Eporner**
- **fernsehkritik.tv:postecke**
- **Firedrive**
- **Firstpost**
- - **firsttv**: Видеоархив - Первый канал
- **Flickr**
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
- **Foxgay**
- **Helsinki**: helsinki.fi
- **HentaiStigma**
- **HistoricFilms**
+ - **History**
- **hitbox**
- **hitbox:live**
- **HornBunny**
- **ign.com**
- **imdb**: Internet Movie Database trailers
- **imdb:list**: Internet Movie Database lists
+ - **Imgur**
- **Ina**
- **InfoQ**
- **Instagram**
- **Jove**
- **jpopsuki.tv**
- **Jukebox**
+ - **Kaltura**
- **Kankan**
- **Karaoketv**
- **keek**
- **Ku6**
- **la7.tv**
- **Laola1Tv**
+ - **Letv**
+ - **LetvPlaylist**
+ - **LetvTv**
- **lifenews**: LIFE | NEWS
- **LiveLeak**
- **livestream**
- **mailru**: Видео@Mail.Ru
- **Malemotion**
- **MDR**
+ - **media.ccc.de**
- **metacafe**
- **Metacritic**
- **Mgoon**
- **myvideo**
- **MyVidster**
- **n-tv.de**
+ - **NationalGeographic**
- **Naver**
- **NBA**
- **NBC**
- **nowvideo**: NowVideo
- **npo.nl**
- **npo.nl:live**
+ - **npo.nl:radio**
+ - **npo.nl:radio:fragment**
- **NRK**
- **NRKTV**
- **ntv.ru**
- **Nuvid**
- **NYTimes**
- **ocw.mit.edu**
+ - **Odnoklassniki**
- **OktoberfestTV**
- **on.aol.com**
- **Ooyala**
- **podomatic**
- **PornHd**
- **PornHub**
+ - **PornHubPlaylist**
- **Pornotube**
- **PornoXO**
- **PromptFile**
- **prosiebensat1**: ProSiebenSat.1 Digital
+ - **Puls4**
- **Pyvideo**
- **QuickVid**
+ - **R7**
- **radio.de**
- **radiobremen**
- **radiofrance**
- **Roxwel**
- **RTBF**
- **Rte**
+ - **rtl.nl**: rtl.nl and rtlxl.nl
- **RTL2**
- **RTLnow**
- - **rtlxl.nl**
- **RTP**
- **RTS**: RTS.ch
- **rtve.es:alacarta**: RTVE a la carta
- **rutube:movie**: Rutube movies
- **rutube:person**: Rutube person videos
- **RUTV**: RUTV.RU
+ - **Sandia**: Sandia National Laboratories
- **Sapo**: SAPO Vídeos
- **savefrom.net**
- **SBS**: sbs.com.au
- **soundcloud:playlist**
- **soundcloud:set**
- **soundcloud:user**
- - **Soundgasm**
+ - **soundgasm**
+ - **soundgasm:profile**
- **southpark.cc.com**
- **southpark.de**
- **Space**
- **StreamCZ**
- **StreetVoice**
- **SunPorno**
+ - **SVTPlay**: SVT Play and Öppet arkiv
- **SWRMediathek**
- **Syfy**
- **SztvHu**
- **Turbo**
- **Tutv**
- **tv.dfb.de**
+ - **TV4**: tv4.se and tv4play.se
- **tvigle**: Интернет-телевидение Tvigle.ru
- **tvp.pl**
- **tvp.pl:Series**
- **XVideos**
- **XXXYMovies**
- **Yahoo**: Yahoo screen and movies
+ - **Yam**
- **YesJapan**
- **Ynet**
- **YouJizz**
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
- **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
+ - **Zapiks**
- **ZDF**
- **ZDFChannel**
- **zingmp3:album**: mp3.zing.vn albums
self.assertTrue(
got.startswith(start_str),
'field %s (value: %r) should start with %r' % (info_field, got, start_str))
+ elif isinstance(expected, compat_str) and expected.startswith('contains:'):
+ got = got_dict.get(info_field)
+ contains_str = expected[len('contains:'):]
+ self.assertTrue(
+ isinstance(got, compat_str),
+ 'Expected a %s object, but got %s for field %s' % (
+ compat_str.__name__, type(got).__name__, info_field))
+ self.assertTrue(
+ contains_str in got,
+ 'field %s (value: %r) should contain %r' % (info_field, got, contains_str))
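+ # Like the 'startswith:' branch above, this is a prefix mini-DSL: an expected
+ # value of 'contains:some phrase' asserts that the extracted field merely
+ # includes the phrase instead of matching it exactly.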
elif isinstance(expected, type):
got = got_dict.get(info_field)
self.assertTrue(isinstance(got, expected),
info_dict_str += ''.join(
' %s: %s,\n' % (_repr(k), _repr(v))
for k, v in test_info_dict.items() if k not in missing_keys)
- info_dict_str += '\n'
+
+ if info_dict_str:
+ info_dict_str += '\n'
info_dict_str += ''.join(
' %s: %s,\n' % (_repr(k), _repr(test_info_dict[k]))
for k in missing_keys)
write_string(
- '\n\'info_dict\': {\n' + info_dict_str + '}\n', out=sys.stderr)
+ '\n\'info_dict\': {\n' + info_dict_str + '},\n', out=sys.stderr)
self.assertFalse(
missing_keys,
'Missing keys in test definition: %s' % (
"retries": 10,
"simulate": false,
"subtitleslang": null,
- "subtitlesformat": "srt",
+ "subtitlesformat": "best",
"test": true,
"updatetime": true,
"usenetrc": false,
"writesubtitles": false,
"allsubtitles": false,
"listssubtitles": false,
- "socket_timeout": 20
+ "socket_timeout": 20,
+ "fixup": "never"
}
from test.helper import FakeYDL, assertRegexpMatches
from youtube_dl import YoutubeDL
from youtube_dl.extractor import YoutubeIE
+from youtube_dl.postprocessor.common import PostProcessor
class YDL(FakeYDL):
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'G')
+ def test_subtitles(self):
+ def s_formats(lang, autocaption=False):
+ return [{
+ 'ext': ext,
+ 'url': 'http://localhost/video.%s.%s' % (lang, ext),
+ '_auto': autocaption,
+ } for ext in ['vtt', 'srt', 'ass']]
+ subtitles = dict((l, s_formats(l)) for l in ['en', 'fr', 'es'])
+ auto_captions = dict((l, s_formats(l, True)) for l in ['it', 'pt', 'es'])
+ info_dict = {
+ 'id': 'test',
+ 'title': 'Test',
+ 'url': 'http://localhost/video.mp4',
+ 'subtitles': subtitles,
+ 'automatic_captions': auto_captions,
+ 'extractor': 'TEST',
+ }
+
+ def get_info(params={}):
+ params.setdefault('simulate', True)
+ ydl = YDL(params)
+ ydl.report_warning = lambda *args, **kargs: None
+ return ydl.process_video_result(info_dict, download=False)
+
+ result = get_info()
+ self.assertFalse(result.get('requested_subtitles'))
+ self.assertEqual(result['subtitles'], subtitles)
+ self.assertEqual(result['automatic_captions'], auto_captions)
+
+ result = get_info({'writesubtitles': True})
+ subs = result['requested_subtitles']
+ self.assertTrue(subs)
+ self.assertEqual(set(subs.keys()), set(['en']))
+ self.assertTrue(subs['en'].get('data') is None)
+ self.assertEqual(subs['en']['ext'], 'ass')
+
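+ # 'foo' matches no available subtitle ext, so the next preference, 'srt', wins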
+ result = get_info({'writesubtitles': True, 'subtitlesformat': 'foo/srt'})
+ subs = result['requested_subtitles']
+ self.assertEqual(subs['en']['ext'], 'srt')
+
+ result = get_info({'writesubtitles': True, 'subtitleslangs': ['es', 'fr', 'it']})
+ subs = result['requested_subtitles']
+ self.assertTrue(subs)
+ self.assertEqual(set(subs.keys()), set(['es', 'fr']))
+
+ result = get_info({'writesubtitles': True, 'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']})
+ subs = result['requested_subtitles']
+ self.assertTrue(subs)
+ self.assertEqual(set(subs.keys()), set(['es', 'pt']))
+ self.assertFalse(subs['es']['_auto'])
+ self.assertTrue(subs['pt']['_auto'])
+
+ result = get_info({'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']})
+ subs = result['requested_subtitles']
+ self.assertTrue(subs)
+ self.assertEqual(set(subs.keys()), set(['es', 'pt']))
+ self.assertTrue(subs['es']['_auto'])
+ self.assertTrue(subs['pt']['_auto'])
+
def test_add_extra_info(self):
test_dict = {
'extractor': 'Foo',
'vbr': 10,
}), '^\s*10k$')
+ def test_postprocessors(self):
+ filename = 'post-processor-testfile.mp4'
+ audiofile = filename + '.mp3'
+
+ class SimplePP(PostProcessor):
+ def run(self, info):
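+ # run() returns (keep_original, info); returning False defers the choice of
+ # whether to delete the input file to the 'keepvideo' option (exercised below)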
+ with open(audiofile, 'wt') as f:
+ f.write('EXAMPLE')
+ info['filepath']  # bare lookup asserts the PP received a 'filepath'
+ return False, info
+
+ def run_pp(params):
+ with open(filename, 'wt') as f:
+ f.write('EXAMPLE')
+ ydl = YoutubeDL(params)
+ ydl.add_post_processor(SimplePP())
+ ydl.post_process(filename, {'filepath': filename})
+
+ run_pp({'keepvideo': True})
+ self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename)
+ self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
+ os.unlink(filename)
+ os.unlink(audiofile)
+
+ run_pp({'keepvideo': False})
+ self.assertFalse(os.path.exists(filename), '%s exists' % filename)
+ self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
+ os.unlink(audiofile)
+
+
if __name__ == '__main__':
unittest.main()
self.assertEqual(jsi.call_function('f'), -11)
def test_comments(self):
+ 'Skipping: Not yet fully implemented'  # docstring doubles as the skip reason in nose's verbose output
+ return
jsi = JSInterpreter('''
function x() {
var x = /* 1 + */ 2;
''')
self.assertEqual(jsi.call_function('x'), 52)
+ jsi = JSInterpreter('''
+ function f() {
+ var x = "/*";
+ var y = 1 /* comment */ + 2;
+ return y;
+ }
+ ''')
+ self.assertEqual(jsi.call_function('f'), 3)
+
def test_precedence(self):
jsi = JSInterpreter('''
function x() {
VimeoIE,
WallaIE,
CeskaTelevizeIE,
+ LyndaIE,
+ NPOIE,
+ ComedyCentralIE,
+ NRKTVIE,
+ RaiIE,
+ VikiIE,
+ ThePlatformIE,
+ RTVEALaCartaIE,
)
def setUp(self):
self.DL = FakeYDL()
- self.ie = self.IE(self.DL)
+ self.ie = self.IE()
+ self.DL.add_info_extractor(self.ie)
def getInfoDict(self):
- info_dict = self.ie.extract(self.url)
+ info_dict = self.DL.extract_info(self.url, download=False)
return info_dict
def getSubtitles(self):
info_dict = self.getInfoDict()
- return info_dict['subtitles']
+ subtitles = info_dict['requested_subtitles']
+ if not subtitles:
+ return subtitles
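+ # fetch subtitle bodies that the extractor provided only as URLs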
+ for sub_info in subtitles.values():
+ if sub_info.get('data') is None:
+ uf = self.DL.urlopen(sub_info['url'])
+ sub_info['data'] = uf.read().decode('utf-8')
+ return dict((l, sub_info['data']) for l, sub_info in subtitles.items())
class TestYoutubeSubtitles(BaseTestSubtitles):
url = 'QRS8MkLhQmM'
IE = YoutubeIE
- def test_youtube_no_writesubtitles(self):
- self.DL.params['writesubtitles'] = False
- subtitles = self.getSubtitles()
- self.assertEqual(subtitles, None)
-
- def test_youtube_subtitles(self):
- self.DL.params['writesubtitles'] = True
- subtitles = self.getSubtitles()
- self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
-
- def test_youtube_subtitles_lang(self):
- self.DL.params['writesubtitles'] = True
- self.DL.params['subtitleslangs'] = ['it']
- subtitles = self.getSubtitles()
- self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
-
def test_youtube_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 13)
+ self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
+ self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
+ for lang in ['it', 'fr', 'de']:
+ self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
def test_youtube_subtitles_sbv_format(self):
self.DL.params['writesubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
- def test_youtube_list_subtitles(self):
- self.DL.expect_warning('Video doesn\'t have automatic captions')
- self.DL.params['listsubtitles'] = True
- info_dict = self.getInfoDict()
- self.assertEqual(info_dict, None)
-
def test_youtube_automatic_captions(self):
self.url = '8YoUxe5ncPo'
self.DL.params['writeautomaticsub'] = True
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
- self.assertEqual(len(subtitles), 0)
-
- def test_youtube_multiple_langs(self):
- self.url = 'QRS8MkLhQmM'
- self.DL.params['writesubtitles'] = True
- langs = ['it', 'fr', 'de']
- self.DL.params['subtitleslangs'] = langs
- subtitles = self.getSubtitles()
- for lang in langs:
- self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
+ self.assertFalse(subtitles)
class TestDailymotionSubtitles(BaseTestSubtitles):
url = 'http://www.dailymotion.com/video/xczg00'
IE = DailymotionIE
- def test_no_writesubtitles(self):
- subtitles = self.getSubtitles()
- self.assertEqual(subtitles, None)
-
- def test_subtitles(self):
- self.DL.params['writesubtitles'] = True
- subtitles = self.getSubtitles()
- self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
-
- def test_subtitles_lang(self):
- self.DL.params['writesubtitles'] = True
- self.DL.params['subtitleslangs'] = ['fr']
- subtitles = self.getSubtitles()
- self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
-
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
- self.assertEqual(len(subtitles.keys()), 5)
-
- def test_list_subtitles(self):
- self.DL.expect_warning('Automatic Captions not supported by this server')
- self.DL.params['listsubtitles'] = True
- info_dict = self.getInfoDict()
- self.assertEqual(info_dict, None)
-
- def test_automatic_captions(self):
- self.DL.expect_warning('Automatic Captions not supported by this server')
- self.DL.params['writeautomaticsub'] = True
- self.DL.params['subtitleslang'] = ['en']
- subtitles = self.getSubtitles()
- self.assertTrue(len(subtitles.keys()) == 0)
+ self.assertTrue(len(subtitles.keys()) >= 6)
+ self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
+ self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
+ for lang in ['es', 'fr', 'de']:
+ self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
def test_nosubtitles(self):
self.DL.expect_warning('video doesn\'t have subtitles')
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
- self.assertEqual(len(subtitles), 0)
-
- def test_multiple_langs(self):
- self.DL.params['writesubtitles'] = True
- langs = ['es', 'fr', 'de']
- self.DL.params['subtitleslangs'] = langs
- subtitles = self.getSubtitles()
- for lang in langs:
- self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
+ self.assertFalse(subtitles)
class TestTedSubtitles(BaseTestSubtitles):
url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
IE = TEDIE
- def test_no_writesubtitles(self):
- subtitles = self.getSubtitles()
- self.assertEqual(subtitles, None)
-
- def test_subtitles(self):
- self.DL.params['writesubtitles'] = True
- subtitles = self.getSubtitles()
- self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
-
- def test_subtitles_lang(self):
- self.DL.params['writesubtitles'] = True
- self.DL.params['subtitleslangs'] = ['fr']
- subtitles = self.getSubtitles()
- self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
-
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertTrue(len(subtitles.keys()) >= 28)
-
- def test_list_subtitles(self):
- self.DL.expect_warning('Automatic Captions not supported by this server')
- self.DL.params['listsubtitles'] = True
- info_dict = self.getInfoDict()
- self.assertEqual(info_dict, None)
-
- def test_automatic_captions(self):
- self.DL.expect_warning('Automatic Captions not supported by this server')
- self.DL.params['writeautomaticsub'] = True
- self.DL.params['subtitleslang'] = ['en']
- subtitles = self.getSubtitles()
- self.assertTrue(len(subtitles.keys()) == 0)
-
- def test_multiple_langs(self):
- self.DL.params['writesubtitles'] = True
- langs = ['es', 'fr', 'de']
- self.DL.params['subtitleslangs'] = langs
- subtitles = self.getSubtitles()
- for lang in langs:
+ self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
+ self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
+ for lang in ['es', 'fr', 'de']:
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
url = 'http://blip.tv/a/a-6603250'
IE = BlipTVIE
- def test_list_subtitles(self):
- self.DL.expect_warning('Automatic Captions not supported by this server')
- self.DL.params['listsubtitles'] = True
- info_dict = self.getInfoDict()
- self.assertEqual(info_dict, None)
-
def test_allsubtitles(self):
- self.DL.expect_warning('Automatic Captions not supported by this server')
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
url = 'http://vimeo.com/76979871'
IE = VimeoIE
- def test_no_writesubtitles(self):
- subtitles = self.getSubtitles()
- self.assertEqual(subtitles, None)
-
- def test_subtitles(self):
- self.DL.params['writesubtitles'] = True
- subtitles = self.getSubtitles()
- self.assertEqual(md5(subtitles['en']), '26399116d23ae3cf2c087cea94bc43b4')
-
- def test_subtitles_lang(self):
- self.DL.params['writesubtitles'] = True
- self.DL.params['subtitleslangs'] = ['fr']
- subtitles = self.getSubtitles()
- self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
-
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr']))
-
- def test_list_subtitles(self):
- self.DL.expect_warning('Automatic Captions not supported by this server')
- self.DL.params['listsubtitles'] = True
- info_dict = self.getInfoDict()
- self.assertEqual(info_dict, None)
-
- def test_automatic_captions(self):
- self.DL.expect_warning('Automatic Captions not supported by this server')
- self.DL.params['writeautomaticsub'] = True
- self.DL.params['subtitleslang'] = ['en']
- subtitles = self.getSubtitles()
- self.assertTrue(len(subtitles.keys()) == 0)
+ self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
+ self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
def test_nosubtitles(self):
self.DL.expect_warning('video doesn\'t have subtitles')
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
- self.assertEqual(len(subtitles), 0)
-
- def test_multiple_langs(self):
- self.DL.params['writesubtitles'] = True
- langs = ['es', 'fr', 'de']
- self.DL.params['subtitleslangs'] = langs
- subtitles = self.getSubtitles()
- for lang in langs:
- self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
+ self.assertFalse(subtitles)
class TestWallaSubtitles(BaseTestSubtitles):
url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
IE = WallaIE
- def test_list_subtitles(self):
- self.DL.expect_warning('Automatic Captions not supported by this server')
- self.DL.params['listsubtitles'] = True
- info_dict = self.getInfoDict()
- self.assertEqual(info_dict, None)
-
def test_allsubtitles(self):
self.DL.expect_warning('Automatic Captions not supported by this server')
self.DL.params['writesubtitles'] = True
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
- self.assertEqual(len(subtitles), 0)
+ self.assertFalse(subtitles)
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
IE = CeskaTelevizeIE
- def test_list_subtitles(self):
- self.DL.expect_warning('Automatic Captions not supported by this server')
- self.DL.params['listsubtitles'] = True
- info_dict = self.getInfoDict()
- self.assertEqual(info_dict, None)
-
def test_allsubtitles(self):
self.DL.expect_warning('Automatic Captions not supported by this server')
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['cs']))
- self.assertEqual(md5(subtitles['cs']), '9bf52d9549533c32c427e264bf0847d4')
+ self.assertTrue(len(subtitles['cs']) > 20000)
def test_nosubtitles(self):
self.DL.expect_warning('video doesn\'t have subtitles')
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
- self.assertEqual(len(subtitles), 0)
+ self.assertFalse(subtitles)
+
+
+class TestLyndaSubtitles(BaseTestSubtitles):
+ url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
+ IE = LyndaIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['en']))
+ self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7')
+
+
+class TestNPOSubtitles(BaseTestSubtitles):
+ url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
+ IE = NPOIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['nl']))
+ self.assertEqual(md5(subtitles['nl']), 'fc6435027572b63fb4ab143abd5ad3f4')
+
+
+class TestMTVSubtitles(BaseTestSubtitles):
+ url = 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother'
+ IE = ComedyCentralIE
+
+ def getInfoDict(self):
+ return super(TestMTVSubtitles, self).getInfoDict()['entries'][0]
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['en']))
+ self.assertEqual(md5(subtitles['en']), 'b9f6ca22a6acf597ec76f61749765e65')
+
+
+class TestNRKSubtitles(BaseTestSubtitles):
+ url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1'
+ IE = NRKTVIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['no']))
+ self.assertEqual(md5(subtitles['no']), '1d221e6458c95c5494dcd38e6a1f129a')
+
+
+class TestRaiSubtitles(BaseTestSubtitles):
+ url = 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
+ IE = RaiIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['it']))
+ self.assertEqual(md5(subtitles['it']), 'b1d90a98755126b61e667567a1f6680a')
+
+
+class TestVikiSubtitles(BaseTestSubtitles):
+ url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
+ IE = VikiIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['en']))
+ self.assertEqual(md5(subtitles['en']), '53cb083a5914b2d84ef1ab67b880d18a')
+
+
+class TestThePlatformSubtitles(BaseTestSubtitles):
+ # from http://www.3playmedia.com/services-features/tools/integrations/theplatform/
+ # (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/)
+ url = 'theplatform:JFUjUE1_ehvq'
+ IE = ThePlatformIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['en']))
+ self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
+
+
+class TestRtveSubtitles(BaseTestSubtitles):
+ url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
+ IE = RTVEALaCartaIE
+
+ def test_allsubtitles(self):
+ print('Skipping, only available from Spain')
+ return
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['es']))
+ self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
if __name__ == '__main__':
def test_func(self):
as_file = os.path.join(TEST_DIR, testfile)
swf_file = os.path.join(TEST_DIR, test_id + '.swf')
- if ((not os.path.exists(swf_file))
- or os.path.getmtime(swf_file) < os.path.getmtime(as_file)):
+ if ((not os.path.exists(swf_file)) or
+ os.path.getmtime(swf_file) < os.path.getmtime(as_file)):
# Recompile
try:
subprocess.check_call([
version_tuple,
xpath_with_ns,
render_table,
+ match_str,
)
self.assertEqual(
sanitize_filename('New World record at 0:12:34'),
'New World record at 0_12_34')
+ self.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf')
+ self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf')
forbidden = '"\0\\/'
for fc in forbidden:
self.assertEqual(parse_duration('2.5 hours'), 9000)
self.assertEqual(parse_duration('02:03:04'), 7384)
self.assertEqual(parse_duration('01:02:03:04'), 93784)
+ self.assertEqual(parse_duration('1 hour 3 minutes'), 3780)
def test_fix_xml_ampersands(self):
self.assertEqual(
"playlist":[{"controls":{"all":null}}]
}''')
+ inp = '"SAND Number: SAND 2013-7800P\\nPresenter: Tom Russo\\nHabanero Software Training - Xyce Software\\nXyce, Sandia\\u0027s"'
+ json_code = js_to_json(inp)
+ self.assertEqual(json.loads(json_code), json.loads(inp))
+
def test_js_to_json_edgecases(self):
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
'123 4\n'
'9999 51')
+ def test_match_str(self):
+ self.assertRaises(ValueError, match_str, 'xy>foobar', {})
+ self.assertFalse(match_str('xy', {'x': 1200}))
+ self.assertTrue(match_str('!xy', {'x': 1200}))
+ self.assertTrue(match_str('x', {'x': 1200}))
+ self.assertFalse(match_str('!x', {'x': 1200}))
+ self.assertTrue(match_str('x', {'x': 0}))
+ self.assertFalse(match_str('x>0', {'x': 0}))
+ self.assertFalse(match_str('x>0', {}))
+ self.assertTrue(match_str('x>?0', {}))
+ self.assertTrue(match_str('x>1K', {'x': 1200}))
+ self.assertFalse(match_str('x>2K', {'x': 1200}))
+ self.assertTrue(match_str('x>=1200 & x < 1300', {'x': 1200}))
+ self.assertFalse(match_str('x>=1100 & x < 1200', {'x': 1200}))
+ self.assertFalse(match_str('y=a212', {'y': 'foobar42'}))
+ self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'}))
+ self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'}))
+ self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'}))
+ self.assertFalse(match_str(
+ 'like_count > 100 & dislike_count <? 50 & description',
+ {'like_count': 90, 'description': 'foo'}))
+ self.assertTrue(match_str(
+ 'like_count > 100 & dislike_count <? 50 & description',
+ {'like_count': 190, 'description': 'foo'}))
+ self.assertFalse(match_str(
+ 'like_count > 100 & dislike_count <? 50 & description',
+ {'like_count': 190, 'dislike_count': 60, 'description': 'foo'}))
+ self.assertFalse(match_str(
+ 'like_count > 100 & dislike_count <? 50 & description',
+ {'like_count': 190, 'dislike_count': 10}))
+
if __name__ == '__main__':
unittest.main()
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
import io
import re
import string
+from test.helper import FakeYDL
from youtube_dl.extractor import YoutubeIE
from youtube_dl.compat import compat_str, compat_urlretrieve
'js',
'4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288',
'82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B'
+ ),
+ (
+ 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js',
+ 'js',
+ '312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12',
+ '112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3',
)
]
if not os.path.exists(fn):
compat_urlretrieve(url, fn)
- ie = YoutubeIE()
+ ydl = FakeYDL()
+ ie = YoutubeIE(ydl)
if stype == 'js':
with io.open(fn, encoding='utf-8') as testf:
jscode = testf.read()
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ on\ Windows)
\-\-flat\-playlist\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ extract\ the\ videos\ of\ a\ playlist,
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ only\ list\ them.
+\-\-no\-color\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ emit\ color\ codes\ in\ output.
\f[]
.fi
.SS Network Options:
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ COUNT\ views
\-\-max\-views\ COUNT\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ download\ any\ videos\ with\ more\ than
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ COUNT\ views
+\-\-match\-filter\ FILTER\ \ \ \ \ \ \ \ \ \ \ \ (Experimental)\ Generic\ video\ filter.
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Specify\ any\ key\ (see\ help\ for\ \-o\ for\ a\ list
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ of\ available\ keys)\ to\ match\ if\ the\ key\ is
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ present,\ !key\ to\ check\ if\ the\ key\ is\ not
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ present,\ key\ >\ NUMBER\ (like\ "comment_count\ >
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 12",\ also\ works\ with\ >=,\ <,\ <=,\ !=,\ =)\ to
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ compare\ against\ a\ number,\ and\ &\ to\ require
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ multiple\ matches.\ Values\ which\ are\ not
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ known\ are\ excluded\ unless\ you\ put\ a
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ question\ mark\ (?)\ after\ the\ operator.\ For
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ example,\ to\ only\ match\ videos\ that\ have
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ been\ liked\ more\ than\ 100\ times\ and\ disliked
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ less\ than\ 50\ times\ (or\ the\ dislike
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ functionality\ is\ not\ available\ at\ the\ given
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ service),\ but\ which\ also\ have\ a\ description,
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ \ \-\-match\-filter\ "like_count\ >\ 100\ &
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ dislike_count\ <?\ 50\ &\ description"\ .
\-\-no\-playlist\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ If\ the\ URL\ refers\ to\ a\ video\ and\ a
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ playlist,\ download\ only\ the\ video.
+\-\-yes\-playlist\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ If\ the\ URL\ refers\ to\ a\ video\ and\ a
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ playlist,\ download\ the\ playlist.
\-\-age\-limit\ YEARS\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ download\ only\ videos\ suitable\ for\ the\ given
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ age
\-\-download\-archive\ FILE\ \ \ \ \ \ \ \ \ \ Download\ only\ videos\ not\ listed\ in\ the
\-\-playlist\-reverse\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Download\ playlist\ videos\ in\ reverse\ order
\-\-xattr\-set\-filesize\ \ \ \ \ \ \ \ \ \ \ \ \ (experimental)\ set\ file\ xattribute
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ ytdl.filesize\ with\ expected\ filesize
+\-\-hls\-prefer\-native\ \ \ \ \ \ \ \ \ \ \ \ \ \ (experimental)\ Use\ the\ native\ HLS
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ downloader\ instead\ of\ ffmpeg.
\-\-external\-downloader\ COMMAND\ \ \ \ (experimental)\ Use\ the\ specified\ external
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ downloader.\ Currently\ supports
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ aria2c,curl,wget
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ video\ results\ by\ putting\ a\ condition\ in
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ brackets,\ as\ in\ \-f\ "best[height=720]"\ (or
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \-f\ "[filesize>10M]").\ \ This\ works\ for
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ filesize,\ height,\ width,\ tbr,\ abr,\ vbr,\ and
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ fps\ and\ the\ comparisons\ <,\ <=,\ >,\ >=,\ =,\ !=
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ .\ Formats\ for\ which\ the\ value\ is\ not\ known
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ are\ excluded\ unless\ you\ put\ a\ question\ mark
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (?)\ after\ the\ operator.\ You\ can\ combine
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ format\ filters,\ so\ \ \-f\ "[height\ <=?
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 720][tbr>500]"\ selects\ up\ to\ 720p\ videos
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (or\ videos\ where\ the\ height\ is\ not\ known)
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ with\ a\ bitrate\ of\ at\ least\ 500\ KBit/s.\ By
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ default,\ youtube\-dl\ will\ pick\ the\ best
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ quality.\ Use\ commas\ to\ download\ multiple
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ audio\ formats,\ such\ as\ \-f
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ filesize,\ height,\ width,\ tbr,\ abr,\ vbr,
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ asr,\ and\ fps\ and\ the\ comparisons\ <,\ <=,\ >,
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ >=,\ =,\ !=\ and\ for\ ext,\ acodec,\ vcodec,
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ container,\ and\ protocol\ and\ the\ comparisons
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ =,\ !=\ .\ Formats\ for\ which\ the\ value\ is\ not
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ known\ are\ excluded\ unless\ you\ put\ a
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ question\ mark\ (?)\ after\ the\ operator.\ You
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ can\ combine\ format\ filters,\ so\ \ \-f\ "[height
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ <=?\ 720][tbr>500]"\ selects\ up\ to\ 720p
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ videos\ (or\ videos\ where\ the\ height\ is\ not
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ known)\ with\ a\ bitrate\ of\ at\ least\ 500
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ KBit/s.\ By\ default,\ youtube\-dl\ will\ pick
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ the\ best\ quality.\ Use\ commas\ to\ download
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ multiple\ audio\ formats,\ such\ as\ \-f
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 136/137/mp4/bestvideo,140/m4a/bestaudio.
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ You\ can\ merge\ the\ video\ and\ audio\ of\ two
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ formats\ into\ a\ single\ file\ using\ \-f\ <video\-
\-\-all\-subs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ downloads\ all\ the\ available\ subtitles\ of
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ the\ video
\-\-list\-subs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ lists\ all\ available\ subtitles\ for\ the\ video
-\-\-sub\-format\ FORMAT\ \ \ \ \ \ \ \ \ \ \ \ \ \ subtitle\ format\ (default=srt)\ ([sbv/vtt]
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ youtube\ only)
+\-\-sub\-format\ FORMAT\ \ \ \ \ \ \ \ \ \ \ \ \ \ subtitle\ format,\ accepts\ formats
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ preference,\ for\ example:\ "ass/srt/best"
\-\-sub\-lang\ LANGS\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ languages\ of\ the\ subtitles\ to\ download
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (optional)\ separated\ by\ commas,\ use\ IETF
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ language\ tags\ like\ \[aq]en,pt\[aq]
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ postprocessors\ (default)
\-\-prefer\-ffmpeg\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Prefer\ ffmpeg\ over\ avconv\ for\ running\ the
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ postprocessors
+\-\-ffmpeg\-location\ PATH\ \ \ \ \ \ \ \ \ \ \ Location\ of\ the\ ffmpeg/avconv\ binary;
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ either\ the\ path\ to\ the\ binary\ or\ its
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ containing\ directory.
\-\-exec\ CMD\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Execute\ a\ command\ on\ the\ file\ after
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ downloading,\ similar\ to\ find\[aq]s\ \-exec
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ syntax.\ Example:\ \-\-exec\ \[aq]adb\ push\ {}
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ /sdcard/Music/\ &&\ rm\ {}\[aq]
+\-\-convert\-subtitles\ FORMAT\ \ \ \ \ \ \ Convert\ the\ subtitles\ to\ another\ format
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (currently\ supported:\ srt|ass|vtt)
\f[]
.fi
.SH CONFIGURATION
youtube\-dl in turn.
.SS ERROR: no fmt_url_map or conn information found in video info
.PP
-youtube has switched to a new video info format in July 2011 which is
+YouTube has switched to a new video info format in July 2011 which is
not supported by old versions of youtube\-dl.
-You can update youtube\-dl with \f[C]sudo\ youtube\-dl\ \-\-update\f[].
+See above (#how-do-i-update-youtube-dl) for how to update youtube\-dl.
.SS ERROR: unable to download video
.PP
-youtube requires an additional signature since September 2012 which is
+YouTube requires an additional signature since September 2012 which is
not supported by old versions of youtube\-dl.
-You can update youtube\-dl with \f[C]sudo\ youtube\-dl\ \-\-update\f[].
+See above (#how-do-i-update-youtube-dl) for how to update youtube\-dl.
+.SS ExtractorError: Could not find JS function u\[aq]OF\[aq]
+.PP
+In February 2015, the new YouTube player contained a character sequence
+in a string that was misinterpreted by old versions of youtube\-dl.
+See above (#how-do-i-update-youtube-dl) for how to update youtube\-dl.
.SS SyntaxError: Non\-ASCII character
.PP
The error
For one, have a look at the list of supported
sites (docs/supportedsites.md).
Note that it can sometimes happen that the site changes its URL scheme
-(say, from http://example.com/v/1234567 to http://example.com/v/1234567
-) and youtube\-dl reports an URL of a service in that list as
-unsupported.
+(say, from http://example.com/video/1234567 to
+http://example.com/v/1234567 ) and youtube\-dl reports a URL of a
+service in that list as unsupported.
In that case, simply report a bug.
.PP
It is \f[I]not\f[] possible to detect whether a URL is supported or not.
COMPREPLY=()
cur="${COMP_WORDS[COMP_CWORD]}"
prev="${COMP_WORDS[COMP_CWORD-1]}"
- opts="--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --default-search --ignore-config --flat-playlist --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --no-playlist --age-limit --download-archive --include-ads --rate-limit --retries --buffer-size --no-resize-buffer --test --playlist-reverse --xattr-set-filesize --external-downloader --batch-file --id --output --autonumber-size --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-intermediate-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --format --all-formats --prefer-free-formats --max-quality --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --xattrs --fixup --prefer-avconv --prefer-ffmpeg --exec"
+ opts="--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --default-search --ignore-config --flat-playlist --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --rate-limit --retries --buffer-size --no-resize-buffer --test --playlist-reverse --xattr-set-filesize --hls-prefer-native --external-downloader --batch-file --id --output --autonumber-size --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-intermediate-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --format --all-formats --prefer-free-formats --max-quality --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subtitles"
keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
fileopts="-a|--batch-file|--download-archive|--cookies|--load-info"
diropts="--cache-dir"
complete --command youtube-dl --long-option default-search --description 'Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.'
complete --command youtube-dl --long-option ignore-config --description 'Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: Do not read the user configuration in ~/.config/youtube-dl/config (%APPDATA%/youtube-dl/config.txt on Windows)'
complete --command youtube-dl --long-option flat-playlist --description 'Do not extract the videos of a playlist, only list them.'
+complete --command youtube-dl --long-option no-color --description 'Do not emit color codes in output.'
complete --command youtube-dl --long-option proxy --description 'Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection'
complete --command youtube-dl --long-option socket-timeout --description 'Time to wait before giving up, in seconds'
complete --command youtube-dl --long-option source-address --description 'Client-side IP address to bind to (experimental)'
complete --command youtube-dl --long-option dateafter --description 'download only videos uploaded on or after this date (i.e. inclusive)'
complete --command youtube-dl --long-option min-views --description 'Do not download any videos with less than COUNT views'
complete --command youtube-dl --long-option max-views --description 'Do not download any videos with more than COUNT views'
+complete --command youtube-dl --long-option match-filter --description '(Experimental) Generic video filter. Specify any key (see help for -o for a list of available keys) to match if the key is present, !key to check if the key is not present, key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against a number, and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the operator. For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike functionality is not available at the given service), but which also have a description, use --match-filter "like_count > 100 & dislike_count <? 50 & description" .'
complete --command youtube-dl --long-option no-playlist --description 'If the URL refers to a video and a playlist, download only the video.'
+complete --command youtube-dl --long-option yes-playlist --description 'If the URL refers to a video and a playlist, download the playlist.'
complete --command youtube-dl --long-option age-limit --description 'download only videos suitable for the given age'
complete --command youtube-dl --long-option download-archive --description 'Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.' --require-parameter
complete --command youtube-dl --long-option include-ads --description 'Download advertisements as well (experimental)'
complete --command youtube-dl --long-option test
complete --command youtube-dl --long-option playlist-reverse --description 'Download playlist videos in reverse order'
complete --command youtube-dl --long-option xattr-set-filesize --description '(experimental) set file xattribute ytdl.filesize with expected filesize'
+complete --command youtube-dl --long-option hls-prefer-native --description '(experimental) Use the native HLS downloader instead of ffmpeg.'
complete --command youtube-dl --long-option external-downloader --description '(experimental) Use the specified external downloader. Currently supports aria2c, curl, wget'
complete --command youtube-dl --long-option batch-file --short-option a --description 'file containing URLs to download ('"'"'-'"'"' for stdin)' --require-parameter
complete --command youtube-dl --long-option id --description 'use only video ID in file name'
complete --command youtube-dl --long-option add-header --description 'specify a custom HTTP header and its value, separated by a colon '"'"':'"'"'. You can use this option multiple times'
complete --command youtube-dl --long-option bidi-workaround --description 'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH'
complete --command youtube-dl --long-option sleep-interval --description 'Number of seconds to sleep before each download.'
-complete --command youtube-dl --long-option format --short-option f --description 'video format code, specify the order of preference using slashes, as in -f 22/17/18 . Instead of format codes, you can select by extension for the extensions aac, m4a, mp3, mp4, ogg, wav, webm. You can also use the special names "best", "bestvideo", "bestaudio", "worst". You can filter the video results by putting a condition in brackets, as in -f "best[height=720]" (or -f "[filesize>10M]"). This works for filesize, height, width, tbr, abr, vbr, and fps and the comparisons <, <=, >, >=, =, != . Formats for which the value is not known are excluded unless you put a question mark (?) after the operator. You can combine format filters, so -f "[height <=? 720][tbr>500]" selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. By default, youtube-dl will pick the best quality. Use commas to download multiple audio formats, such as -f 136/137/mp4/bestvideo,140/m4a/bestaudio. You can merge the video and audio of two formats into a single file using -f <video-format>+<audio-format> (requires ffmpeg or avconv), for example -f bestvideo+bestaudio.'
+complete --command youtube-dl --long-option format --short-option f --description 'video format code, specify the order of preference using slashes, as in -f 22/17/18 . Instead of format codes, you can select by extension for the extensions aac, m4a, mp3, mp4, ogg, wav, webm. You can also use the special names "best", "bestvideo", "bestaudio", "worst". You can filter the video results by putting a condition in brackets, as in -f "best[height=720]" (or -f "[filesize>10M]"). This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec, vcodec, container, and protocol and the comparisons =, != . Formats for which the value is not known are excluded unless you put a question mark (?) after the operator. You can combine format filters, so -f "[height <=? 720][tbr>500]" selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. By default, youtube-dl will pick the best quality. Use commas to download multiple audio formats, such as -f 136/137/mp4/bestvideo,140/m4a/bestaudio. You can merge the video and audio of two formats into a single file using -f <video-format>+<audio-format> (requires ffmpeg or avconv), for example -f bestvideo+bestaudio.'
complete --command youtube-dl --long-option all-formats --description 'download all available video formats'
complete --command youtube-dl --long-option prefer-free-formats --description 'prefer free video formats unless a specific one is requested'
complete --command youtube-dl --long-option max-quality --description 'highest quality format to download'
complete --command youtube-dl --long-option write-auto-sub --description 'write automatic subtitle file (youtube only)'
complete --command youtube-dl --long-option all-subs --description 'downloads all the available subtitles of the video'
complete --command youtube-dl --long-option list-subs --description 'lists all available subtitles for the video'
-complete --command youtube-dl --long-option sub-format --description 'subtitle format (default=srt) ([sbv/vtt] youtube only)'
+complete --command youtube-dl --long-option sub-format --description 'subtitle format; accepts a formats preference, for example: "ass/srt/best"'
complete --command youtube-dl --long-option sub-lang --description 'languages of the subtitles to download (optional) separated by commas, use IETF language tags like '"'"'en,pt'"'"''
complete --command youtube-dl --long-option username --short-option u --description 'login with this account ID'
complete --command youtube-dl --long-option password --short-option p --description 'account password. If this option is left out, youtube-dl will ask interactively.'
complete --command youtube-dl --long-option fixup --description 'Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn (the default; fix file if we can, warn otherwise)'
complete --command youtube-dl --long-option prefer-avconv --description 'Prefer avconv over ffmpeg for running the postprocessors (default)'
complete --command youtube-dl --long-option prefer-ffmpeg --description 'Prefer ffmpeg over avconv for running the postprocessors'
+complete --command youtube-dl --long-option ffmpeg-location --description 'Location of the ffmpeg/avconv binary; either the path to the binary or its containing directory.'
complete --command youtube-dl --long-option exec --description 'Execute a command on the file after downloading, similar to find'"'"'s -exec syntax. Example: --exec '"'"'adb push {} /sdcard/Music/ && rm {}'"'"''
+complete --command youtube-dl --long-option convert-subtitles --description 'Convert the subtitles to another format (currently supported: srt|ass|vtt)'
complete --command youtube-dl --arguments ":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
elif [[ ${prev} == "--recode-video" ]]; then
_arguments '*: :(mp4 flv ogg webm mkv)'
else
- _arguments '*: :(--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --default-search --ignore-config --flat-playlist --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --no-playlist --age-limit --download-archive --include-ads --rate-limit --retries --buffer-size --no-resize-buffer --test --playlist-reverse --xattr-set-filesize --external-downloader --batch-file --id --output --autonumber-size --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-intermediate-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --format --all-formats --prefer-free-formats --max-quality --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --xattrs --fixup --prefer-avconv --prefer-ffmpeg --exec)'
+ _arguments '*: :(--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --default-search --ignore-config --flat-playlist --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --rate-limit --retries --buffer-size --no-resize-buffer --test --playlist-reverse --xattr-set-filesize --hls-prefer-native --external-downloader --batch-file --id --output --autonumber-size --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-intermediate-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --format --all-formats --prefer-free-formats --max-quality --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subtitles)'
fi
;;
esac
allsubtitles: Downloads all the subtitles of the video
(requires writesubtitles or writeautomaticsub)
listsubtitles: Lists all available subtitles for the video
- subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
+ subtitlesformat: The format code for subtitles
subtitleslangs: List of languages of the subtitles to download
keepvideo: Keep the video file after post-processing
daterange: A DateRange object, download only if the upload_date is in the range.
postprocessor.
progress_hooks: A list of functions that get called on download
progress, with a dictionary with the entries
- * status: One of "downloading" and "finished".
+ * status: One of "downloading", "error", or "finished".
Check this first and ignore unknown values.
If status is one of "downloading" or "finished", the
following properties may also be present:
* filename: The final filename (always present)
+ * tmpfilename: The filename we're currently writing to
* downloaded_bytes: Bytes on disk
* total_bytes: Size of the whole file, None if unknown
- * tmpfilename: The filename we're currently writing to
+ * total_bytes_estimate: Guess of the eventual file size,
+ None if unavailable.
+ * elapsed: The number of seconds since download started.
* eta: The estimated time in seconds, None if unknown
* speed: The download speed in bytes/second, None if
unknown
+ * fragment_index: The counter of the currently
+ downloaded video fragment.
+ * fragment_count: The number of fragments (= individual
+ files that will be merged)
Progress hooks are guaranteed to be called at least once
(with status "finished") if the download is successful.
call_home: Boolean, true iff we are allowed to contact the
youtube-dl servers for debugging.
sleep_interval: Number of seconds to sleep before each download.
- external_downloader: Executable of the external downloader to call.
listformats: Print an overview of available video formats and exit.
list_thumbnails: Print a table of all thumbnails and exit.
-
+ match_filter: A function that gets called with the info_dict of
+ every video.
+ If it returns a message, the video is ignored.
+ If it returns None, the video is downloaded.
+ match_filter_func in utils.py is one example of such a function.
+ no_color: Do not emit color codes in output.
+
+ The following options determine which downloader is picked:
+ external_downloader: Executable of the external downloader to call.
+ None or unset for standard (built-in) downloader.
+ hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
The following parameters are not used by YoutubeDL itself, they are used by
the FileDownloader:
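Since progress_hooks entries and match_filter are plain callables, the two parameters documented above can be exercised directly from the embedding API. A minimal sketch, assuming only the contracts stated in this docstring (the URL and the duration cutoff are placeholders):

    from youtube_dl import YoutubeDL

    def my_hook(d):
        # d is the status dictionary described above; check 'status' first
        # and ignore values you do not recognize.
        if d['status'] == 'finished':
            print('Done downloading %s' % d['filename'])

    def my_filter(info_dict):
        # Return a message to skip the video, or None to download it.
        duration = info_dict.get('duration')
        if duration and duration > 3600:  # placeholder cutoff
            return 'Skipping %s: longer than an hour' % info_dict.get('id')
        return None

    ydl_opts = {
        'progress_hooks': [my_hook],
        'match_filter': my_filter,
        'no_color': True,
    }
    with YoutubeDL(ydl_opts) as ydl:
        ydl.download(['http://example.com/some/video'])  # placeholder URL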
raise
if (sys.version_info >= (3,) and sys.platform != 'win32' and
- sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
- and not params.get('restrictfilenames', False)):
+ sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
+ not params.get('restrictfilenames', False)):
# On Python 3, the Unicode filesystem API will throw errors (#1474)
self.report_warning(
'Assuming --restrict-filenames since file system encoding '
else:
if self.params.get('no_warnings'):
return
- if self._err_file.isatty() and os.name != 'nt':
+ if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
_msg_header = '\033[0;33mWARNING:\033[0m'
else:
_msg_header = 'WARNING:'
Do the same as trouble, but prefixes the message with 'ERROR:', colored
in red if stderr is a tty file.
'''
- if self._err_file.isatty() and os.name != 'nt':
+ if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
_msg_header = '\033[0;31mERROR:\033[0m'
else:
_msg_header = 'ERROR:'
self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
return None
- def _match_entry(self, info_dict):
+ def _match_entry(self, info_dict, incomplete):
""" Returns None iff the file should be downloaded """
video_title = info_dict.get('title', info_dict.get('id', 'video'))
if max_views is not None and view_count > max_views:
return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
- return 'Skipping "%s" because it is age restricted' % title
+ return 'Skipping "%s" because it is age restricted' % video_title
if self.in_download_archive(info_dict):
return '%s has already been recorded in archive' % video_title
+
+ if not incomplete:
+ match_filter = self.params.get('match_filter')
+ if match_filter is not None:
+ ret = match_filter(info_dict)
+ if ret is not None:
+ return ret
+
return None
@staticmethod
'extractor_key': ie_result['extractor_key'],
}
- reason = self._match_entry(entry)
+ reason = self._match_entry(entry, incomplete=True)
if reason is not None:
self.to_screen('[download] ' + reason)
continue
'!=': operator.ne,
}
operator_rex = re.compile(r'''(?x)\s*\[
- (?P<key>width|height|tbr|abr|vbr|filesize|fps)
+ (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
\]$
''' % '|'.join(map(re.escape, OPERATORS.keys())))
m = operator_rex.search(format_spec)
+ if m:
+ try:
+ comparison_value = int(m.group('value'))
+ except ValueError:
+ comparison_value = parse_filesize(m.group('value'))
+ if comparison_value is None:
+ comparison_value = parse_filesize(m.group('value') + 'B')
+ if comparison_value is None:
+ raise ValueError(
+ 'Invalid value %r in format specification %r' % (
+ m.group('value'), format_spec))
+ op = OPERATORS[m.group('op')]
+
if not m:
- raise ValueError('Invalid format specification %r' % format_spec)
+ STR_OPERATORS = {
+ '=': operator.eq,
+ '!=': operator.ne,
+ }
+ str_operator_rex = re.compile(r'''(?x)\s*\[
+ \s*(?P<key>ext|acodec|vcodec|container|protocol)
+ \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
+ \s*(?P<value>[a-zA-Z0-9_-]+)
+ \s*\]$
+ ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
+ m = str_operator_rex.search(format_spec)
+ if m:
+ comparison_value = m.group('value')
+ op = STR_OPERATORS[m.group('op')]
- try:
- comparison_value = int(m.group('value'))
- except ValueError:
- comparison_value = parse_filesize(m.group('value'))
- if comparison_value is None:
- comparison_value = parse_filesize(m.group('value') + 'B')
- if comparison_value is None:
- raise ValueError(
- 'Invalid value %r in format specification %r' % (
- m.group('value'), format_spec))
- op = OPERATORS[m.group('op')]
+ if not m:
+ raise ValueError('Invalid format specification %r' % format_spec)
def _filter(f):
actual_value = f.get(m.group('key'))
return res
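The string operators added above admit selections such as -f "[ext=mp4]" alongside the numeric filters. A reduced, self-contained sketch of the matching step, using the same regex and operator table as the code above:

    import operator
    import re

    STR_OPERATORS = {'=': operator.eq, '!=': operator.ne}
    str_operator_rex = re.compile(r'''(?x)\s*\[
        \s*(?P<key>ext|acodec|vcodec|container|protocol)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
        \s*(?P<value>[a-zA-Z0-9_-]+)
        \s*\]$
    ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))

    m = str_operator_rex.search('[ext=mp4]')
    op = STR_OPERATORS[m.group('op')]
    assert op('mp4', m.group('value'))       # an mp4 format passes
    assert not op('webm', m.group('value'))  # a webm format is filtered out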
def _calc_cookies(self, info_dict):
- class _PseudoRequest(object):
- def __init__(self, url):
- self.url = url
- self.headers = {}
- self.unverifiable = False
-
- def add_unredirected_header(self, k, v):
- self.headers[k] = v
-
- def get_full_url(self):
- return self.url
-
- def is_unverifiable(self):
- return self.unverifiable
-
- def has_header(self, h):
- return h in self.headers
-
- pr = _PseudoRequest(info_dict['url'])
+ pr = compat_urllib_request.Request(info_dict['url'])
self.cookiejar.add_cookie_header(pr)
- return pr.headers.get('Cookie')
+ return pr.get_header('Cookie')
def process_video_result(self, info_dict, download=True):
assert info_dict.get('_type', 'video') == 'video'
info_dict['timestamp'])
info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
+ if self.params.get('listsubtitles', False):
+ if 'automatic_captions' in info_dict:
+ self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
+ self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
+ return
+ info_dict['requested_subtitles'] = self.process_subtitles(
+ info_dict['id'], info_dict.get('subtitles'),
+ info_dict.get('automatic_captions'))
+
# These extractors handle format selection themselves
if info_dict['extractor'] in ['Youku']:
if download:
info_dict.update(formats_to_download[-1])
return info_dict
+ def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
+ """Select the requested subtitles and their format"""
+ available_subs = {}
+ if normal_subtitles and self.params.get('writesubtitles'):
+ available_subs.update(normal_subtitles)
+ if automatic_captions and self.params.get('writeautomaticsub'):
+ for lang, cap_info in automatic_captions.items():
+ if lang not in available_subs:
+ available_subs[lang] = cap_info
+
+ if (not self.params.get('writesubtitles') and not
+ self.params.get('writeautomaticsub') or not
+ available_subs):
+ return None
+
+ if self.params.get('allsubtitles', False):
+ requested_langs = available_subs.keys()
+ else:
+ if self.params.get('subtitleslangs', False):
+ requested_langs = self.params.get('subtitleslangs')
+ elif 'en' in available_subs:
+ requested_langs = ['en']
+ else:
+ requested_langs = [list(available_subs.keys())[0]]
+
+ formats_query = self.params.get('subtitlesformat', 'best')
+ formats_preference = formats_query.split('/') if formats_query else []
+ subs = {}
+ for lang in requested_langs:
+ formats = available_subs.get(lang)
+ if formats is None:
+ self.report_warning('%s subtitles not available for %s' % (lang, video_id))
+ continue
+ for ext in formats_preference:
+ if ext == 'best':
+ f = formats[-1]
+ break
+ matches = list(filter(lambda f: f['ext'] == ext, formats))
+ if matches:
+ f = matches[-1]
+ break
+ else:
+ f = formats[-1]
+ self.report_warning(
+ 'No subtitle format found matching "%s" for language %s, '
+ 'using %s' % (formats_query, lang, f['ext']))
+ subs[lang] = f
+ return subs
+
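To illustrate how the options drive process_subtitles, here is a hedged sketch of the relevant params (option names as documented above; the available-subtitles dict is invented for the example):

    ydl_opts = {
        'writesubtitles': True,             # consider normal subtitles
        'writeautomaticsub': False,         # skip automatic captions
        'subtitleslangs': ['en', 'pt'],     # requested languages
        'subtitlesformat': 'ass/srt/best',  # first matching ext wins
    }
    # Given available subtitles such as
    #   {'en': [{'ext': 'ttml', 'url': '...'}, {'ext': 'vtt', 'url': '...'}]}
    # neither 'ass' nor 'srt' is present, so the formats_preference loop
    # above falls through to the last listed format ('vtt' here) and emits
    # a warning naming the unmatched query.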
def process_info(self, info_dict):
"""Process a single resolved IE result."""
if 'format' not in info_dict:
info_dict['format'] = info_dict['ext']
- reason = self._match_entry(info_dict)
+ reason = self._match_entry(info_dict, incomplete=False)
if reason is not None:
self.to_screen('[download] ' + reason)
return
subtitles_are_requested = any([self.params.get('writesubtitles', False),
self.params.get('writeautomaticsub')])
- if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
+ if subtitles_are_requested and info_dict.get('requested_subtitles'):
# subtitles download errors are already managed as troubles in relevant IE
# that way it will silently go on when used with unsupporting IE
- subtitles = info_dict['subtitles']
- sub_format = self.params.get('subtitlesformat', 'srt')
- for sub_lang in subtitles.keys():
- sub = subtitles[sub_lang]
- if sub is None:
- continue
+ subtitles = info_dict['requested_subtitles']
+ ie = self.get_info_extractor(info_dict['extractor_key'])
+ for sub_lang, sub_info in subtitles.items():
+ sub_format = sub_info['ext']
+ if sub_info.get('data') is not None:
+ sub_data = sub_info['data']
+ else:
+ try:
+ sub_data = ie._download_webpage(
+ sub_info['url'], info_dict['id'], note=False)
+ except ExtractorError as err:
+ self.report_warning('Unable to download subtitle for "%s": %s' %
+ (sub_lang, compat_str(err.cause)))
+ continue
try:
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
else:
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
- subfile.write(sub)
+ subfile.write(sub_data)
except (OSError, IOError):
self.report_error('Cannot write subtitles file ' + sub_filename)
return
downloaded = []
success = True
merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
- if not merger._executable:
+ if not merger.available:
postprocessors = []
self.report_warning('You have requested multiple '
'formats but ffmpeg or avconv are not installed.'
"""Download a given list of URLs."""
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
if (len(url_list) > 1 and
- '%' not in outtmpl
- and self.params.get('max_downloads') != 1):
+ '%' not in outtmpl and
+ self.params.get('max_downloads') != 1):
raise SameFileError(outtmpl)
for url in url_list:
return res
def list_formats(self, info_dict):
- def line(format, idlen=20):
- return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
- format['format_id'],
- format['ext'],
- self.format_resolution(format),
- self._format_note(format),
- ))
-
formats = info_dict.get('formats', [info_dict])
- idlen = max(len('format code'),
- max(len(f['format_id']) for f in formats))
- formats_s = [
- line(f, idlen) for f in formats
+ table = [
+ [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
+ for f in formats
if f.get('preference') is None or f['preference'] >= -1000]
if len(formats) > 1:
- formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
- formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
+ table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
- header_line = line({
- 'format_id': 'format code', 'ext': 'extension',
- 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
+ header_line = ['format code', 'extension', 'resolution', 'note']
self.to_screen(
- '[info] Available formats for %s:\n%s\n%s' %
- (info_dict['id'], header_line, '\n'.join(formats_s)))
+ '[info] Available formats for %s:\n%s' %
+ (info_dict['id'], render_table(header_line, table)))
def list_thumbnails(self, info_dict):
thumbnails = info_dict.get('thumbnails')
['ID', 'width', 'height', 'URL'],
[[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
+ def list_subtitles(self, video_id, subtitles, name='subtitles'):
+ if not subtitles:
+ self.to_screen('%s has no %s' % (video_id, name))
+ return
+ self.to_screen(
+ 'Available %s for %s:' % (name, video_id))
+ self.to_screen(render_table(
+ ['Language', 'formats'],
+ [[lang, ', '.join(f['ext'] for f in reversed(formats))]
+ for lang, formats in subtitles.items()]))
+
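list_formats, list_thumbnails and the new list_subtitles all render through render_table from utils, which pads each column to its widest cell. A rough sketch (exact spacing depends on render_table's implementation):

    from youtube_dl.utils import render_table

    print(render_table(
        ['Language', 'formats'],
        [['en', 'vtt, ttml'], ['pt', 'vtt']]))
    # Roughly:
    # Language formats
    # en       vtt, ttml
    # pt       vtt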
def urlopen(self, req):
""" Start an HTTP download """
self._write_string('[debug] Python version %s - %s\n' % (
platform.python_version(), platform_name()))
- exe_versions = FFmpegPostProcessor.get_versions()
+ exe_versions = FFmpegPostProcessor.get_versions(self)
exe_versions['rtmpdump'] = rtmpdump_version()
exe_str = ', '.join(
'%s %s' % (exe, v)
)
from .utils import (
DateRange,
- DEFAULT_OUTTMPL,
decodeOption,
+ DEFAULT_OUTTMPL,
DownloadError,
+ match_filter_func,
MaxDownloadsReached,
preferredencoding,
read_batch_urls,
if opts.recodevideo is not None:
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv']:
parser.error('invalid video recode format specified')
+ if opts.convertsubtitles is not None:
+ if opts.convertsubtitles not in ['srt', 'vtt', 'ass']:
+ parser.error('invalid subtitle format specified')
if opts.date is not None:
date = DateRange.day(opts.date)
# In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
if opts.outtmpl is not None:
opts.outtmpl = opts.outtmpl.decode(preferredencoding())
- outtmpl = ((opts.outtmpl is not None and opts.outtmpl)
- or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s')
- or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s')
- or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s')
- or (opts.usetitle and '%(title)s-%(id)s.%(ext)s')
- or (opts.useid and '%(id)s.%(ext)s')
- or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s')
- or DEFAULT_OUTTMPL)
+ outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or
+ (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or
+ (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or
+ (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') or
+ (opts.usetitle and '%(title)s-%(id)s.%(ext)s') or
+ (opts.useid and '%(id)s.%(ext)s') or
+ (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') or
+ DEFAULT_OUTTMPL)
if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
parser.error('Cannot download a video and extract audio into the same'
' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
'key': 'FFmpegVideoConvertor',
'preferedformat': opts.recodevideo,
})
+ if opts.convertsubtitles:
+ postprocessors.append({
+ 'key': 'FFmpegSubtitlesConvertor',
+ 'format': opts.convertsubtitles,
+ })
if opts.embedsubtitles:
postprocessors.append({
'key': 'FFmpegEmbedSubtitle',
- 'subtitlesformat': opts.subtitlesformat,
})
if opts.xattrs:
postprocessors.append({'key': 'XAttrMetadata'})
xattr # Confuse flake8
except ImportError:
parser.error('setting filesize xattr requested but python-xattr is not available')
+ match_filter = (
+ None if opts.match_filter is None
+ else match_filter_func(opts.match_filter))
ydl_opts = {
'usenetrc': opts.usenetrc,
'list_thumbnails': opts.list_thumbnails,
'playlist_items': opts.playlist_items,
'xattr_set_filesize': opts.xattr_set_filesize,
+ 'match_filter': match_filter,
+ 'no_color': opts.no_color,
+ 'ffmpeg_location': opts.ffmpeg_location,
+ 'hls_prefer_native': opts.hls_prefer_native,
}
with YoutubeDL(ydl_opts) as ydl:
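On the command-line side, --match-filter strings are compiled by match_filter_func in utils into a callable with the same contract as the match_filter parameter, so the two entry points converge. A short sketch using the filter syntax documented for the option:

    from youtube_dl.utils import match_filter_func

    f = match_filter_func('like_count > 100 & dislike_count <? 50 & description')
    # Passes all three conditions -> None, i.e. download:
    print(f({'like_count': 150, 'dislike_count': 10, 'description': 'x'}))
    # Fails the like_count condition -> a skip message is returned:
    print(f({'like_count': 5, 'description': 'x'}))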
from __future__ import unicode_literals
-__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
-
import base64
from math import ceil
data[i] = data[i] + 1
break
return data
+
+__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
if ed.supports(info_dict):
return ed
+ if protocol == 'm3u8' and params.get('hls_prefer_native'):
+ return NativeHlsFD
+
return PROTOCOL_MAP.get(protocol, HttpFD)
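The effect of the new branch above, assuming get_suitable_downloader is called with the params dict as in this module (the URL is a placeholder):

    from youtube_dl.downloader import get_suitable_downloader

    info_dict = {'url': 'http://example.com/stream.m3u8', 'protocol': 'm3u8'}
    dl = get_suitable_downloader(info_dict, {'hls_prefer_native': True})
    # -> NativeHlsFD; without the flag, the 'm3u8' entry in PROTOCOL_MAP
    #    (the ffmpeg/avconv-based HLS downloader) is used instead.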
-from __future__ import unicode_literals
+from __future__ import division, unicode_literals
import os
import re
self.ydl = ydl
self._progress_hooks = []
self.params = params
+ self.add_progress_hook(self.report_progress)
@staticmethod
def format_seconds(seconds):
self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
self.to_console_title('youtube-dl ' + msg)
- def report_progress(self, percent, data_len_str, speed, eta):
- """Report download progress."""
- if self.params.get('noprogress', False):
+ def report_progress(self, s):
+ if s['status'] == 'finished':
+ if self.params.get('noprogress', False):
+ self.to_screen('[download] Download completed')
+ else:
+ s['_total_bytes_str'] = format_bytes(s['total_bytes'])
+ if s.get('elapsed') is not None:
+ s['_elapsed_str'] = self.format_seconds(s['elapsed'])
+ msg_template = '100%% of %(_total_bytes_str)s in %(_elapsed_str)s'
+ else:
+ msg_template = '100%% of %(_total_bytes_str)s'
+ self._report_progress_status(
+ msg_template % s, is_last_line=True)
+
+ if self.params.get('noprogress'):
return
- if eta is not None:
- eta_str = self.format_eta(eta)
- else:
- eta_str = 'Unknown ETA'
- if percent is not None:
- percent_str = self.format_percent(percent)
+
+ if s['status'] != 'downloading':
+ return
+
+ if s.get('eta') is not None:
+ s['_eta_str'] = self.format_eta(s['eta'])
else:
- percent_str = 'Unknown %'
- speed_str = self.format_speed(speed)
+ s['_eta_str'] = 'Unknown ETA'
- msg = ('%s of %s at %s ETA %s' %
- (percent_str, data_len_str, speed_str, eta_str))
- self._report_progress_status(msg)
+ if s.get('total_bytes') and s.get('downloaded_bytes') is not None:
+ s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes'])
+ elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None:
+ s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate'])
+ else:
+ if s.get('downloaded_bytes') == 0:
+ s['_percent_str'] = self.format_percent(0)
+ else:
+ s['_percent_str'] = 'Unknown %'
- def report_progress_live_stream(self, downloaded_data_len, speed, elapsed):
- if self.params.get('noprogress', False):
- return
- downloaded_str = format_bytes(downloaded_data_len)
- speed_str = self.format_speed(speed)
- elapsed_str = FileDownloader.format_seconds(elapsed)
- msg = '%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str)
- self._report_progress_status(msg)
-
- def report_finish(self, data_len_str, tot_time):
- """Report download finished."""
- if self.params.get('noprogress', False):
- self.to_screen('[download] Download completed')
+ if s.get('speed') is not None:
+ s['_speed_str'] = self.format_speed(s['speed'])
+ else:
+ s['_speed_str'] = 'Unknown speed'
+
+ if s.get('total_bytes') is not None:
+ s['_total_bytes_str'] = format_bytes(s['total_bytes'])
+ msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'
+ elif s.get('total_bytes_estimate') is not None:
+ s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate'])
+ msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'
else:
- self._report_progress_status(
- ('100%% of %s in %s' %
- (data_len_str, self.format_seconds(tot_time))),
- is_last_line=True)
+ if s.get('downloaded_bytes') is not None:
+ s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes'])
+ if s.get('elapsed'):
+ s['_elapsed_str'] = self.format_seconds(s['elapsed'])
+ msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'
+ else:
+ msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
+ else:
+ msg_template = '%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s'
+
+ self._report_progress_status(msg_template % s)
def report_resuming_byte(self, resume_len):
"""Report attempt to resume at given byte."""
"""
nooverwrites_and_exists = (
- self.params.get('nooverwrites', False)
- and os.path.exists(encodeFilename(filename))
+ self.params.get('nooverwrites', False) and
+ os.path.exists(encodeFilename(filename))
)
continuedl_and_exists = (
- self.params.get('continuedl', False)
- and os.path.isfile(encodeFilename(filename))
- and not self.params.get('nopart', False)
+ self.params.get('continuedl', False) and
+ os.path.isfile(encodeFilename(filename)) and
+ not self.params.get('nopart', False)
)
# Check file already present
class CurlFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict):
- cmd = [self.exe, '-o', tmpfilename]
+ cmd = [self.exe, '--location', '-o', tmpfilename]
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
cmd += self._source_address('--interface')
-from __future__ import unicode_literals
+from __future__ import division, unicode_literals
import base64
import io
from .http import HttpFD
from ..compat import (
compat_urlparse,
+ compat_urllib_error,
)
from ..utils import (
struct_pack,
struct_unpack,
- format_bytes,
encodeFilename,
sanitize_open,
xpath_text,
self.read_unsigned_int() # BootstrapinfoVersion
# Profile,Live,Update,Reserved
- self.read(1)
+ flags = self.read_unsigned_char()
+ live = flags & 0x20 != 0
# time scale
self.read_unsigned_int()
# CurrentMediaTime
return {
'segments': segments,
'fragments': fragments,
+ 'live': live,
}
def read_bootstrap_info(self):
for segment, fragments_count in segment_run_table['segment_run']:
for _ in range(fragments_count):
res.append((segment, next(fragments_counter)))
+
+ if boot_info['live']:
+ res = res[-2:]
+
return res
self.report_error('Unsupported DRM')
return media
+ def _get_bootstrap_from_url(self, bootstrap_url):
+ bootstrap = self.ydl.urlopen(bootstrap_url).read()
+ return read_bootstrap_info(bootstrap)
+
+ def _update_live_fragments(self, bootstrap_url, latest_fragment):
+ fragments_list = []
+ retries = 30
+ while (not fragments_list) and (retries > 0):
+ boot_info = self._get_bootstrap_from_url(bootstrap_url)
+ fragments_list = build_fragments_list(boot_info)
+ fragments_list = [f for f in fragments_list if f[1] > latest_fragment]
+ if not fragments_list:
+ # Retry after a while
+ time.sleep(5.0)
+ retries -= 1
+
+ if not fragments_list:
+ self.report_error('Failed to update fragments')
+
+ return fragments_list
+
+ def _parse_bootstrap_node(self, node, base_url):
+ if node.text is None:
+ bootstrap_url = compat_urlparse.urljoin(
+ base_url, node.attrib['url'])
+ boot_info = self._get_bootstrap_from_url(bootstrap_url)
+ else:
+ bootstrap_url = None
+ bootstrap = base64.b64decode(node.text)
+ boot_info = read_bootstrap_info(bootstrap)
+ return (boot_info, bootstrap_url)
+
def real_download(self, filename, info_dict):
man_url = info_dict['url']
requested_bitrate = info_dict.get('tbr')
self.to_screen('[download] Downloading f4m manifest')
manifest = self.ydl.urlopen(man_url).read()
- self.report_destination(filename)
- http_dl = HttpQuietDownloader(
- self.ydl,
- {
- 'continuedl': True,
- 'quiet': True,
- 'noprogress': True,
- 'ratelimit': self.params.get('ratelimit', None),
- 'test': self.params.get('test', False),
- }
- )
doc = etree.fromstring(manifest)
formats = [(int(f.attrib.get('bitrate', -1)), f)
base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
- if bootstrap_node.text is None:
- bootstrap_url = compat_urlparse.urljoin(
- base_url, bootstrap_node.attrib['url'])
- bootstrap = self.ydl.urlopen(bootstrap_url).read()
- else:
- bootstrap = base64.b64decode(bootstrap_node.text)
+ boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, base_url)
+ live = boot_info['live']
metadata_node = media.find(_add_ns('metadata'))
if metadata_node is not None:
metadata = base64.b64decode(metadata_node.text)
else:
metadata = None
- boot_info = read_bootstrap_info(bootstrap)
fragments_list = build_fragments_list(boot_info)
if self.params.get('test', False):
# For some akamai manifests we'll need to add a query to the fragment url
akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
+ self.report_destination(filename)
+ http_dl = HttpQuietDownloader(
+ self.ydl,
+ {
+ 'continuedl': True,
+ 'quiet': True,
+ 'noprogress': True,
+ 'ratelimit': self.params.get('ratelimit', None),
+ 'test': self.params.get('test', False),
+ }
+ )
tmpfilename = self.temp_name(filename)
(dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
+
write_flv_header(dest_stream)
- write_metadata_tag(dest_stream, metadata)
+ if not live:
+ write_metadata_tag(dest_stream, metadata)
# This dict stores the download progress, it's updated by the progress
# hook
state = {
+ 'status': 'downloading',
'downloaded_bytes': 0,
- 'frag_counter': 0,
+ 'frag_index': 0,
+ 'frag_count': total_frags,
+ 'filename': filename,
+ 'tmpfilename': tmpfilename,
}
start = time.time()
- def frag_progress_hook(status):
- frag_total_bytes = status.get('total_bytes', 0)
- estimated_size = (state['downloaded_bytes'] +
- (total_frags - state['frag_counter']) * frag_total_bytes)
- if status['status'] == 'finished':
+ def frag_progress_hook(s):
+ if s['status'] not in ('downloading', 'finished'):
+ return
+
+ frag_total_bytes = s.get('total_bytes', 0)
+ if s['status'] == 'finished':
state['downloaded_bytes'] += frag_total_bytes
- state['frag_counter'] += 1
- progress = self.calc_percent(state['frag_counter'], total_frags)
- byte_counter = state['downloaded_bytes']
+ state['frag_index'] += 1
+
+ estimated_size = (
+ (state['downloaded_bytes'] + frag_total_bytes) /
+ (state['frag_index'] + 1) * total_frags)
+ time_now = time.time()
+ state['total_bytes_estimate'] = estimated_size
+ state['elapsed'] = time_now - start
+
+ if s['status'] == 'finished':
+ progress = self.calc_percent(state['frag_index'], total_frags)
else:
- frag_downloaded_bytes = status['downloaded_bytes']
- byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes
+ frag_downloaded_bytes = s['downloaded_bytes']
frag_progress = self.calc_percent(frag_downloaded_bytes,
frag_total_bytes)
- progress = self.calc_percent(state['frag_counter'], total_frags)
+ progress = self.calc_percent(state['frag_index'], total_frags)
progress += frag_progress / float(total_frags)
- eta = self.calc_eta(start, time.time(), estimated_size, byte_counter)
- self.report_progress(progress, format_bytes(estimated_size),
- status.get('speed'), eta)
+ state['eta'] = self.calc_eta(
+ start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes)
+ state['speed'] = s.get('speed')
+ self._hook_progress(state)
+
http_dl.add_progress_hook(frag_progress_hook)
frags_filenames = []
- for (seg_i, frag_i) in fragments_list:
+ while fragments_list:
+ seg_i, frag_i = fragments_list.pop(0)
name = 'Seg%d-Frag%d' % (seg_i, frag_i)
url = base_url + name
if akamai_pv:
url += '?' + akamai_pv.strip(';')
frag_filename = '%s-%s' % (tmpfilename, name)
- success = http_dl.download(frag_filename, {'url': url})
- if not success:
- return False
- with open(frag_filename, 'rb') as down:
- down_data = down.read()
- reader = FlvReader(down_data)
- while True:
- _, box_type, box_data = reader.read_box_info()
- if box_type == b'mdat':
- dest_stream.write(box_data)
- break
- frags_filenames.append(frag_filename)
+ try:
+ success = http_dl.download(frag_filename, {'url': url})
+ if not success:
+ return False
+ with open(frag_filename, 'rb') as down:
+ down_data = down.read()
+ reader = FlvReader(down_data)
+ while True:
+ _, box_type, box_data = reader.read_box_info()
+ if box_type == b'mdat':
+ dest_stream.write(box_data)
+ break
+ if live:
+ os.remove(frag_filename)
+ else:
+ frags_filenames.append(frag_filename)
+ except (compat_urllib_error.HTTPError, ) as err:
+ if live and (err.code == 404 or err.code == 410):
+ # We didn't keep up with the live window. Continue
+ # with the next available fragment.
+ msg = 'Fragment %d unavailable' % frag_i
+ self.report_warning(msg)
+ fragments_list = []
+ else:
+ raise
+
+ if not fragments_list and live and bootstrap_url:
+ fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
+ total_frags += len(fragments_list)
+ if fragments_list and (fragments_list[0][1] > frag_i + 1):
+ msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
+ self.report_warning(msg)
dest_stream.close()
- self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start)
+ elapsed = time.time() - start
self.try_rename(tmpfilename, filename)
for frag_file in frags_filenames:
os.remove(frag_file)
'total_bytes': fsize,
'filename': filename,
'status': 'finished',
+ 'elapsed': elapsed,
})
return True
tmpfilename = self.temp_name(filename)
ffpp = FFmpegPostProcessor(downloader=self)
- program = ffpp._executable
- if program is None:
+ if not ffpp.available:
self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
return False
ffpp.check_version()
args = [
encodeArgument(opt)
- for opt in (program, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')]
+ for opt in (ffpp.executable, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')]
args.append(encodeFilename(tmpfilename, True))
retval = subprocess.call(args)
return True
else:
self.to_stderr('\n')
- self.report_error('%s exited with code %d' % (program, retval))
+ self.report_error('%s exited with code %d' % (ffpp.basename, retval))
return False
from __future__ import unicode_literals
+import errno
import os
+import socket
import time
-from socket import error as SocketError
-import errno
-
from .common import FileDownloader
from ..compat import (
compat_urllib_request,
ContentTooShortError,
encodeFilename,
sanitize_open,
- format_bytes,
)
resume_len = 0
open_mode = 'wb'
break
- except SocketError as e:
+ except socket.error as e:
if e.errno != errno.ECONNRESET:
# Connection reset is no problem, just retry
raise
self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
return False
- data_len_str = format_bytes(data_len)
byte_counter = 0 + resume_len
block_size = self.params.get('buffersize', 1024)
start = time.time()
# Progress message
speed = self.calc_speed(start, now, byte_counter - resume_len)
if data_len is None:
- eta = percent = None
+ eta = None
else:
- percent = self.calc_percent(byte_counter, data_len)
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
- self.report_progress(percent, data_len_str, speed, eta)
self._hook_progress({
+ 'status': 'downloading',
'downloaded_bytes': byte_counter,
'total_bytes': data_len,
'tmpfilename': tmpfilename,
'filename': filename,
- 'status': 'downloading',
'eta': eta,
'speed': speed,
+ 'elapsed': now - start,
})
if is_test and byte_counter == data_len:
return False
if tmpfilename != '-':
stream.close()
- self.report_finish(data_len_str, (time.time() - start))
+
+ self._hook_progress({
+ 'downloaded_bytes': byte_counter,
+ 'total_bytes': data_len,
+ 'tmpfilename': tmpfilename,
+ 'status': 'error',
+ })
if data_len is not None and byte_counter != data_len:
raise ContentTooShortError(byte_counter, int(data_len))
self.try_rename(tmpfilename, filename)
'total_bytes': byte_counter,
'filename': filename,
'status': 'finished',
+ 'elapsed': time.time() - start,
})
return True
from ..utils import (
check_executable,
encodeFilename,
- format_bytes,
get_exe_version,
)
if not resume_percent:
resume_percent = percent
resume_downloaded_data_len = downloaded_data_len
- eta = self.calc_eta(start, time.time(), 100 - resume_percent, percent - resume_percent)
- speed = self.calc_speed(start, time.time(), downloaded_data_len - resume_downloaded_data_len)
+ time_now = time.time()
+ eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent)
+ speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len)
data_len = None
if percent > 0:
data_len = int(downloaded_data_len * 100 / percent)
- data_len_str = '~' + format_bytes(data_len)
- self.report_progress(percent, data_len_str, speed, eta)
- cursor_in_new_line = False
self._hook_progress({
+ 'status': 'downloading',
'downloaded_bytes': downloaded_data_len,
- 'total_bytes': data_len,
+ 'total_bytes_estimate': data_len,
'tmpfilename': tmpfilename,
'filename': filename,
- 'status': 'downloading',
'eta': eta,
+ 'elapsed': time_now - start,
'speed': speed,
})
+ cursor_in_new_line = False
else:
# no percent for live streams
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
downloaded_data_len = int(float(mobj.group(1)) * 1024)
time_now = time.time()
speed = self.calc_speed(start, time_now, downloaded_data_len)
- self.report_progress_live_stream(downloaded_data_len, speed, time_now - start)
- cursor_in_new_line = False
self._hook_progress({
'downloaded_bytes': downloaded_data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'status': 'downloading',
+ 'elapsed': time_now - start,
'speed': speed,
})
+ cursor_in_new_line = False
elif self.params.get('verbose', False):
if not cursor_in_new_line:
self.to_screen('')
# Download using rtmpdump. rtmpdump returns exit code 2 when
# the connection was interrumpted and resuming appears to be
# possible. This is part of rtmpdump's normal usage, AFAIK.
- basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
+ basic_args = [
+ 'rtmpdump', '--verbose', '-r', url,
+ '-o', encodeFilename(tmpfilename, True)]
if player_url is not None:
basic_args += ['--swfVfy', player_url]
if page_url is not None:
from .addanime import AddAnimeIE
from .adobetv import AdobeTVIE
from .adultswim import AdultSwimIE
+from .aftenposten import AftenpostenIE
from .aftonbladet import AftonbladetIE
+from .airmozilla import AirMozillaIE
from .aljazeera import AlJazeeraIE
from .alphaporno import AlphaPornoIE
from .anitube import AnitubeIE
from .buzzfeed import BuzzFeedIE
from .byutv import BYUtvIE
from .c56 import C56IE
+from .camdemy import (
+ CamdemyIE,
+ CamdemyFolderIE
+)
from .canal13cl import Canal13clIE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .cbs import CBSIE
from .cbsnews import CBSNewsIE
+from .cbssports import CBSSportsIE
+from .ccc import CCCIE
from .ceskatelevize import CeskaTelevizeIE
from .channel9 import Channel9IE
from .chilloutzone import ChilloutzoneIE
+from .chirbit import (
+ ChirbitIE,
+ ChirbitProfileIE,
+)
from .cinchcast import CinchcastIE
from .clipfish import ClipfishIE
from .cliphunter import CliphunterIE
from .collegerama import CollegeRamaIE
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
from .comcarcoff import ComCarCoffIE
-from .commonmistakes import CommonMistakesIE
+from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
from .condenast import CondeNastIE
from .cracked import CrackedIE
from .criterion import CriterionIE
EllenTVClipsIE,
)
from .elpais import ElPaisIE
+from .embedly import EmbedlyIE
from .empflix import EMPFlixIE
from .engadget import EngadgetIE
from .eporner import EpornerIE
from .helsinki import HelsinkiIE
from .hentaistigma import HentaiStigmaIE
from .historicfilms import HistoricFilmsIE
+from .history import HistoryIE
from .hitbox import HitboxIE, HitboxLiveIE
from .hornbunny import HornBunnyIE
from .hostingbulk import HostingBulkIE
ImdbIE,
ImdbListIE
)
+from .imgur import ImgurIE
from .ina import InaIE
from .infoq import InfoQIE
from .instagram import InstagramIE, InstagramUserIE
from .jove import JoveIE
from .jukebox import JukeboxIE
from .jpopsukitv import JpopsukiIE
+from .kaltura import KalturaIE
from .kankan import KankanIE
from .karaoketv import KaraoketvIE
from .keezmovies import KeezMoviesIE
from .ku6 import Ku6IE
from .la7 import LA7IE
from .laola1tv import Laola1TvIE
+from .letv import (
+ LetvIE,
+ LetvTvIE,
+ LetvPlaylistIE
+)
from .lifenews import LifeNewsIE
from .liveleak import LiveLeakIE
from .livestream import (
from .myspass import MySpassIE
from .myvideo import MyVideoIE
from .myvidster import MyVidsterIE
+from .nationalgeographic import NationalGeographicIE
from .naver import NaverIE
from .nba import NBAIE
from .nbc import (
from .npo import (
NPOIE,
NPOLiveIE,
+ NPORadioIE,
+ NPORadioFragmentIE,
TegenlichtVproIE,
)
from .nrk import (
from .ntvru import NTVRuIE
from .nytimes import NYTimesIE
from .nuvid import NuvidIE
+from .odnoklassniki import OdnoklassnikiIE
from .oktoberfesttv import OktoberfestTVIE
from .ooyala import OoyalaIE
from .openfilm import OpenFilmIE
from .playvid import PlayvidIE
from .podomatic import PodomaticIE
from .pornhd import PornHdIE
-from .pornhub import PornHubIE
+from .pornhub import (
+ PornHubIE,
+ PornHubPlaylistIE,
+)
from .pornotube import PornotubeIE
from .pornoxo import PornoXOIE
from .promptfile import PromptFileIE
from .prosiebensat1 import ProSiebenSat1IE
+from .puls4 import Puls4IE
from .pyvideo import PyvideoIE
from .quickvid import QuickVidIE
+from .r7 import R7IE
from .radiode import RadioDeIE
from .radiobremen import RadioBremenIE
from .radiofrance import RadioFranceIE
from .roxwel import RoxwelIE
from .rtbf import RTBFIE
from .rte import RteIE
-from .rtlnl import RtlXlIE
+from .rtlnl import RtlNlIE
from .rtlnow import RTLnowIE
from .rtl2 import RTL2IE
from .rtp import RTPIE
RutubePersonIE,
)
from .rutv import RUTVIE
+from .sandia import SandiaIE
from .sapo import SapoIE
from .savefrom import SaveFromIE
from .sbs import SBSIE
SoundcloudUserIE,
SoundcloudPlaylistIE
)
-from .soundgasm import SoundgasmIE
+from .soundgasm import (
+ SoundgasmIE,
+ SoundgasmProfileIE
+)
from .southpark import (
SouthParkIE,
SouthparkDeIE,
from .streamcz import StreamCZIE
from .streetvoice import StreetVoiceIE
from .sunporno import SunPornoIE
+from .svtplay import SVTPlayIE
from .swrmediathek import SWRMediathekIE
from .syfy import SyfyIE
from .sztvhu import SztvHuIE
from .tunein import TuneInIE
from .turbo import TurboIE
from .tutv import TutvIE
+from .tv4 import TV4IE
from .tvigle import TvigleIE
from .tvp import TvpIE, TvpSeriesIE
from .tvplay import TVPlayIE
YahooIE,
YahooSearchIE,
)
+from .yam import YamIE
from .yesjapan import YesJapanIE
from .ynet import YnetIE
from .youjizz import YouJizzIE
YoutubeUserIE,
YoutubeWatchLaterIE,
)
+from .zapiks import ZapiksIE
from .zdf import ZDFIE, ZDFChannelIE
from .zingmp3 import (
ZingMp3SongIE,
def _real_extract(self, url):
video_id = self._match_id(url)
-
webpage = self._download_webpage(url, video_id)
player = self._parse_json(
self._html_search_meta('datepublished', webpage, 'upload date'))
duration = parse_duration(
- self._html_search_meta('duration', webpage, 'duration')
- or self._search_regex(r'Runtime:\s*(\d{2}:\d{2}:\d{2})', webpage, 'duration'))
+ self._html_search_meta('duration', webpage, 'duration') or
+ self._search_regex(
+ r'Runtime:\s*(\d{2}:\d{2}:\d{2})',
+ webpage, 'duration', fatal=False))
view_count = str_to_int(self._search_regex(
r'<div class="views">\s*Views?:\s*([\d,.]+)\s*</div>',
},
],
'info_dict': {
+ 'id': 'rQxZvXQ4ROaSOqq-or2Mow',
'title': 'Rick and Morty - Pilot',
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
}
}
],
'info_dict': {
+ 'id': '-t8CamQlQ2aYZ49ItZCFog',
'title': 'American Dad - Putting Francine Out of Business',
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
},
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ xpath_with_ns,
+ xpath_text,
+ find_xpath_attr,
+)
+
+
+class AftenpostenIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/([^/]+/)*(?P<id>[^/]+)-\d+\.html'
+
+ _TEST = {
+ 'url': 'http://www.aftenposten.no/webtv/serier-og-programmer/sweatshopenglish/TRAILER-SWEATSHOP---I-cant-take-any-more-7800835.html?paging=&section=webtv_serierogprogrammer_sweatshop_sweatshopenglish',
+ 'md5': 'fd828cd29774a729bf4d4425fe192972',
+ 'info_dict': {
+ 'id': '21039',
+ 'ext': 'mov',
+ 'title': 'TRAILER: "Sweatshop" - I can´t take any more',
+ 'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',
+ 'timestamp': 1416927969,
+ 'upload_date': '20141125',
+ }
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ video_id = self._html_search_regex(
+ r'data-xs-id="(\d+)"', webpage, 'video id')
+
+ data = self._download_xml(
+ 'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=%s' % video_id, video_id)
+
+ NS_MAP = {
+ 'atom': 'http://www.w3.org/2005/Atom',
+ 'xt': 'http://xstream.dk/',
+ 'media': 'http://search.yahoo.com/mrss/',
+ }
+
+ entry = data.find(xpath_with_ns('./atom:entry', NS_MAP))
+
+ title = xpath_text(
+ entry, xpath_with_ns('./atom:title', NS_MAP), 'title')
+ description = xpath_text(
+ entry, xpath_with_ns('./atom:summary', NS_MAP), 'description')
+ timestamp = parse_iso8601(xpath_text(
+ entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date'))
+
+ formats = []
+ media_group = entry.find(xpath_with_ns('./media:group', NS_MAP))
+ for media_content in media_group.findall(xpath_with_ns('./media:content', NS_MAP)):
+ media_url = media_content.get('url')
+ if not media_url:
+ continue
+ tbr = int_or_none(media_content.get('bitrate'))
+ mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', media_url)
+ if mobj:
+ formats.append({
+ 'url': mobj.group('url'),
+ 'play_path': 'mp4:%s' % mobj.group('playpath'),
+ 'app': mobj.group('app'),
+ 'ext': 'flv',
+ 'tbr': tbr,
+ 'format_id': 'rtmp-%d' % tbr,
+ })
+ else:
+ formats.append({
+ 'url': media_url,
+ 'tbr': tbr,
+ })
+ self._sort_formats(formats)
+
+ link = find_xpath_attr(
+ entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original')
+ if link is not None:
+ formats.append({
+ 'url': link.get('href'),
+ 'format_id': link.get('rel'),
+ })
+
+ thumbnails = [{
+ 'url': splash.get('url'),
+ 'width': int_or_none(splash.get('width')),
+ 'height': int_or_none(splash.get('height')),
+ } for splash in media_group.findall(xpath_with_ns('./xt:splash', NS_MAP))]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ }
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ parse_iso8601,
+)
+
+
+class AirMozillaIE(InfoExtractor):
+ _VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
+ _TEST = {
+ 'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/',
+ 'md5': '2e3e7486ba5d180e829d453875b9b8bf',
+ 'info_dict': {
+ 'id': '6x4q2w',
+ 'ext': 'mp4',
+ 'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
+ 'thumbnail': 're:https://\w+\.cloudfront\.net/6x4q2w/poster\.jpg\?t=\d+',
+ 'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
+ 'timestamp': 1422487800,
+ 'upload_date': '20150128',
+ 'location': 'SFO Commons',
+ 'duration': 3780,
+ 'view_count': int,
+ 'categories': ['Main'],
+ }
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ video_id = self._html_search_regex(r'//vid.ly/(.*?)/embed', webpage, 'id')
+
+ embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id)
+ jwconfig = self._search_regex(r'\svar jwconfig = (\{.*?\});\s', embed_script, 'metadata')
+ metadata = self._parse_json(jwconfig, video_id)
+
+ formats = [{
+ 'url': source['file'],
+ 'ext': source['type'],
+ 'format_id': self._search_regex(r'&format=(.*)$', source['file'], 'video format'),
+ 'format': source['label'],
+ 'height': int(source['label'].rstrip('p')),
+ } for source in metadata['playlist'][0]['sources']]
+ self._sort_formats(formats)
+
+ view_count = int_or_none(self._html_search_regex(
+ r'Views since archived: ([0-9]+)',
+ webpage, 'view count', fatal=False))
+ timestamp = parse_iso8601(self._html_search_regex(
+ r'<time datetime="(.*?)"', webpage, 'timestamp', fatal=False))
+ duration = parse_duration(self._search_regex(
+ r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)',
+ webpage, 'duration', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage),
+ 'formats': formats,
+ 'url': self._og_search_url(webpage),
+ 'display_id': display_id,
+ 'thumbnail': metadata['playlist'][0].get('image'),
+ 'description': self._og_search_description(webpage),
+ 'timestamp': timestamp,
+ 'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None),
+ 'duration': duration,
+ 'view_count': view_count,
+ 'categories': re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage),
+ }
'id': 'wP8On',
'ext': 'mp4',
'title': 'تیم گلکسی 11 - زومیت',
+ 'age_limit': 0,
},
# 'skip': 'Extremely unreliable',
}
video_id + '/vt/frame')
webpage = self._download_webpage(embed_url, video_id)
- video_urls = re.findall(r'fileList\[[0-9]+\]\s*=\s*"([^"]+)"', webpage)
+ video_urls = [video_url.replace('\\/', '/') for video_url in re.findall(
+ r'(?:fileList\[[0-9]+\]\s*=|"file"\s*:)\s*"([^"]+)"', webpage)]
for i, video_url in enumerate(video_urls):
req = HEADRequest(video_url)
res = self._request_webpage(
title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title')
thumbnail = self._search_regex(
- r'\s+image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
+ r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
return {
'id': video_id,
'url': video_url,
'ext': 'mp4',
'thumbnail': thumbnail,
+ 'age_limit': self._family_friendly_search(webpage),
}
class AppleTrailersIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
+ _TESTS = [{
"url": "http://trailers.apple.com/trailers/wb/manofsteel/",
+ 'info_dict': {
+ 'id': 'manofsteel',
+ },
"playlist": [
{
"md5": "d97a8e575432dbcb81b7c3acb741f8a8",
},
},
]
- }
+ }, {
+ 'url': 'http://trailers.apple.com/ca/metropole/autrui/',
+ 'only_matching': True,
+ }]
_JSON_RE = r'iTunes.playURL\((.*?)\);'
import time
import hmac
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse,
)
-class AtresPlayerIE(SubtitlesInfoExtractor):
+class AtresPlayerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html'
_TESTS = [
{
thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail')
subtitles = {}
- subtitle = xpath_text(episode, './media/asset/files/subtitle', 'subtitle')
- if subtitle:
- subtitles['es'] = subtitle
-
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, subtitles)
- return
+ subtitle_url = xpath_text(episode, './media/asset/files/subtitle', 'subtitle')
+ if subtitle_url:
+ subtitles['es'] = [{
+ 'ext': 'srt',
+ 'url': subtitle_url,
+ }]
return {
'id': video_id,
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
- 'subtitles': self.extract_subtitles(video_id, subtitles),
+ 'subtitles': subtitles,
}
'duration': int(info['length']),
'view_count': int(info['views_total']),
'uploader': info['username'],
- 'uploader_id': info['uid'],
+ 'uploader_id': info['owner']['uid'],
}
download_link = m_download.group(1)
video_id = self._search_regex(
- r'var TralbumData = {.*?id: (?P<id>\d+),?$',
- webpage, 'video id', flags=re.MULTILINE | re.DOTALL)
+ r'(?ms)var TralbumData = {.*?id: (?P<id>\d+),?$',
+ webpage, 'video id')
download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page')
# We get the dictionary of the track from some javascript code
- info = re.search(r'items: (.*?),$', download_webpage, re.MULTILINE).group(1)
- info = json.loads(info)[0]
+ all_info = self._parse_json(self._search_regex(
+ r'(?sm)items: (.*?),$', download_webpage, 'items'), video_id)
+ info = all_info[0]
# We pick mp3-320 for now, until format selection can be easily implemented.
mp3_info = info['downloads']['mp3-320']
# If we try to use this url it says the link has expired
initial_url = mp3_info['url']
- re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$'
- m_url = re.match(re_url, initial_url)
+ m_url = re.match(
+ r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$',
+ initial_url)
# We build the url we will use to get the final track url
        # This url is built by Bandcamp in the script download_bunde_*.js
request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts'))
final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url')
# If we could correctly generate the .rand field the url would be
# in the "download_url" key
- final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1)
+ final_url = self._search_regex(
+ r'"retry_url":"(.*?)"', final_url_webpage, 'final video URL')
return {
'id': video_id,
class BandcampAlbumIE(InfoExtractor):
IE_NAME = 'Bandcamp:album'
- _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+)|/?(?:$|[?#]))'
+ _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^?#]+)|/?(?:$|[?#]))'
_TESTS = [{
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
],
'info_dict': {
'title': 'Jazz Format Mixtape vol.1',
+ 'id': 'jazz-format-mixtape-vol-1',
+ 'uploader_id': 'blazo',
},
'params': {
'playlistend': 2
},
- 'skip': 'Bandcamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
+ 'skip': 'Bandcamp imposes download limits.'
}, {
'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave',
'info_dict': {
'title': 'Hierophany of the Open Grave',
+ 'uploader_id': 'nightbringer',
+ 'id': 'hierophany-of-the-open-grave',
},
'playlist_mincount': 9,
}, {
'url': 'http://dotscale.bandcamp.com',
'info_dict': {
'title': 'Loom',
+ 'id': 'dotscale',
+ 'uploader_id': 'dotscale',
},
'playlist_mincount': 7,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
- playlist_id = mobj.group('subdomain')
- title = mobj.group('title')
- display_id = title or playlist_id
- webpage = self._download_webpage(url, display_id)
+ uploader_id = mobj.group('subdomain')
+ album_id = mobj.group('album_id')
+ playlist_id = album_id or uploader_id
+ webpage = self._download_webpage(url, playlist_id)
tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
if not tracks_paths:
raise ExtractorError('The page doesn\'t contain any tracks')
r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False)
return {
'_type': 'playlist',
+ 'uploader_id': uploader_id,
'id': playlist_id,
- 'display_id': display_id,
'title': title,
'entries': entries,
}
import xml.etree.ElementTree
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
from ..utils import ExtractorError
from ..compat import compat_HTTPError
-class BBCCoUkIE(SubtitlesInfoExtractor):
+class BBCCoUkIE(InfoExtractor):
IE_NAME = 'bbc.co.uk'
IE_DESC = 'BBC iPlayer'
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
formats.extend(conn_formats)
return formats
- def _extract_captions(self, media, programme_id):
+ def _get_subtitles(self, media, programme_id):
subtitles = {}
for connection in self._extract_connections(media):
captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}'))
srt = ''
+
+ def _extract_text(p):
+ if p.text is not None:
+ stripped_text = p.text.strip()
+ if stripped_text:
+ return stripped_text
+ return ' '.join(span.text.strip() for span in p.findall('{http://www.w3.org/2006/10/ttaf1}span'))
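+            # Each <p> element becomes one SRT cue: index, "begin --> end" timing line, text, blank line.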
for pos, p in enumerate(ps):
- srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'),
- p.text.strip() if p.text is not None else '')
- subtitles[lang] = srt
+ srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'), _extract_text(p))
+ subtitles[lang] = [
+ {
+ 'url': connection.get('href'),
+ 'ext': 'ttml',
+ },
+ {
+ 'data': srt,
+ 'ext': 'srt',
+ },
+ ]
return subtitles
def _download_media_selector(self, programme_id):
elif kind == 'video':
formats.extend(self._extract_video(media, programme_id))
elif kind == 'captions':
- subtitles = self._extract_captions(media, programme_id)
+ subtitles = self.extract_subtitles(media, programme_id)
return formats, subtitles
formats, subtitles = self._download_media_selector(programme_id)
return programme_id, title, description, duration, formats, subtitles
except ExtractorError as ee:
- if not isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
+ if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
raise
# fallback to legacy playlist
else:
programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(programme_id, subtitles)
- return
-
self._sort_formats(formats)
return {
_VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)'
_TEST = {
'url': 'http://beeg.com/5416503',
- 'md5': '634526ae978711f6b748fe0dd6c11f57',
+ 'md5': '1bff67111adb785c51d1b42959ec10e5',
'info_dict': {
'id': '5416503',
'ext': 'mp4',
from __future__ import unicode_literals
import json
-import re
from .common import InfoExtractor
-from ..utils import remove_start
+from ..utils import (
+ remove_start,
+ int_or_none,
+)
class BlinkxIE(InfoExtractor):
- _VALID_URL = r'^(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
+ _VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
IE_NAME = 'blinkx'
_TEST = {
- 'url': 'http://www.blinkx.com/ce/8aQUy7GVFYgFzpKhT0oqsilwOGFRVXk3R1ZGWWdGenBLaFQwb3FzaWx3OGFRVXk3R1ZGWWdGenB',
- 'md5': '2e9a07364af40163a908edbf10bb2492',
+ 'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ',
+ 'md5': '337cf7a344663ec79bf93a526a2e06c7',
'info_dict': {
- 'id': '8aQUy7GV',
+ 'id': 'Da0Gw3xc',
'ext': 'mp4',
- 'title': 'Police Car Rolls Away',
- 'uploader': 'stupidvideos.com',
- 'upload_date': '20131215',
- 'timestamp': 1387068000,
- 'description': 'A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!',
- 'duration': 14.886,
- 'thumbnails': [{
- 'width': 100,
- 'height': 76,
- 'resolution': '100x76',
- 'url': 'http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg',
- }],
+ 'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News',
+ 'uploader': 'IGN News',
+ 'upload_date': '20150217',
+ 'timestamp': 1424215740,
+ 'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.',
+ 'duration': 47.743333,
},
}
- def _real_extract(self, rl):
- m = re.match(self._VALID_URL, rl)
- video_id = m.group('id')
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
display_id = video_id[:8]
api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' +
elif m['type'] in ('flv', 'mp4'):
vcodec = remove_start(m['vcodec'], 'ff')
acodec = remove_start(m['acodec'], 'ff')
- tbr = (int(m['vbr']) + int(m['abr'])) // 1000
+ vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000)
+ abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000)
+ tbr = vbr + abr if vbr and abr else None
format_id = '%s-%sk-%s' % (vcodec, tbr, m['w'])
formats.append({
'format_id': format_id,
'url': m['link'],
'vcodec': vcodec,
'acodec': acodec,
- 'abr': int(m['abr']) // 1000,
- 'vbr': int(m['vbr']) // 1000,
+ 'abr': abr,
+ 'vbr': vbr,
'tbr': tbr,
- 'width': int(m['w']),
- 'height': int(m['h']),
+ 'width': int_or_none(m.get('w')),
+ 'height': int_or_none(m.get('h')),
})
self._sort_formats(formats)
import re
from .common import InfoExtractor
-from .subtitles import SubtitlesInfoExtractor
from ..compat import (
compat_str,
)
-class BlipTVIE(SubtitlesInfoExtractor):
+class BlipTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))'
_TESTS = [
categories = [category.text for category in item.findall('category')]
formats = []
- subtitles = {}
+ subtitles_urls = {}
media_group = item.find(media('group'))
for media_content in media_group.findall(media('content')):
}
lang = role.rpartition('-')[-1].strip().lower()
langcode = LANGS.get(lang, lang)
- subtitles[langcode] = url
+ subtitles_urls[langcode] = url
elif media_type.startswith('video/'):
formats.append({
'url': real_url,
})
self._sort_formats(formats)
- # subtitles
- video_subtitles = self.extract_subtitles(video_id, subtitles)
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, subtitles)
- return
+ subtitles = self.extract_subtitles(video_id, subtitles_urls)
return {
'id': video_id,
'thumbnail': thumbnail,
'categories': categories,
'formats': formats,
- 'subtitles': video_subtitles,
+ 'subtitles': subtitles,
}
- def _download_subtitle_url(self, sub_lang, url):
- # For some weird reason, blip.tv serves a video instead of subtitles
- # when we request with a common UA
- req = compat_urllib_request.Request(url)
- req.add_header('User-Agent', 'youtube-dl')
- return self._download_webpage(req, None, note=False)
+ def _get_subtitles(self, video_id, subtitles_urls):
+ subtitles = {}
+ for lang, url in subtitles_urls.items():
+ # For some weird reason, blip.tv serves a video instead of subtitles
+ # when we request with a common UA
+ req = compat_urllib_request.Request(url)
+ req.add_header('User-Agent', 'youtube-dl')
+ subtitles[lang] = [{
+ # The extension is 'srt' but it's actually an 'ass' file
+ 'ext': 'ass',
+ 'data': self._download_webpage(req, None, note=False),
+ }]
+ return subtitles
class BlipTVUserIE(InfoExtractor):
class BloombergIE(InfoExtractor):
- _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html'
+ _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<id>.+?)\.html'
_TEST = {
'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- name = mobj.group('name')
+ name = self._match_id(url)
webpage = self._download_webpage(url, name)
+
f4m_url = self._search_regex(
r'<source src="(https?://[^"]+\.f4m.*?)"', webpage,
'f4m url')
'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL',
'info_dict': {
'title': 'Sealife',
+ 'id': '3550319591001',
},
'playlist_mincount': 7,
},
playlist_info = json_data['videoList']
videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']]
- return self.playlist_result(videos, playlist_id=playlist_info['id'],
+ return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'],
playlist_title=playlist_info['mediaCollectionDTO']['displayName'])
def _extract_video_info(self, video_info):
'skip_download': True, # Got enough YouTube download tests
},
'info_dict': {
+ 'id': 'look-at-this-cute-dog-omg',
'description': 're:Munchkin the Teddy Bear is back ?!',
'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill',
},
'ext': 'mp4',
'upload_date': '20141124',
'uploader_id': 'CindysMunchkin',
- 'description': 're:© 2014 Munchkin the Shih Tzu',
- 'uploader': 'Munchkin the Shih Tzu',
+ 'description': 're:© 2014 Munchkin the',
+ 'uploader': 're:^Munchkin the',
'title': 're:Munchkin the Teddy Bear gets her exercise',
},
}]
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import datetime
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse,
+ compat_urlparse,
+)
+from ..utils import (
+ parse_iso8601,
+ str_to_int,
+)
+
+
+class CamdemyIE(InfoExtractor):
+ _VALID_URL = r'http://(?:www\.)?camdemy\.com/media/(?P<id>\d+)'
+ _TESTS = [{
+ # single file
+ 'url': 'http://www.camdemy.com/media/5181/',
+ 'md5': '5a5562b6a98b37873119102e052e311b',
+ 'info_dict': {
+ 'id': '5181',
+ 'ext': 'mp4',
+ 'title': 'Ch1-1 Introduction, Signals (02-23-2012)',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'description': '',
+ 'creator': 'ss11spring',
+ 'upload_date': '20130114',
+ 'timestamp': 1358154556,
+ 'view_count': int,
+ }
+ }, {
+ # With non-empty description
+ 'url': 'http://www.camdemy.com/media/13885',
+ 'md5': '4576a3bb2581f86c61044822adbd1249',
+ 'info_dict': {
+ 'id': '13885',
+ 'ext': 'mp4',
+ 'title': 'EverCam + Camdemy QuickStart',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'description': 'md5:050b62f71ed62928f8a35f1a41e186c9',
+ 'creator': 'evercam',
+ 'upload_date': '20140620',
+ 'timestamp': 1403271569,
+ }
+ }, {
+ # External source
+ 'url': 'http://www.camdemy.com/media/14842',
+ 'md5': '50e1c3c3aa233d3d7b7daa2fa10b1cf7',
+ 'info_dict': {
+ 'id': '2vsYQzNIsJo',
+ 'ext': 'mp4',
+ 'upload_date': '20130211',
+ 'uploader': 'Hun Kim',
+ 'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
+ 'uploader_id': 'hunkimtutorials',
+ 'title': 'Excel 2013 Tutorial - How to add Password Protection',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ page = self._download_webpage(url, video_id)
+
+ src_from = self._html_search_regex(
+ r"<div class='srcFrom'>Source: <a title='([^']+)'", page,
+ 'external source', default=None)
+ if src_from:
+ return self.url_result(src_from)
+
+ oembed_obj = self._download_json(
+ 'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id)
+
+ thumb_url = oembed_obj['thumbnail_url']
+ video_folder = compat_urlparse.urljoin(thumb_url, 'video/')
+ file_list_doc = self._download_xml(
+ compat_urlparse.urljoin(video_folder, 'fileList.xml'),
+ video_id, 'Filelist XML')
+ file_name = file_list_doc.find('./video/item/fileName').text
+ video_url = compat_urlparse.urljoin(video_folder, file_name)
+
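+        # The "Posted" value on the page carries no timezone marker; the
+        # fixed UTC+8 offset below reflects the site's apparent server time.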
+ timestamp = parse_iso8601(self._html_search_regex(
+ r"<div class='title'>Posted\s*:</div>\s*<div class='value'>([^<>]+)<",
+ page, 'creation time', fatal=False),
+ delimiter=' ', timezone=datetime.timedelta(hours=8))
+ view_count = str_to_int(self._html_search_regex(
+ r"<div class='title'>Views\s*:</div>\s*<div class='value'>([^<>]+)<",
+ page, 'view count', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': oembed_obj['title'],
+ 'thumbnail': thumb_url,
+ 'description': self._html_search_meta('description', page),
+ 'creator': oembed_obj['author_name'],
+ 'duration': oembed_obj['duration'],
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ }
+
+
+class CamdemyFolderIE(InfoExtractor):
+ _VALID_URL = r'http://www.camdemy.com/folder/(?P<id>\d+)'
+ _TESTS = [{
+ # links with trailing slash
+ 'url': 'http://www.camdemy.com/folder/450',
+ 'info_dict': {
+ 'id': '450',
+ 'title': '信號與系統 2012 & 2011 (Signals and Systems)',
+ },
+ 'playlist_mincount': 145
+ }, {
+ # links without trailing slash
+ # and multi-page
+ 'url': 'http://www.camdemy.com/folder/853',
+ 'info_dict': {
+ 'id': '853',
+ 'title': '科學計算 - 使用 Matlab'
+ },
+ 'playlist_mincount': 20
+ }, {
+        # with displayMode parameter, to test the code that adds query parameters
+ 'url': 'http://www.camdemy.com/folder/853/?displayMode=defaultOrderByOrg',
+ 'info_dict': {
+ 'id': '853',
+ 'title': '科學計算 - 使用 Matlab'
+ },
+ 'playlist_mincount': 20
+ }]
+
+ def _real_extract(self, url):
+ folder_id = self._match_id(url)
+
+ # Add displayMode=list so that all links are displayed in a single page
+ parsed_url = list(compat_urlparse.urlparse(url))
+ query = dict(compat_urlparse.parse_qsl(parsed_url[4]))
+ query.update({'displayMode': 'list'})
+ parsed_url[4] = compat_urllib_parse.urlencode(query)
+ final_url = compat_urlparse.urlunparse(parsed_url)
+
+ page = self._download_webpage(final_url, folder_id)
+ matches = re.findall(r"href='(/media/\d+/?)'", page)
+
+ entries = [self.url_result('http://www.camdemy.com' + media_path)
+ for media_path in matches]
+
+ folder_title = self._html_search_meta('keywords', page)
+
+ return self.playlist_result(entries, folder_id, folder_title)
class CanalplusIE(InfoExtractor):
IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv'
- _VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))'
+ _VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv|itele\.fr)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))'
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s'
_SITE_ID_MAP = {
'canalplus.fr': 'cplus',
'piwiplus.fr': 'teletoon',
'd8.tv': 'd8',
+ 'itele.fr': 'itele',
}
_TESTS = [{
'upload_date': '20131108',
},
'skip': 'videos get deleted after a while',
+ }, {
+ 'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559',
+ 'md5': '65aa83ad62fe107ce29e564bb8712580',
+ 'info_dict': {
+ 'id': '1213714',
+ 'ext': 'flv',
+ 'title': 'Aubervilliers : un lycée en colère - Le 11/02/2015 à 06h45',
+ 'description': 'md5:8216206ec53426ea6321321f3b3c16db',
+ 'upload_date': '20150211',
+ },
}]
def _real_extract(self, url):
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
real_id = self._search_regex(
r"video\.settings\.pid\s*=\s*'([^']+)';",
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class CBSSportsIE(InfoExtractor):
+ _VALID_URL = r'http://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)'
+
+ _TEST = {
+ 'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s',
+ 'info_dict': {
+ 'id': '_d5_GbO8p1sT',
+ 'ext': 'flv',
+ 'title': 'US Open flashbacks: 1990s',
+ 'description': 'Bill Macatee relives the best moments in US Open history from the 1990s.',
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ section = mobj.group('section')
+ video_id = mobj.group('id')
+ all_videos = self._download_json(
+ 'http://www.cbssports.com/data/video/player/getVideos/%s?as=json' % section,
+ video_id)
+        # The JSON file contains the info of all the videos in the section
+ video_info = next(v for v in all_videos if v['pcid'] == video_id)
+ return self.url_result('theplatform:%s' % video_info['pid'], 'ThePlatform')
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ qualities,
+ unified_strdate,
+)
+
+
+class CCCIE(InfoExtractor):
+ IE_NAME = 'media.ccc.de'
+ _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/[^?#]+/[^?#/]*?_(?P<id>[0-9]{8,})._[^?#/]*\.html'
+
+ _TEST = {
+ 'url': 'http://media.ccc.de/browse/congress/2013/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor.html#video',
+ 'md5': '205a365d0d57c0b1e43a12c9ffe8f9be',
+ 'info_dict': {
+ 'id': '20131228183',
+ 'ext': 'mp4',
+ 'title': 'Introduction to Processor Design',
+ 'description': 'md5:5ddbf8c734800267f2cee4eab187bc1b',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'view_count': int,
+ 'upload_date': '20131229',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ if self._downloader.params.get('prefer_free_formats'):
+ preference = qualities(['mp3', 'opus', 'mp4-lq', 'webm-lq', 'h264-sd', 'mp4-sd', 'webm-sd', 'mp4', 'webm', 'mp4-hd', 'h264-hd', 'webm-hd'])
+ else:
+ preference = qualities(['opus', 'mp3', 'webm-lq', 'mp4-lq', 'webm-sd', 'h264-sd', 'mp4-sd', 'webm', 'mp4', 'webm-hd', 'mp4-hd', 'h264-hd'])
+
+ title = self._html_search_regex(
+ r'(?s)<h1>(.*?)</h1>', webpage, 'title')
+ description = self._html_search_regex(
+ r"(?s)<p class='description'>(.*?)</p>",
+ webpage, 'description', fatal=False)
+ upload_date = unified_strdate(self._html_search_regex(
+ r"(?s)<span class='[^']*fa-calendar-o'></span>(.*?)</li>",
+ webpage, 'upload date', fatal=False))
+ view_count = int_or_none(self._html_search_regex(
+ r"(?s)<span class='[^']*fa-eye'></span>(.*?)</li>",
+ webpage, 'view count', fatal=False))
+
+ matches = re.finditer(r'''(?xs)
+ <(?:span|div)\s+class='label\s+filetype'>(?P<format>.*?)</(?:span|div)>\s*
+ <a\s+href='(?P<http_url>[^']+)'>\s*
+ (?:
+ .*?
+ <a\s+href='(?P<torrent_url>[^']+\.torrent)'
+ )?''', webpage)
+ formats = []
+ for m in matches:
+ format = m.group('format')
+ format_id = self._search_regex(
+ r'.*/([a-z0-9_-]+)/[^/]*$',
+ m.group('http_url'), 'format id', default=None)
+            vcodec = 'h264' if format_id and 'h264' in format_id else (
+ 'none' if format_id in ('mp3', 'opus') else None
+ )
+ formats.append({
+ 'format_id': format_id,
+ 'format': format,
+ 'url': m.group('http_url'),
+ 'vcodec': vcodec,
+ 'preference': preference(format_id),
+ })
+
+ if m.group('torrent_url'):
+ formats.append({
+ 'format_id': 'torrent-%s' % (format if format_id is None else format_id),
+ 'format': '%s (torrent)' % format,
+ 'proto': 'torrent',
+ 'format_note': '(unsupported; will just download the .torrent file)',
+ 'vcodec': vcodec,
+ 'preference': -100 + preference(format_id),
+ 'url': m.group('torrent_url'),
+ })
+ self._sort_formats(formats)
+
+ thumbnail = self._html_search_regex(
+ r"<video.*?poster='([^']+)'", webpage, 'thumbnail', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'view_count': view_count,
+ 'upload_date': upload_date,
+ 'formats': formats,
+ }
import re
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urllib_parse,
)
-class CeskaTelevizeIE(SubtitlesInfoExtractor):
+class CeskaTelevizeIE(InfoExtractor):
_VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'
_TESTS = [
subtitles = {}
subs = item.get('subtitles')
if subs:
- subtitles['cs'] = subs[0]['url']
-
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, subtitles)
- return
-
- subtitles = self._fix_subtitles(self.extract_subtitles(video_id, subtitles))
+ subtitles = self.extract_subtitles(episode_id, subs)
return {
'id': episode_id,
'subtitles': subtitles,
}
+ def _get_subtitles(self, episode_id, subs):
+ original_subtitles = self._download_webpage(
+ subs[0]['url'], episode_id, 'Downloading subtitles')
+ srt_subs = self._fix_subtitles(original_subtitles)
+ return {
+ 'cs': [{
+ 'ext': 'srt',
+ 'data': srt_subs,
+ }]
+ }
+
@staticmethod
def _fix_subtitles(subtitles):
""" Convert millisecond-based subtitles to SRT """
- if subtitles is None:
- return subtitles # subtitles not requested
def _msectotimecode(msec):
""" Helper utility to convert milliseconds to timecode """
else:
yield line
- fixed_subtitles = {}
- for k, v in subtitles.items():
- fixed_subtitles[k] = "\r\n".join(_fix_subtitle(v))
- return fixed_subtitles
+ return "\r\n".join(_fix_subtitle(subtitles))
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ int_or_none,
+)
+
+
+class ChirbitIE(InfoExtractor):
+ IE_NAME = 'chirbit'
+ _VALID_URL = r'https?://(?:www\.)?chirb\.it/(?:(?:wp|pl)/|fb_chirbit_player\.swf\?key=)?(?P<id>[\da-zA-Z]+)'
+ _TESTS = [{
+ 'url': 'http://chirb.it/PrIPv5',
+ 'md5': '9847b0dad6ac3e074568bf2cfb197de8',
+ 'info_dict': {
+ 'id': 'PrIPv5',
+ 'ext': 'mp3',
+ 'title': 'Фасадстрой',
+ 'duration': 52,
+ 'view_count': int,
+ 'comment_count': int,
+ }
+ }, {
+ 'url': 'https://chirb.it/fb_chirbit_player.swf?key=PrIPv5',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ audio_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://chirb.it/%s' % audio_id, audio_id)
+
+ audio_url = self._search_regex(
+ r'"setFile"\s*,\s*"([^"]+)"', webpage, 'audio url')
+
+ title = self._search_regex(
+ r'itemprop="name">([^<]+)', webpage, 'title')
+ duration = parse_duration(self._html_search_meta(
+ 'duration', webpage, 'duration', fatal=False))
+ view_count = int_or_none(self._search_regex(
+ r'itemprop="playCount"\s*>(\d+)', webpage,
+ 'listen count', fatal=False))
+ comment_count = int_or_none(self._search_regex(
+ r'>(\d+) Comments?:', webpage,
+ 'comment count', fatal=False))
+
+ return {
+ 'id': audio_id,
+ 'url': audio_url,
+ 'title': title,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ }
+
+
+class ChirbitProfileIE(InfoExtractor):
+ IE_NAME = 'chirbit:profile'
+ _VALID_URL = r'https?://(?:www\.)?chirbit.com/(?:rss/)?(?P<id>[^/]+)'
+ _TEST = {
+ 'url': 'http://chirbit.com/ScarletBeauty',
+ 'info_dict': {
+ 'id': 'ScarletBeauty',
+ 'title': 'Chirbits by ScarletBeauty',
+ },
+ 'playlist_mincount': 3,
+ }
+
+ def _real_extract(self, url):
+ profile_id = self._match_id(url)
+
+ rss = self._download_xml(
+ 'http://chirbit.com/rss/%s' % profile_id, profile_id)
+
+ entries = [
+ self.url_result(audio_url.text, 'Chirbit')
+ for audio_url in rss.findall('./channel/item/link')]
+
+ title = rss.find('./channel/title').text
+
+ return self.playlist_result(entries, profile_id, title)
})
self._sort_formats(formats)
+ subtitles = self._extract_subtitles(cdoc, guid)
+
virtual_id = show_name + ' ' + epTitle + ' part ' + compat_str(part_num + 1)
entries.append({
'id': guid,
'duration': duration,
'thumbnail': thumbnail,
'description': description,
+ 'subtitles': subtitles,
})
return {
compiled_regex_type,
ExtractorError,
float_or_none,
- HEADRequest,
int_or_none,
RegexNotFoundError,
sanitize_filename,
If not explicitly set, calculated from timestamp.
uploader_id: Nickname or id of the video uploader.
location: Physical location where the video was filmed.
- subtitles: The subtitle file contents as a dictionary in the format
- {language: subtitles}.
+ subtitles: The available subtitles as a dictionary in the format
+ {language: subformats}. "subformats" is a list sorted from
+                    lower to higher preference; each element is a dictionary
+                    with an "ext" entry and one of:
+ * "data": The subtitles file contents
+ * "url": A url pointing to the subtitles file
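+                    For example (all values purely illustrative), English
+                    subtitles offered both as a remote TTML file and as
+                    inline SRT data would be represented as:
+                        {'en': [{'ext': 'ttml', 'url': 'http://host/x.ttml'},
+                                {'ext': 'srt', 'data': '1\r\n00:00:00,000 ...'}]}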
+ automatic_captions: Like 'subtitles', used by the YoutubeIE for
+ automatically generated captions
duration: Length of the video in seconds, as an integer.
view_count: How many users have watched the video on the platform.
like_count: Number of positive ratings of the video
dislike_count: Number of negative ratings of the video
+    average_rating: Average rating given by users; the scale depends on the webpage
comment_count: Number of comments on the video
comments: A list of comments, each with one or more of the following
properties (all but one of text or html optional):
def extract(self, url):
"""Extracts URL information and returns it in list of dicts."""
- self.initialize()
- return self._real_extract(url)
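+        # Normalize low-level failures (truncated reads, missing dict keys)
+        # into ExtractorError so the core can report them uniformly.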
+ try:
+ self.initialize()
+ return self._real_extract(url)
+ except ExtractorError:
+ raise
+ except compat_http_client.IncompleteRead as e:
+            raise ExtractorError('A network error has occurred.', cause=e, expected=True)
+ except (KeyError, StopIteration) as e:
+            raise ExtractorError('An extractor error has occurred.', cause=e)
def set_downloader(self, downloader):
"""Sets the downloader for this IE."""
if blocked_iframe:
msg += ' Visit %s for more details' % blocked_iframe
raise ExtractorError(msg, expected=True)
+ if '<title>The URL you requested has been blocked</title>' in content[:512]:
+ msg = (
+ 'Access to this webpage has been blocked by Indian censorship. '
+ 'Use a VPN or proxy server (with --proxy) to route around it.')
+ block_msg = self._html_search_regex(
+ r'</h1><p>(.*?)</p>',
+ content, 'block message', default=None)
+ if block_msg:
+ msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
+ raise ExtractorError(msg, expected=True)
return content
if mobj:
break
- if os.name != 'nt' and sys.stderr.isatty():
+ if not self._downloader.params.get('no_color') and os.name != 'nt' and sys.stderr.isatty():
_name = '\033[0;34m%s\033[0m' % name
else:
_name = name
}
return RATING_TABLE.get(rating.lower(), None)
+ def _family_friendly_search(self, html):
+ # See http://schema.org/VideoObject
+ family_friendly = self._html_search_meta('isFamilyFriendly', html)
+
+ if not family_friendly:
+ return None
+
+ RATING_TABLE = {
+ '1': 0,
+ 'true': 0,
+ '0': 18,
+ 'false': 18,
+ }
+ return RATING_TABLE.get(family_friendly.lower(), None)
+
def _twitter_search_player(self, html):
return self._html_search_meta('twitter:player', html,
'twitter card player')
f.get('language_preference') if f.get('language_preference') is not None else -1,
f.get('quality') if f.get('quality') is not None else -1,
f.get('tbr') if f.get('tbr') is not None else -1,
+ f.get('filesize') if f.get('filesize') is not None else -1,
f.get('vbr') if f.get('vbr') is not None else -1,
- ext_preference,
f.get('height') if f.get('height') is not None else -1,
f.get('width') if f.get('width') is not None else -1,
+ ext_preference,
f.get('abr') if f.get('abr') is not None else -1,
audio_ext_preference,
f.get('fps') if f.get('fps') is not None else -1,
- f.get('filesize') if f.get('filesize') is not None else -1,
f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
f.get('source_preference') if f.get('source_preference') is not None else -1,
f.get('format_id'),
def _is_valid_url(self, url, video_id, item='video'):
try:
- self._request_webpage(
- HEADRequest(url), video_id,
- 'Checking %s URL' % item)
+ self._request_webpage(url, video_id, 'Checking %s URL' % item)
return True
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError):
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
for i, media_el in enumerate(media_nodes):
if manifest_version == '2.0':
- manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/'
- + (media_el.attrib.get('href') or media_el.attrib.get('url')))
+ manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' +
+ (media_el.attrib.get('href') or media_el.attrib.get('url')))
tbr = int_or_none(media_el.attrib.get('bitrate'))
formats.append({
'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])),
'url': m3u8_url,
'ext': ext,
'protocol': 'm3u8',
- 'preference': -1,
+ 'preference': preference - 1 if preference else -1,
'resolution': 'multiple',
'format_note': 'Quality selection URL',
}]
note='Downloading m3u8 information',
errnote='Failed to download m3u8 information')
last_info = None
+ last_media = None
kv_rex = re.compile(
r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')
for line in m3u8_doc.splitlines():
if v.startswith('"'):
v = v[1:-1]
last_info[m.group('key')] = v
+ elif line.startswith('#EXT-X-MEDIA:'):
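+                # e.g. #EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio",NAME="English",URI="eng.m3u8" (illustrative)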
+ last_media = {}
+ for m in kv_rex.finditer(line):
+ v = m.group('val')
+ if v.startswith('"'):
+ v = v[1:-1]
+ last_media[m.group('key')] = v
elif line.startswith('#') or not line.strip():
continue
else:
width_str, height_str = resolution.split('x')
f['width'] = int(width_str)
f['height'] = int(height_str)
+ if last_media is not None:
+ f['m3u8_media'] = last_media
+ last_media = None
formats.append(f)
last_info = {}
self._sort_formats(formats)
formats = []
rtmp_count = 0
- for video in smil.findall('./body/switch/video'):
- src = video.get('src')
- if not src:
- continue
- bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
- width = int_or_none(video.get('width'))
- height = int_or_none(video.get('height'))
- proto = video.get('proto')
- if not proto:
- if base:
- if base.startswith('rtmp'):
- proto = 'rtmp'
- elif base.startswith('http'):
- proto = 'http'
- ext = video.get('ext')
- if proto == 'm3u8':
- formats.extend(self._extract_m3u8_formats(src, video_id, ext))
- elif proto == 'rtmp':
- rtmp_count += 1
- streamer = video.get('streamer') or base
- formats.append({
- 'url': streamer,
- 'play_path': src,
- 'ext': 'flv',
- 'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
- 'tbr': bitrate,
- 'width': width,
- 'height': height,
- })
+        seq_videos = smil.findall('./body/seq/video')
+        if seq_videos:
+            video = seq_videos[0]
+ fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count)
+ formats.extend(fmts)
+ else:
+ for video in smil.findall('./body/switch/video'):
+ fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count)
+ formats.extend(fmts)
+
self._sort_formats(formats)
return formats
+ def _parse_smil_video(self, video, video_id, base, rtmp_count):
+ src = video.get('src')
+ if not src:
+ return ([], rtmp_count)
+ bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
+ width = int_or_none(video.get('width'))
+ height = int_or_none(video.get('height'))
+ proto = video.get('proto')
+ if not proto:
+ if base:
+ if base.startswith('rtmp'):
+ proto = 'rtmp'
+ elif base.startswith('http'):
+ proto = 'http'
+ ext = video.get('ext')
+ if proto == 'm3u8':
+ return (self._extract_m3u8_formats(src, video_id, ext), rtmp_count)
+ elif proto == 'rtmp':
+ rtmp_count += 1
+ streamer = video.get('streamer') or base
+ return ([{
+ 'url': streamer,
+ 'play_path': src,
+ 'ext': 'flv',
+ 'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
+ 'tbr': bitrate,
+ 'width': width,
+ 'height': height,
+ }], rtmp_count)
+ elif proto.startswith('http'):
+ return ([{
+ 'url': base + src,
+ 'ext': ext or 'flv',
+ 'tbr': bitrate,
+ 'width': width,
+ 'height': height,
+ }], rtmp_count)
+
def _live_title(self, name):
""" Generate the title for a live video """
now = datetime.datetime.now()
any_restricted = any_restricted or is_restricted
return not any_restricted
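+    # Subtitle extraction is opt-in: the wrappers below only invoke the
+    # subclass' _get_subtitles() / _get_automatic_captions() when the user
+    # asked for subtitles, so no extra network requests are made otherwise.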
+ def extract_subtitles(self, *args, **kwargs):
+ if (self._downloader.params.get('writesubtitles', False) or
+ self._downloader.params.get('listsubtitles')):
+ return self._get_subtitles(*args, **kwargs)
+ return {}
+
+ def _get_subtitles(self, *args, **kwargs):
+ raise NotImplementedError("This method must be implemented by subclasses")
+
+ def extract_automatic_captions(self, *args, **kwargs):
+ if (self._downloader.params.get('writeautomaticsub', False) or
+ self._downloader.params.get('listsubtitles')):
+ return self._get_automatic_captions(*args, **kwargs)
+ return {}
+
+ def _get_automatic_captions(self, *args, **kwargs):
+ raise NotImplementedError("This method must be implemented by subclasses")
+
class SearchInfoExtractor(InfoExtractor):
"""
'That doesn\'t make any sense. '
'Simply remove the parameter in your command or configuration.'
) % url
- if self._downloader.params.get('verbose'):
+ if not self._downloader.params.get('verbose'):
msg += ' Add -v to the command line to see what arguments and configuration youtube-dl got.'
raise ExtractorError(msg, expected=True)
+
+
+class UnicodeBOMIE(InfoExtractor):
+ IE_DESC = False
+ _VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'
+
+ _TESTS = [{
+ 'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ real_url = self._match_id(url)
+ self.report_warning(
+ 'Your URL starts with a Byte Order Mark (BOM). '
+ 'Removing the BOM and looking for "%s" ...' % real_url)
+ return self.url_result(real_url)
from hashlib import sha1
from math import pow, sqrt, floor
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
aes_cbc_decrypt,
inc,
)
-from .common import InfoExtractor
-class CrunchyrollIE(SubtitlesInfoExtractor):
+class CrunchyrollIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
_TESTS = [{
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
return output
+ def _get_subtitles(self, video_id, webpage):
+ subtitles = {}
+ for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
+ sub_page = self._download_webpage(
+ 'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
+ video_id, note='Downloading subtitles for ' + sub_name)
+ id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
+ iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
+ data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
+ if not id or not iv or not data:
+ continue
+ id = int(id)
+ iv = base64.b64decode(iv)
+ data = base64.b64decode(data)
+
+ subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
+ lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
+ if not lang_code:
+ continue
+ sub_root = xml.etree.ElementTree.fromstring(subtitle)
+ subtitles[lang_code] = [
+ {
+ 'ext': 'srt',
+ 'data': self._convert_subtitles_to_srt(sub_root),
+ },
+ {
+ 'ext': 'ass',
+ 'data': self._convert_subtitles_to_ass(sub_root),
+ },
+ ]
+ return subtitles
+
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('video_id')
'format_id': video_format,
})
- subtitles = {}
- sub_format = self._downloader.params.get('subtitlesformat', 'srt')
- for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
- sub_page = self._download_webpage(
- 'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
- video_id, note='Downloading subtitles for ' + sub_name)
- id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
- iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
- data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
- if not id or not iv or not data:
- continue
- id = int(id)
- iv = base64.b64decode(iv)
- data = base64.b64decode(data)
-
- subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
- lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
- if not lang_code:
- continue
- sub_root = xml.etree.ElementTree.fromstring(subtitle)
- if sub_format == 'ass':
- subtitles[lang_code] = self._convert_subtitles_to_ass(sub_root)
- else:
- subtitles[lang_code] = self._convert_subtitles_to_srt(sub_root)
-
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, subtitles)
- return
+ subtitles = self.extract_subtitles(video_id, webpage)
return {
'id': video_id,
import itertools
from .common import InfoExtractor
-from .subtitles import SubtitlesInfoExtractor
from ..compat import (
compat_str,
return request
-class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
+class DailymotionIE(DailymotionBaseInfoExtractor):
"""Information Extractor for Dailymotion"""
_VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
# subtitles
video_subtitles = self.extract_subtitles(video_id, webpage)
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, webpage)
- return
view_count = str_to_int(self._search_regex(
r'video_views_count[^>]+>\s+([\d\.,]+)',
'view_count': view_count,
}
- def _get_available_subtitles(self, video_id, webpage):
+ def _get_subtitles(self, video_id, webpage):
try:
sub_list = self._download_webpage(
'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
return {}
info = json.loads(sub_list)
if (info['total'] > 0):
- sub_lang_list = dict((l['language'], l['url']) for l in info['list'])
+ sub_lang_list = dict((l['language'], [{'url': l['url'], 'ext': 'srt'}]) for l in info['list'])
return sub_lang_list
self._downloader.report_warning('video doesn\'t have subtitles')
return {}
'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
'info_dict': {
'title': 'SPORT',
+ 'id': 'xv4bw_nqtv_sport',
},
'playlist_mincount': 20,
}]
'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
'ext': 'flv',
'title': 'Videoinstallation für eine Kaufhausfassade'
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
}
}
r"flashvars.pvg_id=\"(\d+)\";",
webpage, 'ID')
- json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
- + video_id)
+ json_url = (
+ 'http://static.videos.gouv.fr/brightcovehub/export/json/%s' %
+ video_id)
info = self._download_json(json_url, title, 'Downloading JSON config')
video_url = info['renditions'][0]['url']
from __future__ import unicode_literals
-import re
-import time
-
from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ int_or_none,
+)
class DotsubIE(InfoExtractor):
- _VALID_URL = r'http://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
+ _VALID_URL = r'https?://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
_TEST = {
'url': 'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27',
'md5': '0914d4d69605090f623b7ac329fea66e',
'id': 'aed3b8b2-1889-4df5-ae63-ad85f5572f27',
'ext': 'flv',
'title': 'Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary',
+ 'description': 'md5:699a0f7f50aeec6042cb3b1db2d0d074',
+ 'thumbnail': 're:^https?://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
+ 'duration': 3169,
'uploader': '4v4l0n42',
- 'description': 'Pyramids of Waste (2010) also known as "The lightbulb conspiracy" is a documentary about how our economic system based on consumerism and planned obsolescence is breaking our planet down.\r\n\r\nSolutions to this can be found at:\r\nhttp://robotswillstealyourjob.com\r\nhttp://www.federicopistono.org\r\n\r\nhttp://opensourceecology.org\r\nhttp://thezeitgeistmovement.com',
- 'thumbnail': 'http://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
+ 'timestamp': 1292248482.625,
'upload_date': '20101213',
+ 'view_count': int,
}
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- info_url = "https://dotsub.com/api/media/%s/metadata" % video_id
- info = self._download_json(info_url, video_id)
- date = time.gmtime(info['dateCreated'] / 1000) # The timestamp is in miliseconds
+ video_id = self._match_id(url)
+
+ info = self._download_json(
+ 'https://dotsub.com/api/media/%s/metadata' % video_id, video_id)
+ video_url = info.get('mediaURI')
+
+ if not video_url:
+ webpage = self._download_webpage(url, video_id)
+ video_url = self._search_regex(
+ r'"file"\s*:\s*\'([^\']+)', webpage, 'video url')
return {
'id': video_id,
- 'url': info['mediaURI'],
+ 'url': video_url,
'ext': 'flv',
'title': info['title'],
- 'thumbnail': info['screenshotURI'],
- 'description': info['description'],
- 'uploader': info['user'],
- 'view_count': info['numberOfViews'],
- 'upload_date': '%04i%02i%02i' % (date.tm_year, date.tm_mon, date.tm_mday),
+ 'description': info.get('description'),
+ 'thumbnail': info.get('screenshotURI'),
+ 'duration': int_or_none(info.get('duration'), 1000),
+ 'uploader': info.get('user'),
+ 'timestamp': float_or_none(info.get('dateCreated'), 1000),
+ 'view_count': int_or_none(info.get('numberOfViews')),
}
'id': '1740434',
'display_id': 'hot-perky-blonde-naked-golf',
'ext': 'mp4',
- 'title': 'Hot Perky Blonde Naked Golf',
+ 'title': 'hot perky blonde naked golf',
'like_count': int,
'dislike_count': int,
'comment_count': int,
r'<source src="([^"]+)"', webpage, 'video URL')
title = self._html_search_regex(
- r'<title>([^<]+)\s*-\s*Free', webpage, 'title')
+ [r'class="hd_title" style="[^"]+">([^<]+)</h1>', r'<title>([^<]+) - \d+'],
+ webpage, 'title')
thumbnail = self._html_search_regex(
r'poster="([^"]+)"',
from __future__ import unicode_literals
-from .subtitles import SubtitlesInfoExtractor
-from .common import ExtractorError
+from .common import InfoExtractor, ExtractorError
from ..utils import parse_iso8601
-class DRTVIE(SubtitlesInfoExtractor):
+class DRTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
_TEST = {
}
for subs in subtitles_list:
lang = subs['Language']
- subtitles[LANGS.get(lang, lang)] = subs['Uri']
+ subtitles[LANGS.get(lang, lang)] = [{'url': subs['Uri'], 'ext': 'vtt'}]
if not formats and restricted_to_denmark:
raise ExtractorError(
self._sort_formats(formats)
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, subtitles)
- return
-
return {
'id': video_id,
'title': title,
'timestamp': timestamp,
'duration': duration,
'formats': formats,
- 'subtitles': self.extract_subtitles(video_id, subtitles),
+ 'subtitles': subtitles,
}
--- /dev/null
+# encoding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+
+
+class EmbedlyIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www|cdn\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?url=(?P<id>[^#&]+)'
+ _TESTS = [{
+ 'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ return self.url_result(compat_urllib_parse_unquote(self._match_id(url)))
title = self._html_search_regex(
r'<title>(.*?) - EPORNER', webpage, 'title')
- redirect_code = self._html_search_regex(
- r'<script type="text/javascript" src="/config5/%s/([a-f\d]+)/">' % video_id,
- webpage, 'redirect_code')
- redirect_url = 'http://www.eporner.com/config5/%s/%s' % (video_id, redirect_code)
+ redirect_url = 'http://www.eporner.com/config5/%s' % video_id
player_code = self._download_webpage(
redirect_url, display_id, note='Downloading player config')
'duration': duration,
'view_count': view_count,
'formats': formats,
- 'age_limit': self._rta_search(webpage),
+ 'age_limit': 18,
}
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
+ compat_urllib_request,
)
from ..utils import (
ExtractorError,
+ js_to_json,
+ parse_duration,
)
class EscapistIE(InfoExtractor):
- _VALID_URL = r'^https?://?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<id>[0-9]+)-'
+ _VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
+ _USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
_TEST = {
'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
'id': '6618',
'ext': 'mp4',
'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
- 'uploader': 'the-escapist-presents',
+ 'uploader_id': 'the-escapist-presents',
+ 'uploader': 'The Escapist Presents',
'title': "Breaking Down Baldur's Gate",
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 264,
}
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- showName = mobj.group('showname')
- video_id = mobj.group('id')
-
- self.report_extraction(video_id)
- webpage = self._download_webpage(url, video_id)
-
- videoDesc = self._html_search_regex(
- r'<meta name="description" content="([^"]*)"',
- webpage, 'description', fatal=False)
-
- playerUrl = self._og_search_video_url(webpage, name='player URL')
-
- title = self._html_search_regex(
- r'<meta name="title" content="([^"]*)"',
- webpage, 'title').split(' : ')[-1]
-
- configUrl = self._search_regex('config=(.*)$', playerUrl, 'config URL')
- configUrl = compat_urllib_parse.unquote(configUrl)
+ video_id = self._match_id(url)
+ webpage_req = compat_urllib_request.Request(url)
+ webpage_req.add_header('User-Agent', self._USER_AGENT)
+ webpage = self._download_webpage(webpage_req, video_id)
+
+ uploader_id = self._html_search_regex(
+ r"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'",
+ webpage, 'uploader ID', fatal=False)
+ uploader = self._html_search_regex(
+ r"<h1\s+class='headline'>(.*?)</a>",
+ webpage, 'uploader', fatal=False)
+ description = self._html_search_meta('description', webpage)
+ duration = parse_duration(self._html_search_meta('duration', webpage))
+
+ raw_title = self._html_search_meta('title', webpage, fatal=True)
+ title = raw_title.partition(' : ')[2]
+
+ config_url = compat_urllib_parse.unquote(self._html_search_regex(
+ r'''(?x)
+ (?:
+ <param\s+name="flashvars".*?\s+value="config=|
+ flashvars="config=
+ )
+ (https?://[^"&]+)
+ ''',
+ webpage, 'config URL'))
formats = []
+ ad_formats = []
- def _add_format(name, cfgurl, quality):
+ def _add_format(name, cfg_url, quality):
+ cfg_req = compat_urllib_request.Request(cfg_url)
+ cfg_req.add_header('User-Agent', self._USER_AGENT)
config = self._download_json(
- cfgurl, video_id,
+ cfg_req, video_id,
'Downloading ' + name + ' configuration',
'Unable to download ' + name + ' configuration',
- transform_source=lambda s: s.replace("'", '"'))
+ transform_source=js_to_json)
playlist = config['playlist']
- formats.append({
- 'url': playlist[1]['url'],
- 'format_id': name,
- 'quality': quality,
- })
-
- _add_format('normal', configUrl, quality=0)
- hq_url = (configUrl +
- ('&hq=1' if '?' in configUrl else configUrl + '?hq=1'))
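+ # The config playlist mixes the main clip with postroll ads; split by eventCategory so the ad formats are only exposed when --include-ads is set (handled below).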
+ for p in playlist:
+ if p.get('eventCategory') == 'Video':
+ ar = formats
+ elif p.get('eventCategory') == 'Video Postroll':
+ ar = ad_formats
+ else:
+ continue
+
+ ar.append({
+ 'url': p['url'],
+ 'format_id': name,
+ 'quality': quality,
+ 'http_headers': {
+ 'User-Agent': self._USER_AGENT,
+ },
+ })
+
+ _add_format('normal', config_url, quality=0)
+ hq_url = (config_url +
+ ('&hq=1' if '?' in config_url else '?hq=1'))
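+ # Not every video has an HQ encode; requesting hq=1 then fails, so fall back silently to the normal config.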
try:
_add_format('hq', hq_url, quality=1)
except ExtractorError:
pass # That's fine, we'll just use normal quality
-
self._sort_formats(formats)
- return {
+ if '/escapist/sales-marketing/' in formats[-1]['url']:
+ raise ExtractorError('This IP address has been blocked by The Escapist', expected=True)
+
+ res = {
'id': video_id,
'formats': formats,
- 'uploader': showName,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
'title': title,
'thumbnail': self._og_search_thumbnail(webpage),
- 'description': videoDesc,
- 'player_url': playerUrl,
+ 'description': description,
+ 'duration': duration,
}
+
+ if self._downloader.params.get('include_ads') and ad_formats:
+ self._sort_formats(ad_formats)
+ ad_res = {
+ 'id': '%s-ad' % video_id,
+ 'title': '%s (Postroll)' % title,
+ 'formats': ad_formats,
+ }
+ return {
+ '_type': 'playlist',
+ 'entries': [res, ad_res],
+ 'title': title,
+ 'id': video_id,
+ }
+
+ return res
params_raw = compat_urllib_parse.unquote(data['params'])
params = json.loads(params_raw)
video_data = params['video_data'][0]
- video_url = video_data.get('hd_src')
- if not video_url:
- video_url = video_data['sd_src']
- if not video_url:
- raise ExtractorError('Cannot find video URL')
+
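+ # Collect whichever of the sd/hd sources are present; hd comes last so it is preferred when the formats are left unsorted.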
+ formats = []
+ for quality in ['sd', 'hd']:
+ src = video_data.get('%s_src' % quality)
+ if src is not None:
+ formats.append({
+ 'format_id': quality,
+ 'url': src,
+ })
+ if not formats:
+ raise ExtractorError('Cannot find video formats')
video_title = self._html_search_regex(
r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title',
return {
'id': video_id,
'title': video_title,
- 'url': video_url,
+ 'formats': formats,
'duration': int_or_none(video_data.get('video_duration')),
'thumbnail': video_data.get('thumbnail_src'),
}
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
+ video_id = self._match_id(url)
page = self._download_webpage(url, video_id)
- title = self._html_search_meta('twitter:title', page, 'title')
+
+ title = self._html_search_meta('twitter:title', page, 'title', fatal=True)
description = self._html_search_meta('twitter:description', page, 'description')
data = self._download_xml(
'height': int(details.find('./height').text.strip()),
} for details in item.findall('./source/file_details') if details.find('./file').text
]
+ self._sort_formats(formats)
return {
'id': video_id,
# encoding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from ..utils import int_or_none
class FirstTVIE(InfoExtractor):
- IE_NAME = 'firsttv'
- IE_DESC = 'Видеоархив - Первый канал'
- _VALID_URL = r'http://(?:www\.)?1tv\.ru/videoarchive/(?P<id>\d+)'
+ IE_NAME = '1tv'
+ IE_DESC = 'Первый канал'
+ _VALID_URL = r'http://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>.+)'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.1tv.ru/videoarchive/73390',
- 'md5': '3de6390cf0cca4a5eae1d1d83895e5ad',
+ 'md5': '777f525feeec4806130f4f764bc18a4f',
'info_dict': {
'id': '73390',
'ext': 'mp4',
'title': 'Олимпийские канатные дороги',
- 'description': 'md5:cc730d2bf4215463e37fff6a1e277b13',
- 'thumbnail': 'http://img1.1tv.ru/imgsize640x360/PR20140210114657.JPG',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
'duration': 149,
+ 'like_count': int,
+ 'dislike_count': int,
+ },
+ 'skip': 'Only works from Russia',
+ }, {
+ 'url': 'http://www.1tv.ru/prj/inprivate/vypusk/35930',
+ 'md5': 'a1b6b60d530ebcf8daacf4565762bbaf',
+ 'info_dict': {
+ 'id': '35930',
+ 'ext': 'mp4',
+ 'title': 'Наедине со всеми. Людмила Сенчина',
+ 'description': 'md5:89553aed1d641416001fe8d450f06cb9',
+ 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
+ 'duration': 2694,
},
'skip': 'Only works from Russia',
- }
+ }]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id, 'Downloading page')
video_url = self._html_search_regex(
- r'''(?s)jwplayer\('flashvideoportal_1'\)\.setup\({.*?'file': '([^']+)'.*?}\);''', webpage, 'video URL')
+ r'''(?s)(?:jwplayer\('flashvideoportal_1'\)\.setup\({|var\s+playlistObj\s*=).*?'file'\s*:\s*'([^']+)'.*?}\);''',
+ webpage, 'video URL')
title = self._html_search_regex(
- r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>', webpage, 'title')
+ [r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
+ r"'title'\s*:\s*'([^']+)'"], webpage, 'title')
description = self._html_search_regex(
- r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>', webpage, 'description', fatal=False)
+ r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>',
+ webpage, 'description', default=None) or self._html_search_meta(
+ 'description', webpage, 'description')
thumbnail = self._og_search_thumbnail(webpage)
- duration = self._og_search_property('video:duration', webpage, 'video duration', fatal=False)
+ duration = self._og_search_property(
+ 'video:duration', webpage,
+ 'video duration', fatal=False)
- like_count = self._html_search_regex(r'title="Понравилось".*?/></label> \[(\d+)\]',
- webpage, 'like count', fatal=False)
- dislike_count = self._html_search_regex(r'title="Не понравилось".*?/></label> \[(\d+)\]',
- webpage, 'dislike count', fatal=False)
+ like_count = self._html_search_regex(
+ r'title="Понравилось".*?/></label> \[(\d+)\]',
+ webpage, 'like count', default=None)
+ dislike_count = self._html_search_regex(
+ r'title="Не понравилось".*?/></label> \[(\d+)\]',
+ webpage, 'dislike count', default=None)
return {
'id': video_id,
IE_NAME = '5min'
_VALID_URL = r'''(?x)
(?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=|
+ https?://(?:(?:massively|www)\.)?joystiq\.com/video/|
5min:)
(?P<id>\d+)
'''
+# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
+from ..utils import (
+ xpath_text,
+ xpath_with_ns,
+)
class GamekingsIE(InfoExtractor):
- _VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
- _TEST = {
+ _VALID_URL = r'http://www\.gamekings\.tv/(?:videos|nieuws)/(?P<id>[^/]+)'
+ _TESTS = [{
'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
# MD5 is flaky, seems to change regularly
# 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3',
'info_dict': {
- 'id': '20130811',
+ 'id': 'phoenix-wright-ace-attorney-dual-destinies-review',
'ext': 'mp4',
'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review',
'description': 'md5:36fd701e57e8c15ac8682a2374c99731',
- }
- }
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ }, {
+ # vimeo video
+ 'url': 'http://www.gamekings.tv/videos/the-legend-of-zelda-majoras-mask/',
+ 'md5': '12bf04dfd238e70058046937657ea68d',
+ 'info_dict': {
+ 'id': 'the-legend-of-zelda-majoras-mask',
+ 'ext': 'mp4',
+ 'title': 'The Legend of Zelda: Majora’s Mask',
+ 'description': 'md5:9917825fe0e9f4057601fe1e38860de3',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ }, {
+ 'url': 'http://www.gamekings.tv/nieuws/gamekings-extra-shelly-en-david-bereiden-zich-voor-op-de-livestream/',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
+ video_id = self._match_id(url)
- mobj = re.match(self._VALID_URL, url)
- name = mobj.group('name')
- webpage = self._download_webpage(url, name)
- video_url = self._og_search_video_url(webpage)
+ webpage = self._download_webpage(url, video_id)
- video = re.search(r'[0-9]+', video_url)
- video_id = video.group(0)
+ playlist_id = self._search_regex(
+ r'gogoVideo\(\s*\d+\s*,\s*"([^"]+)', webpage, 'playlist id')
- # Todo: add medium format
- video_url = video_url.replace(video_id, 'large/' + video_id)
+ playlist = self._download_xml(
+ 'http://www.gamekings.tv/wp-content/themes/gk2010/rss_playlist.php?id=%s' % playlist_id,
+ video_id)
+
+ NS_MAP = {
+ 'jwplayer': 'http://rss.jwpcdn.com/'
+ }
+
+ item = playlist.find('./channel/item')
+
+ thumbnail = xpath_text(item, xpath_with_ns('./jwplayer:image', NS_MAP), 'thumbnail')
+ video_url = item.find(xpath_with_ns('./jwplayer:source', NS_MAP)).get('file')
return {
'id': video_id,
- 'ext': 'mp4',
'url': video_url,
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
+ 'thumbnail': thumbnail,
}
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..utils import (
int_or_none,
webpage = self._download_webpage(url, video_id)
og_title = self._og_search_title(webpage)
- title = og_title.replace(' - Video bei GameStar.de', '').strip()
+ title = re.sub(r'\s*- Video (bei|-) GameStar\.de$', '', og_title)
url = 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id
compat_urllib_parse,
compat_urllib_request,
)
+from ..utils import remove_end
class GDCVaultIE(InfoExtractor):
def _parse_flv(self, xml_description):
video_formats = []
- akami_url = xml_description.find('./metadata/akamaiHost').text
+ akamai_url = xml_description.find('./metadata/akamaiHost').text
slide_video_path = xml_description.find('./metadata/slideVideo').text
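+ # rtmpdump-style split: the connection URL keeps only the app ('ondemand?ovpfv=1.1') while the stream name goes into play_path, minus the .flv suffix.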
video_formats.append({
- 'url': 'rtmp://' + akami_url + '/' + slide_video_path,
+ 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
+ 'play_path': remove_end(slide_video_path, '.flv'),
+ 'ext': 'flv',
'format_note': 'slide deck video',
'quality': -2,
'preference': -2,
})
speaker_video_path = xml_description.find('./metadata/speakerVideo').text
video_formats.append({
- 'url': 'rtmp://' + akami_url + '/' + speaker_video_path,
+ 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
+ 'play_path': remove_end(speaker_video_path, '.flv'),
+ 'ext': 'flv',
'format_note': 'speaker video',
'quality': -1,
'preference': -1,
{
'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
'info_dict': {
+ 'id': '1986',
'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
},
'playlist_mincount': 2,
'upload_date': '20150126',
},
'add_ie': ['Viddler'],
- }
+ },
+ # jwplayer YouTube
+ {
+ 'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
+ 'info_dict': {
+ 'id': 'Mrj4DVp2zeA',
+ 'ext': 'mp4',
+ 'upload_date': '20150212',
+ 'uploader': 'The National Archives UK',
+ 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
+ 'uploader_id': 'NationalArchives08',
+ 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
+ },
+ },
+ # rtl.nl embed
+ {
+ 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
+ 'playlist_mincount': 5,
+ 'info_dict': {
+ 'id': 'aanslagen-kopenhagen',
+ 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
+ }
+ },
+ # Zapiks embed
+ {
+ 'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
+ 'info_dict': {
+ 'id': '118046',
+ 'ext': 'mp4',
+ 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
+ }
+ },
+ # Kaltura embed
+ {
+ 'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
+ 'info_dict': {
+ 'id': '1_eergr3h1',
+ 'ext': 'mp4',
+ 'upload_date': '20150226',
+ 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
+ 'timestamp': int,
+ 'title': 'John Carlson Postgame 2/25/15',
+ },
+ },
]
def report_following_redirect(self, new_url):
'entries': entries,
}
+ # Look for embedded rtl.nl player
+ matches = re.findall(
+ r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"',
+ webpage)
+ if matches:
+ return _playlist_from_matches(matches, ie='RtlNl')
+
# Look for embedded (iframe) Vimeo player
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
player_url = unescapeHTML(mobj.group('url'))
surl = smuggle_url(player_url, {'Referer': url})
return self.url_result(surl)
-
# Look for embedded (swf embed) Vimeo player
mobj = re.search(
r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
# Look for embedded sbs.com.au player
mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)sbs\.com\.au/ondemand/video/single/.+?)\1',
+ r'''(?x)
+ (?:
+ <meta\s+property="og:video"\s+content=|
+ <iframe[^>]+?src=
+ )
+ (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'SBS')
if mobj is not None:
return self.url_result(mobj.group('url'), 'Livestream')
+ # Look for Zapiks embed
+ mobj = re.search(
+ r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Zapiks')
+
+ # Look for Kaltura embeds
+ mobj = re.search(
+ r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
+ if mobj is not None:
+ return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
+
def check_video(vurl):
+ if YoutubeIE.suitable(vurl):
+ return True
vpath = compat_urlparse.urlparse(vurl).path
vext = determine_ext(vpath)
return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
JWPlayerOptions|
jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
)
- .*?file\s*:\s*["\'](.*?)["\']''', webpage))
+ .*?
+ ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
if not found:
# Broaden the search a little bit
found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
return entries[0]
else:
for num, e in enumerate(entries, start=1):
- e['title'] = '%s (%d)' % (e['title'], num)
+ # 'url' results don't have a title
+ if e.get('title') is not None:
+ e['title'] = '%s (%d)' % (e['title'], num)
return {
'_type': 'playlist',
'entries': entries,
duration = parse_duration(self._html_search_regex(
r'<span class="duration">\s*-?\s*(.*?)</span>',
webpage, 'duration', fatal=False))
- family_friendly = self._html_search_meta(
- 'isFamilyFriendly', webpage, default='false')
flashvars = compat_parse_qs(self._html_search_regex(
r'<embed.+?id="flash-player-embed".+?flashvars="([^"]+)"',
'title': title,
'thumbnail': thumbnail,
'duration': duration,
- 'age_limit': 0 if family_friendly == 'true' else 18,
+ 'age_limit': self._family_friendly_search(webpage),
}
--- /dev/null
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import smuggle_url
+
+
+class HistoryIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?history\.com/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])'
+
+ _TESTS = [{
+ 'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',
+ 'md5': '6fe632d033c92aa10b8d4a9be047a7c5',
+ 'info_dict': {
+ 'id': 'bLx5Dv5Aka1G',
+ 'ext': 'mp4',
+ 'title': "Bet You Didn't Know: Valentine's Day",
+ 'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
+ },
+ 'add_ie': ['ThePlatform'],
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ video_url = self._search_regex(
+ r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id,
+ webpage, 'video url')
+
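+ # ThePlatform releases on history.com are signed; the site's player key pair is smuggled through so ThePlatformIE can compute the signature.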
+ return self.url_result(smuggle_url(video_url, {'sig': {'key': 'crazyjava', 'secret': 's3cr3t'}}))
},
{
'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
+ 'info_dict': {
+ 'id': '100-little-things-in-gta-5-that-will-blow-your-mind',
+ },
'playlist': [
{
'info_dict': {
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ js_to_json,
+ mimetype2ext,
+ ExtractorError,
+)
+
+
+class ImgurIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)(?:\.mp4|\.gifv)?'
+
+ _TESTS = [{
+ 'url': 'https://i.imgur.com/A61SaA1.gifv',
+ 'info_dict': {
+ 'id': 'A61SaA1',
+ 'ext': 'mp4',
+ 'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
+ 'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$',
+ },
+ }, {
+ 'url': 'https://imgur.com/A61SaA1',
+ 'info_dict': {
+ 'id': 'A61SaA1',
+ 'ext': 'mp4',
+ 'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
+ 'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ width = int_or_none(self._search_regex(
+ r'<param name="width" value="([0-9]+)"',
+ webpage, 'width', fatal=False))
+ height = int_or_none(self._search_regex(
+ r'<param name="height" value="([0-9]+)"',
+ webpage, 'height', fatal=False))
+
+ video_elements = self._search_regex(
+ r'(?s)<div class="video-elements">(.*?)</div>',
+ webpage, 'video elements', default=None)
+ if not video_elements:
+ raise ExtractorError(
+ 'No sources found for video %s. Maybe an image?' % video_id,
+ expected=True)
+
+ formats = []
+ for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements):
+ formats.append({
+ 'format_id': m.group('type').partition('/')[2],
+ 'url': self._proto_relative_url(m.group('src')),
+ 'ext': mimetype2ext(m.group('type')),
+ 'acodec': 'none',
+ 'width': width,
+ 'height': height,
+ 'http_headers': {
+ 'User-Agent': 'youtube-dl (like wget)',
+ },
+ })
+
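+ # Besides the <source> MP4/WebM variants, the page embeds a videoItem JS object describing the original GIF; expose it as a deprioritized fallback format.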
+ gif_json = self._search_regex(
+ r'(?s)var\s+videoItem\s*=\s*(\{.*?\})',
+ webpage, 'GIF code', fatal=False)
+ if gif_json:
+ gifd = self._parse_json(
+ gif_json, video_id, transform_source=js_to_json)
+ formats.append({
+ 'format_id': 'gif',
+ 'preference': -10,
+ 'width': width,
+ 'height': height,
+ 'ext': 'gif',
+ 'acodec': 'none',
+ 'vcodec': 'gif',
+ 'container': 'gif',
+ 'url': self._proto_relative_url(gifd['gifUrl']),
+ 'filesize': gifd.get('size'),
+ 'http_headers': {
+ 'User-Agent': 'youtube-dl (like wget)',
+ },
+ })
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'description': self._og_search_description(webpage),
+ 'title': self._og_search_title(webpage),
+ }
r'comment_count\s*=\s*\'([^\']+)\';',
webpage, 'comment_count', fatal=False)
- family_friendly = self._html_search_meta(
- 'isFamilyFriendly', webpage, 'age limit', fatal=False)
-
content_url = self._html_search_meta(
'contentURL', webpage, 'content URL', fatal=False)
ext = determine_ext(content_url, 'mp4')
'duration': duration,
'view_count': int_or_none(view_count),
'comment_count': int_or_none(comment_count),
- 'age_limit': 18 if family_friendly == 'False' else 0,
+ 'age_limit': self._family_friendly_search(webpage),
'formats': formats,
}
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+)
+
+
+class KalturaIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:kaltura:|
+ https?://(?:(?:www|cdnapisec)\.)?kaltura\.com/index\.php/kwidget/(?:[^/]+/)*?wid/_
+ )(?P<partner_id>\d+)
+ (?::|
+ /(?:[^/]+/)*?entry_id/
+ )(?P<id>[0-9a-z_]+)'''
+ _API_BASE = 'http://cdnapi.kaltura.com/api_v3/index.php?'
+ _TESTS = [
+ {
+ 'url': 'kaltura:269692:1_1jc2y3e4',
+ 'md5': '3adcbdb3dcc02d647539e53f284ba171',
+ 'info_dict': {
+ 'id': '1_1jc2y3e4',
+ 'ext': 'mp4',
+ 'title': 'Track 4',
+ 'upload_date': '20131219',
+ 'uploader_id': 'mlundberg@wolfgangsvault.com',
+ 'description': 'The Allman Brothers Band, 12/16/1981',
+ 'thumbnail': 're:^https?://.*/thumbnail/.*',
+ 'timestamp': int,
+ },
+ },
+ {
+ 'url': 'http://www.kaltura.com/index.php/kwidget/cache_st/1300318621/wid/_269692/uiconf_id/3873291/entry_id/1_1jc2y3e4',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://cdnapisec.kaltura.com/index.php/kwidget/wid/_557781/uiconf_id/22845202/entry_id/1_plr1syf3',
+ 'only_matching': True,
+ },
+ ]
+
+ def _kaltura_api_call(self, video_id, actions, *args, **kwargs):
+ params = actions[0]
+ if len(actions) > 1:
+ for i, a in enumerate(actions[1:], start=1):
+ for k, v in a.items():
+ params['%d:%s' % (i, k)] = v
+
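+ # Kaltura's multirequest API namespaces each call's parameters by index, e.g. the second action's 'action': 'get' is sent as '1:action=get'.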
+ query = compat_urllib_parse.urlencode(params)
+ url = self._API_BASE + query
+ data = self._download_json(url, video_id, *args, **kwargs)
+
+ status = data if len(actions) == 1 else data[0]
+ if status.get('objectType') == 'KalturaAPIException':
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, status['message']))
+
+ return data
+
+ def _get_kaltura_signature(self, video_id, partner_id):
+ actions = [{
+ 'apiVersion': '3.1',
+ 'expiry': 86400,
+ 'format': 1,
+ 'service': 'session',
+ 'action': 'startWidgetSession',
+ 'widgetId': '_%s' % partner_id,
+ }]
+ return self._kaltura_api_call(
+ video_id, actions, note='Downloading Kaltura signature')['ks']
+
+ def _get_video_info(self, video_id, partner_id):
+ signature = self._get_kaltura_signature(video_id, partner_id)
+ actions = [
+ {
+ 'action': 'null',
+ 'apiVersion': '3.1.5',
+ 'clientTag': 'kdp:v3.8.5',
+ 'format': 1, # JSON, 2 = XML, 3 = PHP
+ 'service': 'multirequest',
+ 'ks': signature,
+ },
+ {
+ 'action': 'get',
+ 'entryId': video_id,
+ 'service': 'baseentry',
+ 'version': '-1',
+ },
+ {
+ 'action': 'getContextData',
+ 'contextDataParams:objectType': 'KalturaEntryContextDataParams',
+ 'contextDataParams:referrer': 'http://www.kaltura.com/',
+ 'contextDataParams:streamerType': 'http',
+ 'entryId': video_id,
+ 'service': 'baseentry',
+ },
+ ]
+ return self._kaltura_api_call(
+ video_id, actions, note='Downloading video info JSON')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ mobj = re.match(self._VALID_URL, url)
+ partner_id, entry_id = mobj.group('partner_id'), mobj.group('id')
+
+ info, source_data = self._get_video_info(entry_id, partner_id)
+
+ formats = [{
+ 'format_id': '%(fileExt)s-%(bitrate)s' % f,
+ 'ext': f['fileExt'],
+ 'tbr': f['bitrate'],
+ 'fps': f.get('frameRate'),
+ 'filesize_approx': int_or_none(f.get('size'), invscale=1024),
+ 'container': f.get('containerFormat'),
+ 'vcodec': f.get('videoCodecId'),
+ 'height': f.get('height'),
+ 'width': f.get('width'),
+ 'url': '%s/flavorId/%s' % (info['dataUrl'], f['id']),
+ } for f in source_data['flavorAssets']]
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': info['name'],
+ 'formats': formats,
+ 'description': info.get('description'),
+ 'thumbnail': info.get('thumbnailUrl'),
+ 'duration': info.get('duration'),
+ 'timestamp': info.get('createdAt'),
+ 'uploader_id': info.get('userId'),
+ 'view_count': info.get('plays'),
+ }
+# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import random
import re
from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+ ExtractorError,
+ xpath_text,
+)
class Laola1TvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/.*?/(?P<id>[0-9]+)\.html'
_TEST = {
- 'url': 'http://www.laola1.tv/de-de/live/bwf-bitburger-open-grand-prix-gold-court-1/250019.html',
+ 'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
'info_dict': {
- 'id': '250019',
+ 'id': '227883',
'ext': 'mp4',
- 'title': 'Bitburger Open Grand Prix Gold - Court 1',
- 'categories': ['Badminton'],
- 'uploader': 'BWF - Badminton World Federation',
- 'is_live': True,
+ 'title': 'Straubing Tigers - Kölner Haie',
+ 'categories': ['Eishockey'],
+ 'is_live': False,
},
'params': {
'skip_download': True,
}
}
- _BROKEN = True # Not really - extractor works fine, but f4m downloader does not support live streams yet.
-
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
r'flashvars\.([_a-zA-Z0-9]+)\s*=\s*"([^"]*)";', iframe)
flashvars = dict((m[0], m[1]) for m in flashvars_m)
+ partner_id = self._search_regex(
+ r'partnerid\s*:\s*"([^"]+)"', iframe, 'partner id')
+
xml_url = ('http://www.laola1.tv/server/hd_video.php?' +
- 'play=%s&partner=1&portal=%s&v5ident=&lang=%s' % (
- video_id, portal, lang))
+ 'play=%s&partner=%s&portal=%s&v5ident=&lang=%s' % (
+ video_id, partner_id, portal, lang))
hd_doc = self._download_xml(xml_url, video_id)
- title = hd_doc.find('.//video/title').text
- flash_url = hd_doc.find('.//video/url').text
- categories = hd_doc.find('.//video/meta_sports').text.split(',')
- uploader = hd_doc.find('.//video/meta_organistation').text
+ title = xpath_text(hd_doc, './/video/title', fatal=True)
+ flash_url = xpath_text(hd_doc, './/video/url', fatal=True)
+ uploader = xpath_text(hd_doc, './/video/meta_organistation')
+ is_live = xpath_text(hd_doc, './/video/islive') == 'true'
+
+ categories = xpath_text(hd_doc, './/video/meta_sports')
+ if categories:
+ categories = categories.split(',')
ident = random.randint(10000000, 99999999)
token_url = '%s&ident=%s&klub=0&unikey=0&timestamp=%s&auth=%s' % (
token_doc = self._download_xml(
token_url, video_id, note='Downloading token')
token_attrib = token_doc.find('.//token').attrib
- if token_attrib.get('auth') == 'blocked':
- raise ExtractorError('Token error: ' % token_attrib.get('comment'))
+ if token_attrib.get('auth') in ('blocked', 'restricted'):
+ raise ExtractorError(
+ 'Token error: %s' % token_attrib.get('comment'), expected=True)
video_url = '%s?hdnea=%s&hdcore=3.2.0' % (
token_attrib['url'], token_attrib['auth'])
return {
'id': video_id,
- 'is_live': True,
+ 'is_live': is_live,
'title': title,
'url': video_url,
'uploader': uploader,
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import datetime
+import re
+import time
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urlparse,
+ compat_urllib_parse,
+)
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ parse_iso8601,
+)
+
+
+class LetvIE(InfoExtractor):
+ _VALID_URL = r'http://www\.letv\.com/ptv/vplay/(?P<id>\d+)\.html'
+
+ _TESTS = [{
+ 'url': 'http://www.letv.com/ptv/vplay/22005890.html',
+ 'md5': 'cab23bd68d5a8db9be31c9a222c1e8df',
+ 'info_dict': {
+ 'id': '22005890',
+ 'ext': 'mp4',
+ 'title': '第87届奥斯卡颁奖礼完美落幕 《鸟人》成最大赢家',
+ 'timestamp': 1424747397,
+ 'upload_date': '20150224',
+ 'description': 'md5:a9cb175fd753e2962176b7beca21a47c',
+ }
+ }, {
+ 'url': 'http://www.letv.com/ptv/vplay/1415246.html',
+ 'info_dict': {
+ 'id': '1415246',
+ 'ext': 'mp4',
+ 'title': '美人天下01',
+ 'description': 'md5:f88573d9d7225ada1359eaf0dbf8bcda',
+ },
+ 'expected_warnings': [
+ 'publish time'
+ ]
+ }]
+ # http://www.letv.com/ptv/vplay/1118082.html
+ # This video is available only in Mainland China
+
+ @staticmethod
+ def urshift(val, n):
+ return val >> n if val >= 0 else (val + 0x100000000) >> n
+
+ # ror() and calc_time_key() are reverse-engineered from the embedded swf player (KLetvPlayer.swf)
+ def ror(self, param1, param2):
+ _loc3_ = 0
+ while _loc3_ < param2:
+ param1 = self.urshift(param1, 1) + ((param1 & 1) << 31)
+ _loc3_ += 1
+ return param1
+
+ def calc_time_key(self, param1):
+ _loc2_ = 773625421
+ _loc3_ = self.ror(param1, _loc2_ % 13)
+ _loc3_ = _loc3_ ^ _loc2_
+ _loc3_ = self.ror(_loc3_, _loc2_ % 17)
+ return _loc3_
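+ # urshift() emulates a logical (unsigned) 32-bit right shift, so ror()
+ # is a 32-bit rotate-right, e.g. ror(1, 1) == 0x80000000. calc_time_key()
+ # merely obfuscates the request timestamp the same way the Flash player does.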
+
+ def _real_extract(self, url):
+ media_id = self._match_id(url)
+ page = self._download_webpage(url, media_id)
+ params = {
+ 'id': media_id,
+ 'platid': 1,
+ 'splatid': 101,
+ 'format': 1,
+ 'tkey': self.calc_time_key(int(time.time())),
+ 'domain': 'www.letv.com'
+ }
+ play_json = self._download_json(
+ 'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params),
+ media_id, 'playJson data')
+
+ # Check for errors
+ playstatus = play_json['playstatus']
+ if playstatus['status'] == 0:
+ flag = playstatus['flag']
+ if flag == 1:
+ msg = 'Country %s auth error' % playstatus['country']
+ else:
+ msg = 'Generic error. flag = %d' % flag
+ raise ExtractorError(msg, expected=True)
+
+ playurl = play_json['playurl']
+
+ formats = ['350', '1000', '1300', '720p', '1080p']
+ dispatch = playurl['dispatch']
+
+ urls = []
+ for format_id in formats:
+ if format_id in dispatch:
+ media_url = playurl['domain'][0] + dispatch[format_id][0]
+
+ # Mimic what flvxz.com does
+ url_parts = list(compat_urlparse.urlparse(media_url))
+ qs = dict(compat_urlparse.parse_qsl(url_parts[4]))
+ qs.update({
+ 'platid': '14',
+ 'splatid': '1401',
+ 'tss': 'no',
+ 'retry': 1
+ })
+ url_parts[4] = compat_urllib_parse.urlencode(qs)
+ media_url = compat_urlparse.urlunparse(url_parts)
+
+ url_info_dict = {
+ 'url': media_url,
+ 'ext': determine_ext(dispatch[format_id][1])
+ }
+
+ if format_id[-1:] == 'p':
+ url_info_dict['height'] = int(format_id[:-1])
+
+ urls.append(url_info_dict)
+
+ publish_time = parse_iso8601(self._html_search_regex(
+ r'发布时间 ([^<>]+) ', page, 'publish time', fatal=False),
+ delimiter=' ', timezone=datetime.timedelta(hours=8))
+ description = self._html_search_meta('description', page, fatal=False)
+
+ return {
+ 'id': media_id,
+ 'formats': urls,
+ 'title': playurl['title'],
+ 'thumbnail': playurl['pic'],
+ 'description': description,
+ 'timestamp': publish_time,
+ }
+
+
+class LetvTvIE(InfoExtractor):
+ _VALID_URL = r'http://www\.letv\.com/tv/(?P<id>\d+)\.html'
+ _TESTS = [{
+ 'url': 'http://www.letv.com/tv/46177.html',
+ 'info_dict': {
+ 'id': '46177',
+ 'title': '美人天下',
+ 'description': 'md5:395666ff41b44080396e59570dbac01c'
+ },
+ 'playlist_count': 35
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ page = self._download_webpage(url, playlist_id)
+
+ media_urls = list(set(re.findall(
+ r'http://www\.letv\.com/ptv/vplay/\d+\.html', page)))
+ entries = [self.url_result(media_url, ie='Letv')
+ for media_url in media_urls]
+
+ title = self._html_search_meta('keywords', page,
+ fatal=False).split(',')[0]
+ description = self._html_search_meta('description', page, fatal=False)
+
+ return self.playlist_result(entries, playlist_id, playlist_title=title,
+ playlist_description=description)
+
+
+class LetvPlaylistIE(LetvTvIE):
+ _VALID_URL = r'http://tv\.letv\.com/[a-z]+/(?P<id>[a-z]+)/index\.s?html'
+ _TESTS = [{
+ 'url': 'http://tv.letv.com/izt/wuzetian/index.html',
+ 'info_dict': {
+ 'id': 'wuzetian',
+ 'title': '武媚娘传奇',
+ 'description': 'md5:e12499475ab3d50219e5bba00b3cb248'
+ },
+ # This playlist contains some extra videos other than the drama itself
+ 'playlist_mincount': 96
+ }, {
+ 'url': 'http://tv.letv.com/pzt/lswjzzjc/index.shtml',
+ 'info_dict': {
+ 'id': 'lswjzzjc',
+ # The title should be "劲舞青春", but I can't find a simple way to
+ # determine the playlist title
+ 'title': '乐视午间自制剧场',
+ 'description': 'md5:b1eef244f45589a7b5b1af9ff25a4489'
+ },
+ 'playlist_mincount': 7
+ }]
'url': 'http://new.livestream.com/tedx/cityenglish',
'info_dict': {
'title': 'TEDCity2.0 (English)',
+ 'id': '2245590',
},
'playlist_mincount': 4,
}, {
if is_relevant(video_data, video_id)]
if video_id is None:
# This is an event page:
- return self.playlist_result(videos, info['id'], info['full_name'])
+ return self.playlist_result(
+ videos, '%s' % info['id'], info['full_name'])
else:
if not videos:
raise ExtractorError('Cannot find video %s' % video_id)
import re
import json
-from .subtitles import SubtitlesInfoExtractor
from .common import InfoExtractor
from ..compat import (
compat_str,
)
-class LyndaIE(SubtitlesInfoExtractor):
+class LyndaIE(InfoExtractor):
IE_NAME = 'lynda'
IE_DESC = 'lynda.com videos'
- _VALID_URL = r'https?://www\.lynda\.com/[^/]+/[^/]+/\d+/(\d+)-\d\.html'
+ _VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(\d+)'
_LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
_NETRC_MACHINE = 'lynda'
ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
'md5': 'ecfc6862da89489161fb9cd5f5a6fac1',
'info_dict': {
'title': 'Using the exercise files',
'duration': 68
}
- }
+ }, {
+ 'url': 'https://www.lynda.com/player/embed/133770?tr=foo=1;bar=g;fizz=rt&fs=0',
+ 'only_matching': True,
+ }]
def _real_initialize(self):
self._login()
self._check_formats(formats, video_id)
self._sort_formats(formats)
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, page)
- return
-
- subtitles = self._fix_subtitles(self.extract_subtitles(video_id, page))
+ subtitles = self.extract_subtitles(video_id, page)
return {
'id': video_id,
if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
raise ExtractorError('Unable to log in')
- def _fix_subtitles(self, subtitles):
- if subtitles is None:
- return subtitles # subtitles not requested
-
- fixed_subtitles = {}
- for k, v in subtitles.items():
- subs = json.loads(v)
- if len(subs) == 0:
+ def _fix_subtitles(self, subs):
+ srt = ''
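+ # Each caption stores only its start Timecode; the end time is borrowed from the next caption's start, which is why the loop stops at len(subs) - 1.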
+ for pos in range(0, len(subs) - 1):
+ seq_current = subs[pos]
+ m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
+ if m_current is None:
continue
- srt = ''
- for pos in range(0, len(subs) - 1):
- seq_current = subs[pos]
- m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
- if m_current is None:
- continue
- seq_next = subs[pos + 1]
- m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode'])
- if m_next is None:
- continue
- appear_time = m_current.group('timecode')
- disappear_time = m_next.group('timecode')
- text = seq_current['Caption']
- srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text)
- if srt:
- fixed_subtitles[k] = srt
- return fixed_subtitles
-
- def _get_available_subtitles(self, video_id, webpage):
+ seq_next = subs[pos + 1]
+ m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode'])
+ if m_next is None:
+ continue
+ appear_time = m_current.group('timecode')
+ disappear_time = m_next.group('timecode')
+ text = seq_current['Caption'].lstrip()
+ srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), appear_time, disappear_time, text)
+ if srt:
+ return srt
+
+ def _get_subtitles(self, video_id, webpage):
url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
- sub = self._download_webpage(url, None, False)
- sub_json = json.loads(sub)
- return {'en': url} if len(sub_json) > 0 else {}
+ subs = self._download_json(url, None, False)
+ if subs:
+ return {'en': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]}
+ else:
+ return {}
class LyndaCourseIE(InfoExtractor):
from .common import InfoExtractor
from .youtube import YoutubeIE
-from ..compat import (
- compat_urlparse,
-)
from ..utils import (
clean_html,
ExtractorError,
'upload_date': '20121109',
'uploader_id': 'MIT',
'uploader': 'MIT OpenCourseWare',
- # 'subtitles': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/MIT6_041F11_lec07_300k.mp4.srt'
}
},
{
'uploader_id': 'MIT',
'uploader': 'MIT OpenCourseWare',
'description': 'This section contains lecture video excerpts, lecture notes, an interactive mathlet with supporting documents, and problem solving videos.',
- # 'subtitles': 'http://ocw.mit.edu//courses/mathematics/18-01sc-single-variable-calculus-fall-2010/ocw-18.01-f07-lec01_300k.SRT'
}
}
]
metadata = re.sub(r'[\'"]', '', embed_chapter_media.group(1))
metadata = re.split(r', ?', metadata)
yt = metadata[1]
- subs = compat_urlparse.urljoin(self._BASE_URL, metadata[7])
else:
# search for call to ocw_embed_chapter_media(container_id, media_url, provider, page_url, image_url, captions_file)
embed_media = re.search(r'ocw_embed_media\((.+?)\)', webpage)
metadata = re.sub(r'[\'"]', '', embed_media.group(1))
metadata = re.split(r', ?', metadata)
yt = metadata[1]
- subs = compat_urlparse.urljoin(self._BASE_URL, metadata[5])
else:
raise ExtractorError('Unable to find embedded YouTube video.')
video_id = YoutubeIE.extract_id(yt)
'title': title,
'description': description,
'url': yt,
- 'url_transparent'
- 'subtitles': subs,
'ie_key': 'Youtube',
}
IE_NAME = 'mitele.es'
_VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
'md5': '6a75fe9d0d3275bead0cb683c616fddb',
'info_dict': {
'display_id': 'programa-144',
'duration': 2913,
},
- }
+ }]
def _real_extract(self, url):
episode = self._match_id(url)
class MporaIE(InfoExtractor):
- _VALID_URL = r'https?://(www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)'
+ _VALID_URL = r'https?://(?:www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)'
IE_NAME = 'MPORA'
_TEST = {
webpage = self._download_webpage(url, video_id)
data_json = self._search_regex(
- r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json')
+ [r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;",
+ r"new\s+FM\.Kaltura\.Player\('[^']+'\s*,\s*({.+?})\);"],
+ webpage, 'json')
data = self._parse_json(data_json, video_id)
uploader = data['info_overlay'].get('username')
import re
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
return '{http://search.yahoo.com/mrss/}%s' % tag
-class MTVServicesInfoExtractor(SubtitlesInfoExtractor):
+class MTVServicesInfoExtractor(InfoExtractor):
_MOBILE_TEMPLATE = None
@staticmethod
def _extract_subtitles(self, mdoc, mtvn_id):
subtitles = {}
- FORMATS = {
- 'scc': 'cea-608',
- 'eia-608': 'cea-608',
- 'xml': 'ttml',
- }
- subtitles_format = FORMATS.get(
- self._downloader.params.get('subtitlesformat'), 'ttml')
for transcript in mdoc.findall('.//transcript'):
if transcript.get('kind') != 'captions':
continue
lang = transcript.get('srclang')
- for typographic in transcript.findall('./typographic'):
- captions_format = typographic.get('format')
- if captions_format == subtitles_format:
- subtitles[lang] = compat_str(typographic.get('src'))
- break
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(mtvn_id, subtitles)
- return self.extract_subtitles(mtvn_id, subtitles)
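+ # Expose every typographic variant (scc, ttml, ...) per language; format selection is now handled generically instead of via the old subtitlesformat lookup.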
+ subtitles[lang] = [{
+ 'url': compat_str(typographic.get('src')),
+ 'ext': typographic.get('format')
+ } for typographic in transcript.findall('./typographic')]
+ return subtitles
def _get_video_info(self, itemdoc):
uri = itemdoc.find('guid').text
webpage, 'mgid')
videos_info = self._get_videos_info(mgid)
- if self._downloader.params.get('listsubtitles', False):
- return
return videos_info
import re
from .common import InfoExtractor
-from ..utils import (
- parse_duration,
- unified_strdate,
-)
class MusicVaultIE(InfoExtractor):
_VALID_URL = r'https?://www\.musicvault\.com/(?P<uploader_id>[^/?#]*)/video/(?P<display_id>[^/?#]*)_(?P<id>[0-9]+)\.html'
_TEST = {
'url': 'http://www.musicvault.com/the-allman-brothers-band/video/straight-from-the-heart_1010863.html',
- 'md5': '2cdbb3ae75f7fb3519821507d2fb3c15',
+ 'md5': '3adcbdb3dcc02d647539e53f284ba171',
'info_dict': {
'id': '1010863',
'ext': 'mp4',
'duration': 244,
'uploader': 'The Allman Brothers Band',
'thumbnail': 're:^https?://.*/thumbnail/.*',
- 'upload_date': '19811216',
+ 'upload_date': '20131219',
'location': 'Capitol Theatre (Passaic, NJ)',
'description': 'Listen to The Allman Brothers Band perform Straight from the Heart at Capitol Theatre (Passaic, NJ) on Dec 16, 1981',
+ 'timestamp': int,
}
}
r'<h1.*?>(.*?)</h1>', data_div, 'uploader', fatal=False)
title = self._html_search_regex(
r'<h2.*?>(.*?)</h2>', data_div, 'title')
- upload_date = unified_strdate(self._html_search_regex(
- r'<h3.*?>(.*?)</h3>', data_div, 'uploader', fatal=False))
location = self._html_search_regex(
r'<h4.*?>(.*?)</h4>', data_div, 'location', fatal=False)
- duration = parse_duration(self._html_search_meta('duration', webpage))
-
- VIDEO_URL_TEMPLATE = 'http://cdnapi.kaltura.com/p/%(uid)s/sp/%(wid)s/playManifest/entryId/%(entry_id)s/format/url/protocol/http'
kaltura_id = self._search_regex(
r'<div id="video-detail-player" data-kaltura-id="([^"]+)"',
webpage, 'kaltura ID')
- video_url = VIDEO_URL_TEMPLATE % {
- 'entry_id': kaltura_id,
- 'wid': self._search_regex(r'/wid/_([0-9]+)/', webpage, 'wid'),
- 'uid': self._search_regex(r'uiconf_id/([0-9]+)/', webpage, 'uid'),
- }
+ wid = self._search_regex(r'/wid/_([0-9]+)/', webpage, 'wid')
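+ # The page only exposes the Kaltura widget id and entry id, so delegate the actual extraction to KalturaIE through a transparent URL result.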
return {
'id': mobj.group('id'),
- 'url': video_url,
- 'ext': 'mp4',
+ '_type': 'url_transparent',
+ 'url': 'kaltura:%s:%s' % (wid, kaltura_id),
+ 'ie_key': 'Kaltura',
'display_id': display_id,
'uploader_id': mobj.group('uploader_id'),
'thumbnail': thumbnail,
'description': self._html_search_meta('description', webpage),
- 'upload_date': upload_date,
'location': location,
'title': title,
'uploader': uploader,
- 'duration': duration,
}
--- /dev/null
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ smuggle_url,
+ url_basename,
+)
+
+
+class NationalGeographicIE(InfoExtractor):
+ _VALID_URL = r'http://video\.nationalgeographic\.com/video/.*?'
+
+ _TEST = {
+ 'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo',
+ 'info_dict': {
+ 'id': '4DmDACA6Qtk_',
+ 'ext': 'flv',
+ 'title': 'Mating Crabs Busted by Sharks',
+ 'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3',
+ },
+ 'add_ie': ['ThePlatform'],
+ }
+
+ def _real_extract(self, url):
+ name = url_basename(url)
+
+ webpage = self._download_webpage(url, name)
+ feed_url = self._search_regex(r'data-feed-url="([^"]+)"', webpage, 'feed url')
+ guid = self._search_regex(r'data-video-guid="([^"]+)"', webpage, 'guid')
+
+ feed = self._download_xml('%s?byGuid=%s' % (feed_url, guid), name)
+ content = feed.find('.//{http://search.yahoo.com/mrss/}content')
+ theplatform_id = url_basename(content.attrib.get('url'))
+
+ return self.url_result(smuggle_url(
+ 'http://link.theplatform.com/s/ngs/%s?format=SMIL&formats=MPEG4&manifest=f4m' % theplatform_id,
+ # For some reason, the normal links don't work and we must force the use of f4m
+ {'force_smil_url': True}))
from __future__ import unicode_literals
import re
-import json
from .common import InfoExtractor
from ..compat import (
_TESTS = [
{
- 'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188',
+ 'url': 'http://www.nbc.com/the-tonight-show/segments/112966',
# md5 checksum is not stable
'info_dict': {
- 'id': 'bTmnLCvIbaaH',
+ 'id': 'c9xnCo0YPOPH',
'ext': 'flv',
- 'title': 'I Am a Firefighter',
- 'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.',
+ 'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
+ 'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
},
},
{
class NBCNewsIE(InfoExtractor):
- _VALID_URL = r'''(?x)https?://www\.nbcnews\.com/
- ((video/.+?/(?P<id>\d+))|
- (feature/[^/]+/(?P<title>.+)))
+ _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
+ (?:video/.+?/(?P<id>\d+)|
+ (?:feature|nightly-news)/[^/]+/(?P<title>.+))
'''
_TESTS = [
'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
},
},
+ {
+ 'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
+ 'md5': 'b5dda8cddd8650baa0dcb616dd2cf60d',
+ 'info_dict': {
+ 'id': 'sekXqyTVnmN3',
+ 'ext': 'mp4',
+ 'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
+ 'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
+ },
+ },
]
def _real_extract(self, url):
'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
}
else:
- # "feature" pages use theplatform.com
+ # "feature" and "nightly-news" pages use theplatform.com
title = mobj.group('title')
webpage = self._download_webpage(url, title)
bootstrap_json = self._search_regex(
- r'var bootstrapJson = ({.+})\s*$', webpage, 'bootstrap json',
- flags=re.MULTILINE)
- bootstrap = json.loads(bootstrap_json)
+ r'var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$',
+ webpage, 'bootstrap json', flags=re.MULTILINE)
+ bootstrap = self._parse_json(bootstrap_json, video_id)
info = bootstrap['results'][0]['video']
mpxid = info['mpxId']
'timestamp': 1344858571,
'age_limit': 12,
},
+ 'params': {
+ 'skip_download': 'Download only works from Germany',
+ }
}
def _real_extract(self, url):
from __future__ import unicode_literals
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
from ..utils import (
fix_xml_ampersands,
parse_duration,
)
-class NPOBaseIE(SubtitlesInfoExtractor):
+class NPOBaseIE(InfoExtractor):
def _get_token(self, video_id):
token_page = self._download_webpage(
'http://ida.omroep.nl/npoplayer/i.js',
class NPOIE(NPOBaseIE):
IE_NAME = 'npo.nl'
- _VALID_URL = r'https?://www\.npo\.nl/[^/]+/[^/]+/(?P<id>[^/?]+)'
+ _VALID_URL = r'https?://(?:www\.)?npo\.nl/(?!live|radio)[^/]+/[^/]+/(?P<id>[^/?]+)'
_TESTS = [
{
subtitles = {}
if metadata.get('tt888') == 'ja':
- subtitles['nl'] = 'http://e.omroep.nl/tt888/%s' % video_id
-
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, subtitles)
- return
-
- subtitles = self.extract_subtitles(video_id, subtitles)
+ subtitles['nl'] = [{
+ 'ext': 'vtt',
+ 'url': 'http://e.omroep.nl/tt888/%s' % video_id,
+ }]
return {
'id': video_id,
class NPOLiveIE(NPOBaseIE):
IE_NAME = 'npo.nl:live'
- _VALID_URL = r'https?://www\.npo\.nl/live/(?P<id>.+)'
+ _VALID_URL = r'https?://(?:www\.)?npo\.nl/live/(?P<id>.+)'
_TEST = {
'url': 'http://www.npo.nl/live/npo-1',
}
+class NPORadioIE(InfoExtractor):
+ IE_NAME = 'npo.nl:radio'
+ _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/(?P<id>[^/]+)/?$'
+
+ _TEST = {
+ 'url': 'http://www.npo.nl/radio/radio-1',
+ 'info_dict': {
+ 'id': 'radio-1',
+ 'ext': 'mp3',
+ 'title': 're:^NPO Radio 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }
+
+ @staticmethod
+ def _html_get_attribute_regex(attribute):
+ return r'{0}\s*=\s*\'([^\']+)\''.format(attribute)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(
+ self._html_get_attribute_regex('data-channel'), webpage, 'title')
+
+ stream = self._parse_json(
+ self._html_search_regex(self._html_get_attribute_regex('data-streams'), webpage, 'data-streams'),
+ video_id)
+
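+ # The JSON in data-streams carries the stream URL plus its codec; for these bare audio streams the codec name doubles as the file extension.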
+ codec = stream.get('codec')
+
+ return {
+ 'id': video_id,
+ 'url': stream['url'],
+ 'title': self._live_title(title),
+ 'acodec': codec,
+ 'ext': codec,
+ 'is_live': True,
+ }
+
+
+class NPORadioFragmentIE(InfoExtractor):
+ IE_NAME = 'npo.nl:radio:fragment'
+ _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/[^/]+/fragment/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://www.npo.nl/radio/radio-5/fragment/174356',
+ 'md5': 'dd8cc470dad764d0fdc70a9a1e2d18c2',
+ 'info_dict': {
+ 'id': '174356',
+ 'ext': 'mp3',
+ 'title': 'Jubileumconcert Willeke Alberti',
+ },
+ }
+
+ def _real_extract(self, url):
+ audio_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, audio_id)
+
+ title = self._html_search_regex(
+ r'href="/radio/[^/]+/fragment/%s" title="([^"]+)"' % audio_id,
+ webpage, 'title')
+
+ audio_url = self._search_regex(
+ r"data-streams='([^']+)'", webpage, 'audio url')
+
+ return {
+ 'id': audio_id,
+ 'url': audio_url,
+ 'title': title,
+ }
+
+
class TegenlichtVproIE(NPOIE):
IE_NAME = 'tegenlicht.vpro.nl'
_VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?'
import re
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
ExtractorError,
float_or_none,
parse_duration,
unified_strdate,
)
-from .subtitles import SubtitlesInfoExtractor
class NRKIE(InfoExtractor):
}
-class NRKTVIE(SubtitlesInfoExtractor):
+class NRKTVIE(InfoExtractor):
_VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
_TESTS = [
if self._downloader.params.get('verbose', False):
self.to_screen('[debug] %s' % txt)
- def _extract_captions(self, subtitlesurl, video_id, baseurl):
+ def _get_subtitles(self, subtitlesurl, video_id, baseurl):
url = "%s%s" % (baseurl, subtitlesurl)
self._debug_print('%s: Subtitle url: %s' % (video_id, url))
- captions = self._download_xml(url, video_id, 'Downloading subtitles')
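+ # Rewrite <br /> inside the TTML cues to CRLF before parsing, so the line breaks end up in p.text instead of being lost as child elements.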
+ captions = self._download_xml(
+ url, video_id, 'Downloading subtitles',
+ transform_source=lambda s: s.replace(r'<br />', '\r\n'))
lang = captions.get('lang', 'no')
ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/ns/ttml}'))
srt = ''
duration = parse_duration(p.get('dur'))
starttime = self._seconds2str(begin)
endtime = self._seconds2str(begin + duration)
- text = '\n'.join(p.itertext())
- srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), starttime, endtime, text)
- return {lang: srt}
+ srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (compat_str(pos), starttime, endtime, p.text)
+ return {lang: [
+ {'ext': 'ttml', 'url': url},
+ {'ext': 'srt', 'data': srt},
+ ]}
def _extract_f4m(self, manifest_url, video_id):
return self._extract_f4m_formats(manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id)
webpage, 'subtitle URL', default=None)
subtitles = None
if subtitles_url:
- subtitles = self._extract_captions(subtitles_url, video_id, baseurl)
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, subtitles)
- return
+ subtitles = self.extract_subtitles(subtitles_url, video_id, baseurl)
return {
'id': video_id,
from .common import InfoExtractor
from ..utils import (
- unescapeHTML
+ clean_html,
+ xpath_text,
+ int_or_none,
)
_TESTS = [
{
'url': 'http://www.ntv.ru/novosti/863142/',
+ 'md5': 'ba7ea172a91cb83eb734cad18c10e723',
'info_dict': {
'id': '746000',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
+ 'thumbnail': 're:^http://.*\.jpg',
'duration': 136,
},
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
},
{
'url': 'http://www.ntv.ru/video/novosti/750370/',
+ 'md5': 'adecff79691b4d71e25220a191477124',
'info_dict': {
'id': '750370',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
+ 'thumbnail': 're:^http://.*\.jpg',
'duration': 172,
},
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
},
{
'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
+ 'md5': '82dbd49b38e3af1d00df16acbeab260c',
'info_dict': {
'id': '747480',
- 'ext': 'flv',
- 'title': '«Сегодня». 21 марта 2014 года. 16:00 ',
- 'description': '«Сегодня». 21 марта 2014 года. 16:00 ',
+ 'ext': 'mp4',
+ 'title': '«Сегодня». 21 марта 2014 года. 16:00',
+ 'description': '«Сегодня». 21 марта 2014 года. 16:00',
+ 'thumbnail': 're:^http://.*\.jpg',
'duration': 1496,
},
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
},
{
'url': 'http://www.ntv.ru/kino/Koma_film',
+ 'md5': 'f825770930937aa7e5aca0dc0d29319a',
'info_dict': {
- 'id': '758100',
- 'ext': 'flv',
+ 'id': '1007609',
+ 'ext': 'mp4',
'title': 'Остросюжетный фильм «Кома»',
'description': 'Остросюжетный фильм «Кома»',
+ 'thumbnail': 're:^http://.*\.jpg',
'duration': 5592,
},
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
},
{
'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/',
+ 'md5': '9320cd0e23f3ea59c330dc744e06ff3b',
'info_dict': {
'id': '751482',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': '«Дело врачей»: «Деревце жизни»',
'description': '«Дело врачей»: «Деревце жизни»',
+ 'thumbnail': 're:^http://.*\.jpg',
'duration': 2590,
},
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
},
]
def _real_extract(self, url):
video_id = self._match_id(url)
- page = self._download_webpage(url, video_id)
- video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, page, 'video id')
+ webpage = self._download_webpage(url, video_id)
- player = self._download_xml('http://www.ntv.ru/vi%s/' % video_id, video_id, 'Downloading video XML')
- title = unescapeHTML(player.find('./data/title').text)
- description = unescapeHTML(player.find('./data/description').text)
+ video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, webpage, 'video id')
- video = player.find('./data/video')
- video_id = video.find('./id').text
- thumbnail = video.find('./splash').text
- duration = int(video.find('./totaltime').text)
- view_count = int(video.find('./views').text)
- puid22 = video.find('./puid22').text
+ player = self._download_xml(
+ 'http://www.ntv.ru/vi%s/' % video_id,
+ video_id, 'Downloading video XML')
+ title = clean_html(xpath_text(player, './data/title', 'title', fatal=True))
+ description = clean_html(xpath_text(player, './data/description', 'description'))
- apps = {
- '4': 'video1',
- '7': 'video2',
- }
+ video = player.find('./data/video')
+ video_id = xpath_text(video, './id', 'video id')
+ thumbnail = xpath_text(video, './splash', 'thumbnail')
+ duration = int_or_none(xpath_text(video, './totaltime', 'duration'))
+ view_count = int_or_none(xpath_text(video, './views', 'view count'))
- app = apps.get(puid22, apps['4'])
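+ # rtmp is no longer needed: the files are served over HTTP, but each URL has to carry the access token fetched below (the tok parameter).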
+ token = self._download_webpage(
+ 'http://stat.ntv.ru/services/access/token',
+ video_id, 'Downloading access token')
formats = []
for format_id in ['', 'hi', 'webm']:
- file = video.find('./%sfile' % format_id)
- if file is None:
+ file_ = video.find('./%sfile' % format_id)
+ if file_ is None:
continue
size = video.find('./%ssize' % format_id)
formats.append({
- 'url': 'rtmp://media.ntv.ru/%s' % app,
- 'app': app,
- 'play_path': file.text,
- 'rtmp_conn': 'B:1',
- 'player_url': 'http://www.ntv.ru/swf/vps1.swf?update=20131128',
- 'page_url': 'http://www.ntv.ru',
- 'flash_version': 'LNX 11,2,202,341',
- 'rtmp_live': True,
- 'ext': 'flv',
- 'filesize': int(size.text),
+ 'url': 'http://media2.ntv.ru/vod/%s&tok=%s' % (file_.text, token),
+ 'filesize': int_or_none(size.text if size is not None else None),
})
self._sort_formats(formats)
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ unified_strdate,
+ int_or_none,
+ qualities,
+)
+
+
+class OdnoklassnikiIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:odnoklassniki|ok)\.ru/(?:video|web-api/video/moviePlayer)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://ok.ru/video/20079905452',
+ 'md5': '8e24ad2da6f387948e7a7d44eb8668fe',
+ 'info_dict': {
+ 'id': '20079905452',
+ 'ext': 'mp4',
+ 'title': 'Культура меняет нас (прекрасный ролик!))',
+ 'duration': 100,
+ 'upload_date': '20141207',
+ 'uploader_id': '330537914540',
+ 'uploader': 'Виталий Добровольский',
+ 'like_count': int,
+ 'age_limit': 0,
+ },
+ }, {
+ 'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ player = self._parse_json(
+ self._search_regex(
+ r"OKVideo\.start\(({.+?})\s*,\s*'VideoAutoplay_player'", webpage, 'player'),
+ video_id)
+
+ metadata = self._parse_json(player['flashvars']['metadata'], video_id)
+
+ movie = metadata['movie']
+ title = movie['title']
+ thumbnail = movie.get('poster')
+ duration = int_or_none(movie.get('duration'))
+
+ author = metadata.get('author', {})
+ uploader_id = author.get('id')
+ uploader = author.get('name')
+
+ upload_date = unified_strdate(self._html_search_meta(
+ 'ya:ovs:upload_date', webpage, 'upload date'))
+
+ age_limit = None
+ adult = self._html_search_meta(
+ 'ya:ovs:adult', webpage, 'age limit')
+ if adult:
+ age_limit = 18 if adult == 'true' else 0
+
+ like_count = int_or_none(metadata.get('likeCount'))
+
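+ # Rank the named flavours from worst to best so that format selection prefers 'hd' when available.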
+ quality = qualities(('mobile', 'lowest', 'low', 'sd', 'hd'))
+
+ formats = [{
+ 'url': f['url'],
+ 'ext': 'mp4',
+ 'format_id': f['name'],
+ 'quality': quality(f['name']),
+ } for f in metadata['videos']]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'upload_date': upload_date,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'like_count': like_count,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
# encoding: utf-8
from __future__ import unicode_literals
-import json
-import re
-
from .common import InfoExtractor
from ..utils import (
js_to_json,
class PatreonIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(.+)'
+ _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(?P<id>[^&#]+)'
_TESTS = [
{
'url': 'http://www.patreon.com/creation?hid=743933',
'thumbnail': 're:^https?://.*$',
},
},
+ {
+ 'url': 'https://www.patreon.com/creation?hid=1682498',
+ 'info_dict': {
+ 'id': 'SU4fj_aEMVw',
+ 'ext': 'mp4',
+ 'title': 'I\'m on Patreon!',
+ 'uploader': 'TraciJHines',
+ 'thumbnail': 're:^https?://.*$',
+ 'upload_date': '20150211',
+ 'description': 'md5:c5a706b1f687817a3de09db1eb93acd4',
+ 'uploader_id': 'TraciJHines',
+ },
+ 'params': {
+ 'noplaylist': True,
+ 'skip_download': True,
+ }
+ }
]
# Currently Patreon exposes download URL via hidden CSS, so login is not
'''
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group(1)
-
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._og_search_title(webpage).strip()
attach_fn = self._html_search_regex(
r'<div class="attach"><a target="_blank" href="([^"]+)">',
webpage, 'attachment URL', default=None)
+ embed = self._html_search_regex(
+ r'<div id="watchCreation">\s*<iframe class="embedly-embed" src="([^"]+)"',
+ webpage, 'embedded URL', default=None)
+
if attach_fn is not None:
video_url = 'http://www.patreon.com' + attach_fn
thumbnail = self._og_search_thumbnail(webpage)
uploader = self._html_search_regex(
r'<strong>(.*?)</strong> is creating', webpage, 'uploader')
+ elif embed is not None:
+ return self.url_result(embed)
else:
- playlist_js = self._search_regex(
+ playlist = self._parse_json(self._search_regex(
r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])',
- webpage, 'playlist JSON')
- playlist_json = js_to_json(playlist_js)
- playlist = json.loads(playlist_json)
+ webpage, 'playlist JSON'),
+ video_id, transform_source=js_to_json)
data = playlist[0]
video_url = self._proto_relative_url(data['mp3'])
thumbnail = self._proto_relative_url(data.get('cover'))
quality = qualities(['sd', 'hd'])
sources = json.loads(js_to_json(self._search_regex(
- r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}\);", webpage, 'sources')))
+ r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}[;,)]",
+ webpage, 'sources')))
formats = []
- for container, s in sources.items():
- for qname, video_url in s.items():
- formats.append({
- 'url': video_url,
- 'container': container,
- 'format_id': '%s-%s' % (container, qname),
- 'quality': quality(qname),
- })
+ for qname, video_url in sources.items():
+ if not video_url:
+ continue
+ formats.append({
+ 'url': video_url,
+ 'format_id': qname,
+ 'quality': quality(qname),
+ })
self._sort_formats(formats)
return {
video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
video_uploader = self._html_search_regex(
- r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|<span class="username)[^>]+>(.+?)<',
+ r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<',
webpage, 'uploader', fatal=False)
thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
if thumbnail:
'formats': formats,
'age_limit': 18,
}
+
+
+class PornHubPlaylistIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.pornhub.com/playlist/6201671',
+ 'info_dict': {
+ 'id': '6201671',
+ 'title': 'P0p4',
+ },
+ 'playlist_mincount': 35,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ entries = [
+ self.url_result('http://www.pornhub.com/%s' % video_url, 'PornHub')
+            for video_url in set(re.findall(r'href="/?(view_video\.php\?viewkey=\d+[^"]*)"', webpage))
+ ]
+
+ playlist = self._parse_json(
+ self._search_regex(
+ r'playlistObject\s*=\s*({.+?});', webpage, 'playlist'),
+ playlist_id)
+
+ return self.playlist_result(
+ entries, playlist_id, playlist.get('title'), playlist.get('description'))
--- /dev/null
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ unified_strdate,
+ int_or_none,
+)
+
+
+class Puls4IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?puls4\.com/video/[^/]+/play/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.puls4.com/video/pro-und-contra/play/2716816',
+ 'md5': '49f6a6629747eeec43cef6a46b5df81d',
+ 'info_dict': {
+ 'id': '2716816',
+ 'ext': 'mp4',
+ 'title': 'Pro und Contra vom 23.02.2015',
+ 'description': 'md5:293e44634d9477a67122489994675db6',
+ 'duration': 2989,
+ 'upload_date': '20150224',
+ 'uploader': 'PULS_4',
+ },
+ 'skip': 'Only works from Germany',
+ }, {
+ 'url': 'http://www.puls4.com/video/kult-spielfilme/play/1298106',
+ 'md5': '6a48316c8903ece8dab9b9a7bf7a59ec',
+ 'info_dict': {
+ 'id': '1298106',
+ 'ext': 'mp4',
+ 'title': 'Lucky Fritz',
+ },
+ 'skip': 'Only works from Germany',
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ error_message = self._html_search_regex(
+ r'<div class="message-error">(.+?)</div>',
+ webpage, 'error message', default=None)
+ if error_message:
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, error_message), expected=True)
+
+ real_url = self._html_search_regex(
+ r'\"fsk-button\".+?href=\"([^"]+)',
+ webpage, 'fsk_button', default=None)
+ if real_url:
+ webpage = self._download_webpage(real_url, video_id)
+
+ player = self._search_regex(
+ r'p4_video_player(?:_iframe)?\("video_\d+_container"\s*,(.+?)\);\s*\}',
+ webpage, 'player')
+
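+        # The player call arguments parse as a JSON array once stray
+        # "undefined" entries are stripped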
+ player_json = self._parse_json(
+ '[%s]' % player, video_id,
+ transform_source=lambda s: s.replace('undefined,', ''))
+
+ formats = None
+ result = None
+
+ for v in player_json:
+ if isinstance(v, list) and not formats:
+ formats = [{
+ 'url': f['url'],
+ 'format': 'hd' if f.get('hd') else 'sd',
+ 'width': int_or_none(f.get('size_x')),
+ 'height': int_or_none(f.get('size_y')),
+ 'tbr': int_or_none(f.get('bitrate')),
+ } for f in v]
+ self._sort_formats(formats)
+ elif isinstance(v, dict) and not result:
+ result = {
+ 'id': video_id,
+ 'title': v['videopartname'].strip(),
+ 'description': v.get('videotitle'),
+ 'duration': int_or_none(v.get('videoduration') or v.get('episodeduration')),
+ 'upload_date': unified_strdate(v.get('clipreleasetime')),
+ 'uploader': v.get('channel'),
+ }
+
+ result['formats'] = formats
+
+ return result
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ js_to_json,
+ unescapeHTML,
+ int_or_none,
+)
+
+
+class R7IE(InfoExtractor):
+ _VALID_URL = r'''(?x)https?://
+ (?:
+ (?:[a-zA-Z]+)\.r7\.com(?:/[^/]+)+/idmedia/|
+ noticias\.r7\.com(?:/[^/]+)+/[^/]+-|
+ player\.r7\.com/video/i/
+ )
+ (?P<id>[\da-f]{24})
+ '''
+ _TESTS = [{
+ 'url': 'http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html',
+ 'md5': '403c4e393617e8e8ddc748978ee8efde',
+ 'info_dict': {
+ 'id': '54e7050b0cf2ff57e0279389',
+ 'ext': 'mp4',
+ 'title': 'Policiais humilham suspeito à beira da morte: "Morre com dignidade"',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 98,
+ 'like_count': int,
+ 'view_count': int,
+ },
+ }, {
+ 'url': 'http://esportes.r7.com/videos/cigano-manda-recado-aos-fas/idmedia/4e176727b51a048ee6646a1b.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://noticias.r7.com/record-news/video/representante-do-instituto-sou-da-paz-fala-sobre-fim-do-estatuto-do-desarmamento-5480fc580cf2285b117f438d/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://player.r7.com/video/i/54e7050b0cf2ff57e0279389?play=true&video=http://vsh.r7.com/54e7050b0cf2ff57e0279389/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-ATOS_copy.mp4&linkCallback=http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html&thumbnail=http://vtb.r7.com/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-thumb.jpg&idCategory=192&share=true&layout=full&full=true',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://player.r7.com/video/i/%s' % video_id, video_id)
+
+ item = self._parse_json(js_to_json(self._search_regex(
+ r'(?s)var\s+item\s*=\s*({.+?});', webpage, 'player')), video_id)
+
+ title = unescapeHTML(item['title'])
+ thumbnail = item.get('init', {}).get('thumbUri')
+ duration = None
+
+ statistics = item.get('statistics', {})
+ like_count = int_or_none(statistics.get('likes'))
+ view_count = int_or_none(statistics.get('views'))
+
+ formats = []
+ for format_key, format_dict in item['playlist'][0].items():
+ src = format_dict.get('src')
+ if not src:
+ continue
+ format_id = format_dict.get('format') or format_key
+ if duration is None:
+ duration = format_dict.get('duration')
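+            # De-prioritize segmented formats: HDS gets preference -1, HLS -2,
+            # while plain HTTP downloads keep the default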
+ if '.f4m' in src:
+ formats.extend(self._extract_f4m_formats(src, video_id, preference=-1))
+ elif src.endswith('.m3u8'):
+ formats.extend(self._extract_m3u8_formats(src, video_id, 'mp4', preference=-2))
+ else:
+ formats.append({
+ 'url': src,
+ 'format_id': format_id,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'like_count': like_count,
+ 'view_count': view_count,
+ 'formats': formats,
+ }
from __future__ import unicode_literals
-import json
-
from .common import InfoExtractor
_VALID_URL = r'https?://(?P<id>.+?)\.(?:radio\.(?:de|at|fr|pt|es|pl|it)|rad\.io)'
_TEST = {
'url': 'http://ndr2.radio.de/',
- 'md5': '3b4cdd011bc59174596b6145cda474a4',
'info_dict': {
'id': 'ndr2',
'ext': 'mp3',
'title': 're:^NDR 2 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': 'md5:591c49c702db1a33751625ebfb67f273',
'thumbnail': 're:^https?://.*\.png',
+ 'is_live': True,
},
'params': {
'skip_download': True,
def _real_extract(self, url):
radio_id = self._match_id(url)
-
webpage = self._download_webpage(url, radio_id)
-        broadcast = json.loads(self._search_regex(
-            r'_getBroadcast\s*=\s*function\(\s*\)\s*{\s*return\s+({.+?})\s*;\s*}',
-            webpage, 'broadcast'))
-
+        jscode = self._search_regex(
+            r"'components/station/stationService':\s*\{\s*'?station'?:\s*(\{.*?\s*\}),\n",
+            webpage, 'broadcast')
+        broadcast = self._parse_json(jscode, radio_id)
title = self._live_title(broadcast['name'])
description = broadcast.get('description') or broadcast.get('shortDescription')
- thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl')
+ thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl') or broadcast.get('logo100x100')
formats = [{
'url': stream['streamUrl'],
import re
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
)
)
-class RaiIE(SubtitlesInfoExtractor):
+class RaiIE(InfoExtractor):
_VALID_URL = r'(?P<url>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)'
_TESTS = [
{
'ext': 'mp4',
})
- if self._downloader.params.get('listsubtitles', False):
- page = self._download_webpage(url, video_id)
- self._list_available_subtitles(video_id, page)
- return
-
- subtitles = {}
- if self._have_to_download_any_subtitles:
- page = self._download_webpage(url, video_id)
- subtitles = self.extract_subtitles(video_id, page)
+ subtitles = self.extract_subtitles(video_id, url)
return {
'id': video_id,
'subtitles': subtitles,
}
- def _get_available_subtitles(self, video_id, webpage):
+ def _get_subtitles(self, video_id, url):
+ webpage = self._download_webpage(url, video_id)
subtitles = {}
m = re.search(r'<meta name="closedcaption" content="(?P<captions>[^"]+)"', webpage)
if m:
SRT_EXT = '.srt'
if captions.endswith(STL_EXT):
captions = captions[:-len(STL_EXT)] + SRT_EXT
- subtitles['it'] = 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions)
+ subtitles['it'] = [{
+ 'ext': 'srt',
+ 'url': 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions),
+ }]
return subtitles
+# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
-from ..utils import parse_duration
+from ..utils import (
+ int_or_none,
+ parse_duration,
+)
-class RtlXlIE(InfoExtractor):
- IE_NAME = 'rtlxl.nl'
- _VALID_URL = r'https?://(www\.)?rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)'
+class RtlNlIE(InfoExtractor):
+ IE_NAME = 'rtl.nl'
+ IE_DESC = 'rtl.nl and rtlxl.nl'
+ _VALID_URL = r'''(?x)
+ https?://(www\.)?
+ (?:
+ rtlxl\.nl/\#!/[^/]+/|
+ rtl\.nl/system/videoplayer/[^?#]+?/video_embed\.html\#uuid=
+ )
+ (?P<id>[0-9a-f-]+)'''
- _TEST = {
+ _TESTS = [{
'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677',
'md5': 'cc16baa36a6c169391f0764fa6b16654',
'info_dict': {
'upload_date': '20140814',
'duration': 576.880,
},
- }
+ }, {
+ 'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
+ 'md5': 'dea7474214af1271d91ef332fb8be7ea',
+ 'info_dict': {
+ 'id': '84ae5571-ac25-4225-ae0c-ef8d9efb2aed',
+ 'ext': 'mp4',
+ 'timestamp': 1424039400,
+ 'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag',
+ 'thumbnail': 're:^https?://screenshots\.rtl\.nl/system/thumb/sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$',
+ 'upload_date': '20150215',
+ 'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
+ }
+ }]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- uuid = mobj.group('uuid')
-
+ uuid = self._match_id(url)
info = self._download_json(
'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
uuid)
material = info['material'][0]
- episode_info = info['episodes'][0]
-
progname = info['abstracts'][0]['name']
subtitle = material['title'] or info['episodes'][0]['name']
+ description = material.get('synopsis') or info['episodes'][0]['synopsis']
# Use unencrypted m3u8 streams (See https://github.com/rg3/youtube-dl/issues/4118)
videopath = material['videopath'].replace('.f4m', '.m3u8')
'quality': 0,
}
])
-
self._sort_formats(formats)
+ thumbnails = []
+ meta = info.get('meta', {})
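+    # the quotes in the second key name are apparently literal in the API response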
+ for p in ('poster_base_url', '"thumb_base_url"'):
+ if not meta.get(p):
+ continue
+
+ thumbnails.append({
+ 'url': self._proto_relative_url(meta[p] + uuid),
+ 'width': int_or_none(self._search_regex(
+ r'/sz=([0-9]+)', meta[p], 'thumbnail width', fatal=False)),
+ 'height': int_or_none(self._search_regex(
+ r'/sz=[0-9]+x([0-9]+)',
+ meta[p], 'thumbnail height', fatal=False))
+ })
+
return {
'id': uuid,
'title': '%s - %s' % (progname, subtitle),
'formats': formats,
'timestamp': material['original_date'],
- 'description': episode_info['synopsis'],
+ 'description': description,
'duration': parse_duration(material.get('duration')),
+ 'thumbnails': thumbnails,
}
},
},
{
+ 'url': 'http://rtl-now.rtl.de/der-bachelor/folge-4.php?film_id=188729&player=1&season=5',
+ 'info_dict': {
+ 'id': '188729',
+ 'ext': 'flv',
+ 'upload_date': '20150204',
+ 'description': 'md5:5e1ce23095e61a79c166d134b683cecc',
+ 'title': 'Der Bachelor - Folge 4',
+ }
+ }, {
'url': 'http://www.n-tvnow.de/deluxe-alles-was-spass-macht/thema-ua-luxushotel-fuer-vierbeiner.php?container_id=153819&player=1&season=0',
'only_matching': True,
},
'player_url': video_page_url + 'includes/vodplayer.swf',
}
else:
- fmt = {
- 'url': filename.text,
- }
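+            # Derive an RTMPE stream from the hoster and play path embedded
+            # in the f4m URL when possible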
+ mobj = re.search(r'.*/(?P<hoster>[^/]+)/videos/(?P<play_path>.+)\.f4m', filename.text)
+ if mobj:
+ fmt = {
+ 'url': 'rtmpe://fms.rtl.de/' + mobj.group('hoster'),
+ 'play_path': 'mp4:' + mobj.group('play_path'),
+ 'page_url': url,
+ 'player_url': video_page_url + 'includes/vodplayer.swf',
+ }
+ else:
+ fmt = {
+ 'url': filename.text,
+ }
fmt.update({
'width': int_or_none(filename.get('width')),
'height': int_or_none(filename.get('height')),
# coding: utf-8
from __future__ import unicode_literals
-import json
+import re
from .common import InfoExtractor
-from ..utils import js_to_json
class RTPIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)/?'
_TESTS = [{
'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas',
+ 'md5': 'e736ce0c665e459ddb818546220b4ef8',
'info_dict': {
'id': 'e174042',
'ext': 'mp3',
'description': 'As paixões musicais de António Cartaxo e António Macedo',
'thumbnail': 're:^https?://.*\.jpg',
},
- 'params': {
- 'skip_download': True, # RTMP download
- },
}, {
'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
'only_matching': True,
player_config = self._search_regex(
r'(?s)RTPPLAY\.player\.newPlayer\(\s*(\{.*?\})\s*\)', webpage, 'player config')
- config = json.loads(js_to_json(player_config))
+ config = self._parse_json(player_config, video_id)
path, ext = config.get('file').rsplit('.', 1)
formats = [{
+ 'format_id': 'rtmp',
+ 'ext': ext,
+ 'vcodec': config.get('type') == 'audio' and 'none' or None,
+ 'preference': -2,
+ 'url': 'rtmp://{streamer:s}/{application:s}'.format(**config),
'app': config.get('application'),
'play_path': '{ext:s}:{path:s}'.format(ext=ext, path=path),
'page_url': url,
- 'url': 'rtmp://{streamer:s}/{application:s}'.format(**config),
'rtmp_live': config.get('live', False),
- 'ext': ext,
- 'vcodec': config.get('type') == 'audio' and 'none' or None,
'player_url': 'http://programas.rtp.pt/play/player.swf?v3',
'rtmp_real_time': True,
}]
+ # Construct regular HTTP download URLs
+ replacements = {
+ 'audio': {
+ 'format_id': 'mp3',
+ 'pattern': r'^nas2\.share/wavrss/',
+ 'repl': 'http://rsspod.rtp.pt/podcasts/',
+ 'vcodec': 'none',
+ },
+ 'video': {
+ 'format_id': 'mp4_h264',
+ 'pattern': r'^nas2\.share/h264/',
+ 'repl': 'http://rsspod.rtp.pt/videocasts/',
+ 'vcodec': 'h264',
+ },
+ }
+ r = replacements[config['type']]
+ if re.match(r['pattern'], config['file']) is not None:
+ formats.append({
+ 'format_id': r['format_id'],
+ 'url': re.sub(r['pattern'], r['repl'], config['file']),
+ 'vcodec': r['vcodec'],
+ })
+
+ self._sort_formats(formats)
+
return {
'id': video_id,
'title': title,
import time
from .common import InfoExtractor
+from ..compat import compat_urlparse
from ..utils import (
- struct_unpack,
+ float_or_none,
remove_end,
+ struct_unpack,
)
'id': '2491869',
'ext': 'mp4',
'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
+ 'duration': 5024.566,
},
}, {
'note': 'Live stream',
).replace('.net.rtve', '.multimedia.cdn.rtve')
video_path = self._download_webpage(
auth_url, video_id, 'Getting video url')
- # Use mvod.akcdn instead of flash.akamaihd.multimedia.cdn to get
+ # Use mvod1.akcdn instead of flash.akamaihd.multimedia.cdn to get
# the right Content-Length header and the mp4 format
- video_url = (
- 'http://mvod.akcdn.rtve.es/{0}&v=2.6.8'
- '&fp=MAC%2016,0,0,296&r=MRUGG&g=OEOJWFXNFGCP'.format(video_path)
- )
+ video_url = compat_urlparse.urljoin(
+ 'http://mvod1.akcdn.rtve.es/', video_path)
+
+ subtitles = None
+ if info.get('sbtFile') is not None:
+ subtitles = self.extract_subtitles(video_id, info['sbtFile'])
return {
'id': video_id,
'url': video_url,
'thumbnail': info.get('image'),
'page_url': url,
+ 'subtitles': subtitles,
+ 'duration': float_or_none(info.get('duration'), scale=1000),
}
+ def _get_subtitles(self, video_id, sub_file):
+ subs = self._download_json(
+ sub_file + '.json', video_id,
+ 'Downloading subtitles info')['page']['items']
+ return dict(
+ (s['lang'], [{'ext': 'vtt', 'url': s['src']}])
+ for s in subs)
+
class RTVELiveIE(InfoExtractor):
IE_NAME = 'rtve.es:live'
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+import json
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_request,
+ compat_urlparse,
+)
+from ..utils import (
+ int_or_none,
+ js_to_json,
+ mimetype2ext,
+ unified_strdate,
+)
+
+
+class SandiaIE(InfoExtractor):
+ IE_DESC = 'Sandia National Laboratories'
+ _VALID_URL = r'https?://digitalops\.sandia\.gov/Mediasite/Play/(?P<id>[0-9a-f]+)'
+ _TEST = {
+ 'url': 'http://digitalops.sandia.gov/Mediasite/Play/24aace4429fc450fb5b38cdbf424a66e1d',
+ 'md5': '9422edc9b9a60151727e4b6d8bef393d',
+ 'info_dict': {
+ 'id': '24aace4429fc450fb5b38cdbf424a66e1d',
+ 'ext': 'mp4',
+ 'title': 'Xyce Software Training - Section 1',
+ 'description': 're:(?s)SAND Number: SAND 2013-7800.{200,}',
+ 'upload_date': '20120904',
+ 'duration': 7794,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ req = compat_urllib_request.Request(url)
+ req.add_header('Cookie', 'MediasitePlayerCaps=ClientPlugins=4')
+ webpage = self._download_webpage(req, video_id)
+
+ js_path = self._search_regex(
+ r'<script type="text/javascript" src="(/Mediasite/FileServer/Presentation/[^"]+)"',
+ webpage, 'JS code URL')
+ js_url = compat_urlparse.urljoin(url, js_path)
+
+ js_code = self._download_webpage(
+ js_url, video_id, note='Downloading player')
+
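+        # Scrape "Mediasite.PlaybackManifest.<key> = <value>;" assignments from the player JS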
+ def extract_str(key, **args):
+ return self._search_regex(
+ r'Mediasite\.PlaybackManifest\.%s\s*=\s*(.+);\s*?\n' % re.escape(key),
+ js_code, key, **args)
+
+ def extract_data(key, **args):
+ data_json = extract_str(key, **args)
+ if data_json is None:
+ return data_json
+ return self._parse_json(
+ data_json, video_id, transform_source=js_to_json)
+
+ formats = []
+ for i in itertools.count():
+ fd = extract_data('VideoUrls[%d]' % i, default=None)
+ if fd is None:
+ break
+ formats.append({
+ 'format_id': '%s' % i,
+ 'format_note': fd['MimeType'].partition('/')[2],
+ 'ext': mimetype2ext(fd['MimeType']),
+ 'url': fd['Location'],
+ 'protocol': 'f4m' if fd['MimeType'] == 'video/x-mp4-fragmented' else None,
+ })
+ self._sort_formats(formats)
+
+ slide_baseurl = compat_urlparse.urljoin(
+ url, extract_data('SlideBaseUrl'))
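+        # Convert the .NET numeric placeholder in the template (e.g. {0:D4})
+        # to printf form (%04d)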
+ slide_template = slide_baseurl + re.sub(
+            r'\{0:D?([0-9]+)\}', r'%0\1d', extract_data('SlideImageFileNameTemplate'))
+ slides = []
+ last_slide_time = 0
+ for i in itertools.count(1):
+ sd = extract_str('Slides[%d]' % i, default=None)
+ if sd is None:
+ break
+ timestamp = int_or_none(self._search_regex(
+ r'^Mediasite\.PlaybackManifest\.CreateSlide\("[^"]*"\s*,\s*([0-9]+),',
+ sd, 'slide %s timestamp' % i, fatal=False))
+ slides.append({
+ 'url': slide_template % i,
+ 'duration': timestamp - last_slide_time,
+ })
+ last_slide_time = timestamp
+ formats.append({
+ 'format_id': 'slides',
+ 'protocol': 'slideshow',
+ 'url': json.dumps(slides),
+ 'preference': -10000, # Downloader not yet written
+ })
+ self._sort_formats(formats)
+
+ title = extract_data('Title')
+ description = extract_data('Description', fatal=False)
+ duration = int_or_none(extract_data(
+ 'Duration', fatal=False), scale=1000)
+ upload_date = unified_strdate(extract_data('AirDate', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'formats': formats,
+ 'upload_date': upload_date,
+ 'duration': duration,
+ }
'id': '437BE28B89D799D7',
'title': 'big_buck_bunny_720p_surround.avi',
'ext': 'avi',
- 'thumbnail': 're:^http://.*\.jpg$',
}
}
''', webpage, 'hash')
fields = {
- "hash": confirm_hash,
+ "hash": confirm_hash.encode('utf-8'),
"confirm": "Continue as Free User"
}
webpage, 'title', default=None)
thumbnail = self._html_search_regex(
r'<img\s+src="([^"]*)".+?name="bg"',
- webpage, 'thumbnail')
+ webpage, 'thumbnail', default=None)
formats = [{
'format_id': 'sd',
+++ /dev/null
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- HEADRequest,
- urlhandle_detect_ext,
-)
-
-
-class SoulAnimeWatchingIE(InfoExtractor):
- IE_NAME = "soulanime:watching"
- IE_DESC = "SoulAnime video"
- _TEST = {
- 'url': 'http://www.soul-anime.net/watching/seirei-tsukai-no-blade-dance-episode-9/',
- 'md5': '05fae04abf72298098b528e98abf4298',
- 'info_dict': {
- 'id': 'seirei-tsukai-no-blade-dance-episode-9',
- 'ext': 'mp4',
- 'title': 'seirei-tsukai-no-blade-dance-episode-9',
- 'description': 'seirei-tsukai-no-blade-dance-episode-9'
- }
- }
- _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/watch[^/]*/(?P<id>[^/]+)'
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- domain = mobj.group('domain')
-
- page = self._download_webpage(url, video_id)
-
- video_url_encoded = self._html_search_regex(
- r'<div id="download">[^<]*<a href="(?P<url>[^"]+)"', page, 'url')
- video_url = "http://www.soul-anime." + domain + video_url_encoded
-
- ext_req = HEADRequest(video_url)
- ext_handle = self._request_webpage(
- ext_req, video_id, note='Determining extension')
- ext = urlhandle_detect_ext(ext_handle)
-
- return {
- 'id': video_id,
- 'url': video_url,
- 'ext': ext,
- 'title': video_id,
- 'description': video_id
- }
-
-
-class SoulAnimeSeriesIE(InfoExtractor):
- IE_NAME = "soulanime:series"
- IE_DESC = "SoulAnime Series"
-
- _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/anime./(?P<id>[^/]+)'
-
- _EPISODE_REGEX = r'<option value="(/watch[^/]*/[^"]+)">[^<]*</option>'
-
- _TEST = {
- 'url': 'http://www.soul-anime.net/anime1/black-rock-shooter-tv/',
- 'info_dict': {
- 'id': 'black-rock-shooter-tv'
- },
- 'playlist_count': 8
- }
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- series_id = mobj.group('id')
- domain = mobj.group('domain')
-
- pattern = re.compile(self._EPISODE_REGEX)
-
- page = self._download_webpage(url, series_id, "Downloading series page")
- mobj = pattern.findall(page)
-
- entries = [self.url_result("http://www.soul-anime." + domain + obj) for obj in mobj]
-
- return self.playlist_result(entries, series_id)
class SoundgasmIE(InfoExtractor):
+ IE_NAME = 'soundgasm'
_VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_\-]+)/(?P<title>[0-9a-zA-Z_\-]+)'
_TEST = {
'url': 'http://soundgasm.net/u/ytdl/Piano-sample',
'title': audio_title,
'description': description
}
+
+
+class SoundgasmProfileIE(InfoExtractor):
+ IE_NAME = 'soundgasm:profile'
+ _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<id>[^/]+)/?(?:\#.*)?$'
+ _TEST = {
+ 'url': 'http://soundgasm.net/u/ytdl',
+ 'info_dict': {
+ 'id': 'ytdl',
+ },
+ 'playlist_count': 1,
+ }
+
+ def _real_extract(self, url):
+ profile_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, profile_id)
+
+ entries = [
+ self.url_result(audio_url, 'Soundgasm')
+ for audio_url in re.findall(r'href="([^"]+/u/%s/[^"]+)' % profile_id, webpage)]
+
+ return self.playlist_result(entries, profile_id)
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
+import hashlib
+import time
+
from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_request,
+)
from ..utils import (
int_or_none,
)
+def _get_api_key(api_path):
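+    # Api-Password is the MD5 of a static key, the request path and the
+    # current day number (time() / 86400), so the password rotates daily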
+ if api_path.endswith('?'):
+ api_path = api_path[:-1]
+
+ api_key = 'fb5f58a820353bd7095de526253c14fd'
+ a = '{0:}{1:}{2:}'.format(api_key, api_path, int(round(time.time() / 24 / 3600)))
+ return hashlib.md5(a.encode('ascii')).hexdigest()
+
+
class StreamCZIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?stream\.cz/.+/(?P<id>[0-9]+)'
+ _API_URL = 'http://www.stream.cz/API'
_TESTS = [{
'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti',
def _real_extract(self, url):
video_id = self._match_id(url)
- data = self._download_json(
- 'http://www.stream.cz/API/episode/%s' % video_id, video_id)
+ api_path = '/episode/%s' % video_id
+
+ req = compat_urllib_request.Request(self._API_URL + api_path)
+ req.add_header('Api-Password', _get_api_key(api_path))
+ data = self._download_json(req, video_id)
formats = []
for quality, video in enumerate(data['video_qualities']):
+++ /dev/null
-from __future__ import unicode_literals
-from .common import InfoExtractor
-
-from ..compat import compat_str
-from ..utils import (
- ExtractorError,
-)
-
-
-class SubtitlesInfoExtractor(InfoExtractor):
- @property
- def _have_to_download_any_subtitles(self):
- return any([self._downloader.params.get('writesubtitles', False),
- self._downloader.params.get('writeautomaticsub')])
-
- def _list_available_subtitles(self, video_id, webpage):
- """ outputs the available subtitles for the video """
- sub_lang_list = self._get_available_subtitles(video_id, webpage)
- auto_captions_list = self._get_available_automatic_caption(video_id, webpage)
- sub_lang = ",".join(list(sub_lang_list.keys()))
- self.to_screen('%s: Available subtitles for video: %s' %
- (video_id, sub_lang))
- auto_lang = ",".join(auto_captions_list.keys())
- self.to_screen('%s: Available automatic captions for video: %s' %
- (video_id, auto_lang))
-
- def extract_subtitles(self, video_id, webpage):
- """
- returns {sub_lang: sub} ,{} if subtitles not found or None if the
- subtitles aren't requested.
- """
- if not self._have_to_download_any_subtitles:
- return None
- available_subs_list = {}
- if self._downloader.params.get('writeautomaticsub', False):
- available_subs_list.update(self._get_available_automatic_caption(video_id, webpage))
- if self._downloader.params.get('writesubtitles', False):
- available_subs_list.update(self._get_available_subtitles(video_id, webpage))
-
- if not available_subs_list: # error, it didn't get the available subtitles
- return {}
- if self._downloader.params.get('allsubtitles', False):
- sub_lang_list = available_subs_list
- else:
- if self._downloader.params.get('subtitleslangs', False):
- requested_langs = self._downloader.params.get('subtitleslangs')
- elif 'en' in available_subs_list:
- requested_langs = ['en']
- else:
- requested_langs = [list(available_subs_list.keys())[0]]
-
- sub_lang_list = {}
- for sub_lang in requested_langs:
- if sub_lang not in available_subs_list:
- self._downloader.report_warning('no closed captions found in the specified language "%s"' % sub_lang)
- continue
- sub_lang_list[sub_lang] = available_subs_list[sub_lang]
-
- subtitles = {}
- for sub_lang, url in sub_lang_list.items():
- subtitle = self._request_subtitle_url(sub_lang, url)
- if subtitle:
- subtitles[sub_lang] = subtitle
- return subtitles
-
- def _download_subtitle_url(self, sub_lang, url):
- return self._download_webpage(url, None, note=False)
-
- def _request_subtitle_url(self, sub_lang, url):
- """ makes the http request for the subtitle """
- try:
- sub = self._download_subtitle_url(sub_lang, url)
- except ExtractorError as err:
- self._downloader.report_warning('unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
- return
- if not sub:
- self._downloader.report_warning('Did not fetch video subtitles')
- return
- return sub
-
- def _get_available_subtitles(self, video_id, webpage):
- """
- returns {sub_lang: url} or {} if not available
- Must be redefined by the subclasses
- """
-
- # By default, allow implementations to simply pass in the result
- assert isinstance(webpage, dict), \
- '_get_available_subtitles not implemented'
- return webpage
-
- def _get_available_automatic_caption(self, video_id, webpage):
- """
- returns {sub_lang: url} or {} if not available
- Must be redefined by the subclasses that support automatic captions,
- otherwise it will return {}
- """
- self._downloader.report_warning('Automatic Captions not supported by this server')
- return {}
formats = []
quality = qualities(['mp4', 'flv'])
- for video_url in re.findall(r'<source src="([^"]+)"', webpage):
+ for video_url in re.findall(r'<(?:source|video) src="([^"]+)"', webpage):
video_ext = determine_ext(video_url)
formats.append({
'url': video_url,
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+)
+
+
+class SVTPlayIE(InfoExtractor):
+ IE_DESC = 'SVT Play and Öppet arkiv'
+ _VALID_URL = r'https?://(?:www\.)?(?P<host>svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.svtplay.se/video/2609989/sm-veckan/sm-veckan-rally-final-sasong-1-sm-veckan-rally-final',
+ 'md5': 'ade3def0643fa1c40587a422f98edfd9',
+ 'info_dict': {
+ 'id': '2609989',
+ 'ext': 'flv',
+ 'title': 'SM veckan vinter, Örebro - Rally, final',
+ 'duration': 4500,
+ 'thumbnail': 're:^https?://.*[\.-]jpg$',
+ 'age_limit': 0,
+ },
+ }, {
+ 'url': 'http://www.oppetarkiv.se/video/1058509/rederiet-sasong-1-avsnitt-1-av-318',
+ 'md5': 'c3101a17ce9634f4c1f9800f0746c187',
+ 'info_dict': {
+ 'id': '1058509',
+ 'ext': 'flv',
+ 'title': 'Farlig kryssning',
+ 'duration': 2566,
+ 'thumbnail': 're:^https?://.*[\.-]jpg$',
+ 'age_limit': 0,
+ },
+ 'skip': 'Only works from Sweden',
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ host = mobj.group('host')
+
+ info = self._download_json(
+ 'http://www.%s.se/video/%s?output=json' % (host, video_id), video_id)
+
+ title = info['context']['title']
+ thumbnail = info['context'].get('thumbnailImage')
+
+ video_info = info['video']
+ formats = []
+ for vr in video_info['videoReferences']:
+ vurl = vr['url']
+ ext = determine_ext(vurl)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ vurl, video_id,
+ ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id=vr.get('playerType')))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ vurl + '?hdcore=3.3.0', video_id,
+ f4m_id=vr.get('playerType')))
+ else:
+ formats.append({
+ 'format_id': vr.get('playerType'),
+ 'url': vurl,
+ })
+ self._sort_formats(formats)
+
+ duration = video_info.get('materialLength')
+ age_limit = 18 if video_info.get('inappropriateForChildren') else 0
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'age_limit': age_limit,
+ }
from __future__ import unicode_literals
+import base64
import re
from .common import InfoExtractor
+from ..utils import qualities
class TeamcocoIE(InfoExtractor):
'id': '80187',
'ext': 'mp4',
'title': 'Conan Becomes A Mary Kay Beauty Consultant',
- 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'
+ 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.',
+ 'age_limit': 0,
}
}, {
'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
'info_dict': {
'id': '19705',
'ext': 'mp4',
- "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
- "title": "Louis C.K. Interview Pt. 1 11/3/11"
+ 'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.',
+ 'title': 'Louis C.K. Interview Pt. 1 11/3/11',
+ 'age_limit': 0,
}
}
]
+ _VIDEO_ID_REGEXES = (
+ r'"eVar42"\s*:\s*(\d+)',
+ r'Ginger\.TeamCoco\.openInApp\("video",\s*"([^"]+)"',
+ r'"id_not"\s*:\s*(\d+)'
+ )
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id)
- video_id = mobj.group("video_id")
+ video_id = mobj.group('video_id')
if not video_id:
video_id = self._html_search_regex(
- r'<div\s+class="player".*?data-id="(\d+?)"',
- webpage, 'video id')
+ self._VIDEO_ID_REGEXES, webpage, 'video id')
- data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
- data = self._download_xml(
- data_url, display_id, 'Downloading data webpage')
+ embed_url = 'http://teamcoco.com/embed/v/%s' % video_id
+ embed = self._download_webpage(
+ embed_url, video_id, 'Downloading embed page')
+
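+        # The embed page carries the video metadata as base64-encoded JSON
+        # in its "preload" field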
+ encoded_data = self._search_regex(
+ r'"preload"\s*:\s*"([^"]+)"', embed, 'encoded data')
+ data = self._parse_json(
+ base64.b64decode(encoded_data.encode('ascii')).decode('utf-8'), video_id)
- qualities = ['500k', '480p', '1000k', '720p', '1080p']
formats = []
- for filed in data.findall('files/file'):
- if filed.attrib.get('playmode') == 'all':
- # it just duplicates one of the entries
- break
- file_url = filed.text
- m_format = re.search(r'(\d+(k|p))\.mp4', file_url)
+ get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])
+ for filed in data['files']:
+ m_format = re.search(r'(\d+(k|p))\.mp4', filed['url'])
if m_format is not None:
format_id = m_format.group(1)
else:
- format_id = filed.attrib['bitrate']
+ format_id = filed['bitrate']
tbr = (
- int(filed.attrib['bitrate'])
- if filed.attrib['bitrate'].isdigit()
+ int(filed['bitrate'])
+ if filed['bitrate'].isdigit()
else None)
- try:
- quality = qualities.index(format_id)
- except ValueError:
- quality = -1
formats.append({
- 'url': file_url,
+ 'url': filed['url'],
'ext': 'mp4',
'tbr': tbr,
'format_id': format_id,
- 'quality': quality,
+ 'quality': get_quality(format_id),
})
self._sort_formats(formats)
'id': video_id,
'display_id': display_id,
'formats': formats,
- 'title': self._og_search_title(webpage),
- 'thumbnail': self._og_search_thumbnail(webpage),
- 'description': self._og_search_description(webpage),
+ 'title': data['title'],
+ 'thumbnail': data.get('thumb', {}).get('href'),
+ 'description': data.get('teaser'),
+ 'age_limit': self._family_friendly_search(webpage),
}
import json
import re
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
from ..compat import (
compat_str,
)
-class TEDIE(SubtitlesInfoExtractor):
+class TEDIE(InfoExtractor):
_VALID_URL = r'''(?x)
(?P<proto>https?://)
(?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/
'params': {
'skip_download': True,
},
+ }, {
+ # YouTube video
+ 'url': 'http://www.ted.com/talks/jeffrey_kluger_the_sibling_bond',
+ 'add_ie': ['Youtube'],
+ 'info_dict': {
+ 'id': 'aFBIPO-P7LM',
+ 'ext': 'mp4',
+ 'title': 'The hidden power of siblings: Jeff Kluger at TEDxAsheville',
+ 'description': 'md5:3d7a4f50d95ca5dd67104e2a20f43fe1',
+ 'uploader': 'TEDx Talks',
+ 'uploader_id': 'TEDxTalks',
+ 'upload_date': '20111216',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
}]
_NATIVE_FORMATS = {
talk_info = self._extract_info(webpage)['talks'][0]
- if talk_info.get('external') is not None:
- self.to_screen('Found video from %s' % talk_info['external']['service'])
+ external = talk_info.get('external')
+ if external:
+ service = external['service']
+ self.to_screen('Found video from %s' % service)
+ ext_url = None
+ if service.lower() == 'youtube':
+ ext_url = external.get('code')
return {
'_type': 'url',
- 'url': talk_info['external']['uri'],
+ 'url': ext_url or external['uri'],
}
formats = [{
self._sort_formats(formats)
video_id = compat_str(talk_info['id'])
- # subtitles
- video_subtitles = self.extract_subtitles(video_id, talk_info)
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, talk_info)
- return
thumbnail = talk_info['thumb']
if not thumbnail.startswith('http'):
'uploader': talk_info['speaker'],
'thumbnail': thumbnail,
'description': self._og_search_description(webpage),
- 'subtitles': video_subtitles,
+ 'subtitles': self._get_subtitles(video_id, talk_info),
'formats': formats,
'duration': talk_info.get('duration'),
}
- def _get_available_subtitles(self, video_id, talk_info):
+ def _get_subtitles(self, video_id, talk_info):
languages = [lang['languageCode'] for lang in talk_info.get('languages', [])]
if languages:
sub_lang_list = {}
for l in languages:
- url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l)
- sub_lang_list[l] = url
+ sub_lang_list[l] = [
+ {
+ 'url': 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/%s' % (video_id, l, ext),
+ 'ext': ext,
+ }
+ for ext in ['ted', 'srt']
+ ]
return sub_lang_list
else:
- self._downloader.report_warning('video doesn\'t have subtitles')
return {}
def _watch_info(self, url, name):
class TelecincoIE(MiTeleIE):
IE_NAME = 'telecinco.es'
- _VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/[^/]+/(?P<id>.*?)\.html'
+ _VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/(?:[^/]+/)?(?P<id>.*?)\.html'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
'info_dict': {
'id': 'MDSVID20141015_0058',
'title': 'Con Martín Berasategui, hacer un bacalao al ...',
'duration': 662,
},
- }
+ }, {
+ 'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
+ 'only_matching': True,
+ }]
import re
from .common import InfoExtractor
-from ..utils import ExtractorError
class TheOnionIE(InfoExtractor):
- _VALID_URL = r'(?x)https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<article_id>[0-9]+)/?'
+ _VALID_URL = r'https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<id>[0-9]+)/?'
_TEST = {
'url': 'http://www.theonion.com/video/man-wearing-mm-jacket-gods-image,36918/',
'md5': '19eaa9a39cf9b9804d982e654dc791ee',
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- article_id = mobj.group('article_id')
-
- webpage = self._download_webpage(url, article_id)
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
r'"videoId":\s(\d+),', webpage, 'video ID')
thumbnail = self._og_search_thumbnail(webpage)
sources = re.findall(r'<source src="([^"]+)" type="([^"]+)"', webpage)
- if not sources:
- raise ExtractorError(
- 'No sources found for video %s' % video_id, expected=True)
-
formats = []
for src, type_ in sources:
if type_ == 'video/mp4':
})
elif type_ == 'application/x-mpegURL':
formats.extend(
- self._extract_m3u8_formats(src, video_id, preference=-1))
+ self._extract_m3u8_formats(src, display_id, preference=-1))
else:
self.report_warning(
'Encountered unexpected format: %s' % type_)
-
self._sort_formats(formats)
return {
'id': video_id,
+ 'display_id': display_id,
'title': title,
'formats': formats,
'thumbnail': thumbnail,
import re
import json
+import time
+import hmac
+import binascii
+import hashlib
-from .subtitles import SubtitlesInfoExtractor
+
+from .common import InfoExtractor
from ..compat import (
compat_str,
)
determine_ext,
ExtractorError,
xpath_with_ns,
+ unsmuggle_url,
)
_x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
-class ThePlatformIE(SubtitlesInfoExtractor):
+class ThePlatformIE(InfoExtractor):
_VALID_URL = r'''(?x)
- (?:https?://(?:link|player)\.theplatform\.com/[sp]/[^/]+/
+ (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
|theplatform:)(?P<id>[^/\?&]+)'''
},
}
+ @staticmethod
+ def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
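+        # Builds the "sig" query parameter: flags, a hex expiry timestamp, an
+        # HMAC-SHA1 checksum over flags/expiry/path, and the hex-encoded secret
+        # concatenated together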
+ flags = '10' if include_qs else '00'
+ expiration_date = '%x' % (int(time.time()) + life)
+
+        def str_to_hex(s):
+            return binascii.b2a_hex(s.encode('ascii')).decode('ascii')
+
+        def hex_to_str(h):
+            return binascii.a2b_hex(h)
+
+ relative_path = url.split('http://link.theplatform.com/s/')[1].split('?')[0]
+ clear_text = hex_to_str(flags + expiration_date + str_to_hex(relative_path))
+ checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest()
+ sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
+ return '%s&sig=%s' % (url, sig)
+
def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+
mobj = re.match(self._VALID_URL, url)
+ provider_id = mobj.group('provider_id')
video_id = mobj.group('id')
- if mobj.group('config'):
+
+ if not provider_id:
+ provider_id = 'dJ5BDC'
+
+ if smuggled_data.get('force_smil_url', False):
+ smil_url = url
+ elif mobj.group('config'):
config_url = url + '&form=json'
config_url = config_url.replace('swf/', 'config/')
config_url = config_url.replace('onsite/', 'onsite/config/')
config = self._download_json(config_url, video_id, 'Downloading config')
smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'
else:
- smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
- 'format=smil&mbr=true'.format(video_id))
+ smil_url = ('http://link.theplatform.com/s/{0}/{1}/meta.smil?'
+ 'format=smil&mbr=true'.format(provider_id, video_id))
+
+ sig = smuggled_data.get('sig')
+ if sig:
+ smil_url = self._sign_url(smil_url, sig['key'], sig['secret'])
meta = self._download_xml(smil_url, video_id)
try:
else:
raise ExtractorError(error_msg, expected=True)
- info_url = 'http://link.theplatform.com/s/dJ5BDC/{0}?format=preview'.format(video_id)
+ info_url = 'http://link.theplatform.com/s/{0}/{1}?format=preview'.format(provider_id, video_id)
info_json = self._download_webpage(info_url, video_id)
info = json.loads(info_json)
captions = info.get('captions')
if isinstance(captions, list):
for caption in captions:
- lang, src = caption.get('lang'), caption.get('src')
- if lang and src:
- subtitles[lang] = src
-
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, subtitles)
- return
-
- subtitles = self.extract_subtitles(video_id, subtitles)
+ lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type')
+ subtitles[lang] = [{
+ 'ext': 'srt' if mime == 'text/srt' else 'ttml',
+ 'url': src,
+ }]
head = meta.find(_x('smil:head'))
body = meta.find(_x('smil:body'))
+# coding: utf-8
from __future__ import unicode_literals
-import json
+import re
from .common import InfoExtractor
+from ..utils import ExtractorError
class TriluliluIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?trilulilu\.ro/video-[^/]+/(?P<id>[^/]+)'
+ _VALID_URL = r'https?://(?:www\.)?trilulilu\.ro/(?:video-[^/]+/)?(?P<id>[^/#\?]+)'
_TEST = {
'url': 'http://www.trilulilu.ro/video-animatie/big-buck-bunny-1',
+ 'md5': 'c1450a00da251e2769b74b9005601cac',
'info_dict': {
- 'id': 'big-buck-bunny-1',
+ 'id': 'ae2899e124140b',
'ext': 'mp4',
'title': 'Big Buck Bunny',
'description': ':) pentru copilul din noi',
},
- # Server ignores Range headers (--test)
- 'params': {
- 'skip_download': True
- }
}
def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ if re.search(r'Fişierul nu este disponibil pentru vizionare în ţara dumneavoastră', webpage):
+ raise ExtractorError(
+ 'This video is not available in your country.', expected=True)
+ elif re.search('Fişierul poate fi accesat doar de către prietenii lui', webpage):
+ raise ExtractorError('This video is private.', expected=True)
+
+ flashvars_str = self._search_regex(
+ r'block_flash_vars\s*=\s*(\{[^\}]+\})', webpage, 'flashvars', fatal=False, default=None)
+ if flashvars_str:
+ flashvars = self._parse_json(flashvars_str, display_id)
+ else:
+ raise ExtractorError(
+ 'This page does not contain videos', expected=True)
+
+ if flashvars['isMP3'] == 'true':
+ raise ExtractorError(
+ 'Audio downloads are currently not supported', expected=True)
+
+ video_id = flashvars['hash']
title = self._og_search_title(webpage)
thumbnail = self._og_search_thumbnail(webpage)
- description = self._og_search_description(webpage)
-
- log_str = self._search_regex(
- r'block_flash_vars[ ]=[ ]({[^}]+})', webpage, 'log info')
- log = json.loads(log_str)
+ description = self._og_search_description(webpage, default=None)
format_url = ('http://fs%(server)s.trilulilu.ro/%(hash)s/'
- 'video-formats2' % log)
+ 'video-formats2' % flashvars)
format_doc = self._download_xml(
format_url, video_id,
note='Downloading formats',
'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
'&source=site&hash=%(hash)s&username=%(userid)s&'
'key=ministhebest&format=%%s&sig=&exp=' %
- log)
+ flashvars)
formats = [
{
- 'format': fnode.text,
+ 'format_id': fnode.text.partition('-')[2],
'url': video_url_template % fnode.text,
'ext': fnode.text.partition('-')[0]
}
]
return {
- '_type': 'video',
'id': video_id,
+ 'display_id': display_id,
'formats': formats,
'title': title,
'description': description,
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ parse_iso8601,
+)
+
+
+class TV4IE(InfoExtractor):
+ IE_DESC = 'tv4.se and tv4play.se'
+ _VALID_URL = r'''(?x)https?://(?:www\.)?
+ (?:
+ tv4\.se/(?:[^/]+)/klipp/(?:.*)-|
+ tv4play\.se/
+ (?:
+ (?:program|barn)/(?:[^\?]+)\?video_id=|
+ iframe/video/|
+ film/|
+ sport/|
+ )
+ )(?P<id>[0-9]+)'''
+ _TESTS = [
+ {
+ 'url': 'http://www.tv4.se/kalla-fakta/klipp/kalla-fakta-5-english-subtitles-2491650',
+ 'md5': '909d6454b87b10a25aa04c4bdd416a9b',
+ 'info_dict': {
+ 'id': '2491650',
+ 'ext': 'mp4',
+ 'title': 'Kalla Fakta 5 (english subtitles)',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'timestamp': int,
+ 'upload_date': '20131125',
+ },
+ },
+ {
+ 'url': 'http://www.tv4play.se/iframe/video/3054113',
+ 'md5': '77f851c55139ffe0ebd41b6a5552489b',
+ 'info_dict': {
+ 'id': '3054113',
+ 'ext': 'mp4',
+ 'title': 'Så här jobbar ficktjuvarna - se avslöjande bilder',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'description': 'Unika bilder avslöjar hur turisternas fickor vittjas mitt på Stockholms central. Två experter på ficktjuvarna avslöjar knepen du ska se upp för.',
+ 'timestamp': int,
+ 'upload_date': '20150130',
+ },
+ },
+ {
+ 'url': 'http://www.tv4play.se/sport/3060959',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.tv4play.se/film/2378136',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.tv4play.se/barn/looney-tunes?video_id=3062412',
+ 'only_matching': True,
+ },
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ info = self._download_json(
+ 'http://www.tv4play.se/player/assets/%s.json' % video_id, video_id, 'Downloading video info JSON')
+
+        # If is_geo_restricted is true, it doesn't necessarily mean we can't download it
+ if info['is_geo_restricted']:
+ self.report_warning('This content might not be available in your country due to licensing restrictions.')
+ if info['requires_subscription']:
+ raise ExtractorError('This content requires subscription.', expected=True)
+
+ sources_data = self._download_json(
+ 'https://prima.tv4play.se/api/web/asset/%s/play.json?protocol=http&videoFormat=MP4' % video_id, video_id, 'Downloading sources JSON')
+ sources = sources_data['playback']
+
+ formats = []
+ for item in sources.get('items', {}).get('item', []):
+ ext, bitrate = item['mediaFormat'], item['bitrate']
+ formats.append({
+ 'format_id': '%s_%s' % (ext, bitrate),
+ 'tbr': bitrate,
+ 'ext': ext,
+ 'url': item['url'],
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': info['title'],
+ 'formats': formats,
+ 'description': info.get('description'),
+ 'timestamp': parse_iso8601(info.get('broadcast_date_time')),
+ 'duration': info.get('duration'),
+ 'thumbnail': info.get('image'),
+ 'is_live': sources.get('live'),
+ }
# encoding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..utils import (
float_or_none,
class TvigleIE(InfoExtractor):
IE_NAME = 'tvigle'
IE_DESC = 'Интернет-телевидение Tvigle.ru'
- _VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<id>[^/]+)/$'
+ _VALID_URL = r'https?://(?:www\.)?(?:tvigle\.ru/(?:[^/]+/)+(?P<display_id>[^/]+)/$|cloud\.tvigle\.ru/video/(?P<id>\d+))'
_TESTS = [
{
'duration': 186.080,
'age_limit': 0,
},
- },
+ }, {
+ 'url': 'https://cloud.tvigle.ru/video/5267604/',
+ 'only_matching': True,
+ }
]
def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
- video_id = self._html_search_regex(
- r'<li class="video-preview current_playing" id="(\d+)">', webpage, 'video id')
+ if not video_id:
+ webpage = self._download_webpage(url, display_id)
+ video_id = self._html_search_regex(
+ r'<li class="video-preview current_playing" id="(\d+)">',
+ webpage, 'video id')
video_data = self._download_json(
'http://cloud.tvigle.ru/api/play/video/%s/' % video_id, display_id)
expected=True)
def _download_json(self, url, video_id, note='Downloading JSON metadata'):
- response = super(TwitchBaseIE, self)._download_json(url, video_id, note)
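+        # Mimic the web player's AJAX requests and forward any stored
+        # api_token cookie, which the API seems to expect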
+ headers = {
+ 'Referer': 'http://api.twitch.tv/crossdomain/receiver.html?v=2',
+ 'X-Requested-With': 'XMLHttpRequest',
+ }
+ for cookie in self._downloader.cookiejar:
+ if cookie.name == 'api_token':
+ headers['Twitch-Api-Token'] = cookie.value
+ request = compat_urllib_request.Request(url, headers=headers)
+ response = super(TwitchBaseIE, self)._download_json(request, video_id, note)
self._handle_error(response)
return response
% (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query).encode('utf-8')),
channel_id, 'mp4')
+ # prefer the 'source' stream, the others are limited to 30 fps
+ def _sort_source(f):
+ if f.get('m3u8_media') is not None and f['m3u8_media'].get('NAME') == 'Source':
+ return 1
+ return 0
+ formats = sorted(formats, key=_sort_source)
+
view_count = stream.get('viewers')
timestamp = parse_iso8601(stream.get('created_at'))
thumbnail = (
None if thumbnail_el is None else thumbnail_el.attrib.get('src'))
- formats = [{
- 'url': v.attrib['src'],
- 'width': int_or_none(v.attrib.get('width')),
- 'height': int_or_none(v.attrib.get('height')),
- 'filesize': int_or_none(v.attrib.get('size')),
- 'tbr': int_or_none(v.attrib.get('systemBitrate')) / 1000.0,
- 'ext': v.attrib.get('ext'),
- } for v in switch.findall('./video')
- if v.attrib.get('proto') == 'http']
+ formats = []
+ for v in switch.findall('./video'):
+ proto = v.attrib.get('proto')
+ if proto not in ['http', 'rtmp']:
+ continue
+ f = {
+ 'width': int_or_none(v.attrib.get('width')),
+ 'height': int_or_none(v.attrib.get('height')),
+ 'filesize': int_or_none(v.attrib.get('size')),
+                'tbr': int_or_none(v.attrib.get('systemBitrate'), scale=1000),
+ 'ext': v.attrib.get('ext'),
+ }
+ src = v.attrib['src']
+ if proto == 'http':
+ if self._is_valid_url(src, video_id):
+ f['url'] = src
+ formats.append(f)
+ elif proto == 'rtmp':
+ f.update({
+ 'url': v.attrib['streamer'],
+ 'play_path': src,
+ 'rtmp_real_time': True,
+ })
+ formats.append(f)
+ self._sort_formats(formats)
return {
'id': video_id,
import re
+from ..compat import compat_urlparse
from ..utils import (
ExtractorError,
unescapeHTML,
unified_strdate,
US_RATINGS,
)
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
-class VikiIE(SubtitlesInfoExtractor):
+class VikiIE(InfoExtractor):
IE_NAME = 'viki'
_VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
# subtitles
video_subtitles = self.extract_subtitles(video_id, info_webpage)
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, info_webpage)
- return
return {
'id': video_id,
'upload_date': upload_date,
}
- def _get_available_subtitles(self, video_id, info_webpage):
+ def _get_subtitles(self, video_id, info_webpage):
res = {}
- for sturl_html in re.findall(r'<track src="([^"]+)"/>', info_webpage):
+ for sturl_html in re.findall(r'<track src="([^"]+)"', info_webpage):
sturl = unescapeHTML(sturl_html)
m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
if not m:
continue
- res[m.group('lang')] = sturl
+ res[m.group('lang')] = [{
+ 'url': compat_urlparse.urljoin('http://www.viki.com', sturl),
+ 'ext': 'vtt',
+ }]
return res
import json
import re
import itertools
+import hashlib
from .common import InfoExtractor
-from .subtitles import SubtitlesInfoExtractor
from ..compat import (
compat_HTTPError,
compat_urllib_parse,
InAdvancePagedList,
int_or_none,
RegexNotFoundError,
+ smuggle_url,
std_headers,
unsmuggle_url,
urlencode_postdata,
self._download_webpage(login_request, None, False, 'Wrong login info')
-class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
+class VimeoIE(VimeoBaseInfoExtractor):
"""Information extractor for vimeo.com."""
# _VALID_URL matches Vimeo URLs
def _verify_video_password(self, url, video_id, webpage):
password = self._downloader.params.get('videopassword', None)
if password is None:
- raise ExtractorError('This video is protected by a password, use the --video-password option')
+ raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
data = compat_urllib_parse.urlencode({
'password': password,
password_request = compat_urllib_request.Request(pass_url + '/password', data)
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
password_request.add_header('Cookie', 'xsrft=%s' % token)
- self._download_webpage(password_request, video_id,
- 'Verifying the password',
- 'Wrong password')
+ return self._download_webpage(
+ password_request, video_id,
+ 'Verifying the password', 'Wrong password')
def _verify_player_video_password(self, url, video_id):
password = self._downloader.params.get('videopassword', None)
if mobj.group('pro') or mobj.group('player'):
url = 'http://player.vimeo.com/video/' + video_id
+ password = self._downloader.params.get('videopassword', None)
+ if password:
+ headers['Cookie'] = '%s_password=%s' % (
+ video_id, hashlib.md5(password.encode('utf-8')).hexdigest())
+
# Retrieve video webpage to extract further information
request = compat_urllib_request.Request(url, None, headers)
try:
if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option')
- if re.search('<form[^>]+?id="pw_form"', webpage) is not None:
+ if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None:
+ if data and '_video_password_verified' in data:
+ raise ExtractorError('video password verification failed!')
self._verify_video_password(url, video_id, webpage)
- return self._real_extract(url)
+ return self._real_extract(
+ smuggle_url(url, {'_video_password_verified': 'verified'}))
else:
raise ExtractorError('Unable to extract info section',
cause=e)
text_tracks = config['request'].get('text_tracks')
if text_tracks:
for tt in text_tracks:
- subtitles[tt['lang']] = 'http://vimeo.com' + tt['url']
-
- video_subtitles = self.extract_subtitles(video_id, subtitles)
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, subtitles)
- return
+ subtitles[tt['lang']] = [{
+ 'ext': 'vtt',
+ 'url': 'http://vimeo.com' + tt['url'],
+ }]
return {
'id': video_id,
'view_count': view_count,
'like_count': like_count,
'comment_count': comment_count,
- 'subtitles': video_subtitles,
+ 'subtitles': subtitles,
}
_TESTS = [{
'url': 'http://vimeo.com/channels/tributes',
'info_dict': {
+ 'id': 'tributes',
'title': 'Vimeo Tributes',
},
'playlist_mincount': 25,
def _extract_list_title(self, webpage):
return self._html_search_regex(self._TITLE_RE, webpage, 'list title')
+ def _login_list_password(self, page_url, list_id, webpage):
+ login_form = self._search_regex(
+ r'(?s)<form[^>]+?id="pw_form"(.*?)</form>',
+ webpage, 'login form', default=None)
+ if not login_form:
+ return webpage
+
+ password = self._downloader.params.get('videopassword', None)
+ if password is None:
+ raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True)
+ fields = dict(re.findall(r'''(?x)<input\s+
+ type="hidden"\s+
+ name="([^"]+)"\s+
+ value="([^"]*)"
+ ''', login_form))
+ token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
+ fields['token'] = token
+ fields['password'] = password
+ post = compat_urllib_parse.urlencode(fields)
+ password_path = self._search_regex(
+ r'action="([^"]+)"', login_form, 'password URL')
+ password_url = compat_urlparse.urljoin(page_url, password_path)
+ password_request = compat_urllib_request.Request(password_url, post)
+ password_request.add_header('Content-type', 'application/x-www-form-urlencoded')
+ self._set_cookie('vimeo.com', 'xsrft', token)
+
+ return self._download_webpage(
+ password_request, list_id,
+ 'Verifying the password', 'Wrong password')
+
def _extract_videos(self, list_id, base_url):
video_ids = []
for pagenum in itertools.count(1):
+ page_url = self._page_url(base_url, pagenum)
webpage = self._download_webpage(
- self._page_url(base_url, pagenum), list_id,
+ page_url, list_id,
'Downloading page %s' % pagenum)
+
+ if pagenum == 1:
+ webpage = self._login_list_password(page_url, list_id, webpage)
+
video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
break
'url': 'http://vimeo.com/nkistudio/videos',
'info_dict': {
'title': 'Nki',
+ 'id': 'nkistudio',
},
'playlist_mincount': 66,
}]
_TESTS = [{
'url': 'http://vimeo.com/album/2632481',
'info_dict': {
+ 'id': '2632481',
'title': 'Staff Favorites: November 2013',
},
'playlist_mincount': 13,
+ }, {
+ 'note': 'Password-protected album',
+ 'url': 'https://vimeo.com/album/3253534',
+ 'info_dict': {
+ 'title': 'test',
+ 'id': '3253534',
+ },
+ 'playlist_count': 1,
+ 'params': {
+ 'videopassword': 'youtube-dl',
+ }
}]
def _page_url(self, base_url, pagenum):
return '%s/page:%d/' % (base_url, pagenum)
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- album_id = mobj.group('id')
+ album_id = self._match_id(url)
return self._extract_videos(album_id, 'http://vimeo.com/album/%s' % album_id)
_TESTS = [{
'url': 'http://vimeo.com/groups/rolexawards',
'info_dict': {
+ 'id': 'rolexawards',
'title': 'Rolex Awards for Enterprise',
},
'playlist_mincount': 73,
'url': 'https://vimeo.com/user755559/likes/',
'playlist_mincount': 293,
"info_dict": {
+ 'id': 'user755559_likes',
"description": "See all the videos urza likes",
"title": 'Videos urza likes',
},
_TEMPLATE_URL = 'https://vk.com/videos'
_TEST = {
'url': 'http://vk.com/videos205387401',
+ 'info_dict': {
+ 'id': '205387401',
+ },
'playlist_mincount': 4,
}
import re
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
from ..utils import (
xpath_text,
int_or_none,
)
-class WallaIE(SubtitlesInfoExtractor):
+class WallaIE(InfoExtractor):
_VALID_URL = r'http://vod\.walla\.co\.il/[^/]+/(?P<id>\d+)/(?P<display_id>.+)'
_TEST = {
'url': 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one',
subtitles = {}
for subtitle in item.findall('./subtitles/subtitle'):
lang = xpath_text(subtitle, './title')
- subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = xpath_text(subtitle, './src')
-
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, subtitles)
- return
-
- subtitles = self.extract_subtitles(video_id, subtitles)
+ subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
+ 'ext': 'srt',
+ 'url': xpath_text(subtitle, './src'),
+ }]
formats = []
for quality in item.findall('./qualities/quality'):
'title': 'Servicezeit',
'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb',
'upload_date': '20140310',
+ 'is_live': False,
},
'params': {
'skip_download': True,
'title': 'Marga Spiegel ist tot',
'description': 'md5:2309992a6716c347891c045be50992e4',
'upload_date': '20140311',
+ 'is_live': False,
},
'params': {
'skip_download': True,
'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)',
'description': 'md5:2309992a6716c347891c045be50992e4',
'upload_date': '20091129',
+ 'is_live': False,
},
},
{
'title': 'Flavia Coelho: Amar é Amar',
'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a',
'upload_date': '20140717',
+ 'is_live': False,
},
},
{
'info_dict': {
'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100',
}
+ },
+ {
+ 'url': 'http://www1.wdr.de/mediathek/video/livestream/index.html',
+ 'info_dict': {
+ 'id': 'mdb-103364',
+ 'title': 're:^WDR Fernsehen [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9',
+ 'ext': 'flv',
+ 'upload_date': '20150212',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
}
]
video_url = flashvars['dslSrc'][0]
title = flashvars['trackerClipTitle'][0]
thumbnail = flashvars['startPicture'][0] if 'startPicture' in flashvars else None
+ is_live = flashvars.get('isLive', ['0'])[0] == '1'
+
+ if is_live:
+ title = self._live_title(title)
if 'trackerClipAirTime' in flashvars:
upload_date = flashvars['trackerClipAirTime'][0]
if video_url.endswith('.f4m'):
video_url += '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18'
ext = 'flv'
+ elif video_url.endswith('.smil'):
+ fmt = self._extract_smil_formats(video_url, page_id)[0]
+ video_url = fmt['url']
+ sep = '&' if '?' in video_url else '?'
+ video_url += sep
+ video_url += 'hdcore=3.3.0&plugin=aasp-3.3.0.99.43'
+ ext = fmt['ext']
else:
ext = determine_ext(video_url)
'description': description,
'thumbnail': thumbnail,
'upload_date': upload_date,
+ 'is_live': is_live,
}
description = self._html_search_meta('description', webpage)
thumbnail = self._og_search_thumbnail(webpage)
- story_filename = self._search_regex(
- r'\.storyFileName\("([^"]+)"\)', webpage, 'story filename')
- speaker_id = self._search_regex(
- r'\.speakerId\("([^"]+)"\)', webpage, 'speaker ID')
- story_id = self._search_regex(
- r'\.storyId\((\d+)\)', webpage, 'story ID')
- speaker_type = self._search_regex(
- r'\.speakerType\("([^"]+)"\)', webpage, 'speaker type')
- great_life = self._search_regex(
- r'isGreatLifeStory\s*=\s*(true|false)', webpage, 'great life story')
+ embed_params = [s.strip(" \r\n\t'") for s in self._search_regex(
+ r'(?s)\$\("#embedCode"\).html\(getEmbedCode\((.*?)\)',
+ webpage, 'embed params').split(',')]
+
+ (
+ _, speaker_id, story_id, story_duration,
+ speaker_type, great_life, _thumbnail, _has_subtitles,
+ story_filename, _story_order) = embed_params
+
is_great_life_series = great_life == 'true'
- duration = int_or_none(self._search_regex(
- r'\.duration\((\d+)\)', webpage, 'duration', fatal=False))
+ duration = int_or_none(story_duration)
# URL building, see: http://www.webofstories.com/scripts/player.js
ms_prefix = ''
'id': '1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
'ext': 'mp4',
'upload_date': '20150202',
- 'uploader_id': 'bbright',
- 'creator': 'bbright',
+ 'uploader_id': 'jdesai',
+ 'creator': 'jdesai',
'categories': list, # a long list
'duration': 90,
'title': 'Bills Coach Rex Ryan Updates His Old Jets Tattoo',
'id': 'kVTUy_G222_',
'ext': 'mp4',
'title': 'strange erotica',
- 'description': 'http://www.xtube.com an ET kind of thing',
+ 'description': 'contains:an ET kind of thing',
'uploader': 'greenshowers',
'duration': 450,
'age_limit': 18,
_TESTS = [
{
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
- 'md5': '4962b075c08be8690a922ee026d05e69',
'info_dict': {
'id': '2d25e626-2378-391f-ada0-ddaf1417e588',
'ext': 'mp4',
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ float_or_none,
+ month_by_abbreviation,
+)
+
+
+class YamIE(InfoExtractor):
+ _VALID_URL = r'http://mymedia\.yam\.com/m/(?P<id>\d+)'
+
+ _TESTS = [{
+ # An audio hosted on Yam
+ 'url': 'http://mymedia.yam.com/m/2283921',
+ 'md5': 'c011b8e262a52d5473d9c2e3c9963b9c',
+ 'info_dict': {
+ 'id': '2283921',
+ 'ext': 'mp3',
+ 'title': '發現 - 趙薇 京華煙雲主題曲',
+ 'uploader_id': 'princekt',
+ 'upload_date': '20080807',
+ 'duration': 313.0,
+ }
+ }, {
+ # An external video hosted on YouTube
+ 'url': 'http://mymedia.yam.com/m/3598173',
+ 'md5': '0238ceec479c654e8c2f1223755bf3e9',
+ 'info_dict': {
+ 'id': 'pJ2Deys283c',
+ 'ext': 'mp4',
+ 'upload_date': '20150202',
+ 'uploader': '新莊社大瑜伽社',
+ 'description': 'md5:f5cc72f0baf259a70fb731654b0d2eff',
+ 'uploader_id': '2323agoy',
+ 'title': '外婆的澎湖灣KTV-潘安邦',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ page = self._download_webpage(url, video_id)
+
+ # Is it hosted externally on YouTube?
+ youtube_url = self._html_search_regex(
+ r'<embed src="(http://www\.youtube\.com/[^"]+)"',
+ page, 'YouTube url', default=None)
+ if youtube_url:
+ return self.url_result(youtube_url, 'Youtube')
+
+ api_page = self._download_webpage(
+ 'http://mymedia.yam.com/api/a/?pID=' + video_id, video_id,
+ note='Downloading API page')
+ api_result_obj = compat_urlparse.parse_qs(api_page)
+
+ uploader_id = self._html_search_regex(
+ r'<!-- 發表作者 -->:[\n ]+<a href="/([a-z]+)"',
+ page, 'uploader id', fatal=False)
+ mobj = re.search(r'<!-- 發表於 -->(?P<mon>[A-Z][a-z]{2}) ' +
+ r'(?P<day>\d{1,2}), (?P<year>\d{4})', page)
+ if mobj:
+ upload_date = '%s%02d%02d' % (
+ mobj.group('year'),
+ month_by_abbreviation(mobj.group('mon')),
+ int(mobj.group('day')))
+ else:
+ upload_date = None
+ duration = float_or_none(api_result_obj['totaltime'][0], scale=1000)
+
+ return {
+ 'id': video_id,
+ 'url': api_result_obj['mp3file'][0],
+ 'title': self._html_search_meta('description', page),
+ 'duration': duration,
+ 'uploader_id': uploader_id,
+ 'upload_date': upload_date,
+ }
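# Sketch of the API exchange (an assumption inferred from the parsing above,
# not quoted from the patch): the endpoint returns a query-string-style body
# such as
#   mp3file=http%3A%2F%2F...%2Fsong.mp3&totaltime=313000
# which compat_urlparse.parse_qs decodes to
#   {'mp3file': ['http://.../song.mp3'], 'totaltime': ['313000']}
# 'totaltime' is in milliseconds, hence float_or_none(..., scale=1000).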
import traceback
from .common import InfoExtractor, SearchInfoExtractor
-from .subtitles import SubtitlesInfoExtractor
from ..jsinterp import JSInterpreter
from ..swfinterp import SWFInterpreter
from ..compat import (
from ..utils import (
clean_html,
ExtractorError,
+ float_or_none,
get_element_by_attribute,
get_element_by_id,
int_or_none,
return
-class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
+class YoutubeIE(YoutubeBaseInfoExtractor):
IE_DESC = 'YouTube.com'
_VALID_URL = r"""(?x)^
(
if cache_spec is not None:
return lambda s: ''.join(s[i] for i in cache_spec)
+ download_note = (
+ 'Downloading player %s' % player_url
+ if self._downloader.params.get('verbose') else
+ 'Downloading %s player %s' % (player_type, player_id)
+ )
if player_type == 'js':
code = self._download_webpage(
player_url, video_id,
- note='Downloading %s player %s' % (player_type, player_id),
+ note=download_note,
errnote='Download of %s failed' % player_url)
res = self._parse_sig_js(code)
elif player_type == 'swf':
urlh = self._request_webpage(
player_url, video_id,
- note='Downloading %s player %s' % (player_type, player_id),
+ note=download_note,
errnote='Download of %s failed' % player_url)
code = urlh.read()
res = self._parse_sig_swf(code)
else:
assert False, 'Invalid player type %r' % player_type
- if cache_spec is None:
- test_string = ''.join(map(compat_chr, range(len(example_sig))))
- cache_res = res(test_string)
- cache_spec = [ord(c) for c in cache_res]
+ test_string = ''.join(map(compat_chr, range(len(example_sig))))
+ cache_res = res(test_string)
+ cache_spec = [ord(c) for c in cache_res]
self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
return res
raise ExtractorError(
'Signature extraction failed: ' + tb, cause=e)
- def _get_available_subtitles(self, video_id, webpage):
+ def _get_subtitles(self, video_id, webpage):
try:
subs_doc = self._download_xml(
'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
lang = track.attrib['lang_code']
if lang in sub_lang_list:
continue
- params = compat_urllib_parse.urlencode({
- 'lang': lang,
- 'v': video_id,
- 'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
- 'name': track.attrib['name'].encode('utf-8'),
- })
- url = 'https://www.youtube.com/api/timedtext?' + params
- sub_lang_list[lang] = url
+ sub_formats = []
+ for ext in ['sbv', 'vtt', 'srt']:
+ params = compat_urllib_parse.urlencode({
+ 'lang': lang,
+ 'v': video_id,
+ 'fmt': ext,
+ 'name': track.attrib['name'].encode('utf-8'),
+ })
+ sub_formats.append({
+ 'url': 'https://www.youtube.com/api/timedtext?' + params,
+ 'ext': ext,
+ })
+ sub_lang_list[lang] = sub_formats
if not sub_lang_list:
self._downloader.report_warning('video doesn\'t have subtitles')
return {}
return sub_lang_list
- def _get_available_automatic_caption(self, video_id, webpage):
+ def _get_automatic_captions(self, video_id, webpage):
"""We need the webpage for getting the captions url, pass it as an
argument to speed up the process."""
- sub_format = self._downloader.params.get('subtitlesformat', 'srt')
self.to_screen('%s: Looking for automatic captions' % video_id)
mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
err_msg = 'Couldn\'t find automatic captions for %s' % video_id
sub_lang_list = {}
for lang_node in caption_list.findall('target'):
sub_lang = lang_node.attrib['lang_code']
- params = compat_urllib_parse.urlencode({
- 'lang': original_lang,
- 'tlang': sub_lang,
- 'fmt': sub_format,
- 'ts': timestamp,
- 'kind': caption_kind,
- })
- sub_lang_list[sub_lang] = caption_url + '&' + params
+ sub_formats = []
+ for ext in ['sbv', 'vtt', 'srt']:
+ params = compat_urllib_parse.urlencode({
+ 'lang': original_lang,
+ 'tlang': sub_lang,
+ 'fmt': ext,
+ 'ts': timestamp,
+ 'kind': caption_kind,
+ })
+ sub_formats.append({
+ 'url': caption_url + '&' + params,
+ 'ext': ext,
+ })
+ sub_lang_list[sub_lang] = sub_formats
return sub_lang_list
# An extractor error can be raise by the download process if there are
# no automatic captions but there are subtitles
fo for fo in formats
if fo['format_id'] == format_id)
except StopIteration:
- f.update(self._formats.get(format_id, {}).items())
- formats.append(f)
+ full_info = self._formats.get(format_id, {}).copy()
+ full_info.update(f)
+ formats.append(full_info)
else:
existing_format.update(f)
return formats
# subtitles
video_subtitles = self.extract_subtitles(video_id, video_webpage)
-
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, video_webpage)
- return
+ automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
if 'length_seconds' not in video_info:
self._downloader.report_warning('unable to extract video duration')
'description': video_description,
'categories': video_categories,
'subtitles': video_subtitles,
+ 'automatic_captions': automatic_captions,
'duration': video_duration,
'age_limit': 18 if age_gate else 0,
'annotations': video_annotations,
'view_count': view_count,
'like_count': like_count,
'dislike_count': dislike_count,
+ 'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
'formats': formats,
}
| p/
)
(
- (?:PL|LL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
+ (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
# Top tracks, they can also include dots
|(?:MC)[\w\.]*
)
.*
|
- ((?:PL|LL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
+ ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
)"""
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)'
for vid_id in ids]
def _extract_mix(self, playlist_id):
- # The mixes are generated from a a single video
+ # The mixes are generated from a single video
# the id of the playlist is just 'RD' + video_id
url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
webpage = self._download_webpage(
else:
self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
- if playlist_id.startswith('RD'):
+ if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
# Mixes require a custom extraction process
return self._extract_mix(playlist_id)
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ parse_iso8601,
+ xpath_with_ns,
+ xpath_text,
+ int_or_none,
+)
+
+
+class ZapiksIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?zapiks\.(?:fr|com)/(?:(?:[a-z]{2}/)?(?P<display_id>.+?)\.html|index\.php\?.*\bmedia_id=(?P<id>\d+))'
+ _TESTS = [
+ {
+ 'url': 'http://www.zapiks.fr/ep2s3-bon-appetit-eh-be-viva.html',
+ 'md5': 'aeb3c473b2d564b2d46d664d28d5f050',
+ 'info_dict': {
+ 'id': '80798',
+ 'ext': 'mp4',
+ 'title': 'EP2S3 - Bon Appétit - Eh bé viva les pyrénées con!',
+ 'description': 'md5:7054d6f6f620c6519be1fe710d4da847',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 528,
+ 'timestamp': 1359044972,
+ 'upload_date': '20130124',
+ 'view_count': int,
+ 'comment_count': int,
+ },
+ },
+ {
+ 'url': 'http://www.zapiks.com/ep3s5-bon-appetit-baqueira-m-1.html',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.zapiks.com/nl/ep3s5-bon-appetit-baqueira-m-1.html',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.zapiks.fr/index.php?action=playerIframe&media_id=118046&width=640&height=360&autoStart=false&language=fr',
+ 'only_matching': True,
+ },
+ ]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id') or video_id
+
+ webpage = self._download_webpage(url, display_id)
+
+ if not video_id:
+ video_id = self._search_regex(
+ r'data-media-id="(\d+)"', webpage, 'video id')
+
+ playlist = self._download_xml(
+ 'http://www.zapiks.fr/view/index.php?action=playlist&media_id=%s&lang=en' % video_id,
+ display_id)
+
+ NS_MAP = {
+ 'jwplayer': 'http://rss.jwpcdn.com/'
+ }
+
+ def ns(path):
+ return xpath_with_ns(path, NS_MAP)
+
+ item = playlist.find('./channel/item')
+
+ title = xpath_text(item, 'title', 'title') or self._og_search_title(webpage)
+ description = self._og_search_description(webpage, default=None)
+ thumbnail = xpath_text(
+ item, ns('./jwplayer:image'), 'thumbnail') or self._og_search_thumbnail(webpage, default=None)
+ duration = parse_duration(self._html_search_meta(
+ 'duration', webpage, 'duration', default=None))
+ timestamp = parse_iso8601(self._html_search_meta(
+ 'uploadDate', webpage, 'upload date', default=None), ' ')
+
+ view_count = int_or_none(self._search_regex(
+ r'UserPlays:(\d+)', webpage, 'view count', default=None))
+ comment_count = int_or_none(self._search_regex(
+ r'UserComments:(\d+)', webpage, 'comment count', default=None))
+
+ formats = []
+ for source in item.findall(ns('./jwplayer:source')):
+ format_id = source.attrib['label']
+ f = {
+ 'url': source.attrib['file'],
+ 'format_id': format_id,
+ }
+ m = re.search(r'^(?P<height>\d+)[pP]', format_id)
+ if m:
+ f['height'] = int(m.group('height'))
+ formats.append(f)
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'formats': formats,
+ }
def __init__(self, code, objects=None):
if objects is None:
objects = {}
- self.code = self._remove_comments(code)
+ self.code = code
self._functions = {}
self._objects = objects
- def _remove_comments(self, code):
- return re.sub(r'(?s)/\*.*?\*/', '', code)
-
def interpret_statement(self, stmt, local_vars, allow_recursion=100):
if allow_recursion < 0:
raise ExtractorError('Recursion limit reached')
action='store_const', dest='extract_flat', const='in_playlist',
default=False,
help='Do not extract the videos of a playlist, only list them.')
+ general.add_option(
+ '--no-color', '--no-colors',
+ action='store_true', dest='no_color',
+ default=False,
+ help='Do not emit color codes in output.')
network = optparse.OptionGroup(parser, 'Network Options')
network.add_option(
'--max-views',
metavar='COUNT', dest='max_views', default=None, type=int,
help='Do not download any videos with more than COUNT views')
+ selection.add_option(
+ '--match-filter',
+ metavar='FILTER', dest='match_filter', default=None,
+ help=(
+ '(Experimental) Generic video filter. '
+ 'Specify any key (see help for -o for a list of available keys) to'
+ ' match if the key is present, '
+ '!key to check if the key is not present, '
+ 'key > NUMBER (like "comment_count > 12", also works with '
+ '>=, <, <=, !=, =) to compare against a number, and '
+ '& to require multiple matches. '
+ 'Values which are not known are excluded unless you'
+ ' put a question mark (?) after the operator. '
+ 'For example, to only match videos that have been liked more than '
+ '100 times and disliked less than 50 times (or the dislike '
+ 'functionality is not available at the given service), but which '
+ 'also have a description, use --match-filter '
+ '"like_count > 100 & dislike_count <? 50 & description" .'
+ ))
selection.add_option(
'--no-playlist',
action='store_true', dest='noplaylist', default=False,
help='If the URL refers to a video and a playlist, download only the video.')
+ selection.add_option(
+ '--yes-playlist',
+ action='store_false', dest='noplaylist', default=False,
+ help='If the URL refers to a video and a playlist, download the playlist.')
selection.add_option(
'--age-limit',
metavar='YEARS', dest='age_limit', default=None, type=int,
' You can filter the video results by putting a condition in'
' brackets, as in -f "best[height=720]"'
' (or -f "[filesize>10M]"). '
- ' This works for filesize, height, width, tbr, abr, vbr, and fps'
- ' and the comparisons <, <=, >, >=, =, != .'
+ ' This works for filesize, height, width, tbr, abr, vbr, asr, and fps'
+ ' and the comparisons <, <=, >, >=, =, !='
+ ' and for ext, acodec, vcodec, container, and protocol'
+ ' and the comparisons =, != .'
' Formats for which the value is not known are excluded unless you'
' put a question mark (?) after the operator.'
' You can combine format filters, so '
help='lists all available subtitles for the video')
subtitles.add_option(
'--sub-format',
- action='store', dest='subtitlesformat', metavar='FORMAT', default='srt',
- help='subtitle format (default=srt) ([sbv/vtt] youtube only)')
+ action='store', dest='subtitlesformat', metavar='FORMAT', default='best',
+ help='subtitle format; accepts a formats preference, for example: "ass/srt/best"')
subtitles.add_option(
'--sub-lang', '--sub-langs', '--srt-lang',
action='callback', dest='subtitleslangs', metavar='LANGS', type='str',
'--xattr-set-filesize',
dest='xattr_set_filesize', action='store_true',
help='(experimental) set file xattribute ytdl.filesize with expected filesize')
+ downloader.add_option(
+ '--hls-prefer-native',
+ dest='hls_prefer_native', action='store_true',
+ help='(experimental) Use the native HLS downloader instead of ffmpeg.')
downloader.add_option(
'--external-downloader',
dest='external_downloader', metavar='COMMAND',
action='store_true', dest='youtube_print_sig_code', default=False,
help=optparse.SUPPRESS_HELP)
verbosity.add_option(
- '--print-traffic',
+ '--print-traffic', '--dump-headers',
dest='debug_printtraffic', action='store_true', default=False,
help='Display sent and read HTTP traffic')
verbosity.add_option(
'--prefer-ffmpeg',
action='store_true', dest='prefer_ffmpeg',
help='Prefer ffmpeg over avconv for running the postprocessors')
+ postproc.add_option(
+ '--ffmpeg-location', '--avconv-location', metavar='PATH',
+ dest='ffmpeg_location',
+ help='Location of the ffmpeg/avconv binary; either the path to the binary or its containing directory.')
postproc.add_option(
'--exec',
metavar='CMD', dest='exec_cmd',
help='Execute a command on the file after downloading, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'')
+ postproc.add_option(
+ '--convert-subtitles', '--convert-subs',
+ metavar='FORMAT', dest='convertsubtitles', default=None,
+ help='Convert the subtitles to another format (currently supported: srt|ass|vtt)')
parser.add_option_group(general)
parser.add_option_group(network)
if opts.verbose:
write_string('[debug] Override config: ' + repr(overrideArguments) + '\n')
else:
- commandLineConf = sys.argv[1:]
- if '--ignore-config' in commandLineConf:
- systemConf = []
- userConf = []
+ command_line_conf = sys.argv[1:]
+ if '--ignore-config' in command_line_conf:
+ system_conf = []
+ user_conf = []
else:
- systemConf = _readOptions('/etc/youtube-dl.conf')
- if '--ignore-config' in systemConf:
- userConf = []
+ system_conf = _readOptions('/etc/youtube-dl.conf')
+ if '--ignore-config' in system_conf:
+ user_conf = []
else:
- userConf = _readUserConf()
- argv = systemConf + userConf + commandLineConf
+ user_conf = _readUserConf()
+ argv = system_conf + user_conf + command_line_conf
opts, args = parser.parse_args(argv)
if opts.verbose:
- write_string('[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
- write_string('[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
- write_string('[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
+ write_string('[debug] System config: ' + repr(_hide_login_info(system_conf)) + '\n')
+ write_string('[debug] User config: ' + repr(_hide_login_info(user_conf)) + '\n')
+ write_string('[debug] Command-line args: ' + repr(_hide_login_info(command_line_conf)) + '\n')
return parser, opts, args
FFmpegMergerPP,
FFmpegMetadataPP,
FFmpegVideoConvertorPP,
+ FFmpegSubtitlesConvertorPP,
)
from .xattrpp import XAttrMetadataPP
from .execafterdownload import ExecAfterDownloadPP
'FFmpegMergerPP',
'FFmpegMetadataPP',
'FFmpegPostProcessor',
+ 'FFmpegSubtitlesConvertorPP',
'FFmpegVideoConvertorPP',
'XAttrMetadataPP',
]
from __future__ import unicode_literals
+import io
import os
import subprocess
import sys
class FFmpegPostProcessor(PostProcessor):
def __init__(self, downloader=None, deletetempfiles=False):
PostProcessor.__init__(self, downloader)
- self._versions = self.get_versions()
self._deletetempfiles = deletetempfiles
+ self._determine_executables()
def check_version(self):
- if not self._executable:
+ if not self.available:
raise FFmpegPostProcessorError('ffmpeg or avconv not found. Please install one.')
- required_version = '10-0' if self._uses_avconv() else '1.0'
+ required_version = '10-0' if self.basename == 'avconv' else '1.0'
if is_outdated_version(
- self._versions[self._executable], required_version):
+ self._versions[self.basename], required_version):
warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
- self._executable, self._executable, required_version)
+ self.basename, self.basename, required_version)
if self._downloader:
self._downloader.report_warning(warning)
@staticmethod
- def get_versions():
- programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
- return dict((p, get_exe_version(p, args=['-version'])) for p in programs)
-
- @property
- def available(self):
- return self._executable is not None
+ def get_versions(downloader=None):
+ return FFmpegPostProcessor(downloader)._versions
- @property
- def _executable(self):
- if self._downloader.params.get('prefer_ffmpeg', False):
+ def _determine_executables(self):
+ programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
+ prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', False)
+
+ self.basename = None
+ self.probe_basename = None
+
+ self._paths = None
+ self._versions = None
+ if self._downloader:
+ location = self._downloader.params.get('ffmpeg_location')
+ if location is not None:
+ if not os.path.exists(location):
+ self._downloader.report_warning(
+ 'ffmpeg-location %s does not exist! '
+ 'Continuing without avconv/ffmpeg.' % (location))
+ self._versions = {}
+ return
+ elif not os.path.isdir(location):
+ basename = os.path.splitext(os.path.basename(location))[0]
+ if basename not in programs:
+ self._downloader.report_warning(
+ 'Cannot identify executable %s, its basename should be one of %s. '
+ 'Continuing without avconv/ffmpeg.' %
+ (location, ', '.join(programs)))
+ self._versions = {}
+ return
+ location = os.path.dirname(os.path.abspath(location))
+ if basename in ('ffmpeg', 'ffprobe'):
+ prefer_ffmpeg = True
+
+ self._paths = dict(
+ (p, os.path.join(location, p)) for p in programs)
+ self._versions = dict(
+ (p, get_exe_version(self._paths[p], args=['-version']))
+ for p in programs)
+ if self._versions is None:
+ self._versions = dict(
+ (p, get_exe_version(p, args=['-version'])) for p in programs)
+ self._paths = dict((p, p) for p in programs)
+
+ if prefer_ffmpeg:
prefs = ('ffmpeg', 'avconv')
else:
prefs = ('avconv', 'ffmpeg')
for p in prefs:
if self._versions[p]:
- return p
- return None
+ self.basename = p
+ break
- @property
- def _probe_executable(self):
- if self._downloader.params.get('prefer_ffmpeg', False):
+ if prefer_ffmpeg:
prefs = ('ffprobe', 'avprobe')
else:
prefs = ('avprobe', 'ffprobe')
for p in prefs:
if self._versions[p]:
- return p
- return None
+ self.probe_basename = p
+ break
+
+ @property
+ def available(self):
+ return self.basename is not None
- def _uses_avconv(self):
- return self._executable == 'avconv'
+ @property
+ def executable(self):
+ return self._paths[self.basename]
+
+ @property
+ def probe_executable(self):
+ return self._paths[self.probe_basename]
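# Illustrative (hypothetical path): with --ffmpeg-location /opt/ffmpeg/bin,
# _determine_executables fills _paths with '/opt/ffmpeg/bin/<program>' for
# every program name, and self.executable resolves to the full path of the
# preferred binary; without the option, _paths is the identity mapping and
# the usual PATH lookup applies.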
def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
self.check_version()
files_cmd = []
for path in input_paths:
files_cmd.extend([encodeArgument('-i'), encodeFilename(path, True)])
- cmd = ([encodeFilename(self._executable, True), encodeArgument('-y')] +
+ cmd = ([encodeFilename(self.executable, True), encodeArgument('-y')] +
files_cmd +
[encodeArgument(o) for o in opts] +
[encodeFilename(self._ffmpeg_filename_argument(out_path), True)])
if self._downloader.params.get('verbose', False):
self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd))
- p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
stdout, stderr = p.communicate()
if p.returncode != 0:
stderr = stderr.decode('utf-8', 'replace')
def get_audio_codec(self, path):
- if not self._probe_executable:
+ if not self.probe_executable:
raise PostProcessingError('ffprobe or avprobe not found. Please install one.')
try:
cmd = [
- encodeFilename(self._probe_executable, True),
+ encodeFilename(self.probe_executable, True),
encodeArgument('-show_streams'),
encodeFilename(self._ffmpeg_filename_argument(path), True)]
- handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
+ if self._downloader.params.get('verbose', False):
+ self._downloader.to_screen('[debug] %s command line: %s' % (self.basename, shell_quote(cmd)))
+ handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE, stdin=subprocess.PIPE)
output = handle.communicate()[0]
if handle.wait() != 0:
return None
if filecodec is None:
raise PostProcessingError('WARNING: unable to obtain file audio codec with ffprobe')
- uses_avconv = self._uses_avconv()
more_opts = []
if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
# Lossless, but in another container
acodec = 'copy'
extension = 'm4a'
- more_opts = ['-bsf:a' if uses_avconv else '-absf', 'aac_adtstoasc']
+ more_opts = ['-bsf:a', 'aac_adtstoasc']
elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']:
# Lossless if possible
acodec = 'copy'
more_opts = []
if self._preferredquality is not None:
if int(self._preferredquality) < 10:
- more_opts += ['-q:a' if uses_avconv else '-aq', self._preferredquality]
+ more_opts += ['-q:a', self._preferredquality]
else:
- more_opts += ['-b:a' if uses_avconv else '-ab', self._preferredquality + 'k']
+ more_opts += ['-b:a', self._preferredquality + 'k']
else:
# We convert the audio (lossy)
acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'opus': 'opus', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
if self._preferredquality is not None:
# The opus codec doesn't support the -aq option
if int(self._preferredquality) < 10 and extension != 'opus':
- more_opts += ['-q:a' if uses_avconv else '-aq', self._preferredquality]
+ more_opts += ['-q:a', self._preferredquality]
else:
- more_opts += ['-b:a' if uses_avconv else '-ab', self._preferredquality + 'k']
+ more_opts += ['-b:a', self._preferredquality + 'k']
if self._preferredcodec == 'aac':
more_opts += ['-f', 'adts']
if self._preferredcodec == 'm4a':
- more_opts += ['-bsf:a' if uses_avconv else '-absf', 'aac_adtstoasc']
+ more_opts += ['-bsf:a', 'aac_adtstoasc']
if self._preferredcodec == 'vorbis':
extension = 'ogg'
if self._preferredcodec == 'wav':
if self._nopostoverwrites and os.path.exists(encodeFilename(new_path)):
self._downloader.to_screen('[youtube] Post-process file %s exists, skipping' % new_path)
else:
- self._downloader.to_screen('[' + self._executable + '] Destination: ' + new_path)
+ self._downloader.to_screen('[' + self.basename + '] Destination: ' + new_path)
self.run_ffmpeg(path, new_path, acodec, more_opts)
except:
etype, e, tb = sys.exc_info()
if isinstance(e, AudioConversionError):
msg = 'audio conversion failed: ' + e.msg
else:
- msg = 'error running ' + self._executable
+ msg = 'error running ' + self.basename
raise PostProcessingError(msg)
# Try to update the date time for extracted audio file.
'zu': 'zul',
}
- def __init__(self, downloader=None, subtitlesformat='srt'):
- super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
- self._subformat = subtitlesformat
-
@classmethod
def _conver_lang_code(cls, code):
"""Convert language code from ISO 639-1 to ISO 639-2/T"""
if information['ext'] != 'mp4':
self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 files')
return True, information
- if not information.get('subtitles'):
+ subtitles = information.get('requested_subtitles')
+ if not subtitles:
self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to embed')
return True, information
- sub_langs = [key for key in information['subtitles']]
+ sub_langs = list(subtitles.keys())
filename = information['filepath']
- input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs]
+ input_files = [filename] + [subtitles_filename(filename, lang, sub_info['ext']) for lang, sub_info in subtitles.items()]
opts = [
'-map', '0',
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
return True, info
+
+
+class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
+ def __init__(self, downloader=None, format=None):
+ super(FFmpegSubtitlesConvertorPP, self).__init__(downloader)
+ self.format = format
+
+ def run(self, info):
+ subs = info.get('requested_subtitles')
+ filename = info['filepath']
+ new_ext = self.format
+ new_format = new_ext
+ if new_format == 'vtt':
+ new_format = 'webvtt'
+ if subs is None:
+ self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to convert')
+ return True, info
+ self._downloader.to_screen('[ffmpeg] Converting subtitles')
+ for lang, sub in subs.items():
+ ext = sub['ext']
+ if ext == new_ext:
+ self._downloader.to_screen(
+ '[ffmpeg] Subtitle file for %s is already in the requested '
+ 'format' % new_ext)
+ continue
+ new_file = subtitles_filename(filename, lang, new_ext)
+ self.run_ffmpeg(
+ subtitles_filename(filename, lang, ext),
+ new_file, ['-f', new_format])
+
+ with io.open(new_file, 'rt', encoding='utf-8') as f:
+ subs[lang] = {
+ 'ext': new_ext,
+ 'data': f.read(),
+ }
+
+ return True, info
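# Hedged usage sketch (not part of the patch): wiring the new convertor up
# by hand, assuming an already configured YoutubeDL instance `ydl`:
#   pp = FFmpegSubtitlesConvertorPP(ydl, format='srt')
#   ydl.add_post_processor(pp)
# Passing --convert-subtitles srt on the command line sets this up
# automatically.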
import json
import locale
import math
+import operator
import os
import pipes
import platform
compiled_regex_type = type(re.compile(''))
std_headers = {
- 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
+ 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate',
}
+ENGLISH_MONTH_NAMES = [
+ 'January', 'February', 'March', 'April', 'May', 'June',
+ 'July', 'August', 'September', 'October', 'November', 'December']
+
+
def preferredencoding():
"""Get preferred encoding.
# Common case of "Foreign band name - English song title"
if restricted and result.startswith('-_'):
result = result[2:]
+ if result.startswith('-'):
+ result = '_' + result[len('-'):]
if not result:
result = '_'
return result
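# e.g. (illustrative): sanitize_filename('-Movie Title') now yields
# '_Movie Title', so a generated filename can no longer be mistaken for a
# command-line flag by other tools.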
req, **kwargs)
-def parse_iso8601(date_str, delimiter='T'):
+def parse_iso8601(date_str, delimiter='T', timezone=None):
""" Return a UNIX timestamp from the given date """
if date_str is None:
return None
- m = re.search(
- r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
- date_str)
- if not m:
- timezone = datetime.timedelta()
- else:
- date_str = date_str[:-len(m.group(0))]
- if not m.group('sign'):
+ if timezone is None:
+ m = re.search(
+ r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
+ date_str)
+ if not m:
timezone = datetime.timedelta()
else:
- sign = 1 if m.group('sign') == '+' else -1
- timezone = datetime.timedelta(
- hours=sign * int(m.group('hours')),
- minutes=sign * int(m.group('minutes')))
+ date_str = date_str[:-len(m.group(0))]
+ if not m.group('sign'):
+ timezone = datetime.timedelta()
+ else:
+ sign = 1 if m.group('sign') == '+' else -1
+ timezone = datetime.timedelta(
+ hours=sign * int(m.group('hours')),
+ minutes=sign * int(m.group('minutes')))
date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
dt = datetime.datetime.strptime(date_str, date_format) - timezone
return calendar.timegm(dt.timetuple())
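# Illustrative use of the new parameter, following the code above: a
# pre-parsed offset can be supplied for date strings that carry none of
# their own, e.g.
#   parse_iso8601('2015-02-28 12:00:00', delimiter=' ',
#                 timezone=datetime.timedelta(hours=2))
# subtracts the two-hour offset and returns the UNIX timestamp for
# 2015-02-28 10:00:00 UTC.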
def not_a_console(handle):
if handle == INVALID_HANDLE_VALUE or handle is None:
return True
- return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
- or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
+ return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or
+ GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
if not_a_console(h):
return False
def month_by_name(name):
""" Return the number of a month by (locale-independently) English name """
- ENGLISH_NAMES = [
- 'January', 'February', 'March', 'April', 'May', 'June',
- 'July', 'August', 'September', 'October', 'November', 'December']
try:
- return ENGLISH_NAMES.index(name) + 1
+ return ENGLISH_MONTH_NAMES.index(name) + 1
+ except ValueError:
+ return None
+
+
+def month_by_abbreviation(abbrev):
+ """ Return the number of a month by (locale-independently) English
+ abbreviations """
+
+ try:
+ return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
except ValueError:
return None
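# Quick sanity check (illustrative):
#   month_by_abbreviation('Feb') == 2
#   month_by_abbreviation('feb') is None  # the lookup is case-sensitive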
(?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
(?P<only_hours>[0-9.]+)\s*(?:hours?)|
+ \s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*|
(?:
(?:
(?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
return float_or_none(m.group('only_hours'), invscale=60 * 60)
if m.group('secs'):
res += int(m.group('secs'))
+ if m.group('mins_reversed'):
+ res += int(m.group('mins_reversed')) * 60
if m.group('mins'):
res += int(m.group('mins')) * 60
if m.group('hours'):
res += int(m.group('hours')) * 60 * 60
+ if m.group('hours_reversed'):
+ res += int(m.group('hours_reversed')) * 60 * 60
if m.group('days'):
res += int(m.group('days')) * 24 * 60 * 60
if m.group('ms'):
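# The new *_reversed groups accept "hours first" spellings (illustrative
# values derived from the regex above): parse_duration('2h 32m') == 9120,
# and parse_duration('2 hours 32 minutes') == 9120 as well.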
return '"%s"' % v
res = re.sub(r'''(?x)
- "(?:[^"\\]*(?:\\\\|\\")?)*"|
- '(?:[^'\\]*(?:\\\\|\\')?)*'|
+ "(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"|
+ '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
[a-zA-Z_][.a-zA-Z_0-9]*
''', fix_kv, code)
res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
return ' '.join(shlex_quote(a) for a in args)
+def mimetype2ext(mt):
+ _, _, res = mt.rpartition('/')
+
+ return {
+ 'x-ms-wmv': 'wmv',
+ 'x-mp4-fragmented': 'mp4',
+ }.get(res, res)
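# For example (illustrative): mimetype2ext('video/x-ms-wmv') == 'wmv',
# mimetype2ext('video/x-mp4-fragmented') == 'mp4', and unmapped subtypes
# pass through unchanged, e.g. mimetype2ext('video/webm') == 'webm'.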
+
+
def urlhandle_detect_ext(url_handle):
try:
url_handle.headers
if e:
return e
- return getheader('Content-Type').split("/")[1]
+ return mimetype2ext(getheader('Content-Type'))
def age_restricted(content_limit, age_limit):
max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
return '\n'.join(format_str % tuple(row) for row in table)
+
+
+def _match_one(filter_part, dct):
+ COMPARISON_OPERATORS = {
+ '<': operator.lt,
+ '<=': operator.le,
+ '>': operator.gt,
+ '>=': operator.ge,
+ '=': operator.eq,
+ '!=': operator.ne,
+ }
+ operator_rex = re.compile(r'''(?x)\s*
+ (?P<key>[a-z_]+)
+ \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+ (?:
+ (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
+ (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
+ )
+ \s*$
+ ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
+ m = operator_rex.search(filter_part)
+ if m:
+ op = COMPARISON_OPERATORS[m.group('op')]
+ if m.group('strval') is not None:
+ if m.group('op') not in ('=', '!='):
+ raise ValueError(
+ 'Operator %s does not support string values!' % m.group('op'))
+ comparison_value = m.group('strval')
+ else:
+ try:
+ comparison_value = int(m.group('intval'))
+ except ValueError:
+ comparison_value = parse_filesize(m.group('intval'))
+ if comparison_value is None:
+ comparison_value = parse_filesize(m.group('intval') + 'B')
+ if comparison_value is None:
+ raise ValueError(
+ 'Invalid integer value %r in filter part %r' % (
+ m.group('intval'), filter_part))
+ actual_value = dct.get(m.group('key'))
+ if actual_value is None:
+ return m.group('none_inclusive')
+ return op(actual_value, comparison_value)
+
+ UNARY_OPERATORS = {
+ '': lambda v: v is not None,
+ '!': lambda v: v is None,
+ }
+ operator_rex = re.compile(r'''(?x)\s*
+ (?P<op>%s)\s*(?P<key>[a-z_]+)
+ \s*$
+ ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
+ m = operator_rex.search(filter_part)
+ if m:
+ op = UNARY_OPERATORS[m.group('op')]
+ actual_value = dct.get(m.group('key'))
+ return op(actual_value)
+
+ raise ValueError('Invalid filter part %r' % filter_part)
+
+
+def match_str(filter_str, dct):
+ """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
+
+ return all(
+ _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
+
+
+def match_filter_func(filter_str):
+ def _match_func(info_dict):
+ if match_str(filter_str, info_dict):
+ return None
+ else:
+ video_title = info_dict.get('title', info_dict.get('id', 'video'))
+ return '%s does not pass filter %s, skipping ...' % (video_title, filter_str)
+ return _match_func
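# Worked example (mirrors the --match-filter help text; the values are
# hypothetical): a video with 190 likes, no dislike information and a
# description passes the documented filter:
#   match_str('like_count > 100 & dislike_count <? 50 & description',
#             {'like_count': 190, 'description': 'a description'})
# evaluates truthy, because the '?' makes the unknown dislike_count
# acceptable.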
from __future__ import unicode_literals
-__version__ = '2015.02.06'
+__version__ = '2015.02.28'