+version 2018.06.18
+
+Core
+* [downloader/rtmp] Fix downloading in verbose mode (#16736)
+
+Extractors
++ [markiza] Add support for markiza.sk (#16750)
+* [wat] Try all supported adaptive URLs
++ [6play] Add support for rtlplay.be and extract HD USP formats
++ [rtbf] Add support for audio and live streams (#9638, #11923)
++ [rtbf] Extract HLS, DASH and all HTTP formats
++ [rtbf] Extract subtitles
++ [rtbf] Fix up specific HTTP URLs (#16101)
++ [expressen] Add support for expressen.se
+* [vidzi] Fix extraction (#16678)
+* [pbs] Improve extraction (#16623, #16684)
+* [bilibili] Restrict cid regular expression (#16638, #16734)
+
+
+version 2018.06.14
+
+Core
+* [downloader/http] Fix retry on error when streaming to stdout (#16699)
+
+Extractors
++ [discoverynetworks] Add support for disco-api videos (#16724)
++ [dailymotion] Add support for password protected videos (#9789)
++ [abc:iview] Add support for livestreams (#12354)
+* [abc:iview] Fix extraction (#16704)
++ [crackle] Add support for sonycrackle.com (#16698)
++ [tvnet] Add support for tvnet.gov.vn (#15462)
+* [nrk] Update API hosts and try all previously known ones (#16690)
+* [wimp] Fix YouTube embeds extraction
+
+
+version 2018.06.11
+
+Extractors
+* [npo] Extend URL regular expression and add support for npostart.nl (#16682)
++ [inc] Add support for another embed schema (#16666)
+* [tv4] Fix format extraction (#16650)
++ [nexx] Add support for free CDN (#16538)
++ [pbs] Add another COVE id pattern (#15373)
++ [rbmaradio] Add support for 192k format (#16631)
+
+
+version 2018.06.04
+
+Extractors
++ [camtube] Add support for camtube.co
++ [twitter:card] Extract guest token (#16609)
++ [chaturbate] Use geo verification headers
++ [bbc] Add support for bbcthree (#16612)
+* [youtube] Move metadata extraction after video availability check
++ [youtube] Extract track and artist
++ [safari] Add support for new URL schema (#16614)
+* [adn] Fix extraction
+
+
+version 2018.06.02
+
+Core
+* [utils] Improve determine_ext
+
+Extractors
++ [facebook] Add support for tahoe player videos (#15441, #16554)
+* [cbc] Improve extraction (#16583, #16593)
+* [openload] Improve ext extraction (#16595)
++ [twitter:card] Add support for another endpoint (#16586)
++ [openload] Add support for oload.win and oload.download (#16592)
+* [audimedia] Fix extraction (#15309)
++ [francetv] Add support for sport.francetvinfo.fr (#15645)
+* [mlb] Improve extraction (#16587)
+- [nhl] Remove old extractors
+* [rbmaradio] Check formats availability (#16585)
+
+
+version 2018.05.30
+
+Core
+* [downloader/rtmp] Generalize download messages and report time elapsed
+ on finish
+* [downloader/rtmp] Gracefully handle live streams interrupted by user
+
+Extractors
+* [teamcoco] Fix extraction for full episodes (#16573)
+* [spiegel] Fix info extraction (#16538)
++ [apa] Add support for apa.at (#15041, #15672)
++ [bellmedia] Add support for bnnbloomberg.ca (#16560)
++ [9c9media] Extract MPD formats and subtitles
+* [cammodels] Use geo verification headers
++ [ufctv] Add support for authentication (#16542)
++ [cammodels] Add support for cammodels.com (#14499)
+* [utils] Fix style id extraction for namespaced id attribute in dfxp2srt
+ (#16551)
+* [soundcloud] Detect format extension (#16549)
+* [cbc] Fix playlist title extraction (#16502)
++ [tumblr] Detect and report sensitive media (#13829)
++ [tumblr] Add support for authentication (#15133)
+
+
+version 2018.05.26
+
+Core
+* [utils] Improve parse_age_limit
+
+Extractors
+* [audiomack] Stringify video id (#15310)
+* [izlesene] Fix extraction (#16233, #16271, #16407)
++ [indavideo] Add support for generic embeds (#11989)
+* [indavideo] Fix extraction (#11221)
+* [indavideo] Sign download URLs (#16174)
++ [peertube] Add support for PeerTube based sites (#16301, #16329)
+* [imgur] Fix extraction (#16537)
++ [hidive] Add support for authentication (#16534)
++ [nbc] Add support for stream.nbcsports.com (#13911)
++ [viewlift] Add support for hoichoi.tv (#16536)
+* [go90] Extract age limit and detect DRM protection (#10127)
+* [viewlift] Fix extraction for snagfilms.com (#15766)
+* [globo] Improve extraction (#4189)
+ * Add support for authentication
+ * Simplify URL signing
+ * Extract DASH and MSS formats
+* [leeco] Fix extraction (#16464)
+* [teamcoco] Add fallback for format extraction (#16484)
+* [teamcoco] Improve URL regular expression (#16484)
+* [imdb] Improve extraction (#4085, #14557)
+
+
+version 2018.05.18
+
+Extractors
+* [vimeo:likes] Relax URL regular expression and fix single page likes
+ extraction (#16475)
+* [pluralsight] Fix clip id extraction (#16460)
++ [mychannels] Add support for mychannels.com (#15334)
+- [moniker] Remove extractor (#15336)
+* [pbs] Fix embed data extraction (#16474)
++ [mtv] Add support for paramountnetwork.com and bellator.com (#15418)
+* [youtube] Fix hd720 format position
+* [dailymotion] Remove fragment part from m3u8 URLs (#8915)
+* [3sat] Improve extraction (#15350)
+ * Extract all formats
+ * Extract more format metadata
+ * Improve format sorting
+ * Use the native HLS downloader
+ * Detect and bypass geo-restriction
++ [dtube] Add support for d.tube (#15201)
+* [options] Fix typo (#16450)
+* [youtube] Improve format filesize extraction (#16453)
+* [youtube] Make uploader extraction non-fatal (#16444)
+* [youtube] Fix extraction for embed restricted live streams (#16433)
+* [nbc] Improve info extraction (#16440)
+* [twitch:clips] Fix extraction (#16429)
+* [redditr] Relax URL regular expression (#16426, #16427)
+* [mixcloud] Bypass throttling for HTTP formats (#12579, #16424)
++ [nick] Add support for nickjr.de (#13230)
+* [teamcoco] Fix extraction (#16374)
+
+
+version 2018.05.09
+
+Core
+* [YoutubeDL] Ensure ext exists for automatic captions
+* Introduce --geo-bypass-ip-block
+
+Extractors
++ [udemy] Extract asset captions
++ [udemy] Extract stream URLs (#16372)
++ [businessinsider] Add support for businessinsider.com (#16387, #16388, #16389)
++ [cloudflarestream] Add support for cloudflarestream.com (#16375)
+* [watchbox] Fix extraction (#16356)
+* [discovery] Extract Affiliate/Anonymous Auth Token from cookies (#14954)
++ [itv:btcc] Add support for itv.com/btcc (#16139)
+* [tunein] Use live title for live streams (#16347)
+* [itv] Improve extraction (#16253)
+
+
+version 2018.05.01
+
+Core
+* [downloader/fragment] Restart download if .ytdl file is corrupt (#16312)
++ [extractor/common] Extract interaction statistic
++ [utils] Add merge_dicts
++ [extractor/common] Add _download_json_handle
+
+Extractors
+* [kaltura] Improve iframe embeds detection (#16337)
++ [udemy] Extract output renditions (#16289, #16291, #16320, #16321, #16334,
+ #16335)
++ [zattoo] Add support for zattoo.com and mobiltv.quickline.com (#14668, #14676)
+* [yandexmusic] Convert release_year to int
+* [udemy] Override _download_webpage_handle instead of _download_webpage
+* [xiami] Override _download_webpage_handle instead of _download_webpage
+* [yandexmusic] Override _download_webpage_handle instead of _download_webpage
+* [youtube] Correctly disable polymer on all requests (#16323, #16326)
+* [generic] Prefer enclosures over links in RSS feeds (#16189)
++ [redditr] Add support for old.reddit.com URLs (#16274)
+* [nrktv] Update API host (#16324)
++ [imdb] Extract all formats (#16249)
++ [vimeo] Extract JSON-LD (#16295)
+* [funk:channel] Improve extraction (#16285)
+
+
version 2018.04.25
Core
## Network Options:
--proxy URL Use the specified HTTP/HTTPS/SOCKS proxy.
- To enable experimental SOCKS proxy, specify
- a proper scheme. For example
+ To enable SOCKS proxy, specify a proper
+ scheme. For example
socks5://127.0.0.1:1080/. Pass in an empty
string (--proxy "") for direct connection
--socket-timeout SECONDS Time to wait before giving up, in seconds
--geo-verification-proxy URL Use this proxy to verify the IP address for
some geo-restricted sites. The default
proxy specified by --proxy (or none, if the
- options is not present) is used for the
+ option is not present) is used for the
actual downloading.
--geo-bypass Bypass geographic restriction via faking
- X-Forwarded-For HTTP header (experimental)
+ X-Forwarded-For HTTP header
--no-geo-bypass Do not bypass geographic restriction via
faking X-Forwarded-For HTTP header
- (experimental)
--geo-bypass-country CODE Force bypass geographic restriction with
explicitly provided two-letter ISO 3166-2
- country code (experimental)
+ country code
+ --geo-bypass-ip-block IP_BLOCK Force bypass geographic restriction with
+ explicitly provided IP block in CIDR
+ notation
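
The new --geo-bypass-ip-block option fakes the X-Forwarded-For header with an address drawn from the given CIDR block, e.g. youtube-dl --geo-bypass-ip-block 198.51.100.0/24 URL. A minimal sketch of picking a random IPv4 address from such a block using only the standard library (an illustration of the idea, not the exact youtube-dl code):

    import random
    import socket
    import struct

    def random_ipv4_from_block(block):
        # block is an IP block in CIDR notation, e.g. '198.51.100.0/24'
        addr, preflen = block.split('/')
        addr_int = struct.unpack('!L', socket.inet_aton(addr))[0]
        host_bits = 32 - int(preflen)
        rand_int = ((addr_int & (0xffffffff << host_bits)) |
                    random.randint(0, 2 ** host_bits - 1))
        return socket.inet_ntoa(struct.pack('!L', rand_int))

    print(random_ipv4_from_block('198.51.100.0/24'))  # e.g. '198.51.100.42'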
## Video Selection:
--playlist-start NUMBER Playlist video to start at (default is 1)
--playlist-reverse Download playlist videos in reverse order
--playlist-random Download playlist videos in random order
--xattr-set-filesize Set file xattribute ytdl.filesize with
- expected file size (experimental)
+ expected file size
--hls-prefer-native Use the native HLS downloader instead of
ffmpeg
--hls-prefer-ffmpeg Use ffmpeg instead of the native HLS
Network Options:
--proxy URL Use the specified HTTP/HTTPS/SOCKS proxy.
- To enable experimental SOCKS proxy, specify
- a proper scheme. For example
+ To enable SOCKS proxy, specify a proper
+ scheme. For example
socks5://127.0.0.1:1080/. Pass in an empty
string (--proxy "") for direct connection
--socket-timeout SECONDS Time to wait before giving up, in seconds
--geo-verification-proxy URL Use this proxy to verify the IP address for
some geo-restricted sites. The default
proxy specified by --proxy (or none, if the
- options is not present) is used for the
+ option is not present) is used for the
actual downloading.
--geo-bypass Bypass geographic restriction via faking
- X-Forwarded-For HTTP header (experimental)
+ X-Forwarded-For HTTP header
--no-geo-bypass Do not bypass geographic restriction via
faking X-Forwarded-For HTTP header
- (experimental)
--geo-bypass-country CODE Force bypass geographic restriction with
explicitly provided two-letter ISO 3166-2
- country code (experimental)
+ country code
+ --geo-bypass-ip-block IP_BLOCK Force bypass geographic restriction with
+ explicitly provided IP block in CIDR
+ notation
Video Selection:
--playlist-reverse Download playlist videos in reverse order
--playlist-random Download playlist videos in random order
--xattr-set-filesize Set file xattribute ytdl.filesize with
- expected file size (experimental)
+ expected file size
--hls-prefer-native Use the native HLS downloader instead of
ffmpeg
--hls-prefer-ffmpeg Use ffmpeg instead of the native HLS
#!/usr/bin/env python3
from __future__ import unicode_literals
-import hashlib
-import urllib.request
import json
versions_info = json.load(open('update/versions.json'))
version = versions_info['latest']
-URL = versions_info['versions'][version]['bin'][0]
-
-data = urllib.request.urlopen(URL).read()
+version_dict = versions_info['versions'][version]
# Read template page
with open('download.html.in', 'r', encoding='utf-8') as tmplf:
template = tmplf.read()
-sha256sum = hashlib.sha256(data).hexdigest()
template = template.replace('@PROGRAM_VERSION@', version)
-template = template.replace('@PROGRAM_URL@', URL)
-template = template.replace('@PROGRAM_SHA256SUM@', sha256sum)
-template = template.replace('@EXE_URL@', versions_info['versions'][version]['exe'][0])
-template = template.replace('@EXE_SHA256SUM@', versions_info['versions'][version]['exe'][1])
-template = template.replace('@TAR_URL@', versions_info['versions'][version]['tar'][0])
-template = template.replace('@TAR_SHA256SUM@', versions_info['versions'][version]['tar'][1])
+template = template.replace('@PROGRAM_URL@', version_dict['bin'][0])
+template = template.replace('@PROGRAM_SHA256SUM@', version_dict['bin'][1])
+template = template.replace('@EXE_URL@', version_dict['exe'][0])
+template = template.replace('@EXE_SHA256SUM@', version_dict['exe'][1])
+template = template.replace('@TAR_URL@', version_dict['tar'][0])
+template = template.replace('@TAR_SHA256SUM@', version_dict['tar'][1])
with open('download.html', 'w', encoding='utf-8') as dlf:
dlf.write(template)
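
The rewrite above assumes update/versions.json stores a [url, sha256] pair per artifact, so version_dict['bin'][1] can replace the old download-and-hashlib-digest step. A plausible shape for one entry (URLs and digests are placeholders):

    versions_info = {
        'latest': '2018.06.18',
        'versions': {
            '2018.06.18': {
                'bin': ['https://yt-dl.org/downloads/2018.06.18/youtube-dl', '<sha256>'],
                'exe': ['https://yt-dl.org/downloads/2018.06.18/youtube-dl.exe', '<sha256>'],
                'tar': ['https://yt-dl.org/downloads/2018.06.18/youtube-dl-2018.06.18.tar.gz', '<sha256>'],
            },
        },
    }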
for fn in glob.glob('*.html*'):
with io.open(fn, encoding='utf-8') as f:
content = f.read()
- newc = re.sub(r'(?P<copyright>Copyright © 2006-)(?P<year>[0-9]{4})', 'Copyright © 2006-' + year, content)
+ newc = re.sub(r'(?P<copyright>Copyright © 2011-)(?P<year>[0-9]{4})', 'Copyright © 2011-' + year, content)
if content != newc:
tmpFn = fn + '.part'
with io.open(tmpFn, 'wt', encoding='utf-8') as outf:
- **8tracks**
- **91porn**
- **9c9media**
- - **9c9media:stack**
- **9gag**
- **9now.com.au**
- **abc.net.au**
- **anitube.se**
- **Anvato**
- **AnySex**
+ - **APA**
- **Aparat**
- **AppleConnect**
- **AppleDaily**: 臺灣蘋果日報
- **Beatport**
- **Beeg**
- **BehindKink**
+ - **Bellator**
- **BellMedia**
- **Bet**
- **Bigflix**
- **BRMediathek**: Bayerischer Rundfunk Mediathek
- **bt:article**: Bergens Tidende Articles
- **bt:vestlendingen**: Bergens Tidende - Vestlendingen
+ - **BusinessInsider**
- **BuzzFeed**
- **BYUtv**
- **Camdemy**
- **CamdemyFolder**
+ - **CamModels**
+ - **CamTube**
- **CamWithHer**
- **canalc2.tv**
- **Canalplus**: mycanal.fr and piwiplus.fr
- **ClipRs**
- **Clipsyndicate**
- **CloserToTruth**
+ - **CloudflareStream**
- **cloudtime**: CloudTime
- **Cloudy**
- **Clubic**
- **DrTuber**
- **drtv**
- **drtv:live**
+ - **DTube**
- **Dumpert**
- **dvtv**: http://video.aktualne.cz/
- **dw**
- **Europa**
- **EveryonesMixtape**
- **ExpoTV**
+ - **Expressen**
- **ExtremeTube**
- **EyedoTV**
- **facebook**
- **ImgurAlbum**
- **Ina**
- **Inc**
- - **Indavideo**
- **IndavideoEmbed**
- **InfoQ**
- **Instagram**
- **Ir90Tv**
- **ITTF**
- **ITV**
+ - **ITVBTCC**
- **ivi**: ivi.ru
- **ivi:compilation**: ivi.ru compilations
- **ivideon**: Ivideon TV
- **mailru**: Видео@Mail.Ru
- **mailru:music**: Музыка@Mail.Ru
- **mailru:music:search**: Музыка@Mail.Ru
- - **MakersChannel**
- **MakerTV**
- **mangomolo:live**
- **mangomolo:video**
- **ManyVids**
+ - **Markiza**
+ - **MarkizaPage**
- **massengeschmack.tv**
- **MatchTV**
- **MDR**: MDR.DE and KiKA
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
- **Mofosex**
- **Mojvideo**
- - **Moniker**: allmyvideos.net and vidspot.net
- **Morningstar**: morningstar.com
- **Motherless**
- **MotherlessGroup**
- **mva:course**: Microsoft Virtual Academy courses
- **Mwave**
- **MwaveMeetGreet**
+ - **MyChannels**
- **MySpace**
- **MySpace:album**
- **MySpass**
- **nbcolympics**
- **nbcolympics:stream**
- **NBCSports**
+ - **NBCSportsStream**
- **NBCSportsVPlayer**
- **ndr**: NDR.de - Norddeutscher Rundfunk
- **ndr:embed**
- **nfl.com**
- **NhkVod**
- **nhl.com**
- - **nhl.com:news**: NHL news
- - **nhl.com:videocenter**
- - **nhl.com:videocenter:category**: NHL videocenter category
- **nick.com**
- **nick.de**
- **nickelodeon:br**
- **PacktPubCourse**
- **PandaTV**: 熊猫TV
- **pandora.tv**: 판도라TV
+ - **ParamountNetwork**
- **parliamentlive.tv**: UK parliament videos
- **Patreon**
- **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
- **pcmag**
- **PearVideo**
+ - **PeerTube**
- **People**
- **PerformGroup**
- **periscope**: Periscope
- **qqmusic:playlist**: QQ音乐 - 歌单
- **qqmusic:singer**: QQ音乐 - 歌手
- **qqmusic:toplist**: QQ音乐 - 排行榜
+ - **Quickline**
+ - **QuicklineLive**
- **R7**
- **R7Article**
- **radio.de**
- **Spiegel**
- **Spiegel:Article**: Articles on spiegel.de
- **Spiegeltv**
- - **Spike**
+ - **sport.francetvinfo.fr**
- **Sport5**
- **SportBoxEmbed**
- **SportDeutschland**
- **tvigle**: Интернет-телевидение Tvigle.ru
- **tvland.com**
- **TVN24**
+ - **TVNet**
- **TVNoe**
- **TVNow**
- **TVNowList**
- **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
- **Zapiks**
- **Zaq1**
+ - **Zattoo**
+ - **ZattooLive**
- **ZDF**
- **ZDFChannel**
- **zingmp3**: mp3.zing.vn
universal = True
[flake8]
-exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git
+exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git,venv
ignore = E402,E501,E731,E741
is_html,
js_to_json,
limit_length,
+ merge_dicts,
mimetype2ext,
month_by_name,
multipart_encode,
self.assertEqual(determine_ext('http://example.com/foo/bar.nonext/?download', None), None)
self.assertEqual(determine_ext('http://example.com/foo/bar/mp4?download', None), None)
self.assertEqual(determine_ext('http://example.com/foo/bar.m3u8//?download'), 'm3u8')
+ self.assertEqual(determine_ext('foobar', None), None)
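
The added case asserts that determine_ext falls back to the caller-supplied default (None here) when the URL contains no usable extension. A simplified sketch consistent with these assertions (KNOWN_EXTENSIONS is abridged; the real helper lives in youtube_dl/utils.py):

    import re

    KNOWN_EXTENSIONS = ('mp4', 'm4a', 'webm', 'flv', 'm3u8', 'mpd')  # abridged

    def determine_ext(url, default_ext='unknown_video'):
        if url is None or '.' not in url:
            return default_ext  # e.g. 'foobar' has no dot at all
        guess = url.partition('?')[0].rpartition('.')[2]
        if re.match(r'^[A-Za-z0-9]+$', guess):
            return guess
        if guess.rstrip('/') in KNOWN_EXTENSIONS:  # e.g. '...bar.m3u8//?download'
            return guess.rstrip('/')
        return default_ext

    assert determine_ext('foobar', None) is None
    assert determine_ext('http://example.com/foo/bar.m3u8//?download') == 'm3u8'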
def test_find_xpath_attr(self):
testxml = '''<root>
self.assertEqual(parse_age_limit('PG-13'), 13)
self.assertEqual(parse_age_limit('TV-14'), 14)
self.assertEqual(parse_age_limit('TV-MA'), 17)
+ self.assertEqual(parse_age_limit('TV14'), 14)
+ self.assertEqual(parse_age_limit('TV_G'), 0)
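
The two new cases pin down the improved parse_age_limit: US TV parental guidelines are now matched with a '-', a '_' or no separator at all. A sketch of just that branch, consistent with the assertions above (the ratings table is abridged):

    import re

    TV_PARENTAL_GUIDELINES = {
        'TV-Y': 0, 'TV-Y7': 7, 'TV-G': 0, 'TV-PG': 0, 'TV-14': 14, 'TV-MA': 17,
    }

    def parse_age_limit(s):
        # Normalize 'TV14' and 'TV_G' to the canonical 'TV-...' keys
        m = re.match(
            r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
        if m:
            return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
        return None

    assert parse_age_limit('TV14') == 14
    assert parse_age_limit('TV_G') == 0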
def test_parse_duration(self):
self.assertEqual(parse_duration(None), None)
self.assertEqual(dict_get(d, ('b', 'c', key, )), None)
self.assertEqual(dict_get(d, ('b', 'c', key, ), skip_false_values=False), false_value)
+ def test_merge_dicts(self):
+ self.assertEqual(merge_dicts({'a': 1}, {'b': 2}), {'a': 1, 'b': 2})
+ self.assertEqual(merge_dicts({'a': 1}, {'a': 2}), {'a': 1})
+ self.assertEqual(merge_dicts({'a': 1}, {'a': None}), {'a': 1})
+ self.assertEqual(merge_dicts({'a': 1}, {'a': ''}), {'a': 1})
+ self.assertEqual(merge_dicts({'a': 1}, {}), {'a': 1})
+ self.assertEqual(merge_dicts({'a': None}, {'a': 1}), {'a': 1})
+ self.assertEqual(merge_dicts({'a': ''}, {'a': 1}), {'a': ''})
+ self.assertEqual(merge_dicts({'a': ''}, {'a': 'abc'}), {'a': 'abc'})
+ self.assertEqual(merge_dicts({'a': None}, {'a': ''}, {'a': 'abc'}), {'a': 'abc'})
+
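Taken together, the assertions specify merge_dicts' policy: earlier dicts win, None never wins, and an empty string yields to a later non-empty string. A sketch that satisfies all of the cases above (Python 3 str stands in for compat_str; the real helper is in youtube_dl/utils.py):

    def merge_dicts(*dicts):
        merged = {}
        for a_dict in dicts:
            for k, v in a_dict.items():
                if v is None:
                    continue  # None never overrides anything
                if (k not in merged or
                        (isinstance(v, str) and v and
                         isinstance(merged[k], str) and not merged[k])):
                    merged[k] = v  # first value wins, except '' vs non-empty str
        return merged
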
def test_encode_compat_str(self):
self.assertEqual(encode_compat_str(b'\xd1\x82\xd0\xb5\xd1\x81\xd1\x82', 'utf-8'), 'тест')
self.assertEqual(encode_compat_str('тест', 'utf-8'), 'тест')
.TP
.B \-\-proxy \f[I]URL\f[]
Use the specified HTTP/HTTPS/SOCKS proxy.
-To enable experimental SOCKS proxy, specify a proper scheme.
+To enable SOCKS proxy, specify a proper scheme.
For example socks5://127.0.0.1:1080/.
Pass in an empty string (\-\-proxy "") for direct connection
.RS
.TP
.B \-\-geo\-verification\-proxy \f[I]URL\f[]
Use this proxy to verify the IP address for some geo\-restricted sites.
-The default proxy specified by \-\-proxy (or none, if the options is not
+The default proxy specified by \-\-proxy (or none, if the option is not
present) is used for the actual downloading.
.RS
.RE
.TP
.B \-\-geo\-bypass
Bypass geographic restriction via faking X\-Forwarded\-For HTTP header
-(experimental)
.RS
.RE
.TP
.B \-\-no\-geo\-bypass
Do not bypass geographic restriction via faking X\-Forwarded\-For HTTP
-header (experimental)
+header
.RS
.RE
.TP
.B \-\-geo\-bypass\-country \f[I]CODE\f[]
Force bypass geographic restriction with explicitly provided two\-letter
-ISO 3166\-2 country code (experimental)
+ISO 3166\-2 country code
+.RS
+.RE
+.TP
+.B \-\-geo\-bypass\-ip\-block \f[I]IP_BLOCK\f[]
+Force bypass geographic restriction with explicitly provided IP block in
+CIDR notation
.RS
.RE
.SS Video Selection:
.RE
.TP
.B \-\-xattr\-set\-filesize
-Set file xattribute ytdl.filesize with expected file size (experimental)
+Set file xattribute ytdl.filesize with expected file size
.RS
.RE
.TP
COMPREPLY=()
cur="${COMP_WORDS[COMP_CWORD]}"
prev="${COMP_WORDS[COMP_CWORD-1]}"
- opts="--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --force-generic-extractor --default-search --ignore-config --config-location --flat-playlist --mark-watched --no-mark-watched --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --geo-verification-proxy --cn-verification-proxy --geo-bypass --no-geo-bypass --geo-bypass-country --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --limit-rate --retries --fragment-retries --skip-unavailable-fragments --abort-on-unavailable-fragment --keep-fragments --buffer-size --no-resize-buffer --http-chunk-size --test --playlist-reverse --playlist-random --xattr-set-filesize --hls-prefer-native --hls-prefer-ffmpeg --hls-use-mpegts --external-downloader --external-downloader-args --batch-file --id --output --autonumber-size --autonumber-start --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info-json --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --max-sleep-interval --format --all-formats --prefer-free-formats --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --ap-mso --ap-username --ap-password --ap-list-mso --extract-audio --audio-format --audio-quality --recode-video --postprocessor-args --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --metadata-from-title --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subs"
+ opts="--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --force-generic-extractor --default-search --ignore-config --config-location --flat-playlist --mark-watched --no-mark-watched --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --geo-verification-proxy --cn-verification-proxy --geo-bypass --no-geo-bypass --geo-bypass-country --geo-bypass-ip-block --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --limit-rate --retries --fragment-retries --skip-unavailable-fragments --abort-on-unavailable-fragment --keep-fragments --buffer-size --no-resize-buffer --http-chunk-size --test --playlist-reverse --playlist-random --xattr-set-filesize --hls-prefer-native --hls-prefer-ffmpeg --hls-use-mpegts --external-downloader --external-downloader-args --batch-file --id --output --autonumber-size --autonumber-start --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info-json --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --max-sleep-interval --format --all-formats --prefer-free-formats --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --ap-mso --ap-username --ap-password --ap-list-mso --extract-audio --audio-format --audio-quality --recode-video --postprocessor-args --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --metadata-from-title --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subs"
keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
fileopts="-a|--batch-file|--download-archive|--cookies|--load-info"
diropts="--cache-dir"
complete --command youtube-dl --long-option mark-watched --description 'Mark videos watched (YouTube only)'
complete --command youtube-dl --long-option no-mark-watched --description 'Do not mark videos watched (YouTube only)'
complete --command youtube-dl --long-option no-color --description 'Do not emit color codes in output'
-complete --command youtube-dl --long-option proxy --description 'Use the specified HTTP/HTTPS/SOCKS proxy. To enable experimental SOCKS proxy, specify a proper scheme. For example socks5://127.0.0.1:1080/. Pass in an empty string (--proxy "") for direct connection'
+complete --command youtube-dl --long-option proxy --description 'Use the specified HTTP/HTTPS/SOCKS proxy. To enable SOCKS proxy, specify a proper scheme. For example socks5://127.0.0.1:1080/. Pass in an empty string (--proxy "") for direct connection'
complete --command youtube-dl --long-option socket-timeout --description 'Time to wait before giving up, in seconds'
complete --command youtube-dl --long-option source-address --description 'Client-side IP address to bind to'
complete --command youtube-dl --long-option force-ipv4 --short-option 4 --description 'Make all connections via IPv4'
complete --command youtube-dl --long-option force-ipv6 --short-option 6 --description 'Make all connections via IPv6'
-complete --command youtube-dl --long-option geo-verification-proxy --description 'Use this proxy to verify the IP address for some geo-restricted sites. The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading.'
+complete --command youtube-dl --long-option geo-verification-proxy --description 'Use this proxy to verify the IP address for some geo-restricted sites. The default proxy specified by --proxy (or none, if the option is not present) is used for the actual downloading.'
complete --command youtube-dl --long-option cn-verification-proxy
-complete --command youtube-dl --long-option geo-bypass --description 'Bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)'
-complete --command youtube-dl --long-option no-geo-bypass --description 'Do not bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)'
-complete --command youtube-dl --long-option geo-bypass-country --description 'Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code (experimental)'
+complete --command youtube-dl --long-option geo-bypass --description 'Bypass geographic restriction via faking X-Forwarded-For HTTP header'
+complete --command youtube-dl --long-option no-geo-bypass --description 'Do not bypass geographic restriction via faking X-Forwarded-For HTTP header'
+complete --command youtube-dl --long-option geo-bypass-country --description 'Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code'
+complete --command youtube-dl --long-option geo-bypass-ip-block --description 'Force bypass geographic restriction with explicitly provided IP block in CIDR notation'
complete --command youtube-dl --long-option playlist-start --description 'Playlist video to start at (default is %default)'
complete --command youtube-dl --long-option playlist-end --description 'Playlist video to end at (default is last)'
complete --command youtube-dl --long-option playlist-items --description 'Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.'
complete --command youtube-dl --long-option test
complete --command youtube-dl --long-option playlist-reverse --description 'Download playlist videos in reverse order'
complete --command youtube-dl --long-option playlist-random --description 'Download playlist videos in random order'
-complete --command youtube-dl --long-option xattr-set-filesize --description 'Set file xattribute ytdl.filesize with expected file size (experimental)'
+complete --command youtube-dl --long-option xattr-set-filesize --description 'Set file xattribute ytdl.filesize with expected file size'
complete --command youtube-dl --long-option hls-prefer-native --description 'Use the native HLS downloader instead of ffmpeg'
complete --command youtube-dl --long-option hls-prefer-ffmpeg --description 'Use ffmpeg instead of the native HLS downloader'
complete --command youtube-dl --long-option hls-use-mpegts --description 'Use the mpegts container for HLS videos, allowing to play the video while downloading (some players may not be able to play it)'
elif [[ ${prev} == "--recode-video" ]]; then
_arguments '*: :(mp4 flv ogg webm mkv)'
else
- _arguments '*: :(--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --force-generic-extractor --default-search --ignore-config --config-location --flat-playlist --mark-watched --no-mark-watched --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --geo-verification-proxy --cn-verification-proxy --geo-bypass --no-geo-bypass --geo-bypass-country --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --limit-rate --retries --fragment-retries --skip-unavailable-fragments --abort-on-unavailable-fragment --keep-fragments --buffer-size --no-resize-buffer --http-chunk-size --test --playlist-reverse --playlist-random --xattr-set-filesize --hls-prefer-native --hls-prefer-ffmpeg --hls-use-mpegts --external-downloader --external-downloader-args --batch-file --id --output --autonumber-size --autonumber-start --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info-json --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --max-sleep-interval --format --all-formats --prefer-free-formats --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --ap-mso --ap-username --ap-password --ap-list-mso --extract-audio --audio-format --audio-quality --recode-video --postprocessor-args --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --metadata-from-title --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subs)'
+ _arguments '*: :(--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --force-generic-extractor --default-search --ignore-config --config-location --flat-playlist --mark-watched --no-mark-watched --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --geo-verification-proxy --cn-verification-proxy --geo-bypass --no-geo-bypass --geo-bypass-country --geo-bypass-ip-block --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --limit-rate --retries --fragment-retries --skip-unavailable-fragments --abort-on-unavailable-fragment --keep-fragments --buffer-size --no-resize-buffer --http-chunk-size --test --playlist-reverse --playlist-random --xattr-set-filesize --hls-prefer-native --hls-prefer-ffmpeg --hls-use-mpegts --external-downloader --external-downloader-args --batch-file --id --output --autonumber-size --autonumber-start --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info-json --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --max-sleep-interval --format --all-formats --prefer-free-formats --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --ap-mso --ap-username --ap-password --ap-list-mso --extract-audio --audio-format --audio-quality --recode-video --postprocessor-args --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --metadata-from-title --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subs)'
fi
;;
esac
At the moment, this is only supported by YouTube.
proxy: URL of the proxy server to use
geo_verification_proxy: URL of the proxy to use for IP address verification
- on geo-restricted sites. (Experimental)
+ on geo-restricted sites.
socket_timeout: Time to wait for unresponsive hosts, in seconds
bidi_workaround: Work around buggy terminals without bidirectional text
support, using fridibi
- "warn": only emit a warning
- "detect_or_warn": check whether we can do anything
about it, warn otherwise (default)
- source_address: (Experimental) Client-side IP address to bind to.
+ source_address: Client-side IP address to bind to.
call_home: Boolean, true iff we are allowed to contact the
youtube-dl servers for debugging.
sleep_interval: Number of seconds to sleep before each download when
match_filter_func in utils.py is one example for this.
no_color: Do not emit color codes in output.
geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
- HTTP header (experimental)
+ HTTP header
geo_bypass_country:
Two-letter ISO 3166-2 country code that will be used for
explicit geographic restriction bypassing via faking
- X-Forwarded-For HTTP header (experimental)
+ X-Forwarded-For HTTP header
+ geo_bypass_ip_block:
+ IP range in CIDR notation that will be used similarly to
+ geo_bypass_country
The following options determine which downloader is picked:
external_downloader: Executable of the external downloader to call.
if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
+ for cc_kind in ('subtitles', 'automatic_captions'):
+ cc = info_dict.get(cc_kind)
+ if cc:
+ for _, subtitle in cc.items():
+ for subtitle_format in subtitle:
+ if subtitle_format.get('url'):
+ subtitle_format['url'] = sanitize_url(subtitle_format['url'])
+ if subtitle_format.get('ext') is None:
+ subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
+
+ automatic_captions = info_dict.get('automatic_captions')
subtitles = info_dict.get('subtitles')
- if subtitles:
- for _, subtitle in subtitles.items():
- for subtitle_format in subtitle:
- if subtitle_format.get('url'):
- subtitle_format['url'] = sanitize_url(subtitle_format['url'])
- if subtitle_format.get('ext') is None:
- subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
if self.params.get('listsubtitles', False):
if 'automatic_captions' in info_dict:
- self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
+ self.list_subtitles(
+ info_dict['id'], automatic_captions, 'automatic captions')
self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
return
+
info_dict['requested_subtitles'] = self.process_subtitles(
- info_dict['id'], subtitles,
- info_dict.get('automatic_captions'))
+ info_dict['id'], subtitles, automatic_captions)
# We now pick which formats have to be downloaded
if info_dict.get('formats') is None:
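
The refactor runs one sanitization loop over both caption kinds, which works because 'subtitles' and 'automatic_captions' share the same shape: a language code mapping to a list of format dicts. Illustrative values:

    subtitles = {
        'en': [
            {'url': 'https://example.com/subs/en.vtt', 'ext': 'vtt'},
            {'url': 'https://example.com/subs/en.srt'},  # 'ext' now derived from the URL
        ],
    }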
'config_location': opts.config_location,
'geo_bypass': opts.geo_bypass,
'geo_bypass_country': opts.geo_bypass_country,
+ 'geo_bypass_ip_block': opts.geo_bypass_ip_block,
# just for deprecation check
'autonumber': opts.autonumber if opts.autonumber is True else None,
'usetitle': opts.usetitle if opts.usetitle is True else None,
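
Since the new option is threaded straight into the YoutubeDL params dict, embedders can use it too. A minimal embedding sketch (the target URL is a placeholder):

    from youtube_dl import YoutubeDL

    ydl_opts = {
        # Same effect as --geo-bypass-ip-block on the command line:
        # fake X-Forwarded-For with an address from this CIDR block.
        'geo_bypass_ip_block': '198.51.100.0/24',
    }
    with YoutubeDL(ydl_opts) as ydl:
        ydl.download(['https://example.com/some-geo-restricted-video'])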
min_filesize: Skip files smaller than this size
max_filesize: Skip files larger than this size
xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
- (experimental)
external_downloader_args: A list of additional command-line arguments for the
external downloader.
hls_use_mpegts: Use the mpegts container for HLS videos.
return not ctx['live'] and not ctx['tmpfilename'] == '-'
def _read_ytdl_file(self, ctx):
+ assert 'ytdl_corrupt' not in ctx
stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r')
- ctx['fragment_index'] = json.loads(stream.read())['downloader']['current_fragment']['index']
- stream.close()
+ try:
+ ctx['fragment_index'] = json.loads(stream.read())['downloader']['current_fragment']['index']
+ except Exception:
+ ctx['ytdl_corrupt'] = True
+ finally:
+ stream.close()
def _write_ytdl_file(self, ctx):
frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w')
if self.__do_ytdl_file(ctx):
if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
self._read_ytdl_file(ctx)
- if ctx['fragment_index'] > 0 and resume_len == 0:
+ is_corrupt = ctx.get('ytdl_corrupt') is True
+ is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0
+ if is_corrupt or is_inconsistent:
+ message = (
+ '.ytdl file is corrupt' if is_corrupt else
+ 'Inconsistent state of incomplete fragment download')
self.report_warning(
- 'Inconsistent state of incomplete fragment download. '
- 'Restarting from the beginning...')
+ '%s. Restarting from the beginning...' % message)
ctx['fragment_index'] = resume_len = 0
+ if 'ytdl_corrupt' in ctx:
+ del ctx['ytdl_corrupt']
self._write_ytdl_file(ctx)
else:
self._write_ytdl_file(ctx)
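
The try/except above guards a json.loads plus two key lookups, so any truncated or mangled state file now flags ctx['ytdl_corrupt'] and triggers a clean restart instead of a crash. The .ytdl resume file itself is a small JSON document of exactly the shape those lookups imply:

    import json

    # Approximate content of '<filename>.ytdl' as read by _read_ytdl_file
    state = json.loads('{"downloader": {"current_fragment": {"index": 7}}}')
    assert state['downloader']['current_fragment']['index'] == 7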
before = start # start measuring
def retry(e):
- if ctx.tmpfilename != '-':
+ to_stdout = ctx.tmpfilename == '-'
+ if not to_stdout:
ctx.stream.close()
ctx.stream = None
- ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
+ ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename))
raise RetryDownload(e)
while True:
proc = subprocess.Popen(args, stderr=subprocess.PIPE)
cursor_in_new_line = True
proc_stderr_closed = False
- while not proc_stderr_closed:
- # read line from stderr
- line = ''
- while True:
- char = proc.stderr.read(1)
- if not char:
- proc_stderr_closed = True
- break
- if char in [b'\r', b'\n']:
- break
- line += char.decode('ascii', 'replace')
- if not line:
- # proc_stderr_closed is True
- continue
- mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
- if mobj:
- downloaded_data_len = int(float(mobj.group(1)) * 1024)
- percent = float(mobj.group(2))
- if not resume_percent:
- resume_percent = percent
- resume_downloaded_data_len = downloaded_data_len
- time_now = time.time()
- eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent)
- speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len)
- data_len = None
- if percent > 0:
- data_len = int(downloaded_data_len * 100 / percent)
- self._hook_progress({
- 'status': 'downloading',
- 'downloaded_bytes': downloaded_data_len,
- 'total_bytes_estimate': data_len,
- 'tmpfilename': tmpfilename,
- 'filename': filename,
- 'eta': eta,
- 'elapsed': time_now - start,
- 'speed': speed,
- })
- cursor_in_new_line = False
- else:
- # no percent for live streams
- mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
+ try:
+ while not proc_stderr_closed:
+ # read line from stderr
+ line = ''
+ while True:
+ char = proc.stderr.read(1)
+ if not char:
+ proc_stderr_closed = True
+ break
+ if char in [b'\r', b'\n']:
+ break
+ line += char.decode('ascii', 'replace')
+ if not line:
+ # proc_stderr_closed is True
+ continue
+ mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
if mobj:
downloaded_data_len = int(float(mobj.group(1)) * 1024)
+ percent = float(mobj.group(2))
+ if not resume_percent:
+ resume_percent = percent
+ resume_downloaded_data_len = downloaded_data_len
time_now = time.time()
- speed = self.calc_speed(start, time_now, downloaded_data_len)
+ eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent)
+ speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len)
+ data_len = None
+ if percent > 0:
+ data_len = int(downloaded_data_len * 100 / percent)
self._hook_progress({
+ 'status': 'downloading',
'downloaded_bytes': downloaded_data_len,
+ 'total_bytes_estimate': data_len,
'tmpfilename': tmpfilename,
'filename': filename,
- 'status': 'downloading',
+ 'eta': eta,
'elapsed': time_now - start,
'speed': speed,
})
cursor_in_new_line = False
- elif self.params.get('verbose', False):
- if not cursor_in_new_line:
- self.to_screen('')
- cursor_in_new_line = True
- self.to_screen('[rtmpdump] ' + line)
- proc.wait()
+ else:
+ # no percent for live streams
+ mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
+ if mobj:
+ downloaded_data_len = int(float(mobj.group(1)) * 1024)
+ time_now = time.time()
+ speed = self.calc_speed(start, time_now, downloaded_data_len)
+ self._hook_progress({
+ 'downloaded_bytes': downloaded_data_len,
+ 'tmpfilename': tmpfilename,
+ 'filename': filename,
+ 'status': 'downloading',
+ 'elapsed': time_now - start,
+ 'speed': speed,
+ })
+ cursor_in_new_line = False
+ elif self.params.get('verbose', False):
+ if not cursor_in_new_line:
+ self.to_screen('')
+ cursor_in_new_line = True
+ self.to_screen('[rtmpdump] ' + line)
+ finally:
+ proc.wait()
if not cursor_in_new_line:
self.to_screen('')
return proc.returncode
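
run_rtmpdump scrapes rtmpdump's stderr a byte at a time and matches progress lines against two patterns: one with a percentage for regular downloads and one without for live streams. A standalone check of what those patterns capture (sample lines modeled on rtmpdump's output format):

    import re

    line = '123.456 kB / 12.34 sec (5.6%)'  # regular (non-live) progress line
    mobj = re.search(
        r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)',
        line)
    assert int(float(mobj.group(1)) * 1024) == 126418  # downloaded bytes
    assert float(mobj.group(2)) == 5.6                 # percent complete

    live = '123.456 kB / 12.34 sec'  # live streams report no percentage
    assert re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', live)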
RD_INCOMPLETE = 2
RD_NO_CONNECT = 3
- retval = run_rtmpdump(args)
+ started = time.time()
+
+ try:
+ retval = run_rtmpdump(args)
+ except KeyboardInterrupt:
+ if not info_dict.get('is_live'):
+ raise
+ retval = RD_SUCCESS
+ self.to_screen('\n[rtmpdump] Interrupted by user')
if retval == RD_NO_CONNECT:
self.report_error('[rtmpdump] Could not connect to RTMP server.')
while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
prevsize = os.path.getsize(encodeFilename(tmpfilename))
- self.to_screen('[rtmpdump] %s bytes' % prevsize)
+ self.to_screen('[rtmpdump] Downloaded %s bytes' % prevsize)
time.sleep(5.0) # This seems to be needed
args = basic_args + ['--resume']
if retval == RD_FAILED:
break
if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
fsize = os.path.getsize(encodeFilename(tmpfilename))
- self.to_screen('[rtmpdump] %s bytes' % fsize)
+ self.to_screen('[rtmpdump] Downloaded %s bytes' % fsize)
self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': fsize,
'total_bytes': fsize,
'filename': filename,
'status': 'finished',
+ 'elapsed': time.time() - started,
})
return True
else:
class ABCIViewIE(InfoExtractor):
IE_NAME = 'abc.net.au:iview'
- _VALID_URL = r'https?://iview\.abc\.net\.au/programs/[^/]+/(?P<id>[^/?#]+)'
+ _VALID_URL = r'https?://iview\.abc\.net\.au/(?:[^/]+/)*video/(?P<id>[^/?#]+)'
_GEO_COUNTRIES = ['AU']
# ABC iview programs are normally available for 14 days only.
_TESTS = [{
- 'url': 'https://iview.abc.net.au/programs/ben-and-hollys-little-kingdom/ZY9247A021S00',
+ 'url': 'https://iview.abc.net.au/show/ben-and-hollys-little-kingdom/series/0/video/ZX9371A050S00',
'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
'info_dict': {
- 'id': 'ZY9247A021S00',
+ 'id': 'ZX9371A050S00',
'ext': 'mp4',
- 'title': "Gaston's Visit",
+ 'title': "Gaston's Birthday",
'series': "Ben And Holly's Little Kingdom",
- 'description': 'md5:18db170ad71cf161e006a4c688e33155',
- 'upload_date': '20180318',
+ 'description': 'md5:f9de914d02f226968f598ac76f105bcf',
+ 'upload_date': '20180604',
'uploader_id': 'abc4kids',
- 'timestamp': 1521400959,
+ 'timestamp': 1528140219,
},
'params': {
'skip_download': True,
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- video_params = self._parse_json(self._search_regex(
- r'videoParams\s*=\s*({.+?});', webpage, 'video params'), video_id)
- title = video_params.get('title') or video_params['seriesTitle']
- stream = next(s for s in video_params['playlist'] if s.get('type') == 'program')
+ video_params = self._download_json(
+ 'https://iview.abc.net.au/api/programs/' + video_id, video_id)
+ title = unescapeHTML(video_params.get('title') or video_params['seriesTitle'])
+ stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream'))
- house_number = video_params.get('episodeHouseNumber')
- path = '/auth/hls/sign?ts={0}&hn={1}&d=android-mobile'.format(
+ house_number = video_params.get('episodeHouseNumber') or video_id
+ path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format(
int(time.time()), house_number)
sig = hmac.new(
- 'android.content.res.Resources'.encode('utf-8'),
+ b'android.content.res.Resources',
path.encode('utf-8'), hashlib.sha256).hexdigest()
token = self._download_webpage(
'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id)
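
The token request is authorized by signing the request path with a static HMAC-SHA256 key, as the hunk above shows. The same computation as a standalone sketch (house number and timestamp are placeholders):

    import hashlib
    import hmac
    import time

    house_number = 'ZX9371A050S00'  # placeholder episode house number
    path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format(
        int(time.time()), house_number)
    sig = hmac.new(
        b'android.content.res.Resources',  # static key from the extractor
        path.encode('utf-8'), hashlib.sha256).hexdigest()
    signed_url = 'http://iview.abc.net.au{0}&sig={1}'.format(path, sig)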
'ext': 'vtt',
}]
+ is_live = video_params.get('livestream') == '1'
+ if is_live:
+ title = self._live_title(title)
+
return {
'id': video_id,
- 'title': unescapeHTML(title),
- 'description': self._html_search_meta(['og:description', 'twitter:description'], webpage),
- 'thumbnail': self._html_search_meta(['og:image', 'twitter:image:src'], webpage),
+ 'title': title,
+ 'description': video_params.get('description'),
+ 'thumbnail': video_params.get('thumbnail'),
'duration': int_or_none(video_params.get('eventDuration')),
'timestamp': parse_iso8601(video_params.get('pubDate'), ' '),
'series': unescapeHTML(video_params.get('seriesTitle')),
'series_id': video_params.get('seriesHouseNumber') or video_id[:7],
- 'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage, default=None)),
- 'episode': self._html_search_meta('episode_title', webpage, default=None),
+ 'season_number': int_or_none(self._search_regex(
+ r'\bSeries\s+(\d+)\b', title, 'season number', default=None)),
+ 'episode_number': int_or_none(self._search_regex(
+ r'\bEp\s+(\d+)\b', title, 'episode number', default=None)),
+ 'episode_id': house_number,
'uploader_id': video_params.get('channel'),
'formats': formats,
'subtitles': subtitles,
+ 'is_live': is_live,
}
# coding: utf-8
from __future__ import unicode_literals
+import base64
+import binascii
import json
import os
+import random
from .common import InfoExtractor
from ..aes import aes_cbc_decrypt
)
from ..utils import (
bytes_to_intlist,
+ bytes_to_long,
ExtractorError,
float_or_none,
intlist_to_bytes,
+ long_to_bytes,
+ pkcs1pad,
srt_subtitles_timecode,
strip_or_none,
urljoin,
}
}
_BASE_URL = 'http://animedigitalnetwork.fr'
+ _RSA_KEY = (0xc35ae1e4356b65a73b551493da94b8cb443491c0aa092a357a5aee57ffc14dda85326f42d716e539a34542a0d3f363adf16c5ec222d713d5997194030ee2e4f0d1fb328c01a81cf6868c090d50de8e169c6b13d1675b9eeed1cbc51e1fffca9b38af07f37abd790924cd3bee59d0257cfda4fe5f3f0534877e21ce5821447d1b, 65537)
def _get_subtitles(self, sub_path, video_id):
if not sub_path:
enc_subtitles = self._download_webpage(
urljoin(self._BASE_URL, sub_path),
- video_id, fatal=False, headers={
- 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0',
- })
+ video_id, fatal=False)
if not enc_subtitles:
return None
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
- bytes_to_intlist(b'\xc8\x6e\x06\xbc\xbe\xc6\x49\xf5\x88\x0d\xc8\x47\xc4\x27\x0c\x60'),
+ bytes_to_intlist(binascii.unhexlify(self._K + '9032ad7083106400')),
bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
))
subtitles_json = self._parse_json(
error = None
if not links:
links_url = player_config.get('linksurl') or options['videoUrl']
- links_data = self._download_json(urljoin(
- self._BASE_URL, links_url), video_id)
+ token = options['token']
+ self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)])
+ message = bytes_to_intlist(json.dumps({
+ 'k': self._K,
+ 'e': 60,
+ 't': token,
+ }))
+ padded_message = intlist_to_bytes(pkcs1pad(message, 128))
+ n, e = self._RSA_KEY
+ encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
+ authorization = base64.b64encode(encrypted_message).decode()
+ links_data = self._download_json(
+ urljoin(self._BASE_URL, links_url), video_id, headers={
+ 'Authorization': 'Bearer ' + authorization,
+ })
links = links_data.get('links') or {}
metas = metas or links_data.get('meta') or {}
- sub_path = sub_path or links_data.get('subtitles')
+ sub_path = (sub_path or links_data.get('subtitles')) + '&token=' + token
error = links_data.get('error')
title = metas.get('title') or video_info['title']
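
The new authorization flow encrypts a small JSON payload ({'k': ..., 'e': 60, 't': token}) with the site's RSA public key after applying PKCS#1 v1.5 padding. A sketch of the pkcs1pad contract assumed above (the real helper ships in youtube_dl/utils.py; padding bytes must be non-zero per the spec):

    import random

    def pkcs1pad(data, length):
        # PKCS#1 v1.5 type-2 padding over a list of byte values:
        # 0x00 0x02 <non-zero random bytes> 0x00 <data>, 'length' bytes total
        if len(data) > length - 11:
            raise ValueError('Input data too long for PKCS#1 padding')
        pseudo_random = [random.randint(1, 254)
                         for _ in range(length - len(data) - 3)]
        return [0, 2] + pseudo_random + [0] + data

    padded = pkcs1pad([1, 2, 3], 128)
    assert len(padded) == 128 and padded[:2] == [0, 2]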
}]
def _login(self):
- (username, password) = self._get_login_info()
+ username, password = self._get_login_info()
if username is None:
return
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
- self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
+ self._initialize_geo_bypass({
+ 'countries': smuggled_data.get('geo_countries'),
+ })
mobj = re.match(self._VALID_URL, url)
access_key, video_id = mobj.group('access_key_or_mcp', 'id')
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ determine_ext,
+ js_to_json,
+)
+
+
+class APAIE(InfoExtractor):
+ _VALID_URL = r'https?://[^/]+\.apa\.at/embed/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+ _TESTS = [{
+ 'url': 'http://uvp.apa.at/embed/293f6d17-692a-44e3-9fd5-7b178f3a1029',
+ 'md5': '2b12292faeb0a7d930c778c7a5b4759b',
+ 'info_dict': {
+ 'id': 'jjv85FdZ',
+ 'ext': 'mp4',
+ 'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 254,
+ 'timestamp': 1519211149,
+ 'upload_date': '20180221',
+ },
+ }, {
+ 'url': 'https://uvp-apapublisher.sf.apa.at/embed/2f94e9e6-d945-4db2-9548-f9a41ebf7b78',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://uvp-rma.sf.apa.at/embed/70404cca-2f47-4855-bbb8-20b1fae58f76',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://uvp-kleinezeitung.sf.apa.at/embed/f1c44979-dba2-4ebf-b021-e4cf2cac3c81',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [
+ mobj.group('url')
+ for mobj in re.finditer(
+ r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//[^/]+\.apa\.at/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}.*?)\1',
+ webpage)]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ jwplatform_id = self._search_regex(
+ r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', webpage,
+ 'jwplatform id', default=None)
+
+ if jwplatform_id:
+ return self.url_result(
+ 'jwplatform:' + jwplatform_id, ie='JWPlatform',
+ video_id=video_id)
+
+ sources = self._parse_json(
+ self._search_regex(
+ r'sources\s*=\s*(\[.+?\])\s*;', webpage, 'sources'),
+ video_id, transform_source=js_to_json)
+
+ formats = []
+ for source in sources:
+ if not isinstance(source, dict):
+ continue
+ source_url = source.get('file')
+ if not source_url or not isinstance(source_url, compat_str):
+ continue
+ ext = determine_ext(source_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ source_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'url': source_url,
+ })
+ self._sort_formats(formats)
+
+ thumbnail = self._search_regex(
+ r'image\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+ 'thumbnail', fatal=False, group='url')
+
+ return {
+ 'id': video_id,
+ 'title': video_id,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
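
With _extract_urls in place, the generic extractor can lift APA iframes out of arbitrary pages. A quick standalone check of the embed pattern (the HTML snippet is made up around the UUID from the first test case):

    import re

    webpage = ('<iframe src="http://uvp.apa.at/embed/'
               '293f6d17-692a-44e3-9fd5-7b178f3a1029"></iframe>')
    urls = [
        mobj.group('url') for mobj in re.finditer(
            r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//[^/]+\.apa\.at/embed/'
            r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}.*?)\1',
            webpage)]
    assert urls == ['http://uvp.apa.at/embed/293f6d17-692a-44e3-9fd5-7b178f3a1029']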
self._login()
def _login(self):
- (username, password) = self._get_login_info()
+ username, password = self._get_login_info()
if username is None:
return
from ..utils import (
int_or_none,
parse_iso8601,
- sanitized_Request,
)
class AudiMediaIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?audi-mediacenter\.com/(?:en|de)/audimediatv/(?P<id>[^/?#]+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:www\.)?audi-mediacenter\.com/(?:en|de)/audimediatv/(?:video/)?(?P<id>[^/?#]+)'
+ _TESTS = [{
'url': 'https://www.audi-mediacenter.com/en/audimediatv/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-1467',
'md5': '79a8b71c46d49042609795ab59779b66',
'info_dict': {
'duration': 74022,
'view_count': int,
}
- }
- # extracted from https://audimedia.tv/assets/embed/embedded-player.js (dataSourceAuthToken)
- _AUTH_TOKEN = 'e25b42847dba18c6c8816d5d8ce94c326e06823ebf0859ed164b3ba169be97f2'
+ }, {
+ 'url': 'https://www.audi-mediacenter.com/en/audimediatv/video/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-2991',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
raw_payload = self._search_regex([
- r'class="amtv-embed"[^>]+id="([^"]+)"',
- r'class=\\"amtv-embed\\"[^>]+id=\\"([^"]+)\\"',
+ r'class="amtv-embed"[^>]+id="([0-9a-z-]+)"',
+ r'id="([0-9a-z-]+)"[^>]+class="amtv-embed"',
+ r'class=\\"amtv-embed\\"[^>]+id=\\"([0-9a-z-]+)\\"',
+ r'id=\\"([0-9a-z-]+)\\"[^>]+class=\\"amtv-embed\\"',
+ r'id=(?:\\)?"(amtve-[a-z]-\d+-[a-z]{2})',
], webpage, 'raw payload')
- _, stage_mode, video_id, lang = raw_payload.split('-')
+ _, stage_mode, video_id, _ = raw_payload.split('-')
# TODO: handle s and e stage_mode (live streams and ended live streams)
if stage_mode not in ('s', 'e'):
- request = sanitized_Request(
- 'https://audimedia.tv/api/video/v1/videos/%s?embed[]=video_versions&embed[]=thumbnail_image&where[content_language_iso]=%s' % (video_id, lang),
- headers={'X-Auth-Token': self._AUTH_TOKEN})
- json_data = self._download_json(request, video_id)['results']
+ video_data = self._download_json(
+ 'https://www.audimedia.tv/api/video/v1/videos/' + video_id,
+ video_id, query={
+ 'embed[]': ['video_versions', 'thumbnail_image'],
+ })['results']
formats = []
- stream_url_hls = json_data.get('stream_url_hls')
+ stream_url_hls = video_data.get('stream_url_hls')
if stream_url_hls:
formats.extend(self._extract_m3u8_formats(
stream_url_hls, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
- stream_url_hds = json_data.get('stream_url_hds')
+ stream_url_hds = video_data.get('stream_url_hds')
if stream_url_hds:
formats.extend(self._extract_f4m_formats(
stream_url_hds + '?hdcore=3.4.0',
video_id, f4m_id='hds', fatal=False))
- for video_version in json_data.get('video_versions'):
+ for video_version in video_data.get('video_versions', []):
video_version_url = video_version.get('download_url') or video_version.get('stream_url')
if not video_version_url:
continue
return {
'id': video_id,
- 'title': json_data['title'],
- 'description': json_data.get('subtitle'),
- 'thumbnail': json_data.get('thumbnail_image', {}).get('file'),
- 'timestamp': parse_iso8601(json_data.get('publication_date')),
- 'duration': int_or_none(json_data.get('duration')),
- 'view_count': int_or_none(json_data.get('view_count')),
+ 'title': video_data['title'],
+ 'description': video_data.get('subtitle'),
+ 'thumbnail': video_data.get('thumbnail_image', {}).get('file'),
+ 'timestamp': parse_iso8601(video_data.get('publication_date')),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'view_count': int_or_none(video_data.get('view_count')),
'formats': formats,
}
return {'_type': 'url', 'url': api_response['url'], 'ie_key': 'Soundcloud'}
return {
- 'id': api_response.get('id', album_url_tag),
+ 'id': compat_str(api_response.get('id', album_url_tag)),
'uploader': api_response.get('artist'),
'title': api_response.get('title'),
'url': api_response['url'],
}
def _login(self):
- (username, password) = self._get_login_info()
+ username, password = self._get_login_info()
if username is None:
return
float_or_none,
get_element_by_class,
int_or_none,
+ js_to_json,
parse_duration,
parse_iso8601,
try_get,
# single video article embedded with data-media-vpid
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
'only_matching': True,
+ }, {
+ 'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
+ 'info_dict': {
+ 'id': 'p06556y7',
+ 'ext': 'mp4',
+ 'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
+ 'description': 'md5:4b7dfd063d5a789a1512e99662be3ddd',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
}]
@classmethod
'subtitles': subtitles,
}
+ bbc3_config = self._parse_json(
+ self._search_regex(
+ r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
+ 'bbcthree config', default='{}'),
+ playlist_id, transform_source=js_to_json, fatal=False)
+ if bbc3_config:
+ bbc3_playlist = try_get(
+ bbc3_config, lambda x: x['payload']['content']['bbcMedia']['playlist'],
+ dict)
+ if bbc3_playlist:
+ playlist_title = bbc3_playlist.get('title') or playlist_title
+ thumbnail = bbc3_playlist.get('holdingImageURL')
+ entries = []
+ for bbc3_item in bbc3_playlist['items']:
+ programme_id = bbc3_item.get('versionID')
+ if not programme_id:
+ continue
+ formats, subtitles = self._download_media_selector(programme_id)
+ self._sort_formats(formats)
+ entries.append({
+ 'id': programme_id,
+ 'title': playlist_title,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ })
+ return self.playlist_result(
+ entries, playlist_id, playlist_title, playlist_description)
+
def extract_all(pattern):
return list(filter(None, map(
lambda s: self._parse_json(s, playlist_id, fatal=False),
(?:
ctv|
tsn|
- bnn|
+ bnn(?:bloomberg)?|
thecomedynetwork|
discovery|
discoveryvelocity|
much\.com
)/.*?(?:\bvid(?:eoid)?=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
_TESTS = [{
- 'url': 'http://www.ctv.ca/video/player?vid=706966',
- 'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
+ 'url': 'https://www.bnnbloomberg.ca/video/david-cockfield-s-top-picks~1403070',
+ 'md5': '36d3ef559cfe8af8efe15922cd3ce950',
'info_dict': {
- 'id': '706966',
- 'ext': 'mp4',
- 'title': 'Larry Day and Richard Jutras on the TIFF red carpet of \'Stonewall\'',
- 'description': 'etalk catches up with Larry Day and Richard Jutras on the TIFF red carpet of "Stonewall”.',
- 'upload_date': '20150919',
- 'timestamp': 1442624700,
+ 'id': '1403070',
+ 'ext': 'flv',
+ 'title': 'David Cockfield\'s Top Picks',
+ 'description': 'md5:810f7f8c6a83ad5b48677c3f8e5bb2c3',
+ 'upload_date': '20180525',
+ 'timestamp': 1527288600,
},
- 'expected_warnings': ['HTTP Error 404'],
}, {
'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582',
'only_matching': True,
'investigationdiscovery': 'invdisc',
'animalplanet': 'aniplan',
'etalk': 'ctv',
+ 'bnnbloomberg': 'bnn',
}
def _real_extract(self, url):
if 'anime/' not in url:
cid = self._search_regex(
- r'cid(?:["\']:|=)(\d+)', webpage, 'cid',
+ r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid',
default=None
) or compat_parse_qs(self._search_regex(
[r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
- self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
+ self._initialize_geo_bypass({
+ 'countries': smuggled_data.get('geo_countries'),
+ 'ip_blocks': smuggled_data.get('geo_ip_blocks'),
+ })
account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .jwplatform import JWPlatformIE
+
+
+class BusinessInsiderIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/]+\.)?businessinsider\.(?:com|nl)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'http://uk.businessinsider.com/how-much-radiation-youre-exposed-to-in-everyday-life-2016-6',
+ 'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e',
+ 'info_dict': {
+ 'id': 'hZRllCfw',
+ 'ext': 'mp4',
+ 'title': "Here's how much radiation you're exposed to in everyday life",
+ 'description': 'md5:9a0d6e2c279948aadaa5e84d6d9b99bd',
+ 'upload_date': '20170709',
+ 'timestamp': 1499606400,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.businessinsider.nl/5-scientifically-proven-things-make-you-less-attractive-2017-7/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ jwplatform_id = self._search_regex(
+ (r'data-media-id=["\']([a-zA-Z0-9]{8})',
+ r'id=["\']jwplayer_([a-zA-Z0-9]{8})',
+ r'id["\']?\s*:\s*["\']?([a-zA-Z0-9]{8})'),
+ webpage, 'jwplatform id')
+ return self.url_result(
+ 'jwplatform:%s' % jwplatform_id, ie=JWPlatformIE.ie_key(),
+ video_id=video_id)
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+)
+
+
+class CamModelsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?cammodels\.com/cam/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.cammodels.com/cam/AutumnKnight/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ user_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ url, user_id, headers=self.geo_verification_headers())
+
+ manifest_root = self._html_search_regex(
+ r'manifestUrlRoot=([^&\']+)', webpage, 'manifest', default=None)
+
+ if not manifest_root:
+ ERRORS = (
+ ("I'm offline, but let's stay connected", 'This user is currently offline'),
+ ('in a private show', 'This user is in a private show'),
+ ('is currently performing LIVE', 'This model is currently performing live'),
+ )
+ for pattern, message in ERRORS:
+ if pattern in webpage:
+ error = message
+ expected = True
+ break
+ else:
+ error = 'Unable to find manifest URL root'
+ expected = False
+ raise ExtractorError(error, expected=expected)
+
+ manifest = self._download_json(
+ '%s%s.json' % (manifest_root, user_id), user_id)
+
+ formats = []
+ for format_id, format_dict in manifest['formats'].items():
+ if not isinstance(format_dict, dict):
+ continue
+ encodings = format_dict.get('encodings')
+ if not isinstance(encodings, list):
+ continue
+ vcodec = format_dict.get('videoCodec')
+ acodec = format_dict.get('audioCodec')
+ for media in encodings:
+ if not isinstance(media, dict):
+ continue
+ media_url = media.get('location')
+ if not media_url or not isinstance(media_url, compat_str):
+ continue
+
+ format_id_list = [format_id]
+ height = int_or_none(media.get('videoHeight'))
+ if height is not None:
+ format_id_list.append('%dp' % height)
+ f = {
+ 'url': media_url,
+ 'format_id': '-'.join(format_id_list),
+ 'width': int_or_none(media.get('videoWidth')),
+ 'height': height,
+ 'vbr': int_or_none(media.get('videoKbps')),
+ 'abr': int_or_none(media.get('audioKbps')),
+ 'fps': int_or_none(media.get('fps')),
+ 'vcodec': vcodec,
+ 'acodec': acodec,
+ }
+ if 'rtmp' in format_id:
+ f['ext'] = 'flv'
+ elif 'hls' in format_id:
+ f.update({
+ 'ext': 'mp4',
+                        # hls skips fragments, so prefer rtmp
+ 'preference': -1,
+ })
+ else:
+ continue
+ formats.append(f)
+ self._sort_formats(formats)
+
+ return {
+ 'id': user_id,
+ 'title': self._live_title(user_id),
+ 'is_live': True,
+ 'formats': formats,
+ }
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ unified_timestamp,
+)
+
+
+class CamTubeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www|api)\.)?camtube\.co/recordings?/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://camtube.co/recording/minafay-030618-1136-chaturbate-female',
+ 'info_dict': {
+ 'id': '42ad3956-dd5b-445a-8313-803ea6079fac',
+ 'display_id': 'minafay-030618-1136-chaturbate-female',
+ 'ext': 'mp4',
+ 'title': 'minafay-030618-1136-chaturbate-female',
+ 'duration': 1274,
+ 'timestamp': 1528018608,
+ 'upload_date': '20180603',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ _API_BASE = 'https://api.camtube.co'
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ token = self._download_json(
+ '%s/rpc/session/new' % self._API_BASE, display_id,
+ 'Downloading session token')['token']
+
+ self._set_cookie('api.camtube.co', 'session', token)
+
+ video = self._download_json(
+ '%s/recordings/%s' % (self._API_BASE, display_id), display_id,
+ headers={'Referer': url})
+
+ video_id = video['uuid']
+ timestamp = unified_timestamp(video.get('createdAt'))
+ duration = int_or_none(video.get('duration'))
+ view_count = int_or_none(video.get('viewCount'))
+ like_count = int_or_none(video.get('likeCount'))
+ creator = video.get('stageName')
+
+ formats = [{
+ 'url': '%s/recordings/%s/manifest.m3u8'
+ % (self._API_BASE, video_id),
+ 'format_id': 'hls',
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ }]
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': display_id,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'creator': creator,
+ 'formats': formats,
+ }
xpath_element,
xpath_with_ns,
find_xpath_attr,
+ orderedSet,
parse_duration,
parse_iso8601,
parse_age_limit,
+ strip_or_none,
int_or_none,
ExtractorError,
)
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
+ title = self._og_search_title(webpage, default=None) or self._html_search_meta(
+ 'twitter:title', webpage, 'title', default=None) or self._html_search_regex(
+ r'<title>([^<]+)</title>', webpage, 'title', fatal=False)
entries = [
self._extract_player_init(player_init, display_id)
for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)]
+ media_ids = []
+ for media_id_re in (
+ r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"',
+ r'<div[^>]+\bid=["\']player-(\d+)',
+ r'guid["\']\s*:\s*["\'](\d+)'):
+ media_ids.extend(re.findall(media_id_re, webpage))
entries.extend([
self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
- for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)])
+ for media_id in orderedSet(media_ids)])
return self.playlist_result(
- entries, display_id,
- self._og_search_title(webpage, fatal=False),
+ entries, display_id, strip_or_none(title),
self._og_search_description(webpage))
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+ webpage = self._download_webpage(
+ url, video_id, headers=self.geo_verification_headers())
m3u8_urls = []
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class CloudflareStreamIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:watch\.)?cloudflarestream\.com/|
+ embed\.cloudflarestream\.com/embed/[^/]+\.js\?.*?\bvideo=
+ )
+ (?P<id>[\da-f]+)
+ '''
+ _TESTS = [{
+ 'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717',
+ 'info_dict': {
+ 'id': '31c9291ab41fac05471db4e73aa11717',
+ 'ext': 'mp4',
+ 'title': '31c9291ab41fac05471db4e73aa11717',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://cloudflarestream.com/31c9291ab41fac05471db4e73aa11717/manifest/video.mpd',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [
+ mobj.group('url')
+ for mobj in re.finditer(
+ r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.cloudflarestream\.com/embed/[^/]+\.js\?.*?\bvideo=[\da-f]+?.*?)\1',
+ webpage)]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ formats = self._extract_m3u8_formats(
+ 'https://cloudflarestream.com/%s/manifest/video.m3u8' % video_id,
+ video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls',
+ fatal=False)
+ formats.extend(self._extract_mpd_formats(
+ 'https://cloudflarestream.com/%s/manifest/video.mpd' % video_id,
+ video_id, mpd_id='dash', fatal=False))
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': video_id,
+ 'formats': formats,
+ }
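
Note that the extractor above needs nothing but the hex video id: both manifests are derived from it directly. For illustration (id taken from the test above):

    # sketch: manifest URLs derived from a CloudflareStream video id
    video_id = '31c9291ab41fac05471db4e73aa11717'
    hls_url = 'https://cloudflarestream.com/%s/manifest/video.m3u8' % video_id
    dash_url = 'https://cloudflarestream.com/%s/manifest/video.mpd' % video_id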
_GEO_BYPASS attribute may be set to False in order to disable
geo restriction bypass mechanisms for a particular extractor.
Though it won't disable explicit geo restriction bypass based on
- country code provided with geo_bypass_country. (experimental)
+ country code provided with geo_bypass_country.
_GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted
countries for this extractor. One of these countries will be used by
geo restriction bypass mechanism right away in order to bypass
- geo restriction, of course, if the mechanism is not disabled. (experimental)
+ geo restriction, of course, if the mechanism is not disabled.
- NB: both these geo attributes are experimental and may change in future
- or be completely removed.
+ _GEO_IP_BLOCKS attribute may contain a list of presumably geo unrestricted
+ IP blocks in CIDR notation for this extractor. One of these IP blocks
+ will be used by geo restriction bypass mechanism similarly
+ to _GEO_COUNTRIES.
Finally, the _WORKING attribute should be set to False for broken IEs
in order to warn the users and skip the tests.
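
Taken together, these class attributes let an extractor opt into geo bypass declaratively. A minimal sketch with a hypothetical extractor (domain and values are illustrative only):

    from youtube_dl.extractor.common import InfoExtractor

    class ExampleGeoIE(InfoExtractor):  # hypothetical extractor, illustration only
        _VALID_URL = r'https?://(?:www\.)?example\.com/video/(?P<id>\d+)'
        _GEO_BYPASS = True                    # default; set False to opt out
        _GEO_COUNTRIES = ['DE']               # presumably unrestricted countries
        _GEO_IP_BLOCKS = ['198.51.100.0/24']  # presumably unrestricted CIDR blocks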
_x_forwarded_for_ip = None
_GEO_BYPASS = True
_GEO_COUNTRIES = None
+ _GEO_IP_BLOCKS = None
_WORKING = True
def __init__(self, downloader=None):
def initialize(self):
"""Initializes an instance (authentication, etc)."""
- self._initialize_geo_bypass(self._GEO_COUNTRIES)
+ self._initialize_geo_bypass({
+ 'countries': self._GEO_COUNTRIES,
+ 'ip_blocks': self._GEO_IP_BLOCKS,
+ })
if not self._ready:
self._real_initialize()
self._ready = True
- def _initialize_geo_bypass(self, countries):
+ def _initialize_geo_bypass(self, geo_bypass_context):
"""
Initialize geo restriction bypass mechanism.
HTTP requests.
This method will be used for initial geo bypass mechanism initialization
- during the instance initialization with _GEO_COUNTRIES.
+ during the instance initialization with _GEO_COUNTRIES and
+ _GEO_IP_BLOCKS.
- You may also manually call it from extractor's code if geo countries
+ You may also manually call it from extractor's code if geo bypass
information is not available beforehand (e.g. obtained during
- extraction) or due to some another reason.
+        extraction) or due to some other reason. In this case you should pass
+        this information in the geo bypass context passed as the first argument.
+        It may contain the following fields:
+
+ countries: List of geo unrestricted countries (similar
+ to _GEO_COUNTRIES)
+ ip_blocks: List of geo unrestricted IP blocks in CIDR notation
+ (similar to _GEO_IP_BLOCKS)
+
"""
if not self._x_forwarded_for_ip:
- country_code = self._downloader.params.get('geo_bypass_country', None)
- # If there is no explicit country for geo bypass specified and
- # the extractor is known to be geo restricted let's fake IP
- # as X-Forwarded-For right away.
- if (not country_code and
- self._GEO_BYPASS and
- self._downloader.params.get('geo_bypass', True) and
- countries):
- country_code = random.choice(countries)
- if country_code:
- self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
+
+ # Geo bypass mechanism is explicitly disabled by user
+ if not self._downloader.params.get('geo_bypass', True):
+ return
+
+ if not geo_bypass_context:
+ geo_bypass_context = {}
+
+            # Backward compatibility: previously _initialize_geo_bypass
+            # expected a list of countries; some 3rd party code may still
+            # use it this way
+ if isinstance(geo_bypass_context, (list, tuple)):
+ geo_bypass_context = {
+ 'countries': geo_bypass_context,
+ }
+
+            # The whole point of the geo bypass mechanism is to fake the IP
+            # in the X-Forwarded-For HTTP header based on some IP block or
+            # country code.
+
+ # Path 1: bypassing based on IP block in CIDR notation
+
+ # Explicit IP block specified by user, use it right away
+ # regardless of whether extractor is geo bypassable or not
+ ip_block = self._downloader.params.get('geo_bypass_ip_block', None)
+
+            # Otherwise use a random IP block from the geo bypass context,
+            # but only if the extractor is known to be geo bypassable
+ if not ip_block:
+ ip_blocks = geo_bypass_context.get('ip_blocks')
+ if self._GEO_BYPASS and ip_blocks:
+ ip_block = random.choice(ip_blocks)
+
+ if ip_block:
+ self._x_forwarded_for_ip = GeoUtils.random_ipv4(ip_block)
+ if self._downloader.params.get('verbose', False):
+ self._downloader.to_screen(
+ '[debug] Using fake IP %s as X-Forwarded-For.'
+ % self._x_forwarded_for_ip)
+ return
+
+ # Path 2: bypassing based on country code
+
+ # Explicit country code specified by user, use it right away
+ # regardless of whether extractor is geo bypassable or not
+ country = self._downloader.params.get('geo_bypass_country', None)
+
+            # Otherwise use a random country code from the geo bypass context,
+            # but only if the extractor is known to be geo bypassable
+ if not country:
+ countries = geo_bypass_context.get('countries')
+ if self._GEO_BYPASS and countries:
+ country = random.choice(countries)
+
+ if country:
+ self._x_forwarded_for_ip = GeoUtils.random_ipv4(country)
if self._downloader.params.get('verbose', False):
self._downloader.to_screen(
'[debug] Using fake IP %s (%s) as X-Forwarded-For.'
- % (self._x_forwarded_for_ip, country_code.upper()))
+ % (self._x_forwarded_for_ip, country.upper()))
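
Extractors that only learn geo information mid-extraction can still invoke the method by hand, as the brightcove and dplay changes elsewhere in this diff do. A sketch, assuming the data was smuggled into the URL:

    # sketch: manual invocation with data discovered during extraction
    url, smuggled_data = unsmuggle_url(url, {})
    self._initialize_geo_bypass({
        'countries': smuggled_data.get('geo_countries'),
        'ip_blocks': smuggled_data.get('geo_ip_blocks'),
    })

The explicit user overrides consulted above correspond to the --geo-bypass-country and --geo-bypass-ip-block command line options.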
def extract(self, url):
"""Extracts URL information and returns it in list of dicts."""
else:
self.report_warning(errmsg + str(ve))
- def _download_json(self, url_or_request, video_id,
- note='Downloading JSON metadata',
- errnote='Unable to download JSON metadata',
- transform_source=None,
- fatal=True, encoding=None, data=None, headers={}, query={}):
- json_string = self._download_webpage(
+ def _download_json_handle(
+ self, url_or_request, video_id, note='Downloading JSON metadata',
+ errnote='Unable to download JSON metadata', transform_source=None,
+ fatal=True, encoding=None, data=None, headers={}, query={}):
+ """Return a tuple (JSON object, URL handle)"""
+ res = self._download_webpage_handle(
url_or_request, video_id, note, errnote, fatal=fatal,
encoding=encoding, data=data, headers=headers, query=query)
- if (not fatal) and json_string is False:
- return None
+ if res is False:
+ return res
+ json_string, urlh = res
return self._parse_json(
- json_string, video_id, transform_source=transform_source, fatal=fatal)
+ json_string, video_id, transform_source=transform_source,
+ fatal=fatal), urlh
+
+ def _download_json(
+ self, url_or_request, video_id, note='Downloading JSON metadata',
+ errnote='Unable to download JSON metadata', transform_source=None,
+ fatal=True, encoding=None, data=None, headers={}, query={}):
+ res = self._download_json_handle(
+ url_or_request, video_id, note=note, errnote=errnote,
+ transform_source=transform_source, fatal=fatal, encoding=encoding,
+ data=data, headers=headers, query=query)
+ return res if res is False else res[0]
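
The handle-returning variant is useful when the caller needs response metadata alongside the parsed JSON. A minimal sketch against a hypothetical endpoint:

    # sketch: hypothetical endpoint; urlh behaves like a urllib response
    data, urlh = self._download_json_handle(
        'https://example.com/api/video.json', video_id)
    final_url = urlh.geturl()  # e.g. to detect a redirect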
def _parse_json(self, json_string, video_id, transform_source=None, fatal=True):
if transform_source:
if isinstance(json_ld, dict):
json_ld = [json_ld]
+ INTERACTION_TYPE_MAP = {
+ 'CommentAction': 'comment',
+ 'AgreeAction': 'like',
+ 'DisagreeAction': 'dislike',
+ 'LikeAction': 'like',
+ 'DislikeAction': 'dislike',
+ 'ListenAction': 'view',
+ 'WatchAction': 'view',
+ 'ViewAction': 'view',
+ }
+
+ def extract_interaction_statistic(e):
+ interaction_statistic = e.get('interactionStatistic')
+ if not isinstance(interaction_statistic, list):
+ return
+ for is_e in interaction_statistic:
+ if not isinstance(is_e, dict):
+ continue
+ if is_e.get('@type') != 'InteractionCounter':
+ continue
+ interaction_type = is_e.get('interactionType')
+ if not isinstance(interaction_type, compat_str):
+ continue
+ interaction_count = int_or_none(is_e.get('userInteractionCount'))
+ if interaction_count is None:
+ continue
+ count_kind = INTERACTION_TYPE_MAP.get(interaction_type.split('/')[-1])
+ if not count_kind:
+ continue
+ count_key = '%s_count' % count_kind
+ if info.get(count_key) is not None:
+ continue
+ info[count_key] = interaction_count
+
def extract_video_object(e):
assert e['@type'] == 'VideoObject'
info.update({
'height': int_or_none(e.get('height')),
'view_count': int_or_none(e.get('interactionCount')),
})
+ extract_interaction_statistic(e)
for e in json_ld:
if isinstance(e.get('@context'), compat_str) and re.match(r'^https?://schema.org/?$', e.get('@context')):
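
Concretely, extract_interaction_statistic maps schema.org InteractionCounter entries onto the usual count fields. A hypothetical JSON-LD object and its effect:

    # hypothetical JSON-LD consumed by extract_interaction_statistic()
    e = {
        '@type': 'VideoObject',
        'interactionStatistic': [{
            '@type': 'InteractionCounter',
            'interactionType': 'https://schema.org/WatchAction',
            'userInteractionCount': 5647018,
        }],
    }
    # 'WatchAction' maps to 'view' via INTERACTION_TYPE_MAP, so
    # info['view_count'] is set to 5647018 (unless already present)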
class CrackleIE(InfoExtractor):
- _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
- _TEST = {
+ _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?(?:sony)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
+ _TESTS = [{
# geo restricted to CA
'url': 'https://www.crackle.com/andromeda/2502343',
'info_dict': {
# m3u8 download
'skip_download': True,
}
- }
+ }, {
+ 'url': 'https://www.sonycrackle.com/andromeda/2502343',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
})
def _login(self):
- (username, password) = self._get_login_info()
+ username, password = self._get_login_info()
if username is None:
return
_VALID_URL = r'https?://(?:.+?\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)'
_TESTS = [{
'url': 'http://www.ctvnews.ca/video?clipId=901995',
- 'md5': '10deb320dc0ccb8d01d34d12fc2ea672',
+ 'md5': '9b8624ba66351a23e0b6e1391971f9af',
'info_dict': {
'id': '901995',
- 'ext': 'mp4',
+ 'ext': 'flv',
'title': 'Extended: \'That person cannot be me\' Johnson says',
'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285',
'timestamp': 1467286284,
return result['data']
def _real_initialize(self):
- (email, password) = self._get_login_info()
+ email, password = self._get_login_info()
if email is None:
return
result = self._download_json(
# coding: utf-8
from __future__ import unicode_literals
-import re
-import json
+import base64
+import hashlib
import itertools
+import json
+import random
+import re
+import string
from .common import InfoExtractor
-
+from ..compat import compat_struct_pack
from ..utils import (
determine_ext,
error_to_compat_str,
'uploader': 'Deadline',
'uploader_id': 'x1xm8ri',
'age_limit': 0,
- 'view_count': int,
},
}, {
'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
player = self._parse_json(player_v5, video_id)
metadata = player['metadata']
+ if metadata.get('error', {}).get('type') == 'password_protected':
+ password = self._downloader.params.get('videopassword')
+ if password:
+                # strip the id's first character (the 'x' prefix) and
+                # base36-decode the remainder to get the numeric video id
+                r = int(metadata['id'][1:], 36)
+                us64e = lambda x: base64.urlsafe_b64encode(x).decode().strip('=')
+                # random salt plus md5(password + numeric id + salt), both
+                # urlsafe-base64-encoded without padding
+                t = ''.join(random.choice(string.ascii_letters) for _ in range(10))
+                n = us64e(compat_struct_pack('I', r))
+                i = us64e(hashlib.md5(('%s%d%s' % (password, r, t)).encode()).digest())
+ metadata = self._download_json(
+ 'http://www.dailymotion.com/player/metadata/video/p' + i + t + n, video_id)
+
self._check_error(metadata)
formats = []
continue
ext = mimetype2ext(type_) or determine_ext(media_url)
if ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
+ m3u8_formats = self._extract_m3u8_formats(
media_url, video_id, 'mp4', preference=-1,
- m3u8_id='hls', fatal=False))
+ m3u8_id='hls', fatal=False)
+ for f in m3u8_formats:
+ f['url'] = f['url'].split('#')[0]
+ formats.append(f)
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
media_url, video_id, preference=-1, f4m_id='hds', fatal=False))
def _check_error(self, info):
error = info.get('error')
- if info.get('error') is not None:
- title = error['title']
+ if error:
+ title = error.get('title') or error['message']
# See https://developer.dailymotion.com/api#access-error
if error.get('code') == 'DM007':
self.raise_geo_restricted(msg=title)
import string
from .discoverygo import DiscoveryGoBaseIE
-from ..compat import compat_str
+from ..compat import (
+ compat_str,
+ compat_urllib_parse_unquote,
+)
from ..utils import (
ExtractorError,
try_get,
video = next(cb for cb in content_blocks if cb.get('type') == 'video')['content']['items'][0]
video_id = video['id']
- access_token = self._download_json(
- 'https://www.%s.com/anonymous' % site, display_id, query={
- 'authRel': 'authorization',
- 'client_id': try_get(
- react_data, lambda x: x['application']['apiClientId'],
- compat_str) or '3020a40c2356a645b4b4',
- 'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
- 'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site,
- })['access_token']
+ access_token = None
+ cookies = self._get_cookies(url)
+
+ # prefer Affiliate Auth Token over Anonymous Auth Token
+ auth_storage_cookie = cookies.get('eosAf') or cookies.get('eosAn')
+ if auth_storage_cookie and auth_storage_cookie.value:
+ auth_storage = self._parse_json(compat_urllib_parse_unquote(
+ compat_urllib_parse_unquote(auth_storage_cookie.value)),
+ video_id, fatal=False) or {}
+ access_token = auth_storage.get('a') or auth_storage.get('access_token')
+
+ if not access_token:
+ access_token = self._download_json(
+ 'https://www.%s.com/anonymous' % site, display_id, query={
+ 'authRel': 'authorization',
+ 'client_id': try_get(
+ react_data, lambda x: x['application']['apiClientId'],
+ compat_str) or '3020a40c2356a645b4b4',
+ 'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
+ 'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site,
+ })['access_token']
try:
stream = self._download_json(
'Authorization': 'Bearer ' + access_token,
})
except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
e_description = self._parse_json(
e.cause.read().decode(), display_id)['description']
if 'resource not available for country' in e_description:
import re
-from .common import InfoExtractor
from .brightcove import BrightcoveLegacyIE
+from .dplay import DPlayIE
from ..compat import (
compat_parse_qs,
compat_urlparse,
from ..utils import smuggle_url
-class DiscoveryNetworksDeIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?(?:discovery|tlc|animalplanet|dmax)\.de/(?:.*#(?P<id>\d+)|(?:[^/]+/)*videos/(?P<title>[^/?#]+))'
+class DiscoveryNetworksDeIE(DPlayIE):
+ _VALID_URL = r'''(?x)https?://(?:www\.)?(?P<site>discovery|tlc|animalplanet|dmax)\.de/
+ (?:
+ .*\#(?P<id>\d+)|
+ (?:[^/]+/)*videos/(?P<display_id>[^/?#]+)|
+ programme/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)
+ )'''
_TESTS = [{
'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001',
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
+ alternate_id = mobj.group('alternate_id')
+ if alternate_id:
+ self._initialize_geo_bypass({
+ 'countries': ['DE'],
+ })
+ return self._get_disco_api_info(
+ url, '%s/%s' % (mobj.group('programme'), alternate_id),
+ 'sonic-eu1-prod.disco-api.com', mobj.group('site') + 'de')
brightcove_id = mobj.group('id')
if not brightcove_id:
title = mobj.group('title')
'only_matching': True,
}]
+ def _get_disco_api_info(self, url, display_id, disco_host, realm):
+ disco_base = 'https://' + disco_host
+ token = self._download_json(
+ '%s/token' % disco_base, display_id, 'Downloading token',
+ query={
+ 'realm': realm,
+ })['data']['attributes']['token']
+ headers = {
+ 'Referer': url,
+ 'Authorization': 'Bearer ' + token,
+ }
+ video = self._download_json(
+ '%s/content/videos/%s' % (disco_base, display_id), display_id,
+ headers=headers, query={
+ 'include': 'show'
+ })
+ video_id = video['data']['id']
+ info = video['data']['attributes']
+ title = info['name']
+ formats = []
+ for format_id, format_dict in self._download_json(
+ '%s/playback/videoPlaybackInfo/%s' % (disco_base, video_id),
+ display_id, headers=headers)['data']['attributes']['streaming'].items():
+ if not isinstance(format_dict, dict):
+ continue
+ format_url = format_dict.get('url')
+ if not format_url:
+ continue
+ ext = determine_ext(format_url)
+ if format_id == 'dash' or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ format_url, display_id, mpd_id='dash', fatal=False))
+ elif format_id == 'hls' or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, display_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls',
+ fatal=False))
+ else:
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ })
+ self._sort_formats(formats)
+
+ series = None
+ try:
+ included = video.get('included')
+ if isinstance(included, list):
+ show = next(e for e in included if e.get('type') == 'show')
+ series = try_get(
+ show, lambda x: x['attributes']['name'], compat_str)
+ except StopIteration:
+ pass
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': info.get('description'),
+ 'duration': float_or_none(
+ info.get('videoDuration'), scale=1000),
+ 'timestamp': unified_timestamp(info.get('publishStart')),
+ 'series': series,
+ 'season_number': int_or_none(info.get('seasonNumber')),
+ 'episode_number': int_or_none(info.get('episodeNumber')),
+ 'age_limit': int_or_none(info.get('minimum_age')),
+ 'formats': formats,
+ }
+
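
With the helper hoisted into DPlayIE, subclasses can delegate to it. A sketch mirroring the DiscoveryNetworksDeIE change above ('dmaxde' is one realm the site + 'de' concatenation there produces; programme and alternate_id come from the URL groups):

    # sketch: reusing the shared disco API helper, as discoverynetworks does
    return self._get_disco_api_info(
        url, '%s/%s' % (programme, alternate_id),
        'sonic-eu1-prod.disco-api.com', 'dmaxde')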
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('id')
domain = mobj.group('domain')
- self._initialize_geo_bypass([mobj.group('country').upper()])
+ self._initialize_geo_bypass({
+ 'countries': [mobj.group('country').upper()],
+ })
webpage = self._download_webpage(url, display_id)
if not video_id:
host = mobj.group('host')
- disco_base = 'https://disco-api.%s' % host
- self._download_json(
- '%s/token' % disco_base, display_id, 'Downloading token',
- query={
- 'realm': host.replace('.', ''),
- })
- video = self._download_json(
- '%s/content/videos/%s' % (disco_base, display_id), display_id,
- headers={
- 'Referer': url,
- 'x-disco-client': 'WEB:UNKNOWN:dplay-client:0.0.1',
- }, query={
- 'include': 'show'
- })
- video_id = video['data']['id']
- info = video['data']['attributes']
- title = info['name']
- formats = []
- for format_id, format_dict in self._download_json(
- '%s/playback/videoPlaybackInfo/%s' % (disco_base, video_id),
- display_id)['data']['attributes']['streaming'].items():
- if not isinstance(format_dict, dict):
- continue
- format_url = format_dict.get('url')
- if not format_url:
- continue
- ext = determine_ext(format_url)
- if format_id == 'dash' or ext == 'mpd':
- formats.extend(self._extract_mpd_formats(
- format_url, display_id, mpd_id='dash', fatal=False))
- elif format_id == 'hls' or ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- format_url, display_id, 'mp4',
- entry_protocol='m3u8_native', m3u8_id='hls',
- fatal=False))
- else:
- formats.append({
- 'url': format_url,
- 'format_id': format_id,
- })
- self._sort_formats(formats)
-
- series = None
- try:
- included = video.get('included')
- if isinstance(included, list):
- show = next(e for e in included if e.get('type') == 'show')
- series = try_get(
- show, lambda x: x['attributes']['name'], compat_str)
- except StopIteration:
- pass
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': info.get('description'),
- 'duration': float_or_none(
- info.get('videoDuration'), scale=1000),
- 'timestamp': unified_timestamp(info.get('publishStart')),
- 'series': series,
- 'season_number': int_or_none(info.get('seasonNumber')),
- 'episode_number': int_or_none(info.get('episodeNumber')),
- 'age_limit': int_or_none(info.get('minimum_age')),
- 'formats': formats,
- }
+ return self._get_disco_api_info(
+ url, display_id, 'disco-api.' + host, host.replace('.', ''))
info = self._download_json(
'http://%s/api/v2/ajax/videos?video_id=%s' % (domain, video_id),
self._login()
def _login(self):
- (username, password) = self._get_login_info()
+ username, password = self._get_login_info()
if username is None:
return
unified_strdate,
xpath_text,
determine_ext,
- qualities,
float_or_none,
ExtractorError,
)
class DreiSatIE(InfoExtractor):
IE_NAME = '3sat'
- _VALID_URL = r'(?:https?://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
+ _GEO_COUNTRIES = ['DE']
+ _VALID_URL = r'https?://(?:www\.)?3sat\.de/mediathek/(?:(?:index|mediathek)\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)'
_TESTS = [
{
'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
param_groups = {}
for param_group in smil.findall(self._xpath_ns('./head/paramGroup', namespace)):
- group_id = param_group.attrib.get(self._xpath_ns('id', 'http://www.w3.org/XML/1998/namespace'))
+ group_id = param_group.get(self._xpath_ns(
+ 'id', 'http://www.w3.org/XML/1998/namespace'))
params = {}
for param in param_group:
params[param.get('name')] = param.get('value')
src = video.get('src')
if not src:
continue
- bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
+ bitrate = int_or_none(self._search_regex(r'_(\d+)k', src, 'bitrate', None)) or float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
group_id = video.get('paramGroup')
param_group = param_groups[group_id]
for proto in param_group['protocols'].split(','):
note='Downloading video info',
errnote='Failed to download video info')
- status_code = doc.find('./status/statuscode')
- if status_code is not None and status_code.text != 'ok':
- code = status_code.text
- if code == 'notVisibleAnymore':
+ status_code = xpath_text(doc, './status/statuscode')
+ if status_code and status_code != 'ok':
+ if status_code == 'notVisibleAnymore':
message = 'Video %s is not available' % video_id
else:
- message = '%s returned error: %s' % (self.IE_NAME, code)
+ message = '%s returned error: %s' % (self.IE_NAME, status_code)
raise ExtractorError(message, expected=True)
- title = doc.find('.//information/title').text
- description = xpath_text(doc, './/information/detail', 'description')
- duration = int_or_none(xpath_text(doc, './/details/lengthSec', 'duration'))
- uploader = xpath_text(doc, './/details/originChannelTitle', 'uploader')
- uploader_id = xpath_text(doc, './/details/originChannelId', 'uploader id')
- upload_date = unified_strdate(xpath_text(doc, './/details/airtime', 'upload date'))
-
- def xml_to_thumbnails(fnode):
- thumbnails = []
- for node in fnode:
- thumbnail_url = node.text
- if not thumbnail_url:
- continue
- thumbnail = {
- 'url': thumbnail_url,
- }
- if 'key' in node.attrib:
- m = re.match('^([0-9]+)x([0-9]+)$', node.attrib['key'])
- if m:
- thumbnail['width'] = int(m.group(1))
- thumbnail['height'] = int(m.group(2))
- thumbnails.append(thumbnail)
- return thumbnails
-
- thumbnails = xml_to_thumbnails(doc.findall('.//teaserimages/teaserimage'))
-
- format_nodes = doc.findall('.//formitaeten/formitaet')
- quality = qualities(['veryhigh', 'high', 'med', 'low'])
-
- def get_quality(elem):
- return quality(xpath_text(elem, 'quality'))
- format_nodes.sort(key=get_quality)
- format_ids = []
+ title = xpath_text(doc, './/information/title', 'title', True)
+
+ urls = []
formats = []
- for fnode in format_nodes:
- video_url = fnode.find('url').text
+ for fnode in doc.findall('.//formitaeten/formitaet'):
+ video_url = xpath_text(fnode, 'url')
+ if not video_url or video_url in urls:
+ continue
+ urls.append(video_url)
+
is_available = 'http://www.metafilegenerator' not in video_url
- if not is_available:
+ geoloced = 'static_geoloced_online' in video_url
+ if not is_available or geoloced:
continue
+
format_id = fnode.attrib['basetype']
- quality = xpath_text(fnode, './quality', 'quality')
format_m = re.match(r'''(?x)
(?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
(?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
''', format_id)
ext = determine_ext(video_url, None) or format_m.group('container')
- if ext not in ('smil', 'f4m', 'm3u8'):
- format_id = format_id + '-' + quality
- if format_id in format_ids:
- continue
if ext == 'meta':
continue
if video_url.startswith('https://'):
continue
formats.extend(self._extract_m3u8_formats(
- video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
+ video_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id=format_id, fatal=False))
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
video_url, video_id, f4m_id=format_id, fatal=False))
else:
- proto = format_m.group('proto').lower()
-
- abr = int_or_none(xpath_text(fnode, './audioBitrate', 'abr'), 1000)
- vbr = int_or_none(xpath_text(fnode, './videoBitrate', 'vbr'), 1000)
+ quality = xpath_text(fnode, './quality')
+ if quality:
+ format_id += '-' + quality
- width = int_or_none(xpath_text(fnode, './width', 'width'))
- height = int_or_none(xpath_text(fnode, './height', 'height'))
+ abr = int_or_none(xpath_text(fnode, './audioBitrate'), 1000)
+ vbr = int_or_none(xpath_text(fnode, './videoBitrate'), 1000)
- filesize = int_or_none(xpath_text(fnode, './filesize', 'filesize'))
-
- format_note = ''
- if not format_note:
- format_note = None
+ tbr = int_or_none(self._search_regex(
+ r'_(\d+)k', video_url, 'bitrate', None))
+ if tbr and vbr and not abr:
+ abr = tbr - vbr
formats.append({
'format_id': format_id,
'vcodec': format_m.group('vcodec'),
'abr': abr,
'vbr': vbr,
- 'width': width,
- 'height': height,
- 'filesize': filesize,
- 'format_note': format_note,
- 'protocol': proto,
- '_available': is_available,
+ 'tbr': tbr,
+ 'width': int_or_none(xpath_text(fnode, './width')),
+ 'height': int_or_none(xpath_text(fnode, './height')),
+ 'filesize': int_or_none(xpath_text(fnode, './filesize')),
+ 'protocol': format_m.group('proto').lower(),
})
- format_ids.append(format_id)
+
+ geolocation = xpath_text(doc, './/details/geolocation')
+ if not formats and geolocation and geolocation != 'none':
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
self._sort_formats(formats)
+ thumbnails = []
+ for node in doc.findall('.//teaserimages/teaserimage'):
+ thumbnail_url = node.text
+ if not thumbnail_url:
+ continue
+ thumbnail = {
+ 'url': thumbnail_url,
+ }
+ thumbnail_key = node.get('key')
+ if thumbnail_key:
+ m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key)
+ if m:
+ thumbnail['width'] = int(m.group(1))
+ thumbnail['height'] = int(m.group(2))
+ thumbnails.append(thumbnail)
+
+ upload_date = unified_strdate(xpath_text(doc, './/details/airtime'))
+
return {
'id': video_id,
'title': title,
- 'description': description,
- 'duration': duration,
+ 'description': xpath_text(doc, './/information/detail'),
+ 'duration': int_or_none(xpath_text(doc, './/details/lengthSec')),
'thumbnails': thumbnails,
- 'uploader': uploader,
- 'uploader_id': uploader_id,
+ 'uploader': xpath_text(doc, './/details/originChannelTitle'),
+ 'uploader_id': xpath_text(doc, './/details/originChannelId'),
'upload_date': upload_date,
'formats': formats,
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
+ video_id = self._match_id(url)
+ details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?id=%s' % video_id
return self.extract_from_xml_url(video_id, details_url)
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+from socket import timeout
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class DTubeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?d\.tube/(?:#!/)?v/(?P<uploader_id>[0-9a-z.-]+)/(?P<id>[0-9a-z]{8})'
+ _TEST = {
+ 'url': 'https://d.tube/#!/v/benswann/zqd630em',
+ 'md5': 'a03eaa186618ffa7a3145945543a251e',
+ 'info_dict': {
+ 'id': 'zqd630em',
+ 'ext': 'mp4',
+ 'title': 'Reality Check: FDA\'s Disinformation Campaign on Kratom',
+ 'description': 'md5:700d164e066b87f9eac057949e4227c2',
+ 'uploader_id': 'benswann',
+ 'upload_date': '20180222',
+ 'timestamp': 1519328958,
+ },
+ 'params': {
+ 'format': '480p',
+ },
+ }
+
+ def _real_extract(self, url):
+ uploader_id, video_id = re.match(self._VALID_URL, url).groups()
+ result = self._download_json('https://api.steemit.com/', video_id, data=json.dumps({
+ 'jsonrpc': '2.0',
+ 'method': 'get_content',
+ 'params': [uploader_id, video_id],
+ }).encode())['result']
+
+ metadata = json.loads(result['json_metadata'])
+ video = metadata['video']
+ content = video['content']
+ info = video.get('info', {})
+ title = info.get('title') or result['title']
+
+ def canonical_url(h):
+ if not h:
+ return None
+ return 'https://ipfs.io/ipfs/' + h
+
+ formats = []
+ for q in ('240', '480', '720', '1080', ''):
+ video_url = canonical_url(content.get('video%shash' % q))
+ if not video_url:
+ continue
+ format_id = (q + 'p') if q else 'Source'
+ try:
+ self.to_screen('%s: Checking %s video format URL' % (video_id, format_id))
+ self._downloader._opener.open(video_url, timeout=5).close()
+            except timeout:
+ self.to_screen(
+ '%s: %s URL is invalid, skipping' % (video_id, format_id))
+ continue
+ formats.append({
+ 'format_id': format_id,
+ 'url': video_url,
+ 'height': int_or_none(q),
+ 'ext': 'mp4',
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': content.get('description'),
+ 'thumbnail': canonical_url(info.get('snaphash')),
+ 'tags': content.get('tags') or metadata.get('tags'),
+ 'duration': info.get('duration'),
+ 'formats': formats,
+ 'timestamp': parse_iso8601(result.get('created')),
+ 'uploader_id': uploader_id,
+ }
}, {
'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/',
'only_matching': True,
- }, {
- 'url': 'https://video.aktualne.cz/dvtv/babis-a-zeman-nesou-vinu-za-to-ze-nemame-jasno-v-tom-kdo-bud/r~026afb54fad711e79704ac1f6b220ee8/',
- 'md5': '87defe16681b1429c91f7a74809823c6',
- 'info_dict': {
- 'id': 'f5ae72f6fad611e794dbac1f6b220ee8',
- 'ext': 'mp4',
- 'title': 'Babiš a Zeman nesou vinu za to, že nemáme jasno v tom, kdo bude vládnout, říká Pekarová Adamová',
- },
- 'params': {
- 'skip_download': True,
- },
}]
def _parse_video_metadata(self, js, video_id, live_js=None):
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ unescapeHTML,
+ unified_timestamp,
+)
+
+
+class ExpressenIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?expressen\.se/tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.expressen.se/tv/ledare/ledarsnack/ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden/',
+ 'md5': '2fbbe3ca14392a6b1b36941858d33a45',
+ 'info_dict': {
+ 'id': '8690962',
+ 'ext': 'mp4',
+ 'title': 'Ledarsnack: Om arbetslösheten bland kvinnor i speciellt utsatta områden',
+ 'description': 'md5:f38c81ff69f3de4d269bbda012fcbbba',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 788,
+ 'timestamp': 1526639109,
+ 'upload_date': '20180518',
+ },
+ }, {
+ 'url': 'https://www.expressen.se/tv/kultur/kulturdebatt-med-expressens-karin-olsson/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ def extract_data(name):
+ return self._parse_json(
+ self._search_regex(
+ r'data-%s=(["\'])(?P<value>(?:(?!\1).)+)\1' % name,
+ webpage, 'info', group='value'),
+ display_id, transform_source=unescapeHTML)
+
+ info = extract_data('video-tracking-info')
+ video_id = info['videoId']
+
+ data = extract_data('article-data')
+ stream = data['stream']
+
+ if determine_ext(stream) == 'm3u8':
+ formats = self._extract_m3u8_formats(
+ stream, display_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+ else:
+ formats = [{
+ 'url': stream,
+ }]
+ self._sort_formats(formats)
+
+ title = info.get('titleRaw') or data['title']
+ description = info.get('descriptionRaw')
+ thumbnail = info.get('socialMediaImage') or data.get('image')
+ duration = int_or_none(info.get('videoTotalSecondsDuration') or
+ data.get('totalSecondsDuration'))
+ timestamp = unified_timestamp(info.get('publishDate'))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ }
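
The extract_data helper above pulls HTML-escaped JSON straight out of data attributes. A hypothetical fragment it would parse:

    # hypothetical markup handled by extract_data('video-tracking-info')
    webpage = '<div data-video-tracking-info="{&quot;videoId&quot;: &quot;8690962&quot;}"></div>'
    # the regex captures the attribute value, unescapeHTML restores the
    # quotes, and _parse_json yields {'videoId': '8690962'}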
from .aol import AolIE
from .allocine import AllocineIE
from .aliexpress import AliExpressLiveIE
+from .apa import APAIE
from .aparat import AparatIE
from .appleconnect import AppleConnectIE
from .appletrailers import (
BrightcoveLegacyIE,
BrightcoveNewIE,
)
+from .businessinsider import BusinessInsiderIE
from .buzzfeed import BuzzFeedIE
from .byutv import BYUtvIE
from .c56 import C56IE
CamdemyIE,
CamdemyFolderIE
)
+from .cammodels import CamModelsIE
+from .camtube import CamTubeIE
from .camwithher import CamWithHerIE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .cliprs import ClipRsIE
from .clipsyndicate import ClipsyndicateIE
from .closertotruth import CloserToTruthIE
+from .cloudflarestream import CloudflareStreamIE
from .cloudy import CloudyIE
from .clubic import ClubicIE
from .clyp import ClypIE
DRTVIE,
DRTVLiveIE,
)
+from .dtube import DTubeIE
from .dvtv import DVTVIE
from .dumpert import DumpertIE
from .defense import DefenseGouvFrIE
from .europa import EuropaIE
from .everyonesmixtape import EveryonesMixtapeIE
from .expotv import ExpoTVIE
+from .expressen import ExpressenIE
from .extremetube import ExtremeTubeIE
from .eyedotv import EyedoTVIE
from .facebook import (
FranceTVSiteIE,
FranceTVEmbedIE,
FranceTVInfoIE,
+ FranceTVInfoSportIE,
FranceTVJeunesseIE,
GenerationWhatIE,
CultureboxIE,
)
from .ina import InaIE
from .inc import IncIE
-from .indavideo import (
- IndavideoIE,
- IndavideoEmbedIE,
-)
+from .indavideo import IndavideoEmbedIE
from .infoq import InfoQIE
from .instagram import InstagramIE, InstagramUserIE
from .internazionale import InternazionaleIE
from .iprima import IPrimaIE
from .iqiyi import IqiyiIE
from .ir90tv import Ir90TvIE
-from .itv import ITVIE
+from .itv import (
+ ITVIE,
+ ITVBTCCIE,
+)
from .ivi import (
IviIE,
IviCompilationIE
MailRuMusicIE,
MailRuMusicSearchIE,
)
-from .makerschannel import MakersChannelIE
from .makertv import MakerTVIE
from .mangomolo import (
MangomoloVideoIE,
MangomoloLiveIE,
)
from .manyvids import ManyVidsIE
+from .markiza import (
+ MarkizaIE,
+ MarkizaPageIE,
+)
from .massengeschmacktv import MassengeschmackTVIE
from .matchtv import MatchTVIE
from .mdr import MDRIE
from .moevideo import MoeVideoIE
from .mofosex import MofosexIE
from .mojvideo import MojvideoIE
-from .moniker import MonikerIE
from .morningstar import MorningstarIE
from .motherless import (
MotherlessIE,
from .muenchentv import MuenchenTVIE
from .musicplayon import MusicPlayOnIE
from .mwave import MwaveIE, MwaveMeetGreetIE
+from .mychannels import MyChannelsIE
from .myspace import MySpaceIE, MySpaceAlbumIE
from .myspass import MySpassIE
from .myvi import (
NBCOlympicsIE,
NBCOlympicsStreamIE,
NBCSportsIE,
+ NBCSportsStreamIE,
NBCSportsVPlayerIE,
)
from .ndr import (
from .nfb import NFBIE
from .nfl import NFLIE
from .nhk import NhkVodIE
-from .nhl import (
- NHLVideocenterIE,
- NHLNewsIE,
- NHLVideocenterCategoryIE,
- NHLIE,
-)
+from .nhl import NHLIE
from .nick import (
NickIE,
NickBrIE,
NickRuIE,
)
from .niconico import NiconicoIE, NiconicoPlaylistIE
-from .ninecninemedia import (
- NineCNineMediaStackIE,
- NineCNineMediaIE,
-)
+from .ninecninemedia import NineCNineMediaIE
from .ninegag import NineGagIE
from .ninenow import NineNowIE
from .nintendo import NintendoIE
from .patreon import PatreonIE
from .pbs import PBSIE
from .pearvideo import PearVideoIE
+from .peertube import PeerTubeIE
from .people import PeopleIE
from .performgroup import PerformGroupIE
from .periscope import (
from .spankwire import SpankwireIE
from .spiegel import SpiegelIE, SpiegelArticleIE
from .spiegeltv import SpiegeltvIE
-from .spike import SpikeIE
+from .spike import (
+ BellatorIE,
+ ParamountNetworkIE,
+)
from .stitcher import StitcherIE
from .sport5 import Sport5IE
from .sportbox import SportBoxEmbedIE
from .tvigle import TvigleIE
from .tvland import TVLandIE
from .tvn24 import TVN24IE
+from .tvnet import TVNetIE
from .tvnoe import TVNoeIE
from .tvnow import (
TVNowIE,
)
from .zapiks import ZapiksIE
from .zaq1 import Zaq1IE
+from .zattoo import (
+ QuicklineIE,
+ QuicklineLiveIE,
+ ZattooIE,
+ ZattooLiveIE,
+)
from .zdf import ZDFIE, ZDFChannelIE
from .zingmp3 import ZingMp3IE
_CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
_VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
+ _VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true'
_TESTS = [{
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
# no title
'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/',
'only_matching': True,
+ }, {
+ 'url': 'https://www.facebook.com/WatchESLOne/videos/359649331226507/',
+ 'info_dict': {
+ 'id': '359649331226507',
+ 'ext': 'mp4',
+ 'title': '#ESLOne VoD - Birmingham Finals Day#1 Fnatic vs. @Evil Geniuses',
+ 'uploader': 'ESL One Dota 2',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
}]
@staticmethod
return urls
def _login(self):
- (useremail, password) = self._get_login_info()
+ useremail, password = self._get_login_info()
if useremail is None:
return
if server_js_data:
video_data = extract_video_data(server_js_data.get('instances', []))
+ def extract_from_jsmods_instances(js_data):
+ if js_data:
+ return extract_video_data(try_get(
+ js_data, lambda x: x['jsmods']['instances'], list) or [])
+
if not video_data:
server_js_data = self._parse_json(
self._search_regex(
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall|permalink_video_pagelet)',
webpage, 'js data', default='{}'),
video_id, transform_source=js_to_json, fatal=False)
- if server_js_data:
- video_data = extract_video_data(try_get(
- server_js_data, lambda x: x['jsmods']['instances'],
- list) or [])
+ video_data = extract_from_jsmods_instances(server_js_data)
if not video_data:
if not fatal_if_no_video:
expected=True)
elif '>You must log in to continue' in webpage:
self.raise_login_required()
- else:
- raise ExtractorError('Cannot parse data')
+
+        # Video info not in the first request; do a secondary request using
+        # the tahoe player specific URL
+ tahoe_data = self._download_webpage(
+ self._VIDEO_PAGE_TAHOE_TEMPLATE % video_id, video_id,
+ data=urlencode_postdata({
+ '__user': 0,
+ '__a': 1,
+ '__pc': self._search_regex(
+ r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', webpage,
+ 'pkg cohort', default='PHASED:DEFAULT'),
+ '__rev': self._search_regex(
+ r'client_revision["\']\s*:\s*(\d+),', webpage,
+ 'client revision', default='3944515'),
+ }),
+ headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ })
+ tahoe_js_data = self._parse_json(
+ self._search_regex(
+ r'for\s+\(\s*;\s*;\s*\)\s*;(.+)', tahoe_data,
+ 'tahoe js data', default='{}'),
+ video_id, fatal=False)
+ video_data = extract_from_jsmods_instances(tahoe_js_data)
+
+ if not video_data:
+ raise ExtractorError('Cannot parse data')
formats = []
for f in video_data:
video_title = 'Facebook video #%s' % video_id
uploader = clean_html(get_element_by_id(
'fbPhotoPageAuthorName', webpage)) or self._search_regex(
- r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader', fatal=False)
+ r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader',
+ fatal=False) or self._og_search_title(webpage, fatal=False)
timestamp = int_or_none(self._search_regex(
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
'timestamp', default=None))
}]
def _login(self):
- (username, password) = self._get_login_info()
+ username, password = self._get_login_info()
if username is None or password is None:
return False
return self._make_url_result(video_id, catalogue)
+class FranceTVInfoSportIE(FranceTVBaseInfoExtractor):
+ IE_NAME = 'sport.francetvinfo.fr'
+ _VALID_URL = r'https?://sport\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://sport.francetvinfo.fr/les-jeux-olympiques/retour-sur-les-meilleurs-moments-de-pyeongchang-2018',
+ 'info_dict': {
+ 'id': '6e49080e-3f45-11e8-b459-000d3a2439ea',
+ 'ext': 'mp4',
+ 'title': 'Retour sur les meilleurs moments de Pyeongchang 2018',
+ 'timestamp': 1523639962,
+ 'upload_date': '20180413',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [FranceTVIE.ie_key()],
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ video_id = self._search_regex(r'data-video="([^"]+)"', webpage, 'video_id')
+ return self._make_url_result(video_id, 'Sport-web')
+
+
class GenerationWhatIE(InfoExtractor):
IE_NAME = 'france2.fr:generation-what'
_VALID_URL = r'https?://generation-what\.francetv\.fr/[^/]+/video/(?P<id>[^/?#&]+)'
}]
def _login(self):
- (username, password) = self._get_login_info()
+ username, password = self._get_login_info()
if username is None:
return
try:
from .common import InfoExtractor
from .nexx import NexxIE
-from ..utils import int_or_none
+from ..utils import (
+ int_or_none,
+ try_get,
+)
class FunkBaseIE(InfoExtractor):
'params': {
'skip_download': True,
},
+ }, {
+ # only available via byIdList API
+ 'url': 'https://www.funk.net/channel/informr/martin-sonneborn-erklaert-die-eu',
+ 'info_dict': {
+ 'id': '205067',
+ 'ext': 'mp4',
+ 'title': 'Martin Sonneborn erklärt die EU',
+ 'description': 'md5:050f74626e4ed87edf4626d2024210c0',
+ 'timestamp': 1494424042,
+ 'upload_date': '20170510',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
}, {
'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/mein-erster-job-lovemilla-folge-1/lovemilla/',
'only_matching': True,
channel_id = mobj.group('id')
alias = mobj.group('alias')
- results = self._download_json(
- 'https://www.funk.net/api/v3.0/content/videos/filter', channel_id,
- headers={
- 'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbCIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxzZWFyY2gtYXBpIn0.q4Y2xZG8PFHai24-4Pjx2gym9RmJejtmK6lMXP5wAgc',
- 'Referer': url,
- }, query={
- 'channelId': channel_id,
- 'size': 100,
- })['result']
+ headers = {
+ 'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbCIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxzZWFyY2gtYXBpIn0.q4Y2xZG8PFHai24-4Pjx2gym9RmJejtmK6lMXP5wAgc',
+ 'Referer': url,
+ }
- video = next(r for r in results if r.get('alias') == alias)
+ video = None
+
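+ # Some videos are only exposed via the byIdList endpoint (see the test
+ # above), so try it first with the URL alias as the id before falling
+ # back to scanning the channel listing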
+ by_id_list = self._download_json(
+ 'https://www.funk.net/api/v3.0/content/videos/byIdList', channel_id,
+ headers=headers, query={
+ 'ids': alias,
+ }, fatal=False)
+ if by_id_list:
+ video = try_get(by_id_list, lambda x: x['result'][0], dict)
+
+ if not video:
+ results = self._download_json(
+ 'https://www.funk.net/api/v3.0/content/videos/filter', channel_id,
+ headers=headers, query={
+ 'channelId': channel_id,
+ 'size': 100,
+ })['result']
+ video = next(r for r in results if r.get('alias') == alias)
return self._make_url_result(video)
]
def _login(self, webpage_url, display_id):
- (username, password) = self._get_login_info()
+ username, password = self._get_login_info()
if username is None or password is None:
self.report_warning('It looks like ' + webpage_url + ' requires a login. Try specifying a username and password and try again.')
return None
is_html,
js_to_json,
KNOWN_EXTENSIONS,
+ merge_dicts,
mimetype2ext,
orderedSet,
sanitized_Request,
from .yapfiles import YapFilesIE
from .vice import ViceIE
from .xfileshare import XFileShareIE
+from .cloudflarestream import CloudflareStreamIE
+from .peertube import PeerTubeIE
+from .indavideo import IndavideoEmbedIE
+from .apa import APAIE
class GenericIE(InfoExtractor):
'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
}
},
+ # RSS feed with enclosures and unsupported link URLs
+ {
+ 'url': 'http://www.hellointernet.fm/podcast?format=rss',
+ 'info_dict': {
+ 'id': 'http://www.hellointernet.fm/podcast?format=rss',
+ 'description': 'CGP Grey and Brady Haran talk about YouTube, life, work, whatever.',
+ 'title': 'Hello Internet',
+ },
+ 'playlist_mincount': 100,
+ },
# SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
{
'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
},
'add_ie': ['Kaltura'],
},
+ {
+ # Kaltura iframe embed, more sophisticated
+ 'url': 'http://www.cns.nyu.edu/~eero/math-tools/Videos/lecture-05sep2017.html',
+ 'info_dict': {
+ 'id': '1_9gzouybz',
+ 'ext': 'mp4',
+ 'title': 'lecture-05sep2017',
+ 'description': 'md5:40f347d91fd4ba047e511c5321064b49',
+ 'upload_date': '20170913',
+ 'uploader_id': 'eps2',
+ 'timestamp': 1505340777,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Kaltura'],
+ },
{
# meta twitter:player
'url': 'http://thechive.com/2017/12/08/all-i-want-for-christmas-is-more-twerk/',
},
'expected_warnings': ['Failed to parse JSON Expecting value'],
},
- # Ooyala embed
- {
- 'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
- 'info_dict': {
- 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
- 'ext': 'mp4',
- 'description': 'Index/Match versus VLOOKUP.',
- 'title': 'This is what separates the Excel masters from the wannabes',
- 'duration': 191.933,
- },
- 'params': {
- # m3u8 downloads
- 'skip_download': True,
- }
- },
# Brightcove URL in single quotes
{
'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
'skip_download': True,
},
},
+ {
+ # CloudflareStream embed
+ 'url': 'https://www.cloudflare.com/products/cloudflare-stream/',
+ 'info_dict': {
+ 'id': '31c9291ab41fac05471db4e73aa11717',
+ 'ext': 'mp4',
+ 'title': '31c9291ab41fac05471db4e73aa11717',
+ },
+ 'add_ie': [CloudflareStreamIE.ie_key()],
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # PeerTube embed
+ 'url': 'https://joinpeertube.org/fr/home/',
+ 'info_dict': {
+ 'id': 'home',
+ 'title': 'Reprenez le contrôle de vos vidéos ! #JoinPeertube',
+ },
+ 'playlist_count': 2,
+ },
+ {
+ # Indavideo embed
+ 'url': 'https://streetkitchen.hu/receptek/igy_kell_otthon_hamburgert_sutni/',
+ 'info_dict': {
+ 'id': '1693903',
+ 'ext': 'mp4',
+ 'title': 'Így kell otthon hamburgert sütni',
+ 'description': 'md5:f5a730ecf900a5c852e1e00540bbb0f7',
+ 'timestamp': 1426330212,
+ 'upload_date': '20150314',
+ 'uploader': 'StreetKitchen',
+ 'uploader_id': '546363',
+ },
+ 'add_ie': [IndavideoEmbedIE.ie_key()],
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # APA embed via JWPlatform embed
+ 'url': 'http://www.vol.at/blue-man-group/5593454',
+ 'info_dict': {
+ 'id': 'jjv85FdZ',
+ 'ext': 'mp4',
+ 'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 254,
+ 'timestamp': 1519211149,
+ 'upload_date': '20180221',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
{
'url': 'http://share-videos.se/auto/video/83645793?uid=13',
'md5': 'b68d276de422ab07ee1d49388103f457',
entries = []
for it in doc.findall('./channel/item'):
- next_url = xpath_text(it, 'link', fatal=False)
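+ # Prefer enclosure URLs (direct media files) over <link>, which may
+ # point to an unsupported webpage (see the Hello Internet test above)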
+ next_url = None
+ enclosure_nodes = it.findall('./enclosure')
+ for e in enclosure_nodes:
+ next_url = e.attrib.get('url')
+ if next_url:
+ break
+
if not next_url:
- enclosure_nodes = it.findall('./enclosure')
- for e in enclosure_nodes:
- next_url = e.attrib.get('url')
- if next_url:
- break
+ next_url = xpath_text(it, 'link', fatal=False)
if not next_url:
continue
return self.playlist_from_matches(
xfileshare_urls, video_id, video_title, ie=XFileShareIE.ie_key())
+ cloudflarestream_urls = CloudflareStreamIE._extract_urls(webpage)
+ if cloudflarestream_urls:
+ return self.playlist_from_matches(
+ cloudflarestream_urls, video_id, video_title, ie=CloudflareStreamIE.ie_key())
+
+ peertube_urls = PeerTubeIE._extract_urls(webpage)
+ if peertube_urls:
+ return self.playlist_from_matches(
+ peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
+
+ indavideo_urls = IndavideoEmbedIE._extract_urls(webpage)
+ if indavideo_urls:
+ return self.playlist_from_matches(
+ indavideo_urls, video_id, video_title, ie=IndavideoEmbedIE.ie_key())
+
+ apa_urls = APAIE._extract_urls(webpage)
+ if apa_urls:
+ return self.playlist_from_matches(
+ apa_urls, video_id, video_title, ie=APAIE.ie_key())
+
sharevideos_urls = [mobj.group('url') for mobj in re.finditer(
r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
webpage)]
return self.playlist_from_matches(
sharevideos_urls, video_id, video_title)
- def merge_dicts(dict1, dict2):
- merged = {}
- for k, v in dict1.items():
- if v is not None:
- merged[k] = v
- for k, v in dict2.items():
- if v is None:
- continue
- if (k not in merged or
- (isinstance(v, compat_str) and v and
- isinstance(merged[k], compat_str) and
- not merged[k])):
- merged[k] = v
- return merged
-
# Look for HTML5 media
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
if entries:
# coding: utf-8
from __future__ import unicode_literals
+import base64
+import hashlib
+import json
import random
import re
-import math
from .common import InfoExtractor
from ..compat import (
+ compat_HTTPError,
compat_str,
- compat_chr,
- compat_ord,
)
from ..utils import (
ExtractorError,
class GloboIE(InfoExtractor):
_VALID_URL = r'(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P<id>\d{7,})'
-
- _API_URL_TEMPLATE = 'http://api.globovideos.com/videos/%s/playlist'
- _SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=17.0.0.132&resource_id=%s'
-
- _RESIGN_EXPIRATION = 86400
-
+ _NETRC_MACHINE = 'globo'
_TESTS = [{
'url': 'http://g1.globo.com/carros/autoesporte/videos/t/exclusivos-do-g1/v/mercedes-benz-gla-passa-por-teste-de-colisao-na-europa/3607726/',
'md5': 'b3ccc801f75cd04a914d51dadb83a78d',
'only_matching': True,
}]
- class MD5(object):
- HEX_FORMAT_LOWERCASE = 0
- HEX_FORMAT_UPPERCASE = 1
- BASE64_PAD_CHARACTER_DEFAULT_COMPLIANCE = ''
- BASE64_PAD_CHARACTER_RFC_COMPLIANCE = '='
- PADDING = '=0xFF01DD'
- hexcase = 0
- b64pad = ''
-
- def __init__(self):
- pass
-
- class JSArray(list):
- def __getitem__(self, y):
- try:
- return list.__getitem__(self, y)
- except IndexError:
- return 0
-
- def __setitem__(self, i, y):
- try:
- return list.__setitem__(self, i, y)
- except IndexError:
- self.extend([0] * (i - len(self) + 1))
- self[-1] = y
-
- @classmethod
- def hex_md5(cls, param1):
- return cls.rstr2hex(cls.rstr_md5(cls.str2rstr_utf8(param1)))
-
- @classmethod
- def b64_md5(cls, param1, param2=None):
- return cls.rstr2b64(cls.rstr_md5(cls.str2rstr_utf8(param1, param2)))
-
- @classmethod
- def any_md5(cls, param1, param2):
- return cls.rstr2any(cls.rstr_md5(cls.str2rstr_utf8(param1)), param2)
-
- @classmethod
- def rstr_md5(cls, param1):
- return cls.binl2rstr(cls.binl_md5(cls.rstr2binl(param1), len(param1) * 8))
-
- @classmethod
- def rstr2hex(cls, param1):
- _loc_2 = '0123456789ABCDEF' if cls.hexcase else '0123456789abcdef'
- _loc_3 = ''
- for _loc_5 in range(0, len(param1)):
- _loc_4 = compat_ord(param1[_loc_5])
- _loc_3 += _loc_2[_loc_4 >> 4 & 15] + _loc_2[_loc_4 & 15]
- return _loc_3
-
- @classmethod
- def rstr2b64(cls, param1):
- _loc_2 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'
- _loc_3 = ''
- _loc_4 = len(param1)
- for _loc_5 in range(0, _loc_4, 3):
- _loc_6_1 = compat_ord(param1[_loc_5]) << 16
- _loc_6_2 = compat_ord(param1[_loc_5 + 1]) << 8 if _loc_5 + 1 < _loc_4 else 0
- _loc_6_3 = compat_ord(param1[_loc_5 + 2]) if _loc_5 + 2 < _loc_4 else 0
- _loc_6 = _loc_6_1 | _loc_6_2 | _loc_6_3
- for _loc_7 in range(0, 4):
- if _loc_5 * 8 + _loc_7 * 6 > len(param1) * 8:
- _loc_3 += cls.b64pad
- else:
- _loc_3 += _loc_2[_loc_6 >> 6 * (3 - _loc_7) & 63]
- return _loc_3
-
- @staticmethod
- def rstr2any(param1, param2):
- _loc_3 = len(param2)
- _loc_4 = []
- _loc_9 = [0] * ((len(param1) >> 2) + 1)
- for _loc_5 in range(0, len(_loc_9)):
- _loc_9[_loc_5] = compat_ord(param1[_loc_5 * 2]) << 8 | compat_ord(param1[_loc_5 * 2 + 1])
-
- while len(_loc_9) > 0:
- _loc_8 = []
- _loc_7 = 0
- for _loc_5 in range(0, len(_loc_9)):
- _loc_7 = (_loc_7 << 16) + _loc_9[_loc_5]
- _loc_6 = math.floor(_loc_7 / _loc_3)
- _loc_7 -= _loc_6 * _loc_3
- if len(_loc_8) > 0 or _loc_6 > 0:
- _loc_8[len(_loc_8)] = _loc_6
-
- _loc_4[len(_loc_4)] = _loc_7
- _loc_9 = _loc_8
-
- _loc_10 = ''
- _loc_5 = len(_loc_4) - 1
- while _loc_5 >= 0:
- _loc_10 += param2[_loc_4[_loc_5]]
- _loc_5 -= 1
-
- return _loc_10
-
- @classmethod
- def str2rstr_utf8(cls, param1, param2=None):
- _loc_3 = ''
- _loc_4 = -1
- if not param2:
- param2 = cls.PADDING
- param1 = param1 + param2[1:9]
- while True:
- _loc_4 += 1
- if _loc_4 >= len(param1):
- break
- _loc_5 = compat_ord(param1[_loc_4])
- _loc_6 = compat_ord(param1[_loc_4 + 1]) if _loc_4 + 1 < len(param1) else 0
- if 55296 <= _loc_5 <= 56319 and 56320 <= _loc_6 <= 57343:
- _loc_5 = 65536 + ((_loc_5 & 1023) << 10) + (_loc_6 & 1023)
- _loc_4 += 1
- if _loc_5 <= 127:
- _loc_3 += compat_chr(_loc_5)
- continue
- if _loc_5 <= 2047:
- _loc_3 += compat_chr(192 | _loc_5 >> 6 & 31) + compat_chr(128 | _loc_5 & 63)
- continue
- if _loc_5 <= 65535:
- _loc_3 += compat_chr(224 | _loc_5 >> 12 & 15) + compat_chr(128 | _loc_5 >> 6 & 63) + compat_chr(
- 128 | _loc_5 & 63)
- continue
- if _loc_5 <= 2097151:
- _loc_3 += compat_chr(240 | _loc_5 >> 18 & 7) + compat_chr(128 | _loc_5 >> 12 & 63) + compat_chr(
- 128 | _loc_5 >> 6 & 63) + compat_chr(128 | _loc_5 & 63)
- return _loc_3
-
- @staticmethod
- def rstr2binl(param1):
- _loc_2 = [0] * ((len(param1) >> 2) + 1)
- for _loc_3 in range(0, len(_loc_2)):
- _loc_2[_loc_3] = 0
- for _loc_3 in range(0, len(param1) * 8, 8):
- _loc_2[_loc_3 >> 5] |= (compat_ord(param1[_loc_3 // 8]) & 255) << _loc_3 % 32
- return _loc_2
-
- @staticmethod
- def binl2rstr(param1):
- _loc_2 = ''
- for _loc_3 in range(0, len(param1) * 32, 8):
- _loc_2 += compat_chr(param1[_loc_3 >> 5] >> _loc_3 % 32 & 255)
- return _loc_2
-
- @classmethod
- def binl_md5(cls, param1, param2):
- param1 = cls.JSArray(param1)
- param1[param2 >> 5] |= 128 << param2 % 32
- param1[(param2 + 64 >> 9 << 4) + 14] = param2
- _loc_3 = 1732584193
- _loc_4 = -271733879
- _loc_5 = -1732584194
- _loc_6 = 271733878
- for _loc_7 in range(0, len(param1), 16):
- _loc_8 = _loc_3
- _loc_9 = _loc_4
- _loc_10 = _loc_5
- _loc_11 = _loc_6
- _loc_3 = cls.md5_ff(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 0], 7, -680876936)
- _loc_6 = cls.md5_ff(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 1], 12, -389564586)
- _loc_5 = cls.md5_ff(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 2], 17, 606105819)
- _loc_4 = cls.md5_ff(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 3], 22, -1044525330)
- _loc_3 = cls.md5_ff(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 4], 7, -176418897)
- _loc_6 = cls.md5_ff(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 5], 12, 1200080426)
- _loc_5 = cls.md5_ff(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 6], 17, -1473231341)
- _loc_4 = cls.md5_ff(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 7], 22, -45705983)
- _loc_3 = cls.md5_ff(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 8], 7, 1770035416)
- _loc_6 = cls.md5_ff(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 9], 12, -1958414417)
- _loc_5 = cls.md5_ff(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 10], 17, -42063)
- _loc_4 = cls.md5_ff(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 11], 22, -1990404162)
- _loc_3 = cls.md5_ff(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 12], 7, 1804603682)
- _loc_6 = cls.md5_ff(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 13], 12, -40341101)
- _loc_5 = cls.md5_ff(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 14], 17, -1502002290)
- _loc_4 = cls.md5_ff(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 15], 22, 1236535329)
- _loc_3 = cls.md5_gg(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 1], 5, -165796510)
- _loc_6 = cls.md5_gg(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 6], 9, -1069501632)
- _loc_5 = cls.md5_gg(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 11], 14, 643717713)
- _loc_4 = cls.md5_gg(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 0], 20, -373897302)
- _loc_3 = cls.md5_gg(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 5], 5, -701558691)
- _loc_6 = cls.md5_gg(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 10], 9, 38016083)
- _loc_5 = cls.md5_gg(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 15], 14, -660478335)
- _loc_4 = cls.md5_gg(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 4], 20, -405537848)
- _loc_3 = cls.md5_gg(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 9], 5, 568446438)
- _loc_6 = cls.md5_gg(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 14], 9, -1019803690)
- _loc_5 = cls.md5_gg(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 3], 14, -187363961)
- _loc_4 = cls.md5_gg(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 8], 20, 1163531501)
- _loc_3 = cls.md5_gg(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 13], 5, -1444681467)
- _loc_6 = cls.md5_gg(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 2], 9, -51403784)
- _loc_5 = cls.md5_gg(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 7], 14, 1735328473)
- _loc_4 = cls.md5_gg(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 12], 20, -1926607734)
- _loc_3 = cls.md5_hh(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 5], 4, -378558)
- _loc_6 = cls.md5_hh(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 8], 11, -2022574463)
- _loc_5 = cls.md5_hh(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 11], 16, 1839030562)
- _loc_4 = cls.md5_hh(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 14], 23, -35309556)
- _loc_3 = cls.md5_hh(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 1], 4, -1530992060)
- _loc_6 = cls.md5_hh(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 4], 11, 1272893353)
- _loc_5 = cls.md5_hh(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 7], 16, -155497632)
- _loc_4 = cls.md5_hh(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 10], 23, -1094730640)
- _loc_3 = cls.md5_hh(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 13], 4, 681279174)
- _loc_6 = cls.md5_hh(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 0], 11, -358537222)
- _loc_5 = cls.md5_hh(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 3], 16, -722521979)
- _loc_4 = cls.md5_hh(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 6], 23, 76029189)
- _loc_3 = cls.md5_hh(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 9], 4, -640364487)
- _loc_6 = cls.md5_hh(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 12], 11, -421815835)
- _loc_5 = cls.md5_hh(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 15], 16, 530742520)
- _loc_4 = cls.md5_hh(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 2], 23, -995338651)
- _loc_3 = cls.md5_ii(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 0], 6, -198630844)
- _loc_6 = cls.md5_ii(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 7], 10, 1126891415)
- _loc_5 = cls.md5_ii(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 14], 15, -1416354905)
- _loc_4 = cls.md5_ii(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 5], 21, -57434055)
- _loc_3 = cls.md5_ii(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 12], 6, 1700485571)
- _loc_6 = cls.md5_ii(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 3], 10, -1894986606)
- _loc_5 = cls.md5_ii(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 10], 15, -1051523)
- _loc_4 = cls.md5_ii(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 1], 21, -2054922799)
- _loc_3 = cls.md5_ii(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 8], 6, 1873313359)
- _loc_6 = cls.md5_ii(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 15], 10, -30611744)
- _loc_5 = cls.md5_ii(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 6], 15, -1560198380)
- _loc_4 = cls.md5_ii(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 13], 21, 1309151649)
- _loc_3 = cls.md5_ii(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 4], 6, -145523070)
- _loc_6 = cls.md5_ii(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 11], 10, -1120210379)
- _loc_5 = cls.md5_ii(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 2], 15, 718787259)
- _loc_4 = cls.md5_ii(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 9], 21, -343485551)
- _loc_3 = cls.safe_add(_loc_3, _loc_8)
- _loc_4 = cls.safe_add(_loc_4, _loc_9)
- _loc_5 = cls.safe_add(_loc_5, _loc_10)
- _loc_6 = cls.safe_add(_loc_6, _loc_11)
- return [_loc_3, _loc_4, _loc_5, _loc_6]
-
- @classmethod
- def md5_cmn(cls, param1, param2, param3, param4, param5, param6):
- return cls.safe_add(
- cls.bit_rol(cls.safe_add(cls.safe_add(param2, param1), cls.safe_add(param4, param6)), param5), param3)
-
- @classmethod
- def md5_ff(cls, param1, param2, param3, param4, param5, param6, param7):
- return cls.md5_cmn(param2 & param3 | ~param2 & param4, param1, param2, param5, param6, param7)
-
- @classmethod
- def md5_gg(cls, param1, param2, param3, param4, param5, param6, param7):
- return cls.md5_cmn(param2 & param4 | param3 & ~param4, param1, param2, param5, param6, param7)
-
- @classmethod
- def md5_hh(cls, param1, param2, param3, param4, param5, param6, param7):
- return cls.md5_cmn(param2 ^ param3 ^ param4, param1, param2, param5, param6, param7)
-
- @classmethod
- def md5_ii(cls, param1, param2, param3, param4, param5, param6, param7):
- return cls.md5_cmn(param3 ^ (param2 | ~param4), param1, param2, param5, param6, param7)
-
- @classmethod
- def safe_add(cls, param1, param2):
- _loc_3 = (param1 & 65535) + (param2 & 65535)
- _loc_4 = (param1 >> 16) + (param2 >> 16) + (_loc_3 >> 16)
- return cls.lshift(_loc_4, 16) | _loc_3 & 65535
-
- @classmethod
- def bit_rol(cls, param1, param2):
- return cls.lshift(param1, param2) | (param1 & 0xFFFFFFFF) >> (32 - param2)
-
- @staticmethod
- def lshift(value, count):
- r = (0xFFFFFFFF & value) << count
- return -(~(r - 1) & 0xFFFFFFFF) if r > 0x7FFFFFFF else r
+ def _real_initialize(self):
+ email, password = self._get_login_info()
+ if email is None:
+ return
+
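+ # Log in via globo.com's JSON authentication API; on HTTP 401 the
+ # response body carries a user-facing error message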
+ try:
+ self._download_json(
+ 'https://login.globo.com/api/authentication', None, data=json.dumps({
+ 'payload': {
+ 'email': email,
+ 'password': password,
+ 'serviceId': 4654,
+ },
+ }).encode(), headers={
+ 'Content-Type': 'application/json; charset=utf-8',
+ })
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ resp = self._parse_json(e.cause.read(), None)
+ raise ExtractorError(resp.get('userMessage') or resp['id'], expected=True)
+ raise
def _real_extract(self, url):
video_id = self._match_id(url)
video = self._download_json(
- self._API_URL_TEMPLATE % video_id, video_id)['videos'][0]
+ 'http://api.globovideos.com/videos/%s/playlist' % video_id,
+ video_id)['videos'][0]
title = video['title']
formats = []
for resource in video['resources']:
resource_id = resource.get('_id')
- if not resource_id or resource_id.endswith('manifest'):
+ resource_url = resource.get('url')
+ if not resource_id or not resource_url:
continue
security = self._download_json(
- self._SECURITY_URL_TEMPLATE % (video_id, resource_id),
- video_id, 'Downloading security hash for %s' % resource_id)
+ 'http://security.video.globo.com/videos/%s/hash' % video_id,
+ video_id, 'Downloading security hash for %s' % resource_id, query={
+ 'player': 'flash',
+ 'version': '17.0.0.132',
+ 'resource_id': resource_id,
+ })
security_hash = security.get('hash')
if not security_hash:
continue
hash_code = security_hash[:2]
- received_time = int(security_hash[2:12])
+ received_time = security_hash[2:12]
received_random = security_hash[12:22]
received_md5 = security_hash[22:]
- sign_time = received_time + self._RESIGN_EXPIRATION
+ sign_time = compat_str(int(received_time) + 86400)
padding = '%010d' % random.randint(1, 10000000000)
- signed_md5 = self.MD5.b64_md5(received_md5 + compat_str(sign_time) + padding)
- signed_hash = hash_code + compat_str(received_time) + received_random + compat_str(sign_time) + padding + signed_md5
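+ # Equivalent to the hand-rolled MD5 class removed above: extend the
+ # hash validity by 24 hours (86400 s) and sign with an urlsafe
+ # base64-encoded MD5 over received_md5 + new expiry + random padding
+ # + the static '0xFF01DD' suffix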
+ md5_data = (received_md5 + sign_time + padding + '0xFF01DD').encode()
+ signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=')
+ signed_hash = hash_code + received_time + received_random + sign_time + padding + signed_md5
- resource_url = resource['url']
signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash')
if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'):
formats.extend(self._extract_m3u8_formats(
signed_url, resource_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
+ elif resource_id.endswith('mpd') or resource_url.endswith('.mpd'):
+ formats.extend(self._extract_mpd_formats(
+ signed_url, resource_id, mpd_id='dash', fatal=False))
+ elif resource_id.endswith('manifest') or resource_url.endswith('/manifest'):
+ formats.extend(self._extract_ism_formats(
+ signed_url, resource_id, ism_id='mss', fatal=False))
else:
formats.append({
'url': signed_url,
'adobe_requestor_id': requestor_id,
})
else:
- self._initialize_geo_bypass(['US'])
+ self._initialize_geo_bypass({'countries': ['US']})
entitlement = self._download_json(
'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json',
video_id, data=urlencode_postdata(data))
from .common import InfoExtractor
from ..utils import (
determine_ext,
+ ExtractorError,
int_or_none,
+ parse_age_limit,
parse_iso8601,
)
'description': 'VICE\'s Karley Sciortino meets with activists who discuss the state\'s strong anti-porn stance. Then, VICE Sports explains NFL contracts.',
'timestamp': 1491868800,
'upload_date': '20170411',
+ 'age_limit': 14,
}
}
video_id, headers={
'Content-Type': 'application/json; charset=utf-8',
}, data=b'{"client":"web","device_type":"pc"}')
+ if video_data.get('requires_drm'):
+ raise ExtractorError('This video is DRM protected.', expected=True)
main_video_asset = video_data['main_video_asset']
episode_number = int_or_none(video_data.get('episode_number'))
'season_number': season_number,
'episode_number': episode_number,
'subtitles': subtitles,
+ 'age_limit': parse_age_limit(video_data.get('rating')),
}
# Using X-Forwarded-For results in a 403 HTTP error for HLS fragments,
# so disable geo bypass completely
_GEO_BYPASS = False
+ _NETRC_MACHINE = 'hidive'
+ _LOGIN_URL = 'https://www.hidive.com/account/login'
_TESTS = [{
'url': 'https://www.hidive.com/stream/the-comic-artist-and-his-assistants/s01e001',
'params': {
'skip_download': True,
},
+ 'skip': 'Requires Authentication',
}]
+ def _real_initialize(self):
+ email, password = self._get_login_info()
+ if email is None:
+ return
+
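+ # The login form carries hidden inputs (presumably anti-forgery
+ # tokens) that must be echoed back along with the credentials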
+ webpage = self._download_webpage(self._LOGIN_URL, None)
+ form = self._search_regex(
+ r'(?s)<form[^>]+action="/account/login"[^>]*>(.+?)</form>',
+ webpage, 'login form')
+ data = self._hidden_inputs(form)
+ data.update({
+ 'Email': email,
+ 'Password': password,
+ })
+ self._download_webpage(
+ self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(data))
+
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
title, key = mobj.group('title', 'key')
data=urlencode_postdata({
'Title': title,
'Key': key,
+ 'PlayerId': 'f4f895ce1ca713ba263b91caeb1daa2d08904783',
}))
restriction = settings.get('restrictionReason')
subtitles.setdefault(cc_lang, []).append({
'url': cc_url,
})
+ self._sort_formats(formats)
season_number = int_or_none(self._search_regex(
r's(\d+)', key, 'season number', default=None))
self._logout_url = modules['user']['resources']['logout']['uri']
def _login(self):
- (username, password) = self._get_login_info()
+ username, password = self._get_login_info()
# TODO: figure out authentication with cookies
if username is None or password is None:
self.raise_login_required()
import re
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
+ determine_ext,
mimetype2ext,
+ parse_duration,
qualities,
- remove_end,
)
class ImdbIE(InfoExtractor):
IE_NAME = 'imdb'
IE_DESC = 'Internet Movie Database trailers'
- _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video|title).+?[/-]vi(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video|title|list).+?[/-]vi(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.imdb.com/video/imdb/vi2524815897',
'info_dict': {
'id': '2524815897',
'ext': 'mp4',
- 'title': 'Ice Age: Continental Drift Trailer (No. 2)',
- 'description': 'md5:9061c2219254e5d14e03c25c98e96a81',
+ 'title': 'No. 2 from Ice Age: Continental Drift (2012)',
+ 'description': 'md5:87bd0bdc61e351f21f20d2d7441cb4e7',
}
}, {
'url': 'http://www.imdb.com/video/_/vi2524815897',
}, {
'url': 'http://www.imdb.com/title/tt4218696/videoplayer/vi2608641561',
'only_matching': True,
+ }, {
+ 'url': 'https://www.imdb.com/list/ls009921623/videoplayer/vi260482329',
+ 'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage('http://www.imdb.com/video/imdb/vi%s' % video_id, video_id)
- descr = self._html_search_regex(
- r'(?s)<span itemprop="description">(.*?)</span>',
- webpage, 'description', fatal=False)
- player_url = 'http://www.imdb.com/video/imdb/vi%s/imdb/single' % video_id
- player_page = self._download_webpage(
- player_url, video_id, 'Downloading player page')
- # the player page contains the info for the default format, we have to
- # fetch other pages for the rest of the formats
- extra_formats = re.findall(r'href="(?P<url>%s.*?)".*?>(?P<name>.*?)<' % re.escape(player_url), player_page)
- format_pages = [
- self._download_webpage(
- f_url, video_id, 'Downloading info for %s format' % f_name)
- for f_url, f_name in extra_formats]
- format_pages.append(player_page)
+ webpage = self._download_webpage(
+ 'https://www.imdb.com/videoplayer/vi' + video_id, video_id)
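+ # Formats and metadata now live in the React initial state pushed on
+ # the /videoplayer page, keyed by 'vi' + video id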
+ video_metadata = self._parse_json(self._search_regex(
+ r'window\.IMDbReactInitialState\.push\(({.+?})\);', webpage,
+ 'video metadata'), video_id)['videos']['videoMetadata']['vi' + video_id]
+ title = self._html_search_meta(
+ ['og:title', 'twitter:title'], webpage) or self._html_search_regex(
+ r'<title>(.+?)</title>', webpage, 'title', fatal=False) or video_metadata['title']
quality = qualities(('SD', '480p', '720p', '1080p'))
formats = []
- for format_page in format_pages:
- json_data = self._search_regex(
- r'<script[^>]+class="imdb-player-data"[^>]*?>(.*?)</script>',
- format_page, 'json data', flags=re.DOTALL)
- info = self._parse_json(json_data, video_id, fatal=False)
- if not info:
- continue
- format_info = info.get('videoPlayerObject', {}).get('video', {})
- if not format_info:
- continue
- video_info_list = format_info.get('videoInfoList')
- if not video_info_list or not isinstance(video_info_list, list):
+ for encoding in video_metadata.get('encodings', []):
+ if not encoding or not isinstance(encoding, dict):
continue
- video_info = video_info_list[0]
- if not video_info or not isinstance(video_info, dict):
+ video_url = encoding.get('videoUrl')
+ if not video_url or not isinstance(video_url, compat_str):
continue
- video_url = video_info.get('videoUrl')
- if not video_url:
+ ext = determine_ext(video_url, mimetype2ext(encoding.get('mimeType')))
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
continue
- format_id = format_info.get('ffname')
+ format_id = encoding.get('definition')
formats.append({
'format_id': format_id,
'url': video_url,
- 'ext': mimetype2ext(video_info.get('videoMimeType')),
+ 'ext': ext,
'quality': quality(format_id),
})
self._sort_formats(formats)
return {
'id': video_id,
- 'title': remove_end(self._og_search_title(webpage), ' - IMDb'),
+ 'title': title,
'formats': formats,
- 'description': descr,
- 'thumbnail': format_info.get('slate'),
+ 'description': video_metadata.get('description'),
+ 'thumbnail': video_metadata.get('slate', {}).get('url'),
+ 'duration': parse_duration(video_metadata.get('duration')),
}
class ImdbListIE(InfoExtractor):
IE_NAME = 'imdb:list'
IE_DESC = 'Internet Movie Database lists'
- _VALID_URL = r'https?://(?:www\.)?imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'
+ _VALID_URL = r'https?://(?:www\.)?imdb\.com/list/ls(?P<id>\d{9})(?!/videoplayer/vi\d+)'
_TEST = {
- 'url': 'http://www.imdb.com/list/JFs9NWw6XI0',
+ 'url': 'https://www.imdb.com/list/ls009921623/',
'info_dict': {
- 'id': 'JFs9NWw6XI0',
- 'title': 'March 23, 2012 Releases',
+ 'id': '009921623',
+ 'title': 'The Bourne Legacy',
+ 'description': 'A list of trailers, clips, and more from The Bourne Legacy, starring Jeremy Renner and Rachel Weisz.',
},
- 'playlist_count': 7,
+ 'playlist_count': 8,
}
def _real_extract(self, url):
webpage = self._download_webpage(url, list_id)
entries = [
self.url_result('http://www.imdb.com' + m, 'Imdb')
- for m in re.findall(r'href="(/video/imdb/vi[^"]+)"\s+data-type="playlist"', webpage)]
+ for m in re.findall(r'href="(/list/ls%s/videoplayer/vi[^"]+)"' % list_id, webpage)]
list_title = self._html_search_regex(
- r'<h1 class="header">(.*?)</h1>', webpage, 'list title')
+ r'<h1[^>]+class="[^"]*header[^"]*"[^>]*>(.*?)</h1>',
+ webpage, 'list title')
+ list_description = self._html_search_regex(
+ r'<div[^>]+class="[^"]*list-description[^"]*"[^>]*><p>(.*?)</p>',
+ webpage, 'list description')
- return self.playlist_result(entries, list_id, list_title)
+ return self.playlist_result(entries, list_id, list_title, list_description)
import re
from .common import InfoExtractor
-from ..compat import compat_urlparse
from ..utils import (
int_or_none,
js_to_json,
'id': 'A61SaA1',
'ext': 'mp4',
'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
- 'description': 'Imgur: The most awesome images on the Internet.',
+ 'description': 'Imgur: The magic of the Internet',
},
}, {
'url': 'https://imgur.com/A61SaA1',
'id': 'A61SaA1',
'ext': 'mp4',
'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
- 'description': 'Imgur: The most awesome images on the Internet.',
+ 'description': 'Imgur: The magic of the Internet',
},
}, {
'url': 'https://imgur.com/gallery/YcAQlkx',
'id': 'YcAQlkx',
'ext': 'mp4',
'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
- 'description': 'Imgur: The most awesome images on the Internet.'
-
}
}, {
'url': 'http://imgur.com/topic/Funny/N8rOudd',
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(
- compat_urlparse.urljoin(url, video_id), video_id)
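+ # Fetch the canonical i.imgur.com .gifv page for the id; its markup
+ # carries the OpenGraph video metadata used below, regardless of which
+ # imgur URL variant was supplied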
+ gifv_url = 'https://i.imgur.com/{id}.gifv'.format(id=video_id)
+ webpage = self._download_webpage(gifv_url, video_id)
width = int_or_none(self._og_search_property(
'video:width', webpage, default=None))
return {
'id': video_id,
'formats': formats,
- 'description': self._og_search_description(webpage),
+ 'description': self._og_search_description(webpage, default=None),
'title': self._og_search_title(webpage),
}
'params': {
'skip_download': True,
},
+ }, {
+ # div with id=kaltura_player_1_kqs38cgm
+ 'url': 'https://www.inc.com/oscar-raymundo/richard-branson-young-entrepeneurs.html',
+ 'info_dict': {
+ 'id': '1_kqs38cgm',
+ 'ext': 'mp4',
+ 'title': 'Branson: "In the end, you have to say, Screw it. Just do it."',
+ 'description': 'md5:21b832d034f9af5191ca5959da5e9cb6',
+ 'timestamp': 1364403232,
+ 'upload_date': '20130327',
+ 'uploader_id': 'incdigital@inc.com',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
}, {
'url': 'http://www.inc.com/video/david-whitford/founders-forum-tripadvisor-steve-kaufer-most-enjoyable-moment-for-entrepreneur.html',
'only_matching': True,
webpage = self._download_webpage(url, display_id)
partner_id = self._search_regex(
- r'var\s+_?bizo_data_partner_id\s*=\s*["\'](\d+)', webpage, 'partner id')
+ r'var\s+_?bizo_data_partner_id\s*=\s*["\'](\d+)', webpage,
+ 'partner id', default='1034971')
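+ # The Kaltura entry id may appear either in a kaltura_player_* element
+ # id or in the pageInfo.videos JSON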
- kaltura_id = self._parse_json(self._search_regex(
- r'pageInfo\.videos\s*=\s*\[(.+)\];', webpage, 'kaltura id'),
+ kaltura_id = self._search_regex(
+ r'id=(["\'])kaltura_player_(?P<id>.+?)\1', webpage, 'kaltura id',
+ default=None, group='id') or self._parse_json(self._search_regex(
+ r'pageInfo\.videos\s*=\s*\[(.+)\];', webpage, 'kaltura id'),
display_id)['vid_kaltura_id']
return self.url_result(
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
int_or_none,
parse_age_limit,
parse_iso8601,
+ update_url_query,
)
_VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P<id>[\da-f]+)'
_TESTS = [{
'url': 'http://indavideo.hu/player/video/1bdc3c6d80/',
- 'md5': 'f79b009c66194acacd40712a6778acfa',
+ 'md5': 'c8a507a1c7410685f83a06eaeeaafeab',
'info_dict': {
'id': '1837039',
'ext': 'mp4',
'only_matching': True,
}]
+ # Some example URLs covered by the generic extractor:
+ # http://indavideo.hu/video/Vicces_cica_1
+ # http://index.indavideo.hu/video/2015_0728_beregszasz
+ # http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko
+ # http://erotika.indavideo.hu/video/Amator_tini_punci
+ # http://film.indavideo.hu/video/f_hrom_nagymamm_volt
+ # http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//embed\.indavideo\.hu/player/video/[\da-f]+)',
+ webpage)
+
def _real_extract(self, url):
video_id = self._match_id(url)
title = video['title']
- video_urls = video.get('video_files', [])
+ video_urls = []
+
+ video_files = video.get('video_files')
+ if isinstance(video_files, list):
+ video_urls.extend(video_files)
+ elif isinstance(video_files, dict):
+ video_urls.extend(video_files.values())
+
video_file = video.get('video_file')
if video_file:
video_urls.append(video_file)
if flv_url not in video_urls:
video_urls.append(flv_url)
- formats = [{
- 'url': video_url,
- 'height': int_or_none(self._search_regex(
- r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None)),
- } for video_url in video_urls]
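+ # 'filesh' maps video height to a per-quality token that has to be
+ # appended to the format URL as the 'token' query parameter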
+ filesh = video.get('filesh')
+
+ formats = []
+ for video_url in video_urls:
+ height = int_or_none(self._search_regex(
+ r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None))
+ if filesh:
+ if not height:
+ continue
+ token = filesh.get(compat_str(height))
+ if token is None:
+ continue
+ video_url = update_url_query(video_url, {'token': token})
+ formats.append({
+ 'url': video_url,
+ 'height': height,
+ })
self._sort_formats(formats)
timestamp = video.get('date')
'tags': tags,
'formats': formats,
}
-
-
-class IndavideoIE(InfoExtractor):
- _VALID_URL = r'https?://(?:.+?\.)?indavideo\.hu/video/(?P<id>[^/#?]+)'
- _TESTS = [{
- 'url': 'http://indavideo.hu/video/Vicces_cica_1',
- 'md5': '8c82244ba85d2a2310275b318eb51eac',
- 'info_dict': {
- 'id': '1335611',
- 'display_id': 'Vicces_cica_1',
- 'ext': 'mp4',
- 'title': 'Vicces cica',
- 'description': 'Játszik a tablettel. :D',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader': 'Jet_Pack',
- 'uploader_id': '491217',
- 'timestamp': 1390821212,
- 'upload_date': '20140127',
- 'duration': 7,
- 'age_limit': 0,
- 'tags': ['vicces', 'macska', 'cica', 'ügyes', 'nevetés', 'játszik', 'Cukiság', 'Jet_Pack'],
- },
- }, {
- 'url': 'http://index.indavideo.hu/video/2015_0728_beregszasz',
- 'only_matching': True,
- }, {
- 'url': 'http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko',
- 'only_matching': True,
- }, {
- 'url': 'http://erotika.indavideo.hu/video/Amator_tini_punci',
- 'only_matching': True,
- }, {
- 'url': 'http://film.indavideo.hu/video/f_hrom_nagymamm_volt',
- 'only_matching': True,
- }, {
- 'url': 'http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
- embed_url = self._search_regex(
- r'<link[^>]+rel="video_src"[^>]+href="(.+?)"', webpage, 'embed url')
-
- return {
- '_type': 'url_transparent',
- 'ie_key': 'IndavideoEmbed',
- 'url': embed_url,
- 'display_id': display_id,
- }
return ohdave_rsa_encrypt(data, e, N)
def _login(self):
- (username, password) = self._get_login_info()
+ username, password = self._get_login_info()
# No authentication to be performed
if not username:
import re
from .common import InfoExtractor
+from .brightcove import BrightcoveNewIE
from ..compat import (
compat_str,
compat_etree_register_namespace,
xpath_text,
int_or_none,
parse_duration,
+ smuggle_url,
ExtractorError,
determine_ext,
)
# unavailable via data-playlist-url
'url': 'https://www.itv.com/hub/through-the-keyhole/2a2271a0033',
'only_matching': True,
+ }, {
+ # InvalidVodcrid
+ 'url': 'https://www.itv.com/hub/james-martins-saturday-morning/2a5159a0034',
+ 'only_matching': True,
+ }, {
+ # ContentUnavailable
+ 'url': 'https://www.itv.com/hub/whos-doing-the-dishes/2a2898a0024',
+ 'only_matching': True,
}]
def _real_extract(self, url):
if fault_code == 'InvalidGeoRegion':
self.raise_geo_restricted(
msg=fault_string, countries=self._GEO_COUNTRIES)
- elif fault_code != 'InvalidEntity':
+ elif fault_code not in (
+ 'InvalidEntity', 'InvalidVodcrid', 'ContentUnavailable'):
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, fault_string), expected=True)
info.update({
'subtitles': subtitles,
})
return info
+
+
+class ITVBTCCIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?itv\.com/btcc/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'http://www.itv.com/btcc/races/btcc-2018-all-the-action-from-brands-hatch',
+ 'info_dict': {
+ 'id': 'btcc-2018-all-the-action-from-brands-hatch',
+ 'title': 'BTCC 2018: All the action from Brands Hatch',
+ },
+ 'playlist_mincount': 9,
+ }
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1582188683001/HkiHLnNRx_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ entries = [
+ self.url_result(
+ smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {
+ # ITV does not like some GB IP ranges, so here are some
+ # IP blocks it accepts
+ 'geo_ip_blocks': [
+ '193.113.0.0/16', '54.36.162.0/23', '159.65.16.0/21'
+ ],
+ 'referrer': url,
+ }),
+ ie=BrightcoveNewIE.ie_key(), video_id=video_id)
+ for video_id in re.findall(r'data-video-id=["\'](\d+)', webpage)]
+
+ title = self._og_search_title(webpage, fatal=False)
+
+ return self.playlist_result(entries, playlist_id, title)
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote
+from ..compat import (
+ compat_str,
+ compat_urllib_parse_unquote,
+)
from ..utils import (
determine_ext,
float_or_none,
def _real_extract(self, url):
video_id = self._match_id(url)
- url = 'http://www.izlesene.com/video/%s' % video_id
- webpage = self._download_webpage(url, video_id)
+ webpage = self._download_webpage('http://www.izlesene.com/video/%s' % video_id, video_id)
+
+ video = self._parse_json(
+ self._search_regex(
+ r'videoObj\s*=\s*({.+?})\s*;\s*\n', webpage, 'streams'),
+ video_id)
+
+ title = video.get('videoTitle') or self._og_search_title(webpage)
+
+ formats = []
+ for stream in video['media']['level']:
+ source_url = stream.get('source')
+ if not source_url or not isinstance(source_url, compat_str):
+ continue
+ ext = determine_ext(source_url, 'mp4')
+ quality = stream.get('value')
+ height = int_or_none(quality)
+ formats.append({
+ 'format_id': '%sp' % quality if quality else 'sd',
+ 'url': compat_urllib_parse_unquote(source_url),
+ 'ext': ext,
+ 'height': height,
+ })
+ self._sort_formats(formats)
- title = self._og_search_title(webpage)
description = self._og_search_description(webpage, default=None)
- thumbnail = self._proto_relative_url(
+ thumbnail = video.get('posterURL') or self._proto_relative_url(
self._og_search_thumbnail(webpage), scheme='http:')
uploader = self._html_search_regex(
timestamp = parse_iso8601(self._html_search_meta(
'uploadDate', webpage, 'upload date'))
- duration = float_or_none(self._html_search_regex(
- r'"videoduration"\s*:\s*"([^"]+)"',
- webpage, 'duration', fatal=False), scale=1000)
+ duration = float_or_none(video.get('duration') or self._html_search_regex(
+ r'videoduration["\']?\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ webpage, 'duration', fatal=False, group='value'), scale=1000)
view_count = str_to_int(get_element_by_id('videoViewCount', webpage))
comment_count = self._html_search_regex(
r'comment_count\s*=\s*\'([^\']+)\';',
webpage, 'comment_count', fatal=False)
- content_url = self._html_search_meta(
- 'contentURL', webpage, 'content URL', fatal=False)
- ext = determine_ext(content_url, 'mp4')
-
- # Might be empty for some videos.
- streams = self._html_search_regex(
- r'"qualitylevel"\s*:\s*"([^"]+)"', webpage, 'streams', default='')
-
- formats = []
- if streams:
- for stream in streams.split('|'):
- quality, url = re.search(r'\[(\w+)\](.+)', stream).groups()
- formats.append({
- 'format_id': '%sp' % quality if quality else 'sd',
- 'url': compat_urllib_parse_unquote(url),
- 'ext': ext,
- })
- else:
- stream_url = self._search_regex(
- r'"streamurl"\s*:\s*"([^"]+)"', webpage, 'stream URL')
- formats.append({
- 'format_id': 'sd',
- 'url': compat_urllib_parse_unquote(stream_url),
- 'ext': ext,
- })
-
return {
'id': video_id,
'title': title,
re.search(
r'''(?xs)
<(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])
- (?:https?:)?//(?:(?:www|cdnapi)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
+ (?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
(?:(?!(?P=q1)).)*
[?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
+ (?:(?!(?P=q1)).)*
(?P=q1)
''', webpage)
)
media_id, 'Downloading flash playJson data', query={
'id': media_id,
'platid': 1,
- 'splatid': 101,
+ 'splatid': 105,
'format': 1,
'source': 1000,
'tkey': self.calc_time_key(int(time.time())),
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
video_id = self._match_id(url)
- self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
+ self._initialize_geo_bypass({
+ 'countries': smuggled_data.get('geo_countries'),
+ })
pc, mobile, metadata = self._extract(
video_id, 'getPlaylistByMediaId',
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ orderedSet,
+ parse_duration,
+ try_get,
+)
+
+
+class MarkizaIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?videoarchiv\.markiza\.sk/(?:video/(?:[^/]+/)*|embed/)(?P<id>\d+)(?:[_/]|$)'
+ _TESTS = [{
+ 'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723_oteckovia-109',
+ 'md5': 'ada4e9fad038abeed971843aa028c7b0',
+ 'info_dict': {
+ 'id': '139078',
+ 'ext': 'mp4',
+ 'title': 'Oteckovia 109',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 2760,
+ },
+ }, {
+ 'url': 'http://videoarchiv.markiza.sk/video/televizne-noviny/televizne-noviny/85430_televizne-noviny',
+ 'info_dict': {
+ 'id': '85430',
+ 'title': 'Televízne noviny',
+ },
+ 'playlist_count': 23,
+ }, {
+ 'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videoarchiv.markiza.sk/video/84723',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videoarchiv.markiza.sk/video/filmy/85190_kamenak',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videoarchiv.markiza.sk/video/reflex/zo-zakulisia/84651_pribeh-alzbetky',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videoarchiv.markiza.sk/embed/85295',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ data = self._download_json(
+ 'http://videoarchiv.markiza.sk/json/video_jwplayer7.json',
+ video_id, query={'id': video_id})
+
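+ # The endpoint returns a JWPlayer 7 setup: multi-part episodes come
+ # back as a playlist, single videos as a lone entry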
+ info = self._parse_jwplayer_data(data, m3u8_id='hls', mpd_id='dash')
+
+ if info.get('_type') == 'playlist':
+ info.update({
+ 'id': video_id,
+ 'title': try_get(
+ data, lambda x: x['details']['name'], compat_str),
+ })
+ else:
+ info['duration'] = parse_duration(
+ try_get(data, lambda x: x['details']['duration'], compat_str))
+ return info
+
+
+class MarkizaPageIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?:(?:[^/]+\.)?markiza|tvnoviny)\.sk/(?:[^/]+/)*(?P<id>\d+)_'
+ _TESTS = [{
+ 'url': 'http://www.markiza.sk/soubiz/zahranicny/1923705_oteckovia-maju-svoj-den-ti-slavni-nie-su-o-nic-menej-rozkosni',
+ 'md5': 'ada4e9fad038abeed971843aa028c7b0',
+ 'info_dict': {
+ 'id': '139355',
+ 'ext': 'mp4',
+ 'title': 'Oteckovia 110',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 2604,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://dajto.markiza.sk/filmy-a-serialy/1774695_frajeri-vo-vegas',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://superstar.markiza.sk/aktualne/1923870_to-je-ale-telo-spevacka-ukazala-sexy-postavicku-v-bikinach',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://hybsa.markiza.sk/aktualne/1923790_uzasna-atmosfera-na-hybsa-v-poprade-superstaristi-si-prve-koncerty-pred-davom-ludi-poriadne-uzili',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://doma.markiza.sk/filmy/1885250_moja-vysnivana-svadba',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.tvnoviny.sk/domace/1923887_po-smrti-manzela-ju-cakalo-poriadne-prekvapenie',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if MarkizaIE.suitable(url) else super(MarkizaPageIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ entries = [
+ self.url_result('http://videoarchiv.markiza.sk/video/%s' % video_id)
+ for video_id in orderedSet(re.findall(
+ r'(?:initPlayer_|data-entity=["\']|id=["\']player_)(\d+)',
+ webpage))]
+
+ return self.playlist_result(entries, playlist_id)
import re
from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+ int_or_none,
+ parse_codecs,
+)
class MinotoIE(InfoExtractor):
formats.extend(fmt_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
else:
fmt_profile = fmt.get('profile') or {}
f = {
'format_id': fmt_profile.get('name-short'),
'format_note': fmt_profile.get('name'),
'url': fmt_url,
'filesize': int_or_none(fmt.get('filesize')),
'width': int_or_none(fmt.get('width')),
'height': int_or_none(fmt.get('height')),
}
- codecs = fmt.get('codecs')
- if codecs:
- codecs = codecs.split(',')
- if len(codecs) == 2:
- f.update({
- 'vcodec': codecs[0],
- 'acodec': codecs[1],
- })
+ f.update(parse_codecs(fmt.get('codecs')))
formats.append(f)
self._sort_formats(formats)
return {
formats.append({
'format_id': 'http',
'url': decrypted,
+ 'downloader_options': {
+ # Mixcloud starts throttling at >~5M
+ 'http_chunk_size': 5242880,
+ },
})
self._sort_formats(formats)
from __future__ import unicode_literals
-import re
+from .nhl import NHLBaseIE
-from .common import InfoExtractor
-from ..utils import (
- parse_duration,
- parse_iso8601,
-)
-
-class MLBIE(InfoExtractor):
+class MLBIE(NHLBaseIE):
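+ # mlb.com is served by the same content API as nhl.com, so reuse the
+ # NHL base extractor and point it at MLB's content domain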
_VALID_URL = r'''(?x)
https?://
- (?:[\da-z_-]+\.)*mlb\.com/
+ (?:[\da-z_-]+\.)*(?P<site>mlb)\.com/
(?:
(?:
- (?:.*?/)?video/(?:topic/[\da-z_-]+/)?(?:v|.*?/c-)|
+ (?:[^/]+/)*c-|
(?:
shared/video/embed/(?:embed|m-internal-embed)\.html|
(?:[^/]+/)+(?:play|index)\.jsp|
)\?.*?\bcontent_id=
)
- (?P<id>n?\d+)|
- (?:[^/]+/)*(?P<path>[^/]+)
+ (?P<id>\d+)
)
'''
+ _CONTENT_DOMAIN = 'content.mlb.com'
_TESTS = [
{
- 'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
- 'md5': 'ff56a598c2cf411a9a38a69709e97079',
+ 'url': 'https://www.mlb.com/mariners/video/ackleys-spectacular-catch/c-34698933',
+ 'md5': '632358dacfceec06bad823b83d21df2d',
'info_dict': {
'id': '34698933',
'ext': 'mp4',
'title': "Ackley's spectacular catch",
'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0',
'duration': 66,
- 'timestamp': 1405980600,
- 'upload_date': '20140721',
+ 'timestamp': 1405995000,
+ 'upload_date': '20140722',
'thumbnail': r're:^https?://.*\.jpg$',
},
},
{
- 'url': 'http://m.mlb.com/video/topic/81536970/v34496663/mianym-stanton-practices-for-the-home-run-derby',
- 'md5': 'd9c022c10d21f849f49c05ae12a8a7e9',
+ 'url': 'https://www.mlb.com/video/stanton-prepares-for-derby/c-34496663',
+ 'md5': 'bf2619bf9cacc0a564fc35e6aeb9219f',
'info_dict': {
'id': '34496663',
'ext': 'mp4',
'title': 'Stanton prepares for Derby',
'description': 'md5:d00ce1e5fd9c9069e9c13ab4faedfa57',
'duration': 46,
- 'timestamp': 1405105800,
+ 'timestamp': 1405120200,
'upload_date': '20140711',
'thumbnail': r're:^https?://.*\.jpg$',
},
},
{
- 'url': 'http://m.mlb.com/video/topic/vtp_hrd_sponsor/v34578115/hrd-cespedes-wins-2014-gillette-home-run-derby',
- 'md5': '0e6e73d509321e142409b695eadd541f',
+ 'url': 'https://www.mlb.com/video/cespedes-repeats-as-derby-champ/c-34578115',
+ 'md5': '99bb9176531adc600b90880fb8be9328',
'info_dict': {
'id': '34578115',
'ext': 'mp4',
'title': 'Cespedes repeats as Derby champ',
'description': 'md5:08df253ce265d4cf6fb09f581fafad07',
'duration': 488,
- 'timestamp': 1405399936,
+ 'timestamp': 1405414336,
'upload_date': '20140715',
'thumbnail': r're:^https?://.*\.jpg$',
},
},
{
- 'url': 'http://m.mlb.com/video/v34577915/bautista-on-derby-captaining-duties-his-performance',
- 'md5': 'b8fd237347b844365d74ea61d4245967',
+ 'url': 'https://www.mlb.com/video/bautista-on-home-run-derby/c-34577915',
+ 'md5': 'da8b57a12b060e7663ee1eebd6f330ec',
'info_dict': {
'id': '34577915',
'ext': 'mp4',
'title': 'Bautista on Home Run Derby',
'description': 'md5:b80b34031143d0986dddc64a8839f0fb',
'duration': 52,
- 'timestamp': 1405390722,
+ 'timestamp': 1405405122,
'upload_date': '20140715',
'thumbnail': r're:^https?://.*\.jpg$',
},
},
{
- 'url': 'http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer',
- 'md5': 'aafaf5b0186fee8f32f20508092f8111',
+ 'url': 'https://www.mlb.com/news/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer/c-118550098',
+ 'md5': 'e09e37b552351fddbf4d9e699c924d68',
'info_dict': {
'id': '75609783',
'ext': 'mp4',
'title': 'Must C: Pillar climbs for catch',
'description': '4/15/15: Blue Jays outfielder Kevin Pillar continues his defensive dominance by climbing the wall in left to rob Tim Beckham of a home run',
- 'timestamp': 1429124820,
+ 'timestamp': 1429139220,
'upload_date': '20150415',
}
},
'only_matching': True,
},
{
- 'url': 'http://m.cardinals.mlb.com/stl/video/v51175783/atlstl-piscotty-makes-great-sliding-catch-on-line/?partnerId=as_mlb_20150321_42500876&adbid=579409712979910656&adbpl=tw&adbpr=52847728',
+ 'url': 'https://www.mlb.com/cardinals/video/piscottys-great-sliding-catch/c-51175783',
'only_matching': True,
},
{
'only_matching': True,
},
{
- 'url': 'http://washington.nationals.mlb.com/mlb/gameday/index.jsp?c_id=was&gid=2015_05_09_atlmlb_wasmlb_1&lang=en&content_id=108309983&mode=video#',
+ 'url': 'https://www.mlb.com/cut4/carlos-gomez-borrowed-sunglasses-from-an-as-fan/c-278912842',
'only_matching': True,
}
]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
- if not video_id:
- video_path = mobj.group('path')
- webpage = self._download_webpage(url, video_path)
- video_id = self._search_regex(
- [r'data-video-?id="(\d+)"', r'content_id=(\d+)'], webpage, 'video id')
-
- detail = self._download_xml(
- 'http://m.mlb.com/gen/multimedia/detail/%s/%s/%s/%s.xml'
- % (video_id[-3], video_id[-2], video_id[-1], video_id), video_id)
-
- title = detail.find('./headline').text
- description = detail.find('./big-blurb').text
- duration = parse_duration(detail.find('./duration').text)
- timestamp = parse_iso8601(detail.attrib['date'][:-5])
-
- thumbnails = [{
- 'url': thumbnail.text,
- } for thumbnail in detail.findall('./thumbnailScenarios/thumbnailScenario')]
-
- formats = []
- for media_url in detail.findall('./url'):
- playback_scenario = media_url.attrib['playback_scenario']
- fmt = {
- 'url': media_url.text,
- 'format_id': playback_scenario,
- }
- m = re.search(r'(?P<vbr>\d+)K_(?P<width>\d+)X(?P<height>\d+)', playback_scenario)
- if m:
- fmt.update({
- 'vbr': int(m.group('vbr')) * 1000,
- 'width': int(m.group('width')),
- 'height': int(m.group('height')),
- })
- formats.append(fmt)
-
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'duration': duration,
- 'timestamp': timestamp,
- 'formats': formats,
- 'thumbnails': thumbnails,
- }
+++ /dev/null
-# coding: utf-8
-from __future__ import unicode_literals
-
-import os.path
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- remove_start,
- sanitized_Request,
- urlencode_postdata,
-)
-
-
-class MonikerIE(InfoExtractor):
- IE_DESC = 'allmyvideos.net and vidspot.net'
- _VALID_URL = r'https?://(?:www\.)?(?:allmyvideos|vidspot)\.net/(?:(?:2|v)/v-)?(?P<id>[a-zA-Z0-9_-]+)'
-
- _TESTS = [{
- 'url': 'http://allmyvideos.net/jih3nce3x6wn',
- 'md5': '710883dee1bfc370ecf9fa6a89307c88',
- 'info_dict': {
- 'id': 'jih3nce3x6wn',
- 'ext': 'mp4',
- 'title': 'youtube-dl test video',
- },
- }, {
- 'url': 'http://allmyvideos.net/embed-jih3nce3x6wn',
- 'md5': '710883dee1bfc370ecf9fa6a89307c88',
- 'info_dict': {
- 'id': 'jih3nce3x6wn',
- 'ext': 'mp4',
- 'title': 'youtube-dl test video',
- },
- }, {
- 'url': 'http://vidspot.net/l2ngsmhs8ci5',
- 'md5': '710883dee1bfc370ecf9fa6a89307c88',
- 'info_dict': {
- 'id': 'l2ngsmhs8ci5',
- 'ext': 'mp4',
- 'title': 'youtube-dl test video',
- },
- }, {
- 'url': 'https://www.vidspot.net/l2ngsmhs8ci5',
- 'only_matching': True,
- }, {
- 'url': 'http://vidspot.net/2/v-ywDf99',
- 'md5': '5f8254ce12df30479428b0152fb8e7ba',
- 'info_dict': {
- 'id': 'ywDf99',
- 'ext': 'mp4',
- 'title': 'IL FAIT LE MALIN EN PORSHE CAYENNE ( mais pas pour longtemps)',
- 'description': 'IL FAIT LE MALIN EN PORSHE CAYENNE.',
- },
- }, {
- 'url': 'http://allmyvideos.net/v/v-HXZm5t',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- orig_video_id = self._match_id(url)
- video_id = remove_start(orig_video_id, 'embed-')
- url = url.replace(orig_video_id, video_id)
- assert re.match(self._VALID_URL, url) is not None
- orig_webpage = self._download_webpage(url, video_id)
-
- if '>File Not Found<' in orig_webpage:
- raise ExtractorError('Video %s does not exist' % video_id, expected=True)
-
- error = self._search_regex(
- r'class="err">([^<]+)<', orig_webpage, 'error', default=None)
- if error:
- raise ExtractorError(
- '%s returned error: %s' % (self.IE_NAME, error), expected=True)
-
- builtin_url = self._search_regex(
- r'<iframe[^>]+src=(["\'])(?P<url>.+?/builtin-.+?)\1',
- orig_webpage, 'builtin URL', default=None, group='url')
-
- if builtin_url:
- req = sanitized_Request(builtin_url)
- req.add_header('Referer', url)
- webpage = self._download_webpage(req, video_id, 'Downloading builtin page')
- title = self._og_search_title(orig_webpage).strip()
- description = self._og_search_description(orig_webpage).strip()
- else:
- fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage)
- data = dict(fields)
-
- post = urlencode_postdata(data)
- headers = {
- b'Content-Type': b'application/x-www-form-urlencoded',
- }
- req = sanitized_Request(url, post, headers)
- webpage = self._download_webpage(
- req, video_id, note='Downloading video page ...')
-
- title = os.path.splitext(data['fname'])[0]
- description = None
-
- # Could be several links with different quality
- links = re.findall(r'"file" : "?(.+?)",', webpage)
- # Assume the links are ordered in quality
- formats = [{
- 'url': l,
- 'quality': i,
- } for i, l in enumerate(links)]
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'formats': formats,
- }
from .common import InfoExtractor
-class MakersChannelIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?makerschannel\.com/.*(?P<id_type>video|production)_id=(?P<id>[0-9]+)'
+class MyChannelsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?mychannels\.com/.*(?P<id_type>video|production)_id=(?P<id>[0-9]+)'
_TEST = {
- 'url': 'http://makerschannel.com/en/zoomin/community-highlights?video_id=849',
- 'md5': '624a512c6969236b5967bf9286345ad1',
+ 'url': 'https://mychannels.com/missholland/miss-holland?production_id=3416',
+ 'md5': 'b8993daad4262dd68d89d651c0c52c45',
'info_dict': {
- 'id': '849',
+ 'id': 'wUUDZZep6vQD',
'ext': 'mp4',
- 'title': 'Landing a bus on a plane is an epic win',
- 'uploader': 'ZoomIn',
- 'description': 'md5:cd9cca2ea7b69b78be81d07020c97139',
+ 'title': 'Miss Holland joins VOTE LEAVE',
+ 'description': 'Miss Holland | #13 Not a potato',
+ 'uploader': 'Miss Holland',
}
}
def extract_data_val(attr, fatal=False):
return self._html_search_regex(r'data-%s\s*=\s*"([^"]+)"' % attr, video_data, attr, fatal=fatal)
- minoto_id = self._search_regex(r'/id/([a-zA-Z0-9]+)', extract_data_val('video-src', True), 'minoto id')
+ minoto_id = extract_data_val('minoto-id') or self._search_regex(r'/id/([a-zA-Z0-9]+)', extract_data_val('video-src', True), 'minoto id')
return {
'_type': 'url_transparent',
'url': 'minoto:%s' % minoto_id,
- 'id': extract_data_val('video-id', True),
+ 'id': url_id,
'title': extract_data_val('title', True),
'description': extract_data_val('description'),
'thumbnail': extract_data_val('image'),
from __future__ import unicode_literals
-import re
import base64
+import json
+import re
from .common import InfoExtractor
from .theplatform import ThePlatformIE
from ..utils import (
find_xpath_attr,
smuggle_url,
+ try_get,
unescapeHTML,
update_url_query,
int_or_none,
def _real_extract(self, url):
permalink, video_id = re.match(self._VALID_URL, url).groups()
permalink = 'http' + permalink
- video_data = self._download_json(
+ response = self._download_json(
'https://api.nbc.com/v3/videos', video_id, query={
'filter[permalink]': permalink,
- })['data'][0]['attributes']
+ 'fields[videos]': 'description,entitlement,episodeNumber,guid,keywords,seasonNumber,title,vChipRating',
+ 'fields[shows]': 'shortTitle',
+ 'include': 'show.shortTitle',
+ })
+ video_data = response['data'][0]['attributes']
query = {
'mbr': 'true',
'manifest': 'm3u',
'title': title,
'url': theplatform_url,
'description': video_data.get('description'),
- 'keywords': video_data.get('keywords'),
+ 'tags': video_data.get('keywords'),
'season_number': int_or_none(video_data.get('seasonNumber')),
'episode_number': int_or_none(video_data.get('episodeNumber')),
- 'series': video_data.get('showName'),
+ 'episode': title,
+ 'series': try_get(response, lambda x: x['included'][0]['attributes']['shortTitle']),
'ie_key': 'ThePlatform',
}
NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer')
+class NBCSportsStreamIE(AdobePassIE):
+ _VALID_URL = r'https?://stream\.nbcsports\.com/.+?\bpid=(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://stream.nbcsports.com/nbcsn/generic?pid=206559',
+ 'info_dict': {
+ 'id': '206559',
+ 'ext': 'mp4',
+ 'title': 'Amgen Tour of California Women\'s Recap',
+ 'description': 'md5:66520066b3b5281ada7698d0ea2aa894',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': 'Requires Adobe Pass Authentication',
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ live_source = self._download_json(
+ 'http://stream.nbcsports.com/data/live_sources_%s.json' % video_id,
+ video_id)
+ video_source = live_source['videoSources'][0]
+ title = video_source['title']
+ source_url = None
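+        # Probe the known source URL keys (and their *Alt variants) in order of
+        # preference; the for/else falls back to the OTT stream URL if none match.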
+ for k in ('source', 'msl4source', 'iossource', 'hlsv4'):
+ sk = k + 'Url'
+ source_url = video_source.get(sk) or video_source.get(sk + 'Alt')
+ if source_url:
+ break
+ else:
+ source_url = video_source['ottStreamUrl']
+ is_live = video_source.get('type') == 'live' or video_source.get('status') == 'Live'
+ resource = self._get_mvpd_resource('nbcsports', title, video_id, '')
+ token = self._extract_mvpd_auth(url, video_id, 'nbcsports', resource)
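+        # Exchange the Adobe Pass token for a tokenized CDN URL; the token and
+        # the MVPD resource are base64-encoded into the JSON request body.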
+ tokenized_url = self._download_json(
+ 'https://token.playmakerservices.com/cdn',
+ video_id, data=json.dumps({
+ 'requestorId': 'nbcsports',
+ 'pid': video_id,
+ 'application': 'NBCSports',
+ 'version': 'v1',
+ 'platform': 'desktop',
+ 'cdn': 'akamai',
+ 'url': video_source['sourceUrl'],
+ 'token': base64.b64encode(token.encode()).decode(),
+ 'resourceId': base64.b64encode(resource.encode()).decode(),
+ }).encode())['tokenizedUrl']
+ formats = self._extract_m3u8_formats(tokenized_url, video_id, 'mp4')
+ self._sort_formats(formats)
+ return {
+ 'id': video_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'description': live_source.get('description'),
+ 'formats': formats,
+ 'is_live': is_live,
+ }
+
+
class CSNNEIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?csnne\.com/video/(?P<id>[0-9a-z-]+)'
_TESTS = [{
# movie
'url': 'https://api.nexx.cloud/v3/748/videos/byid/128907',
- 'md5': '828cea195be04e66057b846288295ba1',
+ 'md5': '31899fd683de49ad46f4ee67e53e83fe',
'info_dict': {
'id': '128907',
'ext': 'mp4',
'title': 'Stiftung Warentest',
'alt_title': 'Wie ein Test abläuft',
'description': 'md5:d1ddb1ef63de721132abd38639cc2fd2',
- 'release_year': 2013,
'creator': 'SPIEGEL TV',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 2509,
'params': {
'skip_download': True,
},
+ 'skip': 'HTTP Error 404: Not Found',
}, {
# does not work via arc
'url': 'nexx:741:1269984',
'ext': 'mp4',
'title': '1 TAG ohne KLO... wortwörtlich! 😑',
'alt_title': '1 TAG ohne KLO... wortwörtlich! 😑',
- 'description': 'md5:4604539793c49eda9443ab5c5b1d612f',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 607,
'timestamp': 1518614955,
'upload_date': '20180214',
},
+ }, {
+ # free cdn from http://www.spiegel.de/video/eifel-zoo-aufregung-um-ausgebrochene-raubtiere-video-99018031.html
+ 'url': 'nexx:747:1533779',
+ 'md5': '6bf6883912b82b7069fb86c2297e9893',
+ 'info_dict': {
+ 'id': '1533779',
+ 'ext': 'mp4',
+ 'title': 'Aufregung um ausgebrochene Raubtiere',
+ 'alt_title': 'Eifel-Zoo',
+ 'description': 'md5:f21375c91c74ad741dcb164c427999d2',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 111,
+ 'timestamp': 1527874460,
+ 'upload_date': '20180601',
+ },
}, {
'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907',
'only_matching': True,
self._handle_error(result)
return result['result']
+ def _extract_free_formats(self, video, video_id):
+ stream_data = video['streamdata']
+ cdn = stream_data['cdnType']
+ assert cdn == 'free'
+
+ hash = video['general']['hash']
+
+ ps = compat_str(stream_data['originalDomain'])
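+        # With folder hierarchy enabled, the zero-padded video id is reversed
+        # and split into two two-character directory levels.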
+ if stream_data['applyFolderHierarchy'] == 1:
+ s = ('%04d' % int(video_id))[::-1]
+ ps += '/%s/%s' % (s[0:2], s[2:4])
+ ps += '/%s/%s_' % (video_id, hash)
+
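+        # The URL template keeps '%s' placeholders that are filled in later
+        # with the CDN path and the manifest extension.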
+ t = 'http://%s' + ps
+ fd = stream_data['azureFileDistribution'].split(',')
+ cdn_provider = stream_data['cdnProvider']
+
+ def p0(p):
+ return '_%s' % p if stream_data['applyAzureStructure'] == 1 else ''
+
+ formats = []
+ if cdn_provider == 'ak':
+ t += ','
+ for i in fd:
+ p = i.split(':')
+ t += p[1] + p0(int(p[0])) + ','
+ t += '.mp4.csmil/master.%s'
+ elif cdn_provider == 'ce':
+ k = t.split('/')
+ h = k.pop()
+ http_base = t = '/'.join(k)
+ http_base = http_base % stream_data['cdnPathHTTP']
+ t += '/asset.ism/manifest.%s?dcp_ver=aos4&videostream='
+ for i in fd:
+ p = i.split(':')
+ tbr = int(p[0])
+ filename = '%s%s%s.mp4' % (h, p[1], p0(tbr))
+ f = {
+ 'url': http_base + '/' + filename,
+ 'format_id': '%s-http-%d' % (cdn, tbr),
+ 'tbr': tbr,
+ }
+ width_height = p[1].split('x')
+ if len(width_height) == 2:
+ f.update({
+ 'width': int_or_none(width_height[0]),
+ 'height': int_or_none(width_height[1]),
+ })
+ formats.append(f)
+ a = filename + ':%s' % (tbr * 1000)
+ t += a + ','
+ t = t[:-1] + '&audiostream=' + a.split(':')[0]
+ else:
+ assert False
+
+ if cdn_provider == 'ce':
+ formats.extend(self._extract_mpd_formats(
+ t % (stream_data['cdnPathDASH'], 'mpd'), video_id,
+ mpd_id='%s-dash' % cdn, fatal=False))
+ formats.extend(self._extract_m3u8_formats(
+ t % (stream_data['cdnPathHLS'], 'm3u8'), video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='%s-hls' % cdn, fatal=False))
+
+ return formats
+
+ def _extract_azure_formats(self, video, video_id):
+ stream_data = video['streamdata']
+ cdn = stream_data['cdnType']
+ assert cdn == 'azure'
+
+ azure_locator = stream_data['azureLocator']
+
+ def get_cdn_shield_base(shield_type='', static=False):
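+            # Prefer an explicit CDN shield host; otherwise derive the
+            # akamaized.net host name from the Azure account number.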
+ for secure in ('', 's'):
+ cdn_shield = stream_data.get('cdnShield%sHTTP%s' % (shield_type, secure.upper()))
+ if cdn_shield:
+ return 'http%s://%s' % (secure, cdn_shield)
+ else:
+ if 'fb' in stream_data['azureAccount']:
+ prefix = 'df' if static else 'f'
+ else:
+ prefix = 'd' if static else 'p'
+ account = int(stream_data['azureAccount'].replace('nexxplayplus', '').replace('nexxplayfb', ''))
+ return 'http://nx-%s%02d.akamaized.net/' % (prefix, account)
+
+ language = video['general'].get('language_raw') or ''
+
+ azure_stream_base = get_cdn_shield_base()
+ is_ml = ',' in language
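+        # Multi-language videos use a separate _manifest variant; the trailing
+        # '%s' is filled with the per-protocol format query below.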
+ azure_manifest_url = '%s%s/%s_src%s.ism/Manifest' % (
+ azure_stream_base, azure_locator, video_id, ('_manifest' if is_ml else '')) + '%s'
+
+ protection_token = try_get(
+ video, lambda x: x['protectiondata']['token'], compat_str)
+ if protection_token:
+ azure_manifest_url += '?hdnts=%s' % protection_token
+
+ formats = self._extract_m3u8_formats(
+ azure_manifest_url % '(format=m3u8-aapl)',
+ video_id, 'mp4', 'm3u8_native',
+ m3u8_id='%s-hls' % cdn, fatal=False)
+ formats.extend(self._extract_mpd_formats(
+ azure_manifest_url % '(format=mpd-time-csf)',
+ video_id, mpd_id='%s-dash' % cdn, fatal=False))
+ formats.extend(self._extract_ism_formats(
+ azure_manifest_url % '', video_id, ism_id='%s-mss' % cdn, fatal=False))
+
+ azure_progressive_base = get_cdn_shield_base('Prog', True)
+ azure_file_distribution = stream_data.get('azureFileDistribution')
+ if azure_file_distribution:
+ fds = azure_file_distribution.split(',')
+ if fds:
+ for fd in fds:
+ ss = fd.split(':')
+ if len(ss) == 2:
+ tbr = int_or_none(ss[0])
+ if tbr:
+ f = {
+ 'url': '%s%s/%s_src_%s_%d.mp4' % (
+ azure_progressive_base, azure_locator, video_id, ss[1], tbr),
+ 'format_id': '%s-http-%d' % (cdn, tbr),
+ 'tbr': tbr,
+ }
+ width_height = ss[1].split('x')
+ if len(width_height) == 2:
+ f.update({
+ 'width': int_or_none(width_height[0]),
+ 'height': int_or_none(width_height[1]),
+ })
+ formats.append(f)
+
+ return formats
+
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
domain_id = mobj.group('domain_id') or mobj.group('domain_id_s')
general = video['general']
title = general['title']
- stream_data = video['streamdata']
- language = general.get('language_raw') or ''
-
- # TODO: reverse more cdns
-
- cdn = stream_data['cdnType']
- assert cdn == 'azure'
-
- azure_locator = stream_data['azureLocator']
-
- def get_cdn_shield_base(shield_type='', static=False):
- for secure in ('', 's'):
- cdn_shield = stream_data.get('cdnShield%sHTTP%s' % (shield_type, secure.upper()))
- if cdn_shield:
- return 'http%s://%s' % (secure, cdn_shield)
- else:
- if 'fb' in stream_data['azureAccount']:
- prefix = 'df' if static else 'f'
- else:
- prefix = 'd' if static else 'p'
- account = int(stream_data['azureAccount'].replace('nexxplayplus', '').replace('nexxplayfb', ''))
- return 'http://nx-%s%02d.akamaized.net/' % (prefix, account)
-
- azure_stream_base = get_cdn_shield_base()
- is_ml = ',' in language
- azure_manifest_url = '%s%s/%s_src%s.ism/Manifest' % (
- azure_stream_base, azure_locator, video_id, ('_manifest' if is_ml else '')) + '%s'
-
- protection_token = try_get(
- video, lambda x: x['protectiondata']['token'], compat_str)
- if protection_token:
- azure_manifest_url += '?hdnts=%s' % protection_token
+ cdn = video['streamdata']['cdnType']
- formats = self._extract_m3u8_formats(
- azure_manifest_url % '(format=m3u8-aapl)',
- video_id, 'mp4', 'm3u8_native',
- m3u8_id='%s-hls' % cdn, fatal=False)
- formats.extend(self._extract_mpd_formats(
- azure_manifest_url % '(format=mpd-time-csf)',
- video_id, mpd_id='%s-dash' % cdn, fatal=False))
- formats.extend(self._extract_ism_formats(
- azure_manifest_url % '', video_id, ism_id='%s-mss' % cdn, fatal=False))
-
- azure_progressive_base = get_cdn_shield_base('Prog', True)
- azure_file_distribution = stream_data.get('azureFileDistribution')
- if azure_file_distribution:
- fds = azure_file_distribution.split(',')
- if fds:
- for fd in fds:
- ss = fd.split(':')
- if len(ss) == 2:
- tbr = int_or_none(ss[0])
- if tbr:
- f = {
- 'url': '%s%s/%s_src_%s_%d.mp4' % (
- azure_progressive_base, azure_locator, video_id, ss[1], tbr),
- 'format_id': '%s-http-%d' % (cdn, tbr),
- 'tbr': tbr,
- }
- width_height = ss[1].split('x')
- if len(width_height) == 2:
- f.update({
- 'width': int_or_none(width_height[0]),
- 'height': int_or_none(width_height[1]),
- })
- formats.append(f)
+ if cdn == 'azure':
+ formats = self._extract_azure_formats(video, video_id)
+ elif cdn == 'free':
+ formats = self._extract_free_formats(video, video_id)
+ else:
+ # TODO: reverse more cdns
+ assert False
self._sort_formats(formats)
from __future__ import unicode_literals
import re
-import json
-import os
from .common import InfoExtractor
-from ..compat import (
- compat_urlparse,
- compat_urllib_parse_urlencode,
- compat_urllib_parse_urlparse,
- compat_str,
-)
+from ..compat import compat_str
from ..utils import (
- unified_strdate,
determine_ext,
int_or_none,
parse_iso8601,
)
-class NHLBaseInfoExtractor(InfoExtractor):
- @staticmethod
- def _fix_json(json_string):
- return json_string.replace('\\\'', '\'')
-
- def _real_extract_video(self, video_id):
- vid_parts = video_id.split(',')
- if len(vid_parts) == 3:
- video_id = '%s0%s%s-X-h' % (vid_parts[0][:4], vid_parts[1], vid_parts[2].rjust(4, '0'))
- json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id
- data = self._download_json(
- json_url, video_id, transform_source=self._fix_json)
- return self._extract_video(data[0])
-
- def _extract_video(self, info):
- video_id = info['id']
- self.report_extraction(video_id)
-
- initial_video_url = info['publishPoint']
- if info['formats'] == '1':
- parsed_url = compat_urllib_parse_urlparse(initial_video_url)
- filename, ext = os.path.splitext(parsed_url.path)
- path = '%s_sd%s' % (filename, ext)
- data = compat_urllib_parse_urlencode({
- 'type': 'fvod',
- 'path': compat_urlparse.urlunparse(parsed_url[:2] + (path,) + parsed_url[3:])
- })
- path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
- path_doc = self._download_xml(
- path_url, video_id, 'Downloading final video url')
- video_url = path_doc.find('path').text
- else:
- video_url = initial_video_url
-
- join = compat_urlparse.urljoin
- ret = {
- 'id': video_id,
- 'title': info['name'],
- 'url': video_url,
- 'description': info['description'],
- 'duration': int(info['duration']),
- 'thumbnail': join(join(video_url, '/u/'), info['bigImage']),
- 'upload_date': unified_strdate(info['releaseDate'].split('.')[0]),
- }
- if video_url.startswith('rtmp:'):
- mobj = re.match(r'(?P<tc_url>rtmp://[^/]+/(?P<app>[a-z0-9/]+))/(?P<play_path>mp4:.*)', video_url)
- ret.update({
- 'tc_url': mobj.group('tc_url'),
- 'play_path': mobj.group('play_path'),
- 'app': mobj.group('app'),
- 'no_resume': True,
- })
- return ret
-
-
-class NHLVideocenterIE(NHLBaseInfoExtractor):
- IE_NAME = 'nhl.com:videocenter'
- _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console|embed)?(?:\?(?:.*?[?&])?)(?:id|hlg|playlist)=(?P<id>[-0-9a-zA-Z,]+)'
-
- _TESTS = [{
- 'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
- 'md5': 'db704a4ea09e8d3988c85e36cc892d09',
- 'info_dict': {
- 'id': '453614',
- 'ext': 'mp4',
- 'title': 'Quick clip: Weise 4-3 goal vs Flames',
- 'description': 'Dale Weise scores his first of the season to put the Canucks up 4-3.',
- 'duration': 18,
- 'upload_date': '20131006',
- },
- }, {
- 'url': 'http://video.nhl.com/videocenter/console?id=2014020024-628-h',
- 'md5': 'd22e82bc592f52d37d24b03531ee9696',
- 'info_dict': {
- 'id': '2014020024-628-h',
- 'ext': 'mp4',
- 'title': 'Alex Galchenyuk Goal on Ray Emery (14:40/3rd)',
- 'description': 'Home broadcast - Montreal Canadiens at Philadelphia Flyers - October 11, 2014',
- 'duration': 0,
- 'upload_date': '20141011',
- },
- }, {
- 'url': 'http://video.mapleleafs.nhl.com/videocenter/console?id=58665&catid=802',
- 'md5': 'c78fc64ea01777e426cfc202b746c825',
- 'info_dict': {
- 'id': '58665',
- 'ext': 'flv',
- 'title': 'Classic Game In Six - April 22, 1979',
- 'description': 'It was the last playoff game for the Leafs in the decade, and the last time the Leafs and Habs played in the playoffs. Great game, not a great ending.',
- 'duration': 400,
- 'upload_date': '20100129'
- },
- }, {
- 'url': 'http://video.flames.nhl.com/videocenter/console?id=630616',
- 'only_matching': True,
- }, {
- 'url': 'http://video.nhl.com/videocenter/?id=736722',
- 'only_matching': True,
- }, {
- 'url': 'http://video.nhl.com/videocenter/console?hlg=20142015,2,299&lang=en',
- 'md5': '076fcb88c255154aacbf0a7accc3f340',
- 'info_dict': {
- 'id': '2014020299-X-h',
- 'ext': 'mp4',
- 'title': 'Penguins at Islanders / Game Highlights',
- 'description': 'Home broadcast - Pittsburgh Penguins at New York Islanders - November 22, 2014',
- 'duration': 268,
- 'upload_date': '20141122',
- }
- }, {
- 'url': 'http://video.oilers.nhl.com/videocenter/console?id=691469&catid=4',
- 'info_dict': {
- 'id': '691469',
- 'ext': 'mp4',
- 'title': 'RAW | Craig MacTavish Full Press Conference',
- 'description': 'Oilers GM Craig MacTavish addresses the media at Rexall Place on Friday.',
- 'upload_date': '20141205',
- },
- 'params': {
- 'skip_download': True, # Requires rtmpdump
- }
- }, {
- 'url': 'http://video.nhl.com/videocenter/embed?playlist=836127',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- return self._real_extract_video(video_id)
-
-
-class NHLNewsIE(NHLBaseInfoExtractor):
- IE_NAME = 'nhl.com:news'
- IE_DESC = 'NHL news'
- _VALID_URL = r'https?://(?:.+?\.)?nhl\.com/(?:ice|club)/news\.html?(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
-
- _TESTS = [{
- 'url': 'http://www.nhl.com/ice/news.htm?id=750727',
- 'md5': '4b3d1262e177687a3009937bd9ec0be8',
- 'info_dict': {
- 'id': '736722',
- 'ext': 'mp4',
- 'title': 'Cal Clutterbuck has been fined $2,000',
- 'description': 'md5:45fe547d30edab88b23e0dd0ab1ed9e6',
- 'duration': 37,
- 'upload_date': '20150128',
- },
- }, {
- # iframe embed
- 'url': 'http://sabres.nhl.com/club/news.htm?id=780189',
- 'md5': '9f663d1c006c90ac9fb82777d4294e12',
- 'info_dict': {
- 'id': '836127',
- 'ext': 'mp4',
- 'title': 'Morning Skate: OTT vs. BUF (9/23/15)',
- 'description': "Brian Duff chats with Tyler Ennis prior to Buffalo's first preseason home game.",
- 'duration': 93,
- 'upload_date': '20150923',
- },
- }]
-
+class NHLBaseIE(InfoExtractor):
def _real_extract(self, url):
- news_id = self._match_id(url)
- webpage = self._download_webpage(url, news_id)
- video_id = self._search_regex(
- [r'pVid(\d+)', r"nlid\s*:\s*'(\d+)'",
- r'<iframe[^>]+src=["\']https?://video.*?\.nhl\.com/videocenter/embed\?.*\bplaylist=(\d+)'],
- webpage, 'video id')
- return self._real_extract_video(video_id)
-
+ site, tmp_id = re.match(self._VALID_URL, url).groups()
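+        # The first three letters of the site name select the API section;
+        # MLB items take an extra 'item/' path component.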
+ video_data = self._download_json(
+ 'https://%s/%s/%sid/v1/%s/details/web-v1.json'
+ % (self._CONTENT_DOMAIN, site[:3], 'item/' if site == 'mlb' else '', tmp_id), tmp_id)
+ if video_data.get('type') != 'video':
+ video_data = video_data['media']
+ video = video_data.get('video')
+ if video:
+ video_data = video
+ else:
+ videos = video_data.get('videos')
+ if videos:
+ video_data = videos[0]
-class NHLVideocenterCategoryIE(NHLBaseInfoExtractor):
- IE_NAME = 'nhl.com:videocenter:category'
- IE_DESC = 'NHL videocenter category'
- _VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?[^(id=)]*catid=(?P<catid>[0-9]+)(?![&?]id=).*?)?$'
- _TEST = {
- 'url': 'http://video.canucks.nhl.com/videocenter/console?catid=999',
- 'info_dict': {
- 'id': '999',
- 'title': 'Highlights',
- },
- 'playlist_count': 12,
- }
+ video_id = compat_str(video_data['id'])
+ title = video_data['title']
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- team = mobj.group('team')
- webpage = self._download_webpage(url, team)
- cat_id = self._search_regex(
- [r'var defaultCatId = "(.+?)";',
- r'{statusIndex:0,index:0,.*?id:(.*?),'],
- webpage, 'category id')
- playlist_title = self._html_search_regex(
- r'tab0"[^>]*?>(.*?)</td>',
- webpage, 'playlist title', flags=re.DOTALL).lower().capitalize()
+ formats = []
+ for playback in video_data.get('playbacks', []):
+ playback_url = playback.get('url')
+ if not playback_url:
+ continue
+ ext = determine_ext(playback_url)
+ if ext == 'm3u8':
+ m3u8_formats = self._extract_m3u8_formats(
+ playback_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id=playback.get('name', 'hls'), fatal=False)
+ self._check_formats(m3u8_formats, video_id)
+ formats.extend(m3u8_formats)
+ else:
+ height = int_or_none(playback.get('height'))
+ formats.append({
+ 'format_id': playback.get('name', 'http' + ('-%dp' % height if height else '')),
+ 'url': playback_url,
+ 'width': int_or_none(playback.get('width')),
+ 'height': height,
+ 'tbr': int_or_none(self._search_regex(r'_(\d+)[kK]', playback_url, 'bitrate', default=None)),
+ })
+ self._sort_formats(formats)
- data = compat_urllib_parse_urlencode({
- 'cid': cat_id,
- # This is the default value
- 'count': 12,
- 'ptrs': 3,
- 'format': 'json',
- })
- path = '/videocenter/servlets/browse?' + data
- request_url = compat_urlparse.urljoin(url, path)
- response = self._download_webpage(request_url, playlist_title)
- response = self._fix_json(response)
- if not response.strip():
- self._downloader.report_warning('Got an empty response, trying '
- 'adding the "newvideos" parameter')
- response = self._download_webpage(request_url + '&newvideos=true',
- playlist_title)
- response = self._fix_json(response)
- videos = json.loads(response)
+ thumbnails = []
+ cuts = video_data.get('image', {}).get('cuts') or []
+ if isinstance(cuts, dict):
+ cuts = cuts.values()
+ for thumbnail_data in cuts:
+ thumbnail_url = thumbnail_data.get('src')
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'width': int_or_none(thumbnail_data.get('width')),
+ 'height': int_or_none(thumbnail_data.get('height')),
+ })
return {
- '_type': 'playlist',
- 'title': playlist_title,
- 'id': cat_id,
- 'entries': [self._extract_video(v) for v in videos],
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'timestamp': parse_iso8601(video_data.get('date')),
+ 'duration': parse_duration(video_data.get('duration')),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
}
-class NHLIE(InfoExtractor):
+class NHLIE(NHLBaseIE):
IE_NAME = 'nhl.com'
_VALID_URL = r'https?://(?:www\.)?(?P<site>nhl|wch2016)\.com/(?:[^/]+/)*c-(?P<id>\d+)'
- _SITES_MAP = {
- 'nhl': 'nhl',
- 'wch2016': 'wch',
- }
+ _CONTENT_DOMAIN = 'nhl.bamcontent.com'
_TESTS = [{
# type=video
'url': 'https://www.nhl.com/video/anisimov-cleans-up-mess/t-277752844/c-43663503',
'url': 'https://www.wch2016.com/news/3-stars-team-europe-vs-team-canada/c-282195068',
'only_matching': True,
}]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- tmp_id, site = mobj.group('id'), mobj.group('site')
- video_data = self._download_json(
- 'https://nhl.bamcontent.com/%s/id/v1/%s/details/web-v1.json'
- % (self._SITES_MAP[site], tmp_id), tmp_id)
- if video_data.get('type') == 'article':
- video_data = video_data['media']
-
- video_id = compat_str(video_data['id'])
- title = video_data['title']
-
- formats = []
- for playback in video_data.get('playbacks', []):
- playback_url = playback.get('url')
- if not playback_url:
- continue
- ext = determine_ext(playback_url)
- if ext == 'm3u8':
- m3u8_formats = self._extract_m3u8_formats(
- playback_url, video_id, 'mp4', 'm3u8_native',
- m3u8_id=playback.get('name', 'hls'), fatal=False)
- self._check_formats(m3u8_formats, video_id)
- formats.extend(m3u8_formats)
- else:
- height = int_or_none(playback.get('height'))
- formats.append({
- 'format_id': playback.get('name', 'http' + ('-%dp' % height if height else '')),
- 'url': playback_url,
- 'width': int_or_none(playback.get('width')),
- 'height': height,
- })
- self._sort_formats(formats, ('preference', 'width', 'height', 'tbr', 'format_id'))
-
- thumbnails = []
- for thumbnail_id, thumbnail_data in video_data.get('image', {}).get('cuts', {}).items():
- thumbnail_url = thumbnail_data.get('src')
- if not thumbnail_url:
- continue
- thumbnails.append({
- 'id': thumbnail_id,
- 'url': thumbnail_url,
- 'width': int_or_none(thumbnail_data.get('width')),
- 'height': int_or_none(thumbnail_data.get('height')),
- })
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': video_data.get('description'),
- 'timestamp': parse_iso8601(video_data.get('date')),
- 'duration': parse_duration(video_data.get('duration')),
- 'thumbnails': thumbnails,
- 'formats': formats,
- }
https?://
(?:
(?P<domain>(?:www\.)?nickjr|mundonick\.uol)\.com\.br|
- (?:www\.)?nickjr\.nl
+ (?:www\.)?nickjr\.[a-z]{2}
)
/(?:programas/)?[^/]+/videos/(?:episodios/)?(?P<id>[^/?\#.]+)
'''
}, {
'url': 'http://www.nickjr.nl/paw-patrol/videos/311-ge-wol-dig-om-terug-te-zijn/',
'only_matching': True,
+ }, {
+ 'url': 'http://www.nickjr.de/blaze-und-die-monster-maschinen/videos/f6caaf8f-e4e8-4cc1-b489-9380d6dcd059/',
+ 'only_matching': True,
}]
def _real_extract(self, url):
self._login()
def _login(self):
- (username, password) = self._get_login_info()
+ username, password = self._get_login_info()
# No authentication to be performed
if not username:
return True
import re
from .common import InfoExtractor
-from ..compat import compat_str
from ..utils import (
parse_iso8601,
float_or_none,
)
-class NineCNineMediaBaseIE(InfoExtractor):
- _API_BASE_TEMPLATE = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/'
-
-
-class NineCNineMediaStackIE(NineCNineMediaBaseIE):
- IE_NAME = '9c9media:stack'
- _GEO_COUNTRIES = ['CA']
- _VALID_URL = r'9c9media:stack:(?P<destination_code>[^:]+):(?P<content_id>\d+):(?P<content_package>\d+):(?P<id>\d+)'
-
- def _real_extract(self, url):
- destination_code, content_id, package_id, stack_id = re.match(self._VALID_URL, url).groups()
- stack_base_url_template = self._API_BASE_TEMPLATE + 'contentpackages/%s/stacks/%s/manifest.'
- stack_base_url = stack_base_url_template % (destination_code, content_id, package_id, stack_id)
-
- formats = []
- formats.extend(self._extract_m3u8_formats(
- stack_base_url + 'm3u8', stack_id, 'mp4',
- 'm3u8_native', m3u8_id='hls', fatal=False))
- formats.extend(self._extract_f4m_formats(
- stack_base_url + 'f4m', stack_id,
- f4m_id='hds', fatal=False))
- self._sort_formats(formats)
-
- return {
- 'id': stack_id,
- 'formats': formats,
- }
-
-
-class NineCNineMediaIE(NineCNineMediaBaseIE):
+class NineCNineMediaIE(InfoExtractor):
IE_NAME = '9c9media'
+ _GEO_COUNTRIES = ['CA']
_VALID_URL = r'9c9media:(?P<destination_code>[^:]+):(?P<id>\d+)'
+ _API_BASE_TEMPLATE = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/'
def _real_extract(self, url):
destination_code, content_id = re.match(self._VALID_URL, url).groups()
content_package = content['ContentPackages'][0]
package_id = content_package['Id']
content_package_url = api_base_url + 'contentpackages/%s/' % package_id
- content_package = self._download_json(content_package_url, content_id)
+ content_package = self._download_json(
+ content_package_url, content_id, query={
+ '$include': '[HasClosedCaptions]',
+ })
- if content_package.get('Constraints', {}).get('Security', {}).get('Type') == 'adobe-drm':
+ if content_package.get('Constraints', {}).get('Security', {}).get('Type'):
raise ExtractorError('This video is DRM protected.', expected=True)
- stacks = self._download_json(content_package_url + 'stacks/', package_id)['Items']
- multistacks = len(stacks) > 1
+ manifest_base_url = content_package_url + 'manifest.'
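+        # Appending the extension (m3u8/f4m/mpd) to the manifest base URL
+        # selects the manifest type.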
+ formats = []
+ formats.extend(self._extract_m3u8_formats(
+ manifest_base_url + 'm3u8', content_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False))
+ formats.extend(self._extract_f4m_formats(
+ manifest_base_url + 'f4m', content_id,
+ f4m_id='hds', fatal=False))
+ formats.extend(self._extract_mpd_formats(
+ manifest_base_url + 'mpd', content_id,
+ mpd_id='dash', fatal=False))
+ self._sort_formats(formats)
thumbnails = []
for image in content.get('Images', []):
continue
container.append(e_name)
- description = content.get('Desc') or content.get('ShortDesc')
season = content.get('Season', {})
- base_info = {
- 'description': description,
+
+ info = {
+ 'id': content_id,
+ 'title': title,
+ 'description': content.get('Desc') or content.get('ShortDesc'),
'timestamp': parse_iso8601(content.get('BroadcastDateTime')),
'episode_number': int_or_none(content.get('Episode')),
'season': season.get('Name'),
'series': content.get('Media', {}).get('Name'),
'tags': tags,
'categories': categories,
+ 'duration': float_or_none(content_package.get('Duration')),
+ 'formats': formats,
}
- entries = []
- for stack in stacks:
- stack_id = compat_str(stack['Id'])
- entry = {
- '_type': 'url_transparent',
- 'url': '9c9media:stack:%s:%s:%s:%s' % (destination_code, content_id, package_id, stack_id),
- 'id': stack_id,
- 'title': '%s_part%s' % (title, stack['Name']) if multistacks else title,
- 'duration': float_or_none(stack.get('Duration')),
- 'ie_key': 'NineCNineMediaStack',
+ if content_package.get('HasClosedCaptions'):
+ info['subtitles'] = {
+ 'en': [{
+ 'url': manifest_base_url + 'vtt',
+ 'ext': 'vtt',
+ }, {
+ 'url': manifest_base_url + 'srt',
+ 'ext': 'srt',
+ }]
}
- entry.update(base_info)
- entries.append(entry)
- return {
- '_type': 'multi_video',
- 'id': content_id,
- 'title': title,
- 'description': description,
- 'entries': entries,
- }
+ return info
self._login()
def _login(self):
- (username, password) = self._get_login_info()
+ username, password = self._get_login_info()
if username is None:
return
https?://
(?:www\.)?
(?:
- npo\.nl/(?!(?:live|radio)/)(?:[^/]+/){2}|
- ntr\.nl/(?:[^/]+/){2,}|
+ npo\.nl/(?:[^/]+/)*|
+ (?:ntr|npostart)\.nl/(?:[^/]+/){2,}|
omroepwnl\.nl/video/fragment/[^/]+__|
(?:zapp|npo3)\.nl/(?:[^/]+/){2,}
)
}, {
'url': 'https://www.zapp.nl/1803-skelterlab/instructie-video-s/740-instructievideo-s/POMS_AT_11736927',
'only_matching': True,
+ }, {
+ 'url': 'https://www.npostart.nl/broodje-gezond-ei/28-05-2018/KN_1698996',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://npo.nl/KN_1698996',
+ 'only_matching': True,
}]
+ @classmethod
+ def suitable(cls, url):
+ return (False if any(ie.suitable(url)
+ for ie in (NPOLiveIE, NPORadioIE, NPORadioFragmentIE))
+ else super(NPOIE, cls).suitable(url))
+
def _real_extract(self, url):
video_id = self._match_id(url)
return self._get_info(video_id)
class NPORadioIE(InfoExtractor):
IE_NAME = 'npo.nl:radio'
- _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/(?P<id>[^/]+)/?$'
+ _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/(?P<id>[^/]+)'
_TEST = {
'url': 'http://www.npo.nl/radio/radio-1',
}
}
+ @classmethod
+ def suitable(cls, url):
+ return False if NPORadioFragmentIE.suitable(url) else super(NPORadioIE, cls).suitable(url)
+
@staticmethod
def _html_get_attribute_regex(attribute):
return r'{0}\s*=\s*\'([^\']+)\''.format(attribute)
class NRKBaseIE(InfoExtractor):
_GEO_COUNTRIES = ['NO']
+ _api_host = None
+
def _real_extract(self, url):
video_id = self._match_id(url)
- data = self._download_json(
- 'http://%s/mediaelement/%s' % (self._API_HOST, video_id),
- video_id, 'Downloading mediaelement JSON')
+ api_hosts = (self._api_host, ) if self._api_host else self._API_HOSTS
+
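+        # Try each known API host in turn and remember the first one that
+        # responds so subsequent requests skip the dead hosts.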
+ for api_host in api_hosts:
+ data = self._download_json(
+ 'http://%s/mediaelement/%s' % (api_host, video_id),
+ video_id, 'Downloading mediaelement JSON',
+ fatal=api_host == api_hosts[-1])
+ if not data:
+ continue
+ self._api_host = api_host
+ break
title = data.get('fullTitle') or data.get('mainTitle') or data['title']
video_id = data.get('id') or video_id
)
(?P<id>[^?#&]+)
'''
- _API_HOST = 'v8-psapi.nrk.no'
+ _API_HOSTS = ('psapi.nrk.no', 'v8-psapi.nrk.no')
_TESTS = [{
# video
'url': 'http://www.nrk.no/video/PS*150533',
(?:/\d{2}-\d{2}-\d{4})?
(?:\#del=(?P<part_id>\d+))?
''' % _EPISODE_RE
- _API_HOST = 'psapi-ne.nrk.no'
-
+ _API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
_TESTS = [{
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
'md5': '4e9ca6629f09e588ed240fb11619922a',
class OpenloadIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
+ _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
_TESTS = [{
'url': 'https://openload.co/f/kUEfGclsU9o',
}, {
'url': 'https://oload.xyz/f/WwRBpzW8Wtk',
'only_matching': True,
+ }, {
+ 'url': 'https://oload.win/f/kUEfGclsU9o',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://oload.download/f/kUEfGclsU9o',
+ 'only_matching': True,
+ }, {
+        # The title lacks the file extension but the URL includes it
+ 'url': 'https://oload.download/f/N4Otkw39VCw/Tomb.Raider.2018.HDRip.XviD.AC3-EVO.avi.mp4',
+ 'only_matching': True,
}]
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
'title': title,
'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
'url': video_url,
- # Seems all videos have extensions in their titles
- 'ext': determine_ext(title, 'mp4'),
+ 'ext': determine_ext(title, None) or determine_ext(url, 'mp4'),
'subtitles': subtitles,
'http_headers': headers,
}
_TOKEN = None
def _real_initialize(self):
- (username, password) = self._get_login_info()
+ username, password = self._get_login_info()
if username is None:
return
try:
# needed. Keeping this commented for when this inevitably changes.
'''
def _login(self):
- (username, password) = self._get_login_info()
+ username, password = self._get_login_info()
if username is None:
return
import re
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
ExtractorError,
determine_ext,
'skip_download': True,
},
},
+ {
+ 'url': 'http://www.pbs.org/wgbh/roadshow/watch/episode/2105-indianapolis-hour-2/',
+ 'info_dict': {
+ 'id': '2365936247',
+ 'ext': 'mp4',
+ 'title': 'Antiques Roadshow - Indianapolis, Hour 2',
+ 'description': 'md5:524b32249db55663e7231b6b8d1671a2',
+ 'duration': 3180,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['HTTP Error 403: Forbidden'],
+ },
+ {
+ 'url': 'https://www.pbs.org/wgbh/masterpiece/episodes/victoria-s2-e1/',
+ 'info_dict': {
+ 'id': '3007193718',
+ 'ext': 'mp4',
+ 'title': "Victoria - A Soldier's Daughter / The Green-Eyed Monster",
+ 'description': 'md5:37efbac85e0c09b009586523ec143652',
+ 'duration': 6292,
+ 'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['HTTP Error 403: Forbidden'],
+ },
+ {
+ 'url': 'https://player.pbs.org/partnerplayer/tOz9tM5ljOXQqIIWke53UA==/',
+ 'info_dict': {
+ 'id': '3011407934',
+ 'ext': 'mp4',
+ 'title': 'Stories from the Stage - Road Trip',
+ 'duration': 1619,
+ 'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['HTTP Error 403: Forbidden'],
+ },
{
'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',
'only_matching': True,
r'<section[^>]+data-coveid="(\d+)"', # coveplayer from http://www.pbs.org/wgbh/frontline/film/real-csi/
r'<input type="hidden" id="pbs_video_id_[0-9]+" value="([0-9]+)"/>', # jwplayer
r"(?s)window\.PBS\.playerConfig\s*=\s*{.*?id\s*:\s*'([0-9]+)',",
+            r'<div[^>]+\bdata-cove-id=["\'](\d+)["\']',  # http://www.pbs.org/wgbh/roadshow/watch/episode/2105-indianapolis-hour-2/
+ r'<iframe[^>]+\bsrc=["\'](?:https?:)?//video\.pbs\.org/widget/partnerplayer/(\d+)', # https://www.pbs.org/wgbh/masterpiece/episodes/victoria-s2-e1/
]
media_id = self._search_regex(
if not url:
url = self._og_search_url(webpage)
- mobj = re.match(self._VALID_URL, url)
+ mobj = re.match(
+ self._VALID_URL, self._proto_relative_url(url.strip()))
player_id = mobj.group('player_id')
if not display_id:
url, display_id, note='Downloading player page',
errnote='Could not download player page')
video_id = self._search_regex(
- r'<div\s+id="video_([0-9]+)"', player_page, 'video ID')
+ r'<div\s+id=["\']video_(\d+)', player_page, 'video ID',
+ default=None)
+ if not video_id:
+ video_info = self._extract_video_data(
+ player_page, 'video data', display_id)
+ video_id = compat_str(
+ video_info.get('id') or video_info['contentID'])
else:
video_id = mobj.group('id')
display_id = video_id
return video_id, display_id, None, description
+ def _extract_video_data(self, string, name, video_id, fatal=True):
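+        # The metadata JSON may be exposed as either PBS.videoData or
+        # window.videoBridge depending on the page.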
+ return self._parse_json(
+ self._search_regex(
+ [r'(?s)PBS\.videoData\s*=\s*({.+?});\n',
+ r'window\.videoBridge\s*=\s*({.+?});'],
+ string, name, default='{}'),
+ video_id, transform_source=js_to_json, fatal=fatal)
+
def _real_extract(self, url):
video_id, display_id, upload_date, description = self._extract_webpage(url)
'http://player.pbs.org/%s/%s' % (page, video_id),
display_id, 'Downloading %s page' % page, fatal=False)
if player:
- video_info = self._parse_json(
- self._search_regex(
- r'(?s)PBS\.videoData\s*=\s*({.+?});\n',
- player, '%s video data' % page, default='{}'),
- display_id, transform_source=js_to_json, fatal=False)
+ video_info = self._extract_video_data(
+ player, '%s video data' % page, display_id, fatal=False)
if video_info:
extract_redirect_urls(video_info)
if not info:
info = video_info
if not chapters:
- for chapter_data in re.findall(r'(?s)chapters\.push\(({.*?})\)', player):
- chapter = self._parse_json(chapter_data, video_id, js_to_json, fatal=False)
- if not chapter:
- continue
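+                # Prefer chapters from the parsed video data; fall back to
+                # scraping chapters.push(...) calls from the player page.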
+ raw_chapters = video_info.get('chapters') or []
+ if not raw_chapters:
+ for chapter_data in re.findall(r'(?s)chapters\.push\(({.*?})\)', player):
+ chapter = self._parse_json(chapter_data, video_id, js_to_json, fatal=False)
+ if not chapter:
+ continue
+ raw_chapters.append(chapter)
+ for chapter in raw_chapters:
start_time = float_or_none(chapter.get('start_time'), 1000)
duration = float_or_none(chapter.get('duration'), 1000)
if start_time is None or duration is None:
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ parse_resolution,
+ try_get,
+ unified_timestamp,
+ urljoin,
+)
+
+
+class PeerTubeIE(InfoExtractor):
+ _INSTANCES_RE = r'''(?:
+ # Taken from https://instances.joinpeertube.org/instances
+ tube\.openalgeria\.org|
+ peertube\.pointsecu\.fr|
+ peertube\.nogafa\.org|
+ peertube\.pl|
+ megatube\.lilomoino\.fr|
+ peertube\.tamanoir\.foucry\.net|
+ peertube\.inapurna\.org|
+ peertube\.netzspielplatz\.de|
+ video\.deadsuperhero\.com|
+ peertube\.devosi\.org|
+ peertube\.1312\.media|
+ tube\.worldofhauru\.xyz|
+ tube\.bootlicker\.party|
+ skeptikon\.fr|
+ peertube\.geekshell\.fr|
+ tube\.opportunis\.me|
+ peertube\.peshane\.net|
+ video\.blueline\.mg|
+ tube\.homecomputing\.fr|
+ videos\.cloudfrancois\.fr|
+ peertube\.viviers-fibre\.net|
+ tube\.ouahpiti\.info|
+ video\.tedomum\.net|
+ video\.g3l\.org|
+ fontube\.fr|
+ peertube\.gaialabs\.ch|
+ peertube\.extremely\.online|
+ peertube\.public-infrastructure\.eu|
+ tube\.kher\.nl|
+ peertube\.qtg\.fr|
+ tube\.22decembre\.eu|
+ facegirl\.me|
+ video\.migennes\.net|
+ janny\.moe|
+ tube\.p2p\.legal|
+ video\.atlanti\.se|
+ troll\.tv|
+ peertube\.geekael\.fr|
+ vid\.leotindall\.com|
+ video\.anormallostpod\.ovh|
+ p-tube\.h3z\.jp|
+ tube\.darfweb\.eu|
+ videos\.iut-orsay\.fr|
+ peertube\.solidev\.net|
+ videos\.symphonie-of-code\.fr|
+ testtube\.ortg\.de|
+ videos\.cemea\.org|
+ peertube\.gwendalavir\.eu|
+ video\.passageenseine\.fr|
+ videos\.festivalparminous\.org|
+ peertube\.touhoppai\.moe|
+ peertube\.duckdns\.org|
+ sikke\.fi|
+ peertube\.mastodon\.host|
+ firedragonvideos\.com|
+ vidz\.dou\.bet|
+ peertube\.koehn\.com|
+ peer\.hostux\.social|
+ share\.tube|
+ peertube\.walkingmountains\.fr|
+ medias\.libox\.fr|
+ peertube\.moe|
+ peertube\.xyz|
+ jp\.peertube\.network|
+ videos\.benpro\.fr|
+ tube\.otter\.sh|
+ peertube\.angristan\.xyz|
+ peertube\.parleur\.net|
+ peer\.ecutsa\.fr|
+ peertube\.heraut\.eu|
+ peertube\.tifox\.fr|
+ peertube\.maly\.io|
+ vod\.mochi\.academy|
+ exode\.me|
+ coste\.video|
+ tube\.aquilenet\.fr|
+ peertube\.gegeweb\.eu|
+ framatube\.org|
+ thinkerview\.video|
+ tube\.conferences-gesticulees\.net|
+ peertube\.datagueule\.tv|
+ video\.lqdn\.fr|
+ meilleurtube\.delire\.party|
+ tube\.mochi\.academy|
+ peertube\.dav\.li|
+ media\.zat\.im|
+ pytu\.be|
+ peertube\.valvin\.fr|
+ peertube\.nsa\.ovh|
+ video\.colibris-outilslibres\.org|
+ video\.hispagatos\.org|
+ tube\.svnet\.fr|
+ peertube\.video|
+ videos\.lecygnenoir\.info|
+ peertube3\.cpy\.re|
+ peertube2\.cpy\.re|
+ videos\.tcit\.fr|
+ peertube\.cpy\.re
+ )'''
+ _VALID_URL = r'''(?x)
+ https?://
+ %s
+ /(?:videos/(?:watch|embed)|api/v\d/videos)/
+ (?P<id>[^/?\#&]+)
+ ''' % _INSTANCES_RE
+ _TESTS = [{
+ 'url': 'https://peertube.moe/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c',
+ 'md5': '80f24ff364cc9d333529506a263e7feb',
+ 'info_dict': {
+ 'id': '2790feb0-8120-4e63-9af3-c943c69f5e6c',
+ 'ext': 'mp4',
+ 'title': 'wow',
+ 'description': 'wow such video, so gif',
+ 'thumbnail': r're:https?://.*\.(?:jpg|png)',
+ 'timestamp': 1519297480,
+ 'upload_date': '20180222',
+ 'uploader': 'Luclu7',
+ 'uploader_id': '7fc42640-efdb-4505-a45d-a15b1a5496f1',
+            'uploader_url': 'https://peertube.nsa.ovh/accounts/luclu7',
+ 'license': 'Unknown',
+ 'duration': 3,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'tags': list,
+ 'categories': list,
+ }
+ }, {
+ 'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
+ 'only_matching': True,
+ }, {
+ # nsfw
+ 'url': 'https://tube.22decembre.eu/videos/watch/9bb88cd3-9959-46d9-9ab9-33d2bb704c39',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tube.22decembre.eu/videos/embed/fed67262-6edb-4d1c-833b-daa9085c71d7',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tube.openalgeria.org/api/v1/videos/c1875674-97d0-4c94-a058-3f7e64c962e8',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
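+        # Collect embed URLs pointing at any known PeerTube instance.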
+ return [
+ mobj.group('url')
+ for mobj in re.finditer(
+ r'''(?x)<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//%s/videos/embed/[^/?\#&]+)\1'''
+ % PeerTubeIE._INSTANCES_RE, webpage)]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ urljoin(url, '/api/v1/videos/%s' % video_id), video_id)
+
+ title = video['name']
+
+ formats = []
+ for file_ in video['files']:
+ if not isinstance(file_, dict):
+ continue
+ file_url = file_.get('fileUrl')
+ if not file_url or not isinstance(file_url, compat_str):
+ continue
+ file_size = int_or_none(file_.get('size'))
+ format_id = try_get(
+ file_, lambda x: x['resolution']['label'], compat_str)
+ f = parse_resolution(format_id)
+ f.update({
+ 'url': file_url,
+ 'format_id': format_id,
+ 'filesize': file_size,
+ })
+ formats.append(f)
+ self._sort_formats(formats)
+
+ def account_data(field):
+ return try_get(video, lambda x: x['account'][field], compat_str)
+
+ category = try_get(video, lambda x: x['category']['label'], compat_str)
+ categories = [category] if category else None
+
+ nsfw = video.get('nsfw')
+        if isinstance(nsfw, bool):
+ age_limit = 18 if nsfw else 0
+ else:
+ age_limit = None
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video.get('description'),
+ 'thumbnail': urljoin(url, video.get('thumbnailPath')),
+ 'timestamp': unified_timestamp(video.get('publishedAt')),
+ 'uploader': account_data('displayName'),
+ 'uploader_id': account_data('uuid'),
+            'uploader_url': account_data('url'),
+ 'license': try_get(
+ video, lambda x: x['licence']['label'], compat_str),
+ 'duration': int_or_none(video.get('duration')),
+ 'view_count': int_or_none(video.get('views')),
+ 'like_count': int_or_none(video.get('likes')),
+ 'dislike_count': int_or_none(video.get('dislikes')),
+ 'age_limit': age_limit,
+ 'tags': try_get(video, lambda x: x['tags'], list),
+ 'categories': categories,
+ 'formats': formats,
+ }
self._login()
def _login(self):
- (username, password) = self._get_login_info()
+ username, password = self._get_login_info()
if username is None:
return
raise ExtractorError('Unable to log in')
- def _get_subtitles(self, author, clip_id, lang, name, duration, video_id):
+ def _get_subtitles(self, author, clip_idx, lang, name, duration, video_id):
captions_post = {
'a': author,
- 'cn': clip_id,
+ 'cn': clip_idx,
'lc': lang,
'm': name,
}
author = qs.get('author', [None])[0]
name = qs.get('name', [None])[0]
- clip_id = qs.get('clip', [None])[0]
+ clip_idx = qs.get('clip', [None])[0]
course_name = qs.get('course', [None])[0]
- if any(not f for f in (author, name, clip_id, course_name,)):
+ if any(not f for f in (author, name, clip_idx, course_name,)):
raise ExtractorError('Invalid URL', expected=True)
- display_id = '%s-%s' % (name, clip_id)
+ display_id = '%s-%s' % (name, clip_idx)
course = self._download_course(course_name, url, display_id)
clip_index = clip_.get('index')
if clip_index is None:
continue
- if compat_str(clip_index) == clip_id:
+ if compat_str(clip_index) == clip_idx:
clip = clip_
break
raise ExtractorError('Unable to resolve clip')
title = clip['title']
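+        # The 'clip' URL parameter is a positional index within the module;
+        # the real clip id comes from the clip data itself.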
+ clip_id = clip.get('clipName') or clip.get('name') or clip['clipId']
QUALITIES = {
'low': {'width': 640, 'height': 480},
clip_post = {
'author': author,
'includeCaptions': False,
- 'clipIndex': int(clip_id),
+ 'clipIndex': int(clip_idx),
'courseName': course_name,
'locale': 'en',
'moduleName': name,
# TODO: other languages?
subtitles = self.extract_subtitles(
- author, clip_id, 'en', name, duration, display_id)
+ author, clip_idx, 'en', name, duration, display_id)
return {
- 'id': clip.get('clipName') or clip['name'],
+ 'id': clip_id,
'title': title,
'duration': duration,
'creator': author,
'format_id': compat_str(abr),
'abr': abr,
'vcodec': 'none',
- } for abr in (96, 128, 256)]
+ } for abr in (96, 128, 192, 256)]
+ self._check_formats(formats, episode_id)
description = clean_html(episode.get('longTeaser'))
thumbnail = self._proto_relative_url(episode.get('imageURL', {}).get('landscape'))
'info_dict': {
'id': '604333',
'display_id': 'fowler-jr-prend-la-direction-de-jacksonville',
- 'ext': 'mp4',
+ 'ext': 'flv',
'title': 'Fowler Jr. prend la direction de Jacksonville',
'description': 'Dante Fowler Jr. est le troisième choix du repêchage 2015 de la NFL. ',
'timestamp': 1430397346,
class RedditRIE(InfoExtractor):
- _VALID_URL = r'(?P<url>https?://(?:www\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/?#&]+))'
+ _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/?#&]+))'
_TESTS = [{
'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
'info_dict': {
# imgur
'url': 'https://www.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/',
'only_matching': True,
+ }, {
+ # imgur @ old reddit
+ 'url': 'https://old.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/',
+ 'only_matching': True,
}, {
# streamable
'url': 'https://www.reddit.com/r/videos/comments/6t7sg9/comedians_hilarious_joke_about_the_guam_flag/',
# youtube
'url': 'https://www.reddit.com/r/videos/comments/6t75wq/southern_man_tries_to_speak_without_an_accent/',
'only_matching': True,
+ }, {
+ # reddit video @ nm reddit
+ 'url': 'https://nm.reddit.com/r/Cricket/comments/8idvby/lousy_cameraman_finds_himself_in_cairns_line_of/',
+ 'only_matching': True,
}]
def _real_extract(self, url):
}]
def _login(self):
- (username, password) = self._get_login_info()
+ username, password = self._get_login_info()
if username is None:
return
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..utils import (
- int_or_none,
ExtractorError,
+ float_or_none,
+ int_or_none,
+ strip_or_none,
)
(?:
video/[^?]+\?.*\bid=|
ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=|
- auvio/[^/]+\?.*id=
+ auvio/[^/]+\?.*\b(?P<live>l)?id=
)(?P<id>\d+)'''
_TESTS = [{
'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',
- 'md5': '799f334ddf2c0a582ba80c44655be570',
+ 'md5': '8c876a1cceeb6cf31b476461ade72384',
'info_dict': {
'id': '1921274',
'ext': 'mp4',
'title': 'Les Diables au coeur (épisode 2)',
- 'description': 'Football - Diables Rouges',
- 'duration': 3099,
+ 'description': '(du 25/04/2014)',
+ 'duration': 3099.54,
'upload_date': '20140425',
- 'timestamp': 1398456336,
- 'uploader': 'rtbfsport',
+ 'timestamp': 1398456300,
}
}, {
# geo restricted
}, {
'url': 'http://www.rtbf.be/auvio/detail_jeudi-en-prime-siegfried-bracke?id=2102996',
'only_matching': True,
+ }, {
+ # Live
+ 'url': 'https://www.rtbf.be/auvio/direct_pure-fm?lid=134775',
+ 'only_matching': True,
+ }, {
+ # Audio
+ 'url': 'https://www.rtbf.be/auvio/detail_cinq-heures-cinema?id=2360811',
+ 'only_matching': True,
+ }, {
+ # With Subtitle
+ 'url': 'https://www.rtbf.be/auvio/detail_les-carnets-du-bourlingueur?id=2361588',
+ 'only_matching': True,
}]
_IMAGE_HOST = 'http://ds1.ds.static.rtbf.be'
_PROVIDERS = {
]
def _real_extract(self, url):
- video_id = self._match_id(url)
- data = self._download_json(
- 'http://www.rtbf.be/api/media/video?method=getVideoDetail&args[]=%s' % video_id, video_id)
+ live, media_id = re.match(self._VALID_URL, url).groups()
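+        # Live URLs carry lid= and are served by the 'direct' embed endpoint;
+        # on-demand media uses id= and the 'media' endpoint.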
+ embed_page = self._download_webpage(
+ 'https://www.rtbf.be/auvio/embed/' + ('direct' if live else 'media'),
+ media_id, query={'id': media_id})
+ data = self._parse_json(self._html_search_regex(
+ r'data-media="([^"]+)"', embed_page, 'media data'), media_id)
error = data.get('error')
if error:
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
- data = data['data']
-
provider = data.get('provider')
if provider in self._PROVIDERS:
return self.url_result(data['url'], self._PROVIDERS[provider])
+ title = data['title']
+ is_live = data.get('isLive')
+ if is_live:
+ title = self._live_title(title)
+ height_re = r'-(\d+)p\.'
formats = []
- for key, format_id in self._QUALITIES:
- format_url = data.get(key + 'Url')
- if format_url:
+
+ m3u8_url = data.get('urlHlsAes128') or data.get('urlHls')
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))
+
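+        # /geo/drm/ URLs must be requested from the rtbf. host instead of rtbf-vod.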
+ fix_url = lambda x: x.replace('//rtbf-vod.', '//rtbf.') if '/geo/drm/' in x else x
+ http_url = data.get('url')
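+        # Derive progressive HTTP formats from the HLS variants by substituting
+        # each variant's height into the MP4 URL template.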
+ if formats and http_url and re.search(height_re, http_url):
+ http_url = fix_url(http_url)
+            for m3u8_f in formats[:]:
+ height = m3u8_f.get('height')
+ if not height:
+ continue
+ f = m3u8_f.copy()
+ del f['protocol']
+ f.update({
+ 'format_id': m3u8_f['format_id'].replace('hls-', 'http-'),
+ 'url': re.sub(height_re, '-%dp.' % height, http_url),
+ })
+ formats.append(f)
+ else:
+ sources = data.get('sources') or {}
+ for key, format_id in self._QUALITIES:
+ format_url = sources.get(key)
+ if not format_url:
+ continue
+ height = int_or_none(self._search_regex(
+ height_re, format_url, 'height', default=None))
formats.append({
'format_id': format_id,
- 'url': format_url,
+ 'url': fix_url(format_url),
+ 'height': height,
})
- thumbnails = []
- for thumbnail_id, thumbnail_url in data.get('thumbnail', {}).items():
- if thumbnail_id != 'default':
- thumbnails.append({
- 'url': self._IMAGE_HOST + thumbnail_url,
- 'id': thumbnail_id,
- })
+ mpd_url = data.get('urlDash')
+ if not data.get('drm') and mpd_url:
+ formats.extend(self._extract_mpd_formats(
+ mpd_url, media_id, mpd_id='dash', fatal=False))
+
+ audio_url = data.get('urlAudio')
+ if audio_url:
+ formats.append({
+ 'format_id': 'audio',
+ 'url': audio_url,
+ 'vcodec': 'none',
+ })
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for track in (data.get('tracks') or {}).values():
+ sub_url = track.get('url')
+ if not sub_url:
+ continue
+ subtitles.setdefault(track.get('lang') or 'fr', []).append({
+ 'url': sub_url,
+ })
return {
- 'id': video_id,
+ 'id': media_id,
'formats': formats,
- 'title': data['title'],
- 'description': data.get('description') or data.get('subtitle'),
- 'thumbnails': thumbnails,
- 'duration': data.get('duration') or data.get('realDuration'),
- 'timestamp': int_or_none(data.get('created')),
- 'view_count': int_or_none(data.get('viewCount')),
- 'uploader': data.get('channel'),
- 'tags': data.get('tags'),
+ 'title': title,
+ 'description': strip_or_none(data.get('description')),
+ 'thumbnail': data.get('thumbnail'),
+ 'duration': float_or_none(data.get('realDuration')),
+ 'timestamp': int_or_none(data.get('liveFrom')),
+ 'series': data.get('programLabel'),
+ 'subtitles': subtitles,
+ 'is_live': is_live,
}
self._login()
def _login(self):
- (username, password) = self._get_login_info()
+ username, password = self._get_login_info()
if username is None:
return
class SafariIE(SafariBaseIE):
IE_NAME = 'safari'
IE_DESC = 'safaribooksonline.com online video'
- _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?#&]+)\.html'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?safaribooksonline\.com/
+ (?:
+ library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?\#&]+)\.html|
+ videos/[^/]+/[^/]+/(?P<reference_id>[^-]+-[^/?\#&]+)
+ )
+ '''
_TESTS = [{
'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
}, {
'url': 'https://www.safaribooksonline.com/library/view/learning-path-red/9780134664057/RHCE_Introduction.html',
'only_matching': True,
+ }, {
+ 'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314/9780134217314-PYMC_13_00',
+ 'only_matching': True,
}]
+ _PARTNER_ID = '1926081'
+ _UICONF_ID = '29375172'
+
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
- video_id = '%s/%s' % (mobj.group('course_id'), mobj.group('part'))
-
- webpage = self._download_webpage(url, video_id)
- reference_id = self._search_regex(
- r'data-reference-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
- webpage, 'kaltura reference id', group='id')
- partner_id = self._search_regex(
- r'data-partner-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
- webpage, 'kaltura widget id', group='id')
- ui_id = self._search_regex(
- r'data-ui-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
- webpage, 'kaltura uiconf id', group='id')
+
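+ # /videos/ URLs already carry the Kaltura reference id, so no page download is needed.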
+ reference_id = mobj.group('reference_id')
+ if reference_id:
+ video_id = reference_id
+ partner_id = self._PARTNER_ID
+ ui_id = self._UICONF_ID
+ else:
+ video_id = '%s-%s' % (mobj.group('course_id'), mobj.group('part'))
+
+ webpage, urlh = self._download_webpage_handle(url, video_id)
+
+ mobj = re.match(self._VALID_URL, urlh.geturl())
+ reference_id = mobj.group('reference_id')
+ if not reference_id:
+ reference_id = self._search_regex(
+ r'data-reference-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
+ webpage, 'kaltura reference id', group='id')
+ partner_id = self._search_regex(
+ r'data-partner-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
+ webpage, 'kaltura widget id', default=self._PARTNER_ID,
+ group='id')
+ ui_id = self._search_regex(
+ r'data-ui-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
+ webpage, 'kaltura uiconf id', default=self._UICONF_ID,
+ group='id')
query = {
'wid': '_%s' % partner_id,
_VALID_URL = r'''(?x)
https?://
(?:
- (?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)|
+ (?:www\.)?safaribooksonline\.com/
+ (?:
+ library/view/[^/]+|
+ api/v1/book|
+ videos/[^/]+
+ )|
techbus\.safaribooksonline\.com
)
- /(?P<id>[^/]+)/?(?:[#?]|$)
+ /(?P<id>[^/]+)
'''
_TESTS = [{
}, {
'url': 'http://techbus.safaribooksonline.com/9780134426365',
'only_matching': True,
+ }, {
+ 'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314',
+ 'only_matching': True,
}]
+ @classmethod
+ def suitable(cls, url):
+ return (False if SafariIE.suitable(url) or SafariApiIE.suitable(url)
+ else super(SafariCourseIE, cls).suitable(url))
+
def _real_extract(self, url):
course_id = self._match_id(url)
# The video id is in the redirected url
self.to_screen('Getting video id')
request = HEADRequest(url)
- (_, urlh) = self._download_webpage_handle(request, 'NA', False)
+ _, urlh = self._download_webpage_handle(request, 'NA', False)
return self._real_extract(urlh.geturl())
else:
pseudo_id = mobj.group('pseudo_id')
class SixPlayIE(InfoExtractor):
IE_NAME = '6play'
- _VALID_URL = r'(?:6play:|https?://(?:www\.)?6play\.fr/.+?-c_)(?P<id>[0-9]+)'
- _TEST = {
- 'url': 'http://www.6play.fr/le-meilleur-patissier-p_1807/le-meilleur-patissier-special-fetes-mercredi-a-21-00-sur-m6-c_11638450',
- 'md5': '42310bffe4ba3982db112b9cd3467328',
+ _VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P<domain>6play\.fr|rtlplay\.be)/.+?-c_)(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.6play.fr/minute-par-minute-p_9533/le-but-qui-a-marque-lhistoire-du-football-francais-c_12041051',
+ 'md5': '31fcd112637baa0c2ab92c4fcd8baf27',
'info_dict': {
- 'id': '11638450',
+ 'id': '12041051',
'ext': 'mp4',
- 'title': 'Le Meilleur Pâtissier, spécial fêtes mercredi à 21:00 sur M6',
- 'description': 'md5:308853f6a5f9e2d55a30fc0654de415f',
- 'duration': 39,
- 'series': 'Le meilleur pâtissier',
+ 'title': 'Le but qui a marqué l\'histoire du football français !',
+ 'description': 'md5:b59e7e841d646ef1eb42a7868eb6a851',
},
- 'params': {
- 'skip_download': True,
- },
- }
+ }, {
+ 'url': 'https://www.rtlplay.be/rtl-info-13h-p_8551/les-titres-du-rtlinfo-13h-c_12045869',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
- video_id = self._match_id(url)
+ domain, video_id = re.search(self._VALID_URL, url).groups()
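+ # Pick the middleware service name and x-customer-name header for the requested domain.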
+ service, consumer_name = {
+ '6play.fr': ('6play', 'm6web'),
+ 'rtlplay.be': ('rtlbe_rtl_play', 'rtlbe'),
+ }.get(domain, ('6play', 'm6web'))
data = self._download_json(
- 'https://pc.middleware.6play.fr/6play/v2/platforms/m6group_web/services/6play/videos/clip_%s' % video_id,
- video_id, query={
+ 'https://pc.middleware.6play.fr/6play/v2/platforms/m6group_web/services/%s/videos/clip_%s' % (service, video_id),
+ video_id, headers={
+ 'x-customer-name': consumer_name
+ }, query={
'csa': 5,
'with': 'clips',
})
subtitles.setdefault('fr', []).append({'url': asset_url})
continue
if container == 'm3u8' or ext == 'm3u8':
- if protocol == 'usp' and not compat_parse_qs(compat_urllib_parse_urlparse(asset_url).query).get('token', [None])[0]:
+ if protocol == 'usp':
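+ # Tokenized USP URLs redirect to the final manifest; resolve the redirect before rewriting the path.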
+ if compat_parse_qs(compat_urllib_parse_urlparse(asset_url).query).get('token', [None])[0]:
+ urlh = self._request_webpage(asset_url, video_id, fatal=False)
+ if not urlh:
+ continue
+ asset_url = urlh.geturl()
asset_url = re.sub(r'/([^/]+)\.ism/[^/]*\.m3u8', r'/\1.ism/\1.m3u8', asset_url)
formats.extend(self._extract_m3u8_formats(
asset_url, video_id, 'mp4', 'm3u8_native',
thumbnail = info.get('artwork_url') or info.get('user', {}).get('avatar_url')
if isinstance(thumbnail, compat_str):
thumbnail = thumbnail.replace('-large', '-t500x500')
- ext = 'mp3'
result = {
'id': track_id,
'uploader': info.get('user', {}).get('username'),
track_id, 'Downloading track url', query=query)
for key, stream_url in format_dict.items():
- abr = int_or_none(self._search_regex(
- r'_(\d+)_url', key, 'audio bitrate', default=None))
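+ # Format keys encode codec and bitrate, e.g. "http_mp3_128_url" or "hls_opus_64_url".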
+ ext, abr = 'mp3', None
+ mobj = re.search(r'_([^_]+)_(\d+)_url', key)
+ if mobj:
+ ext, abr = mobj.groups()
+ abr = int(abr)
if key.startswith('http'):
stream_formats = [{
'format_id': key,
}]
elif key.startswith('hls'):
stream_formats = self._extract_m3u8_formats(
- stream_url, track_id, 'mp3', entry_protocol='m3u8_native',
+ stream_url, track_id, ext, entry_protocol='m3u8_native',
m3u8_id=key, fatal=False)
else:
continue
- for f in stream_formats:
- f['abr'] = abr
+ if abr:
+ for f in stream_formats:
+ f['abr'] = abr
formats.extend(stream_formats)
formats.append({
'format_id': 'fallback',
'url': update_url_query(info['stream_url'], query),
- 'ext': ext,
+ 'ext': 'mp3',
})
for f in formats:
from .spiegeltv import SpiegeltvIE
from ..compat import compat_urlparse
from ..utils import (
- extract_attributes,
- unified_strdate,
- get_element_by_attribute,
+ parse_duration,
+ strip_or_none,
+ unified_timestamp,
)
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<id>[0-9]+)(?:-embed|-iframe)?(?:\.html)?(?:#.*)?$'
_TESTS = [{
'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
- 'md5': '2c2754212136f35fb4b19767d242f66e',
+ 'md5': 'b57399839d055fccfeb9a0455c439868',
'info_dict': {
- 'id': '1259285',
+ 'id': '563747',
'ext': 'mp4',
'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv',
'description': 'md5:8029d8310232196eb235d27575a8b9f4',
'duration': 49,
'upload_date': '20130311',
+ 'timestamp': 1362994320,
},
}, {
'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
- 'md5': 'f2cdf638d7aa47654e251e1aee360af1',
+ 'md5': '5b6c2f4add9d62912ed5fc78a1faed80',
'info_dict': {
- 'id': '1309159',
+ 'id': '580988',
'ext': 'mp4',
'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers',
'description': 'md5:c2322b65e58f385a820c10fa03b2d088',
'duration': 983,
'upload_date': '20131115',
+ 'timestamp': 1384546642,
},
}, {
'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-embed.html',
- 'md5': 'd8eeca6bfc8f1cd6f490eb1f44695d51',
+ 'md5': '97b91083a672d72976faa8433430afb9',
'info_dict': {
- 'id': '1519126',
+ 'id': '601883',
'ext': 'mp4',
'description': 'SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. Hier kommen seine Antworten auf die besten sechs Fragen.',
'title': 'Fragen an Astronaut Alexander Gerst: "Bekommen Sie die Tageszeiten mit?"',
'upload_date': '20140904',
+ 'timestamp': 1409834160,
}
}, {
'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-iframe.html',
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage, handle = self._download_webpage_handle(url, video_id)
+ metadata_url = 'http://www.spiegel.de/video/metadata/video-%s.json' % video_id
+ handle = self._request_webpage(metadata_url, video_id)
# 302 to spiegel.tv, like http://www.spiegel.de/video/der-film-zum-wochenende-die-wahrheit-ueber-maenner-video-99003272.html
if SpiegeltvIE.suitable(handle.geturl()):
return self.url_result(handle.geturl(), 'Spiegeltv')
- nexx_id = self._search_regex(
- r'nexxOmniaId\s*:\s*(\d+)', webpage, 'nexx id', default=None)
- if nexx_id:
- domain_id = NexxIE._extract_domain_id(webpage) or '748'
- return self.url_result(
- 'nexx:%s:%s' % (domain_id, nexx_id), ie=NexxIE.ie_key(),
- video_id=nexx_id)
-
- video_data = extract_attributes(self._search_regex(r'(<div[^>]+id="spVideoElements"[^>]+>)', webpage, 'video element', default=''))
-
- title = video_data.get('data-video-title') or get_element_by_attribute('class', 'module-title', webpage)
- description = video_data.get('data-video-teaser') or self._html_search_meta('description', webpage, 'description')
-
- base_url = self._search_regex(
- [r'server\s*:\s*(["\'])(?P<url>.+?)\1', r'var\s+server\s*=\s*"(?P<url>[^"]+)\"'],
- webpage, 'server URL', group='url')
-
- xml_url = base_url + video_id + '.xml'
- idoc = self._download_xml(xml_url, video_id)
-
- formats = []
- for n in list(idoc):
- if n.tag.startswith('type') and n.tag != 'type6':
- format_id = n.tag.rpartition('type')[2]
- video_url = base_url + n.find('./filename').text
- formats.append({
- 'format_id': format_id,
- 'url': video_url,
- 'width': int(n.find('./width').text),
- 'height': int(n.find('./height').text),
- 'abr': int(n.find('./audiobitrate').text),
- 'vbr': int(n.find('./videobitrate').text),
- 'vcodec': n.find('./codec').text,
- 'acodec': 'MP4A',
- })
- duration = float(idoc[0].findall('./duration')[0].text)
-
- self._check_formats(formats, video_id)
- self._sort_formats(formats)
+ video_data = self._parse_json(self._webpage_read_content(
+ handle, metadata_url, video_id), video_id)
+ title = video_data['title']
+ nexx_id = video_data['nexxOmniaId']
+ domain_id = video_data.get('nexxOmniaDomain') or '748'
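+ # Delegate actual extraction to the Nexx extractor while passing along the metadata gathered here.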
return {
+ '_type': 'url_transparent',
'id': video_id,
+ 'url': 'nexx:%s:%s' % (domain_id, nexx_id),
'title': title,
- 'description': description.strip() if description else None,
- 'duration': duration,
- 'upload_date': unified_strdate(video_data.get('data-video-date')),
- 'formats': formats,
+ 'description': strip_or_none(video_data.get('teaser')),
+ 'duration': parse_duration(video_data.get('duration')),
+ 'timestamp': unified_timestamp(video_data.get('datum')),
+ 'ie_key': NexxIE.ie_key(),
}
from __future__ import unicode_literals
-import re
-
from .mtv import MTVServicesInfoExtractor
-class SpikeIE(MTVServicesInfoExtractor):
- _VALID_URL = r'https?://(?:[^/]+\.)?spike\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)'
+class BellatorIE(MTVServicesInfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?bellator\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)'
_TESTS = [{
- 'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle',
- 'md5': '1a9265f32b0c375793d6c4ce45255256',
+ 'url': 'http://www.bellator.com/fight/atwr7k/bellator-158-michael-page-vs-evangelista-cyborg',
'info_dict': {
- 'id': 'b9c8221a-4e50-479a-b86d-3333323e38ba',
+ 'id': 'b55e434e-fde1-4a98-b7cc-92003a034de4',
'ext': 'mp4',
- 'title': 'Auction Hunters|December 27, 2013|4|414|Can Allen Ride A Hundred Year-Old Motorcycle?',
- 'description': 'md5:fbed7e82ed5fad493615b3094a9499cb',
- 'timestamp': 1388120400,
- 'upload_date': '20131227',
+ 'title': 'Douglas Lima vs. Paul Daley - Round 1',
+ 'description': 'md5:805a8dd29310fd611d32baba2f767885',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
},
}, {
- 'url': 'http://www.spike.com/full-episodes/j830qm/lip-sync-battle-joel-mchale-vs-jim-rash-season-2-ep-209',
- 'md5': 'b25c6f16418aefb9ad5a6cae2559321f',
+ 'url': 'http://www.bellator.com/video-clips/bw6k7n/bellator-158-foundations-michael-venom-page',
+ 'only_matching': True,
+ }]
+
+ _FEED_URL = 'http://www.spike.com/feeds/mrss/'
+ _GEO_COUNTRIES = ['US']
+
+
+class ParamountNetworkIE(MTVServicesInfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?paramountnetwork\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)'
+ _TESTS = [{
+ 'url': 'http://www.paramountnetwork.com/episodes/j830qm/lip-sync-battle-joel-mchale-vs-jim-rash-season-2-ep-13',
'info_dict': {
'id': '37ace3a8-1df6-48be-85b8-38df8229e241',
'ext': 'mp4',
'title': 'Lip Sync Battle|April 28, 2016|2|209|Joel McHale Vs. Jim Rash|Act 1',
'description': 'md5:a739ca8f978a7802f67f8016d27ce114',
},
- }, {
- 'url': 'http://www.spike.com/video-clips/lhtu8m/',
- 'only_matching': True,
- }, {
- 'url': 'http://www.spike.com/video-clips/lhtu8m',
- 'only_matching': True,
- }, {
- 'url': 'http://bellator.spike.com/fight/atwr7k/bellator-158-michael-page-vs-evangelista-cyborg',
- 'only_matching': True,
- }, {
- 'url': 'http://bellator.spike.com/video-clips/bw6k7n/bellator-158-foundations-michael-venom-page',
- 'only_matching': True,
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
}]
- _FEED_URL = 'http://www.spike.com/feeds/mrss/'
- _MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s'
- _CUSTOM_URL_REGEX = re.compile(r'spikenetworkapp://([^/]+/[-a-fA-F0-9]+)')
+ _FEED_URL = 'http://www.paramountnetwork.com/feeds/mrss/'
_GEO_COUNTRIES = ['US']
-
- def _extract_mgid(self, webpage):
- mgid = super(SpikeIE, self)._extract_mgid(webpage)
- if mgid is None:
- url_parts = self._search_regex(self._CUSTOM_URL_REGEX, webpage, 'episode_id')
- video_type, episode_id = url_parts.split('/', 1)
- mgid = 'mgid:arc:{0}:spike.com:{1}'.format(video_type, episode_id)
- return mgid
import re
from .turner import TurnerBaseIE
+from ..compat import (
+ compat_urllib_parse_urlparse,
+ compat_parse_qs,
+)
from ..utils import (
float_or_none,
int_or_none,
def _real_extract(self, url):
site, display_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, display_id)
- video_data = self._parse_json(self._search_regex(
+ drupal_settings = self._parse_json(self._search_regex(
r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>',
- webpage, 'drupal setting'), display_id)['turner_playlist'][0]
+ webpage, 'drupal setting'), display_id)
+ video_data = drupal_settings['turner_playlist'][0]
media_id = video_data['mediaID']
title = video_data['title']
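+ # The SPE tokenizer query parameters come from the ngtv_token_url in the page's drupal settings.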
+ tokenizer_query = compat_parse_qs(compat_urllib_parse_urlparse(
+ drupal_settings['ngtv_token_url']).query)
- streams_data = self._download_json(
- 'http://medium.ngtv.io/media/%s/tv' % media_id,
- media_id)['media']['tv']
- duration = None
- chapters = []
- formats = []
- for supported_type in ('unprotected', 'bulkaes'):
- stream_data = streams_data.get(supported_type, {})
- m3u8_url = stream_data.get('secureUrl') or stream_data.get('url')
- if not m3u8_url:
- continue
- if stream_data.get('playlistProtection') == 'spe':
- m3u8_url = self._add_akamai_spe_token(
- 'http://token.vgtf.net/token/token_spe',
- m3u8_url, media_id, {
- 'url': url,
- 'site_name': site[:3].upper(),
- 'auth_required': video_data.get('authRequired') == '1',
- })
- formats.extend(self._extract_m3u8_formats(
- m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))
-
- duration = float_or_none(stream_data.get('totalRuntime') or video_data.get('duration'))
-
- if not chapters:
- for chapter in stream_data.get('contentSegments', []):
- start_time = float_or_none(chapter.get('start'))
- duration = float_or_none(chapter.get('duration'))
- if start_time is None or duration is None:
- continue
- chapters.append({
- 'start_time': start_time,
- 'end_time': start_time + duration,
- })
- self._sort_formats(formats)
+ info = self._extract_ngtv_info(
+ media_id, tokenizer_query, {
+ 'url': url,
+ 'site_name': site[:3].upper(),
+ 'auth_required': video_data.get('authRequired') == '1',
+ })
thumbnails = []
for image_id, image in video_data.get('images', {}).items():
})
thumbnails.append(i)
- return {
+ info.update({
'id': media_id,
'title': title,
'description': strip_or_none(video_data.get('descriptionNoTags') or video_data.get('shortDescriptionNoTags')),
- 'duration': duration,
+ 'duration': float_or_none(video_data.get('duration')) or info.get('duration'),
'timestamp': int_or_none(video_data.get('created')),
'season_number': int_or_none(video_data.get('season')),
'episode_number': int_or_none(video_data.get('episode')),
- 'cahpters': chapters,
'thumbnails': thumbnails,
- 'formats': formats,
- }
+ })
+ return info
# coding: utf-8
from __future__ import unicode_literals
-import binascii
-import re
import json
-from .common import InfoExtractor
-from ..compat import (
- compat_b64decode,
- compat_ord,
-)
+from .turner import TurnerBaseIE
from ..utils import (
+ determine_ext,
ExtractorError,
+ int_or_none,
+ mimetype2ext,
+ parse_duration,
+ parse_iso8601,
qualities,
- determine_ext,
)
-class TeamcocoIE(InfoExtractor):
- _VALID_URL = r'https?://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)'
+class TeamcocoIE(TurnerBaseIE):
+ _VALID_URL = r'https?://teamcoco\.com/(?P<id>([^/]+/)*[^/?#]+)'
_TESTS = [
{
- 'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
- 'md5': '3f7746aa0dc86de18df7539903d399ea',
+ 'url': 'http://teamcoco.com/video/mary-kay-remote',
+ 'md5': '55d532f81992f5c92046ad02fec34d7d',
'info_dict': {
'id': '80187',
'ext': 'mp4',
'title': 'Conan Becomes A Mary Kay Beauty Consultant',
'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.',
- 'duration': 504,
- 'age_limit': 0,
+ 'duration': 495.0,
+ 'upload_date': '20140402',
+ 'timestamp': 1396407600,
}
}, {
'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.',
'title': 'Louis C.K. Interview Pt. 1 11/3/11',
'duration': 288,
- 'age_limit': 0,
+ 'upload_date': '20111104',
+ 'timestamp': 1320405840,
}
}, {
'url': 'http://teamcoco.com/video/timothy-olyphant-drinking-whiskey',
'ext': 'mp4',
'title': 'Timothy Olyphant Raises A Toast To “Justified”',
'description': 'md5:15501f23f020e793aeca761205e42c24',
+ 'upload_date': '20150415',
+ 'timestamp': 1429088400,
},
'params': {
'skip_download': True, # m3u8 downloads
},
'params': {
'skip_download': True, # m3u8 downloads
- }
+ },
+ 'skip': 'This video is no longer available.',
+ }, {
+ 'url': 'http://teamcoco.com/video/the-conan-audiencey-awards-for-04/25/18',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://teamcoco.com/italy/conan-jordan-schlansky-hit-the-streets-of-florence',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://teamcoco.com/haiti/conan-s-haitian-history-lesson',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://teamcoco.com/israel/conan-hits-the-streets-beaches-of-tel-aviv',
+ 'only_matching': True,
}
]
- _VIDEO_ID_REGEXES = (
- r'"eVar42"\s*:\s*(\d+)',
- r'Ginger\.TeamCoco\.openInApp\("video",\s*"([^"]+)"',
- r'"id_not"\s*:\s*(\d+)'
- )
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
-
- display_id = mobj.group('display_id')
- webpage, urlh = self._download_webpage_handle(url, display_id)
- if 'src=expired' in urlh.geturl():
- raise ExtractorError('This video is expired.', expected=True)
- video_id = mobj.group('video_id')
- if not video_id:
- video_id = self._html_search_regex(
- self._VIDEO_ID_REGEXES, webpage, 'video id')
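+ # Thin wrapper over teamcoco's GraphQL endpoint: runs a find<ObjectType> query and returns its payload.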
+ def _graphql_call(self, query_template, object_type, object_id):
+ find_object = 'find' + object_type
+ return self._download_json(
+ 'http://teamcoco.com/graphql/', object_id, data=json.dumps({
+ 'query': query_template % (find_object, object_id)
+ }))['data'][find_object]
- data = None
-
- preload_codes = self._html_search_regex(
- r'(function.+)setTimeout\(function\(\)\{playlist',
- webpage, 'preload codes')
- base64_fragments = re.findall(r'"([a-zA-Z0-9+/=]+)"', preload_codes)
- base64_fragments.remove('init')
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ response = self._graphql_call('''{
+ %s(slug: "%s") {
+ ... on RecordSlug {
+ record {
+ id
+ title
+ teaser
+ publishOn
+ thumb {
+ preview
+ }
+ file {
+ url
+ }
+ tags {
+ name
+ }
+ duration
+ turnerMediaId
+ turnerMediaAuthToken
+ }
+ }
+ ... on NotFoundSlug {
+ status
+ }
+ }
+}''', 'Slug', display_id)
+ if response.get('status'):
+ raise ExtractorError('This video is no longer available.', expected=True)
+
+ record = response['record']
+ video_id = record['id']
+
+ info = {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': record['title'],
+ 'thumbnail': record.get('thumb', {}).get('preview'),
+ 'description': record.get('teaser'),
+ 'duration': parse_duration(record.get('duration')),
+ 'timestamp': parse_iso8601(record.get('publishOn')),
+ }
- def _check_sequence(cur_fragments):
- if not cur_fragments:
- return
- for i in range(len(cur_fragments)):
- cur_sequence = (''.join(cur_fragments[i:] + cur_fragments[:i])).encode('ascii')
- try:
- raw_data = compat_b64decode(cur_sequence)
- if compat_ord(raw_data[0]) == compat_ord('{'):
- return json.loads(raw_data.decode('utf-8'))
- except (TypeError, binascii.Error, UnicodeDecodeError, ValueError):
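+ # Records with a Turner media id are served via NGTV; authenticate with the token from the GraphQL record.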
+ media_id = record.get('turnerMediaId')
+ if media_id:
+ self._initialize_geo_bypass({
+ 'countries': ['US'],
+ })
+ info.update(self._extract_ngtv_info(media_id, {
+ 'accessToken': record['turnerMediaAuthToken'],
+ 'accessTokenType': 'jws',
+ }))
+ else:
+ video_sources = self._graphql_call('''{
+ %s(id: "%s") {
+ src
+ }
+}''', 'RecordVideoSource', video_id) or {}
+
+ formats = []
+ get_quality = qualities(['low', 'sd', 'hd', 'uhd'])
+ for format_id, src in video_sources.get('src', {}).items():
+ if not isinstance(src, dict):
continue
-
- def _check_data():
- for i in range(len(base64_fragments) + 1):
- for j in range(i, len(base64_fragments) + 1):
- data = _check_sequence(base64_fragments[:i] + base64_fragments[j:])
- if data:
- return data
-
- self.to_screen('Try to compute possible data sequence. This may take some time.')
- data = _check_data()
-
- if not data:
- raise ExtractorError(
- 'Preload information could not be extracted', expected=True)
-
- formats = []
- get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])
- for filed in data['files']:
- if determine_ext(filed['url']) == 'm3u8':
- # compat_urllib_parse.urljoin does not work here
- if filed['url'].startswith('/'):
- m3u8_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + filed['url']
- else:
- m3u8_url = filed['url']
- m3u8_formats = self._extract_m3u8_formats(
- m3u8_url, video_id, ext='mp4')
- for m3u8_format in m3u8_formats:
- if m3u8_format not in formats:
- formats.append(m3u8_format)
- elif determine_ext(filed['url']) == 'f4m':
- # TODO Correct f4m extraction
- continue
- else:
- if filed['url'].startswith('/mp4:protected/'):
- # TODO Correct extraction for these files
+ src_url = src.get('src')
+ if not src_url:
continue
- m_format = re.search(r'(\d+(k|p))\.mp4', filed['url'])
- if m_format is not None:
- format_id = m_format.group(1)
+ ext = determine_ext(src_url, mimetype2ext(src.get('type')))
+ if format_id == 'hls' or ext == 'm3u8':
+ # compat_urllib_parse.urljoin does not work here
+ if src_url.startswith('/'):
+ src_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + src_url
+ formats.extend(self._extract_m3u8_formats(
+ src_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
else:
- format_id = filed['bitrate']
- tbr = (
- int(filed['bitrate'])
- if filed['bitrate'].isdigit()
- else None)
-
- formats.append({
- 'url': filed['url'],
- 'ext': 'mp4',
- 'tbr': tbr,
- 'format_id': format_id,
- 'quality': get_quality(format_id),
- })
-
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'formats': formats,
- 'title': data['title'],
- 'thumbnail': data.get('thumb', {}).get('href'),
- 'description': data.get('teaser'),
- 'duration': data.get('duration'),
- 'age_limit': self._family_friendly_search(webpage),
- }
+ if src_url.startswith('/mp4:protected/'):
+ # TODO Correct extraction for these files
+ continue
+ tbr = int_or_none(self._search_regex(
+ r'(\d+)k\.mp4', src_url, 'tbr', default=None))
+
+ formats.append({
+ 'url': src_url,
+ 'ext': ext,
+ 'tbr': tbr,
+ 'format_id': format_id,
+ 'quality': get_quality(format_id),
+ })
+ if not formats:
+ formats = self._extract_m3u8_formats(
+ record['file']['url'], video_id, 'mp4', fatal=False)
+ self._sort_formats(formats)
+ info['formats'] = formats
+
+ return info
_NETRC_MACHINE = 'tennistv'
def _login(self):
- (username, password) = self._get_login_info()
+ username, password = self._get_login_info()
if not username or not password:
raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
# Sometimes wat serves the whole file with the --test option
'skip_download': True,
},
+ 'expected_warnings': ['HTTP Error 404'],
}, {
'url': 'http://www.tfou.fr/chuggington/videos/le-grand-mysterioso-chuggington-7085291-739.html',
'info_dict': {
}]
def _login(self):
- (username, password) = self._get_login_info()
+ username, password = self._get_login_info()
if username is None:
return
self.report_login()
import re
from .common import InfoExtractor
-from ..utils import int_or_none
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ urlencode_postdata
+)
class TumblrIE(InfoExtractor):
_VALID_URL = r'https?://(?P<blog_name>[^/?#&]+)\.tumblr\.com/(?:post|video)/(?P<id>[0-9]+)(?:$|[/?#])'
+ _NETRC_MACHINE = 'tumblr'
+ _LOGIN_URL = 'https://www.tumblr.com/login'
_TESTS = [{
'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes',
'md5': '479bb068e5b16462f5176a6828829767',
'add_ie': ['Instagram'],
}]
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login page')
+
+ login_form = self._hidden_inputs(login_page)
+ login_form.update({
+ 'user[email]': username,
+ 'user[password]': password
+ })
+
+ response, urlh = self._download_webpage_handle(
+ self._LOGIN_URL, None, 'Logging in',
+ data=urlencode_postdata(login_form), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ 'Referer': self._LOGIN_URL,
+ })
+
+ # Successful login
+ if '/dashboard' in urlh.geturl():
+ return
+
+ login_errors = self._parse_json(
+ self._search_regex(
+ r'RegistrationForm\.errors\s*=\s*(\[.+?\])\s*;', response,
+ 'login errors', default='[]'),
+ None, fatal=False)
+ if login_errors:
+ raise ExtractorError(
+ 'Unable to login: %s' % login_errors[0], expected=True)
+
+ self.report_warning('Login has probably failed')
+
def _real_extract(self, url):
m_url = re.match(self._VALID_URL, url)
video_id = m_url.group('id')
url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
webpage, urlh = self._download_webpage_handle(url, video_id)
+ redirect_url = compat_str(urlh.geturl())
+ if 'tumblr.com/safe-mode' in redirect_url or redirect_url.startswith('/safe-mode'):
+ raise ExtractorError(
+ 'This Tumblr may contain sensitive media. '
+ 'Disable safe mode in your account settings '
+ 'at https://www.tumblr.com/settings/account#safe_mode',
+ expected=True)
+
iframe_url = self._search_regex(
r'src=\'(https?://www\.tumblr\.com/video/[^\']+)\'',
webpage, 'iframe url', default=None)
if iframe_url is None:
- return self.url_result(urlh.geturl(), 'Generic')
+ return self.url_result(redirect_url, 'Generic')
iframe = self._download_webpage(iframe_url, video_id, 'Downloading iframe page')
return {
'id': content_id,
- 'title': title,
+ 'title': self._live_title(title) if is_live else title,
'formats': formats,
'thumbnail': thumbnail,
'location': location,
xpath_text,
int_or_none,
determine_ext,
+ float_or_none,
parse_duration,
xpath_attr,
update_url_query,
def _extract_timestamp(self, video_data):
return int_or_none(xpath_attr(video_data, 'dateCreated', 'uts'))
- def _add_akamai_spe_token(self, tokenizer_src, video_url, content_id, ap_data):
+ def _add_akamai_spe_token(self, tokenizer_src, video_url, content_id, ap_data, custom_tokenizer_query=None):
secure_path = self._search_regex(r'https?://[^/]+(.+/)', video_url, 'secure path') + '*'
token = self._AKAMAI_SPE_TOKEN_CACHE.get(secure_path)
if not token:
query = {
'path': secure_path,
- 'videoId': content_id,
}
+ if custom_tokenizer_query:
+ query.update(custom_tokenizer_query)
+ else:
+ query['videoId'] = content_id
if ap_data.get('auth_required'):
query['accessToken'] = self._extract_mvpd_auth(ap_data['url'], content_id, ap_data['site_name'], ap_data['site_name'])
auth = self._download_xml(
'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
'is_live': is_live,
}
+
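+ # Shared NGTV (medium.ngtv.io) stream extraction, factored out so TBS and Teamcoco can reuse it.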
+ def _extract_ngtv_info(self, media_id, tokenizer_query, ap_data=None):
+ streams_data = self._download_json(
+ 'http://medium.ngtv.io/media/%s/tv' % media_id,
+ media_id)['media']['tv']
+ duration = None
+ chapters = []
+ formats = []
+ for supported_type in ('unprotected', 'bulkaes'):
+ stream_data = streams_data.get(supported_type, {})
+ m3u8_url = stream_data.get('secureUrl') or stream_data.get('url')
+ if not m3u8_url:
+ continue
+ if stream_data.get('playlistProtection') == 'spe':
+ m3u8_url = self._add_akamai_spe_token(
+ 'http://token.ngtv.io/token/token_spe',
+ m3u8_url, media_id, ap_data or {}, tokenizer_query)
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))
+
+ duration = float_or_none(stream_data.get('totalRuntime'))
+
+ if not chapters:
+ for chapter in stream_data.get('contentSegments', []):
+ start_time = float_or_none(chapter.get('start'))
+ chapter_duration = float_or_none(chapter.get('duration'))
+ if start_time is None or chapter_duration is None:
+ continue
+ chapters.append({
+ 'start_time': start_time,
+ 'end_time': start_time + chapter_duration,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'formats': formats,
+ 'chapters': chapters,
+ 'duration': duration,
+ }
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
-from ..compat import compat_str
from ..utils import (
int_or_none,
parse_iso8601,
- try_get,
- determine_ext,
)
title = info['title']
- subtitles = {}
- formats = []
- # http formats are linked with unresolvable host
- for kind in ('hls3', ''):
- data = self._download_json(
- 'https://prima.tv4play.se/api/web/asset/%s/play.json' % video_id,
- video_id, 'Downloading sources JSON', query={
- 'protocol': kind,
- 'videoFormat': 'MP4+WEBVTT',
- })
- items = try_get(data, lambda x: x['playback']['items']['item'])
- if not items:
- continue
- if isinstance(items, dict):
- items = [items]
- for item in items:
- manifest_url = item.get('url')
- if not isinstance(manifest_url, compat_str):
- continue
- ext = determine_ext(manifest_url)
- if ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- manifest_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id=kind, fatal=False))
- elif ext == 'f4m':
- formats.extend(self._extract_akamai_formats(
- manifest_url, video_id, {
- 'hls': 'tv4play-i.akamaihd.net',
- }))
- elif ext == 'webvtt':
- subtitles = self._merge_subtitles(
- subtitles, {
- 'sv': [{
- 'url': manifest_url,
- 'ext': 'vtt',
- }]})
+ manifest_url = self._download_json(
+ 'https://playback-api.b17g.net/media/' + video_id,
+ video_id, query={
+ 'service': 'tv4',
+ 'device': 'browser',
+ 'protocol': 'hls',
+ })['playbackItem']['manifestUrl']
+ formats = self._extract_m3u8_formats(
+ manifest_url, video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False)
+ formats.extend(self._extract_mpd_formats(
+ manifest_url.replace('.m3u8', '.mpd'),
+ video_id, mpd_id='dash', fatal=False))
+ formats.extend(self._extract_f4m_formats(
+ manifest_url.replace('.m3u8', '.f4m'),
+ video_id, f4m_id='hds', fatal=False))
+ formats.extend(self._extract_ism_formats(
+ re.sub(r'\.ism/.+?\.m3u8', r'.ism/Manifest', manifest_url),
+ video_id, ism_id='mss', fatal=False))
if not formats and info.get('is_geo_restricted'):
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
'id': video_id,
'title': title,
'formats': formats,
- 'subtitles': subtitles,
+ # 'subtitles': subtitles,
'description': info.get('description'),
'timestamp': parse_iso8601(info.get('broadcast_date_time')),
'duration': int_or_none(info.get('duration')),
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ unescapeHTML,
+)
+
+
+class TVNetIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/]+)\.tvnet\.gov\.vn/[^/]+/(?:\d+/)?(?P<id>\d+)(?:/|$)'
+ _TESTS = [{
+ # video
+ 'url': 'http://de.tvnet.gov.vn/video/109788/vtv1---bac-tuyet-tai-lao-cai-va-ha-giang/tin-nong-24h',
+ 'md5': 'b4d7abe0252c9b47774760b7519c7558',
+ 'info_dict': {
+ 'id': '109788',
+ 'ext': 'mp4',
+ 'title': 'VTV1 - Bắc tuyết tại Lào Cai và Hà Giang',
+ 'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
+ 'is_live': False,
+ 'view_count': int,
+ },
+ }, {
+ # audio
+ 'url': 'http://vn.tvnet.gov.vn/radio/27017/vov1---ban-tin-chieu-10062018/doi-song-va-xa-hoi',
+ 'md5': 'b5875ce9b0a2eecde029216d0e6db2ae',
+ 'info_dict': {
+ 'id': '27017',
+ 'ext': 'm4a',
+ 'title': 'VOV1 - Bản tin chiều (10/06/2018)',
+ 'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
+ 'is_live': False,
+ },
+ }, {
+ 'url': 'http://us.tvnet.gov.vn/video/118023/129999/ngay-0705',
+ 'info_dict': {
+ 'id': '129999',
+ 'ext': 'mp4',
+ 'title': 'VTV1 - Quốc hội với cử tri (11/06/2018)',
+ 'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
+ 'is_live': False,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # live stream
+ 'url': 'http://us.tvnet.gov.vn/kenh-truyen-hinh/1011/vtv1',
+ 'info_dict': {
+ 'id': '1011',
+ 'ext': 'mp4',
+ 'title': r're:^VTV1 \| LiveTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # radio live stream
+ 'url': 'http://vn.tvnet.gov.vn/kenh-truyen-hinh/1014',
+ 'info_dict': {
+ 'id': '1014',
+ 'ext': 'm4a',
+ 'title': r're:^VOV1 \| LiveTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://us.tvnet.gov.vn/phim/6136/25510/vtv3---ca-mot-doi-an-oan-tap-1-50/phim-truyen-hinh',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._og_search_title(
+ webpage, default=None) or self._html_search_meta(
+ 'title', webpage, default=None) or self._search_regex(
+ r'<title>([^<]+)<', webpage, 'title')
+ title = re.sub(r'\s*-\s*TV Net\s*$', '', title)
+
+ if '/video/' in url or '/radio/' in url:
+ is_live = False
+ elif '/kenh-truyen-hinh/' in url:
+ is_live = True
+ else:
+ is_live = None
+
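+ # The player's data-file attribute points to a JSON document listing the HLS streams.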
+ data_file = unescapeHTML(self._search_regex(
+ r'data-file=(["\'])(?P<url>(?:https?:)?//.+?)\1', webpage,
+ 'data file', group='url'))
+
+ stream_urls = set()
+ formats = []
+ for stream in self._download_json(data_file, video_id):
+ if not isinstance(stream, dict):
+ continue
+ stream_url = stream.get('url')
+ if (stream_url in stream_urls or not stream_url or
+ not isinstance(stream_url, compat_str)):
+ continue
+ stream_urls.add(stream_url)
+ formats.extend(self._extract_m3u8_formats(
+ stream_url, video_id, 'mp4',
+ entry_protocol='m3u8' if is_live else 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ self._sort_formats(formats)
+
+ # better support for radio streams
+ if title.startswith('VOV'):
+ for f in formats:
+ f.update({
+ 'ext': 'm4a',
+ 'vcodec': 'none',
+ })
+
+ thumbnail = self._og_search_thumbnail(
+ webpage, default=None) or unescapeHTML(
+ self._search_regex(
+ r'data-image=(["\'])(?P<url>(?:https?:)?//.+?)\1', webpage,
+ 'thumbnail', default=None, group='url'))
+
+ if is_live:
+ title = self._live_title(title)
+
+ view_count = int_or_none(self._search_regex(
+ r'(?s)<div[^>]+\bclass=["\'].*?view-count[^>]+>.*?(\d+).*?</div>',
+ webpage, 'view count', default=None))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'is_live': is_live,
+ 'view_count': view_count,
+ 'formats': formats,
+ }
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
- self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
+ self._initialize_geo_bypass({
+ 'countries': smuggled_data.get('geo_countries'),
+ })
video_id = self._match_id(url)
geo_country = self._search_regex(
r'https?://[^/]+\.([a-z]{2})', url,
'geo country', default=None)
if geo_country:
- self._initialize_geo_bypass([geo_country.upper()])
+ self._initialize_geo_bypass({'countries': [geo_country.upper()]})
video = self._download_json(
'http://playapi.mtgx.tv/v3/videos/%s' % video_id, video_id, 'Downloading video JSON')
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
+ compat_kwargs,
compat_parse_qs,
compat_str,
compat_urllib_parse_urlencode,
from ..utils import (
clean_html,
ExtractorError,
+ float_or_none,
int_or_none,
- js_to_json,
orderedSet,
parse_duration,
parse_iso8601,
+ qualities,
+ try_get,
+ unified_timestamp,
update_url_query,
urlencode_postdata,
urljoin,
'%s returned error: %s - %s' % (self.IE_NAME, error, response.get('message')),
expected=True)
- def _call_api(self, path, item_id, note):
+ def _call_api(self, path, item_id, *args, **kwargs):
+ kwargs.setdefault('headers', {})['Client-ID'] = self._CLIENT_ID
response = self._download_json(
- '%s/%s' % (self._API_BASE, path), item_id, note,
- headers={'Client-ID': self._CLIENT_ID})
+ '%s/%s' % (self._API_BASE, path), item_id,
+ *args, **compat_kwargs(kwargs))
self._handle_error(response)
return response
self._login()
def _login(self):
- (username, password) = self._get_login_info()
+ username, password = self._get_login_info()
if username is None:
return
}
-class TwitchClipsIE(InfoExtractor):
+class TwitchClipsIE(TwitchBaseIE):
IE_NAME = 'twitch:clips'
_VALID_URL = r'https?://clips\.twitch\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
- 'url': 'https://clips.twitch.tv/ea/AggressiveCobraPoooound',
+ 'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat',
'md5': '761769e1eafce0ffebfb4089cb3847cd',
'info_dict': {
- 'id': 'AggressiveCobraPoooound',
+ 'id': '42850523',
'ext': 'mp4',
'title': 'EA Play 2016 Live from the Novo Theatre',
'thumbnail': r're:^https?://.*\.jpg',
+ 'timestamp': 1465767393,
+ 'upload_date': '20160612',
'creator': 'EA',
'uploader': 'stereotype_',
- 'uploader_id': 'stereotype_',
+ 'uploader_id': '43566419',
},
}, {
# multiple formats
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
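+ # The clips status endpoint returns quality_options with direct mp4 URLs; richer metadata comes from the kraken API below.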
+ status = self._download_json(
+ 'https://clips.twitch.tv/api/v2/clips/%s/status' % video_id,
+ video_id)
- clip = self._parse_json(
- self._search_regex(
- r'(?s)clipInfo\s*=\s*({.+?});', webpage, 'clip info'),
- video_id, transform_source=js_to_json)
+ formats = []
- title = clip.get('title') or clip.get('channel_title') or self._og_search_title(webpage)
-
- formats = [{
- 'url': option['source'],
- 'format_id': option.get('quality'),
- 'height': int_or_none(option.get('quality')),
- } for option in clip.get('quality_options', []) if option.get('source')]
-
- if not formats:
- formats = [{
- 'url': clip['clip_video_url'],
- }]
+ for option in status['quality_options']:
+ if not isinstance(option, dict):
+ continue
+ source = option.get('source')
+ if not source or not isinstance(source, compat_str):
+ continue
+ formats.append({
+ 'url': source,
+ 'format_id': option.get('quality'),
+ 'height': int_or_none(option.get('quality')),
+ 'fps': int_or_none(option.get('frame_rate')),
+ })
self._sort_formats(formats)
- return {
- 'id': video_id,
- 'title': title,
- 'thumbnail': self._og_search_thumbnail(webpage),
- 'creator': clip.get('broadcaster_display_name') or clip.get('broadcaster_login'),
- 'uploader': clip.get('curator_login'),
- 'uploader_id': clip.get('curator_display_name'),
+ info = {
'formats': formats,
}
+
+ clip = self._call_api(
+ 'kraken/clips/%s' % video_id, video_id, fatal=False, headers={
+ 'Accept': 'application/vnd.twitchtv.v5+json',
+ })
+
+ if clip:
+ quality_key = qualities(('tiny', 'small', 'medium'))
+ thumbnails = []
+ thumbnails_dict = clip.get('thumbnails')
+ if isinstance(thumbnails_dict, dict):
+ for thumbnail_id, thumbnail_url in thumbnails_dict.items():
+ thumbnails.append({
+ 'id': thumbnail_id,
+ 'url': thumbnail_url,
+ 'preference': quality_key(thumbnail_id),
+ })
+
+ info.update({
+ 'id': clip.get('tracking_id') or video_id,
+ 'title': clip.get('title') or video_id,
+ 'duration': float_or_none(clip.get('duration')),
+ 'views': int_or_none(clip.get('views')),
+ 'timestamp': unified_timestamp(clip.get('created_at')),
+ 'thumbnails': thumbnails,
+ 'creator': try_get(clip, lambda x: x['broadcaster']['display_name'], compat_str),
+ 'uploader': try_get(clip, lambda x: x['curator']['display_name'], compat_str),
+ 'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str),
+ })
+ else:
+ info.update({
+ 'title': video_id,
+ 'id': video_id,
+ })
+
+ return info
'id': '623160978427936768',
'ext': 'mp4',
'title': 'Twitter web player',
- 'thumbnail': r're:^https?://.*(?:\bformat=|\.)jpg',
+ 'thumbnail': r're:^https?://.*$',
},
},
{
},
]
+ _API_BASE = 'https://api.twitter.com/1.1'
+
def _parse_media_info(self, media_info, video_id):
formats = []
for media_variant in media_info.get('variants', []):
main_script, 'bearer token')
# https://developer.twitter.com/en/docs/tweets/post-and-engage/api-reference/get-statuses-show-id
api_data = self._download_json(
- 'https://api.twitter.com/1.1/statuses/show/%s.json' % video_id,
+ '%s/statuses/show/%s.json' % (self._API_BASE, video_id),
video_id, 'Downloading API data',
headers={
'Authorization': 'Bearer ' + bearer_token,
formats.extend(self._extract_mobile_formats(username, video_id))
if formats:
+ title = self._search_regex(r'<title>([^<]+)</title>', webpage, 'title')
+ thumbnail = config.get('posterImageUrl') or config.get('image_src')
+ duration = float_or_none(config.get('duration'), scale=1000) or duration
break
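+ # No formats found on the page: fall back to the videos/tweet/config API, which requires a guest token.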
+ if not formats:
+ headers = {
+ 'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw',
+ 'Referer': url,
+ }
+ ct0 = self._get_cookies(url).get('ct0')
+ if ct0:
+ headers['csrf_token'] = ct0.value
+ guest_token = self._download_json(
+ '%s/guest/activate.json' % self._API_BASE, video_id,
+ 'Downloading guest token', data=b'',
+ headers=headers)['guest_token']
+ headers['x-guest-token'] = guest_token
+ self._set_cookie('api.twitter.com', 'gt', guest_token)
+ config = self._download_json(
+ '%s/videos/tweet/config/%s.json' % (self._API_BASE, video_id),
+ video_id, headers=headers)
+ track = config['track']
+ vmap_url = track.get('vmapUrl')
+ if vmap_url:
+ formats = self._extract_formats_from_vmap_url(vmap_url, video_id)
+ else:
+ playback_url = track['playbackUrl']
+ if determine_ext(playback_url) == 'm3u8':
+ formats = self._extract_m3u8_formats(
+ playback_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls')
+ else:
+ formats = [{
+ 'url': playback_url,
+ }]
+ title = 'Twitter web player'
+ thumbnail = config.get('posterImage')
+ duration = float_or_none(track.get('durationMs'), scale=1000)
+
self._remove_duplicate_formats(formats)
self._sort_formats(formats)
- title = self._search_regex(r'<title>([^<]+)</title>', webpage, 'title')
- thumbnail = config.get('posterImageUrl') or config.get('image_src')
- duration = float_or_none(config.get('duration'), scale=1000) or duration
-
return {
'id': video_id,
'title': title,
'params': {
'skip_download': True, # requires ffmpeg
},
+ }, {
+ # card via api.twitter.com/1.1/videos/tweet/config
+ 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
+ 'info_dict': {
+ 'id': '1001551623938805763',
+ 'ext': 'mp4',
+ 'title': 're:.*?Shep is on a roll today.*?',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'description': 'md5:63b036c228772523ae1924d5f8e5ed6b',
+ 'uploader': 'Lis Power',
+ 'uploader_id': 'LisPower1',
+ 'duration': 111.278,
+ },
+ 'params': {
+ 'skip_download': True, # requires ffmpeg
+ },
}]
def _real_extract(self, url):
int_or_none,
js_to_json,
sanitized_Request,
+ try_get,
unescapeHTML,
urlencode_postdata,
)
# no url in outputs format entry
'url': 'https://www.udemy.com/learn-web-development-complete-step-by-step-guide-to-success/learn/v4/t/lecture/4125812',
'only_matching': True,
+ }, {
+ # only outputs rendition
+ 'url': 'https://www.udemy.com/how-you-can-help-your-local-community-5-amazing-examples/learn/v4/t/lecture/3225750?start=0',
+ 'only_matching': True,
}]
def _extract_course_info(self, webpage, video_id):
% (course_id, lecture_id),
lecture_id, 'Downloading lecture JSON', query={
'fields[lecture]': 'title,description,view_html,asset',
- 'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,data',
+ 'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,captions,data',
})
def _handle_error(self, response):
error_str += ' - %s' % error_data.get('formErrors')
raise ExtractorError(error_str, expected=True)
- def _download_webpage(self, *args, **kwargs):
+ def _download_webpage_handle(self, *args, **kwargs):
kwargs.setdefault('headers', {})['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4'
- return super(UdemyIE, self)._download_webpage(
+ return super(UdemyIE, self)._download_webpage_handle(
*args, **compat_kwargs(kwargs))
def _download_json(self, url_or_request, *args, **kwargs):
self._login()
def _login(self):
- (username, password) = self._get_login_info()
+ username, password = self._get_login_info()
if username is None:
return
'url': src,
})
- download_urls = asset.get('download_urls')
- if isinstance(download_urls, dict):
- extract_formats(download_urls.get('Video'))
+ for url_kind in ('download', 'stream'):
+ urls = asset.get('%s_urls' % url_kind)
+ if isinstance(urls, dict):
+ extract_formats(urls.get('Video'))
+
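+ # Captions marked source == 'auto' are machine-generated and go into automatic_captions; the rest are regular subtitles.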
+ captions = asset.get('captions')
+ if isinstance(captions, list):
+ for cc in captions:
+ if not isinstance(cc, dict):
+ continue
+ cc_url = cc.get('url')
+ if not cc_url or not isinstance(cc_url, compat_str):
+ continue
+ lang = try_get(cc, lambda x: x['locale']['locale'], compat_str)
+ sub_dict = (automatic_captions if cc.get('source') == 'auto'
+ else subtitles)
+ sub_dict.setdefault(lang or 'en', []).append({
+ 'url': cc_url,
+ })
view_html = lecture.get('view_html')
if view_html:
fatal=False)
extract_subtitles(text_tracks)
+ if not formats and outputs:
+ for format_id, output in outputs.items():
+ f = extract_output_format(output, format_id)
+ if f.get('url'):
+ formats.append(f)
+
self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id'))
return {
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
parse_duration,
parse_iso8601,
+ urlencode_postdata,
)
class UFCTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ufc\.tv/video/(?P<id>[^/]+)'
+ _NETRC_MACHINE = 'ufctv'
_TEST = {
'url': 'https://www.ufc.tv/video/ufc-219-countdown-full-episode',
'info_dict': {
}
}
+ def _real_initialize(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ code = self._download_json(
+ 'https://www.ufc.tv/secure/authenticate',
+ None, 'Logging in', data=urlencode_postdata({
+ 'username': username,
+ 'password': password,
+ 'format': 'json',
+ })).get('code')
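+ # A code other than 'loginsuccess' carries the site's error message; surface it as-is.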
+ if code and code != 'loginsuccess':
+ raise ExtractorError(code, expected=True)
+
def _real_extract(self, url):
display_id = self._match_id(url)
video_data = self._download_json(url, display_id, query={
'Access to this content is restricted. (%s said: %s)' % (self.IE_NAME, err_code), expected=True)
def _login(self):
- (username, password) = self._get_login_info()
+ username, password = self._get_login_info()
if username is None:
return
self.report_login()
self._search_regex(
r'setup\(([^)]+)\)', code, 'jwplayer data',
default=NO_DEFAULT if num == len(codes) else '{}'),
- video_id, transform_source=js_to_json)
+ video_id, transform_source=lambda s: js_to_json(
+ re.sub(r'\s*\+\s*window\[.+?\]', '', s)))
if jwplayer_data:
break
from __future__ import unicode_literals
+import base64
import re
from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
from ..utils import (
ExtractorError,
clean_html,
determine_ext,
int_or_none,
js_to_json,
+ parse_age_limit,
parse_duration,
)
class ViewLiftBaseIE(InfoExtractor):
- _DOMAINS_REGEX = r'(?:snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|monumentalsportsnetwork|vayafilm)\.com|kesari\.tv'
+ _DOMAINS_REGEX = r'(?:snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|(?:monumental|lax)sportsnetwork|vayafilm)\.com|hoichoi\.tv'
class ViewLiftEmbedIE(ViewLiftBaseIE):
- _VALID_URL = r'https?://(?:(?:www|embed)\.)?(?:%s)/embed/player\?.*\bfilmId=(?P<id>[\da-f-]{36})' % ViewLiftBaseIE._DOMAINS_REGEX
+ _VALID_URL = r'https?://(?:(?:www|embed)\.)?(?:%s)/embed/player\?.*\bfilmId=(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})' % ViewLiftBaseIE._DOMAINS_REGEX
_TESTS = [{
'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831&w=500',
'md5': '2924e9215c6eff7a55ed35b72276bd93',
formats = []
has_bitrate = False
- for source in self._parse_json(js_to_json(self._search_regex(
- r'(?s)sources:\s*(\[.+?\]),', webpage, 'json')), video_id):
+ sources = self._parse_json(self._search_regex(
+ r'(?s)sources:\s*(\[.+?\]),', webpage,
+ 'sources', default='[]'), video_id, js_to_json)
+ for source in sources:
file_ = source.get('file')
if not file_:
continue
format_id = source.get('label') or ext
if all(v in ('m3u8', 'hls') for v in (type_, ext)):
formats.extend(self._extract_m3u8_formats(
- file_, video_id, 'mp4', m3u8_id='hls'))
+ file_, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
else:
bitrate = int_or_none(self._search_regex(
[r'(\d+)kbps', r'_\d{1,2}x\d{1,2}_(\d{3,})\.%s' % ext],
'tbr': bitrate,
'height': height,
})
+ if not formats:
+ hls_url = self._parse_json(self._search_regex(
+ r'filmInfo\.src\s*=\s*({.+?});',
+ webpage, 'src'), video_id, js_to_json)['src']
+ formats = self._extract_m3u8_formats(
+ hls_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False)
field_preference = None if has_bitrate else ('height', 'tbr', 'format_id')
self._sort_formats(formats, field_preference)
'display_id': 'lost_for_life',
'ext': 'mp4',
'title': 'Lost for Life',
- 'description': 'md5:fbdacc8bb6b455e464aaf98bc02e1c82',
+ 'description': 'md5:ea10b5a50405ae1f7b5269a6ec594102',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 4489,
- 'categories': ['Documentary', 'Crime', 'Award Winning', 'Festivals']
+ 'categories': 'mincount:3',
+ 'age_limit': 14,
+ 'upload_date': '20150421',
+ 'timestamp': 1429656819,
}
}, {
'url': 'http://www.snagfilms.com/show/the_world_cut_project/india',
'description': 'md5:5c168c5a8f4719c146aad2e0dfac6f5f',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 979,
- 'categories': ['Documentary', 'Sports', 'Politics']
+ 'categories': 'mincount:2',
+ 'timestamp': 1399478279,
+ 'upload_date': '20140507',
}
}, {
# Film is not playable in your area.
}, {
'url': 'http://www.winnersview.com/videos/the-good-son',
'only_matching': True,
- }, {
- 'url': 'http://www.kesari.tv/news/video/1461919076414',
- 'only_matching': True,
}, {
# Was once Kaltura embed
'url': 'https://www.monumentalsportsnetwork.com/videos/john-carlson-postgame-2-25-15',
raise ExtractorError(
'Film %s is not available.' % display_id, expected=True)
- film_id = self._search_regex(r'filmId=([\da-f-]{36})"', webpage, 'film id')
-
- snag = self._parse_json(
- self._search_regex(
- r'Snag\.page\.data\s*=\s*(\[.+?\]);', webpage, 'snag'),
- display_id)
-
- for item in snag:
- if item.get('data', {}).get('film', {}).get('id') == film_id:
- data = item['data']['film']
- title = data['title']
- description = clean_html(data.get('synopsis'))
- thumbnail = data.get('image')
- duration = int_or_none(data.get('duration') or data.get('runtime'))
- categories = [
- category['title'] for category in data.get('categories', [])
- if category.get('title')]
- break
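+ # Newer pages embed the app state as a base64- and URL-encoded JSON blob; decode it to reach the VideoDetailModule.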
+ initial_store_state = self._search_regex(
+ r"window\.initialStoreState\s*=.*?JSON\.parse\(unescape\(atob\('([^']+)'\)\)\)",
+ webpage, 'Initial Store State', default=None)
+ if initial_store_state:
+ modules = self._parse_json(compat_urllib_parse_unquote(base64.b64decode(
+ initial_store_state).decode()), display_id)['page']['data']['modules']
+ content_data = next(m['contentData'][0] for m in modules if m.get('moduleType') == 'VideoDetailModule')
+ gist = content_data['gist']
+ film_id = gist['id']
+ title = gist['title']
+ video_assets = content_data['streamingInfo']['videoAssets']
+
+ formats = []
+ mpeg_video_assets = video_assets.get('mpeg') or []
+ for video_asset in mpeg_video_assets:
+ video_asset_url = video_asset.get('url')
+ if not video_asset_url:
+ continue
+ bitrate = int_or_none(video_asset.get('bitrate'))
+ height = int_or_none(self._search_regex(
+ r'^_?(\d+)[pP]$', video_asset.get('renditionValue'),
+ 'height', default=None))
+ formats.append({
+ 'url': video_asset_url,
+ 'format_id': 'http%s' % ('-%d' % bitrate if bitrate else ''),
+ 'tbr': bitrate,
+ 'height': height,
+ 'vcodec': video_asset.get('codec'),
+ })
+
+ hls_url = video_assets.get('hls')
+ if hls_url:
+ formats.extend(self._extract_m3u8_formats(
+ hls_url, film_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+ self._sort_formats(formats, ('height', 'tbr', 'format_id'))
+
+ info = {
+ 'id': film_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': gist.get('description'),
+ 'thumbnail': gist.get('videoImageUrl'),
+ 'duration': int_or_none(gist.get('runtime')),
+ 'age_limit': parse_age_limit(content_data.get('parentalRating')),
+ 'timestamp': int_or_none(gist.get('publishDate'), 1000),
+ 'formats': formats,
+ }
+ for k in ('categories', 'tags'):
+ info[k] = [v['title'] for v in content_data.get(k, []) if v.get('title')]
+ return info
else:
- title = self._search_regex(
- r'itemprop="title">([^<]+)<', webpage, 'title')
- description = self._html_search_regex(
- r'(?s)<div itemprop="description" class="film-synopsis-inner ">(.+?)</div>',
- webpage, 'description', default=None) or self._og_search_description(webpage)
- thumbnail = self._og_search_thumbnail(webpage)
- duration = parse_duration(self._search_regex(
- r'<span itemprop="duration" class="film-duration strong">([^<]+)<',
- webpage, 'duration', fatal=False))
- categories = re.findall(r'<a href="/movies/[^"]+">([^<]+)</a>', webpage)
+ film_id = self._search_regex(r'filmId=([\da-f-]{36})"', webpage, 'film id')
- return {
- '_type': 'url_transparent',
- 'url': 'http://%s/embed/player?filmId=%s' % (domain, film_id),
- 'id': film_id,
- 'display_id': display_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'categories': categories,
- 'ie_key': 'ViewLiftEmbed',
- }
+ snag = self._parse_json(
+ self._search_regex(
+ r'Snag\.page\.data\s*=\s*(\[.+?\]);', webpage, 'snag', default='[]'),
+ display_id)
+
+ for item in snag:
+ if item.get('data', {}).get('film', {}).get('id') == film_id:
+ data = item['data']['film']
+ title = data['title']
+ description = clean_html(data.get('synopsis'))
+ thumbnail = data.get('image')
+ duration = int_or_none(data.get('duration') or data.get('runtime'))
+ categories = [
+ category['title'] for category in data.get('categories', [])
+ if category.get('title')]
+ break
+ else:
+ title = self._search_regex(
+ r'itemprop="title">([^<]+)<', webpage, 'title')
+ description = self._html_search_regex(
+ r'(?s)<div itemprop="description" class="film-synopsis-inner ">(.+?)</div>',
+ webpage, 'description', default=None) or self._og_search_description(webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+ duration = parse_duration(self._search_regex(
+ r'<span itemprop="duration" class="film-duration strong">([^<]+)<',
+ webpage, 'duration', fatal=False))
+ categories = re.findall(r'<a href="/movies/[^"]+">([^<]+)</a>', webpage)
+
+ return {
+ '_type': 'url_transparent',
+ 'url': 'http://%s/embed/player?filmId=%s' % (domain, film_id),
+ 'id': film_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'categories': categories,
+ 'ie_key': 'ViewLiftEmbed',
+ }
self._login()
def _login(self):
- (username, password) = self._get_login_info()
+ username, password = self._get_login_info()
if username is None:
return
ExtractorError,
InAdvancePagedList,
int_or_none,
+ merge_dicts,
NO_DEFAULT,
RegexNotFoundError,
sanitized_Request,
_LOGIN_URL = 'https://vimeo.com/log_in'
def _login(self):
- (username, password) = self._get_login_info()
+ username, password = self._get_login_info()
if username is None:
if self._LOGIN_REQUIRED:
raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
'preference': 1,
})
- info_dict = self._parse_config(config, video_id)
- formats.extend(info_dict['formats'])
+ info_dict_config = self._parse_config(config, video_id)
+ formats.extend(info_dict_config['formats'])
self._vimeo_sort_formats(formats)
+ json_ld = self._search_json_ld(webpage, video_id, default={})
+
if not cc_license:
cc_license = self._search_regex(
r'<link[^>]+rel=["\']license["\'][^>]+href=(["\'])(?P<license>(?:(?!\1).)+)\1',
webpage, 'license', default=None, group='license')
- info_dict.update({
+ info_dict = {
'id': video_id,
'formats': formats,
'timestamp': unified_timestamp(timestamp),
'like_count': like_count,
'comment_count': comment_count,
'license': cc_license,
- })
+ }
+
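+ # merge_dicts keeps the first non-empty value per key: scraped page fields
+ # win over the player config, and JSON-LD only fills the remaining gaps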
+ info_dict = merge_dicts(info_dict, info_dict_config, json_ld)
return info_dict
class VimeoLikesIE(InfoExtractor):
- _VALID_URL = r'https://(?:www\.)?vimeo\.com/user(?P<id>[0-9]+)/likes/?(?:$|[?#]|sort:)'
+ _VALID_URL = r'https://(?:www\.)?vimeo\.com/(?P<id>[^/]+)/likes/?(?:$|[?#]|sort:)'
IE_NAME = 'vimeo:likes'
IE_DESC = 'Vimeo user likes'
- _TEST = {
+ _TESTS = [{
'url': 'https://vimeo.com/user755559/likes/',
'playlist_mincount': 293,
'info_dict': {
'description': 'See all the videos urza likes',
'title': 'Videos urza likes',
},
- }
+ }, {
+ 'url': 'https://vimeo.com/stormlapse/likes',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
user_id = self._match_id(url)
self._search_regex(
r'''(?x)<li><a\s+href="[^"]+"\s+data-page="([0-9]+)">
.*?</a></li>\s*<li\s+class="pagination_next">
- ''', webpage, 'page count'),
+ ''', webpage, 'page count', default=1),
'page count', fatal=True)
PAGE_SIZE = 12
title = self._html_search_regex(
description = self._html_search_meta('description', webpage)
def _get_page(idx):
- page_url = 'https://vimeo.com/user%s/likes/page:%d/sort:date' % (
+ page_url = 'https://vimeo.com/%s/likes/page:%d/sort:date' % (
user_id, idx + 1)
webpage = self._download_webpage(
page_url, user_id,
return {
'_type': 'playlist',
- 'id': 'user%s_likes' % user_id,
+ 'id': '%s_likes' % user_id,
'title': title,
'description': description,
'entries': pl,
_NETRC_MACHINE = 'vk'
def _login(self):
- (username, password) = self._get_login_info()
+ username, password = self._get_login_info()
if username is None:
return
_TESTS = [
{
'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html',
- 'md5': '83d882d9de5c9d97f0bb2c6273cde56a',
'info_dict': {
'id': '11713067',
'ext': 'mp4',
'upload_date': '20140819',
'duration': 120,
},
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['HTTP Error 404'],
},
{
'url': 'http://www.wat.tv/video/gregory-lemarchal-voix-ange-6z1v7_6ygkj_.html',
- 'md5': '34bdfa5ca9fd3c7eb88601b635b0424c',
+ 'md5': 'b16574df2c3cd1a36ca0098f2a791925',
'info_dict': {
'id': '11713075',
'ext': 'mp4',
formats = []
try:
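+ # manifests may be served with or without an 'ssm' prefix; derive both variants and try each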
+ alt_urls = lambda manifest_url: [re.sub(r'(?:wdv|ssm)?\.ism/', repl + '.ism/', manifest_url) for repl in ('', 'ssm')]
manifest_urls = self._download_json(
'http://www.wat.tv/get/webhtml/' + video_id, video_id)
m3u8_url = manifest_urls.get('hls')
if m3u8_url:
m3u8_url = remove_bitrate_limit(m3u8_url)
- m3u8_formats = self._extract_m3u8_formats(
- m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
- if m3u8_formats:
- formats.extend(m3u8_formats)
+ for m3u8_alt_url in alt_urls(m3u8_url):
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_alt_url, video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False))
formats.extend(self._extract_f4m_formats(
- m3u8_url.replace('ios', 'web').replace('.m3u8', '.f4m'),
+ m3u8_alt_url.replace('ios', 'web').replace('.m3u8', '.f4m'),
video_id, f4m_id='hds', fatal=False))
- http_url = extract_url('android5/%s.mp4', 'http')
- if http_url:
- for m3u8_format in m3u8_formats:
- vbr, abr = m3u8_format.get('vbr'), m3u8_format.get('abr')
- if not vbr or not abr:
- continue
- format_id = m3u8_format['format_id'].replace('hls', 'http')
- fmt_url = re.sub(r'%s-\d+00-\d+' % video_id, '%s-%d00-%d' % (video_id, round(vbr / 100), round(abr)), http_url)
- if self._is_valid_url(fmt_url, video_id, format_id):
- f = m3u8_format.copy()
- f.update({
- 'url': fmt_url,
- 'format_id': format_id,
- 'protocol': 'http',
- })
- formats.append(f)
mpd_url = manifest_urls.get('mpd')
if mpd_url:
- formats.extend(self._extract_mpd_formats(remove_bitrate_limit(
- mpd_url), video_id, mpd_id='dash', fatal=False))
+ mpd_url = remove_bitrate_limit(mpd_url)
+ for mpd_alt_url in alt_urls(mpd_url):
+ formats.extend(self._extract_mpd_formats(
+ mpd_alt_url, video_id, mpd_id='dash', fatal=False))
self._sort_formats(formats)
except ExtractorError:
abr = 64
source = self._parse_json(
self._search_regex(
- r'(?s)source\s*:\s*({.+?})\s*,\s*\n', webpage, 'source',
+ r'(?s)source["\']?\s*:\s*({.+?})\s*[,}]', webpage, 'source',
default='{}'),
video_id, transform_source=js_to_json, fatal=False) or {}
webpage = self._download_webpage(url, video_id)
youtube_id = self._search_regex(
- r"videoId\s*:\s*[\"']([0-9A-Za-z_-]{11})[\"']",
+ (r"videoId\s*:\s*[\"']([0-9A-Za-z_-]{11})[\"']",
+ r'data-id=["\']([0-9A-Za-z_-]{11})'),
webpage, 'video URL', default=None)
if youtube_id:
return {
class XiamiBaseIE(InfoExtractor):
_API_BASE_URL = 'http://www.xiami.com/song/playlist/cat/json/id'
- def _download_webpage(self, *args, **kwargs):
- webpage = super(XiamiBaseIE, self)._download_webpage(*args, **kwargs)
+ def _download_webpage_handle(self, *args, **kwargs):
+ webpage, urlh = super(XiamiBaseIE, self)._download_webpage_handle(*args, **kwargs)
if '>Xiami is currently not available in your country.<' in webpage:
self.raise_geo_restricted('Xiami is currently not available in your country')
- return webpage
+ return webpage, urlh
'youtube-dl with --cookies',
expected=True)
- def _download_webpage(self, *args, **kwargs):
- webpage = super(YandexMusicBaseIE, self)._download_webpage(*args, **kwargs)
+ def _download_webpage_handle(self, *args, **kwargs):
+ webpage, urlh = super(YandexMusicBaseIE, self)._download_webpage_handle(*args, **kwargs)
if 'Нам очень жаль, но запросы, поступившие с вашего IP-адреса, похожи на автоматические.' in webpage:
self._raise_captcha()
- return webpage
+ return webpage, urlh
'info_dict': {
'id': '4878838',
'ext': 'mp3',
- 'title': 'Carlo Ambrosio & Fabio Di Bari, Carlo Ambrosio - Gypsy Eyes 1',
+ 'title': 'Carlo Ambrosio, Carlo Ambrosio & Fabio Di Bari - Gypsy Eyes 1',
'filesize': 4628061,
'duration': 193.04,
'track': 'Gypsy Eyes 1',
'album': 'Gypsy Soul',
'album_artist': 'Carlo Ambrosio',
- 'artist': 'Carlo Ambrosio & Fabio Di Bari, Carlo Ambrosio',
- 'release_year': '2009',
+ 'artist': 'Carlo Ambrosio, Carlo Ambrosio & Fabio Di Bari',
+ 'release_year': 2009,
},
'skip': 'Travis CI servers blocked by YandexMusic',
}
track_info.update({
'album': album.get('title'),
'album_artist': extract_artist(album.get('artists')),
- 'release_year': compat_str(year) if year else None,
+ 'release_year': int_or_none(year),
})
track_artist = extract_artist(track.get('artists'))
orderedSet,
parse_codecs,
parse_duration,
+ qualities,
remove_quotes,
remove_start,
smuggle_url,
If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
"""
- (username, password) = self._get_login_info()
+ username, password = self._get_login_info()
# No authentication to be performed
if username is None:
if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
return True
- def _download_webpage(self, *args, **kwargs):
+ def _download_webpage_handle(self, *args, **kwargs):
kwargs.setdefault('query', {})['disable_polymer'] = 'true'
- return super(YoutubeBaseInfoExtractor, self)._download_webpage(
+ return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
*args, **compat_kwargs(kwargs))
def _real_initialize(self):
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
'license': 'Standard YouTube License',
'creator': 'Icona Pop',
+ 'track': 'I Love It (feat. Charli XCX)',
+ 'artist': 'Icona Pop',
}
},
{
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
'license': 'Standard YouTube License',
'creator': 'Justin Timberlake',
+ 'track': 'Tunnel Vision',
+ 'artist': 'Justin Timberlake',
'age_limit': 18,
}
},
'id': 'IB3lcPjvWLA',
'ext': 'm4a',
'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
- 'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
+ 'description': 'md5:1900ed86ee514927b9e00fbead6969a5',
'duration': 244,
'uploader': 'AfrojackVEVO',
'uploader_id': 'AfrojackVEVO',
'ext': 'mp4',
'duration': 219,
'upload_date': '20100909',
- 'uploader': 'The Amazing Atheist',
+ 'uploader': 'TJ Kirk',
'uploader_id': 'TheAmazingAtheist',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
'license': 'Standard YouTube License',
'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
'info_dict': {
'id': '6kLq3WMV1nU',
- 'ext': 'mp4',
+ 'ext': 'webm',
'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
- 'duration': 247,
+ 'duration': 246,
'uploader': 'LloydVEVO',
'uploader_id': 'LloydVEVO',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
'uploader_id': 'AllenMeow',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
- 'uploader': '孫艾倫',
+ 'uploader': '孫ᄋᄅ',
'license': 'Standard YouTube License',
'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
},
'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
'info_dict': {
'id': 'FIl7x6_3R5Y',
- 'ext': 'mp4',
+ 'ext': 'webm',
'title': 'md5:7b81415841e02ecd4313668cde88737a',
'description': 'md5:116377fd2963b81ec4ce64b542173306',
'duration': 220,
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
'uploader': 'dorappi2000',
'license': 'Standard YouTube License',
- 'formats': 'mincount:32',
+ 'formats': 'mincount:31',
},
+ 'skip': 'no longer relevant',
},
# DASH manifest with segment_list
{
'id': 'lsguqyKfVQg',
'ext': 'mp4',
'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
- 'alt_title': 'Dark Walk',
+ 'alt_title': 'Dark Walk - Position Music',
'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
'duration': 133,
'upload_date': '20151119',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
'uploader': 'IronSoulElf',
'license': 'Standard YouTube License',
- 'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',
+ 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
+ 'track': 'Dark Walk - Position Music',
+ 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
},
'params': {
'skip_download': True,
'description': 'md5:dda0d780d5a6e120758d1711d062a867',
'duration': 4060,
'upload_date': '20151119',
- 'uploader': 'Bernie 2016',
+ 'uploader': 'Bernie Sanders',
'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
'license': 'Creative Commons Attribution license (reuse allowed)',
'params': {
'skip_download': True,
},
+ 'skip': 'This video is not available.',
},
{
# YouTube Red video with episode data
'id': 'iqKdEhx-dD4',
'ext': 'mp4',
'title': 'Isolation - Mind Field (Ep 1)',
- 'description': 'md5:8013b7ddea787342608f63a13ddc9492',
+ 'description': 'md5:25b78d2f64ae81719f5c96319889b736',
'duration': 2085,
'upload_date': '20170118',
'uploader': 'Vsauce',
'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
'license': 'Standard YouTube License',
- 'view_count': int,
},
'params': {
'skip_download': True,
ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
if ytplayer_config:
args = ytplayer_config['args']
- if args.get('url_encoded_fmt_stream_map'):
+ if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
# Convert to the same format returned by compat_parse_qs
video_info = dict((k, [v]) for k, v in args.items())
add_dash_mpd(video_info)
if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
raise ExtractorError('"rental" videos not supported. See https://github.com/rg3/youtube-dl/issues/359 for more information.', expected=True)
- # Start extracting information
- self.report_information_extraction(video_id)
-
- # uploader
- if 'author' not in video_info:
- raise ExtractorError('Unable to extract uploader name')
- video_uploader = compat_urllib_parse_unquote_plus(video_info['author'][0])
-
- # uploader_id
- video_uploader_id = None
- video_uploader_url = None
- mobj = re.search(
- r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
- video_webpage)
- if mobj is not None:
- video_uploader_id = mobj.group('uploader_id')
- video_uploader_url = mobj.group('uploader_url')
- else:
- self._downloader.report_warning('unable to extract uploader nickname')
-
- # thumbnail image
- # We try first to get a high quality image:
- m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
- video_webpage, re.DOTALL)
- if m_thumb is not None:
- video_thumbnail = m_thumb.group(1)
- elif 'thumbnail_url' not in video_info:
- self._downloader.report_warning('unable to extract video thumbnail')
- video_thumbnail = None
- else: # don't panic if we can't find it
- video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
-
- # upload date
- upload_date = self._html_search_meta(
- 'datePublished', video_webpage, 'upload date', default=None)
- if not upload_date:
- upload_date = self._search_regex(
- [r'(?s)id="eow-date.*?>(.*?)</span>',
- r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
- video_webpage, 'upload date', default=None)
- upload_date = unified_strdate(upload_date)
-
- video_license = self._html_search_regex(
- r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
- video_webpage, 'license', default=None)
-
- m_music = re.search(
- r'''(?x)
- <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
- <ul[^>]*>\s*
- <li>(?P<title>.+?)
- by (?P<creator>.+?)
- (?:
- \(.+?\)|
- <a[^>]*
- (?:
- \bhref=["\']/red[^>]*>| # drop possible
- >\s*Listen ad-free with YouTube Red # YouTube Red ad
- )
- .*?
- )?</li
- ''',
- video_webpage)
- if m_music:
- video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
- video_creator = clean_html(m_music.group('creator'))
- else:
- video_alt_title = video_creator = None
-
- m_episode = re.search(
- r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
- video_webpage)
- if m_episode:
- series = m_episode.group('series')
- season_number = int(m_episode.group('season'))
- episode_number = int(m_episode.group('episode'))
- else:
- series = season_number = episode_number = None
-
- m_cat_container = self._search_regex(
- r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
- video_webpage, 'categories', default=None)
- if m_cat_container:
- category = self._html_search_regex(
- r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
- default=None)
- video_categories = None if category is None else [category]
- else:
- video_categories = None
-
- video_tags = [
- unescapeHTML(m.group('content'))
- for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
-
- def _extract_count(count_name):
- return str_to_int(self._search_regex(
- r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
- % re.escape(count_name),
- video_webpage, count_name, default=None))
-
- like_count = _extract_count('like')
- dislike_count = _extract_count('dislike')
-
- # subtitles
- video_subtitles = self.extract_subtitles(video_id, video_webpage)
- automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
-
- video_duration = try_get(
- video_info, lambda x: int_or_none(x['length_seconds'][0]))
- if not video_duration:
- video_duration = parse_duration(self._html_search_meta(
- 'duration', video_webpage, 'video duration'))
-
- # annotations
- video_annotations = None
- if self._downloader.params.get('writeannotations', False):
- video_annotations = self._extract_annotations(video_id)
-
- chapters = self._extract_chapters(description_original, video_duration)
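+ # media URLs embed the file size in bytes as a 'clen' parameter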
+ def _extract_filesize(media_url):
+ return int_or_none(self._search_regex(
+ r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
self.report_rtmp_download()
'width': int_or_none(width_height[0]),
'height': int_or_none(width_height[1]),
}
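+ # ranking helper for the plain quality labels (small < medium < hd720)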
+ q = qualities(['small', 'medium', 'hd720'])
formats = []
for url_data_str in encoded_url_map.split(','):
url_data = compat_parse_qs(url_data_str)
mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
+ filesize = int_or_none(url_data.get(
+ 'clen', [None])[0]) or _extract_filesize(url)
+
+ quality = url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0]
+
more_fields = {
- 'filesize': int_or_none(url_data.get('clen', [None])[0]),
+ 'filesize': filesize,
'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
'width': width,
'height': height,
'fps': int_or_none(url_data.get('fps', [None])[0]),
- 'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0],
+ 'format_note': quality,
+ 'quality': q(quality),
}
for key, value in more_fields.items():
if value:
a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
formats.append(a_format)
else:
- unavailable_message = extract_unavailable_message()
- if unavailable_message:
- raise ExtractorError(unavailable_message, expected=True)
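+ # prefer the explicit 'reason' from video_info over a scraped unavailability message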
+ error_message = clean_html(video_info.get('reason', [None])[0])
+ if not error_message:
+ error_message = extract_unavailable_message()
+ if error_message:
+ raise ExtractorError(error_message, expected=True)
raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
+ # uploader
+ video_uploader = try_get(video_info, lambda x: x['author'][0], compat_str)
+ if video_uploader:
+ video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
+ else:
+ self._downloader.report_warning('unable to extract uploader name')
+
+ # uploader_id
+ video_uploader_id = None
+ video_uploader_url = None
+ mobj = re.search(
+ r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
+ video_webpage)
+ if mobj is not None:
+ video_uploader_id = mobj.group('uploader_id')
+ video_uploader_url = mobj.group('uploader_url')
+ else:
+ self._downloader.report_warning('unable to extract uploader nickname')
+
+ # thumbnail image
+ # We try first to get a high quality image:
+ m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
+ video_webpage, re.DOTALL)
+ if m_thumb is not None:
+ video_thumbnail = m_thumb.group(1)
+ elif 'thumbnail_url' not in video_info:
+ self._downloader.report_warning('unable to extract video thumbnail')
+ video_thumbnail = None
+ else: # don't panic if we can't find it
+ video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
+
+ # upload date
+ upload_date = self._html_search_meta(
+ 'datePublished', video_webpage, 'upload date', default=None)
+ if not upload_date:
+ upload_date = self._search_regex(
+ [r'(?s)id="eow-date.*?>(.*?)</span>',
+ r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
+ video_webpage, 'upload date', default=None)
+ upload_date = unified_strdate(upload_date)
+
+ video_license = self._html_search_regex(
+ r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
+ video_webpage, 'license', default=None)
+
+ m_music = re.search(
+ r'''(?x)
+ <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
+ <ul[^>]*>\s*
+ <li>(?P<title>.+?)
+ by (?P<creator>.+?)
+ (?:
+ \(.+?\)|
+ <a[^>]*
+ (?:
+ \bhref=["\']/red[^>]*>| # drop possible
+ >\s*Listen ad-free with YouTube Red # YouTube Red ad
+ )
+ .*?
+ )?</li
+ ''',
+ video_webpage)
+ if m_music:
+ video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
+ video_creator = clean_html(m_music.group('creator'))
+ else:
+ video_alt_title = video_creator = None
+
+ def extract_meta(field):
+ return self._html_search_regex(
+ r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
+ video_webpage, field, default=None)
+
+ track = extract_meta('Song')
+ artist = extract_meta('Artist')
+
+ m_episode = re.search(
+ r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
+ video_webpage)
+ if m_episode:
+ series = m_episode.group('series')
+ season_number = int(m_episode.group('season'))
+ episode_number = int(m_episode.group('episode'))
+ else:
+ series = season_number = episode_number = None
+
+ m_cat_container = self._search_regex(
+ r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
+ video_webpage, 'categories', default=None)
+ if m_cat_container:
+ category = self._html_search_regex(
+ r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
+ default=None)
+ video_categories = None if category is None else [category]
+ else:
+ video_categories = None
+
+ video_tags = [
+ unescapeHTML(m.group('content'))
+ for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
+
+ def _extract_count(count_name):
+ return str_to_int(self._search_regex(
+ r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
+ % re.escape(count_name),
+ video_webpage, count_name, default=None))
+
+ like_count = _extract_count('like')
+ dislike_count = _extract_count('dislike')
+
+ # subtitles
+ video_subtitles = self.extract_subtitles(video_id, video_webpage)
+ automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
+
+ video_duration = try_get(
+ video_info, lambda x: int_or_none(x['length_seconds'][0]))
+ if not video_duration:
+ video_duration = parse_duration(self._html_search_meta(
+ 'duration', video_webpage, 'video duration'))
+
+ # annotations
+ video_annotations = None
+ if self._downloader.params.get('writeannotations', False):
+ video_annotations = self._extract_annotations(video_id)
+
+ chapters = self._extract_chapters(description_original, video_duration)
+
# Look for the DASH manifest
if self._downloader.params.get('youtube_include_dash_manifest', True):
dash_mpd_fatal = True
for df in self._extract_mpd_formats(
mpd_url, video_id, fatal=dash_mpd_fatal,
formats_dict=self._formats):
+ if not df.get('filesize'):
+ df['filesize'] = _extract_filesize(df['url'])
# Do not overwrite DASH format found in some previous DASH manifest
if df['format_id'] not in dash_formats:
dash_formats[df['format_id']] = df
'uploader_url': video_uploader_url,
'upload_date': upload_date,
'license': video_license,
- 'creator': video_creator,
+ 'creator': video_creator or artist,
'title': video_title,
- 'alt_title': video_alt_title,
+ 'alt_title': video_alt_title or track,
'thumbnail': video_thumbnail,
'description': video_description,
'categories': video_categories,
'series': series,
'season_number': season_number,
'episode_number': episode_number,
+ 'track': track,
+ 'artist': artist,
}
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+from uuid import uuid4
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ try_get,
+ urlencode_postdata,
+)
+
+
+class ZattooBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'zattoo'
+ _HOST_URL = 'https://zattoo.com'
+
+ _power_guide_hash = None
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if not username or not password:
+ self.raise_login_required(
+ 'A valid %s account is needed to access this media.'
+ % self._NETRC_MACHINE)
+
+ try:
+ data = self._download_json(
+ '%s/zapi/v2/account/login' % self._HOST_URL, None, 'Logging in',
+ data=urlencode_postdata({
+ 'login': username,
+ 'password': password,
+ 'remember': 'true',
+ }), headers={
+ 'Referer': '%s/login' % self._HOST_URL,
+ 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+ })
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+ raise ExtractorError(
+ 'Unable to login: incorrect username and/or password',
+ expected=True)
+ raise
+
+ self._power_guide_hash = data['session']['power_guide_hash']
+
+ def _real_initialize(self):
+ webpage = self._download_webpage(
+ self._HOST_URL, None, 'Downloading app token')
+ app_token = self._html_search_regex(
+ r'appToken\s*=\s*(["\'])(?P<token>(?:(?!\1).)+?)\1',
+ webpage, 'app token', group='token')
+ app_version = self._html_search_regex(
+ r'<!--\w+-(.+?)-', webpage, 'app version', default='2.8.2')
+
+ # Will set up appropriate cookies
+ self._request_webpage(
+ '%s/zapi/v2/session/hello' % self._HOST_URL, None,
+ 'Opening session', data=urlencode_postdata({
+ 'client_app_token': app_token,
+ 'uuid': compat_str(uuid4()),
+ 'lang': 'en',
+ 'app_version': app_version,
+ 'format': 'json',
+ }))
+
+ self._login()
+
+ def _extract_cid(self, video_id, channel_name):
+ channel_groups = self._download_json(
+ '%s/zapi/v2/cached/channels/%s' % (self._HOST_URL,
+ self._power_guide_hash),
+ video_id, 'Downloading channel list',
+ query={'details': False})['channel_groups']
+ channel_list = []
+ for chgrp in channel_groups:
+ channel_list.extend(chgrp['channels'])
+ try:
+ return next(
+ chan['cid'] for chan in channel_list
+ if chan.get('cid') and (
+ chan.get('display_alias') == channel_name or
+ chan.get('cid') == channel_name))
+ except StopIteration:
+ raise ExtractorError('Could not extract channel id')
+
+ def _extract_cid_and_video_info(self, video_id):
+ data = self._download_json(
+ '%s/zapi/program/details' % self._HOST_URL,
+ video_id,
+ 'Downloading video information',
+ query={
+ 'program_id': video_id,
+ 'complete': True
+ })
+
+ p = data['program']
+ cid = p['cid']
+
+ info_dict = {
+ 'id': video_id,
+ 'title': p.get('title') or p['episode_title'],
+ 'description': p.get('description'),
+ 'thumbnail': p.get('image_url'),
+ 'creator': p.get('channel_name'),
+ 'episode': p.get('episode_title'),
+ 'episode_number': int_or_none(p.get('episode_number')),
+ 'season_number': int_or_none(p.get('season_number')),
+ 'release_year': int_or_none(p.get('year')),
+ 'categories': try_get(p, lambda x: x['categories'], list),
+ }
+
+ return cid, info_dict
+
+ def _extract_formats(self, cid, video_id, record_id=None, is_live=False):
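+ # request each stream type separately; individual failures are non-fatal
+ # so the remaining types still get a chance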
+ postdata_common = {
+ 'https_watch_urls': True,
+ }
+
+ if is_live:
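+ # ask for 10800 s (3 hours) of timeshift on live streams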
+ postdata_common.update({'timeshift': 10800})
+ url = '%s/zapi/watch/live/%s' % (self._HOST_URL, cid)
+ elif record_id:
+ url = '%s/zapi/watch/recording/%s' % (self._HOST_URL, record_id)
+ else:
+ url = '%s/zapi/watch/recall/%s/%s' % (self._HOST_URL, cid, video_id)
+
+ formats = []
+ for stream_type in ('dash', 'hls', 'hls5', 'hds'):
+ postdata = postdata_common.copy()
+ postdata['stream_type'] = stream_type
+
+ data = self._download_json(
+ url, video_id, 'Downloading %s formats' % stream_type.upper(),
+ data=urlencode_postdata(postdata), fatal=False)
+ if not data:
+ continue
+
+ watch_urls = try_get(
+ data, lambda x: x['stream']['watch_urls'], list)
+ if not watch_urls:
+ continue
+
+ for watch in watch_urls:
+ if not isinstance(watch, dict):
+ continue
+ watch_url = watch.get('url')
+ if not watch_url or not isinstance(watch_url, compat_str):
+ continue
+ format_id_list = [stream_type]
+ maxrate = watch.get('maxrate')
+ if maxrate:
+ format_id_list.append(compat_str(maxrate))
+ audio_channel = watch.get('audio_channel')
+ if audio_channel:
+ format_id_list.append(compat_str(audio_channel))
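+ # 'A' appears to denote the primary audio track; give it a preference boost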
+ preference = 1 if audio_channel == 'A' else None
+ format_id = '-'.join(format_id_list)
+ if stream_type in ('dash', 'dash_widevine', 'dash_playready'):
+ this_formats = self._extract_mpd_formats(
+ watch_url, video_id, mpd_id=format_id, fatal=False)
+ elif stream_type in ('hls', 'hls5', 'hls5_fairplay'):
+ this_formats = self._extract_m3u8_formats(
+ watch_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id=format_id,
+ fatal=False)
+ elif stream_type == 'hds':
+ this_formats = self._extract_f4m_formats(
+ watch_url, video_id, f4m_id=format_id, fatal=False)
+ elif stream_type == 'smooth_playready':
+ this_formats = self._extract_ism_formats(
+ watch_url, video_id, ism_id=format_id, fatal=False)
+ else:
+ assert False
+ for this_format in this_formats:
+ this_format['preference'] = preference
+ formats.extend(this_formats)
+ self._sort_formats(formats)
+ return formats
+
+ def _extract_video(self, channel_name, video_id, record_id=None, is_live=False):
+ if is_live:
+ cid = self._extract_cid(video_id, channel_name)
+ info_dict = {
+ 'id': channel_name,
+ 'title': self._live_title(channel_name),
+ 'is_live': True,
+ }
+ else:
+ cid, info_dict = self._extract_cid_and_video_info(video_id)
+ formats = self._extract_formats(
+ cid, video_id, record_id=record_id, is_live=is_live)
+ info_dict['formats'] = formats
+ return info_dict
+
+
+class QuicklineBaseIE(ZattooBaseIE):
+ _NETRC_MACHINE = 'quickline'
+ _HOST_URL = 'https://mobiltv.quickline.com'
+
+
+class QuicklineIE(QuicklineBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P<channel>[^/]+)/(?P<id>[0-9]+)'
+
+ _TEST = {
+ 'url': 'https://mobiltv.quickline.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste',
+ 'only_matching': True,
+ }
+
+ def _real_extract(self, url):
+ channel_name, video_id = re.match(self._VALID_URL, url).groups()
+ return self._extract_video(channel_name, video_id)
+
+
+class QuicklineLiveIE(QuicklineBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P<id>[^/]+)'
+
+ _TEST = {
+ 'url': 'https://mobiltv.quickline.com/watch/srf1',
+ 'only_matching': True,
+ }
+
+ @classmethod
+ def suitable(cls, url):
+ return False if QuicklineIE.suitable(url) else super(QuicklineLiveIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ channel_name = video_id = self._match_id(url)
+ return self._extract_video(channel_name, video_id, is_live=True)
+
+
+class ZattooIE(ZattooBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P<channel>[^/]+?)/(?P<id>[0-9]+)[^/]+(?:/(?P<recid>[0-9]+))?'
+
+ # Since regular videos are only available for 7 days and recorded videos
+ # are only available for a specific user, we cannot have detailed tests.
+ _TESTS = [{
+ 'url': 'https://zattoo.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://zattoo.com/watch/srf_zwei/132905652-eishockey-spengler-cup/102791477/1512211800000/1514433500000/92000',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ channel_name, video_id, record_id = re.match(self._VALID_URL, url).groups()
+ return self._extract_video(channel_name, video_id, record_id)
+
+
+class ZattooLiveIE(ZattooBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P<id>[^/]+)'
+
+ _TEST = {
+ 'url': 'https://zattoo.com/watch/srf1',
+ 'only_matching': True,
+ }
+
+ @classmethod
+ def suitable(cls, url):
+ return False if ZattooIE.suitable(url) else super(ZattooLiveIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ channel_name = video_id = self._match_id(url)
+ return self._extract_video(channel_name, video_id, is_live=True)
network.add_option(
'--proxy', dest='proxy',
default=None, metavar='URL',
- help='Use the specified HTTP/HTTPS/SOCKS proxy. To enable experimental '
+ help='Use the specified HTTP/HTTPS/SOCKS proxy. To enable a '
'SOCKS proxy, specify a proper scheme. For example '
'socks5://127.0.0.1:1080/. Pass in an empty string (--proxy "") '
'for direct connection')
'--geo-verification-proxy',
dest='geo_verification_proxy', default=None, metavar='URL',
help='Use this proxy to verify the IP address for some geo-restricted sites. '
- 'The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading.')
+ 'The default proxy specified by --proxy (or none, if the option is not present) is used for the actual downloading.')
geo.add_option(
'--cn-verification-proxy',
dest='cn_verification_proxy', default=None, metavar='URL',
geo.add_option(
'--geo-bypass',
action='store_true', dest='geo_bypass', default=True,
- help='Bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)')
+ help='Bypass geographic restriction via faking X-Forwarded-For HTTP header')
geo.add_option(
'--no-geo-bypass',
action='store_false', dest='geo_bypass', default=True,
- help='Do not bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)')
+ help='Do not bypass geographic restriction via faking X-Forwarded-For HTTP header')
geo.add_option(
'--geo-bypass-country', metavar='CODE',
dest='geo_bypass_country', default=None,
- help='Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code (experimental)')
+ help='Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code')
+ geo.add_option(
+ '--geo-bypass-ip-block', metavar='IP_BLOCK',
+ dest='geo_bypass_ip_block', default=None,
+ help='Force bypass geographic restriction with explicitly provided IP block in CIDR notation')
selection = optparse.OptionGroup(parser, 'Video Selection')
selection.add_option(
downloader.add_option(
'--xattr-set-filesize',
dest='xattr_set_filesize', action='store_true',
- help='Set file xattribute ytdl.filesize with expected file size (experimental)')
+ help='Set file xattribute ytdl.filesize with expected file size')
downloader.add_option(
'--hls-prefer-native',
dest='hls_prefer_native', action='store_true', default=None,
def determine_ext(url, default_ext='unknown_video'):
- if url is None:
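+ # a URL without a dot cannot carry an extension, so short-circuit to the default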
+ if url is None or '.' not in url:
return default_ext
guess = url.partition('?')[0].rpartition('.')[2]
if re.match(r'^[A-Za-z0-9]+$', guess):
return v
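+# Merge dicts left to right: for each key the first non-None value wins,
+# except that an empty string may later be replaced by a non-empty one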
+def merge_dicts(*dicts):
+ merged = {}
+ for a_dict in dicts:
+ for k, v in a_dict.items():
+ if v is None:
+ continue
+ if (k not in merged or
+ (isinstance(v, compat_str) and v and
+ isinstance(merged[k], compat_str) and
+ not merged[k])):
+ merged[k] = v
+ return merged
+
+
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
return int(m.group('age'))
if s in US_RATINGS:
return US_RATINGS[s]
- return TV_PARENTAL_GUIDELINES.get(s)
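+ # normalize 'TV-MA', 'TV_MA' and 'TVMA' spellings onto the canonical 'TV-' keys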
+ m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
+ if m:
+ return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
+ return None
def strip_jsonp(code):
]
_x = functools.partial(xpath_with_ns, ns_map={
+ 'xml': 'http://www.w3.org/XML/1998/namespace',
'ttml': 'http://www.w3.org/ns/ttml',
'tts': 'http://www.w3.org/ns/ttml#styling',
})
repeat = False
while True:
for style in dfxp.findall(_x('.//ttml:style')):
- style_id = style.get('id')
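+ # some TTML documents namespace the id attribute as xml:id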
+ style_id = style.get('id') or style.get(_x('xml:id'))
+ if not style_id:
+ continue
parent_style_id = style.get('style')
if parent_style_id:
if parent_style_id not in styles:
}
@classmethod
- def random_ipv4(cls, code):
- block = cls._country_ip_map.get(code.upper())
- if not block:
- return None
+ def random_ipv4(cls, code_or_block):
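+ # a two-letter value is treated as a country code, anything longer as a literal CIDR block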
+ if len(code_or_block) == 2:
+ block = cls._country_ip_map.get(code_or_block.upper())
+ if not block:
+ return None
+ else:
+ block = code_or_block
addr, preflen = block.split('/')
addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
addr_max = addr_min | (0xffffffff >> int(preflen))
from __future__ import unicode_literals
-__version__ = '2018.04.25'
+__version__ = '2018.06.18'