]> Raphaël G. Git Repositories - youtubedl/commitdiff
New upstream version 2018.06.18
authorRogério Brito <rbrito@ime.usp.br>
Tue, 19 Jun 2018 06:59:16 +0000 (03:59 -0300)
committerRogério Brito <rbrito@ime.usp.br>
Tue, 19 Jun 2018 06:59:16 +0000 (03:59 -0300)
138 files changed:
ChangeLog
README.md
README.txt
devscripts/gh-pages/generate-download.py
devscripts/gh-pages/update-copyright.py
docs/supportedsites.md
setup.cfg
test/test_utils.py
youtube-dl
youtube-dl.1
youtube-dl.bash-completion
youtube-dl.fish
youtube-dl.zsh
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/downloader/common.py
youtube_dl/downloader/fragment.py
youtube_dl/downloader/http.py
youtube_dl/downloader/rtmp.py
youtube_dl/extractor/abc.py
youtube_dl/extractor/adn.py
youtube_dl/extractor/animeondemand.py
youtube_dl/extractor/anvato.py
youtube_dl/extractor/apa.py [new file with mode: 0644]
youtube_dl/extractor/atresplayer.py
youtube_dl/extractor/audimedia.py
youtube_dl/extractor/audiomack.py
youtube_dl/extractor/bambuser.py
youtube_dl/extractor/bbc.py
youtube_dl/extractor/bellmedia.py
youtube_dl/extractor/bilibili.py
youtube_dl/extractor/brightcove.py
youtube_dl/extractor/businessinsider.py [new file with mode: 0644]
youtube_dl/extractor/cammodels.py [new file with mode: 0644]
youtube_dl/extractor/camtube.py [new file with mode: 0644]
youtube_dl/extractor/cbc.py
youtube_dl/extractor/chaturbate.py
youtube_dl/extractor/cloudflarestream.py [new file with mode: 0644]
youtube_dl/extractor/common.py
youtube_dl/extractor/crackle.py
youtube_dl/extractor/crunchyroll.py
youtube_dl/extractor/ctvnews.py
youtube_dl/extractor/curiositystream.py
youtube_dl/extractor/dailymotion.py
youtube_dl/extractor/discovery.py
youtube_dl/extractor/discoverynetworks.py
youtube_dl/extractor/dplay.py
youtube_dl/extractor/dramafever.py
youtube_dl/extractor/dreisat.py
youtube_dl/extractor/dtube.py [new file with mode: 0644]
youtube_dl/extractor/dvtv.py
youtube_dl/extractor/expressen.py [new file with mode: 0644]
youtube_dl/extractor/extractors.py
youtube_dl/extractor/facebook.py
youtube_dl/extractor/fc2.py
youtube_dl/extractor/francetv.py
youtube_dl/extractor/funimation.py
youtube_dl/extractor/funk.py
youtube_dl/extractor/gdcvault.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/globo.py
youtube_dl/extractor/go.py
youtube_dl/extractor/go90.py
youtube_dl/extractor/hidive.py
youtube_dl/extractor/hrti.py
youtube_dl/extractor/imdb.py
youtube_dl/extractor/imgur.py
youtube_dl/extractor/inc.py
youtube_dl/extractor/indavideo.py
youtube_dl/extractor/iqiyi.py
youtube_dl/extractor/itv.py
youtube_dl/extractor/izlesene.py
youtube_dl/extractor/kaltura.py
youtube_dl/extractor/leeco.py
youtube_dl/extractor/limelight.py
youtube_dl/extractor/markiza.py [new file with mode: 0644]
youtube_dl/extractor/minoto.py
youtube_dl/extractor/mixcloud.py
youtube_dl/extractor/mlb.py
youtube_dl/extractor/moniker.py [deleted file]
youtube_dl/extractor/mychannels.py [moved from youtube_dl/extractor/makerschannel.py with 59% similarity]
youtube_dl/extractor/nbc.py
youtube_dl/extractor/nexx.py
youtube_dl/extractor/nhl.py
youtube_dl/extractor/nick.py
youtube_dl/extractor/niconico.py
youtube_dl/extractor/ninecninemedia.py
youtube_dl/extractor/noco.py
youtube_dl/extractor/npo.py
youtube_dl/extractor/nrk.py
youtube_dl/extractor/openload.py
youtube_dl/extractor/packtpub.py
youtube_dl/extractor/patreon.py
youtube_dl/extractor/pbs.py
youtube_dl/extractor/peertube.py [new file with mode: 0644]
youtube_dl/extractor/pluralsight.py
youtube_dl/extractor/rbmaradio.py
youtube_dl/extractor/rds.py
youtube_dl/extractor/reddit.py
youtube_dl/extractor/roosterteeth.py
youtube_dl/extractor/rtbf.py
youtube_dl/extractor/safari.py
youtube_dl/extractor/sina.py
youtube_dl/extractor/sixplay.py
youtube_dl/extractor/soundcloud.py
youtube_dl/extractor/spiegel.py
youtube_dl/extractor/spike.py
youtube_dl/extractor/tbs.py
youtube_dl/extractor/teamcoco.py
youtube_dl/extractor/tennistv.py
youtube_dl/extractor/tf1.py
youtube_dl/extractor/tubitv.py
youtube_dl/extractor/tumblr.py
youtube_dl/extractor/tunein.py
youtube_dl/extractor/turner.py
youtube_dl/extractor/tv4.py
youtube_dl/extractor/tvnet.py [new file with mode: 0644]
youtube_dl/extractor/tvplay.py
youtube_dl/extractor/twitch.py
youtube_dl/extractor/twitter.py
youtube_dl/extractor/udemy.py
youtube_dl/extractor/ufctv.py
youtube_dl/extractor/vessel.py
youtube_dl/extractor/vidzi.py
youtube_dl/extractor/viewlift.py
youtube_dl/extractor/viki.py
youtube_dl/extractor/vimeo.py
youtube_dl/extractor/vk.py
youtube_dl/extractor/wat.py
youtube_dl/extractor/watchbox.py
youtube_dl/extractor/wimp.py
youtube_dl/extractor/xiami.py
youtube_dl/extractor/yandexmusic.py
youtube_dl/extractor/youtube.py
youtube_dl/extractor/zattoo.py [new file with mode: 0644]
youtube_dl/options.py
youtube_dl/utils.py
youtube_dl/version.py

index 4a3df67df4bda200f29cf4e6e551949c3af54cf2..fe50870977efbb353ef0c9f56530320de0d9ca45 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,207 @@
+version 2018.06.18
+
+Core
+* [downloader/rtmp] Fix downloading in verbose mode (#16736)
+
+Extractors
++ [markiza] Add support for markiza.sk (#16750)
+* [wat] Try all supported adaptive URLs
++ [6play] Add support for rtlplay.be and extract hd usp formats
++ [rtbf] Add support for audio and live streams (#9638, #11923)
++ [rtbf] Extract HLS, DASH and all HTTP formats
++ [rtbf] Extract subtitles
++ [rtbf] Fixup specific HTTP URLs (#16101)
++ [expressen] Add support for expressen.se
+* [vidzi] Fix extraction (#16678)
+* [pbs] Improve extraction (#16623, #16684)
+* [bilibili] Restrict cid regular expression (#16638, #16734)
+
+
+version 2018.06.14
+
+Core
+* [downloader/http] Fix retry on error when streaming to stdout (#16699)
+
+Extractors
++ [discoverynetworks] Add support for disco-api videos (#16724)
++ [dailymotion] Add support for password protected videos (#9789)
++ [abc:iview] Add support for livestreams (#12354)
+* [abc:iview] Fix extraction (#16704)
++ [crackle] Add support for sonycrackle.com (#16698)
++ [tvnet] Add support for tvnet.gov.vn (#15462)
+* [nrk] Update API hosts and try all previously known ones (#16690)
+* [wimp] Fix Youtube embeds extraction
+
+
+version 2018.06.11
+
+Extractors
+* [npo] Extend URL regular expression and add support for npostart.nl (#16682)
++ [inc] Add support for another embed schema (#16666)
+* [tv4] Fix format extraction (#16650)
++ [nexx] Add support for free cdn (#16538)
++ [pbs] Add another cove id pattern (#15373)
++ [rbmaradio] Add support for 192k format (#16631)
+
+
+version 2018.06.04
+
+Extractors
++ [camtube] Add support for camtube.co
++ [twitter:card] Extract guest token (#16609)
++ [chaturbate] Use geo verification headers
++ [bbc] Add support for bbcthree (#16612)
+* [youtube] Move metadata extraction after video availability check
++ [youtube] Extract track and artist
++ [safari] Add support for new URL schema (#16614)
+* [adn] Fix extraction
+
+
+version 2018.06.02
+
+Core
+* [utils] Improve determine_ext
+
+Extractors
++ [facebook] Add support for tahoe player videos (#15441, #16554)
+* [cbc] Improve extraction (#16583, #16593)
+* [openload] Improve ext extraction (#16595)
++ [twitter:card] Add support for another endpoint (#16586)
++ [openload] Add support for oload.win and oload.download (#16592)
+* [audimedia] Fix extraction (#15309)
++ [francetv] Add support for sport.francetvinfo.fr (#15645)
+* [mlb] Improve extraction (#16587)
+- [nhl] Remove old extractors
+* [rbmaradio] Check formats availability (#16585)
+
+
+version 2018.05.30
+
+Core
+* [downloader/rtmp] Generalize download messages and report time elapsed
+  on finish
+* [downloader/rtmp] Gracefully handle live streams interrupted by user
+
+Extractors
+* [teamcoco] Fix extraction for full episodes (#16573)
+* [spiegel] Fix info extraction (#16538)
++ [apa] Add support for apa.at (#15041, #15672)
++ [bellmedia] Add support for bnnbloomberg.ca (#16560)
++ [9c9media] Extract MPD formats and subtitles
+* [cammodels] Use geo verification headers
++ [ufctv] Add support for authentication (#16542)
++ [cammodels] Add support for cammodels.com (#14499)
+* [utils] Fix style id extraction for namespaced id attribute in dfxp2srt
+  (#16551)
+* [soundcloud] Detect format extension (#16549)
+* [cbc] Fix playlist title extraction (#16502)
++ [tumblr] Detect and report sensitive media (#13829)
++ [tumblr] Add support for authentication (#15133)
+
+
+version 2018.05.26
+
+Core
+* [utils] Improve parse_age_limit
+
+Extractors
+* [audiomack] Stringify video id (#15310)
+* [izlesene] Fix extraction (#16233, #16271, #16407)
++ [indavideo] Add support for generic embeds (#11989)
+* [indavideo] Fix extraction (#11221)
+* [indavideo] Sign download URLs (#16174)
++ [peertube] Add support for PeerTube based sites (#16301, #16329)
+* [imgur] Fix extraction (#16537)
++ [hidive] Add support for authentication (#16534)
++ [nbc] Add support for stream.nbcsports.com (#13911)
++ [viewlift] Add support for hoichoi.tv (#16536)
+* [go90] Extract age limit and detect DRM protection (#10127)
+* [viewlift] Fix extraction for snagfilms.com (#15766)
+* [globo] Improve extraction (#4189)
+    * Add support for authentication
+    * Simplify URL signing
+    * Extract DASH and MSS formats
+* [leeco] Fix extraction (#16464)
+* [teamcoco] Add fallback for format extraction (#16484)
+* [teamcoco] Improve URL regular expression (#16484)
+* [imdb] Improve extraction (#4085, #14557)
+
+
+version 2018.05.18
+
+Extractors
+* [vimeo:likes] Relax URL regular expression and fix single page likes
+  extraction (#16475)
+* [pluralsight] Fix clip id extraction (#16460)
++ [mychannels] Add support for mychannels.com (#15334)
+- [moniker] Remove extractor (#15336)
+* [pbs] Fix embed data extraction (#16474)
++ [mtv] Add support for paramountnetwork.com and bellator.com (#15418)
+* [youtube] Fix hd720 format position
+* [dailymotion] Remove fragment part from m3u8 URLs (#8915)
+* [3sat] Improve extraction (#15350)
+    * Extract all formats
+    * Extract more format metadata
+    * Improve format sorting
+    * Use hls native downloader
+    * Detect and bypass geo-restriction
++ [dtube] Add support for d.tube (#15201)
+* [options] Fix typo (#16450)
+* [youtube] Improve format filesize extraction (#16453)
+* [youtube] Make uploader extraction non fatal (#16444)
+* [youtube] Fix extraction for embed restricted live streams (#16433)
+* [nbc] Improve info extraction (#16440)
+* [twitch:clips] Fix extraction (#16429)
+* [redditr] Relax URL regular expression (#16426, #16427)
+* [mixcloud] Bypass throttling for HTTP formats (#12579, #16424)
++ [nick] Add support for nickjr.de (#13230)
+* [teamcoco] Fix extraction (#16374)
+
+
+version 2018.05.09
+
+Core
+* [YoutubeDL] Ensure ext exists for automatic captions
+* Introduce --geo-bypass-ip-block
+
+Extractors
++ [udemy] Extract asset captions
++ [udemy] Extract stream URLs (#16372)
++ [businessinsider] Add support for businessinsider.com (#16387, #16388, #16389)
++ [cloudflarestream] Add support for cloudflarestream.com (#16375)
+* [watchbox] Fix extraction (#16356)
+* [discovery] Extract Affiliate/Anonymous Auth Token from cookies (#14954)
++ [itv:btcc] Add support for itv.com/btcc (#16139)
+* [tunein] Use live title for live streams (#16347)
+* [itv] Improve extraction (#16253)
+
+
+version 2018.05.01
+
+Core
+* [downloader/fragment] Restart download if .ytdl file is corrupt (#16312)
++ [extractor/common] Extract interaction statistic
++ [utils] Add merge_dicts
++ [extractor/common] Add _download_json_handle
+
+Extractors
+* [kaltura] Improve iframe embeds detection (#16337)
++ [udemy] Extract outputs renditions (#16289, #16291, #16320, #16321, #16334,
+  #16335)
++ [zattoo] Add support for zattoo.com and mobiltv.quickline.com (#14668, #14676)
+* [yandexmusic] Convert release_year to int
+* [udemy] Override _download_webpage_handle instead of _download_webpage
+* [xiami] Override _download_webpage_handle instead of _download_webpage
+* [yandexmusic] Override _download_webpage_handle instead of _download_webpage
+* [youtube] Correctly disable polymer on all requests (#16323, #16326)
+* [generic] Prefer enclosures over links in RSS feeds (#16189)
++ [redditr] Add support for old.reddit.com URLs (#16274)
+* [nrktv] Update API host (#16324)
++ [imdb] Extract all formats (#16249)
++ [vimeo] Extract JSON-LD (#16295)
+* [funk:channel] Improve extraction (#16285)
+
+
 version 2018.04.25
 
 Core
index 5af0f387be8e34800e97acc46033d0702f90284e..499a0c2067ca39a201a8379a7ce1c2bf7cdb755e 100644 (file)
--- a/README.md
+++ b/README.md
@@ -93,8 +93,8 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
 
 ## Network Options:
     --proxy URL                      Use the specified HTTP/HTTPS/SOCKS proxy.
-                                     To enable experimental SOCKS proxy, specify
-                                     a proper scheme. For example
+                                     To enable SOCKS proxy, specify a proper
+                                     scheme. For example
                                      socks5://127.0.0.1:1080/. Pass in an empty
                                      string (--proxy "") for direct connection
     --socket-timeout SECONDS         Time to wait before giving up, in seconds
@@ -106,16 +106,18 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
     --geo-verification-proxy URL     Use this proxy to verify the IP address for
                                      some geo-restricted sites. The default
                                      proxy specified by --proxy (or none, if the
-                                     options is not present) is used for the
+                                     option is not present) is used for the
                                      actual downloading.
     --geo-bypass                     Bypass geographic restriction via faking
-                                     X-Forwarded-For HTTP header (experimental)
+                                     X-Forwarded-For HTTP header
     --no-geo-bypass                  Do not bypass geographic restriction via
                                      faking X-Forwarded-For HTTP header
-                                     (experimental)
     --geo-bypass-country CODE        Force bypass geographic restriction with
                                      explicitly provided two-letter ISO 3166-2
-                                     country code (experimental)
+                                     country code
+    --geo-bypass-ip-block IP_BLOCK   Force bypass geographic restriction with
+                                     explicitly provided IP block in CIDR
+                                     notation
 
 ## Video Selection:
     --playlist-start NUMBER          Playlist video to start at (default is 1)
@@ -206,7 +208,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
     --playlist-reverse               Download playlist videos in reverse order
     --playlist-random                Download playlist videos in random order
     --xattr-set-filesize             Set file xattribute ytdl.filesize with
-                                     expected file size (experimental)
+                                     expected file size
     --hls-prefer-native              Use the native HLS downloader instead of
                                      ffmpeg
     --hls-prefer-ffmpeg              Use ffmpeg instead of the native HLS
index 55ff41210d91f0e75d9f91dc36775048c561297d..fec09ab4af79fa02e7dc6f9a725644e705f9b384 100644 (file)
@@ -116,8 +116,8 @@ OPTIONS
 Network Options:
 
     --proxy URL                      Use the specified HTTP/HTTPS/SOCKS proxy.
-                                     To enable experimental SOCKS proxy, specify
-                                     a proper scheme. For example
+                                     To enable SOCKS proxy, specify a proper
+                                     scheme. For example
                                      socks5://127.0.0.1:1080/. Pass in an empty
                                      string (--proxy "") for direct connection
     --socket-timeout SECONDS         Time to wait before giving up, in seconds
@@ -131,16 +131,18 @@ Geo Restriction:
     --geo-verification-proxy URL     Use this proxy to verify the IP address for
                                      some geo-restricted sites. The default
                                      proxy specified by --proxy (or none, if the
-                                     options is not present) is used for the
+                                     option is not present) is used for the
                                      actual downloading.
     --geo-bypass                     Bypass geographic restriction via faking
-                                     X-Forwarded-For HTTP header (experimental)
+                                     X-Forwarded-For HTTP header
     --no-geo-bypass                  Do not bypass geographic restriction via
                                      faking X-Forwarded-For HTTP header
-                                     (experimental)
     --geo-bypass-country CODE        Force bypass geographic restriction with
                                      explicitly provided two-letter ISO 3166-2
-                                     country code (experimental)
+                                     country code
+    --geo-bypass-ip-block IP_BLOCK   Force bypass geographic restriction with
+                                     explicitly provided IP block in CIDR
+                                     notation
 
 
 Video Selection:
@@ -235,7 +237,7 @@ Download Options:
     --playlist-reverse               Download playlist videos in reverse order
     --playlist-random                Download playlist videos in random order
     --xattr-set-filesize             Set file xattribute ytdl.filesize with
-                                     expected file size (experimental)
+                                     expected file size
     --hls-prefer-native              Use the native HLS downloader instead of
                                      ffmpeg
     --hls-prefer-ffmpeg              Use ffmpeg instead of the native HLS
index fcd7e1dff663f1607c2842081b40dc1890cf677a..a873d32ee437d1620f52a9e8fb06bc6f25359a75 100755 (executable)
@@ -1,27 +1,22 @@
 #!/usr/bin/env python3
 from __future__ import unicode_literals
 
-import hashlib
-import urllib.request
 import json
 
 versions_info = json.load(open('update/versions.json'))
 version = versions_info['latest']
-URL = versions_info['versions'][version]['bin'][0]
-
-data = urllib.request.urlopen(URL).read()
+version_dict = versions_info['versions'][version]
 
 # Read template page
 with open('download.html.in', 'r', encoding='utf-8') as tmplf:
     template = tmplf.read()
 
-sha256sum = hashlib.sha256(data).hexdigest()
 template = template.replace('@PROGRAM_VERSION@', version)
-template = template.replace('@PROGRAM_URL@', URL)
-template = template.replace('@PROGRAM_SHA256SUM@', sha256sum)
-template = template.replace('@EXE_URL@', versions_info['versions'][version]['exe'][0])
-template = template.replace('@EXE_SHA256SUM@', versions_info['versions'][version]['exe'][1])
-template = template.replace('@TAR_URL@', versions_info['versions'][version]['tar'][0])
-template = template.replace('@TAR_SHA256SUM@', versions_info['versions'][version]['tar'][1])
+template = template.replace('@PROGRAM_URL@', version_dict['bin'][0])
+template = template.replace('@PROGRAM_SHA256SUM@', version_dict['bin'][1])
+template = template.replace('@EXE_URL@', version_dict['exe'][0])
+template = template.replace('@EXE_SHA256SUM@', version_dict['exe'][1])
+template = template.replace('@TAR_URL@', version_dict['tar'][0])
+template = template.replace('@TAR_SHA256SUM@', version_dict['tar'][1])
 with open('download.html', 'w', encoding='utf-8') as dlf:
     dlf.write(template)
index e6c3abc8d8c716db6adbb62598a9d9179fcaa2da..61487f9258862aeb4880fd6c8df0a91a76804d8a 100755 (executable)
@@ -13,7 +13,7 @@ year = str(datetime.datetime.now().year)
 for fn in glob.glob('*.html*'):
     with io.open(fn, encoding='utf-8') as f:
         content = f.read()
-    newc = re.sub(r'(?P<copyright>Copyright © 2006-)(?P<year>[0-9]{4})', 'Copyright © 2006-' + year, content)
+    newc = re.sub(r'(?P<copyright>Copyright © 2011-)(?P<year>[0-9]{4})', 'Copyright © 2011-' + year, content)
     if content != newc:
         tmpFn = fn + '.part'
         with io.open(tmpFn, 'wt', encoding='utf-8') as outf:
index a110f687b1ad0c84914d4c403073bea6458c06cc..432a7ba934125014ed4c73ca3b26eec13619bcce 100644 (file)
@@ -15,7 +15,6 @@
  - **8tracks**
  - **91porn**
  - **9c9media**
- - **9c9media:stack**
  - **9gag**
  - **9now.com.au**
  - **abc.net.au**
@@ -48,6 +47,7 @@
  - **anitube.se**
  - **Anvato**
  - **AnySex**
+ - **APA**
  - **Aparat**
  - **AppleConnect**
  - **AppleDaily**: 臺灣蘋果日報
  - **Beatport**
  - **Beeg**
  - **BehindKink**
+ - **Bellator**
  - **BellMedia**
  - **Bet**
  - **Bigflix**
  - **BRMediathek**: Bayerischer Rundfunk Mediathek
  - **bt:article**: Bergens Tidende Articles
  - **bt:vestlendingen**: Bergens Tidende - Vestlendingen
+ - **BusinessInsider**
  - **BuzzFeed**
  - **BYUtv**
  - **Camdemy**
  - **CamdemyFolder**
+ - **CamModels**
+ - **CamTube**
  - **CamWithHer**
  - **canalc2.tv**
  - **Canalplus**: mycanal.fr and piwiplus.fr
  - **ClipRs**
  - **Clipsyndicate**
  - **CloserToTruth**
+ - **CloudflareStream**
  - **cloudtime**: CloudTime
  - **Cloudy**
  - **Clubic**
  - **DrTuber**
  - **drtv**
  - **drtv:live**
+ - **DTube**
  - **Dumpert**
  - **dvtv**: http://video.aktualne.cz/
  - **dw**
  - **Europa**
  - **EveryonesMixtape**
  - **ExpoTV**
+ - **Expressen**
  - **ExtremeTube**
  - **EyedoTV**
  - **facebook**
  - **ImgurAlbum**
  - **Ina**
  - **Inc**
- - **Indavideo**
  - **IndavideoEmbed**
  - **InfoQ**
  - **Instagram**
  - **Ir90Tv**
  - **ITTF**
  - **ITV**
+ - **ITVBTCC**
  - **ivi**: ivi.ru
  - **ivi:compilation**: ivi.ru compilations
  - **ivideon**: Ivideon TV
  - **mailru**: Видео@Mail.Ru
  - **mailru:music**: Музыка@Mail.Ru
  - **mailru:music:search**: Музыка@Mail.Ru
- - **MakersChannel**
  - **MakerTV**
  - **mangomolo:live**
  - **mangomolo:video**
  - **ManyVids**
+ - **Markiza**
+ - **MarkizaPage**
  - **massengeschmack.tv**
  - **MatchTV**
  - **MDR**: MDR.DE and KiKA
  - **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
  - **Mofosex**
  - **Mojvideo**
- - **Moniker**: allmyvideos.net and vidspot.net
  - **Morningstar**: morningstar.com
  - **Motherless**
  - **MotherlessGroup**
  - **mva:course**: Microsoft Virtual Academy courses
  - **Mwave**
  - **MwaveMeetGreet**
+ - **MyChannels**
  - **MySpace**
  - **MySpace:album**
  - **MySpass**
  - **nbcolympics**
  - **nbcolympics:stream**
  - **NBCSports**
+ - **NBCSportsStream**
  - **NBCSportsVPlayer**
  - **ndr**: NDR.de - Norddeutscher Rundfunk
  - **ndr:embed**
  - **nfl.com**
  - **NhkVod**
  - **nhl.com**
- - **nhl.com:news**: NHL news
- - **nhl.com:videocenter**
- - **nhl.com:videocenter:category**: NHL videocenter category
  - **nick.com**
  - **nick.de**
  - **nickelodeon:br**
  - **PacktPubCourse**
  - **PandaTV**: 熊猫TV
  - **pandora.tv**: 판도라TV
+ - **ParamountNetwork**
  - **parliamentlive.tv**: UK parliament videos
  - **Patreon**
  - **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! 
(WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET  (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
  - **pcmag**
  - **PearVideo**
+ - **PeerTube**
  - **People**
  - **PerformGroup**
  - **periscope**: Periscope
  - **qqmusic:playlist**: QQ音乐 - 歌单
  - **qqmusic:singer**: QQ音乐 - 歌手
  - **qqmusic:toplist**: QQ音乐 - 排行榜
+ - **Quickline**
+ - **QuicklineLive**
  - **R7**
  - **R7Article**
  - **radio.de**
  - **Spiegel**
  - **Spiegel:Article**: Articles on spiegel.de
  - **Spiegeltv**
- - **Spike**
+ - **sport.francetvinfo.fr**
  - **Sport5**
  - **SportBoxEmbed**
  - **SportDeutschland**
  - **tvigle**: Интернет-телевидение Tvigle.ru
  - **tvland.com**
  - **TVN24**
+ - **TVNet**
  - **TVNoe**
  - **TVNow**
  - **TVNowList**
  - **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
  - **Zapiks**
  - **Zaq1**
+ - **Zattoo**
+ - **ZattooLive**
  - **ZDF**
  - **ZDFChannel**
  - **zingmp3**: mp3.zing.vn
index 5208f7ae234fa15b52a1aac67b7d909c9d53de11..af9a554c605b1593945dccaa8f2a4f5e10968717 100644 (file)
--- a/setup.cfg
+++ b/setup.cfg
@@ -2,5 +2,5 @@
 universal = True
 
 [flake8]
-exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git
+exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git,venv
 ignore = E402,E501,E731,E741
index 253a7fe176c69491df316af3925b565713e5e5c2..e63af01668ceb84cc2d8452490bc94eb48791f40 100644 (file)
@@ -42,6 +42,7 @@ from youtube_dl.utils import (
     is_html,
     js_to_json,
     limit_length,
+    merge_dicts,
     mimetype2ext,
     month_by_name,
     multipart_encode,
@@ -360,6 +361,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(determine_ext('http://example.com/foo/bar.nonext/?download', None), None)
         self.assertEqual(determine_ext('http://example.com/foo/bar/mp4?download', None), None)
         self.assertEqual(determine_ext('http://example.com/foo/bar.m3u8//?download'), 'm3u8')
+        self.assertEqual(determine_ext('foobar', None), None)
 
     def test_find_xpath_attr(self):
         testxml = '''<root>
@@ -518,6 +520,8 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(parse_age_limit('PG-13'), 13)
         self.assertEqual(parse_age_limit('TV-14'), 14)
         self.assertEqual(parse_age_limit('TV-MA'), 17)
+        self.assertEqual(parse_age_limit('TV14'), 14)
+        self.assertEqual(parse_age_limit('TV_G'), 0)
 
     def test_parse_duration(self):
         self.assertEqual(parse_duration(None), None)
@@ -669,6 +673,17 @@ class TestUtil(unittest.TestCase):
             self.assertEqual(dict_get(d, ('b', 'c', key, )), None)
             self.assertEqual(dict_get(d, ('b', 'c', key, ), skip_false_values=False), false_value)
 
+    def test_merge_dicts(self):
+        self.assertEqual(merge_dicts({'a': 1}, {'b': 2}), {'a': 1, 'b': 2})
+        self.assertEqual(merge_dicts({'a': 1}, {'a': 2}), {'a': 1})
+        self.assertEqual(merge_dicts({'a': 1}, {'a': None}), {'a': 1})
+        self.assertEqual(merge_dicts({'a': 1}, {'a': ''}), {'a': 1})
+        self.assertEqual(merge_dicts({'a': 1}, {}), {'a': 1})
+        self.assertEqual(merge_dicts({'a': None}, {'a': 1}), {'a': 1})
+        self.assertEqual(merge_dicts({'a': ''}, {'a': 1}), {'a': ''})
+        self.assertEqual(merge_dicts({'a': ''}, {'a': 'abc'}), {'a': 'abc'})
+        self.assertEqual(merge_dicts({'a': None}, {'a': ''}, {'a': 'abc'}), {'a': 'abc'})
+
     def test_encode_compat_str(self):
         self.assertEqual(encode_compat_str(b'\xd1\x82\xd0\xb5\xd1\x81\xd1\x82', 'utf-8'), 'тест')
         self.assertEqual(encode_compat_str('тест', 'utf-8'), 'тест')
index 7875c3a7f8bc1194834c7e7c341fc5f1204716e0..32154d701f5e82e28c2f8d711565536ce7c4da07 100755 (executable)
Binary files a/youtube-dl and b/youtube-dl differ
index 976505407a134122a37a351da616e66708bcecaf..89a3fb67efc6c544642483d21c240104df290591 100644 (file)
@@ -116,7 +116,7 @@ Do not emit color codes in output
 .TP
 .B \-\-proxy \f[I]URL\f[]
 Use the specified HTTP/HTTPS/SOCKS proxy.
-To enable experimental SOCKS proxy, specify a proper scheme.
+To enable SOCKS proxy, specify a proper scheme.
 For example socks5://127.0.0.1:1080/.
 Pass in an empty string (\-\-proxy "") for direct connection
 .RS
@@ -145,26 +145,31 @@ Make all connections via IPv6
 .TP
 .B \-\-geo\-verification\-proxy \f[I]URL\f[]
 Use this proxy to verify the IP address for some geo\-restricted sites.
-The default proxy specified by \-\-proxy (or none, if the options is not
+The default proxy specified by \-\-proxy (or none, if the option is not
 present) is used for the actual downloading.
 .RS
 .RE
 .TP
 .B \-\-geo\-bypass
 Bypass geographic restriction via faking X\-Forwarded\-For HTTP header
-(experimental)
 .RS
 .RE
 .TP
 .B \-\-no\-geo\-bypass
 Do not bypass geographic restriction via faking X\-Forwarded\-For HTTP
-header (experimental)
+header
 .RS
 .RE
 .TP
 .B \-\-geo\-bypass\-country \f[I]CODE\f[]
 Force bypass geographic restriction with explicitly provided two\-letter
-ISO 3166\-2 country code (experimental)
+ISO 3166\-2 country code
+.RS
+.RE
+.TP
+.B \-\-geo\-bypass\-ip\-block \f[I]IP_BLOCK\f[]
+Force bypass geographic restriction with explicitly provided IP block in
+CIDR notation
 .RS
 .RE
 .SS Video Selection:
@@ -353,7 +358,7 @@ Download playlist videos in random order
 .RE
 .TP
 .B \-\-xattr\-set\-filesize
-Set file xattribute ytdl.filesize with expected file size (experimental)
+Set file xattribute ytdl.filesize with expected file size
 .RS
 .RE
 .TP
index c1b86f2bf790754cae415486f8dd51bea9906712..1fc0982a0bc57c58a580d5bcd793ddebbb83368b 100644 (file)
@@ -4,7 +4,7 @@ __youtube_dl()
     COMPREPLY=()
     cur="${COMP_WORDS[COMP_CWORD]}"
     prev="${COMP_WORDS[COMP_CWORD-1]}"
-    opts="--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --force-generic-extractor --default-search --ignore-config --config-location --flat-playlist --mark-watched --no-mark-watched --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --geo-verification-proxy --cn-verification-proxy --geo-bypass --no-geo-bypass --geo-bypass-country --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --limit-rate --retries --fragment-retries --skip-unavailable-fragments --abort-on-unavailable-fragment --keep-fragments --buffer-size --no-resize-buffer --http-chunk-size --test --playlist-reverse --playlist-random --xattr-set-filesize --hls-prefer-native --hls-prefer-ffmpeg --hls-use-mpegts --external-downloader --external-downloader-args --batch-file --id --output --autonumber-size --autonumber-start --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info-json --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --max-sleep-interval --format --all-formats --prefer-free-formats --list-formats --youtube-include-dash-manifest 
--youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --ap-mso --ap-username --ap-password --ap-list-mso --extract-audio --audio-format --audio-quality --recode-video --postprocessor-args --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --metadata-from-title --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subs"
+    opts="--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --force-generic-extractor --default-search --ignore-config --config-location --flat-playlist --mark-watched --no-mark-watched --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --geo-verification-proxy --cn-verification-proxy --geo-bypass --no-geo-bypass --geo-bypass-country --geo-bypass-ip-block --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --limit-rate --retries --fragment-retries --skip-unavailable-fragments --abort-on-unavailable-fragment --keep-fragments --buffer-size --no-resize-buffer --http-chunk-size --test --playlist-reverse --playlist-random --xattr-set-filesize --hls-prefer-native --hls-prefer-ffmpeg --hls-use-mpegts --external-downloader --external-downloader-args --batch-file --id --output --autonumber-size --autonumber-start --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info-json --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --max-sleep-interval --format --all-formats --prefer-free-formats --list-formats 
--youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --ap-mso --ap-username --ap-password --ap-list-mso --extract-audio --audio-format --audio-quality --recode-video --postprocessor-args --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --metadata-from-title --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subs"
     keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
     fileopts="-a|--batch-file|--download-archive|--cookies|--load-info"
     diropts="--cache-dir"
index ec98a2e5eb1658b79549ac57e027dfe012500311..05ece67d88008030740752c0e57c089708e3979e 100644 (file)
@@ -15,16 +15,17 @@ complete --command youtube-dl --long-option flat-playlist --description 'Do not
 complete --command youtube-dl --long-option mark-watched --description 'Mark videos watched (YouTube only)'
 complete --command youtube-dl --long-option no-mark-watched --description 'Do not mark videos watched (YouTube only)'
 complete --command youtube-dl --long-option no-color --description 'Do not emit color codes in output'
-complete --command youtube-dl --long-option proxy --description 'Use the specified HTTP/HTTPS/SOCKS proxy. To enable experimental SOCKS proxy, specify a proper scheme. For example socks5://127.0.0.1:1080/. Pass in an empty string (--proxy "") for direct connection'
+complete --command youtube-dl --long-option proxy --description 'Use the specified HTTP/HTTPS/SOCKS proxy. To enable SOCKS proxy, specify a proper scheme. For example socks5://127.0.0.1:1080/. Pass in an empty string (--proxy "") for direct connection'
 complete --command youtube-dl --long-option socket-timeout --description 'Time to wait before giving up, in seconds'
 complete --command youtube-dl --long-option source-address --description 'Client-side IP address to bind to'
 complete --command youtube-dl --long-option force-ipv4 --short-option 4 --description 'Make all connections via IPv4'
 complete --command youtube-dl --long-option force-ipv6 --short-option 6 --description 'Make all connections via IPv6'
-complete --command youtube-dl --long-option geo-verification-proxy --description 'Use this proxy to verify the IP address for some geo-restricted sites. The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading.'
+complete --command youtube-dl --long-option geo-verification-proxy --description 'Use this proxy to verify the IP address for some geo-restricted sites. The default proxy specified by --proxy (or none, if the option is not present) is used for the actual downloading.'
 complete --command youtube-dl --long-option cn-verification-proxy
-complete --command youtube-dl --long-option geo-bypass --description 'Bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)'
-complete --command youtube-dl --long-option no-geo-bypass --description 'Do not bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)'
-complete --command youtube-dl --long-option geo-bypass-country --description 'Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code (experimental)'
+complete --command youtube-dl --long-option geo-bypass --description 'Bypass geographic restriction via faking X-Forwarded-For HTTP header'
+complete --command youtube-dl --long-option no-geo-bypass --description 'Do not bypass geographic restriction via faking X-Forwarded-For HTTP header'
+complete --command youtube-dl --long-option geo-bypass-country --description 'Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code'
+complete --command youtube-dl --long-option geo-bypass-ip-block --description 'Force bypass geographic restriction with explicitly provided IP block in CIDR notation'
 complete --command youtube-dl --long-option playlist-start --description 'Playlist video to start at (default is %default)'
 complete --command youtube-dl --long-option playlist-end --description 'Playlist video to end at (default is last)'
 complete --command youtube-dl --long-option playlist-items --description 'Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.'
@@ -56,7 +57,7 @@ complete --command youtube-dl --long-option http-chunk-size --description 'Size
 complete --command youtube-dl --long-option test
 complete --command youtube-dl --long-option playlist-reverse --description 'Download playlist videos in reverse order'
 complete --command youtube-dl --long-option playlist-random --description 'Download playlist videos in random order'
-complete --command youtube-dl --long-option xattr-set-filesize --description 'Set file xattribute ytdl.filesize with expected file size (experimental)'
+complete --command youtube-dl --long-option xattr-set-filesize --description 'Set file xattribute ytdl.filesize with expected file size'
 complete --command youtube-dl --long-option hls-prefer-native --description 'Use the native HLS downloader instead of ffmpeg'
 complete --command youtube-dl --long-option hls-prefer-ffmpeg --description 'Use ffmpeg instead of the native HLS downloader'
 complete --command youtube-dl --long-option hls-use-mpegts --description 'Use the mpegts container for HLS videos, allowing to play the video while downloading (some players may not be able to play it)'
index 1f573a57973f290089b7950f5a7f6c11d114162e..9b1e1c0c8470447810922c239b3618182790f9cf 100644 (file)
@@ -19,7 +19,7 @@ __youtube_dl() {
             elif [[ ${prev} == "--recode-video" ]]; then
                 _arguments '*: :(mp4 flv ogg webm mkv)'
             else
-                _arguments '*: :(--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --force-generic-extractor --default-search --ignore-config --config-location --flat-playlist --mark-watched --no-mark-watched --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --geo-verification-proxy --cn-verification-proxy --geo-bypass --no-geo-bypass --geo-bypass-country --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --limit-rate --retries --fragment-retries --skip-unavailable-fragments --abort-on-unavailable-fragment --keep-fragments --buffer-size --no-resize-buffer --http-chunk-size --test --playlist-reverse --playlist-random --xattr-set-filesize --hls-prefer-native --hls-prefer-ffmpeg --hls-use-mpegts --external-downloader --external-downloader-args --batch-file --id --output --autonumber-size --autonumber-start --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info-json --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --max-sleep-interval --format --all-formats --prefer-free-formats --list-formats 
--youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --ap-mso --ap-username --ap-password --ap-list-mso --extract-audio --audio-format --audio-quality --recode-video --postprocessor-args --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --metadata-from-title --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subs)'
+                _arguments '*: :(--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --force-generic-extractor --default-search --ignore-config --config-location --flat-playlist --mark-watched --no-mark-watched --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --geo-verification-proxy --cn-verification-proxy --geo-bypass --no-geo-bypass --geo-bypass-country --geo-bypass-ip-block --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --limit-rate --retries --fragment-retries --skip-unavailable-fragments --abort-on-unavailable-fragment --keep-fragments --buffer-size --no-resize-buffer --http-chunk-size --test --playlist-reverse --playlist-random --xattr-set-filesize --hls-prefer-native --hls-prefer-ffmpeg --hls-use-mpegts --external-downloader --external-downloader-args --batch-file --id --output --autonumber-size --autonumber-start --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info-json --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --max-sleep-interval --format --all-formats --prefer-free-formats 
--list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --ap-mso --ap-username --ap-password --ap-list-mso --extract-audio --audio-format --audio-quality --recode-video --postprocessor-args --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --metadata-from-title --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subs)'
             fi
         ;;
     esac
index ad359880526116d71d13fa2a736ab02ca79e19c0..2a405c5cac386d67b11dc0489dccf4ee68951c53 100755 (executable)
@@ -211,7 +211,7 @@ class YoutubeDL(object):
                        At the moment, this is only supported by YouTube.
     proxy:             URL of the proxy server to use
     geo_verification_proxy:  URL of the proxy to use for IP address verification
-                       on geo-restricted sites. (Experimental)
+                       on geo-restricted sites.
     socket_timeout:    Time to wait for unresponsive hosts, in seconds
     bidi_workaround:   Work around buggy terminals without bidirectional text
                        support, using fridibi
@@ -259,7 +259,7 @@ class YoutubeDL(object):
                        - "warn": only emit a warning
                        - "detect_or_warn": check whether we can do anything
                                            about it, warn otherwise (default)
-    source_address:    (Experimental) Client-side IP address to bind to.
+    source_address:    Client-side IP address to bind to.
     call_home:         Boolean, true iff we are allowed to contact the
                        youtube-dl servers for debugging.
     sleep_interval:    Number of seconds to sleep before each download when
@@ -281,11 +281,14 @@ class YoutubeDL(object):
                        match_filter_func in utils.py is one example for this.
     no_color:          Do not emit color codes in output.
     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
-                       HTTP header (experimental)
+                       HTTP header
     geo_bypass_country:
                        Two-letter ISO 3166-2 country code that will be used for
                        explicit geographic restriction bypassing via faking
-                       X-Forwarded-For HTTP header (experimental)
+                       X-Forwarded-For HTTP header
+    geo_bypass_ip_block:
+                       IP range in CIDR notation that will be used similarly to
+                       geo_bypass_country
 
     The following options determine which downloader is picked:
     external_downloader: Executable of the external downloader to call.
@@ -1479,23 +1482,28 @@ class YoutubeDL(object):
             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
 
+        for cc_kind in ('subtitles', 'automatic_captions'):
+            cc = info_dict.get(cc_kind)
+            if cc:
+                for _, subtitle in cc.items():
+                    for subtitle_format in subtitle:
+                        if subtitle_format.get('url'):
+                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
+                        if subtitle_format.get('ext') is None:
+                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
+
+        automatic_captions = info_dict.get('automatic_captions')
         subtitles = info_dict.get('subtitles')
-        if subtitles:
-            for _, subtitle in subtitles.items():
-                for subtitle_format in subtitle:
-                    if subtitle_format.get('url'):
-                        subtitle_format['url'] = sanitize_url(subtitle_format['url'])
-                    if subtitle_format.get('ext') is None:
-                        subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
 
         if self.params.get('listsubtitles', False):
             if 'automatic_captions' in info_dict:
-                self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
+                self.list_subtitles(
+                    info_dict['id'], automatic_captions, 'automatic captions')
             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
             return
+
         info_dict['requested_subtitles'] = self.process_subtitles(
-            info_dict['id'], subtitles,
-            info_dict.get('automatic_captions'))
+            info_dict['id'], subtitles, automatic_captions)
 
         # We now pick which formats have to be downloaded
         if info_dict.get('formats') is None:
index 9bb952457e149f3687efd0ca925aed8d69996366..ba435ea428c57c933a5ea113dd58f38bbf7e2cd4 100644 (file)
@@ -430,6 +430,7 @@ def _real_main(argv=None):
         'config_location': opts.config_location,
         'geo_bypass': opts.geo_bypass,
         'geo_bypass_country': opts.geo_bypass_country,
+        'geo_bypass_ip_block': opts.geo_bypass_ip_block,
         # just for deprecation check
         'autonumber': opts.autonumber if opts.autonumber is True else None,
         'usetitle': opts.usetitle if opts.usetitle is True else None,
index edd125ee2c6539cf04cbba4ba282faa15b913496..5979833c08ab877973893ed9fa4d24cd12c3193b 100644 (file)
@@ -45,7 +45,6 @@ class FileDownloader(object):
     min_filesize:       Skip files smaller than this size
     max_filesize:       Skip files larger than this size
     xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
-                        (experimental)
     external_downloader_args:  A list of additional command-line arguments for the
                         external downloader.
     hls_use_mpegts:     Use the mpegts container for HLS videos.
index 927c7e491655f950bb1a1c316fcd7911b4b3f2fe..917f6dc019a475139b2f8360a13b617e2c29a7ce 100644 (file)
@@ -74,9 +74,14 @@ class FragmentFD(FileDownloader):
         return not ctx['live'] and not ctx['tmpfilename'] == '-'
 
     def _read_ytdl_file(self, ctx):
+        assert 'ytdl_corrupt' not in ctx
         stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r')
-        ctx['fragment_index'] = json.loads(stream.read())['downloader']['current_fragment']['index']
-        stream.close()
+        try:
+            ctx['fragment_index'] = json.loads(stream.read())['downloader']['current_fragment']['index']
+        except Exception:
+            ctx['ytdl_corrupt'] = True
+        finally:
+            stream.close()
 
     def _write_ytdl_file(self, ctx):
         frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w')
@@ -158,11 +163,17 @@ class FragmentFD(FileDownloader):
         if self.__do_ytdl_file(ctx):
             if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
                 self._read_ytdl_file(ctx)
-                if ctx['fragment_index'] > 0 and resume_len == 0:
+                is_corrupt = ctx.get('ytdl_corrupt') is True
+                is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0
+                if is_corrupt or is_inconsistent:
+                    message = (
+                        '.ytdl file is corrupt' if is_corrupt else
+                        'Inconsistent state of incomplete fragment download')
                     self.report_warning(
-                        'Inconsistent state of incomplete fragment download. '
-                        'Restarting from the beginning...')
+                        '%s. Restarting from the beginning...' % message)
                     ctx['fragment_index'] = resume_len = 0
+                    if 'ytdl_corrupt' in ctx:
+                        del ctx['ytdl_corrupt']
                     self._write_ytdl_file(ctx)
             else:
                 self._write_ytdl_file(ctx)
index a22875f6988eef76837c0b6ac62795d50b1e3d9a..5b1e960136f6166563692a13923684ca47bc125d 100644 (file)
@@ -217,10 +217,11 @@ class HttpFD(FileDownloader):
             before = start  # start measuring
 
             def retry(e):
-                if ctx.tmpfilename != '-':
+                to_stdout = ctx.tmpfilename == '-'
+                if not to_stdout:
                     ctx.stream.close()
                 ctx.stream = None
-                ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
+                ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename))
                 raise RetryDownload(e)
 
             while True:
index b823b5171887f2b61c11f81c6ec935e575251f2e..fbb7f51b018fabde5d140a033b953a35f9ca711e 100644 (file)
@@ -29,66 +29,68 @@ class RtmpFD(FileDownloader):
             proc = subprocess.Popen(args, stderr=subprocess.PIPE)
             cursor_in_new_line = True
             proc_stderr_closed = False
-            while not proc_stderr_closed:
-                # read line from stderr
-                line = ''
-                while True:
-                    char = proc.stderr.read(1)
-                    if not char:
-                        proc_stderr_closed = True
-                        break
-                    if char in [b'\r', b'\n']:
-                        break
-                    line += char.decode('ascii', 'replace')
-                if not line:
-                    # proc_stderr_closed is True
-                    continue
-                mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
-                if mobj:
-                    downloaded_data_len = int(float(mobj.group(1)) * 1024)
-                    percent = float(mobj.group(2))
-                    if not resume_percent:
-                        resume_percent = percent
-                        resume_downloaded_data_len = downloaded_data_len
-                    time_now = time.time()
-                    eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent)
-                    speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len)
-                    data_len = None
-                    if percent > 0:
-                        data_len = int(downloaded_data_len * 100 / percent)
-                    self._hook_progress({
-                        'status': 'downloading',
-                        'downloaded_bytes': downloaded_data_len,
-                        'total_bytes_estimate': data_len,
-                        'tmpfilename': tmpfilename,
-                        'filename': filename,
-                        'eta': eta,
-                        'elapsed': time_now - start,
-                        'speed': speed,
-                    })
-                    cursor_in_new_line = False
-                else:
-                    # no percent for live streams
-                    mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
+            try:
+                while not proc_stderr_closed:
+                    # read line from stderr
+                    line = ''
+                    while True:
+                        char = proc.stderr.read(1)
+                        if not char:
+                            proc_stderr_closed = True
+                            break
+                        if char in [b'\r', b'\n']:
+                            break
+                        line += char.decode('ascii', 'replace')
+                    if not line:
+                        # proc_stderr_closed is True
+                        continue
+                    mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
                     if mobj:
                         downloaded_data_len = int(float(mobj.group(1)) * 1024)
+                        percent = float(mobj.group(2))
+                        if not resume_percent:
+                            resume_percent = percent
+                            resume_downloaded_data_len = downloaded_data_len
                         time_now = time.time()
-                        speed = self.calc_speed(start, time_now, downloaded_data_len)
+                        eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent)
+                        speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len)
+                        data_len = None
+                        if percent > 0:
+                            data_len = int(downloaded_data_len * 100 / percent)
                         self._hook_progress({
+                            'status': 'downloading',
                             'downloaded_bytes': downloaded_data_len,
+                            'total_bytes_estimate': data_len,
                             'tmpfilename': tmpfilename,
                             'filename': filename,
-                            'status': 'downloading',
+                            'eta': eta,
                             'elapsed': time_now - start,
                             'speed': speed,
                         })
                         cursor_in_new_line = False
-                    elif self.params.get('verbose', False):
-                        if not cursor_in_new_line:
-                            self.to_screen('')
-                        cursor_in_new_line = True
-                        self.to_screen('[rtmpdump] ' + line)
-            proc.wait()
+                    else:
+                        # no percent for live streams
+                        mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
+                        if mobj:
+                            downloaded_data_len = int(float(mobj.group(1)) * 1024)
+                            time_now = time.time()
+                            speed = self.calc_speed(start, time_now, downloaded_data_len)
+                            self._hook_progress({
+                                'downloaded_bytes': downloaded_data_len,
+                                'tmpfilename': tmpfilename,
+                                'filename': filename,
+                                'status': 'downloading',
+                                'elapsed': time_now - start,
+                                'speed': speed,
+                            })
+                            cursor_in_new_line = False
+                        elif self.params.get('verbose', False):
+                            if not cursor_in_new_line:
+                                self.to_screen('')
+                            cursor_in_new_line = True
+                            self.to_screen('[rtmpdump] ' + line)
+            finally:
+                proc.wait()
             if not cursor_in_new_line:
                 self.to_screen('')
             return proc.returncode
@@ -163,7 +165,15 @@ class RtmpFD(FileDownloader):
         RD_INCOMPLETE = 2
         RD_NO_CONNECT = 3
 
-        retval = run_rtmpdump(args)
+        started = time.time()
+
+        try:
+            retval = run_rtmpdump(args)
+        except KeyboardInterrupt:
+            if not info_dict.get('is_live'):
+                raise
+            retval = RD_SUCCESS
+            self.to_screen('\n[rtmpdump] Interrupted by user')
 
         if retval == RD_NO_CONNECT:
             self.report_error('[rtmpdump] Could not connect to RTMP server.')
@@ -171,7 +181,7 @@ class RtmpFD(FileDownloader):
 
         while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
             prevsize = os.path.getsize(encodeFilename(tmpfilename))
-            self.to_screen('[rtmpdump] %s bytes' % prevsize)
+            self.to_screen('[rtmpdump] Downloaded %s bytes' % prevsize)
             time.sleep(5.0)  # This seems to be needed
             args = basic_args + ['--resume']
             if retval == RD_FAILED:
@@ -188,13 +198,14 @@ class RtmpFD(FileDownloader):
                 break
         if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
             fsize = os.path.getsize(encodeFilename(tmpfilename))
-            self.to_screen('[rtmpdump] %s bytes' % fsize)
+            self.to_screen('[rtmpdump] Downloaded %s bytes' % fsize)
             self.try_rename(tmpfilename, filename)
             self._hook_progress({
                 'downloaded_bytes': fsize,
                 'total_bytes': fsize,
                 'filename': filename,
                 'status': 'finished',
+                'elapsed': time.time() - started,
             })
             return True
         else:
index 512f046849e3b6090ec06e14938d57c5279d66a1..4ac323bf6de6d17016c2425c133aad460072cadd 100644 (file)
@@ -105,22 +105,22 @@ class ABCIE(InfoExtractor):
 
 class ABCIViewIE(InfoExtractor):
     IE_NAME = 'abc.net.au:iview'
-    _VALID_URL = r'https?://iview\.abc\.net\.au/programs/[^/]+/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://iview\.abc\.net\.au/(?:[^/]+/)*video/(?P<id>[^/?#]+)'
     _GEO_COUNTRIES = ['AU']
 
     # ABC iview programs are normally available for 14 days only.
     _TESTS = [{
-        'url': 'https://iview.abc.net.au/programs/ben-and-hollys-little-kingdom/ZY9247A021S00',
+        'url': 'https://iview.abc.net.au/show/ben-and-hollys-little-kingdom/series/0/video/ZX9371A050S00',
         'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
         'info_dict': {
-            'id': 'ZY9247A021S00',
+            'id': 'ZX9371A050S00',
             'ext': 'mp4',
-            'title': "Gaston's Visit",
+            'title': "Gaston's Birthday",
             'series': "Ben And Holly's Little Kingdom",
-            'description': 'md5:18db170ad71cf161e006a4c688e33155',
-            'upload_date': '20180318',
+            'description': 'md5:f9de914d02f226968f598ac76f105bcf',
+            'upload_date': '20180604',
             'uploader_id': 'abc4kids',
-            'timestamp': 1521400959,
+            'timestamp': 1528140219,
         },
         'params': {
             'skip_download': True,
@@ -129,17 +129,16 @@ class ABCIViewIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-        video_params = self._parse_json(self._search_regex(
-            r'videoParams\s*=\s*({.+?});', webpage, 'video params'), video_id)
-        title = video_params.get('title') or video_params['seriesTitle']
-        stream = next(s for s in video_params['playlist'] if s.get('type') == 'program')
+        video_params = self._download_json(
+            'https://iview.abc.net.au/api/programs/' + video_id, video_id)
+        title = unescapeHTML(video_params.get('title') or video_params['seriesTitle'])
+        stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream'))
 
-        house_number = video_params.get('episodeHouseNumber')
-        path = '/auth/hls/sign?ts={0}&hn={1}&d=android-mobile'.format(
+        house_number = video_params.get('episodeHouseNumber') or video_id
+        path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format(
             int(time.time()), house_number)
         sig = hmac.new(
-            'android.content.res.Resources'.encode('utf-8'),
+            b'android.content.res.Resources',
             path.encode('utf-8'), hashlib.sha256).hexdigest()
         token = self._download_webpage(
             'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id)
@@ -169,18 +168,26 @@ class ABCIViewIE(InfoExtractor):
                 'ext': 'vtt',
             }]
 
+        is_live = video_params.get('livestream') == '1'
+        if is_live:
+            title = self._live_title(title)
+
         return {
             'id': video_id,
-            'title': unescapeHTML(title),
-            'description': self._html_search_meta(['og:description', 'twitter:description'], webpage),
-            'thumbnail': self._html_search_meta(['og:image', 'twitter:image:src'], webpage),
+            'title': title,
+            'description': video_params.get('description'),
+            'thumbnail': video_params.get('thumbnail'),
             'duration': int_or_none(video_params.get('eventDuration')),
             'timestamp': parse_iso8601(video_params.get('pubDate'), ' '),
             'series': unescapeHTML(video_params.get('seriesTitle')),
             'series_id': video_params.get('seriesHouseNumber') or video_id[:7],
-            'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage, default=None)),
-            'episode': self._html_search_meta('episode_title', webpage, default=None),
+            'season_number': int_or_none(self._search_regex(
+                r'\bSeries\s+(\d+)\b', title, 'season number', default=None)),
+            'episode_number': int_or_none(self._search_regex(
+                r'\bEp\s+(\d+)\b', title, 'episode number', default=None)),
+            'episode_id': house_number,
             'uploader_id': video_params.get('channel'),
             'formats': formats,
             'subtitles': subtitles,
+            'is_live': is_live,
         }
index 041c61aff7cafc01cf30a9c714b2d35d1af978ee..1eb99c39a11eef514d76528946bd5acddd34cb26 100644 (file)
@@ -1,8 +1,11 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import base64
+import binascii
 import json
 import os
+import random
 
 from .common import InfoExtractor
 from ..aes import aes_cbc_decrypt
@@ -12,9 +15,12 @@ from ..compat import (
 )
 from ..utils import (
     bytes_to_intlist,
+    bytes_to_long,
     ExtractorError,
     float_or_none,
     intlist_to_bytes,
+    long_to_bytes,
+    pkcs1pad,
     srt_subtitles_timecode,
     strip_or_none,
     urljoin,
@@ -35,6 +41,7 @@ class ADNIE(InfoExtractor):
         }
     }
     _BASE_URL = 'http://animedigitalnetwork.fr'
+    _RSA_KEY = (0xc35ae1e4356b65a73b551493da94b8cb443491c0aa092a357a5aee57ffc14dda85326f42d716e539a34542a0d3f363adf16c5ec222d713d5997194030ee2e4f0d1fb328c01a81cf6868c090d50de8e169c6b13d1675b9eeed1cbc51e1fffca9b38af07f37abd790924cd3bee59d0257cfda4fe5f3f0534877e21ce5821447d1b, 65537)
 
     def _get_subtitles(self, sub_path, video_id):
         if not sub_path:
@@ -42,16 +49,14 @@ class ADNIE(InfoExtractor):
 
         enc_subtitles = self._download_webpage(
             urljoin(self._BASE_URL, sub_path),
-            video_id, fatal=False, headers={
-                'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0',
-            })
+            video_id, fatal=False)
         if not enc_subtitles:
             return None
 
         # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
         dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
             bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
-            bytes_to_intlist(b'\xc8\x6e\x06\xbc\xbe\xc6\x49\xf5\x88\x0d\xc8\x47\xc4\x27\x0c\x60'),
+            bytes_to_intlist(binascii.unhexlify(self._K + '9032ad7083106400')),
             bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
         ))
         subtitles_json = self._parse_json(
@@ -112,11 +117,24 @@ class ADNIE(InfoExtractor):
         error = None
         if not links:
             links_url = player_config.get('linksurl') or options['videoUrl']
-            links_data = self._download_json(urljoin(
-                self._BASE_URL, links_url), video_id)
+            token = options['token']
+            self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)])
+            message = bytes_to_intlist(json.dumps({
+                'k': self._K,
+                'e': 60,
+                't': token,
+            }))
+            padded_message = intlist_to_bytes(pkcs1pad(message, 128))
+            n, e = self._RSA_KEY
+            encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
+            authorization = base64.b64encode(encrypted_message).decode()
+            links_data = self._download_json(
+                urljoin(self._BASE_URL, links_url), video_id, headers={
+                    'Authorization': 'Bearer ' + authorization,
+                })
             links = links_data.get('links') or {}
             metas = metas or links_data.get('meta') or {}
-            sub_path = sub_path or links_data.get('subtitles')
+            sub_path = (sub_path or links_data.get('subtitles')) + '&token=' + token
             error = links_data.get('error')
         title = metas.get('title') or video_info['title']
 
index e4fa72f466c188a318978f750b9af0344d53a980..1fe5d5e56e698728cf17d15b35624c4fe1882798 100644 (file)
@@ -52,7 +52,7 @@ class AnimeOnDemandIE(InfoExtractor):
     }]
 
     def _login(self):
-        (username, password) = self._get_login_info()
+        username, password = self._get_login_info()
         if username is None:
             return
 
index 7a29cd2c6315fad6caa05ac6a499f07f80a41566..f6a78eb5d4cb1288d3bc2b864303343b24603422 100644 (file)
@@ -277,7 +277,9 @@ class AnvatoIE(InfoExtractor):
 
     def _real_extract(self, url):
         url, smuggled_data = unsmuggle_url(url, {})
-        self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
+        self._initialize_geo_bypass({
+            'countries': smuggled_data.get('geo_countries'),
+        })
 
         mobj = re.match(self._VALID_URL, url)
         access_key, video_id = mobj.group('access_key_or_mcp', 'id')
diff --git a/youtube_dl/extractor/apa.py b/youtube_dl/extractor/apa.py
new file mode 100644 (file)
index 0000000..a30a935
--- /dev/null
@@ -0,0 +1,94 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    determine_ext,
+    js_to_json,
+)
+
+
+class APAIE(InfoExtractor):
+    _VALID_URL = r'https?://[^/]+\.apa\.at/embed/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+    _TESTS = [{
+        'url': 'http://uvp.apa.at/embed/293f6d17-692a-44e3-9fd5-7b178f3a1029',
+        'md5': '2b12292faeb0a7d930c778c7a5b4759b',
+        'info_dict': {
+            'id': 'jjv85FdZ',
+            'ext': 'mp4',
+            'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
+            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 254,
+            'timestamp': 1519211149,
+            'upload_date': '20180221',
+        },
+    }, {
+        'url': 'https://uvp-apapublisher.sf.apa.at/embed/2f94e9e6-d945-4db2-9548-f9a41ebf7b78',
+        'only_matching': True,
+    }, {
+        'url': 'http://uvp-rma.sf.apa.at/embed/70404cca-2f47-4855-bbb8-20b1fae58f76',
+        'only_matching': True,
+    }, {
+        'url': 'http://uvp-kleinezeitung.sf.apa.at/embed/f1c44979-dba2-4ebf-b021-e4cf2cac3c81',
+        'only_matching': True,
+    }]
+
+    @staticmethod
+    def _extract_urls(webpage):
+        return [
+            mobj.group('url')
+            for mobj in re.finditer(
+                r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//[^/]+\.apa\.at/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}.*?)\1',
+                webpage)]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        jwplatform_id = self._search_regex(
+            r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', webpage,
+            'jwplatform id', default=None)
+
+        if jwplatform_id:
+            return self.url_result(
+                'jwplatform:' + jwplatform_id, ie='JWPlatform',
+                video_id=video_id)
+
+        sources = self._parse_json(
+            self._search_regex(
+                r'sources\s*=\s*(\[.+?\])\s*;', webpage, 'sources'),
+            video_id, transform_source=js_to_json)
+
+        formats = []
+        for source in sources:
+            if not isinstance(source, dict):
+                continue
+            source_url = source.get('file')
+            if not source_url or not isinstance(source_url, compat_str):
+                continue
+            ext = determine_ext(source_url)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    source_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id='hls', fatal=False))
+            else:
+                formats.append({
+                    'url': source_url,
+                })
+        self._sort_formats(formats)
+
+        thumbnail = self._search_regex(
+            r'image\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+            'thumbnail', fatal=False, group='url')
+
+        return {
+            'id': video_id,
+            'title': video_id,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
index 1a31ebe0820d67df4b768e8102a134b34cfff0c5..ae1c094277251b6bfe26298d3c8dd8dfd741a905 100644 (file)
@@ -74,7 +74,7 @@ class AtresPlayerIE(InfoExtractor):
         self._login()
 
     def _login(self):
-        (username, password) = self._get_login_info()
+        username, password = self._get_login_info()
         if username is None:
             return
 
index aa6925623140f08090515fda2f42a7debd5545ac..6bd48ef1593e4dded0805f44524cc3227fe3bb26 100644 (file)
@@ -5,13 +5,12 @@ from .common import InfoExtractor
 from ..utils import (
     int_or_none,
     parse_iso8601,
-    sanitized_Request,
 )
 
 
 class AudiMediaIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?audi-mediacenter\.com/(?:en|de)/audimediatv/(?P<id>[^/?#]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?audi-mediacenter\.com/(?:en|de)/audimediatv/(?:video/)?(?P<id>[^/?#]+)'
+    _TESTS = [{
         'url': 'https://www.audi-mediacenter.com/en/audimediatv/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-1467',
         'md5': '79a8b71c46d49042609795ab59779b66',
         'info_dict': {
@@ -24,41 +23,46 @@ class AudiMediaIE(InfoExtractor):
             'duration': 74022,
             'view_count': int,
         }
-    }
-    # extracted from https://audimedia.tv/assets/embed/embedded-player.js (dataSourceAuthToken)
-    _AUTH_TOKEN = 'e25b42847dba18c6c8816d5d8ce94c326e06823ebf0859ed164b3ba169be97f2'
+    }, {
+        'url': 'https://www.audi-mediacenter.com/en/audimediatv/video/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-2991',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
 
         raw_payload = self._search_regex([
-            r'class="amtv-embed"[^>]+id="([^"]+)"',
-            r'class=\\"amtv-embed\\"[^>]+id=\\"([^"]+)\\"',
+            r'class="amtv-embed"[^>]+id="([0-9a-z-]+)"',
+            r'id="([0-9a-z-]+)"[^>]+class="amtv-embed"',
+            r'class=\\"amtv-embed\\"[^>]+id=\\"([0-9a-z-]+)\\"',
+            r'id=\\"([0-9a-z-]+)\\"[^>]+class=\\"amtv-embed\\"',
+            r'id=(?:\\)?"(amtve-[a-z]-\d+-[a-z]{2})',
         ], webpage, 'raw payload')
-        _, stage_mode, video_id, lang = raw_payload.split('-')
+        _, stage_mode, video_id, _ = raw_payload.split('-')
 
         # TODO: handle s and e stage_mode (live streams and ended live streams)
         if stage_mode not in ('s', 'e'):
-            request = sanitized_Request(
-                'https://audimedia.tv/api/video/v1/videos/%s?embed[]=video_versions&embed[]=thumbnail_image&where[content_language_iso]=%s' % (video_id, lang),
-                headers={'X-Auth-Token': self._AUTH_TOKEN})
-            json_data = self._download_json(request, video_id)['results']
+            video_data = self._download_json(
+                'https://www.audimedia.tv/api/video/v1/videos/' + video_id,
+                video_id, query={
+                    'embed[]': ['video_versions', 'thumbnail_image'],
+                })['results']
             formats = []
 
-            stream_url_hls = json_data.get('stream_url_hls')
+            stream_url_hls = video_data.get('stream_url_hls')
             if stream_url_hls:
                 formats.extend(self._extract_m3u8_formats(
                     stream_url_hls, video_id, 'mp4',
                     entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
 
-            stream_url_hds = json_data.get('stream_url_hds')
+            stream_url_hds = video_data.get('stream_url_hds')
             if stream_url_hds:
                 formats.extend(self._extract_f4m_formats(
                     stream_url_hds + '?hdcore=3.4.0',
                     video_id, f4m_id='hds', fatal=False))
 
-            for video_version in json_data.get('video_versions'):
+            for video_version in video_data.get('video_versions', []):
                 video_version_url = video_version.get('download_url') or video_version.get('stream_url')
                 if not video_version_url:
                     continue
@@ -79,11 +83,11 @@ class AudiMediaIE(InfoExtractor):
 
             return {
                 'id': video_id,
-                'title': json_data['title'],
-                'description': json_data.get('subtitle'),
-                'thumbnail': json_data.get('thumbnail_image', {}).get('file'),
-                'timestamp': parse_iso8601(json_data.get('publication_date')),
-                'duration': int_or_none(json_data.get('duration')),
-                'view_count': int_or_none(json_data.get('view_count')),
+                'title': video_data['title'],
+                'description': video_data.get('subtitle'),
+                'thumbnail': video_data.get('thumbnail_image', {}).get('file'),
+                'timestamp': parse_iso8601(video_data.get('publication_date')),
+                'duration': int_or_none(video_data.get('duration')),
+                'view_count': int_or_none(video_data.get('view_count')),
                 'formats': formats,
             }
index f3bd4d4447f559a8bd924f7d796a1a9faf24b9d3..62049b921089d4a10cc9f71b625540f82ea7e3b6 100644 (file)
@@ -65,7 +65,7 @@ class AudiomackIE(InfoExtractor):
             return {'_type': 'url', 'url': api_response['url'], 'ie_key': 'Soundcloud'}
 
         return {
-            'id': api_response.get('id', album_url_tag),
+            'id': compat_str(api_response.get('id', album_url_tag)),
             'uploader': api_response.get('artist'),
             'title': api_response.get('title'),
             'url': api_response['url'],
index 633c57553d9a9e769f726a664e08061217763ced..34f1b3d83614d97aaf3a07794a5672768a0939ef 100644 (file)
@@ -44,7 +44,7 @@ class BambuserIE(InfoExtractor):
     }
 
     def _login(self):
-        (username, password) = self._get_login_info()
+        username, password = self._get_login_info()
         if username is None:
             return
 
index 8b20c03d6e424b95e42b1bea1ac3fb91e24bea11..30a63a24e12296fca713cea3b6304c784ae4af15 100644 (file)
@@ -12,6 +12,7 @@ from ..utils import (
     float_or_none,
     get_element_by_class,
     int_or_none,
+    js_to_json,
     parse_duration,
     parse_iso8601,
     try_get,
@@ -772,6 +773,17 @@ class BBCIE(BBCCoUkIE):
         # single video article embedded with data-media-vpid
         'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
         'only_matching': True,
+    }, {
+        'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
+        'info_dict': {
+            'id': 'p06556y7',
+            'ext': 'mp4',
+            'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
+            'description': 'md5:4b7dfd063d5a789a1512e99662be3ddd',
+        },
+        'params': {
+            'skip_download': True,
+        }
     }]
 
     @classmethod
@@ -994,6 +1006,36 @@ class BBCIE(BBCCoUkIE):
                     'subtitles': subtitles,
                 }
 
+        bbc3_config = self._parse_json(
+            self._search_regex(
+                r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
+                'bbcthree config', default='{}'),
+            playlist_id, transform_source=js_to_json, fatal=False)
+        if bbc3_config:
+            bbc3_playlist = try_get(
+                bbc3_config, lambda x: x['payload']['content']['bbcMedia']['playlist'],
+                dict)
+            if bbc3_playlist:
+                playlist_title = bbc3_playlist.get('title') or playlist_title
+                thumbnail = bbc3_playlist.get('holdingImageURL')
+                entries = []
+                for bbc3_item in bbc3_playlist['items']:
+                    programme_id = bbc3_item.get('versionID')
+                    if not programme_id:
+                        continue
+                    formats, subtitles = self._download_media_selector(programme_id)
+                    self._sort_formats(formats)
+                    entries.append({
+                        'id': programme_id,
+                        'title': playlist_title,
+                        'thumbnail': thumbnail,
+                        'timestamp': timestamp,
+                        'formats': formats,
+                        'subtitles': subtitles,
+                    })
+                return self.playlist_result(
+                    entries, playlist_id, playlist_title, playlist_description)
+
         def extract_all(pattern):
             return list(filter(None, map(
                 lambda s: self._parse_json(s, playlist_id, fatal=False),
index 8820a391468e7a6ecb168704eddf416fdc34ede4..f36a2452d4e9aba89dde708e3bd51ada35ada301 100644 (file)
@@ -12,7 +12,7 @@ class BellMediaIE(InfoExtractor):
             (?:
                 ctv|
                 tsn|
-                bnn|
+                bnn(?:bloomberg)?|
                 thecomedynetwork|
                 discovery|
                 discoveryvelocity|
@@ -27,17 +27,16 @@ class BellMediaIE(InfoExtractor):
             much\.com
         )/.*?(?:\bvid(?:eoid)?=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
     _TESTS = [{
-        'url': 'http://www.ctv.ca/video/player?vid=706966',
-        'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
+        'url': 'https://www.bnnbloomberg.ca/video/david-cockfield-s-top-picks~1403070',
+        'md5': '36d3ef559cfe8af8efe15922cd3ce950',
         'info_dict': {
-            'id': '706966',
-            'ext': 'mp4',
-            'title': 'Larry Day and Richard Jutras on the TIFF red carpet of \'Stonewall\'',
-            'description': 'etalk catches up with Larry Day and Richard Jutras on the TIFF red carpet of "Stonewall”.',
-            'upload_date': '20150919',
-            'timestamp': 1442624700,
+            'id': '1403070',
+            'ext': 'flv',
+            'title': 'David Cockfield\'s Top Picks',
+            'description': 'md5:810f7f8c6a83ad5b48677c3f8e5bb2c3',
+            'upload_date': '20180525',
+            'timestamp': 1527288600,
         },
-        'expected_warnings': ['HTTP Error 404'],
     }, {
         'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582',
         'only_matching': True,
@@ -70,6 +69,7 @@ class BellMediaIE(InfoExtractor):
         'investigationdiscovery': 'invdisc',
         'animalplanet': 'aniplan',
         'etalk': 'ctv',
+        'bnnbloomberg': 'bnn',
     }
 
     def _real_extract(self, url):
index 3e3348ef5baed8f6e9a31634778421c921e352ce..4d6b051fe102256f1733c99e6d42697324948c8f 100644 (file)
@@ -114,7 +114,7 @@ class BiliBiliIE(InfoExtractor):
 
         if 'anime/' not in url:
             cid = self._search_regex(
-                r'cid(?:["\']:|=)(\d+)', webpage, 'cid',
+                r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid',
                 default=None
             ) or compat_parse_qs(self._search_regex(
                 [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
index 0e4eaef659105df0248d6ef5171f5f444660f1ee..ab62e54d63c2335d0002701c078105e00ccc6186 100644 (file)
@@ -669,7 +669,10 @@ class BrightcoveNewIE(AdobePassIE):
 
     def _real_extract(self, url):
         url, smuggled_data = unsmuggle_url(url, {})
-        self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
+        self._initialize_geo_bypass({
+            'countries': smuggled_data.get('geo_countries'),
+            'ip_blocks': smuggled_data.get('geo_ip_blocks'),
+        })
 
         account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()
 
diff --git a/youtube_dl/extractor/businessinsider.py b/youtube_dl/extractor/businessinsider.py
new file mode 100644 (file)
index 0000000..dfcf9bc
--- /dev/null
@@ -0,0 +1,42 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .jwplatform import JWPlatformIE
+
+
+class BusinessInsiderIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:[^/]+\.)?businessinsider\.(?:com|nl)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'http://uk.businessinsider.com/how-much-radiation-youre-exposed-to-in-everyday-life-2016-6',
+        'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e',
+        'info_dict': {
+            'id': 'hZRllCfw',
+            'ext': 'mp4',
+            'title': "Here's how much radiation you're exposed to in everyday life",
+            'description': 'md5:9a0d6e2c279948aadaa5e84d6d9b99bd',
+            'upload_date': '20170709',
+            'timestamp': 1499606400,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://www.businessinsider.nl/5-scientifically-proven-things-make-you-less-attractive-2017-7/',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        jwplatform_id = self._search_regex(
+            (r'data-media-id=["\']([a-zA-Z0-9]{8})',
+             r'id=["\']jwplayer_([a-zA-Z0-9]{8})',
+             r'id["\']?\s*:\s*["\']?([a-zA-Z0-9]{8})'),
+            webpage, 'jwplatform id')
+        return self.url_result(
+            'jwplatform:%s' % jwplatform_id, ie=JWPlatformIE.ie_key(),
+            video_id=video_id)
diff --git a/youtube_dl/extractor/cammodels.py b/youtube_dl/extractor/cammodels.py
new file mode 100644 (file)
index 0000000..ee0165d
--- /dev/null
@@ -0,0 +1,96 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+)
+
+
+class CamModelsIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?cammodels\.com/cam/(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://www.cammodels.com/cam/AutumnKnight/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        user_id = self._match_id(url)
+
+        webpage = self._download_webpage(
+            url, user_id, headers=self.geo_verification_headers())
+
+        manifest_root = self._html_search_regex(
+            r'manifestUrlRoot=([^&\']+)', webpage, 'manifest', default=None)
+
+        if not manifest_root:
+            ERRORS = (
+                ("I'm offline, but let's stay connected", 'This user is currently offline'),
+                ('in a private show', 'This user is in a private show'),
+                ('is currently performing LIVE', 'This model is currently performing live'),
+            )
+            for pattern, message in ERRORS:
+                if pattern in webpage:
+                    error = message
+                    expected = True
+                    break
+            else:
+                error = 'Unable to find manifest URL root'
+                expected = False
+            raise ExtractorError(error, expected=expected)
+
+        manifest = self._download_json(
+            '%s%s.json' % (manifest_root, user_id), user_id)
+
+        formats = []
+        for format_id, format_dict in manifest['formats'].items():
+            if not isinstance(format_dict, dict):
+                continue
+            encodings = format_dict.get('encodings')
+            if not isinstance(encodings, list):
+                continue
+            vcodec = format_dict.get('videoCodec')
+            acodec = format_dict.get('audioCodec')
+            for media in encodings:
+                if not isinstance(media, dict):
+                    continue
+                media_url = media.get('location')
+                if not media_url or not isinstance(media_url, compat_str):
+                    continue
+
+                format_id_list = [format_id]
+                height = int_or_none(media.get('videoHeight'))
+                if height is not None:
+                    format_id_list.append('%dp' % height)
+                f = {
+                    'url': media_url,
+                    'format_id': '-'.join(format_id_list),
+                    'width': int_or_none(media.get('videoWidth')),
+                    'height': height,
+                    'vbr': int_or_none(media.get('videoKbps')),
+                    'abr': int_or_none(media.get('audioKbps')),
+                    'fps': int_or_none(media.get('fps')),
+                    'vcodec': vcodec,
+                    'acodec': acodec,
+                }
+                if 'rtmp' in format_id:
+                    f['ext'] = 'flv'
+                elif 'hls' in format_id:
+                    f.update({
+                        'ext': 'mp4',
+                        # hls skips fragments, preferring rtmp
+                        'preference': -1,
+                    })
+                else:
+                    continue
+                formats.append(f)
+        self._sort_formats(formats)
+
+        return {
+            'id': user_id,
+            'title': self._live_title(user_id),
+            'is_live': True,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/camtube.py b/youtube_dl/extractor/camtube.py
new file mode 100644 (file)
index 0000000..c7d40f8
--- /dev/null
@@ -0,0 +1,69 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    unified_timestamp,
+)
+
+
+class CamTubeIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:(?:www|api)\.)?camtube\.co/recordings?/(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://camtube.co/recording/minafay-030618-1136-chaturbate-female',
+        'info_dict': {
+            'id': '42ad3956-dd5b-445a-8313-803ea6079fac',
+            'display_id': 'minafay-030618-1136-chaturbate-female',
+            'ext': 'mp4',
+            'title': 'minafay-030618-1136-chaturbate-female',
+            'duration': 1274,
+            'timestamp': 1528018608,
+            'upload_date': '20180603',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }]
+
+    _API_BASE = 'https://api.camtube.co'
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        token = self._download_json(
+            '%s/rpc/session/new' % self._API_BASE, display_id,
+            'Downloading session token')['token']
+
+        self._set_cookie('api.camtube.co', 'session', token)
+
+        video = self._download_json(
+            '%s/recordings/%s' % (self._API_BASE, display_id), display_id,
+            headers={'Referer': url})
+
+        video_id = video['uuid']
+        timestamp = unified_timestamp(video.get('createdAt'))
+        duration = int_or_none(video.get('duration'))
+        view_count = int_or_none(video.get('viewCount'))
+        like_count = int_or_none(video.get('likeCount'))
+        creator = video.get('stageName')
+
+        formats = [{
+            'url': '%s/recordings/%s/manifest.m3u8'
+                   % (self._API_BASE, video_id),
+            'format_id': 'hls',
+            'ext': 'mp4',
+            'protocol': 'm3u8_native',
+        }]
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': display_id,
+            'timestamp': timestamp,
+            'duration': duration,
+            'view_count': view_count,
+            'like_count': like_count,
+            'creator': creator,
+            'formats': formats,
+        }
index 54b4b9be958ae49f0ea4f7d37cadcdf4e2c8b1c7..43f95c739deed7e497b2d85b23393c24f0f5c864 100644 (file)
@@ -17,9 +17,11 @@ from ..utils import (
     xpath_element,
     xpath_with_ns,
     find_xpath_attr,
+    orderedSet,
     parse_duration,
     parse_iso8601,
     parse_age_limit,
+    strip_or_none,
     int_or_none,
     ExtractorError,
 )
@@ -129,15 +131,23 @@ class CBCIE(InfoExtractor):
     def _real_extract(self, url):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
+        title = self._og_search_title(webpage, default=None) or self._html_search_meta(
+            'twitter:title', webpage, 'title', default=None) or self._html_search_regex(
+                r'<title>([^<]+)</title>', webpage, 'title', fatal=False)
         entries = [
             self._extract_player_init(player_init, display_id)
             for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)]
+        media_ids = []
+        for media_id_re in (
+                r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"',
+                r'<div[^>]+\bid=["\']player-(\d+)',
+                r'guid["\']\s*:\s*["\'](\d+)'):
+            media_ids.extend(re.findall(media_id_re, webpage))
         entries.extend([
             self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
-            for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)])
+            for media_id in orderedSet(media_ids)])
         return self.playlist_result(
-            entries, display_id,
-            self._og_search_title(webpage, fatal=False),
+            entries, display_id, strip_or_none(title),
             self._og_search_description(webpage))
 
 
index e3eba4be94660759537e969701f9e043fff5b73d..e2b828d8afdada257b6b4f458022947522180642 100644 (file)
@@ -31,7 +31,8 @@ class ChaturbateIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        webpage = self._download_webpage(url, video_id)
+        webpage = self._download_webpage(
+            url, video_id, headers=self.geo_verification_headers())
 
         m3u8_urls = []
 
diff --git a/youtube_dl/extractor/cloudflarestream.py b/youtube_dl/extractor/cloudflarestream.py
new file mode 100644 (file)
index 0000000..e6d92cc
--- /dev/null
@@ -0,0 +1,60 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class CloudflareStreamIE(InfoExtractor):
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            (?:watch\.)?cloudflarestream\.com/|
+                            embed\.cloudflarestream\.com/embed/[^/]+\.js\?.*?\bvideo=
+                        )
+                        (?P<id>[\da-f]+)
+                    '''
+    _TESTS = [{
+        'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717',
+        'info_dict': {
+            'id': '31c9291ab41fac05471db4e73aa11717',
+            'ext': 'mp4',
+            'title': '31c9291ab41fac05471db4e73aa11717',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
+        'only_matching': True,
+    }, {
+        'url': 'https://cloudflarestream.com/31c9291ab41fac05471db4e73aa11717/manifest/video.mpd',
+        'only_matching': True,
+    }]
+
+    @staticmethod
+    def _extract_urls(webpage):
+        return [
+            mobj.group('url')
+            for mobj in re.finditer(
+                r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.cloudflarestream\.com/embed/[^/]+\.js\?.*?\bvideo=[\da-f]+?.*?)\1',
+                webpage)]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        formats = self._extract_m3u8_formats(
+            'https://cloudflarestream.com/%s/manifest/video.m3u8' % video_id,
+            video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls',
+            fatal=False)
+        formats.extend(self._extract_mpd_formats(
+            'https://cloudflarestream.com/%s/manifest/video.mpd' % video_id,
+            video_id, mpd_id='dash', fatal=False))
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video_id,
+            'formats': formats,
+        }
index 59b9d373951331ef5c02bb6ed80d0f28bfaf4c8f..a2548dba364b338e8c1fd37e8f3d9acaa0aa6e67 100644 (file)
@@ -339,15 +339,17 @@ class InfoExtractor(object):
     _GEO_BYPASS attribute may be set to False in order to disable
     geo restriction bypass mechanisms for a particular extractor.
     Though it won't disable explicit geo restriction bypass based on
-    country code provided with geo_bypass_country. (experimental)
+    country code provided with geo_bypass_country.
 
     _GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted
     countries for this extractor. One of these countries will be used by
     geo restriction bypass mechanism right away in order to bypass
-    geo restriction, of course, if the mechanism is not disabled. (experimental)
+    geo restriction, of course, if the mechanism is not disabled.
 
-    NB: both these geo attributes are experimental and may change in future
-    or be completely removed.
+    _GEO_IP_BLOCKS attribute may contain a list of presumably geo unrestricted
+    IP blocks in CIDR notation for this extractor. One of these IP blocks
+    will be used by geo restriction bypass mechanism similarly
+    to _GEO_COUNTRIES.
 
     Finally, the _WORKING attribute should be set to False for broken IEs
     in order to warn the users and skip the tests.
@@ -358,6 +360,7 @@ class InfoExtractor(object):
     _x_forwarded_for_ip = None
     _GEO_BYPASS = True
     _GEO_COUNTRIES = None
+    _GEO_IP_BLOCKS = None
     _WORKING = True
 
     def __init__(self, downloader=None):
@@ -392,12 +395,15 @@ class InfoExtractor(object):
 
     def initialize(self):
         """Initializes an instance (authentication, etc)."""
-        self._initialize_geo_bypass(self._GEO_COUNTRIES)
+        self._initialize_geo_bypass({
+            'countries': self._GEO_COUNTRIES,
+            'ip_blocks': self._GEO_IP_BLOCKS,
+        })
         if not self._ready:
             self._real_initialize()
             self._ready = True
 
-    def _initialize_geo_bypass(self, countries):
+    def _initialize_geo_bypass(self, geo_bypass_context):
         """
         Initialize geo restriction bypass mechanism.
 
@@ -408,28 +414,82 @@ class InfoExtractor(object):
         HTTP requests.
 
         This method will be used for initial geo bypass mechanism initialization
-        during the instance initialization with _GEO_COUNTRIES.
+        during the instance initialization with _GEO_COUNTRIES and
+        _GEO_IP_BLOCKS.
 
-        You may also manually call it from extractor's code if geo countries
+        You may also manually call it from extractor's code if geo bypass
         information is not available beforehand (e.g. obtained during
-        extraction) or due to some another reason.
+        extraction) or due to some other reason. In this case you should pass
+        this information in geo bypass context passed as first argument. It may
+        contain following fields:
+
+        countries:  List of geo unrestricted countries (similar
+                    to _GEO_COUNTRIES)
+        ip_blocks:  List of geo unrestricted IP blocks in CIDR notation
+                    (similar to _GEO_IP_BLOCKS)
+
         """
         if not self._x_forwarded_for_ip:
-            country_code = self._downloader.params.get('geo_bypass_country', None)
-            # If there is no explicit country for geo bypass specified and
-            # the extractor is known to be geo restricted let's fake IP
-            # as X-Forwarded-For right away.
-            if (not country_code and
-                    self._GEO_BYPASS and
-                    self._downloader.params.get('geo_bypass', True) and
-                    countries):
-                country_code = random.choice(countries)
-            if country_code:
-                self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
+
+            # Geo bypass mechanism is explicitly disabled by user
+            if not self._downloader.params.get('geo_bypass', True):
+                return
+
+            if not geo_bypass_context:
+                geo_bypass_context = {}
+
+            # Backward compatibility: previously _initialize_geo_bypass
+            # expected a list of countries, some 3rd party code may still use
+            # it this way
+            if isinstance(geo_bypass_context, (list, tuple)):
+                geo_bypass_context = {
+                    'countries': geo_bypass_context,
+                }
+
+            # The whole point of geo bypass mechanism is to fake IP
+            # as X-Forwarded-For HTTP header based on some IP block or
+            # country code.
+
+            # Path 1: bypassing based on IP block in CIDR notation
+
+            # Explicit IP block specified by user, use it right away
+            # regardless of whether extractor is geo bypassable or not
+            ip_block = self._downloader.params.get('geo_bypass_ip_block', None)
+
+            # Otherwise use random IP block from geo bypass context but only
+            # if extractor is known as geo bypassable
+            if not ip_block:
+                ip_blocks = geo_bypass_context.get('ip_blocks')
+                if self._GEO_BYPASS and ip_blocks:
+                    ip_block = random.choice(ip_blocks)
+
+            if ip_block:
+                self._x_forwarded_for_ip = GeoUtils.random_ipv4(ip_block)
+                if self._downloader.params.get('verbose', False):
+                    self._downloader.to_screen(
+                        '[debug] Using fake IP %s as X-Forwarded-For.'
+                        % self._x_forwarded_for_ip)
+                return
+
+            # Path 2: bypassing based on country code
+
+            # Explicit country code specified by user, use it right away
+            # regardless of whether extractor is geo bypassable or not
+            country = self._downloader.params.get('geo_bypass_country', None)
+
+            # Otherwise use random country code from geo bypass context but
+            # only if extractor is known as geo bypassable
+            if not country:
+                countries = geo_bypass_context.get('countries')
+                if self._GEO_BYPASS and countries:
+                    country = random.choice(countries)
+
+            if country:
+                self._x_forwarded_for_ip = GeoUtils.random_ipv4(country)
                 if self._downloader.params.get('verbose', False):
                     self._downloader.to_screen(
                         '[debug] Using fake IP %s (%s) as X-Forwarded-For.'
-                        % (self._x_forwarded_for_ip, country_code.upper()))
+                        % (self._x_forwarded_for_ip, country.upper()))
 
     def extract(self, url):
         """Extracts URL information and returns it in list of dicts."""
@@ -682,18 +742,30 @@ class InfoExtractor(object):
             else:
                 self.report_warning(errmsg + str(ve))
 
-    def _download_json(self, url_or_request, video_id,
-                       note='Downloading JSON metadata',
-                       errnote='Unable to download JSON metadata',
-                       transform_source=None,
-                       fatal=True, encoding=None, data=None, headers={}, query={}):
-        json_string = self._download_webpage(
+    def _download_json_handle(
+            self, url_or_request, video_id, note='Downloading JSON metadata',
+            errnote='Unable to download JSON metadata', transform_source=None,
+            fatal=True, encoding=None, data=None, headers={}, query={}):
+        """Return a tuple (JSON object, URL handle)"""
+        res = self._download_webpage_handle(
             url_or_request, video_id, note, errnote, fatal=fatal,
             encoding=encoding, data=data, headers=headers, query=query)
-        if (not fatal) and json_string is False:
-            return None
+        if res is False:
+            return res
+        json_string, urlh = res
         return self._parse_json(
-            json_string, video_id, transform_source=transform_source, fatal=fatal)
+            json_string, video_id, transform_source=transform_source,
+            fatal=fatal), urlh
+
+    def _download_json(
+            self, url_or_request, video_id, note='Downloading JSON metadata',
+            errnote='Unable to download JSON metadata', transform_source=None,
+            fatal=True, encoding=None, data=None, headers={}, query={}):
+        res = self._download_json_handle(
+            url_or_request, video_id, note=note, errnote=errnote,
+            transform_source=transform_source, fatal=fatal, encoding=encoding,
+            data=data, headers=headers, query=query)
+        return res if res is False else res[0]
 
     def _parse_json(self, json_string, video_id, transform_source=None, fatal=True):
         if transform_source:
@@ -1008,6 +1080,40 @@ class InfoExtractor(object):
         if isinstance(json_ld, dict):
             json_ld = [json_ld]
 
+        INTERACTION_TYPE_MAP = {
+            'CommentAction': 'comment',
+            'AgreeAction': 'like',
+            'DisagreeAction': 'dislike',
+            'LikeAction': 'like',
+            'DislikeAction': 'dislike',
+            'ListenAction': 'view',
+            'WatchAction': 'view',
+            'ViewAction': 'view',
+        }
+
+        def extract_interaction_statistic(e):
+            interaction_statistic = e.get('interactionStatistic')
+            if not isinstance(interaction_statistic, list):
+                return
+            for is_e in interaction_statistic:
+                if not isinstance(is_e, dict):
+                    continue
+                if is_e.get('@type') != 'InteractionCounter':
+                    continue
+                interaction_type = is_e.get('interactionType')
+                if not isinstance(interaction_type, compat_str):
+                    continue
+                interaction_count = int_or_none(is_e.get('userInteractionCount'))
+                if interaction_count is None:
+                    continue
+                count_kind = INTERACTION_TYPE_MAP.get(interaction_type.split('/')[-1])
+                if not count_kind:
+                    continue
+                count_key = '%s_count' % count_kind
+                if info.get(count_key) is not None:
+                    continue
+                info[count_key] = interaction_count
+
         def extract_video_object(e):
             assert e['@type'] == 'VideoObject'
             info.update({
@@ -1023,6 +1129,7 @@ class InfoExtractor(object):
                 'height': int_or_none(e.get('height')),
                 'view_count': int_or_none(e.get('interactionCount')),
             })
+            extract_interaction_statistic(e)
 
         for e in json_ld:
             if isinstance(e.get('@context'), compat_str) and re.match(r'^https?://schema.org/?$', e.get('@context')):
index fc014f8b558008f971448153c56fb45354ee641f..f4a61645502a3b064a62bcd807dc2fcb5835254b 100644 (file)
@@ -19,8 +19,8 @@ from ..utils import (
 
 
 class CrackleIE(InfoExtractor):
-    _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
-    _TEST = {
+    _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?(?:sony)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
+    _TESTS = [{
         # geo restricted to CA
         'url': 'https://www.crackle.com/andromeda/2502343',
         'info_dict': {
@@ -45,7 +45,10 @@ class CrackleIE(InfoExtractor):
             # m3u8 download
             'skip_download': True,
         }
-    }
+    }, {
+        'url': 'https://www.sonycrackle.com/andromeda/2502343',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
index 3efdc8c21dce170691333f8ee297d65c7c6d1e06..311da515df118e23df584641c6533897d96af1b8 100644 (file)
@@ -49,7 +49,7 @@ class CrunchyrollBaseIE(InfoExtractor):
             })
 
     def _login(self):
-        (username, password) = self._get_login_info()
+        username, password = self._get_login_info()
         if username is None:
             return
 
index 55a127b7696e5d5dbb845709451c1b05b8df7211..03f8cefb77beb1185e14c2f435616d8f25f70f83 100644 (file)
@@ -11,10 +11,10 @@ class CTVNewsIE(InfoExtractor):
     _VALID_URL = r'https?://(?:.+?\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)'
     _TESTS = [{
         'url': 'http://www.ctvnews.ca/video?clipId=901995',
-        'md5': '10deb320dc0ccb8d01d34d12fc2ea672',
+        'md5': '9b8624ba66351a23e0b6e1391971f9af',
         'info_dict': {
             'id': '901995',
-            'ext': 'mp4',
+            'ext': 'flv',
             'title': 'Extended: \'That person cannot be me\' Johnson says',
             'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285',
             'timestamp': 1467286284,
index 8e45923e3da94c8d848f05e4fe71c5a2d189a0e8..35b1e7a34e21b634bc1251e122741d53aade77c5 100644 (file)
@@ -35,7 +35,7 @@ class CuriosityStreamBaseIE(InfoExtractor):
         return result['data']
 
     def _real_initialize(self):
-        (email, password) = self._get_login_info()
+        email, password = self._get_login_info()
         if email is None:
             return
         result = self._download_json(
index 0e7d587dd47c539254468913d4215c2e57d1d5be..9a74906cb625f590bd5d1bc8862fc5c652fac52c 100644 (file)
@@ -1,12 +1,16 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-import json
+import base64
+import hashlib
 import itertools
+import json
+import random
+import re
+import string
 
 from .common import InfoExtractor
-
+from ..compat import compat_struct_pack
 from ..utils import (
     determine_ext,
     error_to_compat_str,
@@ -64,7 +68,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
             'uploader': 'Deadline',
             'uploader_id': 'x1xm8ri',
             'age_limit': 0,
-            'view_count': int,
         },
     }, {
         'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
@@ -167,6 +170,17 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
             player = self._parse_json(player_v5, video_id)
             metadata = player['metadata']
 
+            if metadata.get('error', {}).get('type') == 'password_protected':
+                password = self._downloader.params.get('videopassword')
+                if password:
+                    r = int(metadata['id'][1:], 36)
+                    us64e = lambda x: base64.urlsafe_b64encode(x).decode().strip('=')
+                    t = ''.join(random.choice(string.ascii_letters) for i in range(10))
+                    n = us64e(compat_struct_pack('I', r))
+                    i = us64e(hashlib.md5(('%s%d%s' % (password, r, t)).encode()).digest())
+                    metadata = self._download_json(
+                        'http://www.dailymotion.com/player/metadata/video/p' + i + t + n, video_id)
+
             self._check_error(metadata)
 
             formats = []
@@ -180,9 +194,12 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
                         continue
                     ext = mimetype2ext(type_) or determine_ext(media_url)
                     if ext == 'm3u8':
-                        formats.extend(self._extract_m3u8_formats(
+                        m3u8_formats = self._extract_m3u8_formats(
                             media_url, video_id, 'mp4', preference=-1,
-                            m3u8_id='hls', fatal=False))
+                            m3u8_id='hls', fatal=False)
+                        for f in m3u8_formats:
+                            f['url'] = f['url'].split('#')[0]
+                            formats.append(f)
                     elif ext == 'f4m':
                         formats.extend(self._extract_f4m_formats(
                             media_url, video_id, preference=-1, f4m_id='hds', fatal=False))
@@ -299,8 +316,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
 
     def _check_error(self, info):
         error = info.get('error')
-        if info.get('error') is not None:
-            title = error['title']
+        if error:
+            title = error.get('title') or error['message']
             # See https://developer.dailymotion.com/api#access-error
             if error.get('code') == 'DM007':
                 self.raise_geo_restricted(msg=title)
index 91449dcd8549e992afed748651ad0e3312812721..3589bd42831515d5d4b16bfd64dbb861830b5173 100644 (file)
@@ -5,7 +5,10 @@ import re
 import string
 
 from .discoverygo import DiscoveryGoBaseIE
-from ..compat import compat_str
+from ..compat import (
+    compat_str,
+    compat_urllib_parse_unquote,
+)
 from ..utils import (
     ExtractorError,
     try_get,
@@ -55,15 +58,27 @@ class DiscoveryIE(DiscoveryGoBaseIE):
         video = next(cb for cb in content_blocks if cb.get('type') == 'video')['content']['items'][0]
         video_id = video['id']
 
-        access_token = self._download_json(
-            'https://www.%s.com/anonymous' % site, display_id, query={
-                'authRel': 'authorization',
-                'client_id': try_get(
-                    react_data, lambda x: x['application']['apiClientId'],
-                    compat_str) or '3020a40c2356a645b4b4',
-                'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
-                'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site,
-            })['access_token']
+        access_token = None
+        cookies = self._get_cookies(url)
+
+        # prefer Affiliate Auth Token over Anonymous Auth Token
+        auth_storage_cookie = cookies.get('eosAf') or cookies.get('eosAn')
+        if auth_storage_cookie and auth_storage_cookie.value:
+            auth_storage = self._parse_json(compat_urllib_parse_unquote(
+                compat_urllib_parse_unquote(auth_storage_cookie.value)),
+                video_id, fatal=False) or {}
+            access_token = auth_storage.get('a') or auth_storage.get('access_token')
+
+        if not access_token:
+            access_token = self._download_json(
+                'https://www.%s.com/anonymous' % site, display_id, query={
+                    'authRel': 'authorization',
+                    'client_id': try_get(
+                        react_data, lambda x: x['application']['apiClientId'],
+                        compat_str) or '3020a40c2356a645b4b4',
+                    'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
+                    'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site,
+                })['access_token']
 
         try:
             stream = self._download_json(
@@ -72,7 +87,7 @@ class DiscoveryIE(DiscoveryGoBaseIE):
                     'Authorization': 'Bearer ' + access_token,
                 })
         except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
                 e_description = self._parse_json(
                     e.cause.read().decode(), display_id)['description']
                 if 'resource not available for country' in e_description:
index b6653784cc739ca3f381c71c34e29a19a058e0d7..fba1ef22180a6f4adf93c83e23d8370b6da87c56 100644 (file)
@@ -3,8 +3,8 @@ from __future__ import unicode_literals
 
 import re
 
-from .common import InfoExtractor
 from .brightcove import BrightcoveLegacyIE
+from .dplay import DPlayIE
 from ..compat import (
     compat_parse_qs,
     compat_urlparse,
@@ -12,8 +12,13 @@ from ..compat import (
 from ..utils import smuggle_url
 
 
-class DiscoveryNetworksDeIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?:discovery|tlc|animalplanet|dmax)\.de/(?:.*#(?P<id>\d+)|(?:[^/]+/)*videos/(?P<title>[^/?#]+))'
+class DiscoveryNetworksDeIE(DPlayIE):
+    _VALID_URL = r'''(?x)https?://(?:www\.)?(?P<site>discovery|tlc|animalplanet|dmax)\.de/
+                        (?:
+                           .*\#(?P<id>\d+)|
+                           (?:[^/]+/)*videos/(?P<display_id>[^/?#]+)|
+                           programme/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)
+                        )'''
 
     _TESTS = [{
         'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001',
@@ -40,6 +45,14 @@ class DiscoveryNetworksDeIE(InfoExtractor):
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
+        alternate_id = mobj.group('alternate_id')
+        if alternate_id:
+            self._initialize_geo_bypass({
+                'countries': ['DE'],
+            })
+            return self._get_disco_api_info(
+                url, '%s/%s' % (mobj.group('programme'), alternate_id),
+                'sonic-eu1-prod.disco-api.com', mobj.group('site') + 'de')
         brightcove_id = mobj.group('id')
         if not brightcove_id:
             title = mobj.group('title')
index b734467734c30a880badab5ff72cfac7543b3b56..fe47f6dcef72dcac44aa811ca3cc112d93f066cf 100644 (file)
@@ -97,12 +97,83 @@ class DPlayIE(InfoExtractor):
         'only_matching': True,
     }]
 
+    def _get_disco_api_info(self, url, display_id, disco_host, realm):
+        disco_base = 'https://' + disco_host
+        token = self._download_json(
+            '%s/token' % disco_base, display_id, 'Downloading token',
+            query={
+                'realm': realm,
+            })['data']['attributes']['token']
+        headers = {
+            'Referer': url,
+            'Authorization': 'Bearer ' + token,
+        }
+        video = self._download_json(
+            '%s/content/videos/%s' % (disco_base, display_id), display_id,
+            headers=headers, query={
+                'include': 'show'
+            })
+        video_id = video['data']['id']
+        info = video['data']['attributes']
+        title = info['name']
+        formats = []
+        for format_id, format_dict in self._download_json(
+                '%s/playback/videoPlaybackInfo/%s' % (disco_base, video_id),
+                display_id, headers=headers)['data']['attributes']['streaming'].items():
+            if not isinstance(format_dict, dict):
+                continue
+            format_url = format_dict.get('url')
+            if not format_url:
+                continue
+            ext = determine_ext(format_url)
+            if format_id == 'dash' or ext == 'mpd':
+                formats.extend(self._extract_mpd_formats(
+                    format_url, display_id, mpd_id='dash', fatal=False))
+            elif format_id == 'hls' or ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    format_url, display_id, 'mp4',
+                    entry_protocol='m3u8_native', m3u8_id='hls',
+                    fatal=False))
+            else:
+                formats.append({
+                    'url': format_url,
+                    'format_id': format_id,
+                })
+        self._sort_formats(formats)
+
+        series = None
+        try:
+            included = video.get('included')
+            if isinstance(included, list):
+                show = next(e for e in included if e.get('type') == 'show')
+                series = try_get(
+                    show, lambda x: x['attributes']['name'], compat_str)
+        except StopIteration:
+            pass
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': info.get('description'),
+            'duration': float_or_none(
+                info.get('videoDuration'), scale=1000),
+            'timestamp': unified_timestamp(info.get('publishStart')),
+            'series': series,
+            'season_number': int_or_none(info.get('seasonNumber')),
+            'episode_number': int_or_none(info.get('episodeNumber')),
+            'age_limit': int_or_none(info.get('minimum_age')),
+            'formats': formats,
+        }
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         display_id = mobj.group('id')
         domain = mobj.group('domain')
 
-        self._initialize_geo_bypass([mobj.group('country').upper()])
+        self._initialize_geo_bypass({
+            'countries': [mobj.group('country').upper()],
+        })
 
         webpage = self._download_webpage(url, display_id)
 
@@ -111,72 +182,8 @@ class DPlayIE(InfoExtractor):
 
         if not video_id:
             host = mobj.group('host')
-            disco_base = 'https://disco-api.%s' % host
-            self._download_json(
-                '%s/token' % disco_base, display_id, 'Downloading token',
-                query={
-                    'realm': host.replace('.', ''),
-                })
-            video = self._download_json(
-                '%s/content/videos/%s' % (disco_base, display_id), display_id,
-                headers={
-                    'Referer': url,
-                    'x-disco-client': 'WEB:UNKNOWN:dplay-client:0.0.1',
-                }, query={
-                    'include': 'show'
-                })
-            video_id = video['data']['id']
-            info = video['data']['attributes']
-            title = info['name']
-            formats = []
-            for format_id, format_dict in self._download_json(
-                    '%s/playback/videoPlaybackInfo/%s' % (disco_base, video_id),
-                    display_id)['data']['attributes']['streaming'].items():
-                if not isinstance(format_dict, dict):
-                    continue
-                format_url = format_dict.get('url')
-                if not format_url:
-                    continue
-                ext = determine_ext(format_url)
-                if format_id == 'dash' or ext == 'mpd':
-                    formats.extend(self._extract_mpd_formats(
-                        format_url, display_id, mpd_id='dash', fatal=False))
-                elif format_id == 'hls' or ext == 'm3u8':
-                    formats.extend(self._extract_m3u8_formats(
-                        format_url, display_id, 'mp4',
-                        entry_protocol='m3u8_native', m3u8_id='hls',
-                        fatal=False))
-                else:
-                    formats.append({
-                        'url': format_url,
-                        'format_id': format_id,
-                    })
-            self._sort_formats(formats)
-
-            series = None
-            try:
-                included = video.get('included')
-                if isinstance(included, list):
-                    show = next(e for e in included if e.get('type') == 'show')
-                    series = try_get(
-                        show, lambda x: x['attributes']['name'], compat_str)
-            except StopIteration:
-                pass
-
-            return {
-                'id': video_id,
-                'display_id': display_id,
-                'title': title,
-                'description': info.get('description'),
-                'duration': float_or_none(
-                    info.get('videoDuration'), scale=1000),
-                'timestamp': unified_timestamp(info.get('publishStart')),
-                'series': series,
-                'season_number': int_or_none(info.get('seasonNumber')),
-                'episode_number': int_or_none(info.get('episodeNumber')),
-                'age_limit': int_or_none(info.get('minimum_age')),
-                'formats': formats,
-            }
+            return self._get_disco_api_info(
+                url, display_id, 'disco-api.' + host, host.replace('.', ''))
 
         info = self._download_json(
             'http://%s/api/v2/ajax/videos?video_id=%s' % (domain, video_id),
index ffbd2623d1e5c1707a643931ab36e928fc2d5fc6..ab32ba4ff3eee337e8dfc2c7fa11c755e2b027c4 100644 (file)
@@ -42,7 +42,7 @@ class DramaFeverBaseIE(InfoExtractor):
         self._login()
 
     def _login(self):
-        (username, password) = self._get_login_info()
+        username, password = self._get_login_info()
         if username is None:
             return
 
index f138025d5564b27bef7d09c2d74d7aefffd8cfdc..8d31258c1191ac35e55ec841b05aeb4d146306a7 100644 (file)
@@ -8,7 +8,6 @@ from ..utils import (
     unified_strdate,
     xpath_text,
     determine_ext,
-    qualities,
     float_or_none,
     ExtractorError,
 )
@@ -16,7 +15,8 @@ from ..utils import (
 
 class DreiSatIE(InfoExtractor):
     IE_NAME = '3sat'
-    _VALID_URL = r'(?:https?://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
+    _GEO_COUNTRIES = ['DE']
+    _VALID_URL = r'https?://(?:www\.)?3sat\.de/mediathek/(?:(?:index|mediathek)\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)'
     _TESTS = [
         {
             'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
@@ -43,7 +43,8 @@ class DreiSatIE(InfoExtractor):
     def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
         param_groups = {}
         for param_group in smil.findall(self._xpath_ns('./head/paramGroup', namespace)):
-            group_id = param_group.attrib.get(self._xpath_ns('id', 'http://www.w3.org/XML/1998/namespace'))
+            group_id = param_group.get(self._xpath_ns(
+                'id', 'http://www.w3.org/XML/1998/namespace'))
             params = {}
             for param in param_group:
                 params[param.get('name')] = param.get('value')
@@ -54,7 +55,7 @@ class DreiSatIE(InfoExtractor):
             src = video.get('src')
             if not src:
                 continue
-            bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
+            bitrate = int_or_none(self._search_regex(r'_(\d+)k', src, 'bitrate', None)) or float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
             group_id = video.get('paramGroup')
             param_group = param_groups[group_id]
             for proto in param_group['protocols'].split(','):
@@ -75,66 +76,36 @@ class DreiSatIE(InfoExtractor):
             note='Downloading video info',
             errnote='Failed to download video info')
 
-        status_code = doc.find('./status/statuscode')
-        if status_code is not None and status_code.text != 'ok':
-            code = status_code.text
-            if code == 'notVisibleAnymore':
+        status_code = xpath_text(doc, './status/statuscode')
+        if status_code and status_code != 'ok':
+            if status_code == 'notVisibleAnymore':
                 message = 'Video %s is not available' % video_id
             else:
-                message = '%s returned error: %s' % (self.IE_NAME, code)
+                message = '%s returned error: %s' % (self.IE_NAME, status_code)
             raise ExtractorError(message, expected=True)
 
-        title = doc.find('.//information/title').text
-        description = xpath_text(doc, './/information/detail', 'description')
-        duration = int_or_none(xpath_text(doc, './/details/lengthSec', 'duration'))
-        uploader = xpath_text(doc, './/details/originChannelTitle', 'uploader')
-        uploader_id = xpath_text(doc, './/details/originChannelId', 'uploader id')
-        upload_date = unified_strdate(xpath_text(doc, './/details/airtime', 'upload date'))
-
-        def xml_to_thumbnails(fnode):
-            thumbnails = []
-            for node in fnode:
-                thumbnail_url = node.text
-                if not thumbnail_url:
-                    continue
-                thumbnail = {
-                    'url': thumbnail_url,
-                }
-                if 'key' in node.attrib:
-                    m = re.match('^([0-9]+)x([0-9]+)$', node.attrib['key'])
-                    if m:
-                        thumbnail['width'] = int(m.group(1))
-                        thumbnail['height'] = int(m.group(2))
-                thumbnails.append(thumbnail)
-            return thumbnails
-
-        thumbnails = xml_to_thumbnails(doc.findall('.//teaserimages/teaserimage'))
-
-        format_nodes = doc.findall('.//formitaeten/formitaet')
-        quality = qualities(['veryhigh', 'high', 'med', 'low'])
-
-        def get_quality(elem):
-            return quality(xpath_text(elem, 'quality'))
-        format_nodes.sort(key=get_quality)
-        format_ids = []
+        title = xpath_text(doc, './/information/title', 'title', True)
+
+        urls = []
         formats = []
-        for fnode in format_nodes:
-            video_url = fnode.find('url').text
+        for fnode in doc.findall('.//formitaeten/formitaet'):
+            video_url = xpath_text(fnode, 'url')
+            if not video_url or video_url in urls:
+                continue
+            urls.append(video_url)
+
             is_available = 'http://www.metafilegenerator' not in video_url
-            if not is_available:
+            geoloced = 'static_geoloced_online' in video_url
+            if not is_available or geoloced:
                 continue
+
             format_id = fnode.attrib['basetype']
-            quality = xpath_text(fnode, './quality', 'quality')
             format_m = re.match(r'''(?x)
                 (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
                 (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
             ''', format_id)
 
             ext = determine_ext(video_url, None) or format_m.group('container')
-            if ext not in ('smil', 'f4m', 'm3u8'):
-                format_id = format_id + '-' + quality
-            if format_id in format_ids:
-                continue
 
             if ext == 'meta':
                 continue
@@ -147,24 +118,23 @@ class DreiSatIE(InfoExtractor):
                 if video_url.startswith('https://'):
                     continue
                 formats.extend(self._extract_m3u8_formats(
-                    video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
+                    video_url, video_id, 'mp4', 'm3u8_native',
+                    m3u8_id=format_id, fatal=False))
             elif ext == 'f4m':
                 formats.extend(self._extract_f4m_formats(
                     video_url, video_id, f4m_id=format_id, fatal=False))
             else:
-                proto = format_m.group('proto').lower()
-
-                abr = int_or_none(xpath_text(fnode, './audioBitrate', 'abr'), 1000)
-                vbr = int_or_none(xpath_text(fnode, './videoBitrate', 'vbr'), 1000)
+                quality = xpath_text(fnode, './quality')
+                if quality:
+                    format_id += '-' + quality
 
-                width = int_or_none(xpath_text(fnode, './width', 'width'))
-                height = int_or_none(xpath_text(fnode, './height', 'height'))
+                abr = int_or_none(xpath_text(fnode, './audioBitrate'), 1000)
+                vbr = int_or_none(xpath_text(fnode, './videoBitrate'), 1000)
 
-                filesize = int_or_none(xpath_text(fnode, './filesize', 'filesize'))
-
-                format_note = ''
-                if not format_note:
-                    format_note = None
+                tbr = int_or_none(self._search_regex(
+                    r'_(\d+)k', video_url, 'bitrate', None))
+                if tbr and vbr and not abr:
+                    abr = tbr - vbr
 
                 formats.append({
                     'format_id': format_id,
@@ -174,31 +144,50 @@ class DreiSatIE(InfoExtractor):
                     'vcodec': format_m.group('vcodec'),
                     'abr': abr,
                     'vbr': vbr,
-                    'width': width,
-                    'height': height,
-                    'filesize': filesize,
-                    'format_note': format_note,
-                    'protocol': proto,
-                    '_available': is_available,
+                    'tbr': tbr,
+                    'width': int_or_none(xpath_text(fnode, './width')),
+                    'height': int_or_none(xpath_text(fnode, './height')),
+                    'filesize': int_or_none(xpath_text(fnode, './filesize')),
+                    'protocol': format_m.group('proto').lower(),
                 })
-            format_ids.append(format_id)
+
+        geolocation = xpath_text(doc, './/details/geolocation')
+        if not formats and geolocation and geolocation != 'none':
+            self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
 
         self._sort_formats(formats)
 
+        thumbnails = []
+        for node in doc.findall('.//teaserimages/teaserimage'):
+            thumbnail_url = node.text
+            if not thumbnail_url:
+                continue
+            thumbnail = {
+                'url': thumbnail_url,
+            }
+            thumbnail_key = node.get('key')
+            if thumbnail_key:
+                m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key)
+                if m:
+                    thumbnail['width'] = int(m.group(1))
+                    thumbnail['height'] = int(m.group(2))
+            thumbnails.append(thumbnail)
+
+        upload_date = unified_strdate(xpath_text(doc, './/details/airtime'))
+
         return {
             'id': video_id,
             'title': title,
-            'description': description,
-            'duration': duration,
+            'description': xpath_text(doc, './/information/detail'),
+            'duration': int_or_none(xpath_text(doc, './/details/lengthSec')),
             'thumbnails': thumbnails,
-            'uploader': uploader,
-            'uploader_id': uploader_id,
+            'uploader': xpath_text(doc, './/details/originChannelTitle'),
+            'uploader_id': xpath_text(doc, './/details/originChannelId'),
             'upload_date': upload_date,
             'formats': formats,
         }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
+        video_id = self._match_id(url)
+        details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?id=%s' % video_id
         return self.extract_from_xml_url(video_id, details_url)
diff --git a/youtube_dl/extractor/dtube.py b/youtube_dl/extractor/dtube.py
new file mode 100644 (file)
index 0000000..4ca97f8
--- /dev/null
@@ -0,0 +1,83 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+from socket import timeout
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    parse_iso8601,
+)
+
+
+class DTubeIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?d\.tube/(?:#!/)?v/(?P<uploader_id>[0-9a-z.-]+)/(?P<id>[0-9a-z]{8})'
+    _TEST = {
+        'url': 'https://d.tube/#!/v/benswann/zqd630em',
+        'md5': 'a03eaa186618ffa7a3145945543a251e',
+        'info_dict': {
+            'id': 'zqd630em',
+            'ext': 'mp4',
+            'title': 'Reality Check: FDA\'s Disinformation Campaign on Kratom',
+            'description': 'md5:700d164e066b87f9eac057949e4227c2',
+            'uploader_id': 'benswann',
+            'upload_date': '20180222',
+            'timestamp': 1519328958,
+        },
+        'params': {
+            'format': '480p',
+        },
+    }
+
+    def _real_extract(self, url):
+        uploader_id, video_id = re.match(self._VALID_URL, url).groups()
+        result = self._download_json('https://api.steemit.com/', video_id, data=json.dumps({
+            'jsonrpc': '2.0',
+            'method': 'get_content',
+            'params': [uploader_id, video_id],
+        }).encode())['result']
+
+        metadata = json.loads(result['json_metadata'])
+        video = metadata['video']
+        content = video['content']
+        info = video.get('info', {})
+        title = info.get('title') or result['title']
+
+        def canonical_url(h):
+            if not h:
+                return None
+            return 'https://ipfs.io/ipfs/' + h
+
+        formats = []
+        for q in ('240', '480', '720', '1080', ''):
+            video_url = canonical_url(content.get('video%shash' % q))
+            if not video_url:
+                continue
+            format_id = (q + 'p') if q else 'Source'
+            try:
+                self.to_screen('%s: Checking %s video format URL' % (video_id, format_id))
+                self._downloader._opener.open(video_url, timeout=5).close()
+            except timeout as e:
+                self.to_screen(
+                    '%s: %s URL is invalid, skipping' % (video_id, format_id))
+                continue
+            formats.append({
+                'format_id': format_id,
+                'url': video_url,
+                'height': int_or_none(q),
+                'ext': 'mp4',
+            })
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': content.get('description'),
+            'thumbnail': canonical_url(info.get('snaphash')),
+            'tags': content.get('tags') or metadata.get('tags'),
+            'duration': info.get('duration'),
+            'formats': formats,
+            'timestamp': parse_iso8601(result.get('created')),
+            'uploader_id': uploader_id,
+        }
index 3f760888e6060e1522c735a0d688fe790becb9e0..20996962a7bf58b46d72a262034f63cfd5d049c4 100644 (file)
@@ -91,17 +91,6 @@ class DVTVIE(InfoExtractor):
     }, {
         'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/',
         'only_matching': True,
-    }, {
-        'url': 'https://video.aktualne.cz/dvtv/babis-a-zeman-nesou-vinu-za-to-ze-nemame-jasno-v-tom-kdo-bud/r~026afb54fad711e79704ac1f6b220ee8/',
-        'md5': '87defe16681b1429c91f7a74809823c6',
-        'info_dict': {
-            'id': 'f5ae72f6fad611e794dbac1f6b220ee8',
-            'ext': 'mp4',
-            'title': 'Babiš a Zeman nesou vinu za to, že nemáme jasno v tom, kdo bude vládnout, říká Pekarová Adamová',
-        },
-        'params': {
-            'skip_download': True,
-        },
     }]
 
     def _parse_video_metadata(self, js, video_id, live_js=None):
diff --git a/youtube_dl/extractor/expressen.py b/youtube_dl/extractor/expressen.py
new file mode 100644 (file)
index 0000000..f611780
--- /dev/null
@@ -0,0 +1,77 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    unescapeHTML,
+    unified_timestamp,
+)
+
+
+class ExpressenIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?expressen\.se/tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://www.expressen.se/tv/ledare/ledarsnack/ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden/',
+        'md5': '2fbbe3ca14392a6b1b36941858d33a45',
+        'info_dict': {
+            'id': '8690962',
+            'ext': 'mp4',
+            'title': 'Ledarsnack: Om arbetslösheten bland kvinnor i speciellt utsatta områden',
+            'description': 'md5:f38c81ff69f3de4d269bbda012fcbbba',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 788,
+            'timestamp': 1526639109,
+            'upload_date': '20180518',
+        },
+    }, {
+        'url': 'https://www.expressen.se/tv/kultur/kulturdebatt-med-expressens-karin-olsson/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        def extract_data(name):
+            return self._parse_json(
+                self._search_regex(
+                    r'data-%s=(["\'])(?P<value>(?:(?!\1).)+)\1' % name,
+                    webpage, 'info', group='value'),
+                display_id, transform_source=unescapeHTML)
+
+        info = extract_data('video-tracking-info')
+        video_id = info['videoId']
+
+        data = extract_data('article-data')
+        stream = data['stream']
+
+        if determine_ext(stream) == 'm3u8':
+            formats = self._extract_m3u8_formats(
+                stream, display_id, 'mp4', entry_protocol='m3u8_native',
+                m3u8_id='hls')
+        else:
+            formats = [{
+                'url': stream,
+            }]
+        self._sort_formats(formats)
+
+        title = info.get('titleRaw') or data['title']
+        description = info.get('descriptionRaw')
+        thumbnail = info.get('socialMediaImage') or data.get('image')
+        duration = int_or_none(info.get('videoTotalSecondsDuration') or
+                               data.get('totalSecondsDuration'))
+        timestamp = unified_timestamp(info.get('publishDate'))
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'timestamp': timestamp,
+            'formats': formats,
+        }
index 6fb65e4fe5bca6ee9ecfc2647a330604786a46ab..3b3964c0112737c0ea4c62165ffc02b0633de198 100644 (file)
@@ -44,6 +44,7 @@ from .anysex import AnySexIE
 from .aol import AolIE
 from .allocine import AllocineIE
 from .aliexpress import AliExpressLiveIE
+from .apa import APAIE
 from .aparat import AparatIE
 from .appleconnect import AppleConnectIE
 from .appletrailers import (
@@ -137,6 +138,7 @@ from .brightcove import (
     BrightcoveLegacyIE,
     BrightcoveNewIE,
 )
+from .businessinsider import BusinessInsiderIE
 from .buzzfeed import BuzzFeedIE
 from .byutv import BYUtvIE
 from .c56 import C56IE
@@ -144,6 +146,8 @@ from .camdemy import (
     CamdemyIE,
     CamdemyFolderIE
 )
+from .cammodels import CamModelsIE
+from .camtube import CamTubeIE
 from .camwithher import CamWithHerIE
 from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
@@ -195,6 +199,7 @@ from .clippit import ClippitIE
 from .cliprs import ClipRsIE
 from .clipsyndicate import ClipsyndicateIE
 from .closertotruth import CloserToTruthIE
+from .cloudflarestream import CloudflareStreamIE
 from .cloudy import CloudyIE
 from .clubic import ClubicIE
 from .clyp import ClypIE
@@ -281,6 +286,7 @@ from .drtv import (
     DRTVIE,
     DRTVLiveIE,
 )
+from .dtube import DTubeIE
 from .dvtv import DVTVIE
 from .dumpert import DumpertIE
 from .defense import DefenseGouvFrIE
@@ -329,6 +335,7 @@ from .esri import EsriVideoIE
 from .europa import EuropaIE
 from .everyonesmixtape import EveryonesMixtapeIE
 from .expotv import ExpoTVIE
+from .expressen import ExpressenIE
 from .extremetube import ExtremeTubeIE
 from .eyedotv import EyedoTVIE
 from .facebook import (
@@ -376,6 +383,7 @@ from .francetv import (
     FranceTVSiteIE,
     FranceTVEmbedIE,
     FranceTVInfoIE,
+    FranceTVInfoSportIE,
     FranceTVJeunesseIE,
     GenerationWhatIE,
     CultureboxIE,
@@ -466,10 +474,7 @@ from .imgur import (
 )
 from .ina import InaIE
 from .inc import IncIE
-from .indavideo import (
-    IndavideoIE,
-    IndavideoEmbedIE,
-)
+from .indavideo import IndavideoEmbedIE
 from .infoq import InfoQIE
 from .instagram import InstagramIE, InstagramUserIE
 from .internazionale import InternazionaleIE
@@ -477,7 +482,10 @@ from .internetvideoarchive import InternetVideoArchiveIE
 from .iprima import IPrimaIE
 from .iqiyi import IqiyiIE
 from .ir90tv import Ir90TvIE
-from .itv import ITVIE
+from .itv import (
+    ITVIE,
+    ITVBTCCIE,
+)
 from .ivi import (
     IviIE,
     IviCompilationIE
@@ -576,13 +584,16 @@ from .mailru import (
     MailRuMusicIE,
     MailRuMusicSearchIE,
 )
-from .makerschannel import MakersChannelIE
 from .makertv import MakerTVIE
 from .mangomolo import (
     MangomoloVideoIE,
     MangomoloLiveIE,
 )
 from .manyvids import ManyVidsIE
+from .markiza import (
+    MarkizaIE,
+    MarkizaPageIE,
+)
 from .massengeschmacktv import MassengeschmackTVIE
 from .matchtv import MatchTVIE
 from .mdr import MDRIE
@@ -619,7 +630,6 @@ from .mnet import MnetIE
 from .moevideo import MoeVideoIE
 from .mofosex import MofosexIE
 from .mojvideo import MojvideoIE
-from .moniker import MonikerIE
 from .morningstar import MorningstarIE
 from .motherless import (
     MotherlessIE,
@@ -640,6 +650,7 @@ from .mtv import (
 from .muenchentv import MuenchenTVIE
 from .musicplayon import MusicPlayOnIE
 from .mwave import MwaveIE, MwaveMeetGreetIE
+from .mychannels import MyChannelsIE
 from .myspace import MySpaceIE, MySpaceAlbumIE
 from .myspass import MySpassIE
 from .myvi import (
@@ -661,6 +672,7 @@ from .nbc import (
     NBCOlympicsIE,
     NBCOlympicsStreamIE,
     NBCSportsIE,
+    NBCSportsStreamIE,
     NBCSportsVPlayerIE,
 )
 from .ndr import (
@@ -700,12 +712,7 @@ from .nexx import (
 from .nfb import NFBIE
 from .nfl import NFLIE
 from .nhk import NhkVodIE
-from .nhl import (
-    NHLVideocenterIE,
-    NHLNewsIE,
-    NHLVideocenterCategoryIE,
-    NHLIE,
-)
+from .nhl import NHLIE
 from .nick import (
     NickIE,
     NickBrIE,
@@ -714,10 +721,7 @@ from .nick import (
     NickRuIE,
 )
 from .niconico import NiconicoIE, NiconicoPlaylistIE
-from .ninecninemedia import (
-    NineCNineMediaStackIE,
-    NineCNineMediaIE,
-)
+from .ninecninemedia import NineCNineMediaIE
 from .ninegag import NineGagIE
 from .ninenow import NineNowIE
 from .nintendo import NintendoIE
@@ -805,6 +809,7 @@ from .parliamentliveuk import ParliamentLiveUKIE
 from .patreon import PatreonIE
 from .pbs import PBSIE
 from .pearvideo import PearVideoIE
+from .peertube import PeerTubeIE
 from .people import PeopleIE
 from .performgroup import PerformGroupIE
 from .periscope import (
@@ -1010,7 +1015,10 @@ from .spankbang import SpankBangIE
 from .spankwire import SpankwireIE
 from .spiegel import SpiegelIE, SpiegelArticleIE
 from .spiegeltv import SpiegeltvIE
-from .spike import SpikeIE
+from .spike import (
+    BellatorIE,
+    ParamountNetworkIE,
+)
 from .stitcher import StitcherIE
 from .sport5 import Sport5IE
 from .sportbox import SportBoxEmbedIE
@@ -1136,6 +1144,7 @@ from .tvc import (
 from .tvigle import TvigleIE
 from .tvland import TVLandIE
 from .tvn24 import TVN24IE
+from .tvnet import TVNetIE
 from .tvnoe import TVNoeIE
 from .tvnow import (
     TVNowIE,
@@ -1418,5 +1427,11 @@ from .youtube import (
 )
 from .zapiks import ZapiksIE
 from .zaq1 import Zaq1IE
+from .zattoo import (
+    QuicklineIE,
+    QuicklineLiveIE,
+    ZattooIE,
+    ZattooLiveIE,
+)
 from .zdf import ZDFIE, ZDFChannelIE
 from .zingmp3 import ZingMp3IE
index 220ada3a6dd962f16020e811894220298b720dce..8a9ed96c264ae37c5de2af472eca4c0ff1f4fa86 100644 (file)
@@ -56,6 +56,7 @@ class FacebookIE(InfoExtractor):
     _CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
 
     _VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
+    _VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true'
 
     _TESTS = [{
         'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
@@ -208,6 +209,17 @@ class FacebookIE(InfoExtractor):
         # no title
         'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/',
         'only_matching': True,
+    }, {
+        'url': 'https://www.facebook.com/WatchESLOne/videos/359649331226507/',
+        'info_dict': {
+            'id': '359649331226507',
+            'ext': 'mp4',
+            'title': '#ESLOne VoD - Birmingham Finals Day#1 Fnatic vs. @Evil Geniuses',
+            'uploader': 'ESL One Dota 2',
+        },
+        'params': {
+            'skip_download': True,
+        },
     }]
 
     @staticmethod
@@ -226,7 +238,7 @@ class FacebookIE(InfoExtractor):
         return urls
 
     def _login(self):
-        (useremail, password) = self._get_login_info()
+        useremail, password = self._get_login_info()
         if useremail is None:
             return
 
@@ -312,16 +324,18 @@ class FacebookIE(InfoExtractor):
         if server_js_data:
             video_data = extract_video_data(server_js_data.get('instances', []))
 
+        def extract_from_jsmods_instances(js_data):
+            if js_data:
+                return extract_video_data(try_get(
+                    js_data, lambda x: x['jsmods']['instances'], list) or [])
+
         if not video_data:
             server_js_data = self._parse_json(
                 self._search_regex(
                     r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall|permalink_video_pagelet)',
                     webpage, 'js data', default='{}'),
                 video_id, transform_source=js_to_json, fatal=False)
-            if server_js_data:
-                video_data = extract_video_data(try_get(
-                    server_js_data, lambda x: x['jsmods']['instances'],
-                    list) or [])
+            video_data = extract_from_jsmods_instances(server_js_data)
 
         if not video_data:
             if not fatal_if_no_video:
@@ -333,8 +347,33 @@ class FacebookIE(InfoExtractor):
                     expected=True)
             elif '>You must log in to continue' in webpage:
                 self.raise_login_required()
-            else:
-                raise ExtractorError('Cannot parse data')
+
+            # Video info not in first request, do a secondary request using
+            # tahoe player specific URL
+            tahoe_data = self._download_webpage(
+                self._VIDEO_PAGE_TAHOE_TEMPLATE % video_id, video_id,
+                data=urlencode_postdata({
+                    '__user': 0,
+                    '__a': 1,
+                    '__pc': self._search_regex(
+                        r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', webpage,
+                        'pkg cohort', default='PHASED:DEFAULT'),
+                    '__rev': self._search_regex(
+                        r'client_revision["\']\s*:\s*(\d+),', webpage,
+                        'client revision', default='3944515'),
+                }),
+                headers={
+                    'Content-Type': 'application/x-www-form-urlencoded',
+                })
+            tahoe_js_data = self._parse_json(
+                self._search_regex(
+                    r'for\s+\(\s*;\s*;\s*\)\s*;(.+)', tahoe_data,
+                    'tahoe js data', default='{}'),
+                video_id, fatal=False)
+            video_data = extract_from_jsmods_instances(tahoe_js_data)
+
+        if not video_data:
+            raise ExtractorError('Cannot parse data')
 
         formats = []
         for f in video_data:
@@ -380,7 +419,8 @@ class FacebookIE(InfoExtractor):
             video_title = 'Facebook video #%s' % video_id
         uploader = clean_html(get_element_by_id(
             'fbPhotoPageAuthorName', webpage)) or self._search_regex(
-            r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader', fatal=False)
+            r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader',
+            fatal=False) or self._og_search_title(webpage, fatal=False)
         timestamp = int_or_none(self._search_regex(
             r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
             'timestamp', default=None))
index 448647d727159d97b2f940e76136888af1abc64a..435561147feb991cf3595a3a4d71914a5c0154f6 100644 (file)
@@ -46,7 +46,7 @@ class FC2IE(InfoExtractor):
     }]
 
     def _login(self):
-        (username, password) = self._get_login_info()
+        username, password = self._get_login_info()
         if username is None or password is None:
             return False
 
index c02cd03de1c59452ac1ff2432f9e5ea54134e7de..6fc6b0da076bef777f667e2125b2203c0036f4fa 100644 (file)
@@ -379,6 +379,31 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
         return self._make_url_result(video_id, catalogue)
 
 
+class FranceTVInfoSportIE(FranceTVBaseInfoExtractor):
+    IE_NAME = 'sport.francetvinfo.fr'
+    _VALID_URL = r'https?://sport\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://sport.francetvinfo.fr/les-jeux-olympiques/retour-sur-les-meilleurs-moments-de-pyeongchang-2018',
+        'info_dict': {
+            'id': '6e49080e-3f45-11e8-b459-000d3a2439ea',
+            'ext': 'mp4',
+            'title': 'Retour sur les meilleurs moments de Pyeongchang 2018',
+            'timestamp': 1523639962,
+            'upload_date': '20180413',
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'add_ie': [FranceTVIE.ie_key()],
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        video_id = self._search_regex(r'data-video="([^"]+)"', webpage, 'video_id')
+        return self._make_url_result(video_id, 'Sport-web')
+
+
 class GenerationWhatIE(InfoExtractor):
     IE_NAME = 'france2.fr:generation-what'
     _VALID_URL = r'https?://generation-what\.francetv\.fr/[^/]+/video/(?P<id>[^/?#&]+)'
index 107f658baf2c393036dd4d2c770c01258e29e1a9..07d01caecfe6a1cc9bde8e23eb8d3955cdeda62c 100644 (file)
@@ -51,7 +51,7 @@ class FunimationIE(InfoExtractor):
     }]
 
     def _login(self):
-        (username, password) = self._get_login_info()
+        username, password = self._get_login_info()
         if username is None:
             return
         try:
index faea6576fe6877293ee94cc882c74cbca93eabfb..0ff058619bc05fa6b3d6c0680be56bff957ca802 100644 (file)
@@ -5,7 +5,10 @@ import re
 
 from .common import InfoExtractor
 from .nexx import NexxIE
-from ..utils import int_or_none
+from ..utils import (
+    int_or_none,
+    try_get,
+)
 
 
 class FunkBaseIE(InfoExtractor):
@@ -77,6 +80,20 @@ class FunkChannelIE(FunkBaseIE):
         'params': {
             'skip_download': True,
         },
+    }, {
+        # only available via byIdList API
+        'url': 'https://www.funk.net/channel/informr/martin-sonneborn-erklaert-die-eu',
+        'info_dict': {
+            'id': '205067',
+            'ext': 'mp4',
+            'title': 'Martin Sonneborn erklärt die EU',
+            'description': 'md5:050f74626e4ed87edf4626d2024210c0',
+            'timestamp': 1494424042,
+            'upload_date': '20170510',
+        },
+        'params': {
+            'skip_download': True,
+        },
     }, {
         'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/mein-erster-job-lovemilla-folge-1/lovemilla/',
         'only_matching': True,
@@ -87,16 +104,28 @@ class FunkChannelIE(FunkBaseIE):
         channel_id = mobj.group('id')
         alias = mobj.group('alias')
 
-        results = self._download_json(
-            'https://www.funk.net/api/v3.0/content/videos/filter', channel_id,
-            headers={
-                'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbCIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxzZWFyY2gtYXBpIn0.q4Y2xZG8PFHai24-4Pjx2gym9RmJejtmK6lMXP5wAgc',
-                'Referer': url,
-            }, query={
-                'channelId': channel_id,
-                'size': 100,
-            })['result']
+        headers = {
+            'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbCIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxzZWFyY2gtYXBpIn0.q4Y2xZG8PFHai24-4Pjx2gym9RmJejtmK6lMXP5wAgc',
+            'Referer': url,
+        }
 
-        video = next(r for r in results if r.get('alias') == alias)
+        video = None
+
+        by_id_list = self._download_json(
+            'https://www.funk.net/api/v3.0/content/videos/byIdList', channel_id,
+            headers=headers, query={
+                'ids': alias,
+            }, fatal=False)
+        if by_id_list:
+            video = try_get(by_id_list, lambda x: x['result'][0], dict)
+
+        if not video:
+            results = self._download_json(
+                'https://www.funk.net/api/v3.0/content/videos/filter', channel_id,
+                headers=headers, query={
+                    'channelId': channel_id,
+                    'size': 100,
+                })['result']
+            video = next(r for r in results if r.get('alias') == alias)
 
         return self._make_url_result(video)
index f71d9092e5371d5d2c143058ded24cb5f4ca1958..8806dc48a945e7dea5760498f5b2ef570fb419a4 100644 (file)
@@ -91,7 +91,7 @@ class GDCVaultIE(InfoExtractor):
     ]
 
     def _login(self, webpage_url, display_id):
-        (username, password) = self._get_login_info()
+        username, password = self._get_login_info()
         if username is None or password is None:
             self.report_warning('It looks like ' + webpage_url + ' requires a login. Try specifying a username and password and try again.')
             return None
index af1322e0085befa144605f16c22a52fcca5a3bcf..dad951b751853f900a26e6cd2e7bce5ca964b749 100644 (file)
@@ -23,6 +23,7 @@ from ..utils import (
     is_html,
     js_to_json,
     KNOWN_EXTENSIONS,
+    merge_dicts,
     mimetype2ext,
     orderedSet,
     sanitized_Request,
@@ -106,6 +107,10 @@ from .springboardplatform import SpringboardPlatformIE
 from .yapfiles import YapFilesIE
 from .vice import ViceIE
 from .xfileshare import XFileShareIE
+from .cloudflarestream import CloudflareStreamIE
+from .peertube import PeerTubeIE
+from .indavideo import IndavideoEmbedIE
+from .apa import APAIE
 
 
 class GenericIE(InfoExtractor):
@@ -190,6 +195,16 @@ class GenericIE(InfoExtractor):
                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
             }
         },
+        # RSS feed with enclosures and unsupported link URLs
+        {
+            'url': 'http://www.hellointernet.fm/podcast?format=rss',
+            'info_dict': {
+                'id': 'http://www.hellointernet.fm/podcast?format=rss',
+                'description': 'CGP Grey and Brady Haran talk about YouTube, life, work, whatever.',
+                'title': 'Hello Internet',
+            },
+            'playlist_mincount': 100,
+        },
         # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
         {
             'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
@@ -1271,6 +1286,23 @@ class GenericIE(InfoExtractor):
             },
             'add_ie': ['Kaltura'],
         },
+        {
+            # Kaltura iframe embed, more sophisticated
+            'url': 'http://www.cns.nyu.edu/~eero/math-tools/Videos/lecture-05sep2017.html',
+            'info_dict': {
+                'id': '1_9gzouybz',
+                'ext': 'mp4',
+                'title': 'lecture-05sep2017',
+                'description': 'md5:40f347d91fd4ba047e511c5321064b49',
+                'upload_date': '20170913',
+                'uploader_id': 'eps2',
+                'timestamp': 1505340777,
+            },
+            'params': {
+                'skip_download': True,
+            },
+            'add_ie': ['Kaltura'],
+        },
         {
             # meta twitter:player
             'url': 'http://thechive.com/2017/12/08/all-i-want-for-christmas-is-more-twerk/',
@@ -1443,21 +1475,6 @@ class GenericIE(InfoExtractor):
             },
             'expected_warnings': ['Failed to parse JSON Expecting value'],
         },
-        # Ooyala embed
-        {
-            'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
-            'info_dict': {
-                'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
-                'ext': 'mp4',
-                'description': 'Index/Match versus VLOOKUP.',
-                'title': 'This is what separates the Excel masters from the wannabes',
-                'duration': 191.933,
-            },
-            'params': {
-                # m3u8 downloads
-                'skip_download': True,
-            }
-        },
         # Brightcove URL in single quotes
         {
             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
@@ -1985,6 +2002,63 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,
             },
         },
+        {
+            # CloudflareStream embed
+            'url': 'https://www.cloudflare.com/products/cloudflare-stream/',
+            'info_dict': {
+                'id': '31c9291ab41fac05471db4e73aa11717',
+                'ext': 'mp4',
+                'title': '31c9291ab41fac05471db4e73aa11717',
+            },
+            'add_ie': [CloudflareStreamIE.ie_key()],
+            'params': {
+                'skip_download': True,
+            },
+        },
+        {
+            # PeerTube embed
+            'url': 'https://joinpeertube.org/fr/home/',
+            'info_dict': {
+                'id': 'home',
+                'title': 'Reprenez le contrôle de vos vidéos ! #JoinPeertube',
+            },
+            'playlist_count': 2,
+        },
+        {
+            # Indavideo embed
+            'url': 'https://streetkitchen.hu/receptek/igy_kell_otthon_hamburgert_sutni/',
+            'info_dict': {
+                'id': '1693903',
+                'ext': 'mp4',
+                'title': 'Így kell otthon hamburgert sütni',
+                'description': 'md5:f5a730ecf900a5c852e1e00540bbb0f7',
+                'timestamp': 1426330212,
+                'upload_date': '20150314',
+                'uploader': 'StreetKitchen',
+                'uploader_id': '546363',
+            },
+            'add_ie': [IndavideoEmbedIE.ie_key()],
+            'params': {
+                'skip_download': True,
+            },
+        },
+        {
+            # APA embed via JWPlatform embed
+            'url': 'http://www.vol.at/blue-man-group/5593454',
+            'info_dict': {
+                'id': 'jjv85FdZ',
+                'ext': 'mp4',
+                'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
+                'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+                'thumbnail': r're:^https?://.*\.jpg$',
+                'duration': 254,
+                'timestamp': 1519211149,
+                'upload_date': '20180221',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
         {
             'url': 'http://share-videos.se/auto/video/83645793?uid=13',
             'md5': 'b68d276de422ab07ee1d49388103f457',
@@ -2025,13 +2099,15 @@ class GenericIE(InfoExtractor):
 
         entries = []
         for it in doc.findall('./channel/item'):
-            next_url = xpath_text(it, 'link', fatal=False)
+            next_url = None
+            enclosure_nodes = it.findall('./enclosure')
+            for e in enclosure_nodes:
+                next_url = e.attrib.get('url')
+                if next_url:
+                    break
+
             if not next_url:
-                enclosure_nodes = it.findall('./enclosure')
-                for e in enclosure_nodes:
-                    next_url = e.attrib.get('url')
-                    if next_url:
-                        break
+                next_url = xpath_text(it, 'link', fatal=False)
 
             if not next_url:
                 continue
@@ -2995,6 +3071,26 @@ class GenericIE(InfoExtractor):
             return self.playlist_from_matches(
                 xfileshare_urls, video_id, video_title, ie=XFileShareIE.ie_key())
 
+        cloudflarestream_urls = CloudflareStreamIE._extract_urls(webpage)
+        if cloudflarestream_urls:
+            return self.playlist_from_matches(
+                cloudflarestream_urls, video_id, video_title, ie=CloudflareStreamIE.ie_key())
+
+        peertube_urls = PeerTubeIE._extract_urls(webpage)
+        if peertube_urls:
+            return self.playlist_from_matches(
+                peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
+
+        indavideo_urls = IndavideoEmbedIE._extract_urls(webpage)
+        if indavideo_urls:
+            return self.playlist_from_matches(
+                indavideo_urls, video_id, video_title, ie=IndavideoEmbedIE.ie_key())
+
+        apa_urls = APAIE._extract_urls(webpage)
+        if apa_urls:
+            return self.playlist_from_matches(
+                apa_urls, video_id, video_title, ie=APAIE.ie_key())
+
         sharevideos_urls = [mobj.group('url') for mobj in re.finditer(
             r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
             webpage)]
@@ -3002,21 +3098,6 @@ class GenericIE(InfoExtractor):
             return self.playlist_from_matches(
                 sharevideos_urls, video_id, video_title)
 
-        def merge_dicts(dict1, dict2):
-            merged = {}
-            for k, v in dict1.items():
-                if v is not None:
-                    merged[k] = v
-            for k, v in dict2.items():
-                if v is None:
-                    continue
-                if (k not in merged or
-                        (isinstance(v, compat_str) and v and
-                            isinstance(merged[k], compat_str) and
-                            not merged[k])):
-                    merged[k] = v
-            return merged
-
         # Look for HTML5 media
         entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
         if entries:
index dc7b2661c58a0b35053ea50c7a2c1fa7b093f642..c2140c36274b0bd0b82a59a81eb35aa6aca9ab9e 100644 (file)
@@ -1,15 +1,16 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import base64
+import hashlib
+import json
 import random
 import re
-import math
 
 from .common import InfoExtractor
 from ..compat import (
+    compat_HTTPError,
     compat_str,
-    compat_chr,
-    compat_ord,
 )
 from ..utils import (
     ExtractorError,
@@ -22,12 +23,7 @@ from ..utils import (
 
 class GloboIE(InfoExtractor):
     _VALID_URL = r'(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P<id>\d{7,})'
-
-    _API_URL_TEMPLATE = 'http://api.globovideos.com/videos/%s/playlist'
-    _SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=17.0.0.132&resource_id=%s'
-
-    _RESIGN_EXPIRATION = 86400
-
+    _NETRC_MACHINE = 'globo'
     _TESTS = [{
         'url': 'http://g1.globo.com/carros/autoesporte/videos/t/exclusivos-do-g1/v/mercedes-benz-gla-passa-por-teste-de-colisao-na-europa/3607726/',
         'md5': 'b3ccc801f75cd04a914d51dadb83a78d',
@@ -70,287 +66,51 @@ class GloboIE(InfoExtractor):
         'only_matching': True,
     }]
 
-    class MD5(object):
-        HEX_FORMAT_LOWERCASE = 0
-        HEX_FORMAT_UPPERCASE = 1
-        BASE64_PAD_CHARACTER_DEFAULT_COMPLIANCE = ''
-        BASE64_PAD_CHARACTER_RFC_COMPLIANCE = '='
-        PADDING = '=0xFF01DD'
-        hexcase = 0
-        b64pad = ''
-
-        def __init__(self):
-            pass
-
-        class JSArray(list):
-            def __getitem__(self, y):
-                try:
-                    return list.__getitem__(self, y)
-                except IndexError:
-                    return 0
-
-            def __setitem__(self, i, y):
-                try:
-                    return list.__setitem__(self, i, y)
-                except IndexError:
-                    self.extend([0] * (i - len(self) + 1))
-                    self[-1] = y
-
-        @classmethod
-        def hex_md5(cls, param1):
-            return cls.rstr2hex(cls.rstr_md5(cls.str2rstr_utf8(param1)))
-
-        @classmethod
-        def b64_md5(cls, param1, param2=None):
-            return cls.rstr2b64(cls.rstr_md5(cls.str2rstr_utf8(param1, param2)))
-
-        @classmethod
-        def any_md5(cls, param1, param2):
-            return cls.rstr2any(cls.rstr_md5(cls.str2rstr_utf8(param1)), param2)
-
-        @classmethod
-        def rstr_md5(cls, param1):
-            return cls.binl2rstr(cls.binl_md5(cls.rstr2binl(param1), len(param1) * 8))
-
-        @classmethod
-        def rstr2hex(cls, param1):
-            _loc_2 = '0123456789ABCDEF' if cls.hexcase else '0123456789abcdef'
-            _loc_3 = ''
-            for _loc_5 in range(0, len(param1)):
-                _loc_4 = compat_ord(param1[_loc_5])
-                _loc_3 += _loc_2[_loc_4 >> 4 & 15] + _loc_2[_loc_4 & 15]
-            return _loc_3
-
-        @classmethod
-        def rstr2b64(cls, param1):
-            _loc_2 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'
-            _loc_3 = ''
-            _loc_4 = len(param1)
-            for _loc_5 in range(0, _loc_4, 3):
-                _loc_6_1 = compat_ord(param1[_loc_5]) << 16
-                _loc_6_2 = compat_ord(param1[_loc_5 + 1]) << 8 if _loc_5 + 1 < _loc_4 else 0
-                _loc_6_3 = compat_ord(param1[_loc_5 + 2]) if _loc_5 + 2 < _loc_4 else 0
-                _loc_6 = _loc_6_1 | _loc_6_2 | _loc_6_3
-                for _loc_7 in range(0, 4):
-                    if _loc_5 * 8 + _loc_7 * 6 > len(param1) * 8:
-                        _loc_3 += cls.b64pad
-                    else:
-                        _loc_3 += _loc_2[_loc_6 >> 6 * (3 - _loc_7) & 63]
-            return _loc_3
-
-        @staticmethod
-        def rstr2any(param1, param2):
-            _loc_3 = len(param2)
-            _loc_4 = []
-            _loc_9 = [0] * ((len(param1) >> 2) + 1)
-            for _loc_5 in range(0, len(_loc_9)):
-                _loc_9[_loc_5] = compat_ord(param1[_loc_5 * 2]) << 8 | compat_ord(param1[_loc_5 * 2 + 1])
-
-            while len(_loc_9) > 0:
-                _loc_8 = []
-                _loc_7 = 0
-                for _loc_5 in range(0, len(_loc_9)):
-                    _loc_7 = (_loc_7 << 16) + _loc_9[_loc_5]
-                    _loc_6 = math.floor(_loc_7 / _loc_3)
-                    _loc_7 -= _loc_6 * _loc_3
-                    if len(_loc_8) > 0 or _loc_6 > 0:
-                        _loc_8[len(_loc_8)] = _loc_6
-
-                _loc_4[len(_loc_4)] = _loc_7
-                _loc_9 = _loc_8
-
-            _loc_10 = ''
-            _loc_5 = len(_loc_4) - 1
-            while _loc_5 >= 0:
-                _loc_10 += param2[_loc_4[_loc_5]]
-                _loc_5 -= 1
-
-            return _loc_10
-
-        @classmethod
-        def str2rstr_utf8(cls, param1, param2=None):
-            _loc_3 = ''
-            _loc_4 = -1
-            if not param2:
-                param2 = cls.PADDING
-            param1 = param1 + param2[1:9]
-            while True:
-                _loc_4 += 1
-                if _loc_4 >= len(param1):
-                    break
-                _loc_5 = compat_ord(param1[_loc_4])
-                _loc_6 = compat_ord(param1[_loc_4 + 1]) if _loc_4 + 1 < len(param1) else 0
-                if 55296 <= _loc_5 <= 56319 and 56320 <= _loc_6 <= 57343:
-                    _loc_5 = 65536 + ((_loc_5 & 1023) << 10) + (_loc_6 & 1023)
-                    _loc_4 += 1
-                if _loc_5 <= 127:
-                    _loc_3 += compat_chr(_loc_5)
-                    continue
-                if _loc_5 <= 2047:
-                    _loc_3 += compat_chr(192 | _loc_5 >> 6 & 31) + compat_chr(128 | _loc_5 & 63)
-                    continue
-                if _loc_5 <= 65535:
-                    _loc_3 += compat_chr(224 | _loc_5 >> 12 & 15) + compat_chr(128 | _loc_5 >> 6 & 63) + compat_chr(
-                        128 | _loc_5 & 63)
-                    continue
-                if _loc_5 <= 2097151:
-                    _loc_3 += compat_chr(240 | _loc_5 >> 18 & 7) + compat_chr(128 | _loc_5 >> 12 & 63) + compat_chr(
-                        128 | _loc_5 >> 6 & 63) + compat_chr(128 | _loc_5 & 63)
-            return _loc_3
-
-        @staticmethod
-        def rstr2binl(param1):
-            _loc_2 = [0] * ((len(param1) >> 2) + 1)
-            for _loc_3 in range(0, len(_loc_2)):
-                _loc_2[_loc_3] = 0
-            for _loc_3 in range(0, len(param1) * 8, 8):
-                _loc_2[_loc_3 >> 5] |= (compat_ord(param1[_loc_3 // 8]) & 255) << _loc_3 % 32
-            return _loc_2
-
-        @staticmethod
-        def binl2rstr(param1):
-            _loc_2 = ''
-            for _loc_3 in range(0, len(param1) * 32, 8):
-                _loc_2 += compat_chr(param1[_loc_3 >> 5] >> _loc_3 % 32 & 255)
-            return _loc_2
-
-        @classmethod
-        def binl_md5(cls, param1, param2):
-            param1 = cls.JSArray(param1)
-            param1[param2 >> 5] |= 128 << param2 % 32
-            param1[(param2 + 64 >> 9 << 4) + 14] = param2
-            _loc_3 = 1732584193
-            _loc_4 = -271733879
-            _loc_5 = -1732584194
-            _loc_6 = 271733878
-            for _loc_7 in range(0, len(param1), 16):
-                _loc_8 = _loc_3
-                _loc_9 = _loc_4
-                _loc_10 = _loc_5
-                _loc_11 = _loc_6
-                _loc_3 = cls.md5_ff(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 0], 7, -680876936)
-                _loc_6 = cls.md5_ff(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 1], 12, -389564586)
-                _loc_5 = cls.md5_ff(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 2], 17, 606105819)
-                _loc_4 = cls.md5_ff(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 3], 22, -1044525330)
-                _loc_3 = cls.md5_ff(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 4], 7, -176418897)
-                _loc_6 = cls.md5_ff(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 5], 12, 1200080426)
-                _loc_5 = cls.md5_ff(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 6], 17, -1473231341)
-                _loc_4 = cls.md5_ff(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 7], 22, -45705983)
-                _loc_3 = cls.md5_ff(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 8], 7, 1770035416)
-                _loc_6 = cls.md5_ff(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 9], 12, -1958414417)
-                _loc_5 = cls.md5_ff(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 10], 17, -42063)
-                _loc_4 = cls.md5_ff(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 11], 22, -1990404162)
-                _loc_3 = cls.md5_ff(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 12], 7, 1804603682)
-                _loc_6 = cls.md5_ff(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 13], 12, -40341101)
-                _loc_5 = cls.md5_ff(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 14], 17, -1502002290)
-                _loc_4 = cls.md5_ff(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 15], 22, 1236535329)
-                _loc_3 = cls.md5_gg(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 1], 5, -165796510)
-                _loc_6 = cls.md5_gg(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 6], 9, -1069501632)
-                _loc_5 = cls.md5_gg(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 11], 14, 643717713)
-                _loc_4 = cls.md5_gg(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 0], 20, -373897302)
-                _loc_3 = cls.md5_gg(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 5], 5, -701558691)
-                _loc_6 = cls.md5_gg(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 10], 9, 38016083)
-                _loc_5 = cls.md5_gg(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 15], 14, -660478335)
-                _loc_4 = cls.md5_gg(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 4], 20, -405537848)
-                _loc_3 = cls.md5_gg(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 9], 5, 568446438)
-                _loc_6 = cls.md5_gg(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 14], 9, -1019803690)
-                _loc_5 = cls.md5_gg(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 3], 14, -187363961)
-                _loc_4 = cls.md5_gg(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 8], 20, 1163531501)
-                _loc_3 = cls.md5_gg(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 13], 5, -1444681467)
-                _loc_6 = cls.md5_gg(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 2], 9, -51403784)
-                _loc_5 = cls.md5_gg(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 7], 14, 1735328473)
-                _loc_4 = cls.md5_gg(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 12], 20, -1926607734)
-                _loc_3 = cls.md5_hh(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 5], 4, -378558)
-                _loc_6 = cls.md5_hh(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 8], 11, -2022574463)
-                _loc_5 = cls.md5_hh(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 11], 16, 1839030562)
-                _loc_4 = cls.md5_hh(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 14], 23, -35309556)
-                _loc_3 = cls.md5_hh(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 1], 4, -1530992060)
-                _loc_6 = cls.md5_hh(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 4], 11, 1272893353)
-                _loc_5 = cls.md5_hh(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 7], 16, -155497632)
-                _loc_4 = cls.md5_hh(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 10], 23, -1094730640)
-                _loc_3 = cls.md5_hh(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 13], 4, 681279174)
-                _loc_6 = cls.md5_hh(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 0], 11, -358537222)
-                _loc_5 = cls.md5_hh(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 3], 16, -722521979)
-                _loc_4 = cls.md5_hh(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 6], 23, 76029189)
-                _loc_3 = cls.md5_hh(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 9], 4, -640364487)
-                _loc_6 = cls.md5_hh(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 12], 11, -421815835)
-                _loc_5 = cls.md5_hh(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 15], 16, 530742520)
-                _loc_4 = cls.md5_hh(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 2], 23, -995338651)
-                _loc_3 = cls.md5_ii(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 0], 6, -198630844)
-                _loc_6 = cls.md5_ii(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 7], 10, 1126891415)
-                _loc_5 = cls.md5_ii(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 14], 15, -1416354905)
-                _loc_4 = cls.md5_ii(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 5], 21, -57434055)
-                _loc_3 = cls.md5_ii(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 12], 6, 1700485571)
-                _loc_6 = cls.md5_ii(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 3], 10, -1894986606)
-                _loc_5 = cls.md5_ii(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 10], 15, -1051523)
-                _loc_4 = cls.md5_ii(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 1], 21, -2054922799)
-                _loc_3 = cls.md5_ii(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 8], 6, 1873313359)
-                _loc_6 = cls.md5_ii(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 15], 10, -30611744)
-                _loc_5 = cls.md5_ii(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 6], 15, -1560198380)
-                _loc_4 = cls.md5_ii(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 13], 21, 1309151649)
-                _loc_3 = cls.md5_ii(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 4], 6, -145523070)
-                _loc_6 = cls.md5_ii(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 11], 10, -1120210379)
-                _loc_5 = cls.md5_ii(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 2], 15, 718787259)
-                _loc_4 = cls.md5_ii(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 9], 21, -343485551)
-                _loc_3 = cls.safe_add(_loc_3, _loc_8)
-                _loc_4 = cls.safe_add(_loc_4, _loc_9)
-                _loc_5 = cls.safe_add(_loc_5, _loc_10)
-                _loc_6 = cls.safe_add(_loc_6, _loc_11)
-            return [_loc_3, _loc_4, _loc_5, _loc_6]
-
-        @classmethod
-        def md5_cmn(cls, param1, param2, param3, param4, param5, param6):
-            return cls.safe_add(
-                cls.bit_rol(cls.safe_add(cls.safe_add(param2, param1), cls.safe_add(param4, param6)), param5), param3)
-
-        @classmethod
-        def md5_ff(cls, param1, param2, param3, param4, param5, param6, param7):
-            return cls.md5_cmn(param2 & param3 | ~param2 & param4, param1, param2, param5, param6, param7)
-
-        @classmethod
-        def md5_gg(cls, param1, param2, param3, param4, param5, param6, param7):
-            return cls.md5_cmn(param2 & param4 | param3 & ~param4, param1, param2, param5, param6, param7)
-
-        @classmethod
-        def md5_hh(cls, param1, param2, param3, param4, param5, param6, param7):
-            return cls.md5_cmn(param2 ^ param3 ^ param4, param1, param2, param5, param6, param7)
-
-        @classmethod
-        def md5_ii(cls, param1, param2, param3, param4, param5, param6, param7):
-            return cls.md5_cmn(param3 ^ (param2 | ~param4), param1, param2, param5, param6, param7)
-
-        @classmethod
-        def safe_add(cls, param1, param2):
-            _loc_3 = (param1 & 65535) + (param2 & 65535)
-            _loc_4 = (param1 >> 16) + (param2 >> 16) + (_loc_3 >> 16)
-            return cls.lshift(_loc_4, 16) | _loc_3 & 65535
-
-        @classmethod
-        def bit_rol(cls, param1, param2):
-            return cls.lshift(param1, param2) | (param1 & 0xFFFFFFFF) >> (32 - param2)
-
-        @staticmethod
-        def lshift(value, count):
-            r = (0xFFFFFFFF & value) << count
-            return -(~(r - 1) & 0xFFFFFFFF) if r > 0x7FFFFFFF else r
+    def _real_initialize(self):
+        email, password = self._get_login_info()
+        if email is None:
+            return
+
+        try:
+            self._download_json(
+                'https://login.globo.com/api/authentication', None, data=json.dumps({
+                    'payload': {
+                        'email': email,
+                        'password': password,
+                        'serviceId': 4654,
+                    },
+                }).encode(), headers={
+                    'Content-Type': 'application/json; charset=utf-8',
+                })
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+                resp = self._parse_json(e.cause.read(), None)
+                raise ExtractorError(resp.get('userMessage') or resp['id'], expected=True)
+            raise
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
         video = self._download_json(
-            self._API_URL_TEMPLATE % video_id, video_id)['videos'][0]
+            'http://api.globovideos.com/videos/%s/playlist' % video_id,
+            video_id)['videos'][0]
 
         title = video['title']
 
         formats = []
         for resource in video['resources']:
             resource_id = resource.get('_id')
-            if not resource_id or resource_id.endswith('manifest'):
+            resource_url = resource.get('url')
+            if not resource_id or not resource_url:
                 continue
 
             security = self._download_json(
-                self._SECURITY_URL_TEMPLATE % (video_id, resource_id),
-                video_id, 'Downloading security hash for %s' % resource_id)
+                'http://security.video.globo.com/videos/%s/hash' % video_id,
+                video_id, 'Downloading security hash for %s' % resource_id, query={
+                    'player': 'flash',
+                    'version': '17.0.0.132',
+                    'resource_id': resource_id,
+                })
 
             security_hash = security.get('hash')
             if not security_hash:
@@ -361,22 +121,28 @@ class GloboIE(InfoExtractor):
                 continue
 
             hash_code = security_hash[:2]
-            received_time = int(security_hash[2:12])
+            received_time = security_hash[2:12]
             received_random = security_hash[12:22]
             received_md5 = security_hash[22:]
 
-            sign_time = received_time + self._RESIGN_EXPIRATION
+            sign_time = compat_str(int(received_time) + 86400)
             padding = '%010d' % random.randint(1, 10000000000)
 
-            signed_md5 = self.MD5.b64_md5(received_md5 + compat_str(sign_time) + padding)
-            signed_hash = hash_code + compat_str(received_time) + received_random + compat_str(sign_time) + padding + signed_md5
+            md5_data = (received_md5 + sign_time + padding + '0xFF01DD').encode()
+            signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=')
+            signed_hash = hash_code + received_time + received_random + sign_time + padding + signed_md5
 
-            resource_url = resource['url']
             signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash')
             if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'):
                 formats.extend(self._extract_m3u8_formats(
                     signed_url, resource_id, 'mp4', entry_protocol='m3u8_native',
                     m3u8_id='hls', fatal=False))
+            elif resource_id.endswith('mpd') or resource_url.endswith('.mpd'):
+                formats.extend(self._extract_mpd_formats(
+                    signed_url, resource_id, mpd_id='dash', fatal=False))
+            elif resource_id.endswith('manifest') or resource_url.endswith('/manifest'):
+                formats.extend(self._extract_ism_formats(
+                    signed_url, resource_id, ism_id='mss', fatal=False))
             else:
                 formats.append({
                     'url': signed_url,
index 9c7b1bd37d447c5a3cb09aa6b3418b63accffcad..e781405f2f7d55aa51f8cc50cc99df22ee5dff40 100644 (file)
@@ -123,7 +123,7 @@ class GoIE(AdobePassIE):
                         'adobe_requestor_id': requestor_id,
                     })
                 else:
-                    self._initialize_geo_bypass(['US'])
+                    self._initialize_geo_bypass({'countries': ['US']})
                 entitlement = self._download_json(
                     'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json',
                     video_id, data=urlencode_postdata(data))
index 9b2e1c1645da92fea04303963fdb8947fea6b419..35dde42d07143a3805c5f1cfd551001eb2a23de3 100644 (file)
@@ -6,7 +6,9 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     determine_ext,
+    ExtractorError,
     int_or_none,
+    parse_age_limit,
     parse_iso8601,
 )
 
@@ -23,6 +25,7 @@ class Go90IE(InfoExtractor):
             'description': 'VICE\'s Karley Sciortino meets with activists who discuss the state\'s strong anti-porn stance. Then, VICE Sports explains NFL contracts.',
             'timestamp': 1491868800,
             'upload_date': '20170411',
+            'age_limit': 14,
         }
     }
 
@@ -33,6 +36,8 @@ class Go90IE(InfoExtractor):
             video_id, headers={
                 'Content-Type': 'application/json; charset=utf-8',
             }, data=b'{"client":"web","device_type":"pc"}')
+        if video_data.get('requires_drm'):
+            raise ExtractorError('This video is DRM protected.', expected=True)
         main_video_asset = video_data['main_video_asset']
 
         episode_number = int_or_none(video_data.get('episode_number'))
@@ -123,4 +128,5 @@ class Go90IE(InfoExtractor):
             'season_number': season_number,
             'episode_number': episode_number,
             'subtitles': subtitles,
+            'age_limit': parse_age_limit(video_data.get('rating')),
         }
index eee517071e969b176891b34bc795e426c12b7a97..39fabe8a55958374f03700e8a653ea86d707dcc9 100644 (file)
@@ -17,6 +17,8 @@ class HiDiveIE(InfoExtractor):
     # Using X-Forwarded-For results in 403 HTTP error for HLS fragments,
     # so disabling geo bypass completely
     _GEO_BYPASS = False
+    _NETRC_MACHINE = 'hidive'
+    _LOGIN_URL = 'https://www.hidive.com/account/login'
 
     _TESTS = [{
         'url': 'https://www.hidive.com/stream/the-comic-artist-and-his-assistants/s01e001',
@@ -31,8 +33,26 @@ class HiDiveIE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
+        'skip': 'Requires Authentication',
     }]
 
+    def _real_initialize(self):
+        email, password = self._get_login_info()
+        if email is None:
+            return
+
+        webpage = self._download_webpage(self._LOGIN_URL, None)
+        form = self._search_regex(
+            r'(?s)<form[^>]+action="/account/login"[^>]*>(.+?)</form>',
+            webpage, 'login form')
+        data = self._hidden_inputs(form)
+        data.update({
+            'Email': email,
+            'Password': password,
+        })
+        self._download_webpage(
+            self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(data))
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         title, key = mobj.group('title', 'key')
@@ -43,6 +63,7 @@ class HiDiveIE(InfoExtractor):
             data=urlencode_postdata({
                 'Title': title,
                 'Key': key,
+                'PlayerId': 'f4f895ce1ca713ba263b91caeb1daa2d08904783',
             }))
 
         restriction = settings.get('restrictionReason')
@@ -79,6 +100,7 @@ class HiDiveIE(InfoExtractor):
                 subtitles.setdefault(cc_lang, []).append({
                     'url': cc_url,
                 })
+        self._sort_formats(formats)
 
         season_number = int_or_none(self._search_regex(
             r's(\d+)', key, 'season number', default=None))
index 6424d34ac4acc0f6c01205076cd0e6e25723fc22..9ba1aa7032eb932e325f9e1e40da1e4b583ce322 100644 (file)
@@ -66,7 +66,7 @@ class HRTiBaseIE(InfoExtractor):
         self._logout_url = modules['user']['resources']['logout']['uri']
 
     def _login(self):
-        (username, password) = self._get_login_info()
+        username, password = self._get_login_info()
         # TODO: figure out authentication with cookies
         if username is None or password is None:
             self.raise_login_required()
index 3ff672a89215f249574fac721bf913bee84d8200..4bafa54a21e5abbc294a3686b6b974a9bb6d4eb3 100644 (file)
@@ -3,25 +3,27 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
+    determine_ext,
     mimetype2ext,
+    parse_duration,
     qualities,
-    remove_end,
 )
 
 
 class ImdbIE(InfoExtractor):
     IE_NAME = 'imdb'
     IE_DESC = 'Internet Movie Database trailers'
-    _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video|title).+?[/-]vi(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video|title|list).+?[/-]vi(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'http://www.imdb.com/video/imdb/vi2524815897',
         'info_dict': {
             'id': '2524815897',
             'ext': 'mp4',
-            'title': 'Ice Age: Continental Drift Trailer (No. 2)',
-            'description': 'md5:9061c2219254e5d14e03c25c98e96a81',
+            'title': 'No. 2 from Ice Age: Continental Drift (2012)',
+            'description': 'md5:87bd0bdc61e351f21f20d2d7441cb4e7',
         }
     }, {
         'url': 'http://www.imdb.com/video/_/vi2524815897',
@@ -38,76 +40,67 @@ class ImdbIE(InfoExtractor):
     }, {
         'url': 'http://www.imdb.com/title/tt4218696/videoplayer/vi2608641561',
         'only_matching': True,
+    }, {
+        'url': 'https://www.imdb.com/list/ls009921623/videoplayer/vi260482329',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage('http://www.imdb.com/video/imdb/vi%s' % video_id, video_id)
-        descr = self._html_search_regex(
-            r'(?s)<span itemprop="description">(.*?)</span>',
-            webpage, 'description', fatal=False)
-        player_url = 'http://www.imdb.com/video/imdb/vi%s/imdb/single' % video_id
-        player_page = self._download_webpage(
-            player_url, video_id, 'Downloading player page')
-        # the player page contains the info for the default format, we have to
-        # fetch other pages for the rest of the formats
-        extra_formats = re.findall(r'href="(?P<url>%s.*?)".*?>(?P<name>.*?)<' % re.escape(player_url), player_page)
-        format_pages = [
-            self._download_webpage(
-                f_url, video_id, 'Downloading info for %s format' % f_name)
-            for f_url, f_name in extra_formats]
-        format_pages.append(player_page)
+        webpage = self._download_webpage(
+            'https://www.imdb.com/videoplayer/vi' + video_id, video_id)
+        video_metadata = self._parse_json(self._search_regex(
+            r'window\.IMDbReactInitialState\.push\(({.+?})\);', webpage,
+            'video metadata'), video_id)['videos']['videoMetadata']['vi' + video_id]
+        title = self._html_search_meta(
+            ['og:title', 'twitter:title'], webpage) or self._html_search_regex(
+            r'<title>(.+?)</title>', webpage, 'title', fatal=False) or video_metadata['title']
 
         quality = qualities(('SD', '480p', '720p', '1080p'))
         formats = []
-        for format_page in format_pages:
-            json_data = self._search_regex(
-                r'<script[^>]+class="imdb-player-data"[^>]*?>(.*?)</script>',
-                format_page, 'json data', flags=re.DOTALL)
-            info = self._parse_json(json_data, video_id, fatal=False)
-            if not info:
-                continue
-            format_info = info.get('videoPlayerObject', {}).get('video', {})
-            if not format_info:
-                continue
-            video_info_list = format_info.get('videoInfoList')
-            if not video_info_list or not isinstance(video_info_list, list):
+        for encoding in video_metadata.get('encodings', []):
+            if not encoding or not isinstance(encoding, dict):
                 continue
-            video_info = video_info_list[0]
-            if not video_info or not isinstance(video_info, dict):
+            video_url = encoding.get('videoUrl')
+            if not video_url or not isinstance(video_url, compat_str):
                 continue
-            video_url = video_info.get('videoUrl')
-            if not video_url:
+            ext = determine_ext(video_url, mimetype2ext(encoding.get('mimeType')))
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id='hls', fatal=False))
                 continue
-            format_id = format_info.get('ffname')
+            format_id = encoding.get('definition')
             formats.append({
                 'format_id': format_id,
                 'url': video_url,
-                'ext': mimetype2ext(video_info.get('videoMimeType')),
+                'ext': ext,
                 'quality': quality(format_id),
             })
         self._sort_formats(formats)
 
         return {
             'id': video_id,
-            'title': remove_end(self._og_search_title(webpage), ' - IMDb'),
+            'title': title,
             'formats': formats,
-            'description': descr,
-            'thumbnail': format_info.get('slate'),
+            'description': video_metadata.get('description'),
+            'thumbnail': video_metadata.get('slate', {}).get('url'),
+            'duration': parse_duration(video_metadata.get('duration')),
         }
 
 
 class ImdbListIE(InfoExtractor):
     IE_NAME = 'imdb:list'
     IE_DESC = 'Internet Movie Database lists'
-    _VALID_URL = r'https?://(?:www\.)?imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'
+    _VALID_URL = r'https?://(?:www\.)?imdb\.com/list/ls(?P<id>\d{9})(?!/videoplayer/vi\d+)'
     _TEST = {
-        'url': 'http://www.imdb.com/list/JFs9NWw6XI0',
+        'url': 'https://www.imdb.com/list/ls009921623/',
         'info_dict': {
-            'id': 'JFs9NWw6XI0',
-            'title': 'March 23, 2012 Releases',
+            'id': '009921623',
+            'title': 'The Bourne Legacy',
+            'description': 'A list of trailers, clips, and more from The Bourne Legacy, starring Jeremy Renner and Rachel Weisz.',
         },
-        'playlist_count': 7,
+        'playlist_count': 8,
     }
 
     def _real_extract(self, url):
@@ -115,9 +108,13 @@ class ImdbListIE(InfoExtractor):
         webpage = self._download_webpage(url, list_id)
         entries = [
             self.url_result('http://www.imdb.com' + m, 'Imdb')
-            for m in re.findall(r'href="(/video/imdb/vi[^"]+)"\s+data-type="playlist"', webpage)]
+            for m in re.findall(r'href="(/list/ls%s/videoplayer/vi[^"]+)"' % list_id, webpage)]
 
         list_title = self._html_search_regex(
-            r'<h1 class="header">(.*?)</h1>', webpage, 'list title')
+            r'<h1[^>]+class="[^"]*header[^"]*"[^>]*>(.*?)</h1>',
+            webpage, 'list title')
+        list_description = self._html_search_regex(
+            r'<div[^>]+class="[^"]*list-description[^"]*"[^>]*><p>(.*?)</p>',
+            webpage, 'list description')
 
-        return self.playlist_result(entries, list_id, list_title)
+        return self.playlist_result(entries, list_id, list_title, list_description)
index 67c24a51c861f4dd9a1da8f790d61469c8e2220c..2901960a51f8faa28975a7c5bef7b965062573ba 100644 (file)
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_urlparse
 from ..utils import (
     int_or_none,
     js_to_json,
@@ -21,7 +20,7 @@ class ImgurIE(InfoExtractor):
             'id': 'A61SaA1',
             'ext': 'mp4',
             'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
-            'description': 'Imgur: The most awesome images on the Internet.',
+            'description': 'Imgur: The magic of the Internet',
         },
     }, {
         'url': 'https://imgur.com/A61SaA1',
@@ -29,7 +28,7 @@ class ImgurIE(InfoExtractor):
             'id': 'A61SaA1',
             'ext': 'mp4',
             'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
-            'description': 'Imgur: The most awesome images on the Internet.',
+            'description': 'Imgur: The magic of the Internet',
         },
     }, {
         'url': 'https://imgur.com/gallery/YcAQlkx',
@@ -37,8 +36,6 @@ class ImgurIE(InfoExtractor):
             'id': 'YcAQlkx',
             'ext': 'mp4',
             'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
-            'description': 'Imgur: The most awesome images on the Internet.'
-
         }
     }, {
         'url': 'http://imgur.com/topic/Funny/N8rOudd',
@@ -50,8 +47,8 @@ class ImgurIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage(
-            compat_urlparse.urljoin(url, video_id), video_id)
+        gifv_url = 'https://i.imgur.com/{id}.gifv'.format(id=video_id)
+        webpage = self._download_webpage(gifv_url, video_id)
 
         width = int_or_none(self._og_search_property(
             'video:width', webpage, default=None))
@@ -107,7 +104,7 @@ class ImgurIE(InfoExtractor):
         return {
             'id': video_id,
             'formats': formats,
-            'description': self._og_search_description(webpage),
+            'description': self._og_search_description(webpage, default=None),
             'title': self._og_search_title(webpage),
         }
 
index 241ec83c4e9ebddc523b961db84fda04199755b6..d5b258a0fcf514d5f7cd19d615fc6aef5d404575 100644 (file)
@@ -21,6 +21,21 @@ class IncIE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
+    }, {
+        # div with id=kaltura_player_1_kqs38cgm
+        'url': 'https://www.inc.com/oscar-raymundo/richard-branson-young-entrepeneurs.html',
+        'info_dict': {
+            'id': '1_kqs38cgm',
+            'ext': 'mp4',
+            'title': 'Branson: "In the end, you have to say, Screw it. Just do it."',
+            'description': 'md5:21b832d034f9af5191ca5959da5e9cb6',
+            'timestamp': 1364403232,
+            'upload_date': '20130327',
+            'uploader_id': 'incdigital@inc.com',
+        },
+        'params': {
+            'skip_download': True,
+        },
     }, {
         'url': 'http://www.inc.com/video/david-whitford/founders-forum-tripadvisor-steve-kaufer-most-enjoyable-moment-for-entrepreneur.html',
         'only_matching': True,
@@ -31,10 +46,13 @@ class IncIE(InfoExtractor):
         webpage = self._download_webpage(url, display_id)
 
         partner_id = self._search_regex(
-            r'var\s+_?bizo_data_partner_id\s*=\s*["\'](\d+)', webpage, 'partner id')
+            r'var\s+_?bizo_data_partner_id\s*=\s*["\'](\d+)', webpage,
+            'partner id', default='1034971')
 
-        kaltura_id = self._parse_json(self._search_regex(
-            r'pageInfo\.videos\s*=\s*\[(.+)\];', webpage, 'kaltura id'),
+        kaltura_id = self._search_regex(
+            r'id=(["\'])kaltura_player_(?P<id>.+?)\1', webpage, 'kaltura id',
+            default=None, group='id') or self._parse_json(self._search_regex(
+                r'pageInfo\.videos\s*=\s*\[(.+)\];', webpage, 'kaltura id'),
             display_id)['vid_kaltura_id']
 
         return self.url_result(
index 11cf3c60964fe55c21282ecccf48a7d80ae4bac5..2b5b2b5b0b303aa4c1b6bdb4a6e1226dea11e218 100644 (file)
@@ -1,11 +1,15 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
     int_or_none,
     parse_age_limit,
     parse_iso8601,
+    update_url_query,
 )
 
 
@@ -13,7 +17,7 @@ class IndavideoEmbedIE(InfoExtractor):
     _VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P<id>[\da-f]+)'
     _TESTS = [{
         'url': 'http://indavideo.hu/player/video/1bdc3c6d80/',
-        'md5': 'f79b009c66194acacd40712a6778acfa',
+        'md5': 'c8a507a1c7410685f83a06eaeeaafeab',
         'info_dict': {
             'id': '1837039',
             'ext': 'mp4',
@@ -36,6 +40,20 @@ class IndavideoEmbedIE(InfoExtractor):
         'only_matching': True,
     }]
 
+    # Some example URLs covered by generic extractor:
+    #   http://indavideo.hu/video/Vicces_cica_1
+    #   http://index.indavideo.hu/video/2015_0728_beregszasz
+    #   http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko
+    #   http://erotika.indavideo.hu/video/Amator_tini_punci
+    #   http://film.indavideo.hu/video/f_hrom_nagymamm_volt
+    #   http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes
+
+    @staticmethod
+    def _extract_urls(webpage):
+        return re.findall(
+            r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//embed\.indavideo\.hu/player/video/[\da-f]+)',
+            webpage)
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
@@ -45,7 +63,14 @@ class IndavideoEmbedIE(InfoExtractor):
 
         title = video['title']
 
-        video_urls = video.get('video_files', [])
+        video_urls = []
+
+        video_files = video.get('video_files')
+        if isinstance(video_files, list):
+            video_urls.extend(video_files)
+        elif isinstance(video_files, dict):
+            video_urls.extend(video_files.values())
+
         video_file = video.get('video_file')
         if video:
             video_urls.append(video_file)
@@ -58,11 +83,23 @@ class IndavideoEmbedIE(InfoExtractor):
             if flv_url not in video_urls:
                 video_urls.append(flv_url)
 
-        formats = [{
-            'url': video_url,
-            'height': int_or_none(self._search_regex(
-                r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None)),
-        } for video_url in video_urls]
+        filesh = video.get('filesh')
+
+        formats = []
+        for video_url in video_urls:
+            height = int_or_none(self._search_regex(
+                r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None))
+            if filesh:
+                if not height:
+                    continue
+                token = filesh.get(compat_str(height))
+                if token is None:
+                    continue
+                video_url = update_url_query(video_url, {'token': token})
+            formats.append({
+                'url': video_url,
+                'height': height,
+            })
         self._sort_formats(formats)
 
         timestamp = video.get('date')
@@ -89,55 +126,3 @@ class IndavideoEmbedIE(InfoExtractor):
             'tags': tags,
             'formats': formats,
         }
-
-
-class IndavideoIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:.+?\.)?indavideo\.hu/video/(?P<id>[^/#?]+)'
-    _TESTS = [{
-        'url': 'http://indavideo.hu/video/Vicces_cica_1',
-        'md5': '8c82244ba85d2a2310275b318eb51eac',
-        'info_dict': {
-            'id': '1335611',
-            'display_id': 'Vicces_cica_1',
-            'ext': 'mp4',
-            'title': 'Vicces cica',
-            'description': 'Játszik a tablettel. :D',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'uploader': 'Jet_Pack',
-            'uploader_id': '491217',
-            'timestamp': 1390821212,
-            'upload_date': '20140127',
-            'duration': 7,
-            'age_limit': 0,
-            'tags': ['vicces', 'macska', 'cica', 'ügyes', 'nevetés', 'játszik', 'Cukiság', 'Jet_Pack'],
-        },
-    }, {
-        'url': 'http://index.indavideo.hu/video/2015_0728_beregszasz',
-        'only_matching': True,
-    }, {
-        'url': 'http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko',
-        'only_matching': True,
-    }, {
-        'url': 'http://erotika.indavideo.hu/video/Amator_tini_punci',
-        'only_matching': True,
-    }, {
-        'url': 'http://film.indavideo.hu/video/f_hrom_nagymamm_volt',
-        'only_matching': True,
-    }, {
-        'url': 'http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, display_id)
-        embed_url = self._search_regex(
-            r'<link[^>]+rel="video_src"[^>]+href="(.+?)"', webpage, 'embed url')
-
-        return {
-            '_type': 'url_transparent',
-            'ie_key': 'IndavideoEmbed',
-            'url': embed_url,
-            'display_id': display_id,
-        }
index fdfa7de9ef05ce5b509f786d99027c6b5deb0bdf..4b081bd469ca084f5ecf47ac38cc97326b011b31 100644 (file)
@@ -239,7 +239,7 @@ class IqiyiIE(InfoExtractor):
         return ohdave_rsa_encrypt(data, e, N)
 
     def _login(self):
-        (username, password) = self._get_login_info()
+        username, password = self._get_login_info()
 
         # No authentication to be performed
         if not username:
index 18a7d7f8cde2f2434d0db003836b5e7a437b2c9a..6a4f8a50569b5bebb0a46481843609fcd205d108 100644 (file)
@@ -7,6 +7,7 @@ import json
 import re
 
 from .common import InfoExtractor
+from .brightcove import BrightcoveNewIE
 from ..compat import (
     compat_str,
     compat_etree_register_namespace,
@@ -18,6 +19,7 @@ from ..utils import (
     xpath_text,
     int_or_none,
     parse_duration,
+    smuggle_url,
     ExtractorError,
     determine_ext,
 )
@@ -41,6 +43,14 @@ class ITVIE(InfoExtractor):
         # unavailable via data-playlist-url
         'url': 'https://www.itv.com/hub/through-the-keyhole/2a2271a0033',
         'only_matching': True,
+    }, {
+        # InvalidVodcrid
+        'url': 'https://www.itv.com/hub/james-martins-saturday-morning/2a5159a0034',
+        'only_matching': True,
+    }, {
+        # ContentUnavailable
+        'url': 'https://www.itv.com/hub/whos-doing-the-dishes/2a2898a0024',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -127,7 +137,8 @@ class ITVIE(InfoExtractor):
             if fault_code == 'InvalidGeoRegion':
                 self.raise_geo_restricted(
                     msg=fault_string, countries=self._GEO_COUNTRIES)
-            elif fault_code != 'InvalidEntity':
+            elif fault_code not in (
+                    'InvalidEntity', 'InvalidVodcrid', 'ContentUnavailable'):
                 raise ExtractorError(
                     '%s said: %s' % (self.IE_NAME, fault_string), expected=True)
             info.update({
@@ -251,3 +262,38 @@ class ITVIE(InfoExtractor):
             'subtitles': subtitles,
         })
         return info
+
+
+class ITVBTCCIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?itv\.com/btcc/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _TEST = {
+        'url': 'http://www.itv.com/btcc/races/btcc-2018-all-the-action-from-brands-hatch',
+        'info_dict': {
+            'id': 'btcc-2018-all-the-action-from-brands-hatch',
+            'title': 'BTCC 2018: All the action from Brands Hatch',
+        },
+        'playlist_mincount': 9,
+    }
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1582188683001/HkiHLnNRx_default/index.html?videoId=%s'
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        entries = [
+            self.url_result(
+                smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {
+                    # ITV does not like some GB IP ranges, so here are some
+                    # IP blocks it accepts
+                    'geo_ip_blocks': [
+                        '193.113.0.0/16', '54.36.162.0/23', '159.65.16.0/21'
+                    ],
+                    'referrer': url,
+                }),
+                ie=BrightcoveNewIE.ie_key(), video_id=video_id)
+            for video_id in re.findall(r'data-video-id=["\'](\d+)', webpage)]
+
+        title = self._og_search_title(webpage, fatal=False)
+
+        return self.playlist_result(entries, playlist_id, title)
index b1d72177d5acef2c48a82f7df18081005199b47e..f8fca6c8f47c9978e78d1c2c76d0dcb84a8d7f9d 100644 (file)
@@ -1,10 +1,11 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote
+from ..compat import (
+    compat_str,
+    compat_urllib_parse_unquote,
+)
 from ..utils import (
     determine_ext,
     float_or_none,
@@ -57,12 +58,33 @@ class IzleseneIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        url = 'http://www.izlesene.com/video/%s' % video_id
-        webpage = self._download_webpage(url, video_id)
+        webpage = self._download_webpage('http://www.izlesene.com/video/%s' % video_id, video_id)
+
+        video = self._parse_json(
+            self._search_regex(
+                r'videoObj\s*=\s*({.+?})\s*;\s*\n', webpage, 'streams'),
+            video_id)
+
+        title = video.get('videoTitle') or self._og_search_title(webpage)
+
+        formats = []
+        for stream in video['media']['level']:
+            source_url = stream.get('source')
+            if not source_url or not isinstance(source_url, compat_str):
+                continue
+            ext = determine_ext(url, 'mp4')
+            quality = stream.get('value')
+            height = int_or_none(quality)
+            formats.append({
+                'format_id': '%sp' % quality if quality else 'sd',
+                'url': compat_urllib_parse_unquote(source_url),
+                'ext': ext,
+                'height': height,
+            })
+        self._sort_formats(formats)
 
-        title = self._og_search_title(webpage)
         description = self._og_search_description(webpage, default=None)
-        thumbnail = self._proto_relative_url(
+        thumbnail = video.get('posterURL') or self._proto_relative_url(
             self._og_search_thumbnail(webpage), scheme='http:')
 
         uploader = self._html_search_regex(
@@ -71,41 +93,15 @@ class IzleseneIE(InfoExtractor):
         timestamp = parse_iso8601(self._html_search_meta(
             'uploadDate', webpage, 'upload date'))
 
-        duration = float_or_none(self._html_search_regex(
-            r'"videoduration"\s*:\s*"([^"]+)"',
-            webpage, 'duration', fatal=False), scale=1000)
+        duration = float_or_none(video.get('duration') or self._html_search_regex(
+            r'videoduration["\']?\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+            webpage, 'duration', fatal=False, group='value'), scale=1000)
 
         view_count = str_to_int(get_element_by_id('videoViewCount', webpage))
         comment_count = self._html_search_regex(
             r'comment_count\s*=\s*\'([^\']+)\';',
             webpage, 'comment_count', fatal=False)
 
-        content_url = self._html_search_meta(
-            'contentURL', webpage, 'content URL', fatal=False)
-        ext = determine_ext(content_url, 'mp4')
-
-        # Might be empty for some videos.
-        streams = self._html_search_regex(
-            r'"qualitylevel"\s*:\s*"([^"]+)"', webpage, 'streams', default='')
-
-        formats = []
-        if streams:
-            for stream in streams.split('|'):
-                quality, url = re.search(r'\[(\w+)\](.+)', stream).groups()
-                formats.append({
-                    'format_id': '%sp' % quality if quality else 'sd',
-                    'url': compat_urllib_parse_unquote(url),
-                    'ext': ext,
-                })
-        else:
-            stream_url = self._search_regex(
-                r'"streamurl"\s*:\s*"([^"]+)"', webpage, 'stream URL')
-            formats.append({
-                'format_id': 'sd',
-                'url': compat_urllib_parse_unquote(stream_url),
-                'ext': ext,
-            })
-
         return {
             'id': video_id,
             'title': title,
index 0ea89e4d66d9fb20a9e9d9cf6635d7dff09f4ba6..04f68fce41fc129f7cf65e0e1cddb0e85636fb9e 100644 (file)
@@ -136,9 +136,10 @@ class KalturaIE(InfoExtractor):
             re.search(
                 r'''(?xs)
                     <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])
-                      (?:https?:)?//(?:(?:www|cdnapi)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
+                      (?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
                       (?:(?!(?P=q1)).)*
                       [?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
+                      (?:(?!(?P=q1)).)*
                     (?P=q1)
                 ''', webpage)
         )
index ffe10154b7c6acd986f41ea0036d4b195fb003fb..8dd1ce0d0e935888f3a90dedf92f113f2ef74f9b 100644 (file)
@@ -130,7 +130,7 @@ class LeIE(InfoExtractor):
             media_id, 'Downloading flash playJson data', query={
                 'id': media_id,
                 'platid': 1,
-                'splatid': 101,
+                'splatid': 105,
                 'format': 1,
                 'source': 1000,
                 'tkey': self.calc_time_key(int(time.time())),
index 2803d7e8df47c92003ccd11a23c951af1644a7a1..729d8de50fab70cd69bab41fae9db0cba4d7da9b 100644 (file)
@@ -282,7 +282,9 @@ class LimelightMediaIE(LimelightBaseIE):
     def _real_extract(self, url):
         url, smuggled_data = unsmuggle_url(url, {})
         video_id = self._match_id(url)
-        self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
+        self._initialize_geo_bypass({
+            'countries': smuggled_data.get('geo_countries'),
+        })
 
         pc, mobile, metadata = self._extract(
             video_id, 'getPlaylistByMediaId',
diff --git a/youtube_dl/extractor/markiza.py b/youtube_dl/extractor/markiza.py
new file mode 100644 (file)
index 0000000..e6bfab1
--- /dev/null
@@ -0,0 +1,121 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    orderedSet,
+    parse_duration,
+    try_get,
+)
+
+
+class MarkizaIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?videoarchiv\.markiza\.sk/(?:video/(?:[^/]+/)*|embed/)(?P<id>\d+)(?:[_/]|$)'
+    _TESTS = [{
+        'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723_oteckovia-109',
+        'md5': 'ada4e9fad038abeed971843aa028c7b0',
+        'info_dict': {
+            'id': '139078',
+            'ext': 'mp4',
+            'title': 'Oteckovia 109',
+            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 2760,
+        },
+    }, {
+        'url': 'http://videoarchiv.markiza.sk/video/televizne-noviny/televizne-noviny/85430_televizne-noviny',
+        'info_dict': {
+            'id': '85430',
+            'title': 'Televízne noviny',
+        },
+        'playlist_count': 23,
+    }, {
+        'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723',
+        'only_matching': True,
+    }, {
+        'url': 'http://videoarchiv.markiza.sk/video/84723',
+        'only_matching': True,
+    }, {
+        'url': 'http://videoarchiv.markiza.sk/video/filmy/85190_kamenak',
+        'only_matching': True,
+    }, {
+        'url': 'http://videoarchiv.markiza.sk/video/reflex/zo-zakulisia/84651_pribeh-alzbetky',
+        'only_matching': True,
+    }, {
+        'url': 'http://videoarchiv.markiza.sk/embed/85295',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        data = self._download_json(
+            'http://videoarchiv.markiza.sk/json/video_jwplayer7.json',
+            video_id, query={'id': video_id})
+
+        info = self._parse_jwplayer_data(data, m3u8_id='hls', mpd_id='dash')
+
+        if info.get('_type') == 'playlist':
+            info.update({
+                'id': video_id,
+                'title': try_get(
+                    data, lambda x: x['details']['name'], compat_str),
+            })
+        else:
+            info['duration'] = parse_duration(
+                try_get(data, lambda x: x['details']['duration'], compat_str))
+        return info
+
+
+class MarkizaPageIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?(?:(?:[^/]+\.)?markiza|tvnoviny)\.sk/(?:[^/]+/)*(?P<id>\d+)_'
+    _TESTS = [{
+        'url': 'http://www.markiza.sk/soubiz/zahranicny/1923705_oteckovia-maju-svoj-den-ti-slavni-nie-su-o-nic-menej-rozkosni',
+        'md5': 'ada4e9fad038abeed971843aa028c7b0',
+        'info_dict': {
+            'id': '139355',
+            'ext': 'mp4',
+            'title': 'Oteckovia 110',
+            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 2604,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://dajto.markiza.sk/filmy-a-serialy/1774695_frajeri-vo-vegas',
+        'only_matching': True,
+    }, {
+        'url': 'http://superstar.markiza.sk/aktualne/1923870_to-je-ale-telo-spevacka-ukazala-sexy-postavicku-v-bikinach',
+        'only_matching': True,
+    }, {
+        'url': 'http://hybsa.markiza.sk/aktualne/1923790_uzasna-atmosfera-na-hybsa-v-poprade-superstaristi-si-prve-koncerty-pred-davom-ludi-poriadne-uzili',
+        'only_matching': True,
+    }, {
+        'url': 'http://doma.markiza.sk/filmy/1885250_moja-vysnivana-svadba',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.tvnoviny.sk/domace/1923887_po-smrti-manzela-ju-cakalo-poriadne-prekvapenie',
+        'only_matching': True,
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        return False if MarkizaIE.suitable(url) else super(MarkizaPageIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        entries = [
+            self.url_result('http://videoarchiv.markiza.sk/video/%s' % video_id)
+            for video_id in orderedSet(re.findall(
+                r'(?:initPlayer_|data-entity=["\']|id=["\']player_)(\d+)',
+                webpage))]
+
+        return self.playlist_result(entries, playlist_id)
index 959a105892882c41b7a12206363939c2eb358f82..6367311956ca973328b546b7d7fd8d34f72f251e 100644 (file)
@@ -4,7 +4,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+    int_or_none,
+    parse_codecs,
+)
 
 
 class MinotoIE(InfoExtractor):
@@ -26,7 +29,7 @@ class MinotoIE(InfoExtractor):
                 formats.extend(self._extract_m3u8_formats(fmt_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
             else:
                 fmt_profile = fmt.get('profile') or {}
-                f = {
+                formats.append({
                     'format_id': fmt_profile.get('name-short'),
                     'format_note': fmt_profile.get('name'),
                     'url': fmt_url,
@@ -35,16 +38,8 @@ class MinotoIE(InfoExtractor):
                     'filesize': int_or_none(fmt.get('filesize')),
                     'width': int_or_none(fmt.get('width')),
                     'height': int_or_none(fmt.get('height')),
-                }
-                codecs = fmt.get('codecs')
-                if codecs:
-                    codecs = codecs.split(',')
-                    if len(codecs) == 2:
-                        f.update({
-                            'vcodec': codecs[0],
-                            'acodec': codecs[1],
-                        })
-                formats.append(f)
+                    'codecs': parse_codecs(fmt.get('codecs')),
+                })
         self._sort_formats(formats)
 
         return {
index a56b7690f8703cb873cba7d59cb097afee1b8121..b7bccb504529d7ca1e7f70ced45d86b9259d454b 100644 (file)
@@ -179,6 +179,10 @@ class MixcloudIE(InfoExtractor):
                     formats.append({
                         'format_id': 'http',
                         'url': decrypted,
+                        'downloader_options': {
+                            # Mixcloud starts throttling at >~5M
+                            'http_chunk_size': 5242880,
+                        },
                     })
             self._sort_formats(formats)
 
index 675ff687374a9a94928f3a899ffdb4a45b1b743c..b907f6b4926f9e13e58cb9aa61b1aebabfef1037 100644 (file)
@@ -1,96 +1,90 @@
 from __future__ import unicode_literals
 
-import re
+from .nhl import NHLBaseIE
 
-from .common import InfoExtractor
-from ..utils import (
-    parse_duration,
-    parse_iso8601,
-)
 
-
-class MLBIE(InfoExtractor):
+class MLBIE(NHLBaseIE):
     _VALID_URL = r'''(?x)
                     https?://
-                        (?:[\da-z_-]+\.)*mlb\.com/
+                        (?:[\da-z_-]+\.)*(?P<site>mlb)\.com/
                         (?:
                             (?:
-                                (?:.*?/)?video/(?:topic/[\da-z_-]+/)?(?:v|.*?/c-)|
+                                (?:[^/]+/)*c-|
                                 (?:
                                     shared/video/embed/(?:embed|m-internal-embed)\.html|
                                     (?:[^/]+/)+(?:play|index)\.jsp|
                                 )\?.*?\bcontent_id=
                             )
-                            (?P<id>n?\d+)|
-                            (?:[^/]+/)*(?P<path>[^/]+)
+                            (?P<id>\d+)
                         )
                     '''
+    _CONTENT_DOMAIN = 'content.mlb.com'
     _TESTS = [
         {
-            'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
-            'md5': 'ff56a598c2cf411a9a38a69709e97079',
+            'url': 'https://www.mlb.com/mariners/video/ackleys-spectacular-catch/c-34698933',
+            'md5': '632358dacfceec06bad823b83d21df2d',
             'info_dict': {
                 'id': '34698933',
                 'ext': 'mp4',
                 'title': "Ackley's spectacular catch",
                 'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0',
                 'duration': 66,
-                'timestamp': 1405980600,
-                'upload_date': '20140721',
+                'timestamp': 1405995000,
+                'upload_date': '20140722',
                 'thumbnail': r're:^https?://.*\.jpg$',
             },
         },
         {
-            'url': 'http://m.mlb.com/video/topic/81536970/v34496663/mianym-stanton-practices-for-the-home-run-derby',
-            'md5': 'd9c022c10d21f849f49c05ae12a8a7e9',
+            'url': 'https://www.mlb.com/video/stanton-prepares-for-derby/c-34496663',
+            'md5': 'bf2619bf9cacc0a564fc35e6aeb9219f',
             'info_dict': {
                 'id': '34496663',
                 'ext': 'mp4',
                 'title': 'Stanton prepares for Derby',
                 'description': 'md5:d00ce1e5fd9c9069e9c13ab4faedfa57',
                 'duration': 46,
-                'timestamp': 1405105800,
+                'timestamp': 1405120200,
                 'upload_date': '20140711',
                 'thumbnail': r're:^https?://.*\.jpg$',
             },
         },
         {
-            'url': 'http://m.mlb.com/video/topic/vtp_hrd_sponsor/v34578115/hrd-cespedes-wins-2014-gillette-home-run-derby',
-            'md5': '0e6e73d509321e142409b695eadd541f',
+            'url': 'https://www.mlb.com/video/cespedes-repeats-as-derby-champ/c-34578115',
+            'md5': '99bb9176531adc600b90880fb8be9328',
             'info_dict': {
                 'id': '34578115',
                 'ext': 'mp4',
                 'title': 'Cespedes repeats as Derby champ',
                 'description': 'md5:08df253ce265d4cf6fb09f581fafad07',
                 'duration': 488,
-                'timestamp': 1405399936,
+                'timestamp': 1405414336,
                 'upload_date': '20140715',
                 'thumbnail': r're:^https?://.*\.jpg$',
             },
         },
         {
-            'url': 'http://m.mlb.com/video/v34577915/bautista-on-derby-captaining-duties-his-performance',
-            'md5': 'b8fd237347b844365d74ea61d4245967',
+            'url': 'https://www.mlb.com/video/bautista-on-home-run-derby/c-34577915',
+            'md5': 'da8b57a12b060e7663ee1eebd6f330ec',
             'info_dict': {
                 'id': '34577915',
                 'ext': 'mp4',
                 'title': 'Bautista on Home Run Derby',
                 'description': 'md5:b80b34031143d0986dddc64a8839f0fb',
                 'duration': 52,
-                'timestamp': 1405390722,
+                'timestamp': 1405405122,
                 'upload_date': '20140715',
                 'thumbnail': r're:^https?://.*\.jpg$',
             },
         },
         {
-            'url': 'http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer',
-            'md5': 'aafaf5b0186fee8f32f20508092f8111',
+            'url': 'https://www.mlb.com/news/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer/c-118550098',
+            'md5': 'e09e37b552351fddbf4d9e699c924d68',
             'info_dict': {
                 'id': '75609783',
                 'ext': 'mp4',
                 'title': 'Must C: Pillar climbs for catch',
                 'description': '4/15/15: Blue Jays outfielder Kevin Pillar continues his defensive dominance by climbing the wall in left to rob Tim Beckham of a home run',
-                'timestamp': 1429124820,
+                'timestamp': 1429139220,
                 'upload_date': '20150415',
             }
         },
@@ -111,7 +105,7 @@ class MLBIE(InfoExtractor):
             'only_matching': True,
         },
         {
-            'url': 'http://m.cardinals.mlb.com/stl/video/v51175783/atlstl-piscotty-makes-great-sliding-catch-on-line/?partnerId=as_mlb_20150321_42500876&adbid=579409712979910656&adbpl=tw&adbpr=52847728',
+            'url': 'https://www.mlb.com/cardinals/video/piscottys-great-sliding-catch/c-51175783',
             'only_matching': True,
         },
         {
@@ -120,58 +114,7 @@ class MLBIE(InfoExtractor):
             'only_matching': True,
         },
         {
-            'url': 'http://washington.nationals.mlb.com/mlb/gameday/index.jsp?c_id=was&gid=2015_05_09_atlmlb_wasmlb_1&lang=en&content_id=108309983&mode=video#',
+            'url': 'https://www.mlb.com/cut4/carlos-gomez-borrowed-sunglasses-from-an-as-fan/c-278912842',
             'only_matching': True,
         }
     ]
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        if not video_id:
-            video_path = mobj.group('path')
-            webpage = self._download_webpage(url, video_path)
-            video_id = self._search_regex(
-                [r'data-video-?id="(\d+)"', r'content_id=(\d+)'], webpage, 'video id')
-
-        detail = self._download_xml(
-            'http://m.mlb.com/gen/multimedia/detail/%s/%s/%s/%s.xml'
-            % (video_id[-3], video_id[-2], video_id[-1], video_id), video_id)
-
-        title = detail.find('./headline').text
-        description = detail.find('./big-blurb').text
-        duration = parse_duration(detail.find('./duration').text)
-        timestamp = parse_iso8601(detail.attrib['date'][:-5])
-
-        thumbnails = [{
-            'url': thumbnail.text,
-        } for thumbnail in detail.findall('./thumbnailScenarios/thumbnailScenario')]
-
-        formats = []
-        for media_url in detail.findall('./url'):
-            playback_scenario = media_url.attrib['playback_scenario']
-            fmt = {
-                'url': media_url.text,
-                'format_id': playback_scenario,
-            }
-            m = re.search(r'(?P<vbr>\d+)K_(?P<width>\d+)X(?P<height>\d+)', playback_scenario)
-            if m:
-                fmt.update({
-                    'vbr': int(m.group('vbr')) * 1000,
-                    'width': int(m.group('width')),
-                    'height': int(m.group('height')),
-                })
-            formats.append(fmt)
-
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'description': description,
-            'duration': duration,
-            'timestamp': timestamp,
-            'formats': formats,
-            'thumbnails': thumbnails,
-        }
diff --git a/youtube_dl/extractor/moniker.py b/youtube_dl/extractor/moniker.py
deleted file mode 100644 (file)
index b208820..0000000
+++ /dev/null
@@ -1,116 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import os.path
-import re
-
-from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-    remove_start,
-    sanitized_Request,
-    urlencode_postdata,
-)
-
-
-class MonikerIE(InfoExtractor):
-    IE_DESC = 'allmyvideos.net and vidspot.net'
-    _VALID_URL = r'https?://(?:www\.)?(?:allmyvideos|vidspot)\.net/(?:(?:2|v)/v-)?(?P<id>[a-zA-Z0-9_-]+)'
-
-    _TESTS = [{
-        'url': 'http://allmyvideos.net/jih3nce3x6wn',
-        'md5': '710883dee1bfc370ecf9fa6a89307c88',
-        'info_dict': {
-            'id': 'jih3nce3x6wn',
-            'ext': 'mp4',
-            'title': 'youtube-dl test video',
-        },
-    }, {
-        'url': 'http://allmyvideos.net/embed-jih3nce3x6wn',
-        'md5': '710883dee1bfc370ecf9fa6a89307c88',
-        'info_dict': {
-            'id': 'jih3nce3x6wn',
-            'ext': 'mp4',
-            'title': 'youtube-dl test video',
-        },
-    }, {
-        'url': 'http://vidspot.net/l2ngsmhs8ci5',
-        'md5': '710883dee1bfc370ecf9fa6a89307c88',
-        'info_dict': {
-            'id': 'l2ngsmhs8ci5',
-            'ext': 'mp4',
-            'title': 'youtube-dl test video',
-        },
-    }, {
-        'url': 'https://www.vidspot.net/l2ngsmhs8ci5',
-        'only_matching': True,
-    }, {
-        'url': 'http://vidspot.net/2/v-ywDf99',
-        'md5': '5f8254ce12df30479428b0152fb8e7ba',
-        'info_dict': {
-            'id': 'ywDf99',
-            'ext': 'mp4',
-            'title': 'IL FAIT LE MALIN EN PORSHE CAYENNE ( mais pas pour longtemps)',
-            'description': 'IL FAIT LE MALIN EN PORSHE CAYENNE.',
-        },
-    }, {
-        'url': 'http://allmyvideos.net/v/v-HXZm5t',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        orig_video_id = self._match_id(url)
-        video_id = remove_start(orig_video_id, 'embed-')
-        url = url.replace(orig_video_id, video_id)
-        assert re.match(self._VALID_URL, url) is not None
-        orig_webpage = self._download_webpage(url, video_id)
-
-        if '>File Not Found<' in orig_webpage:
-            raise ExtractorError('Video %s does not exist' % video_id, expected=True)
-
-        error = self._search_regex(
-            r'class="err">([^<]+)<', orig_webpage, 'error', default=None)
-        if error:
-            raise ExtractorError(
-                '%s returned error: %s' % (self.IE_NAME, error), expected=True)
-
-        builtin_url = self._search_regex(
-            r'<iframe[^>]+src=(["\'])(?P<url>.+?/builtin-.+?)\1',
-            orig_webpage, 'builtin URL', default=None, group='url')
-
-        if builtin_url:
-            req = sanitized_Request(builtin_url)
-            req.add_header('Referer', url)
-            webpage = self._download_webpage(req, video_id, 'Downloading builtin page')
-            title = self._og_search_title(orig_webpage).strip()
-            description = self._og_search_description(orig_webpage).strip()
-        else:
-            fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage)
-            data = dict(fields)
-
-            post = urlencode_postdata(data)
-            headers = {
-                b'Content-Type': b'application/x-www-form-urlencoded',
-            }
-            req = sanitized_Request(url, post, headers)
-            webpage = self._download_webpage(
-                req, video_id, note='Downloading video page ...')
-
-            title = os.path.splitext(data['fname'])[0]
-            description = None
-
-        # Could be several links with different quality
-        links = re.findall(r'"file" : "?(.+?)",', webpage)
-        # Assume the links are ordered in quality
-        formats = [{
-            'url': l,
-            'quality': i,
-        } for i, l in enumerate(links)]
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'description': description,
-            'formats': formats,
-        }
similarity index 59%
rename from youtube_dl/extractor/makerschannel.py
rename to youtube_dl/extractor/mychannels.py
index f5d00e61dd705eb418b5ef5f5dff7d4b44fb768c..b1ffe78489d8eb00186c251a05ec9124879878ae 100644 (file)
@@ -6,17 +6,17 @@ import re
 from .common import InfoExtractor
 
 
-class MakersChannelIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?makerschannel\.com/.*(?P<id_type>video|production)_id=(?P<id>[0-9]+)'
+class MyChannelsIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?mychannels\.com/.*(?P<id_type>video|production)_id=(?P<id>[0-9]+)'
     _TEST = {
-        'url': 'http://makerschannel.com/en/zoomin/community-highlights?video_id=849',
-        'md5': '624a512c6969236b5967bf9286345ad1',
+        'url': 'https://mychannels.com/missholland/miss-holland?production_id=3416',
+        'md5': 'b8993daad4262dd68d89d651c0c52c45',
         'info_dict': {
-            'id': '849',
+            'id': 'wUUDZZep6vQD',
             'ext': 'mp4',
-            'title': 'Landing a bus on a plane is an epic win',
-            'uploader': 'ZoomIn',
-            'description': 'md5:cd9cca2ea7b69b78be81d07020c97139',
+            'title': 'Miss Holland joins VOTE LEAVE',
+            'description': 'Miss Holland | #13 Not a potato',
+            'uploader': 'Miss Holland',
         }
     }
 
@@ -27,12 +27,12 @@ class MakersChannelIE(InfoExtractor):
 
         def extract_data_val(attr, fatal=False):
             return self._html_search_regex(r'data-%s\s*=\s*"([^"]+)"' % attr, video_data, attr, fatal=fatal)
-        minoto_id = self._search_regex(r'/id/([a-zA-Z0-9]+)', extract_data_val('video-src', True), 'minoto id')
+        minoto_id = extract_data_val('minoto-id') or self._search_regex(r'/id/([a-zA-Z0-9]+)', extract_data_val('video-src', True), 'minoto id')
 
         return {
             '_type': 'url_transparent',
             'url': 'minoto:%s' % minoto_id,
-            'id': extract_data_val('video-id', True),
+            'id': url_id,
             'title': extract_data_val('title', True),
             'description': extract_data_val('description'),
             'thumbnail': extract_data_val('image'),
index 9dc8f9ebcbe7bd0fbde8374fb91d7727e7ebb648..c843f8649791727ca17e6a1ecbcb7bfb80c78205 100644 (file)
@@ -1,7 +1,8 @@
 from __future__ import unicode_literals
 
-import re
 import base64
+import json
+import re
 
 from .common import InfoExtractor
 from .theplatform import ThePlatformIE
@@ -9,6 +10,7 @@ from .adobepass import AdobePassIE
 from ..utils import (
     find_xpath_attr,
     smuggle_url,
+    try_get,
     unescapeHTML,
     update_url_query,
     int_or_none,
@@ -78,10 +80,14 @@ class NBCIE(AdobePassIE):
     def _real_extract(self, url):
         permalink, video_id = re.match(self._VALID_URL, url).groups()
         permalink = 'http' + permalink
-        video_data = self._download_json(
+        response = self._download_json(
             'https://api.nbc.com/v3/videos', video_id, query={
                 'filter[permalink]': permalink,
-            })['data'][0]['attributes']
+                'fields[videos]': 'description,entitlement,episodeNumber,guid,keywords,seasonNumber,title,vChipRating',
+                'fields[shows]': 'shortTitle',
+                'include': 'show.shortTitle',
+            })
+        video_data = response['data'][0]['attributes']
         query = {
             'mbr': 'true',
             'manifest': 'm3u',
@@ -103,10 +109,11 @@ class NBCIE(AdobePassIE):
             'title': title,
             'url': theplatform_url,
             'description': video_data.get('description'),
-            'keywords': video_data.get('keywords'),
+            'tags': video_data.get('keywords'),
             'season_number': int_or_none(video_data.get('seasonNumber')),
             'episode_number': int_or_none(video_data.get('episodeNumber')),
-            'series': video_data.get('showName'),
+            'episode': title,
+            'series': try_get(response, lambda x: x['included'][0]['attributes']['shortTitle']),
             'ie_key': 'ThePlatform',
         }
 
@@ -169,6 +176,65 @@ class NBCSportsIE(InfoExtractor):
             NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer')
 
 
+class NBCSportsStreamIE(AdobePassIE):
+    _VALID_URL = r'https?://stream\.nbcsports\.com/.+?\bpid=(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://stream.nbcsports.com/nbcsn/generic?pid=206559',
+        'info_dict': {
+            'id': '206559',
+            'ext': 'mp4',
+            'title': 'Amgen Tour of California Women\'s Recap',
+            'description': 'md5:66520066b3b5281ada7698d0ea2aa894',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+        'skip': 'Requires Adobe Pass Authentication',
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        live_source = self._download_json(
+            'http://stream.nbcsports.com/data/live_sources_%s.json' % video_id,
+            video_id)
+        video_source = live_source['videoSources'][0]
+        title = video_source['title']
+        source_url = None
+        for k in ('source', 'msl4source', 'iossource', 'hlsv4'):
+            sk = k + 'Url'
+            source_url = video_source.get(sk) or video_source.get(sk + 'Alt')
+            if source_url:
+                break
+        else:
+            source_url = video_source['ottStreamUrl']
+        is_live = video_source.get('type') == 'live' or video_source.get('status') == 'Live'
+        resource = self._get_mvpd_resource('nbcsports', title, video_id, '')
+        token = self._extract_mvpd_auth(url, video_id, 'nbcsports', resource)
+        tokenized_url = self._download_json(
+            'https://token.playmakerservices.com/cdn',
+            video_id, data=json.dumps({
+                'requestorId': 'nbcsports',
+                'pid': video_id,
+                'application': 'NBCSports',
+                'version': 'v1',
+                'platform': 'desktop',
+                'cdn': 'akamai',
+                'url': video_source['sourceUrl'],
+                'token': base64.b64encode(token.encode()).decode(),
+                'resourceId': base64.b64encode(resource.encode()).decode(),
+            }).encode())['tokenizedUrl']
+        formats = self._extract_m3u8_formats(tokenized_url, video_id, 'mp4')
+        self._sort_formats(formats)
+        return {
+            'id': video_id,
+            'title': self._live_title(title) if is_live else title,
+            'description': live_source.get('description'),
+            'formats': formats,
+            'is_live': is_live,
+        }
+
+
 class CSNNEIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?csnne\.com/video/(?P<id>[0-9a-z-]+)'
 
index 5e46a75c0a783bca430c2fdfc8bd062528759704..82d526c228b9a41c90676aed6471f7613b9df7a7 100644 (file)
@@ -29,14 +29,13 @@ class NexxIE(InfoExtractor):
     _TESTS = [{
         # movie
         'url': 'https://api.nexx.cloud/v3/748/videos/byid/128907',
-        'md5': '828cea195be04e66057b846288295ba1',
+        'md5': '31899fd683de49ad46f4ee67e53e83fe',
         'info_dict': {
             'id': '128907',
             'ext': 'mp4',
             'title': 'Stiftung Warentest',
             'alt_title': 'Wie ein Test abläuft',
             'description': 'md5:d1ddb1ef63de721132abd38639cc2fd2',
-            'release_year': 2013,
             'creator': 'SPIEGEL TV',
             'thumbnail': r're:^https?://.*\.jpg$',
             'duration': 2509,
@@ -62,6 +61,7 @@ class NexxIE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
+        'skip': 'HTTP Error 404: Not Found',
     }, {
         # does not work via arc
         'url': 'nexx:741:1269984',
@@ -71,12 +71,26 @@ class NexxIE(InfoExtractor):
             'ext': 'mp4',
             'title': '1 TAG ohne KLO... wortwörtlich! 😑',
             'alt_title': '1 TAG ohne KLO... wortwörtlich! 😑',
-            'description': 'md5:4604539793c49eda9443ab5c5b1d612f',
             'thumbnail': r're:^https?://.*\.jpg$',
             'duration': 607,
             'timestamp': 1518614955,
             'upload_date': '20180214',
         },
+    }, {
+        # free cdn from http://www.spiegel.de/video/eifel-zoo-aufregung-um-ausgebrochene-raubtiere-video-99018031.html
+        'url': 'nexx:747:1533779',
+        'md5': '6bf6883912b82b7069fb86c2297e9893',
+        'info_dict': {
+            'id': '1533779',
+            'ext': 'mp4',
+            'title': 'Aufregung um ausgebrochene Raubtiere',
+            'alt_title': 'Eifel-Zoo',
+            'description': 'md5:f21375c91c74ad741dcb164c427999d2',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 111,
+            'timestamp': 1527874460,
+            'upload_date': '20180601',
+        },
     }, {
         'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907',
         'only_matching': True,
@@ -141,6 +155,139 @@ class NexxIE(InfoExtractor):
         self._handle_error(result)
         return result['result']
 
+    def _extract_free_formats(self, video, video_id):
+        stream_data = video['streamdata']
+        cdn = stream_data['cdnType']
+        assert cdn == 'free'
+
+        hash = video['general']['hash']
+
+        ps = compat_str(stream_data['originalDomain'])
+        if stream_data['applyFolderHierarchy'] == 1:
+            s = ('%04d' % int(video_id))[::-1]
+            ps += '/%s/%s' % (s[0:2], s[2:4])
+        ps += '/%s/%s_' % (video_id, hash)
+
+        t = 'http://%s' + ps
+        fd = stream_data['azureFileDistribution'].split(',')
+        cdn_provider = stream_data['cdnProvider']
+
+        def p0(p):
+            return '_%s' % p if stream_data['applyAzureStructure'] == 1 else ''
+
+        formats = []
+        if cdn_provider == 'ak':
+            t += ','
+            for i in fd:
+                p = i.split(':')
+                t += p[1] + p0(int(p[0])) + ','
+            t += '.mp4.csmil/master.%s'
+        elif cdn_provider == 'ce':
+            k = t.split('/')
+            h = k.pop()
+            http_base = t = '/'.join(k)
+            http_base = http_base % stream_data['cdnPathHTTP']
+            t += '/asset.ism/manifest.%s?dcp_ver=aos4&videostream='
+            for i in fd:
+                p = i.split(':')
+                tbr = int(p[0])
+                filename = '%s%s%s.mp4' % (h, p[1], p0(tbr))
+                f = {
+                    'url': http_base + '/' + filename,
+                    'format_id': '%s-http-%d' % (cdn, tbr),
+                    'tbr': tbr,
+                }
+                width_height = p[1].split('x')
+                if len(width_height) == 2:
+                    f.update({
+                        'width': int_or_none(width_height[0]),
+                        'height': int_or_none(width_height[1]),
+                    })
+                formats.append(f)
+                a = filename + ':%s' % (tbr * 1000)
+                t += a + ','
+            t = t[:-1] + '&audiostream=' + a.split(':')[0]
+        else:
+            assert False
+
+        if cdn_provider == 'ce':
+            formats.extend(self._extract_mpd_formats(
+                t % (stream_data['cdnPathDASH'], 'mpd'), video_id,
+                mpd_id='%s-dash' % cdn, fatal=False))
+        formats.extend(self._extract_m3u8_formats(
+            t % (stream_data['cdnPathHLS'], 'm3u8'), video_id, 'mp4',
+            entry_protocol='m3u8_native', m3u8_id='%s-hls' % cdn, fatal=False))
+
+        return formats
+
+    def _extract_azure_formats(self, video, video_id):
+        stream_data = video['streamdata']
+        cdn = stream_data['cdnType']
+        assert cdn == 'azure'
+
+        azure_locator = stream_data['azureLocator']
+
+        def get_cdn_shield_base(shield_type='', static=False):
+            for secure in ('', 's'):
+                cdn_shield = stream_data.get('cdnShield%sHTTP%s' % (shield_type, secure.upper()))
+                if cdn_shield:
+                    return 'http%s://%s' % (secure, cdn_shield)
+            else:
+                if 'fb' in stream_data['azureAccount']:
+                    prefix = 'df' if static else 'f'
+                else:
+                    prefix = 'd' if static else 'p'
+                account = int(stream_data['azureAccount'].replace('nexxplayplus', '').replace('nexxplayfb', ''))
+                return 'http://nx-%s%02d.akamaized.net/' % (prefix, account)
+
+        language = video['general'].get('language_raw') or ''
+
+        azure_stream_base = get_cdn_shield_base()
+        is_ml = ',' in language
+        azure_manifest_url = '%s%s/%s_src%s.ism/Manifest' % (
+            azure_stream_base, azure_locator, video_id, ('_manifest' if is_ml else '')) + '%s'
+
+        protection_token = try_get(
+            video, lambda x: x['protectiondata']['token'], compat_str)
+        if protection_token:
+            azure_manifest_url += '?hdnts=%s' % protection_token
+
+        formats = self._extract_m3u8_formats(
+            azure_manifest_url % '(format=m3u8-aapl)',
+            video_id, 'mp4', 'm3u8_native',
+            m3u8_id='%s-hls' % cdn, fatal=False)
+        formats.extend(self._extract_mpd_formats(
+            azure_manifest_url % '(format=mpd-time-csf)',
+            video_id, mpd_id='%s-dash' % cdn, fatal=False))
+        formats.extend(self._extract_ism_formats(
+            azure_manifest_url % '', video_id, ism_id='%s-mss' % cdn, fatal=False))
+
+        azure_progressive_base = get_cdn_shield_base('Prog', True)
+        azure_file_distribution = stream_data.get('azureFileDistribution')
+        if azure_file_distribution:
+            fds = azure_file_distribution.split(',')
+            if fds:
+                for fd in fds:
+                    ss = fd.split(':')
+                    if len(ss) == 2:
+                        tbr = int_or_none(ss[0])
+                        if tbr:
+                            f = {
+                                'url': '%s%s/%s_src_%s_%d.mp4' % (
+                                    azure_progressive_base, azure_locator, video_id, ss[1], tbr),
+                                'format_id': '%s-http-%d' % (cdn, tbr),
+                                'tbr': tbr,
+                            }
+                            width_height = ss[1].split('x')
+                            if len(width_height) == 2:
+                                f.update({
+                                    'width': int_or_none(width_height[0]),
+                                    'height': int_or_none(width_height[1]),
+                                })
+                            formats.append(f)
+
+        return formats
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         domain_id = mobj.group('domain_id') or mobj.group('domain_id_s')
@@ -220,72 +367,15 @@ class NexxIE(InfoExtractor):
         general = video['general']
         title = general['title']
 
-        stream_data = video['streamdata']
-        language = general.get('language_raw') or ''
-
-        # TODO: reverse more cdns
-
-        cdn = stream_data['cdnType']
-        assert cdn == 'azure'
-
-        azure_locator = stream_data['azureLocator']
-
-        def get_cdn_shield_base(shield_type='', static=False):
-            for secure in ('', 's'):
-                cdn_shield = stream_data.get('cdnShield%sHTTP%s' % (shield_type, secure.upper()))
-                if cdn_shield:
-                    return 'http%s://%s' % (secure, cdn_shield)
-            else:
-                if 'fb' in stream_data['azureAccount']:
-                    prefix = 'df' if static else 'f'
-                else:
-                    prefix = 'd' if static else 'p'
-                account = int(stream_data['azureAccount'].replace('nexxplayplus', '').replace('nexxplayfb', ''))
-                return 'http://nx-%s%02d.akamaized.net/' % (prefix, account)
-
-        azure_stream_base = get_cdn_shield_base()
-        is_ml = ',' in language
-        azure_manifest_url = '%s%s/%s_src%s.ism/Manifest' % (
-            azure_stream_base, azure_locator, video_id, ('_manifest' if is_ml else '')) + '%s'
-
-        protection_token = try_get(
-            video, lambda x: x['protectiondata']['token'], compat_str)
-        if protection_token:
-            azure_manifest_url += '?hdnts=%s' % protection_token
+        cdn = video['streamdata']['cdnType']
 
-        formats = self._extract_m3u8_formats(
-            azure_manifest_url % '(format=m3u8-aapl)',
-            video_id, 'mp4', 'm3u8_native',
-            m3u8_id='%s-hls' % cdn, fatal=False)
-        formats.extend(self._extract_mpd_formats(
-            azure_manifest_url % '(format=mpd-time-csf)',
-            video_id, mpd_id='%s-dash' % cdn, fatal=False))
-        formats.extend(self._extract_ism_formats(
-            azure_manifest_url % '', video_id, ism_id='%s-mss' % cdn, fatal=False))
-
-        azure_progressive_base = get_cdn_shield_base('Prog', True)
-        azure_file_distribution = stream_data.get('azureFileDistribution')
-        if azure_file_distribution:
-            fds = azure_file_distribution.split(',')
-            if fds:
-                for fd in fds:
-                    ss = fd.split(':')
-                    if len(ss) == 2:
-                        tbr = int_or_none(ss[0])
-                        if tbr:
-                            f = {
-                                'url': '%s%s/%s_src_%s_%d.mp4' % (
-                                    azure_progressive_base, azure_locator, video_id, ss[1], tbr),
-                                'format_id': '%s-http-%d' % (cdn, tbr),
-                                'tbr': tbr,
-                            }
-                            width_height = ss[1].split('x')
-                            if len(width_height) == 2:
-                                f.update({
-                                    'width': int_or_none(width_height[0]),
-                                    'height': int_or_none(width_height[1]),
-                                })
-                            formats.append(f)
+        if cdn == 'azure':
+            formats = self._extract_azure_formats(video, video_id)
+        elif cdn == 'free':
+            formats = self._extract_free_formats(video, video_id)
+        else:
+            # TODO: reverse more cdns
+            assert False
 
         self._sort_formats(formats)
 
index 62ce800c072d2a316a0c6b8b7479cc89dc29b90d..cf440f713c274ab96e61927c4f303e51abcdca12 100644 (file)
@@ -1,18 +1,10 @@
 from __future__ import unicode_literals
 
 import re
-import json
-import os
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_urlparse,
-    compat_urllib_parse_urlencode,
-    compat_urllib_parse_urlparse,
-    compat_str,
-)
+from ..compat import compat_str
 from ..utils import (
-    unified_strdate,
     determine_ext,
     int_or_none,
     parse_iso8601,
@@ -20,236 +12,77 @@ from ..utils import (
 )
 
 
-class NHLBaseInfoExtractor(InfoExtractor):
-    @staticmethod
-    def _fix_json(json_string):
-        return json_string.replace('\\\'', '\'')
-
-    def _real_extract_video(self, video_id):
-        vid_parts = video_id.split(',')
-        if len(vid_parts) == 3:
-            video_id = '%s0%s%s-X-h' % (vid_parts[0][:4], vid_parts[1], vid_parts[2].rjust(4, '0'))
-        json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id
-        data = self._download_json(
-            json_url, video_id, transform_source=self._fix_json)
-        return self._extract_video(data[0])
-
-    def _extract_video(self, info):
-        video_id = info['id']
-        self.report_extraction(video_id)
-
-        initial_video_url = info['publishPoint']
-        if info['formats'] == '1':
-            parsed_url = compat_urllib_parse_urlparse(initial_video_url)
-            filename, ext = os.path.splitext(parsed_url.path)
-            path = '%s_sd%s' % (filename, ext)
-            data = compat_urllib_parse_urlencode({
-                'type': 'fvod',
-                'path': compat_urlparse.urlunparse(parsed_url[:2] + (path,) + parsed_url[3:])
-            })
-            path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
-            path_doc = self._download_xml(
-                path_url, video_id, 'Downloading final video url')
-            video_url = path_doc.find('path').text
-        else:
-            video_url = initial_video_url
-
-        join = compat_urlparse.urljoin
-        ret = {
-            'id': video_id,
-            'title': info['name'],
-            'url': video_url,
-            'description': info['description'],
-            'duration': int(info['duration']),
-            'thumbnail': join(join(video_url, '/u/'), info['bigImage']),
-            'upload_date': unified_strdate(info['releaseDate'].split('.')[0]),
-        }
-        if video_url.startswith('rtmp:'):
-            mobj = re.match(r'(?P<tc_url>rtmp://[^/]+/(?P<app>[a-z0-9/]+))/(?P<play_path>mp4:.*)', video_url)
-            ret.update({
-                'tc_url': mobj.group('tc_url'),
-                'play_path': mobj.group('play_path'),
-                'app': mobj.group('app'),
-                'no_resume': True,
-            })
-        return ret
-
-
-class NHLVideocenterIE(NHLBaseInfoExtractor):
-    IE_NAME = 'nhl.com:videocenter'
-    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console|embed)?(?:\?(?:.*?[?&])?)(?:id|hlg|playlist)=(?P<id>[-0-9a-zA-Z,]+)'
-
-    _TESTS = [{
-        'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
-        'md5': 'db704a4ea09e8d3988c85e36cc892d09',
-        'info_dict': {
-            'id': '453614',
-            'ext': 'mp4',
-            'title': 'Quick clip: Weise 4-3 goal vs Flames',
-            'description': 'Dale Weise scores his first of the season to put the Canucks up 4-3.',
-            'duration': 18,
-            'upload_date': '20131006',
-        },
-    }, {
-        'url': 'http://video.nhl.com/videocenter/console?id=2014020024-628-h',
-        'md5': 'd22e82bc592f52d37d24b03531ee9696',
-        'info_dict': {
-            'id': '2014020024-628-h',
-            'ext': 'mp4',
-            'title': 'Alex Galchenyuk Goal on Ray Emery (14:40/3rd)',
-            'description': 'Home broadcast - Montreal Canadiens at Philadelphia Flyers - October 11, 2014',
-            'duration': 0,
-            'upload_date': '20141011',
-        },
-    }, {
-        'url': 'http://video.mapleleafs.nhl.com/videocenter/console?id=58665&catid=802',
-        'md5': 'c78fc64ea01777e426cfc202b746c825',
-        'info_dict': {
-            'id': '58665',
-            'ext': 'flv',
-            'title': 'Classic Game In Six - April 22, 1979',
-            'description': 'It was the last playoff game for the Leafs in the decade, and the last time the Leafs and Habs played in the playoffs. Great game, not a great ending.',
-            'duration': 400,
-            'upload_date': '20100129'
-        },
-    }, {
-        'url': 'http://video.flames.nhl.com/videocenter/console?id=630616',
-        'only_matching': True,
-    }, {
-        'url': 'http://video.nhl.com/videocenter/?id=736722',
-        'only_matching': True,
-    }, {
-        'url': 'http://video.nhl.com/videocenter/console?hlg=20142015,2,299&lang=en',
-        'md5': '076fcb88c255154aacbf0a7accc3f340',
-        'info_dict': {
-            'id': '2014020299-X-h',
-            'ext': 'mp4',
-            'title': 'Penguins at Islanders / Game Highlights',
-            'description': 'Home broadcast - Pittsburgh Penguins at New York Islanders - November 22, 2014',
-            'duration': 268,
-            'upload_date': '20141122',
-        }
-    }, {
-        'url': 'http://video.oilers.nhl.com/videocenter/console?id=691469&catid=4',
-        'info_dict': {
-            'id': '691469',
-            'ext': 'mp4',
-            'title': 'RAW | Craig MacTavish Full Press Conference',
-            'description': 'Oilers GM Craig MacTavish addresses the media at Rexall Place on Friday.',
-            'upload_date': '20141205',
-        },
-        'params': {
-            'skip_download': True,  # Requires rtmpdump
-        }
-    }, {
-        'url': 'http://video.nhl.com/videocenter/embed?playlist=836127',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        return self._real_extract_video(video_id)
-
-
-class NHLNewsIE(NHLBaseInfoExtractor):
-    IE_NAME = 'nhl.com:news'
-    IE_DESC = 'NHL news'
-    _VALID_URL = r'https?://(?:.+?\.)?nhl\.com/(?:ice|club)/news\.html?(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
-
-    _TESTS = [{
-        'url': 'http://www.nhl.com/ice/news.htm?id=750727',
-        'md5': '4b3d1262e177687a3009937bd9ec0be8',
-        'info_dict': {
-            'id': '736722',
-            'ext': 'mp4',
-            'title': 'Cal Clutterbuck has been fined $2,000',
-            'description': 'md5:45fe547d30edab88b23e0dd0ab1ed9e6',
-            'duration': 37,
-            'upload_date': '20150128',
-        },
-    }, {
-        # iframe embed
-        'url': 'http://sabres.nhl.com/club/news.htm?id=780189',
-        'md5': '9f663d1c006c90ac9fb82777d4294e12',
-        'info_dict': {
-            'id': '836127',
-            'ext': 'mp4',
-            'title': 'Morning Skate: OTT vs. BUF (9/23/15)',
-            'description': "Brian Duff chats with Tyler Ennis prior to Buffalo's first preseason home game.",
-            'duration': 93,
-            'upload_date': '20150923',
-        },
-    }]
-
+class NHLBaseIE(InfoExtractor):
     def _real_extract(self, url):
-        news_id = self._match_id(url)
-        webpage = self._download_webpage(url, news_id)
-        video_id = self._search_regex(
-            [r'pVid(\d+)', r"nlid\s*:\s*'(\d+)'",
-             r'<iframe[^>]+src=["\']https?://video.*?\.nhl\.com/videocenter/embed\?.*\bplaylist=(\d+)'],
-            webpage, 'video id')
-        return self._real_extract_video(video_id)
-
+        site, tmp_id = re.match(self._VALID_URL, url).groups()
+        video_data = self._download_json(
+            'https://%s/%s/%sid/v1/%s/details/web-v1.json'
+            % (self._CONTENT_DOMAIN, site[:3], 'item/' if site == 'mlb' else '', tmp_id), tmp_id)
+        if video_data.get('type') != 'video':
+            video_data = video_data['media']
+            video = video_data.get('video')
+            if video:
+                video_data = video
+            else:
+                videos = video_data.get('videos')
+                if videos:
+                    video_data = videos[0]
 
-class NHLVideocenterCategoryIE(NHLBaseInfoExtractor):
-    IE_NAME = 'nhl.com:videocenter:category'
-    IE_DESC = 'NHL videocenter category'
-    _VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?[^(id=)]*catid=(?P<catid>[0-9]+)(?![&?]id=).*?)?$'
-    _TEST = {
-        'url': 'http://video.canucks.nhl.com/videocenter/console?catid=999',
-        'info_dict': {
-            'id': '999',
-            'title': 'Highlights',
-        },
-        'playlist_count': 12,
-    }
+        video_id = compat_str(video_data['id'])
+        title = video_data['title']
 
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        team = mobj.group('team')
-        webpage = self._download_webpage(url, team)
-        cat_id = self._search_regex(
-            [r'var defaultCatId = "(.+?)";',
-             r'{statusIndex:0,index:0,.*?id:(.*?),'],
-            webpage, 'category id')
-        playlist_title = self._html_search_regex(
-            r'tab0"[^>]*?>(.*?)</td>',
-            webpage, 'playlist title', flags=re.DOTALL).lower().capitalize()
+        formats = []
+        for playback in video_data.get('playbacks', []):
+            playback_url = playback.get('url')
+            if not playback_url:
+                continue
+            ext = determine_ext(playback_url)
+            if ext == 'm3u8':
+                m3u8_formats = self._extract_m3u8_formats(
+                    playback_url, video_id, 'mp4', 'm3u8_native',
+                    m3u8_id=playback.get('name', 'hls'), fatal=False)
+                self._check_formats(m3u8_formats, video_id)
+                formats.extend(m3u8_formats)
+            else:
+                height = int_or_none(playback.get('height'))
+                formats.append({
+                    'format_id': playback.get('name', 'http' + ('-%dp' % height if height else '')),
+                    'url': playback_url,
+                    'width': int_or_none(playback.get('width')),
+                    'height': height,
+                    'tbr': int_or_none(self._search_regex(r'_(\d+)[kK]', playback_url, 'bitrate', default=None)),
+                })
+        self._sort_formats(formats)
 
-        data = compat_urllib_parse_urlencode({
-            'cid': cat_id,
-            # This is the default value
-            'count': 12,
-            'ptrs': 3,
-            'format': 'json',
-        })
-        path = '/videocenter/servlets/browse?' + data
-        request_url = compat_urlparse.urljoin(url, path)
-        response = self._download_webpage(request_url, playlist_title)
-        response = self._fix_json(response)
-        if not response.strip():
-            self._downloader.report_warning('Got an empty response, trying '
-                                            'adding the "newvideos" parameter')
-            response = self._download_webpage(request_url + '&newvideos=true',
-                                              playlist_title)
-            response = self._fix_json(response)
-        videos = json.loads(response)
+        thumbnails = []
+        cuts = video_data.get('image', {}).get('cuts') or []
+        if isinstance(cuts, dict):
+            cuts = cuts.values()
+        for thumbnail_data in cuts:
+            thumbnail_url = thumbnail_data.get('src')
+            if not thumbnail_url:
+                continue
+            thumbnails.append({
+                'url': thumbnail_url,
+                'width': int_or_none(thumbnail_data.get('width')),
+                'height': int_or_none(thumbnail_data.get('height')),
+            })
 
         return {
-            '_type': 'playlist',
-            'title': playlist_title,
-            'id': cat_id,
-            'entries': [self._extract_video(v) for v in videos],
+            'id': video_id,
+            'title': title,
+            'description': video_data.get('description'),
+            'timestamp': parse_iso8601(video_data.get('date')),
+            'duration': parse_duration(video_data.get('duration')),
+            'thumbnails': thumbnails,
+            'formats': formats,
         }
 
 
-class NHLIE(InfoExtractor):
+class NHLIE(NHLBaseIE):
     IE_NAME = 'nhl.com'
     _VALID_URL = r'https?://(?:www\.)?(?P<site>nhl|wch2016)\.com/(?:[^/]+/)*c-(?P<id>\d+)'
-    _SITES_MAP = {
-        'nhl': 'nhl',
-        'wch2016': 'wch',
-    }
+    _CONTENT_DOMAIN = 'nhl.bamcontent.com'
     _TESTS = [{
         # type=video
         'url': 'https://www.nhl.com/video/anisimov-cleans-up-mess/t-277752844/c-43663503',
@@ -293,59 +126,3 @@ class NHLIE(InfoExtractor):
         'url': 'https://www.wch2016.com/news/3-stars-team-europe-vs-team-canada/c-282195068',
         'only_matching': True,
     }]
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        tmp_id, site = mobj.group('id'), mobj.group('site')
-        video_data = self._download_json(
-            'https://nhl.bamcontent.com/%s/id/v1/%s/details/web-v1.json'
-            % (self._SITES_MAP[site], tmp_id), tmp_id)
-        if video_data.get('type') == 'article':
-            video_data = video_data['media']
-
-        video_id = compat_str(video_data['id'])
-        title = video_data['title']
-
-        formats = []
-        for playback in video_data.get('playbacks', []):
-            playback_url = playback.get('url')
-            if not playback_url:
-                continue
-            ext = determine_ext(playback_url)
-            if ext == 'm3u8':
-                m3u8_formats = self._extract_m3u8_formats(
-                    playback_url, video_id, 'mp4', 'm3u8_native',
-                    m3u8_id=playback.get('name', 'hls'), fatal=False)
-                self._check_formats(m3u8_formats, video_id)
-                formats.extend(m3u8_formats)
-            else:
-                height = int_or_none(playback.get('height'))
-                formats.append({
-                    'format_id': playback.get('name', 'http' + ('-%dp' % height if height else '')),
-                    'url': playback_url,
-                    'width': int_or_none(playback.get('width')),
-                    'height': height,
-                })
-        self._sort_formats(formats, ('preference', 'width', 'height', 'tbr', 'format_id'))
-
-        thumbnails = []
-        for thumbnail_id, thumbnail_data in video_data.get('image', {}).get('cuts', {}).items():
-            thumbnail_url = thumbnail_data.get('src')
-            if not thumbnail_url:
-                continue
-            thumbnails.append({
-                'id': thumbnail_id,
-                'url': thumbnail_url,
-                'width': int_or_none(thumbnail_data.get('width')),
-                'height': int_or_none(thumbnail_data.get('height')),
-            })
-
-        return {
-            'id': video_id,
-            'title': title,
-            'description': video_data.get('description'),
-            'timestamp': parse_iso8601(video_data.get('date')),
-            'duration': parse_duration(video_data.get('duration')),
-            'thumbnails': thumbnails,
-            'formats': formats,
-        }
index 256a24d86fb27c5f5fe905311f134c8c1fcffdba..5e34d776bd799490ac21282fed822cc52d467f75 100644 (file)
@@ -85,7 +85,7 @@ class NickBrIE(MTVServicesInfoExtractor):
                     https?://
                         (?:
                             (?P<domain>(?:www\.)?nickjr|mundonick\.uol)\.com\.br|
-                            (?:www\.)?nickjr\.nl
+                            (?:www\.)?nickjr\.[a-z]{2}
                         )
                         /(?:programas/)?[^/]+/videos/(?:episodios/)?(?P<id>[^/?\#.]+)
                     '''
@@ -98,6 +98,9 @@ class NickBrIE(MTVServicesInfoExtractor):
     }, {
         'url': 'http://www.nickjr.nl/paw-patrol/videos/311-ge-wol-dig-om-terug-te-zijn/',
         'only_matching': True,
+    }, {
+        'url': 'http://www.nickjr.de/blaze-und-die-monster-maschinen/videos/f6caaf8f-e4e8-4cc1-b489-9380d6dcd059/',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index df7f528be2d4c8da7cfe826609608754f2f8e088..dbe871f1657e8ace3802704d14dd29f49efadec9 100644 (file)
@@ -163,7 +163,7 @@ class NiconicoIE(InfoExtractor):
         self._login()
 
     def _login(self):
-        (username, password) = self._get_login_info()
+        username, password = self._get_login_info()
         # No authentication to be performed
         if not username:
             return True
index 8961309fdabccb34697b3ef29e70294414cd5802..65754c5e703acfc38e075f5bc974b5d4815c12d2 100644 (file)
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     parse_iso8601,
     float_or_none,
@@ -13,38 +12,11 @@ from ..utils import (
 )
 
 
-class NineCNineMediaBaseIE(InfoExtractor):
-    _API_BASE_TEMPLATE = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/'
-
-
-class NineCNineMediaStackIE(NineCNineMediaBaseIE):
-    IE_NAME = '9c9media:stack'
-    _GEO_COUNTRIES = ['CA']
-    _VALID_URL = r'9c9media:stack:(?P<destination_code>[^:]+):(?P<content_id>\d+):(?P<content_package>\d+):(?P<id>\d+)'
-
-    def _real_extract(self, url):
-        destination_code, content_id, package_id, stack_id = re.match(self._VALID_URL, url).groups()
-        stack_base_url_template = self._API_BASE_TEMPLATE + 'contentpackages/%s/stacks/%s/manifest.'
-        stack_base_url = stack_base_url_template % (destination_code, content_id, package_id, stack_id)
-
-        formats = []
-        formats.extend(self._extract_m3u8_formats(
-            stack_base_url + 'm3u8', stack_id, 'mp4',
-            'm3u8_native', m3u8_id='hls', fatal=False))
-        formats.extend(self._extract_f4m_formats(
-            stack_base_url + 'f4m', stack_id,
-            f4m_id='hds', fatal=False))
-        self._sort_formats(formats)
-
-        return {
-            'id': stack_id,
-            'formats': formats,
-        }
-
-
-class NineCNineMediaIE(NineCNineMediaBaseIE):
+class NineCNineMediaIE(InfoExtractor):
     IE_NAME = '9c9media'
+    _GEO_COUNTRIES = ['CA']
     _VALID_URL = r'9c9media:(?P<destination_code>[^:]+):(?P<id>\d+)'
+    _API_BASE_TEMPLATE = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/'
 
     def _real_extract(self, url):
         destination_code, content_id = re.match(self._VALID_URL, url).groups()
@@ -58,13 +30,26 @@ class NineCNineMediaIE(NineCNineMediaBaseIE):
         content_package = content['ContentPackages'][0]
         package_id = content_package['Id']
         content_package_url = api_base_url + 'contentpackages/%s/' % package_id
-        content_package = self._download_json(content_package_url, content_id)
+        content_package = self._download_json(
+            content_package_url, content_id, query={
+                '$include': '[HasClosedCaptions]',
+            })
 
-        if content_package.get('Constraints', {}).get('Security', {}).get('Type') == 'adobe-drm':
+        if content_package.get('Constraints', {}).get('Security', {}).get('Type'):
             raise ExtractorError('This video is DRM protected.', expected=True)
 
-        stacks = self._download_json(content_package_url + 'stacks/', package_id)['Items']
-        multistacks = len(stacks) > 1
+        manifest_base_url = content_package_url + 'manifest.'
+        formats = []
+        formats.extend(self._extract_m3u8_formats(
+            manifest_base_url + 'm3u8', content_id, 'mp4',
+            'm3u8_native', m3u8_id='hls', fatal=False))
+        formats.extend(self._extract_f4m_formats(
+            manifest_base_url + 'f4m', content_id,
+            f4m_id='hds', fatal=False))
+        formats.extend(self._extract_mpd_formats(
+            manifest_base_url + 'mpd', content_id,
+            mpd_id='dash', fatal=False))
+        self._sort_formats(formats)
 
         thumbnails = []
         for image in content.get('Images', []):
@@ -85,10 +70,12 @@ class NineCNineMediaIE(NineCNineMediaBaseIE):
                     continue
                 container.append(e_name)
 
-        description = content.get('Desc') or content.get('ShortDesc')
         season = content.get('Season', {})
-        base_info = {
-            'description': description,
+
+        info = {
+            'id': content_id,
+            'title': title,
+            'description': content.get('Desc') or content.get('ShortDesc'),
             'timestamp': parse_iso8601(content.get('BroadcastDateTime')),
             'episode_number': int_or_none(content.get('Episode')),
             'season': season.get('Name'),
@@ -97,26 +84,19 @@ class NineCNineMediaIE(NineCNineMediaBaseIE):
             'series': content.get('Media', {}).get('Name'),
             'tags': tags,
             'categories': categories,
+            'duration': float_or_none(content_package.get('Duration')),
+            'formats': formats,
         }
 
-        entries = []
-        for stack in stacks:
-            stack_id = compat_str(stack['Id'])
-            entry = {
-                '_type': 'url_transparent',
-                'url': '9c9media:stack:%s:%s:%s:%s' % (destination_code, content_id, package_id, stack_id),
-                'id': stack_id,
-                'title': '%s_part%s' % (title, stack['Name']) if multistacks else title,
-                'duration': float_or_none(stack.get('Duration')),
-                'ie_key': 'NineCNineMediaStack',
+        if content_package.get('HasClosedCaptions'):
+            info['subtitles'] = {
+                'en': [{
+                    'url': manifest_base_url + 'vtt',
+                    'ext': 'vtt',
+                }, {
+                    'url': manifest_base_url + 'srt',
+                    'ext': 'srt',
+                }]
             }
-            entry.update(base_info)
-            entries.append(entry)
 
-        return {
-            '_type': 'multi_video',
-            'id': content_id,
-            'title': title,
-            'description': description,
-            'entries': entries,
-        }
+        return info
index a9f9b10c47b8b9f481a8ab3f88705b1b0e76eb0f..58b371ed7462e7d43f490cfa3b94bee15e35f0f7 100644 (file)
@@ -65,7 +65,7 @@ class NocoIE(InfoExtractor):
         self._login()
 
     def _login(self):
-        (username, password) = self._get_login_info()
+        username, password = self._get_login_info()
         if username is None:
             return
 
index ff215338744893a29b93202e6f1f72dd0186546f..cb8319f0db1ce96f32b2890108d9ca3d2adde0c5 100644 (file)
@@ -36,8 +36,8 @@ class NPOIE(NPOBaseIE):
                         https?://
                             (?:www\.)?
                             (?:
-                                npo\.nl/(?!(?:live|radio)/)(?:[^/]+/){2}|
-                                ntr\.nl/(?:[^/]+/){2,}|
+                                npo\.nl/(?:[^/]+/)*|
+                                (?:ntr|npostart)\.nl/(?:[^/]+/){2,}|
                                 omroepwnl\.nl/video/fragment/[^/]+__|
                                 (?:zapp|npo3)\.nl/(?:[^/]+/){2,}
                             )
@@ -160,8 +160,20 @@ class NPOIE(NPOBaseIE):
     }, {
         'url': 'https://www.zapp.nl/1803-skelterlab/instructie-video-s/740-instructievideo-s/POMS_AT_11736927',
         'only_matching': True,
+    }, {
+        'url': 'https://www.npostart.nl/broodje-gezond-ei/28-05-2018/KN_1698996',
+        'only_matching': True,
+    }, {
+        'url': 'https://npo.nl/KN_1698996',
+        'only_matching': True,
     }]
 
+    @classmethod
+    def suitable(cls, url):
+        return (False if any(ie.suitable(url)
+                for ie in (NPOLiveIE, NPORadioIE, NPORadioFragmentIE))
+                else super(NPOIE, cls).suitable(url))
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
         return self._get_info(video_id)
@@ -389,7 +401,7 @@ class NPOLiveIE(NPOBaseIE):
 
 class NPORadioIE(InfoExtractor):
     IE_NAME = 'npo.nl:radio'
-    _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/(?P<id>[^/]+)/?$'
+    _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/(?P<id>[^/]+)'
 
     _TEST = {
         'url': 'http://www.npo.nl/radio/radio-1',
@@ -404,6 +416,10 @@ class NPORadioIE(InfoExtractor):
         }
     }
 
+    @classmethod
+    def suitable(cls, url):
+        return False if NPORadioFragmentIE.suitable(url) else super(NPORadioIE, cls).suitable(url)
+
     @staticmethod
     def _html_get_attribute_regex(attribute):
         return r'{0}\s*=\s*\'([^\']+)\''.format(attribute)
index 18ead94260650e9312cc8adab491423485a59494..7157e2390909dec9667ac0d563dbe88627249d34 100644 (file)
@@ -16,12 +16,22 @@ from ..utils import (
 class NRKBaseIE(InfoExtractor):
     _GEO_COUNTRIES = ['NO']
 
+    _api_host = None
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        data = self._download_json(
-            'http://%s/mediaelement/%s' % (self._API_HOST, video_id),
-            video_id, 'Downloading mediaelement JSON')
+        api_hosts = (self._api_host, ) if self._api_host else self._API_HOSTS
+
+        for api_host in api_hosts:
+            data = self._download_json(
+                'http://%s/mediaelement/%s' % (api_host, video_id),
+                video_id, 'Downloading mediaelement JSON',
+                fatal=api_host == api_hosts[-1])
+            if not data:
+                continue
+            self._api_host = api_host
+            break
 
         title = data.get('fullTitle') or data.get('mainTitle') or data['title']
         video_id = data.get('id') or video_id
@@ -191,7 +201,7 @@ class NRKIE(NRKBaseIE):
                             )
                             (?P<id>[^?#&]+)
                         '''
-    _API_HOST = 'v8-psapi.nrk.no'
+    _API_HOSTS = ('psapi.nrk.no', 'v8-psapi.nrk.no')
     _TESTS = [{
         # video
         'url': 'http://www.nrk.no/video/PS*150533',
@@ -237,8 +247,7 @@ class NRKTVIE(NRKBaseIE):
                             (?:/\d{2}-\d{2}-\d{4})?
                             (?:\#del=(?P<part_id>\d+))?
                     ''' % _EPISODE_RE
-    _API_HOST = 'psapi-ne.nrk.no'
-
+    _API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
     _TESTS = [{
         'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
         'md5': '4e9ca6629f09e588ed240fb11619922a',
index d0bdd60b8208d2f4c44f18d0119770f5309b0495..d264fe20664523a2d0f6387fe1de1f242663b831 100644 (file)
@@ -243,7 +243,7 @@ class PhantomJSwrapper(object):
 
 
 class OpenloadIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
 
     _TESTS = [{
         'url': 'https://openload.co/f/kUEfGclsU9o',
@@ -301,6 +301,16 @@ class OpenloadIE(InfoExtractor):
     }, {
         'url': 'https://oload.xyz/f/WwRBpzW8Wtk',
         'only_matching': True,
+    }, {
+        'url': 'https://oload.win/f/kUEfGclsU9o',
+        'only_matching': True,
+    }, {
+        'url': 'https://oload.download/f/kUEfGclsU9o',
+        'only_matching': True,
+    }, {
+        # Its title has not got its extension but url has it
+        'url': 'https://oload.download/f/N4Otkw39VCw/Tomb.Raider.2018.HDRip.XviD.AC3-EVO.avi.mp4',
+        'only_matching': True,
     }]
 
     _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
@@ -362,8 +372,7 @@ class OpenloadIE(InfoExtractor):
             'title': title,
             'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
             'url': video_url,
-            # Seems all videos have extensions in their titles
-            'ext': determine_ext(title, 'mp4'),
+            'ext': determine_ext(title, None) or determine_ext(url, 'mp4'),
             'subtitles': subtitles,
             'http_headers': headers,
         }
index 8ed3c6347c90a6bfabe18d51e796a2df4737a804..56a2a1083a11275a02031b4233b44c76f2f6d3f8 100644 (file)
@@ -42,7 +42,7 @@ class PacktPubIE(PacktPubBaseIE):
     _TOKEN = None
 
     def _real_initialize(self):
-        (username, password) = self._get_login_info()
+        username, password = self._get_login_info()
         if username is None:
             return
         try:
index d4b1d34ca8486442139409eda90ed95be6c0ffc9..9eb0276795356af12571e98982f35edec490d0ba 100644 (file)
@@ -53,7 +53,7 @@ class PatreonIE(InfoExtractor):
     # needed. Keeping this commented for when this inevitably changes.
     '''
     def _login(self):
-        (username, password) = self._get_login_info()
+        username, password = self._get_login_info()
         if username is None:
             return
 
index f11d5da5271d91fb15f9ee006743b1360897b08e..52ab2f158e28f3fe5d7553beb7a0c108c2608641 100644 (file)
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
     ExtractorError,
     determine_ext,
@@ -360,6 +361,50 @@ class PBSIE(InfoExtractor):
                 'skip_download': True,
             },
         },
+        {
+            'url': 'http://www.pbs.org/wgbh/roadshow/watch/episode/2105-indianapolis-hour-2/',
+            'info_dict': {
+                'id': '2365936247',
+                'ext': 'mp4',
+                'title': 'Antiques Roadshow - Indianapolis, Hour 2',
+                'description': 'md5:524b32249db55663e7231b6b8d1671a2',
+                'duration': 3180,
+                'thumbnail': r're:^https?://.*\.jpg$',
+            },
+            'params': {
+                'skip_download': True,
+            },
+            'expected_warnings': ['HTTP Error 403: Forbidden'],
+        },
+        {
+            'url': 'https://www.pbs.org/wgbh/masterpiece/episodes/victoria-s2-e1/',
+            'info_dict': {
+                'id': '3007193718',
+                'ext': 'mp4',
+                'title': "Victoria - A Soldier's Daughter / The Green-Eyed Monster",
+                'description': 'md5:37efbac85e0c09b009586523ec143652',
+                'duration': 6292,
+                'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$',
+            },
+            'params': {
+                'skip_download': True,
+            },
+            'expected_warnings': ['HTTP Error 403: Forbidden'],
+        },
+        {
+            'url': 'https://player.pbs.org/partnerplayer/tOz9tM5ljOXQqIIWke53UA==/',
+            'info_dict': {
+                'id': '3011407934',
+                'ext': 'mp4',
+                'title': 'Stories from the Stage - Road Trip',
+                'duration': 1619,
+                'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$',
+            },
+            'params': {
+                'skip_download': True,
+            },
+            'expected_warnings': ['HTTP Error 403: Forbidden'],
+        },
         {
             'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',
             'only_matching': True,
@@ -422,6 +467,8 @@ class PBSIE(InfoExtractor):
                 r'<section[^>]+data-coveid="(\d+)"',                    # coveplayer from http://www.pbs.org/wgbh/frontline/film/real-csi/
                 r'<input type="hidden" id="pbs_video_id_[0-9]+" value="([0-9]+)"/>',  # jwplayer
                 r"(?s)window\.PBS\.playerConfig\s*=\s*{.*?id\s*:\s*'([0-9]+)',",
+                r'<div[^>]+\bdata-cove-id=["\'](\d+)"',  # http://www.pbs.org/wgbh/roadshow/watch/episode/2105-indianapolis-hour-2/
+                r'<iframe[^>]+\bsrc=["\'](?:https?:)?//video\.pbs\.org/widget/partnerplayer/(\d+)',  # https://www.pbs.org/wgbh/masterpiece/episodes/victoria-s2-e1/
             ]
 
             media_id = self._search_regex(
@@ -456,7 +503,8 @@ class PBSIE(InfoExtractor):
             if not url:
                 url = self._og_search_url(webpage)
 
-            mobj = re.match(self._VALID_URL, url)
+            mobj = re.match(
+                self._VALID_URL, self._proto_relative_url(url.strip()))
 
         player_id = mobj.group('player_id')
         if not display_id:
@@ -466,13 +514,27 @@ class PBSIE(InfoExtractor):
                 url, display_id, note='Downloading player page',
                 errnote='Could not download player page')
             video_id = self._search_regex(
-                r'<div\s+id="video_([0-9]+)"', player_page, 'video ID')
+                r'<div\s+id=["\']video_(\d+)', player_page, 'video ID',
+                default=None)
+            if not video_id:
+                video_info = self._extract_video_data(
+                    player_page, 'video data', display_id)
+                video_id = compat_str(
+                    video_info.get('id') or video_info['contentID'])
         else:
             video_id = mobj.group('id')
             display_id = video_id
 
         return video_id, display_id, None, description
 
+    def _extract_video_data(self, string, name, video_id, fatal=True):
+        return self._parse_json(
+            self._search_regex(
+                [r'(?s)PBS\.videoData\s*=\s*({.+?});\n',
+                 r'window\.videoBridge\s*=\s*({.+?});'],
+                string, name, default='{}'),
+            video_id, transform_source=js_to_json, fatal=fatal)
+
     def _real_extract(self, url):
         video_id, display_id, upload_date, description = self._extract_webpage(url)
 
@@ -503,20 +565,21 @@ class PBSIE(InfoExtractor):
                 'http://player.pbs.org/%s/%s' % (page, video_id),
                 display_id, 'Downloading %s page' % page, fatal=False)
             if player:
-                video_info = self._parse_json(
-                    self._search_regex(
-                        r'(?s)PBS\.videoData\s*=\s*({.+?});\n',
-                        player, '%s video data' % page, default='{}'),
-                    display_id, transform_source=js_to_json, fatal=False)
+                video_info = self._extract_video_data(
+                    player, '%s video data' % page, display_id, fatal=False)
                 if video_info:
                     extract_redirect_urls(video_info)
                     if not info:
                         info = video_info
                 if not chapters:
-                    for chapter_data in re.findall(r'(?s)chapters\.push\(({.*?})\)', player):
-                        chapter = self._parse_json(chapter_data, video_id, js_to_json, fatal=False)
-                        if not chapter:
-                            continue
+                    raw_chapters = video_info.get('chapters') or []
+                    if not raw_chapters:
+                        for chapter_data in re.findall(r'(?s)chapters\.push\(({.*?})\)', player):
+                            chapter = self._parse_json(chapter_data, video_id, js_to_json, fatal=False)
+                            if not chapter:
+                                continue
+                            raw_chapters.append(chapter)
+                    for chapter in raw_chapters:
                         start_time = float_or_none(chapter.get('start_time'), 1000)
                         duration = float_or_none(chapter.get('duration'), 1000)
                         if start_time is None or duration is None:
diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py
new file mode 100644 (file)
index 0000000..a481b31
--- /dev/null
@@ -0,0 +1,228 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    int_or_none,
+    parse_resolution,
+    try_get,
+    unified_timestamp,
+    urljoin,
+)
+
+
+class PeerTubeIE(InfoExtractor):
+    _INSTANCES_RE = r'''(?:
+                            # Taken from https://instances.joinpeertube.org/instances
+                            tube\.openalgeria\.org|
+                            peertube\.pointsecu\.fr|
+                            peertube\.nogafa\.org|
+                            peertube\.pl|
+                            megatube\.lilomoino\.fr|
+                            peertube\.tamanoir\.foucry\.net|
+                            peertube\.inapurna\.org|
+                            peertube\.netzspielplatz\.de|
+                            video\.deadsuperhero\.com|
+                            peertube\.devosi\.org|
+                            peertube\.1312\.media|
+                            tube\.worldofhauru\.xyz|
+                            tube\.bootlicker\.party|
+                            skeptikon\.fr|
+                            peertube\.geekshell\.fr|
+                            tube\.opportunis\.me|
+                            peertube\.peshane\.net|
+                            video\.blueline\.mg|
+                            tube\.homecomputing\.fr|
+                            videos\.cloudfrancois\.fr|
+                            peertube\.viviers-fibre\.net|
+                            tube\.ouahpiti\.info|
+                            video\.tedomum\.net|
+                            video\.g3l\.org|
+                            fontube\.fr|
+                            peertube\.gaialabs\.ch|
+                            peertube\.extremely\.online|
+                            peertube\.public-infrastructure\.eu|
+                            tube\.kher\.nl|
+                            peertube\.qtg\.fr|
+                            tube\.22decembre\.eu|
+                            facegirl\.me|
+                            video\.migennes\.net|
+                            janny\.moe|
+                            tube\.p2p\.legal|
+                            video\.atlanti\.se|
+                            troll\.tv|
+                            peertube\.geekael\.fr|
+                            vid\.leotindall\.com|
+                            video\.anormallostpod\.ovh|
+                            p-tube\.h3z\.jp|
+                            tube\.darfweb\.eu|
+                            videos\.iut-orsay\.fr|
+                            peertube\.solidev\.net|
+                            videos\.symphonie-of-code\.fr|
+                            testtube\.ortg\.de|
+                            videos\.cemea\.org|
+                            peertube\.gwendalavir\.eu|
+                            video\.passageenseine\.fr|
+                            videos\.festivalparminous\.org|
+                            peertube\.touhoppai\.moe|
+                            peertube\.duckdns\.org|
+                            sikke\.fi|
+                            peertube\.mastodon\.host|
+                            firedragonvideos\.com|
+                            vidz\.dou\.bet|
+                            peertube\.koehn\.com|
+                            peer\.hostux\.social|
+                            share\.tube|
+                            peertube\.walkingmountains\.fr|
+                            medias\.libox\.fr|
+                            peertube\.moe|
+                            peertube\.xyz|
+                            jp\.peertube\.network|
+                            videos\.benpro\.fr|
+                            tube\.otter\.sh|
+                            peertube\.angristan\.xyz|
+                            peertube\.parleur\.net|
+                            peer\.ecutsa\.fr|
+                            peertube\.heraut\.eu|
+                            peertube\.tifox\.fr|
+                            peertube\.maly\.io|
+                            vod\.mochi\.academy|
+                            exode\.me|
+                            coste\.video|
+                            tube\.aquilenet\.fr|
+                            peertube\.gegeweb\.eu|
+                            framatube\.org|
+                            thinkerview\.video|
+                            tube\.conferences-gesticulees\.net|
+                            peertube\.datagueule\.tv|
+                            video\.lqdn\.fr|
+                            meilleurtube\.delire\.party|
+                            tube\.mochi\.academy|
+                            peertube\.dav\.li|
+                            media\.zat\.im|
+                            pytu\.be|
+                            peertube\.valvin\.fr|
+                            peertube\.nsa\.ovh|
+                            video\.colibris-outilslibres\.org|
+                            video\.hispagatos\.org|
+                            tube\.svnet\.fr|
+                            peertube\.video|
+                            videos\.lecygnenoir\.info|
+                            peertube3\.cpy\.re|
+                            peertube2\.cpy\.re|
+                            videos\.tcit\.fr|
+                            peertube\.cpy\.re
+                        )'''
+    _VALID_URL = r'''(?x)
+                    https?://
+                        %s
+                        /(?:videos/(?:watch|embed)|api/v\d/videos)/
+                        (?P<id>[^/?\#&]+)
+                    ''' % _INSTANCES_RE
+    _TESTS = [{
+        'url': 'https://peertube.moe/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c',
+        'md5': '80f24ff364cc9d333529506a263e7feb',
+        'info_dict': {
+            'id': '2790feb0-8120-4e63-9af3-c943c69f5e6c',
+            'ext': 'mp4',
+            'title': 'wow',
+            'description': 'wow such video, so gif',
+            'thumbnail': r're:https?://.*\.(?:jpg|png)',
+            'timestamp': 1519297480,
+            'upload_date': '20180222',
+            'uploader': 'Luclu7',
+            'uploader_id': '7fc42640-efdb-4505-a45d-a15b1a5496f1',
+            'uploader_url': 'https://peertube.nsa.ovh/accounts/luclu7',
+            'license': 'Unknown',
+            'duration': 3,
+            'view_count': int,
+            'like_count': int,
+            'dislike_count': int,
+            'tags': list,
+            'categories': list,
+        }
+    }, {
+        'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
+        'only_matching': True,
+    }, {
+        # nsfw
+        'url': 'https://tube.22decembre.eu/videos/watch/9bb88cd3-9959-46d9-9ab9-33d2bb704c39',
+        'only_matching': True,
+    }, {
+        'url': 'https://tube.22decembre.eu/videos/embed/fed67262-6edb-4d1c-833b-daa9085c71d7',
+        'only_matching': True,
+    }, {
+        'url': 'https://tube.openalgeria.org/api/v1/videos/c1875674-97d0-4c94-a058-3f7e64c962e8',
+        'only_matching': True,
+    }]
+
+    @staticmethod
+    def _extract_urls(webpage):
+        return [
+            mobj.group('url')
+            for mobj in re.finditer(
+                r'''(?x)<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//%s/videos/embed/[^/?\#&]+)\1'''
+                % PeerTubeIE._INSTANCES_RE, webpage)]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        video = self._download_json(
+            urljoin(url, '/api/v1/videos/%s' % video_id), video_id)
+
+        title = video['name']
+
+        formats = []
+        for file_ in video['files']:
+            if not isinstance(file_, dict):
+                continue
+            file_url = file_.get('fileUrl')
+            if not file_url or not isinstance(file_url, compat_str):
+                continue
+            file_size = int_or_none(file_.get('size'))
+            format_id = try_get(
+                file_, lambda x: x['resolution']['label'], compat_str)
+            f = parse_resolution(format_id)
+            f.update({
+                'url': file_url,
+                'format_id': format_id,
+                'filesize': file_size,
+            })
+            formats.append(f)
+        self._sort_formats(formats)
+
+        def account_data(field):
+            return try_get(video, lambda x: x['account'][field], compat_str)
+
+        category = try_get(video, lambda x: x['category']['label'], compat_str)
+        categories = [category] if category else None
+
+        nsfw = video.get('nsfw')
+        if isinstance(nsfw, bool):  # `nsfw is bool` was always False
+            age_limit = 18 if nsfw else 0
+        else:
+            age_limit = None
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': video.get('description'),
+            'thumbnail': urljoin(url, video.get('thumbnailPath')),
+            'timestamp': unified_timestamp(video.get('publishedAt')),
+            'uploader': account_data('displayName'),
+            'uploader_id': account_data('uuid'),
+            'uploader_url': account_data('url'),
+            'license': try_get(
+                video, lambda x: x['licence']['label'], compat_str),
+            'duration': int_or_none(video.get('duration')),
+            'view_count': int_or_none(video.get('views')),
+            'like_count': int_or_none(video.get('likes')),
+            'dislike_count': int_or_none(video.get('dislikes')),
+            'age_limit': age_limit,
+            'tags': try_get(video, lambda x: x['tags'], list),
+            'categories': categories,
+            'formats': formats,
+        }
index aacc5d4bb8a8c139b354be7f724612c308bfc6c5..a207ca9cb93a3c839bf9304267d6a5b10d426b5e 100644 (file)
@@ -94,7 +94,7 @@ class PluralsightIE(PluralsightBaseIE):
         self._login()
 
     def _login(self):
-        (username, password) = self._get_login_info()
+        username, password = self._get_login_info()
         if username is None:
             return
 
@@ -140,10 +140,10 @@ class PluralsightIE(PluralsightBaseIE):
 
             raise ExtractorError('Unable to log in')
 
-    def _get_subtitles(self, author, clip_id, lang, name, duration, video_id):
+    def _get_subtitles(self, author, clip_idx, lang, name, duration, video_id):
         captions_post = {
             'a': author,
-            'cn': clip_id,
+            'cn': clip_idx,
             'lc': lang,
             'm': name,
         }
@@ -195,13 +195,13 @@ class PluralsightIE(PluralsightBaseIE):
 
         author = qs.get('author', [None])[0]
         name = qs.get('name', [None])[0]
-        clip_id = qs.get('clip', [None])[0]
+        clip_idx = qs.get('clip', [None])[0]
         course_name = qs.get('course', [None])[0]
 
-        if any(not f for f in (author, name, clip_id, course_name,)):
+        if any(not f for f in (author, name, clip_idx, course_name,)):
             raise ExtractorError('Invalid URL', expected=True)
 
-        display_id = '%s-%s' % (name, clip_id)
+        display_id = '%s-%s' % (name, clip_idx)
 
         course = self._download_course(course_name, url, display_id)
 
@@ -217,7 +217,7 @@ class PluralsightIE(PluralsightBaseIE):
                         clip_index = clip_.get('index')
                     if clip_index is None:
                         continue
-                    if compat_str(clip_index) == clip_id:
+                    if compat_str(clip_index) == clip_idx:
                         clip = clip_
                         break
 
@@ -225,6 +225,7 @@ class PluralsightIE(PluralsightBaseIE):
             raise ExtractorError('Unable to resolve clip')
 
         title = clip['title']
+        clip_id = clip.get('clipName') or clip.get('name') or clip['clipId']
 
         QUALITIES = {
             'low': {'width': 640, 'height': 480},
@@ -277,7 +278,7 @@ class PluralsightIE(PluralsightBaseIE):
                 clip_post = {
                     'author': author,
                     'includeCaptions': False,
-                    'clipIndex': int(clip_id),
+                    'clipIndex': int(clip_idx),
                     'courseName': course_name,
                     'locale': 'en',
                     'moduleName': name,
@@ -330,10 +331,10 @@ class PluralsightIE(PluralsightBaseIE):
 
         # TODO: other languages?
         subtitles = self.extract_subtitles(
-            author, clip_id, 'en', name, duration, display_id)
+            author, clip_idx, 'en', name, duration, display_id)
 
         return {
-            'id': clip.get('clipName') or clip['name'],
+            'id': clip_id,
             'title': title,
             'duration': duration,
             'creator': author,
index afa7b91615eb33efcfb5260ac5d32979fa1f97d9..ae7413fb5eeb33f4f7e4e46ba816b4c554d20875 100644 (file)
@@ -53,7 +53,8 @@ class RBMARadioIE(InfoExtractor):
             'format_id': compat_str(abr),
             'abr': abr,
             'vcodec': 'none',
-        } for abr in (96, 128, 256)]
+        } for abr in (96, 128, 192, 256)]
+        self._check_formats(formats, episode_id)
 
         description = clean_html(episode.get('longTeaser'))
         thumbnail = self._proto_relative_url(episode.get('imageURL', {}).get('landscape'))
index bf200ea4d3f8b17f171bcce01c930b5d183fcc2e..8c016a77dea1122286d3eafb5475686a9f4e17ea 100644 (file)
@@ -19,7 +19,7 @@ class RDSIE(InfoExtractor):
         'info_dict': {
             'id': '604333',
             'display_id': 'fowler-jr-prend-la-direction-de-jacksonville',
-            'ext': 'mp4',
+            'ext': 'flv',
             'title': 'Fowler Jr. prend la direction de Jacksonville',
             'description': 'Dante Fowler Jr. est le troisième choix du repêchage 2015 de la NFL. ',
             'timestamp': 1430397346,
index 53b1c967e5cb98c300a81996bb483f204890b1ad..7b0aa6232a0dea6ff09481560dc95f9a667e56e8 100644 (file)
@@ -47,7 +47,7 @@ class RedditIE(InfoExtractor):
 
 
 class RedditRIE(InfoExtractor):
-    _VALID_URL = r'(?P<url>https?://(?:www\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/?#&]+))'
+    _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/?#&]+))'
     _TESTS = [{
         'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
         'info_dict': {
@@ -74,6 +74,10 @@ class RedditRIE(InfoExtractor):
         # imgur
         'url': 'https://www.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/',
         'only_matching': True,
+    }, {
+        # imgur @ old reddit
+        'url': 'https://old.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/',
+        'only_matching': True,
     }, {
         # streamable
         'url': 'https://www.reddit.com/r/videos/comments/6t7sg9/comedians_hilarious_joke_about_the_guam_flag/',
@@ -82,6 +86,10 @@ class RedditRIE(InfoExtractor):
         # youtube
         'url': 'https://www.reddit.com/r/videos/comments/6t75wq/southern_man_tries_to_speak_without_an_accent/',
         'only_matching': True,
+    }, {
+        # reddit video @ nm reddit
+        'url': 'https://nm.reddit.com/r/Cricket/comments/8idvby/lousy_cameraman_finds_himself_in_cairns_line_of/',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index 8b703800ecbd595c5cb1e188fdb830b5ed1cfa35..857434540d89a41dae80d9b4270e7bd6e6192acb 100644 (file)
@@ -50,7 +50,7 @@ class RoosterTeethIE(InfoExtractor):
     }]
 
     def _login(self):
-        (username, password) = self._get_login_info()
+        username, password = self._get_login_info()
         if username is None:
             return
 
index 28cc5522d89083cec2ad7631d51fb0aa0798ccbd..acff9766acb275dde443a250e5c8bd1dddbccfda 100644 (file)
@@ -1,10 +1,14 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..utils import (
-    int_or_none,
     ExtractorError,
+    float_or_none,
+    int_or_none,
+    strip_or_none,
 )
 
 
@@ -14,20 +18,19 @@ class RTBFIE(InfoExtractor):
         (?:
             video/[^?]+\?.*\bid=|
             ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=|
-            auvio/[^/]+\?.*id=
+            auvio/[^/]+\?.*\b(?P<live>l)?id=
         )(?P<id>\d+)'''
     _TESTS = [{
         'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',
-        'md5': '799f334ddf2c0a582ba80c44655be570',
+        'md5': '8c876a1cceeb6cf31b476461ade72384',
         'info_dict': {
             'id': '1921274',
             'ext': 'mp4',
             'title': 'Les Diables au coeur (épisode 2)',
-            'description': 'Football - Diables Rouges',
-            'duration': 3099,
+            'description': '(du 25/04/2014)',
+            'duration': 3099.54,
             'upload_date': '20140425',
-            'timestamp': 1398456336,
-            'uploader': 'rtbfsport',
+            'timestamp': 1398456300,
         }
     }, {
         # geo restricted
@@ -39,6 +42,18 @@ class RTBFIE(InfoExtractor):
     }, {
         'url': 'http://www.rtbf.be/auvio/detail_jeudi-en-prime-siegfried-bracke?id=2102996',
         'only_matching': True,
+    }, {
+        # Live
+        'url': 'https://www.rtbf.be/auvio/direct_pure-fm?lid=134775',
+        'only_matching': True,
+    }, {
+        # Audio
+        'url': 'https://www.rtbf.be/auvio/detail_cinq-heures-cinema?id=2360811',
+        'only_matching': True,
+    }, {
+        # With Subtitle
+        'url': 'https://www.rtbf.be/auvio/detail_les-carnets-du-bourlingueur?id=2361588',
+        'only_matching': True,
     }]
     _IMAGE_HOST = 'http://ds1.ds.static.rtbf.be'
     _PROVIDERS = {
@@ -53,46 +68,94 @@ class RTBFIE(InfoExtractor):
     ]
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
-        data = self._download_json(
-            'http://www.rtbf.be/api/media/video?method=getVideoDetail&args[]=%s' % video_id, video_id)
+        live, media_id = re.match(self._VALID_URL, url).groups()
+        embed_page = self._download_webpage(
+            'https://www.rtbf.be/auvio/embed/' + ('direct' if live else 'media'),
+            media_id, query={'id': media_id})
+        data = self._parse_json(self._html_search_regex(
+            r'data-media="([^"]+)"', embed_page, 'media data'), media_id)
 
         error = data.get('error')
         if error:
             raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
 
-        data = data['data']
-
         provider = data.get('provider')
         if provider in self._PROVIDERS:
             return self.url_result(data['url'], self._PROVIDERS[provider])
 
+        title = data['title']
+        is_live = data.get('isLive')
+        if is_live:
+            title = self._live_title(title)
+        height_re = r'-(\d+)p\.'
         formats = []
-        for key, format_id in self._QUALITIES:
-            format_url = data.get(key + 'Url')
-            if format_url:
+
+        m3u8_url = data.get('urlHlsAes128') or data.get('urlHls')
+        if m3u8_url:
+            formats.extend(self._extract_m3u8_formats(
+                m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))
+
+        fix_url = lambda x: x.replace('//rtbf-vod.', '//rtbf.') if '/geo/drm/' in x else x
+        http_url = data.get('url')
+        if formats and http_url and re.search(height_re, http_url):
+            http_url = fix_url(http_url)
+            for m3u8_f in formats.copy():
+                height = m3u8_f.get('height')
+                if not height:
+                    continue
+                f = m3u8_f.copy()
+                del f['protocol']
+                f.update({
+                    'format_id': m3u8_f['format_id'].replace('hls-', 'http-'),
+                    'url': re.sub(height_re, '-%dp.' % height, http_url),
+                })
+                formats.append(f)
+        else:
+            sources = data.get('sources') or {}
+            for key, format_id in self._QUALITIES:
+                format_url = sources.get(key)
+                if not format_url:
+                    continue
+                height = int_or_none(self._search_regex(
+                    height_re, format_url, 'height', default=None))
                 formats.append({
                     'format_id': format_id,
-                    'url': format_url,
+                    'url': fix_url(format_url),
+                    'height': height,
                 })
 
-        thumbnails = []
-        for thumbnail_id, thumbnail_url in data.get('thumbnail', {}).items():
-            if thumbnail_id != 'default':
-                thumbnails.append({
-                    'url': self._IMAGE_HOST + thumbnail_url,
-                    'id': thumbnail_id,
-                })
+        mpd_url = data.get('urlDash')
+        if not data.get('drm') and mpd_url:
+            formats.extend(self._extract_mpd_formats(
+                mpd_url, media_id, mpd_id='dash', fatal=False))
+
+        audio_url = data.get('urlAudio')
+        if audio_url:
+            formats.append({
+                'format_id': 'audio',
+                'url': audio_url,
+                'vcodec': 'none',
+            })
+        self._sort_formats(formats)
+
+        subtitles = {}
+        for track in (data.get('tracks') or {}).values():
+            sub_url = track.get('url')
+            if not sub_url:
+                continue
+            subtitles.setdefault(track.get('lang') or 'fr', []).append({
+                'url': sub_url,
+            })
 
         return {
-            'id': video_id,
+            'id': media_id,
             'formats': formats,
-            'title': data['title'],
-            'description': data.get('description') or data.get('subtitle'),
-            'thumbnails': thumbnails,
-            'duration': data.get('duration') or data.get('realDuration'),
-            'timestamp': int_or_none(data.get('created')),
-            'view_count': int_or_none(data.get('viewCount')),
-            'uploader': data.get('channel'),
-            'tags': data.get('tags'),
+            'title': title,
+            'description': strip_or_none(data.get('description')),
+            'thumbnail': data.get('thumbnail'),
+            'duration': float_or_none(data.get('realDuration')),
+            'timestamp': int_or_none(data.get('liveFrom')),
+            'series': data.get('programLabel'),
+            'subtitles': subtitles,
+            'is_live': is_live,
         }
index cc6698f882a5859883372b32dc71578ebe37da8a..30e2a38b45f0c559b14e18c5e317e438a8d831f4 100644 (file)
@@ -27,7 +27,7 @@ class SafariBaseIE(InfoExtractor):
         self._login()
 
     def _login(self):
-        (username, password) = self._get_login_info()
+        username, password = self._get_login_info()
         if username is None:
             return
 
@@ -74,7 +74,14 @@ class SafariBaseIE(InfoExtractor):
 class SafariIE(SafariBaseIE):
     IE_NAME = 'safari'
     IE_DESC = 'safaribooksonline.com online video'
-    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?#&]+)\.html'
+    _VALID_URL = r'''(?x)
+                        https?://
+                            (?:www\.)?safaribooksonline\.com/
+                            (?:
+                                library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?\#&]+)\.html|
+                                videos/[^/]+/[^/]+/(?P<reference_id>[^-]+-[^/?\#&]+)
+                            )
+                    '''
 
     _TESTS = [{
         'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
@@ -94,22 +101,41 @@ class SafariIE(SafariBaseIE):
     }, {
         'url': 'https://www.safaribooksonline.com/library/view/learning-path-red/9780134664057/RHCE_Introduction.html',
         'only_matching': True,
+    }, {
+        'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314/9780134217314-PYMC_13_00',
+        'only_matching': True,
     }]
 
+    _PARTNER_ID = '1926081'
+    _UICONF_ID = '29375172'
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = '%s/%s' % (mobj.group('course_id'), mobj.group('part'))
-
-        webpage = self._download_webpage(url, video_id)
-        reference_id = self._search_regex(
-            r'data-reference-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
-            webpage, 'kaltura reference id', group='id')
-        partner_id = self._search_regex(
-            r'data-partner-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
-            webpage, 'kaltura widget id', group='id')
-        ui_id = self._search_regex(
-            r'data-ui-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
-            webpage, 'kaltura uiconf id', group='id')
+
+        reference_id = mobj.group('reference_id')
+        if reference_id:
+            video_id = reference_id
+            partner_id = self._PARTNER_ID
+            ui_id = self._UICONF_ID
+        else:
+            video_id = '%s-%s' % (mobj.group('course_id'), mobj.group('part'))
+
+            webpage, urlh = self._download_webpage_handle(url, video_id)
+
+            mobj = re.match(self._VALID_URL, urlh.geturl())
+            reference_id = mobj.group('reference_id')
+            if not reference_id:
+                reference_id = self._search_regex(
+                    r'data-reference-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
+                    webpage, 'kaltura reference id', group='id')
+            partner_id = self._search_regex(
+                r'data-partner-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
+                webpage, 'kaltura widget id', default=self._PARTNER_ID,
+                group='id')
+            ui_id = self._search_regex(
+                r'data-ui-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
+                webpage, 'kaltura uiconf id', default=self._UICONF_ID,
+                group='id')
 
         query = {
             'wid': '_%s' % partner_id,
@@ -159,10 +185,15 @@ class SafariCourseIE(SafariBaseIE):
     _VALID_URL = r'''(?x)
                     https?://
                         (?:
-                            (?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)|
+                            (?:www\.)?safaribooksonline\.com/
+                            (?:
+                                library/view/[^/]+|
+                                api/v1/book|
+                                videos/[^/]+
+                            )|
                             techbus\.safaribooksonline\.com
                         )
-                        /(?P<id>[^/]+)/?(?:[#?]|$)
+                        /(?P<id>[^/]+)
                     '''
 
     _TESTS = [{
@@ -179,8 +210,16 @@ class SafariCourseIE(SafariBaseIE):
     }, {
         'url': 'http://techbus.safaribooksonline.com/9780134426365',
         'only_matching': True,
+    }, {
+        'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314',
+        'only_matching': True,
     }]
 
+    @classmethod
+    def suitable(cls, url):
+        return (False if SafariIE.suitable(url) or SafariApiIE.suitable(url)
+                else super(SafariCourseIE, cls).suitable(url))
+
     def _real_extract(self, url):
         course_id = self._match_id(url)
 
index 8fc66732af70f4db5305fdc891c5142afd5c97c7..07b766b4a756f67d855ede90ebf72f0997da7e1d 100644 (file)
@@ -64,7 +64,7 @@ class SinaIE(InfoExtractor):
                 # The video id is in the redirected url
                 self.to_screen('Getting video id')
                 request = HEADRequest(url)
-                (_, urlh) = self._download_webpage_handle(request, 'NA', False)
+                _, urlh = self._download_webpage_handle(request, 'NA', False)
                 return self._real_extract(urlh.geturl())
             else:
                 pseudo_id = mobj.group('pseudo_id')
index 69951e38759945d34dd25ebf161464eeb36b0c13..1f8469a90876673a21cbd3d436fde3081c3e015c 100644 (file)
@@ -19,29 +19,33 @@ from ..utils import (
 
 class SixPlayIE(InfoExtractor):
     IE_NAME = '6play'
-    _VALID_URL = r'(?:6play:|https?://(?:www\.)?6play\.fr/.+?-c_)(?P<id>[0-9]+)'
-    _TEST = {
-        'url': 'http://www.6play.fr/le-meilleur-patissier-p_1807/le-meilleur-patissier-special-fetes-mercredi-a-21-00-sur-m6-c_11638450',
-        'md5': '42310bffe4ba3982db112b9cd3467328',
+    _VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P<domain>6play\.fr|rtlplay.be)/.+?-c_)(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'https://www.6play.fr/minute-par-minute-p_9533/le-but-qui-a-marque-lhistoire-du-football-francais-c_12041051',
+        'md5': '31fcd112637baa0c2ab92c4fcd8baf27',
         'info_dict': {
-            'id': '11638450',
+            'id': '12041051',
             'ext': 'mp4',
-            'title': 'Le Meilleur Pâtissier, spécial fêtes mercredi à 21:00 sur M6',
-            'description': 'md5:308853f6a5f9e2d55a30fc0654de415f',
-            'duration': 39,
-            'series': 'Le meilleur pâtissier',
+            'title': 'Le but qui a marqué l\'histoire du football français !',
+            'description': 'md5:b59e7e841d646ef1eb42a7868eb6a851',
         },
-        'params': {
-            'skip_download': True,
-        },
-    }
+    }, {
+        'url': 'https://www.rtlplay.be/rtl-info-13h-p_8551/les-titres-du-rtlinfo-13h-c_12045869',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        domain, video_id = re.search(self._VALID_URL, url).groups()
+        service, consumer_name = {
+            '6play.fr': ('6play', 'm6web'),
+            'rtlplay.be': ('rtlbe_rtl_play', 'rtlbe'),
+        }.get(domain, ('6play', 'm6web'))
 
         data = self._download_json(
-            'https://pc.middleware.6play.fr/6play/v2/platforms/m6group_web/services/6play/videos/clip_%s' % video_id,
-            video_id, query={
+            'https://pc.middleware.6play.fr/6play/v2/platforms/m6group_web/services/%s/videos/clip_%s' % (service, video_id),
+            video_id, headers={
+                'x-customer-name': consumer_name
+            }, query={
                 'csa': 5,
                 'with': 'clips',
             })
@@ -65,7 +69,12 @@ class SixPlayIE(InfoExtractor):
                 subtitles.setdefault('fr', []).append({'url': asset_url})
                 continue
             if container == 'm3u8' or ext == 'm3u8':
-                if protocol == 'usp' and not compat_parse_qs(compat_urllib_parse_urlparse(asset_url).query).get('token', [None])[0]:
+                if protocol == 'usp':
+                    if compat_parse_qs(compat_urllib_parse_urlparse(asset_url).query).get('token', [None])[0]:
+                        urlh = self._request_webpage(asset_url, video_id, fatal=False)
+                        if not urlh:
+                            continue
+                        asset_url = urlh.geturl()
                     asset_url = re.sub(r'/([^/]+)\.ism/[^/]*\.m3u8', r'/\1.ism/\1.m3u8', asset_url)
                     formats.extend(self._extract_m3u8_formats(
                         asset_url, video_id, 'mp4', 'm3u8_native',
index 46332e5c238619c9b572e4c5701ed1169eae2d20..81c81c8d58ecf5940adfa200d59c5fa247b30cbb 100644 (file)
@@ -181,7 +181,6 @@ class SoundcloudIE(InfoExtractor):
         thumbnail = info.get('artwork_url') or info.get('user', {}).get('avatar_url')
         if isinstance(thumbnail, compat_str):
             thumbnail = thumbnail.replace('-large', '-t500x500')
-        ext = 'mp3'
         result = {
             'id': track_id,
             'uploader': info.get('user', {}).get('username'),
@@ -215,8 +214,11 @@ class SoundcloudIE(InfoExtractor):
             track_id, 'Downloading track url', query=query)
 
         for key, stream_url in format_dict.items():
-            abr = int_or_none(self._search_regex(
-                r'_(\d+)_url', key, 'audio bitrate', default=None))
+            ext, abr = 'mp3', None
+            mobj = re.search(r'_([^_]+)_(\d+)_url', key)
+            if mobj:
+                ext, abr = mobj.groups()
+                abr = int(abr)
             if key.startswith('http'):
                 stream_formats = [{
                     'format_id': key,
@@ -234,13 +236,14 @@ class SoundcloudIE(InfoExtractor):
                 }]
             elif key.startswith('hls'):
                 stream_formats = self._extract_m3u8_formats(
-                    stream_url, track_id, 'mp3', entry_protocol='m3u8_native',
+                    stream_url, track_id, ext, entry_protocol='m3u8_native',
                     m3u8_id=key, fatal=False)
             else:
                 continue
 
-            for f in stream_formats:
-                f['abr'] = abr
+            if abr:
+                for f in stream_formats:
+                    f['abr'] = abr
 
             formats.extend(stream_formats)
 
@@ -250,7 +253,7 @@ class SoundcloudIE(InfoExtractor):
             formats.append({
                 'format_id': 'fallback',
                 'url': update_url_query(info['stream_url'], query),
-                'ext': ext,
+                'ext': 'mp3',
             })
 
         for f in formats:
index fc995e8c14da760dc33c706bfadba532bd86b05d..4df7f4ddce4b701b66decee671e48ecb56af96fa 100644 (file)
@@ -11,9 +11,9 @@ from .nexx import (
 from .spiegeltv import SpiegeltvIE
 from ..compat import compat_urlparse
 from ..utils import (
-    extract_attributes,
-    unified_strdate,
-    get_element_by_attribute,
+    parse_duration,
+    strip_or_none,
+    unified_timestamp,
 )
 
 
@@ -21,35 +21,38 @@ class SpiegelIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<id>[0-9]+)(?:-embed|-iframe)?(?:\.html)?(?:#.*)?$'
     _TESTS = [{
         'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
-        'md5': '2c2754212136f35fb4b19767d242f66e',
+        'md5': 'b57399839d055fccfeb9a0455c439868',
         'info_dict': {
-            'id': '1259285',
+            'id': '563747',
             'ext': 'mp4',
             'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv',
             'description': 'md5:8029d8310232196eb235d27575a8b9f4',
             'duration': 49,
             'upload_date': '20130311',
+            'timestamp': 1362994320,
         },
     }, {
         'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
-        'md5': 'f2cdf638d7aa47654e251e1aee360af1',
+        'md5': '5b6c2f4add9d62912ed5fc78a1faed80',
         'info_dict': {
-            'id': '1309159',
+            'id': '580988',
             'ext': 'mp4',
             'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers',
             'description': 'md5:c2322b65e58f385a820c10fa03b2d088',
             'duration': 983,
             'upload_date': '20131115',
+            'timestamp': 1384546642,
         },
     }, {
         'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-embed.html',
-        'md5': 'd8eeca6bfc8f1cd6f490eb1f44695d51',
+        'md5': '97b91083a672d72976faa8433430afb9',
         'info_dict': {
-            'id': '1519126',
+            'id': '601883',
             'ext': 'mp4',
             'description': 'SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. Hier kommen seine Antworten auf die besten sechs Fragen.',
             'title': 'Fragen an Astronaut Alexander Gerst: "Bekommen Sie die Tageszeiten mit?"',
             'upload_date': '20140904',
+            'timestamp': 1409834160,
         }
     }, {
         'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-iframe.html',
@@ -62,59 +65,28 @@ class SpiegelIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage, handle = self._download_webpage_handle(url, video_id)
+        metadata_url = 'http://www.spiegel.de/video/metadata/video-%s.json' % video_id
+        handle = self._request_webpage(metadata_url, video_id)
 
         # 302 to spiegel.tv, like http://www.spiegel.de/video/der-film-zum-wochenende-die-wahrheit-ueber-maenner-video-99003272.html
         if SpiegeltvIE.suitable(handle.geturl()):
             return self.url_result(handle.geturl(), 'Spiegeltv')
 
-        nexx_id = self._search_regex(
-            r'nexxOmniaId\s*:\s*(\d+)', webpage, 'nexx id', default=None)
-        if nexx_id:
-            domain_id = NexxIE._extract_domain_id(webpage) or '748'
-            return self.url_result(
-                'nexx:%s:%s' % (domain_id, nexx_id), ie=NexxIE.ie_key(),
-                video_id=nexx_id)
-
-        video_data = extract_attributes(self._search_regex(r'(<div[^>]+id="spVideoElements"[^>]+>)', webpage, 'video element', default=''))
-
-        title = video_data.get('data-video-title') or get_element_by_attribute('class', 'module-title', webpage)
-        description = video_data.get('data-video-teaser') or self._html_search_meta('description', webpage, 'description')
-
-        base_url = self._search_regex(
-            [r'server\s*:\s*(["\'])(?P<url>.+?)\1', r'var\s+server\s*=\s*"(?P<url>[^"]+)\"'],
-            webpage, 'server URL', group='url')
-
-        xml_url = base_url + video_id + '.xml'
-        idoc = self._download_xml(xml_url, video_id)
-
-        formats = []
-        for n in list(idoc):
-            if n.tag.startswith('type') and n.tag != 'type6':
-                format_id = n.tag.rpartition('type')[2]
-                video_url = base_url + n.find('./filename').text
-                formats.append({
-                    'format_id': format_id,
-                    'url': video_url,
-                    'width': int(n.find('./width').text),
-                    'height': int(n.find('./height').text),
-                    'abr': int(n.find('./audiobitrate').text),
-                    'vbr': int(n.find('./videobitrate').text),
-                    'vcodec': n.find('./codec').text,
-                    'acodec': 'MP4A',
-                })
-        duration = float(idoc[0].findall('./duration')[0].text)
-
-        self._check_formats(formats, video_id)
-        self._sort_formats(formats)
+        video_data = self._parse_json(self._webpage_read_content(
+            handle, metadata_url, video_id), video_id)
+        title = video_data['title']
+        nexx_id = video_data['nexxOmniaId']
+        domain_id = video_data.get('nexxOmniaDomain') or '748'
 
         return {
+            '_type': 'url_transparent',
             'id': video_id,
+            'url': 'nexx:%s:%s' % (domain_id, nexx_id),
             'title': title,
-            'description': description.strip() if description else None,
-            'duration': duration,
-            'upload_date': unified_strdate(video_data.get('data-video-date')),
-            'formats': formats,
+            'description': strip_or_none(video_data.get('teaser')),
+            'duration': parse_duration(video_data.get('duration')),
+            'timestamp': unified_timestamp(video_data.get('datum')),
+            'ie_key': NexxIE.ie_key(),
         }
 
 
index a7b1b3b5f3f83567554f42bd6536a65a70a9d99d..e76522b45d2e83f537b057cd9eb4d4b61f7bc403 100644 (file)
@@ -1,55 +1,46 @@
 from __future__ import unicode_literals
 
-import re
-
 from .mtv import MTVServicesInfoExtractor
 
 
-class SpikeIE(MTVServicesInfoExtractor):
-    _VALID_URL = r'https?://(?:[^/]+\.)?spike\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)'
+class BellatorIE(MTVServicesInfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?bellator\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)'
     _TESTS = [{
-        'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle',
-        'md5': '1a9265f32b0c375793d6c4ce45255256',
+        'url': 'http://www.bellator.com/fight/atwr7k/bellator-158-michael-page-vs-evangelista-cyborg',
         'info_dict': {
-            'id': 'b9c8221a-4e50-479a-b86d-3333323e38ba',
+            'id': 'b55e434e-fde1-4a98-b7cc-92003a034de4',
             'ext': 'mp4',
-            'title': 'Auction Hunters|December 27, 2013|4|414|Can Allen Ride A Hundred Year-Old Motorcycle?',
-            'description': 'md5:fbed7e82ed5fad493615b3094a9499cb',
-            'timestamp': 1388120400,
-            'upload_date': '20131227',
+            'title': 'Douglas Lima vs. Paul Daley - Round 1',
+            'description': 'md5:805a8dd29310fd611d32baba2f767885',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
         },
     }, {
-        'url': 'http://www.spike.com/full-episodes/j830qm/lip-sync-battle-joel-mchale-vs-jim-rash-season-2-ep-209',
-        'md5': 'b25c6f16418aefb9ad5a6cae2559321f',
+        'url': 'http://www.bellator.com/video-clips/bw6k7n/bellator-158-foundations-michael-venom-page',
+        'only_matching': True,
+    }]
+
+    _FEED_URL = 'http://www.spike.com/feeds/mrss/'
+    _GEO_COUNTRIES = ['US']
+
+
+class ParamountNetworkIE(MTVServicesInfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?paramountnetwork\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)'
+    _TESTS = [{
+        'url': 'http://www.paramountnetwork.com/episodes/j830qm/lip-sync-battle-joel-mchale-vs-jim-rash-season-2-ep-13',
         'info_dict': {
             'id': '37ace3a8-1df6-48be-85b8-38df8229e241',
             'ext': 'mp4',
             'title': 'Lip Sync Battle|April 28, 2016|2|209|Joel McHale Vs. Jim Rash|Act 1',
             'description': 'md5:a739ca8f978a7802f67f8016d27ce114',
         },
-    }, {
-        'url': 'http://www.spike.com/video-clips/lhtu8m/',
-        'only_matching': True,
-    }, {
-        'url': 'http://www.spike.com/video-clips/lhtu8m',
-        'only_matching': True,
-    }, {
-        'url': 'http://bellator.spike.com/fight/atwr7k/bellator-158-michael-page-vs-evangelista-cyborg',
-        'only_matching': True,
-    }, {
-        'url': 'http://bellator.spike.com/video-clips/bw6k7n/bellator-158-foundations-michael-venom-page',
-        'only_matching': True,
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
     }]
 
-    _FEED_URL = 'http://www.spike.com/feeds/mrss/'
-    _MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s'
-    _CUSTOM_URL_REGEX = re.compile(r'spikenetworkapp://([^/]+/[-a-fA-F0-9]+)')
+    _FEED_URL = 'http://www.paramountnetwork.com/feeds/mrss/'
     _GEO_COUNTRIES = ['US']
-
-    def _extract_mgid(self, webpage):
-        mgid = super(SpikeIE, self)._extract_mgid(webpage)
-        if mgid is None:
-            url_parts = self._search_regex(self._CUSTOM_URL_REGEX, webpage, 'episode_id')
-            video_type, episode_id = url_parts.split('/', 1)
-            mgid = 'mgid:arc:{0}:spike.com:{1}'.format(video_type, episode_id)
-        return mgid
index edc31729d35f250d2b8267e87719ea54ce9480e3..784f8ed6639d9ef2eb4e73e4c5645d309e6bb500 100644 (file)
@@ -4,6 +4,10 @@ from __future__ import unicode_literals
 import re
 
 from .turner import TurnerBaseIE
+from ..compat import (
+    compat_urllib_parse_urlparse,
+    compat_parse_qs,
+)
 from ..utils import (
     float_or_none,
     int_or_none,
@@ -38,48 +42,22 @@ class TBSIE(TurnerBaseIE):
     def _real_extract(self, url):
         site, display_id = re.match(self._VALID_URL, url).groups()
         webpage = self._download_webpage(url, display_id)
-        video_data = self._parse_json(self._search_regex(
+        drupal_settings = self._parse_json(self._search_regex(
             r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>',
-            webpage, 'drupal setting'), display_id)['turner_playlist'][0]
+            webpage, 'drupal setting'), display_id)
+        video_data = drupal_settings['turner_playlist'][0]
 
         media_id = video_data['mediaID']
         title = video_data['title']
+        tokenizer_query = compat_parse_qs(compat_urllib_parse_urlparse(
+            drupal_settings['ngtv_token_url']).query)
 
-        streams_data = self._download_json(
-            'http://medium.ngtv.io/media/%s/tv' % media_id,
-            media_id)['media']['tv']
-        duration = None
-        chapters = []
-        formats = []
-        for supported_type in ('unprotected', 'bulkaes'):
-            stream_data = streams_data.get(supported_type, {})
-            m3u8_url = stream_data.get('secureUrl') or stream_data.get('url')
-            if not m3u8_url:
-                continue
-            if stream_data.get('playlistProtection') == 'spe':
-                m3u8_url = self._add_akamai_spe_token(
-                    'http://token.vgtf.net/token/token_spe',
-                    m3u8_url, media_id, {
-                        'url': url,
-                        'site_name': site[:3].upper(),
-                        'auth_required': video_data.get('authRequired') == '1',
-                    })
-            formats.extend(self._extract_m3u8_formats(
-                m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))
-
-            duration = float_or_none(stream_data.get('totalRuntime') or video_data.get('duration'))
-
-            if not chapters:
-                for chapter in stream_data.get('contentSegments', []):
-                    start_time = float_or_none(chapter.get('start'))
-                    duration = float_or_none(chapter.get('duration'))
-                    if start_time is None or duration is None:
-                        continue
-                    chapters.append({
-                        'start_time': start_time,
-                        'end_time': start_time + duration,
-                    })
-        self._sort_formats(formats)
+        info = self._extract_ngtv_info(
+            media_id, tokenizer_query, {
+                'url': url,
+                'site_name': site[:3].upper(),
+                'auth_required': video_data.get('authRequired') == '1',
+            })
 
         thumbnails = []
         for image_id, image in video_data.get('images', {}).items():
@@ -98,15 +76,14 @@ class TBSIE(TurnerBaseIE):
                 })
             thumbnails.append(i)
 
-        return {
+        info.update({
             'id': media_id,
             'title': title,
             'description': strip_or_none(video_data.get('descriptionNoTags') or video_data.get('shortDescriptionNoTags')),
-            'duration': duration,
+            'duration': float_or_none(video_data.get('duration')) or info.get('duration'),
             'timestamp': int_or_none(video_data.get('created')),
             'season_number': int_or_none(video_data.get('season')),
             'episode_number': int_or_none(video_data.get('episode')),
-            'cahpters': chapters,
             'thumbnails': thumbnails,
-            'formats': formats,
-        }
+        })
+        return info
index 9056c8cbc29a4431a364be6b3731f5aef2f06529..73469cc5d1b6247cb44dc33aaa96805dfd1768a0 100644 (file)
@@ -1,35 +1,34 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import binascii
-import re
 import json
 
-from .common import InfoExtractor
-from ..compat import (
-    compat_b64decode,
-    compat_ord,
-)
+from .turner import TurnerBaseIE
 from ..utils import (
+    determine_ext,
     ExtractorError,
+    int_or_none,
+    mimetype2ext,
+    parse_duration,
+    parse_iso8601,
     qualities,
-    determine_ext,
 )
 
 
-class TeamcocoIE(InfoExtractor):
-    _VALID_URL = r'https?://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)'
+class TeamcocoIE(TurnerBaseIE):
+    _VALID_URL = r'https?://teamcoco\.com/(?P<id>([^/]+/)*[^/?#]+)'
     _TESTS = [
         {
-            'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
-            'md5': '3f7746aa0dc86de18df7539903d399ea',
+            'url': 'http://teamcoco.com/video/mary-kay-remote',
+            'md5': '55d532f81992f5c92046ad02fec34d7d',
             'info_dict': {
                 'id': '80187',
                 'ext': 'mp4',
                 'title': 'Conan Becomes A Mary Kay Beauty Consultant',
                 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.',
-                'duration': 504,
-                'age_limit': 0,
+                'duration': 495.0,
+                'upload_date': '20140402',
+                'timestamp': 1396407600,
             }
         }, {
             'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
@@ -40,7 +39,8 @@ class TeamcocoIE(InfoExtractor):
                 'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.',
                 'title': 'Louis C.K. Interview Pt. 1 11/3/11',
                 'duration': 288,
-                'age_limit': 0,
+                'upload_date': '20111104',
+                'timestamp': 1320405840,
             }
         }, {
             'url': 'http://teamcoco.com/video/timothy-olyphant-drinking-whiskey',
@@ -49,6 +49,8 @@ class TeamcocoIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'Timothy Olyphant Raises A Toast To “Justified”',
                 'description': 'md5:15501f23f020e793aeca761205e42c24',
+                'upload_date': '20150415',
+                'timestamp': 1429088400,
             },
             'params': {
                 'skip_download': True,  # m3u8 downloads
@@ -63,110 +65,125 @@ class TeamcocoIE(InfoExtractor):
             },
             'params': {
                 'skip_download': True,  # m3u8 downloads
-            }
+            },
+            'skip': 'This video is no longer available.',
+        }, {
+            'url': 'http://teamcoco.com/video/the-conan-audiencey-awards-for-04/25/18',
+            'only_matching': True,
+        }, {
+            'url': 'http://teamcoco.com/italy/conan-jordan-schlansky-hit-the-streets-of-florence',
+            'only_matching': True,
+        }, {
+            'url': 'http://teamcoco.com/haiti/conan-s-haitian-history-lesson',
+            'only_matching': True,
+        }, {
+            'url': 'http://teamcoco.com/israel/conan-hits-the-streets-beaches-of-tel-aviv',
+            'only_matching': True,
         }
     ]
-    _VIDEO_ID_REGEXES = (
-        r'"eVar42"\s*:\s*(\d+)',
-        r'Ginger\.TeamCoco\.openInApp\("video",\s*"([^"]+)"',
-        r'"id_not"\s*:\s*(\d+)'
-    )
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-
-        display_id = mobj.group('display_id')
-        webpage, urlh = self._download_webpage_handle(url, display_id)
-        if 'src=expired' in urlh.geturl():
-            raise ExtractorError('This video is expired.', expected=True)
 
-        video_id = mobj.group('video_id')
-        if not video_id:
-            video_id = self._html_search_regex(
-                self._VIDEO_ID_REGEXES, webpage, 'video id')
+    def _graphql_call(self, query_template, object_type, object_id):
+        find_object = 'find' + object_type
+        return self._download_json(
+            'http://teamcoco.com/graphql/', object_id, data=json.dumps({
+                'query': query_template % (find_object, object_id)
+            }))['data'][find_object]
 
-        data = None
-
-        preload_codes = self._html_search_regex(
-            r'(function.+)setTimeout\(function\(\)\{playlist',
-            webpage, 'preload codes')
-        base64_fragments = re.findall(r'"([a-zA-Z0-9+/=]+)"', preload_codes)
-        base64_fragments.remove('init')
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        response = self._graphql_call('''{
+  %s(slug: "%s") {
+    ... on RecordSlug {
+      record {
+        id
+        title
+        teaser
+        publishOn
+        thumb {
+          preview
+        }
+        file {
+          url
+        }
+        tags {
+          name
+        }
+        duration
+        turnerMediaId
+        turnerMediaAuthToken
+      }
+    }
+    ... on NotFoundSlug {
+      status
+    }
+  }
+}''', 'Slug', display_id)
+        if response.get('status'):
+            raise ExtractorError('This video is no longer available.', expected=True)
+
+        record = response['record']
+        video_id = record['id']
+
+        info = {
+            'id': video_id,
+            'display_id': display_id,
+            'title': record['title'],
+            'thumbnail': record.get('thumb', {}).get('preview'),
+            'description': record.get('teaser'),
+            'duration': parse_duration(record.get('duration')),
+            'timestamp': parse_iso8601(record.get('publishOn')),
+        }
 
-        def _check_sequence(cur_fragments):
-            if not cur_fragments:
-                return
-            for i in range(len(cur_fragments)):
-                cur_sequence = (''.join(cur_fragments[i:] + cur_fragments[:i])).encode('ascii')
-                try:
-                    raw_data = compat_b64decode(cur_sequence)
-                    if compat_ord(raw_data[0]) == compat_ord('{'):
-                        return json.loads(raw_data.decode('utf-8'))
-                except (TypeError, binascii.Error, UnicodeDecodeError, ValueError):
+        media_id = record.get('turnerMediaId')
+        if media_id:
+            self._initialize_geo_bypass({
+                'countries': ['US'],
+            })
+            info.update(self._extract_ngtv_info(media_id, {
+                'accessToken': record['turnerMediaAuthToken'],
+                'accessTokenType': 'jws',
+            }))
+        else:
+            video_sources = self._graphql_call('''{
+  %s(id: "%s") {
+    src
+  }
+}''', 'RecordVideoSource', video_id) or {}
+
+            formats = []
+            get_quality = qualities(['low', 'sd', 'hd', 'uhd'])
+            for format_id, src in video_sources.get('src', {}).items():
+                if not isinstance(src, dict):
                     continue
-
-        def _check_data():
-            for i in range(len(base64_fragments) + 1):
-                for j in range(i, len(base64_fragments) + 1):
-                    data = _check_sequence(base64_fragments[:i] + base64_fragments[j:])
-                    if data:
-                        return data
-
-        self.to_screen('Try to compute possible data sequence. This may take some time.')
-        data = _check_data()
-
-        if not data:
-            raise ExtractorError(
-                'Preload information could not be extracted', expected=True)
-
-        formats = []
-        get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])
-        for filed in data['files']:
-            if determine_ext(filed['url']) == 'm3u8':
-                # compat_urllib_parse.urljoin does not work here
-                if filed['url'].startswith('/'):
-                    m3u8_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + filed['url']
-                else:
-                    m3u8_url = filed['url']
-                m3u8_formats = self._extract_m3u8_formats(
-                    m3u8_url, video_id, ext='mp4')
-                for m3u8_format in m3u8_formats:
-                    if m3u8_format not in formats:
-                        formats.append(m3u8_format)
-            elif determine_ext(filed['url']) == 'f4m':
-                # TODO Correct f4m extraction
-                continue
-            else:
-                if filed['url'].startswith('/mp4:protected/'):
-                    # TODO Correct extraction for these files
+                src_url = src.get('src')
+                if not src_url:
                     continue
-                m_format = re.search(r'(\d+(k|p))\.mp4', filed['url'])
-                if m_format is not None:
-                    format_id = m_format.group(1)
+                ext = determine_ext(src_url, mimetype2ext(src.get('type')))
+                if format_id == 'hls' or ext == 'm3u8':
+                    # compat_urllib_parse.urljoin does not work here
+                    if src_url.startswith('/'):
+                        src_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + src_url
+                    formats.extend(self._extract_m3u8_formats(
+                        src_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
                 else:
-                    format_id = filed['bitrate']
-                tbr = (
-                    int(filed['bitrate'])
-                    if filed['bitrate'].isdigit()
-                    else None)
-
-                formats.append({
-                    'url': filed['url'],
-                    'ext': 'mp4',
-                    'tbr': tbr,
-                    'format_id': format_id,
-                    'quality': get_quality(format_id),
-                })
-
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'display_id': display_id,
-            'formats': formats,
-            'title': data['title'],
-            'thumbnail': data.get('thumb', {}).get('href'),
-            'description': data.get('teaser'),
-            'duration': data.get('duration'),
-            'age_limit': self._family_friendly_search(webpage),
-        }
+                    if src_url.startswith('/mp4:protected/'):
+                        # TODO Correct extraction for these files
+                        continue
+                    tbr = int_or_none(self._search_regex(
+                        r'(\d+)k\.mp4', src_url, 'tbr', default=None))
+
+                    formats.append({
+                        'url': src_url,
+                        'ext': ext,
+                        'tbr': tbr,
+                        'format_id': format_id,
+                        'quality': get_quality(format_id),
+                    })
+            if not formats:
+                formats = self._extract_m3u8_formats(
+                    record['file']['url'], video_id, 'mp4', fatal=False)
+            self._sort_formats(formats)
+            info['formats'] = formats
+
+        return info
index 0c6f70784c00cd880fda85d4d830ab478ed77eeb..a586f30ad55a2e060257d86593c793471246d9fc 100644 (file)
@@ -32,7 +32,7 @@ class TennisTVIE(InfoExtractor):
     _NETRC_MACHINE = 'tennistv'
 
     def _login(self):
-        (username, password) = self._get_login_info()
+        username, password = self._get_login_info()
         if not username or not password:
             raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
 
index e595c4a69b3f03361abc05f6bca61adecb61cf36..903f4738005625f35cebf298ee196dc6128460df 100644 (file)
@@ -19,6 +19,7 @@ class TF1IE(InfoExtractor):
             # Sometimes wat serves the whole file with the --test option
             'skip_download': True,
         },
+        'expected_warnings': ['HTTP Error 404'],
     }, {
         'url': 'http://www.tfou.fr/chuggington/videos/le-grand-mysterioso-chuggington-7085291-739.html',
         'info_dict': {
index 36f6c16732c7217141e860456788d389eb6bdd94..a51fa6515e6e09d4e2b1794c475f8c251eaf2385 100644 (file)
@@ -36,7 +36,7 @@ class TubiTvIE(InfoExtractor):
     }]
 
     def _login(self):
-        (username, password) = self._get_login_info()
+        username, password = self._get_login_info()
         if username is None:
             return
         self.report_login()
index 786143525d4d7cf4455ec59eff20a5e3a88dc4ea..edbb0aa6944ba82b36415875f2d99e570b3373fc 100644 (file)
@@ -4,11 +4,18 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import int_or_none
+from ..compat import compat_str
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    urlencode_postdata
+)
 
 
 class TumblrIE(InfoExtractor):
     _VALID_URL = r'https?://(?P<blog_name>[^/?#&]+)\.tumblr\.com/(?:post|video)/(?P<id>[0-9]+)(?:$|[/?#])'
+    _NETRC_MACHINE = 'tumblr'
+    _LOGIN_URL = 'https://www.tumblr.com/login'
     _TESTS = [{
         'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes',
         'md5': '479bb068e5b16462f5176a6828829767',
@@ -97,6 +104,45 @@ class TumblrIE(InfoExtractor):
         'add_ie': ['Instagram'],
     }]
 
+    def _real_initialize(self):
+        self._login()
+
+    def _login(self):
+        username, password = self._get_login_info()
+        if username is None:
+            return
+
+        login_page = self._download_webpage(
+            self._LOGIN_URL, None, 'Downloading login page')
+
+        login_form = self._hidden_inputs(login_page)
+        login_form.update({
+            'user[email]': username,
+            'user[password]': password
+        })
+
+        response, urlh = self._download_webpage_handle(
+            self._LOGIN_URL, None, 'Logging in',
+            data=urlencode_postdata(login_form), headers={
+                'Content-Type': 'application/x-www-form-urlencoded',
+                'Referer': self._LOGIN_URL,
+            })
+
+        # Successful login
+        if '/dashboard' in urlh.geturl():
+            return
+
+        login_errors = self._parse_json(
+            self._search_regex(
+                r'RegistrationForm\.errors\s*=\s*(\[.+?\])\s*;', response,
+                'login errors', default='[]'),
+            None, fatal=False)
+        if login_errors:
+            raise ExtractorError(
+                'Unable to login: %s' % login_errors[0], expected=True)
+
+        self.report_warning('Login has probably failed')
+
     def _real_extract(self, url):
         m_url = re.match(self._VALID_URL, url)
         video_id = m_url.group('id')
@@ -105,11 +151,19 @@ class TumblrIE(InfoExtractor):
         url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
         webpage, urlh = self._download_webpage_handle(url, video_id)
 
+        redirect_url = compat_str(urlh.geturl())
+        if 'tumblr.com/safe-mode' in redirect_url or redirect_url.startswith('/safe-mode'):
+            raise ExtractorError(
+                'This Tumblr may contain sensitive media. '
+                'Disable safe mode in your account settings '
+                'at https://www.tumblr.com/settings/account#safe_mode',
+                expected=True)
+
         iframe_url = self._search_regex(
             r'src=\'(https?://www\.tumblr\.com/video/[^\']+)\'',
             webpage, 'iframe url', default=None)
         if iframe_url is None:
-            return self.url_result(urlh.geturl(), 'Generic')
+            return self.url_result(redirect_url, 'Generic')
 
         iframe = self._download_webpage(iframe_url, video_id, 'Downloading iframe page')
 
index 7e51de89ed6082d35737142e85efb19726b03985..c7a5f5a63a0f683776017e24090054b8fdfbe3ae 100644 (file)
@@ -62,7 +62,7 @@ class TuneInBaseIE(InfoExtractor):
 
         return {
             'id': content_id,
-            'title': title,
+            'title': self._live_title(title) if is_live else title,
             'formats': formats,
             'thumbnail': thumbnail,
             'location': location,
index e73b64aebd80977cbf9551f20e5dadac2a15a9df..2b7b0d6e1b046d7184b194b5688af5295e3b8436 100644 (file)
@@ -9,6 +9,7 @@ from ..utils import (
     xpath_text,
     int_or_none,
     determine_ext,
+    float_or_none,
     parse_duration,
     xpath_attr,
     update_url_query,
@@ -23,14 +24,17 @@ class TurnerBaseIE(AdobePassIE):
     def _extract_timestamp(self, video_data):
         return int_or_none(xpath_attr(video_data, 'dateCreated', 'uts'))
 
-    def _add_akamai_spe_token(self, tokenizer_src, video_url, content_id, ap_data):
+    def _add_akamai_spe_token(self, tokenizer_src, video_url, content_id, ap_data, custom_tokenizer_query=None):
         secure_path = self._search_regex(r'https?://[^/]+(.+/)', video_url, 'secure path') + '*'
         token = self._AKAMAI_SPE_TOKEN_CACHE.get(secure_path)
         if not token:
             query = {
                 'path': secure_path,
-                'videoId': content_id,
             }
+            if custom_tokenizer_query:
+                query.update(custom_tokenizer_query)
+            else:
+                query['videoId'] = content_id
             if ap_data.get('auth_required'):
                 query['accessToken'] = self._extract_mvpd_auth(ap_data['url'], content_id, ap_data['site_name'], ap_data['site_name'])
             auth = self._download_xml(
@@ -188,3 +192,42 @@ class TurnerBaseIE(AdobePassIE):
             'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
             'is_live': is_live,
         }
+
+    def _extract_ngtv_info(self, media_id, tokenizer_query, ap_data=None):
+        streams_data = self._download_json(
+            'http://medium.ngtv.io/media/%s/tv' % media_id,
+            media_id)['media']['tv']
+        duration = None
+        chapters = []
+        formats = []
+        for supported_type in ('unprotected', 'bulkaes'):
+            stream_data = streams_data.get(supported_type, {})
+            m3u8_url = stream_data.get('secureUrl') or stream_data.get('url')
+            if not m3u8_url:
+                continue
+            if stream_data.get('playlistProtection') == 'spe':
+                m3u8_url = self._add_akamai_spe_token(
+                    'http://token.ngtv.io/token/token_spe',
+                    m3u8_url, media_id, ap_data or {}, tokenizer_query)
+            formats.extend(self._extract_m3u8_formats(
+                m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))
+
+            duration = float_or_none(stream_data.get('totalRuntime'))
+
+            if not chapters:
+                for chapter in stream_data.get('contentSegments', []):
+                    start_time = float_or_none(chapter.get('start'))
+                    chapter_duration = float_or_none(chapter.get('duration'))
+                    if start_time is None or chapter_duration is None:
+                        continue
+                    chapters.append({
+                        'start_time': start_time,
+                        'end_time': start_time + chapter_duration,
+                    })
+        self._sort_formats(formats)
+
+        return {
+            'formats': formats,
+            'chapters': chapters,
+            'duration': duration,
+        }
index cfcce020a62f1cf5728e275a8122d68428865b4b..51923e44afcc7913c32406c940bba6aec9edbf13 100644 (file)
@@ -1,13 +1,12 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     int_or_none,
     parse_iso8601,
-    try_get,
-    determine_ext,
 )
 
 
@@ -78,42 +77,25 @@ class TV4IE(InfoExtractor):
 
         title = info['title']
 
-        subtitles = {}
-        formats = []
-        # http formats are linked with unresolvable host
-        for kind in ('hls3', ''):
-            data = self._download_json(
-                'https://prima.tv4play.se/api/web/asset/%s/play.json' % video_id,
-                video_id, 'Downloading sources JSON', query={
-                    'protocol': kind,
-                    'videoFormat': 'MP4+WEBVTT',
-                })
-            items = try_get(data, lambda x: x['playback']['items']['item'])
-            if not items:
-                continue
-            if isinstance(items, dict):
-                items = [items]
-            for item in items:
-                manifest_url = item.get('url')
-                if not isinstance(manifest_url, compat_str):
-                    continue
-                ext = determine_ext(manifest_url)
-                if ext == 'm3u8':
-                    formats.extend(self._extract_m3u8_formats(
-                        manifest_url, video_id, 'mp4', entry_protocol='m3u8_native',
-                        m3u8_id=kind, fatal=False))
-                elif ext == 'f4m':
-                    formats.extend(self._extract_akamai_formats(
-                        manifest_url, video_id, {
-                            'hls': 'tv4play-i.akamaihd.net',
-                        }))
-                elif ext == 'webvtt':
-                    subtitles = self._merge_subtitles(
-                        subtitles, {
-                            'sv': [{
-                                'url': manifest_url,
-                                'ext': 'vtt',
-                            }]})
+        manifest_url = self._download_json(
+            'https://playback-api.b17g.net/media/' + video_id,
+            video_id, query={
+                'service': 'tv4',
+                'device': 'browser',
+                'protocol': 'hls',
+            })['playbackItem']['manifestUrl']
+        formats = self._extract_m3u8_formats(
+            manifest_url, video_id, 'mp4',
+            'm3u8_native', m3u8_id='hls', fatal=False)
+        formats.extend(self._extract_mpd_formats(
+            manifest_url.replace('.m3u8', '.mpd'),
+            video_id, mpd_id='dash', fatal=False))
+        formats.extend(self._extract_f4m_formats(
+            manifest_url.replace('.m3u8', '.f4m'),
+            video_id, f4m_id='hds', fatal=False))
+        formats.extend(self._extract_ism_formats(
+            re.sub(r'\.ism/.+?\.m3u8', r'.ism/Manifest', manifest_url),
+            video_id, ism_id='mss', fatal=False))
 
         if not formats and info.get('is_geo_restricted'):
             self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
@@ -124,7 +106,7 @@ class TV4IE(InfoExtractor):
             'id': video_id,
             'title': title,
             'formats': formats,
-            'subtitles': subtitles,
+            'subtitles': subtitles,
             'description': info.get('description'),
             'timestamp': parse_iso8601(info.get('broadcast_date_time')),
             'duration': int_or_none(info.get('duration')),
diff --git a/youtube_dl/extractor/tvnet.py b/youtube_dl/extractor/tvnet.py
new file mode 100644 (file)
index 0000000..2b2630b
--- /dev/null
@@ -0,0 +1,148 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    int_or_none,
+    unescapeHTML,
+)
+
+
+class TVNetIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:[^/]+)\.tvnet\.gov\.vn/[^/]+/(?:\d+/)?(?P<id>\d+)(?:/|$)'
+    _TESTS = [{
+        # video
+        'url': 'http://de.tvnet.gov.vn/video/109788/vtv1---bac-tuyet-tai-lao-cai-va-ha-giang/tin-nong-24h',
+        'md5': 'b4d7abe0252c9b47774760b7519c7558',
+        'info_dict': {
+            'id': '109788',
+            'ext': 'mp4',
+            'title': 'VTV1 - Bắc tuyết tại Lào Cai và Hà Giang',
+            'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
+            'is_live': False,
+            'view_count': int,
+        },
+    }, {
+        # audio
+        'url': 'http://vn.tvnet.gov.vn/radio/27017/vov1---ban-tin-chieu-10062018/doi-song-va-xa-hoi',
+        'md5': 'b5875ce9b0a2eecde029216d0e6db2ae',
+        'info_dict': {
+            'id': '27017',
+            'ext': 'm4a',
+            'title': 'VOV1 - Bản tin chiều (10/06/2018)',
+            'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
+            'is_live': False,
+        },
+    }, {
+        'url': 'http://us.tvnet.gov.vn/video/118023/129999/ngay-0705',
+        'info_dict': {
+            'id': '129999',
+            'ext': 'mp4',
+            'title': 'VTV1 - Quốc hội với cử tri (11/06/2018)',
+            'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
+            'is_live': False,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        # live stream
+        'url': 'http://us.tvnet.gov.vn/kenh-truyen-hinh/1011/vtv1',
+        'info_dict': {
+            'id': '1011',
+            'ext': 'mp4',
+            'title': r're:^VTV1 \| LiveTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
+            'is_live': True,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        # radio live stream
+        'url': 'http://vn.tvnet.gov.vn/kenh-truyen-hinh/1014',
+        'info_dict': {
+            'id': '1014',
+            'ext': 'm4a',
+            'title': r're:VOV1 \| LiveTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
+            'is_live': True,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://us.tvnet.gov.vn/phim/6136/25510/vtv3---ca-mot-doi-an-oan-tap-1-50/phim-truyen-hinh',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._og_search_title(
+            webpage, default=None) or self._html_search_meta(
+            'title', webpage, default=None) or self._search_regex(
+            r'<title>([^<]+)<', webpage, 'title')
+        title = re.sub(r'\s*-\s*TV Net\s*$', '', title)
+
+        if '/video/' in url or '/radio/' in url:
+            is_live = False
+        elif '/kenh-truyen-hinh/' in url:
+            is_live = True
+        else:
+            is_live = None
+
+        data_file = unescapeHTML(self._search_regex(
+            r'data-file=(["\'])(?P<url>(?:https?:)?//.+?)\1', webpage,
+            'data file', group='url'))
+
+        stream_urls = set()
+        formats = []
+        for stream in self._download_json(data_file, video_id):
+            if not isinstance(stream, dict):
+                continue
+            stream_url = stream.get('url')
+            if (stream_url in stream_urls or not stream_url or
+                    not isinstance(stream_url, compat_str)):
+                continue
+            stream_urls.add(stream_url)
+            formats.extend(self._extract_m3u8_formats(
+                stream_url, video_id, 'mp4',
+                entry_protocol='m3u8' if is_live else 'm3u8_native',
+                m3u8_id='hls', fatal=False))
+        self._sort_formats(formats)
+
+        # better support for radio streams
+        if title.startswith('VOV'):
+            for f in formats:
+                f.update({
+                    'ext': 'm4a',
+                    'vcodec': 'none',
+                })
+
+        thumbnail = self._og_search_thumbnail(
+            webpage, default=None) or unescapeHTML(
+            self._search_regex(
+                r'data-image=(["\'])(?P<url>(?:https?:)?//.+?)\1', webpage,
+                'thumbnail', default=None, group='url'))
+
+        if is_live:
+            title = self._live_title(title)
+
+        view_count = int_or_none(self._search_regex(
+            r'(?s)<div[^>]+\bclass=["\'].*?view-count[^>]+>.*?(\d+).*?</div>',
+            webpage, 'view count', default=None))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'is_live': is_live,
+            'view_count': view_count,
+            'formats': formats,
+        }
index 84597b55e0f6047a1dccd1905cb4771949b3cf00..e09b5f804d897954f4488344d27beaa8a7a2eea6 100644 (file)
@@ -227,14 +227,16 @@ class TVPlayIE(InfoExtractor):
 
     def _real_extract(self, url):
         url, smuggled_data = unsmuggle_url(url, {})
-        self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
+        self._initialize_geo_bypass({
+            'countries': smuggled_data.get('geo_countries'),
+        })
 
         video_id = self._match_id(url)
         geo_country = self._search_regex(
             r'https?://[^/]+\.([a-z]{2})', url,
             'geo country', default=None)
         if geo_country:
-            self._initialize_geo_bypass([geo_country.upper()])
+            self._initialize_geo_bypass({'countries': [geo_country.upper()]})
         video = self._download_json(
             'http://playapi.mtgx.tv/v3/videos/%s' % video_id, video_id, 'Downloading video JSON')
 
index 4c11fd3c38abb88fb77baf199b47039b79c43458..e01f11331007d072e6e78272d97fedce599ede4a 100644 (file)
@@ -8,6 +8,7 @@ import random
 from .common import InfoExtractor
 from ..compat import (
     compat_HTTPError,
+    compat_kwargs,
     compat_parse_qs,
     compat_str,
     compat_urllib_parse_urlencode,
@@ -16,11 +17,14 @@ from ..compat import (
 from ..utils import (
     clean_html,
     ExtractorError,
+    float_or_none,
     int_or_none,
-    js_to_json,
     orderedSet,
     parse_duration,
     parse_iso8601,
+    qualities,
+    try_get,
+    unified_timestamp,
     update_url_query,
     urlencode_postdata,
     urljoin,
@@ -45,10 +49,11 @@ class TwitchBaseIE(InfoExtractor):
                 '%s returned error: %s - %s' % (self.IE_NAME, error, response.get('message')),
                 expected=True)
 
-    def _call_api(self, path, item_id, note):
+    def _call_api(self, path, item_id, *args, **kwargs):
+        kwargs.setdefault('headers', {})['Client-ID'] = self._CLIENT_ID
         response = self._download_json(
-            '%s/%s' % (self._API_BASE, path), item_id, note,
-            headers={'Client-ID': self._CLIENT_ID})
+            '%s/%s' % (self._API_BASE, path), item_id,
+            *args, **compat_kwargs(kwargs))
         self._handle_error(response)
         return response
 
@@ -56,7 +61,7 @@ class TwitchBaseIE(InfoExtractor):
         self._login()
 
     def _login(self):
-        (username, password) = self._get_login_info()
+        username, password = self._get_login_info()
         if username is None:
             return
 
@@ -622,21 +627,23 @@ class TwitchStreamIE(TwitchBaseIE):
         }
 
 
-class TwitchClipsIE(InfoExtractor):
+class TwitchClipsIE(TwitchBaseIE):
     IE_NAME = 'twitch:clips'
     _VALID_URL = r'https?://clips\.twitch\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
 
     _TESTS = [{
-        'url': 'https://clips.twitch.tv/ea/AggressiveCobraPoooound',
+        'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat',
         'md5': '761769e1eafce0ffebfb4089cb3847cd',
         'info_dict': {
-            'id': 'AggressiveCobraPoooound',
+            'id': '42850523',
             'ext': 'mp4',
             'title': 'EA Play 2016 Live from the Novo Theatre',
             'thumbnail': r're:^https?://.*\.jpg',
+            'timestamp': 1465767393,
+            'upload_date': '20160612',
             'creator': 'EA',
             'uploader': 'stereotype_',
-            'uploader_id': 'stereotype_',
+            'uploader_id': '43566419',
         },
     }, {
         # multiple formats
@@ -647,34 +654,63 @@ class TwitchClipsIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        webpage = self._download_webpage(url, video_id)
+        status = self._download_json(
+            'https://clips.twitch.tv/api/v2/clips/%s/status' % video_id,
+            video_id)
 
-        clip = self._parse_json(
-            self._search_regex(
-                r'(?s)clipInfo\s*=\s*({.+?});', webpage, 'clip info'),
-            video_id, transform_source=js_to_json)
+        formats = []
 
-        title = clip.get('title') or clip.get('channel_title') or self._og_search_title(webpage)
-
-        formats = [{
-            'url': option['source'],
-            'format_id': option.get('quality'),
-            'height': int_or_none(option.get('quality')),
-        } for option in clip.get('quality_options', []) if option.get('source')]
-
-        if not formats:
-            formats = [{
-                'url': clip['clip_video_url'],
-            }]
+        for option in status['quality_options']:
+            if not isinstance(option, dict):
+                continue
+            source = option.get('source')
+            if not source or not isinstance(source, compat_str):
+                continue
+            formats.append({
+                'url': source,
+                'format_id': option.get('quality'),
+                'height': int_or_none(option.get('quality')),
+                'fps': int_or_none(option.get('frame_rate')),
+            })
 
         self._sort_formats(formats)
 
-        return {
-            'id': video_id,
-            'title': title,
-            'thumbnail': self._og_search_thumbnail(webpage),
-            'creator': clip.get('broadcaster_display_name') or clip.get('broadcaster_login'),
-            'uploader': clip.get('curator_login'),
-            'uploader_id': clip.get('curator_display_name'),
+        info = {
             'formats': formats,
         }
+
+        clip = self._call_api(
+            'kraken/clips/%s' % video_id, video_id, fatal=False, headers={
+                'Accept': 'application/vnd.twitchtv.v5+json',
+            })
+
+        if clip:
+            quality_key = qualities(('tiny', 'small', 'medium'))
+            thumbnails = []
+            thumbnails_dict = clip.get('thumbnails')
+            if isinstance(thumbnails_dict, dict):
+                for thumbnail_id, thumbnail_url in thumbnails_dict.items():
+                    thumbnails.append({
+                        'id': thumbnail_id,
+                        'url': thumbnail_url,
+                        'preference': quality_key(thumbnail_id),
+                    })
+
+            info.update({
+                'id': clip.get('tracking_id') or video_id,
+                'title': clip.get('title') or video_id,
+                'duration': float_or_none(clip.get('duration')),
+                'views': int_or_none(clip.get('views')),
+                'timestamp': unified_timestamp(clip.get('created_at')),
+                'thumbnails': thumbnails,
+                'creator': try_get(clip, lambda x: x['broadcaster']['display_name'], compat_str),
+                'uploader': try_get(clip, lambda x: x['curator']['display_name'], compat_str),
+                'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str),
+            })
+        else:
+            info.update({
+                'title': video_id,
+                'id': video_id,
+            })
+
+        return info
index d7e425041f1246a20d587174619db6db3f7c0c83..de41065d64921af6e861775f85dc5f3011caa524 100644 (file)
@@ -63,7 +63,7 @@ class TwitterCardIE(TwitterBaseIE):
                 'id': '623160978427936768',
                 'ext': 'mp4',
                 'title': 'Twitter web player',
-                'thumbnail': r're:^https?://.*(?:\bformat=|\.)jpg',
+                'thumbnail': r're:^https?://.*$',
             },
         },
         {
@@ -108,6 +108,8 @@ class TwitterCardIE(TwitterBaseIE):
         },
     ]
 
+    _API_BASE = 'https://api.twitter.com/1.1'
+
     def _parse_media_info(self, media_info, video_id):
         formats = []
         for media_variant in media_info.get('variants', []):
@@ -149,7 +151,7 @@ class TwitterCardIE(TwitterBaseIE):
             main_script, 'bearer token')
         # https://developer.twitter.com/en/docs/tweets/post-and-engage/api-reference/get-statuses-show-id
         api_data = self._download_json(
-            'https://api.twitter.com/1.1/statuses/show/%s.json' % video_id,
+            '%s/statuses/show/%s.json' % (self._API_BASE, video_id),
             video_id, 'Downloading API data',
             headers={
                 'Authorization': 'Bearer ' + bearer_token,
@@ -223,15 +225,49 @@ class TwitterCardIE(TwitterBaseIE):
                 formats.extend(self._extract_mobile_formats(username, video_id))
 
             if formats:
+                title = self._search_regex(r'<title>([^<]+)</title>', webpage, 'title')
+                thumbnail = config.get('posterImageUrl') or config.get('image_src')
+                duration = float_or_none(config.get('duration'), scale=1000) or duration
                 break
 
+        if not formats:
+            headers = {
+                'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw',
+                'Referer': url,
+            }
+            ct0 = self._get_cookies(url).get('ct0')
+            if ct0:
+                headers['csrf_token'] = ct0.value
+            guest_token = self._download_json(
+                '%s/guest/activate.json' % self._API_BASE, video_id,
+                'Downloading guest token', data=b'',
+                headers=headers)['guest_token']
+            headers['x-guest-token'] = guest_token
+            self._set_cookie('api.twitter.com', 'gt', guest_token)
+            config = self._download_json(
+                '%s/videos/tweet/config/%s.json' % (self._API_BASE, video_id),
+                video_id, headers=headers)
+            track = config['track']
+            vmap_url = track.get('vmapUrl')
+            if vmap_url:
+                formats = self._extract_formats_from_vmap_url(vmap_url, video_id)
+            else:
+                playback_url = track['playbackUrl']
+                if determine_ext(playback_url) == 'm3u8':
+                    formats = self._extract_m3u8_formats(
+                        playback_url, video_id, 'mp4',
+                        entry_protocol='m3u8_native', m3u8_id='hls')
+                else:
+                    formats = [{
+                        'url': playback_url,
+                    }]
+            title = 'Twitter web player'
+            thumbnail = config.get('posterImage')
+            duration = float_or_none(track.get('durationMs'), scale=1000)
+
         self._remove_duplicate_formats(formats)
         self._sort_formats(formats)
 
-        title = self._search_regex(r'<title>([^<]+)</title>', webpage, 'title')
-        thumbnail = config.get('posterImageUrl') or config.get('image_src')
-        duration = float_or_none(config.get('duration'), scale=1000) or duration
-
         return {
             'id': video_id,
             'title': title,
@@ -375,6 +411,22 @@ class TwitterIE(InfoExtractor):
         'params': {
             'skip_download': True,  # requires ffmpeg
         },
+    }, {
+        # card via api.twitter.com/1.1/videos/tweet/config
+        'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
+        'info_dict': {
+            'id': '1001551623938805763',
+            'ext': 'mp4',
+            'title': 're:.*?Shep is on a roll today.*?',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'description': 'md5:63b036c228772523ae1924d5f8e5ed6b',
+            'uploader': 'Lis Power',
+            'uploader_id': 'LisPower1',
+            'duration': 111.278,
+        },
+        'params': {
+            'skip_download': True,  # requires ffmpeg
+        },
     }]
 
     def _real_extract(self, url):
index 6d6c0a98fa64e9e2afc68ce2ad569f5a91d5c24b..a7196997ec111cc4e98331d70cfb5df46682fa49 100644 (file)
@@ -18,6 +18,7 @@ from ..utils import (
     int_or_none,
     js_to_json,
     sanitized_Request,
+    try_get,
     unescapeHTML,
     urlencode_postdata,
 )
@@ -58,6 +59,10 @@ class UdemyIE(InfoExtractor):
         # no url in outputs format entry
         'url': 'https://www.udemy.com/learn-web-development-complete-step-by-step-guide-to-success/learn/v4/t/lecture/4125812',
         'only_matching': True,
+    }, {
+        # only outputs rendition
+        'url': 'https://www.udemy.com/how-you-can-help-your-local-community-5-amazing-examples/learn/v4/t/lecture/3225750?start=0',
+        'only_matching': True,
     }]
 
     def _extract_course_info(self, webpage, video_id):
@@ -101,7 +106,7 @@ class UdemyIE(InfoExtractor):
             % (course_id, lecture_id),
             lecture_id, 'Downloading lecture JSON', query={
                 'fields[lecture]': 'title,description,view_html,asset',
-                'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,data',
+                'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,captions,data',
             })
 
     def _handle_error(self, response):
@@ -115,9 +120,9 @@ class UdemyIE(InfoExtractor):
                 error_str += ' - %s' % error_data.get('formErrors')
             raise ExtractorError(error_str, expected=True)
 
-    def _download_webpage(self, *args, **kwargs):
+    def _download_webpage_handle(self, *args, **kwargs):
         kwargs.setdefault('headers', {})['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4'
-        return super(UdemyIE, self)._download_webpage(
+        return super(UdemyIE, self)._download_webpage_handle(
             *args, **compat_kwargs(kwargs))
 
     def _download_json(self, url_or_request, *args, **kwargs):
@@ -146,7 +151,7 @@ class UdemyIE(InfoExtractor):
         self._login()
 
     def _login(self):
-        (username, password) = self._get_login_info()
+        username, password = self._get_login_info()
         if username is None:
             return
 
@@ -299,9 +304,25 @@ class UdemyIE(InfoExtractor):
                     'url': src,
                 })
 
-        download_urls = asset.get('download_urls')
-        if isinstance(download_urls, dict):
-            extract_formats(download_urls.get('Video'))
+        for url_kind in ('download', 'stream'):
+            urls = asset.get('%s_urls' % url_kind)
+            if isinstance(urls, dict):
+                extract_formats(urls.get('Video'))
+
+        captions = asset.get('captions')
+        if isinstance(captions, list):
+            for cc in captions:
+                if not isinstance(cc, dict):
+                    continue
+                cc_url = cc.get('url')
+                if not cc_url or not isinstance(cc_url, compat_str):
+                    continue
+                lang = try_get(cc, lambda x: x['locale']['locale'], compat_str)
+                sub_dict = (automatic_captions if cc.get('source') == 'auto'
+                            else subtitles)
+                sub_dict.setdefault(lang or 'en', []).append({
+                    'url': cc_url,
+                })
 
         view_html = lecture.get('view_html')
         if view_html:
@@ -357,6 +378,12 @@ class UdemyIE(InfoExtractor):
                     fatal=False)
                 extract_subtitles(text_tracks)
 
+        if not formats and outputs:
+            for format_id, output in outputs.items():
+                f = extract_output_format(output, format_id)
+                if f.get('url'):
+                    formats.append(f)
+
         self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id'))
 
         return {
index ab823814bfe47ee4d5aa16947aea7450d91829ed..f3eaee6b3f45b71dadb0a1d3310aa3bf9cebf073 100644 (file)
@@ -3,13 +3,16 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from ..utils import (
+    ExtractorError,
     parse_duration,
     parse_iso8601,
+    urlencode_postdata,
 )
 
 
 class UFCTVIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?ufc\.tv/video/(?P<id>[^/]+)'
+    _NETRC_MACHINE = 'ufctv'
     _TEST = {
         'url': 'https://www.ufc.tv/video/ufc-219-countdown-full-episode',
         'info_dict': {
@@ -26,6 +29,21 @@ class UFCTVIE(InfoExtractor):
         }
     }
 
+    def _real_initialize(self):
+        username, password = self._get_login_info()
+        if username is None:
+            return
+
+        code = self._download_json(
+            'https://www.ufc.tv/secure/authenticate',
+            None, 'Logging in', data=urlencode_postdata({
+                'username': username,
+                'password': password,
+                'format': 'json',
+            })).get('code')
+        if code and code != 'loginsuccess':
+            raise ExtractorError(code, expected=True)
+
     def _real_extract(self, url):
         display_id = self._match_id(url)
         video_data = self._download_json(url, display_id, query={
index 80a643dfe6d6a7a160cb4035b52b9a95b03769cf..31eee0ba72588ce85ad9a5bbe6e296f5a4b455a2 100644 (file)
@@ -75,7 +75,7 @@ class VesselIE(InfoExtractor):
                     'Access to this content is restricted. (%s said: %s)' % (self.IE_NAME, err_code), expected=True)
 
     def _login(self):
-        (username, password) = self._get_login_info()
+        username, password = self._get_login_info()
         if username is None:
             return
         self.report_login()
index 9026e778cfb108462c5fdfa5ca2a42d166dc37a7..d70283479ae18c5c01b0c5caba75ad0e3392698f 100644 (file)
@@ -54,7 +54,8 @@ class VidziIE(InfoExtractor):
                 self._search_regex(
                     r'setup\(([^)]+)\)', code, 'jwplayer data',
                     default=NO_DEFAULT if num == len(codes) else '{}'),
-                video_id, transform_source=js_to_json)
+                video_id, transform_source=lambda s: js_to_json(
+                    re.sub(r'\s*\+\s*window\[.+?\]', '', s)))
             if jwplayer_data:
                 break
 
index 1f29c273f814bcefbacc4678fe0079686adb1ea6..c43d1a1e838987bb916c7b3eca83b7a900457c2b 100644 (file)
@@ -1,24 +1,27 @@
 from __future__ import unicode_literals
 
+import base64
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
 from ..utils import (
     ExtractorError,
     clean_html,
     determine_ext,
     int_or_none,
     js_to_json,
+    parse_age_limit,
     parse_duration,
 )
 
 
 class ViewLiftBaseIE(InfoExtractor):
-    _DOMAINS_REGEX = r'(?:snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|monumentalsportsnetwork|vayafilm)\.com|kesari\.tv'
+    _DOMAINS_REGEX = r'(?:snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|(?:monumental|lax)sportsnetwork|vayafilm)\.com|hoichoi\.tv'
 
 
 class ViewLiftEmbedIE(ViewLiftBaseIE):
-    _VALID_URL = r'https?://(?:(?:www|embed)\.)?(?:%s)/embed/player\?.*\bfilmId=(?P<id>[\da-f-]{36})' % ViewLiftBaseIE._DOMAINS_REGEX
+    _VALID_URL = r'https?://(?:(?:www|embed)\.)?(?:%s)/embed/player\?.*\bfilmId=(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})' % ViewLiftBaseIE._DOMAINS_REGEX
     _TESTS = [{
         'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831&w=500',
         'md5': '2924e9215c6eff7a55ed35b72276bd93',
@@ -60,8 +63,10 @@ class ViewLiftEmbedIE(ViewLiftBaseIE):
 
         formats = []
         has_bitrate = False
-        for source in self._parse_json(js_to_json(self._search_regex(
-                r'(?s)sources:\s*(\[.+?\]),', webpage, 'json')), video_id):
+        sources = self._parse_json(self._search_regex(
+            r'(?s)sources:\s*(\[.+?\]),', webpage,
+            'sources', default='[]'), video_id, js_to_json)
+        for source in sources:
             file_ = source.get('file')
             if not file_:
                 continue
@@ -70,7 +75,8 @@ class ViewLiftEmbedIE(ViewLiftBaseIE):
             format_id = source.get('label') or ext
             if all(v in ('m3u8', 'hls') for v in (type_, ext)):
                 formats.extend(self._extract_m3u8_formats(
-                    file_, video_id, 'mp4', m3u8_id='hls'))
+                    file_, video_id, 'mp4', 'm3u8_native',
+                    m3u8_id='hls', fatal=False))
             else:
                 bitrate = int_or_none(self._search_regex(
                     [r'(\d+)kbps', r'_\d{1,2}x\d{1,2}_(\d{3,})\.%s' % ext],
@@ -85,6 +91,13 @@ class ViewLiftEmbedIE(ViewLiftBaseIE):
                     'tbr': bitrate,
                     'height': height,
                 })
+        if not formats:
+            hls_url = self._parse_json(self._search_regex(
+                r'filmInfo\.src\s*=\s*({.+?});',
+                webpage, 'src'), video_id, js_to_json)['src']
+            formats = self._extract_m3u8_formats(
+                hls_url, video_id, 'mp4', 'm3u8_native',
+                m3u8_id='hls', fatal=False)
         field_preference = None if has_bitrate else ('height', 'tbr', 'format_id')
         self._sort_formats(formats, field_preference)
 
@@ -109,10 +122,13 @@ class ViewLiftIE(ViewLiftBaseIE):
             'display_id': 'lost_for_life',
             'ext': 'mp4',
             'title': 'Lost for Life',
-            'description': 'md5:fbdacc8bb6b455e464aaf98bc02e1c82',
+            'description': 'md5:ea10b5a50405ae1f7b5269a6ec594102',
             'thumbnail': r're:^https?://.*\.jpg',
             'duration': 4489,
-            'categories': ['Documentary', 'Crime', 'Award Winning', 'Festivals']
+            'categories': 'mincount:3',
+            'age_limit': 14,
+            'upload_date': '20150421',
+            'timestamp': 1429656819,
         }
     }, {
         'url': 'http://www.snagfilms.com/show/the_world_cut_project/india',
@@ -125,7 +141,9 @@ class ViewLiftIE(ViewLiftBaseIE):
             'description': 'md5:5c168c5a8f4719c146aad2e0dfac6f5f',
             'thumbnail': r're:^https?://.*\.jpg',
             'duration': 979,
-            'categories': ['Documentary', 'Sports', 'Politics']
+            'categories': 'mincount:2',
+            'timestamp': 1399478279,
+            'upload_date': '20140507',
         }
     }, {
         # Film is not playable in your area.
@@ -138,9 +156,6 @@ class ViewLiftIE(ViewLiftBaseIE):
     }, {
         'url': 'http://www.winnersview.com/videos/the-good-son',
         'only_matching': True,
-    }, {
-        'url': 'http://www.kesari.tv/news/video/1461919076414',
-        'only_matching': True,
     }, {
         # Was once Kaltura embed
         'url': 'https://www.monumentalsportsnetwork.com/videos/john-carlson-postgame-2-25-15',
@@ -156,45 +171,96 @@ class ViewLiftIE(ViewLiftBaseIE):
             raise ExtractorError(
                 'Film %s is not available.' % display_id, expected=True)
 
-        film_id = self._search_regex(r'filmId=([\da-f-]{36})"', webpage, 'film id')
-
-        snag = self._parse_json(
-            self._search_regex(
-                r'Snag\.page\.data\s*=\s*(\[.+?\]);', webpage, 'snag'),
-            display_id)
-
-        for item in snag:
-            if item.get('data', {}).get('film', {}).get('id') == film_id:
-                data = item['data']['film']
-                title = data['title']
-                description = clean_html(data.get('synopsis'))
-                thumbnail = data.get('image')
-                duration = int_or_none(data.get('duration') or data.get('runtime'))
-                categories = [
-                    category['title'] for category in data.get('categories', [])
-                    if category.get('title')]
-                break
+        initial_store_state = self._search_regex(
+            r"window\.initialStoreState\s*=.*?JSON\.parse\(unescape\(atob\('([^']+)'\)\)\)",
+            webpage, 'Initial Store State', default=None)
+        if initial_store_state:
+            modules = self._parse_json(compat_urllib_parse_unquote(base64.b64decode(
+                initial_store_state).decode()), display_id)['page']['data']['modules']
+            content_data = next(m['contentData'][0] for m in modules if m.get('moduleType') == 'VideoDetailModule')
+            gist = content_data['gist']
+            film_id = gist['id']
+            title = gist['title']
+            video_assets = content_data['streamingInfo']['videoAssets']
+
+            formats = []
+            mpeg_video_assets = video_assets.get('mpeg') or []
+            for video_asset in mpeg_video_assets:
+                video_asset_url = video_asset.get('url')
+                if not video_asset:
+                    continue
+                bitrate = int_or_none(video_asset.get('bitrate'))
+                height = int_or_none(self._search_regex(
+                    r'^_?(\d+)[pP]$', video_asset.get('renditionValue'),
+                    'height', default=None))
+                formats.append({
+                    'url': video_asset_url,
+                    'format_id': 'http%s' % ('-%d' % bitrate if bitrate else ''),
+                    'tbr': bitrate,
+                    'height': height,
+                    'vcodec': video_asset.get('codec'),
+                })
+
+            hls_url = video_assets.get('hls')
+            if hls_url:
+                formats.extend(self._extract_m3u8_formats(
+                    hls_url, film_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+            self._sort_formats(formats, ('height', 'tbr', 'format_id'))
+
+            info = {
+                'id': film_id,
+                'display_id': display_id,
+                'title': title,
+                'description': gist.get('description'),
+                'thumbnail': gist.get('videoImageUrl'),
+                'duration': int_or_none(gist.get('runtime')),
+                'age_limit': parse_age_limit(content_data.get('parentalRating')),
+                'timestamp': int_or_none(gist.get('publishDate'), 1000),
+                'formats': formats,
+            }
+            for k in ('categories', 'tags'):
+                info[k] = [v['title'] for v in content_data.get(k, []) if v.get('title')]
+            return info
         else:
-            title = self._search_regex(
-                r'itemprop="title">([^<]+)<', webpage, 'title')
-            description = self._html_search_regex(
-                r'(?s)<div itemprop="description" class="film-synopsis-inner ">(.+?)</div>',
-                webpage, 'description', default=None) or self._og_search_description(webpage)
-            thumbnail = self._og_search_thumbnail(webpage)
-            duration = parse_duration(self._search_regex(
-                r'<span itemprop="duration" class="film-duration strong">([^<]+)<',
-                webpage, 'duration', fatal=False))
-            categories = re.findall(r'<a href="/movies/[^"]+">([^<]+)</a>', webpage)
+            film_id = self._search_regex(r'filmId=([\da-f-]{36})"', webpage, 'film id')
 
-        return {
-            '_type': 'url_transparent',
-            'url': 'http://%s/embed/player?filmId=%s' % (domain, film_id),
-            'id': film_id,
-            'display_id': display_id,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'duration': duration,
-            'categories': categories,
-            'ie_key': 'ViewLiftEmbed',
-        }
+            snag = self._parse_json(
+                self._search_regex(
+                    r'Snag\.page\.data\s*=\s*(\[.+?\]);', webpage, 'snag', default='[]'),
+                display_id)
+
+            for item in snag:
+                if item.get('data', {}).get('film', {}).get('id') == film_id:
+                    data = item['data']['film']
+                    title = data['title']
+                    description = clean_html(data.get('synopsis'))
+                    thumbnail = data.get('image')
+                    duration = int_or_none(data.get('duration') or data.get('runtime'))
+                    categories = [
+                        category['title'] for category in data.get('categories', [])
+                        if category.get('title')]
+                    break
+            else:
+                title = self._search_regex(
+                    r'itemprop="title">([^<]+)<', webpage, 'title')
+                description = self._html_search_regex(
+                    r'(?s)<div itemprop="description" class="film-synopsis-inner ">(.+?)</div>',
+                    webpage, 'description', default=None) or self._og_search_description(webpage)
+                thumbnail = self._og_search_thumbnail(webpage)
+                duration = parse_duration(self._search_regex(
+                    r'<span itemprop="duration" class="film-duration strong">([^<]+)<',
+                    webpage, 'duration', fatal=False))
+                categories = re.findall(r'<a href="/movies/[^"]+">([^<]+)</a>', webpage)
+
+            return {
+                '_type': 'url_transparent',
+                'url': 'http://%s/embed/player?filmId=%s' % (domain, film_id),
+                'id': film_id,
+                'display_id': display_id,
+                'title': title,
+                'description': description,
+                'thumbnail': thumbnail,
+                'duration': duration,
+                'categories': categories,
+                'ie_key': 'ViewLiftEmbed',
+            }
index ad2a2a4b70fdde18548e4dde62fcf9ccfc264ba4..546de95d8544a540da735a014bffd3ac096e25b6 100644 (file)
@@ -88,7 +88,7 @@ class VikiBaseIE(InfoExtractor):
         self._login()
 
     def _login(self):
-        (username, password) = self._get_login_info()
+        username, password = self._get_login_info()
         if username is None:
             return
 
index 08257147ef5abba46224190b9d8c32ab280182bf..3baa2d075543fe951db281e081da8527aeced13c 100644 (file)
@@ -16,6 +16,7 @@ from ..utils import (
     ExtractorError,
     InAdvancePagedList,
     int_or_none,
+    merge_dicts,
     NO_DEFAULT,
     RegexNotFoundError,
     sanitized_Request,
@@ -36,7 +37,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
     _LOGIN_URL = 'https://vimeo.com/log_in'
 
     def _login(self):
-        (username, password) = self._get_login_info()
+        username, password = self._get_login_info()
         if username is None:
             if self._LOGIN_REQUIRED:
                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
@@ -639,16 +640,18 @@ class VimeoIE(VimeoBaseInfoExtractor):
                             'preference': 1,
                         })
 
-        info_dict = self._parse_config(config, video_id)
-        formats.extend(info_dict['formats'])
+        info_dict_config = self._parse_config(config, video_id)
+        formats.extend(info_dict_config['formats'])
         self._vimeo_sort_formats(formats)
 
+        json_ld = self._search_json_ld(webpage, video_id, default={})
+
         if not cc_license:
             cc_license = self._search_regex(
                 r'<link[^>]+rel=["\']license["\'][^>]+href=(["\'])(?P<license>(?:(?!\1).)+)\1',
                 webpage, 'license', default=None, group='license')
 
-        info_dict.update({
+        info_dict = {
             'id': video_id,
             'formats': formats,
             'timestamp': unified_timestamp(timestamp),
@@ -658,7 +661,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
             'like_count': like_count,
             'comment_count': comment_count,
             'license': cc_license,
-        })
+        }
+
+        info_dict = merge_dicts(info_dict, info_dict_config, json_ld)
 
         return info_dict
 
@@ -984,10 +989,10 @@ class VimeoWatchLaterIE(VimeoChannelIE):
 
 
 class VimeoLikesIE(InfoExtractor):
-    _VALID_URL = r'https://(?:www\.)?vimeo\.com/user(?P<id>[0-9]+)/likes/?(?:$|[?#]|sort:)'
+    _VALID_URL = r'https://(?:www\.)?vimeo\.com/(?P<id>[^/]+)/likes/?(?:$|[?#]|sort:)'
     IE_NAME = 'vimeo:likes'
     IE_DESC = 'Vimeo user likes'
-    _TEST = {
+    _TESTS = [{
         'url': 'https://vimeo.com/user755559/likes/',
         'playlist_mincount': 293,
         'info_dict': {
@@ -995,7 +1000,10 @@ class VimeoLikesIE(InfoExtractor):
             'description': 'See all the videos urza likes',
             'title': 'Videos urza likes',
         },
-    }
+    }, {
+        'url': 'https://vimeo.com/stormlapse/likes',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         user_id = self._match_id(url)
@@ -1004,7 +1012,7 @@ class VimeoLikesIE(InfoExtractor):
             self._search_regex(
                 r'''(?x)<li><a\s+href="[^"]+"\s+data-page="([0-9]+)">
                     .*?</a></li>\s*<li\s+class="pagination_next">
-                ''', webpage, 'page count'),
+                ''', webpage, 'page count', default=1),
             'page count', fatal=True)
         PAGE_SIZE = 12
         title = self._html_search_regex(
@@ -1012,7 +1020,7 @@ class VimeoLikesIE(InfoExtractor):
         description = self._html_search_meta('description', webpage)
 
         def _get_page(idx):
-            page_url = 'https://vimeo.com/user%s/likes/page:%d/sort:date' % (
+            page_url = 'https://vimeo.com/%s/likes/page:%d/sort:date' % (
                 user_id, idx + 1)
             webpage = self._download_webpage(
                 page_url, user_id,
@@ -1032,7 +1040,7 @@ class VimeoLikesIE(InfoExtractor):
 
         return {
             '_type': 'playlist',
-            'id': 'user%s_likes' % user_id,
+            'id': '%s_likes' % user_id,
             'title': title,
             'description': description,
             'entries': pl,
index b50d4f170328728fbfc75b75e5a5ed6dcf281f84..29002b35fc08469c0d39a73cdec2c812869c1c21 100644 (file)
@@ -32,7 +32,7 @@ class VKBaseIE(InfoExtractor):
     _NETRC_MACHINE = 'vk'
 
     def _login(self):
-        (username, password) = self._get_login_info()
+        username, password = self._get_login_info()
         if username is None:
             return
 
index 20fef1f04ea776ba21869dfca9e46bd6af591c9f..8ef3e0906436b3a13e1bc368173e7f7c81ba6c22 100644 (file)
@@ -19,7 +19,6 @@ class WatIE(InfoExtractor):
     _TESTS = [
         {
             'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html',
-            'md5': '83d882d9de5c9d97f0bb2c6273cde56a',
             'info_dict': {
                 'id': '11713067',
                 'ext': 'mp4',
@@ -28,10 +27,15 @@ class WatIE(InfoExtractor):
                 'upload_date': '20140819',
                 'duration': 120,
             },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+            'expected_warnings': ['HTTP Error 404'],
         },
         {
             'url': 'http://www.wat.tv/video/gregory-lemarchal-voix-ange-6z1v7_6ygkj_.html',
-            'md5': '34bdfa5ca9fd3c7eb88601b635b0424c',
+            'md5': 'b16574df2c3cd1a36ca0098f2a791925',
             'info_dict': {
                 'id': '11713075',
                 'ext': 'mp4',
@@ -98,38 +102,25 @@ class WatIE(InfoExtractor):
 
         formats = []
         try:
+            alt_urls = lambda manifest_url: [re.sub(r'(?:wdv|ssm)?\.ism/', repl + '.ism/', manifest_url) for repl in ('', 'ssm')]
             manifest_urls = self._download_json(
                 'http://www.wat.tv/get/webhtml/' + video_id, video_id)
             m3u8_url = manifest_urls.get('hls')
             if m3u8_url:
                 m3u8_url = remove_bitrate_limit(m3u8_url)
-                m3u8_formats = self._extract_m3u8_formats(
-                    m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
-                if m3u8_formats:
-                    formats.extend(m3u8_formats)
+                for m3u8_alt_url in alt_urls(m3u8_url):
+                    formats.extend(self._extract_m3u8_formats(
+                        m3u8_alt_url, video_id, 'mp4',
+                        'm3u8_native', m3u8_id='hls', fatal=False))
                     formats.extend(self._extract_f4m_formats(
-                        m3u8_url.replace('ios', 'web').replace('.m3u8', '.f4m'),
+                        m3u8_alt_url.replace('ios', 'web').replace('.m3u8', '.f4m'),
                         video_id, f4m_id='hds', fatal=False))
-                    http_url = extract_url('android5/%s.mp4', 'http')
-                    if http_url:
-                        for m3u8_format in m3u8_formats:
-                            vbr, abr = m3u8_format.get('vbr'), m3u8_format.get('abr')
-                            if not vbr or not abr:
-                                continue
-                            format_id = m3u8_format['format_id'].replace('hls', 'http')
-                            fmt_url = re.sub(r'%s-\d+00-\d+' % video_id, '%s-%d00-%d' % (video_id, round(vbr / 100), round(abr)), http_url)
-                            if self._is_valid_url(fmt_url, video_id, format_id):
-                                f = m3u8_format.copy()
-                                f.update({
-                                    'url': fmt_url,
-                                    'format_id': format_id,
-                                    'protocol': 'http',
-                                })
-                                formats.append(f)
             mpd_url = manifest_urls.get('mpd')
             if mpd_url:
-                formats.extend(self._extract_mpd_formats(remove_bitrate_limit(
-                    mpd_url), video_id, mpd_id='dash', fatal=False))
+                mpd_url = remove_bitrate_limit(mpd_url)
+                for mpd_alt_url in alt_urls(mpd_url):
+                    formats.extend(self._extract_mpd_formats(
+                        mpd_alt_url, video_id, mpd_id='dash', fatal=False))
             self._sort_formats(formats)
         except ExtractorError:
             abr = 64
index b382338fabfeb4b2698712648a1ba7219e966866..be0bcba15380041ca1698c1687497bffc524b4ef 100644 (file)
@@ -69,7 +69,7 @@ class WatchBoxIE(InfoExtractor):
 
         source = self._parse_json(
             self._search_regex(
-                r'(?s)source\s*:\s*({.+?})\s*,\s*\n', webpage, 'source',
+                r'(?s)source["\']?\s*:\s*({.+?})\s*[,}]', webpage, 'source',
                 default='{}'),
             video_id, transform_source=js_to_json, fatal=False) or {}
 
index c022fb33e94ef7f9e6f0e90d73300f866e8ffc76..3dab9145ba9c57bfd1d78a90a847761c23f0d8a8 100644 (file)
@@ -36,7 +36,8 @@ class WimpIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         youtube_id = self._search_regex(
-            r"videoId\s*:\s*[\"']([0-9A-Za-z_-]{11})[\"']",
+            (r"videoId\s*:\s*[\"']([0-9A-Za-z_-]{11})[\"']",
+             r'data-id=["\']([0-9A-Za-z_-]{11})'),
             webpage, 'video URL', default=None)
         if youtube_id:
             return {
index 7f871c8ec7c65b8e969517165126824342dbf2e4..8333fb5349980b964d31256d98f0ef7fb576d738 100644 (file)
@@ -9,8 +9,8 @@ from ..utils import int_or_none
 class XiamiBaseIE(InfoExtractor):
     _API_BASE_URL = 'http://www.xiami.com/song/playlist/cat/json/id'
 
-    def _download_webpage(self, *args, **kwargs):
-        webpage = super(XiamiBaseIE, self)._download_webpage(*args, **kwargs)
+    def _download_webpage_handle(self, *args, **kwargs):
+        webpage = super(XiamiBaseIE, self)._download_webpage_handle(*args, **kwargs)
         if '>Xiami is currently not available in your country.<' in webpage:
             self.raise_geo_restricted('Xiami is currently not available in your country')
         return webpage
index eb1062142ecbc6a4702f0dc7763c414934fd3645..00920385152f03b988786486e4a36eaac10feca8 100644 (file)
@@ -34,8 +34,8 @@ class YandexMusicBaseIE(InfoExtractor):
             'youtube-dl with --cookies',
             expected=True)
 
-    def _download_webpage(self, *args, **kwargs):
-        webpage = super(YandexMusicBaseIE, self)._download_webpage(*args, **kwargs)
+    def _download_webpage_handle(self, *args, **kwargs):
+        webpage = super(YandexMusicBaseIE, self)._download_webpage_handle(*args, **kwargs)
         if 'Нам очень жаль, но&nbsp;запросы, поступившие с&nbsp;вашего IP-адреса, похожи на&nbsp;автоматические.' in webpage:
             self._raise_captcha()
         return webpage
@@ -57,14 +57,14 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
         'info_dict': {
             'id': '4878838',
             'ext': 'mp3',
-            'title': 'Carlo Ambrosio & Fabio Di Bari, Carlo Ambrosio - Gypsy Eyes 1',
+            'title': 'Carlo Ambrosio, Carlo Ambrosio & Fabio Di Bari - Gypsy Eyes 1',
             'filesize': 4628061,
             'duration': 193.04,
             'track': 'Gypsy Eyes 1',
             'album': 'Gypsy Soul',
             'album_artist': 'Carlo Ambrosio',
-            'artist': 'Carlo Ambrosio & Fabio Di Bari, Carlo Ambrosio',
-            'release_year': '2009',
+            'artist': 'Carlo Ambrosio, Carlo Ambrosio & Fabio Di Bari',
+            'release_year': 2009,
         },
         'skip': 'Travis CI servers blocked by YandexMusic',
     }
@@ -120,7 +120,7 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
                 track_info.update({
                     'album': album.get('title'),
                     'album_artist': extract_artist(album.get('artists')),
-                    'release_year': compat_str(year) if year else None,
+                    'release_year': int_or_none(year),
                 })
 
         track_artist = extract_artist(track.get('artists'))
index e7bd1f18fb504c8541d0fbdc301980cab07d1f54..89c8b7f8d9f534134ef8e405c6ba97d7c232f515 100644 (file)
@@ -37,6 +37,7 @@ from ..utils import (
     orderedSet,
     parse_codecs,
     parse_duration,
+    qualities,
     remove_quotes,
     remove_start,
     smuggle_url,
@@ -84,7 +85,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
 
         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
         """
-        (username, password) = self._get_login_info()
+        username, password = self._get_login_info()
         # No authentication to be performed
         if username is None:
             if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
@@ -246,9 +247,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
 
         return True
 
-    def _download_webpage(self, *args, **kwargs):
+    def _download_webpage_handle(self, *args, **kwargs):
         kwargs.setdefault('query', {})['disable_polymer'] = 'true'
-        return super(YoutubeBaseInfoExtractor, self)._download_webpage(
+        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
             *args, **compat_kwargs(kwargs))
 
     def _real_initialize(self):
@@ -509,6 +510,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
                 'license': 'Standard YouTube License',
                 'creator': 'Icona Pop',
+                'track': 'I Love It (feat. Charli XCX)',
+                'artist': 'Icona Pop',
             }
         },
         {
@@ -527,6 +530,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
                 'license': 'Standard YouTube License',
                 'creator': 'Justin Timberlake',
+                'track': 'Tunnel Vision',
+                'artist': 'Justin Timberlake',
                 'age_limit': 18,
             }
         },
@@ -596,7 +601,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'id': 'IB3lcPjvWLA',
                 'ext': 'm4a',
                 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
-                'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
+                'description': 'md5:1900ed86ee514927b9e00fbead6969a5',
                 'duration': 244,
                 'uploader': 'AfrojackVEVO',
                 'uploader_id': 'AfrojackVEVO',
@@ -637,7 +642,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'ext': 'mp4',
                 'duration': 219,
                 'upload_date': '20100909',
-                'uploader': 'The Amazing Atheist',
+                'uploader': 'TJ Kirk',
                 'uploader_id': 'TheAmazingAtheist',
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
                 'license': 'Standard YouTube License',
@@ -667,10 +672,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
             'info_dict': {
                 'id': '6kLq3WMV1nU',
-                'ext': 'mp4',
+                'ext': 'webm',
                 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
                 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
-                'duration': 247,
+                'duration': 246,
                 'uploader': 'LloydVEVO',
                 'uploader_id': 'LloydVEVO',
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
@@ -732,7 +737,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader_id': 'AllenMeow',
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
-                'uploader': '孫艾倫',
+                'uploader': '孫ᄋᄅ',
                 'license': 'Standard YouTube License',
                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
             },
@@ -759,7 +764,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
             'info_dict': {
                 'id': 'FIl7x6_3R5Y',
-                'ext': 'mp4',
+                'ext': 'webm',
                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
                 'duration': 220,
@@ -768,8 +773,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
                 'uploader': 'dorappi2000',
                 'license': 'Standard YouTube License',
-                'formats': 'mincount:32',
+                'formats': 'mincount:31',
             },
+            'skip': 'not actual anymore',
         },
         # DASH manifest with segment_list
         {
@@ -884,7 +890,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'id': 'lsguqyKfVQg',
                 'ext': 'mp4',
                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
-                'alt_title': 'Dark Walk',
+                'alt_title': 'Dark Walk - Position Music',
                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
                 'duration': 133,
                 'upload_date': '20151119',
@@ -892,7 +898,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
                 'uploader': 'IronSoulElf',
                 'license': 'Standard YouTube License',
-                'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',
+                'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
+                'track': 'Dark Walk - Position Music',
+                'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
             },
             'params': {
                 'skip_download': True,
@@ -949,7 +957,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
                 'duration': 4060,
                 'upload_date': '20151119',
-                'uploader': 'Bernie 2016',
+                'uploader': 'Bernie Sanders',
                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
                 'license': 'Creative Commons Attribution license (reuse allowed)',
@@ -984,6 +992,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'params': {
                 'skip_download': True,
             },
+            'skip': 'This video is not available.',
         },
         {
             # YouTube Red video with episode data
@@ -992,7 +1001,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'id': 'iqKdEhx-dD4',
                 'ext': 'mp4',
                 'title': 'Isolation - Mind Field (Ep 1)',
-                'description': 'md5:8013b7ddea787342608f63a13ddc9492',
+                'description': 'md5:25b78d2f64ae81719f5c96319889b736',
                 'duration': 2085,
                 'upload_date': '20170118',
                 'uploader': 'Vsauce',
@@ -1025,7 +1034,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
                 'license': 'Standard YouTube License',
-                'view_count': int,
             },
             'params': {
                 'skip_download': True,
@@ -1537,7 +1545,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
             if ytplayer_config:
                 args = ytplayer_config['args']
-                if args.get('url_encoded_fmt_stream_map'):
+                if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
                     # Convert to the same format returned by compat_parse_qs
                     video_info = dict((k, [v]) for k, v in args.items())
                     add_dash_mpd(video_info)
@@ -1693,125 +1701,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
             raise ExtractorError('"rental" videos not supported. See https://github.com/rg3/youtube-dl/issues/359 for more information.', expected=True)
 
-        # Start extracting information
-        self.report_information_extraction(video_id)
-
-        # uploader
-        if 'author' not in video_info:
-            raise ExtractorError('Unable to extract uploader name')
-        video_uploader = compat_urllib_parse_unquote_plus(video_info['author'][0])
-
-        # uploader_id
-        video_uploader_id = None
-        video_uploader_url = None
-        mobj = re.search(
-            r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
-            video_webpage)
-        if mobj is not None:
-            video_uploader_id = mobj.group('uploader_id')
-            video_uploader_url = mobj.group('uploader_url')
-        else:
-            self._downloader.report_warning('unable to extract uploader nickname')
-
-        # thumbnail image
-        # We try first to get a high quality image:
-        m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
-                            video_webpage, re.DOTALL)
-        if m_thumb is not None:
-            video_thumbnail = m_thumb.group(1)
-        elif 'thumbnail_url' not in video_info:
-            self._downloader.report_warning('unable to extract video thumbnail')
-            video_thumbnail = None
-        else:   # don't panic if we can't find it
-            video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
-
-        # upload date
-        upload_date = self._html_search_meta(
-            'datePublished', video_webpage, 'upload date', default=None)
-        if not upload_date:
-            upload_date = self._search_regex(
-                [r'(?s)id="eow-date.*?>(.*?)</span>',
-                 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
-                video_webpage, 'upload date', default=None)
-        upload_date = unified_strdate(upload_date)
-
-        video_license = self._html_search_regex(
-            r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
-            video_webpage, 'license', default=None)
-
-        m_music = re.search(
-            r'''(?x)
-                <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
-                <ul[^>]*>\s*
-                <li>(?P<title>.+?)
-                by (?P<creator>.+?)
-                (?:
-                    \(.+?\)|
-                    <a[^>]*
-                        (?:
-                            \bhref=["\']/red[^>]*>|             # drop possible
-                            >\s*Listen ad-free with YouTube Red # YouTube Red ad
-                        )
-                    .*?
-                )?</li
-            ''',
-            video_webpage)
-        if m_music:
-            video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
-            video_creator = clean_html(m_music.group('creator'))
-        else:
-            video_alt_title = video_creator = None
-
-        m_episode = re.search(
-            r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
-            video_webpage)
-        if m_episode:
-            series = m_episode.group('series')
-            season_number = int(m_episode.group('season'))
-            episode_number = int(m_episode.group('episode'))
-        else:
-            series = season_number = episode_number = None
-
-        m_cat_container = self._search_regex(
-            r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
-            video_webpage, 'categories', default=None)
-        if m_cat_container:
-            category = self._html_search_regex(
-                r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
-                default=None)
-            video_categories = None if category is None else [category]
-        else:
-            video_categories = None
-
-        video_tags = [
-            unescapeHTML(m.group('content'))
-            for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
-
-        def _extract_count(count_name):
-            return str_to_int(self._search_regex(
-                r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
-                % re.escape(count_name),
-                video_webpage, count_name, default=None))
-
-        like_count = _extract_count('like')
-        dislike_count = _extract_count('dislike')
-
-        # subtitles
-        video_subtitles = self.extract_subtitles(video_id, video_webpage)
-        automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
-
-        video_duration = try_get(
-            video_info, lambda x: int_or_none(x['length_seconds'][0]))
-        if not video_duration:
-            video_duration = parse_duration(self._html_search_meta(
-                'duration', video_webpage, 'video duration'))
-
-        # annotations
-        video_annotations = None
-        if self._downloader.params.get('writeannotations', False):
-            video_annotations = self._extract_annotations(video_id)
-
-        chapters = self._extract_chapters(description_original, video_duration)
+        def _extract_filesize(media_url):
+            return int_or_none(self._search_regex(
+                r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
 
         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
             self.report_rtmp_download()
@@ -1838,6 +1730,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                                 'width': int_or_none(width_height[0]),
                                 'height': int_or_none(width_height[1]),
                             }
+            q = qualities(['small', 'medium', 'hd720'])
             formats = []
             for url_data_str in encoded_url_map.split(','):
                 url_data = compat_parse_qs(url_data_str)
@@ -1917,13 +1810,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
                 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
 
+                filesize = int_or_none(url_data.get(
+                    'clen', [None])[0]) or _extract_filesize(url)
+
+                quality = url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0]
+
                 more_fields = {
-                    'filesize': int_or_none(url_data.get('clen', [None])[0]),
+                    'filesize': filesize,
                     'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
                     'width': width,
                     'height': height,
                     'fps': int_or_none(url_data.get('fps', [None])[0]),
-                    'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0],
+                    'format_note': quality,
+                    'quality': q(quality),
                 }
                 for key, value in more_fields.items():
                     if value:
@@ -1969,11 +1868,140 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
                 formats.append(a_format)
         else:
-            unavailable_message = extract_unavailable_message()
-            if unavailable_message:
-                raise ExtractorError(unavailable_message, expected=True)
+            error_message = clean_html(video_info.get('reason', [None])[0])
+            if not error_message:
+                error_message = extract_unavailable_message()
+            if error_message:
+                raise ExtractorError(error_message, expected=True)
             raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
 
+        # uploader
+        video_uploader = try_get(video_info, lambda x: x['author'][0], compat_str)
+        if video_uploader:
+            video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
+        else:
+            self._downloader.report_warning('unable to extract uploader name')
+
+        # uploader_id
+        video_uploader_id = None
+        video_uploader_url = None
+        mobj = re.search(
+            r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
+            video_webpage)
+        if mobj is not None:
+            video_uploader_id = mobj.group('uploader_id')
+            video_uploader_url = mobj.group('uploader_url')
+        else:
+            self._downloader.report_warning('unable to extract uploader nickname')
+
+        # thumbnail image
+        # We try first to get a high quality image:
+        m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
+                            video_webpage, re.DOTALL)
+        if m_thumb is not None:
+            video_thumbnail = m_thumb.group(1)
+        elif 'thumbnail_url' not in video_info:
+            self._downloader.report_warning('unable to extract video thumbnail')
+            video_thumbnail = None
+        else:   # don't panic if we can't find it
+            video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
+
+        # upload date
+        upload_date = self._html_search_meta(
+            'datePublished', video_webpage, 'upload date', default=None)
+        if not upload_date:
+            upload_date = self._search_regex(
+                [r'(?s)id="eow-date.*?>(.*?)</span>',
+                 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
+                video_webpage, 'upload date', default=None)
+        upload_date = unified_strdate(upload_date)
+
+        video_license = self._html_search_regex(
+            r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
+            video_webpage, 'license', default=None)
+
+        m_music = re.search(
+            r'''(?x)
+                <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
+                <ul[^>]*>\s*
+                <li>(?P<title>.+?)
+                by (?P<creator>.+?)
+                (?:
+                    \(.+?\)|
+                    <a[^>]*
+                        (?:
+                            \bhref=["\']/red[^>]*>|             # drop possible
+                            >\s*Listen ad-free with YouTube Red # YouTube Red ad
+                        )
+                    .*?
+                )?</li
+            ''',
+            video_webpage)
+        if m_music:
+            video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
+            video_creator = clean_html(m_music.group('creator'))
+        else:
+            video_alt_title = video_creator = None
+
+        def extract_meta(field):
+            return self._html_search_regex(
+                r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
+                video_webpage, field, default=None)
+
+        track = extract_meta('Song')
+        artist = extract_meta('Artist')
+
+        m_episode = re.search(
+            r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
+            video_webpage)
+        if m_episode:
+            series = m_episode.group('series')
+            season_number = int(m_episode.group('season'))
+            episode_number = int(m_episode.group('episode'))
+        else:
+            series = season_number = episode_number = None
+
+        m_cat_container = self._search_regex(
+            r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
+            video_webpage, 'categories', default=None)
+        if m_cat_container:
+            category = self._html_search_regex(
+                r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
+                default=None)
+            video_categories = None if category is None else [category]
+        else:
+            video_categories = None
+
+        video_tags = [
+            unescapeHTML(m.group('content'))
+            for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
+
+        def _extract_count(count_name):
+            return str_to_int(self._search_regex(
+                r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
+                % re.escape(count_name),
+                video_webpage, count_name, default=None))
+
+        like_count = _extract_count('like')
+        dislike_count = _extract_count('dislike')
+
+        # subtitles
+        video_subtitles = self.extract_subtitles(video_id, video_webpage)
+        automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
+
+        video_duration = try_get(
+            video_info, lambda x: int_or_none(x['length_seconds'][0]))
+        if not video_duration:
+            video_duration = parse_duration(self._html_search_meta(
+                'duration', video_webpage, 'video duration'))
+
+        # annotations
+        video_annotations = None
+        if self._downloader.params.get('writeannotations', False):
+            video_annotations = self._extract_annotations(video_id)
+
+        chapters = self._extract_chapters(description_original, video_duration)
+
         # Look for the DASH manifest
         if self._downloader.params.get('youtube_include_dash_manifest', True):
             dash_mpd_fatal = True
@@ -1990,6 +2018,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     for df in self._extract_mpd_formats(
                             mpd_url, video_id, fatal=dash_mpd_fatal,
                             formats_dict=self._formats):
+                        if not df.get('filesize'):
+                            df['filesize'] = _extract_filesize(df['url'])
                         # Do not overwrite DASH format found in some previous DASH manifest
                         if df['format_id'] not in dash_formats:
                             dash_formats[df['format_id']] = df
@@ -2037,9 +2067,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'uploader_url': video_uploader_url,
             'upload_date': upload_date,
             'license': video_license,
-            'creator': video_creator,
+            'creator': video_creator or artist,
             'title': video_title,
-            'alt_title': video_alt_title,
+            'alt_title': video_alt_title or track,
             'thumbnail': video_thumbnail,
             'description': video_description,
             'categories': video_categories,
@@ -2062,6 +2092,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'series': series,
             'season_number': season_number,
             'episode_number': episode_number,
+            'track': track,
+            'artist': artist,
         }
 
 
diff --git a/youtube_dl/extractor/zattoo.py b/youtube_dl/extractor/zattoo.py
new file mode 100644 (file)
index 0000000..b5a3a07
--- /dev/null
@@ -0,0 +1,270 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+from uuid import uuid4
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_HTTPError,
+    compat_str,
+)
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    try_get,
+    urlencode_postdata,
+)
+
+
+class ZattooBaseIE(InfoExtractor):
+    _NETRC_MACHINE = 'zattoo'
+    _HOST_URL = 'https://zattoo.com'
+
+    _power_guide_hash = None
+
+    def _login(self):
+        username, password = self._get_login_info()
+        if not username or not password:
+            self.raise_login_required(
+                'A valid %s account is needed to access this media.'
+                % self._NETRC_MACHINE)
+
+        try:
+            data = self._download_json(
+                '%s/zapi/v2/account/login' % self._HOST_URL, None, 'Logging in',
+                data=urlencode_postdata({
+                    'login': username,
+                    'password': password,
+                    'remember': 'true',
+                }), headers={
+                    'Referer': '%s/login' % self._HOST_URL,
+                    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+                })
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+                raise ExtractorError(
+                    'Unable to login: incorrect username and/or password',
+                    expected=True)
+            raise
+
+        self._power_guide_hash = data['session']['power_guide_hash']
+
+    def _real_initialize(self):
+        webpage = self._download_webpage(
+            self._HOST_URL, None, 'Downloading app token')
+        app_token = self._html_search_regex(
+            r'appToken\s*=\s*(["\'])(?P<token>(?:(?!\1).)+?)\1',
+            webpage, 'app token', group='token')
+        app_version = self._html_search_regex(
+            r'<!--\w+-(.+?)-', webpage, 'app version', default='2.8.2')
+
+        # Will setup appropriate cookies
+        self._request_webpage(
+            '%s/zapi/v2/session/hello' % self._HOST_URL, None,
+            'Opening session', data=urlencode_postdata({
+                'client_app_token': app_token,
+                'uuid': compat_str(uuid4()),
+                'lang': 'en',
+                'app_version': app_version,
+                'format': 'json',
+            }))
+
+        self._login()
+
+    def _extract_cid(self, video_id, channel_name):
+        channel_groups = self._download_json(
+            '%s/zapi/v2/cached/channels/%s' % (self._HOST_URL,
+                                               self._power_guide_hash),
+            video_id, 'Downloading channel list',
+            query={'details': False})['channel_groups']
+        channel_list = []
+        for chgrp in channel_groups:
+            channel_list.extend(chgrp['channels'])
+        try:
+            return next(
+                chan['cid'] for chan in channel_list
+                if chan.get('cid') and (
+                    chan.get('display_alias') == channel_name or
+                    chan.get('cid') == channel_name))
+        except StopIteration:
+            raise ExtractorError('Could not extract channel id')
+
+    def _extract_cid_and_video_info(self, video_id):
+        data = self._download_json(
+            '%s/zapi/program/details' % self._HOST_URL,
+            video_id,
+            'Downloading video information',
+            query={
+                'program_id': video_id,
+                'complete': True
+            })
+
+        p = data['program']
+        cid = p['cid']
+
+        info_dict = {
+            'id': video_id,
+            'title': p.get('title') or p['episode_title'],
+            'description': p.get('description'),
+            'thumbnail': p.get('image_url'),
+            'creator': p.get('channel_name'),
+            'episode': p.get('episode_title'),
+            'episode_number': int_or_none(p.get('episode_number')),
+            'season_number': int_or_none(p.get('season_number')),
+            'release_year': int_or_none(p.get('year')),
+            'categories': try_get(p, lambda x: x['categories'], list),
+        }
+
+        return cid, info_dict
+
+    def _extract_formats(self, cid, video_id, record_id=None, is_live=False):
+        postdata_common = {
+            'https_watch_urls': True,
+        }
+
+        if is_live:
+            postdata_common.update({'timeshift': 10800})
+            url = '%s/zapi/watch/live/%s' % (self._HOST_URL, cid)
+        elif record_id:
+            url = '%s/zapi/watch/recording/%s' % (self._HOST_URL, record_id)
+        else:
+            url = '%s/zapi/watch/recall/%s/%s' % (self._HOST_URL, cid, video_id)
+
+        formats = []
+        for stream_type in ('dash', 'hls', 'hls5', 'hds'):
+            postdata = postdata_common.copy()
+            postdata['stream_type'] = stream_type
+
+            data = self._download_json(
+                url, video_id, 'Downloading %s formats' % stream_type.upper(),
+                data=urlencode_postdata(postdata), fatal=False)
+            if not data:
+                continue
+
+            watch_urls = try_get(
+                data, lambda x: x['stream']['watch_urls'], list)
+            if not watch_urls:
+                continue
+
+            for watch in watch_urls:
+                if not isinstance(watch, dict):
+                    continue
+                watch_url = watch.get('url')
+                if not watch_url or not isinstance(watch_url, compat_str):
+                    continue
+                format_id_list = [stream_type]
+                maxrate = watch.get('maxrate')
+                if maxrate:
+                    format_id_list.append(compat_str(maxrate))
+                audio_channel = watch.get('audio_channel')
+                if audio_channel:
+                    format_id_list.append(compat_str(audio_channel))
+                preference = 1 if audio_channel == 'A' else None
+                format_id = '-'.join(format_id_list)
+                if stream_type in ('dash', 'dash_widevine', 'dash_playready'):
+                    this_formats = self._extract_mpd_formats(
+                        watch_url, video_id, mpd_id=format_id, fatal=False)
+                elif stream_type in ('hls', 'hls5', 'hls5_fairplay'):
+                    this_formats = self._extract_m3u8_formats(
+                        watch_url, video_id, 'mp4',
+                        entry_protocol='m3u8_native', m3u8_id=format_id,
+                        fatal=False)
+                elif stream_type == 'hds':
+                    this_formats = self._extract_f4m_formats(
+                        watch_url, video_id, f4m_id=format_id, fatal=False)
+                elif stream_type == 'smooth_playready':
+                    this_formats = self._extract_ism_formats(
+                        watch_url, video_id, ism_id=format_id, fatal=False)
+                else:
+                    assert False
+                for this_format in this_formats:
+                    this_format['preference'] = preference
+                formats.extend(this_formats)
+        self._sort_formats(formats)
+        return formats
+
+    def _extract_video(self, channel_name, video_id, record_id=None, is_live=False):
+        if is_live:
+            cid = self._extract_cid(video_id, channel_name)
+            info_dict = {
+                'id': channel_name,
+                'title': self._live_title(channel_name),
+                'is_live': True,
+            }
+        else:
+            cid, info_dict = self._extract_cid_and_video_info(video_id)
+        formats = self._extract_formats(
+            cid, video_id, record_id=record_id, is_live=is_live)
+        info_dict['formats'] = formats
+        return info_dict
+
+
+class QuicklineBaseIE(ZattooBaseIE):
+    _NETRC_MACHINE = 'quickline'
+    _HOST_URL = 'https://mobiltv.quickline.com'
+
+
+class QuicklineIE(QuicklineBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P<channel>[^/]+)/(?P<id>[0-9]+)'
+
+    _TEST = {
+        'url': 'https://mobiltv.quickline.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste',
+        'only_matching': True,
+    }
+
+    def _real_extract(self, url):
+        channel_name, video_id = re.match(self._VALID_URL, url).groups()
+        return self._extract_video(channel_name, video_id)
+
+
+class QuicklineLiveIE(QuicklineBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P<id>[^/]+)'
+
+    _TEST = {
+        'url': 'https://mobiltv.quickline.com/watch/srf1',
+        'only_matching': True,
+    }
+
+    @classmethod
+    def suitable(cls, url):
+        return False if QuicklineIE.suitable(url) else super(QuicklineLiveIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        channel_name = video_id = self._match_id(url)
+        return self._extract_video(channel_name, video_id, is_live=True)
+
+
+class ZattooIE(ZattooBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P<channel>[^/]+?)/(?P<id>[0-9]+)[^/]+(?:/(?P<recid>[0-9]+))?'
+
+    # Since regular videos are only available for 7 days and recorded videos
+    # are only available for a specific user, we cannot have detailed tests.
+    _TESTS = [{
+        'url': 'https://zattoo.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste',
+        'only_matching': True,
+    }, {
+        'url': 'https://zattoo.com/watch/srf_zwei/132905652-eishockey-spengler-cup/102791477/1512211800000/1514433500000/92000',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        channel_name, video_id, record_id = re.match(self._VALID_URL, url).groups()
+        return self._extract_video(channel_name, video_id, record_id)
+
+
+class ZattooLiveIE(ZattooBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P<id>[^/]+)'
+
+    _TEST = {
+        'url': 'https://zattoo.com/watch/srf1',
+        'only_matching': True,
+    }
+
+    @classmethod
+    def suitable(cls, url):
+        return False if ZattooIE.suitable(url) else super(ZattooLiveIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        channel_name = video_id = self._match_id(url)
+        return self._extract_video(channel_name, video_id, is_live=True)
index 3e4ac03a240844ef3b69fd21dbedcc441a46e5c7..e83d546a082af8e24f2f01438605aabad41857af 100644 (file)
@@ -203,7 +203,7 @@ def parseOpts(overrideArguments=None):
     network.add_option(
         '--proxy', dest='proxy',
         default=None, metavar='URL',
-        help='Use the specified HTTP/HTTPS/SOCKS proxy. To enable experimental '
+        help='Use the specified HTTP/HTTPS/SOCKS proxy. To enable '
              'SOCKS proxy, specify a proper scheme. For example '
              'socks5://127.0.0.1:1080/. Pass in an empty string (--proxy "") '
              'for direct connection')
@@ -232,7 +232,7 @@ def parseOpts(overrideArguments=None):
         '--geo-verification-proxy',
         dest='geo_verification_proxy', default=None, metavar='URL',
         help='Use this proxy to verify the IP address for some geo-restricted sites. '
-        'The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading.')
+        'The default proxy specified by --proxy (or none, if the option is not present) is used for the actual downloading.')
     geo.add_option(
         '--cn-verification-proxy',
         dest='cn_verification_proxy', default=None, metavar='URL',
@@ -240,15 +240,19 @@ def parseOpts(overrideArguments=None):
     geo.add_option(
         '--geo-bypass',
         action='store_true', dest='geo_bypass', default=True,
-        help='Bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)')
+        help='Bypass geographic restriction via faking X-Forwarded-For HTTP header')
     geo.add_option(
         '--no-geo-bypass',
         action='store_false', dest='geo_bypass', default=True,
-        help='Do not bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)')
+        help='Do not bypass geographic restriction via faking X-Forwarded-For HTTP header')
     geo.add_option(
         '--geo-bypass-country', metavar='CODE',
         dest='geo_bypass_country', default=None,
-        help='Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code (experimental)')
+        help='Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code')
+    geo.add_option(
+        '--geo-bypass-ip-block', metavar='IP_BLOCK',
+        dest='geo_bypass_ip_block', default=None,
+        help='Force bypass geographic restriction with explicitly provided IP block in CIDR notation')
 
     selection = optparse.OptionGroup(parser, 'Video Selection')
     selection.add_option(
@@ -498,7 +502,7 @@ def parseOpts(overrideArguments=None):
     downloader.add_option(
         '--xattr-set-filesize',
         dest='xattr_set_filesize', action='store_true',
-        help='Set file xattribute ytdl.filesize with expected file size (experimental)')
+        help='Set file xattribute ytdl.filesize with expected file size')
     downloader.add_option(
         '--hls-prefer-native',
         dest='hls_prefer_native', action='store_true', default=None,
index 574284e944508340ea603e8e047008edee8a35ae..6a3199fb992b72e70b6588d3999e9c6ec0a87890 100644 (file)
@@ -1228,7 +1228,7 @@ def unified_timestamp(date_str, day_first=True):
 
 
 def determine_ext(url, default_ext='unknown_video'):
-    if url is None:
+    if url is None or '.' not in url:
         return default_ext
     guess = url.partition('?')[0].rpartition('.')[2]
     if re.match(r'^[A-Za-z0-9]+$', guess):
@@ -2225,6 +2225,20 @@ def try_get(src, getter, expected_type=None):
                 return v
 
 
+def merge_dicts(*dicts):
+    merged = {}
+    for a_dict in dicts:
+        for k, v in a_dict.items():
+            if v is None:
+                continue
+            if (k not in merged or
+                    (isinstance(v, compat_str) and v and
+                        isinstance(merged[k], compat_str) and
+                        not merged[k])):
+                merged[k] = v
+    return merged
+
+
 def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
     return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
 
@@ -2258,7 +2272,10 @@ def parse_age_limit(s):
         return int(m.group('age'))
     if s in US_RATINGS:
         return US_RATINGS[s]
-    return TV_PARENTAL_GUIDELINES.get(s)
+    m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
+    if m:
+        return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
+    return None
 
 
 def strip_jsonp(code):
@@ -2650,6 +2667,7 @@ def dfxp2srt(dfxp_data):
     ]
 
     _x = functools.partial(xpath_with_ns, ns_map={
+        'xml': 'http://www.w3.org/XML/1998/namespace',
         'ttml': 'http://www.w3.org/ns/ttml',
         'tts': 'http://www.w3.org/ns/ttml#styling',
     })
@@ -2741,7 +2759,9 @@ def dfxp2srt(dfxp_data):
     repeat = False
     while True:
         for style in dfxp.findall(_x('.//ttml:style')):
-            style_id = style.get('id')
+            style_id = style.get('id') or style.get(_x('xml:id'))
+            if not style_id:
+                continue
             parent_style_id = style.get('style')
             if parent_style_id:
                 if parent_style_id not in styles:
@@ -3520,10 +3540,13 @@ class GeoUtils(object):
     }
 
     @classmethod
-    def random_ipv4(cls, code):
-        block = cls._country_ip_map.get(code.upper())
-        if not block:
-            return None
+    def random_ipv4(cls, code_or_block):
+        if len(code_or_block) == 2:
+            block = cls._country_ip_map.get(code_or_block.upper())
+            if not block:
+                return None
+        else:
+            block = code_or_block
         addr, preflen = block.split('/')
         addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
         addr_max = addr_min | (0xffffffff >> int(preflen))
index 4e3cb39c62c8097981ab870be70b8ea37b54f76e..49fef60ea7460780706f0b40d610c36c99505da1 100644 (file)
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2018.04.25'
+__version__ = '2018.06.18'