Raphaël G. Git Repositories - youtubedl/commitdiff
New upstream version 2018.04.25
author Rogério Brito <rbrito@ime.usp.br>
Fri, 27 Apr 2018 20:12:50 +0000 (17:12 -0300)
committer Rogério Brito <rbrito@ime.usp.br>
Fri, 27 Apr 2018 20:12:50 +0000 (17:12 -0300)
80 files changed:
AUTHORS
ChangeLog
Makefile
README.md
README.txt
docs/supportedsites.md
test/test_InfoExtractor.py
test/test_subtitles.py
test/test_utils.py
test/test_youtube_lists.py
test/testdata/xspf/foo_xspf.xspf [new file with mode: 0644]
youtube-dl
youtube-dl.1
youtube-dl.fish
youtube_dl/YoutubeDL.py
youtube_dl/downloader/common.py
youtube_dl/downloader/external.py
youtube_dl/downloader/fragment.py
youtube_dl/extractor/abc.py
youtube_dl/extractor/acast.py
youtube_dl/extractor/afreecatv.py
youtube_dl/extractor/americastestkitchen.py [changed mode: 0755->0644]
youtube_dl/extractor/bilibili.py
youtube_dl/extractor/breakcom.py
youtube_dl/extractor/canalc2.py
youtube_dl/extractor/cbc.py
youtube_dl/extractor/cbs.py
youtube_dl/extractor/cbssports.py
youtube_dl/extractor/ccma.py
youtube_dl/extractor/cda.py [changed mode: 0755->0644]
youtube_dl/extractor/ceskatelevize.py
youtube_dl/extractor/common.py
youtube_dl/extractor/crackle.py
youtube_dl/extractor/dramafever.py
youtube_dl/extractor/drtuber.py
youtube_dl/extractor/etonline.py [deleted file]
youtube_dl/extractor/extractors.py
youtube_dl/extractor/extremetube.py
youtube_dl/extractor/fxnetworks.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/heise.py
youtube_dl/extractor/instagram.py
youtube_dl/extractor/joj.py [changed mode: 0755->0644]
youtube_dl/extractor/kaltura.py
youtube_dl/extractor/keezmovies.py
youtube_dl/extractor/lenta.py [new file with mode: 0644]
youtube_dl/extractor/libsyn.py
youtube_dl/extractor/liveleak.py
youtube_dl/extractor/medialaan.py
youtube_dl/extractor/mofosex.py
youtube_dl/extractor/nationalgeographic.py
youtube_dl/extractor/naver.py
youtube_dl/extractor/nexx.py
youtube_dl/extractor/nick.py
youtube_dl/extractor/ninenow.py
youtube_dl/extractor/odnoklassniki.py
youtube_dl/extractor/openload.py
youtube_dl/extractor/picarto.py [new file with mode: 0644]
youtube_dl/extractor/pornflip.py
youtube_dl/extractor/pornhub.py
youtube_dl/extractor/prosiebensat1.py
youtube_dl/extractor/rentv.py
youtube_dl/extractor/sevenplus.py
youtube_dl/extractor/smotri.py
youtube_dl/extractor/steam.py
youtube_dl/extractor/svt.py
youtube_dl/extractor/tube8.py
youtube_dl/extractor/tvnow.py
youtube_dl/extractor/twentyfourvideo.py
youtube_dl/extractor/twitch.py
youtube_dl/extractor/videa.py
youtube_dl/extractor/vine.py
youtube_dl/extractor/vrv.py
youtube_dl/extractor/xfileshare.py
youtube_dl/extractor/xvideos.py
youtube_dl/extractor/youku.py
youtube_dl/extractor/youtube.py
youtube_dl/options.py
youtube_dl/utils.py
youtube_dl/version.py

diff --git a/AUTHORS b/AUTHORS
index 6223212aad6eaeee4cc0a0d5a4b3ffbb3a9ff2fe..eaf96d79d87ad6c73a8ff46a2a0ab8e344fcf419 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -236,3 +236,6 @@ Lei Wang
 Petr Novák
 Leonardo Taccari
 Martin Weinelt
+Surya Oktafendri
+TingPing
+Alexandre Macabies
index 47736e076e0925fb6540ad4cc01fa997b2bd2af1..4a3df67df4bda200f29cf4e6e551949c3af54cf2 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,126 @@
+version 2018.04.25
+
+Core
+* [utils] Fix match_str for boolean meta fields
++ [Makefile] Add support for pandoc 2 and disable smart extension (#16251)
+* [YoutubeDL] Fix typo in media extension compatibility checker (#16215)
+
+Extractors
++ [openload] Recognize IPv6 stream URLs (#16136, #16137, #16205, #16246,
+  #16250)
++ [twitch] Extract is_live according to status (#16259)
+* [pornflip] Relax URL regular expression (#16258)
+- [etonline] Remove extractor (#16256)
+* [breakcom] Fix extraction (#16254)
++ [youtube] Add ability to authenticate with cookies
+* [youtube:feed] Implement lazy playlist extraction (#10184)
++ [svt] Add support for TV channel live streams (#15279, #15809)
+* [ccma] Fix video extraction (#15931)
+* [rentv] Fix extraction (#15227)
++ [nick] Add support for nickjr.nl (#16230)
+* [extremetube] Fix metadata extraction
++ [keezmovies] Add support for generic embeds (#16134, #16154)
+* [nexx] Extract new azure URLs (#16223)
+* [cbssports] Fix extraction (#16217)
+* [kaltura] Improve embeds detection (#16201)
+* [instagram:user] Fix extraction (#16119)
+* [cbs] Skip DRM asset types (#16104)
+
+
+version 2018.04.16
+
+Extractors
+* [smotri:broadcast] Fix extraction (#16180)
++ [picarto] Add support for picarto.tv (#6205, #12514, #15276, #15551)
+* [vine:user] Fix extraction (#15514, #16190)
+* [pornhub] Relax URL regular expression (#16165)
+* [cbc:watch] Re-acquire device token when expired (#16160)
++ [fxnetworks] Add support for https theplatform URLs (#16125, #16157)
++ [instagram:user] Add request signing (#16119)
++ [twitch] Add support for mobile URLs (#16146)
+
+
+version 2018.04.09
+
+Core
+* [YoutubeDL] Do not save/restore console title while simulate (#16103)
+* [extractor/common] Relax JSON-LD context check (#16006)
+
+Extractors
++ [generic] Add support for tube8 embeds
++ [generic] Add support for share-videos.se embeds (#16089, #16115)
+* [odnoklassniki] Extend URL regular expression (#16081)
+* [steam] Bypass mature content check (#16113)
++ [acast] Extract more metadata
+* [acast] Fix extraction (#16118)
+* [instagram:user] Fix extraction (#16119)
+* [drtuber] Fix title extraction (#16107, #16108)
+* [liveleak] Extend URL regular expression (#16117)
++ [openload] Add support for oload.xyz
+* [openload] Relax stream URL regular expression
+* [openload] Fix extraction (#16099)
++ [svtplay:series] Add support for season URLs
++ [svtplay:series] Add support for series (#11130, #16059)
+
+
+version 2018.04.03
+
+Extractors
++ [tvnow] Add support for shows (#15837)
+* [dramafever] Fix authentication (#16067)
+* [afreecatv] Use partial view only when necessary (#14450)
++ [afreecatv] Add support for authentication (#14450)
++ [nationalgeographic] Add support for new URL schema (#16001, #16054)
+* [xvideos] Fix thumbnail extraction (#15978, #15979)
+* [medialaan] Fix vod id (#16038)
++ [openload] Add support for oload.site (#16039)
+* [naver] Fix extraction (#16029)
+* [dramafever] Partially switch to API v5 (#16026)
+* [abc:iview] Unescape title and series meta fields (#15994)
+* [videa] Extend URL regular expression (#16003)
+
+
+version 2018.03.26.1
+
+Core
++ [downloader/external] Add elapsed time to progress hook (#10876)
+* [downloader/external,fragment] Fix download finalization when writing file
+  to stdout (#10809, #10876, #15799)
+
+Extractors
+* [vrv] Fix extraction on python2 (#15928)
+* [afreecatv] Update referrer (#15947)
++ [24video] Add support for 24video.sexy (#15973)
+* [crackle] Bypass geo restriction
+* [crackle] Fix extraction (#15969)
++ [lenta] Add support for lenta.ru (#15953)
++ [instagram:user] Add pagination (#15934)
+* [youku] Update ccode (#15939)
+* [libsyn] Adapt to new page structure
+
+
+version 2018.03.20
+
+Core
+* [extractor/common] Improve thumbnail extraction for HTML5 entries
+* Generalize XML manifest processing code and improve XSPF parsing
++ [extractor/common] Add _download_xml_handle
++ [extractor/common] Add support for relative URIs in _parse_xspf (#15794)
+
+Extractors
++ [7plus] Extract series metadata (#15862, #15906)
+* [9now] Bypass geo restriction (#15920)
+* [cbs] Skip unavailable assets (#13490, #13506, #15776)
++ [canalc2] Add support for HTML5 videos (#15916, #15919)
++ [ceskatelevize] Add support for iframe embeds (#15918)
++ [prosiebensat1] Add support for galileo.tv (#15894)
++ [generic] Add support for xfileshare embeds (#15879)
+* [bilibili] Switch to v2 playurl API
+* [bilibili] Fix and improve extraction (#15048, #15430, #15622, #15863)
+* [heise] Improve extraction (#15496, #15784, #15026)
+* [instagram] Fix user videos extraction (#15858)
+
+
 version 2018.03.14
 
 Extractors
index fe247810fead99df5f519c9e8b8b66dd373a50f1..4a62f44bc0b739bfe56a202861f23b0f62359f67 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -14,6 +14,9 @@ PYTHON ?= /usr/bin/env python
 # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
 SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi)
 
+# set markdown input format to "markdown-smart" for pandoc version 2 and to "markdown" for pandoc prior to version 2
+MARKDOWN = $(shell if [ `pandoc -v | head -n1 | cut -d" " -f2 | head -c1` = "2" ]; then echo markdown-smart; else echo markdown; fi)
+
 install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
        install -d $(DESTDIR)$(BINDIR)
        install -m 755 youtube-dl $(DESTDIR)$(BINDIR)
@@ -82,11 +85,11 @@ supportedsites:
        $(PYTHON) devscripts/make_supportedsites.py docs/supportedsites.md
 
 README.txt: README.md
-       pandoc -f markdown -t plain README.md -o README.txt
+       pandoc -f $(MARKDOWN) -t plain README.md -o README.txt
 
 youtube-dl.1: README.md
        $(PYTHON) devscripts/prepare_manpage.py youtube-dl.1.temp.md
-       pandoc -s -f markdown -t man youtube-dl.1.temp.md -o youtube-dl.1
+       pandoc -s -f $(MARKDOWN) -t man youtube-dl.1.temp.md -o youtube-dl.1
        rm -f youtube-dl.1.temp.md
 
 youtube-dl.bash-completion: youtube_dl/*.py youtube_dl/*/*.py devscripts/bash-completion.in
index 7dba5775d9d910d396175eeefb51f38b71c6c831..5af0f387be8e34800e97acc46033d0702f90284e 100644 (file)
--- a/README.md
+++ b/README.md
@@ -223,7 +223,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
 
 ## Filesystem Options:
     -a, --batch-file FILE            File containing URLs to download ('-' for
-                                     stdin)
+                                     stdin), one URL per line. Lines starting
+                                     with '#', ';' or ']' are considered as
+                                     comments and ignored.
     --id                             Use only video ID in file name
     -o, --output TEMPLATE            Output filename template, see the "OUTPUT
                                      TEMPLATE" for all the info
index 24959f03234926da60136026cc16348bfbdb812a..55ff41210d91f0e75d9f91dc36775048c561297d 100644 (file)
@@ -254,7 +254,9 @@ Download Options:
 Filesystem Options:
 
     -a, --batch-file FILE            File containing URLs to download ('-' for
-                                     stdin)
+                                     stdin), one URL per line. Lines starting
+                                     with '#', ';' or ']' are considered as
+                                     comments and ignored.
     --id                             Use only video ID in file name
     -o, --output TEMPLATE            Output filename template, see the "OUTPUT
                                      TEMPLATE" for all the info
@@ -1351,12 +1353,12 @@ yourextractor):
 1.  Fork this repository
 2.  Check out the source code with:
 
-        git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git
+         git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git
 
 3.  Start a new git branch with
 
-        cd youtube-dl
-        git checkout -b yourextractor
+         cd youtube-dl
+         git checkout -b yourextractor
 
 4.  Start with this simple template and save it to
     youtube_dl/extractor/yourextractor.py:
@@ -1419,10 +1421,10 @@ yourextractor):
 9.  When the tests pass, add the new files and commit them and push the
     result, like this:
 
-        $ git add youtube_dl/extractor/extractors.py
-        $ git add youtube_dl/extractor/yourextractor.py
-        $ git commit -m '[yourextractor] Add new extractor'
-        $ git push origin yourextractor
+         $ git add youtube_dl/extractor/extractors.py
+         $ git add youtube_dl/extractor/yourextractor.py
+         $ git commit -m '[yourextractor] Add new extractor'
+         $ git push origin yourextractor
 
 10. Finally, create a pull request. We'll then review and merge it.
 
index 80358bb147d64dd8520f008c12d5ec45ccef8c33..a110f687b1ad0c84914d4c403073bea6458c06cc 100644 (file)
  - **ESPN**
  - **ESPNArticle**
  - **EsriVideo**
- - **ETOnline**
  - **Europa**
  - **EveryonesMixtape**
  - **ExpoTV**
  - **Lecture2Go**
  - **LEGO**
  - **Lemonde**
+ - **Lenta**
  - **LePlaylist**
  - **LetvCloud**: 乐视云
  - **Libsyn**
  - **PhilharmonieDeParis**: Philharmonie de Paris
  - **phoenix.de**
  - **Photobucket**
+ - **Picarto**
+ - **PicartoVod**
  - **Piksel**
  - **Pinkbike**
  - **Pladform**
  - **SunPorno**
  - **SVT**
  - **SVTPlay**: SVT Play and Öppet arkiv
+ - **SVTSeries**
  - **SWRMediathek**
  - **Syfy**
  - **SztvHu**
  - **TVNoe**
  - **TVNow**
  - **TVNowList**
+ - **TVNowShow**
  - **tvp**: Telewizja Polska
  - **tvp:embed**: Telewizja Polska
  - **tvp:series**
index 7b31d5198b561f7e3725a15055439ca5f35b791b..4833396a521bf1d7a072db8ad425bed333235248 100644 (file)
@@ -694,6 +694,55 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                 self.ie._sort_formats(formats)
                 expect_value(self, formats, expected_formats, None)
 
+    def test_parse_xspf(self):
+        _TEST_CASES = [
+            (
+                'foo_xspf',
+                'https://example.org/src/foo_xspf.xspf',
+                [{
+                    'id': 'foo_xspf',
+                    'title': 'Pandemonium',
+                    'description': 'Visit http://bigbrother404.bandcamp.com',
+                    'duration': 202.416,
+                    'formats': [{
+                        'manifest_url': 'https://example.org/src/foo_xspf.xspf',
+                        'url': 'https://example.org/src/cd1/track%201.mp3',
+                    }],
+                }, {
+                    'id': 'foo_xspf',
+                    'title': 'Final Cartridge (Nichico Twelve Remix)',
+                    'description': 'Visit http://bigbrother404.bandcamp.com',
+                    'duration': 255.857,
+                    'formats': [{
+                        'manifest_url': 'https://example.org/src/foo_xspf.xspf',
+                        'url': 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3',
+                    }],
+                }, {
+                    'id': 'foo_xspf',
+                    'title': 'Rebuilding Nightingale',
+                    'description': 'Visit http://bigbrother404.bandcamp.com',
+                    'duration': 287.915,
+                    'formats': [{
+                        'manifest_url': 'https://example.org/src/foo_xspf.xspf',
+                        'url': 'https://example.org/src/track3.mp3',
+                    }, {
+                        'manifest_url': 'https://example.org/src/foo_xspf.xspf',
+                        'url': 'https://example.com/track3.mp3',
+                    }]
+                }]
+            ),
+        ]
+
+        for xspf_file, xspf_url, expected_entries in _TEST_CASES:
+            with io.open('./test/testdata/xspf/%s.xspf' % xspf_file,
+                         mode='r', encoding='utf-8') as f:
+                entries = self.ie._parse_xspf(
+                    compat_etree_fromstring(f.read().encode('utf-8')),
+                    xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url)
+                expect_value(self, entries, expected_entries, None)
+                for i in range(len(entries)):
+                    expect_dict(self, entries[i], expected_entries[i])
+
 
 if __name__ == '__main__':
     unittest.main()
index 1b8de822a8b5c95e836b954633cdd90318c97da9..7d57a628e5ef79c5e12d13ccd0a2b515548ffa60 100644 (file)
@@ -232,7 +232,7 @@ class TestNPOSubtitles(BaseTestSubtitles):
 
 
 class TestMTVSubtitles(BaseTestSubtitles):
-    url = 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother'
+    url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans'
     IE = ComedyCentralIE
 
     def getInfoDict(self):
@@ -243,7 +243,7 @@ class TestMTVSubtitles(BaseTestSubtitles):
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
         self.assertEqual(set(subtitles.keys()), set(['en']))
-        self.assertEqual(md5(subtitles['en']), 'b9f6ca22a6acf597ec76f61749765e65')
+        self.assertEqual(md5(subtitles['en']), '78206b8d8a0cfa9da64dc026eea48961')
 
 
 class TestNRKSubtitles(BaseTestSubtitles):
index a1fe6fdb2cad56151ff1b21f210112269740cff1..253a7fe176c69491df316af3925b565713e5e5c2 100644 (file)
@@ -1072,6 +1072,18 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
         self.assertFalse(match_str(
             'like_count > 100 & dislike_count <? 50 & description',
             {'like_count': 190, 'dislike_count': 10}))
+        self.assertTrue(match_str('is_live', {'is_live': True}))
+        self.assertFalse(match_str('is_live', {'is_live': False}))
+        self.assertFalse(match_str('is_live', {'is_live': None}))
+        self.assertFalse(match_str('is_live', {}))
+        self.assertFalse(match_str('!is_live', {'is_live': True}))
+        self.assertTrue(match_str('!is_live', {'is_live': False}))
+        self.assertTrue(match_str('!is_live', {'is_live': None}))
+        self.assertTrue(match_str('!is_live', {}))
+        self.assertTrue(match_str('title', {'title': 'abc'}))
+        self.assertTrue(match_str('title', {'title': ''}))
+        self.assertFalse(match_str('!title', {'title': 'abc'}))
+        self.assertFalse(match_str('!title', {'title': ''}))
 
     def test_parse_dfxp_time_expr(self):
         self.assertEqual(parse_dfxp_time_expr(None), None)
index 7a33dbf88e90f2d901b144759ffa90552787885c..c4f0abbeaaacbe3b469320c13a205e8f738c443b 100644 (file)
@@ -61,7 +61,7 @@ class TestYoutubeLists(unittest.TestCase):
         dl = FakeYDL()
         dl.params['extract_flat'] = True
         ie = YoutubePlaylistIE(dl)
-        result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
+        result = ie.extract('https://www.youtube.com/playlist?list=PL-KKIb8rvtMSrAO9YFbeM6UQrAqoFTUWv')
         self.assertIsPlaylist(result)
         for entry in result['entries']:
             self.assertTrue(entry.get('title'))
diff --git a/test/testdata/xspf/foo_xspf.xspf b/test/testdata/xspf/foo_xspf.xspf
new file mode 100644 (file)
index 0000000..b7f0086
--- /dev/null
@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<playlist version="1" xmlns="http://xspf.org/ns/0/">
+    <date>2018-03-09T18:01:43Z</date>
+    <trackList>
+        <track>
+            <location>cd1/track%201.mp3</location>
+            <title>Pandemonium</title>
+            <creator>Foilverb</creator>
+            <annotation>Visit http://bigbrother404.bandcamp.com</annotation>
+            <album>Pandemonium EP</album>
+            <trackNum>1</trackNum>
+            <duration>202416</duration>
+        </track>
+        <track>
+            <location>../%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3</location>
+            <title>Final Cartridge (Nichico Twelve Remix)</title>
+            <annotation>Visit http://bigbrother404.bandcamp.com</annotation>
+            <creator>Foilverb</creator>
+            <album>Pandemonium EP</album>
+            <trackNum>2</trackNum>
+            <duration>255857</duration>
+        </track>
+        <track>
+            <location>track3.mp3</location>
+            <location>https://example.com/track3.mp3</location>
+            <title>Rebuilding Nightingale</title>
+            <annotation>Visit http://bigbrother404.bandcamp.com</annotation>
+            <creator>Foilverb</creator>
+            <album>Pandemonium EP</album>
+            <trackNum>3</trackNum>
+            <duration>287915</duration>
+        </track>
+    </trackList>
+</playlist>
index 56daa4b63bc52c7df059c5f00e2303143c887c85..7875c3a7f8bc1194834c7e7c341fc5f1204716e0 100755 (executable)
Binary files a/youtube-dl and b/youtube-dl differ
index b859b1de59c61f51f5cd0e76466aab6ebf3f0b42..976505407a134122a37a351da616e66708bcecaf 100644 (file)
@@ -1,4 +1,7 @@
+.\" Automatically generated by Pandoc 2.1.3
+.\"
 .TH "YOUTUBE\-DL" "1" "" "" ""
+.hy
 .SH NAME
 .PP
 youtube\-dl \- download videos from youtube.com or other video platforms
@@ -383,7 +386,10 @@ Give these arguments to the external downloader
 .SS Filesystem Options:
 .TP
 .B \-a, \-\-batch\-file \f[I]FILE\f[]
-File containing URLs to download (\[aq]\-\[aq] for stdin)
+File containing URLs to download (\[aq]\-\[aq] for stdin), one URL per
+line.
+Lines starting with \[aq]#\[aq], \[aq];\[aq] or \[aq]]\[aq] are
+considered as comments and ignored.
 .RS
 .RE
 .TP
@@ -994,7 +1000,7 @@ machine\ twitch\ login\ my_twitch_account_name\ password\ my_twitch_password
 .PP
 To activate authentication with the \f[C]\&.netrc\f[] file you should
 pass \f[C]\-\-netrc\f[] to youtube\-dl or place it in the configuration
-file (#configuration).
+file.
 .PP
 On Windows you may also need to setup the \f[C]%HOME%\f[] environment
 variable manually.
@@ -1010,7 +1016,7 @@ set\ HOME=%USERPROFILE%
 The \f[C]\-o\f[] option allows users to indicate a template for the
 output file names.
 .PP
-\f[B]tl;dr:\f[] navigate me to examples (#output-template-examples).
+\f[B]tl;dr:\f[] navigate me to examples.
 .PP
 The basic usage is not to set any template arguments when downloading a
 single file, like in
@@ -1290,7 +1296,7 @@ expression\f[], i.e.
 an expression that describes format or formats you would like to
 download.
 .PP
-\f[B]tl;dr:\f[] navigate me to examples (#format-selection-examples).
+\f[B]tl;dr:\f[] navigate me to examples.
 .PP
 The simplest case is requesting a specific format, for example with
 \f[C]\-f\ 22\f[] you can download the format with format code equal to
@@ -1415,8 +1421,8 @@ If you want to preserve the old format selection behavior (prior to
 youtube\-dl 2015.04.26), i.e.
 you want to download the best available quality media served as a single
 file, you should explicitly specify your choice with \f[C]\-f\ best\f[].
-You may want to add it to the configuration file (#configuration) in
-order not to type it every time you run youtube\-dl.
+You may want to add it to the configuration file in order not to type it
+every time you run youtube\-dl.
 .SS Format selection examples
 .PP
 Note that on Windows you may need to use double quotes instead of
@@ -1626,12 +1632,12 @@ youtube\-dl in turn.
 .PP
 YouTube has switched to a new video info format in July 2011 which is
 not supported by old versions of youtube\-dl.
-See above (#how-do-i-update-youtube-dl) for how to update youtube\-dl.
+See above for how to update youtube\-dl.
 .SS ERROR: unable to download video
 .PP
 YouTube requires an additional signature since September 2012 which is
 not supported by old versions of youtube\-dl.
-See above (#how-do-i-update-youtube-dl) for how to update youtube\-dl.
+See above for how to update youtube\-dl.
 .SS Video URL contains an ampersand and I\[aq]m getting some strange
 output \f[C][1]\ 2839\f[] or
 \f[C]\[aq]v\[aq]\ is\ not\ recognized\ as\ an\ internal\ or\ external\ command\f[]
@@ -1662,15 +1668,15 @@ For Windows you have to use the double quotes:
 .PP
 In February 2015, the new YouTube player contained a character sequence
 in a string that was misinterpreted by old versions of youtube\-dl.
-See above (#how-do-i-update-youtube-dl) for how to update youtube\-dl.
+See above for how to update youtube\-dl.
 .SS HTTP Error 429: Too Many Requests or 402: Payment Required
 .PP
 These two error codes indicate that the service is blocking your IP
 address because of overuse.
 Contact the service and ask them to unblock your IP address, or \- if
 you have acquired a whitelisted IP address already \- use the
-\f[C]\-\-proxy\f[] or \f[C]\-\-source\-address\f[]
-options (#network-options) to select another IP address.
+\f[C]\-\-proxy\f[] or \f[C]\-\-source\-address\f[] options to select
+another IP address.
 .SS SyntaxError: Non\-ASCII character
 .PP
 The error
@@ -1718,10 +1724,10 @@ ffmpeg) by simply typing \f[C]youtube\-dl\f[] or \f[C]ffmpeg\f[], no
 matter what directory you\[aq]re in.
 .SS How do I put downloads into a specific folder?
 .PP
-Use the \f[C]\-o\f[] to specify an output template (#output-template),
-for example \f[C]\-o\ "/home/user/videos/%(title)s\-%(id)s.%(ext)s"\f[].
+Use the \f[C]\-o\f[] to specify an output template, for example
+\f[C]\-o\ "/home/user/videos/%(title)s\-%(id)s.%(ext)s"\f[].
 If you want this for all of your downloads, put the option into your
-configuration file (#configuration).
+configuration file.
 .SS How do I download a video starting with a \f[C]\-\f[]?
 .PP
 Either prepend \f[C]https://www.youtube.com/watch?v=\f[] or separate the
@@ -1869,7 +1875,7 @@ serves as a unified point.
 Unfortunately, the youtube\-dl project has grown too large to use
 personal email as an effective communication channel.
 .PP
-Please read the bug reporting instructions (#bugs) below.
+Please read the bug reporting instructions below.
 A lot of bugs lack all the necessary information.
 If you can, offer proxy, VPN, or shell access to the youtube\-dl
 developers.
@@ -1915,15 +1921,14 @@ you run it from a Python program.
 .SH Why do I need to go through that much red tape when filing bugs?
 .PP
 Before we had the issue template, despite our extensive bug reporting
-instructions (#bugs), about 80% of the issue reports we got were
-useless, for instance because people used ancient versions hundreds of
-releases old, because of simple syntactic errors (not in youtube\-dl but
-in general shell usage), because the problem was already reported
-multiple times before, because people did not actually read an error
-message, even if it said "please install ffmpeg", because people did not
-mention the URL they were trying to download and many more simple,
-easy\-to\-avoid problems, many of whom were totally unrelated to
-youtube\-dl.
+instructions, about 80% of the issue reports we got were useless, for
+instance because people used ancient versions hundreds of releases old,
+because of simple syntactic errors (not in youtube\-dl but in general
+shell usage), because the problem was already reported multiple times
+before, because people did not actually read an error message, even if
+it said "please install ffmpeg", because people did not mention the URL
+they were trying to download and many more simple, easy\-to\-avoid
+problems, many of whom were totally unrelated to youtube\-dl.
 .PP
 youtube\-dl is an open\-source project manned by too few volunteers, so
 we\[aq]d rather spend time fixing bugs where we are certain none of
@@ -1960,8 +1965,8 @@ nosetests
 \f[]
 .fi
 .PP
-See item 6 of new extractor tutorial (#adding-support-for-a-new-site)
-for how to run extractor specific test cases.
+See item 6 of new extractor tutorial for how to run extractor specific
+test cases.
 .PP
 If you want to create a build of youtube\-dl yourself, you\[aq]ll need
 .IP \[bu] 2
@@ -1993,7 +1998,7 @@ Check out the source code with:
 .IP
 .nf
 \f[C]
-git\ clone\ git\@github.com:YOUR_GITHUB_USERNAME/youtube\-dl.git
+git\ clone\ git\@github.com:YOUR_GITHUB_USERNAME/youtube\-dl.git
 \f[]
 .fi
 .RE
@@ -2003,8 +2008,8 @@ Start a new git branch with
 .IP
 .nf
 \f[C]
-cd\ youtube\-dl
-git\ checkout\ \-b\ yourextractor
+cd\ youtube\-dl
+git\ checkout\ \-b\ yourextractor
 \f[]
 .fi
 .RE
@@ -2079,9 +2084,8 @@ extractor should and may
 return (https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252).
 Add tests and code for as many as you want.
 .IP " 8." 4
-Make sure your code follows youtube\-dl coding
-conventions (#youtube-dl-coding-conventions) and check the code with
-flake8 (https://pypi.python.org/pypi/flake8).
+Make sure your code follows youtube\-dl coding conventions and check the
+code with flake8 (https://pypi.python.org/pypi/flake8).
 Also make sure your code works under all
 Python (https://www.python.org/) versions claimed supported by
 youtube\-dl, namely 2.6, 2.7, and 3.2+.
@@ -2093,10 +2097,10 @@ push (https://git-scm.com/docs/git-push) the result, like this:
 .IP
 .nf
 \f[C]
-$\ git\ add\ youtube_dl/extractor/extractors.py
-$\ git\ add\ youtube_dl/extractor/yourextractor.py
-$\ git\ commit\ \-m\ \[aq][yourextractor]\ Add\ new\ extractor\[aq]
-$\ git\ push\ origin\ yourextractor
+$\ git\ add\ youtube_dl/extractor/extractors.py
+$\ git\ add\ youtube_dl/extractor/yourextractor.py
+$\ git\ commit\ \-m\ \[aq][yourextractor]\ Add\ new\ extractor\[aq]
+$\ git\ push\ origin\ yourextractor
 \f[]
 .fi
 .RE
@@ -2401,7 +2405,7 @@ For discussions, join us in the IRC channel
 (webchat (https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
 .PP
 \f[B]Please include the full output of youtube\-dl when run with
-\f[C]\-v\f[]\f[], i.e.
+\f[BC]\-v\f[B]\f[], i.e.
 \f[B]add\f[] \f[C]\-v\f[] flag to \f[B]your command line\f[], copy the
 \f[B]whole\f[] output and post it in the issue body wrapped in ``` for
 better formatting.
index aa7c59b8af3ce645d8a3f493be2039501ef18579..ec98a2e5eb1658b79549ac57e027dfe012500311 100644 (file)
@@ -62,7 +62,7 @@ complete --command youtube-dl --long-option hls-prefer-ffmpeg --description 'Use
 complete --command youtube-dl --long-option hls-use-mpegts --description 'Use the mpegts container for HLS videos, allowing to play the video while downloading (some players may not be able to play it)'
 complete --command youtube-dl --long-option external-downloader --description 'Use the specified external downloader. Currently supports aria2c,avconv,axel,curl,ffmpeg,httpie,wget'
 complete --command youtube-dl --long-option external-downloader-args --description 'Give these arguments to the external downloader'
-complete --command youtube-dl --long-option batch-file --short-option a --description 'File containing URLs to download ('"'"'-'"'"' for stdin)' --require-parameter
+complete --command youtube-dl --long-option batch-file --short-option a --description 'File containing URLs to download ('"'"'-'"'"' for stdin), one URL per line. Lines starting with '"'"'#'"'"', '"'"';'"'"' or '"'"']'"'"' are considered as comments and ignored.' --require-parameter
 complete --command youtube-dl --long-option id --description 'Use only video ID in file name'
 complete --command youtube-dl --long-option output --short-option o --description 'Output filename template, see the "OUTPUT TEMPLATE" for all the info'
 complete --command youtube-dl --long-option autonumber-size
index 523dd1f7daf80839839d7b8a014f86897640b425..ad359880526116d71d13fa2a736ab02ca79e19c0 100755 (executable)
@@ -532,6 +532,8 @@ class YoutubeDL(object):
     def save_console_title(self):
         if not self.params.get('consoletitle', False):
             return
+        if self.params.get('simulate', False):
+            return
         if compat_os_name != 'nt' and 'TERM' in os.environ:
             # Save the title on stack
             self._write_string('\033[22;0t', self._screen_file)
@@ -539,6 +541,8 @@ class YoutubeDL(object):
     def restore_console_title(self):
         if not self.params.get('consoletitle', False):
             return
+        if self.params.get('simulate', False):
+            return
         if compat_os_name != 'nt' and 'TERM' in os.environ:
             # Restore the title from stack
             self._write_string('\033[23;0t', self._screen_file)
@@ -1849,7 +1853,7 @@ class YoutubeDL(object):
                     def compatible_formats(formats):
                         video, audio = formats
                         # Check extension
-                        video_ext, audio_ext = audio.get('ext'), video.get('ext')
+                        video_ext, audio_ext = video.get('ext'), audio.get('ext')
                         if video_ext and audio_ext:
                             COMPATIBLE_EXTS = (
                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
index cc16bbb83fb5da958df1564a85e50f04dca30ce0..edd125ee2c6539cf04cbba4ba282faa15b913496 100644 (file)
@@ -249,12 +249,13 @@ class FileDownloader(object):
             if self.params.get('noprogress', False):
                 self.to_screen('[download] Download completed')
             else:
-                s['_total_bytes_str'] = format_bytes(s['total_bytes'])
+                msg_template = '100%%'
+                if s.get('total_bytes') is not None:
+                    s['_total_bytes_str'] = format_bytes(s['total_bytes'])
+                    msg_template += ' of %(_total_bytes_str)s'
                 if s.get('elapsed') is not None:
                     s['_elapsed_str'] = self.format_seconds(s['elapsed'])
-                    msg_template = '100%% of %(_total_bytes_str)s in %(_elapsed_str)s'
-                else:
-                    msg_template = '100%% of %(_total_bytes_str)s'
+                    msg_template += ' in %(_elapsed_str)s'
                 self._report_progress_status(
                     msg_template % s, is_last_line=True)
 
index db018fa89e7b137c55fae08bc78d5b8d1c98f83f..958d00aac0ae0a3d58eca9069429cd305a137449 100644 (file)
@@ -1,9 +1,10 @@
 from __future__ import unicode_literals
 
 import os.path
+import re
 import subprocess
 import sys
-import re
+import time
 
 from .common import FileDownloader
 from ..compat import (
@@ -30,6 +31,7 @@ class ExternalFD(FileDownloader):
         tmpfilename = self.temp_name(filename)
 
         try:
+            started = time.time()
             retval = self._call_downloader(tmpfilename, info_dict)
         except KeyboardInterrupt:
             if not info_dict.get('is_live'):
@@ -41,15 +43,20 @@ class ExternalFD(FileDownloader):
             self.to_screen('[%s] Interrupted by user' % self.get_basename())
 
         if retval == 0:
-            fsize = os.path.getsize(encodeFilename(tmpfilename))
-            self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
-            self.try_rename(tmpfilename, filename)
-            self._hook_progress({
-                'downloaded_bytes': fsize,
-                'total_bytes': fsize,
+            status = {
                 'filename': filename,
                 'status': 'finished',
-            })
+                'elapsed': time.time() - started,
+            }
+            if filename != '-':
+                fsize = os.path.getsize(encodeFilename(tmpfilename))
+                self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
+                self.try_rename(tmpfilename, filename)
+                status.update({
+                    'downloaded_bytes': fsize,
+                    'total_bytes': fsize,
+                })
+            self._hook_progress(status)
             return True
         else:
             self.to_stderr('\n')
index ea5e3a4b5df9f957328557b6f822ef5494cfc9a6..927c7e491655f950bb1a1c316fcd7911b4b3f2fe 100644 (file)
@@ -241,12 +241,16 @@ class FragmentFD(FileDownloader):
             if os.path.isfile(ytdl_filename):
                 os.remove(ytdl_filename)
         elapsed = time.time() - ctx['started']
-        self.try_rename(ctx['tmpfilename'], ctx['filename'])
-        fsize = os.path.getsize(encodeFilename(ctx['filename']))
+
+        if ctx['tmpfilename'] == '-':
+            downloaded_bytes = ctx['complete_frags_downloaded_bytes']
+        else:
+            self.try_rename(ctx['tmpfilename'], ctx['filename'])
+            downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
 
         self._hook_progress({
-            'downloaded_bytes': fsize,
-            'total_bytes': fsize,
+            'downloaded_bytes': downloaded_bytes,
+            'total_bytes': downloaded_bytes,
             'filename': ctx['filename'],
             'status': 'finished',
             'elapsed': elapsed,
index 87017ed397bfd75690b0e4cab3821f549d6132cd..512f046849e3b6090ec06e14938d57c5279d66a1 100644 (file)
@@ -13,6 +13,7 @@ from ..utils import (
     int_or_none,
     parse_iso8601,
     try_get,
+    unescapeHTML,
     update_url_query,
 )
 
@@ -109,16 +110,17 @@ class ABCIViewIE(InfoExtractor):
 
     # ABC iview programs are normally available for 14 days only.
     _TESTS = [{
-        'url': 'http://iview.abc.net.au/programs/call-the-midwife/ZW0898A003S00',
+        'url': 'https://iview.abc.net.au/programs/ben-and-hollys-little-kingdom/ZY9247A021S00',
         'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
         'info_dict': {
-            'id': 'ZW0898A003S00',
+            'id': 'ZY9247A021S00',
             'ext': 'mp4',
-            'title': 'Series 5 Ep 3',
-            'description': 'md5:e0ef7d4f92055b86c4f33611f180ed79',
-            'upload_date': '20171228',
-            'uploader_id': 'abc1',
-            'timestamp': 1514499187,
+            'title': "Gaston's Visit",
+            'series': "Ben And Holly's Little Kingdom",
+            'description': 'md5:18db170ad71cf161e006a4c688e33155',
+            'upload_date': '20180318',
+            'uploader_id': 'abc4kids',
+            'timestamp': 1521400959,
         },
         'params': {
             'skip_download': True,
@@ -169,12 +171,12 @@ class ABCIViewIE(InfoExtractor):
 
         return {
             'id': video_id,
-            'title': title,
+            'title': unescapeHTML(title),
             'description': self._html_search_meta(['og:description', 'twitter:description'], webpage),
             'thumbnail': self._html_search_meta(['og:image', 'twitter:image:src'], webpage),
             'duration': int_or_none(video_params.get('eventDuration')),
             'timestamp': parse_iso8601(video_params.get('pubDate'), ' '),
-            'series': video_params.get('seriesTitle'),
+            'series': unescapeHTML(video_params.get('seriesTitle')),
             'series_id': video_params.get('seriesHouseNumber') or video_id[:7],
             'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage, default=None)),
             'episode': self._html_search_meta('episode_title', webpage, default=None),
index 5871e72dca61cc64dd57833d891bd77e854e93df..6d846ea7a18829d9af03278b5da3996782bb26ab 100644 (file)
@@ -7,7 +7,9 @@ import functools
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
+    float_or_none,
     int_or_none,
+    try_get,
     unified_timestamp,
     OnDemandPagedList,
 )
@@ -24,40 +26,58 @@ class ACastIE(InfoExtractor):
             'id': '57de3baa-4bb0-487e-9418-2692c1277a34',
             'ext': 'mp3',
             'title': '"Where Are You?": Taipei 101, Taiwan',
+            'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e',
             'timestamp': 1196172000,
             'upload_date': '20071127',
-            'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e',
             'duration': 211,
+            'creator': 'Concierge',
+            'series': 'Condé Nast Traveler Podcast',
+            'episode': '"Where Are You?": Taipei 101, Taiwan',
         }
     }, {
         # test with multiple blings
         'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
-        'md5': 'e87d5b8516cd04c0d81b6ee1caca28d0',
+        'md5': 'a02393c74f3bdb1801c3ec2695577ce0',
         'info_dict': {
             'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
             'ext': 'mp3',
             'title': '2. Raggarmordet - Röster ur det förflutna',
+            'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4',
             'timestamp': 1477346700,
             'upload_date': '20161024',
-            'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4',
-            'duration': 2766,
+            'duration': 2766.602563,
+            'creator': 'Anton Berg & Martin Johnson',
+            'series': 'Spår',
+            'episode': '2. Raggarmordet - Röster ur det förflutna',
         }
     }]
 
     def _real_extract(self, url):
         channel, display_id = re.match(self._VALID_URL, url).groups()
+        s = self._download_json(
+            'https://play-api.acast.com/stitch/%s/%s' % (channel, display_id),
+            display_id)['result']
+        media_url = s['url']
         cast_data = self._download_json(
-            'https://play-api.acast.com/splash/%s/%s' % (channel, display_id), display_id)
-        e = cast_data['result']['episode']
+            'https://play-api.acast.com/splash/%s/%s' % (channel, display_id),
+            display_id)['result']
+        e = cast_data['episode']
+        title = e['name']
         return {
             'id': compat_str(e['id']),
             'display_id': display_id,
-            'url': e['mediaUrl'],
-            'title': e['name'],
-            'description': e.get('description'),
+            'url': media_url,
+            'title': title,
+            'description': e.get('description') or e.get('summary'),
             'thumbnail': e.get('image'),
             'timestamp': unified_timestamp(e.get('publishingDate')),
-            'duration': int_or_none(e.get('duration')),
+            'duration': float_or_none(s.get('duration') or e.get('duration')),
+            'filesize': int_or_none(e.get('contentLength')),
+            'creator': try_get(cast_data, lambda x: x['show']['author'], compat_str),
+            'series': try_get(cast_data, lambda x: x['show']['name'], compat_str),
+            'season_number': int_or_none(e.get('seasonNumber')),
+            'episode': title,
+            'episode_number': int_or_none(e.get('episodeNumber')),
         }
 
 
index df2a3fc4a2a24d2feeb81083d2414d8dc09b682b..4b3d9713654ee7a0b939c8f4d39e97ab79c91071 100644 (file)
@@ -9,6 +9,7 @@ from ..utils import (
     determine_ext,
     ExtractorError,
     int_or_none,
+    urlencode_postdata,
     xpath_text,
 )
 
@@ -28,6 +29,7 @@ class AfreecaTVIE(InfoExtractor):
                         )
                         (?P<id>\d+)
                     '''
+    _NETRC_MACHINE = 'afreecatv'
     _TESTS = [{
         'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
         'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
@@ -139,22 +141,22 @@ class AfreecaTVIE(InfoExtractor):
             'skip_download': True,
         },
     }, {
-        # adult video
-        'url': 'http://vod.afreecatv.com/PLAYER/STATION/26542731',
+        # PARTIAL_ADULT
+        'url': 'http://vod.afreecatv.com/PLAYER/STATION/32028439',
         'info_dict': {
-            'id': '20171001_F1AE1711_196617479_1',
+            'id': '20180327_27901457_202289533_1',
             'ext': 'mp4',
-            'title': '[생]서아 초심 찾기 방송 (part 1)',
+            'title': '[생]빨개요♥ (part 1)',
             'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
-            'uploader': 'BJ서아',
+            'uploader': '[SA]서아',
             'uploader_id': 'bjdyrksu',
-            'upload_date': '20171001',
-            'duration': 3600,
-            'age_limit': 18,
+            'upload_date': '20180327',
+            'duration': 3601,
         },
         'params': {
             'skip_download': True,
         },
+        'expected_warnings': ['adult content'],
     }, {
         'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
         'only_matching': True,
@@ -172,6 +174,51 @@ class AfreecaTVIE(InfoExtractor):
             video_key['part'] = int(m.group('part'))
         return video_key
 
+    def _real_initialize(self):
+        self._login()
+
+    def _login(self):
+        username, password = self._get_login_info()
+        if username is None:
+            return
+
+        login_form = {
+            'szWork': 'login',
+            'szType': 'json',
+            'szUid': username,
+            'szPassword': password,
+            'isSaveId': 'false',
+            'szScriptVar': 'oLoginRet',
+            'szAction': '',
+        }
+
+        response = self._download_json(
+            'https://login.afreecatv.com/app/LoginAction.php', None,
+            'Logging in', data=urlencode_postdata(login_form))
+
+        _ERRORS = {
+            -4: 'Your account has been suspended due to a violation of our terms and policies.',
+            -5: 'https://member.afreecatv.com/app/user_delete_progress.php',
+            -6: 'https://login.afreecatv.com/membership/changeMember.php',
+            -8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
+            -9: 'https://member.afreecatv.com/app/pop_login_block.php',
+            -11: 'https://login.afreecatv.com/afreeca/second_login.php',
+            -12: 'https://member.afreecatv.com/app/user_security.php',
+            0: 'The username does not exist or you have entered the wrong password.',
+            -1: 'The username does not exist or you have entered the wrong password.',
+            -3: 'You have entered your username/password incorrectly.',
+            -7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.',
+            -10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.',
+            -32008: 'You have failed to log in. Please contact our Help Center.',
+        }
+
+        result = int_or_none(response.get('RESULT'))
+        if result != 1:
+            error = _ERRORS.get(result, 'You have failed to log in.')
+            raise ExtractorError(
+                'Unable to login: %s said: %s' % (self.IE_NAME, error),
+                expected=True)
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
@@ -188,21 +235,41 @@ class AfreecaTVIE(InfoExtractor):
         video_id = self._search_regex(
             r'nTitleNo\s*=\s*(\d+)', webpage, 'title', default=video_id)
 
-        video_xml = self._download_xml(
-            'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
-            video_id, headers={
-                'Referer': 'http://vod.afreecatv.com/embed.php',
-            }, query={
+        partial_view = False
+        for _ in range(2):
+            query = {
                 'nTitleNo': video_id,
                 'nStationNo': station_id,
                 'nBbsNo': bbs_id,
-                'partialView': 'SKIP_ADULT',
-            })
+            }
+            if partial_view:
+                query['partialView'] = 'SKIP_ADULT'
+            video_xml = self._download_xml(
+                'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
+                video_id, 'Downloading video info XML%s'
+                % (' (skipping adult)' if partial_view else ''),
+                video_id, headers={
+                    'Referer': url,
+                }, query=query)
 
-        flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
-        if flag and flag != 'SUCCEED':
+            flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
+            if flag and flag == 'SUCCEED':
+                break
+            if flag == 'PARTIAL_ADULT':
+                self._downloader.report_warning(
+                    'In accordance with local laws and regulations, underage users are restricted from watching adult content. '
+                    'Only content suitable for all ages will be downloaded. '
+                    'Provide account credentials if you wish to download restricted content.')
+                partial_view = True
+                continue
+            elif flag == 'ADULT':
+                error = 'Only users older than 19 are able to watch this video. Provide account credentials to download this content.'
+            else:
+                error = flag
             raise ExtractorError(
-                '%s said: %s' % (self.IE_NAME, flag), expected=True)
+                '%s said: %s' % (self.IE_NAME, error), expected=True)
+        else:
+            raise ExtractorError('Unable to download video info')
 
         video_element = video_xml.findall(compat_xpath('./track/video'))[-1]
         if video_element is None or video_element.text is None:
old mode 100755 (executable)
new mode 100644 (file)
index beffcecd09f55ad4bd5365639ffb2d0459a624f2..3e3348ef5baed8f6e9a31634778421c921e352ce 100644 (file)
@@ -27,14 +27,14 @@ class BiliBiliIE(InfoExtractor):
 
     _TESTS = [{
         'url': 'http://www.bilibili.tv/video/av1074402/',
-        'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e',
+        'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
         'info_dict': {
             'id': '1074402',
-            'ext': 'mp4',
+            'ext': 'flv',
             'title': '【金坷垃】金泡沫',
             'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
-            'duration': 308.315,
-            'timestamp': 1398012660,
+            'duration': 308.067,
+            'timestamp': 1398012678,
             'upload_date': '20140420',
             'thumbnail': r're:^https?://.+\.jpg',
             'uploader': '菊子桑',
@@ -59,17 +59,38 @@ class BiliBiliIE(InfoExtractor):
         'url': 'http://www.bilibili.com/video/av8903802/',
         'info_dict': {
             'id': '8903802',
-            'ext': 'mp4',
             'title': '阿滴英文|英文歌分享#6 "Closer',
             'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
-            'uploader': '阿滴英文',
-            'uploader_id': '65880958',
-            'timestamp': 1488382620,
-            'upload_date': '20170301',
-        },
-        'params': {
-            'skip_download': True,  # Test metadata only
         },
+        'playlist': [{
+            'info_dict': {
+                'id': '8903802_part1',
+                'ext': 'flv',
+                'title': '阿滴英文|英文歌分享#6 "Closer',
+                'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
+                'uploader': '阿滴英文',
+                'uploader_id': '65880958',
+                'timestamp': 1488382634,
+                'upload_date': '20170301',
+            },
+            'params': {
+                'skip_download': True,  # Test metadata only
+            },
+        }, {
+            'info_dict': {
+                'id': '8903802_part2',
+                'ext': 'flv',
+                'title': '阿滴英文|英文歌分享#6 "Closer',
+                'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
+                'uploader': '阿滴英文',
+                'uploader_id': '65880958',
+                'timestamp': 1488382634,
+                'upload_date': '20170301',
+            },
+            'params': {
+                'skip_download': True,  # Test metadata only
+            },
+        }]
     }]
 
     _APP_KEY = '84956560bc028eb7'
@@ -92,8 +113,12 @@ class BiliBiliIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         if 'anime/' not in url:
-            cid = compat_parse_qs(self._search_regex(
+            cid = self._search_regex(
+                r'cid(?:["\']:|=)(\d+)', webpage, 'cid',
+                default=None
+            ) or compat_parse_qs(self._search_regex(
                 [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
+                 r'EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)',
                  r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
                 webpage, 'player parameters'))['cid'][0]
         else:
@@ -114,53 +139,66 @@ class BiliBiliIE(InfoExtractor):
                 self._report_error(js)
             cid = js['result']['cid']
 
-        payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid)
-        sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()
-
         headers = {
             'Referer': url
         }
         headers.update(self.geo_verification_headers())
 
-        video_info = self._download_json(
-            'http://interface.bilibili.com/playurl?%s&sign=%s' % (payload, sign),
-            video_id, note='Downloading video info page',
-            headers=headers)
-
-        if 'durl' not in video_info:
-            self._report_error(video_info)
-
         entries = []
 
-        for idx, durl in enumerate(video_info['durl']):
-            formats = [{
-                'url': durl['url'],
-                'filesize': int_or_none(durl['size']),
-            }]
-            for backup_url in durl.get('backup_url', []):
-                formats.append({
-                    'url': backup_url,
-                    # backup URLs have lower priorities
-                    'preference': -2 if 'hd.mp4' in backup_url else -3,
+        RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4')
+        for num, rendition in enumerate(RENDITIONS, start=1):
+            payload = 'appkey=%s&cid=%s&otype=json&%s' % (self._APP_KEY, cid, rendition)
+            sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()
+
+            video_info = self._download_json(
+                'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign),
+                video_id, note='Downloading video info page',
+                headers=headers, fatal=num == len(RENDITIONS))
+
+            if not video_info:
+                continue
+
+            if 'durl' not in video_info:
+                if num < len(RENDITIONS):
+                    continue
+                self._report_error(video_info)
+
+            for idx, durl in enumerate(video_info['durl']):
+                formats = [{
+                    'url': durl['url'],
+                    'filesize': int_or_none(durl['size']),
+                }]
+                for backup_url in durl.get('backup_url', []):
+                    formats.append({
+                        'url': backup_url,
+                        # backup URLs have lower priorities
+                        'preference': -2 if 'hd.mp4' in backup_url else -3,
+                    })
+
+                for a_format in formats:
+                    a_format.setdefault('http_headers', {}).update({
+                        'Referer': url,
+                    })
+
+                self._sort_formats(formats)
+
+                entries.append({
+                    'id': '%s_part%s' % (video_id, idx),
+                    'duration': float_or_none(durl.get('length'), 1000),
+                    'formats': formats,
                 })
+            break
 
-            for a_format in formats:
-                a_format.setdefault('http_headers', {}).update({
-                    'Referer': url,
-                })
-
-            self._sort_formats(formats)
-
-            entries.append({
-                'id': '%s_part%s' % (video_id, idx),
-                'duration': float_or_none(durl.get('length'), 1000),
-                'formats': formats,
-            })
-
-        title = self._html_search_regex('<h1[^>]*>([^<]+)</h1>', webpage, 'title')
+        title = self._html_search_regex(
+            ('<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
+             '(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
+            group='title')
         description = self._html_search_meta('description', webpage)
         timestamp = unified_timestamp(self._html_search_regex(
-            r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', default=None))
+            r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time',
+            default=None) or self._html_search_meta(
+            'uploadDate', webpage, 'timestamp', default=None))
         thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage)
 
         # TODO 'view_count' requires deobfuscating Javascript
@@ -174,13 +212,16 @@ class BiliBiliIE(InfoExtractor):
         }
 
         uploader_mobj = re.search(
-            r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"',
+            r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]*>(?P<name>[^<]+)',
             webpage)
         if uploader_mobj:
             info.update({
                 'uploader': uploader_mobj.group('name'),
                 'uploader_id': uploader_mobj.group('id'),
             })
+        if not info.get('uploader'):
+            info['uploader'] = self._html_search_meta(
+                'author', webpage, 'uploader', default=None)
 
         for entry in entries:
             entry.update(info)
index 5a87c2661910303d638351a0f5155dd20db35793..70d16767f19966fe625f8496f334fde1a15929f8 100644 (file)
@@ -3,15 +3,13 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from .youtube import YoutubeIE
 from ..compat import compat_str
-from ..utils import (
-    int_or_none,
-    parse_age_limit,
-)
+from ..utils import int_or_none
 
 
 class BreakIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?P<site>break|screenjunkies)\.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)'
+    _VALID_URL = r'https?://(?:www\.)?break\.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)'
     _TESTS = [{
         'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056',
         'info_dict': {
@@ -19,125 +17,73 @@ class BreakIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'When Girls Act Like D-Bags',
             'age_limit': 13,
-        }
-    }, {
-        'url': 'http://www.screenjunkies.com/video/best-quentin-tarantino-movie-2841915',
-        'md5': '5c2b686bec3d43de42bde9ec047536b0',
-        'info_dict': {
-            'id': '2841915',
-            'display_id': 'best-quentin-tarantino-movie',
-            'ext': 'mp4',
-            'title': 'Best Quentin Tarantino Movie',
-            'thumbnail': r're:^https?://.*\.jpg',
-            'duration': 3671,
-            'age_limit': 13,
-            'tags': list,
-        },
-    }, {
-        'url': 'http://www.screenjunkies.com/video/honest-trailers-the-dark-knight',
-        'info_dict': {
-            'id': '2348808',
-            'display_id': 'honest-trailers-the-dark-knight',
-            'ext': 'mp4',
-            'title': 'Honest Trailers - The Dark Knight',
-            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
-            'age_limit': 10,
-            'tags': list,
         },
     }, {
-        # requires subscription but worked around
-        'url': 'http://www.screenjunkies.com/video/knocking-dead-ep-1-the-show-so-far-3003285',
+        # youtube embed
+        'url': 'http://www.break.com/video/someone-forgot-boat-brakes-work',
         'info_dict': {
-            'id': '3003285',
-            'display_id': 'knocking-dead-ep-1-the-show-so-far',
+            'id': 'RrrDLdeL2HQ',
             'ext': 'mp4',
-            'title': 'State of The Dead Recap: Knocking Dead Pilot',
-            'thumbnail': r're:^https?://.*\.jpg',
-            'duration': 3307,
-            'age_limit': 13,
-            'tags': list,
+            'title': 'Whale Watching Boat Crashing Into San Diego Dock',
+            'description': 'md5:afc1b2772f0a8468be51dd80eb021069',
+            'upload_date': '20160331',
+            'uploader': 'Steve Holden',
+            'uploader_id': 'sdholden07',
         },
+        'params': {
+            'skip_download': True,
+        }
     }, {
         'url': 'http://www.break.com/video/ugc/baby-flex-2773063',
         'only_matching': True,
     }]
 
-    _DEFAULT_BITRATES = (48, 150, 320, 496, 864, 2240, 3264)
-
     def _real_extract(self, url):
-        site, display_id, video_id = re.match(self._VALID_URL, url).groups()
+        display_id, video_id = re.match(self._VALID_URL, url).groups()
 
-        if not video_id:
-            webpage = self._download_webpage(url, display_id)
-            video_id = self._search_regex(
-                (r'src=["\']/embed/(\d+)', r'data-video-content-id=["\'](\d+)'),
-                webpage, 'video id')
+        webpage = self._download_webpage(url, display_id)
 
-        webpage = self._download_webpage(
-            'http://www.%s.com/embed/%s' % (site, video_id),
-            display_id, 'Downloading video embed page')
-        embed_vars = self._parse_json(
+        youtube_url = YoutubeIE._extract_url(webpage)
+        if youtube_url:
+            return self.url_result(youtube_url, ie=YoutubeIE.ie_key())
+
+        content = self._parse_json(
             self._search_regex(
-                r'(?s)embedVars\s*=\s*({.+?})\s*</script>', webpage, 'embed vars'),
+                r'(?s)content["\']\s*:\s*(\[.+?\])\s*[,\n]', webpage,
+                'content'),
             display_id)
 
-        youtube_id = embed_vars.get('youtubeId')
-        if youtube_id:
-            return self.url_result(youtube_id, 'Youtube')
-
-        title = embed_vars['contentName']
-
         formats = []
-        bitrates = []
-        for f in embed_vars.get('media', []):
-            if not f.get('uri') or f.get('mediaPurpose') != 'play':
+        for video in content:
+            video_url = video.get('url')
+            if not video_url or not isinstance(video_url, compat_str):
                 continue
-            bitrate = int_or_none(f.get('bitRate'))
-            if bitrate:
-                bitrates.append(bitrate)
+            bitrate = int_or_none(self._search_regex(
+                r'(\d+)_kbps', video_url, 'tbr', default=None))
             formats.append({
-                'url': f['uri'],
+                'url': video_url,
                 'format_id': 'http-%d' % bitrate if bitrate else 'http',
-                'width': int_or_none(f.get('width')),
-                'height': int_or_none(f.get('height')),
                 'tbr': bitrate,
-                'format': 'mp4',
             })
+        self._sort_formats(formats)
 
-        if not bitrates:
-            # When subscriptionLevel > 0, i.e. plus subscription is required
-            # media list will be empty. However, hds and hls uris are still
-            # available. We can grab them assuming bitrates to be default.
-            bitrates = self._DEFAULT_BITRATES
-
-        auth_token = embed_vars.get('AuthToken')
+        title = self._search_regex(
+            (r'title["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+             r'<h1[^>]*>(?P<value>[^<]+)'), webpage, 'title', group='value')
 
-        def construct_manifest_url(base_url, ext):
-            pieces = [base_url]
-            pieces.extend([compat_str(b) for b in bitrates])
-            pieces.append('_kbps.mp4.%s?%s' % (ext, auth_token))
-            return ','.join(pieces)
+        def get(key, name):
+            return int_or_none(self._search_regex(
+                r'%s["\']\s*:\s*["\'](\d+)' % key, webpage, name,
+                default=None))
 
-        if bitrates and auth_token:
-            hds_url = embed_vars.get('hdsUri')
-            if hds_url:
-                formats.extend(self._extract_f4m_formats(
-                    construct_manifest_url(hds_url, 'f4m'),
-                    display_id, f4m_id='hds', fatal=False))
-            hls_url = embed_vars.get('hlsUri')
-            if hls_url:
-                formats.extend(self._extract_m3u8_formats(
-                    construct_manifest_url(hls_url, 'm3u8'),
-                    display_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
-        self._sort_formats(formats)
+        age_limit = get('ratings', 'age limit')
+        video_id = video_id or get('pid', 'video id') or display_id
 
         return {
             'id': video_id,
             'display_id': display_id,
             'title': title,
-            'thumbnail': embed_vars.get('thumbUri'),
-            'duration': int_or_none(embed_vars.get('videoLengthInSeconds')) or None,
-            'age_limit': parse_age_limit(embed_vars.get('audienceRating')),
-            'tags': embed_vars.get('tags', '').split(','),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'age_limit': age_limit,
             'formats': formats,
         }
index acd87e371abeec9072b73a28cf37a32972ad41fd..407cc8084836cec7f82113eb71ff8e0314a8e932 100644 (file)
@@ -31,6 +31,10 @@ class Canalc2IE(InfoExtractor):
         webpage = self._download_webpage(
             'http://www.canalc2.tv/video/%s' % video_id, video_id)
 
+        title = self._html_search_regex(
+            r'(?s)class="[^"]*col_description[^"]*">.*?<h3>(.+?)</h3>',
+            webpage, 'title')
+
         formats = []
         for _, video_url in re.findall(r'file\s*=\s*(["\'])(.+?)\1', webpage):
             if video_url.startswith('rtmp://'):
@@ -49,17 +53,21 @@ class Canalc2IE(InfoExtractor):
                     'url': video_url,
                     'format_id': 'http',
                 })
-        self._sort_formats(formats)
 
-        title = self._html_search_regex(
-            r'(?s)class="[^"]*col_description[^"]*">.*?<h3>(.*?)</h3>', webpage, 'title')
-        duration = parse_duration(self._search_regex(
-            r'id=["\']video_duree["\'][^>]*>([^<]+)',
-            webpage, 'duration', fatal=False))
+        if formats:
+            info = {
+                'formats': formats,
+            }
+        else:
+            info = self._parse_html5_media_entries(url, webpage, url)[0]
+
+        self._sort_formats(info['formats'])
 
-        return {
+        info.update({
             'id': video_id,
             'title': title,
-            'duration': duration,
-            'formats': formats,
-        }
+            'duration': parse_duration(self._search_regex(
+                r'id=["\']video_duree["\'][^>]*>([^<]+)',
+                webpage, 'duration', fatal=False)),
+        })
+        return info
index 3be0c646bb8b4e431ea47fa5db4b46e13208dcd6..54b4b9be958ae49f0ea4f7d37cadcdf4e2c8b1c7 100644 (file)
@@ -5,7 +5,10 @@ import json
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import (
+    compat_str,
+    compat_HTTPError,
+)
 from ..utils import (
     js_to_json,
     smuggle_url,
@@ -206,30 +209,48 @@ class CBCWatchBaseIE(InfoExtractor):
 
     def _call_api(self, path, video_id):
         url = path if path.startswith('http') else self._API_BASE_URL + path
-        result = self._download_xml(url, video_id, headers={
-            'X-Clearleap-DeviceId': self._device_id,
-            'X-Clearleap-DeviceToken': self._device_token,
-        })
+        for _ in range(2):
+            try:
+                result = self._download_xml(url, video_id, headers={
+                    'X-Clearleap-DeviceId': self._device_id,
+                    'X-Clearleap-DeviceToken': self._device_token,
+                })
+            except ExtractorError as e:
+                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+                    # Device token has expired, re-acquiring device token
+                    self._register_device()
+                    continue
+                raise
         error_message = xpath_text(result, 'userMessage') or xpath_text(result, 'systemMessage')
         if error_message:
             raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message))
         return result
 
     def _real_initialize(self):
-        if not self._device_id or not self._device_token:
-            device = self._downloader.cache.load('cbcwatch', 'device') or {}
-            self._device_id, self._device_token = device.get('id'), device.get('token')
-            if not self._device_id or not self._device_token:
-                result = self._download_xml(
-                    self._API_BASE_URL + 'device/register',
-                    None, data=b'<device><type>web</type></device>')
-                self._device_id = xpath_text(result, 'deviceId', fatal=True)
-                self._device_token = xpath_text(result, 'deviceToken', fatal=True)
-                self._downloader.cache.store(
-                    'cbcwatch', 'device', {
-                        'id': self._device_id,
-                        'token': self._device_token,
-                    })
+        if self._valid_device_token():
+            return
+        device = self._downloader.cache.load('cbcwatch', 'device') or {}
+        self._device_id, self._device_token = device.get('id'), device.get('token')
+        if self._valid_device_token():
+            return
+        self._register_device()
+
+    def _valid_device_token(self):
+        return self._device_id and self._device_token
+
+    def _register_device(self):
+        self._device_id = self._device_token = None
+        result = self._download_xml(
+            self._API_BASE_URL + 'device/register',
+            None, 'Acquiring device token',
+            data=b'<device><type>web</type></device>')
+        self._device_id = xpath_text(result, 'deviceId', fatal=True)
+        self._device_token = xpath_text(result, 'deviceToken', fatal=True)
+        self._downloader.cache.store(
+            'cbcwatch', 'device', {
+                'id': self._device_id,
+                'token': self._device_token,
+            })
 
     def _parse_rss_feed(self, rss):
         channel = xpath_element(rss, 'channel', fatal=True)
index 1268e38ef3c266bd5f4fac39b961f4788150ba71..1799d63ea86ad9f9b1d7048792b52385928e20f4 100644 (file)
@@ -2,6 +2,7 @@ from __future__ import unicode_literals
 
 from .theplatform import ThePlatformFeedIE
 from ..utils import (
+    ExtractorError,
     int_or_none,
     find_xpath_attr,
     xpath_element,
@@ -61,9 +62,10 @@ class CBSIE(CBSBaseIE):
         asset_types = []
         subtitles = {}
         formats = []
+        last_e = None
         for item in items_data.findall('.//item'):
             asset_type = xpath_text(item, 'assetType')
-            if not asset_type or asset_type in asset_types:
+            if not asset_type or asset_type in asset_types or asset_type in ('HLS_FPS', 'DASH_CENC'):
                 continue
             asset_types.append(asset_type)
             query = {
@@ -74,11 +76,17 @@ class CBSIE(CBSBaseIE):
                 query['formats'] = 'MPEG4,M3U'
             elif asset_type in ('RTMP', 'WIFI', '3G'):
                 query['formats'] = 'MPEG4,FLV'
-            tp_formats, tp_subtitles = self._extract_theplatform_smil(
-                update_url_query(tp_release_url, query), content_id,
-                'Downloading %s SMIL data' % asset_type)
+            try:
+                tp_formats, tp_subtitles = self._extract_theplatform_smil(
+                    update_url_query(tp_release_url, query), content_id,
+                    'Downloading %s SMIL data' % asset_type)
+            except ExtractorError as e:
+                last_e = e
+                continue
             formats.extend(tp_formats)
             subtitles = self._merge_subtitles(subtitles, tp_subtitles)
+        if last_e and not formats:
+            raise last_e
         self._sort_formats(formats)
 
         info = self._extract_theplatform_metadata(tp_path, content_id)
index 3a62c840b42bace9993ddb3cb77fc89201b0578e..83b76476245d553f6f9bc976723f2573dd6c72c2 100644 (file)
@@ -4,28 +4,35 @@ from .cbs import CBSBaseIE
 
 
 class CBSSportsIE(CBSBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?cbssports\.com/video/player/[^/]+/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/(?:video|news)/(?P<id>[^/?#&]+)'
 
     _TESTS = [{
-        'url': 'http://www.cbssports.com/video/player/videos/708337219968/0/ben-simmons-the-next-lebron?-not-so-fast',
+        'url': 'https://www.cbssports.com/nba/video/donovan-mitchell-flashes-star-potential-in-game-2-victory-over-thunder/',
         'info_dict': {
-            'id': '708337219968',
+            'id': '1214315075735',
             'ext': 'mp4',
-            'title': 'Ben Simmons the next LeBron? Not so fast',
-            'description': 'md5:854294f627921baba1f4b9a990d87197',
-            'timestamp': 1466293740,
-            'upload_date': '20160618',
+            'title': 'Donovan Mitchell flashes star potential in Game 2 victory over Thunder',
+            'description': 'md5:df6f48622612c2d6bd2e295ddef58def',
+            'timestamp': 1524111457,
+            'upload_date': '20180419',
             'uploader': 'CBSI-NEW',
         },
         'params': {
             # m3u8 download
             'skip_download': True,
         }
+    }, {
+        'url': 'https://www.cbssports.com/nba/news/nba-playoffs-2018-watch-76ers-vs-heat-game-3-series-schedule-tv-channel-online-stream/',
+        'only_matching': True,
     }]
 
     def _extract_video_info(self, filter_query, video_id):
         return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id)
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        video_id = self._search_regex(
+            [r'(?:=|%26)pcid%3D(\d+)', r'embedVideo(?:Container)?_(\d+)'],
+            webpage, 'video id')
         return self._extract_video_info('byId=%s' % video_id, video_id)
index bec0a825a0df28e953aeee58c73ea5cbc6c0461b..07f5206c124e42368809ceb89149c3b5a9d5bdf7 100644 (file)
@@ -4,11 +4,13 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
+    clean_html,
     int_or_none,
     parse_duration,
     parse_iso8601,
-    clean_html,
+    parse_resolution,
 )
 
 
@@ -40,34 +42,42 @@ class CCMAIE(InfoExtractor):
 
     def _real_extract(self, url):
         media_type, media_id = re.match(self._VALID_URL, url).groups()
-        media_data = {}
-        formats = []
-        profiles = ['pc'] if media_type == 'audio' else ['mobil', 'pc']
-        for i, profile in enumerate(profiles):
-            md = self._download_json('http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
+
+        media = self._download_json(
+            'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
                 'media': media_type,
                 'idint': media_id,
-                'profile': profile,
-            }, fatal=False)
-            if md:
-                media_data = md
-                media_url = media_data.get('media', {}).get('url')
-                if media_url:
-                    formats.append({
-                        'format_id': profile,
-                        'url': media_url,
-                        'quality': i,
-                    })
+            })
+
+        formats = []
+        media_url = media['media']['url']
+        if isinstance(media_url, list):
+            for format_ in media_url:
+                format_url = format_.get('file')
+                if not format_url or not isinstance(format_url, compat_str):
+                    continue
+                label = format_.get('label')
+                f = parse_resolution(label)
+                f.update({
+                    'url': format_url,
+                    'format_id': label,
+                })
+                formats.append(f)
+        else:
+            formats.append({
+                'url': media_url,
+                'vcodec': 'none' if media_type == 'audio' else None,
+            })
         self._sort_formats(formats)
 
-        informacio = media_data['informacio']
+        informacio = media['informacio']
         title = informacio['titol']
         durada = informacio.get('durada', {})
         duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text'))
         timestamp = parse_iso8601(informacio.get('data_emissio', {}).get('utc'))
 
         subtitles = {}
-        subtitols = media_data.get('subtitols', {})
+        subtitols = media.get('subtitols', {})
         if subtitols:
             sub_url = subtitols.get('url')
             if sub_url:
@@ -77,7 +87,7 @@ class CCMAIE(InfoExtractor):
                     })
 
         thumbnails = []
-        imatges = media_data.get('imatges', {})
+        imatges = media.get('imatges', {})
         if imatges:
             thumbnail_url = imatges.get('url')
             if thumbnail_url:
old mode 100755 (executable)
new mode 100644 (file)
index e250de18ceb555e4750df54fcb7de1f1b92d6d49..6bad908595e825ac75b80330a0718d029578e72c 100644 (file)
@@ -13,6 +13,7 @@ from ..utils import (
     float_or_none,
     sanitized_Request,
     unescapeHTML,
+    update_url_query,
     urlencode_postdata,
     USER_AGENTS,
 )
@@ -265,6 +266,10 @@ class CeskaTelevizePoradyIE(InfoExtractor):
             # m3u8 download
             'skip_download': True,
         },
+    }, {
+        # iframe embed
+        'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -272,8 +277,11 @@ class CeskaTelevizePoradyIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
-        data_url = unescapeHTML(self._search_regex(
-            r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
-            webpage, 'iframe player url', group='url'))
+        data_url = update_url_query(unescapeHTML(self._search_regex(
+            (r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
+             r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'),
+            webpage, 'iframe player url', group='url')), query={
+                'autoStart': 'true',
+        })
 
         return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key())
index fcdd0fd14a85a12690031b409058d932a3d4e4db..59b9d373951331ef5c02bb6ed80d0f28bfaf4c8f 100644 (file)
@@ -644,19 +644,31 @@ class InfoExtractor(object):
             content, _ = res
             return content
 
+    def _download_xml_handle(
+            self, url_or_request, video_id, note='Downloading XML',
+            errnote='Unable to download XML', transform_source=None,
+            fatal=True, encoding=None, data=None, headers={}, query={}):
+        """Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle)"""
+        res = self._download_webpage_handle(
+            url_or_request, video_id, note, errnote, fatal=fatal,
+            encoding=encoding, data=data, headers=headers, query=query)
+        if res is False:
+            return res
+        xml_string, urlh = res
+        return self._parse_xml(
+            xml_string, video_id, transform_source=transform_source,
+            fatal=fatal), urlh
+
     def _download_xml(self, url_or_request, video_id,
                       note='Downloading XML', errnote='Unable to download XML',
                       transform_source=None, fatal=True, encoding=None,
                       data=None, headers={}, query={}):
         """Return the xml as an xml.etree.ElementTree.Element"""
-        xml_string = self._download_webpage(
-            url_or_request, video_id, note, errnote, fatal=fatal,
-            encoding=encoding, data=data, headers=headers, query=query)
-        if xml_string is False:
-            return xml_string
-        return self._parse_xml(
-            xml_string, video_id, transform_source=transform_source,
-            fatal=fatal)
+        res = self._download_xml_handle(
+            url_or_request, video_id, note=note, errnote=errnote,
+            transform_source=transform_source, fatal=fatal, encoding=encoding,
+            data=data, headers=headers, query=query)
+        return res if res is False else res[0]
 
     def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True):
         if transform_source:
@@ -1013,7 +1025,7 @@ class InfoExtractor(object):
             })
 
         for e in json_ld:
-            if e.get('@context') == 'http://schema.org':
+            if isinstance(e.get('@context'), compat_str) and re.match(r'^https?://schema.org/?$', e.get('@context')):
                 item_type = e.get('@type')
                 if expected_type is not None and expected_type != item_type:
                     return info
@@ -1694,22 +1706,24 @@ class InfoExtractor(object):
             })
         return subtitles
 
-    def _extract_xspf_playlist(self, playlist_url, playlist_id, fatal=True):
+    def _extract_xspf_playlist(self, xspf_url, playlist_id, fatal=True):
         xspf = self._download_xml(
-            playlist_url, playlist_id, 'Downloading xpsf playlist',
+            xspf_url, playlist_id, 'Downloading xpsf playlist',
             'Unable to download xspf manifest', fatal=fatal)
         if xspf is False:
             return []
-        return self._parse_xspf(xspf, playlist_id)
+        return self._parse_xspf(
+            xspf, playlist_id, xspf_url=xspf_url,
+            xspf_base_url=base_url(xspf_url))
 
-    def _parse_xspf(self, playlist, playlist_id):
+    def _parse_xspf(self, xspf_doc, playlist_id, xspf_url=None, xspf_base_url=None):
         NS_MAP = {
             'xspf': 'http://xspf.org/ns/0/',
             's1': 'http://static.streamone.nl/player/ns/0',
         }
 
         entries = []
-        for track in playlist.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):
+        for track in xspf_doc.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):
             title = xpath_text(
                 track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id)
             description = xpath_text(
@@ -1719,12 +1733,18 @@ class InfoExtractor(object):
             duration = float_or_none(
                 xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000)
 
-            formats = [{
-                'url': location.text,
-                'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
-                'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
-                'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
-            } for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))]
+            formats = []
+            for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP)):
+                format_url = urljoin(xspf_base_url, location.text)
+                if not format_url:
+                    continue
+                formats.append({
+                    'url': format_url,
+                    'manifest_url': xspf_url,
+                    'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
+                    'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
+                    'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
+                })
             self._sort_formats(formats)
 
             entries.append({
@@ -1738,18 +1758,18 @@ class InfoExtractor(object):
         return entries
 
     def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}):
-        res = self._download_webpage_handle(
+        res = self._download_xml_handle(
             mpd_url, video_id,
             note=note or 'Downloading MPD manifest',
             errnote=errnote or 'Failed to download MPD manifest',
             fatal=fatal)
         if res is False:
             return []
-        mpd, urlh = res
+        mpd_doc, urlh = res
         mpd_base_url = base_url(urlh.geturl())
 
         return self._parse_mpd_formats(
-            compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url,
+            mpd_doc, mpd_id=mpd_id, mpd_base_url=mpd_base_url,
             formats_dict=formats_dict, mpd_url=mpd_url)
 
     def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None):
@@ -2023,17 +2043,16 @@ class InfoExtractor(object):
         return formats
 
     def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True):
-        res = self._download_webpage_handle(
+        res = self._download_xml_handle(
             ism_url, video_id,
             note=note or 'Downloading ISM manifest',
             errnote=errnote or 'Failed to download ISM manifest',
             fatal=fatal)
         if res is False:
             return []
-        ism, urlh = res
+        ism_doc, urlh = res
 
-        return self._parse_ism_formats(
-            compat_etree_fromstring(ism.encode('utf-8')), urlh.geturl(), ism_id)
+        return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id)
 
     def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
         """
@@ -2131,8 +2150,8 @@ class InfoExtractor(object):
         return formats
 
     def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None):
-        def absolute_url(video_url):
-            return compat_urlparse.urljoin(base_url, video_url)
+        def absolute_url(item_url):
+            return urljoin(base_url, item_url)
 
         def parse_content_type(content_type):
             if not content_type:
@@ -2189,7 +2208,7 @@ class InfoExtractor(object):
             if src:
                 _, formats = _media_formats(src, media_type)
                 media_info['formats'].extend(formats)
-            media_info['thumbnail'] = media_attributes.get('poster')
+            media_info['thumbnail'] = absolute_url(media_attributes.get('poster'))
             if media_content:
                 for source_tag in re.findall(r'<source[^>]+>', media_content):
                     source_attributes = extract_attributes(source_tag)
index 13f425b2bf1672c66a723f22a0e6e3d73c0456c3..fc014f8b558008f971448153c56fb45354ee641f 100644 (file)
@@ -1,31 +1,45 @@
 # coding: utf-8
 from __future__ import unicode_literals, division
 
+import re
+
 from .common import InfoExtractor
-from ..utils import int_or_none
+from ..compat import (
+    compat_str,
+    compat_HTTPError,
+)
+from ..utils import (
+    determine_ext,
+    float_or_none,
+    int_or_none,
+    parse_age_limit,
+    parse_duration,
+    ExtractorError
+)
 
 
 class CrackleIE(InfoExtractor):
-    _GEO_COUNTRIES = ['US']
     _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
     _TEST = {
-        'url': 'http://www.crackle.com/comedians-in-cars-getting-coffee/2498934',
+        # geo restricted to CA
+        'url': 'https://www.crackle.com/andromeda/2502343',
         'info_dict': {
-            'id': '2498934',
+            'id': '2502343',
             'ext': 'mp4',
-            'title': 'Everybody Respects A Bloody Nose',
-            'description': 'Jerry is kaffeeklatsching in L.A. with funnyman J.B. Smoove (Saturday Night Live, Real Husbands of Hollywood). They’re headed for brew at 10 Speed Coffee in a 1964 Studebaker Avanti.',
-            'thumbnail': r're:^https?://.*\.jpg',
-            'duration': 906,
-            'series': 'Comedians In Cars Getting Coffee',
-            'season_number': 8,
-            'episode_number': 4,
-            'subtitles': {
-                'en-US': [
-                    {'ext': 'vtt'},
-                    {'ext': 'tt'},
-                ]
-            },
+            'title': 'Under The Night',
+            'description': 'md5:d2b8ca816579ae8a7bf28bfff8cefc8a',
+            'duration': 2583,
+            'view_count': int,
+            'average_rating': 0,
+            'age_limit': 14,
+            'genre': 'Action, Sci-Fi',
+            'creator': 'Allan Kroeker',
+            'artist': 'Keith Hamilton Cobb, Kevin Sorbo, Lisa Ryder, Lexa Doig, Robert Hewitt Wolfe',
+            'release_year': 2000,
+            'series': 'Andromeda',
+            'episode': 'Under The Night',
+            'season_number': 1,
+            'episode_number': 1,
         },
         'params': {
             # m3u8 download
@@ -33,109 +47,118 @@ class CrackleIE(InfoExtractor):
         }
     }
 
-    _THUMBNAIL_RES = [
-        (120, 90),
-        (208, 156),
-        (220, 124),
-        (220, 220),
-        (240, 180),
-        (250, 141),
-        (315, 236),
-        (320, 180),
-        (360, 203),
-        (400, 300),
-        (421, 316),
-        (460, 330),
-        (460, 460),
-        (462, 260),
-        (480, 270),
-        (587, 330),
-        (640, 480),
-        (700, 330),
-        (700, 394),
-        (854, 480),
-        (1024, 1024),
-        (1920, 1080),
-    ]
-
-    # extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx
-    _MEDIA_FILE_SLOTS = {
-        'c544.flv': {
-            'width': 544,
-            'height': 306,
-        },
-        '360p.mp4': {
-            'width': 640,
-            'height': 360,
-        },
-        '480p.mp4': {
-            'width': 852,
-            'height': 478,
-        },
-        '480p_1mbps.mp4': {
-            'width': 852,
-            'height': 478,
-        },
-    }
-
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        config_doc = self._download_xml(
-            'http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx?site=16',
-            video_id, 'Downloading config')
-
-        item = self._download_xml(
-            'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id,
-            video_id, headers=self.geo_verification_headers()).find('i')
-        title = item.attrib['t']
-
-        subtitles = {}
-        formats = self._extract_m3u8_formats(
-            'http://content.uplynk.com/ext/%s/%s.m3u8' % (config_doc.attrib['strUplynkOwnerId'], video_id),
-            video_id, 'mp4', m3u8_id='hls', fatal=None)
-        thumbnails = []
-        path = item.attrib.get('p')
-        if path:
-            for width, height in self._THUMBNAIL_RES:
-                res = '%dx%d' % (width, height)
-                thumbnails.append({
-                    'id': res,
-                    'url': 'http://images-us-am.crackle.com/%stnl_%s.jpg' % (path, res),
-                    'width': width,
-                    'height': height,
-                    'resolution': res,
-                })
-            http_base_url = 'http://ahttp.crackle.com/' + path
-            for mfs_path, mfs_info in self._MEDIA_FILE_SLOTS.items():
-                formats.append({
-                    'url': http_base_url + mfs_path,
-                    'format_id': 'http-' + mfs_path.split('.')[0],
-                    'width': mfs_info['width'],
-                    'height': mfs_info['height'],
-                })
-            for cc in item.findall('cc'):
-                locale = cc.attrib.get('l')
-                v = cc.attrib.get('v')
-                if locale and v:
-                    if locale not in subtitles:
-                        subtitles[locale] = []
-                    for url_ext, ext in (('vtt', 'vtt'), ('xml', 'tt')):
-                        subtitles.setdefault(locale, []).append({
-                            'url': '%s/%s%s_%s.%s' % (config_doc.attrib['strSubtitleServer'], path, locale, v, url_ext),
-                            'ext': ext,
-                        })
-        self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
-
-        return {
-            'id': video_id,
-            'title': title,
-            'description': item.attrib.get('d'),
-            'duration': int(item.attrib.get('r'), 16) / 1000 if item.attrib.get('r') else None,
-            'series': item.attrib.get('sn'),
-            'season_number': int_or_none(item.attrib.get('se')),
-            'episode_number': int_or_none(item.attrib.get('ep')),
-            'thumbnails': thumbnails,
-            'subtitles': subtitles,
-            'formats': formats,
-        }
+        country_code = self._downloader.params.get('geo_bypass_country', None)
+        countries = [country_code] if country_code else (
+            'US', 'AU', 'CA', 'AS', 'FM', 'GU', 'MP', 'PR', 'PW', 'MH', 'VI')
+
+        last_e = None
+
+        for country in countries:
+            try:
+                media = self._download_json(
+                    'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s'
+                    % (video_id, country), video_id,
+                    'Downloading media JSON as %s' % country,
+                    'Unable to download media JSON', query={
+                        'disableProtocols': 'true',
+                        'format': 'json'
+                    })
+            except ExtractorError as e:
+                # 401 means geo restriction, trying next country
+                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+                    last_e = e
+                    continue
+                raise
+
+            media_urls = media.get('MediaURLs')
+            if not media_urls or not isinstance(media_urls, list):
+                continue
+
+            title = media['Title']
+
+            formats = []
+            for e in media['MediaURLs']:
+                if e.get('UseDRM') is True:
+                    continue
+                format_url = e.get('Path')
+                if not format_url or not isinstance(format_url, compat_str):
+                    continue
+                ext = determine_ext(format_url)
+                if ext == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                        m3u8_id='hls', fatal=False))
+                elif ext == 'mpd':
+                    formats.extend(self._extract_mpd_formats(
+                        format_url, video_id, mpd_id='dash', fatal=False))
+            self._sort_formats(formats)
+
+            description = media.get('Description')
+            duration = int_or_none(media.get(
+                'DurationInSeconds')) or parse_duration(media.get('Duration'))
+            view_count = int_or_none(media.get('CountViews'))
+            average_rating = float_or_none(media.get('UserRating'))
+            age_limit = parse_age_limit(media.get('Rating'))
+            genre = media.get('Genre')
+            release_year = int_or_none(media.get('ReleaseYear'))
+            creator = media.get('Directors')
+            artist = media.get('Cast')
+
+            if media.get('MediaTypeDisplayValue') == 'Full Episode':
+                series = media.get('ShowName')
+                episode = title
+                season_number = int_or_none(media.get('Season'))
+                episode_number = int_or_none(media.get('Episode'))
+            else:
+                series = episode = season_number = episode_number = None
+
+            subtitles = {}
+            cc_files = media.get('ClosedCaptionFiles')
+            if isinstance(cc_files, list):
+                for cc_file in cc_files:
+                    if not isinstance(cc_file, dict):
+                        continue
+                    cc_url = cc_file.get('Path')
+                    if not cc_url or not isinstance(cc_url, compat_str):
+                        continue
+                    lang = cc_file.get('Locale') or 'en'
+                    subtitles.setdefault(lang, []).append({'url': cc_url})
+
+            thumbnails = []
+            images = media.get('Images')
+            if isinstance(images, list):
+                for image_key, image_url in images.items():
+                    mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key)
+                    if not mobj:
+                        continue
+                    thumbnails.append({
+                        'url': image_url,
+                        'width': int(mobj.group(1)),
+                        'height': int(mobj.group(2)),
+                    })
+
+            return {
+                'id': video_id,
+                'title': title,
+                'description': description,
+                'duration': duration,
+                'view_count': view_count,
+                'average_rating': average_rating,
+                'age_limit': age_limit,
+                'genre': genre,
+                'creator': creator,
+                'artist': artist,
+                'release_year': release_year,
+                'series': series,
+                'episode': episode,
+                'season_number': season_number,
+                'episode_number': episode_number,
+                'thumbnails': thumbnails,
+                'subtitles': subtitles,
+                'formats': formats,
+            }
+
+        raise last_e
index 6b60e542b37418473e1ee2a959cb6ae08a60d2e5..ffbd2623d1e5c1707a643931ab36e928fc2d5fc6 100644 (file)
@@ -2,26 +2,26 @@
 from __future__ import unicode_literals
 
 import itertools
+import json
 
-from .amp import AMPIE
+from .common import InfoExtractor
 from ..compat import (
     compat_HTTPError,
+    compat_str,
     compat_urlparse,
 )
 from ..utils import (
-    ExtractorError,
     clean_html,
+    ExtractorError,
     int_or_none,
-    remove_end,
-    sanitized_Request,
-    urlencode_postdata
+    parse_age_limit,
+    parse_duration,
+    unified_timestamp,
 )
 
 
-class DramaFeverBaseIE(AMPIE):
-    _LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
+class DramaFeverBaseIE(InfoExtractor):
     _NETRC_MACHINE = 'dramafever'
-    _GEO_COUNTRIES = ['US', 'CA']
 
     _CONSUMER_SECRET = 'DA59dtVXYLxajktV'
 
@@ -38,8 +38,8 @@ class DramaFeverBaseIE(AMPIE):
             'consumer secret', default=self._CONSUMER_SECRET)
 
     def _real_initialize(self):
-        self._login()
+        # Resolve the consumer secret before logging in: _login() sends it as
+        # the 'x-consumer-key' header of the login request.
         self._consumer_secret = self._get_consumer_secret()
+        self._login()
 
     def _login(self):
         (username, password) = self._get_login_info()
@@ -51,37 +51,49 @@ class DramaFeverBaseIE(AMPIE):
             'password': password,
         }
 
-        request = sanitized_Request(
-            self._LOGIN_URL, urlencode_postdata(login_form))
-        response = self._download_webpage(
-            request, None, 'Logging in')
+        try:
+            response = self._download_json(
+                'https://www.dramafever.com/api/users/login', None, 'Logging in',
+                data=json.dumps(login_form).encode('utf-8'), headers={
+                    'x-consumer-key': self._consumer_secret,
+                })
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (403, 404):
+                response = self._parse_json(
+                    e.cause.read().decode('utf-8'), None)
+            else:
+                raise
 
-        if all(logout_pattern not in response
-               for logout_pattern in ['href="/accounts/logout/"', '>Log out<']):
-            error = self._html_search_regex(
-                r'(?s)<h\d[^>]+\bclass="hidden-xs prompt"[^>]*>(.+?)</h\d',
-                response, 'error message', default=None)
-            if error:
-                raise ExtractorError('Unable to login: %s' % error, expected=True)
-            raise ExtractorError('Unable to log in')
+        # Successful login
+        if response.get('result') or response.get('guid') or response.get('user_guid'):
+            return
+
+        errors = response.get('errors')
+        if errors and isinstance(errors, list):
+            error = errors[0]
+            message = error.get('message') or error['reason']
+            raise ExtractorError('Unable to login: %s' % message, expected=True)
+        raise ExtractorError('Unable to log in')
 
 
 class DramaFeverIE(DramaFeverBaseIE):
     IE_NAME = 'dramafever'
     _VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)'
     _TESTS = [{
-        'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/',
+        'url': 'https://www.dramafever.com/drama/4274/1/Heirs/',
         'info_dict': {
-            'id': '4512.1',
-            'ext': 'flv',
-            'title': 'Cooking with Shin',
-            'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0',
+            'id': '4274.1',
+            'ext': 'wvm',
+            'title': 'Heirs - Episode 1',
+            'description': 'md5:362a24ba18209f6276e032a651c50bc2',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'duration': 3783,
+            'timestamp': 1381354993,
+            'upload_date': '20131009',
+            'series': 'Heirs',
+            'season_number': 1,
             'episode': 'Episode 1',
             'episode_number': 1,
-            'thumbnail': r're:^https?://.*\.jpg',
-            'timestamp': 1404336058,
-            'upload_date': '20140702',
-            'duration': 344,
         },
         'params': {
             # m3u8 download
@@ -110,50 +122,95 @@ class DramaFeverIE(DramaFeverBaseIE):
         'only_matching': True,
     }]
 
+    def _call_api(self, path, video_id, note, fatal=False):
+        """Download JSON from the DramaFever 'api/5/' endpoint.
+
+        `path` is appended to 'https://www.dramafever.com/api/5/';
+        `video_id`, `note` and `fatal` are passed through to
+        _download_json(). The consumer secret is sent as the
+        'x-consumer-key' header. Requests are non-fatal unless the caller
+        passes fatal=True.
+        """
+        return self._download_json(
+            'https://www.dramafever.com/api/5/' + path,
+            video_id, note=note, headers={
+                'x-consumer-key': self._consumer_secret,
+            }, fatal=fatal)
+
+    def _get_subtitles(self, video_id):
+        """Return WebVTT subtitles for `video_id`.
+
+        The result maps a language key to a list of {'url': ...} dicts. An
+        empty dict is returned when the request yields nothing or the
+        response is not a list.
+        """
+        subtitles = {}
+        # Subtitles are optional, so the request is explicitly non-fatal.
+        subs = self._call_api(
+            'video/%s/subtitles/webvtt/' % video_id, video_id,
+            'Downloading subtitles JSON', fatal=False)
+        if not subs or not isinstance(subs, list):
+            return subtitles
+        for sub in subs:
+            # Skip malformed entries instead of failing the whole extraction.
+            if not isinstance(sub, dict):
+                continue
+            sub_url = sub.get('url')
+            if not sub_url or not isinstance(sub_url, compat_str):
+                continue
+            # Language key: 'code', falling back to 'language', then 'en'.
+            subtitles.setdefault(
+                sub.get('code') or sub.get('language') or 'en', []).append({
+                    'url': sub_url
+                })
+        return subtitles
+
     def _real_extract(self, url):
         video_id = self._match_id(url).replace('/', '.')
 
-        try:
-            info = self._extract_feed_info(
-                'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id)
-        except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError):
-                self.raise_geo_restricted(
-                    msg='Currently unavailable in your country',
-                    countries=self._GEO_COUNTRIES)
-            raise
+        series_id, episode_number = video_id.split('.')
 
-        # title is postfixed with video id for some reason, removing
-        if info.get('title'):
-            info['title'] = remove_end(info['title'], video_id).strip()
+        video = self._call_api(
+            'series/%s/episodes/%s/' % (series_id, episode_number), video_id,
+            'Downloading video JSON')
 
-        series_id, episode_number = video_id.split('.')
-        episode_info = self._download_json(
-            # We only need a single episode info, so restricting page size to one episode
-            # and dealing with page number as with episode number
-            r'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_number=%s&page_size=1'
-            % (self._consumer_secret, series_id, episode_number),
-            video_id, 'Downloading episode info JSON', fatal=False)
-        if episode_info:
-            value = episode_info.get('value')
-            if isinstance(value, list):
-                for v in value:
-                    if v.get('type') == 'Episode':
-                        subfile = v.get('subfile') or v.get('new_subfile')
-                        if subfile and subfile != 'http://www.dramafever.com/st/':
-                            info.setdefault('subtitles', {}).setdefault('English', []).append({
-                                'ext': 'srt',
-                                'url': subfile,
-                            })
-                        episode_number = int_or_none(v.get('number'))
-                        episode_fallback = 'Episode'
-                        if episode_number:
-                            episode_fallback += ' %d' % episode_number
-                        info['episode'] = v.get('title') or episode_fallback
-                        info['episode_number'] = episode_number
-                        break
-
-        return info
+        formats = []
+        download_assets = video.get('download_assets')
+        if download_assets and isinstance(download_assets, dict):
+            for format_id, format_dict in download_assets.items():
+                if not isinstance(format_dict, dict):
+                    continue
+                format_url = format_dict.get('url')
+                if not format_url or not isinstance(format_url, compat_str):
+                    continue
+                formats.append({
+                    'url': format_url,
+                    'format_id': format_id,
+                    'filesize': int_or_none(video.get('filesize')),
+                })
+
+        stream = self._call_api(
+            'video/%s/stream/' % video_id, video_id, 'Downloading stream JSON',
+            fatal=False)
+        if stream:
+            stream_url = stream.get('stream_url')
+            if stream_url:
+                formats.extend(self._extract_m3u8_formats(
+                    stream_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id='hls', fatal=False))
+        self._sort_formats(formats)
+
+        title = video.get('title') or 'Episode %s' % episode_number
+        description = video.get('description')
+        thumbnail = video.get('thumbnail')
+        timestamp = unified_timestamp(video.get('release_date'))
+        duration = parse_duration(video.get('duration'))
+        age_limit = parse_age_limit(video.get('tv_rating'))
+        series = video.get('series_title')
+        season_number = int_or_none(video.get('season'))
+
+        if series:
+            title = '%s - %s' % (series, title)
+
+        subtitles = self.extract_subtitles(video_id)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'timestamp': timestamp,
+            'age_limit': age_limit,
+            'series': series,
+            'season_number': season_number,
+            'episode_number': int_or_none(episode_number),
+            'formats': formats,
+            'subtitles': subtitles,
+        }
 
 
 class DramaFeverSeriesIE(DramaFeverBaseIE):
index c88b3126b1676f11ee3696a2499e1f7a0a57d8b3..5c41c8022e6215ac01d617972f6e988a0425d163 100644 (file)
@@ -66,7 +66,9 @@ class DrTuberIE(InfoExtractor):
         self._sort_formats(formats)
 
         title = self._html_search_regex(
-            (r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<',
+            (r'<h1[^>]+class=["\']title[^>]+>([^<]+)',
+             r'<title>([^<]+)\s*@\s+DrTuber',
+             r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<',
              r'<p[^>]+class="title_substrate">([^<]+)</p>',
              r'<title>([^<]+) - \d+'),
             webpage, 'title')
diff --git a/youtube_dl/extractor/etonline.py b/youtube_dl/extractor/etonline.py
deleted file mode 100644 (file)
index 17d7cfe..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-
-
-class ETOnlineIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?etonline\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
-    _TESTS = [{
-        'url': 'http://www.etonline.com/tv/211130_dove_cameron_liv_and_maddie_emotional_episode_series_finale/',
-        'info_dict': {
-            'id': '211130_dove_cameron_liv_and_maddie_emotional_episode_series_finale',
-            'title': 'md5:a21ec7d3872ed98335cbd2a046f34ee6',
-            'description': 'md5:8b94484063f463cca709617c79618ccd',
-        },
-        'playlist_count': 2,
-    }, {
-        'url': 'http://www.etonline.com/media/video/here_are_the_stars_who_love_bringing_their_moms_as_dates_to_the_oscars-211359/',
-        'only_matching': True,
-    }]
-    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1242911076001/default_default/index.html?videoId=ref:%s'
-
-    def _real_extract(self, url):
-        playlist_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, playlist_id)
-
-        entries = [
-            self.url_result(
-                self.BRIGHTCOVE_URL_TEMPLATE % video_id, 'BrightcoveNew', video_id)
-            for video_id in re.findall(
-                r'site\.brightcove\s*\([^,]+,\s*["\'](title_\d+)', webpage)]
-
-        return self.playlist_result(
-            entries, playlist_id,
-            self._og_search_title(webpage, fatal=False),
-            self._og_search_description(webpage))
index 3bde40eb3cef5ad6c9a22ad894de334e03ea0ce2..6fb65e4fe5bca6ee9ecfc2647a330604786a46ab 100644 (file)
@@ -326,7 +326,6 @@ from .espn import (
     FiveThirtyEightIE,
 )
 from .esri import EsriVideoIE
-from .etonline import ETOnlineIE
 from .europa import EuropaIE
 from .everyonesmixtape import EveryonesMixtapeIE
 from .expotv import ExpoTVIE
@@ -532,13 +531,14 @@ from .lcp import (
 )
 from .learnr import LearnrIE
 from .lecture2go import Lecture2GoIE
-from .lego import LEGOIE
-from .lemonde import LemondeIE
 from .leeco import (
     LeIE,
     LePlaylistIE,
     LetvCloudIE,
 )
+from .lego import LEGOIE
+from .lemonde import LemondeIE
+from .lenta import LentaIE
 from .libraryofcongress import LibraryOfCongressIE
 from .libsyn import LibsynIE
 from .lifenews import (
@@ -814,6 +814,10 @@ from .periscope import (
 from .philharmoniedeparis import PhilharmonieDeParisIE
 from .phoenix import PhoenixIE
 from .photobucket import PhotobucketIE
+from .picarto import (
+    PicartoIE,
+    PicartoVodIE,
+)
 from .piksel import PikselIE
 from .pinkbike import PinkbikeIE
 from .pladform import PladformIE
@@ -1030,6 +1034,7 @@ from .sunporno import SunPornoIE
 from .svt import (
     SVTIE,
     SVTPlayIE,
+    SVTSeriesIE,
 )
 from .swrmediathek import SWRMediathekIE
 from .syfy import SyfyIE
@@ -1135,6 +1140,7 @@ from .tvnoe import TVNoeIE
 from .tvnow import (
     TVNowIE,
     TVNowListIE,
+    TVNowShowIE,
 )
 from .tvp import (
     TVPEmbedIE,
index 445f9438db182d0ced6d48233306a53e56271f9d..acd4090fa351c56147245983b3b1b7dd0c1e5495 100644 (file)
@@ -8,12 +8,12 @@ class ExtremeTubeIE(KeezMoviesIE):
     _VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?P<id>[^/#?&]+)'
     _TESTS = [{
         'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
-        'md5': '1fb9228f5e3332ec8c057d6ac36f33e0',
+        'md5': '92feaafa4b58e82f261e5419f39c60cb',
         'info_dict': {
             'id': 'music-video-14-british-euro-brit-european-cumshots-swallow-652431',
             'ext': 'mp4',
             'title': 'Music Video 14 british euro brit european cumshots swallow',
-            'uploader': 'unknown',
+            'uploader': 'anonim',
             'view_count': int,
             'age_limit': 18,
         }
@@ -36,10 +36,10 @@ class ExtremeTubeIE(KeezMoviesIE):
                 r'<h1[^>]+title="([^"]+)"[^>]*>', webpage, 'title')
 
         uploader = self._html_search_regex(
-            r'Uploaded by:\s*</strong>\s*(.+?)\s*</div>',
+            r'Uploaded by:\s*</[^>]+>\s*<a[^>]+>(.+?)</a>',
             webpage, 'uploader', fatal=False)
         view_count = str_to_int(self._search_regex(
-            r'Views:\s*</strong>\s*<span>([\d,\.]+)</span>',
+            r'Views:\s*</[^>]+>\s*<[^>]+>([\d,\.]+)</',
             webpage, 'view count', fatal=False))
 
         info.update({
index 37549fb01ccfc05fb4a642f948ee70227588f6e0..00e67426b951b00d797e4a7641a037fb607e2963 100644 (file)
@@ -41,7 +41,7 @@ class FXNetworksIE(AdobePassIE):
         if 'The content you are trying to access is not available in your region.' in webpage:
             self.raise_geo_restricted()
         video_data = extract_attributes(self._search_regex(
-            r'(<a.+?rel="http://link\.theplatform\.com/s/.+?</a>)', webpage, 'video data'))
+            r'(<a.+?rel="https?://link\.theplatform\.com/s/.+?</a>)', webpage, 'video data'))
         player_type = self._search_regex(r'playerType\s*=\s*[\'"]([^\'"]+)', webpage, 'player type', default=None)
         release_url = video_data['rel']
         title = video_data['data-title']
index a98f3636ab4bc44ef2f5d8a604d80bf6d48b38b1..af1322e0085befa144605f16c22a52fcca5a3bcf 100644 (file)
@@ -58,6 +58,7 @@ from .xhamster import XHamsterEmbedIE
 from .tnaflix import TNAFlixNetworkEmbedIE
 from .drtuber import DrTuberIE
 from .redtube import RedTubeIE
+from .tube8 import Tube8IE
 from .vimeo import VimeoIE
 from .dailymotion import DailymotionIE
 from .dailymail import DailyMailIE
@@ -104,6 +105,7 @@ from .mediasite import MediasiteIE
 from .springboardplatform import SpringboardPlatformIE
 from .yapfiles import YapFilesIE
 from .vice import ViceIE
+from .xfileshare import XFileShareIE
 
 
 class GenericIE(InfoExtractor):
@@ -1218,7 +1220,7 @@ class GenericIE(InfoExtractor):
                 'title': '35871',
                 'timestamp': 1355743100,
                 'upload_date': '20121217',
-                'uploader_id': 'batchUser',
+                'uploader_id': 'cplapp@learn360.com',
             },
             'add_ie': ['Kaltura'],
         },
@@ -1269,23 +1271,21 @@ class GenericIE(InfoExtractor):
             },
             'add_ie': ['Kaltura'],
         },
-        # EaglePlatform embed (generic URL)
         {
-            'url': 'http://lenta.ru/news/2015/03/06/navalny/',
-            # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
+            # meta twitter:player
+            'url': 'http://thechive.com/2017/12/08/all-i-want-for-christmas-is-more-twerk/',
             'info_dict': {
-                'id': '227304',
+                'id': '0_01b42zps',
                 'ext': 'mp4',
-                'title': 'Навальный вышел на свободу',
-                'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
-                'thumbnail': r're:^https?://.*\.jpg$',
-                'duration': 87,
-                'view_count': int,
-                'age_limit': 0,
+                'title': 'Main Twerk (Video)',
+                'upload_date': '20171208',
+                'uploader_id': 'sebastian.salinas@thechive.com',
+                'timestamp': 1512713057,
             },
             'params': {
                 'skip_download': True,
             },
+            'add_ie': ['Kaltura'],
         },
         # referrer protected EaglePlatform embed
         {
@@ -1984,7 +1984,17 @@ class GenericIE(InfoExtractor):
             'params': {
                 'skip_download': True,
             },
-        }
+        },
+        {
+            'url': 'http://share-videos.se/auto/video/83645793?uid=13',
+            'md5': 'b68d276de422ab07ee1d49388103f457',
+            'info_dict': {
+                'id': '83645793',
+                'title': 'Lock up and get excited',
+                'ext': 'mp4'
+            },
+            'skip': 'TODO: fix nested playlists processing in tests',
+        },
         # {
         #     # TODO: find another test
         #     # http://schema.org/VideoObject
@@ -2231,7 +2241,11 @@ class GenericIE(InfoExtractor):
                 self._sort_formats(smil['formats'])
                 return smil
             elif doc.tag == '{http://xspf.org/ns/0/}playlist':
-                return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
+                return self.playlist_result(
+                    self._parse_xspf(
+                        doc, video_id, xspf_url=url,
+                        xspf_base_url=compat_str(full_response.geturl())),
+                    video_id)
             elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
                 info_dict['formats'] = self._parse_mpd_formats(
                     doc,
@@ -2559,6 +2573,11 @@ class GenericIE(InfoExtractor):
         if redtube_urls:
             return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key())
 
+        # Look for embedded Tube8 player
+        tube8_urls = Tube8IE._extract_urls(webpage)
+        if tube8_urls:
+            return self.playlist_from_matches(tube8_urls, video_id, video_title, ie=Tube8IE.ie_key())
+
         # Look for embedded Tvigle player
         mobj = re.search(
             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
@@ -2971,6 +2990,18 @@ class GenericIE(InfoExtractor):
             return self.playlist_from_matches(
                 vice_urls, video_id, video_title, ie=ViceIE.ie_key())
 
+        xfileshare_urls = XFileShareIE._extract_urls(webpage)
+        if xfileshare_urls:
+            return self.playlist_from_matches(
+                xfileshare_urls, video_id, video_title, ie=XFileShareIE.ie_key())
+
+        sharevideos_urls = [mobj.group('url') for mobj in re.finditer(
+            r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
+            webpage)]
+        if sharevideos_urls:
+            return self.playlist_from_matches(
+                sharevideos_urls, video_id, video_title)
+
         def merge_dicts(dict1, dict2):
             merged = {}
             for k, v in dict1.items():
index 8f49f52efd5398abbc7b922b9e2a268b3f609e2a..5c03780a3389fa8272663b805855c305bf934689 100644 (file)
@@ -7,6 +7,7 @@ from .youtube import YoutubeIE
 from ..utils import (
     determine_ext,
     int_or_none,
+    NO_DEFAULT,
     parse_iso8601,
     smuggle_url,
     xpath_text,
@@ -16,18 +17,19 @@ from ..utils import (
 class HeiseIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?heise\.de/(?:[^/]+/)+[^/]+-(?P<id>[0-9]+)\.html'
     _TESTS = [{
+        # kaltura embed
         'url': 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html',
-        'md5': 'ffed432483e922e88545ad9f2f15d30e',
         'info_dict': {
-            'id': '2404147',
+            'id': '1_kkrq94sm',
             'ext': 'mp4',
             'title': "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone",
-            'format_id': 'mp4_720p',
-            'timestamp': 1411812600,
-            'upload_date': '20140927',
+            'timestamp': 1512734959,
+            'upload_date': '20171208',
             'description': 'md5:c934cbfb326c669c2bcabcbe3d3fcd20',
-            'thumbnail': r're:^https?://.*/gallery/$',
-        }
+        },
+        'params': {
+            'skip_download': True,
+        },
     }, {
         # YouTube embed
         'url': 'http://www.heise.de/newsticker/meldung/Netflix-In-20-Jahren-vom-Videoverleih-zum-TV-Revolutionaer-3814130.html',
@@ -46,13 +48,26 @@ class HeiseIE(InfoExtractor):
         },
     }, {
         'url': 'https://www.heise.de/video/artikel/nachgehakt-Wie-sichert-das-c-t-Tool-Restric-tor-Windows-10-ab-3700244.html',
-        'md5': '4b58058b46625bdbd841fc2804df95fc',
         'info_dict': {
             'id': '1_ntrmio2s',
+            'ext': 'mp4',
+            'title': "nachgehakt: Wie sichert das c't-Tool Restric'tor Windows 10 ab?",
+            'description': 'md5:47e8ffb6c46d85c92c310a512d6db271',
             'timestamp': 1512470717,
             'upload_date': '20171205',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://www.heise.de/ct/artikel/c-t-uplink-20-8-Staubsaugerroboter-Xiaomi-Vacuum-2-AR-Brille-Meta-2-und-Android-rooten-3959893.html',
+        'info_dict': {
+            'id': '1_59mk80sf',
             'ext': 'mp4',
-            'title': 'ct10 nachgehakt hos restrictor',
+            'title': "c't uplink 20.8: Staubsaugerroboter Xiaomi Vacuum 2, AR-Brille Meta 2 und Android rooten",
+            'description': 'md5:f50fe044d3371ec73a8f79fcebd74afc',
+            'timestamp': 1517567237,
+            'upload_date': '20180202',
         },
         'params': {
             'skip_download': True,
@@ -72,19 +87,40 @@ class HeiseIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        title = self._html_search_meta('fulltitle', webpage, default=None)
-        if not title or title == "c't":
-            title = self._search_regex(
-                r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"',
-                webpage, 'title')
+        def extract_title(default=NO_DEFAULT):
+            title = self._html_search_meta(
+                ('fulltitle', 'title'), webpage, default=None)
+            if not title or title == "c't":
+                title = self._search_regex(
+                    r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"',
+                    webpage, 'title', default=None)
+            if not title:
+                title = self._html_search_regex(
+                    r'<h1[^>]+\bclass=["\']article_page_title[^>]+>(.+?)<',
+                    webpage, 'title', default=default)
+            return title
 
-        yt_urls = YoutubeIE._extract_urls(webpage)
-        if yt_urls:
-            return self.playlist_from_matches(yt_urls, video_id, title, ie=YoutubeIE.ie_key())
+        title = extract_title(default=None)
+        description = self._og_search_description(
+            webpage, default=None) or self._html_search_meta(
+            'description', webpage)
 
         kaltura_url = KalturaIE._extract_url(webpage)
         if kaltura_url:
-            return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
+            return {
+                '_type': 'url_transparent',
+                'url': smuggle_url(kaltura_url, {'source_url': url}),
+                'ie_key': KalturaIE.ie_key(),
+                'title': title,
+                'description': description,
+            }
+
+        yt_urls = YoutubeIE._extract_urls(webpage)
+        if yt_urls:
+            return self.playlist_from_matches(
+                yt_urls, video_id, title, ie=YoutubeIE.ie_key())
+
+        title = extract_title()
 
         container_id = self._search_regex(
             r'<div class="videoplayerjw"[^>]+data-container="([0-9]+)"',
@@ -115,10 +151,6 @@ class HeiseIE(InfoExtractor):
             })
         self._sort_formats(formats)
 
-        description = self._og_search_description(
-            webpage, default=None) or self._html_search_meta(
-            'description', webpage)
-
         return {
             'id': video_id,
             'title': title,
index a77f619d291ba4e02ecdc2c785795229fd0de2ca..0c13f54ee04fe4df7169203b27e222cd2771b4be 100644 (file)
@@ -1,14 +1,21 @@
 from __future__ import unicode_literals
 
 import itertools
+import hashlib
+import json
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import (
+    compat_str,
+    compat_HTTPError,
+)
 from ..utils import (
+    ExtractorError,
     get_element_by_attribute,
     int_or_none,
     lowercase_escape,
+    std_headers,
     try_get,
 )
 
@@ -237,37 +244,68 @@ class InstagramUserIE(InfoExtractor):
         }
     }
 
-    def _entries(self, uploader_id):
-        query = {
-            '__a': 1,
-        }
+    _gis_tmpl = None
 
-        def get_count(kind):
+    def _entries(self, data):
+        def get_count(suffix):
             return int_or_none(try_get(
-                node, lambda x: x['%ss' % kind]['count']))
-
-        for page_num in itertools.count(1):
-            page = self._download_json(
-                'https://instagram.com/%s/' % uploader_id, uploader_id,
-                note='Downloading page %d' % page_num,
-                fatal=False, query=query)
-            if not page:
-                break
+                node, lambda x: x['edge_media_' + suffix]['count']))
 
-            nodes = try_get(page, lambda x: x['user']['media']['nodes'], list)
-            if not nodes:
-                break
+        uploader_id = data['entry_data']['ProfilePage'][0]['graphql']['user']['id']
+        csrf_token = data['config']['csrf_token']
+        rhx_gis = data.get('rhx_gis') or '3c7ca9dcefcf966d11dacf1f151335e8'
 
-            max_id = None
+        self._set_cookie('instagram.com', 'ig_pr', '1')
 
-            for node in nodes:
-                node_id = node.get('id')
-                if node_id:
-                    max_id = node_id
+        cursor = ''
+        for page_num in itertools.count(1):
+            variables = json.dumps({
+                'id': uploader_id,
+                'first': 12,
+                'after': cursor,
+            })
+
+            if self._gis_tmpl:
+                gis_tmpls = [self._gis_tmpl]
+            else:
+                gis_tmpls = [
+                    '%s' % rhx_gis,
+                    '',
+                    '%s:%s' % (rhx_gis, csrf_token),
+                    '%s:%s:%s' % (rhx_gis, csrf_token, std_headers['User-Agent']),
+                ]
+
+            for gis_tmpl in gis_tmpls:
+                try:
+                    media = self._download_json(
+                        'https://www.instagram.com/graphql/query/', uploader_id,
+                        'Downloading JSON page %d' % page_num, headers={
+                            'X-Requested-With': 'XMLHttpRequest',
+                            'X-Instagram-GIS': hashlib.md5(
+                                ('%s:%s' % (gis_tmpl, variables)).encode('utf-8')).hexdigest(),
+                        }, query={
+                            'query_hash': '42323d64886122307be10013ad2dcc44',
+                            'variables': variables,
+                        })['data']['user']['edge_owner_to_timeline_media']
+                    self._gis_tmpl = gis_tmpl
+                    break
+                except ExtractorError as e:
+                    if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+                        if gis_tmpl != gis_tmpls[-1]:
+                            continue
+                    raise
+
+            edges = media.get('edges')
+            if not edges or not isinstance(edges, list):
+                break
 
+            for edge in edges:
+                node = edge.get('node')
+                if not node or not isinstance(node, dict):
+                    continue
                 if node.get('__typename') != 'GraphVideo' and node.get('is_video') is not True:
                     continue
-                video_id = node.get('code')
+                video_id = node.get('shortcode')
                 if not video_id:
                     continue
 
@@ -276,14 +314,14 @@ class InstagramUserIE(InfoExtractor):
                     ie=InstagramIE.ie_key(), video_id=video_id)
 
                 description = try_get(
-                    node, [lambda x: x['caption'], lambda x: x['text']['id']],
+                    node, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
                     compat_str)
                 thumbnail = node.get('thumbnail_src') or node.get('display_src')
-                timestamp = int_or_none(node.get('date'))
+                timestamp = int_or_none(node.get('taken_at_timestamp'))
 
-                comment_count = get_count('comment')
-                like_count = get_count('like')
-                view_count = int_or_none(node.get('video_views'))
+                comment_count = get_count('to_comment')
+                like_count = get_count('preview_like')
+                view_count = int_or_none(node.get('video_view_count'))
 
                 info.update({
                     'description': description,
@@ -296,12 +334,27 @@ class InstagramUserIE(InfoExtractor):
 
                 yield info
 
-            if not max_id:
+            page_info = media.get('page_info')
+            if not page_info or not isinstance(page_info, dict):
+                break
+
+            has_next_page = page_info.get('has_next_page')
+            if not has_next_page:
                 break
 
-            query['max_id'] = max_id
+            cursor = page_info.get('end_cursor')
+            if not cursor or not isinstance(cursor, compat_str):
+                break
 
     def _real_extract(self, url):
-        uploader_id = self._match_id(url)
+        username = self._match_id(url)
+
+        # The profile page is downloaded to get its embedded 'sharedData'
+        # JSON object; _entries() reads the user id, CSRF token and rhx_gis
+        # from it.
+        webpage = self._download_webpage(url, username)
+
+        data = self._parse_json(
+            self._search_regex(
+                r'sharedData\s*=\s*({.+?})\s*;\s*[<\n]', webpage, 'data'),
+            username)
+
+        # The username serves as both playlist id and playlist title.
         return self.playlist_result(
-            self._entries(uploader_id), uploader_id, uploader_id)
+            self._entries(data), username, username)
old mode 100755 (executable)
new mode 100644 (file)
index 562e25f6d3ca979fe40ef3a5e46ee05ca8e92244..0ea89e4d66d9fb20a9e9d9cf6635d7dff09f4ba6 100644 (file)
@@ -135,10 +135,10 @@ class KalturaIE(InfoExtractor):
                 ''', webpage) or
             re.search(
                 r'''(?xs)
-                    <iframe[^>]+src=(?P<q1>["'])
-                      (?:https?:)?//(?:www\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
+                    <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])
+                      (?:https?:)?//(?:(?:www|cdnapi)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
                       (?:(?!(?P=q1)).)*
-                      [?&]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
+                      [?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
                     (?P=q1)
                 ''', webpage)
         )
index e83115e2a6c7b7a63be5237340ca0845272f8c03..d4e6f7ac17be4f19e4fdf3bff891fde0b087d006 100644 (file)
@@ -20,23 +20,23 @@ from ..utils import (
 class KeezMoviesIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/(?:(?P<display_id>[^/]+)-)?(?P<id>\d+)'
     _TESTS = [{
-        'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
-        'md5': '1c1e75d22ffa53320f45eeb07bc4cdc0',
+        'url': 'https://www.keezmovies.com/video/arab-wife-want-it-so-bad-i-see-she-thirsty-and-has-tiny-money-18070681',
+        'md5': '2ac69cdb882055f71d82db4311732a1a',
         'info_dict': {
-            'id': '1214711',
-            'display_id': 'petite-asian-lady-mai-playing-in-bathtub',
+            'id': '18070681',
+            'display_id': 'arab-wife-want-it-so-bad-i-see-she-thirsty-and-has-tiny-money',
             'ext': 'mp4',
-            'title': 'Petite Asian Lady Mai Playing In Bathtub',
-            'thumbnail': r're:^https?://.*\.jpg$',
+            'title': 'Arab wife want it so bad I see she thirsty and has tiny money.',
+            'thumbnail': None,
             'view_count': int,
             'age_limit': 18,
         }
     }, {
-        'url': 'http://www.keezmovies.com/video/1214711',
+        'url': 'http://www.keezmovies.com/video/18070681',
         'only_matching': True,
     }]
 
-    def _extract_info(self, url):
+    def _extract_info(self, url, fatal=True):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
         display_id = (mobj.group('display_id')
@@ -55,7 +55,7 @@ class KeezMoviesIE(InfoExtractor):
         encrypted = False
 
         def extract_format(format_url, height=None):
-            if not isinstance(format_url, compat_str) or not format_url.startswith('http'):
+            if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//')):
                 return
             if format_url in format_urls:
                 return
@@ -105,7 +105,11 @@ class KeezMoviesIE(InfoExtractor):
                 raise ExtractorError(
                     'Video %s is no longer available' % video_id, expected=True)
 
-        self._sort_formats(formats)
+        try:
+            self._sort_formats(formats)
+        except ExtractorError:
+            if fatal:
+                raise
 
         if not title:
             title = self._html_search_regex(
@@ -122,7 +126,9 @@ class KeezMoviesIE(InfoExtractor):
         }
 
     def _real_extract(self, url):
-        webpage, info = self._extract_info(url)
+        webpage, info = self._extract_info(url, fatal=False)
+        if not info['formats']:
+            return self.url_result(url, 'Generic')
         info['view_count'] = str_to_int(self._search_regex(
             r'<b>([\d,.]+)</b> Views?', webpage, 'view count', fatal=False))
         return info
diff --git a/youtube_dl/extractor/lenta.py b/youtube_dl/extractor/lenta.py
new file mode 100644 (file)
index 0000000..2ebd4e5
--- /dev/null
@@ -0,0 +1,61 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class LentaIE(InfoExtractor):
+    """Resolve lenta.ru article URLs by delegating to another extractor.
+
+    The article page is searched for an inline ``vid: <digits>`` value; when
+    one is present the result points at the EaglePlatform extractor,
+    otherwise the page URL is handed to the Generic extractor.
+    """
+
+    _VALID_URL = r'https?://(?:www\.)?lenta\.ru/[^/]+/\d+/\d+/\d+/(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://lenta.ru/news/2018/03/22/savshenko_go/',
+        'info_dict': {
+            'id': '964400',
+            'ext': 'mp4',
+            'title': 'Надежду Савченко задержали',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 61,
+            'view_count': int,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        # EaglePlatform iframe embed
+        'url': 'http://lenta.ru/news/2015/03/06/navalny/',
+        'info_dict': {
+            'id': '227304',
+            'ext': 'mp4',
+            'title': 'Навальный вышел на свободу',
+            'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 87,
+            'view_count': int,
+            'age_limit': 0,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Hand the URL off to the EaglePlatform or Generic extractor."""
+        slug = self._match_id(url)
+        page = self._download_webpage(url, slug)
+
+        # default=None: a page without a player id is expected, not an error.
+        eagle_id = self._search_regex(
+            r'vid\s*:\s*["\']?(\d+)', page, 'eagleplatform id', default=None)
+        if not eagle_id:
+            return self.url_result(url, ie='Generic')
+
+        eagle_url = (
+            'eagleplatform:lentaru.media.eagleplatform.com:%s' % eagle_id)
+        return self.url_result(
+            eagle_url, ie='EaglePlatform', video_id=eagle_id)
index 4750b03a3fb2f47818858338b7eb9a8b4889c012..f7311f4832eeb38d5c97a3363066842f0f43dfd1 100644 (file)
@@ -1,24 +1,28 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import json
 import re
 
 from .common import InfoExtractor
-from ..utils import unified_strdate
+from ..utils import (
+    parse_duration,
+    unified_strdate,
+)
 
 
 class LibsynIE(InfoExtractor):
     _VALID_URL = r'(?P<mainurl>https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+))'
 
     _TESTS = [{
-        'url': 'http://html5-player.libsyn.com/embed/episode/id/3377616/',
-        'md5': '443360ee1b58007bc3dcf09b41d093bb',
+        'url': 'http://html5-player.libsyn.com/embed/episode/id/6385796/',
+        'md5': '2a55e75496c790cdeb058e7e6c087746',
         'info_dict': {
-            'id': '3377616',
+            'id': '6385796',
             'ext': 'mp3',
-            'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
-            'description': 'md5:601cb790edd05908957dae8aaa866465',
-            'upload_date': '20150220',
+            'title': "Champion Minded - Developing a Growth Mindset",
+            'description': 'In this episode, Allistair talks about the importance of developing a growth mindset, not only in sports, but in life too.',
+            'upload_date': '20180320',
             'thumbnail': 're:^https?://.*',
         },
     }, {
@@ -39,31 +43,45 @@ class LibsynIE(InfoExtractor):
         url = m.group('mainurl')
         webpage = self._download_webpage(url, video_id)
 
-        formats = [{
-            'url': media_url,
-        } for media_url in set(re.findall(r'var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage))]
-
         podcast_title = self._search_regex(
-            r'<h2>([^<]+)</h2>', webpage, 'podcast title', default=None)
+            r'<h3>([^<]+)</h3>', webpage, 'podcast title', default=None)
+        if podcast_title:
+            podcast_title = podcast_title.strip()
         episode_title = self._search_regex(
-            r'(?:<div class="episode-title">|<h3>)([^<]+)</', webpage, 'episode title')
+            r'(?:<div class="episode-title">|<h4>)([^<]+)</', webpage, 'episode title')
+        if episode_title:
+            episode_title = episode_title.strip()
 
         title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title
 
         description = self._html_search_regex(
-            r'<div id="info_text_body">(.+?)</div>', webpage,
+            r'<p\s+id="info_text_body">(.+?)</p>', webpage,
             'description', default=None)
-        thumbnail = self._search_regex(
-            r'<img[^>]+class="info-show-icon"[^>]+src="([^"]+)"',
-            webpage, 'thumbnail', fatal=False)
+        if description:
+            # Strip non-breaking and normal spaces
+            description = description.replace('\u00A0', ' ').strip()
         release_date = unified_strdate(self._search_regex(
             r'<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False))
 
+        data_json = self._search_regex(r'var\s+playlistItem\s*=\s*(\{.*?\});\n', webpage, 'JSON data block')
+        data = json.loads(data_json)
+
+        formats = [{
+            'url': data['media_url'],
+            'format_id': 'main',
+        }, {
+            'url': data['media_url_libsyn'],
+            'format_id': 'libsyn',
+        }]
+        thumbnail = data.get('thumbnail_url')
+        duration = parse_duration(data.get('duration'))
+
         return {
             'id': video_id,
             'title': title,
             'description': description,
             'thumbnail': thumbnail,
             'upload_date': release_date,
+            'duration': duration,
             'formats': formats,
         }
index 246aac576a2c6b275ed38ef614eb7a569b9e2ffb..26671753c429401fc9085b69e5a0aae10d495e6f 100644 (file)
@@ -7,7 +7,7 @@ from ..utils import int_or_none
 
 
 class LiveLeakIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<id>[\w_]+)(?:.*)'
+    _VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?.*?\b[it]=(?P<id>[\w_]+)'
     _TESTS = [{
         'url': 'http://www.liveleak.com/view?i=757_1364311680',
         'md5': '0813c2430bea7a46bf13acf3406992f4',
@@ -79,6 +79,9 @@ class LiveLeakIE(InfoExtractor):
             'title': 'Fuel Depot in China Explosion caught on video',
         },
         'playlist_count': 3,
+    }, {
+        'url': 'https://www.liveleak.com/view?t=HvHi_1523016227',
+        'only_matching': True,
     }]
 
     @staticmethod
index f8c30052f32486f511622656bdadbcf91f80717c..50d5db80276d3be6e68a773dbe5827a29555dff7 100644 (file)
@@ -141,6 +141,7 @@ class MedialaanIE(GigyaBaseIE):
 
         vod_id = config.get('vodId') or self._search_regex(
             (r'\\"vodId\\"\s*:\s*\\"(.+?)\\"',
+             r'"vodId"\s*:\s*"(.+?)"',
              r'<[^>]+id=["\']vod-(\d+)'),
             webpage, 'video_id', default=None)
 
index 54716f5c7af1dc15e9b0a5b5174b08ba68782bce..1c652813adb96b994c3ba517db994805e8ea8eb3 100644 (file)
@@ -12,7 +12,7 @@ class MofosexIE(KeezMoviesIE):
     _VALID_URL = r'https?://(?:www\.)?mofosex\.com/videos/(?P<id>\d+)/(?P<display_id>[^/?#&.]+)\.html'
     _TESTS = [{
         'url': 'http://www.mofosex.com/videos/318131/amateur-teen-playing-and-masturbating-318131.html',
-        'md5': '39a15853632b7b2e5679f92f69b78e91',
+        'md5': '558fcdafbb63a87c019218d6e49daf8a',
         'info_dict': {
             'id': '318131',
             'display_id': 'amateur-teen-playing-and-masturbating-318131',
index 246f6795a131908bf62c2b8c1477a7bc327afce5..4d2ee64080710a0a531aaf391eeb116da730cc89 100644 (file)
@@ -68,11 +68,11 @@ class NationalGeographicVideoIE(InfoExtractor):
 
 class NationalGeographicIE(ThePlatformIE, AdobePassIE):
     IE_NAME = 'natgeo'
-    _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:(?:wild/)?[^/]+/)?(?:videos|episodes)/(?P<id>[^/?]+)'
+    _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:(?:(?:wild/)?[^/]+/)?(?:videos|episodes)|u)/(?P<id>[^/?]+)'
 
     _TESTS = [
         {
-            'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/videos/uncovering-a-universal-knowledge/',
+            'url': 'http://channel.nationalgeographic.com/u/kdi9Ld0PN2molUUIMSBGxoeDhD729KRjQcnxtetilWPMevo8ZwUBIDuPR0Q3D2LVaTsk0MPRkRWDB8ZhqWVeyoxfsZZm36yRp1j-zPfsHEyI_EgAeFY/',
             'md5': '518c9aa655686cf81493af5cc21e2a04',
             'info_dict': {
                 'id': 'vKInpacll2pC',
@@ -86,7 +86,7 @@ class NationalGeographicIE(ThePlatformIE, AdobePassIE):
             'add_ie': ['ThePlatform'],
         },
         {
-            'url': 'http://channel.nationalgeographic.com/wild/destination-wild/videos/the-stunning-red-bird-of-paradise/',
+            'url': 'http://channel.nationalgeographic.com/u/kdvOstqYaBY-vSBPyYgAZRUL4sWUJ5XUUPEhc7ISyBHqoIO4_dzfY3K6EjHIC0hmFXoQ7Cpzm6RkET7S3oMlm6CFnrQwSUwo/',
             'md5': 'c4912f656b4cbe58f3e000c489360989',
             'info_dict': {
                 'id': 'Pok5lWCkiEFA',
@@ -106,6 +106,14 @@ class NationalGeographicIE(ThePlatformIE, AdobePassIE):
         {
             'url': 'http://channel.nationalgeographic.com/videos/treasures-rediscovered/',
             'only_matching': True,
+        },
+        {
+            'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/videos/uncovering-a-universal-knowledge/',
+            'only_matching': True,
+        },
+        {
+            'url': 'http://channel.nationalgeographic.com/wild/destination-wild/videos/the-stunning-red-bird-of-paradise/',
+            'only_matching': True,
         }
     ]
 
index 2047d440266907ea6cd16631cbf235d751c83d9a..bb3d944133d6a1e2685779b86a7565ad9b0985f0 100644 (file)
@@ -1,8 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
@@ -43,9 +41,14 @@ class NaverIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        m_id = re.search(r'var rmcPlayer = new nhn\.rmcnmv\.RMCVideoPlayer\("(.+?)", "(.+?)"',
-                         webpage)
-        if m_id is None:
+        vid = self._search_regex(
+            r'videoId["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
+            'video id', fatal=None, group='value')
+        in_key = self._search_regex(
+            r'inKey["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
+            'key', default=None, group='value')
+
+        if not vid or not in_key:
             error = self._html_search_regex(
                 r'(?s)<div class="(?:nation_error|nation_box|error_box)">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>',
                 webpage, 'error', default=None)
@@ -53,9 +56,9 @@ class NaverIE(InfoExtractor):
                 raise ExtractorError(error, expected=True)
             raise ExtractorError('couldn\'t extract vid and key')
         video_data = self._download_json(
-            'http://play.rmcnmv.naver.com/vod/play/v2.0/' + m_id.group(1),
+            'http://play.rmcnmv.naver.com/vod/play/v2.0/' + vid,
             video_id, query={
-                'key': m_id.group(2),
+                'key': in_key,
             })
         meta = video_data['meta']
         title = meta['subject']
index c7029d29ebc0f4f9b28ee4474a1cfa04600d3650..5e46a75c0a783bca430c2fdfc8bd062528759704 100644 (file)
@@ -230,15 +230,18 @@ class NexxIE(InfoExtractor):
 
         azure_locator = stream_data['azureLocator']
 
-        AZURE_URL = 'http://nx%s%02d.akamaized.net/'
-
-        def get_cdn_shield_base(shield_type='', prefix='-p'):
+        def get_cdn_shield_base(shield_type='', static=False):
             for secure in ('', 's'):
                 cdn_shield = stream_data.get('cdnShield%sHTTP%s' % (shield_type, secure.upper()))
                 if cdn_shield:
                     return 'http%s://%s' % (secure, cdn_shield)
             else:
-                return AZURE_URL % (prefix, int(stream_data['azureAccount'].replace('nexxplayplus', '')))
+                if 'fb' in stream_data['azureAccount']:
+                    prefix = 'df' if static else 'f'
+                else:
+                    prefix = 'd' if static else 'p'
+                account = int(stream_data['azureAccount'].replace('nexxplayplus', '').replace('nexxplayfb', ''))
+                return 'http://nx-%s%02d.akamaized.net/' % (prefix, account)
 
         azure_stream_base = get_cdn_shield_base()
         is_ml = ',' in language
@@ -260,7 +263,7 @@ class NexxIE(InfoExtractor):
         formats.extend(self._extract_ism_formats(
             azure_manifest_url % '', video_id, ism_id='%s-mss' % cdn, fatal=False))
 
-        azure_progressive_base = get_cdn_shield_base('Prog', '-d')
+        azure_progressive_base = get_cdn_shield_base('Prog', True)
         azure_file_distribution = stream_data.get('azureFileDistribution')
         if azure_file_distribution:
             fds = azure_file_distribution.split(',')
index 090f1acee4744740a45d9c1a0895c0ec5047a38a..256a24d86fb27c5f5fe905311f134c8c1fcffdba 100644 (file)
@@ -81,13 +81,23 @@ class NickIE(MTVServicesInfoExtractor):
 
 class NickBrIE(MTVServicesInfoExtractor):
     IE_NAME = 'nickelodeon:br'
-    _VALID_URL = r'https?://(?P<domain>(?:www\.)?nickjr|mundonick\.uol)\.com\.br/(?:programas/)?[^/]+/videos/(?:episodios/)?(?P<id>[^/?#.]+)'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            (?P<domain>(?:www\.)?nickjr|mundonick\.uol)\.com\.br|
+                            (?:www\.)?nickjr\.nl
+                        )
+                        /(?:programas/)?[^/]+/videos/(?:episodios/)?(?P<id>[^/?\#.]+)
+                    '''
     _TESTS = [{
         'url': 'http://www.nickjr.com.br/patrulha-canina/videos/210-labirinto-de-pipoca/',
         'only_matching': True,
     }, {
         'url': 'http://mundonick.uol.com.br/programas/the-loud-house/videos/muitas-irmas/7ljo9j',
         'only_matching': True,
+    }, {
+        'url': 'http://www.nickjr.nl/paw-patrol/videos/311-ge-wol-dig-om-terug-te-zijn/',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index 351bea7baecccb520911829ff85db809d80c1eb6..f32f530f75b16bec0040ce581b4122bd1166c795 100644 (file)
@@ -4,15 +4,17 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
+    ExtractorError,
     int_or_none,
     float_or_none,
-    ExtractorError,
+    smuggle_url,
 )
 
 
 class NineNowIE(InfoExtractor):
     IE_NAME = '9now.com.au'
     _VALID_URL = r'https?://(?:www\.)?9now\.com\.au/(?:[^/]+/){2}(?P<id>[^/?#]+)'
+    _GEO_COUNTRIES = ['AU']
     _TESTS = [{
         # clip
         'url': 'https://www.9now.com.au/afl-footy-show/2016/clip-ciql02091000g0hp5oktrnytc',
@@ -75,7 +77,9 @@ class NineNowIE(InfoExtractor):
 
         return {
             '_type': 'url_transparent',
-            'url': self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
+            'url': smuggle_url(
+                self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
+                {'geo_countries': self._GEO_COUNTRIES}),
             'id': video_id,
             'title': title,
             'description': common_data.get('description'),
index 5c8b37e18bf6232bfd70c6221b6da925411b9fe7..190d8af4da373962cc2f1e1366a24e51de0b8b36 100644 (file)
@@ -19,7 +19,18 @@ from ..utils import (
 
 
 class OdnoklassnikiIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:(?:www|m|mobile)\.)?(?:odnoklassniki|ok)\.ru/(?:video(?:embed)?|web-api/video/moviePlayer|live)/(?P<id>[\d-]+)'
+    _VALID_URL = r'''(?x)
+                https?://
+                    (?:(?:www|m|mobile)\.)?
+                    (?:odnoklassniki|ok)\.ru/
+                    (?:
+                        video(?:embed)?/|
+                        web-api/video/moviePlayer/|
+                        live/|
+                        dk\?.*?st\.mvId=
+                    )
+                    (?P<id>[\d-]+)
+                '''
     _TESTS = [{
         # metadata in JSON
         'url': 'http://ok.ru/video/20079905452',
@@ -101,6 +112,9 @@ class OdnoklassnikiIE(InfoExtractor):
     }, {
         'url': 'https://www.ok.ru/live/484531969818',
         'only_matching': True,
+    }, {
+        'url': 'https://m.ok.ru/dk?st.cmd=movieLayer&st.discId=863789452017&st.retLoc=friend&st.rtu=%2Fdk%3Fst.cmd%3DfriendMovies%26st.mode%3Down%26st.mrkId%3D%257B%2522uploadedMovieMarker%2522%253A%257B%2522marker%2522%253A%25221519410114503%2522%252C%2522hasMore%2522%253Atrue%257D%252C%2522sharedMovieMarker%2522%253A%257B%2522marker%2522%253Anull%252C%2522hasMore%2522%253Afalse%257D%257D%26st.friendId%3D561722190321%26st.frwd%3Don%26_prevCmd%3DfriendMovies%26tkn%3D7257&st.discType=MOVIE&st.mvId=863789452017&_prevCmd=friendMovies&tkn=3648#lst#',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index eaaaf8a081782ae597f2ed6a376c09fca6fbf5e5..d0bdd60b8208d2f4c44f18d0119770f5309b0495 100644 (file)
@@ -243,7 +243,7 @@ class PhantomJSwrapper(object):
 
 
 class OpenloadIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
 
     _TESTS = [{
         'url': 'https://openload.co/f/kUEfGclsU9o',
@@ -298,6 +298,9 @@ class OpenloadIE(InfoExtractor):
     }, {
         'url': 'https://oload.stream/f/KnG-kKZdcfY',
         'only_matching': True,
+    }, {
+        'url': 'https://oload.xyz/f/WwRBpzW8Wtk',
+        'only_matching': True,
     }]
 
     _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
@@ -334,10 +337,14 @@ class OpenloadIE(InfoExtractor):
 
         decoded_id = (get_element_by_id('streamurl', webpage) or
                       get_element_by_id('streamuri', webpage) or
-                      get_element_by_id('streamurj', webpage))
-
-        if not decoded_id:
-            raise ExtractorError('Can\'t find stream URL', video_id=video_id)
+                      get_element_by_id('streamurj', webpage) or
+                      self._search_regex(
+                          (r'>\s*([\w-]+~\d{10,}~\d+\.\d+\.0\.0~[\w-]+)\s*<',
+                           r'>\s*([\w~-]+~\d+\.\d+\.\d+\.\d+~[\w~-]+)',
+                           r'>\s*([\w-]+~\d{10,}~(?:[a-f\d]+:){2}:~[\w-]+)\s*<',
+                           r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)\s*<',
+                           r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage,
+                          'stream URL'))
 
         video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id
 
diff --git a/youtube_dl/extractor/picarto.py b/youtube_dl/extractor/picarto.py
new file mode 100644 (file)
index 0000000..2366dfb
--- /dev/null
@@ -0,0 +1,189 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import time
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    ExtractorError,
+    js_to_json,
+    try_get,
+    update_url_query,
+    urlencode_postdata,
+)
+
+
+class PicartoIE(InfoExtractor):
+    """Extractor for live channel pages on picarto.tv.
+
+    URLs that PicartoVodIE accepts are rejected by ``suitable`` so that the
+    two extractors never both claim the same URL.
+    """
+
+    # "www\." (escaped dot) so that only a literal "www." prefix matches.
+    _VALID_URL = r'https?://(?:www\.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
+    _TEST = {
+        'url': 'https://picarto.tv/Setz',
+        'info_dict': {
+            'id': 'Setz',
+            'ext': 'mp4',
+            'title': 're:^Setz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'timestamp': int,
+            'is_live': True
+        },
+        'skip': 'Stream is offline',
+    }
+
+    @classmethod
+    def suitable(cls, url):
+        """Return False for PicartoVodIE URLs, else the inherited answer."""
+        return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        """Build the info dict for a live channel.
+
+        Raises ExtractorError (expected) when the page says the channel does
+        not exist or the player settings report ``online`` as False.
+        """
+        channel_id = self._match_id(url)
+        stream_page = self._download_webpage(url, channel_id)
+
+        if '>This channel does not exist' in stream_page:
+            raise ExtractorError(
+                'Channel %s does not exist' % channel_id, expected=True)
+
+        # The captured settings object is run through js_to_json before parsing.
+        player = self._parse_json(
+            self._search_regex(
+                r'(?s)playerSettings\[\d+\]\s*=\s*(\{.+?\}\s*\n)', stream_page,
+                'player settings'),
+            channel_id, transform_source=js_to_json)
+
+        if player.get('online') is False:
+            raise ExtractorError('Stream is offline', expected=True)
+
+        cdn_data = self._download_json(
+            'https://picarto.tv/process/channel', channel_id,
+            data=urlencode_postdata({'loadbalancinginfo': channel_id}),
+            note='Downloading load balancing info')
+
+        def get_event(key):
+            # String value of player['event'][key], or '' when unavailable.
+            return try_get(player, lambda x: x['event'][key], compat_str) or ''
+
+        # Query parameters appended to every stream URL below.
+        params = {
+            'token': player.get('token') or '',
+            'ticket': get_event('ticket'),
+            'con': int(time.time() * 1000),
+            # NOTE(review): 'type' reuses the 'ticket' event value; confirm
+            # against the site player whether get_event('type') was meant.
+            'type': get_event('ticket'),
+            'scope': get_event('scope'),
+        }
+
+        prefered_edge = cdn_data.get('preferedEdge')
+        default_tech = player.get('defaultTech')
+
+        formats = []
+
+        # One candidate format per (edge, tech) pair.
+        for edge in cdn_data['edges']:
+            edge_ep = edge.get('ep')
+            if not edge_ep or not isinstance(edge_ep, compat_str):
+                continue
+            edge_id = edge.get('id')
+            for tech in cdn_data['techs']:
+                tech_label = tech.get('label')
+                tech_type = tech.get('type')
+                # +1 for the preferred edge, +1 for the player's default tech.
+                preference = 0
+                if edge_id == prefered_edge:
+                    preference += 1
+                if tech_type == default_tech:
+                    preference += 1
+                format_id = []
+                if edge_id:
+                    format_id.append(edge_id)
+                if tech_type == 'application/x-mpegurl' or tech_label == 'HLS':
+                    format_id.append('hls')
+                    formats.extend(self._extract_m3u8_formats(
+                        update_url_query(
+                            'https://%s/hls/%s/index.m3u8'
+                            % (edge_ep, channel_id), params),
+                        channel_id, 'mp4', preference=preference,
+                        m3u8_id='-'.join(format_id), fatal=False))
+                elif tech_type == 'video/mp4' or tech_label == 'MP4':
+                    format_id.append('mp4')
+                    formats.append({
+                        'url': update_url_query(
+                            'https://%s/mp4/%s.mp4' % (edge_ep, channel_id),
+                            params),
+                        'format_id': '-'.join(format_id),
+                        'preference': preference,
+                    })
+                # Any other tech is skipped: rtmp format does not seem to work
+        self._sort_formats(formats)
+
+        # Tri-state: unknown -> None, explicit True -> 18, anything else -> 0.
+        mature = player.get('mature')
+        if mature is None:
+            age_limit = None
+        else:
+            age_limit = 18 if mature is True else 0
+
+        return {
+            'id': channel_id,
+            'title': self._live_title(channel_id),
+            'is_live': True,
+            'thumbnail': player.get('vodThumb'),
+            'age_limit': age_limit,
+            'formats': formats,
+        }
+
+
+class PicartoVodIE(InfoExtractor):
+    """Extractor for picarto.tv ``/videopopout/<id>`` recording pages."""
+
+    # "www\." (escaped dot) so that only a literal "www." prefix matches.
+    _VALID_URL = r'https?://(?:www\.)?picarto\.tv/videopopout/(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://picarto.tv/videopopout/ArtofZod_2017.12.12.00.13.23.flv',
+        'md5': '3ab45ba4352c52ee841a28fb73f2d9ca',
+        'info_dict': {
+            'id': 'ArtofZod_2017.12.12.00.13.23.flv',
+            'ext': 'mp4',
+            'title': 'ArtofZod_2017.12.12.00.13.23.flv',
+            'thumbnail': r're:^https?://.*\.jpg'
+        },
+    }, {
+        'url': 'https://picarto.tv/videopopout/Plague',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Build the info dict for a recording from its popout page."""
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        # Pass a descriptive label to _search_regex (as PicartoIE does with
+        # 'player settings') rather than the video id.
+        vod_info = self._parse_json(
+            self._search_regex(
+                r'(?s)#vod-player["\']\s*,\s*(\{.+?\})\s*\)', webpage,
+                'vod player info'),
+            video_id, transform_source=js_to_json)
+
+        formats = self._extract_m3u8_formats(
+            vod_info['vod'], video_id, 'mp4', entry_protocol='m3u8_native',
+            m3u8_id='hls')
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video_id,
+            'thumbnail': vod_info.get('vodThumb'),
+            'formats': formats,
+        }
index ee04936e1a44819301207a362d7aa8242ff49271..025985fbcd1558af7f0041cf189a20d0529e65b1 100644 (file)
@@ -14,7 +14,7 @@ from ..utils import (
 
 
 class PornFlipIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[0-9A-Za-z-]{11})'
+    _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'https://www.pornflip.com/v/wz7DfNhMmep',
         'md5': '98c46639849145ae1fd77af532a9278c',
@@ -40,6 +40,9 @@ class PornFlipIE(InfoExtractor):
     }, {
         'url': 'https://www.pornflip.com/embed/EkRD6-vS2-s',
         'only_matching': True,
+    }, {
+        'url': 'https://www.pornflip.com/v/NG9q6Pb_iK8',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index 9ce513aeb1968264b0150faecc3612b6739f4dc7..23e24d216c04773841944b26d1c524e260b6d5ee 100644 (file)
@@ -33,7 +33,7 @@ class PornHubIE(InfoExtractor):
     _VALID_URL = r'''(?x)
                     https?://
                         (?:
-                            (?:[a-z]+\.)?pornhub\.com/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
+                            (?:[^/]+\.)?pornhub\.com/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
                             (?:www\.)?thumbzilla\.com/video/
                         )
                         (?P<id>[\da-z]+)
@@ -264,7 +264,7 @@ class PornHubPlaylistBaseIE(InfoExtractor):
 
 
 class PornHubPlaylistIE(PornHubPlaylistBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/playlist/(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://www.pornhub.com/playlist/4667351',
         'info_dict': {
@@ -272,11 +272,14 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE):
             'title': 'Nataly Hot',
         },
         'playlist_mincount': 2,
+    }, {
+        'url': 'https://de.pornhub.com/playlist/4667351',
+        'only_matching': True,
     }]
 
 
 class PornHubUserVideosIE(PornHubPlaylistBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?pornhub\.com/(?:user|channel)s/(?P<id>[^/]+)/videos'
+    _VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/(?:user|channel)s/(?P<id>[^/]+)/videos'
     _TESTS = [{
         'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
         'info_dict': {
@@ -305,6 +308,9 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE):
         # Most Viewed Videos
         'url': 'https://www.pornhub.com/channels/povd/videos?o=vi',
         'only_matching': True,
+    }, {
+        'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index 7efff45662906b7a861ba6e22fc1a840f82635f6..d0955d07903cca42806b4c53ca72eb87157a09fc 100644 (file)
@@ -133,7 +133,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
                             (?:
                                 prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv|advopedia
                             )\.(?:de|at|ch)|
-                            ran\.de|fem\.com|advopedia\.de
+                            ran\.de|fem\.com|advopedia\.de|galileo\.tv/video
                         )
                         /(?P<id>.+)
                     '''
@@ -326,6 +326,11 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
             'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge',
             'only_matching': True,
         },
+        {
+            # geo restricted to Germany
+            'url': 'https://www.galileo.tv/video/diese-emojis-werden-oft-missverstanden',
+            'only_matching': True,
+        },
         {
             'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel',
             'only_matching': True,
@@ -343,7 +348,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
         r'"clip_id"\s*:\s+"(\d+)"',
         r'clipid: "(\d+)"',
         r'clip[iI]d=(\d+)',
-        r'clip[iI]d\s*=\s*["\'](\d+)',
+        r'clip[iI][dD]\s*=\s*["\'](\d+)',
         r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
         r'proMamsId&quot;\s*:\s*&quot;(\d+)',
         r'proMamsId"\s*:\s*"(\d+)',
index d338b3a933cf10fb50621ea4d785d4e83324466c..8bcf87126b18dd82f4c08bc7a9c586b0f99f112e 100644 (file)
@@ -3,6 +3,10 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from ..compat import compat_str
+from ..utils import (
+    determine_ext,
+    int_or_none,
+)
 
 
 class RENTVIE(InfoExtractor):
@@ -13,7 +17,9 @@ class RENTVIE(InfoExtractor):
         'info_dict': {
             'id': '118577',
             'ext': 'mp4',
-            'title': 'Документальный спецпроект: "Промывка мозгов. Технологии XXI века"'
+            'title': 'Документальный спецпроект: "Промывка мозгов. Технологии XXI века"',
+            'timestamp': 1472230800,
+            'upload_date': '20160826',
         }
     }, {
         'url': 'http://ren.tv/player/118577',
@@ -26,9 +32,33 @@ class RENTVIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage('http://ren.tv/player/' + video_id, video_id)
-        jw_config = self._parse_json(self._search_regex(
-            r'config\s*=\s*({.+});', webpage, 'jw config'), video_id)
-        return self._parse_jwplayer_data(jw_config, video_id, m3u8_id='hls')
+        config = self._parse_json(self._search_regex(
+            r'config\s*=\s*({.+})\s*;', webpage, 'config'), video_id)
+        title = config['title']
+        formats = []
+        for video in config['src']:
+            src = video.get('src')
+            if not src or not isinstance(src, compat_str):
+                continue
+            ext = determine_ext(src)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    src, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id='hls', fatal=False))
+            else:
+                formats.append({
+                    'url': src,
+                })
+        self._sort_formats(formats)
+        return {
+            'id': video_id,
+            'title': title,
+            'description': config.get('description'),
+            'thumbnail': config.get('image'),
+            'duration': int_or_none(config.get('duration')),
+            'timestamp': int_or_none(config.get('date')),
+            'formats': formats,
+        }
 
 
 class RENTVArticleIE(InfoExtractor):
index 9792f820a522fc6735772bb0d4ba8a45e28cd93f..84568ac69f4bc761faa20fb3039d3070ae109ba5 100644 (file)
@@ -4,22 +4,30 @@ from __future__ import unicode_literals
 import re
 
 from .brightcove import BrightcoveNewIE
-from ..utils import update_url_query
+from ..compat import compat_str
+from ..utils import (
+    try_get,
+    update_url_query,
+)
 
 
 class SevenPlusIE(BrightcoveNewIE):
     IE_NAME = '7plus'
     _VALID_URL = r'https?://(?:www\.)?7plus\.com\.au/(?P<path>[^?]+\?.*?\bepisode-id=(?P<id>[^&#]+))'
     _TESTS = [{
-        'url': 'https://7plus.com.au/BEAT?episode-id=BEAT-001',
+        'url': 'https://7plus.com.au/MTYS?episode-id=MTYS7-003',
         'info_dict': {
-            'id': 'BEAT-001',
+            'id': 'MTYS7-003',
             'ext': 'mp4',
-            'title': 'S1 E1 - Help / Lucy In The Sky With Diamonds',
-            'description': 'md5:37718bea20a8eedaca7f7361af566131',
+            'title': 'S7 E3 - Wind Surf',
+            'description': 'md5:29c6a69f21accda7601278f81b46483d',
             'uploader_id': '5303576322001',
-            'upload_date': '20171031',
-            'timestamp': 1509440068,
+            'upload_date': '20171201',
+            'timestamp': 1512106377,
+            'series': 'Mighty Ships',
+            'season_number': 7,
+            'episode_number': 3,
+            'episode': 'Wind Surf',
         },
         'params': {
             'format': 'bestvideo',
@@ -63,5 +71,14 @@ class SevenPlusIE(BrightcoveNewIE):
                     value = item.get(src_key)
                     if value:
                         info[dst_key] = value
+                info['series'] = try_get(
+                    item, lambda x: x['seriesLogo']['name'], compat_str)
+                mobj = re.search(r'^S(\d+)\s+E(\d+)\s+-\s+(.+)$', info['title'])
+                if mobj:
+                    info.update({
+                        'season_number': int(mobj.group(1)),
+                        'episode_number': int(mobj.group(2)),
+                        'episode': mobj.group(3),
+                    })
 
         return info
index 370fa887968128281a6286f78a1fdf4bf59f7b9f..45995f30f301ff1a442c584505bf3d0d848ca069 100644 (file)
@@ -310,6 +310,7 @@ class SmotriBroadcastIE(InfoExtractor):
     IE_DESC = 'Smotri.com broadcasts'
     IE_NAME = 'smotri:broadcast'
     _VALID_URL = r'https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<id>[^/]+))/?.*'
+    _NETRC_MACHINE = 'smotri'
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -352,17 +353,18 @@ class SmotriBroadcastIE(InfoExtractor):
             adult_content = False
 
         ticket = self._html_search_regex(
-            r"window\.broadcast_control\.addFlashVar\('file'\s*,\s*'([^']+)'\)",
-            broadcast_page, 'broadcast ticket')
+            (r'data-user-file=(["\'])(?P<ticket>(?!\1).+)\1',
+             r"window\.broadcast_control\.addFlashVar\('file'\s*,\s*'(?P<ticket>[^']+)'\)"),
+            broadcast_page, 'broadcast ticket', group='ticket')
 
-        url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket
+        broadcast_url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket
 
         broadcast_password = self._downloader.params.get('videopassword')
         if broadcast_password:
-            url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest()
+            broadcast_url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest()
 
         broadcast_json_page = self._download_webpage(
-            url, broadcast_id, 'Downloading broadcast JSON')
+            broadcast_url, broadcast_id, 'Downloading broadcast JSON')
 
         try:
             broadcast_json = json.loads(broadcast_json_page)
index e5ac586a7f750b131de611ca9bf66b6f826e54dd..a6a191cebb4b98e46627ecc9ae8bd70071193ea6 100644 (file)
@@ -75,6 +75,9 @@ class SteamIE(InfoExtractor):
             gameID = m.group('gameID')
             playlist_id = gameID
             videourl = self._VIDEO_PAGE_TEMPLATE % playlist_id
+
+        self._set_cookie('steampowered.com', 'mature_content', '1')
+
         webpage = self._download_webpage(videourl, playlist_id)
 
         if re.search('<h2>Please enter your birth date to continue:</h2>', webpage) is not None:
index 48bc4529e6ae8a265a672c066ac59b388ea3a5d5..f71eab8b25014501aa6d123e70fba4506c095cea 100644 (file)
@@ -4,11 +4,17 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_parse_qs,
+    compat_urllib_parse_urlparse,
+)
 from ..utils import (
     determine_ext,
     dict_get,
     int_or_none,
     try_get,
+    urljoin,
+    compat_str,
 )
 
 
@@ -16,6 +22,8 @@ class SVTBaseIE(InfoExtractor):
     _GEO_COUNTRIES = ['SE']
 
     def _extract_video(self, video_info, video_id):
+        is_live = dict_get(video_info, ('live', 'simulcast'), default=False)
+        m3u8_protocol = 'm3u8' if is_live else 'm3u8_native'
         formats = []
         for vr in video_info['videoReferences']:
             player_type = vr.get('playerType') or vr.get('format')
@@ -24,7 +32,7 @@ class SVTBaseIE(InfoExtractor):
             if ext == 'm3u8':
                 formats.extend(self._extract_m3u8_formats(
                     vurl, video_id,
-                    ext='mp4', entry_protocol='m3u8_native',
+                    ext='mp4', entry_protocol=m3u8_protocol,
                     m3u8_id=player_type, fatal=False))
             elif ext == 'f4m':
                 formats.extend(self._extract_f4m_formats(
@@ -84,6 +92,7 @@ class SVTBaseIE(InfoExtractor):
             'season_number': season_number,
             'episode': episode,
             'episode_number': episode_number,
+            'is_live': is_live,
         }
 
 
@@ -122,9 +131,13 @@ class SVTIE(SVTBaseIE):
         return info_dict
 
 
-class SVTPlayIE(SVTBaseIE):
+class SVTPlayBaseIE(SVTBaseIE):
+    _SVTPLAY_RE = r'root\s*\[\s*(["\'])_*svtplay\1\s*\]\s*=\s*(?P<json>{.+?})\s*;\s*\n'
+
+
+class SVTPlayIE(SVTPlayBaseIE):
     IE_DESC = 'SVT Play and Öppet arkiv'
-    _VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp)/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2',
         'md5': '2b6704fe4a28801e1a098bbf3c5ac611',
@@ -148,6 +161,9 @@ class SVTPlayIE(SVTBaseIE):
     }, {
         'url': 'http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg',
         'only_matching': True,
+    }, {
+        'url': 'https://www.svtplay.se/kanaler/svt1',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -157,12 +173,16 @@ class SVTPlayIE(SVTBaseIE):
 
         data = self._parse_json(
             self._search_regex(
-                r'root\["__svtplay"\]\s*=\s*([^;]+);',
-                webpage, 'embedded data', default='{}'),
+                self._SVTPLAY_RE, webpage, 'embedded data', default='{}',
+                group='json'),
             video_id, fatal=False)
 
         thumbnail = self._og_search_thumbnail(webpage)
 
+        def adjust_title(info):
+            if info['is_live']:
+                info['title'] = self._live_title(info['title'])
+
         if data:
             video_info = try_get(
                 data, lambda x: x['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'],
@@ -173,6 +193,7 @@ class SVTPlayIE(SVTBaseIE):
                     'title': data['context']['dispatcher']['stores']['MetaStore']['title'],
                     'thumbnail': thumbnail,
                 })
+                adjust_title(info_dict)
                 return info_dict
 
         video_id = self._search_regex(
@@ -188,4 +209,86 @@ class SVTPlayIE(SVTBaseIE):
                 info_dict['title'] = re.sub(
                     r'\s*\|\s*.+?$', '',
                     info_dict.get('episode') or self._og_search_title(webpage))
+            adjust_title(info_dict)
             return info_dict
+
+
+class SVTSeriesIE(SVTPlayBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?svtplay\.se/(?P<id>[^/?&#]+)'
+    _TESTS = [{
+        'url': 'https://www.svtplay.se/rederiet',
+        'info_dict': {
+            'id': 'rederiet',
+            'title': 'Rederiet',
+            'description': 'md5:505d491a58f4fcf6eb418ecab947e69e',
+        },
+        'playlist_mincount': 318,
+    }, {
+        'url': 'https://www.svtplay.se/rederiet?tab=sasong2',
+        'info_dict': {
+            'id': 'rederiet-sasong2',
+            'title': 'Rederiet - Säsong 2',
+            'description': 'md5:505d491a58f4fcf6eb418ecab947e69e',
+        },
+        'playlist_count': 12,
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTSeriesIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        series_id = self._match_id(url)
+
+        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+        season_slug = qs.get('tab', [None])[0]
+
+        if season_slug:
+            series_id += '-%s' % season_slug
+
+        webpage = self._download_webpage(
+            url, series_id, 'Downloading series page')
+
+        root = self._parse_json(
+            self._search_regex(
+                self._SVTPLAY_RE, webpage, 'content', group='json'),
+            series_id)
+
+        season_name = None
+
+        entries = []
+        for season in root['relatedVideoContent']['relatedVideosAccordion']:
+            if not isinstance(season, dict):
+                continue
+            if season_slug:
+                if season.get('slug') != season_slug:
+                    continue
+                season_name = season.get('name')
+            videos = season.get('videos')
+            if not isinstance(videos, list):
+                continue
+            for video in videos:
+                content_url = video.get('contentUrl')
+                if not content_url or not isinstance(content_url, compat_str):
+                    continue
+                entries.append(
+                    self.url_result(
+                        urljoin(url, content_url),
+                        ie=SVTPlayIE.ie_key(),
+                        video_title=video.get('title')
+                    ))
+
+        metadata = root.get('metaData')
+        if not isinstance(metadata, dict):
+            metadata = {}
+
+        title = metadata.get('title')
+        season_name = season_name or season_slug
+
+        if title and season_name:
+            title = '%s - %s' % (title, season_name)
+        elif season_slug:
+            title = season_slug
+
+        return self.playlist_result(
+            entries, series_id, title, metadata.get('description'))
index 1853a1104c2b8957793ede25c6296598eb0babc9..368c45729af533eca244f4d0e8a29bfe5f4f7a8a 100644 (file)
@@ -31,6 +31,12 @@ class Tube8IE(KeezMoviesIE):
         'only_matching': True,
     }]
 
+    @staticmethod
+    def _extract_urls(webpage):
+        return re.findall(
+            r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?tube8\.com/embed/(?:[^/]+/)+\d+)',
+            webpage)
+
     def _real_extract(self, url):
         webpage, info = self._extract_info(url)
 
index 1bf47244440809f154d01b5664f1ffa085e4fb29..808571ece0fce5c9698c6c351b51f1ffcd717171 100644 (file)
@@ -10,6 +10,7 @@ from ..utils import (
     int_or_none,
     parse_iso8601,
     parse_duration,
+    try_get,
     update_url_query,
 )
 
@@ -58,14 +59,22 @@ class TVNowBaseIE(InfoExtractor):
         duration = parse_duration(info.get('duration'))
 
         f = info.get('format', {})
+
+        thumbnails = [{
+            'url': 'https://aistvnow-a.akamaihd.net/tvnow/movie/%s' % video_id,
+        }]
         thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
+        if thumbnail:
+            thumbnails.append({
+                'url': thumbnail,
+            })
 
         return {
             'id': video_id,
             'display_id': display_id,
             'title': title,
             'description': description,
-            'thumbnail': thumbnail,
+            'thumbnails': thumbnails,
             'timestamp': timestamp,
             'duration': duration,
             'series': f.get('title'),
@@ -77,7 +86,12 @@ class TVNowBaseIE(InfoExtractor):
 
 
 class TVNowIE(TVNowBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?tvnow\.(?:de|at|ch)/(?:rtl(?:2|plus)?|nitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/(?:(?:list/[^/]+|jahr/\d{4}/\d{1,2})/)?(?P<id>[^/]+)/(?:player|preview)'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:www\.)?tvnow\.(?:de|at|ch)/[^/]+/
+                        (?P<show_id>[^/]+)/
+                        (?!(?:list|jahr)(?:/|$))(?P<id>[^/?\#&]+)
+                    '''
 
     _TESTS = [{
         'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player',
@@ -99,27 +113,30 @@ class TVNowIE(TVNowBaseIE):
     }, {
         # rtl2
         'url': 'https://www.tvnow.de/rtl2/armes-deutschland/episode-0008/player',
-        'only_matching': 'True',
+        'only_matching': True,
     }, {
         # rtlnitro
         'url': 'https://www.tvnow.de/nitro/alarm-fuer-cobra-11-die-autobahnpolizei/auf-eigene-faust-pilot/player',
-        'only_matching': 'True',
+        'only_matching': True,
     }, {
         # superrtl
         'url': 'https://www.tvnow.de/superrtl/die-lustigsten-schlamassel-der-welt/u-a-ketchup-effekt/player',
-        'only_matching': 'True',
+        'only_matching': True,
     }, {
         # ntv
         'url': 'https://www.tvnow.de/ntv/startup-news/goetter-in-weiss/player',
-        'only_matching': 'True',
+        'only_matching': True,
     }, {
         # vox
         'url': 'https://www.tvnow.de/vox/auto-mobil/neues-vom-automobilmarkt-2017-11-19-17-00-00/player',
-        'only_matching': 'True',
+        'only_matching': True,
     }, {
         # rtlplus
         'url': 'https://www.tvnow.de/rtlplus/op-ruft-dr-bruckner/die-vernaehte-frau/player',
-        'only_matching': 'True',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -133,8 +150,30 @@ class TVNowIE(TVNowBaseIE):
         return self._extract_video(info, display_id)
 
 
-class TVNowListIE(TVNowBaseIE):
-    _VALID_URL = r'(?P<base_url>https?://(?:www\.)?tvnow\.(?:de|at|ch)/(?:rtl(?:2|plus)?|nitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/)list/(?P<id>[^?/#&]+)$'
+class TVNowListBaseIE(TVNowBaseIE):
+    _SHOW_VALID_URL = r'''(?x)
+                    (?P<base_url>
+                        https?://
+                            (?:www\.)?tvnow\.(?:de|at|ch)/[^/]+/
+                            (?P<show_id>[^/]+)
+                    )
+                    '''
+
+    def _extract_list_info(self, display_id, show_id):
+        fields = list(self._SHOW_FIELDS)
+        fields.extend('formatTabs.%s' % field for field in self._SEASON_FIELDS)
+        fields.extend(
+            'formatTabs.formatTabPages.container.movies.%s' % field
+            for field in self._VIDEO_FIELDS)
+        return self._call_api(
+            'formats/seo', display_id, query={
+                'fields': ','.join(fields),
+                'name': show_id + '.php'
+            })
+
+
+class TVNowListIE(TVNowListBaseIE):
+    _VALID_URL = r'%s/(?:list|jahr)/(?P<id>[^?\#&]+)' % TVNowListBaseIE._SHOW_VALID_URL
 
     _SHOW_FIELDS = ('title', )
     _SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
@@ -147,38 +186,94 @@ class TVNowListIE(TVNowBaseIE):
             'title': '30 Minuten Deutschland - Aktuell',
         },
         'playlist_mincount': 1,
+    }, {
+        'url': 'https://www.tvnow.de/vox/ab-ins-beet/list/staffel-14',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/2018/3',
+        'only_matching': True,
     }]
 
+    @classmethod
+    def suitable(cls, url):
+        return (False if TVNowIE.suitable(url)
+                else super(TVNowListIE, cls).suitable(url))
+
     def _real_extract(self, url):
         base_url, show_id, season_id = re.match(self._VALID_URL, url).groups()
 
-        fields = []
-        fields.extend(self._SHOW_FIELDS)
-        fields.extend('formatTabs.%s' % field for field in self._SEASON_FIELDS)
-        fields.extend(
-            'formatTabs.formatTabPages.container.movies.%s' % field
-            for field in self._VIDEO_FIELDS)
-
-        list_info = self._call_api(
-            'formats/seo', season_id, query={
-                'fields': ','.join(fields),
-                'name': show_id + '.php'
-            })
+        list_info = self._extract_list_info(season_id, show_id)
 
         season = next(
             season for season in list_info['formatTabs']['items']
             if season.get('seoheadline') == season_id)
 
-        title = '%s - %s' % (list_info['title'], season['headline'])
+        title = list_info.get('title')
+        headline = season.get('headline')
+        if title and headline:
+            title = '%s - %s' % (title, headline)
+        else:
+            title = headline or title
 
         entries = []
         for container in season['formatTabPages']['items']:
-            for info in ((container.get('container') or {}).get('movies') or {}).get('items') or []:
+            items = try_get(
+                container, lambda x: x['container']['movies']['items'],
+                list) or []
+            for info in items:
                 seo_url = info.get('seoUrl')
                 if not seo_url:
                     continue
+                video_id = info.get('id')
                 entries.append(self.url_result(
-                    base_url + seo_url + '/player', 'TVNow', info.get('id')))
+                    '%s/%s/player' % (base_url, seo_url), TVNowIE.ie_key(),
+                    compat_str(video_id) if video_id else None))
 
         return self.playlist_result(
             entries, compat_str(season.get('id') or season_id), title)
+
+
+class TVNowShowIE(TVNowListBaseIE):
+    _VALID_URL = TVNowListBaseIE._SHOW_VALID_URL
+
+    _SHOW_FIELDS = ('id', 'title', )
+    _SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
+    _VIDEO_FIELDS = ()
+
+    _TESTS = [{
+        'url': 'https://www.tvnow.at/vox/ab-ins-beet',
+        'info_dict': {
+            'id': 'ab-ins-beet',
+            'title': 'Ab ins Beet!',
+        },
+        'playlist_mincount': 7,
+    }, {
+        'url': 'https://www.tvnow.at/vox/ab-ins-beet/list',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/',
+        'only_matching': True,
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        return (False if TVNowIE.suitable(url) or TVNowListIE.suitable(url)
+                else super(TVNowShowIE, cls).suitable(url))
+
+    def _real_extract(self, url):
+        base_url, show_id = re.match(self._VALID_URL, url).groups()
+
+        list_info = self._extract_list_info(show_id, show_id)
+
+        entries = []
+        for season_info in list_info['formatTabs']['items']:
+            season_url = season_info.get('seoheadline')
+            if not season_url:
+                continue
+            season_id = season_info.get('id')
+            entries.append(self.url_result(
+                '%s/list/%s' % (base_url, season_url), TVNowListIE.ie_key(),
+                compat_str(season_id) if season_id else None,
+                season_info.get('headline')))
+
+        return self.playlist_result(entries, show_id, list_info.get('title'))
index 96e0b96e3556c0331064a1357bd809ced7c3bf52..4b3b3e705eb8f758afb466970ed50b8e2ee102fd 100644 (file)
@@ -14,7 +14,7 @@ from ..utils import (
 
 class TwentyFourVideoIE(InfoExtractor):
     IE_NAME = '24video'
-    _VALID_URL = r'https?://(?P<host>(?:www\.)?24video\.(?:net|me|xxx|sex|tube|adult))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
+    _VALID_URL = r'https?://(?P<host>(?:www\.)?24video\.(?:net|me|xxx|sexy?|tube|adult))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'http://www.24video.net/video/view/1044982',
index 1981b4d4a8064541c0fcf2faa04d863c72eebb14..4c11fd3c38abb88fb77baf199b47039b79c43458 100644 (file)
@@ -28,7 +28,7 @@ from ..utils import (
 
 
 class TwitchBaseIE(InfoExtractor):
-    _VALID_URL_BASE = r'https?://(?:(?:www|go)\.)?twitch\.tv'
+    _VALID_URL_BASE = r'https?://(?:(?:www|go|m)\.)?twitch\.tv'
 
     _API_BASE = 'https://api.twitch.tv'
     _USHER_BASE = 'https://usher.ttvnw.net'
@@ -168,6 +168,13 @@ class TwitchItemBaseIE(TwitchBaseIE):
         return self.playlist_result(entries, info['id'], info['title'])
 
     def _extract_info(self, info):
+        status = info.get('status')
+        if status == 'recording':
+            is_live = True
+        elif status == 'recorded':
+            is_live = False
+        else:
+            is_live = None
         return {
             'id': info['_id'],
             'title': info.get('title') or 'Untitled Broadcast',
@@ -178,6 +185,7 @@ class TwitchItemBaseIE(TwitchBaseIE):
             'uploader_id': info.get('channel', {}).get('name'),
             'timestamp': parse_iso8601(info.get('recorded_at')),
             'view_count': int_or_none(info.get('views')),
+            'is_live': is_live,
         }
 
     def _real_extract(self, url):
@@ -226,7 +234,7 @@ class TwitchVodIE(TwitchItemBaseIE):
     _VALID_URL = r'''(?x)
                     https?://
                         (?:
-                            (?:(?:www|go)\.)?twitch\.tv/(?:[^/]+/v|videos)/|
+                            (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v|videos)/|
                             player\.twitch\.tv/\?.*?\bvideo=v
                         )
                         (?P<id>\d+)
@@ -279,6 +287,9 @@ class TwitchVodIE(TwitchItemBaseIE):
     }, {
         'url': 'https://www.twitch.tv/videos/6528877',
         'only_matching': True,
+    }, {
+        'url': 'https://m.twitch.tv/beagsandjam/v/247478721',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -390,14 +401,17 @@ class TwitchProfileIE(TwitchPlaylistBaseIE):
     _VALID_URL = r'%s/(?P<id>[^/]+)/profile/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
     _PLAYLIST_TYPE = 'profile'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.twitch.tv/vanillatv/profile',
         'info_dict': {
             'id': 'vanillatv',
             'title': 'VanillaTV',
         },
         'playlist_mincount': 412,
-    }
+    }, {
+        'url': 'http://m.twitch.tv/vanillatv/profile',
+        'only_matching': True,
+    }]
 
 
 class TwitchVideosBaseIE(TwitchPlaylistBaseIE):
@@ -411,14 +425,17 @@ class TwitchAllVideosIE(TwitchVideosBaseIE):
     _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive,upload,highlight'
     _PLAYLIST_TYPE = 'all videos'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'https://www.twitch.tv/spamfish/videos/all',
         'info_dict': {
             'id': 'spamfish',
             'title': 'Spamfish',
         },
         'playlist_mincount': 869,
-    }
+    }, {
+        'url': 'https://m.twitch.tv/spamfish/videos/all',
+        'only_matching': True,
+    }]
 
 
 class TwitchUploadsIE(TwitchVideosBaseIE):
@@ -427,14 +444,17 @@ class TwitchUploadsIE(TwitchVideosBaseIE):
     _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'upload'
     _PLAYLIST_TYPE = 'uploads'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'https://www.twitch.tv/spamfish/videos/uploads',
         'info_dict': {
             'id': 'spamfish',
             'title': 'Spamfish',
         },
         'playlist_mincount': 0,
-    }
+    }, {
+        'url': 'https://m.twitch.tv/spamfish/videos/uploads',
+        'only_matching': True,
+    }]
 
 
 class TwitchPastBroadcastsIE(TwitchVideosBaseIE):
@@ -443,14 +463,17 @@ class TwitchPastBroadcastsIE(TwitchVideosBaseIE):
     _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive'
     _PLAYLIST_TYPE = 'past broadcasts'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'https://www.twitch.tv/spamfish/videos/past-broadcasts',
         'info_dict': {
             'id': 'spamfish',
             'title': 'Spamfish',
         },
         'playlist_mincount': 0,
-    }
+    }, {
+        'url': 'https://m.twitch.tv/spamfish/videos/past-broadcasts',
+        'only_matching': True,
+    }]
 
 
 class TwitchHighlightsIE(TwitchVideosBaseIE):
@@ -459,14 +482,17 @@ class TwitchHighlightsIE(TwitchVideosBaseIE):
     _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'highlight'
     _PLAYLIST_TYPE = 'highlights'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'https://www.twitch.tv/spamfish/videos/highlights',
         'info_dict': {
             'id': 'spamfish',
             'title': 'Spamfish',
         },
         'playlist_mincount': 805,
-    }
+    }, {
+        'url': 'https://m.twitch.tv/spamfish/videos/highlights',
+        'only_matching': True,
+    }]
 
 
 class TwitchStreamIE(TwitchBaseIE):
@@ -474,7 +500,7 @@ class TwitchStreamIE(TwitchBaseIE):
     _VALID_URL = r'''(?x)
                     https?://
                         (?:
-                            (?:(?:www|go)\.)?twitch\.tv/|
+                            (?:(?:www|go|m)\.)?twitch\.tv/|
                             player\.twitch\.tv/\?.*?\bchannel=
                         )
                         (?P<id>[^/#?]+)
@@ -508,6 +534,9 @@ class TwitchStreamIE(TwitchBaseIE):
     }, {
         'url': 'https://go.twitch.tv/food',
         'only_matching': True,
+    }, {
+        'url': 'https://m.twitch.tv/food',
+        'only_matching': True,
     }]
 
     @classmethod
index 311df58f4a057ecfd128e76ebfe6f5a692ea0c23..d0e34c81980c51b6cd464dfc1e1e34ff9b959c44 100644 (file)
@@ -16,7 +16,7 @@ from ..utils import (
 class VideaIE(InfoExtractor):
     _VALID_URL = r'''(?x)
                     https?://
-                        videa\.hu/
+                        videa(?:kid)?\.hu/
                         (?:
                             videok/(?:[^/]+/)*[^?#&]+-|
                             player\?.*?\bv=|
@@ -31,7 +31,7 @@ class VideaIE(InfoExtractor):
             'id': '8YfIAjxwWGwT8HVQ',
             'ext': 'mp4',
             'title': 'Az őrült kígyász 285 kígyót enged szabadon',
-            'thumbnail': 'http://videa.hu/static/still/1.4.1.1007274.1204470.3',
+            'thumbnail': r're:^https?://.*',
             'duration': 21,
         },
     }, {
@@ -43,6 +43,15 @@ class VideaIE(InfoExtractor):
     }, {
         'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
         'only_matching': True,
+    }, {
+        'url': 'https://videakid.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
+        'only_matching': True,
+    }, {
+        'url': 'https://videakid.hu/player?v=8YfIAjxwWGwT8HVQ',
+        'only_matching': True,
+    }, {
+        'url': 'https://videakid.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
+        'only_matching': True,
     }]
 
     @staticmethod
index 46950d3a1499b20bec50a9482d47a0ebdbbfb8b3..80b896b5638ce5c7835c3a5ec8ab687cacc61ae0 100644 (file)
@@ -2,9 +2,9 @@
 from __future__ import unicode_literals
 
 import re
-import itertools
 
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
     determine_ext,
     int_or_none,
@@ -112,21 +112,24 @@ class VineIE(InfoExtractor):
 
 class VineUserIE(InfoExtractor):
     IE_NAME = 'vine:user'
-    _VALID_URL = r'(?:https?://)?vine\.co/(?P<u>u/)?(?P<user>[^/]+)/?(\?.*)?$'
+    _VALID_URL = r'https?://vine\.co/(?P<u>u/)?(?P<user>[^/]+)'
     _VINE_BASE_URL = 'https://vine.co/'
-    _TESTS = [
-        {
-            'url': 'https://vine.co/Visa',
-            'info_dict': {
-                'id': 'Visa',
-            },
-            'playlist_mincount': 46,
-        },
-        {
-            'url': 'https://vine.co/u/941705360593584128',
-            'only_matching': True,
+    _TESTS = [{
+        'url': 'https://vine.co/itsruthb',
+        'info_dict': {
+            'id': 'itsruthb',
+            'title': 'Ruth B',
+            'description': '| Instagram/Twitter: itsruthb | still a lost boy from neverland',
         },
-    ]
+        'playlist_mincount': 611,
+    }, {
+        'url': 'https://vine.co/u/942914934646415360',
+        'only_matching': True,
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        return False if VineIE.suitable(url) else super(VineUserIE, cls).suitable(url)
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -138,17 +141,14 @@ class VineUserIE(InfoExtractor):
         profile_data = self._download_json(
             profile_url, user, note='Downloading user profile data')
 
-        user_id = profile_data['data']['userId']
-        timeline_data = []
-        for pagenum in itertools.count(1):
-            timeline_url = '%sapi/timelines/users/%s?page=%s&size=100' % (
-                self._VINE_BASE_URL, user_id, pagenum)
-            timeline_page = self._download_json(
-                timeline_url, user, note='Downloading page %d' % pagenum)
-            timeline_data.extend(timeline_page['data']['records'])
-            if timeline_page['data']['nextPage'] is None:
-                break
-
+        data = profile_data['data']
+        user_id = data.get('userId') or data['userIdStr']
+        profile = self._download_json(
+            'https://archive.vine.co/profiles/%s.json' % user_id, user_id)
         entries = [
-            self.url_result(e['permalinkUrl'], 'Vine') for e in timeline_data]
-        return self.playlist_result(entries, user)
+            self.url_result(
+                'https://vine.co/v/%s' % post_id, ie='Vine', video_id=post_id)
+            for post_id in profile['posts']
+            if post_id and isinstance(post_id, compat_str)]
+        return self.playlist_result(
+            entries, user, profile.get('username'), profile.get('description'))
index 9959627c0ad3690e02594bc10b9633c023604f7e..64b13f0ed00f6f58e6eea6d88bacb04f7a3f757d 100644 (file)
@@ -12,7 +12,7 @@ import time
 from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse_urlencode,
-    compat_urlparse,
+    compat_urllib_parse,
 )
 from ..utils import (
     float_or_none,
@@ -39,11 +39,11 @@ class VRVBaseIE(InfoExtractor):
             data = json.dumps(data).encode()
             headers['Content-Type'] = 'application/json'
         method = 'POST' if data else 'GET'
-        base_string = '&'.join([method, compat_urlparse.quote(base_url, ''), compat_urlparse.quote(encoded_query, '')])
+        base_string = '&'.join([method, compat_urllib_parse.quote(base_url, ''), compat_urllib_parse.quote(encoded_query, '')])
         oauth_signature = base64.b64encode(hmac.new(
             (self._API_PARAMS['oAuthSecret'] + '&').encode('ascii'),
             base_string.encode(), hashlib.sha1).digest()).decode()
-        encoded_query += '&oauth_signature=' + compat_urlparse.quote(oauth_signature, '')
+        encoded_query += '&oauth_signature=' + compat_urllib_parse.quote(oauth_signature, '')
         return self._download_json(
             '?'.join([base_url, encoded_query]), video_id,
             note='Downloading %s JSON metadata' % note, headers=headers, data=data)
index ad747978d2fdb468d387191578e33b6bebf41f14..bc3239f68864b0e5aff7fc4dd3459e8656453ee5 100644 (file)
@@ -118,6 +118,15 @@ class XFileShareIE(InfoExtractor):
         'only_matching': True
     }]
 
+    @staticmethod
+    def _extract_urls(webpage):
+        return [
+            mobj.group('url')
+            for mobj in re.finditer(
+                r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1'
+                % '|'.join(site for site in list(zip(*XFileShareIE._SITES))[0]),
+                webpage)]
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
index 085c8d4f35a68c74c61ca9af571d421a68783d93..efee95651df0c21d57bca7b0ef5625dcc53d5cac 100644 (file)
@@ -58,7 +58,9 @@ class XVideosIE(InfoExtractor):
             group='title') or self._og_search_title(webpage)
 
         thumbnail = self._search_regex(
-            r'url_bigthumb=(.+?)&amp', webpage, 'thumbnail', fatal=False)
+            (r'setThumbUrl\(\s*(["\'])(?P<thumbnail>(?:(?!\1).)+)\1',
+             r'url_bigthumb=(?P<thumbnail>.+?)&amp'),
+            webpage, 'thumbnail', fatal=False, group='thumbnail')
         duration = int_or_none(self._og_search_property(
             'duration', webpage, default=None)) or parse_duration(
             self._search_regex(
index 5b0b248cdd031588a958056e85f3efa9a6579eb4..2f5a7b023ba70cf320cbd7ec11f07262e1d2378a 100644 (file)
@@ -154,7 +154,7 @@ class YoukuIE(InfoExtractor):
         # request basic data
         basic_data_params = {
             'vid': video_id,
-            'ccode': '0507',
+            'ccode': '0590',
             'client_ip': '192.168.1.1',
             'utid': cna,
             'client_ts': time.time() / 1000,
index 617be8e96b6e305988b3ae4d0c5d3790074868e4..e7bd1f18fb504c8541d0fbdc301980cab07d1f54 100644 (file)
@@ -87,7 +87,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
         (username, password) = self._get_login_info()
         # No authentication to be performed
         if username is None:
-            if self._LOGIN_REQUIRED:
+            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
             return True
 
@@ -2699,10 +2699,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
     def _real_initialize(self):
         self._login()
 
-    def _real_extract(self, url):
-        page = self._download_webpage(
-            'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)
-
+    def _entries(self, page):
         # The extraction process is the same as for playlists, but the regex
         # for the video ids doesn't contain an index
         ids = []
@@ -2713,12 +2710,15 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
             # 'recommended' feed has infinite 'load more' and each new portion spins
             # the same videos in (sometimes) slightly different order, so we'll check
             # for unicity and break when portion has no new videos
-            new_ids = filter(lambda video_id: video_id not in ids, orderedSet(matches))
+            new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))
             if not new_ids:
                 break
 
             ids.extend(new_ids)
 
+            for entry in self._ids_to_results(new_ids):
+                yield entry
+
             mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
             if not mobj:
                 break
@@ -2730,8 +2730,12 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
             content_html = more['content_html']
             more_widget_html = more['load_more_widget_html']
 
+    def _real_extract(self, url):
+        page = self._download_webpage(
+            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
+            self._PLAYLIST_TITLE)
         return self.playlist_result(
-            self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)
+            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
 
 
 class YoutubeWatchLaterIE(YoutubePlaylistIE):
index 7d1bbc02102ec860ab417301f478309fe0c379e0..3e4ac03a240844ef3b69fd21dbedcc441a46e5c7 100644 (file)
@@ -676,7 +676,8 @@ def parseOpts(overrideArguments=None):
     filesystem.add_option(
         '-a', '--batch-file',
         dest='batchfile', metavar='FILE',
-        help='File containing URLs to download (\'-\' for stdin)')
+        help="File containing URLs to download ('-' for stdin), one URL per line. "
+             "Lines starting with '#', ';' or ']' are considered as comments and ignored.")
     filesystem.add_option(
         '--id', default=False,
         action='store_true', dest='useid', help='Use only video ID in file name')
index 027d12785da68055477b0bd7475cfd25e4678c6b..574284e944508340ea603e8e047008edee8a35ae 100644 (file)
@@ -2574,8 +2574,8 @@ def _match_one(filter_part, dct):
         return op(actual_value, comparison_value)
 
     UNARY_OPERATORS = {
-        '': lambda v: v is not None,
-        '!': lambda v: v is None,
+        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
+        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
     }
     operator_rex = re.compile(r'''(?x)\s*
         (?P<op>%s)\s*(?P<key>[a-z_]+)
index 6ce11c39bc2016e5d29829f41e530064dd3717d9..4e3cb39c62c8097981ab870be70b8ea37b54f76e 100644 (file)
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2018.03.14'
+__version__ = '2018.04.25'