Raphaël G. Git Repositories - youtubedl/commitdiff
Updated version 2017.10.15.1 from 'upstream/2017.10.15.1'
author Rogério Brito <rbrito@ime.usp.br>
Thu, 19 Oct 2017 17:24:13 +0000 (15:24 -0200)
committer Rogério Brito <rbrito@ime.usp.br>
Thu, 19 Oct 2017 17:24:13 +0000 (15:24 -0200)
with Debian dir e04fc0593bd02862ee48ad1d1a4629d9f3e1df96

87 files changed:
ChangeLog
README.md
docs/supportedsites.md
test/test_YoutubeDL.py
youtube-dl
youtube-dl.1
youtube_dl/YoutubeDL.py
youtube_dl/downloader/fragment.py
youtube_dl/downloader/hls.py
youtube_dl/extractor/aenetworks.py
youtube_dl/extractor/afreecatv.py
youtube_dl/extractor/anvato.py
youtube_dl/extractor/appletrailers.py
youtube_dl/extractor/ard.py
youtube_dl/extractor/bbc.py
youtube_dl/extractor/beeg.py
youtube_dl/extractor/canvas.py
youtube_dl/extractor/channel9.py
youtube_dl/extractor/comedycentral.py
youtube_dl/extractor/common.py
youtube_dl/extractor/dailymotion.py
youtube_dl/extractor/deezer.py
youtube_dl/extractor/extractors.py
youtube_dl/extractor/facebook.py
youtube_dl/extractor/fox.py
youtube_dl/extractor/freespeech.py
youtube_dl/extractor/funk.py [new file with mode: 0644]
youtube_dl/extractor/gamespot.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/gfycat.py
youtube_dl/extractor/googleplus.py
youtube_dl/extractor/howstuffworks.py
youtube_dl/extractor/hrti.py
youtube_dl/extractor/ign.py
youtube_dl/extractor/infoq.py
youtube_dl/extractor/jeuxvideo.py
youtube_dl/extractor/kaltura.py
youtube_dl/extractor/ketnet.py
youtube_dl/extractor/livestream.py
youtube_dl/extractor/lnkgo.py
youtube_dl/extractor/makertv.py
youtube_dl/extractor/mangomolo.py
youtube_dl/extractor/meipai.py
youtube_dl/extractor/mixcloud.py
youtube_dl/extractor/mtv.py
youtube_dl/extractor/myvideo.py
youtube_dl/extractor/nationalgeographic.py
youtube_dl/extractor/naver.py
youtube_dl/extractor/nba.py
youtube_dl/extractor/nbc.py
youtube_dl/extractor/nexx.py
youtube_dl/extractor/npo.py
youtube_dl/extractor/once.py
youtube_dl/extractor/onionstudios.py
youtube_dl/extractor/pornflip.py
youtube_dl/extractor/reddit.py
youtube_dl/extractor/rtlnl.py
youtube_dl/extractor/rtve.py
youtube_dl/extractor/ruhd.py
youtube_dl/extractor/scrippsnetworks.py
youtube_dl/extractor/shahid.py
youtube_dl/extractor/slideslive.py [new file with mode: 0644]
youtube_dl/extractor/spike.py
youtube_dl/extractor/stanfordoc.py
youtube_dl/extractor/steam.py
youtube_dl/extractor/theplatform.py
youtube_dl/extractor/thisav.py
youtube_dl/extractor/tubitv.py
youtube_dl/extractor/tva.py
youtube_dl/extractor/tvn24.py
youtube_dl/extractor/tvp.py
youtube_dl/extractor/twitter.py
youtube_dl/extractor/udn.py
youtube_dl/extractor/vh1.py
youtube_dl/extractor/vice.py
youtube_dl/extractor/videopremium.py
youtube_dl/extractor/voxmedia.py
youtube_dl/extractor/vvvvid.py
youtube_dl/extractor/wdr.py
youtube_dl/extractor/xhamster.py
youtube_dl/extractor/xtube.py
youtube_dl/extractor/xvideos.py
youtube_dl/extractor/yahoo.py
youtube_dl/extractor/youtube.py
youtube_dl/postprocessor/ffmpeg.py
youtube_dl/utils.py
youtube_dl/version.py

index da60c1b36ccaaea705c5986bd1f104eee95df6dc..d728e4d0362356f39af55c2da18a035e6b3a5d38 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,103 @@
+version 2017.10.15.1
+
+Core
+* [downloader/hls] Ignore anvato ad fragments (#14496)
+* [downloader/fragment] Output ad fragment count
+
+Extractors
+* [scrippsnetworks:watch] Bypass geo restriction
++ [anvato] Add ability to bypass geo restriction
+* [redditr] Fix extraction for URLs with query (#14495)
+
+
+version 2017.10.15
+
+Core
++ [common] Add support for jwplayer youtube embeds
+
+Extractors
+* [scrippsnetworks:watch] Fix extraction (#14389)
+* [anvato] Process master m3u8 manifests
+* [youtube] Fix relative URLs in description
+* [spike] Bypass geo restriction
++ [howstuffworks] Add support for more domains
+* [infoq] Fix http format downloading
++ [rtlnl] Add support for another type of embeds
++ [onionstudios] Add support for bulbs-video embeds
+* [udn] Fix extraction
+* [shahid] Fix extraction (#14448)
+* [kaltura] Ignore Widevine encrypted video (.wvm) (#14471)
+* [vh1] Fix extraction (#9613)
+
+
+version 2017.10.12
+
+Core
+* [YoutubeDL] Improve _default_format_spec (#14461)
+
+Extractors
+* [steam] Fix extraction (#14067)
++ [funk] Add support for funk.net (#14464)
++ [nexx] Add support for shortcuts and relax domain id extraction
++ [voxmedia] Add support for recode.net (#14173)
++ [once] Add support for vmap URLs
++ [generic] Add support for channel9 embeds (#14469)
+* [tva] Fix extraction (#14328)
++ [tubitv] Add support for new URL format (#14460)
+- [afreecatv:global] Remove extractor
+- [youtube:shared] Removed extractor (#14420)
++ [slideslive] Add support for slideslive.com (#2680)
++ [facebook] Support thumbnails (#14416)
+* [vvvvid] Fix episode number extraction (#14456)
+* [hrti:playlist] Relax URL regular expression
+* [wdr] Relax media link regular expression (#14447)
+* [hrti] Relax URL regular expression (#14443)
+* [fox] Delegate extraction to uplynk:preplay (#14147)
++ [youtube] Add support for hooktube.com (#14437)
+
+
+version 2017.10.07
+
+Core
+* [YoutubeDL] Ignore duplicates in --playlist-items
+* [YoutubeDL] Fix out of range --playlist-items for iterable playlists and
+  reduce code duplication (#14425)
++ [utils] Use cache in OnDemandPagedList by default
+* [postprocessor/ffmpeg] Convert to opus using libopus (#14381)
+
+Extractors
+* [reddit] Sort formats (#14430)
+* [lnkgo] Relax URL regular expression (#14423)
+* [pornflip] Extend URL regular expression (#14405, #14406)
++ [xtube] Add support for embed URLs (#14417)
++ [xvideos] Add support for embed URLs and improve extraction (#14409)
+* [beeg] Fix extraction (#14403)
+* [tvn24] Relax URL regular expression (#14395)
+* [nbc] Fix extraction (#13651, #13715, #14137, #14198, #14312, #14314, #14378,
+  #14392, #14414, #14419, #14431)
++ [ketnet] Add support for videos without direct sources (#14377)
+* [canvas] Generalize mediazone.vrt.be extractor and rework canvas and een
++ [afreecatv] Add support for adult videos (#14376)
+
+
+version 2017.10.01
+
+Core
+* [YoutubeDL] Document youtube_include_dash_manifest
+
+Extractors
++ [tvp] Add support for new URL schema (#14368)
++ [generic] Add support for single format Video.js embeds (#14371)
+* [yahoo] Bypass geo restriction for brightcove (#14210)
+* [yahoo] Use extracted brightcove account id (#14210)
+* [rtve:alacarta] Fix extraction (#14290)
++ [yahoo] Add support for custom brigthcove embeds (#14210)
++ [generic] Add support for Video.js embeds
++ [gfycat] Add support for /gifs/detail URLs (#14322)
+* [generic] Fix infinite recursion for twitter:player URLs (#14339)
+* [xhamsterembed] Fix extraction (#14308)
+
+
 version 2017.09.24
 
 Core
index 7818e58df062863ccd70a4efe71c9ff5c7750d7e..2879aad24c23b7c88ffe2259219beab73aca226c 100644 (file)
--- a/README.md
+++ b/README.md
@@ -1167,7 +1167,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
     ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
 ```
 
-Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L129-L279). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
+Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
 
 Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
 
index d36a07cf66321aa897185265f2a64547d04c43d1..7071450d4dbba00e39dd416f092d842cfa49f8c2 100644 (file)
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -36,7 +36,6 @@
  - **AdultSwim**
  - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network
  - **afreecatv**: afreecatv.com
- - **afreecatv:global**: afreecatv.com
  - **AirMozilla**
  - **AliExpressLive**
  - **AlJazeera**
  - **CamWithHer**
  - **canalc2.tv**
  - **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
- - **Canvas**: canvas.be and een.be
+ - **Canvas**
+ - **CanvasEen**: canvas.be and een.be
  - **CarambaTV**
  - **CarambaTVPage**
  - **CartoonNetwork**
  - **freespeech.org**
  - **FreshLive**
  - **Funimation**
+ - **Funk**
  - **FunnyOrDie**
  - **Fusion**
  - **Fux**
  - **skynewsarabia:video**
  - **SkySports**
  - **Slideshare**
+ - **SlidesLive**
  - **Slutload**
  - **smotri**: Smotri.com
  - **smotri:broadcast**: Smotri.com broadcasts
  - **VoiceRepublic**
  - **Voot**
  - **VoxMedia**
+ - **VoxMediaVolume**
  - **Vporn**
  - **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
  - **Vrak**
  - **youtube:search**: YouTube.com searches
  - **youtube:search:date**: YouTube.com searches, newest videos first
  - **youtube:search_url**: YouTube.com search URLs
- - **youtube:shared**
  - **youtube:show**: YouTube.com (multi-season) shows
  - **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
  - **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
index e70cbcd375a4670bb586612ccaa107605dc985dc..4af92fbd4b8ac0cff84b290bc737f173cc6b34ca 100644 (file)
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -466,12 +466,18 @@ class TestFormatSelection(unittest.TestCase):
         ydl = YDL({'simulate': True})
         self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best')
 
+        ydl = YDL({'is_live': True})
+        self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
+
+        ydl = YDL({'simulate': True, 'is_live': True})
+        self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best')
+
         ydl = YDL({'outtmpl': '-'})
-        self.assertEqual(ydl._default_format_spec({}), 'best')
+        self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
 
         ydl = YDL({})
         self.assertEqual(ydl._default_format_spec({}, download=False), 'bestvideo+bestaudio/best')
-        self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best')
+        self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
 
 
 class TestYoutubeDL(unittest.TestCase):
@@ -770,6 +776,12 @@ class TestYoutubeDL(unittest.TestCase):
         result = get_ids({'playlist_items': '10'})
         self.assertEqual(result, [])
 
+        result = get_ids({'playlist_items': '3-10'})
+        self.assertEqual(result, [3, 4])
+
+        result = get_ids({'playlist_items': '2-4,3-4,3'})
+        self.assertEqual(result, [2, 3, 4])
+
     def test_urlopen_no_file_protocol(self):
         # see https://github.com/rg3/youtube-dl/issues/8227
         ydl = YDL()
index b87f23e832c1ce34e5d3ecfdf2dc3080fe40f8c3..15c016a00eae3eae1c6d7aec617a4930fa8dfd42 100755 (executable)
Binary files a/youtube-dl and b/youtube-dl differ
index 6c8c7bc947de48153ef2db370cd0d1702baba69c..9ab22b088eb9f20093113f24114b551de9843ebd 100644 (file)
--- a/youtube-dl.1
+++ b/youtube-dl.1
@@ -2323,7 +2323,7 @@ with\ youtube_dl.YoutubeDL(ydl_opts)\ as\ ydl:
 .PP
 Most likely, you\[aq]ll want to use various options.
 For a list of options available, have a look at
-\f[C]youtube_dl/YoutubeDL.py\f[] (https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L129-L279).
+\f[C]youtube_dl/YoutubeDL.py\f[] (https://github.com/rg3/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312).
 For a start, if you want to intercept youtube\-dl\[aq]s output, set a
 \f[C]logger\f[] object.
 .PP
index 0a7f36c98a02e8401a966c95e6b8a780e8566e98..342d6b47c03cc817994662eeed5d9245f45609de 100755 (executable)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -65,6 +65,7 @@ from .utils import (
     locked_file,
     make_HTTPS_handler,
     MaxDownloadsReached,
+    orderedSet,
     PagedList,
     parse_filesize,
     PerRequestProxyHandler,
@@ -304,6 +305,12 @@ class YoutubeDL(object):
                        otherwise prefer avconv.
     postprocessor_args: A list of additional command-line arguments for the
                         postprocessor.
+
+    The following options are used by the Youtube extractor:
+    youtube_include_dash_manifest: If True (default), DASH manifests and related
+                        data will be downloaded and processed by extractor.
+                        You can reduce network I/O by disabling it if you don't
+                        care about DASH.
     """
 
     _NUMERIC_FIELDS = set((
@@ -902,15 +909,25 @@ class YoutubeDL(object):
                                 yield int(item)
                         else:
                             yield int(string_segment)
-                playlistitems = iter_playlistitems(playlistitems_str)
+                playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
 
             ie_entries = ie_result['entries']
+
+            def make_playlistitems_entries(list_ie_entries):
+                num_entries = len(list_ie_entries)
+                return [
+                    list_ie_entries[i - 1] for i in playlistitems
+                    if -num_entries <= i - 1 < num_entries]
+
+            def report_download(num_entries):
+                self.to_screen(
+                    '[%s] playlist %s: Downloading %d videos' %
+                    (ie_result['extractor'], playlist, num_entries))
+
             if isinstance(ie_entries, list):
                 n_all_entries = len(ie_entries)
                 if playlistitems:
-                    entries = [
-                        ie_entries[i - 1] for i in playlistitems
-                        if -n_all_entries <= i - 1 < n_all_entries]
+                    entries = make_playlistitems_entries(ie_entries)
                 else:
                     entries = ie_entries[playliststart:playlistend]
                 n_entries = len(entries)
@@ -928,20 +945,15 @@ class YoutubeDL(object):
                     entries = ie_entries.getslice(
                         playliststart, playlistend)
                 n_entries = len(entries)
-                self.to_screen(
-                    '[%s] playlist %s: Downloading %d videos' %
-                    (ie_result['extractor'], playlist, n_entries))
+                report_download(n_entries)
             else:  # iterable
                 if playlistitems:
-                    entry_list = list(ie_entries)
-                    entries = [entry_list[i - 1] for i in playlistitems]
+                    entries = make_playlistitems_entries(list(ie_entries))
                 else:
                     entries = list(itertools.islice(
                         ie_entries, playliststart, playlistend))
                 n_entries = len(entries)
-                self.to_screen(
-                    '[%s] playlist %s: Downloading %d videos' %
-                    (ie_result['extractor'], playlist, n_entries))
+                report_download(n_entries)
 
             if self.params.get('playlistreverse', False):
                 entries = entries[::-1]
@@ -1066,22 +1078,27 @@ class YoutubeDL(object):
         return _filter
 
     def _default_format_spec(self, info_dict, download=True):
-        req_format_list = []
 
-        def can_have_partial_formats():
+        def can_merge():
+            merger = FFmpegMergerPP(self)
+            return merger.available and merger.can_merge()
+
+        def prefer_best():
             if self.params.get('simulate', False):
-                return True
+                return False
             if not download:
-                return True
-            if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
                 return False
+            if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
+                return True
             if info_dict.get('is_live'):
-                return False
-            merger = FFmpegMergerPP(self)
-            return merger.available and merger.can_merge()
-        if can_have_partial_formats():
-            req_format_list.append('bestvideo+bestaudio')
-        req_format_list.append('best')
+                return True
+            if not can_merge():
+                return True
+            return False
+
+        req_format_list = ['bestvideo+bestaudio', 'best']
+        if prefer_best():
+            req_format_list.reverse()
         return '/'.join(req_format_list)
 
     def build_format_selector(self, format_spec):
index 6f6fb4a77a9dc2b3e4e4278bc51b12755bc709ee..7e891b92a3b6a05257484c5d8ba81590013bb74b 100644 (file)
--- a/youtube_dl/downloader/fragment.py
+++ b/youtube_dl/downloader/fragment.py
@@ -117,9 +117,15 @@ class FragmentFD(FileDownloader):
     def _prepare_frag_download(self, ctx):
         if 'live' not in ctx:
             ctx['live'] = False
+        if not ctx['live']:
+            total_frags_str = '%d' % ctx['total_frags']
+            ad_frags = ctx.get('ad_frags', 0)
+            if ad_frags:
+                total_frags_str += ' (not including %d ad)' % ad_frags
+        else:
+            total_frags_str = 'unknown (live)'
         self.to_screen(
-            '[%s] Total fragments: %s'
-            % (self.FD_NAME, ctx['total_frags'] if not ctx['live'] else 'unknown (live)'))
+            '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
         self.report_destination(ctx['filename'])
         dl = HttpQuietDownloader(
             self.ydl,
index 46308cf072c25086d896bb759adad10a74d2cfc6..7955ca510dd200fedb5d8507d937b716d8d56772 100644 (file)
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@@ -75,15 +75,29 @@ class HlsFD(FragmentFD):
                 fd.add_progress_hook(ph)
             return fd.real_download(filename, info_dict)
 
-        total_frags = 0
+        def anvato_ad(s):
+            return s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
+
+        media_frags = 0
+        ad_frags = 0
+        ad_frag_next = False
         for line in s.splitlines():
             line = line.strip()
-            if line and not line.startswith('#'):
-                total_frags += 1
+            if not line:
+                continue
+            if line.startswith('#'):
+                if anvato_ad(line):
+                    ad_frags += 1
+                continue
+            if ad_frag_next:
+                ad_frag_next = False
+                continue
+            media_frags += 1
 
         ctx = {
             'filename': filename,
-            'total_frags': total_frags,
+            'total_frags': media_frags,
+            'ad_frags': ad_frags,
         }
 
         self._prepare_and_start_frag_download(ctx)
@@ -101,10 +115,14 @@ class HlsFD(FragmentFD):
         decrypt_info = {'METHOD': 'NONE'}
         byte_range = {}
         frag_index = 0
+        ad_frag_next = False
         for line in s.splitlines():
             line = line.strip()
             if line:
                 if not line.startswith('#'):
+                    if ad_frag_next:
+                        ad_frag_next = False
+                        continue
                     frag_index += 1
                     if frag_index <= ctx['fragment_index']:
                         continue
@@ -175,6 +193,8 @@ class HlsFD(FragmentFD):
                         'start': sub_range_start,
                         'end': sub_range_start + int(splitted_byte_range[0]),
                     }
+                elif anvato_ad(line):
+                    ad_frag_next = True
 
         self._finish_frag_download(ctx)
 
index 2dcdba9d22c54e57811a85cd2bceb81bf11bbdce..da1b566c20eb6c4477e86f26dfec21b281a5f07c 100644 (file)
--- a/youtube_dl/extractor/aenetworks.py
+++ b/youtube_dl/extractor/aenetworks.py
@@ -131,7 +131,7 @@ class AENetworksIE(AENetworksBaseIE):
              r'data-media-url=(["\'])(?P<url>(?:(?!\1).)+?)\1'],
             webpage, 'video url', group='url')
         theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
-            r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
+            r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
         info = self._parse_theplatform_metadata(theplatform_metadata)
         if theplatform_metadata.get('AETN$isBehindWall'):
             requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain]
index c8cb91dcba63cd7e532945eedb114f1975750b6f..e6513c7a4d86caabe282fe874826ef5219826085 100644 (file)
--- a/youtube_dl/extractor/afreecatv.py
+++ b/youtube_dl/extractor/afreecatv.py
@@ -138,6 +138,23 @@ class AfreecaTVIE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
+    }, {
+        # adult video
+        'url': 'http://vod.afreecatv.com/PLAYER/STATION/26542731',
+        'info_dict': {
+            'id': '20171001_F1AE1711_196617479_1',
+            'ext': 'mp4',
+            'title': '[생]서아 초심 찾기 방송 (part 1)',
+            'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
+            'uploader': 'BJ서아',
+            'uploader_id': 'bjdyrksu',
+            'upload_date': '20171001',
+            'duration': 3600,
+            'age_limit': 18,
+        },
+        'params': {
+            'skip_download': True,
+        },
     }, {
         'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
         'only_matching': True,
@@ -160,7 +177,15 @@ class AfreecaTVIE(InfoExtractor):
 
         video_xml = self._download_xml(
             'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
-            video_id, query={'nTitleNo': video_id})
+            video_id, query={
+                'nTitleNo': video_id,
+                'partialView': 'SKIP_ADULT',
+            })
+
+        flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
+        if flag and flag != 'SUCCEED':
+            raise ExtractorError(
+                '%s said: %s' % (self.IE_NAME, flag), expected=True)
 
         video_element = video_xml.findall(compat_xpath('./track/video'))[1]
         if video_element is None or video_element.text is None:
@@ -246,107 +271,3 @@ class AfreecaTVIE(InfoExtractor):
             })
 
         return info
-
-
-class AfreecaTVGlobalIE(AfreecaTVIE):
-    IE_NAME = 'afreecatv:global'
-    _VALID_URL = r'https?://(?:www\.)?afreeca\.tv/(?P<channel_id>\d+)(?:/v/(?P<video_id>\d+))?'
-    _TESTS = [{
-        'url': 'http://afreeca.tv/36853014/v/58301',
-        'info_dict': {
-            'id': '58301',
-            'title': 'tryhard top100',
-            'uploader_id': '36853014',
-            'uploader': 'makgi Hearthstone Live!',
-        },
-        'playlist_count': 3,
-    }]
-
-    def _real_extract(self, url):
-        channel_id, video_id = re.match(self._VALID_URL, url).groups()
-        video_type = 'video' if video_id else 'live'
-        query = {
-            'pt': 'view',
-            'bid': channel_id,
-        }
-        if video_id:
-            query['vno'] = video_id
-        video_data = self._download_json(
-            'http://api.afreeca.tv/%s/view_%s.php' % (video_type, video_type),
-            video_id or channel_id, query=query)['channel']
-
-        if video_data.get('result') != 1:
-            raise ExtractorError('%s said: %s' % (self.IE_NAME, video_data['remsg']))
-
-        title = video_data['title']
-
-        info = {
-            'thumbnail': video_data.get('thumb'),
-            'view_count': int_or_none(video_data.get('vcnt')),
-            'age_limit': int_or_none(video_data.get('grade')),
-            'uploader_id': channel_id,
-            'uploader': video_data.get('cname'),
-        }
-
-        if video_id:
-            entries = []
-            for i, f in enumerate(video_data.get('flist', [])):
-                video_key = self.parse_video_key(f.get('key', ''))
-                f_url = f.get('file')
-                if not video_key or not f_url:
-                    continue
-                entries.append({
-                    'id': '%s_%s' % (video_id, video_key.get('part', i + 1)),
-                    'title': title,
-                    'upload_date': video_key.get('upload_date'),
-                    'duration': int_or_none(f.get('length')),
-                    'url': f_url,
-                    'protocol': 'm3u8_native',
-                    'ext': 'mp4',
-                })
-
-            info.update({
-                'id': video_id,
-                'title': title,
-                'duration': int_or_none(video_data.get('length')),
-            })
-            if len(entries) > 1:
-                info['_type'] = 'multi_video'
-                info['entries'] = entries
-            elif len(entries) == 1:
-                i = entries[0].copy()
-                i.update(info)
-                info = i
-        else:
-            formats = []
-            for s in video_data.get('strm', []):
-                s_url = s.get('purl')
-                if not s_url:
-                    continue
-                stype = s.get('stype')
-                if stype == 'HLS':
-                    formats.extend(self._extract_m3u8_formats(
-                        s_url, channel_id, 'mp4', m3u8_id=stype, fatal=False))
-                elif stype == 'RTMP':
-                    format_id = [stype]
-                    label = s.get('label')
-                    if label:
-                        format_id.append(label)
-                    formats.append({
-                        'format_id': '-'.join(format_id),
-                        'url': s_url,
-                        'tbr': int_or_none(s.get('bps')),
-                        'height': int_or_none(s.get('brt')),
-                        'ext': 'flv',
-                        'rtmp_live': True,
-                    })
-            self._sort_formats(formats)
-
-            info.update({
-                'id': channel_id,
-                'title': self._live_title(title),
-                'is_live': True,
-                'formats': formats,
-            })
-
-        return info
index 8023da70236599e1777172ac416a8ad828a6ec0c..7a29cd2c6315fad6caa05ac6a499f07f80a41566 100644 (file)
--- a/youtube_dl/extractor/anvato.py
+++ b/youtube_dl/extractor/anvato.py
@@ -18,6 +18,7 @@ from ..utils import (
     int_or_none,
     strip_jsonp,
     unescapeHTML,
+    unsmuggle_url,
 )
 
 
@@ -197,12 +198,16 @@ class AnvatoIE(InfoExtractor):
                 'tbr': tbr if tbr != 0 else None,
             }
 
-            if ext == 'm3u8' or media_format in ('m3u8', 'm3u8-variant'):
-                if tbr is not None:
-                    a_format.update({
-                        'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
-                        'ext': 'mp4',
-                    })
+            if media_format == 'm3u8' and tbr is not None:
+                a_format.update({
+                    'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
+                    'ext': 'mp4',
+                })
+            elif media_format == 'm3u8-variant' or ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id='hls', fatal=False))
+                continue
             elif ext == 'mp3' or media_format == 'mp3':
                 a_format['vcodec'] = 'none'
             else:
@@ -271,6 +276,9 @@ class AnvatoIE(InfoExtractor):
             anvplayer_data['accessKey'], anvplayer_data['video'])
 
     def _real_extract(self, url):
+        url, smuggled_data = unsmuggle_url(url, {})
+        self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
+
         mobj = re.match(self._VALID_URL, url)
         access_key, video_id = mobj.group('access_key_or_mcp', 'id')
         if access_key not in self._ANVACK_TABLE:
index b45b431e19c1526eb9578e9cbf6a13a97939b2a8..a9ef733e011237338d904f956c4324cc6dd7a72b 100644 (file)
--- a/youtube_dl/extractor/appletrailers.py
+++ b/youtube_dl/extractor/appletrailers.py
@@ -117,7 +117,7 @@ class AppleTrailersIE(InfoExtractor):
                             continue
                         formats.append({
                             'format_id': '%s-%s' % (version, size),
-                            'url': re.sub(r'_(\d+p.mov)', r'_h\1', src),
+                            'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src),
                             'width': int_or_none(size_data.get('width')),
                             'height': int_or_none(size_data.get('height')),
                             'language': version[:2],
@@ -179,7 +179,7 @@ class AppleTrailersIE(InfoExtractor):
             formats = []
             for format in settings['metadata']['sizes']:
                 # The src is a file pointing to the real video file
-                format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src'])
+                format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src'])
                 formats.append({
                     'url': format_url,
                     'format': format['type'],
index 3f248b14728ab3655a2e17f7b38a95184042d770..915f8862e3769c3f186209435dcc34029dead932 100644 (file)
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@@ -195,7 +195,7 @@ class ARDMediathekIE(InfoExtractor):
 
         title = self._html_search_regex(
             [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
-             r'<meta name="dcterms.title" content="(.*?)"/>',
+             r'<meta name="dcterms\.title" content="(.*?)"/>',
              r'<h4 class="headline">(.*?)</h4>'],
             webpage, 'title')
         description = self._html_search_meta(
index 8b20c03d6e424b95e42b1bea1ac3fb91e24bea11..5525f7c9b998c57271aba072bb698f6c03b36777 100644 (file)
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -386,7 +386,7 @@ class BBCCoUkIE(InfoExtractor):
                             m3u8_id=format_id, fatal=False))
                         if re.search(self._USP_RE, href):
                             usp_formats = self._extract_m3u8_formats(
-                                re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href),
+                                re.sub(self._USP_RE, r'/\1\.ism/\1\.m3u8', href),
                                 programme_id, ext='mp4', entry_protocol='m3u8_native',
                                 m3u8_id=format_id, fatal=False)
                             for f in usp_formats:
index bbeae4bacbe164e42e007573d987105c104b6752..bf22a41b745db2eef277c6fc41cf716cef6d6366 100644 (file)
--- a/youtube_dl/extractor/beeg.py
+++ b/youtube_dl/extractor/beeg.py
@@ -60,9 +60,13 @@ class BeegIE(InfoExtractor):
         beeg_version = beeg_version or '2185'
         beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H'
 
-        video = self._download_json(
-            'https://api.beeg.com/api/v6/%s/video/%s' % (beeg_version, video_id),
-            video_id)
+        for api_path in ('', 'api.'):
+            video = self._download_json(
+                'https://%sbeeg.com/api/v6/%s/video/%s'
+                % (api_path, beeg_version, video_id), video_id,
+                fatal=api_path == 'api.')
+            if video:
+                break
 
         def split(o, e):
             def cut(s, x):
index aada02917cb7911a44c1a1e4d63ec4db1c443c3b..6899f8431788fad62e4615a33d95f3ffa65f4ea1 100644 (file)
--- a/youtube_dl/extractor/canvas.py
+++ b/youtube_dl/extractor/canvas.py
@@ -3,24 +3,104 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import float_or_none
+from ..utils import (
+    float_or_none,
+    strip_or_none,
+)
 
 
 class CanvasIE(InfoExtractor):
+    _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet)/assets/(?P<id>m[dz]-ast-[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
+        'md5': '90139b746a0a9bd7bb631283f6e2a64e',
+        'info_dict': {
+            'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
+            'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
+            'ext': 'flv',
+            'title': 'Nachtwacht: De Greystook',
+            'description': 'md5:1db3f5dc4c7109c821261e7512975be7',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 1468.03,
+        },
+        'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
+    }, {
+        'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        site_id, video_id = mobj.group('site_id'), mobj.group('id')
+
+        data = self._download_json(
+            'https://mediazone.vrt.be/api/v1/%s/assets/%s'
+            % (site_id, video_id), video_id)
+
+        title = data['title']
+        description = data.get('description')
+
+        formats = []
+        for target in data['targetUrls']:
+            format_url, format_type = target.get('url'), target.get('type')
+            if not format_url or not format_type:
+                continue
+            if format_type == 'HLS':
+                formats.extend(self._extract_m3u8_formats(
+                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id=format_type, fatal=False))
+            elif format_type == 'HDS':
+                formats.extend(self._extract_f4m_formats(
+                    format_url, video_id, f4m_id=format_type, fatal=False))
+            elif format_type == 'MPEG_DASH':
+                formats.extend(self._extract_mpd_formats(
+                    format_url, video_id, mpd_id=format_type, fatal=False))
+            elif format_type == 'HSS':
+                formats.extend(self._extract_ism_formats(
+                    format_url, video_id, ism_id='mss', fatal=False))
+            else:
+                formats.append({
+                    'format_id': format_type,
+                    'url': format_url,
+                })
+        self._sort_formats(formats)
+
+        subtitles = {}
+        subtitle_urls = data.get('subtitleUrls')
+        if isinstance(subtitle_urls, list):
+            for subtitle in subtitle_urls:
+                subtitle_url = subtitle.get('url')
+                if subtitle_url and subtitle.get('type') == 'CLOSED':
+                    subtitles.setdefault('nl', []).append({'url': subtitle_url})
+
+        return {
+            'id': video_id,
+            'display_id': video_id,
+            'title': title,
+            'description': description,
+            'formats': formats,
+            'duration': float_or_none(data.get('duration'), 1000),
+            'thumbnail': data.get('posterImageUrl'),
+            'subtitles': subtitles,
+        }
+
+
+class CanvasEenIE(InfoExtractor):
     IE_DESC = 'canvas.be and een.be'
     _VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
-        'md5': 'ea838375a547ac787d4064d8c7860a6c',
+        'md5': 'ed66976748d12350b118455979cca293',
         'info_dict': {
             'id': 'mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
             'display_id': 'de-afspraak-veilt-voor-de-warmste-week',
-            'ext': 'mp4',
+            'ext': 'flv',
             'title': 'De afspraak veilt voor de Warmste Week',
             'description': 'md5:24cb860c320dc2be7358e0e5aa317ba6',
             'thumbnail': r're:^https?://.*\.jpg$',
             'duration': 49.02,
-        }
+        },
+        'expected_warnings': ['is not a supported codec'],
     }, {
         # with subtitles
         'url': 'http://www.canvas.be/video/panorama/2016/pieter-0167',
@@ -40,7 +120,8 @@ class CanvasIE(InfoExtractor):
         },
         'params': {
             'skip_download': True,
-        }
+        },
+        'skip': 'Pagina niet gevonden',
     }, {
         'url': 'https://www.een.be/sorry-voor-alles/herbekijk-sorry-voor-alles',
         'info_dict': {
@@ -54,7 +135,8 @@ class CanvasIE(InfoExtractor):
         },
         'params': {
             'skip_download': True,
-        }
+        },
+        'skip': 'Episode no longer available',
     }, {
         'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend',
         'only_matching': True,
@@ -66,55 +148,21 @@ class CanvasIE(InfoExtractor):
 
         webpage = self._download_webpage(url, display_id)
 
-        title = (self._search_regex(
+        title = strip_or_none(self._search_regex(
             r'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>',
             webpage, 'title', default=None) or self._og_search_title(
-            webpage)).strip()
+            webpage, default=None))
 
         video_id = self._html_search_regex(
-            r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', group='id')
-
-        data = self._download_json(
-            'https://mediazone.vrt.be/api/v1/%s/assets/%s'
-            % (site_id, video_id), display_id)
-
-        formats = []
-        for target in data['targetUrls']:
-            format_url, format_type = target.get('url'), target.get('type')
-            if not format_url or not format_type:
-                continue
-            if format_type == 'HLS':
-                formats.extend(self._extract_m3u8_formats(
-                    format_url, display_id, entry_protocol='m3u8_native',
-                    ext='mp4', preference=0, fatal=False, m3u8_id=format_type))
-            elif format_type == 'HDS':
-                formats.extend(self._extract_f4m_formats(
-                    format_url, display_id, f4m_id=format_type, fatal=False))
-            elif format_type == 'MPEG_DASH':
-                formats.extend(self._extract_mpd_formats(
-                    format_url, display_id, mpd_id=format_type, fatal=False))
-            else:
-                formats.append({
-                    'format_id': format_type,
-                    'url': format_url,
-                })
-        self._sort_formats(formats)
-
-        subtitles = {}
-        subtitle_urls = data.get('subtitleUrls')
-        if isinstance(subtitle_urls, list):
-            for subtitle in subtitle_urls:
-                subtitle_url = subtitle.get('url')
-                if subtitle_url and subtitle.get('type') == 'CLOSED':
-                    subtitles.setdefault('nl', []).append({'url': subtitle_url})
+            r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id',
+            group='id')
 
         return {
+            '_type': 'url_transparent',
+            'url': 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (site_id, video_id),
+            'ie_key': CanvasIE.ie_key(),
             'id': video_id,
             'display_id': display_id,
             'title': title,
             'description': self._og_search_description(webpage),
-            'formats': formats,
-            'duration': float_or_none(data.get('duration'), 1000),
-            'thumbnail': data.get('posterImageUrl'),
-            'subtitles': subtitles,
         }
index e928942465fbae45a0854172b90987ee65b355a0..81108e70424f5a98bc97dcdb2ee9b11c6869ed08 100644 (file)
@@ -81,6 +81,12 @@ class Channel9IE(InfoExtractor):
 
     _RSS_URL = 'http://channel9.msdn.com/%s/RSS'
 
+    @staticmethod
+    def _extract_urls(webpage):
+        return re.findall(
+            r'<iframe[^>]+src=["\'](https?://channel9\.msdn\.com/(?:[^/]+/)+)player\b',
+            webpage)
+
     def _extract_list(self, video_id, rss_url=None):
         if not rss_url:
             rss_url = self._RSS_URL % video_id
index 4cac294153f166b676f1bdcdb2d47ee9e5fdf693..d08b909a68ec2014f2021b193454b0434df69e0c 100644 (file)
@@ -120,13 +120,16 @@ class ComedyCentralTVIE(MTVServicesInfoExtractor):
 
 
 class ComedyCentralShortnameIE(InfoExtractor):
-    _VALID_URL = r'^:(?P<id>tds|thedailyshow)$'
+    _VALID_URL = r'^:(?P<id>tds|thedailyshow|theopposition)$'
     _TESTS = [{
         'url': ':tds',
         'only_matching': True,
     }, {
         'url': ':thedailyshow',
         'only_matching': True,
+    }, {
+        'url': ':theopposition',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -134,5 +137,6 @@ class ComedyCentralShortnameIE(InfoExtractor):
         shortcut_map = {
             'tds': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
             'thedailyshow': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
+            'theopposition': 'http://www.cc.com/shows/the-opposition-with-jordan-klepper/full-episodes',
         }
         return self.url_result(shortcut_map[video_id])
index 2bbbf8f4d463a0774d75caf8669b693faa9d0232..a692406931d4b63711609c7b8355635235849275 100644 (file)
@@ -1920,7 +1920,7 @@ class InfoExtractor(object):
                             # can't be used at the same time
                             if '%(Number' in media_template and 's' not in representation_ms_info:
                                 segment_duration = None
-                                if 'total_number' not in representation_ms_info and 'segment_duration':
+                                if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
                                     segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
                                     representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
                                 representation_ms_info['fragments'] = [{
@@ -2322,7 +2322,6 @@ class InfoExtractor(object):
             formats = self._parse_jwplayer_formats(
                 video_data['sources'], video_id=this_video_id, m3u8_id=m3u8_id,
                 mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
-            self._sort_formats(formats)
 
             subtitles = {}
             tracks = video_data.get('tracks')
@@ -2339,16 +2338,25 @@ class InfoExtractor(object):
                         'url': self._proto_relative_url(track_url)
                     })
 
-            entries.append({
+            entry = {
                 'id': this_video_id,
-                'title': video_data['title'] if require_title else video_data.get('title'),
+                'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')),
                 'description': video_data.get('description'),
                 'thumbnail': self._proto_relative_url(video_data.get('image')),
                 'timestamp': int_or_none(video_data.get('pubdate')),
                 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
                 'subtitles': subtitles,
-                'formats': formats,
-            })
+            }
+            # https://github.com/jwplayer/jwplayer/blob/master/src/js/utils/validator.js#L32
+            if len(formats) == 1 and re.search(r'^(?:http|//).*(?:youtube\.com|youtu\.be)/.+', formats[0]['url']):
+                entry.update({
+                    '_type': 'url_transparent',
+                    'url': formats[0]['url'],
+                })
+            else:
+                self._sort_formats(formats)
+                entry['formats'] = formats
+            entries.append(entry)
         if len(entries) == 1:
             return entries[0]
         else:
index e9d0dd19cf157d3e707da8e18638d83749b7fdc5..21a2d02392a7ad0393b7db499eb1b4a0ee054547 100644 (file)
@@ -235,7 +235,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
 
         # vevo embed
         vevo_id = self._search_regex(
-            r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?video=(?P<id>[\w]*)',
+            r'<link rel="video_src" href="[^"]*?vevo\.com[^"]*?video=(?P<id>[\w]*)',
             webpage, 'vevo embed', default=None)
         if vevo_id:
             return self.url_result('vevo:%s' % vevo_id, 'Vevo')
index ec87b94dbcc74ae60e05d1c6f43a6e4429cbb721..a38b2683d5932fa55089bc6bc610d1795c5bc07d 100644 (file)
@@ -19,7 +19,7 @@ class DeezerPlaylistIE(InfoExtractor):
             'id': '176747451',
             'title': 'Best!',
             'uploader': 'Anonymous',
-            'thumbnail': r're:^https?://cdn-images.deezer.com/images/cover/.*\.jpg$',
+            'thumbnail': r're:^https?://cdn-images\.deezer\.com/images/cover/.*\.jpg$',
         },
         'playlist_count': 30,
         'skip': 'Only available in .de',
index 4232a4fefd9973070525c83b8c3d51c47f1eea26..ecb33bc9e7dbbb673a87fc93036e75d06c6f6229 100644 (file)
@@ -31,10 +31,7 @@ from .aenetworks import (
     AENetworksIE,
     HistoryTopicIE,
 )
-from .afreecatv import (
-    AfreecaTVIE,
-    AfreecaTVGlobalIE,
-)
+from .afreecatv import AfreecaTVIE
 from .airmozilla import AirMozillaIE
 from .aljazeera import AlJazeeraIE
 from .alphaporno import AlphaPornoIE
@@ -150,7 +147,10 @@ from .camdemy import (
 from .camwithher import CamWithHerIE
 from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
-from .canvas import CanvasIE
+from .canvas import (
+    CanvasIE,
+    CanvasEenIE,
+)
 from .carambatv import (
     CarambaTVIE,
     CarambaTVPageIE,
@@ -381,6 +381,7 @@ from .freesound import FreesoundIE
 from .freespeech import FreespeechIE
 from .freshlive import FreshLiveIE
 from .funimation import FunimationIE
+from .funk import FunkIE
 from .funnyordie import FunnyOrDieIE
 from .fusion import FusionIE
 from .fxnetworks import FXNetworksIE
@@ -940,6 +941,7 @@ from .skynewsarabia import (
 )
 from .skysports import SkySportsIE
 from .slideshare import SlideshareIE
+from .slideslive import SlidesLiveIE
 from .slutload import SlutloadIE
 from .smotri import (
     SmotriIE,
@@ -1243,7 +1245,10 @@ from .vodpl import VODPlIE
 from .vodplatform import VODPlatformIE
 from .voicerepublic import VoiceRepublicIE
 from .voot import VootIE
-from .voxmedia import VoxMediaIE
+from .voxmedia import (
+    VoxMediaVolumeIE,
+    VoxMediaIE,
+)
 from .vporn import VpornIE
 from .vrt import VRTIE
 from .vrak import VrakIE
@@ -1342,7 +1347,6 @@ from .youtube import (
     YoutubeSearchDateIE,
     YoutubeSearchIE,
     YoutubeSearchURLIE,
-    YoutubeSharedVideoIE,
     YoutubeShowIE,
     YoutubeSubscriptionsIE,
     YoutubeTruncatedIDIE,
index 4b3f6cc86b57f283f08faedfa481bfbc9719a879..220ada3a6dd962f16020e811894220298b720dce 100644 (file)
@@ -67,9 +67,9 @@ class FacebookIE(InfoExtractor):
             'uploader': 'Tennis on Facebook',
             'upload_date': '20140908',
             'timestamp': 1410199200,
-        }
+        },
+        'skip': 'Requires logging in',
     }, {
-        'note': 'Video without discernible title',
         'url': 'https://www.facebook.com/video.php?v=274175099429670',
         'info_dict': {
             'id': '274175099429670',
@@ -78,6 +78,7 @@ class FacebookIE(InfoExtractor):
             'uploader': 'Asif Nawab Butt',
             'upload_date': '20140506',
             'timestamp': 1399398998,
+            'thumbnail': r're:^https?://.*',
         },
         'expected_warnings': [
             'title'
@@ -94,6 +95,7 @@ class FacebookIE(InfoExtractor):
             'upload_date': '20160110',
             'timestamp': 1452431627,
         },
+        'skip': 'Requires logging in',
     }, {
         'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
         'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6',
@@ -121,7 +123,11 @@ class FacebookIE(InfoExtractor):
         'info_dict': {
             'id': '10153664894881749',
             'ext': 'mp4',
-            'title': 'Facebook video #10153664894881749',
+            'title': 'Average time to confirm recent Supreme Court nominees: 67 days Longest it\'s t...',
+            'thumbnail': r're:^https?://.*',
+            'timestamp': 1456259628,
+            'upload_date': '20160223',
+            'uploader': 'Barack Obama',
         },
     }, {
         # have 1080P, but only up to 720p in swf params
@@ -130,10 +136,11 @@ class FacebookIE(InfoExtractor):
         'info_dict': {
             'id': '10155529876156509',
             'ext': 'mp4',
-            'title': 'Holocaust survivor becomes US citizen',
+            'title': 'She survived the holocaust — and years later, she’s getting her citizenship s...',
             'timestamp': 1477818095,
             'upload_date': '20161030',
             'uploader': 'CNN',
+            'thumbnail': r're:^https?://.*',
         },
     }, {
         # bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
@@ -158,6 +165,7 @@ class FacebookIE(InfoExtractor):
             'timestamp': 1477305000,
             'upload_date': '20161024',
             'uploader': 'La Guía Del Varón',
+            'thumbnail': r're:^https?://.*',
         },
         'params': {
             'skip_download': True,
@@ -376,6 +384,7 @@ class FacebookIE(InfoExtractor):
         timestamp = int_or_none(self._search_regex(
             r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
             'timestamp', default=None))
+        thumbnail = self._og_search_thumbnail(webpage)
 
         info_dict = {
             'id': video_id,
@@ -383,6 +392,7 @@ class FacebookIE(InfoExtractor):
             'formats': formats,
             'uploader': uploader,
             'timestamp': timestamp,
+            'thumbnail': thumbnail,
         }
 
         return webpage, info_dict
index facc665f606238fb2ebb54193fe4e525141216f4..5f98d017b84aae5a88eb07655b68ac52726dd643 100644 (file)
@@ -2,7 +2,10 @@
 from __future__ import unicode_literals
 
 from .adobepass import AdobePassIE
+from .uplynk import UplynkPreplayIE
+from ..compat import compat_str
 from ..utils import (
+    HEADRequest,
     int_or_none,
     parse_age_limit,
     parse_duration,
@@ -53,14 +56,7 @@ class FOXIE(AdobePassIE):
             })
 
         title = video['name']
-
-        m3u8_url = self._download_json(
-            video['videoRelease']['url'], video_id)['playURL']
-
-        formats = self._extract_m3u8_formats(
-            m3u8_url, video_id, 'mp4',
-            entry_protocol='m3u8_native', m3u8_id='hls')
-        self._sort_formats(formats)
+        release_url = video['videoRelease']['url']
 
         description = video.get('description')
         duration = int_or_none(video.get('durationInSeconds')) or int_or_none(
@@ -84,7 +80,7 @@ class FOXIE(AdobePassIE):
             # TODO: AP
             pass
 
-        return {
+        info = {
             'id': video_id,
             'title': title,
             'description': description,
@@ -97,5 +93,22 @@ class FOXIE(AdobePassIE):
             'episode': episode,
             'episode_number': episode_number,
             'release_year': release_year,
-            'formats': formats,
         }
+
+        urlh = self._request_webpage(HEADRequest(release_url), video_id)
+        video_url = compat_str(urlh.geturl())
+
+        if UplynkPreplayIE.suitable(video_url):
+            info.update({
+                '_type': 'url_transparent',
+                'url': video_url,
+                'ie_key': UplynkPreplayIE.ie_key(),
+            })
+        else:
+            m3u8_url = self._download_json(release_url, video_id)['playURL']
+            formats = self._extract_m3u8_formats(
+                m3u8_url, video_id, 'mp4',
+                entry_protocol='m3u8_native', m3u8_id='hls')
+            self._sort_formats(formats)
+            info['formats'] = formats
+        return info
index 0a70ca76351ab310ba394959b717973ec772f52d..7fa271b51fa59e14342d0de21cd50be719f3467d 100644 (file)
@@ -27,7 +27,7 @@ class FreespeechIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         title = mobj.group('title')
         webpage = self._download_webpage(url, title)
-        info_json = self._search_regex(r'jQuery.extend\(Drupal.settings, ({.*?})\);', webpage, 'info')
+        info_json = self._search_regex(r'jQuery\.extend\(Drupal\.settings, ({.*?})\);', webpage, 'info')
         info = json.loads(info_json)
 
         return {
diff --git a/youtube_dl/extractor/funk.py b/youtube_dl/extractor/funk.py
new file mode 100644 (file)
index 0000000..ce5c67f
--- /dev/null
@@ -0,0 +1,43 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .nexx import NexxIE
+from ..utils import extract_attributes
+
+
+class FunkIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?funk\.net/(?:mix|channel)/(?:[^/]+/)*(?P<id>[^?/#]+)'
+    _TESTS = [{
+        'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/0/59d517e741dca10001252574/',
+        'md5': '4d40974481fa3475f8bccfd20c5361f8',
+        'info_dict': {
+            'id': '716599',
+            'ext': 'mp4',
+            'title': 'Neue Rechte Welle',
+            'description': 'md5:a30a53f740ffb6bfd535314c2cc5fb69',
+            'timestamp': 1501337639,
+            'upload_date': '20170729',
+        },
+        'params': {
+            'format': 'bestvideo',
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/0/59d52049999264000182e79d/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        domain_id = NexxIE._extract_domain_id(webpage) or '741'  # hard-coded fallback when the page yields no domain id
+        nexx_id = extract_attributes(self._search_regex(
+            r'(<div[^>]+id=["\']mediaplayer-funk[^>]+>)',  # [^>]+ so other attributes may precede id=
+            webpage, 'media player'))['data-id']
+
+        return self.url_result(
+            'nexx:%s:%s' % (domain_id, nexx_id), ie=NexxIE.ie_key(),
+            video_id=nexx_id)
index 00d311158f6b7846a470dd90488d3db54d484910..02804d297a4709198371e2e2acfd8a83857095be 100644 (file)
@@ -105,7 +105,7 @@ class GameSpotIE(OnceIE):
             onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri')
             if onceux_url:
                 formats.extend(self._extract_once_formats(re.sub(
-                    r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url).replace('ads/vmap/', '')))
+                    r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url)))
 
         if not formats:
             for quality in ['sd', 'hd']:
index 7d0edf09c6a2e8428b84b3e7ebf787738db1bfaa..2a9c3e2dea0f397bc24b783550216ea42a582283 100644 (file)
@@ -22,6 +22,8 @@ from ..utils import (
     HEADRequest,
     is_html,
     js_to_json,
+    KNOWN_EXTENSIONS,
+    mimetype2ext,
     orderedSet,
     sanitized_Request,
     smuggle_url,
@@ -99,6 +101,7 @@ from .mediaset import MediasetIE
 from .joj import JojIE
 from .megaphone import MegaphoneIE
 from .vzaar import VzaarIE
+from .channel9 import Channel9IE
 
 
 class GenericIE(InfoExtractor):
@@ -1088,7 +1091,7 @@ class GenericIE(InfoExtractor):
                 'ext': 'mp4',
                 'upload_date': '20150212',
                 'uploader': 'The National Archives UK',
-                'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
+                'description': 'md5:8078af856dca76edc42910b61273dbbf',
                 'uploader_id': 'NationalArchives08',
                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
             },
@@ -1104,7 +1107,8 @@ class GenericIE(InfoExtractor):
             },
             'params': {
                 'skip_download': True,
-            }
+            },
+            'skip': 'does not contain a video anymore',
         },
         # Complex jwplayer
         {
@@ -1113,6 +1117,7 @@ class GenericIE(InfoExtractor):
                 'id': 'videos',
                 'ext': 'mp4',
                 'title': 'king machine trailer 1',
+                'description': 'Browse King Machine videos & audio for sweet media. Your eyes will thank you.',
                 'thumbnail': r're:^https?://.*\.jpg$',
             },
         },
@@ -1130,13 +1135,42 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,
             }
         },
+        {
+            # Video.js embed, multiple formats
+            'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
+            'info_dict': {
+                'id': 'yygqldloqIk',
+                'ext': 'mp4',
+                'title': 'SolidWorks. Урок 6 Настройка чертежа',
+                'description': 'md5:baf95267792646afdbf030e4d06b2ab3',
+                'upload_date': '20130314',
+                'uploader': 'PROстое3D',
+                'uploader_id': 'PROstoe3D',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
+        {
+            # Video.js embed, single format
+            'url': 'https://www.vooplayer.com/v3/watch/watch.php?v=NzgwNTg=',
+            'info_dict': {
+                'id': 'watch',
+                'ext': 'mp4',
+                'title': 'Step 1 -  Good Foundation',
+                'description': 'md5:d1e7ff33a29fc3eb1673d6c270d344f4',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
         # rtl.nl embed
         {
             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
             'playlist_mincount': 5,
             'info_dict': {
                 'id': 'aanslagen-kopenhagen',
-                'title': 'Aanslagen Kopenhagen | RTL Nieuws',
+                'title': 'Aanslagen Kopenhagen',
             }
         },
         # Zapiks embed
@@ -1268,6 +1302,7 @@ class GenericIE(InfoExtractor):
             'params': {
                 'skip_download': True,
             },
+            'skip': 'This video is unavailable.',
         },
         # Pladform embed
         {
@@ -1281,6 +1316,7 @@ class GenericIE(InfoExtractor):
                 'duration': 694,
                 'age_limit': 0,
             },
+            'skip': 'HTTP Error 404: Not Found',
         },
         # Playwire embed
         {
@@ -1301,6 +1337,14 @@ class GenericIE(InfoExtractor):
                 'id': '518726732',
                 'ext': 'mp4',
                 'title': 'Facebook Creates "On This Day" | Crunch Report',
+                'description': 'Amazon updates Fire TV line, Tesla\'s Model X spotted in the wild',
+                'timestamp': 1427237531,
+                'uploader': 'Crunch Report',
+                'upload_date': '20150324',
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
             },
         },
         # SVT embed
@@ -1352,16 +1396,20 @@ class GenericIE(InfoExtractor):
                 'upload_date': '20140107',
                 'timestamp': 1389118457,
             },
+            'skip': 'Invalid Page URL',
         },
         # NBC News embed
         {
             'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
             'md5': '1aa589c675898ae6d37a17913cf68d66',
             'info_dict': {
-                'id': '701714499682',
+                'id': 'x_dtl_oa_LettermanliftPR_160608',
                 'ext': 'mp4',
-                'title': 'PREVIEW: On Assignment: David Letterman',
+                'title': 'David Letterman: A Preview',
                 'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
+                'upload_date': '20160609',
+                'timestamp': 1465431544,
+                'uploader': 'NBCU-NEWS',
             },
         },
         # UDN embed
@@ -1378,6 +1426,7 @@ class GenericIE(InfoExtractor):
                 # m3u8 download
                 'skip_download': True,
             },
+            'expected_warnings': ['Failed to parse JSON Expecting value'],
         },
         # Ooyala embed
         {
@@ -1385,7 +1434,7 @@ class GenericIE(InfoExtractor):
             'info_dict': {
                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
                 'ext': 'mp4',
-                'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
+                'description': 'Index/Match versus VLOOKUP.',
                 'title': 'This is what separates the Excel masters from the wannabes',
                 'duration': 191.933,
             },
@@ -1423,7 +1472,8 @@ class GenericIE(InfoExtractor):
                 'upload_date': '20150622',
                 'uploader': 'Public Sénat',
                 'uploader_id': 'xa9gza',
-            }
+            },
+            'skip': 'File not found.',
         },
         # OnionStudios embed
         {
@@ -1581,22 +1631,6 @@ class GenericIE(InfoExtractor):
             },
             'add_ie': ['BrightcoveLegacy'],
         },
-        # Nexx embed
-        {
-            'url': 'https://www.funk.net/serien/5940e15073f6120001657956/items/593efbb173f6120001657503',
-            'info_dict': {
-                'id': '247746',
-                'ext': 'mp4',
-                'title': "Yesterday's Jam (OV)",
-                'description': 'md5:09bc0984723fed34e2581624a84e05f0',
-                'timestamp': 1492594816,
-                'upload_date': '20170419',
-            },
-            'params': {
-                'format': 'bestvideo',
-                'skip_download': True,
-            },
-        },
         # Facebook <iframe> embed
         {
             'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
@@ -2175,7 +2209,7 @@ class GenericIE(InfoExtractor):
         # And then there are the jokers who advertise that they use RTA,
         # but actually don't.
         AGE_LIMIT_MARKERS = [
-            r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
+            r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>',
         ]
         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
             age_limit = 18
@@ -2237,7 +2271,7 @@ class GenericIE(InfoExtractor):
 
         # Look for embedded rtl.nl player
         matches = re.findall(
-            r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
+            r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
             webpage)
         if matches:
             return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
@@ -2636,7 +2670,7 @@ class GenericIE(InfoExtractor):
 
         # Look for UDN embeds
         mobj = re.search(
-            r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
+            r'<iframe[^>]+src="(?:https?:)?(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
         if mobj is not None:
             return self.url_result(
                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
@@ -2840,6 +2874,11 @@ class GenericIE(InfoExtractor):
             return self.playlist_from_matches(
                 vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key())
 
+        channel9_urls = Channel9IE._extract_urls(webpage)
+        if channel9_urls:
+            return self.playlist_from_matches(
+                channel9_urls, video_id, video_title, ie=Channel9IE.ie_key())
+
         def merge_dicts(dict1, dict2):
             merged = {}
             for k, v in dict1.items():
@@ -2880,6 +2919,46 @@ class GenericIE(InfoExtractor):
                 jwplayer_data, video_id, require_title=False, base_url=url)
             return merge_dicts(info, info_dict)
 
+        # Video.js embed
+        mobj = re.search(
+            r'(?s)\bvideojs\s*\(.+?\.src\s*\(\s*((?:\[.+?\]|{.+?}))\s*\)\s*;',
+            webpage)
+        if mobj is not None:
+            sources = self._parse_json(
+                mobj.group(1), video_id, transform_source=js_to_json,
+                fatal=False) or []
+            if not isinstance(sources, list):
+                sources = [sources]
+            formats = []
+            for source in sources:
+                src = source.get('src')
+                if not src or not isinstance(src, compat_str):
+                    continue
+                src = compat_urlparse.urljoin(url, src)
+                src_type = source.get('type')
+                if isinstance(src_type, compat_str):
+                    src_type = src_type.lower()
+                ext = determine_ext(src).lower()
+                if src_type == 'video/youtube':
+                    return self.url_result(src, YoutubeIE.ie_key())
+                if src_type == 'application/dash+xml' or ext == 'mpd':
+                    formats.extend(self._extract_mpd_formats(
+                        src, video_id, mpd_id='dash', fatal=False))
+                elif src_type == 'application/x-mpegurl' or ext == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        src, video_id, 'mp4', entry_protocol='m3u8_native',
+                        m3u8_id='hls', fatal=False))
+                else:
+                    formats.append({
+                        'url': src,
+                        'ext': (mimetype2ext(src_type) or
+                                ext if ext in KNOWN_EXTENSIONS else 'mp4'),
+                    })
+            if formats:
+                self._sort_formats(formats)
+                info_dict['formats'] = formats
+                return info_dict
+
         # Looking for http://schema.org/VideoObject
         json_ld = self._search_json_ld(
             webpage, video_id, default={}, expected_type='VideoObject')
@@ -2973,7 +3052,7 @@ class GenericIE(InfoExtractor):
             # be supported by youtube-dl thus this is checked the very last (see
             # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
             embed_url = self._html_search_meta('twitter:player', webpage, default=None)
-            if embed_url:
+            if embed_url and embed_url != url:
                 return self.url_result(embed_url)
 
         if not found:
index 45ccc11c10a18033c5d53d0416903425a6ef2385..a0670b6456adf7e092dadbfc14d193f920aa0262 100644 (file)
@@ -11,7 +11,7 @@ from ..utils import (
 
 
 class GfycatIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/)?(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/|gifs/detail/)?(?P<id>[^/?#]+)'
     _TESTS = [{
         'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
         'info_dict': {
@@ -44,6 +44,9 @@ class GfycatIE(InfoExtractor):
             'categories': list,
             'age_limit': 0,
         }
+    }, {
+        'url': 'https://gfycat.com/gifs/detail/UnconsciousLankyIvorygull',
+        'only_matching': True
     }]
 
     def _real_extract(self, url):
index 427499b11286f00a8e10e09a8de1d9f84611b5c9..6b927bb4477da8ff3f1f5635a6bf5d3a5a590984 100644 (file)
@@ -61,7 +61,7 @@ class GooglePlusIE(InfoExtractor):
             'width': int(width),
             'height': int(height),
         } for width, height, video_url in re.findall(
-            r'\d+,(\d+),(\d+),"(https?://[^.]+\.googleusercontent.com.*?)"', webpage)]
+            r'\d+,(\d+),(\d+),"(https?://[^.]+\.googleusercontent\.com.*?)"', webpage)]
         self._sort_formats(formats)
 
         return {
index 2be68abad0af91f1b508bc2cfa6e984ac39dbfd0..cf90ab3c9d976faabed8891fa4c1211853732f24 100644 (file)
@@ -11,45 +11,20 @@ from ..utils import (
 
 
 class HowStuffWorksIE(InfoExtractor):
-    _VALID_URL = r'https?://[\da-z-]+\.howstuffworks\.com/(?:[^/]+/)*(?:\d+-)?(?P<id>.+?)-video\.htm'
+    _VALID_URL = r'https?://[\da-z-]+\.(?:howstuffworks|stuff(?:(?:youshould|theydontwantyouto)know|toblowyourmind|momnevertoldyou)|(?:brain|car)stuffshow|fwthinking|geniusstuff)\.com/(?:[^/]+/)*(?:\d+-)?(?P<id>.+?)-video\.htm'
     _TESTS = [
         {
-            'url': 'http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm',
+            'url': 'http://www.stufftoblowyourmind.com/videos/optical-illusions-video.htm',
+            'md5': '76646a5acc0c92bf7cd66751ca5db94d',
             'info_dict': {
-                'id': '450221',
-                'ext': 'flv',
-                'title': 'Cool Jobs - Iditarod Musher',
-                'description': 'Cold sleds, freezing temps and warm dog breath... an Iditarod musher\'s dream. Kasey-Dee Gardner jumps on a sled to find out what the big deal is.',
-                'display_id': 'cool-jobs-iditarod-musher',
-                'thumbnail': r're:^https?://.*\.jpg$',
-                'duration': 161,
-            },
-            'skip': 'Video broken',
-        },
-        {
-            'url': 'http://adventure.howstuffworks.com/7199-survival-zone-food-and-water-in-the-savanna-video.htm',
-            'info_dict': {
-                'id': '453464',
-                'ext': 'mp4',
-                'title': 'Survival Zone: Food and Water In the Savanna',
-                'description': 'Learn how to find both food and water while trekking in the African savannah. In this video from the Discovery Channel.',
-                'display_id': 'survival-zone-food-and-water-in-the-savanna',
-                'thumbnail': r're:^https?://.*\.jpg$',
-            },
-        },
-        {
-            'url': 'http://entertainment.howstuffworks.com/arts/2706-sword-swallowing-1-by-dan-meyer-video.htm',
-            'info_dict': {
-                'id': '440011',
+                'id': '855410',
                 'ext': 'mp4',
-                'title': 'Sword Swallowing #1 by Dan Meyer',
-                'description': 'Video footage (1 of 3) used by permission of the owner Dan Meyer through Sword Swallowers Association International <www.swordswallow.org>',
-                'display_id': 'sword-swallowing-1-by-dan-meyer',
-                'thumbnail': r're:^https?://.*\.jpg$',
+                'title': 'Your Trickster Brain: Optical Illusions -- Science on the Web',
+                'description': 'md5:e374ff9561f6833ad076a8cc0a5ab2fb',
             },
         },
         {
-            'url': 'http://shows.howstuffworks.com/stuff-to-blow-your-mind/optical-illusions-video.htm',
+            'url': 'http://shows.howstuffworks.com/more-shows/why-does-balloon-stick-to-hair-video.htm',
             'only_matching': True,
         }
     ]
index 656ce6d0504180daa1f27495459599d47cebdae0..6424d34ac4acc0f6c01205076cd0e6e25723fc22 100644 (file)
@@ -104,7 +104,7 @@ class HRTiIE(HRTiBaseIE):
                         (?:
                             hrti:(?P<short_id>[0-9]+)|
                             https?://
-                                hrti\.hrt\.hr/\#/video/show/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?
+                                hrti\.hrt\.hr/(?:\#/)?video/show/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?
                         )
                     '''
     _TESTS = [{
@@ -129,6 +129,9 @@ class HRTiIE(HRTiBaseIE):
     }, {
         'url': 'hrti:2181385',
         'only_matching': True,
+    }, {
+        'url': 'https://hrti.hrt.hr/video/show/3873068/cuvar-dvorca-dramska-serija-14',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -170,7 +173,7 @@ class HRTiIE(HRTiBaseIE):
 
 
 class HRTiPlaylistIE(HRTiBaseIE):
-    _VALID_URL = r'https?://hrti.hrt.hr/#/video/list/category/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?'
+    _VALID_URL = r'https?://hrti\.hrt\.hr/(?:#/)?video/list/category/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?'
     _TESTS = [{
         'url': 'https://hrti.hrt.hr/#/video/list/category/212/ekumena',
         'info_dict': {
@@ -182,6 +185,9 @@ class HRTiPlaylistIE(HRTiBaseIE):
     }, {
         'url': 'https://hrti.hrt.hr/#/video/list/category/212/',
         'only_matching': True,
+    }, {
+        'url': 'https://hrti.hrt.hr/video/list/category/212/ekumena',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index c1367cf517ce9b39960a13705b4475bf4f3cf8d1..a96ea801019c808e6a8fe1f8f6590b0e018feeef 100644 (file)
@@ -203,7 +203,7 @@ class PCMagIE(IGNIE):
     _VALID_URL = r'https?://(?:www\.)?pcmag\.com/(?P<type>videos|article2)(/.+)?/(?P<name_or_id>.+)'
     IE_NAME = 'pcmag'
 
-    _EMBED_RE = r'iframe.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content.html?[^"]*url=([^"]+)["&]'
+    _EMBED_RE = r'iframe\.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content\.html?[^"]*url=([^"]+)["&]'
 
     _TESTS = [{
         'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
index fe425e786479e505aad8082745d43040d38ba881..c3e892feb1fd905b98f99b5670ee7c120b0b208d 100644 (file)
@@ -8,7 +8,10 @@ from ..compat import (
     compat_urllib_parse_unquote,
     compat_urlparse,
 )
-from ..utils import determine_ext
+from ..utils import (
+    determine_ext,
+    update_url_query,
+)
 from .bokecc import BokeCCBaseIE
 
 
@@ -68,21 +71,22 @@ class InfoQIE(BokeCCBaseIE):
             'play_path': playpath,
         }]
 
-    def _extract_cookies(self, webpage):
-        policy = self._search_regex(r'InfoQConstants.scp\s*=\s*\'([^\']+)\'', webpage, 'policy')
-        signature = self._search_regex(r'InfoQConstants.scs\s*=\s*\'([^\']+)\'', webpage, 'signature')
-        key_pair_id = self._search_regex(r'InfoQConstants.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id')
-        return 'CloudFront-Policy=%s; CloudFront-Signature=%s; CloudFront-Key-Pair-Id=%s' % (
-            policy, signature, key_pair_id)
+    def _extract_cf_auth(self, webpage):
+        policy = self._search_regex(r'InfoQConstants\.scp\s*=\s*\'([^\']+)\'', webpage, 'policy')
+        signature = self._search_regex(r'InfoQConstants\.scs\s*=\s*\'([^\']+)\'', webpage, 'signature')
+        key_pair_id = self._search_regex(r'InfoQConstants\.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id')
+        return {
+            'Policy': policy,
+            'Signature': signature,
+            'Key-Pair-Id': key_pair_id,
+        }
 
     def _extract_http_video(self, webpage):
         http_video_url = self._search_regex(r'P\.s\s*=\s*\'([^\']+)\'', webpage, 'video URL')
+        http_video_url = update_url_query(http_video_url, self._extract_cf_auth(webpage))
         return [{
             'format_id': 'http_video',
             'url': http_video_url,
-            'http_headers': {
-                'Cookie': self._extract_cookies(webpage)
-            },
         }]
 
     def _extract_http_audio(self, webpage, video_id):
@@ -91,22 +95,20 @@ class InfoQIE(BokeCCBaseIE):
         if not http_audio_url:
             return []
 
-        cookies_header = {'Cookie': self._extract_cookies(webpage)}
-
         # base URL is found in the Location header in the response returned by
         # GET https://www.infoq.com/mp3download.action?filename=... when logged in.
         http_audio_url = compat_urlparse.urljoin('http://res.infoq.com/downloads/mp3downloads/', http_audio_url)
+        http_audio_url = update_url_query(http_audio_url, self._extract_cf_auth(webpage))
 
         # audio file seem to be missing some times even if there is a download link
         # so probe URL to make sure
-        if not self._is_valid_url(http_audio_url, video_id, headers=cookies_header):
+        if not self._is_valid_url(http_audio_url, video_id):
             return []
 
         return [{
             'format_id': 'http_audio',
             'url': http_audio_url,
             'vcodec': 'none',
-            'http_headers': cookies_header,
         }]
 
     def _real_extract(self, url):
index 1a4227f6b4b0ef7370b0f09613ef9d4b8916b435..e9f4ed7384225456edf87baf0c24ce5c6ecc4b20 100644 (file)
@@ -30,7 +30,7 @@ class JeuxVideoIE(InfoExtractor):
         webpage = self._download_webpage(url, title)
         title = self._html_search_meta('name', webpage) or self._og_search_title(webpage)
         config_url = self._html_search_regex(
-            r'data-src(?:set-video)?="(/contenu/medias/video.php.*?)"',
+            r'data-src(?:set-video)?="(/contenu/medias/video\.php.*?)"',
             webpage, 'config URL')
         config_url = 'http://www.jeuxvideo.com' + config_url
 
index 138d4844d1bbd70e56aff50b5a59ff2ddac8f665..bdac2df3e52ffeafa55d4d6f05af18edcaf9e0dd 100644 (file)
@@ -287,6 +287,9 @@ class KalturaIE(InfoExtractor):
             # skip for now.
             if f.get('fileExt') == 'chun':
                 continue
+            # DRM-protected video, cannot be decrypted
+            if f.get('fileExt') == 'wvm':
+                continue
             if not f.get('fileExt'):
                 # QT indicates QuickTime; some videos have broken fileExt
                 if f.get('containerFormat') == 'qt':
index fb9c2dbd47789ae6f0457a4b2724c53875d14753..93a98e1e08beff701594d4e7d763c3b7b5790ab1 100644 (file)
@@ -1,5 +1,6 @@
 from __future__ import unicode_literals
 
+from .canvas import CanvasIE
 from .common import InfoExtractor
 
 
@@ -7,7 +8,7 @@ class KetnetIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'https://www.ketnet.be/kijken/zomerse-filmpjes',
-        'md5': 'd907f7b1814ef0fa285c0475d9994ed7',
+        'md5': '6bdeb65998930251bbd1c510750edba9',
         'info_dict': {
             'id': 'zomerse-filmpjes',
             'ext': 'mp4',
@@ -15,6 +16,20 @@ class KetnetIE(InfoExtractor):
             'description': 'Gluur mee met Ghost Rockers op de filmset',
             'thumbnail': r're:^https?://.*\.jpg$',
         }
+    }, {
+        # mzid in playerConfig instead of sources
+        'url': 'https://www.ketnet.be/kijken/nachtwacht/de-greystook',
+        'md5': '90139b746a0a9bd7bb631283f6e2a64e',
+        'info_dict': {
+            'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
+            'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
+            'ext': 'flv',
+            'title': 'Nachtwacht: De Greystook',
+            'description': 'md5:1db3f5dc4c7109c821261e7512975be7',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 1468.03,
+        },
+        'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
     }, {
         'url': 'https://www.ketnet.be/kijken/karrewiet/uitzending-8-september-2016',
         'only_matching': True,
@@ -38,6 +53,12 @@ class KetnetIE(InfoExtractor):
                 'player config'),
             video_id)
 
+        mzid = config.get('mzid')
+        if mzid:
+            return self.url_result(
+                'https://mediazone.vrt.be/api/v1/ketnet/assets/%s' % mzid,
+                CanvasIE.ie_key(), video_id=mzid)
+
         title = config['title']
 
         formats = []
index 7f946c6ed9d64c54670d7fba68058144bcab494b..317ebbc4ee60d17051574ae05b8575526216de05 100644 (file)
@@ -338,7 +338,7 @@ class LivestreamOriginalIE(InfoExtractor):
                 info = {
                     'title': self._og_search_title(webpage),
                     'description': self._og_search_description(webpage),
-                    'thumbnail': self._search_regex(r'channelLogo.src\s*=\s*"([^"]+)"', webpage, 'thumbnail', None),
+                    'thumbnail': self._search_regex(r'channelLogo\.src\s*=\s*"([^"]+)"', webpage, 'thumbnail', None),
                 }
             video_data = self._download_json(stream_url, content_id)
             is_live = video_data.get('isLive')
index 068378c9c509a0483650feac42a8ffe92cc60328..cfec0d3d0910c6b107831b050f4af6dcd9d248c0 100644 (file)
@@ -11,7 +11,7 @@ from ..utils import (
 
 
 class LnkGoIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?lnkgo\.alfa\.lt/visi-video/(?P<show>[^/]+)/ziurek-(?P<id>[A-Za-z0-9-]+)'
+    _VALID_URL = r'https?://(?:www\.)?lnkgo\.(?:alfa\.)?lt/visi-video/(?P<show>[^/]+)/ziurek-(?P<id>[A-Za-z0-9-]+)'
     _TESTS = [{
         'url': 'http://lnkgo.alfa.lt/visi-video/yra-kaip-yra/ziurek-yra-kaip-yra-162',
         'info_dict': {
@@ -42,6 +42,9 @@ class LnkGoIE(InfoExtractor):
         'params': {
             'skip_download': True,  # HLS download
         },
+    }, {
+        'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai',
+        'only_matching': True,
     }]
     _AGE_LIMITS = {
         'N-7': 7,
index 3c34d4604f20699d99937b29a11aad7f8f4116a4..8eda69cfc4aded62c192d1cef00c27bee57ab296 100644 (file)
@@ -5,7 +5,7 @@ from .common import InfoExtractor
 
 
 class MakerTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})'
+    _VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer\.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})'
     _TEST = {
         'url': 'http://www.maker.tv/video/Fh3QgymL9gsc',
         'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e',
index 1885ac7df59684767022f95c5c35ffeb65cd33d5..dbd761a67864036cd84be24fa9ef432433c08202 100644 (file)
@@ -22,7 +22,7 @@ class MangomoloBaseIE(InfoExtractor):
 
         format_url = self._html_search_regex(
             [
-                r'file\s*:\s*"(https?://[^"]+?/playlist.m3u8)',
+                r'file\s*:\s*"(https?://[^"]+?/playlist\.m3u8)',
                 r'<a[^>]+href="(rtsp://[^"]+)"'
             ], webpage, 'format url')
         formats = self._extract_wowza_formats(
index c8eacb4f4d63ca789659ed9a3deeba7f8fe76f86..2445b8b3985fac92ad451d312f4bb105890db0f5 100644 (file)
@@ -11,7 +11,7 @@ from ..utils import (
 
 class MeipaiIE(InfoExtractor):
     IE_DESC = '美拍'
-    _VALID_URL = r'https?://(?:www\.)?meipai.com/media/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?meipai\.com/media/(?P<id>[0-9]+)'
     _TESTS = [{
         # regular uploaded video
         'url': 'http://www.meipai.com/media/531697625',
index f331db89075864c52733375c46b15fed74ef0e77..7b2bb6e20577929abb9097e2d05c13cfc141d4f9 100644 (file)
@@ -291,7 +291,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
             functools.partial(
                 self._tracks_page_func,
                 '%s/%s' % (user_id, list_type), video_id, 'list of %s' % list_type),
-            self._PAGE_SIZE, use_cache=True)
+            self._PAGE_SIZE)
 
         return self.playlist_result(
             entries, video_id, '%s (%s)' % (username, list_type), description)
index 25af5ddfda4765132fec413caca9a09fc2ba2bb9..1154a35365ca9b388ebfdbe2a5790f67a25ee750 100644 (file)
@@ -258,7 +258,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
 
         if mgid is None or ':' not in mgid:
             mgid = self._search_regex(
-                [r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
+                [r'data-mgid="(.*?)"', r'swfobject\.embedSWF\(".*?(mgid:.*?)"'],
                 webpage, 'mgid', default=None)
 
         if not mgid:
index 6bb64eb63c52018c34650a6361e7d70cad2459e0..367e811db5a1c0705626ae766483fb02a4c6162a 100644 (file)
@@ -160,7 +160,7 @@ class MyVideoIE(InfoExtractor):
         else:
             video_playpath = ''
 
-        video_swfobj = self._search_regex(r'swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
+        video_swfobj = self._search_regex(r'swfobject\.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
         video_swfobj = compat_urllib_parse_unquote(video_swfobj)
 
         video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
index b91d865286e47affdc66c138dde9507963d62733..9e8d28f4848165ccdfda771800031e6c68359684 100644 (file)
@@ -111,7 +111,7 @@ class NationalGeographicIE(ThePlatformIE, AdobePassIE):
         release_url = self._search_regex(
             r'video_auth_playlist_url\s*=\s*"([^"]+)"',
             webpage, 'release url')
-        theplatform_path = self._search_regex(r'https?://link.theplatform.com/s/([^?]+)', release_url, 'theplatform path')
+        theplatform_path = self._search_regex(r'https?://link\.theplatform\.com/s/([^?]+)', release_url, 'theplatform path')
         video_id = theplatform_path.split('/')[-1]
         query = {
             'mbr': 'true',
index e8131333f8458505b7a323f378c5fee848414934..2047d440266907ea6cd16631cbf235d751c83d9a 100644 (file)
@@ -43,7 +43,7 @@ class NaverIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
+        m_id = re.search(r'var rmcPlayer = new nhn\.rmcnmv\.RMCVideoPlayer\("(.+?)", "(.+?)"',
                          webpage)
         if m_id is None:
             error = self._html_search_regex(
index 53561961c12611eeead082ed662e44a75e38acbf..be295a7a3b010c375416e0af1a7d27337ca2d4cf 100644 (file)
@@ -122,7 +122,7 @@ class NBAIE(TurnerBaseIE):
         playlist_title = self._og_search_title(webpage, fatal=False)
         entries = OnDemandPagedList(
             functools.partial(self._fetch_page, team, video_id),
-            self._PAGE_SIZE, use_cache=True)
+            self._PAGE_SIZE)
 
         return self.playlist_result(entries, team, playlist_title)
 
index 836a41f0694dd20b3024ecdd97376d340f322e02..35151f5274d2452be7cb508ec76e36db8fab9d9e 100644 (file)
@@ -15,7 +15,7 @@ from ..utils import (
 
 
 class NBCIE(AdobePassIE):
-    _VALID_URL = r'(?P<permalink>https?://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+))'
+    _VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+))'
 
     _TESTS = [
         {
@@ -72,6 +72,7 @@ class NBCIE(AdobePassIE):
 
     def _real_extract(self, url):
         permalink, video_id = re.match(self._VALID_URL, url).groups()
+        permalink = 'http' + permalink
         video_data = self._download_json(
             'https://api.nbc.com/v3/videos', video_id, query={
                 'filter[permalink]': permalink,
index d0235fdfe97cba28049ee3e9f6ba8415ff450989..071879ba4f5317326ebed99ee59e8a6f4e6b4ce4 100644 (file)
@@ -18,7 +18,13 @@ from ..utils import (
 
 
 class NexxIE(InfoExtractor):
-    _VALID_URL = r'https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P<domain_id>\d+)/videos/byid/(?P<id>\d+)'
+    _VALID_URL = r'''(?x)
+                        (?:
+                            https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P<domain_id>\d+)/videos/byid/|
+                            nexx:(?P<domain_id_s>\d+):
+                        )
+                        (?P<id>\d+)
+                    '''
     _TESTS = [{
         # movie
         'url': 'https://api.nexx.cloud/v3/748/videos/byid/128907',
@@ -62,8 +68,18 @@ class NexxIE(InfoExtractor):
     }, {
         'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907',
         'only_matching': True,
+    }, {
+        'url': 'nexx:748:128907',
+        'only_matching': True,
     }]
 
+    @staticmethod
+    def _extract_domain_id(webpage):
+        mobj = re.search(
+            r'<script\b[^>]+\bsrc=["\'](?:https?:)?//require\.nexx(?:\.cloud|cdn\.com)/(?P<id>\d+)',
+            webpage)
+        return mobj.group('id') if mobj else None
+
     @staticmethod
     def _extract_urls(webpage):
         # Reference:
@@ -72,11 +88,8 @@ class NexxIE(InfoExtractor):
         entries = []
 
         # JavaScript Integration
-        mobj = re.search(
-            r'<script\b[^>]+\bsrc=["\']https?://require\.nexx(?:\.cloud|cdn\.com)/(?P<id>\d+)',
-            webpage)
-        if mobj:
-            domain_id = mobj.group('id')
+        domain_id = NexxIE._extract_domain_id(webpage)
+        if domain_id:
             for video_id in re.findall(
                     r'(?is)onPLAYReady.+?_play\.init\s*\(.+?\s*,\s*["\']?(\d+)',
                     webpage):
@@ -112,7 +125,8 @@ class NexxIE(InfoExtractor):
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        domain_id, video_id = mobj.group('domain_id', 'id')
+        domain_id = mobj.group('domain_id') or mobj.group('domain_id_s')
+        video_id = mobj.group('id')
 
         # Reverse engineered from JS code (see getDeviceID function)
         device_id = '%d:%d:%d%d' % (
index fa4ef20c52959240af41a0c8a7b08c02fd3eb54c..b8fe244071d05e1daac7514b932be148802c21a7 100644 (file)
@@ -469,7 +469,7 @@ class SchoolTVIE(NPODataMidEmbedIE):
 
 class HetKlokhuisIE(NPODataMidEmbedIE):
     IE_NAME = 'hetklokhuis'
-    _VALID_URL = r'https?://(?:www\.)?hetklokhuis.nl/[^/]+/\d+/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?hetklokhuis\.nl/[^/]+/\d+/(?P<id>[^/?#&]+)'
 
     _TEST = {
         'url': 'http://hetklokhuis.nl/tv-uitzending/3471/Zwaartekrachtsgolven',
index 1bf96ea56e29849725db4b9c799ec63330c34224..a637c8ecfb0f03ecc46536363800c38f716fe557 100644 (file)
@@ -7,7 +7,7 @@ from .common import InfoExtractor
 
 
 class OnceIE(InfoExtractor):
-    _VALID_URL = r'https?://.+?\.unicornmedia\.com/now/[^/]+/[^/]+/(?P<domain_id>[^/]+)/(?P<application_id>[^/]+)/(?:[^/]+/)?(?P<media_item_id>[^/]+)/content\.(?:once|m3u8|mp4)'
+    _VALID_URL = r'https?://.+?\.unicornmedia\.com/now/(?:ads/vmap/)?[^/]+/[^/]+/(?P<domain_id>[^/]+)/(?P<application_id>[^/]+)/(?:[^/]+/)?(?P<media_item_id>[^/]+)/content\.(?:once|m3u8|mp4)'
     ADAPTIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/master/playlist/%s/%s/%s/content.m3u8'
     PROGRESSIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/media/progressive/%s/%s/%s/%s/content.mp4'
 
index 1d336cf3069d8aae29eeb4e90a7c3f20241cab2e..c6e3d5640da8f4f0040e862a9641fb73deb391a4 100644 (file)
@@ -13,11 +13,11 @@ from ..utils import (
 
 
 class OnionStudiosIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?onionstudios\.com/(?:videos/[^/]+-|embed\?.*\bid=)(?P<id>\d+)(?!-)'
+    _VALID_URL = r'https?://(?:www\.)?onionstudios\.com/(?:video(?:s/[^/]+-|/)|embed\?.*\bid=)(?P<id>\d+)(?!-)'
 
     _TESTS = [{
         'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937',
-        'md5': 'e49f947c105b8a78a675a0ee1bddedfe',
+        'md5': '719d1f8c32094b8c33902c17bcae5e34',
         'info_dict': {
             'id': '2937',
             'ext': 'mp4',
@@ -29,12 +29,15 @@ class OnionStudiosIE(InfoExtractor):
     }, {
         'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true',
         'only_matching': True,
+    }, {
+        'url': 'http://www.onionstudios.com/video/6139.json',
+        'only_matching': True,
     }]
 
     @staticmethod
     def _extract_url(webpage):
         mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?onionstudios\.com/embed.+?)\1', webpage)
+            r'(?s)<(?:iframe|bulbs-video)[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?onionstudios\.com/(?:embed.+?|video/\d+\.json))\1', webpage)
         if mobj:
             return mobj.group('url')
 
index a4a5d390ea0de5e471217151ffba26641b7efae2..ee04936e1a44819301207a362d7aa8242ff49271 100644 (file)
@@ -14,7 +14,7 @@ from ..utils import (
 
 
 class PornFlipIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[0-9A-Za-z]{11})'
+    _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[0-9A-Za-z-]{11})'
     _TESTS = [{
         'url': 'https://www.pornflip.com/v/wz7DfNhMmep',
         'md5': '98c46639849145ae1fd77af532a9278c',
@@ -34,6 +34,12 @@ class PornFlipIE(InfoExtractor):
     }, {
         'url': 'https://www.pornflip.com/embed/wz7DfNhMmep',
         'only_matching': True,
+    }, {
+        'url': 'https://www.pornflip.com/v/EkRD6-vS2-s',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.pornflip.com/embed/EkRD6-vS2-s',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index 01c85ee016306aa30b44c7521e736e6094b14f9c..f36bc648c28b31623b50b531ee053cfdae354320 100644 (file)
@@ -1,5 +1,7 @@
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
@@ -35,6 +37,8 @@ class RedditIE(InfoExtractor):
             'https://v.redd.it/%s/DASHPlaylist.mpd' % video_id, video_id,
             mpd_id='dash', fatal=False))
 
+        self._sort_formats(formats)
+
         return {
             'id': video_id,
             'title': video_id,
@@ -43,7 +47,7 @@ class RedditIE(InfoExtractor):
 
 
 class RedditRIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/]+)'
+    _VALID_URL = r'(?P<url>https?://(?:www\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/?#&]+))'
     _TESTS = [{
         'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
         'info_dict': {
@@ -81,10 +85,13 @@ class RedditRIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        url, video_id = mobj.group('url', 'id')
+
         video_id = self._match_id(url)
 
         data = self._download_json(
-            url + '.json', video_id)[0]['data']['children'][0]['data']
+            url + '/.json', video_id)[0]['data']['children'][0]['data']
 
         video_url = data['url']
 
index 3e22998c6d8384893ac801c98c4f6bde6988c2d1..bba25a233e3fef88ae8e06e504e4524e64c3bd1c 100644 (file)
@@ -12,10 +12,10 @@ class RtlNlIE(InfoExtractor):
     IE_NAME = 'rtl.nl'
     IE_DESC = 'rtl.nl and rtlxl.nl'
     _VALID_URL = r'''(?x)
-        https?://(?:www\.)?
+        https?://(?:(?:www|static)\.)?
         (?:
             rtlxl\.nl/[^\#]*\#!/[^/]+/|
-            rtl\.nl/(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html\b.+?\buuid=|video/)
+            rtl\.nl/(?:(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html|embed)\b.+?\buuid=|video/)
         )
         (?P<id>[0-9a-f-]+)'''
 
@@ -73,6 +73,9 @@ class RtlNlIE(InfoExtractor):
     }, {
         'url': 'https://www.rtl.nl/video/c603c9c2-601d-4b5e-8175-64f1e942dc7d/',
         'only_matching': True,
+    }, {
+        'url': 'https://static.rtl.nl/embed/?uuid=1a2970fc-5c0b-43ff-9fdc-927e39e6d1bc&autoplay=false&publicatiepunt=rtlnieuwsnl',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index 746677a24892f61249d32757ba5e4cac92d1f756..d9edf9da2f71cf6fbcf3c8bfd310c4818eced62c 100644 (file)
@@ -10,6 +10,7 @@ from ..compat import (
     compat_struct_unpack,
 )
 from ..utils import (
+    determine_ext,
     ExtractorError,
     float_or_none,
     remove_end,
@@ -84,6 +85,18 @@ class RTVEALaCartaIE(InfoExtractor):
             'title': 'TODO',
         },
         'skip': 'The f4m manifest can\'t be used yet',
+    }, {
+        'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/',
+        'md5': 'e55e162379ad587e9640eda4f7353c0f',
+        'info_dict': {
+            'id': '4236788',
+            'ext': 'mp4',
+            'title': 'Servir y proteger - Capítulo 104 ',
+            'duration': 3222.0,
+        },
+        'params': {
+            'skip_download': True,  # requires ffmpeg
+        },
     }, {
         'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
         'only_matching': True,
@@ -107,24 +120,41 @@ class RTVEALaCartaIE(InfoExtractor):
             video_id)['page']['items'][0]
         if info['state'] == 'DESPU':
             raise ExtractorError('The video is no longer available', expected=True)
+        title = info['title']
         png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id)
         png_request = sanitized_Request(png_url)
         png_request.add_header('Referer', url)
         png = self._download_webpage(png_request, video_id, 'Downloading url information')
         video_url = _decrypt_url(png)
-        if not video_url.endswith('.f4m'):
+        ext = determine_ext(video_url)
+
+        formats = []
+        if not video_url.endswith('.f4m') and ext != 'm3u8':
             if '?' not in video_url:
                 video_url = video_url.replace('resources/', 'auth/resources/')
             video_url = video_url.replace('.net.rtve', '.multimedia.cdn.rtve')
 
+        if ext == 'm3u8':
+            formats.extend(self._extract_m3u8_formats(
+                video_url, video_id, ext='mp4', entry_protocol='m3u8_native',
+                m3u8_id='hls', fatal=False))
+        elif ext == 'f4m':
+            formats.extend(self._extract_f4m_formats(
+                video_url, video_id, f4m_id='hds', fatal=False))
+        else:
+            formats.append({
+                'url': video_url,
+            })
+        self._sort_formats(formats)
+
         subtitles = None
         if info.get('sbtFile') is not None:
             subtitles = self.extract_subtitles(video_id, info['sbtFile'])
 
         return {
             'id': video_id,
-            'title': info['title'],
-            'url': video_url,
+            'title': title,
+            'formats': formats,
             'thumbnail': info.get('image'),
             'page_url': url,
             'subtitles': subtitles,
index 2b830cf477eef731caef1f2a6cddf10ef3efa14c..3c8053a2617669cc22549f62e386ac988b33d3c6 100644 (file)
@@ -25,7 +25,7 @@ class RUHDIE(InfoExtractor):
         video_url = self._html_search_regex(
             r'<param name="src" value="([^"]+)"', webpage, 'video url')
         title = self._html_search_regex(
-            r'<title>([^<]+)&nbsp;&nbsp; RUHD.ru - Видео Высокого качества №1 в России!</title>',
+            r'<title>([^<]+)&nbsp;&nbsp; RUHD\.ru - Видео Высокого качества №1 в России!</title>',
             webpage, 'title')
         description = self._html_search_regex(
             r'(?s)<div id="longdesc">(.+?)<span id="showlink">',
index 597d6f543a362791a0af0d111156bdac79ac4270..b446a02bace5f3e2679db7579a38e106c4f681ad 100644 (file)
 # coding: utf-8
 from __future__ import unicode_literals
 
-from .adobepass import AdobePassIE
+import datetime
+import json
+import hashlib
+import hmac
+import re
+
+from .common import InfoExtractor
+from .anvato import AnvatoIE
 from ..utils import (
-    int_or_none,
     smuggle_url,
-    update_url_query,
+    urlencode_postdata,
+    xpath_text,
 )
 
 
-class ScrippsNetworksWatchIE(AdobePassIE):
+class ScrippsNetworksWatchIE(InfoExtractor):
     IE_NAME = 'scrippsnetworks:watch'
-    _VALID_URL = r'https?://watch\.(?:hgtv|foodnetwork|travelchannel|diynetwork|cookingchanneltv)\.com/player\.[A-Z0-9]+\.html#(?P<id>\d+)'
-    _TEST = {
-        'url': 'http://watch.hgtv.com/player.HNT.html#0256538',
+    _VALID_URL = r'''(?x)
+                    https?://
+                        watch\.
+                        (?P<site>hgtv|foodnetwork|travelchannel|diynetwork|cookingchanneltv|geniuskitchen)\.com/
+                        (?:
+                            player\.[A-Z0-9]+\.html\#|
+                            show/(?:[^/]+/){2}|
+                            player/
+                        )
+                        (?P<id>\d+)
+                    '''
+    _TESTS = [{
+        'url': 'http://watch.hgtv.com/show/HGTVE/Best-Ever-Treehouses/2241515/Best-Ever-Treehouses/',
         'md5': '26545fd676d939954c6808274bdb905a',
         'info_dict': {
-            'id': '0256538',
+            'id': '4173834',
             'ext': 'mp4',
-            'title': 'Seeking a Wow House',
-            'description': 'Buyers retiring in Palm Springs, California, want a modern house with major wow factor. They\'re also looking for a pool and a large, open floorplan with tall windows looking out at the views.',
-            'uploader': 'SCNI',
-            'upload_date': '20170207',
-            'timestamp': 1486450493,
+            'title': 'Best Ever Treehouses',
+            'description': "We're searching for the most over the top treehouses.",
+            'uploader': 'ANV',
+            'upload_date': '20170922',
+            'timestamp': 1506056400,
+        },
+        'params': {
+            'skip_download': True,
         },
-        'skip': 'requires TV provider authentication',
+        'add_ie': [AnvatoIE.ie_key()],
+    }, {
+        'url': 'http://watch.diynetwork.com/show/DSAL/Salvage-Dawgs/2656646/Covington-Church/',
+        'only_matching': True,
+    }, {
+        'url': 'http://watch.diynetwork.com/player.HNT.html#2656646',
+        'only_matching': True,
+    }, {
+        'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/',
+        'only_matching': True,
+    }]
+
+    _SNI_TABLE = {
+        'hgtv': 'hgtv',
+        'diynetwork': 'diy',
+        'foodnetwork': 'food',
+        'cookingchanneltv': 'cook',
+        'travelchannel': 'trav',
+        'geniuskitchen': 'genius',
     }
+    _SNI_HOST = 'web.api.video.snidigital.com'
+
+    _AWS_REGION = 'us-east-1'
+    _AWS_IDENTITY_ID_JSON = json.dumps({
+        'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % _AWS_REGION
+    })
+    _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback'
+    _AWS_API_KEY = 'E7wSQmq0qK6xPrF13WmzKiHo4BQ7tip4pQcSXVl1'
+    _AWS_SERVICE = 'execute-api'
+    _AWS_REQUEST = 'aws4_request'
+    _AWS_SIGNED_HEADERS = ';'.join([
+        'host', 'x-amz-date', 'x-amz-security-token', 'x-api-key'])
+    _AWS_CANONICAL_REQUEST_TEMPLATE = '''GET
+%(uri)s
+
+host:%(host)s
+x-amz-date:%(date)s
+x-amz-security-token:%(token)s
+x-api-key:%(key)s
+
+%(signed_headers)s
+%(payload_hash)s'''
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-        channel = self._parse_json(self._search_regex(
-            r'"channels"\s*:\s*(\[.+\])',
-            webpage, 'channels'), video_id)[0]
-        video_data = next(v for v in channel['videos'] if v.get('nlvid') == video_id)
-        title = video_data['title']
-        release_url = video_data['releaseUrl']
-        if video_data.get('restricted'):
-            requestor_id = self._search_regex(
-                r'requestorId\s*=\s*"([^"]+)";', webpage, 'requestor id')
-            resource = self._get_mvpd_resource(
-                requestor_id, title, video_id,
-                video_data.get('ratings', [{}])[0].get('rating'))
-            auth = self._extract_mvpd_auth(
-                url, video_id, requestor_id, resource)
-            release_url = update_url_query(release_url, {'auth': auth})
-
-        return {
-            '_type': 'url_transparent',
-            'id': video_id,
-            'title': title,
-            'url': smuggle_url(release_url, {'force_smil_url': True}),
-            'description': video_data.get('description'),
-            'thumbnail': video_data.get('thumbnailUrl'),
-            'series': video_data.get('showTitle'),
-            'season_number': int_or_none(video_data.get('season')),
-            'episode_number': int_or_none(video_data.get('episodeNumber')),
-            'ie_key': 'ThePlatform',
+        mobj = re.match(self._VALID_URL, url)
+        site_id, video_id = mobj.group('site', 'id')
+
+        def aws_hash(s):
+            return hashlib.sha256(s.encode('utf-8')).hexdigest()
+
+        token = self._download_json(
+            'https://cognito-identity.us-east-1.amazonaws.com/', video_id,
+            data=self._AWS_IDENTITY_ID_JSON.encode('utf-8'),
+            headers={
+                'Accept': '*/*',
+                'Content-Type': 'application/x-amz-json-1.1',
+                'Referer': url,
+                'X-Amz-Content-Sha256': aws_hash(self._AWS_IDENTITY_ID_JSON),
+                'X-Amz-Target': 'AWSCognitoIdentityService.GetOpenIdToken',
+                'X-Amz-User-Agent': self._AWS_USER_AGENT,
+            })['Token']
+
+        sts = self._download_xml(
+            'https://sts.amazonaws.com/', video_id, data=urlencode_postdata({
+                'Action': 'AssumeRoleWithWebIdentity',
+                'RoleArn': 'arn:aws:iam::710330595350:role/Cognito_WebAPIUnauth_Role',
+                'RoleSessionName': 'web-identity',
+                'Version': '2011-06-15',
+                'WebIdentityToken': token,
+            }), headers={
+                'Referer': url,
+                'X-Amz-User-Agent': self._AWS_USER_AGENT,
+                'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
+            })
+
+        def get(key):
+            return xpath_text(
+                sts, './/{https://sts.amazonaws.com/doc/2011-06-15/}%s' % key,
+                fatal=True)
+
+        access_key_id = get('AccessKeyId')
+        secret_access_key = get('SecretAccessKey')
+        session_token = get('SessionToken')
+
+        # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
+        uri = '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id)
+        datetime_now = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
+        date = datetime_now[:8]
+        canonical_string = self._AWS_CANONICAL_REQUEST_TEMPLATE % {
+            'uri': uri,
+            'host': self._SNI_HOST,
+            'date': datetime_now,
+            'token': session_token,
+            'key': self._AWS_API_KEY,
+            'signed_headers': self._AWS_SIGNED_HEADERS,
+            'payload_hash': aws_hash(''),
         }
+
+        # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html
+        credential_string = '/'.join([date, self._AWS_REGION, self._AWS_SERVICE, self._AWS_REQUEST])
+        string_to_sign = '\n'.join([
+            'AWS4-HMAC-SHA256', datetime_now, credential_string,
+            aws_hash(canonical_string)])
+
+        # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
+        def aws_hmac(key, msg):
+            return hmac.new(key, msg.encode('utf-8'), hashlib.sha256)
+
+        def aws_hmac_digest(key, msg):
+            return aws_hmac(key, msg).digest()
+
+        def aws_hmac_hexdigest(key, msg):
+            return aws_hmac(key, msg).hexdigest()
+
+        k_secret = 'AWS4' + secret_access_key
+        k_date = aws_hmac_digest(k_secret.encode('utf-8'), date)
+        k_region = aws_hmac_digest(k_date, self._AWS_REGION)
+        k_service = aws_hmac_digest(k_region, self._AWS_SERVICE)
+        k_signing = aws_hmac_digest(k_service, self._AWS_REQUEST)
+
+        signature = aws_hmac_hexdigest(k_signing, string_to_sign)
+
+        auth_header = ', '.join([
+            'AWS4-HMAC-SHA256 Credential=%s' % '/'.join(
+                [access_key_id, date, self._AWS_REGION, self._AWS_SERVICE, self._AWS_REQUEST]),
+            'SignedHeaders=%s' % self._AWS_SIGNED_HEADERS,
+            'Signature=%s' % signature,
+        ])
+
+        mcp_id = self._download_json(
+            'https://%s%s' % (self._SNI_HOST, uri), video_id, headers={
+                'Accept': '*/*',
+                'Referer': url,
+                'Authorization': auth_header,
+                'X-Amz-Date': datetime_now,
+                'X-Amz-Security-Token': session_token,
+                'X-Api-Key': self._AWS_API_KEY,
+            })['results'][0]['mcpId']
+
+        return self.url_result(
+            smuggle_url(
+                'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id,
+                {'geo_countries': ['US']}),
+            AnvatoIE.ie_key(), video_id=mcp_id)
index 62d41e88a1084c58af259176e28a8b2654ccd4ee..374f7faf9d0becc45f8dd5e49d6774fc45b8e002 100644 (file)
@@ -18,46 +18,32 @@ from ..utils import (
 
 class ShahidIE(InfoExtractor):
     _NETRC_MACHINE = 'shahid'
-    _VALID_URL = r'https?://shahid\.mbc\.net/ar/(?P<type>episode|movie)/(?P<id>\d+)'
+    _VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)'
     _TESTS = [{
-        'url': 'https://shahid.mbc.net/ar/episode/90574/%D8%A7%D9%84%D9%85%D9%84%D9%83-%D8%B9%D8%A8%D8%AF%D8%A7%D9%84%D9%84%D9%87-%D8%A7%D9%84%D8%A5%D9%86%D8%B3%D8%A7%D9%86-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-3.html',
+        'url': 'https://shahid.mbc.net/ar/shows/%D9%85%D8%AC%D9%84%D8%B3-%D8%A7%D9%84%D8%B4%D8%A8%D8%A7%D8%A8-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-1/clip-275286',
         'info_dict': {
-            'id': '90574',
+            'id': '275286',
             'ext': 'mp4',
-            'title': 'الملك عبدالله الإنسان الموسم 1 كليب 3',
-            'description': 'الفيلم الوثائقي - الملك عبد الله الإنسان',
-            'duration': 2972,
-            'timestamp': 1422057420,
-            'upload_date': '20150123',
+            'title': 'مجلس الشباب الموسم 1 كليب 1',
+            'timestamp': 1506988800,
+            'upload_date': '20171003',
         },
         'params': {
             # m3u8 download
             'skip_download': True,
         }
     }, {
-        'url': 'https://shahid.mbc.net/ar/movie/151746/%D8%A7%D9%84%D9%82%D9%86%D8%A7%D8%B5%D8%A9.html',
+        'url': 'https://shahid.mbc.net/ar/movies/%D8%A7%D9%84%D9%82%D9%86%D8%A7%D8%B5%D8%A9/movie-151746',
         'only_matching': True
     }, {
         # shahid plus subscriber only
-        'url': 'https://shahid.mbc.net/ar/episode/90511/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1.html',
+        'url': 'https://shahid.mbc.net/ar/series/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/episode-90511',
         'only_matching': True
     }]
 
-    def _real_initialize(self):
-        email, password = self._get_login_info()
-        if email is None:
-            return
-
+    def _api2_request(self, *args, **kwargs):
         try:
-            user_data = self._download_json(
-                'https://shahid.mbc.net/wd/service/users/login',
-                None, 'Logging in', data=json.dumps({
-                    'email': email,
-                    'password': password,
-                    'basic': 'false',
-                }).encode('utf-8'), headers={
-                    'Content-Type': 'application/json; charset=UTF-8',
-                })['user']
+            return self._download_json(*args, **kwargs)
         except ExtractorError as e:
             if isinstance(e.cause, compat_HTTPError):
                 fail_data = self._parse_json(
@@ -69,6 +55,21 @@ class ShahidIE(InfoExtractor):
                         raise ExtractorError(faults_message, expected=True)
             raise
 
+    def _real_initialize(self):
+        email, password = self._get_login_info()
+        if email is None:
+            return
+
+        user_data = self._api2_request(
+            'https://shahid.mbc.net/wd/service/users/login',
+            None, 'Logging in', data=json.dumps({
+                'email': email,
+                'password': password,
+                'basic': 'false',
+            }).encode('utf-8'), headers={
+                'Content-Type': 'application/json; charset=UTF-8',
+            })['user']
+
         self._download_webpage(
             'https://shahid.mbc.net/populateContext',
             None, 'Populate Context', data=urlencode_postdata({
@@ -93,15 +94,17 @@ class ShahidIE(InfoExtractor):
 
     def _real_extract(self, url):
         page_type, video_id = re.match(self._VALID_URL, url).groups()
+        if page_type == 'clip':
+            page_type = 'episode'
 
-        player = self._get_api_data(self._download_json(
-            'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-%s.type-player.html' % video_id,
-            video_id, 'Downloading player JSON'))
+        playout = self._api2_request(
+            'https://api2.shahid.net/proxy/v2/playout/url/' + video_id,
+            video_id, 'Downloading player JSON')['playout']
 
-        if player.get('drm'):
+        if playout.get('drm'):
             raise ExtractorError('This video is DRM protected.', expected=True)
 
-        formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4')
+        formats = self._extract_m3u8_formats(playout['url'], video_id, 'mp4')
         self._sort_formats(formats)
 
         video = self._get_api_data(self._download_json(
diff --git a/youtube_dl/extractor/slideslive.py b/youtube_dl/extractor/slideslive.py
new file mode 100644 (file)
index 0000000..1045760
--- /dev/null
@@ -0,0 +1,34 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class SlidesLiveIE(InfoExtractor):
+    _VALID_URL = r'https?://slideslive\.com/(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'https://slideslive.com/38902413/gcc-ia16-backend',
+        'md5': 'b29fcd6c6952d0c79c5079b0e7a07e6f',
+        'info_dict': {
+            'id': 'LMtgR8ba0b0',
+            'ext': 'mp4',
+            'title': '38902413: external video',
+            'description': '3890241320170925-9-1yd6ech.mp4',
+            'uploader': 'SlidesLive Administrator',
+            'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
+            'upload_date': '20170925',
+        }
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        video_data = self._download_json(
+            url, video_id, headers={'Accept': 'application/json'})
+        service_name = video_data['video_service_name']
+        if service_name == 'YOUTUBE':
+            yt_video_id = video_data['video_service_id']
+            return self.url_result(yt_video_id, 'Youtube', video_id=yt_video_id)
+        else:
+            raise ExtractorError(
+                'Unsupported service name: {0}'.format(service_name), expected=True)
index c59896a17905c006eabb40271d846eebe7908a66..a7b1b3b5f3f83567554f42bd6536a65a70a9d99d 100644 (file)
@@ -44,6 +44,7 @@ class SpikeIE(MTVServicesInfoExtractor):
     _FEED_URL = 'http://www.spike.com/feeds/mrss/'
     _MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s'
     _CUSTOM_URL_REGEX = re.compile(r'spikenetworkapp://([^/]+/[-a-fA-F0-9]+)')
+    _GEO_COUNTRIES = ['US']
 
     def _extract_mgid(self, webpage):
         mgid = super(SpikeIE, self)._extract_mgid(webpage)
index cce65fb1014d3595670707d8009832ea37f448dc..ae3dd13807d0f9f46939f6dfab066a75eddd1ed1 100644 (file)
@@ -66,7 +66,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
                 r'(?s)<description>([^<]+)</description>',
                 coursepage, 'description', fatal=False)
 
-            links = orderedSet(re.findall(r'<a href="(VideoPage.php\?[^"]+)">', coursepage))
+            links = orderedSet(re.findall(r'<a href="(VideoPage\.php\?[^"]+)">', coursepage))
             info['entries'] = [self.url_result(
                 'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l)
             ) for l in links]
@@ -84,7 +84,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
             rootpage = self._download_webpage(rootURL, info['id'],
                                               errnote='Unable to download course info page')
 
-            links = orderedSet(re.findall(r'<a href="(CoursePage.php\?[^"]+)">', rootpage))
+            links = orderedSet(re.findall(r'<a href="(CoursePage\.php\?[^"]+)">', rootpage))
             info['entries'] = [self.url_result(
                 'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l)
             ) for l in links]
index 1a831ef6da5f4076dbbab4c989562d7e183d1f43..e5ac586a7f750b131de611ca9bf66b6f826e54dd 100644 (file)
@@ -4,8 +4,10 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
+    extract_attributes,
     ExtractorError,
-    unescapeHTML,
+    get_element_by_class,
+    js_to_json,
 )
 
 
@@ -25,35 +27,39 @@ class SteamIE(InfoExtractor):
         'url': 'http://store.steampowered.com/video/105600/',
         'playlist': [
             {
-                'md5': 'f870007cee7065d7c76b88f0a45ecc07',
+                'md5': '6a294ee0c4b1f47f5bb76a65e31e3592',
                 'info_dict': {
-                    'id': '81300',
-                    'ext': 'flv',
-                    'title': 'Terraria 1.1 Trailer',
+                    'id': '2040428',
+                    'ext': 'mp4',
+                    'title': 'Terraria 1.3 Trailer',
                     'playlist_index': 1,
                 }
             },
             {
-                'md5': '61aaf31a5c5c3041afb58fb83cbb5751',
+                'md5': '911672b20064ca3263fa89650ba5a7aa',
                 'info_dict': {
-                    'id': '80859',
-                    'ext': 'flv',
-                    'title': 'Terraria Trailer',
+                    'id': '2029566',
+                    'ext': 'mp4',
+                    'title': 'Terraria 1.2 Trailer',
                     'playlist_index': 2,
                 }
             }
         ],
+        'info_dict': {
+            'id': '105600',
+            'title': 'Terraria',
+        },
         'params': {
             'playlistend': 2,
         }
     }, {
         'url': 'http://steamcommunity.com/sharedfiles/filedetails/?id=242472205',
         'info_dict': {
-            'id': 'WB5DvDOOvAY',
+            'id': 'X8kpJBlzD2E',
             'ext': 'mp4',
-            'upload_date': '20140329',
-            'title': 'FRONTIERS - Final Greenlight Trailer',
-            'description': 'md5:dc96a773669d0ca1b36c13c1f30250d9',
+            'upload_date': '20140617',
+            'title': 'FRONTIERS - Trapping',
+            'description': 'md5:bf6f7f773def614054089e5769c12a6e',
             'uploader': 'AAD Productions',
             'uploader_id': 'AtomicAgeDogGames',
         }
@@ -76,48 +82,65 @@ class SteamIE(InfoExtractor):
             self.report_age_confirmation()
             webpage = self._download_webpage(videourl, playlist_id)
 
+        flash_vars = self._parse_json(self._search_regex(
+            r'(?s)rgMovieFlashvars\s*=\s*({.+?});', webpage,
+            'flash vars'), playlist_id, js_to_json)
+
+        playlist_title = None
+        entries = []
         if fileID:
-            playlist_title = self._html_search_regex(
-                r'<div class="workshopItemTitle">(.+)</div>', webpage, 'title')
-            mweb = re.finditer(r'''(?x)
-                'movie_(?P<videoID>[0-9]+)':\s*\{\s*
-                YOUTUBE_VIDEO_ID:\s*"(?P<youtube_id>[^"]+)",
-                ''', webpage)
-            videos = [{
-                '_type': 'url',
-                'url': vid.group('youtube_id'),
-                'ie_key': 'Youtube',
-            } for vid in mweb]
+            playlist_title = get_element_by_class('workshopItemTitle', webpage)
+            for movie in flash_vars.values():
+                if not movie:
+                    continue
+                youtube_id = movie.get('YOUTUBE_VIDEO_ID')
+                if not youtube_id:
+                    continue
+                entries.append({
+                    '_type': 'url',
+                    'url': youtube_id,
+                    'ie_key': 'Youtube',
+                })
         else:
-            playlist_title = self._html_search_regex(
-                r'<h2 class="pageheader">(.*?)</h2>', webpage, 'game title')
-
-            mweb = re.finditer(r'''(?x)
-                'movie_(?P<videoID>[0-9]+)':\s*\{\s*
-                FILENAME:\s*"(?P<videoURL>[\w:/\.\?=]+)"
-                (,\s*MOVIE_NAME:\s*\"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\},
-                ''', webpage)
-            titles = re.finditer(
-                r'<span class="title">(?P<videoName>.+?)</span>', webpage)
-            thumbs = re.finditer(
-                r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">', webpage)
-            videos = []
-
-            for vid, vtitle, thumb in zip(mweb, titles, thumbs):
-                video_id = vid.group('videoID')
-                title = vtitle.group('videoName')
-                video_url = vid.group('videoURL')
-                video_thumb = thumb.group('thumbnail')
-                if not video_url:
-                    raise ExtractorError('Cannot find video url for %s' % video_id)
-                videos.append({
+            playlist_title = get_element_by_class('apphub_AppName', webpage)
+            for movie_id, movie in flash_vars.items():
+                if not movie:
+                    continue
+                video_id = self._search_regex(r'movie_(\d+)', movie_id, 'video id', fatal=False)
+                title = movie.get('MOVIE_NAME')
+                if not title or not video_id:
+                    continue
+                entry = {
                     'id': video_id,
-                    'url': video_url,
-                    'ext': 'flv',
-                    'title': unescapeHTML(title),
-                    'thumbnail': video_thumb
-                })
-        if not videos:
+                    'title': title.replace('+', ' '),
+                }
+                formats = []
+                flv_url = movie.get('FILENAME')
+                if flv_url:
+                    formats.append({
+                        'format_id': 'flv',
+                        'url': flv_url,
+                    })
+                highlight_element = self._search_regex(
+                    r'(<div[^>]+id="highlight_movie_%s"[^>]+>)' % video_id,
+                    webpage, 'highlight element', fatal=False)
+                if highlight_element:
+                    highlight_attribs = extract_attributes(highlight_element)
+                    if highlight_attribs:
+                        entry['thumbnail'] = highlight_attribs.get('data-poster')
+                        for quality in ('', '-hd'):
+                            for ext in ('webm', 'mp4'):
+                                video_url = highlight_attribs.get('data-%s%s-source' % (ext, quality))
+                                if video_url:
+                                    formats.append({
+                                        'format_id': ext + quality,
+                                        'url': video_url,
+                                    })
+                if not formats:
+                    continue
+                entry['formats'] = formats
+                entries.append(entry)
+        if not entries:
             raise ExtractorError('Could not find any videos')
 
-        return self.playlist_result(videos, playlist_id, playlist_title)
+        return self.playlist_result(entries, playlist_id, playlist_title)
index de236bbba899837f87a748cb7aab6cb8182b77c4..b1a985ff6c12368347d98d95beed6a042e70093c 100644 (file)
@@ -216,7 +216,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
         def hex_to_bytes(hex):
             return binascii.a2b_hex(hex.encode('ascii'))
 
-        relative_path = re.match(r'https?://link.theplatform.com/s/([^?]+)', url).group(1)
+        relative_path = re.match(r'https?://link\.theplatform\.com/s/([^?]+)', url).group(1)
         clear_text = hex_to_bytes(flags + expiration_date + str_to_hex(relative_path))
         checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest()
         sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
index 33683b139dee3cbf2513a30efb979701c5f93ee9..dc3dd03c823d7ec7878943b5f99bc14d266a6257 100644 (file)
@@ -57,10 +57,10 @@ class ThisAVIE(InfoExtractor):
                 info_dict = self._extract_jwplayer_data(
                     webpage, video_id, require_title=False)
         uploader = self._html_search_regex(
-            r': <a href="http://www.thisav.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>',
+            r': <a href="http://www\.thisav\.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>',
             webpage, 'uploader name', fatal=False)
         uploader_id = self._html_search_regex(
-            r': <a href="http://www.thisav.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>',
+            r': <a href="http://www\.thisav\.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>',
             webpage, 'uploader id', fatal=False)
 
         info_dict.update({
index c44018aec39c56eb4b8dccc8b4d001f783e2a19d..36f6c16732c7217141e860456788d389eb6bdd94 100644 (file)
@@ -13,11 +13,11 @@ from ..utils import (
 
 
 class TubiTvIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?tubitv\.com/video/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?tubitv\.com/(?:video|movies|tv-shows)/(?P<id>[0-9]+)'
     _LOGIN_URL = 'http://tubitv.com/login'
     _NETRC_MACHINE = 'tubitv'
     _GEO_COUNTRIES = ['US']
-    _TEST = {
+    _TESTS = [{
         'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday',
         'md5': '43ac06be9326f41912dc64ccf7a80320',
         'info_dict': {
@@ -27,7 +27,13 @@ class TubiTvIE(InfoExtractor):
             'description': 'A stand up comedian is forced to look at the decisions in his life while on a one week trip to the west coast.',
             'uploader_id': 'bc168bee0d18dd1cb3b86c68706ab434',
         },
-    }
+    }, {
+        'url': 'http://tubitv.com/tv-shows/321886/s01_e01_on_nom_stories',
+        'only_matching': True,
+    }, {
+        'url': 'http://tubitv.com/movies/383676/tracker',
+        'only_matching': True,
+    }]
 
     def _login(self):
         (username, password) = self._get_login_info()
index 3ced098f929e4fe155cd28cd28c5610658ad2260..b57abeaa49b0620184f9f4a96145303aec1ceeba 100644 (file)
@@ -3,52 +3,50 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from ..utils import (
-    int_or_none,
-    parse_iso8601,
+    float_or_none,
     smuggle_url,
 )
 
 
 class TVAIE(InfoExtractor):
-    _VALID_URL = r'https?://videos\.tva\.ca/episode/(?P<id>\d+)'
+    _VALID_URL = r'https?://videos\.tva\.ca/details/_(?P<id>\d+)'
     _TEST = {
-        'url': 'http://videos.tva.ca/episode/85538',
+        'url': 'https://videos.tva.ca/details/_5596811470001',
         'info_dict': {
-            'id': '85538',
+            'id': '5596811470001',
             'ext': 'mp4',
-            'title': 'Épisode du 25 janvier 2017',
-            'description': 'md5:e9e7fb5532ab37984d2dc87229cadf98',
-            'upload_date': '20170126',
-            'timestamp': 1485442329,
+            'title': 'Un extrait de l\'épisode du dimanche 8 octobre 2017 !',
+            'uploader_id': '5481942443001',
+            'upload_date': '20171003',
+            'timestamp': 1507064617,
         },
         'params': {
             # m3u8 download
             'skip_download': True,
         }
     }
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5481942443001/default_default/index.html?videoId=%s'
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         video_data = self._download_json(
-            "https://d18jmrhziuoi7p.cloudfront.net/isl/api/v1/dataservice/Items('%s')" % video_id,
-            video_id, query={
-                '$expand': 'Metadata,CustomId',
-                '$select': 'Metadata,Id,Title,ShortDescription,LongDescription,CreatedDate,CustomId,AverageUserRating,Categories,ShowName',
-                '$format': 'json',
+            'https://videos.tva.ca/proxy/item/_' + video_id, video_id, headers={
+                'Accept': 'application/json',
             })
-        metadata = video_data.get('Metadata', {})
+
+        def get_attribute(key):
+            for attribute in video_data.get('attributes', []):
+                if attribute.get('key') == key:
+                    return attribute.get('value')
+            return None
 
         return {
             '_type': 'url_transparent',
             'id': video_id,
-            'title': video_data['Title'],
-            'url': smuggle_url('ooyala:' + video_data['CustomId'], {'supportedformats': 'm3u8,hds'}),
-            'description': video_data.get('LongDescription') or video_data.get('ShortDescription'),
-            'series': video_data.get('ShowName'),
-            'episode': metadata.get('EpisodeTitle'),
-            'episode_number': int_or_none(metadata.get('EpisodeNumber')),
-            'categories': video_data.get('Categories'),
-            'average_rating': video_data.get('AverageUserRating'),
-            'timestamp': parse_iso8601(video_data.get('CreatedDate')),
-            'ie_key': 'Ooyala',
+            'title': get_attribute('title'),
+            'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {'geo_countries': ['CA']}),
+            'description': get_attribute('description'),
+            'thumbnail': get_attribute('image-background') or get_attribute('image-landscape'),
+            'duration': float_or_none(get_attribute('video-duration'), 1000),
+            'ie_key': 'BrightcoveNew',
         }
index 12ed6039cb758af5563232a6f62497446a798cfb..6590e1fd01801f1825a0bb102197b7e2449b90dd 100644 (file)
@@ -9,7 +9,7 @@ from ..utils import (
 
 
 class TVN24IE(InfoExtractor):
-    _VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)\.html'
+    _VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)'
     _TESTS = [{
         'url': 'http://www.tvn24.pl/wiadomosci-z-kraju,3/oredzie-artura-andrusa,702428.html',
         'md5': 'fbdec753d7bc29d96036808275f2130c',
@@ -18,7 +18,7 @@ class TVN24IE(InfoExtractor):
             'ext': 'mp4',
             'title': '"Święta mają być wesołe, dlatego, ludziska, wszyscy pod jemiołę"',
             'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości "Szkła kontaktowego".',
-            'thumbnail': 're:http://.*[.]jpeg',
+            'thumbnail': 're:https?://.*[.]jpeg',
         }
     }, {
         'url': 'http://fakty.tvn24.pl/ogladaj-online,60/53-konferencja-bezpieczenstwa-w-monachium,716431.html',
@@ -29,6 +29,9 @@ class TVN24IE(InfoExtractor):
     }, {
         'url': 'http://tvn24bis.pl/poranek,146,m/gen-koziej-w-tvn24-bis-wracamy-do-czasow-zimnej-wojny,715660.html',
         'only_matching': True,
+    }, {
+        'url': 'https://www.tvn24.pl/magazyn-tvn24/angie-w-jednej-czwartej-polka-od-szarej-myszki-do-cesarzowej-europy,119,2158',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index c5b3288ad754d9d8c66b977446d2db668871bbef..3954f0b93812e5bd58d950e8401c954d0ea0a7c6 100644 (file)
@@ -15,16 +15,16 @@ from ..utils import (
 class TVPIE(InfoExtractor):
     IE_NAME = 'tvp'
     IE_DESC = 'Telewizja Polska'
-    _VALID_URL = r'https?://[^/]+\.tvp\.(?:pl|info)/(?:(?!\d+/)[^/]+/)*(?P<id>\d+)'
+    _VALID_URL = r'https?://[^/]+\.tvp\.(?:pl|info)/(?:video/(?:[^,\s]*,)*|(?:(?!\d+/)[^/]+/)*)(?P<id>\d+)'
 
     _TESTS = [{
-        'url': 'http://vod.tvp.pl/194536/i-seria-odc-13',
+        'url': 'https://vod.tvp.pl/video/czas-honoru,i-seria-odc-13,194536',
         'md5': '8aa518c15e5cc32dfe8db400dc921fbb',
         'info_dict': {
             'id': '194536',
             'ext': 'mp4',
             'title': 'Czas honoru, I seria – odc. 13',
-            'description': 'md5:76649d2014f65c99477be17f23a4dead',
+            'description': 'md5:381afa5bca72655fe94b05cfe82bf53d',
         },
     }, {
         'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176',
@@ -37,12 +37,13 @@ class TVPIE(InfoExtractor):
         },
     }, {
         # page id is not the same as video id(#7799)
-        'url': 'http://vod.tvp.pl/22704887/08122015-1500',
-        'md5': 'cf6a4705dfd1489aef8deb168d6ba742',
+        'url': 'https://wiadomosci.tvp.pl/33908820/28092017-1930',
+        'md5': '84cd3c8aec4840046e5ab712416b73d0',
         'info_dict': {
-            'id': '22680786',
+            'id': '33908820',
             'ext': 'mp4',
-            'title': 'Wiadomości, 08.12.2015, 15:00',
+            'title': 'Wiadomości, 28.09.2017, 19:30',
+            'description': 'Wydanie główne codziennego serwisu informacyjnego.'
         },
     }, {
         'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272',
index 0df3ad7c7c0f61b6529f0e5d09fd1694e9c84f04..1b0b9637160f3c096b4baf07f6146bc9d84a31c8 100644 (file)
@@ -174,7 +174,7 @@ class TwitterCardIE(TwitterBaseIE):
         webpage = self._download_webpage(url, video_id)
 
         iframe_url = self._html_search_regex(
-            r'<iframe[^>]+src="((?:https?:)?//(?:www.youtube.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"',
+            r'<iframe[^>]+src="((?:https?:)?//(?:www\.youtube\.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"',
             webpage, 'video iframe', default=None)
         if iframe_url:
             return self.url_result(iframe_url)
index daf45d0b4e1a3710832875f79e160ebc759849dd..2c8e5c7b41eba5220981a215a566fc8fc565320e 100644 (file)
@@ -1,7 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import json
 import re
 
 from .common import InfoExtractor
@@ -29,6 +28,7 @@ class UDNEmbedIE(InfoExtractor):
             # m3u8 download
             'skip_download': True,
         },
+        'expected_warnings': ['Failed to parse JSON Expecting value'],
     }, {
         'url': 'https://video.udn.com/embed/news/300040',
         'only_matching': True,
@@ -43,10 +43,21 @@ class UDNEmbedIE(InfoExtractor):
 
         page = self._download_webpage(url, video_id)
 
-        options = json.loads(js_to_json(self._html_search_regex(
-            r'var\s+options\s*=\s*([^;]+);', page, 'video urls dictionary')))
-
-        video_urls = options['video']
+        options_str = self._html_search_regex(
+            r'var\s+options\s*=\s*([^;]+);', page, 'options')
+        trans_options_str = js_to_json(options_str)
+        options = self._parse_json(trans_options_str, 'options', fatal=False) or {}
+        if options:
+            video_urls = options['video']
+            title = options['title']
+            poster = options.get('poster')
+        else:
+            video_urls = self._parse_json(self._html_search_regex(
+                r'"video"\s*:\s*({.+?})\s*,', trans_options_str, 'video urls'), 'video urls')
+            title = self._html_search_regex(
+                r"title\s*:\s*'(.+?)'\s*,", options_str, 'title')
+            poster = self._html_search_regex(
+                r"poster\s*:\s*'(.+?)'\s*,", options_str, 'poster', default=None)
 
         if video_urls.get('youtube'):
             return self.url_result(video_urls.get('youtube'), 'Youtube')
@@ -68,7 +79,7 @@ class UDNEmbedIE(InfoExtractor):
                 formats.extend(self._extract_f4m_formats(
                     video_url, video_id, f4m_id='hds'))
             else:
-                mobj = re.search(r'_(?P<height>\d+)p_(?P<tbr>\d+).mp4', video_url)
+                mobj = re.search(r'_(?P<height>\d+)p_(?P<tbr>\d+)\.mp4', video_url)
                 a_format = {
                     'url': video_url,
                     # video_type may be 'mp4', which confuses YoutubeDL
@@ -83,14 +94,9 @@ class UDNEmbedIE(InfoExtractor):
 
         self._sort_formats(formats)
 
-        thumbnails = [{
-            'url': img_url,
-            'id': img_type,
-        } for img_type, img_url in options.get('gallery', [{}])[0].items() if img_url]
-
         return {
             'id': video_id,
             'formats': formats,
-            'title': options['title'],
-            'thumbnails': thumbnails,
+            'title': title,
+            'thumbnail': poster,
         }
index 570fa45ea7d7e492ac924882f04708d4ecbf5c59..dff94a2b845e8d4fffc9ed6e5936b8399e10ae43 100644 (file)
+# coding: utf-8
 from __future__ import unicode_literals
 
-from .mtv import MTVIE
+from .mtv import MTVServicesInfoExtractor
 
-import re
-from ..utils import fix_xml_ampersands
 
-
-class VH1IE(MTVIE):
+class VH1IE(MTVServicesInfoExtractor):
     IE_NAME = 'vh1.com'
-    _FEED_URL = 'http://www.vh1.com/player/embed/AS3/fullepisode/rss/'
+    _FEED_URL = 'http://www.vh1.com/feeds/mrss/'
     _TESTS = [{
-        'url': 'http://www.vh1.com/video/metal-evolution/full-episodes/progressive-metal/1678612/playlist.jhtml',
-        'playlist': [
-            {
-                'md5': '7827a7505f59633983165bbd2c119b52',
-                'info_dict': {
-                    'id': '731565',
-                    'ext': 'mp4',
-                    'title': 'Metal Evolution: Ep. 11 Act 1',
-                    'description': 'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 12 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.'
-                }
-            },
-            {
-                'md5': '34fb4b7321c546b54deda2102a61821f',
-                'info_dict': {
-                    'id': '731567',
-                    'ext': 'mp4',
-                    'title': 'Metal Evolution: Ep. 11 Act 2',
-                    'description': 'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 11 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.'
-                }
-            },
-            {
-                'md5': '813f38dba4c1b8647196135ebbf7e048',
-                'info_dict': {
-                    'id': '731568',
-                    'ext': 'mp4',
-                    'title': 'Metal Evolution: Ep. 11 Act 3',
-                    'description': 'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 11 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.'
-                }
-            },
-            {
-                'md5': '51adb72439dfaed11c799115d76e497f',
-                'info_dict': {
-                    'id': '731569',
-                    'ext': 'mp4',
-                    'title': 'Metal Evolution: Ep. 11 Act 4',
-                    'description': 'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 11 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.'
-                }
-            },
-            {
-                'md5': '93d554aaf79320703b73a95288c76a6e',
-                'info_dict': {
-                    'id': '731570',
-                    'ext': 'mp4',
-                    'title': 'Metal Evolution: Ep. 11 Act 5',
-                    'description': 'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 11 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.'
-                }
-            }
-        ],
-        'skip': 'Blocked outside the US',
-    }, {
-        # Clip
-        'url': 'http://www.vh1.com/video/misc/706675/metal-evolution-episode-1-pre-metal-show-clip.jhtml#id=1674118',
-        'md5': '7d67cf6d9cdc6b4f3d3ac97a55403844',
+        'url': 'http://www.vh1.com/episodes/0umwpq/hip-hop-squares-kent-jones-vs-nick-young-season-1-ep-120',
         'info_dict': {
-            'id': '706675',
-            'ext': 'mp4',
-            'title': 'Metal Evolution: Episode 1 Pre-Metal Show Clip',
-            'description': 'The greatest documentary ever made about Heavy Metal begins as our host Sam Dunn travels the globe to seek out the origins and influences that helped create Heavy Metal. Sam speaks to legends like Kirk Hammett, Alice Cooper, Slash, Bill Ward, Geezer Butler, Tom Morello, Ace Frehley, Lemmy Kilmister, Dave Davies, and many many more. This episode is the prologue for the 11 hour series, and Sam goes back to the very beginning to reveal how Heavy Metal was created.'
+            'title': 'Kent Jones vs. Nick Young',
+            'description': 'Come to Play. Stay to Party. With Mike Epps, TIP, O’Shea Jackson Jr., T-Pain, Tisha Campbell-Martin and more.',
         },
-        'skip': 'Blocked outside the US',
+        'playlist_mincount': 4,
     }, {
-        # Short link
-        'url': 'http://www.vh1.com/video/play.jhtml?id=1678353',
-        'md5': '853192b87ad978732b67dd8e549b266a',
+        # Clip
+        'url': 'http://www.vh1.com/video-clips/t74mif/scared-famous-scared-famous-extended-preview',
         'info_dict': {
-            'id': '730355',
+            'id': '0a50c2d2-a86b-4141-9565-911c7e2d0b92',
             'ext': 'mp4',
-            'title': 'Metal Evolution: Episode 11 Progressive Metal Sneak',
-            'description': 'In Metal Evolution\'s finale sneak, Sam sits with Michael Giles of King Crimson and gets feedback from Metallica guitarist Kirk Hammett on why the group was influential.'
+            'title': 'Scared Famous|October 9, 2017|1|NO-EPISODE#|Scared Famous + Extended Preview',
+            'description': 'md5:eff5551a274c473a29463de40f7b09da',
+            'upload_date': '20171009',
+            'timestamp': 1507574700,
         },
-        'skip': 'Blocked outside the US',
-    }, {
-        'url': 'http://www.vh1.com/video/macklemore-ryan-lewis/900535/cant-hold-us-ft-ray-dalton.jhtml',
-        'md5': 'b1bcb5b4380c9d7f544065589432dee7',
-        'info_dict': {
-            'id': '900535',
-            'ext': 'mp4',
-            'title': 'Macklemore & Ryan Lewis - "Can\'t Hold Us ft. Ray Dalton"',
-            'description': 'The Heist'
+        'params': {
+            # m3u8 download
+            'skip_download': True,
         },
-        'skip': 'Blocked outside the US',
     }]
 
-    _VALID_URL = r'''(?x)
-        https?://www\.vh1\.com/video/
-        (?:
-            .+?/full-episodes/.+?/(?P<playlist_id>[^/]+)/playlist\.jhtml
-        |
-            (?:
-            play.jhtml\?id=|
-            misc/.+?/.+?\.jhtml\#id=
-            )
-            (?P<video_id>[0-9]+)$
-        |
-            [^/]+/(?P<music_id>[0-9]+)/[^/]+?
-        )
-    '''
+    _VALID_URL = r'https?://(?:www\.)?vh1\.com/(?:video-clips|episodes)/(?P<id>[^/?#.]+)'
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        if mobj.group('music_id'):
-            id_field = 'vid'
-            video_id = mobj.group('music_id')
-        else:
-            video_id = mobj.group('playlist_id') or mobj.group('video_id')
-            id_field = 'id'
-        doc_url = '%s?%s=%s' % (self._FEED_URL, id_field, video_id)
-
-        idoc = self._download_xml(
-            doc_url, video_id,
-            'Downloading info', transform_source=fix_xml_ampersands)
-
-        entries = []
-        for item in idoc.findall('.//item'):
-            info = self._get_video_info(item)
-            if info:
-                entries.append(info)
-
-        return self.playlist_result(entries, playlist_id=video_id)
+        playlist_id = self._match_id(url)
+        webpage = self._download_webpage(url, playlist_id)
+        mgid = self._extract_triforce_mgid(webpage)
+        videos_info = self._get_videos_info(mgid)
+        return videos_info
index b8b8bf97968ea430a445c521622461736346af7b..bcc28693a4545f9260f5e7942bcac4942faac380 100644 (file)
@@ -198,7 +198,7 @@ class ViceShowIE(InfoExtractor):
 
 class ViceArticleIE(InfoExtractor):
     IE_NAME = 'vice:article'
-    _VALID_URL = r'https://www.vice.com/[^/]+/article/(?P<id>[^?#]+)'
+    _VALID_URL = r'https://www\.vice\.com/[^/]+/article/(?P<id>[^?#]+)'
 
     _TESTS = [{
         'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah',
index 5de8273c34aa61a6e1e79ce8b7d142ab9135d35d..cf690d7b0a669b67b84d2aedae992079040e7a64 100644 (file)
@@ -26,7 +26,7 @@ class VideoPremiumIE(InfoExtractor):
         webpage_url = 'http://videopremium.tv/' + video_id
         webpage = self._download_webpage(webpage_url, video_id)
 
-        if re.match(r'^<html><head><script[^>]*>window.location\s*=', webpage):
+        if re.match(r'^<html><head><script[^>]*>window\.location\s*=', webpage):
             # Download again, we need a cookie
             webpage = self._download_webpage(
                 webpage_url, video_id,
index f8e33149398bde16115114bb0323d2f286ee9d42..c7a0a88fe896ac408f0cf1b559929d98b7f6a8b7 100644 (file)
@@ -2,11 +2,44 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from .once import OnceIE
 from ..compat import compat_urllib_parse_unquote
+from ..utils import ExtractorError
+
+
+class VoxMediaVolumeIE(OnceIE):
+    _VALID_URL = r'https?://volume\.vox-cdn\.com/embed/(?P<id>[0-9a-f]{9})'
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        video_data = self._parse_json(self._search_regex(
+            r'Volume\.createVideo\(({.+})\s*,\s*{.*}\s*,\s*\[.*\]\s*,\s*{.*}\);', webpage, 'video data'), video_id)
+        for provider_video_type in ('ooyala', 'youtube', 'brightcove'):
+            provider_video_id = video_data.get('%s_id' % provider_video_type)
+            if not provider_video_id:
+                continue
+            info = {
+                'id': video_id,
+                'title': video_data.get('title_short'),
+                'description': video_data.get('description_long') or video_data.get('description_short'),
+                'thumbnail': video_data.get('brightcove_thumbnail')
+            }
+            if provider_video_type == 'brightcove':
+                info['formats'] = self._extract_once_formats(provider_video_id)
+                self._sort_formats(info['formats'])
+            else:
+                info.update({
+                    '_type': 'url_transparent',
+                    'url': provider_video_id if provider_video_type == 'youtube' else '%s:%s' % (provider_video_type, provider_video_id),
+                    'ie_key': provider_video_type.capitalize(),
+                })
+            return info
+        raise ExtractorError('Unable to find provider video id')
 
 
 class VoxMediaIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?:theverge|vox|sbnation|eater|polygon|curbed|racked)\.com/(?:[^/]+/)*(?P<id>[^/?]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:(?:theverge|vox|sbnation|eater|polygon|curbed|racked)\.com|recode\.net)/(?:[^/]+/)*(?P<id>[^/?]+)'
     _TESTS = [{
         'url': 'http://www.theverge.com/2014/6/27/5849272/material-world-how-google-discovered-what-software-is-made-of',
         'info_dict': {
@@ -31,6 +64,7 @@ class VoxMediaIE(InfoExtractor):
             'description': 'md5:87a51fe95ff8cea8b5bdb9ac7ae6a6af',
         },
         'add_ie': ['Ooyala'],
+        'skip': 'Video Not Found',
     }, {
         # volume embed
         'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill',
@@ -84,6 +118,17 @@ class VoxMediaIE(InfoExtractor):
                 'description': 'md5:e02d56b026d51aa32c010676765a690d',
             },
         }],
+    }, {
+        # volume embed, Brightcove Once
+        'url': 'https://www.recode.net/2014/6/17/11628066/post-post-pc-ceo-the-full-code-conference-video-of-microsofts-satya',
+        'md5': '01571a896281f77dc06e084138987ea2',
+        'info_dict': {
+            'id': '1231c973d',
+            'ext': 'mp4',
+            'title': 'Post-Post-PC CEO: The Full Code Conference Video of Microsoft\'s Satya Nadella',
+            'description': 'The longtime veteran was chosen earlier this year as the software giant\'s third leader in its history.',
+        },
+        'add_ie': ['VoxMediaVolume'],
     }]
 
     def _real_extract(self, url):
@@ -91,9 +136,14 @@ class VoxMediaIE(InfoExtractor):
         webpage = compat_urllib_parse_unquote(self._download_webpage(url, display_id))
 
         def create_entry(provider_video_id, provider_video_type, title=None, description=None):
+            video_url = {
+                'youtube': '%s',
+                'ooyala': 'ooyala:%s',
+                'volume': 'http://volume.vox-cdn.com/embed/%s',
+            }[provider_video_type] % provider_video_id
             return {
                 '_type': 'url_transparent',
-                'url': provider_video_id if provider_video_type == 'youtube' else '%s:%s' % (provider_video_type, provider_video_id),
+                'url': video_url,
                 'title': title or self._og_search_title(webpage),
                 'description': description or self._og_search_description(webpage),
             }
@@ -124,17 +174,7 @@ class VoxMediaIE(InfoExtractor):
         volume_uuid = self._search_regex(
             r'data-volume-uuid="([^"]+)"', webpage, 'volume uuid', default=None)
         if volume_uuid:
-            volume_webpage = self._download_webpage(
-                'http://volume.vox-cdn.com/embed/%s' % volume_uuid, volume_uuid)
-            video_data = self._parse_json(self._search_regex(
-                r'Volume\.createVideo\(({.+})\s*,\s*{.*}\s*,\s*\[.*\]\s*,\s*{.*}\);', volume_webpage, 'video data'), volume_uuid)
-            for provider_video_type in ('ooyala', 'youtube'):
-                provider_video_id = video_data.get('%s_id' % provider_video_type)
-                if provider_video_id:
-                    description = video_data.get('description_long') or video_data.get('description_short')
-                    entries.append(create_entry(
-                        provider_video_id, provider_video_type, video_data.get('title_short'), description))
-                    break
+            entries.append(create_entry(volume_uuid, 'volume'))
 
         if len(entries) == 1:
             return entries[0]
index d44ec85fd8955c21a55b187a597ed2766b10befb..656a4b9e5a68afddcee721321dccab2104074aeb 100644 (file)
@@ -133,7 +133,7 @@ class VVVVIDIE(InfoExtractor):
             'season_id': season_id,
             'season_number': video_data.get('season_number'),
             'episode_id': str_or_none(video_data.get('id')),
-            'epidode_number': int_or_none(video_data.get('number')),
+            'episode_number': int_or_none(video_data.get('number')),
             'episode_title': video_data['title'],
             'view_count': int_or_none(video_data.get('views')),
             'like_count': int_or_none(video_data.get('video_likes')),
index 8bb7362bbc1bec46dcef081bb08a02728d4af3e0..621de1e1efb73a9a377a46fe0fa702e595c3cde5 100644 (file)
@@ -22,8 +22,13 @@ class WDRBaseIE(InfoExtractor):
         # for wdrmaus, in a tag with the class "videoButton" (previously a link
         # to the page in a multiline "videoLink"-tag)
         json_metadata = self._html_search_regex(
-            r'class=(?:"(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"',
-            webpage, 'media link', default=None, flags=re.MULTILINE)
+            r'''(?sx)class=
+                    (?:
+                        (["\'])(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b.*?\1[^>]+|
+                        (["\'])videoLink\b.*?\2[\s]*>\n[^\n]*
+                    )data-extension=(["\'])(?P<data>(?:(?!\3).)+)\3
+            ''',
+            webpage, 'media link', default=None, group='data')
 
         if not json_metadata:
             return
index c42b59e51f31a02c8508217c1246d5f0a8c4ca9d..be3624ef2ea889835992e55949271c0028e9783b 100644 (file)
@@ -221,7 +221,7 @@ class XHamsterEmbedIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         video_url = self._search_regex(
-            r'href="(https?://xhamster\.com/movies/%s/[^"]*\.html[^"]*)"' % video_id,
+            r'href="(https?://xhamster\.com/(?:movies/{0}/[^"]*\.html|videos/[^/]*-{0})[^"]*)"'.format(video_id),
             webpage, 'xhamster url', default=None)
 
         if not video_url:
index bea9b87ad4123f90bcf554d7115cfd35d431afe6..c6c0b3291c8320064fa0a7529be5b5d78f14461c 100644 (file)
@@ -18,7 +18,7 @@ class XTubeIE(InfoExtractor):
     _VALID_URL = r'''(?x)
                         (?:
                             xtube:|
-                            https?://(?:www\.)?xtube\.com/(?:watch\.php\?.*\bv=|video-watch/(?P<display_id>[^/]+)-)
+                            https?://(?:www\.)?xtube\.com/(?:watch\.php\?.*\bv=|video-watch/(?:embedded/)?(?P<display_id>[^/]+)-)
                         )
                         (?P<id>[^/?&#]+)
                     '''
@@ -64,6 +64,9 @@ class XTubeIE(InfoExtractor):
     }, {
         'url': 'xtube:kVTUy_G222_',
         'only_matching': True,
+    }, {
+        'url': 'https://www.xtube.com/video-watch/embedded/milf-tara-and-teen-shared-and-cum-covered-extreme-bukkake-32203482?embedsize=big',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index eca603028d9ac91cf98e3da6c3651444f1653730..085c8d4f35a68c74c61ca9af571d421a68783d93 100644 (file)
@@ -14,8 +14,16 @@ from ..utils import (
 
 
 class XVideosIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?xvideos\.com/video(?P<id>[0-9]+)(?:.*)'
-    _TEST = {
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            (?:www\.)?xvideos\.com/video|
+                            flashservice\.xvideos\.com/embedframe/|
+                            static-hw\.xvideos\.com/swf/xv-player\.swf\?.*?\bid_video=
+                        )
+                        (?P<id>[0-9]+)
+                    '''
+    _TESTS = [{
         'url': 'http://www.xvideos.com/video4588838/biker_takes_his_girl',
         'md5': '14cea69fcb84db54293b1e971466c2e1',
         'info_dict': {
@@ -25,21 +33,33 @@ class XVideosIE(InfoExtractor):
             'duration': 108,
             'age_limit': 18,
         }
-    }
+    }, {
+        'url': 'https://flashservice.xvideos.com/embedframe/4588838',
+        'only_matching': True,
+    }, {
+        'url': 'http://static-hw.xvideos.com/swf/xv-player.swf?id_video=4588838',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+
+        webpage = self._download_webpage(
+            'http://www.xvideos.com/video%s/' % video_id, video_id)
 
         mobj = re.search(r'<h1 class="inlineError">(.+?)</h1>', webpage)
         if mobj:
             raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True)
 
-        video_title = self._html_search_regex(
-            r'<title>(.*?)\s+-\s+XVID', webpage, 'title')
-        video_thumbnail = self._search_regex(
+        title = self._html_search_regex(
+            (r'<title>(?P<title>.+?)\s+-\s+XVID',
+             r'setVideoTitle\s*\(\s*(["\'])(?P<title>(?:(?!\1).)+)\1'),
+            webpage, 'title', default=None,
+            group='title') or self._og_search_title(webpage)
+
+        thumbnail = self._search_regex(
             r'url_bigthumb=(.+?)&amp', webpage, 'thumbnail', fatal=False)
-        video_duration = int_or_none(self._og_search_property(
+        duration = int_or_none(self._og_search_property(
             'duration', webpage, default=None)) or parse_duration(
             self._search_regex(
                 r'<span[^>]+class=["\']duration["\'][^>]*>.*?(\d[^<]+)',
@@ -74,8 +94,8 @@ class XVideosIE(InfoExtractor):
         return {
             'id': video_id,
             'formats': formats,
-            'title': video_title,
-            'duration': video_duration,
-            'thumbnail': video_thumbnail,
+            'title': title,
+            'duration': duration,
+            'thumbnail': thumbnail,
             'age_limit': 18,
         }
index 38f82bf447128d42643afa865c41efc7db9014cd..552013a74b75764834be40c378bbefbfdea1d351 100644 (file)
@@ -12,11 +12,13 @@ from ..compat import (
 )
 from ..utils import (
     clean_html,
-    unescapeHTML,
+    determine_ext,
     ExtractorError,
+    extract_attributes,
     int_or_none,
     mimetype2ext,
-    determine_ext,
+    smuggle_url,
+    unescapeHTML,
 )
 
 from .brightcove import (
@@ -28,7 +30,7 @@ from .nbc import NBCSportsVPlayerIE
 
 class YahooIE(InfoExtractor):
     IE_DESC = 'Yahoo screen and movies'
-    _VALID_URL = r'(?P<url>(?P<host>https?://(?:[a-zA-Z]{2}\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?P<display_id>.+)?-(?P<id>[0-9]+)(?:-[a-z]+)?(?:\.html)?)'
+    _VALID_URL = r'(?P<host>https?://(?:(?P<country>[a-zA-Z]{2})\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?:(?P<display_id>.+)?-)?(?P<id>[0-9]+)(?:-[a-z]+)?(?:\.html)?'
     _TESTS = [
         {
             'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
@@ -50,6 +52,7 @@ class YahooIE(InfoExtractor):
                 'description': 'md5:66b627ab0a282b26352136ca96ce73c1',
                 'duration': 151,
             },
+            'skip': 'HTTP Error 404',
         },
         {
             'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed',
@@ -142,7 +145,7 @@ class YahooIE(InfoExtractor):
             'skip': 'Domain name in.lifestyle.yahoo.com gone',
         }, {
             'url': 'https://www.yahoo.com/movies/v/true-story-trailer-173000497.html',
-            'md5': '2a9752f74cb898af5d1083ea9f661b58',
+            'md5': '989396ae73d20c6f057746fb226aa215',
             'info_dict': {
                 'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1',
                 'ext': 'mp4',
@@ -227,13 +230,33 @@ class YahooIE(InfoExtractor):
                 'skip_download': True,
             },
         },
+        {
+            # custom brightcove
+            'url': 'https://au.tv.yahoo.com/plus7/sunrise/-/watch/37083565/clown-entertainers-say-it-is-hurting-their-business/',
+            'info_dict': {
+                'id': '5575377707001',
+                'ext': 'mp4',
+                'title': "Clown entertainers say 'It' is hurting their business",
+                'description': 'Stephen King s horror film has much to answer for. Jelby and Mr Loopy the Clowns join us.',
+                'timestamp': 1505341164,
+                'upload_date': '20170913',
+                'uploader_id': '2376984109001',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
+        {
+            # custom brightcove, geo-restricted to Australia, bypassable
+            'url': 'https://au.tv.yahoo.com/plus7/sunrise/-/watch/37263964/sunrise-episode-wed-27-sep/',
+            'only_matching': True,
+        }
     ]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('display_id') or self._match_id(url)
         page_id = mobj.group('id')
-        url = mobj.group('url')
+        display_id = mobj.group('display_id') or page_id
         host = mobj.group('host')
         webpage, urlh = self._download_webpage_handle(url, display_id)
         if 'err=404' in urlh.geturl():
@@ -257,10 +280,31 @@ class YahooIE(InfoExtractor):
         if bc_url:
             return self.url_result(bc_url, BrightcoveLegacyIE.ie_key())
 
+        def brightcove_url_result(bc_url):
+            return self.url_result(
+                smuggle_url(bc_url, {'geo_countries': [mobj.group('country')]}),
+                BrightcoveNewIE.ie_key())
+
         # Look for Brightcove New Studio embeds
         bc_url = BrightcoveNewIE._extract_url(self, webpage)
         if bc_url:
-            return self.url_result(bc_url, BrightcoveNewIE.ie_key())
+            return brightcove_url_result(bc_url)
+
+        brightcove_iframe = self._search_regex(
+            r'(<iframe[^>]+data-video-id=["\']\d+[^>]+>)', webpage,
+            'brightcove iframe', default=None)
+        if brightcove_iframe:
+            attr = extract_attributes(brightcove_iframe)
+            src = attr.get('src')
+            if src:
+                parsed_src = compat_urlparse.urlparse(src)
+                qs = compat_urlparse.parse_qs(parsed_src.query)
+                account_id = qs.get('accountId', ['2376984109001'])[0]
+                brightcove_id = attr.get('data-video-id') or qs.get('videoId', [None])[0]
+                if account_id and brightcove_id:
+                    return brightcove_url_result(
+                        'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
+                        % (account_id, brightcove_id))
 
         # Query result is often embedded in webpage as JSON. Sometimes explicit requests
         # to video API results in a failure with geo restriction reason therefore using
index ad2e933ee4e34c9ebdb982ca66278e6e4c4a06b0..4e8db240d3f9d141cfa457c1d941d1b5399f0c67 100644 (file)
@@ -332,6 +332,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
                             (?:www\.)?deturl\.com/www\.youtube\.com/|
                             (?:www\.)?pwnyoutube\.com/|
+                            (?:www\.)?hooktube\.com/|
                             (?:www\.)?yourepeat\.com/|
                             tube\.majestyc\.net/|
                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
@@ -1629,7 +1630,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     class="[^"]*"[^>]*>
                 [^<]+\.{3}\s*
                 </a>
-            ''', r'\1', video_description)
+            ''', lambda m: compat_urlparse.urljoin(url, m.group(1)), video_description)
             video_description = clean_html(video_description)
         else:
             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
@@ -1682,7 +1683,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         video_uploader_id = None
         video_uploader_url = None
         mobj = re.search(
-            r'<link itemprop="url" href="(?P<uploader_url>https?://www.youtube.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
+            r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
             video_webpage)
         if mobj is not None:
             video_uploader_id = mobj.group('uploader_id')
@@ -2039,39 +2040,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         }
 
 
-class YoutubeSharedVideoIE(InfoExtractor):
-    _VALID_URL = r'(?:https?:)?//(?:www\.)?youtube\.com/shared\?.*\bci=(?P<id>[0-9A-Za-z_-]{11})'
-    IE_NAME = 'youtube:shared'
-
-    _TEST = {
-        'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
-        'info_dict': {
-            'id': 'uPDB5I9wfp8',
-            'ext': 'webm',
-            'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
-            'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
-            'upload_date': '20160219',
-            'uploader': 'Pocoyo - Português (BR)',
-            'uploader_id': 'PocoyoBrazil',
-        },
-        'add_ie': ['Youtube'],
-        'params': {
-            # There are already too many Youtube downloads
-            'skip_download': True,
-        },
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, video_id)
-
-        real_video_id = self._html_search_meta(
-            'videoId', webpage, 'YouTube video id', fatal=True)
-
-        return self.url_result(real_video_id, YoutubeIE.ie_key())
-
-
 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
     IE_DESC = 'YouTube.com playlists'
     _VALID_URL = r"""(?x)(?:
index f71d413b5285005cbc98c75f0092053631e7c3f5..3ea1afcf31cb49e0992a79b46c5985bb33b742a3 100644 (file)
@@ -44,7 +44,7 @@ ACODECS = {
     'aac': 'aac',
     'flac': 'flac',
     'm4a': 'aac',
-    'opus': 'opus',
+    'opus': 'libopus',
     'vorbis': 'libvorbis',
     'wav': None,
 }
index 92b22e639b30af5e93cce69fa8c057be2cb1e0c0..59fb3343582e6dfa2aab2c6a8311300bd7458141 100644 (file)
@@ -1933,7 +1933,7 @@ class PagedList(object):
 
 
 class OnDemandPagedList(PagedList):
-    def __init__(self, pagefunc, pagesize, use_cache=False):
+    def __init__(self, pagefunc, pagesize, use_cache=True):
         self._pagefunc = pagefunc
         self._pagesize = pagesize
         self._use_cache = use_cache
index 0e2e4dc90edabb5a88eb177188f7d9ba4fa6105e..d01ba30950a375541b7ba32fd54dd4923f6bd98e 100644 (file)
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2017.09.24'
+__version__ = '2017.10.15.1'