Prepare to release.

[youtubedl] / youtube_dl / extractor / generic.py
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py

index 197ab95319419d29971979b94464aa191ef04a04..274f817384d65a6287427ad35f97d3126cea7cd0 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
  
  from __future__ import unicode_literals
  
@@ -20,6 +20,7 @@ from ..utils import (
      float_or_none,
      HEADRequest,
      is_html,
+    js_to_json,
      orderedSet,
      sanitized_Request,
      smuggle_url,
@@ -27,9 +28,9 @@ from ..utils import (
      unified_strdate,
      unsmuggle_url,
      UnsupportedError,
-    url_basename,
      xpath_text,
  )
+from .commonprotocols import RtmpIE
  from .brightcove import (
      BrightcoveLegacyIE,
      BrightcoveNewIE,
@@ -48,6 +49,8 @@ from .svt import SVTIE
  from .pornhub import PornHubIE
  from .xhamster import XHamsterEmbedIE
  from .tnaflix import TNAFlixNetworkEmbedIE
+from .drtuber import DrTuberIE
+from .redtube import RedTubeIE
  from .vimeo import VimeoIE
  from .dailymotion import (
      DailymotionIE,
@@ -55,10 +58,10 @@ from .dailymotion import (
  )
  from .onionstudios import OnionStudiosIE
  from .viewlift import ViewLiftEmbedIE
-from .screenwavemedia import ScreenwaveMediaIE
  from .mtv import MTVServicesEmbeddedIE
  from .pladform import PladformIE
  from .videomore import VideomoreIE
+from .webcaster import WebcasterFeedIE
  from .googledrive import GoogleDriveIE
  from .jwplatform import JWPlatformIE
  from .digiteka import DigitekaIE
@@ -72,7 +75,16 @@ from .kaltura import KalturaIE
  from .eagleplatform import EaglePlatformIE
  from .facebook import FacebookIE
  from .soundcloud import SoundcloudIE
+from .tunein import TuneInBaseIE
  from .vbox7 import Vbox7IE
+from .dbtv import DBTVIE
+from .piksel import PikselIE
+from .videa import VideaIE
+from .twentymin import TwentyMinutenIE
+from .ustream import UstreamIE
+from .openload import OpenloadIE
+from .videopress import VideoPressIE
+from .rutube import RutubeIE
  
  
  class GenericIE(InfoExtractor):
@@ -103,7 +115,8 @@ class GenericIE(InfoExtractor):
              },
              'expected_warnings': [
                  'URL could be a direct video link, returning it as such.'
-            ]
+            ],
+            'skip': 'URL invalid',
          },
          # Direct download with broken HEAD
          {
@@ -233,7 +246,7 @@ class GenericIE(InfoExtractor):
                  'ext': 'mp4',
                  'title': 'Tikibad ontruimd wegens brand',
                  'description': 'md5:05ca046ff47b931f9b04855015e163a4',
-                'thumbnail': 're:^https?://.*\.jpg$',
+                'thumbnail': r're:^https?://.*\.jpg$',
                  'duration': 33,
              },
              'params': {
@@ -267,7 +280,8 @@ class GenericIE(InfoExtractor):
              'params': {
                  # m3u8 downloads
                  'skip_download': True,
-            }
+            },
+            'skip': 'video gone',
          },
          # m3u8 served with Content-Type: text/plain
          {
@@ -282,7 +296,8 @@ class GenericIE(InfoExtractor):
              'params': {
                  # m3u8 downloads
                  'skip_download': True,
-            }
+            },
+            'skip': 'video gone',
          },
          # google redirect
          {
@@ -292,7 +307,7 @@ class GenericIE(InfoExtractor):
                  'ext': 'mp4',
                  'upload_date': '20130224',
                  'uploader_id': 'TheVerge',
-                'description': 're:^Chris Ziegler takes a look at the\.*',
+                'description': r're:^Chris Ziegler takes a look at the\.*',
                  'uploader': 'The Verge',
                  'title': 'First Firefox OS phones side-by-side',
              },
@@ -338,10 +353,10 @@ class GenericIE(InfoExtractor):
              },
              'skip': 'There is a limit of 200 free downloads / month for the test song',
          },
-        # embedded brightcove video
-        # it also tests brightcove videos that need to set the 'Referer' in the
-        # http requests
          {
+            # embedded brightcove video
+            # it also tests brightcove videos that need to set the 'Referer'
+            # in the http requests
              'add_ie': ['BrightcoveLegacy'],
              'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
              'info_dict': {
@@ -355,6 +370,24 @@ class GenericIE(InfoExtractor):
                  'skip_download': True,
              },
          },
+        {
+            # embedded with itemprop embedURL and video id spelled as `idVideo`
+            'add_ie': ['BrightcoveLegacy'],
+            'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/',
+            'info_dict': {
+                'id': '5255628253001',
+                'ext': 'mp4',
+                'title': 'md5:37c519b1128915607601e75a87995fc0',
+                'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26',
+                'uploader': 'BFM BUSINESS',
+                'uploader_id': '876450612001',
+                'timestamp': 1482255315,
+                'upload_date': '20161220',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
          {
              # https://github.com/rg3/youtube-dl/issues/2253
              'url': 'http://bcove.me/i6nfkrc3',
@@ -367,6 +400,7 @@ class GenericIE(InfoExtractor):
                  'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
              },
              'add_ie': ['BrightcoveLegacy'],
+            'skip': 'video gone',
          },
          {
              'url': 'http://www.championat.com/video/football/v/87/87499.html',
@@ -395,6 +429,43 @@ class GenericIE(InfoExtractor):
                  'skip_download': True,  # m3u8 download
              },
          },
+        {
+            # Brightcove with alternative playerID key
+            'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html',
+            'info_dict': {
+                'id': 'nmeth.2062_SV1',
+                'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research',
+            },
+            'playlist': [{
+                'info_dict': {
+                    'id': '2228375078001',
+                    'ext': 'mp4',
+                    'title': 'nmeth.2062-sv1',
+                    'description': 'nmeth.2062-sv1',
+                    'timestamp': 1363357591,
+                    'upload_date': '20130315',
+                    'uploader': 'Nature Publishing Group',
+                    'uploader_id': '1964492299001',
+                },
+            }],
+        },
+        {
+            # Brightcove with UUID in videoPlayer
+            'url': 'http://www8.hp.com/cn/zh/home.html',
+            'info_dict': {
+                'id': '5255815316001',
+                'ext': 'mp4',
+                'title': 'Sprocket Video - China',
+                'description': 'Sprocket Video - China',
+                'uploader': 'HP-Video Gallery',
+                'timestamp': 1482263210,
+                'upload_date': '20161220',
+                'uploader_id': '1107601872001',
+            },
+            'params': {
+                'skip_download': True,  # m3u8 download
+            },
+        },
          # ooyala video
          {
              'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
@@ -420,6 +491,7 @@ class GenericIE(InfoExtractor):
              'params': {
                  'skip_download': True,
              },
+            'skip': 'movie expired',
          },
          # embed.ly video
          {
@@ -447,6 +519,8 @@ class GenericIE(InfoExtractor):
                  'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
                  'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
              },
+            # HEAD requests lead to endless 301, while GET is OK
+            'expected_warnings': ['301'],
          },
          # RUTV embed
          {
@@ -509,7 +583,7 @@ class GenericIE(InfoExtractor):
                  'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
                  'ext': 'mp4',
                  'title': 'Ужастики, русский трейлер (2015)',
-                'thumbnail': 're:^https?://.*\.jpg$',
+                'thumbnail': r're:^https?://.*\.jpg$',
                  'duration': 153,
              }
          },
@@ -521,6 +595,9 @@ class GenericIE(InfoExtractor):
                  'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
              },
              'playlist_mincount': 7,
+            # This forum no longer allows <iframe> markup in posts;
+            # HTML tags are now displayed as-is
+            'skip': 'No videos on this page',
          },
          # Embedded TED video
          {
@@ -534,17 +611,6 @@ class GenericIE(InfoExtractor):
                  'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
              }
          },
-        # Embedded Ustream video
-        {
-            'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
-            'md5': '27b99cdb639c9b12a79bca876a073417',
-            'info_dict': {
-                'id': '45734260',
-                'ext': 'flv',
-                'uploader': 'AU SPA:  The NSA and Privacy',
-                'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
-            }
-        },
          # nowvideo embed hidden behind percent encoding
          {
              'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
@@ -569,7 +635,8 @@ class GenericIE(InfoExtractor):
              },
              'params': {
                  'skip_download': 'Requires rtmpdump'
-            }
+            },
+            'skip': 'video gone',
          },
          # francetv embed
          {
@@ -725,7 +792,7 @@ class GenericIE(InfoExtractor):
                  'duration': 48,
                  'timestamp': 1401537900,
                  'upload_date': '20140531',
-                'thumbnail': 're:^https?://.*\.jpg$',
+                'thumbnail': r're:^https?://.*\.jpg$',
              },
          },
          # Wistia embed
@@ -795,6 +862,21 @@ class GenericIE(InfoExtractor):
              },
              'playlist_mincount': 7,
          },
+        # TuneIn station embed
+        {
+            'url': 'http://radiocnrv.com/promouvoir-radio-cnrv/',
+            'info_dict': {
+                'id': '204146',
+                'ext': 'mp3',
+                'title': 'CNRV',
+                'location': 'Paris, France',
+                'is_live': True,
+            },
+            'params': {
+                # Live stream
+                'skip_download': True,
+            },
+        },
          # Livestream embed
          {
              'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
@@ -820,12 +902,13 @@ class GenericIE(InfoExtractor):
          },
          # LazyYT
          {
-            'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
+            'url': 'https://skiplagged.com/',
              'info_dict': {
-                'id': '1986',
-                'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
+                'id': 'skiplagged',
+                'title': 'Skiplagged: The smart way to find cheap flights',
              },
-            'playlist_mincount': 2,
+            'playlist_mincount': 1,
+            'add_ie': ['Youtube'],
          },
          # Cinchcast embed
          {
@@ -885,6 +968,43 @@ class GenericIE(InfoExtractor):
                  'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
              },
          },
+        # jwplayer rtmp
+        {
+            'url': 'http://www.suffolk.edu/sjc/',
+            'info_dict': {
+                'id': 'sjclive',
+                'ext': 'flv',
+                'title': 'Massachusetts Supreme Judicial Court Oral Arguments',
+                'uploader': 'www.suffolk.edu',
+            },
+            'params': {
+                'skip_download': True,
+            }
+        },
+        # Complex jwplayer
+        {
+            'url': 'http://www.indiedb.com/games/king-machine/videos',
+            'info_dict': {
+                'id': 'videos',
+                'ext': 'mp4',
+                'title': 'king machine trailer 1',
+                'thumbnail': r're:^https?://.*\.jpg$',
+            },
+        },
+        {
+            # JWPlayer config passed as variable
+            'url': 'http://www.txxx.com/videos/3326530/ariele/',
+            'info_dict': {
+                'id': '3326530_hq',
+                'ext': 'mp4',
+                'title': 'ARIELE | Tube Cup',
+                'uploader': 'www.txxx.com',
+                'age_limit': 18,
+            },
+            'params': {
+                'skip_download': True,
+            }
+        },
          # rtl.nl embed
          {
              'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
@@ -915,19 +1035,6 @@ class GenericIE(InfoExtractor):
                  'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
              },
          },
-        # Kaltura embed protected with referrer
-        {
-            'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero',
-            'info_dict': {
-                'id': '1_g4fbemnq',
-                'ext': 'mp4',
-                'title': 'Violetta - Achter De Schermen - Ruggero',
-                'description': 'Achter de schermen met Ruggero',
-                'timestamp': 1435133761,
-                'upload_date': '20150624',
-                'uploader_id': 'echojecka',
-            },
-        },
          # Kaltura embed with single quotes
          {
              'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
@@ -959,6 +1066,20 @@ class GenericIE(InfoExtractor):
                  'skip_download': True,
              }
          },
+        {
+            # Kaltura embedded, some fileExt broken (#11480)
+            'url': 'http://www.cornell.edu/video/nima-arkani-hamed-standard-models-of-particle-physics',
+            'info_dict': {
+                'id': '1_sgtvehim',
+                'ext': 'mp4',
+                'title': 'Our "Standard Models" of particle physics and cosmology',
+                'description': 'md5:67ea74807b8c4fea92a6f38d6d323861',
+                'timestamp': 1321158993,
+                'upload_date': '20111113',
+                'uploader_id': 'kps1',
+            },
+            'add_ie': ['Kaltura'],
+        },
          # Eagle.Platform embed (generic URL)
          {
              'url': 'http://lenta.ru/news/2015/03/06/navalny/',
@@ -968,7 +1089,7 @@ class GenericIE(InfoExtractor):
                  'ext': 'mp4',
                  'title': 'Навальный вышел на свободу',
                  'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
-                'thumbnail': 're:^https?://.*\.jpg$',
+                'thumbnail': r're:^https?://.*\.jpg$',
                  'duration': 87,
                  'view_count': int,
                  'age_limit': 0,
@@ -982,7 +1103,7 @@ class GenericIE(InfoExtractor):
                  'id': '12820',
                  'ext': 'mp4',
                  'title': "'O Sole Mio",
-                'thumbnail': 're:^https?://.*\.jpg$',
+                'thumbnail': r're:^https?://.*\.jpg$',
                  'duration': 216,
                  'view_count': int,
              },
@@ -995,7 +1116,7 @@ class GenericIE(InfoExtractor):
                  'ext': 'mp4',
                  'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
                  'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
-                'thumbnail': 're:^https?://.*\.jpg$',
+                'thumbnail': r're:^https?://.*\.jpg$',
                  'duration': 694,
                  'age_limit': 0,
              },
@@ -1007,7 +1128,7 @@ class GenericIE(InfoExtractor):
                  'id': '3519514',
                  'ext': 'mp4',
                  'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
-                'thumbnail': 're:^https?://.*\.png$',
+                'thumbnail': r're:^https?://.*\.png$',
                  'duration': 45.115,
              },
          },
@@ -1090,7 +1211,7 @@ class GenericIE(InfoExtractor):
                  'id': '300346',
                  'ext': 'mp4',
                  'title': '中一中男師變性 全校師生力挺',
-                'thumbnail': 're:^https?://.*\.jpg$',
+                'thumbnail': r're:^https?://.*\.jpg$',
              },
              'params': {
                  # m3u8 download
@@ -1136,7 +1257,7 @@ class GenericIE(InfoExtractor):
                  'ext': 'mp4',
                  'title': 'Sauvons les abeilles ! - Le débat',
                  'description': 'md5:d9082128b1c5277987825d684939ca26',
-                'thumbnail': 're:^https?://.*\.jpe?g$',
+                'thumbnail': r're:^https?://.*\.jpe?g$',
                  'timestamp': 1434970506,
                  'upload_date': '20150622',
                  'uploader': 'Public Sénat',
@@ -1150,7 +1271,7 @@ class GenericIE(InfoExtractor):
                  'id': '2855',
                  'ext': 'mp4',
                  'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
-                'thumbnail': 're:^https?://.*\.jpe?g$',
+                'thumbnail': r're:^https?://.*\.jpe?g$',
                  'uploader': 'ClickHole',
                  'uploader_id': 'clickhole',
              }
@@ -1176,16 +1297,6 @@ class GenericIE(InfoExtractor):
                  'duration': 248.667,
              },
          },
-        # ScreenwaveMedia embed
-        {
-            'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
-            'md5': '24ace5baba0d35d55c6810b51f34e9e0',
-            'info_dict': {
-                'id': 'cinemasnob-55d26273809dd',
-                'ext': 'mp4',
-                'title': 'cinemasnob',
-            },
-        },
          # BrightcoveInPageEmbed embed
          {
              'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
@@ -1197,20 +1308,6 @@ class GenericIE(InfoExtractor):
                  'duration': 51690,
              },
          },
-        # JWPlayer with M3U8
-        {
-            'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
-            'info_dict': {
-                'id': 'playlist',
-                'ext': 'mp4',
-                'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
-                'uploader': 'ren.tv',
-            },
-            'params': {
-                # m3u8 downloads
-                'skip_download': True,
-            }
-        },
          # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
          # This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
          {
@@ -1357,6 +1454,11 @@ class GenericIE(InfoExtractor):
              },
              'add_ie': ['Vimeo'],
          },
+        {
+            # generic vimeo embed that requires original URL passed as Referer
+            'url': 'http://racing4everyone.eu/2016/07/30/formula-1-2016-round12-germany/',
+            'only_matching': True,
+        },
          {
              'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
              'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
@@ -1386,6 +1488,86 @@ class GenericIE(InfoExtractor):
              },
              'add_ie': [Vbox7IE.ie_key()],
          },
+        {
+            # DBTV embeds
+            'url': 'http://www.dagbladet.no/2016/02/23/nyheter/nordlys/ski/troms/ver/43254897/',
+            'info_dict': {
+                'id': '43254897',
+                'title': 'Etter ett års planlegging, klaffet endelig alt: - Jeg måtte ta en liten dans',
+            },
+            'playlist_mincount': 3,
+        },
+        {
+            # Videa embeds
+            'url': 'http://forum.dvdtalk.com/movie-talk/623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style.html',
+            'info_dict': {
+                'id': '623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style',
+                'title': 'Deleted Magic - Star Wars: OT Deleted / Alt. Scenes Docu. Style - DVD Talk Forum',
+            },
+            'playlist_mincount': 2,
+        },
+        {
+            # 20 minuten embed
+            'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552',
+            'info_dict': {
+                'id': '523629',
+                'ext': 'mp4',
+                'title': 'So kommen Sie bei Eis und Schnee sicher an',
+                'description': 'md5:117c212f64b25e3d95747e5276863f7d',
+            },
+            'params': {
+                'skip_download': True,
+            },
+            'add_ie': [TwentyMinutenIE.ie_key()],
+        },
+        {
+            # VideoPress embed
+            'url': 'https://en.support.wordpress.com/videopress/',
+            'info_dict': {
+                'id': 'OcobLTqC',
+                'ext': 'm4v',
+                'title': 'IMG_5786',
+                'timestamp': 1435711927,
+                'upload_date': '20150701',
+            },
+            'params': {
+                'skip_download': True,
+            },
+            'add_ie': [VideoPressIE.ie_key()],
+        },
+        {
+            # Rutube embed
+            'url': 'http://magazzino.friday.ru/videos/vipuski/kazan-2',
+            'info_dict': {
+                'id': '9b3d5bee0a8740bf70dfd29d3ea43541',
+                'ext': 'flv',
+                'title': 'Магаззино: Казань 2',
+                'description': 'md5:99bccdfac2269f0e8fdbc4bbc9db184a',
+                'uploader': 'Магаззино',
+                'upload_date': '20170228',
+                'uploader_id': '996642',
+            },
+            'params': {
+                'skip_download': True,
+            },
+            'add_ie': [RutubeIE.ie_key()],
+        },
+        {
+            # ThePlatform embed with whitespace in URLs
+            'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
+            'only_matching': True,
+        },
+        {
+            # Senate ISVP iframe https
+            'url': 'https://www.hsgac.senate.gov/hearings/canadas-fast-track-refugee-plan-unanswered-questions-and-implications-for-us-national-security',
+            'md5': 'fb8c70b0b515e5037981a2492099aab8',
+            'info_dict': {
+                'id': 'govtaff020316',
+                'ext': 'mp4',
+                'title': 'Integrated Senate Video Player',
+            },
+            'add_ie': [SenateISVPIE.ie_key()],
+        },
          # {
          #     # TODO: find another test
          #     # http://schema.org/VideoObject
@@ -1523,7 +1705,7 @@ class GenericIE(InfoExtractor):
              force_videoid = smuggled_data['force_videoid']
              video_id = force_videoid
          else:
-            video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
+            video_id = self._generic_id(url)
  
          self.to_screen('%s: Requesting header' % video_id)
  
@@ -1552,7 +1734,7 @@ class GenericIE(InfoExtractor):
  
          info_dict = {
              'id': video_id,
-            'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
+            'title': self._generic_title(url),
              'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
          }
  
@@ -1623,6 +1805,10 @@ class GenericIE(InfoExtractor):
              doc = compat_etree_fromstring(webpage.encode('utf-8'))
              if doc.tag == 'rss':
                  return self._extract_rss(url, video_id, doc)
+            elif doc.tag == 'SmoothStreamingMedia':
+                info_dict['formats'] = self._parse_ism_formats(doc, url)
+                self._sort_formats(info_dict['formats'])
+                return info_dict
              elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
                  smil = self._parse_smil(doc, url, video_id)
                  self._sort_formats(smil['formats'])
@@ -1631,7 +1817,9 @@ class GenericIE(InfoExtractor):
                  return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
              elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
                  info_dict['formats'] = self._parse_mpd_formats(
-                    doc, video_id, mpd_base_url=url.rpartition('/')[0])
+                    doc, video_id,
+                    mpd_base_url=full_response.geturl().rpartition('/')[0],
+                    mpd_url=url)
                  self._sort_formats(info_dict['formats'])
                  return info_dict
              elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
@@ -1679,14 +1867,6 @@ class GenericIE(InfoExtractor):
          video_description = self._og_search_description(webpage, default=None)
          video_thumbnail = self._og_search_thumbnail(webpage, default=None)
  
-        # Helper method
-        def _playlist_from_matches(matches, getter=None, ie=None):
-            urlrs = orderedSet(
-                self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
-                for m in matches)
-            return self.playlist_result(
-                urlrs, playlist_id=video_id, playlist_title=video_title)
-
          # Look for Brightcove Legacy Studio embeds
          bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
          if bc_urls:
@@ -1707,28 +1887,28 @@ class GenericIE(InfoExtractor):
          # Look for Brightcove New Studio embeds
          bc_urls = BrightcoveNewIE._extract_urls(webpage)
          if bc_urls:
-            return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
+            return self.playlist_from_matches(bc_urls, video_id, video_title, ie='BrightcoveNew')
  
          # Look for ThePlatform embeds
          tp_urls = ThePlatformIE._extract_urls(webpage)
          if tp_urls:
-            return _playlist_from_matches(tp_urls, ie='ThePlatform')
+            return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
  
          # Look for Vessel embeds
          vessel_urls = VesselIE._extract_urls(webpage)
          if vessel_urls:
-            return _playlist_from_matches(vessel_urls, ie=VesselIE.ie_key())
+            return self.playlist_from_matches(vessel_urls, video_id, video_title, ie=VesselIE.ie_key())
  
          # Look for embedded rtl.nl player
          matches = re.findall(
              r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
              webpage)
          if matches:
-            return _playlist_from_matches(matches, ie='RtlNl')
+            return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
  
-        vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
-        if vimeo_url is not None:
-            return self.url_result(vimeo_url)
+        vimeo_urls = VimeoIE._extract_urls(url, webpage)
+        if vimeo_urls:
+            return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
  
          vid_me_embed_url = self._search_regex(
              r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
@@ -1750,25 +1930,25 @@ class GenericIE(InfoExtractor):
                  (?:embed|v|p)/.+?)
              \1''', webpage)
          if matches:
-            return _playlist_from_matches(
-                matches, lambda m: unescapeHTML(m[1]))
+            return self.playlist_from_matches(
+                matches, video_id, video_title, lambda m: unescapeHTML(m[1]))
  
          # Look for lazyYT YouTube embed
          matches = re.findall(
              r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
          if matches:
-            return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
+            return self.playlist_from_matches(matches, video_id, video_title, lambda m: unescapeHTML(m))
  
          # Look for Wordpress "YouTube Video Importer" plugin
          matches = re.findall(r'''(?x)<div[^>]+
              class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
              data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
          if matches:
-            return _playlist_from_matches(matches, lambda m: m[-1])
+            return self.playlist_from_matches(matches, video_id, video_title, lambda m: m[-1])
  
          matches = DailymotionIE._extract_urls(webpage)
          if matches:
-            return _playlist_from_matches(matches)
+            return self.playlist_from_matches(matches, video_id, video_title)
  
          # Look for embedded Dailymotion playlist player (#3822)
          m = re.search(
@@ -1777,8 +1957,8 @@ class GenericIE(InfoExtractor):
              playlists = re.findall(
                  r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
              if playlists:
-                return _playlist_from_matches(
-                    playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
+                return self.playlist_from_matches(
+                    playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
  
          # Look for embedded Wistia player
          match = re.search(
@@ -1871,15 +2051,23 @@ class GenericIE(InfoExtractor):
                  re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
                  re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
          if mobj is not None:
-            return OoyalaIE._build_url_result(smuggle_url(mobj.group('ec'), {'domain': url}))
+            embed_token = self._search_regex(
+                r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)',
+                webpage, 'ooyala embed token', default=None)
+            return OoyalaIE._build_url_result(smuggle_url(
+                mobj.group('ec'), {
+                    'domain': url,
+                    'embed_token': embed_token,
+                }))
  
          # Look for multiple Ooyala embeds on SBN network websites
          mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
          if mobj is not None:
              embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
              if embeds:
-                return _playlist_from_matches(
-                    embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
+                return self.playlist_from_matches(
+                    embeds, video_id, video_title,
+                    getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
  
          # Look for Aparat videos
          mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
@@ -1941,13 +2129,13 @@ class GenericIE(InfoExtractor):
          # Look for funnyordie embed
          matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
          if matches:
-            return _playlist_from_matches(
-                matches, getter=unescapeHTML, ie='FunnyOrDie')
+            return self.playlist_from_matches(
+                matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie')
  
          # Look for BBC iPlayer embed
          matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
          if matches:
-            return _playlist_from_matches(matches, ie='BBCCoUk')
+            return self.playlist_from_matches(matches, video_id, video_title, ie='BBCCoUk')
  
          # Look for embedded RUTV player
          rutv_url = RUTVIE._extract_url(webpage)
@@ -1962,22 +2150,32 @@ class GenericIE(InfoExtractor):
          # Look for embedded SportBox player
          sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
          if sportbox_urls:
-            return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
-
-        # Look for embedded PornHub player
-        pornhub_url = PornHubIE._extract_url(webpage)
-        if pornhub_url:
-            return self.url_result(pornhub_url, 'PornHub')
+            return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie='SportBoxEmbed')
  
          # Look for embedded XHamster player
          xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
          if xhamster_urls:
-            return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
+            return self.playlist_from_matches(xhamster_urls, video_id, video_title, ie='XHamsterEmbed')
  
          # Look for embedded TNAFlixNetwork player
          tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
          if tnaflix_urls:
-            return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
+            return self.playlist_from_matches(tnaflix_urls, video_id, video_title, ie=TNAFlixNetworkEmbedIE.ie_key())
+
+        # Look for embedded PornHub player
+        pornhub_urls = PornHubIE._extract_urls(webpage)
+        if pornhub_urls:
+            return self.playlist_from_matches(pornhub_urls, video_id, video_title, ie=PornHubIE.ie_key())
+
+        # Look for embedded DrTuber player
+        drtuber_urls = DrTuberIE._extract_urls(webpage)
+        if drtuber_urls:
+            return self.playlist_from_matches(drtuber_urls, video_id, video_title, ie=DrTuberIE.ie_key())
+
+        # Look for embedded RedTube player
+        redtube_urls = RedTubeIE._extract_urls(webpage)
+        if redtube_urls:
+            return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key())
  
          # Look for embedded Tvigle player
          mobj = re.search(
@@ -1992,10 +2190,9 @@ class GenericIE(InfoExtractor):
              return self.url_result(mobj.group('url'), 'TED')
  
          # Look for embedded Ustream videos
-        mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
-        if mobj is not None:
-            return self.url_result(mobj.group('url'), 'Ustream')
+        ustream_url = UstreamIE._extract_url(webpage)
+        if ustream_url:
+            return self.url_result(ustream_url, UstreamIE.ie_key())
  
          # Look for embedded arte.tv player
          mobj = re.search(
@@ -2024,7 +2221,12 @@ class GenericIE(InfoExtractor):
          # Look for embedded soundcloud player
          soundcloud_urls = SoundcloudIE._extract_urls(webpage)
          if soundcloud_urls:
-            return _playlist_from_matches(soundcloud_urls, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
+            return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
+
+        # Look for tunein player
+        tunein_urls = TuneInBaseIE._extract_urls(webpage)
+        if tunein_urls:
+            return self.playlist_from_matches(tunein_urls, video_id, video_title)
  
          # Look for embedded mtvservices player
          mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
@@ -2111,6 +2313,11 @@ class GenericIE(InfoExtractor):
          if videomore_url:
              return self.url_result(videomore_url)
  
+        # Look for Webcaster embeds
+        webcaster_url = WebcasterFeedIE._extract_url(self, webpage)
+        if webcaster_url:
+            return self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key())
+
          # Look for Playwire embeds
          mobj = re.search(
              r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
@@ -2177,11 +2384,6 @@ class GenericIE(InfoExtractor):
          if jwplatform_url:
              return self.url_result(jwplatform_url, 'JWPlatform')
  
-        # Look for ScreenwaveMedia embeds
-        mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
-        if mobj is not None:
-            return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
-
          # Look for Digiteka embeds
          digiteka_url = DigitekaIE._extract_url(webpage)
          if digiteka_url:
@@ -2192,6 +2394,11 @@ class GenericIE(InfoExtractor):
          if arkena_url:
              return self.url_result(arkena_url, ArkenaIE.ie_key())
  
+        # Look for Piksel embeds
+        piksel_url = PikselIE._extract_url(webpage)
+        if piksel_url:
+            return self.url_result(piksel_url, PikselIE.ie_key())
+
          # Look for Limelight embeds
          mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
          if mobj:
@@ -2200,8 +2407,21 @@ class GenericIE(InfoExtractor):
                  'Channel': 'channel',
                  'ChannelList': 'channel_list',
              }
-            return self.url_result('limelight:%s:%s' % (
-                lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2))
+            return self.url_result(smuggle_url('limelight:%s:%s' % (
+                lm[mobj.group(1)], mobj.group(2)), {'source_url': url}),
+                'Limelight%s' % mobj.group(1), mobj.group(2))
+
+        mobj = re.search(
+            r'''(?sx)
+                <object[^>]+class=(["\'])LimelightEmbeddedPlayerFlash\1[^>]*>.*?
+                    <param[^>]+
+                        name=(["\'])flashVars\2[^>]+
+                        value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32})
+            ''', webpage)
+        if mobj:
+            return self.url_result(smuggle_url(
+                'limelight:media:%s' % mobj.group('id'),
+                {'source_url': url}), 'LimelightMedia', mobj.group('id'))
  
          # Look for AdobeTVVideo embeds
          mobj = re.search(
@@ -2222,11 +2442,40 @@ class GenericIE(InfoExtractor):
  
          # Look for VODPlatform embeds
          mobj = re.search(
-            r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vod-platform\.net/embed/[^/?#]+)',
+            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1',
              webpage)
          if mobj is not None:
              return self.url_result(
-                self._proto_relative_url(unescapeHTML(mobj.group(1))), 'VODPlatform')
+                self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
+
+        # Look for Mangomolo embeds
+        mobj = re.search(
+            r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/
+                (?:
+                    video\?.*?\bid=(?P<video_id>\d+)|
+                    index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
+                ).+?)\1''', webpage)
+        if mobj is not None:
+            info = {
+                '_type': 'url_transparent',
+                'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))),
+                'title': video_title,
+                'description': video_description,
+                'thumbnail': video_thumbnail,
+                'uploader': video_uploader,
+            }
+            video_id = mobj.group('video_id')
+            if video_id:
+                info.update({
+                    'ie_key': 'MangomoloVideo',
+                    'id': video_id,
+                })
+            else:
+                info.update({
+                    'ie_key': 'MangomoloLive',
+                    'id': mobj.group('channel_id'),
+                })
+            return info
  
          # Look for Instagram embeds
          instagram_embed_url = InstagramIE._extract_embed_url(webpage)
@@ -2257,6 +2506,40 @@ class GenericIE(InfoExtractor):
          if vbox7_url:
              return self.url_result(vbox7_url, Vbox7IE.ie_key())
  
+        # Look for DBTV embeds
+        dbtv_urls = DBTVIE._extract_urls(webpage)
+        if dbtv_urls:
+            return self.playlist_from_matches(dbtv_urls, video_id, video_title, ie=DBTVIE.ie_key())
+
+        # Look for Videa embeds
+        videa_urls = VideaIE._extract_urls(webpage)
+        if videa_urls:
+            return self.playlist_from_matches(videa_urls, video_id, video_title, ie=VideaIE.ie_key())
+
+        # Look for 20 minuten embeds
+        twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
+        if twentymin_urls:
+            return self.playlist_from_matches(
+                twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key())
+
+        # Look for Openload embeds
+        openload_urls = OpenloadIE._extract_urls(webpage)
+        if openload_urls:
+            return self.playlist_from_matches(
+                openload_urls, video_id, video_title, ie=OpenloadIE.ie_key())
+
+        # Look for VideoPress embeds
+        videopress_urls = VideoPressIE._extract_urls(webpage)
+        if videopress_urls:
+            return self.playlist_from_matches(
+                videopress_urls, video_id, video_title, ie=VideoPressIE.ie_key())
+
+        # Look for Rutube embeds
+        rutube_urls = RutubeIE._extract_urls(webpage)
+        if rutube_urls:
+            return self.playlist_from_matches(
+                rutube_urls, video_id, video_title, ie=RutubeIE.ie_key())
+
          # Looking for http://schema.org/VideoObject
          json_ld = self._search_json_ld(
              webpage, video_id, default={}, expected_type='VideoObject')
@@ -2270,12 +2553,34 @@ class GenericIE(InfoExtractor):
              info_dict.update(json_ld)
              return info_dict
  
+        # Look for HTML5 media
+        entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
+        if entries:
+            for entry in entries:
+                entry.update({
+                    'id': video_id,
+                    'title': video_title,
+                })
+                self._sort_formats(entry['formats'])
+            return self.playlist_result(entries)
+
+        jwplayer_data = self._find_jwplayer_data(
+            webpage, video_id, transform_source=js_to_json)
+        if jwplayer_data:
+            info = self._parse_jwplayer_data(
+                jwplayer_data, video_id, require_title=False)
+            if not info.get('title'):
+                info['title'] = video_title
+            return info
+
          def check_video(vurl):
              if YoutubeIE.suitable(vurl):
                  return True
+            if RtmpIE.suitable(vurl):
+                return True
              vpath = compat_urlparse.urlparse(vurl).path
              vext = determine_ext(vpath)
-            return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
+            return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js')
  
          def filter_video(urls):
              return list(filter(check_video, urls))
@@ -2325,9 +2630,6 @@ class GenericIE(InfoExtractor):
              # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
              if m_video_type is not None:
                  found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
-        if not found:
-            # HTML5 video
-            found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
          if not found:
              REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
              found = re.search(
@@ -2344,11 +2646,14 @@ class GenericIE(InfoExtractor):
                      found = re.search(REDIRECT_REGEX, refresh_header)
              if found:
                  new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
-                self.report_following_redirect(new_url)
-                return {
-                    '_type': 'url',
-                    'url': new_url,
-                }
+                if new_url != url:
+                    self.report_following_redirect(new_url)
+                    return {
+                        '_type': 'url',
+                        'url': new_url,
+                    }
+                else:
+                    found = None
  
          if not found:
              # twitter:player is a https URL to iframe player that may or may not
@@ -2383,6 +2688,15 @@ class GenericIE(InfoExtractor):
                  'age_limit': age_limit,
              }
  
+            if RtmpIE.suitable(video_url):
+                entry_info_dict.update({
+                    '_type': 'url_transparent',
+                    'ie_key': RtmpIE.ie_key(),
+                    'url': video_url,
+                })
+                entries.append(entry_info_dict)
+                continue
+
              ext = determine_ext(video_url)
              if ext == 'smil':
                  entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
@@ -2394,6 +2708,21 @@ class GenericIE(InfoExtractor):
                  entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
              elif ext == 'f4m':
                  entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
+            elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
+                # Just matching .ism/manifest is not enough to be reliably sure
+                # whether it's actually an ISM manifest or some other streaming
+                # manifest since there are various streaming URL formats
+                # possible (see [1]) as well as some other shenanigans like
+                # .smil/manifest URLs that actually serve an ISM (see [2]) and
+                # so on.
+                # Thus the most reasonable way to solve this is to delegate
+                # to generic extractor in order to look into the contents of
+                # the manifest itself.
+                # 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats
+                # 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest
+                entry_info_dict = self.url_result(
+                    smuggle_url(video_url, {'to_generic': True}),
+                    GenericIE.ie_key())
              else:
                  entry_info_dict['url'] = video_url