]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/generic.py 
7a5bf939237ff45731fd3befca5ad0b7dfc0df1f
   3  from  __future__ 
import  unicode_literals
   8  from  . common 
import  InfoExtractor
   9  from  . youtube 
import  YoutubeIE
  10  from  .. compat 
import  (   13      compat_xml_parse_error
,   29  from  . brightcove 
import  BrightcoveIE
  30  from  . ooyala 
import  OoyalaIE
  31  from  . rutv 
import  RUTVIE
  32  from  . smotri 
import  SmotriIE
  33  from  . condenast 
import  CondeNastIE
  36  class  GenericIE ( InfoExtractor
):   37      IE_DESC 
=  'Generic downloader that works on some sites'   42              'url' :  'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html' ,   43              'md5' :  '85b90ccc9d73b4acd9138d3af4c27f89' ,   45                  'id' :  '13601338388002' ,   47                  'uploader' :  'www.hodiho.fr' ,   48                  'title' :  'R\u00e9gis plante sa Jeep' ,   51          # bandcamp page with custom domain   53              'add_ie' : [ 'Bandcamp' ],   54              'url' :  'http://bronyrock.com/track/the-pony-mash' ,   58                  'title' :  'The Pony Mash' ,   59                  'uploader' :  'M_Pallante' ,   61              'skip' :  'There is a limit of 200 free downloads / month for the test song' ,   63          # embedded brightcove video   64          # it also tests brightcove videos that need to set the 'Referer' in the   67              'add_ie' : [ 'Brightcove' ],   68              'url' :  'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/' ,   70                  'id' :  '2765128793001' ,   72                  'title' :  'Le cours de bourse : l’analyse technique' ,   73                  'description' :  'md5:7e9ad046e968cb2d1114004aba466fd9' ,   74                  'uploader' :  'BFM BUSINESS' ,   77                  'skip_download' :  True ,   81              # https://github.com/rg3/youtube-dl/issues/2253   82              'url' :  'http://bcove.me/i6nfkrc3' ,   83              'md5' :  '0ba9446db037002366bab3b3eb30c88c' ,   85                  'id' :  '3101154703001' ,   87                  'title' :  'Still no power' ,   88                  'uploader' :  'thestar.com' ,   89                  'description' :  'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.' ,   91              'add_ie' : [ 'Brightcove' ],   94              'url' :  'http://www.championat.com/video/football/v/87/87499.html' ,   95              'md5' :  'fb973ecf6e4a78a67453647444222983' ,   97                  'id' :  '3414141473001' ,   99                  'title' :  'Видео. Удаление Дзагоева (ЦСКА)' ,  100                  'description' :  'Онлайн-трансляция матча ЦСКА - "Волга"' ,  101                  'uploader' :  'Championat' ,  105              # https://github.com/rg3/youtube-dl/issues/3541  106              'add_ie' : [ 'Brightcove' ],  107              'url' :  'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1' ,  109                  'id' :  '3866516442001' ,  111                  'title' :  'Leer mij vrouwen kennen: Aflevering 1' ,  112                  'description' :  'Leer mij vrouwen kennen: Aflevering 1' ,  113                  'uploader' :  'SBS Broadcasting' ,  115              'skip' :  'Restricted to Netherlands' ,  117                  'skip_download' :  True ,   # m3u8 download  120          # Direct link to a video  122              'url' :  'http://media.w3.org/2010/05/sintel/trailer.mp4' ,  123              'md5' :  '67d406c2bcb6af27fa886f31aa934bbe' ,  128                  'upload_date' :  '20100513' ,  133              'url' :  'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219' ,  134              'md5' :  '166dd577b433b4d4ebfee10b0824d8ff' ,  136                  'id' :  'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ' ,  138                  'title' :  '2cc213299525360.mov' ,   # that's what we get  140              'add_ie' : [ 'Ooyala' ],  144              'url' :  'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE' ,  148                  'upload_date' :  '20130224' ,  149                  'uploader_id' :  'TheVerge' ,  150                  'description' :  're:^Chris Ziegler takes a look at the\.*' ,  151                  'uploader' :  'The Verge' ,  152                  'title' :  'First Firefox OS phones side-by-side' ,  155                  'skip_download' :  False ,  160              'url' :  'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/' ,  164                  'title' :  'Tested: Grinding Coffee at 2000 Frames Per Second' ,  165                  'upload_date' :  '20140225' ,  166                  'description' :  'md5:06a40fbf30b220468f1e0957c0f558ff' ,  167                  'uploader' :  'Tested' ,  168                  'uploader_id' :  'testedcom' ,  170              # No need to test YoutubeIE here  172                  'skip_download' :  True ,  177              'url' :  'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns' ,  181                  'title' :  'Between Two Ferns with Zach Galifianakis: President Barack Obama' ,  182                  'description' :  'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.' ,  187              'url' :  'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER' ,  189                  'title' :  'BBC - Blogs -  Adam Curtis - BUGGER' ,  191              'playlist_mincount' :  18 ,  195              'url' :  'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html' ,  199                  'title' :  'Охотское море стало целиком российским' ,  200                  'description' :  'md5:5ed62483b14663e2a95ebbe115eb8f43' ,  204                  'skip_download' :  True ,  209              'url' :  'http://en.support.wordpress.com/videos/ted-talks/' ,  210              'md5' :  '65fdff94098e4a607385a60c5177c638' ,  214                  'title' :  'Hidden miracles of the natural world' ,  215                  'uploader' :  'Louie Schwartzberg' ,  216                  'description' :  'md5:8145d19d320ff3e52f28401f4c4283b9' ,  219          # Embeded Ustream video  221              'url' :  'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm' ,  222              'md5' :  '27b99cdb639c9b12a79bca876a073417' ,  226                  'uploader' :  'AU SPA:  The NSA and Privacy' ,  227                  'title' :  'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'  230          # nowvideo embed hidden behind percent encoding  232              'url' :  'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/' ,  233              'md5' :  '2baf4ddd70f697d94b1c18cf796d5107' ,  235                  'id' :  '06e53103ca9aa' ,  237                  'title' :  'Macross Episode 001  Watch Macross Episode 001 onl' ,  238                  'description' :  'No description' ,  243              'url' :  'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html' ,  244              'md5' :  '7653032cbb25bf6c80d80f217055fa43' ,  246                  'id' :  '048195-004_PLUS7-F' ,  249                  'description' :  'md5:d5fdf32ef6613cdbfd516ae658abf168' ,  250                  'upload_date' :  '20140320' ,  253                  'skip_download' :  'Requires rtmpdump'  258              'url' :  'http://www.wired.com/2014/04/honda-asimo/' ,  259              'md5' :  'ba0dfe966fa007657bd1443ee672db0f' ,  261                  'id' :  '53501be369702d3275860000' ,  263                  'title' :  'Honda’s  New Asimo Robot Is More Human Than Ever' ,  268              'url' :  'http://www.spi0n.com/zap-spi0n-com-n216/' ,  269              'md5' :  '441aeeb82eb72c422c7f14ec533999cd' ,  271                  'id' :  'k2mm4bCdJ6CQ2i7c8o2' ,  273                  'title' :  'Le Zap de Spi0n n°216 - Zapping du Web' ,  276              'add_ie' : [ 'Dailymotion' ],  280              'url' :  'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html' ,  284                  'title' :  'The NBL Auction 2014' ,  285                  'uploader' :  'BADMINTON England' ,  286                  'uploader_id' :  'BADMINTONEvents' ,  287                  'upload_date' :  '20140603' ,  288                  'description' :  'md5:9ef128a69f1e262a700ed83edb163a73' ,  290              'add_ie' : [ 'Youtube' ],  292                  'skip_download' :  True ,  297              'url' :  'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too' ,  298              'md5' :  '35727f82f58c76d996fc188f9755b0d5' ,  300                  'id' :  '0306a69b-8adf-4fb5-aace-75f8e8cbfca9' ,  303                  'description' :  'Mario \' s life in the fast lane has never looked so good.' ,  306          # YouTube embed via <data-embed-url="">  308              'url' :  'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM' ,  312                  'title' :  'Asphalt 8: Airborne - Update - Welcome to Dubai!' ,  313                  'uploader' :  'Gameloft' ,  314                  'uploader_id' :  'gameloft' ,  315                  'upload_date' :  '20140828' ,  316                  'description' :  'md5:c80da9ed3d83ae6d1876c834de03e1c4' ,  319                  'skip_download' :  True ,  324              'url' :  'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/' ,  326                  'md5' :  '0c5e352edabf715d762b0ad4e6d9ee67' ,  328                      'id' :  'Fenn-AA_PA_Radar_Course_Lecture_1c_Final' ,  329                      'title' :  'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1' ,  334                  'md5' :  '10e4bb3aaca9fd630e273ff92d9f3c63' ,  336                      'id' :  'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP' ,  337                      'title' :  'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip' ,  343                  'title' :  'Fenn-AA_PA_Radar_Course_Lecture_1c_Final' ,  348              'url' :  'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html' ,  349              'md5' :  '9d65602bf31c6e20014319c7d07fba27' ,  351                  'id' :  '5123ea6d5e5a7' ,  354                  'uploader' :  'www.handjobhub.com' ,  355                  'title' :  'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com' ,  360              'url' :  'http://phihag.de/2014/youtube-dl/rss2.xml' ,  362                  'id' :  'http://phihag.de/2014/youtube-dl/rss2.xml' ,  363                  'title' :  'Zero Punctuation' ,  366              'playlist_mincount' :  11 ,  368          # Multiple brightcove videos  369          # https://github.com/rg3/youtube-dl/issues/2283  371              'url' :  'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html' ,  373                  'id' :  'always-never' ,  374                  'title' :  'Always / Never - The New Yorker' ,  378                  'extract_flat' :  False ,  379                  'skip_download' :  True ,  384              'url' :  'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/' ,  385              'md5' :  '96f09a37e44da40dd083e12d9a683327' ,  389                  'title' :  'Ump changes call to ball' ,  390                  'description' :  'md5:71c11215384298a172a6dcb4c2e20685' ,  392                  'timestamp' :  1401537900 ,  393                  'upload_date' :  '20140531' ,  394                  'thumbnail' :  're:^https?://.*\.jpg$' ,  399              'url' :  'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson' ,  400              'md5' :  '8788b683c777a5cf25621eaf286d0c23' ,  404                  'title' :  'md5:51364a8d3d009997ba99656004b5e20d' ,  406                  'filesize' :  182808282 ,  407                  'uploader' :  'education-portal.com' ,  411              'url' :  'http://thoughtworks.wistia.com/medias/uxjb0lwrcz' ,  412              'md5' :  'baf49c2baa8a7de5f3fc145a8506dcd4' ,  416                  'title' :  'Conversation about Hexagonal Rails Part 1 - ThoughtWorks' ,  418                  'uploader' :  'thoughtworks.wistia.com' ,  421          # Direct download with broken HEAD  423              'url' :  'http://ai-radio.org:8000/radio.opus' ,  430                  'skip_download' :  True ,   # infinite live stream  432              'expected_warnings' : [  433                  r
'501.*Not Implemented'  438              'url' :  'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/' ,  442                  'description' :  'md5:ff867d6b555488ad3c52572bb33d432c' ,  443                  'uploader' :  'Sophos Security' ,  444                  'title' :  'Chet Chat 171 - Oct 29, 2014' ,  445                  'upload_date' :  '20141029' ,  450              'url' :  'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast' ,  454                  'upload_date' :  '20141112' ,  455                  'title' :  'Rosetta #CometLanding webcast HL 10' ,  460              'url' :  'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986' ,  462                  'title' :  'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse' ,  464              'playlist_mincount' :  2 ,  466          # Direct link with incorrect MIME type  468              'url' :  'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm' ,  469              'md5' :  '4ccbebe5f36706d85221f204d7eb5913' ,  471                  'url' :  'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm' ,  472                  'id' :  '5_Lennart_Poettering_-_Systemd' ,  474                  'title' :  '5_Lennart_Poettering_-_Systemd' ,  475                  'upload_date' :  '20141120' ,  477              'expected_warnings' : [  478                  'URL could be a direct video link, returning it as such.'  483              'url' :  'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/' ,  487                  'upload_date' :  '20141126' ,  488                  'title' :  'Jack Tips: 5 Steps to Permanent Gut Healing' ,  493      def  report_following_redirect ( self
,  new_url
):  494          """Report information extraction."""  495          self
._ downloader
. to_screen ( '[redirect] Following redirect to  %s '  %  new_url
)  497      def  _extract_rss ( self
,  url
,  video_id
,  doc
):  498          playlist_title 
=  doc
. find ( './channel/title' ). text
 499          playlist_desc_el 
=  doc
. find ( './channel/description' )  500          playlist_desc 
=  None if  playlist_desc_el 
is None else  playlist_desc_el
. text
 504              'url' :  e
. find ( 'link' ). text
,  505              'title' :  e
. find ( 'title' ). text
,  506          }  for  e 
in  doc
. findall ( './channel/item' )]  511              'title' :  playlist_title
,  512              'description' :  playlist_desc
,  516      def  _extract_camtasia ( self
,  url
,  video_id
,  webpage
):  517          """ Returns None if no camtasia video can be found. """  519          camtasia_cfg 
=  self
._ search
_ regex
(  520              r
'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);' ,  521              webpage
,  'camtasia configuration file' ,  default
= None )  522          if  camtasia_cfg 
is None :  525          title 
=  self
._ html
_ search
_ meta
( 'DC.title' ,  webpage
,  fatal
= True )  527          camtasia_url 
=  compat_urlparse
. urljoin ( url
,  camtasia_cfg
)  528          camtasia_cfg 
=  self
._ download
_ xml
(  529              camtasia_url
,  video_id
,  530              note
= 'Downloading camtasia configuration' ,  531              errnote
= 'Failed to download camtasia configuration' )  532          fileset_node 
=  camtasia_cfg
. find ( './playlist/array/fileset' )  535          for  n 
in  fileset_node
. getchildren ():  536              url_n 
=  n
. find ( './uri' )  541                  'id' :  os
. path
. splitext ( url_n
. text
. rpartition ( '/' )[ 2 ])[ 0 ],  542                  'title' :  ' %s  -  %s '  % ( title
,  n
. tag
),  543                  'url' :  compat_urlparse
. urljoin ( url
,  url_n
. text
),  544                  'duration' :  float_or_none ( n
. find ( './duration' ). text
),  553      def  _real_extract ( self
,  url
):  554          if  url
. startswith ( '//' ):  557                  'url' :  self
. http_scheme () +  url
,  560          parsed_url 
=  compat_urlparse
. urlparse ( url
)  561          if not  parsed_url
. scheme
:  562              default_search 
=  self
._ downloader
. params
. get ( 'default_search' )  563              if  default_search 
is None :  564                  default_search 
=  'fixup_error'  566              if  default_search 
in  ( 'auto' ,  'auto_warning' ,  'fixup_error' ):  568                      self
._ downloader
. report_warning ( 'The url doesn \' t specify the protocol, trying with http' )  569                      return  self
. url_result ( 'http://'  +  url
)  570                  elif  default_search 
!=  'fixup_error' :  571                      if  default_search 
==  'auto_warning' :  572                          if  re
. match ( r
'^(?:url|URL)$' ,  url
):  573                              raise  ExtractorError (  574                                  'Invalid URL:   %r  . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  '  %  url
,  577                              self
._ downloader
. report_warning (  578                                  'Falling back to youtube search for   %s  . Set --default-search "auto" to suppress this warning.'  %  url
)  579                      return  self
. url_result ( 'ytsearch:'  +  url
)  581              if  default_search 
in  ( 'error' ,  'fixup_error' ):  582                  raise  ExtractorError (  583                      ' %r  is not a valid URL. '  584                      'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch: %s " ) to search YouTube'  585                      % ( url
,  url
),  expected
= True )  587                  if  ':'  not in  default_search
:  588                      default_search 
+=  ':'  589                  return  self
. url_result ( default_search 
+  url
)  591          url
,  smuggled_data 
=  unsmuggle_url ( url
)  593          is_intentional 
=  smuggled_data 
and  smuggled_data
. get ( 'to_generic' )  594          if  smuggled_data 
and  'force_videoid'  in  smuggled_data
:  595              force_videoid 
=  smuggled_data
[ 'force_videoid' ]  596              video_id 
=  force_videoid
 598              video_id 
=  os
. path
. splitext ( url
. rstrip ( '/' ). split ( '/' )[- 1 ])[ 0 ]  600          self
. to_screen ( ' %s : Requesting header'  %  video_id
)  602          head_req 
=  HEADRequest ( url
)  603          head_response 
=  self
._ request
_ webpage
(  605              note
= False ,  errnote
= 'Could not send HEAD request to  %s '  %  url
,  608          if  head_response 
is not False :  610              new_url 
=  head_response
. geturl ()  612                  self
. report_following_redirect ( new_url
)  614                      new_url 
=  smuggle_url (  615                          new_url
, { 'force_videoid' :  force_videoid
})  616                  return  self
. url_result ( new_url
)  619          if  head_response 
is False :  620              full_response 
=  self
._ request
_ webpage
( url
,  video_id
)  621              head_response 
=  full_response
 623          # Check for direct link to a video  624          content_type 
=  head_response
. headers
. get ( 'Content-Type' ,  '' )  625          m 
=  re
. match ( r
'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$' ,  content_type
)  627              upload_date 
=  unified_strdate (  628                  head_response
. headers
. get ( 'Last-Modified' ))  631                  'title' :  os
. path
. splitext ( url_basename ( url
))[ 0 ],  634                      'format_id' :  m
. group ( 'format_id' ),  636                      'vcodec' :  'none'  if  m
. group ( 'type' ) ==  'audio'  else None  638                  'upload_date' :  upload_date
,  641          if not  self
._ downloader
. params
. get ( 'test' ,  False )  and not  is_intentional
:  642              self
._ downloader
. report_warning ( 'Falling back on generic information extractor.' )  644          if not  full_response
:  645              full_response 
=  self
._ request
_ webpage
( url
,  video_id
)  647          # Maybe it's a direct link to a video?  648          # Be careful not to download the whole thing!  649          first_bytes 
=  full_response
. read ( 512 )  650          if not  re
. match ( r
'^\s*<' ,  first_bytes
. decode ( 'utf-8' ,  'replace' )):  651              self
._ downloader
. report_warning (  652                  'URL could be a direct video link, returning it as such.' )  653              upload_date 
=  unified_strdate (  654                  head_response
. headers
. get ( 'Last-Modified' ))  657                  'title' :  os
. path
. splitext ( url_basename ( url
))[ 0 ],  660                  'upload_date' :  upload_date
,  663          webpage 
=  self
._ webpage
_ read
_ content
(  664              full_response
,  url
,  video_id
,  prefix
= first_bytes
)  666          self
. report_extraction ( video_id
)  670              doc 
=  parse_xml ( webpage
)  672                  return  self
._ extract
_ rss
( url
,  video_id
,  doc
)  673          except  compat_xml_parse_error
:  676          # Is it a Camtasia project?  677          camtasia_res 
=  self
._ extract
_ camtasia
( url
,  video_id
,  webpage
)  678          if  camtasia_res 
is not None :  681          # Sometimes embedded video player is hidden behind percent encoding  682          # (e.g. https://github.com/rg3/youtube-dl/issues/2448)  683          # Unescaping the whole page allows to handle those cases in a generic way  684          webpage 
=  compat_urllib_parse
. unquote ( webpage
)  686          # it's tempting to parse this further, but you would  687          # have to take into account all the variations like  688          #   Video Title - Site Name  689          #   Site Name | Video Title  690          #   Video Title - Tagline | Site Name  691          # and so on and so forth; it's just not practical  692          video_title 
=  self
._ html
_ search
_ regex
(  693              r
'(?s)<title>(.*?)</title>' ,  webpage
,  'video title' ,  696          # Try to detect age limit automatically  697          age_limit 
=  self
._ rta
_ search
( webpage
)  698          # And then there are the jokers who advertise that they use RTA,  699          # but actually don't.  700          AGE_LIMIT_MARKERS 
= [  701              r
'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>' ,  703          if  any ( re
. search ( marker
,  webpage
)  for  marker 
in  AGE_LIMIT_MARKERS
):  706          # video uploader is domain name  707          video_uploader 
=  self
._ search
_ regex
(  708              r
'^(?:https?://)?([^/]*)/.*' ,  url
,  'video uploader' )  711          def  _playlist_from_matches ( matches
,  getter
= None ,  ie
= None ):  713                  self
. url_result ( self
._ proto
_ relative
_u rl
( getter ( m
)  if  getter 
else  m
),  ie
)  715              return  self
. playlist_result (  716                  urlrs
,  playlist_id
= video_id
,  playlist_title
= video_title
)  718          # Look for BrightCove:  719          bc_urls 
=  BrightcoveIE
._ extract
_ brightcove
_u rls
( webpage
)  721              self
. to_screen ( 'Brightcove video detected.' )  724                  'url' :  smuggle_url ( bc_url
, { 'Referer' :  url
}),  725                  'ie_key' :  'Brightcove'  726              }  for  bc_url 
in  bc_urls
]  730                  'title' :  video_title
,  735          # Look for embedded (iframe) Vimeo player  737              r
'<iframe[^>]+?src=(["\' ])( ?P
< url
>( ?
: https?
:) ?
// player\
. vimeo\
. com
/ video
/.+ ?
) \
1 ', webpage)  739              player_url = unescapeHTML(mobj.group(' url
'))  740              surl = smuggle_url(player_url, {' Referer
': url})  741              return self.url_result(surl)  743          # Look for embedded (swf embed) Vimeo player  745              r' < embed
[ ^
>]+ ?src
= "((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)" ', webpage)  747              return self.url_result(mobj.group(1))  749          # Look for embedded YouTube player  750          matches = re.findall(r'''(?x)  759                  (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/  763              return _playlist_from_matches(  764                  matches, lambda m: unescapeHTML(m[1]))  766          # Look for lazyYT YouTube embed  767          matches = re.findall(  768              r' class = "lazyYT"  data
- youtube
- id = "([^" ]+) "', webpage)  770              return _playlist_from_matches(matches, lambda m: unescapeHTML(m))  772          # Look for embedded Dailymotion player  773          matches = re.findall(  774              r'<iframe[^>]+?src=([" \' ])( ?P
< url
>( ?
: https?
:) ?
//( ?
: www\
.) ?dailymotion\
. com
/ embed
/ video
/.+ ?
) \
1 ', webpage)  776              return _playlist_from_matches(  777                  matches, lambda m: unescapeHTML(m[1]))  779          # Look for embedded Dailymotion playlist player (#3822)  781              r' < iframe
[ ^
>]+ ?src
=([ " \' ])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox \? .+?)\1', webpage)  783              playlists = re.findall(  784                  r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))  786                  return _playlist_from_matches(  787                      playlists, lambda p: '//dailymotion.com/playlist/ %s ' % p)  789          # Look for embedded Wistia player  791              r'<(?:meta[^>]+?content|iframe[^>]+?src)=([" \' ])( ?P
< url
>( ?
: https?
:) ?
//( ?
: fast\
.) ?wistia\
. net
/ embed
/ iframe
/.+ ?
) \
1 ', webpage)  793              embed_url = self._proto_relative_url(  794                  unescapeHTML(match.group(' url
')))  796                  ' _type
': ' url_transparent
',  799                  ' uploader
': video_uploader,  800                  ' title
': video_title,  804          match = re.search(r' ( ?
: id =[ " \' ]wistia_|data-wistia-?id=[" \' ] |Wistia\
. embed\
([ " \' ])(?P<id>[^" \' ]+) ', webpage)  807                  ' _type
': ' url_transparent
',  808                  ' url
': ' http
:// fast
. wistia
. net
/ embed
/ iframe
/ {0:}
'.format(match.group(' id ')),  810                  ' uploader
': video_uploader,  811                  ' title
': video_title,  812                  ' id ': match.group(' id ')  815          # Look for embedded blip.tv player  816          mobj = re.search(r' < meta\s
[ ^
>]* https?
:// api\
. blip\
. tv
/ \w
+/ redirect
/ \w
+/( \d
+) ', webpage)  818              return self.url_result(' http
:// blip
. tv
/ a
/ a
- ' + mobj.group(1), ' BlipTV
')  819          mobj = re.search(r' <( ?
: iframe|embed|
object ) \s
[ ^
>]*( https?
://( ?
: \w
+ \
.) ?blip\
. tv
/( ?
: play
/ |api\
. swf
#)[a-zA-Z0-9_]+)', webpage)  821              return  self
. url_result ( mobj
. group ( 1 ),  'BlipTV' )  823          # Look for embedded condenast player  824          matches 
=  re
. findall (  825              r
'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")' ,  832                      'ie_key' :  'CondeNast' ,  834                  }  for  ma 
in  matches
],  835                  'title' :  video_title
,  839          # Look for Bandcamp pages with custom domain  840          mobj 
=  re
. search ( r
'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"' ,  webpage
)  842              burl 
=  unescapeHTML ( mobj
. group ( 1 ))  843              # Don't set the extractor because it can be a track url or an album  844              return  self
. url_result ( burl
)  846          # Look for embedded Vevo player  848              r
'<iframe[^>]+?src=(["\' ])( ?P
< url
>( ?
: https?
:) ?
//( ?
: cache\
.) ?vevo\
. com
/.+ ?
) \
1 ', webpage)  850              return self.url_result(mobj.group(' url
'))  852          # Look for Ooyala videos  853          mobj = (re.search(r' player
. ooyala
. com
/[ ^
"?]+ \? [^" ]* ?
( ?
: embedCode|ec
)=( ?P
< ec
>[ ^
"&]+)', webpage) or  854                  re.search(r'OO.Player.create\([ \' " ].* ?
[ \' "],\s*[ \' " ]( ?P
< ec
>. {32}
)[ \' "]', webpage))  856              return OoyalaIE._build_url_result(mobj.group('ec'))  858          # Look for Aparat videos  859          mobj = re.search(r'<iframe .*?src=" ( http
:// www\
. aparat\
. com
/ video
/[ ^
"]+)" ', webpage)  861              return self.url_result(mobj.group(1), ' Aparat
')  863          # Look for MPORA videos  864          mobj = re.search(r' < iframe 
.* ?src
= "(http://mpora\.(?:com|de)/videos/[^" ]+) "', webpage)  866              return self.url_result(mobj.group(1), 'Mpora')  868          # Look for embedded NovaMov-based player  870              r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=([" \' ])  871                      ( ?P
< url
> http
://( ?
:( ?
: embed|www
) \
.) ?
 873                             nowvideo\
.( ?
: ch|sx|eu|at|ag|co
) |
 874                             videoweed\
.( ?
: es|com
) |
 875                             movshare\
.( ?
: net|sx|ag
) |
 876                             divxstage\
.( ?
: eu|net|ch|co|at|ag
))  877                          / embed\
. php
.+ ?
) \
1 ''', webpage)  879              return self.url_result(mobj.group('url'))  881          # Look for embedded Facebook player  883              r'<iframe[^>]+?src=([" \' ])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)  885              return self.url_result(mobj.group('url'), 'Facebook')  887          # Look for embedded VK player  888          mobj = re.search(r'<iframe[^>]+?src=([" \' ])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)  890              return self.url_result(mobj.group('url'), 'VK')  892          # Look for embedded ivi player  893          mobj = re.search(r'<embed[^>]+?src=([" \' ])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)  895              return self.url_result(mobj.group('url'), 'Ivi')  897          # Look for embedded Huffington Post player  899              r'<iframe[^>]+?src=([" \' ])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)  901              return self.url_result(mobj.group('url'), 'HuffPost')  904          mobj = re.search(r'class=[" \' ]embedly-card[" \' ][^>]href=[" \' ](?P<url>[^" \' ]+)', webpage)  906              return self.url_result(mobj.group('url'))  907          mobj = re.search(r'class=[" \' ]embedly-embed[" \' ][^>]src=[" \' ][^" \' ]*url=(?P<url>[^&]+)', webpage)  909              return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))  911          # Look for funnyordie embed  912          matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)  914              return _playlist_from_matches(  915                  matches, getter=unescapeHTML, ie='FunnyOrDie')  917          # Look for BBC iPlayer embed  918          matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z] {8} )"\)', webpage)  920              return _playlist_from_matches(matches, ie='BBCCoUk')  922          # Look for embedded RUTV player  923          rutv_url = RUTVIE._extract_url(webpage)  925              return self.url_result(rutv_url, 'RUTV')  927          # Look for embedded TED player  929              r'<iframe[^>]+?src=([" \' ])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)  931              return self.url_result(mobj.group('url'), 'TED')  933          # Look for embedded Ustream videos  935              r'<iframe[^>]+?src=([" \' ])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)  937              return self.url_result(mobj.group('url'), 'Ustream')  939          # Look for embedded arte.tv player  941              r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',  944              return self.url_result(mobj.group('url'), 'ArteTVEmbed')  946          # Look for embedded smotri.com player  947          smotri_url = SmotriIE._extract_url(webpage)  949              return self.url_result(smotri_url, 'Smotri')  951          # Look for embeded soundcloud player  953              r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',  956              url = unescapeHTML(mobj.group('url'))  957              return self.url_result(url)  959          # Look for embedded vulture.com player  961              r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',  964              url = unescapeHTML(mobj.group('url'))  965              return self.url_result(url, ie='Vulture')  967          # Look for embedded mtvservices player  969              r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',  972              url = unescapeHTML(mobj.group('url'))  973              return self.url_result(url, ie='MTVServicesEmbedded')  975          # Look for embedded yahoo player  977              r'<iframe[^>]+?src=([" \' ])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html \? format=embed)\1',  980              return self.url_result(mobj.group('url'), 'Yahoo')  982          # Look for embedded sbs.com.au player  984              r'<iframe[^>]+?src=([" \' ])(?P<url>https?://(?:www\.)sbs\.com\.au/ondemand/video/single/.+?)\1',  987              return self.url_result(mobj.group('url'), 'SBS')  989          # Look for embedded Cinchcast player  991              r'<iframe[^>]+?src=([" \' ])(?P<url>https?://player\.cinchcast\.com/.+?)\1',  994              return self.url_result(mobj.group('url'), 'Cinchcast')  997              r'<iframe[^>]+?src=([" \' ])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html \? .+?)\1', 1000              return self.url_result(mobj.group('url'), 'MLB') 1003              r'<iframe[^>]+?src=([" \' ])(?P<url> %s )\1' % CondeNastIE.EMBED_URL, 1005          if mobj is not None: 1006              return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast') 1009              r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"', 1011          if mobj is not None: 1012              return self.url_result(mobj.group('url'), 'Livestream') 1014          def check_video(vurl): 1015              vpath = compat_urlparse.urlparse(vurl).path 1016              vext = determine_ext(vpath) 1017              return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml') 1019          def filter_video(urls): 1020              return list(filter(check_video, urls)) 1022          # Start with something easy: JW Player in SWFObject 1023          found = filter_video(re.findall(r'flashvars: [ \' "](?:.*&)?file=(http[^ \' "&]*)', webpage)) 1025              # Look for gorilla-vid style embedding 1026              found = filter_video(re.findall(r''' ( ?sx
) 1030                      jwplayer\s
* \
( \s
*[ "'][^'" ]+[ "']\s*\)\s*\.setup 1032                  .*?file\s*:\s*[" \' ](.* ?
)[ " \' ]''', webpage)) 1034              # Broaden the search a little bit 1035              found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^ \' " &]*) ', webpage)) 1037              # Broaden the findall a little bit: JWPlayer JS loader 1038              found = filter_video(re.findall( 1039                  r' [ ^A
- Za
- z0
- 9 ] ?
file [ " \' ]?:\s*[" \' ]( http ( ?
![ ^
\' "]+\.[0-9]+[ \' " ])[ ^
\' "]+)[" \' ] ', webpage)) 1042              found = filter_video(re.findall(r'''(?xs) 1043                  flowplayer\("[^"]+",\s* 1045                      \s*{[^}]+? ["' ] ?clip
[ "']?\s*:\s*\{\s* 1046                          [" ']?url["' ] ?\s
*: \s
*[ "']([^" ']+)["' ] 1049              # Try to find twitter cards info 1050              found = filter_video(re.findall( 1051                  r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)) 1053              # We look for Open Graph info: 1054              # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am) 1055              m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage) 1056              # We only look in og:video if the MIME type is a video, don't try if it's a Flash player: 1057              if m_video_type is not None: 1058                  found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)) 1061              found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=[" \' ](.*?)[" \' ]', webpage) 1064                  r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")' 1065                  r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url= \' ?([^ \' "]+)', 1068                  new_url = found.group(1) 1069                  self.report_following_redirect(new_url) 1075              raise UnsupportedError(url) 1078          for video_url in found: 1079              video_url = compat_urlparse.urljoin(url, video_url) 1080              video_id = compat_urllib_parse.unquote(os.path.basename(video_url)) 1082              # Sometimes, jwplayer extraction will result in a YouTube URL 1083              if YoutubeIE.suitable(video_url): 1084                  entries.append(self.url_result(video_url, 'Youtube')) 1087              # here's a fun little line of code for you: 1088              video_id = os.path.splitext(video_id)[0] 1093                  'uploader': video_uploader, 1094                  'title': video_title, 1095                  'age_limit': age_limit, 1098          if len(entries) == 1: 1101              for num, e in enumerate(entries, start=1): 1102                  e['title'] = ' %s  ( %d )' % (e['title'], num) 1104                  '_type': 'playlist',