]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/bbc.py 
   2  from  __future__ 
import  unicode_literals
   6  from  . common 
import  InfoExtractor
  16  from  .. compat 
import  (   17      compat_etree_fromstring
,   22  class  BBCCoUkIE ( InfoExtractor
):   24      IE_DESC 
=  'BBC iPlayer'   25      _VALID_URL 
=  r
'https?://(?:www\.)?bbc\.co\.uk/(?:(?:programmes/(?!articles/)|iplayer(?:/[^/]+)?/(?:episode/|playlist/))|music/clips[/#])(?P<id>[\da-z] {8} )'   27      _MEDIASELECTOR_URLS 
= [   28          # Provides HQ HLS streams with even better quality that pc mediaset but fails   29          # with geolocation in some cases when it's even not geo restricted at all (e.g.   30          # http://www.bbc.co.uk/programmes/b06bp7lf)   31          'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/ %s ' ,   32          'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/ %s ' ,   35      _MEDIASELECTION_NS 
=  'http://bbc.co.uk/2008/mp/mediaselection'   36      _EMP_PLAYLIST_NS 
=  'http://bbc.co.uk/2008/emp/playlist'   45              'url' :  'http://www.bbc.co.uk/programmes/b039g8p7' ,   49                  'title' :  'Kaleidoscope, Leonard Cohen' ,   50                  'description' :  'The Canadian poet and songwriter reflects on his musical career.' ,   55                  'skip_download' :  True ,   59              'url' :  'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/' ,   63                  'title' :  'The Man in Black: Series 3: The Printed Name' ,   64                  'description' :  "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey." ,   69                  'skip_download' :  True ,   71              'skip' :  'Episode is no longer available on BBC iPlayer Radio' ,   74              'url' :  'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/' ,   78                  'title' :  'The Voice UK: Series 3: Blind Auditions 5' ,   79                  'description' :  "Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone." ,   84                  'skip_download' :  True ,   86              'skip' :  'Currently BBC iPlayer TV programmes are available to play in the UK only' ,   89              'url' :  'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion' ,   93                  'title' :  "Tomorrow's Worlds: The Unearthly History of Science Fiction" ,   94                  'description' :  '2. Invasion' ,   99                  'skip_download' :  True ,  101              'skip' :  'Currently BBC iPlayer TV programmes are available to play in the UK only' ,  103              'url' :  'http://www.bbc.co.uk/programmes/b04v20dw' ,  107                  'title' :  'Pete Tong, The Essential New Tune Special' ,  108                  'description' :  "Pete has a very special mix - all of 2014's Essential New Tunes!" ,  113                  'skip_download' :  True ,  116              'url' :  'http://www.bbc.co.uk/music/clips/p02frcc3' ,  121                  'title' :  'Pete Tong, Past, Present and Future Special, Madeon - After Hours mix' ,  122                  'description' :  'French house superstar Madeon takes us out of the club and onto the after party.' ,  127                  'skip_download' :  True ,  130              'url' :  'http://www.bbc.co.uk/music/clips/p025c0zz' ,  135                  'title' :  'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)' ,  136                  'description' :  'Rae Morris performs Closer for BBC Three at Reading 2014' ,  141                  'skip_download' :  True ,  144              'url' :  'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls' ,  148                  'title' :  'Natural World, 2015-2016: 2. Super Powered Owls' ,  149                  'description' :  'md5:e4db5c937d0e95a7c6b5e654d429183d' ,  154                  'skip_download' :  True ,  156              'skip' :  'geolocation' ,  158              'url' :  'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition' ,  162                  'description' :  'Kirsty Wark and Morgan Quaintance visit the Royal Academy as it prepares for its annual artistic extravaganza, meeting people who have come together to make the show unique.' ,  163                  'title' :  'Royal Academy Summer Exhibition' ,  168                  'skip_download' :  True ,  170              'skip' :  'geolocation' ,  172              # iptv-all mediaset fails with geolocation however there is no geo restriction  173              # for this programme at all  174              'url' :  'http://www.bbc.co.uk/programmes/b06bp7lf' ,  178                  'title' :  "Annie Mac's Friday Night, B.Traits sits in for Annie" ,  179                  'description' :  'B.Traits sits in for Annie Mac with a Mini-Mix from Disclosure.' ,  184                  'skip_download' :  True ,  187              'url' :  'http://www.bbc.co.uk/iplayer/playlist/p01dvks4' ,  188              'only_matching' :  True ,  190              'url' :  'http://www.bbc.co.uk/music/clips#p02frcc3' ,  191              'only_matching' :  True ,  193              'url' :  'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo' ,  194              'only_matching' :  True ,  198      class  MediaSelectionError ( Exception ):  199          def  __init__ ( self
,  id ):  202      def  _extract_asx_playlist ( self
,  connection
,  programme_id
):  203          asx 
=  self
._ download
_ xml
( connection
. get ( 'href' ),  programme_id
,  'Downloading ASX playlist' )  204          return  [ ref
. get ( 'href' )  for  ref 
in  asx
. findall ( './Entry/ref' )]  206      def  _extract_connection ( self
,  connection
,  programme_id
):  208          kind 
=  connection
. get ( 'kind' )  209          protocol 
=  connection
. get ( 'protocol' )  210          supplier 
=  connection
. get ( 'supplier' )  211          if  protocol 
==  'http' :  212              href 
=  connection
. get ( 'href' )  213              transfer_format 
=  connection
. get ( 'transferFormat' )  215              if  supplier 
==  'asx' :  216                  for  i
,  ref 
in  enumerate ( self
._ extract
_ asx
_ playlist
( connection
,  programme_id
)):  219                          'format_id' :  'ref %s _ %s '  % ( i
,  supplier
),  221              # Skip DASH until supported  222              elif  transfer_format 
==  'dash' :  224              elif  transfer_format 
==  'hls' :  225                  m3u8_formats 
=  self
._ extract
_ m
3u8_ formats
(  226                      href
,  programme_id
,  ext
= 'mp4' ,  entry_protocol
= 'm3u8_native' ,  227                      m3u8_id
= supplier
,  fatal
= False )  229                      formats
. extend ( m3u8_formats
)  234                      'format_id' :  supplier 
or  kind 
or  protocol
,  236          elif  protocol 
==  'rtmp' :  237              application 
=  connection
. get ( 'application' ,  'ondemand' )  238              auth_string 
=  connection
. get ( 'authString' )  239              identifier 
=  connection
. get ( 'identifier' )  240              server 
=  connection
. get ( 'server' )  242                  'url' :  ' %s :// %s / %s ? %s '  % ( protocol
,  server
,  application
,  auth_string
),  243                  'play_path' :  identifier
,  244                  'app' :  ' %s ? %s '  % ( application
,  auth_string
),  245                  'page_url' :  'http://www.bbc.co.uk' ,  246                  'player_url' :  'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf' ,  249                  'format_id' :  supplier
,  253      def  _extract_items ( self
,  playlist
):  254          return  playlist
. findall ( './{ %s }item'  %  self
._ EMP
_ PLAYLIST
_ NS
)  256      def  _findall_ns ( self
,  element
,  xpath
):  258          for  ns 
in  self
._ NAMESPACES
:  259              elements
. extend ( element
. findall ( xpath 
%  ns
))  262      def  _extract_medias ( self
,  media_selection
):  263          error 
=  media_selection
. find ( './{ %s }error'  %  self
._ MEDIASELECTION
_ NS
)  265              media_selection
. find ( './{ %s }error'  %  self
._ EMP
_ PLAYLIST
_ NS
)  266          if  error 
is not None :  267              raise  BBCCoUkIE
. MediaSelectionError ( error
. get ( 'id' ))  268          return  self
._ findall
_ ns
( media_selection
,  './{ %s }media' )  270      def  _extract_connections ( self
,  media
):  271          return  self
._ findall
_ ns
( media
,  './{ %s }connection' )  273      def  _extract_video ( self
,  media
,  programme_id
):  275          vbr 
=  int_or_none ( media
. get ( 'bitrate' ))  276          vcodec 
=  media
. get ( 'encoding' )  277          service 
=  media
. get ( 'service' )  278          width 
=  int_or_none ( media
. get ( 'width' ))  279          height 
=  int_or_none ( media
. get ( 'height' ))  280          file_size 
=  int_or_none ( media
. get ( 'media_file_size' ))  281          for  connection 
in  self
._ extract
_ connections
( media
):  282              conn_formats 
=  self
._ extract
_ connection
( connection
,  programme_id
)  283              for  format 
in  conn_formats
:  289                      'filesize' :  file_size
,  292                      format
[ 'format_id' ] =  ' %s _ %s '  % ( service
,  format
[ 'format_id' ])  293              formats
. extend ( conn_formats
)  296      def  _extract_audio ( self
,  media
,  programme_id
):  298          abr 
=  int_or_none ( media
. get ( 'bitrate' ))  299          acodec 
=  media
. get ( 'encoding' )  300          service 
=  media
. get ( 'service' )  301          for  connection 
in  self
._ extract
_ connections
( media
):  302              conn_formats 
=  self
._ extract
_ connection
( connection
,  programme_id
)  303              for  format 
in  conn_formats
:  305                      'format_id' :  ' %s _ %s '  % ( service
,  format
[ 'format_id' ]),  309              formats
. extend ( conn_formats
)  312      def  _get_subtitles ( self
,  media
,  programme_id
):  314          for  connection 
in  self
._ extract
_ connections
( media
):  315              captions 
=  self
._ download
_ xml
( connection
. get ( 'href' ),  programme_id
,  'Downloading captions' )  316              lang 
=  captions
. get ( '{http://www.w3.org/XML/1998/namespace}lang' ,  'en' )  319                      'url' :  connection
. get ( 'href' ),  325      def  _raise_extractor_error ( self
,  media_selection_error
):  326          raise  ExtractorError (  327              ' %s  returned error:  %s '  % ( self
. IE_NAME
,  media_selection_error
. id ),  330      def  _download_media_selector ( self
,  programme_id
):  331          last_exception 
=  None  332          for  mediaselector_url 
in  self
._ MEDIASELECTOR
_U RLS
:  334                  return  self
._ download
_ media
_ selector
_u rl
(  335                      mediaselector_url 
%  programme_id
,  programme_id
)  336              except  BBCCoUkIE
. MediaSelectionError 
as  e
:  337                  if  e
. id  in  ( 'notukerror' ,  'geolocation' ):  340                  self
._ raise
_ extractor
_ error
( e
)  341          self
._ raise
_ extractor
_ error
( last_exception
)  343      def  _download_media_selector_url ( self
,  url
,  programme_id
= None ):  345              media_selection 
=  self
._ download
_ xml
(  346                  url
,  programme_id
,  'Downloading media selection XML' )  347          except  ExtractorError 
as  ee
:  348              if  isinstance ( ee
. cause
,  compat_HTTPError
)  and  ee
. cause
. code 
==  403 :  349                  media_selection 
=  compat_etree_fromstring ( ee
. cause
. read (). decode ( 'utf-8' ))  352          return  self
._ process
_ media
_ selector
( media_selection
,  programme_id
)  354      def  _process_media_selector ( self
,  media_selection
,  programme_id
):  358          for  media 
in  self
._ extract
_ medias
( media_selection
):  359              kind 
=  media
. get ( 'kind' )  361                  formats
. extend ( self
._ extract
_ audio
( media
,  programme_id
))  362              elif  kind 
==  'video' :  363                  formats
. extend ( self
._ extract
_ video
( media
,  programme_id
))  364              elif  kind 
==  'captions' :  365                  subtitles 
=  self
. extract_subtitles ( media
,  programme_id
)  366          return  formats
,  subtitles
 368      def  _download_playlist ( self
,  playlist_id
):  370              playlist 
=  self
._ download
_ json
(  371                  'http://www.bbc.co.uk/programmes/ %s /playlist.json'  %  playlist_id
,  372                  playlist_id
,  'Downloading playlist JSON' )  374              version 
=  playlist
. get ( 'defaultAvailableVersion' )  376                  smp_config 
=  version
[ 'smpConfig' ]  377                  title 
=  smp_config
[ 'title' ]  378                  description 
=  smp_config
[ 'summary' ]  379                  for  item 
in  smp_config
[ 'items' ]:  381                      if  kind 
!=  'programme'  and  kind 
!=  'radioProgramme' :  383                      programme_id 
=  item
. get ( 'vpid' )  384                      duration 
=  int_or_none ( item
. get ( 'duration' ))  385                      formats
,  subtitles 
=  self
._ download
_ media
_ selector
( programme_id
)  386                  return  programme_id
,  title
,  description
,  duration
,  formats
,  subtitles
 387          except  ExtractorError 
as  ee
:  388              if not  ( isinstance ( ee
. cause
,  compat_HTTPError
)  and  ee
. cause
. code 
==  404 ):  391          # fallback to legacy playlist  392          return  self
._ process
_l egacy
_ playlist
( playlist_id
)  394      def  _process_legacy_playlist_url ( self
,  url
,  display_id
):  395          playlist 
=  self
._ download
_l egacy
_ playlist
_u rl
( url
,  display_id
)  396          return  self
._ extract
_ from
_l egacy
_ playlist
( playlist
,  display_id
)  398      def  _process_legacy_playlist ( self
,  playlist_id
):  399          return  self
._ process
_l egacy
_ playlist
_u rl
(  400              'http://www.bbc.co.uk/iplayer/playlist/ %s '  %  playlist_id
,  playlist_id
)  402      def  _download_legacy_playlist_url ( self
,  url
,  playlist_id
= None ):  403          return  self
._ download
_ xml
(  404              url
,  playlist_id
,  'Downloading legacy playlist XML' )  406      def  _extract_from_legacy_playlist ( self
,  playlist
,  playlist_id
):  407          no_items 
=  playlist
. find ( './{ %s }noItems'  %  self
._ EMP
_ PLAYLIST
_ NS
)  408          if  no_items 
is not None :  409              reason 
=  no_items
. get ( 'reason' )  410              if  reason 
==  'preAvailability' :  411                  msg 
=  'Episode  %s  is not yet available'  %  playlist_id
 412              elif  reason 
==  'postAvailability' :  413                  msg 
=  'Episode  %s  is no longer available'  %  playlist_id
 414              elif  reason 
==  'noMedia' :  415                  msg 
=  'Episode  %s  is not currently available'  %  playlist_id
 417                  msg 
=  'Episode  %s  is not available:  %s '  % ( playlist_id
,  reason
)  418              raise  ExtractorError ( msg
,  expected
= True )  420          for  item 
in  self
._ extract
_ items
( playlist
):  421              kind 
=  item
. get ( 'kind' )  422              if  kind 
!=  'programme'  and  kind 
!=  'radioProgramme' :  424              title 
=  playlist
. find ( './{ %s }title'  %  self
._ EMP
_ PLAYLIST
_ NS
). text
 425              description_el 
=  playlist
. find ( './{ %s }summary'  %  self
._ EMP
_ PLAYLIST
_ NS
)  426              description 
=  description_el
. text 
if  description_el 
is not None else None  428              def  get_programme_id ( item
):  429                  def  get_from_attributes ( item
):  430                      for  p 
in ( 'identifier' ,  'group' ):  432                          if  value 
and  re
. match ( r
'^[pb][\da-z] {7} $' ,  value
):  434                  get_from_attributes ( item
)  435                  mediator 
=  item
. find ( './{ %s }mediator'  %  self
._ EMP
_ PLAYLIST
_ NS
)  436                  if  mediator 
is not None :  437                      return  get_from_attributes ( mediator
)  439              programme_id 
=  get_programme_id ( item
)  440              duration 
=  int_or_none ( item
. get ( 'duration' ))  443                  formats
,  subtitles 
=  self
._ download
_ media
_ selector
( programme_id
)  445                  formats
,  subtitles 
=  self
._ process
_ media
_ selector
( item
,  playlist_id
)  446                  programme_id 
=  playlist_id
 448          return  programme_id
,  title
,  description
,  duration
,  formats
,  subtitles
 450      def  _real_extract ( self
,  url
):  451          group_id 
=  self
._ match
_ id
( url
)  453          webpage 
=  self
._ download
_ webpage
( url
,  group_id
,  'Downloading video page' )  457          tviplayer 
=  self
._ search
_ regex
(  458              r
'mediator\.bind\(({.+?})\s*,\s*document\.getElementById' ,  459              webpage
,  'player' ,  default
= None )  462              player 
=  self
._ parse
_ json
( tviplayer
,  group_id
). get ( 'player' , {})  463              duration 
=  int_or_none ( player
. get ( 'duration' ))  464              programme_id 
=  player
. get ( 'vpid' )  467              programme_id 
=  self
._ search
_ regex
(  468                  r
'"vpid"\s*:\s*"([\da-z] {8} )"' ,  webpage
,  'vpid' ,  fatal
= False ,  default
= None )  471              formats
,  subtitles 
=  self
._ download
_ media
_ selector
( programme_id
)  472              title 
=  self
._ og
_ search
_ title
( webpage
)  473              description 
=  self
._ search
_ regex
(  474                  r
'<p class="[^"]*medium-description[^"]*">([^<]+)</p>' ,  475                  webpage
,  'description' ,  fatal
= False )  477              programme_id
,  title
,  description
,  duration
,  formats
,  subtitles 
=  self
._ download
_ playlist
( group_id
)  479          self
._ sort
_ formats
( formats
)  484              'description' :  description
,  485              'thumbnail' :  self
._ og
_ search
_ thumbnail
( webpage
,  default
= None ),  486              'duration' :  duration
,  488              'subtitles' :  subtitles
,  492  class  BBCIE ( BBCCoUkIE
):  495      _VALID_URL 
=  r
'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'  497      _MEDIASELECTOR_URLS 
= [  498          # Provides HQ HLS streams but fails with geolocation in some cases when it's  499          # even not geo restricted at all  500          'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/ %s ' ,  501          # Provides more formats, namely direct mp4 links, but fails on some videos with  502          # notukerror for non UK (?) users (e.g.  503          # http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)  504          'http://open.live.bbc.co.uk/mediaselector/4/mtis/stream/ %s ' ,  505          # Provides fewer formats, but works everywhere for everybody (hopefully)  506          'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/journalism-pc/vpid/ %s ' ,  510          # article with multiple videos embedded with data-playable containing vpids  511          'url' :  'http://www.bbc.com/news/world-europe-32668511' ,  513              'id' :  'world-europe-32668511' ,  514              'title' :  'Russia stages massive WW2 parade despite Western boycott' ,  515              'description' :  'md5:00ff61976f6081841f759a08bf78cc9c' ,  519          # article with multiple videos embedded with data-playable (more videos)  520          'url' :  'http://www.bbc.com/news/business-28299555' ,  522              'id' :  'business-28299555' ,  523              'title' :  'Farnborough Airshow: Video highlights' ,  524              'description' :  'BBC reports and video highlights at the Farnborough Airshow.' ,  529          # article with multiple videos embedded with `new SMP()`  531          'url' :  'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460' ,  533              'id' :  '3662a707-0af9-3149-963f-47bea720b460' ,  534              'title' :  'BBC Blogs - Adam Curtis - BUGGER' ,  536          'playlist_count' :  18 ,  538          # single video embedded with data-playable containing vpid  539          'url' :  'http://www.bbc.com/news/world-europe-32041533' ,  543              'title' :  'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV' ,  544              'description' :  'md5:2868290467291b37feda7863f7a83f54' ,  546              'timestamp' :  1427219242 ,  547              'upload_date' :  '20150324' ,  551              'skip_download' :  True ,  554          # article with single video embedded with data-playable containing XML playlist  555          # with direct video links as progressiveDownloadUrl (for now these are extracted)  556          # and playlist with f4m and m3u8 as streamingUrl  557          'url' :  'http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu' ,  559              'id' :  '150615_telabyad_kentin_cogu' ,  561              'title' :  "YPG: Tel Abyad'ın tamamı kontrolümüzde" ,  562              'timestamp' :  1434397334 ,  563              'upload_date' :  '20150615' ,  566              'skip_download' :  True ,  569          # single video embedded with data-playable containing XML playlists (regional section)  570          'url' :  'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw' ,  572              'id' :  '150619_video_honduras_militares_hospitales_corrupcion_aw' ,  574              'title' :  'Honduras militariza sus hospitales por nuevo escĆ”ndalo de corrupción' ,  575              'timestamp' :  1434713142 ,  576              'upload_date' :  '20150619' ,  579              'skip_download' :  True ,  582          # single video from video playlist embedded with vxp-playlist-data JSON  583          'url' :  'http://www.bbc.com/news/video_and_audio/must_see/33376376' ,  587              'title' :  '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''' ,  591              'skip_download' :  True ,  594          # single video story with digitalData  595          'url' :  'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret' ,  599              'title' :  'Sri Lankaās spicy secret' ,  600              'description' :  'As a new train line to Jaffna opens up the countryās north, travellers can experience a truly distinct slice of Tamil culture.' ,  601              'timestamp' :  1437674293 ,  602              'upload_date' :  '20150723' ,  606              'skip_download' :  True ,  609          # single video story without digitalData  610          'url' :  'http://www.bbc.com/autos/story/20130513-hyundais-rock-star' ,  614              'title' :  'Hyundai Santa Fe Sport: Rock star' ,  615              'description' :  'md5:b042a26142c4154a6e472933cf20793d' ,  616              'timestamp' :  1415867444 ,  617              'upload_date' :  '20141113' ,  621              'skip_download' :  True ,  624          # single video with playlist.sxml URL in playlist param  625          'url' :  'http://www.bbc.com/sport/0/football/33653409' ,  629              'title' :  'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?' ,  630              'description' :  'BBC Sport \' s David Ornstein has the latest transfer gossip, including rumours of a Manchester United return for Cristiano Ronaldo.' ,  635              'skip_download' :  True ,  638          # article with multiple videos embedded with playlist.sxml in playlist param  639          'url' :  'http://www.bbc.com/sport/0/football/34475836' ,  642              'title' :  'What Liverpool can expect from Klopp' ,  646          # single video with playlist URL from weather section  647          'url' :  'http://www.bbc.com/weather/features/33601775' ,  648          'only_matching' :  True ,  650          # custom redirection to www.bbc.com  651          'url' :  'http://www.bbc.co.uk/news/science-environment-33661876' ,  652          'only_matching' :  True ,  656      def  suitable ( cls
,  url
):  657          return False if  BBCCoUkIE
. suitable ( url
)  or  BBCCoUkArticleIE
. suitable ( url
)  else  super ( BBCIE
,  cls
). suitable ( url
)  659      def  _extract_from_media_meta ( self
,  media_meta
,  video_id
):  660          # Direct links to media in media metadata (e.g.  661          # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)  662          # TODO: there are also f4m and m3u8 streams incorporated in playlist.sxml  663          source_files 
=  media_meta
. get ( 'sourceFiles' )  667                  'format_id' :  format_id
,  668                  'ext' :  f
. get ( 'encoding' ),  669                  'tbr' :  float_or_none ( f
. get ( 'bitrate' ),  1000 ),  670                  'filesize' :  int_or_none ( f
. get ( 'filesize' )),  671              }  for  format_id
,  f 
in  source_files
. items ()  if  f
. get ( 'url' )], []  673          programme_id 
=  media_meta
. get ( 'externalId' )  675              return  self
._ download
_ media
_ selector
( programme_id
)  677          # Process playlist.sxml as legacy playlist  678          href 
=  media_meta
. get ( 'href' )  680              playlist 
=  self
._ download
_l egacy
_ playlist
_u rl
( href
)  681              _
,  _
,  _
,  _
,  formats
,  subtitles 
=  self
._ extract
_ from
_l egacy
_ playlist
( playlist
,  video_id
)  682              return  formats
,  subtitles
 686      def  _extract_from_playlist_sxml ( self
,  url
,  playlist_id
,  timestamp
):  687          programme_id
,  title
,  description
,  duration
,  formats
,  subtitles 
=  \
 688              self
._ process
_l egacy
_ playlist
_u rl
( url
,  playlist_id
)  689          self
._ sort
_ formats
( formats
)  693              'description' :  description
,  694              'duration' :  duration
,  695              'timestamp' :  timestamp
,  697              'subtitles' :  subtitles
,  700      def  _real_extract ( self
,  url
):  701          playlist_id 
=  self
._ match
_ id
( url
)  703          webpage 
=  self
._ download
_ webpage
( url
,  playlist_id
)  706          playlist_title 
=  None  707          playlist_description 
=  None  709          ld 
=  self
._ parse
_ json
(  711                  r
'(?s)<script type="application/ld\+json">(.+?)</script>' ,  712                  webpage
,  'ld json' ,  default
= '{}' ),  713              playlist_id
,  fatal
= False )  715              timestamp 
=  parse_iso8601 ( ld
. get ( 'datePublished' ))  716              playlist_title 
=  ld
. get ( 'headline' )  717              playlist_description 
=  ld
. get ( 'articleBody' )  720              timestamp 
=  parse_iso8601 ( self
._ search
_ regex
(  721                  [ r
'<meta[^>]+property="article:published_time"[^>]+content="([^"]+)"' ,  722                   r
'itemprop="datePublished"[^>]+datetime="([^"]+)"' ,  723                   r
'"datePublished":\s*"([^"]+)' ],  724                  webpage
,  'date' ,  default
= None ))  728          # article with multiple videos embedded with playlist.sxml (e.g.  729          # http://www.bbc.com/sport/0/football/34475836)  730          playlists 
=  re
. findall ( r
'<param[^>]+name="playlist"[^>]+value="([^"]+)"' ,  webpage
)  733                  self
._ extract
_ from
_ playlist
_ sxml
( playlist_url
,  playlist_id
,  timestamp
)  734                  for  playlist_url 
in  playlists
]  736          # news article with multiple videos embedded with data-playable  737          data_playables 
=  re
. findall ( r
'data-playable=(["\' ])({.+ ?
}) \
1 ', webpage)  739              for _, data_playable_json in data_playables:  740                  data_playable = self._parse_json(  741                      unescapeHTML(data_playable_json), playlist_id, fatal=False)  742                  if not data_playable:  744                  settings = data_playable.get(' settings
', {})  746                      # data-playable with video vpid in settings.playlistObject.items (e.g.  747                      # http://www.bbc.com/news/world-us-canada-34473351)  748                      playlist_object = settings.get(' playlistObject
', {})  750                          items = playlist_object.get(' items
')  751                          if items and isinstance(items, list):  752                              title = playlist_object[' title
']  753                              description = playlist_object.get(' summary
')  754                              duration = int_or_none(items[0].get(' duration
'))  755                              programme_id = items[0].get(' vpid
')  756                              formats, subtitles = self._download_media_selector(programme_id)  757                              self._sort_formats(formats)  761                                  ' description
': description,  762                                  ' timestamp
': timestamp,  763                                  ' duration
': duration,  765                                  ' subtitles
': subtitles,  768                          # data-playable without vpid but with a playlist.sxml URLs  769                          # in otherSettings.playlist (e.g.  770                          # http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)  771                          playlist = data_playable.get(' otherSettings
', {}).get(' playlist
', {})  773                              entries.append(self._extract_from_playlist_sxml(  774                                  playlist.get(' progressiveDownloadUrl
'), playlist_id, timestamp))  777              playlist_title = playlist_title or remove_end(self._og_search_title(webpage), '  -  BBC News
')  778              playlist_description = playlist_description or self._og_search_description(webpage, default=None)  779              return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)  781          # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)  782          programme_id = self._search_regex(  783              [r' data
- video
- player
- vpid
= "([\da-z] {8} )" ',  784               r' < param
[ ^
>]+ name
= "externalIdentifier" [ ^
>]+ value
= "([\da-z] {8} )" '],  785              webpage, ' vpid
', default=None)  788              formats, subtitles = self._download_media_selector(programme_id)  789              self._sort_formats(formats)  790              # digitalData may be missing (e.g. http://www.bbc.com/autos/story/20130513-hyundais-rock-star)  791              digital_data = self._parse_json(  793                      r' var\s
+ digitalData\s
*= \s
*({.+ ?
}); ?
\n ', webpage, ' digital data
', default=' {} '),  794                  programme_id, fatal=False)  795              page_info = digital_data.get(' page
', {}).get(' pageInfo
', {})  796              title = page_info.get(' pageName
') or self._og_search_title(webpage)  797              description = page_info.get(' description
') or self._og_search_description(webpage)  798              timestamp = parse_iso8601(page_info.get(' publicationDate
')) or timestamp  802                  ' description
': description,  803                  ' timestamp
': timestamp,  805                  ' subtitles
': subtitles,  808          playlist_title = self._html_search_regex(  809              r' < title
>(.* ?
)( ?
: \s
*- \s
* BBC 
[ ^ 
]+) ?
</ title
> ', webpage, ' playlist title
')  810          playlist_description = self._og_search_description(webpage, default=None)  812          def extract_all(pattern):  813              return list(filter(None, map(  814                  lambda s: self._parse_json(s, playlist_id, fatal=False),  815                  re.findall(pattern, webpage))))  817          # Multiple video article (e.g.  818          # http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)  819          EMBED_URL = r' https?
://( ?
: www\
.) ?bbc\
. co\
. uk
/( ?
:[ ^
/]+/)+[ \da
- z
] {8}
( ?
: \b [ ^
"]+)?'  821          for match in extract_all(r'new\s+SMP\(({.+?})\)'):  822              embed_url = match.get('playerSettings', {}).get('externalEmbedUrl')  823              if embed_url and re.match(EMBED_URL, embed_url):  824                  entries.append(embed_url)  825          entries.extend(re.findall(  826              r'setPlaylist\(" ( %s) "\)' % EMBED_URL, webpage))  828              return self.playlist_result(  829                  [self.url_result(entry, 'BBCCoUk') for entry in entries],  830                  playlist_id, playlist_title, playlist_description)  832          # Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)  833          medias = extract_all(r" data
- media
- meta
= '({[^' ]+}) '")  836              # Single video article (e.g. http://www.bbc.com/news/video_and_audio/international)  837              media_asset = self._search_regex(  838                  r' mediaAssetPage\
. init\
( \s
*({.+ ?
}),  "/',  839                  webpage, 'media asset', default=None)  841                  media_asset_page = self._parse_json(media_asset, playlist_id, fatal=False)  843                  for video in media_asset_page.get('videos', {}).values():  844                      medias.extend(video.values())  847              # Multiple video playlist with single `now playing` entry (e.g.  848              # http://www.bbc.com/news/video_and_audio/must_see/33767813)  849              vxp_playlist = self._parse_json(  851                      r'<script[^>]+class=" vxp
- playlist
- data
"[^>]+type=" application
/ json
"[^>]*>([^<]+)</script>',  852                      webpage, 'playlist data'),  855              for item in vxp_playlist:  856                  media = item.get('media')  859                  playlist_medias.append(media)  860                  # Download single video if found media with asset id matching the video id from URL  861                  if item.get('advert', {}).get('assetId') == playlist_id:  864              # Fallback to the whole playlist  866                  medias = playlist_medias  869          for num, media_meta in enumerate(medias, start=1):  870              formats, subtitles = self._extract_from_media_meta(media_meta, playlist_id)  873              self._sort_formats(formats)  875              video_id = media_meta.get('externalId')  877                  video_id = playlist_id if len(medias) == 1 else ' %s-%s ' % (playlist_id, num)  879              title = media_meta.get('caption')  881                  title = playlist_title if len(medias) == 1 else ' %s  - Video  %s ' % (playlist_title, num)  883              duration = int_or_none(media_meta.get('durationInSeconds')) or parse_duration(media_meta.get('duration'))  886              for image in media_meta.get('images', {}).values():  887                  images.extend(image.values())  888              if 'image' in media_meta:  889                  images.append(media_meta['image'])  892                  'url': image.get('href'),  893                  'width': int_or_none(image.get('width')),  894                  'height': int_or_none(image.get('height')),  895              } for image in images]  900                  'thumbnails': thumbnails,  901                  'duration': duration,  902                  'timestamp': timestamp,  904                  'subtitles': subtitles,  907          return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)  910  class BBCCoUkArticleIE(InfoExtractor):  911      _VALID_URL = 'http://www.bbc.co.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'  912      IE_NAME = 'bbc.co.uk:article'  913      IE_DESC = 'BBC articles'  916          'url': 'http://www.bbc.co.uk/programmes/articles/3jNQLTMrPlYGTBn0WV6M2MS/not-your-typical-role-model-ada-lovelace-the-19th-century-programmer',  918              'id': '3jNQLTMrPlYGTBn0WV6M2MS',  919              'title': 'Calculating Ada: The Countess of Computing - Not your typical role model: Ada Lovelace the 19th century programmer - BBC Four',  920              'description': 'Hannah Fry reveals some of her surprising discoveries about Ada Lovelace during filming.',  923          'add_ie': ['BBCCoUk'],  926      def _real_extract(self, url):  927          playlist_id = self._match_id(url)  929          webpage = self._download_webpage(url, playlist_id)  931          title = self._og_search_title(webpage)  932          description = self._og_search_description(webpage).strip()  934          entries = [self.url_result(programme_url) for programme_url in re.findall(  935              r'<div[^>]+typeof=" Clip
"[^>]+resource=" ([ ^
"]+)" ', webpage)]  937          return self.playlist_result(entries, playlist_id, title, description)