]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/bbccouk.py 
   1  from  __future__ 
import  unicode_literals
   3  import  xml
. etree
. ElementTree
   5  from  . common 
import  InfoExtractor
  10  from  .. compat 
import  compat_HTTPError
  13  class  BBCCoUkIE ( InfoExtractor
):   15      IE_DESC 
=  'BBC iPlayer'   16      _VALID_URL 
=  r
'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z] {8} )'   20              'url' :  'http://www.bbc.co.uk/programmes/b039g8p7' ,   24                  'title' :  'Kaleidoscope, Leonard Cohen' ,   25                  'description' :  'The Canadian poet and songwriter reflects on his musical career.' ,   30                  'skip_download' :  True ,   34              'url' :  'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/' ,   38                  'title' :  'The Man in Black: Series 3: The Printed Name' ,   39                  'description' :  "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey." ,   44                  'skip_download' :  True ,   46              'skip' :  'Episode is no longer available on BBC iPlayer Radio' ,   49              'url' :  'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/' ,   53                  'title' :  'The Voice UK: Series 3: Blind Auditions 5' ,   54                  'description' :  "Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone." ,   59                  'skip_download' :  True ,   61              'skip' :  'Currently BBC iPlayer TV programmes are available to play in the UK only' ,   64              'url' :  'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion' ,   68                  'title' :  "Tomorrow's Worlds: The Unearthly History of Science Fiction" ,   69                  'description' :  '2. Invasion' ,   74                  'skip_download' :  True ,   76              'skip' :  'Currently BBC iPlayer TV programmes are available to play in the UK only' ,   78              'url' :  'http://www.bbc.co.uk/programmes/b04v20dw' ,   82                  'title' :  'Pete Tong, The Essential New Tune Special' ,   83                  'description' :  "Pete has a very special mix - all of 2014's Essential New Tunes!" ,   88                  'skip_download' :  True ,   91              'url' :  'http://www.bbc.co.uk/music/clips/p02frcc3' ,   96                  'title' :  'Pete Tong, Past, Present and Future Special, Madeon - After Hours mix' ,   97                  'description' :  'French house superstar Madeon takes us out of the club and onto the after party.' ,  102                  'skip_download' :  True ,  105              'url' :  'http://www.bbc.co.uk/music/clips/p025c0zz' ,  110                  'title' :  'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)' ,  111                  'description' :  'Rae Morris performs Closer for BBC Three at Reading 2014' ,  116                  'skip_download' :  True ,  119              'url' :  'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls' ,  123                  'title' :  'Natural World, 2015-2016: 2. Super Powered Owls' ,  124                  'description' :  'md5:e4db5c937d0e95a7c6b5e654d429183d' ,  129                  'skip_download' :  True ,  131              'skip' :  'geolocation' ,  133              'url' :  'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition' ,  137                  'description' :  'Kirsty Wark and Morgan Quaintance visit the Royal Academy as it prepares for its annual artistic extravaganza, meeting people who have come together to make the show unique.' ,  138                  'title' :  'Royal Academy Summer Exhibition' ,  143                  'skip_download' :  True ,  145              'skip' :  'geolocation' ,  147              'url' :  'http://www.bbc.co.uk/iplayer/playlist/p01dvks4' ,  148              'only_matching' :  True ,  150              'url' :  'http://www.bbc.co.uk/music/clips#p02frcc3' ,  151              'only_matching' :  True ,  153              'url' :  'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo' ,  154              'only_matching' :  True ,  158      def  _extract_asx_playlist ( self
,  connection
,  programme_id
):  159          asx 
=  self
._ download
_ xml
( connection
. get ( 'href' ),  programme_id
,  'Downloading ASX playlist' )  160          return  [ ref
. get ( 'href' )  for  ref 
in  asx
. findall ( './Entry/ref' )]  162      def  _extract_connection ( self
,  connection
,  programme_id
):  164          protocol 
=  connection
. get ( 'protocol' )  165          supplier 
=  connection
. get ( 'supplier' )  166          if  protocol 
==  'http' :  167              href 
=  connection
. get ( 'href' )  169              if  supplier 
==  'asx' :  170                  for  i
,  ref 
in  enumerate ( self
._ extract
_ asx
_ playlist
( connection
,  programme_id
)):  173                          'format_id' :  'ref %s _ %s '  % ( i
,  supplier
),  179                      'format_id' :  supplier
,  181          elif  protocol 
==  'rtmp' :  182              application 
=  connection
. get ( 'application' ,  'ondemand' )  183              auth_string 
=  connection
. get ( 'authString' )  184              identifier 
=  connection
. get ( 'identifier' )  185              server 
=  connection
. get ( 'server' )  187                  'url' :  ' %s :// %s / %s ? %s '  % ( protocol
,  server
,  application
,  auth_string
),  188                  'play_path' :  identifier
,  189                  'app' :  ' %s ? %s '  % ( application
,  auth_string
),  190                  'page_url' :  'http://www.bbc.co.uk' ,  191                  'player_url' :  'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf' ,  194                  'format_id' :  supplier
,  198      def  _extract_items ( self
,  playlist
):  199          return  playlist
. findall ( './{http://bbc.co.uk/2008/emp/playlist}item' )  201      def  _extract_medias ( self
,  media_selection
):  202          error 
=  media_selection
. find ( './{http://bbc.co.uk/2008/mp/mediaselection}error' )  203          if  error 
is not None :  204              raise  ExtractorError (  205                  ' %s  returned error:  %s '  % ( self
. IE_NAME
,  error
. get ( 'id' )),  expected
= True )  206          return  media_selection
. findall ( './{http://bbc.co.uk/2008/mp/mediaselection}media' )  208      def  _extract_connections ( self
,  media
):  209          return  media
. findall ( './{http://bbc.co.uk/2008/mp/mediaselection}connection' )  211      def  _extract_video ( self
,  media
,  programme_id
):  213          vbr 
=  int ( media
. get ( 'bitrate' ))  214          vcodec 
=  media
. get ( 'encoding' )  215          service 
=  media
. get ( 'service' )  216          width 
=  int ( media
. get ( 'width' ))  217          height 
=  int ( media
. get ( 'height' ))  218          file_size 
=  int ( media
. get ( 'media_file_size' ))  219          for  connection 
in  self
._ extract
_ connections
( media
):  220              conn_formats 
=  self
._ extract
_ connection
( connection
,  programme_id
)  221              for  format 
in  conn_formats
:  223                      'format_id' :  ' %s _ %s '  % ( service
,  format
[ 'format_id' ]),  228                      'filesize' :  file_size
,  230              formats
. extend ( conn_formats
)  233      def  _extract_audio ( self
,  media
,  programme_id
):  235          abr 
=  int ( media
. get ( 'bitrate' ))  236          acodec 
=  media
. get ( 'encoding' )  237          service 
=  media
. get ( 'service' )  238          for  connection 
in  self
._ extract
_ connections
( media
):  239              conn_formats 
=  self
._ extract
_ connection
( connection
,  programme_id
)  240              for  format 
in  conn_formats
:  242                      'format_id' :  ' %s _ %s '  % ( service
,  format
[ 'format_id' ]),  246              formats
. extend ( conn_formats
)  249      def  _get_subtitles ( self
,  media
,  programme_id
):  251          for  connection 
in  self
._ extract
_ connections
( media
):  252              captions 
=  self
._ download
_ xml
( connection
. get ( 'href' ),  programme_id
,  'Downloading captions' )  253              lang 
=  captions
. get ( '{http://www.w3.org/XML/1998/namespace}lang' ,  'en' )  256                      'url' :  connection
. get ( 'href' ),  262      def  _download_media_selector ( self
,  programme_id
):  264              media_selection 
=  self
._ download
_ xml
(  265                  'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/ %s '  %  programme_id
,  266                  programme_id
,  'Downloading media selection XML' )  267          except  ExtractorError 
as  ee
:  268              if  isinstance ( ee
. cause
,  compat_HTTPError
)  and  ee
. cause
. code 
==  403 :  269                  media_selection 
=  xml
. etree
. ElementTree
. fromstring ( ee
. cause
. read (). decode ( 'utf-8' ))  276          for  media 
in  self
._ extract
_ medias
( media_selection
):  277              kind 
=  media
. get ( 'kind' )  279                  formats
. extend ( self
._ extract
_ audio
( media
,  programme_id
))  280              elif  kind 
==  'video' :  281                  formats
. extend ( self
._ extract
_ video
( media
,  programme_id
))  282              elif  kind 
==  'captions' :  283                  subtitles 
=  self
. extract_subtitles ( media
,  programme_id
)  285          return  formats
,  subtitles
 287      def  _download_playlist ( self
,  playlist_id
):  289              playlist 
=  self
._ download
_ json
(  290                  'http://www.bbc.co.uk/programmes/ %s /playlist.json'  %  playlist_id
,  291                  playlist_id
,  'Downloading playlist JSON' )  293              version 
=  playlist
. get ( 'defaultAvailableVersion' )  295                  smp_config 
=  version
[ 'smpConfig' ]  296                  title 
=  smp_config
[ 'title' ]  297                  description 
=  smp_config
[ 'summary' ]  298                  for  item 
in  smp_config
[ 'items' ]:  300                      if  kind 
!=  'programme'  and  kind 
!=  'radioProgramme' :  302                      programme_id 
=  item
. get ( 'vpid' )  303                      duration 
=  int ( item
. get ( 'duration' ))  304                      formats
,  subtitles 
=  self
._ download
_ media
_ selector
( programme_id
)  305                  return  programme_id
,  title
,  description
,  duration
,  formats
,  subtitles
 306          except  ExtractorError 
as  ee
:  307              if not  ( isinstance ( ee
. cause
,  compat_HTTPError
)  and  ee
. cause
. code 
==  404 ):  310          # fallback to legacy playlist  311          playlist 
=  self
._ download
_ xml
(  312              'http://www.bbc.co.uk/iplayer/playlist/ %s '  %  playlist_id
,  313              playlist_id
,  'Downloading legacy playlist XML' )  315          no_items 
=  playlist
. find ( './{http://bbc.co.uk/2008/emp/playlist}noItems' )  316          if  no_items 
is not None :  317              reason 
=  no_items
. get ( 'reason' )  318              if  reason 
==  'preAvailability' :  319                  msg 
=  'Episode  %s  is not yet available'  %  playlist_id
 320              elif  reason 
==  'postAvailability' :  321                  msg 
=  'Episode  %s  is no longer available'  %  playlist_id
 322              elif  reason 
==  'noMedia' :  323                  msg 
=  'Episode  %s  is not currently available'  %  playlist_id
 325                  msg 
=  'Episode  %s  is not available:  %s '  % ( playlist_id
,  reason
)  326              raise  ExtractorError ( msg
,  expected
= True )  328          for  item 
in  self
._ extract
_ items
( playlist
):  329              kind 
=  item
. get ( 'kind' )  330              if  kind 
!=  'programme'  and  kind 
!=  'radioProgramme' :  332              title 
=  playlist
. find ( './{http://bbc.co.uk/2008/emp/playlist}title' ). text
 333              description 
=  playlist
. find ( './{http://bbc.co.uk/2008/emp/playlist}summary' ). text
 334              programme_id 
=  item
. get ( 'identifier' )  335              duration 
=  int ( item
. get ( 'duration' ))  336              formats
,  subtitles 
=  self
._ download
_ media
_ selector
( programme_id
)  338          return  programme_id
,  title
,  description
,  duration
,  formats
,  subtitles
 340      def  _real_extract ( self
,  url
):  341          group_id 
=  self
._ match
_ id
( url
)  343          webpage 
=  self
._ download
_ webpage
( url
,  group_id
,  'Downloading video page' )  347          tviplayer 
=  self
._ search
_ regex
(  348              r
'mediator\.bind\(({.+?})\s*,\s*document\.getElementById' ,  349              webpage
,  'player' ,  default
= None )  352              player 
=  self
._ parse
_ json
( tviplayer
,  group_id
). get ( 'player' , {})  353              duration 
=  int_or_none ( player
. get ( 'duration' ))  354              programme_id 
=  player
. get ( 'vpid' )  357              programme_id 
=  self
._ search
_ regex
(  358                  r
'"vpid"\s*:\s*"([\da-z] {8} )"' ,  webpage
,  'vpid' ,  fatal
= False ,  default
= None )  361              formats
,  subtitles 
=  self
._ download
_ media
_ selector
( programme_id
)  362              title 
=  self
._ og
_ search
_ title
( webpage
)  363              description 
=  self
._ search
_ regex
(  364                  r
'<p class="[^"]*medium-description[^"]*">([^<]+)</p>' ,  365                  webpage
,  'description' ,  fatal
= False )  367              programme_id
,  title
,  description
,  duration
,  formats
,  subtitles 
=  self
._ download
_ playlist
( group_id
)  369          self
._ sort
_ formats
( formats
)  374              'description' :  description
,  375              'thumbnail' :  self
._ og
_ search
_ thumbnail
( webpage
,  default
= None ),  376              'duration' :  duration
,  378              'subtitles' :  subtitles
,