]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/bbccouk.py 
1cf48fe0dd739b328478899a83f0d8aba94e6c4a
   1  from  __future__ 
import  unicode_literals
   3  import  xml
. etree
. ElementTree
   5  from  . subtitles 
import  SubtitlesInfoExtractor
   6  from  .. utils 
import  ExtractorError
   7  from  .. compat 
import  compat_HTTPError
  10  class  BBCCoUkIE ( SubtitlesInfoExtractor
):   12      IE_DESC 
=  'BBC iPlayer'   13      _VALID_URL 
=  r
'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z] {8} )'   17              'url' :  'http://www.bbc.co.uk/programmes/b039g8p7' ,   21                  'title' :  'Kaleidoscope, Leonard Cohen' ,   22                  'description' :  'The Canadian poet and songwriter reflects on his musical career.' ,   27                  'skip_download' :  True ,   31              'url' :  'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/' ,   35                  'title' :  'The Man in Black: Series 3: The Printed Name' ,   36                  'description' :  "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey." ,   41                  'skip_download' :  True ,   43              'skip' :  'Episode is no longer available on BBC iPlayer Radio' ,   46              'url' :  'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/' ,   50                  'title' :  'The Voice UK: Series 3: Blind Auditions 5' ,   51                  'description' :  "Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone." ,   56                  'skip_download' :  True ,   58              'skip' :  'Currently BBC iPlayer TV programmes are available to play in the UK only' ,   61              'url' :  'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion' ,   65                  'title' :  "Tomorrow's Worlds: The Unearthly History of Science Fiction" ,   66                  'description' :  '2. Invasion' ,   71                  'skip_download' :  True ,   73              'skip' :  'Currently BBC iPlayer TV programmes are available to play in the UK only' ,   75              'url' :  'http://www.bbc.co.uk/programmes/b04v20dw' ,   79                  'title' :  'Pete Tong, The Essential New Tune Special' ,   80                  'description' :  "Pete has a very special mix - all of 2014's Essential New Tunes!" ,   85                  'skip_download' :  True ,   88              'url' :  'http://www.bbc.co.uk/music/clips/p02frcc3' ,   93                  'title' :  'Pete Tong, Past, Present and Future Special, Madeon - After Hours mix' ,   94                  'description' :  'French house superstar Madeon takes us out of the club and onto the after party.' ,   99                  'skip_download' :  True ,  102              'url' :  'http://www.bbc.co.uk/music/clips/p025c0zz' ,  107                  'title' :  'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)' ,  108                  'description' :  'Rae Morris performs Closer for BBC Three at Reading 2014' ,  113                  'skip_download' :  True ,  116              'url' :  'http://www.bbc.co.uk/iplayer/playlist/p01dvks4' ,  117              'only_matching' :  True ,  119              'url' :  'http://www.bbc.co.uk/music/clips#p02frcc3' ,  120              'only_matching' :  True ,  124      def  _extract_asx_playlist ( self
,  connection
,  programme_id
):  125          asx 
=  self
._ download
_ xml
( connection
. get ( 'href' ),  programme_id
,  'Downloading ASX playlist' )  126          return  [ ref
. get ( 'href' )  for  ref 
in  asx
. findall ( './Entry/ref' )]  128      def  _extract_connection ( self
,  connection
,  programme_id
):  130          protocol 
=  connection
. get ( 'protocol' )  131          supplier 
=  connection
. get ( 'supplier' )  132          if  protocol 
==  'http' :  133              href 
=  connection
. get ( 'href' )  135              if  supplier 
==  'asx' :  136                  for  i
,  ref 
in  enumerate ( self
._ extract
_ asx
_ playlist
( connection
,  programme_id
)):  139                          'format_id' :  'ref %s _ %s '  % ( i
,  supplier
),  145                      'format_id' :  supplier
,  147          elif  protocol 
==  'rtmp' :  148              application 
=  connection
. get ( 'application' ,  'ondemand' )  149              auth_string 
=  connection
. get ( 'authString' )  150              identifier 
=  connection
. get ( 'identifier' )  151              server 
=  connection
. get ( 'server' )  153                  'url' :  ' %s :// %s / %s ? %s '  % ( protocol
,  server
,  application
,  auth_string
),  154                  'play_path' :  identifier
,  155                  'app' :  ' %s ? %s '  % ( application
,  auth_string
),  156                  'page_url' :  'http://www.bbc.co.uk' ,  157                  'player_url' :  'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf' ,  160                  'format_id' :  supplier
,  164      def  _extract_items ( self
,  playlist
):  165          return  playlist
. findall ( './{http://bbc.co.uk/2008/emp/playlist}item' )  167      def  _extract_medias ( self
,  media_selection
):  168          error 
=  media_selection
. find ( './{http://bbc.co.uk/2008/mp/mediaselection}error' )  169          if  error 
is not None :  170              raise  ExtractorError (  171                  ' %s  returned error:  %s '  % ( self
. IE_NAME
,  error
. get ( 'id' )),  expected
= True )  172          return  media_selection
. findall ( './{http://bbc.co.uk/2008/mp/mediaselection}media' )  174      def  _extract_connections ( self
,  media
):  175          return  media
. findall ( './{http://bbc.co.uk/2008/mp/mediaselection}connection' )  177      def  _extract_video ( self
,  media
,  programme_id
):  179          vbr 
=  int ( media
. get ( 'bitrate' ))  180          vcodec 
=  media
. get ( 'encoding' )  181          service 
=  media
. get ( 'service' )  182          width 
=  int ( media
. get ( 'width' ))  183          height 
=  int ( media
. get ( 'height' ))  184          file_size 
=  int ( media
. get ( 'media_file_size' ))  185          for  connection 
in  self
._ extract
_ connections
( media
):  186              conn_formats 
=  self
._ extract
_ connection
( connection
,  programme_id
)  187              for  format 
in  conn_formats
:  189                      'format_id' :  ' %s _ %s '  % ( service
,  format
[ 'format_id' ]),  194                      'filesize' :  file_size
,  196              formats
. extend ( conn_formats
)  199      def  _extract_audio ( self
,  media
,  programme_id
):  201          abr 
=  int ( media
. get ( 'bitrate' ))  202          acodec 
=  media
. get ( 'encoding' )  203          service 
=  media
. get ( 'service' )  204          for  connection 
in  self
._ extract
_ connections
( media
):  205              conn_formats 
=  self
._ extract
_ connection
( connection
,  programme_id
)  206              for  format 
in  conn_formats
:  208                      'format_id' :  ' %s _ %s '  % ( service
,  format
[ 'format_id' ]),  212              formats
. extend ( conn_formats
)  215      def  _extract_captions ( self
,  media
,  programme_id
):  217          for  connection 
in  self
._ extract
_ connections
( media
):  218              captions 
=  self
._ download
_ xml
( connection
. get ( 'href' ),  programme_id
,  'Downloading captions' )  219              lang 
=  captions
. get ( '{http://www.w3.org/XML/1998/namespace}lang' ,  'en' )  220              ps 
=  captions
. findall ( './ {0} body/ {0} div/ {0} p' . format ( '{http://www.w3.org/2006/10/ttaf1}' ))  222              for  pos
,  p 
in  enumerate ( ps
):  223                  srt 
+=  ' %s \r\n %s  -->  %s \r\n %s \r\n\r\n '  % ( str ( pos
),  p
. get ( 'begin' ),  p
. get ( 'end' ),  224                                                            p
. text
. strip ()  if  p
. text 
is not None else  '' )  225              subtitles
[ lang
] =  srt
 228      def  _download_media_selector ( self
,  programme_id
):  230              media_selection 
=  self
._ download
_ xml
(  231                  'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/ %s '  %  programme_id
,  232                  programme_id
,  'Downloading media selection XML' )  233          except  ExtractorError 
as  ee
:  234              if  isinstance ( ee
. cause
,  compat_HTTPError
)  and  ee
. cause
. code 
==  403 :  235                  media_selection 
=  xml
. etree
. ElementTree
. fromstring ( ee
. cause
. read (). encode ( 'utf-8' ))  242          for  media 
in  self
._ extract
_ medias
( media_selection
):  243              kind 
=  media
. get ( 'kind' )  245                  formats
. extend ( self
._ extract
_ audio
( media
,  programme_id
))  246              elif  kind 
==  'video' :  247                  formats
. extend ( self
._ extract
_ video
( media
,  programme_id
))  248              elif  kind 
==  'captions' :  249                  subtitles 
=  self
._ extract
_ captions
( media
,  programme_id
)  251          return  formats
,  subtitles
 253      def  _download_playlist ( self
,  playlist_id
):  255              playlist 
=  self
._ download
_ json
(  256                  'http://www.bbc.co.uk/programmes/ %s /playlist.json'  %  playlist_id
,  257                  playlist_id
,  'Downloading playlist JSON' )  259              version 
=  playlist
. get ( 'defaultAvailableVersion' )  261                  smp_config 
=  version
[ 'smpConfig' ]  262                  title 
=  smp_config
[ 'title' ]  263                  description 
=  smp_config
[ 'summary' ]  264                  for  item 
in  smp_config
[ 'items' ]:  266                      if  kind 
!=  'programme'  and  kind 
!=  'radioProgramme' :  268                      programme_id 
=  item
. get ( 'vpid' )  269                      duration 
=  int ( item
. get ( 'duration' ))  270                      formats
,  subtitles 
=  self
._ download
_ media
_ selector
( programme_id
)  271                  return  programme_id
,  title
,  description
,  duration
,  formats
,  subtitles
 272          except  ExtractorError 
as  ee
:  273              if not  isinstance ( ee
. cause
,  compat_HTTPError
)  and  ee
. cause
. code 
==  404 :  276          # fallback to legacy playlist  277          playlist 
=  self
._ download
_ xml
(  278              'http://www.bbc.co.uk/iplayer/playlist/ %s '  %  playlist_id
,  279              playlist_id
,  'Downloading legacy playlist XML' )  281          no_items 
=  playlist
. find ( './{http://bbc.co.uk/2008/emp/playlist}noItems' )  282          if  no_items 
is not None :  283              reason 
=  no_items
. get ( 'reason' )  284              if  reason 
==  'preAvailability' :  285                  msg 
=  'Episode  %s  is not yet available'  %  playlist_id
 286              elif  reason 
==  'postAvailability' :  287                  msg 
=  'Episode  %s  is no longer available'  %  playlist_id
 288              elif  reason 
==  'noMedia' :  289                  msg 
=  'Episode  %s  is not currently available'  %  playlist_id
 291                  msg 
=  'Episode  %s  is not available:  %s '  % ( playlist_id
,  reason
)  292              raise  ExtractorError ( msg
,  expected
= True )  294          for  item 
in  self
._ extract
_ items
( playlist
):  295              kind 
=  item
. get ( 'kind' )  296              if  kind 
!=  'programme'  and  kind 
!=  'radioProgramme' :  298              title 
=  playlist
. find ( './{http://bbc.co.uk/2008/emp/playlist}title' ). text
 299              description 
=  playlist
. find ( './{http://bbc.co.uk/2008/emp/playlist}summary' ). text
 300              programme_id 
=  item
. get ( 'identifier' )  301              duration 
=  int ( item
. get ( 'duration' ))  302              formats
,  subtitles 
=  self
._ download
_ media
_ selector
( programme_id
)  304          return  programme_id
,  title
,  description
,  duration
,  formats
,  subtitles
 306      def  _real_extract ( self
,  url
):  307          group_id 
=  self
._ match
_ id
( url
)  309          webpage 
=  self
._ download
_ webpage
( url
,  group_id
,  'Downloading video page' )  311          programme_id 
=  self
._ search
_ regex
(  312              r
'"vpid"\s*:\s*"([\da-z] {8} )"' ,  webpage
,  'vpid' ,  fatal
= False ,  default
= None )  314              player 
=  self
._ download
_ json
(  315                  'http://www.bbc.co.uk/iplayer/episode/ %s .json'  %  group_id
,  316                  group_id
)[ 'jsConf' ][ 'player' ]  317              title 
=  player
[ 'title' ]  318              description 
=  player
[ 'subtitle' ]  319              duration 
=  player
[ 'duration' ]  320              formats
,  subtitles 
=  self
._ download
_ media
_ selector
( programme_id
)  322              programme_id
,  title
,  description
,  duration
,  formats
,  subtitles 
=  self
._ download
_ playlist
( group_id
)  324          if  self
._ downloader
. params
. get ( 'listsubtitles' ,  False ):  325              self
._l ist
_ available
_ subtitles
( programme_id
,  subtitles
)  328          self
._ sort
_ formats
( formats
)  333              'description' :  description
,  334              'duration' :  duration
,  336              'subtitles' :  subtitles
,