]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/bbccouk.py
126c8824cccedbca287ac3ebfc92d1a5e2d93b57
   1  from  __future__ 
import  unicode_literals
   3  import  xml
. etree
. ElementTree
   5  from  . subtitles 
import  SubtitlesInfoExtractor
   6  from  .. utils 
import  ExtractorError
   7  from  .. compat 
import  compat_HTTPError
  10  class  BBCCoUkIE ( SubtitlesInfoExtractor
):   12      IE_DESC 
=  'BBC iPlayer'   13      _VALID_URL 
=  r
'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z] {8} )'   17              'url' :  'http://www.bbc.co.uk/programmes/b039g8p7' ,   21                  'title' :  'Kaleidoscope, Leonard Cohen' ,   22                  'description' :  'The Canadian poet and songwriter reflects on his musical career.' ,   27                  'skip_download' :  True ,   31              'url' :  'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/' ,   35                  'title' :  'The Man in Black: Series 3: The Printed Name' ,   36                  'description' :  "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey." ,   41                  'skip_download' :  True ,   43              'skip' :  'Episode is no longer available on BBC iPlayer Radio' ,   46              'url' :  'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/' ,   50                  'title' :  'The Voice UK: Series 3: Blind Auditions 5' ,   51                  'description' :  "Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone." ,   56                  'skip_download' :  True ,   58              'skip' :  'Currently BBC iPlayer TV programmes are available to play in the UK only' ,   61              'url' :  'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion' ,   65                  'title' :  "Tomorrow's Worlds: The Unearthly History of Science Fiction" ,   66                  'description' :  '2. 
Invasion' ,   71                  'skip_download' :  True ,   73              'skip' :  'Currently BBC iPlayer TV programmes are available to play in the UK only' ,   75              'url' :  'http://www.bbc.co.uk/programmes/b04v20dw' ,   79                  'title' :  'Pete Tong, The Essential New Tune Special' ,   80                  'description' :  "Pete has a very special mix - all of 2014's Essential New Tunes!" ,   85                  'skip_download' :  True ,   88              'url' :  'http://www.bbc.co.uk/music/clips/p02frcc3' ,   93                  'title' :  'Pete Tong, Past, Present and Future Special, Madeon - After Hours mix' ,   94                  'description' :  'French house superstar Madeon takes us out of the club and onto the after party.' ,   99                  'skip_download' :  True ,  102              'url' :  'http://www.bbc.co.uk/music/clips/p025c0zz' ,  107                  'title' :  'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)' ,  108                  'description' :  'Rae Morris performs Closer for BBC Three at Reading 2014' ,  113                  'skip_download' :  True ,  116              'url' :  'http://www.bbc.co.uk/iplayer/playlist/p01dvks4' ,  117              'only_matching' :  True ,  119              'url' :  'http://www.bbc.co.uk/music/clips#p02frcc3' ,  120              'only_matching' :  True ,  122              'url' :  'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo' ,  123              'only_matching' :  True ,  127      def  _extract_asx_playlist ( self
,  connection
,  programme_id
):  128          asx 
=  self
._ download
_ xml
( connection
. get ( 'href' ),  programme_id
,  'Downloading ASX playlist' )  129          return  [ ref
. get ( 'href' )  for  ref 
in  asx
. findall ( './Entry/ref' )]  131      def  _extract_connection ( self
,  connection
,  programme_id
):  133          protocol 
=  connection
. get ( 'protocol' )  134          supplier 
=  connection
. get ( 'supplier' )  135          if  protocol 
==  'http' :  136              href 
=  connection
. get ( 'href' )  138              if  supplier 
==  'asx' :  139                  for  i
,  ref 
in  enumerate ( self
._ extract
_ asx
_ playlist
( connection
,  programme_id
)):  142                          'format_id' :  'ref %s _ %s '  % ( i
,  supplier
),  148                      'format_id' :  supplier
,  150          elif  protocol 
==  'rtmp' :  151              application 
=  connection
. get ( 'application' ,  'ondemand' )  152              auth_string 
=  connection
. get ( 'authString' )  153              identifier 
=  connection
. get ( 'identifier' )  154              server 
=  connection
. get ( 'server' )  156                  'url' :  ' %s :// %s / %s ? %s '  % ( protocol
,  server
,  application
,  auth_string
),  157                  'play_path' :  identifier
,  158                  'app' :  ' %s ? %s '  % ( application
,  auth_string
),  159                  'page_url' :  'http://www.bbc.co.uk' ,  160                  'player_url' :  'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf' ,  163                  'format_id' :  supplier
,  167      def  _extract_items ( self
,  playlist
):  168          return  playlist
. findall ( './{http://bbc.co.uk/2008/emp/playlist}item' )  170      def  _extract_medias ( self
,  media_selection
):  171          error 
=  media_selection
. find ( './{http://bbc.co.uk/2008/mp/mediaselection}error' )  172          if  error 
is not None :  173              raise  ExtractorError (  174                  ' %s  returned error:  %s '  % ( self
. IE_NAME
,  error
. get ( 'id' )),  expected
= True )  175          return  media_selection
. findall ( './{http://bbc.co.uk/2008/mp/mediaselection}media' )  177      def  _extract_connections ( self
,  media
):  178          return  media
. findall ( './{http://bbc.co.uk/2008/mp/mediaselection}connection' )  180      def  _extract_video ( self
,  media
,  programme_id
):  182          vbr 
=  int ( media
. get ( 'bitrate' ))  183          vcodec 
=  media
. get ( 'encoding' )  184          service 
=  media
. get ( 'service' )  185          width 
=  int ( media
. get ( 'width' ))  186          height 
=  int ( media
. get ( 'height' ))  187          file_size 
=  int ( media
. get ( 'media_file_size' ))  188          for  connection 
in  self
._ extract
_ connections
( media
):  189              conn_formats 
=  self
._ extract
_ connection
( connection
,  programme_id
)  190              for  format 
in  conn_formats
:  192                      'format_id' :  ' %s _ %s '  % ( service
,  format
[ 'format_id' ]),  197                      'filesize' :  file_size
,  199              formats
. extend ( conn_formats
)  202      def  _extract_audio ( self
,  media
,  programme_id
):  204          abr 
=  int ( media
. get ( 'bitrate' ))  205          acodec 
=  media
. get ( 'encoding' )  206          service 
=  media
. get ( 'service' )  207          for  connection 
in  self
._ extract
_ connections
( media
):  208              conn_formats 
=  self
._ extract
_ connection
( connection
,  programme_id
)  209              for  format 
in  conn_formats
:  211                      'format_id' :  ' %s _ %s '  % ( service
,  format
[ 'format_id' ]),  215              formats
. extend ( conn_formats
)  218      def  _extract_captions ( self
,  media
,  programme_id
):  220          for  connection 
in  self
._ extract
_ connections
( media
):  221              captions 
=  self
._ download
_ xml
( connection
. get ( 'href' ),  programme_id
,  'Downloading captions' )  222              lang 
=  captions
. get ( '{http://www.w3.org/XML/1998/namespace}lang' ,  'en' )  223              ps 
=  captions
. findall ( './ {0} body/ {0} div/ {0} p' . format ( '{http://www.w3.org/2006/10/ttaf1}' ))  225              for  pos
,  p 
in  enumerate ( ps
):  226                  srt 
+=  ' %s \r\n %s  -->  %s \r\n %s \r\n\r\n '  % ( str ( pos
),  p
. get ( 'begin' ),  p
. get ( 'end' ),  227                                                            p
. text
. strip ()  if  p
. text 
is not None else  '' )  228              subtitles
[ lang
] =  srt
 231      def  _download_media_selector ( self
,  programme_id
):  233              media_selection 
=  self
._ download
_ xml
(  234                  'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/ %s '  %  programme_id
,  235                  programme_id
,  'Downloading media selection XML' )  236          except  ExtractorError 
as  ee
:  237              if  isinstance ( ee
. cause
,  compat_HTTPError
)  and  ee
. cause
. code 
==  403 :  238                  media_selection 
=  xml
. etree
. ElementTree
. fromstring ( ee
. cause
. read (). encode ( 'utf-8' ))  245          for  media 
in  self
._ extract
_ medias
( media_selection
):  246              kind 
=  media
. get ( 'kind' )  248                  formats
. extend ( self
._ extract
_ audio
( media
,  programme_id
))  249              elif  kind 
==  'video' :  250                  formats
. extend ( self
._ extract
_ video
( media
,  programme_id
))  251              elif  kind 
==  'captions' :  252                  subtitles 
=  self
._ extract
_ captions
( media
,  programme_id
)  254          return  formats
,  subtitles
 256      def  _download_playlist ( self
,  playlist_id
):  258              playlist 
=  self
._ download
_ json
(  259                  'http://www.bbc.co.uk/programmes/ %s /playlist.json'  %  playlist_id
,  260                  playlist_id
,  'Downloading playlist JSON' )  262              version 
=  playlist
. get ( 'defaultAvailableVersion' )  264                  smp_config 
=  version
[ 'smpConfig' ]  265                  title 
=  smp_config
[ 'title' ]  266                  description 
=  smp_config
[ 'summary' ]  267                  for  item 
in  smp_config
[ 'items' ]:  269                      if  kind 
!=  'programme'  and  kind 
!=  'radioProgramme' :  271                      programme_id 
=  item
. get ( 'vpid' )  272                      duration 
=  int ( item
. get ( 'duration' ))  273                      formats
,  subtitles 
=  self
._ download
_ media
_ selector
( programme_id
)  274                  return  programme_id
,  title
,  description
,  duration
,  formats
,  subtitles
 275          except  ExtractorError 
as  ee
:  276              if not  isinstance ( ee
. cause
,  compat_HTTPError
)  and  ee
. cause
. code 
==  404 :  279          # fallback to legacy playlist  280          playlist 
=  self
._ download
_ xml
(  281              'http://www.bbc.co.uk/iplayer/playlist/ %s '  %  playlist_id
,  282              playlist_id
,  'Downloading legacy playlist XML' )  284          no_items 
=  playlist
. find ( './{http://bbc.co.uk/2008/emp/playlist}noItems' )  285          if  no_items 
is not None :  286              reason 
=  no_items
. get ( 'reason' )  287              if  reason 
==  'preAvailability' :  288                  msg 
=  'Episode  %s  is not yet available'  %  playlist_id
 289              elif  reason 
==  'postAvailability' :  290                  msg 
=  'Episode  %s  is no longer available'  %  playlist_id
 291              elif  reason 
==  'noMedia' :  292                  msg 
=  'Episode  %s  is not currently available'  %  playlist_id
 294                  msg 
=  'Episode  %s  is not available:  %s '  % ( playlist_id
,  reason
)  295              raise  ExtractorError ( msg
,  expected
= True )  297          for  item 
in  self
._ extract
_ items
( playlist
):  298              kind 
=  item
. get ( 'kind' )  299              if  kind 
!=  'programme'  and  kind 
!=  'radioProgramme' :  301              title 
=  playlist
. find ( './{http://bbc.co.uk/2008/emp/playlist}title' ). text
 302              description 
=  playlist
. find ( './{http://bbc.co.uk/2008/emp/playlist}summary' ). text
 303              programme_id 
=  item
. get ( 'identifier' )  304              duration 
=  int ( item
. get ( 'duration' ))  305              formats
,  subtitles 
=  self
._ download
_ media
_ selector
( programme_id
)  307          return  programme_id
,  title
,  description
,  duration
,  formats
,  subtitles
 309      def  _real_extract ( self
,  url
):  310          group_id 
=  self
._ match
_ id
( url
)  312          webpage 
=  self
._ download
_ webpage
( url
,  group_id
,  'Downloading video page' )  314          programme_id 
=  self
._ search
_ regex
(  315              r
'"vpid"\s*:\s*"([\da-z] {8} )"' ,  webpage
,  'vpid' ,  fatal
= False ,  default
= None )  317              player 
=  self
._ download
_ json
(  318                  'http://www.bbc.co.uk/iplayer/episode/ %s .json'  %  group_id
,  319                  group_id
)[ 'jsConf' ][ 'player' ]  320              title 
=  player
[ 'title' ]  321              description 
=  player
[ 'subtitle' ]  322              duration 
=  player
[ 'duration' ]  323              formats
,  subtitles 
=  self
._ download
_ media
_ selector
( programme_id
)  325              programme_id
,  title
,  description
,  duration
,  formats
,  subtitles 
=  self
._ download
_ playlist
( group_id
)  327          if  self
._ downloader
. params
. get ( 'listsubtitles' ,  False ):  328              self
._l ist
_ available
_ subtitles
( programme_id
,  subtitles
)  331          self
._ sort
_ formats
( formats
)  336              'description' :  description
,  337              'duration' :  duration
,  339              'subtitles' :  subtitles
,