]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/bbccouk.py 
6d785c0bf08a778bd91dbe1cf956e6cf4caeb560
   1  from  __future__ 
import  unicode_literals
   5  from  . subtitles 
import  SubtitlesInfoExtractor
   6  from  .. utils 
import  ExtractorError
   9  class  BBCCoUkIE ( SubtitlesInfoExtractor
):   11      IE_DESC 
=  'BBC iPlayer'   12      _VALID_URL 
=  r
'https?://(?:www\.)?bbc\.co\.uk/(?:programmes|iplayer/episode)/(?P<id>[\da-z] {8} )'   16              'url' :  'http://www.bbc.co.uk/programmes/p01q7wz1' ,   20                  'title' :  'Friction: Blu Mar Ten guest mix: Blu Mar Ten - Guest Mix' ,   21                  'description' :  'Blu Mar Ten deliver a Guest Mix for Friction.' ,   26                  'skip_download' :  True ,   30              'url' :  'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/' ,   34                  'title' :  'The Man in Black: Series 3: The Printed Name' ,   35                  'description' :  "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey." ,   40                  'skip_download' :  True ,   44              'url' :  'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/' ,   48                  'title' :  'The Voice UK: Series 3: Blind Auditions 5' ,   49                  'description' :  "Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone." ,   54                  'skip_download' :  True ,   56              'skip' :  'Currently BBC iPlayer TV programmes are available to play in the UK only' ,   60      def  _extract_asx_playlist ( self
,  connection
,  programme_id
):   61          asx 
=  self
._ download
_ xml
( connection
. get ( 'href' ),  programme_id
,  'Downloading ASX playlist' )   62          return  [ ref
. get ( 'href' )  for  ref 
in  asx
. findall ( './Entry/ref' )]   64      def  _extract_connection ( self
,  connection
,  programme_id
):   66          protocol 
=  connection
. get ( 'protocol' )   67          supplier 
=  connection
. get ( 'supplier' )   68          if  protocol 
==  'http' :   69              href 
=  connection
. get ( 'href' )   72                  for  i
,  ref 
in  enumerate ( self
._ extract
_ asx
_ playlist
( connection
,  programme_id
)):   75                          'format_id' :  'ref %s _ %s '  % ( i
,  supplier
),   81                      'format_id' :  supplier
,   83          elif  protocol 
==  'rtmp' :   84              application 
=  connection
. get ( 'application' ,  'ondemand' )   85              auth_string 
=  connection
. get ( 'authString' )   86              identifier 
=  connection
. get ( 'identifier' )   87              server 
=  connection
. get ( 'server' )   89                  'url' :  ' %s :// %s / %s ? %s '  % ( protocol
,  server
,  application
,  auth_string
),   90                  'play_path' :  identifier
,   91                  'app' :  ' %s ? %s '  % ( application
,  auth_string
),   92                  'page_url' :  'http://www.bbc.co.uk' ,   93                  'player_url' :  'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf' ,   96                  'format_id' :  supplier
,  100      def  _extract_items ( self
,  playlist
):  101          return  playlist
. findall ( './{http://bbc.co.uk/2008/emp/playlist}item' )  103      def  _extract_medias ( self
,  media_selection
):  104          return  media_selection
. findall ( './{http://bbc.co.uk/2008/mp/mediaselection}media' )  106      def  _extract_connections ( self
,  media
):  107          return  media
. findall ( './{http://bbc.co.uk/2008/mp/mediaselection}connection' )  109      def  _extract_video ( self
,  media
,  programme_id
):  111          vbr 
=  int ( media
. get ( 'bitrate' ))  112          vcodec 
=  media
. get ( 'encoding' )  113          service 
=  media
. get ( 'service' )  114          width 
=  int ( media
. get ( 'width' ))  115          height 
=  int ( media
. get ( 'height' ))  116          file_size 
=  int ( media
. get ( 'media_file_size' ))  117          for  connection 
in  self
._ extract
_ connections
( media
):  118              conn_formats 
=  self
._ extract
_ connection
( connection
,  programme_id
)  119              for  format 
in  conn_formats
:  121                      'format_id' :  ' %s _ %s '  % ( service
,  format
[ 'format_id' ]),  126                      'filesize' :  file_size
,  128              formats
. extend ( conn_formats
)  131      def  _extract_audio ( self
,  media
,  programme_id
):  133          abr 
=  int ( media
. get ( 'bitrate' ))  134          acodec 
=  media
. get ( 'encoding' )  135          service 
=  media
. get ( 'service' )  136          for  connection 
in  self
._ extract
_ connections
( media
):  137              conn_formats 
=  self
._ extract
_ connection
( connection
,  programme_id
)  138              for  format 
in  conn_formats
:  140                      'format_id' :  ' %s _ %s '  % ( service
,  format
[ 'format_id' ]),  144              formats
. extend ( conn_formats
)  147      def  _extract_captions ( self
,  media
,  programme_id
):  149          for  connection 
in  self
._ extract
_ connections
( media
):  150              captions 
=  self
._ download
_ xml
( connection
. get ( 'href' ),  programme_id
,  'Downloading captions' )  151              lang 
=  captions
. get ( '{http://www.w3.org/XML/1998/namespace}lang' ,  'en' )  152              ps 
=  captions
. findall ( './ {0} body/ {0} div/ {0} p' . format ( '{http://www.w3.org/2006/10/ttaf1}' ))  154              for  pos
,  p 
in  enumerate ( ps
):  155                  srt 
+=  ' %s \r\n %s  -->  %s \r\n %s \r\n\r\n '  % ( str ( pos
),  p
. get ( 'begin' ),  p
. get ( 'end' ),  156                                                            p
. text
. strip ()  if  p
. text 
is not None else  '' )  157              subtitles
[ lang
] =  srt
 160      def  _real_extract ( self
,  url
):  161          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  162          group_id 
=  mobj
. group ( 'id' )  164          playlist 
=  self
._ download
_ xml
( 'http://www.bbc.co.uk/iplayer/playlist/ %s '  %  group_id
,  group_id
,  165              'Downloading playlist XML' )  167          no_items 
=  playlist
. find ( './{http://bbc.co.uk/2008/emp/playlist}noItems' )  168          if  no_items 
is not None :  169              reason 
=  no_items
. get ( 'reason' )  170              if  reason 
==  'preAvailability' :  171                  msg 
=  'Episode  %s  is not yet available'  %  group_id
 172              elif  reason 
==  'postAvailability' :  173                  msg 
=  'Episode  %s  is no longer available'  %  group_id
 175                  msg 
=  'Episode  %s  is not available:  %s '  % ( group_id
,  reason
)  176              raise  ExtractorError ( msg
,  expected
= True )  181          for  item 
in  self
._ extract
_ items
( playlist
):  182              kind 
=  item
. get ( 'kind' )  183              if  kind 
!=  'programme'  and  kind 
!=  'radioProgramme' :  185              title 
=  playlist
. find ( './{http://bbc.co.uk/2008/emp/playlist}title' ). text
 186              description 
=  playlist
. find ( './{http://bbc.co.uk/2008/emp/playlist}summary' ). text
 188              programme_id 
=  item
. get ( 'identifier' )  189              duration 
=  int ( item
. get ( 'duration' ))  191              media_selection 
=  self
._ download
_ xml
(  192                  'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/ %s '   %  programme_id
,  193                  programme_id
,  'Downloading media selection XML' )  195              for  media 
in  self
._ extract
_ medias
( media_selection
):  196                  kind 
=  media
. get ( 'kind' )  198                      formats
. extend ( self
._ extract
_ audio
( media
,  programme_id
))  199                  elif  kind 
==  'video' :  200                      formats
. extend ( self
._ extract
_ video
( media
,  programme_id
))  201                  elif  kind 
==  'captions' :  202                      subtitles 
=  self
._ extract
_ captions
( media
,  programme_id
)  204          if  self
._ downloader
. params
. get ( 'listsubtitles' ,  False ):  205              self
._l ist
_ available
_ subtitles
( programme_id
,  subtitles
)  208          self
._ sort
_ formats
( formats
)  213              'description' :  description
,  214              'duration' :  duration
,  216              'subtitles' :  subtitles
,