]>
 
 
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/bbccouk.py
 
 
 
 
 
 
 
 
   1  from  __future__ 
import  unicode_literals
 
   5  from  . subtitles 
import  SubtitlesInfoExtractor
 
   6  from  .. utils 
import  ExtractorError
 
   9  class  BBCCoUkIE ( SubtitlesInfoExtractor
):  
  11      IE_DESC 
=  'BBC iPlayer'  
  12      _VALID_URL 
=  r
'https?://(?:www\.)?bbc\.co\.uk/(?:programmes|iplayer/episode)/(?P<id>[\da-z] {8} )'  
  16              'url' :  'http://www.bbc.co.uk/programmes/p01q7wz1' ,  
  20                  'title' :  'Friction: Blu Mar Ten guest mix: Blu Mar Ten - Guest Mix' ,  
  21                  'description' :  'Blu Mar Ten deliver a Guest Mix for Friction.' ,  
  26                  'skip_download' :  True ,  
  30              'url' :  'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/' ,  
  34                  'title' :  'The Man in Black: Series 3: The Printed Name' ,  
  35                  'description' :  "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey." ,  
  40                  'skip_download' :  True ,  
  44              'url' :  'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/' ,  
  48                  'title' :  'The Voice UK: Series 3: Blind Auditions 5' ,  
  49                  'description' :  "Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone." ,  
  54                  'skip_download' :  True ,  
  56              'skip' :  'Currently BBC iPlayer TV programmes are available to play in the UK only' ,  
  60      def  _extract_asx_playlist ( self
,  connection
,  programme_id
):  
  61          asx 
=  self
._ download
_ xml
( connection
. get ( 'href' ),  programme_id
,  'Downloading ASX playlist' )  
  62          return  [ ref
. get ( 'href' )  for  ref 
in  asx
. findall ( './Entry/ref' )]  
  64      def  _extract_connection ( self
,  connection
,  programme_id
):  
  66          protocol 
=  connection
. get ( 'protocol' )  
  67          supplier 
=  connection
. get ( 'supplier' )  
  68          if  protocol 
==  'http' :  
  69              href 
=  connection
. get ( 'href' )  
  72                  for  i
,  ref 
in  enumerate ( self
._ extract
_ asx
_ playlist
( connection
,  programme_id
)):  
  75                          'format_id' :  'ref %s _ %s '  % ( i
,  supplier
),  
  81                      'format_id' :  supplier
,  
  83          elif  protocol 
==  'rtmp' :  
  84              application 
=  connection
. get ( 'application' ,  'ondemand' )  
  85              auth_string 
=  connection
. get ( 'authString' )  
  86              identifier 
=  connection
. get ( 'identifier' )  
  87              server 
=  connection
. get ( 'server' )  
  89                  'url' :  ' %s :// %s / %s ? %s '  % ( protocol
,  server
,  application
,  auth_string
),  
  90                  'play_path' :  identifier
,  
  91                  'app' :  ' %s ? %s '  % ( application
,  auth_string
),  
  92                  'page_url' :  'http://www.bbc.co.uk' ,  
  93                  'player_url' :  'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf' ,  
  96                  'format_id' :  supplier
,  
 100      def  _extract_items ( self
,  playlist
):  
 101          return  playlist
. findall ( './{http://bbc.co.uk/2008/emp/playlist}item' )  
 103      def  _extract_medias ( self
,  media_selection
):  
 104          return  media_selection
. findall ( './{http://bbc.co.uk/2008/mp/mediaselection}media' )  
 106      def  _extract_connections ( self
,  media
):  
 107          return  media
. findall ( './{http://bbc.co.uk/2008/mp/mediaselection}connection' )  
 109      def  _extract_video ( self
,  media
,  programme_id
):  
 111          vbr 
=  int ( media
. get ( 'bitrate' ))  
 112          vcodec 
=  media
. get ( 'encoding' )  
 113          service 
=  media
. get ( 'service' )  
 114          width 
=  int ( media
. get ( 'width' ))  
 115          height 
=  int ( media
. get ( 'height' ))  
 116          file_size 
=  int ( media
. get ( 'media_file_size' ))  
 117          for  connection 
in  self
._ extract
_ connections
( media
):  
 118              conn_formats 
=  self
._ extract
_ connection
( connection
,  programme_id
)  
 119              for  format 
in  conn_formats
:  
 121                      'format_id' :  ' %s _ %s '  % ( service
,  format
[ 'format_id' ]),  
 126                      'filesize' :  file_size
,  
 128              formats
. extend ( conn_formats
)  
 131      def  _extract_audio ( self
,  media
,  programme_id
):  
 133          abr 
=  int ( media
. get ( 'bitrate' ))  
 134          acodec 
=  media
. get ( 'encoding' )  
 135          service 
=  media
. get ( 'service' )  
 136          for  connection 
in  self
._ extract
_ connections
( media
):  
 137              conn_formats 
=  self
._ extract
_ connection
( connection
,  programme_id
)  
 138              for  format 
in  conn_formats
:  
 140                      'format_id' :  ' %s _ %s '  % ( service
,  format
[ 'format_id' ]),  
 144              formats
. extend ( conn_formats
)  
 147      def  _extract_captions ( self
,  media
,  programme_id
):  
 149          for  connection 
in  self
._ extract
_ connections
( media
):  
 150              captions 
=  self
._ download
_ xml
( connection
. get ( 'href' ),  programme_id
,  'Downloading captions' )  
 151              lang 
=  captions
. get ( '{http://www.w3.org/XML/1998/namespace}lang' ,  'en' )  
 152              ps 
=  captions
. findall ( './ {0} body/ {0} div/ {0} p' . format ( '{http://www.w3.org/2006/10/ttaf1}' ))  
 154              for  pos
,  p 
in  enumerate ( ps
):  
 155                  srt 
+=  ' %s \r\n %s  -->  %s \r\n %s \r\n\r\n '  % ( str ( pos
),  p
. get ( 'begin' ),  p
. get ( 'end' ),  
 156                                                            p
. text
. strip ()  if  p
. text 
is not None else  '' )  
 157              subtitles
[ lang
] =  srt
 
 160      def  _real_extract ( self
,  url
):  
 161          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  
 162          group_id 
=  mobj
. group ( 'id' )  
 164          playlist 
=  self
._ download
_ xml
( 'http://www.bbc.co.uk/iplayer/playlist/ %s '  %  group_id
,  group_id
,  
 165              'Downloading playlist XML' )  
 167          no_items 
=  playlist
. find ( './{http://bbc.co.uk/2008/emp/playlist}noItems' )  
 168          if  no_items 
is not None :  
 169              reason 
=  no_items
. get ( 'reason' )  
 170              if  reason 
==  'preAvailability' :  
 171                  msg 
=  'Episode  %s  is not yet available'  %  group_id
 
 172              elif  reason 
==  'postAvailability' :  
 173                  msg 
=  'Episode  %s  is no longer available'  %  group_id
 
 175                  msg 
=  'Episode  %s  is not available:  %s '  % ( group_id
,  reason
)  
 176              raise  ExtractorError ( msg
,  expected
= True )  
 181          for  item 
in  self
._ extract
_ items
( playlist
):  
 182              kind 
=  item
. get ( 'kind' )  
 183              if  kind 
!=  'programme'  and  kind 
!=  'radioProgramme' :  
 185              title 
=  playlist
. find ( './{http://bbc.co.uk/2008/emp/playlist}title' ). text
 
 186              description 
=  playlist
. find ( './{http://bbc.co.uk/2008/emp/playlist}summary' ). text
 
 188              programme_id 
=  item
. get ( 'identifier' )  
 189              duration 
=  int ( item
. get ( 'duration' ))  
 191              media_selection 
=  self
._ download
_ xml
(  
 192                  'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/ %s '   %  programme_id
,  
 193                  programme_id
,  'Downloading media selection XML' )  
 195              for  media 
in  self
._ extract
_ medias
( media_selection
):  
 196                  kind 
=  media
. get ( 'kind' )  
 198                      formats
. extend ( self
._ extract
_ audio
( media
,  programme_id
))  
 199                  elif  kind 
==  'video' :  
 200                      formats
. extend ( self
._ extract
_ video
( media
,  programme_id
))  
 201                  elif  kind 
==  'captions' :  
 202                      subtitles 
=  self
._ extract
_ captions
( media
,  programme_id
)  
 204          if  self
._ downloader
. params
. get ( 'listsubtitles' ,  False ):  
 205              self
._l ist
_ available
_ subtitles
( programme_id
,  subtitles
)  
 208          self
._ sort
_ formats
( formats
)  
 213              'description' :  description
,  
 214              'duration' :  duration
,  
 216              'subtitles' :  subtitles
,