]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/bbccouk.py
1 from __future__
import unicode_literals
5 from . subtitles
import SubtitlesInfoExtractor
6 from .. utils
import ExtractorError
9 class BBCCoUkIE ( SubtitlesInfoExtractor
):
11 IE_DESC
= 'BBC iPlayer'
12 _VALID_URL
= r
'https?://(?:www\.)?bbc\.co\.uk/(?:programmes|iplayer/episode)/(?P<id>[\da-z] {8} )'
16 'url' : 'http://www.bbc.co.uk/programmes/b039g8p7' ,
20 'title' : 'Kaleidoscope: Leonard Cohen' ,
21 'description' : 'md5:db4755d7a665ae72343779f7dacb402c' ,
26 'skip_download' : True ,
30 'url' : 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/' ,
34 'title' : 'The Man in Black: Series 3: The Printed Name' ,
35 'description' : "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey." ,
40 'skip_download' : True ,
42 'skip' : 'Episode is no longer available on BBC iPlayer Radio' ,
45 'url' : 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/' ,
49 'title' : 'The Voice UK: Series 3: Blind Auditions 5' ,
50 'description' : "Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone." ,
55 'skip_download' : True ,
57 'skip' : 'Currently BBC iPlayer TV programmes are available to play in the UK only' ,
61 def _extract_asx_playlist ( self
, connection
, programme_id
):
62 asx
= self
._ download
_ xml
( connection
. get ( 'href' ), programme_id
, 'Downloading ASX playlist' )
63 return [ ref
. get ( 'href' ) for ref
in asx
. findall ( './Entry/ref' )]
65 def _extract_connection ( self
, connection
, programme_id
):
67 protocol
= connection
. get ( 'protocol' )
68 supplier
= connection
. get ( 'supplier' )
69 if protocol
== 'http' :
70 href
= connection
. get ( 'href' )
73 for i
, ref
in enumerate ( self
._ extract
_ asx
_ playlist
( connection
, programme_id
)):
76 'format_id' : 'ref %s _ %s ' % ( i
, supplier
),
82 'format_id' : supplier
,
84 elif protocol
== 'rtmp' :
85 application
= connection
. get ( 'application' , 'ondemand' )
86 auth_string
= connection
. get ( 'authString' )
87 identifier
= connection
. get ( 'identifier' )
88 server
= connection
. get ( 'server' )
90 'url' : ' %s :// %s / %s ? %s ' % ( protocol
, server
, application
, auth_string
),
91 'play_path' : identifier
,
92 'app' : ' %s ? %s ' % ( application
, auth_string
),
93 'page_url' : 'http://www.bbc.co.uk' ,
94 'player_url' : 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf' ,
97 'format_id' : supplier
,
101 def _extract_items ( self
, playlist
):
102 return playlist
. findall ( './{http://bbc.co.uk/2008/emp/playlist}item' )
104 def _extract_medias ( self
, media_selection
):
105 return media_selection
. findall ( './{http://bbc.co.uk/2008/mp/mediaselection}media' )
107 def _extract_connections ( self
, media
):
108 return media
. findall ( './{http://bbc.co.uk/2008/mp/mediaselection}connection' )
110 def _extract_video ( self
, media
, programme_id
):
112 vbr
= int ( media
. get ( 'bitrate' ))
113 vcodec
= media
. get ( 'encoding' )
114 service
= media
. get ( 'service' )
115 width
= int ( media
. get ( 'width' ))
116 height
= int ( media
. get ( 'height' ))
117 file_size
= int ( media
. get ( 'media_file_size' ))
118 for connection
in self
._ extract
_ connections
( media
):
119 conn_formats
= self
._ extract
_ connection
( connection
, programme_id
)
120 for format
in conn_formats
:
122 'format_id' : ' %s _ %s ' % ( service
, format
[ 'format_id' ]),
127 'filesize' : file_size
,
129 formats
. extend ( conn_formats
)
132 def _extract_audio ( self
, media
, programme_id
):
134 abr
= int ( media
. get ( 'bitrate' ))
135 acodec
= media
. get ( 'encoding' )
136 service
= media
. get ( 'service' )
137 for connection
in self
._ extract
_ connections
( media
):
138 conn_formats
= self
._ extract
_ connection
( connection
, programme_id
)
139 for format
in conn_formats
:
141 'format_id' : ' %s _ %s ' % ( service
, format
[ 'format_id' ]),
145 formats
. extend ( conn_formats
)
148 def _extract_captions ( self
, media
, programme_id
):
150 for connection
in self
._ extract
_ connections
( media
):
151 captions
= self
._ download
_ xml
( connection
. get ( 'href' ), programme_id
, 'Downloading captions' )
152 lang
= captions
. get ( '{http://www.w3.org/XML/1998/namespace}lang' , 'en' )
153 ps
= captions
. findall ( './ {0} body/ {0} div/ {0} p' . format ( '{http://www.w3.org/2006/10/ttaf1}' ))
155 for pos
, p
in enumerate ( ps
):
156 srt
+= ' %s \r\n %s --> %s \r\n %s \r\n\r\n ' % ( str ( pos
), p
. get ( 'begin' ), p
. get ( 'end' ),
157 p
. text
. strip () if p
. text
is not None else '' )
158 subtitles
[ lang
] = srt
161 def _real_extract ( self
, url
):
162 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
163 group_id
= mobj
. group ( 'id' )
165 webpage
= self
._ download
_ webpage
( url
, group_id
, 'Downloading video page' )
166 if re
. search ( r
'id="emp-error" class="notinuk">' , webpage
):
167 raise ExtractorError ( 'Currently BBC iPlayer TV programmes are available to play in the UK only' ,
170 playlist
= self
._ download
_ xml
( 'http://www.bbc.co.uk/iplayer/playlist/ %s ' % group_id
, group_id
,
171 'Downloading playlist XML' )
173 no_items
= playlist
. find ( './{http://bbc.co.uk/2008/emp/playlist}noItems' )
174 if no_items
is not None :
175 reason
= no_items
. get ( 'reason' )
176 if reason
== 'preAvailability' :
177 msg
= 'Episode %s is not yet available' % group_id
178 elif reason
== 'postAvailability' :
179 msg
= 'Episode %s is no longer available' % group_id
181 msg
= 'Episode %s is not available: %s ' % ( group_id
, reason
)
182 raise ExtractorError ( msg
, expected
= True )
187 for item
in self
._ extract
_ items
( playlist
):
188 kind
= item
. get ( 'kind' )
189 if kind
!= 'programme' and kind
!= 'radioProgramme' :
191 title
= playlist
. find ( './{http://bbc.co.uk/2008/emp/playlist}title' ). text
192 description
= playlist
. find ( './{http://bbc.co.uk/2008/emp/playlist}summary' ). text
194 programme_id
= item
. get ( 'identifier' )
195 duration
= int ( item
. get ( 'duration' ))
197 media_selection
= self
._ download
_ xml
(
198 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/ %s ' % programme_id
,
199 programme_id
, 'Downloading media selection XML' )
201 for media
in self
._ extract
_ medias
( media_selection
):
202 kind
= media
. get ( 'kind' )
204 formats
. extend ( self
._ extract
_ audio
( media
, programme_id
))
205 elif kind
== 'video' :
206 formats
. extend ( self
._ extract
_ video
( media
, programme_id
))
207 elif kind
== 'captions' :
208 subtitles
= self
._ extract
_ captions
( media
, programme_id
)
210 if self
._ downloader
. params
. get ( 'listsubtitles' , False ):
211 self
._l ist
_ available
_ subtitles
( programme_id
, subtitles
)
214 self
._ sort
_ formats
( formats
)
219 'description' : description
,
220 'duration' : duration
,
222 'subtitles' : subtitles
,