]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/bliptv.py
1 from __future__
import unicode_literals
5 from . common
import InfoExtractor
21 class BlipTVIE ( InfoExtractor
):
22 _VALID_URL
= r
'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))'
26 'url' : 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352' ,
27 'md5' : '80baf1ec5c3d2019037c1c707d676b9f' ,
31 'title' : 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3' ,
32 'description' : 'md5:9bc31f227219cde65e47eeec8d2dc596' ,
33 'timestamp' : 1323138843 ,
34 'upload_date' : '20111206' ,
36 'uploader_id' : '679425' ,
41 # https://github.com/rg3/youtube-dl/pull/2274
42 'note' : 'Video with subtitles' ,
43 'url' : 'http://blip.tv/play/h6Uag5OEVgI.html' ,
44 'md5' : '309f9d25b820b086ca163ffac8031806' ,
48 'title' : 'Red vs. Blue Season 11 Episode 1' ,
49 'description' : 'One-Zero-One' ,
50 'timestamp' : 1371261608 ,
51 'upload_date' : '20130615' ,
52 'uploader' : 'redvsblue' ,
53 'uploader_id' : '792887' ,
58 # https://bugzilla.redhat.com/show_bug.cgi?id=967465
59 'url' : 'http://a.blip.tv/api.swf#h6Uag5KbVwI' ,
60 'md5' : '314e87b1ebe7a48fcbfdd51b791ce5a6' ,
64 'upload_date' : '20130520' ,
65 'description' : 'Two hapless space marines argue over what to do when they realize they have an astronomically huge problem on their hands.' ,
66 'title' : 'Red vs. Blue Season 11 Trailer' ,
67 'timestamp' : 1369029609 ,
68 'uploader' : 'redvsblue' ,
69 'uploader_id' : '792887' ,
73 'url' : 'http://blip.tv/play/gbk766dkj4Yn' ,
74 'md5' : 'fe0a33f022d49399a241e84a8ea8b8e3' ,
78 'upload_date' : '20090208' ,
79 'description' : 'Witness the first appearance of the Nostalgia Critic character, as Doug reviews the movie Transformers.' ,
80 'title' : 'Nostalgia Critic: Transformers' ,
81 'timestamp' : 1234068723 ,
82 'uploader' : 'NostalgiaCritic' ,
83 'uploader_id' : '246467' ,
87 # https://github.com/rg3/youtube-dl/pull/4404
89 'url' : 'http://blip.tv/hilarios-productions/weekly-manga-recap-kingdom-7119982' ,
90 'md5' : '76c0a56f24e769ceaab21fbb6416a351' ,
94 'title' : 'Weekly Manga Recap: Kingdom' ,
95 'description' : 'And then Shin breaks the enemy line, and he's all like HWAH! And then he slices a guy and it's all like FWASHING! And... it's really hard to describe the best parts of this series without breaking down into sound effects, okay?' ,
96 'timestamp' : 1417660321 ,
97 'upload_date' : '20141204' ,
98 'uploader' : 'The Rollo T' ,
99 'uploader_id' : '407429' ,
106 'url' : 'http://blip.tv/rss/flash/6700880' ,
110 'title' : 'Cowboy Bebop: Gateway Shuffle Review' ,
111 'description' : 'md5:3acc480c0f9ae157f5fe88547ecaf3f8' ,
112 'timestamp' : 1386639757 ,
113 'upload_date' : '20131210' ,
114 'uploader' : 'sfdebris' ,
115 'uploader_id' : '706520' ,
121 def _extract_url ( webpage
):
122 mobj
= re
. search ( r
'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)' , webpage
)
124 return 'http://blip.tv/a/a-' + mobj
. group ( 1 )
125 mobj
= re
. search ( r
'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)' , webpage
)
129 def _real_extract ( self
, url
):
130 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
131 lookup_id
= mobj
. group ( 'lookup_id' )
133 # See https://github.com/rg3/youtube-dl/issues/857 and
134 # https://github.com/rg3/youtube-dl/issues/4197
136 urlh
= self
._ request
_ webpage
(
137 'http://blip.tv/play/ %s ' % lookup_id
, lookup_id
, 'Resolving lookup id' )
138 url
= compat_urlparse
. urlparse ( urlh
. geturl ())
139 qs
= compat_urlparse
. parse_qs ( url
. query
)
140 mobj
= re
. match ( self
._ VALID
_U RL
, qs
[ 'file' ][ 0 ])
142 video_id
= mobj
. group ( 'id' )
144 rss
= self
._ download
_ xml
( 'http://blip.tv/rss/flash/ %s ' % video_id
, video_id
, 'Downloading video RSS' )
147 return xpath_with_ns ( p
, {
148 'blip' : 'http://blip.tv/dtd/blip/1.0' ,
149 'media' : 'http://search.yahoo.com/mrss/' ,
150 'itunes' : 'http://www.itunes.com/dtds/podcast-1.0.dtd' ,
153 item
= rss
. find ( 'channel/item' )
155 video_id
= xpath_text ( item
, _x ( 'blip:item_id' ), 'video id' ) or lookup_id
156 title
= xpath_text ( item
, 'title' , 'title' , fatal
= True )
157 description
= clean_html ( xpath_text ( item
, _x ( 'blip:puredescription' ), 'description' ))
158 timestamp
= parse_iso8601 ( xpath_text ( item
, _x ( 'blip:datestamp' ), 'timestamp' ))
159 uploader
= xpath_text ( item
, _x ( 'blip:user' ), 'uploader' )
160 uploader_id
= xpath_text ( item
, _x ( 'blip:userid' ), 'uploader id' )
161 duration
= int_or_none ( xpath_text ( item
, _x ( 'blip:runtime' ), 'duration' ))
162 media_thumbnail
= item
. find ( _x ( 'media:thumbnail' ))
163 thumbnail
= ( media_thumbnail
. get ( 'url' ) if media_thumbnail
is not None
164 else xpath_text ( item
, 'image' , 'thumbnail' ))
165 categories
= [ category
. text
for category
in item
. findall ( 'category' ) if category
is not None ]
170 media_group
= item
. find ( _x ( 'media:group' ))
171 for media_content
in media_group
. findall ( _x ( 'media:content' )):
172 url
= media_content
. get ( 'url' )
173 role
= media_content
. get ( _x ( 'blip:role' ))
174 msg
= self
._ download
_ webpage
(
175 url
+ '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url' ,
176 video_id
, 'Resolving URL for %s ' % role
)
177 real_url
= compat_urlparse
. parse_qs ( msg
. strip ())[ 'message' ][ 0 ]
179 media_type
= media_content
. get ( 'type' )
180 if media_type
== 'text/srt' or url
. endswith ( '.srt' ):
184 lang
= role
. rpartition ( '-' )[- 1 ]. strip (). lower ()
185 langcode
= LANGS
. get ( lang
, lang
)
186 subtitles_urls
[ langcode
] = url
187 elif media_type
. startswith ( 'video/' ):
191 'format_note' : media_type
,
192 'vcodec' : media_content
. get ( _x ( 'blip:vcodec' )) or 'none' ,
193 'acodec' : media_content
. get ( _x ( 'blip:acodec' )),
194 'filesize' : media_content
. get ( 'filesize' ),
195 'width' : int_or_none ( media_content
. get ( 'width' )),
196 'height' : int_or_none ( media_content
. get ( 'height' )),
198 self
._ check
_ formats
( formats
, video_id
)
199 self
._ sort
_ formats
( formats
)
201 subtitles
= self
. extract_subtitles ( video_id
, subtitles_urls
)
206 'description' : description
,
207 'timestamp' : timestamp
,
208 'uploader' : uploader
,
209 'uploader_id' : uploader_id
,
210 'duration' : duration
,
211 'thumbnail' : thumbnail
,
212 'categories' : categories
,
214 'subtitles' : subtitles
,
217 def _get_subtitles ( self
, video_id
, subtitles_urls
):
219 for lang
, url
in subtitles_urls
. items ():
220 # For some weird reason, blip.tv serves a video instead of subtitles
221 # when we request with a common UA
222 req
= compat_urllib_request
. Request ( url
)
223 req
. add_header ( 'User-Agent' , 'youtube-dl' )
225 # The extension is 'srt' but it's actually an 'ass' file
227 'data' : self
._ download
_ webpage
( req
, None , note
= False ),
232 class BlipTVUserIE ( InfoExtractor
):
233 _VALID_URL
= r
'(?:(?:https?://(?:\w+\.)?blip\.tv/)|bliptvuser:)(?!api\.swf)([^/]+)/*$'
235 IE_NAME
= 'blip.tv:user'
237 'url' : 'http://blip.tv/actone' ,
240 'title' : 'Act One: The Series' ,
245 def _real_extract ( self
, url
):
246 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
247 username
= mobj
. group ( 1 )
249 page_base
= 'http://m.blip.tv/pr/show_get_full_episode_list?users_id= %s &lite=0&esi=1'
251 page
= self
._ download
_ webpage
( url
, username
, 'Downloading user page' )
252 mobj
= re
. search ( r
'data-users-id="([^"]+)"' , page
)
253 page_base
= page_base
% mobj
. group ( 1 )
254 title
= self
._ og
_ search
_ title
( page
)
256 # Download video ids using BlipTV Ajax calls. Result size per
257 # query is limited (currently to 12 videos) so we need to query
258 # page by page until there are no video ids - it means we got
265 url
= page_base
+ "&page=" + str ( pagenum
)
266 page
= self
._ download
_ webpage
(
267 url
, username
, 'Downloading video ids from page %d ' % pagenum
)
269 # Extract video identifiers
272 for mobj
in re
. finditer ( r
'href="/([^"]+)"' , page
):
273 if mobj
. group ( 1 ) not in ids_in_page
:
274 ids_in_page
. append ( unescapeHTML ( mobj
. group ( 1 )))
276 video_ids
. extend ( ids_in_page
)
278 # A little optimization - if current page is not
279 # "full", ie. does not contain PAGE_SIZE video ids then
280 # we can assume that this page is the last one - there
281 # are no more ids on further pages - no need to query
284 if len ( ids_in_page
) < self
._ PAGE
_ SIZE
:
289 urls
= [ 'http://blip.tv/ %s ' % video_id
for video_id
in video_ids
]
290 url_entries
= [ self
. url_result ( vurl
, 'BlipTV' ) for vurl
in urls
]
291 return self
. playlist_result (
292 url_entries
, playlist_title
= title
, playlist_id
= username
)