# coding: utf-8
# Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/bliptv.py
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import compat_urlparse

# NOTE(review): clean_html, int_or_none, parse_iso8601, sanitized_Request,
# unescapeHTML, xpath_text and xpath_with_ns are used below, but no import
# for them is visible in this excerpt -- confirm where they come from.
class BlipTVIE(InfoExtractor):
    """Information extractor for single blip.tv videos.

    Handles regular video pages and RSS URLs (numeric ``id`` group) as well
    as ``/play/`` and ``api.swf#`` embed URLs (``lookup_id`` group).

    NOTE(review): this class was rebuilt from a damaged excerpt. Pieces that
    were not visible but are required by the visible code (container
    initialisation, guards, dict/list delimiters) were restored and are
    marked with review notes; confirm them against the canonical file.
    """

    _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))'

    # NOTE(review): the attribute name and the 'info_dict' nesting are
    # presumed; only the fields below were recoverable, so some expected
    # fields may be missing from individual cases.
    _TESTS = [
        {
            'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
            'md5': '80baf1ec5c3d2019037c1c707d676b9f',
            'info_dict': {
                'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
                'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
                'timestamp': 1323138843,
                'upload_date': '20111206',
                'uploader_id': '679425',
            },
        },
        {
            # https://github.com/rg3/youtube-dl/pull/2274
            'note': 'Video with subtitles',
            'url': 'http://blip.tv/play/h6Uag5OEVgI.html',
            'md5': '309f9d25b820b086ca163ffac8031806',
            'info_dict': {
                'title': 'Red vs. Blue Season 11 Episode 1',
                'description': 'One-Zero-One',
                'timestamp': 1371261608,
                'upload_date': '20130615',
                'uploader': 'redvsblue',
                'uploader_id': '792887',
            },
        },
        {
            # https://bugzilla.redhat.com/show_bug.cgi?id=967465
            'url': 'http://a.blip.tv/api.swf#h6Uag5KbVwI',
            'md5': '314e87b1ebe7a48fcbfdd51b791ce5a6',
            'info_dict': {
                'upload_date': '20130520',
                'description': 'Two hapless space marines argue over what to do when they realize they have an astronomically huge problem on their hands.',
                'title': 'Red vs. Blue Season 11 Trailer',
                'timestamp': 1369029609,
                'uploader': 'redvsblue',
                'uploader_id': '792887',
            },
        },
        {
            'url': 'http://blip.tv/play/gbk766dkj4Yn',
            'md5': 'fe0a33f022d49399a241e84a8ea8b8e3',
            'info_dict': {
                'upload_date': '20090208',
                'description': 'Witness the first appearance of the Nostalgia Critic character, as Doug reviews the movie Transformers.',
                'title': 'Nostalgia Critic: Transformers',
                'timestamp': 1234068723,
                'uploader': 'NostalgiaCritic',
                'uploader_id': '246467',
            },
        },
        {
            # https://github.com/rg3/youtube-dl/pull/4404
            'url': 'http://blip.tv/hilarios-productions/weekly-manga-recap-kingdom-7119982',
            'md5': '76c0a56f24e769ceaab21fbb6416a351',
            'info_dict': {
                'title': 'Weekly Manga Recap: Kingdom',
                # Double-quoted because the text itself contains apostrophes.
                'description': "And then Shin breaks the enemy line, and he's all like HWAH! And then he slices a guy and it's all like FWASHING! And... it's really hard to describe the best parts of this series without breaking down into sound effects, okay?",
                'timestamp': 1417660321,
                'upload_date': '20141204',
                'uploader': 'The Rollo T',
                'uploader_id': '407429',
            },
        },
        {
            'url': 'http://blip.tv/rss/flash/6700880',
            'info_dict': {
                'title': 'Cowboy Bebop: Gateway Shuffle Review',
                'description': 'md5:3acc480c0f9ae157f5fe88547ecaf3f8',
                'timestamp': 1386639757,
                'upload_date': '20131210',
                'uploader': 'sfdebris',
                'uploader_id': '706520',
            },
        },
    ]

    # NOTE(review): the function takes no ``self``, so it is declared static
    # here; the decorator line was not visible in the excerpt.
    @staticmethod
    def _extract_url(webpage):
        """Return a blip.tv URL embedded in ``webpage``, or None.

        Two patterns are tried in order: a ``<meta>`` tag pointing at an
        ``api.blip.tv/.../redirect/.../<digits>`` URL, then an
        ``<iframe>``/``<embed>``/``<object>`` tag carrying a ``play/`` or
        ``api.swf#`` URL.
        """
        mobj = re.search(
            r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)',
            webpage)
        # re.search returns None when nothing matches; without this guard the
        # second pattern would never be reached.
        if mobj:
            return 'http://blip.tv/a/a-' + mobj.group(1)
        mobj = re.search(
            r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)',
            webpage)
        if mobj:
            return mobj.group(1)
        return None

    def _real_extract(self, url):
        """Build the info dict for the video addressed by ``url``.

        The numeric video id is taken from the URL or, for lookup-id URLs,
        from the ``file`` query parameter of the URL that
        ``http://blip.tv/play/<lookup_id>`` resolves to. Metadata, formats
        and subtitle URLs are then read from the video's RSS document.
        """
        mobj = re.match(self._VALID_URL, url)
        lookup_id = mobj.group('lookup_id')

        # See https://github.com/rg3/youtube-dl/issues/857 and
        # https://github.com/rg3/youtube-dl/issues/4197
        # Only URLs that matched the lookup_id alternative need resolving;
        # for the others the group is None and mobj already carries 'id'.
        if lookup_id:
            urlh = self._request_webpage(
                'http://blip.tv/play/%s' % lookup_id, lookup_id,
                'Resolving lookup id')
            resolved = compat_urlparse.urlparse(urlh.geturl())
            qs = compat_urlparse.parse_qs(resolved.query)
            mobj = re.match(self._VALID_URL, qs['file'][0])

        video_id = mobj.group('id')

        rss = self._download_xml(
            'http://blip.tv/rss/flash/%s' % video_id, video_id,
            'Downloading video RSS')

        # Built once instead of on every _x() call.
        namespaces = {
            'blip': 'http://blip.tv/dtd/blip/1.0',
            'media': 'http://search.yahoo.com/mrss/',
            'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
        }

        def _x(p):
            # Expand the namespace prefixes used in path ``p``.
            return xpath_with_ns(p, namespaces)

        item = rss.find('channel/item')

        video_id = xpath_text(item, _x('blip:item_id'), 'video id') or lookup_id
        title = xpath_text(item, 'title', 'title', fatal=True)
        description = clean_html(
            xpath_text(item, _x('blip:puredescription'), 'description'))
        timestamp = parse_iso8601(
            xpath_text(item, _x('blip:datestamp'), 'timestamp'))
        uploader = xpath_text(item, _x('blip:user'), 'uploader')
        uploader_id = xpath_text(item, _x('blip:userid'), 'uploader id')
        duration = int_or_none(
            xpath_text(item, _x('blip:runtime'), 'duration'))
        media_thumbnail = item.find(_x('media:thumbnail'))
        # Explicit None test: an XML element without children is falsy.
        if media_thumbnail is not None:
            thumbnail = media_thumbnail.get('url')
        else:
            thumbnail = xpath_text(item, 'image', 'thumbnail')
        categories = [category.text for category in item.findall('category')]

        # NOTE(review): both containers are used below, but their
        # initialisation was not visible in the excerpt.
        formats = []
        subtitles_urls = {}

        media_group = item.find(_x('media:group'))
        for media_content in media_group.findall(_x('media:content')):
            media_url = media_content.get('url')
            role = media_content.get(_x('blip:role'))
            msg = self._download_webpage(
                media_url + '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url',
                video_id, 'Resolving URL for %s' % role)
            real_url = compat_urlparse.parse_qs(msg.strip())['message'][0]

            media_type = media_content.get('type')
            if media_type == 'text/srt' or media_url.endswith('.srt'):
                # NOTE(review): LANGS is referenced but its definition was
                # not visible; this mapping is presumed -- confirm. Unknown
                # names fall through unchanged.
                LANGS = {
                    'english': 'en',
                }
                lang = role.rpartition('-')[-1].strip().lower()
                langcode = LANGS.get(lang, lang)
                subtitles_urls[langcode] = media_url
            # The 'type' attribute may be absent; skip such entries instead
            # of raising AttributeError on None.
            elif media_type and media_type.startswith('video/'):
                formats.append({
                    # NOTE(review): 'url' and 'format_id' were not visible in
                    # the excerpt; real_url has no other consumer, and
                    # format_id = role is presumed -- confirm.
                    'url': real_url,
                    'format_id': role,
                    'format_note': media_type,
                    'vcodec': media_content.get(_x('blip:vcodec')) or 'none',
                    'acodec': media_content.get(_x('blip:acodec')),
                    # Converted like width/height so the value is numeric.
                    'filesize': int_or_none(media_content.get('filesize')),
                    'width': int_or_none(media_content.get('width')),
                    'height': int_or_none(media_content.get('height')),
                })
        self._check_formats(formats, video_id)
        self._sort_formats(formats)

        subtitles = self.extract_subtitles(video_id, subtitles_urls)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'duration': duration,
            'thumbnail': thumbnail,
            'categories': categories,
            'formats': formats,
            'subtitles': subtitles,
        }

    def _get_subtitles(self, video_id, subtitles_urls):
        """Download the subtitle file for each language in ``subtitles_urls``.

        ``subtitles_urls`` maps a language code to a subtitle URL.

        NOTE(review): the shape of the returned mapping (language code to a
        one-element list of ``{'ext', 'data'}`` dicts) is presumed; only the
        'data' entry was visible in the excerpt.
        """
        subtitles = {}
        for lang, url in subtitles_urls.items():
            # For some weird reason, blip.tv serves a video instead of subtitles
            # when we request with a common UA
            req = sanitized_Request(url)
            req.add_header('User-Agent', 'youtube-dl')
            subtitles[lang] = [{
                # The extension is 'srt' but it's actually an 'ass' file
                'ext': 'ass',
                'data': self._download_webpage(req, None, note=False),
            }]
        return subtitles
class BlipTVUserIE(InfoExtractor):
    """Information extractor for blip.tv user pages; returns a playlist.

    NOTE(review): this class was rebuilt from a damaged excerpt. The loop
    scaffolding and ``_PAGE_SIZE`` were not visible and were restored from
    what the visible code and comments require; confirm against the
    canonical file.
    """

    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?blip\.tv/)|bliptvuser:)(?!api\.swf)([^/]+)/*$'
    # Referenced by _real_extract; the value follows the "currently to 12
    # videos" remark in the paging comment there.
    _PAGE_SIZE = 12
    IE_NAME = 'blip.tv:user'
    # NOTE(review): the attribute name and the 'info_dict' nesting are
    # presumed; only these two fields were recoverable.
    _TEST = {
        'url': 'http://blip.tv/actone',
        'info_dict': {
            'title': 'Act One: The Series',
        },
    }

    def _real_extract(self, url):
        """Collect every video of a blip.tv user into a playlist result.

        The user name is the first group of ``_VALID_URL``. The user page
        supplies the numeric users id and the playlist title; the episode
        list is then fetched page by page.
        """
        mobj = re.match(self._VALID_URL, url)
        username = mobj.group(1)

        page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1'

        page = self._download_webpage(url, username, 'Downloading user page')
        # NOTE(review): raises AttributeError if the page carries no
        # data-users-id attribute (re.search returns None).
        mobj = re.search(r'data-users-id="([^"]+)"', page)
        page_base = page_base % mobj.group(1)
        title = self._og_search_title(page)

        # Download video ids using BlipTV Ajax calls. Result size per
        # query is limited (currently to 12 videos) so we need to query
        # page by page until there are no video ids - it means we got
        # all of them.

        video_ids = []
        pagenum = 1

        while True:
            page_url = page_base + "&page=" + str(pagenum)
            page = self._download_webpage(
                page_url, username,
                'Downloading video ids from page %d' % pagenum)

            # Extract video identifiers
            ids_in_page = []

            for mobj in re.finditer(r'href="/([^"]+)"', page):
                # Unescape before the membership test so entity-encoded
                # duplicates are recognised as duplicates.
                video_path = unescapeHTML(mobj.group(1))
                if video_path not in ids_in_page:
                    ids_in_page.append(video_path)

            video_ids.extend(ids_in_page)

            # A little optimization - if current page is not
            # "full", ie. does not contain PAGE_SIZE video ids then
            # we can assume that this page is the last one - there
            # are no more ids on further pages - no need to query
            # again.

            if len(ids_in_page) < self._PAGE_SIZE:
                break

            pagenum += 1

        urls = ['http://blip.tv/%s' % video_id for video_id in video_ids]
        url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]
        return self.playlist_result(
            url_entries, playlist_title=title, playlist_id=username)