]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/bliptv.py
1 from __future__
import unicode_literals
5 from . common
import InfoExtractor
class BlipTVIE(InfoExtractor):
    """Extractor for single blip.tv videos.

    Two URL shapes are accepted (see ``_VALID_URL``): episode / RSS links
    that carry the numeric video id directly, and ``play/`` or ``api.swf#``
    links that carry an opaque lookup id which must first be resolved
    through an HTTP redirect.
    """

    _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))'

    # NOTE(review): this table was rebuilt from a damaged listing. Only the
    # expectations that were still legible are reproduced; the 'id', 'ext'
    # and 'duration' entries (and one 'uploader') were not recoverable and
    # must be restored from the upstream file before relying on these tests.
    _TESTS = [
        {
            'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
            'md5': 'c6934ad0b6acf2bd920720ec888eb812',
            'info_dict': {
                'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
                'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
                'timestamp': 1323138843,
                'upload_date': '20111206',
                'uploader_id': '679425',
            },
        },
        {
            # https://github.com/rg3/youtube-dl/pull/2274
            'note': 'Video with subtitles',
            'url': 'http://blip.tv/play/h6Uag5OEVgI.html',
            'md5': '309f9d25b820b086ca163ffac8031806',
            'info_dict': {
                'title': 'Red vs. Blue Season 11 Episode 1',
                'description': 'One-Zero-One',
                'timestamp': 1371261608,
                'upload_date': '20130615',
                'uploader': 'redvsblue',
                'uploader_id': '792887',
            },
        },
        {
            # https://bugzilla.redhat.com/show_bug.cgi?id=967465
            'url': 'http://a.blip.tv/api.swf#h6Uag5KbVwI',
            'md5': '314e87b1ebe7a48fcbfdd51b791ce5a6',
            'info_dict': {
                'upload_date': '20130520',
                'description': 'Two hapless space marines argue over what to do when they realize they have an astronomically huge problem on their hands.',
                'title': 'Red vs. Blue Season 11 Trailer',
                'timestamp': 1369029609,
                'uploader': 'redvsblue',
                'uploader_id': '792887',
            },
        },
        {
            'url': 'http://blip.tv/play/gbk766dkj4Yn',
            'md5': 'fe0a33f022d49399a241e84a8ea8b8e3',
            'info_dict': {
                'upload_date': '20090208',
                'description': 'Witness the first appearance of the Nostalgia Critic character, as Doug reviews the movie Transformers.',
                'title': 'Nostalgia Critic: Transformers',
                'timestamp': 1234068723,
                'uploader': 'NostalgiaCritic',
                'uploader_id': '246467',
            },
        },
        {
            # https://github.com/rg3/youtube-dl/pull/4404
            'url': 'http://blip.tv/hilarios-productions/weekly-manga-recap-kingdom-7119982',
            'md5': '76c0a56f24e769ceaab21fbb6416a351',
            'info_dict': {
                'title': 'Weekly Manga Recap: Kingdom',
                # NOTE(review): the apostrophes may have been HTML entities
                # in the original expectation - confirm against upstream.
                'description': "And then Shin breaks the enemy line, and he's all like HWAH! And then he slices a guy and it's all like FWASHING! And... it's really hard to describe the best parts of this series without breaking down into sound effects, okay?",
                'timestamp': 1417660321,
                'upload_date': '20141204',
                'uploader': 'The Rollo T',
                'uploader_id': '407429',
            },
        },
    ]

    # The function takes no ``self``; the decorator keeps it callable both
    # on the class and on instances under Python 2 and 3.
    @staticmethod
    def _extract_url(webpage):
        """Return the blip.tv URL embedded in ``webpage``, or None.

        A ``<meta>`` tag pointing at the api.blip.tv redirect service is
        tried first and rewritten to a canonical ``blip.tv/a/a-<id>`` link;
        otherwise the first iframe/embed/object pointing at a ``play/`` or
        ``api.swf#`` player URL is returned unchanged.
        """
        mobj = re.search(
            r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)',
            webpage)
        if mobj:
            return 'http://blip.tv/a/a-' + mobj.group(1)
        mobj = re.search(
            r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)',
            webpage)
        if mobj:
            return mobj.group(1)
        return None

    def _real_extract(self, url):
        """Build the info dict for the video addressed by ``url``.

        The video's RSS item supplies the metadata; every ``media:content``
        entry becomes either a subtitle track or a downloadable format.
        """
        mobj = re.match(self._VALID_URL, url)
        lookup_id = mobj.group('lookup_id')

        # See https://github.com/rg3/youtube-dl/issues/857 and
        # https://github.com/rg3/youtube-dl/issues/4197
        if lookup_id:
            # The player URL redirects to a location whose ``file`` query
            # parameter is a URL carrying the numeric id.
            urlh = self._request_webpage(
                'http://blip.tv/play/%s' % lookup_id, lookup_id,
                'Resolving lookup id')
            redirect = compat_urlparse.urlparse(urlh.geturl())
            qs = compat_urlparse.parse_qs(redirect.query)
            mobj = re.match(self._VALID_URL, qs['file'][0])

        video_id = mobj.group('id')

        rss = self._download_xml(
            'http://blip.tv/rss/flash/%s' % video_id, video_id,
            'Downloading video RSS')

        # Helpers producing namespace-qualified tag / attribute names.
        def blip(s):
            return '{http://blip.tv/dtd/blip/1.0}%s' % s

        def media(s):
            return '{http://search.yahoo.com/mrss/}%s' % s

        def itunes(s):
            return '{http://www.itunes.com/dtds/podcast-1.0.dtd}%s' % s

        item = rss.find('channel/item')

        def text_of(tag):
            # Optional metadata: a missing element yields None rather than
            # an AttributeError.
            node = item.find(tag)
            return node.text if node is not None else None

        video_id = item.find(blip('item_id')).text
        title = item.find('./title').text

        raw_description = text_of(blip('puredescription'))
        # Only stringify real text: compat_str(None) would otherwise yield
        # the literal description 'None'.
        description = (
            clean_html(compat_str(raw_description))
            if raw_description is not None else None)
        timestamp = parse_iso8601(text_of(blip('datestamp')))
        uploader = text_of(blip('user'))
        uploader_id = text_of(blip('userid'))
        duration = int_or_none(text_of(blip('runtime')))

        media_thumbnail = item.find(media('thumbnail'))
        # Explicit None test: an Element without children is falsy.
        if media_thumbnail is not None:
            thumbnail = media_thumbnail.get('url')
        else:
            thumbnail = text_of(itunes('image'))
        categories = [category.text for category in item.findall('category')]

        formats = []
        subtitles_urls = {}

        # NOTE(review): the contents of this role-name -> language-code
        # table were not visible in the damaged listing; 'english' is an
        # assumption. Unknown names fall through unchanged.
        LANGS = {
            'english': 'en',
        }

        media_group = item.find(media('group'))
        media_contents = (
            media_group.findall(media('content'))
            if media_group is not None else [])
        for media_content in media_contents:
            media_url = media_content.get('url')
            role = media_content.get(blip('role'))
            media_type = media_content.get('type')

            if media_type == 'text/srt' or media_url.endswith('.srt'):
                # The language is the last dash-separated word of the role.
                lang = role.rpartition('-')[-1].strip().lower()
                langcode = LANGS.get(lang, lang)
                subtitles_urls[langcode] = media_url
            elif media_type and media_type.startswith('video/'):
                # Only real formats need the extra round trip that resolves
                # the final media location.
                msg = self._download_webpage(
                    media_url + '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url',
                    video_id, 'Resolving URL for %s' % role)
                real_url = compat_urlparse.parse_qs(msg.strip())['message'][0]
                formats.append({
                    'url': real_url,
                    # NOTE(review): 'format_id' was not legible in the
                    # listing; using the role is an assumption.
                    'format_id': role,
                    'format_note': media_type,
                    'vcodec': media_content.get(blip('vcodec')) or 'none',
                    'acodec': media_content.get(blip('acodec')),
                    # XML attributes are strings; sizes must be numeric.
                    'filesize': int_or_none(media_content.get('filesize')),
                    'width': int_or_none(media_content.get('width')),
                    'height': int_or_none(media_content.get('height')),
                })
        self._check_formats(formats, video_id)
        self._sort_formats(formats)

        subtitles = self.extract_subtitles(video_id, subtitles_urls)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'duration': duration,
            'thumbnail': thumbnail,
            'categories': categories,
            'formats': formats,
            'subtitles': subtitles,
        }

    def _get_subtitles(self, video_id, subtitles_urls):
        """Download every subtitle track listed in ``subtitles_urls``.

        ``subtitles_urls`` maps a language code to the track's URL. The
        result maps each language code to a one-element list holding the
        downloaded data and its format. Presumably invoked through
        ``extract_subtitles`` - confirm against the base class.
        """
        subtitles = {}
        for lang, url in subtitles_urls.items():
            # For some weird reason, blip.tv serves a video instead of
            # subtitles when we request with a common UA
            req = compat_urllib_request.Request(url)
            req.add_header('User-Agent', 'youtube-dl')
            subtitles[lang] = [{
                # The extension is 'srt' but it's actually an 'ass' file
                'ext': 'ass',
                'data': self._download_webpage(req, None, note=False),
            }]
        return subtitles
class BlipTVUserIE(InfoExtractor):
    """Extractor turning a blip.tv user/show page into a playlist.

    Accepts ``http(s)://[sub.]blip.tv/<user>`` URLs as well as the
    ``bliptvuser:<user>`` pseudo-scheme (see ``_VALID_URL``).
    """

    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?blip\.tv/)|bliptvuser:)(?!api\.swf)([^/]+)/*$'
    # Number of entries the episode-list endpoint returns per page. The
    # definition was missing from the damaged listing; 12 is the value
    # stated in the paging comment inside _real_extract.
    _PAGE_SIZE = 12
    IE_NAME = 'blip.tv:user'
    # NOTE(review): rebuilt from a damaged listing; any further keys of the
    # original test (e.g. an expected entry count) were not recoverable.
    _TEST = {
        'url': 'http://blip.tv/actone',
        'info_dict': {
            # The playlist id is the user name taken from the URL.
            'id': 'actone',
            'title': 'Act One: The Series',
        },
    }

    def _real_extract(self, url):
        """Return a playlist of all videos published by the user in ``url``."""
        mobj = re.match(self._VALID_URL, url)
        username = mobj.group(1)

        page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1'

        page = self._download_webpage(url, username, 'Downloading user page')
        # _search_regex raises a descriptive extractor error when the
        # attribute is absent, instead of an AttributeError on None.
        users_id = self._search_regex(
            r'data-users-id="([^"]+)"', page, 'users id')
        page_base = page_base % users_id
        title = self._og_search_title(page)

        # Download video ids using BlipTV Ajax calls. Result size per
        # query is limited (currently to 12 videos) so we need to query
        # page by page until there are no video ids - it means we got
        # all of them.

        video_ids = []
        pagenum = 1

        while True:
            page_url = page_base + "&page=" + str(pagenum)
            page = self._download_webpage(
                page_url, username,
                'Downloading video ids from page %d' % pagenum)

            # Extract video identifiers
            ids_in_page = []

            for mobj in re.finditer(r'href="/([^"]+)"', page):
                # De-duplicate on the unescaped value, i.e. the same form
                # that is stored in the list.
                video_path = unescapeHTML(mobj.group(1))
                if video_path not in ids_in_page:
                    ids_in_page.append(video_path)

            video_ids.extend(ids_in_page)

            # A little optimization - if current page is not
            # "full", ie. does not contain PAGE_SIZE video ids then
            # we can assume that this page is the last one - there
            # are no more ids on further pages - no need to query
            # again.
            if len(ids_in_page) < self._PAGE_SIZE:
                break

            pagenum += 1

        urls = ['http://blip.tv/%s' % video_id for video_id in video_ids]
        url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]
        return self.playlist_result(
            url_entries, playlist_title=title, playlist_id=username)