]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/bliptv.py
1 from __future__
import unicode_literals
5 from . common
import InfoExtractor
20 class BlipTVIE ( InfoExtractor
):
21 _VALID_URL
= r
'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))'
25 'url' : 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352' ,
26 'md5' : 'c6934ad0b6acf2bd920720ec888eb812' ,
30 'title' : 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3' ,
31 'description' : 'md5:9bc31f227219cde65e47eeec8d2dc596' ,
32 'timestamp' : 1323138843 ,
33 'upload_date' : '20111206' ,
35 'uploader_id' : '679425' ,
40 # https://github.com/rg3/youtube-dl/pull/2274
41 'note' : 'Video with subtitles' ,
42 'url' : 'http://blip.tv/play/h6Uag5OEVgI.html' ,
43 'md5' : '309f9d25b820b086ca163ffac8031806' ,
47 'title' : 'Red vs. Blue Season 11 Episode 1' ,
48 'description' : 'One-Zero-One' ,
49 'timestamp' : 1371261608 ,
50 'upload_date' : '20130615' ,
51 'uploader' : 'redvsblue' ,
52 'uploader_id' : '792887' ,
57 # https://bugzilla.redhat.com/show_bug.cgi?id=967465
58 'url' : 'http://a.blip.tv/api.swf#h6Uag5KbVwI' ,
59 'md5' : '314e87b1ebe7a48fcbfdd51b791ce5a6' ,
63 'upload_date' : '20130520' ,
64 'description' : 'Two hapless space marines argue over what to do when they realize they have an astronomically huge problem on their hands.' ,
65 'title' : 'Red vs. Blue Season 11 Trailer' ,
66 'timestamp' : 1369029609 ,
67 'uploader' : 'redvsblue' ,
68 'uploader_id' : '792887' ,
72 'url' : 'http://blip.tv/play/gbk766dkj4Yn' ,
73 'md5' : 'fe0a33f022d49399a241e84a8ea8b8e3' ,
77 'upload_date' : '20090208' ,
78 'description' : 'Witness the first appearance of the Nostalgia Critic character, as Doug reviews the movie Transformers.' ,
79 'title' : 'Nostalgia Critic: Transformers' ,
80 'timestamp' : 1234068723 ,
81 'uploader' : 'NostalgiaCritic' ,
82 'uploader_id' : '246467' ,
86 # https://github.com/rg3/youtube-dl/pull/4404
88 'url' : 'http://blip.tv/hilarios-productions/weekly-manga-recap-kingdom-7119982' ,
89 'md5' : '76c0a56f24e769ceaab21fbb6416a351' ,
93 'title' : 'Weekly Manga Recap: Kingdom' ,
94 'description' : 'And then Shin breaks the enemy line, and he's all like HWAH! And then he slices a guy and it's all like FWASHING! And... it's really hard to describe the best parts of this series without breaking down into sound effects, okay?' ,
95 'timestamp' : 1417660321 ,
96 'upload_date' : '20141204' ,
97 'uploader' : 'The Rollo T' ,
98 'uploader_id' : '407429' ,
105 def _real_extract ( self
, url
):
106 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
107 lookup_id
= mobj
. group ( 'lookup_id' )
109 # See https://github.com/rg3/youtube-dl/issues/857 and
110 # https://github.com/rg3/youtube-dl/issues/4197
112 urlh
= self
._ request
_ webpage
(
113 'http://blip.tv/play/ %s ' % lookup_id
, lookup_id
, 'Resolving lookup id' )
114 url
= compat_urlparse
. urlparse ( urlh
. geturl ())
115 qs
= compat_urlparse
. parse_qs ( url
. query
)
116 mobj
= re
. match ( self
._ VALID
_U RL
, qs
[ 'file' ][ 0 ])
118 video_id
= mobj
. group ( 'id' )
120 rss
= self
._ download
_ xml
( 'http://blip.tv/rss/flash/ %s ' % video_id
, video_id
, 'Downloading video RSS' )
123 return '{http://blip.tv/dtd/blip/1.0} %s ' % s
126 return '{http://search.yahoo.com/mrss/} %s ' % s
129 return '{http://www.itunes.com/dtds/podcast-1.0.dtd} %s ' % s
131 item
= rss
. find ( 'channel/item' )
133 video_id
= item
. find ( blip ( 'item_id' )). text
134 title
= item
. find ( './title' ). text
135 description
= clean_html ( compat_str ( item
. find ( blip ( 'puredescription' )). text
))
136 timestamp
= parse_iso8601 ( item
. find ( blip ( 'datestamp' )). text
)
137 uploader
= item
. find ( blip ( 'user' )). text
138 uploader_id
= item
. find ( blip ( 'userid' )). text
139 duration
= int ( item
. find ( blip ( 'runtime' )). text
)
140 media_thumbnail
= item
. find ( media ( 'thumbnail' ))
141 thumbnail
= media_thumbnail
. get ( 'url' ) if media_thumbnail
is not None else item
. find ( itunes ( 'image' )). text
142 categories
= [ category
. text
for category
in item
. findall ( 'category' )]
147 media_group
= item
. find ( media ( 'group' ))
148 for media_content
in media_group
. findall ( media ( 'content' )):
149 url
= media_content
. get ( 'url' )
150 role
= media_content
. get ( blip ( 'role' ))
151 msg
= self
._ download
_ webpage
(
152 url
+ '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url' ,
153 video_id
, 'Resolving URL for %s ' % role
)
154 real_url
= compat_urlparse
. parse_qs ( msg
. strip ())[ 'message' ][ 0 ]
156 media_type
= media_content
. get ( 'type' )
157 if media_type
== 'text/srt' or url
. endswith ( '.srt' ):
161 lang
= role
. rpartition ( '-' )[- 1 ]. strip (). lower ()
162 langcode
= LANGS
. get ( lang
, lang
)
163 subtitles_urls
[ langcode
] = url
164 elif media_type
. startswith ( 'video/' ):
168 'format_note' : media_type
,
169 'vcodec' : media_content
. get ( blip ( 'vcodec' )) or 'none' ,
170 'acodec' : media_content
. get ( blip ( 'acodec' )),
171 'filesize' : media_content
. get ( 'filesize' ),
172 'width' : int_or_none ( media_content
. get ( 'width' )),
173 'height' : int_or_none ( media_content
. get ( 'height' )),
175 self
._ sort
_ formats
( formats
)
177 subtitles
= self
. extract_subtitles ( video_id
, subtitles_urls
)
182 'description' : description
,
183 'timestamp' : timestamp
,
184 'uploader' : uploader
,
185 'uploader_id' : uploader_id
,
186 'duration' : duration
,
187 'thumbnail' : thumbnail
,
188 'categories' : categories
,
190 'subtitles' : subtitles
,
193 def _get_subtitles ( self
, video_id
, subtitles_urls
):
195 for lang
, url
in subtitles_urls
. items ():
196 # For some weird reason, blip.tv serves a video instead of subtitles
197 # when we request with a common UA
198 req
= compat_urllib_request
. Request ( url
)
199 req
. add_header ( 'User-Agent' , 'youtube-dl' )
201 # The extension is 'srt' but it's actually an 'ass' file
203 'data' : self
._ download
_ webpage
( req
, None , note
= False ),
208 class BlipTVUserIE ( InfoExtractor
):
209 _VALID_URL
= r
'(?:(?:https?://(?:\w+\.)?blip\.tv/)|bliptvuser:)(?!api\.swf)([^/]+)/*$'
211 IE_NAME
= 'blip.tv:user'
213 'url' : 'http://blip.tv/actone' ,
216 'title' : 'Act One: The Series' ,
221 def _real_extract ( self
, url
):
222 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
223 username
= mobj
. group ( 1 )
225 page_base
= 'http://m.blip.tv/pr/show_get_full_episode_list?users_id= %s &lite=0&esi=1'
227 page
= self
._ download
_ webpage
( url
, username
, 'Downloading user page' )
228 mobj
= re
. search ( r
'data-users-id="([^"]+)"' , page
)
229 page_base
= page_base
% mobj
. group ( 1 )
230 title
= self
._ og
_ search
_ title
( page
)
232 # Download video ids using BlipTV Ajax calls. Result size per
233 # query is limited (currently to 12 videos) so we need to query
234 # page by page until there are no video ids - it means we got
241 url
= page_base
+ "&page=" + str ( pagenum
)
242 page
= self
._ download
_ webpage
(
243 url
, username
, 'Downloading video ids from page %d ' % pagenum
)
245 # Extract video identifiers
248 for mobj
in re
. finditer ( r
'href="/([^"]+)"' , page
):
249 if mobj
. group ( 1 ) not in ids_in_page
:
250 ids_in_page
. append ( unescapeHTML ( mobj
. group ( 1 )))
252 video_ids
. extend ( ids_in_page
)
254 # A little optimization - if current page is not
255 # "full", ie. does not contain PAGE_SIZE video ids then
256 # we can assume that this page is the last one - there
257 # are no more ids on further pages - no need to query
260 if len ( ids_in_page
) < self
._ PAGE
_ SIZE
:
265 urls
= [ 'http://blip.tv/ %s ' % video_id
for video_id
in video_ids
]
266 url_entries
= [ self
. url_result ( vurl
, 'BlipTV' ) for vurl
in urls
]
267 return self
. playlist_result (
268 url_entries
, playlist_title
= title
, playlist_id
= username
)