]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/bliptv.py
1 from __future__
import unicode_literals
5 from . common
import InfoExtractor
6 from . subtitles
import SubtitlesInfoExtractor
10 compat_urllib_request
,
21 class BlipTVIE ( SubtitlesInfoExtractor
):
22 _VALID_URL
= r
'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))'
26 'url' : 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352' ,
27 'md5' : 'c6934ad0b6acf2bd920720ec888eb812' ,
31 'title' : 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3' ,
32 'description' : 'md5:9bc31f227219cde65e47eeec8d2dc596' ,
33 'timestamp' : 1323138843 ,
34 'upload_date' : '20111206' ,
36 'uploader_id' : '679425' ,
41 # https://github.com/rg3/youtube-dl/pull/2274
42 'note' : 'Video with subtitles' ,
43 'url' : 'http://blip.tv/play/h6Uag5OEVgI.html' ,
44 'md5' : '309f9d25b820b086ca163ffac8031806' ,
48 'title' : 'Red vs. Blue Season 11 Episode 1' ,
49 'description' : 'One-Zero-One' ,
50 'timestamp' : 1371261608 ,
51 'upload_date' : '20130615' ,
52 'uploader' : 'redvsblue' ,
53 'uploader_id' : '792887' ,
58 # https://bugzilla.redhat.com/show_bug.cgi?id=967465
59 'url' : 'http://a.blip.tv/api.swf#h6Uag5KbVwI' ,
60 'md5' : '314e87b1ebe7a48fcbfdd51b791ce5a6' ,
64 'upload_date' : '20130520' ,
65 'description' : 'Two hapless space marines argue over what to do when they realize they have an astronomically huge problem on their hands.' ,
66 'title' : 'Red vs. Blue Season 11 Trailer' ,
67 'timestamp' : 1369029609 ,
68 'uploader' : 'redvsblue' ,
69 'uploader_id' : '792887' ,
73 'url' : 'http://blip.tv/play/gbk766dkj4Yn' ,
74 'md5' : 'fe0a33f022d49399a241e84a8ea8b8e3' ,
78 'upload_date' : '20090208' ,
79 'description' : 'Witness the first appearance of the Nostalgia Critic character, as Doug reviews the movie Transformers.' ,
80 'title' : 'Nostalgia Critic: Transformers' ,
81 'timestamp' : 1234068723 ,
82 'uploader' : 'NostalgiaCritic' ,
83 'uploader_id' : '246467' ,
87 # https://github.com/rg3/youtube-dl/pull/4404
89 'url' : 'http://blip.tv/hilarios-productions/weekly-manga-recap-kingdom-7119982' ,
90 'md5' : '76c0a56f24e769ceaab21fbb6416a351' ,
94 'title' : 'Weekly Manga Recap: Kingdom' ,
95 'description' : 'And then Shin breaks the enemy line, and he's all like HWAH! And then he slices a guy and it's all like FWASHING! And... it's really hard to describe the best parts of this series without breaking down into sound effects, okay?' ,
96 'timestamp' : 1417660321 ,
97 'upload_date' : '20141204' ,
98 'uploader' : 'The Rollo T' ,
99 'uploader_id' : '407429' ,
106 def _real_extract ( self
, url
):
107 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
108 lookup_id
= mobj
. group ( 'lookup_id' )
110 # See https://github.com/rg3/youtube-dl/issues/857 and
111 # https://github.com/rg3/youtube-dl/issues/4197
113 urlh
= self
._ request
_ webpage
(
114 'http://blip.tv/play/ %s ' % lookup_id
, lookup_id
, 'Resolving lookup id' )
115 url
= compat_urlparse
. urlparse ( urlh
. geturl ())
116 qs
= compat_urlparse
. parse_qs ( url
. query
)
117 mobj
= re
. match ( self
._ VALID
_U RL
, qs
[ 'file' ][ 0 ])
119 video_id
= mobj
. group ( 'id' )
121 rss
= self
._ download
_ xml
( 'http://blip.tv/rss/flash/ %s ' % video_id
, video_id
, 'Downloading video RSS' )
124 return '{http://blip.tv/dtd/blip/1.0} %s ' % s
127 return '{http://search.yahoo.com/mrss/} %s ' % s
130 return '{http://www.itunes.com/dtds/podcast-1.0.dtd} %s ' % s
132 item
= rss
. find ( 'channel/item' )
134 video_id
= item
. find ( blip ( 'item_id' )). text
135 title
= item
. find ( './title' ). text
136 description
= clean_html ( compat_str ( item
. find ( blip ( 'puredescription' )). text
))
137 timestamp
= parse_iso8601 ( item
. find ( blip ( 'datestamp' )). text
)
138 uploader
= item
. find ( blip ( 'user' )). text
139 uploader_id
= item
. find ( blip ( 'userid' )). text
140 duration
= int ( item
. find ( blip ( 'runtime' )). text
)
141 media_thumbnail
= item
. find ( media ( 'thumbnail' ))
142 thumbnail
= media_thumbnail
. get ( 'url' ) if media_thumbnail
is not None else item
. find ( itunes ( 'image' )). text
143 categories
= [ category
. text
for category
in item
. findall ( 'category' )]
148 media_group
= item
. find ( media ( 'group' ))
149 for media_content
in media_group
. findall ( media ( 'content' )):
150 url
= media_content
. get ( 'url' )
151 role
= media_content
. get ( blip ( 'role' ))
152 msg
= self
._ download
_ webpage
(
153 url
+ '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url' ,
154 video_id
, 'Resolving URL for %s ' % role
)
155 real_url
= compat_urlparse
. parse_qs ( msg
. strip ())[ 'message' ][ 0 ]
157 media_type
= media_content
. get ( 'type' )
158 if media_type
== 'text/srt' or url
. endswith ( '.srt' ):
162 lang
= role
. rpartition ( '-' )[- 1 ]. strip (). lower ()
163 langcode
= LANGS
. get ( lang
, lang
)
164 subtitles
[ langcode
] = url
165 elif media_type
. startswith ( 'video/' ):
169 'format_note' : media_type
,
170 'vcodec' : media_content
. get ( blip ( 'vcodec' )) or 'none' ,
171 'acodec' : media_content
. get ( blip ( 'acodec' )),
172 'filesize' : media_content
. get ( 'filesize' ),
173 'width' : int_or_none ( media_content
. get ( 'width' )),
174 'height' : int_or_none ( media_content
. get ( 'height' )),
176 self
._ sort
_ formats
( formats
)
179 video_subtitles
= self
. extract_subtitles ( video_id
, subtitles
)
180 if self
._ downloader
. params
. get ( 'listsubtitles' , False ):
181 self
._l ist
_ available
_ subtitles
( video_id
, subtitles
)
187 'description' : description
,
188 'timestamp' : timestamp
,
189 'uploader' : uploader
,
190 'uploader_id' : uploader_id
,
191 'duration' : duration
,
192 'thumbnail' : thumbnail
,
193 'categories' : categories
,
195 'subtitles' : video_subtitles
,
def _download_subtitle_url(self, sub_lang, url):
    """Request the subtitle document at *url* with a custom User-Agent.

    Returns whatever ``self._download_webpage`` returns for the request.
    *sub_lang* is accepted but not used by this method.
    """
    # blip.tv answers with a video instead of the subtitle text when the
    # request carries a common User-Agent, so send our own identifier.
    subtitle_request = compat_urllib_request.Request(url)
    subtitle_request.add_header('User-Agent', 'youtube-dl')
    return self._download_webpage(subtitle_request, None, note=False)
206 class BlipTVUserIE ( InfoExtractor
):
207 _VALID_URL
= r
'(?:(?:https?://(?:\w+\.)?blip\.tv/)|bliptvuser:)(?!api\.swf)([^/]+)/*$'
209 IE_NAME
= 'blip.tv:user'
211 'url' : 'http://blip.tv/actone' ,
214 'title' : 'Act One: The Series' ,
219 def _real_extract ( self
, url
):
220 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
221 username
= mobj
. group ( 1 )
223 page_base
= 'http://m.blip.tv/pr/show_get_full_episode_list?users_id= %s &lite=0&esi=1'
225 page
= self
._ download
_ webpage
( url
, username
, 'Downloading user page' )
226 mobj
= re
. search ( r
'data-users-id="([^"]+)"' , page
)
227 page_base
= page_base
% mobj
. group ( 1 )
228 title
= self
._ og
_ search
_ title
( page
)
230 # Download video ids using BlipTV Ajax calls. Result size per
231 # query is limited (currently to 12 videos) so we need to query
232 # page by page until there are no video ids - it means we got all of them.
239 url
= page_base
+ "&page=" + str ( pagenum
)
240 page
= self
._ download
_ webpage
(
241 url
, username
, 'Downloading video ids from page %d ' % pagenum
)
243 # Extract video identifiers
246 for mobj
in re
. finditer ( r
'href="/([^"]+)"' , page
):
247 if mobj
. group ( 1 ) not in ids_in_page
:
248 ids_in_page
. append ( unescapeHTML ( mobj
. group ( 1 )))
250 video_ids
. extend ( ids_in_page
)
252 # A little optimization - if current page is not
253 # "full", i.e. does not contain PAGE_SIZE video ids then
254 # we can assume that this page is the last one - there
255 # are no more ids on further pages - no need to query again.
258 if len ( ids_in_page
) < self
._ PAGE
_ SIZE
:
263 urls
= [ 'http://blip.tv/ %s ' % video_id
for video_id
in video_ids
]
264 url_entries
= [ self
. url_result ( vurl
, 'BlipTV' ) for vurl
in urls
]
265 return self
. playlist_result (
266 url_entries
, playlist_title
= title
, playlist_id
= username
)