]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/bliptv.py 
   1  from  __future__ 
import  unicode_literals
   5  from  . common 
import  InfoExtractor
  21  class  BlipTVIE ( InfoExtractor
):   22      _VALID_URL 
=  r
'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))'   26              'url' :  'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352' ,   27              'md5' :  '80baf1ec5c3d2019037c1c707d676b9f' ,   31                  'title' :  'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3' ,   32                  'description' :  'md5:9bc31f227219cde65e47eeec8d2dc596' ,   33                  'timestamp' :  1323138843 ,   34                  'upload_date' :  '20111206' ,   36                  'uploader_id' :  '679425' ,   41              # https://github.com/rg3/youtube-dl/pull/2274   42              'note' :  'Video with subtitles' ,   43              'url' :  'http://blip.tv/play/h6Uag5OEVgI.html' ,   44              'md5' :  '309f9d25b820b086ca163ffac8031806' ,   48                  'title' :  'Red vs. Blue Season 11 Episode 1' ,   49                  'description' :  'One-Zero-One' ,   50                  'timestamp' :  1371261608 ,   51                  'upload_date' :  '20130615' ,   52                  'uploader' :  'redvsblue' ,   53                  'uploader_id' :  '792887' ,   58              # https://bugzilla.redhat.com/show_bug.cgi?id=967465   59              'url' :  'http://a.blip.tv/api.swf#h6Uag5KbVwI' ,   60              'md5' :  '314e87b1ebe7a48fcbfdd51b791ce5a6' ,   64                  'upload_date' :  '20130520' ,   65                  'description' :  'Two hapless space marines argue over what to do when they realize they have an astronomically huge problem on their hands.' ,   66                  'title' :  'Red vs. Blue Season 11 Trailer' ,   67                  'timestamp' :  1369029609 ,   68                  'uploader' :  'redvsblue' ,   69                  'uploader_id' :  '792887' ,   73              'url' :  'http://blip.tv/play/gbk766dkj4Yn' ,   74              'md5' :  'fe0a33f022d49399a241e84a8ea8b8e3' ,   78                  'upload_date' :  '20090208' ,   79                  'description' :  'Witness the first appearance of the Nostalgia Critic character, as Doug reviews the movie Transformers.' ,   80                  'title' :  'Nostalgia Critic: Transformers' ,   81                  'timestamp' :  1234068723 ,   82                  'uploader' :  'NostalgiaCritic' ,   83                  'uploader_id' :  '246467' ,   87              # https://github.com/rg3/youtube-dl/pull/4404   89              'url' :  'http://blip.tv/hilarios-productions/weekly-manga-recap-kingdom-7119982' ,   90              'md5' :  '76c0a56f24e769ceaab21fbb6416a351' ,   94                  'title' :  'Weekly Manga Recap: Kingdom' ,   95                  'description' :  'And then Shin breaks the enemy line, and he's all like HWAH! And then he slices a guy and it's all like FWASHING! And... it's really hard to describe the best parts of this series without breaking down into sound effects, okay?' ,   96                  'timestamp' :  1417660321 ,   97                  'upload_date' :  '20141204' ,   98                  'uploader' :  'The Rollo T' ,   99                  'uploader_id' :  '407429' ,  106              'url' :  'http://blip.tv/rss/flash/6700880' ,  110                  'title' :  'Cowboy Bebop: Gateway Shuffle Review' ,  111                  'description' :  'md5:3acc480c0f9ae157f5fe88547ecaf3f8' ,  112                  'timestamp' :  1386639757 ,  113                  'upload_date' :  '20131210' ,  114                  'uploader' :  'sfdebris' ,  115                  'uploader_id' :  '706520' ,  121      def  _extract_url ( webpage
):  122          mobj 
=  re
. search ( r
'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)' ,  webpage
)  124              return  'http://blip.tv/a/a-'  +  mobj
. group ( 1 )  125          mobj 
=  re
. search ( r
'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)' ,  webpage
)  129      def  _real_extract ( self
,  url
):  130          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  131          lookup_id 
=  mobj
. group ( 'lookup_id' )  133          # See https://github.com/rg3/youtube-dl/issues/857 and  134          # https://github.com/rg3/youtube-dl/issues/4197  136              urlh 
=  self
._ request
_ webpage
(  137                  'http://blip.tv/play/ %s '  %  lookup_id
,  lookup_id
,  'Resolving lookup id' )  138              url 
=  compat_urlparse
. urlparse ( urlh
. geturl ())  139              qs 
=  compat_urlparse
. parse_qs ( url
. query
)  140              mobj 
=  re
. match ( self
._ VALID
_U RL
,  qs
[ 'file' ][ 0 ])  142          video_id 
=  mobj
. group ( 'id' )  144          rss 
=  self
._ download
_ xml
( 'http://blip.tv/rss/flash/ %s '  %  video_id
,  video_id
,  'Downloading video RSS' )  147              return  xpath_with_ns ( p
, {  148                  'blip' :  'http://blip.tv/dtd/blip/1.0' ,  149                  'media' :  'http://search.yahoo.com/mrss/' ,  150                  'itunes' :  'http://www.itunes.com/dtds/podcast-1.0.dtd' ,  153          item 
=  rss
. find ( 'channel/item' )  155          video_id 
=  xpath_text ( item
,  _x ( 'blip:item_id' ),  'video id' )  or  lookup_id
 156          title 
=  xpath_text ( item
,  'title' ,  'title' ,  fatal
= True )  157          description 
=  clean_html ( xpath_text ( item
,  _x ( 'blip:puredescription' ),  'description' ))  158          timestamp 
=  parse_iso8601 ( xpath_text ( item
,  _x ( 'blip:datestamp' ),  'timestamp' ))  159          uploader 
=  xpath_text ( item
,  _x ( 'blip:user' ),  'uploader' )  160          uploader_id 
=  xpath_text ( item
,  _x ( 'blip:userid' ),  'uploader id' )  161          duration 
=  int_or_none ( xpath_text ( item
,  _x ( 'blip:runtime' ),  'duration' ))  162          media_thumbnail 
=  item
. find ( _x ( 'media:thumbnail' ))  163          thumbnail 
= ( media_thumbnail
. get ( 'url' )  if  media_thumbnail 
is not None  164                       else  xpath_text ( item
,  'image' ,  'thumbnail' ))  165          categories 
= [ category
. text 
for  category 
in  item
. findall ( 'category' )  if  category 
is not None ]  170          media_group 
=  item
. find ( _x ( 'media:group' ))  171          for  media_content 
in  media_group
. findall ( _x ( 'media:content' )):  172              url 
=  media_content
. get ( 'url' )  173              role 
=  media_content
. get ( _x ( 'blip:role' ))  174              msg 
=  self
._ download
_ webpage
(  175                  url 
+  '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url' ,  176                  video_id
,  'Resolving URL for  %s '  %  role
)  177              real_url 
=  compat_urlparse
. parse_qs ( msg
. strip ())[ 'message' ][ 0 ]  179              media_type 
=  media_content
. get ( 'type' )  180              if  media_type 
==  'text/srt'  or  url
. endswith ( '.srt' ):  184                  lang 
=  role
. rpartition ( '-' )[- 1 ]. strip (). lower ()  185                  langcode 
=  LANGS
. get ( lang
,  lang
)  186                  subtitles_urls
[ langcode
] =  url
 187              elif  media_type
. startswith ( 'video/' ):  191                      'format_note' :  media_type
,  192                      'vcodec' :  media_content
. get ( _x ( 'blip:vcodec' ))  or  'none' ,  193                      'acodec' :  media_content
. get ( _x ( 'blip:acodec' )),  194                      'filesize' :  media_content
. get ( 'filesize' ),  195                      'width' :  int_or_none ( media_content
. get ( 'width' )),  196                      'height' :  int_or_none ( media_content
. get ( 'height' )),  198          self
._ check
_ formats
( formats
,  video_id
)  199          self
._ sort
_ formats
( formats
)  201          subtitles 
=  self
. extract_subtitles ( video_id
,  subtitles_urls
)  206              'description' :  description
,  207              'timestamp' :  timestamp
,  208              'uploader' :  uploader
,  209              'uploader_id' :  uploader_id
,  210              'duration' :  duration
,  211              'thumbnail' :  thumbnail
,  212              'categories' :  categories
,  214              'subtitles' :  subtitles
,  217      def  _get_subtitles ( self
,  video_id
,  subtitles_urls
):  219          for  lang
,  url 
in  subtitles_urls
. items ():  220              # For some weird reason, blip.tv serves a video instead of subtitles  221              # when we request with a common UA  222              req 
=  compat_urllib_request
. Request ( url
)  223              req
. add_header ( 'User-Agent' ,  'youtube-dl' )  225                  # The extension is 'srt' but it's actually an 'ass' file  227                  'data' :  self
._ download
_ webpage
( req
,  None ,  note
= False ),  232  class  BlipTVUserIE ( InfoExtractor
):  233      _VALID_URL 
=  r
'(?:(?:https?://(?:\w+\.)?blip\.tv/)|bliptvuser:)(?!api\.swf)([^/]+)/*$'  235      IE_NAME 
=  'blip.tv:user'  237          'url' :  'http://blip.tv/actone' ,  240              'title' :  'Act One: The Series' ,  245      def  _real_extract ( self
,  url
):  246          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  247          username 
=  mobj
. group ( 1 )  249          page_base 
=  'http://m.blip.tv/pr/show_get_full_episode_list?users_id= %s &lite=0&esi=1'  251          page 
=  self
._ download
_ webpage
( url
,  username
,  'Downloading user page' )  252          mobj 
=  re
. search ( r
'data-users-id="([^"]+)"' ,  page
)  253          page_base 
=  page_base 
%  mobj
. group ( 1 )  254          title 
=  self
._ og
_ search
_ title
( page
)  256          # Download video ids using BlipTV Ajax calls. Result size per  257          # query is limited (currently to 12 videos) so we need to query  258          # page by page until there are no video ids - it means we got  265              url 
=  page_base 
+  "&page="  +  str ( pagenum
)  266              page 
=  self
._ download
_ webpage
(  267                  url
,  username
,  'Downloading video ids from page  %d '  %  pagenum
)  269              # Extract video identifiers  272              for  mobj 
in  re
. finditer ( r
'href="/([^"]+)"' ,  page
):  273                  if  mobj
. group ( 1 )  not in  ids_in_page
:  274                      ids_in_page
. append ( unescapeHTML ( mobj
. group ( 1 )))  276              video_ids
. extend ( ids_in_page
)  278              # A little optimization - if current page is not  279              # "full", ie. does not contain PAGE_SIZE video ids then  280              # we can assume that this page is the last one - there  281              # are no more ids on further pages - no need to query  284              if  len ( ids_in_page
) <  self
._ PAGE
_ SIZE
:  289          urls 
= [ 'http://blip.tv/ %s '  %  video_id 
for  video_id 
in  video_ids
]  290          url_entries 
= [ self
. url_result ( vurl
,  'BlipTV' )  for  vurl 
in  urls
]  291          return  self
. playlist_result (  292              url_entries
,  playlist_title
= title
,  playlist_id
= username
)