]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/bliptv.py 
35375f7b1ead97c90d01cf2356c30017fb0f47dc
   1  from  __future__ 
import  unicode_literals
   5  from  . common 
import  InfoExtractor
   7  from  .. compat 
import  compat_urlparse
  19  class  BlipTVIE ( InfoExtractor
):   20      _VALID_URL 
=  r
'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))'   24              'url' :  'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352' ,   25              'md5' :  '80baf1ec5c3d2019037c1c707d676b9f' ,   29                  'title' :  'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3' ,   30                  'description' :  'md5:9bc31f227219cde65e47eeec8d2dc596' ,   31                  'timestamp' :  1323138843 ,   32                  'upload_date' :  '20111206' ,   34                  'uploader_id' :  '679425' ,   39              # https://github.com/rg3/youtube-dl/pull/2274   40              'note' :  'Video with subtitles' ,   41              'url' :  'http://blip.tv/play/h6Uag5OEVgI.html' ,   42              'md5' :  '309f9d25b820b086ca163ffac8031806' ,   46                  'title' :  'Red vs. Blue Season 11 Episode 1' ,   47                  'description' :  'One-Zero-One' ,   48                  'timestamp' :  1371261608 ,   49                  'upload_date' :  '20130615' ,   50                  'uploader' :  'redvsblue' ,   51                  'uploader_id' :  '792887' ,   56              # https://bugzilla.redhat.com/show_bug.cgi?id=967465   57              'url' :  'http://a.blip.tv/api.swf#h6Uag5KbVwI' ,   58              'md5' :  '314e87b1ebe7a48fcbfdd51b791ce5a6' ,   62                  'upload_date' :  '20130520' ,   63                  'description' :  'Two hapless space marines argue over what to do when they realize they have an astronomically huge problem on their hands.' ,   64                  'title' :  'Red vs. Blue Season 11 Trailer' ,   65                  'timestamp' :  1369029609 ,   66                  'uploader' :  'redvsblue' ,   67                  'uploader_id' :  '792887' ,   71              'url' :  'http://blip.tv/play/gbk766dkj4Yn' ,   72              'md5' :  'fe0a33f022d49399a241e84a8ea8b8e3' ,   76                  'upload_date' :  '20090208' ,   77                  'description' :  'Witness the first appearance of the Nostalgia Critic character, as Doug reviews the movie Transformers.' ,   78                  'title' :  'Nostalgia Critic: Transformers' ,   79                  'timestamp' :  1234068723 ,   80                  'uploader' :  'NostalgiaCritic' ,   81                  'uploader_id' :  '246467' ,   85              # https://github.com/rg3/youtube-dl/pull/4404   87              'url' :  'http://blip.tv/hilarios-productions/weekly-manga-recap-kingdom-7119982' ,   88              'md5' :  '76c0a56f24e769ceaab21fbb6416a351' ,   92                  'title' :  'Weekly Manga Recap: Kingdom' ,   93                  'description' :  'And then Shin breaks the enemy line, and he's all like HWAH! And then he slices a guy and it's all like FWASHING! And... it's really hard to describe the best parts of this series without breaking down into sound effects, okay?' ,   94                  'timestamp' :  1417660321 ,   95                  'upload_date' :  '20141204' ,   96                  'uploader' :  'The Rollo T' ,   97                  'uploader_id' :  '407429' ,  104              'url' :  'http://blip.tv/rss/flash/6700880' ,  108                  'title' :  'Cowboy Bebop: Gateway Shuffle Review' ,  109                  'description' :  'md5:3acc480c0f9ae157f5fe88547ecaf3f8' ,  110                  'timestamp' :  1386639757 ,  111                  'upload_date' :  '20131210' ,  112                  'uploader' :  'sfdebris' ,  113                  'uploader_id' :  '706520' ,  119      def  _extract_url ( webpage
):  120          mobj 
=  re
. search ( r
'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)' ,  webpage
)  122              return  'http://blip.tv/a/a-'  +  mobj
. group ( 1 )  123          mobj 
=  re
. search ( r
'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)' ,  webpage
)  127      def  _real_extract ( self
,  url
):  128          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  129          lookup_id 
=  mobj
. group ( 'lookup_id' )  131          # See https://github.com/rg3/youtube-dl/issues/857 and  132          # https://github.com/rg3/youtube-dl/issues/4197  134              urlh 
=  self
._ request
_ webpage
(  135                  'http://blip.tv/play/ %s '  %  lookup_id
,  lookup_id
,  'Resolving lookup id' )  136              url 
=  compat_urlparse
. urlparse ( urlh
. geturl ())  137              qs 
=  compat_urlparse
. parse_qs ( url
. query
)  138              mobj 
=  re
. match ( self
._ VALID
_U RL
,  qs
[ 'file' ][ 0 ])  140          video_id 
=  mobj
. group ( 'id' )  142          rss 
=  self
._ download
_ xml
( 'http://blip.tv/rss/flash/ %s '  %  video_id
,  video_id
,  'Downloading video RSS' )  145              return  xpath_with_ns ( p
, {  146                  'blip' :  'http://blip.tv/dtd/blip/1.0' ,  147                  'media' :  'http://search.yahoo.com/mrss/' ,  148                  'itunes' :  'http://www.itunes.com/dtds/podcast-1.0.dtd' ,  151          item 
=  rss
. find ( 'channel/item' )  153          video_id 
=  xpath_text ( item
,  _x ( 'blip:item_id' ),  'video id' )  or  lookup_id
 154          title 
=  xpath_text ( item
,  'title' ,  'title' ,  fatal
= True )  155          description 
=  clean_html ( xpath_text ( item
,  _x ( 'blip:puredescription' ),  'description' ))  156          timestamp 
=  parse_iso8601 ( xpath_text ( item
,  _x ( 'blip:datestamp' ),  'timestamp' ))  157          uploader 
=  xpath_text ( item
,  _x ( 'blip:user' ),  'uploader' )  158          uploader_id 
=  xpath_text ( item
,  _x ( 'blip:userid' ),  'uploader id' )  159          duration 
=  int_or_none ( xpath_text ( item
,  _x ( 'blip:runtime' ),  'duration' ))  160          media_thumbnail 
=  item
. find ( _x ( 'media:thumbnail' ))  161          thumbnail 
= ( media_thumbnail
. get ( 'url' )  if  media_thumbnail 
is not None  162                       else  xpath_text ( item
,  'image' ,  'thumbnail' ))  163          categories 
= [ category
. text 
for  category 
in  item
. findall ( 'category' )  if  category 
is not None ]  168          media_group 
=  item
. find ( _x ( 'media:group' ))  169          for  media_content 
in  media_group
. findall ( _x ( 'media:content' )):  170              url 
=  media_content
. get ( 'url' )  171              role 
=  media_content
. get ( _x ( 'blip:role' ))  172              msg 
=  self
._ download
_ webpage
(  173                  url 
+  '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url' ,  174                  video_id
,  'Resolving URL for  %s '  %  role
)  175              real_url 
=  compat_urlparse
. parse_qs ( msg
. strip ())[ 'message' ][ 0 ]  177              media_type 
=  media_content
. get ( 'type' )  178              if  media_type 
==  'text/srt'  or  url
. endswith ( '.srt' ):  182                  lang 
=  role
. rpartition ( '-' )[- 1 ]. strip (). lower ()  183                  langcode 
=  LANGS
. get ( lang
,  lang
)  184                  subtitles_urls
[ langcode
] =  url
 185              elif  media_type
. startswith ( 'video/' ):  189                      'format_note' :  media_type
,  190                      'vcodec' :  media_content
. get ( _x ( 'blip:vcodec' ))  or  'none' ,  191                      'acodec' :  media_content
. get ( _x ( 'blip:acodec' )),  192                      'filesize' :  media_content
. get ( 'filesize' ),  193                      'width' :  int_or_none ( media_content
. get ( 'width' )),  194                      'height' :  int_or_none ( media_content
. get ( 'height' )),  196          self
._ check
_ formats
( formats
,  video_id
)  197          self
._ sort
_ formats
( formats
)  199          subtitles 
=  self
. extract_subtitles ( video_id
,  subtitles_urls
)  204              'description' :  description
,  205              'timestamp' :  timestamp
,  206              'uploader' :  uploader
,  207              'uploader_id' :  uploader_id
,  208              'duration' :  duration
,  209              'thumbnail' :  thumbnail
,  210              'categories' :  categories
,  212              'subtitles' :  subtitles
,  215      def  _get_subtitles ( self
,  video_id
,  subtitles_urls
):  217          for  lang
,  url 
in  subtitles_urls
. items ():  218              # For some weird reason, blip.tv serves a video instead of subtitles  219              # when we request with a common UA  220              req 
=  sanitized_Request ( url
)  221              req
. add_header ( 'User-Agent' ,  'youtube-dl' )  223                  # The extension is 'srt' but it's actually an 'ass' file  225                  'data' :  self
._ download
_ webpage
( req
,  None ,  note
= False ),  230  class  BlipTVUserIE ( InfoExtractor
):  231      _VALID_URL 
=  r
'(?:(?:https?://(?:\w+\.)?blip\.tv/)|bliptvuser:)(?!api\.swf)([^/]+)/*$'  233      IE_NAME 
=  'blip.tv:user'  235          'url' :  'http://blip.tv/actone' ,  238              'title' :  'Act One: The Series' ,  243      def  _real_extract ( self
,  url
):  244          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  245          username 
=  mobj
. group ( 1 )  247          page_base 
=  'http://m.blip.tv/pr/show_get_full_episode_list?users_id= %s &lite=0&esi=1'  249          page 
=  self
._ download
_ webpage
( url
,  username
,  'Downloading user page' )  250          mobj 
=  re
. search ( r
'data-users-id="([^"]+)"' ,  page
)  251          page_base 
=  page_base 
%  mobj
. group ( 1 )  252          title 
=  self
._ og
_ search
_ title
( page
)  254          # Download video ids using BlipTV Ajax calls. Result size per  255          # query is limited (currently to 12 videos) so we need to query  256          # page by page until there are no video ids - it means we got  263              url 
=  page_base 
+  "&page="  +  str ( pagenum
)  264              page 
=  self
._ download
_ webpage
(  265                  url
,  username
,  'Downloading video ids from page  %d '  %  pagenum
)  267              # Extract video identifiers  270              for  mobj 
in  re
. finditer ( r
'href="/([^"]+)"' ,  page
):  271                  if  mobj
. group ( 1 )  not in  ids_in_page
:  272                      ids_in_page
. append ( unescapeHTML ( mobj
. group ( 1 )))  274              video_ids
. extend ( ids_in_page
)  276              # A little optimization - if current page is not  277              # "full", ie. does not contain PAGE_SIZE video ids then  278              # we can assume that this page is the last one - there  279              # are no more ids on further pages - no need to query  282              if  len ( ids_in_page
) <  self
._ PAGE
_ SIZE
:  287          urls 
= [ 'http://blip.tv/ %s '  %  video_id 
for  video_id 
in  video_ids
]  288          url_entries 
= [ self
. url_result ( vurl
,  'BlipTV' )  for  vurl 
in  urls
]  289          return  self
. playlist_result (  290              url_entries
,  playlist_title
= title
,  playlist_id
= username
)