]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/bliptv.py
   1 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   7 from .subtitles 
import SubtitlesInfoExtractor
 
  10     compat_urllib_request
, 
  16 class BlipTVIE(SubtitlesInfoExtractor
): 
  17     """Information extractor for blip.tv""" 
  19     _VALID_URL 
= r
'https?://(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(?P<presumptive_id>.+)$' 
  22         'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352', 
  23         'md5': 'c6934ad0b6acf2bd920720ec888eb812', 
  27             'upload_date': '20111205', 
  28             'description': 'md5:9bc31f227219cde65e47eeec8d2dc596', 
  29             'uploader': 'Comic Book Resources - CBR TV', 
  30             'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3', 
  33         # https://github.com/rg3/youtube-dl/pull/2274 
  34         'note': 'Video with subtitles', 
  35         'url': 'http://blip.tv/play/h6Uag5OEVgI.html', 
  36         'md5': '309f9d25b820b086ca163ffac8031806', 
  40             'uploader': 'Red vs. Blue', 
  41             'description': 'One-Zero-One', 
  42             'upload_date': '20130614', 
  43             'title': 'Red vs. Blue Season 11 Episode 1', 
  47     def _real_extract(self
, url
): 
  48         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  49         presumptive_id 
= mobj
.group('presumptive_id') 
  51         # See https://github.com/rg3/youtube-dl/issues/857 
  52         embed_mobj 
= re
.match(r
'https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', url
) 
  54             info_url 
= 'http://blip.tv/play/%s.x?p=1' % embed_mobj
.group(1) 
  55             info_page 
= self
._download
_webpage
(info_url
, embed_mobj
.group(1)) 
  56             video_id 
= self
._search
_regex
( 
  57                 r
'data-episode-id="([0-9]+)', info_page
, 'video_id') 
  58             return self
.url_result('http://blip.tv/a/a-' + video_id
, 'BlipTV') 
  60         cchar 
= '&' if '?' in url 
else '?' 
  61         json_url 
= url 
+ cchar 
+ 'skin=json&version=2&no_wrap=1' 
  62         request 
= compat_urllib_request
.Request(json_url
) 
  63         request
.add_header('User-Agent', 'iTunes/10.6.1') 
  65         json_data 
= self
._download
_json
(request
, video_id
=presumptive_id
) 
  67         if 'Post' in json_data
: 
  68             data 
= json_data
['Post'] 
  72         video_id 
= compat_str(data
['item_id']) 
  73         upload_date 
= datetime
.datetime
.strptime(data
['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d') 
  76         if 'additionalMedia' in data
: 
  77             for f 
in data
['additionalMedia']: 
  78                 if f
.get('file_type_srt') == 1: 
  82                     lang 
= f
['role'].rpartition('-')[-1].strip().lower() 
  83                     langcode 
= LANGS
.get(lang
, lang
) 
  84                     subtitles
[langcode
] = f
['url'] 
  86                 if not int(f
['media_width']):  # filter m3u8 
  90                     'format_id': f
['role'], 
  91                     'width': int(f
['media_width']), 
  92                     'height': int(f
['media_height']), 
  96                 'url': data
['media']['url'], 
  97                 'width': int(data
['media']['width']), 
  98                 'height': int(data
['media']['height']), 
 100         self
._sort
_formats
(formats
) 
 103         video_subtitles 
= self
.extract_subtitles(video_id
, subtitles
) 
 104         if self
._downloader
.params
.get('listsubtitles', False): 
 105             self
._list
_available
_subtitles
(video_id
, subtitles
) 
 110             'uploader': data
['display_name'], 
 111             'upload_date': upload_date
, 
 112             'title': data
['title'], 
 113             'thumbnail': data
['thumbnailUrl'], 
 114             'description': data
['description'], 
 115             'user_agent': 'iTunes/10.6.1', 
 117             'subtitles': video_subtitles
, 
 120     def _download_subtitle_url(self
, sub_lang
, url
): 
 121         # For some weird reason, blip.tv serves a video instead of subtitles 
 122         # when we request with a common UA 
 123         req 
= compat_urllib_request
.Request(url
) 
 124         req
.add_header('Youtubedl-user-agent', 'youtube-dl') 
 125         return self
._download
_webpage
(req
, None, note
=False) 
 128 class BlipTVUserIE(InfoExtractor
): 
 129     _VALID_URL 
= r
'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$' 
 131     IE_NAME 
= 'blip.tv:user' 
 133     def _real_extract(self
, url
): 
 134         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 135         username 
= mobj
.group(1) 
 137         page_base 
= 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1' 
 139         page 
= self
._download
_webpage
(url
, username
, 'Downloading user page') 
 140         mobj 
= re
.search(r
'data-users-id="([^"]+)"', page
) 
 141         page_base 
= page_base 
% mobj
.group(1) 
 143         # Download video ids using BlipTV Ajax calls. Result size per 
 144         # query is limited (currently to 12 videos) so we need to query 
 145         # page by page until there are no video ids - it means we got 
 152             url 
= page_base 
+ "&page=" + str(pagenum
) 
 153             page 
= self
._download
_webpage
( 
 154                 url
, username
, 'Downloading video ids from page %d' % pagenum
) 
 156             # Extract video identifiers 
 159             for mobj 
in re
.finditer(r
'href="/([^"]+)"', page
): 
 160                 if mobj
.group(1) not in ids_in_page
: 
 161                     ids_in_page
.append(unescapeHTML(mobj
.group(1))) 
 163             video_ids
.extend(ids_in_page
) 
 165             # A little optimization - if current page is not 
 166             # "full", ie. does not contain PAGE_SIZE video ids then 
 167             # we can assume that this page is the last one - there 
 168             # are no more ids on further pages - no need to query 
 171             if len(ids_in_page
) < self
._PAGE
_SIZE
: 
 176         urls 
= ['http://blip.tv/%s' % video_id 
for video_id 
in video_ids
] 
 177         url_entries 
= [self
.url_result(vurl
, 'BlipTV') for vurl 
in urls
] 
 178         return [self
.playlist_result(url_entries
, playlist_title
=username
)]