# youtube_dl/extractor/bliptv.py
from __future__ import unicode_literals

import datetime
import json
import re
import socket

from .common import InfoExtractor
from ..utils import (
    compat_http_client,
    compat_str,
    compat_urllib_error,
    compat_urllib_request,
    ExtractorError,
    unescapeHTML,
)


class BlipTVIE(InfoExtractor):
    """Information extractor for blip.tv"""

    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(.+)$'

    _TEST = {
        'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
        'file': '5779306.mov',
        'md5': 'c6934ad0b6acf2bd920720ec888eb812',
        'info_dict': {
            'upload_date': '20111205',
            'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
            'uploader': 'Comic Book Resources - CBR TV',
            'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
        },
    }

    def report_direct_download(self, title):
        """Report information extraction."""
        self.to_screen('%s: Direct download detected' % title)

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)

        # See https://github.com/rg3/youtube-dl/issues/857
        embed_mobj = re.search(r'^(?:https?://)?(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', url)
        if embed_mobj:
            info_url = 'http://blip.tv/play/%s.x?p=1' % embed_mobj.group(1)
            info_page = self._download_webpage(info_url, embed_mobj.group(1))
            video_id = self._search_regex(r'data-episode-id="(\d+)', info_page, 'video_id')
            return self.url_result('http://blip.tv/a/a-' + video_id, 'BlipTV')
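
        # Regular post URLs are handled below by querying blip.tv's JSON "skin"
        # endpoint: appending skin=json&version=2&no_wrap=1 to the post URL is
        # expected to return the video metadata as JSON (as the code below
        # assumes). Illustrative example of such a constructed URL (not from
        # the original source):
        #   http://blip.tv/cbr/cbr-exclusive-...-5796352?skin=json&version=2&no_wrap=1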

        if '?' in url:
            cchar = '&'
        else:
            cchar = '?'
        json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
        request = compat_urllib_request.Request(json_url)
        request.add_header('User-Agent', 'iTunes/10.6.1')
        self.report_extraction(mobj.group(1))
        urlh = self._request_webpage(request, None, False,
                                     'unable to download video info webpage')

        try:
            json_code_bytes = urlh.read()
            json_code = json_code_bytes.decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError('Unable to read video info webpage: %s' % compat_str(err))

        try:
            json_data = json.loads(json_code)
            if 'Post' in json_data:
                data = json_data['Post']
            else:
                data = json_data

            upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
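            # Illustrative (not from the original source): a 'datestamp' such
            # as '12-05-11 09:30PM' parses with '%m-%d-%y %H:%M%p' and is
            # re-emitted as the upload_date '20111205' expected by _TEST above.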

            formats = []
            if 'additionalMedia' in data:
                for f in sorted(data['additionalMedia'], key=lambda f: int(f['media_height'])):
                    if not int(f['media_width']):  # filter m3u8
                        continue
                    formats.append({
                        'url': f['url'],
                        'format_id': f['role'],
                        'width': int(f['media_width']),
                        'height': int(f['media_height']),
                    })
            else:
                formats.append({
                    'url': data['media']['url'],
                    'width': int(data['media']['width']),
                    'height': int(data['media']['height']),
                })

            self._sort_formats(formats)
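            # youtube-dl convention: _sort_formats() orders the formats list
            # from worst to best quality, so the default ('best') format
            # selection can simply take the last entry.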

            return {
                'id': compat_str(data['item_id']),
                'uploader': data['display_name'],
                'upload_date': upload_date,
                'title': data['title'],
                'thumbnail': data['thumbnailUrl'],
                'description': data['description'],
                'user_agent': 'iTunes/10.6.1',
                'formats': formats,
            }
        except (ValueError, KeyError) as err:
            raise ExtractorError('Unable to parse video information: %s' % repr(err))


class BlipTVUserIE(InfoExtractor):
    """Information Extractor for blip.tv users."""

    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$'
    _PAGE_SIZE = 12
    IE_NAME = 'blip.tv:user'
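
    # Illustrative URLs matched by _VALID_URL (not from the original source):
    #   http://blip.tv/cbr
    #   bliptvuser:cbr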

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)

        username = mobj.group(1)

        page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1'

        page = self._download_webpage(url, username, 'Downloading user page')
        mobj = re.search(r'data-users-id="([^"]+)"', page)
        page_base = page_base % mobj.group(1)
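
        # Each Ajax request below simply appends a page parameter to
        # page_base, e.g. (illustrative, not from the original source):
        #   http://m.blip.tv/pr/show_get_full_episode_list?users_id=123456&lite=0&esi=1&page=1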

        # Download video ids using BlipTV Ajax calls. Result size per
        # query is limited (currently to 12 videos), so we need to query
        # page by page until no more video ids are returned - at that
        # point we have all of them.

        video_ids = []
        pagenum = 1

        while True:
            url = page_base + "&page=" + str(pagenum)
            page = self._download_webpage(url, username,
                                          'Downloading video ids from page %d' % pagenum)

            # Extract video identifiers
            ids_in_page = []

            for mobj in re.finditer(r'href="/([^"]+)"', page):
                if mobj.group(1) not in ids_in_page:
                    ids_in_page.append(unescapeHTML(mobj.group(1)))

            video_ids.extend(ids_in_page)

            # A little optimization - if the current page is not "full",
            # i.e. does not contain _PAGE_SIZE video ids, we can assume
            # that this page is the last one - there are no more ids on
            # further pages, so there is no need to query again.

            if len(ids_in_page) < self._PAGE_SIZE:
                break

            pagenum += 1

        urls = ['http://blip.tv/%s' % video_id for video_id in video_ids]
        url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]
        return [self.playlist_result(url_entries, playlist_title=username)]
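

# Usage sketch (illustrative only, not part of the original module): these
# extractors are normally driven through YoutubeDL rather than instantiated
# directly, e.g.
#
#     import youtube_dl
#
#     ydl = youtube_dl.YoutubeDL()
#     info = ydl.extract_info(
#         'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-'
#         'jokerz-short-3-5796352',
#         download=False)
#     print(info['id'], info['title'])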