]> Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/ustream.py
debian/NEWS: Write news about new behavior of youtube-dl.
[youtubedl] / youtube_dl / extractor / ustream.py
1 from __future__ import unicode_literals
2
3 import json
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8 compat_urlparse,
9 get_meta_content,
10 )
11
12
class UstreamIE(InfoExtractor):
    """Extractor for a single recorded Ustream video (``/recorded/<id>`` URLs)."""

    _VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
    IE_NAME = 'ustream'
    _TEST = {
        'url': 'http://www.ustream.tv/recorded/20274954',
        'file': '20274954.flv',
        'md5': '088f151799e8f572f84eb62f17d73e5c',
        'info_dict': {
            "uploader": "Young Americans for Liberty",
            "title": "Young Americans for Liberty February 7, 2012 2:28 AM",
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the recorded video at *url*.

        The numeric video id is taken from the URL and used to form the
        media URL; title, uploader and thumbnail are scraped from the
        downloaded page.  The title lookup uses the default ``fatal``
        setting, while uploader and thumbnail are looked up with
        ``fatal=False``.

        Returns a dict with the keys ``id``, ``url``, ``ext``, ``title``,
        ``uploader`` and ``thumbnail``.
        """
        m = re.match(self._VALID_URL, url)
        video_id = m.group('videoID')

        video_url = 'http://tcdn.ustream.tv/video/%s' % video_id
        webpage = self._download_webpage(url, video_id)

        self.report_extraction(video_id)

        # Stop at the attribute's closing quote: a greedy ``.+`` would run on
        # through any later quoted attribute on the same line.
        video_title = self._html_search_regex(
            r'data-title="(?P<title>[^"]+)"', webpage, 'title')

        # DOTALL lets ``.*?`` span line breaks between the attribute and the
        # link text.
        uploader = self._html_search_regex(
            r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
            webpage, 'uploader', fatal=False, flags=re.DOTALL)

        thumbnail = self._html_search_regex(
            r'<link rel="image_src" href="(?P<thumb>.*?)"',
            webpage, 'thumbnail', fatal=False)

        return {
            'id': video_id,
            'url': video_url,
            'ext': 'flv',
            'title': video_title,
            'uploader': uploader,
            'thumbnail': thumbnail,
        }
52
53
class UstreamChannelIE(InfoExtractor):
    """Extractor that turns a Ustream channel page into a playlist of its videos."""

    _VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)'
    IE_NAME = 'ustream:channel'

    def _real_extract(self, url):
        """Collect every video id listed for the channel at *url*.

        The channel id is read from the page's ``ustream:channel_id`` meta
        tag, then the paged ``/ajax/socialstream/videos/<id>/<n>.json``
        listing is followed until a reply carries a falsy ``nextUrl``.

        Returns a playlist result (keyed by the channel id) whose entries
        are URL results pointing at ``/recorded/<video id>`` pages.
        """
        m = re.match(self._VALID_URL, url)
        slug = m.group('slug')
        webpage = self._download_webpage(url, slug)
        channel_id = get_meta_content('ustream:channel_id', webpage)

        BASE = 'http://www.ustream.tv'
        next_url = '/ajax/socialstream/videos/%s/1.json' % channel_id
        video_ids = []
        while next_url:
            reply = json.loads(self._download_webpage(
                compat_urlparse.urljoin(BASE, next_url), channel_id))
            # Capture digits only: a greedy ``\d.*`` would swallow any later
            # quoted attribute on the same line, and the recorded-video URLs
            # built below only accept numeric ids.
            video_ids.extend(
                re.findall(r'data-content-id="(\d+)"', reply['data']))
            next_url = reply['nextUrl']

        url_entries = [
            self.url_result('http://www.ustream.tv/recorded/' + vid, 'Ustream')
            for vid in video_ids
        ]
        return self.playlist_result(url_entries, channel_id)