]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/ustream.py
0c06bf36bd5f76cabecc47e699ad56a45ba63a4a
   1 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  21 class UstreamIE(InfoExtractor
): 
  22     _VALID_URL 
= r
'https?://(?:www\.)?ustream\.tv/(?P<type>recorded|embed|embed/recorded)/(?P<id>\d+)' 
  25         'url': 'http://www.ustream.tv/recorded/20274954', 
  26         'md5': '088f151799e8f572f84eb62f17d73e5c', 
  30             'title': 'Young Americans for Liberty February 7, 2012 2:28 AM', 
  31             'description': 'Young Americans for Liberty February 7, 2012 2:28 AM', 
  32             'timestamp': 1328577035, 
  33             'upload_date': '20120207', 
  34             'uploader': 'yaliberty', 
  35             'uploader_id': '6780869', 
  38         # From http://sportscanada.tv/canadagames/index.php/week2/figure-skating/444 
  39         # Title and uploader available only from params JSON 
  40         'url': 'http://www.ustream.tv/embed/recorded/59307601?ub=ff0000&lc=ff0000&oc=ffffff&uc=ffffff&v=3&wmode=direct', 
  41         'md5': '5a2abf40babeac9812ed20ae12d34e10', 
  45             'title': '-CG11- Canada Games Figure Skating', 
  46             'uploader': 'sportscanadatv', 
  48         'skip': 'This Pro Broadcaster has chosen to remove this video from the ustream.tv site.', 
  50         'url': 'http://www.ustream.tv/embed/10299409', 
  56         'url': 'http://www.ustream.tv/recorded/91343263', 
  60             'title': 'GitHub Universe - General Session - Day 1', 
  61             'upload_date': '20160914', 
  62             'description': 'GitHub Universe - General Session - Day 1', 
  63             'timestamp': 1473872730, 
  64             'uploader': 'wa0dnskeqkr', 
  65             'uploader_id': '38977840', 
  68             'skip_download': True,  # m3u8 download 
  72     def _get_stream_info(self
, url
, video_id
, app_id_ver
, extra_note
=None): 
  76         rnd 
= random
.randrange
 
  81         conn_info 
= self
._download
_json
( 
  82             'http://r%d-1-%s-recorded-lp-live.ums.ustream.tv/1/ustream' % (rnd(1e8
), video_id
), 
  83             video_id
, note
='Downloading connection info' + extra_note
, 
  86                 'appId': app_id_ver
[0], 
  87                 'appVersion': app_id_ver
[1], 
  88                 'rsid': '%s:%s' % (num_to_hex(rnd(1e8
)), num_to_hex(rnd(1e8
))), 
  89                 'rpin': '_rpin.%d' % rnd(1e15
), 
  92                 'application': 'recorded', 
  94         host 
= conn_info
[0]['args'][0]['host'] 
  95         connection_id 
= conn_info
[0]['args'][0]['connectionId'] 
  97         return self
._download
_json
( 
  98             'http://%s/1/ustream?connectionId=%s' % (host
, connection_id
), 
  99             video_id
, note
='Downloading stream info' + extra_note
) 
 101     def _get_streams(self
, url
, video_id
, app_id_ver
): 
 102         # Sometimes the return dict does not have 'stream' 
 103         for trial_count 
in range(3): 
 104             stream_info 
= self
._get
_stream
_info
( 
 105                 url
, video_id
, app_id_ver
, 
 106                 extra_note
=' (try %d)' % (trial_count 
+ 1) if trial_count 
> 0 else '') 
 107             if 'stream' in stream_info
[0]['args'][0]: 
 108                 return stream_info
[0]['args'][0]['stream'] 
 111     def _parse_segmented_mp4(self
, dash_stream_info
): 
 112         def resolve_dash_template(template
, idx
, chunk_hash
): 
 113             return template
.replace('%', compat_str(idx
), 1).replace('%', chunk_hash
) 
 116         for stream 
in dash_stream_info
['streams']: 
 117             # Use only one provider to avoid too many formats 
 118             provider 
= dash_stream_info
['providers'][0] 
 120                 'url': resolve_dash_template( 
 121                     provider
['url'] + stream
['initUrl'], 0, dash_stream_info
['hashes']['0']) 
 123             for idx 
in range(dash_stream_info
['videoLength'] // dash_stream_info
['chunkTime']): 
 125                     'url': resolve_dash_template( 
 126                         provider
['url'] + stream
['segmentUrl'], idx
, 
 127                         dash_stream_info
['hashes'][compat_str(idx 
// 10 * 10)]) 
 129             content_type 
= stream
['contentType'] 
 130             kind 
= content_type
.split('/')[0] 
 132                 'format_id': '-'.join(filter(None, [ 
 133                     'dash', kind
, str_or_none(stream
.get('bitrate'))])), 
 134                 'protocol': 'http_dash_segments', 
 135                 # TODO: generate a MPD doc for external players? 
 136                 'url': encode_data_uri(b
'<MPD/>', 'text/xml'), 
 137                 'ext': mimetype2ext(content_type
), 
 138                 'height': stream
.get('height'), 
 139                 'width': stream
.get('width'), 
 140                 'fragments': fragments
, 
 144                     'vcodec': stream
.get('codec'), 
 146                     'vbr': stream
.get('bitrate'), 
 151                     'acodec': stream
.get('codec'), 
 152                     'abr': stream
.get('bitrate'), 
 157     def _real_extract(self
, url
): 
 158         m 
= re
.match(self
._VALID
_URL
, url
) 
 159         video_id 
= m
.group('id') 
 161         # some sites use this embed format (see: https://github.com/rg3/youtube-dl/issues/2990) 
 162         if m
.group('type') == 'embed/recorded': 
 163             video_id 
= m
.group('id') 
 164             desktop_url 
= 'http://www.ustream.tv/recorded/' + video_id
 
 165             return self
.url_result(desktop_url
, 'Ustream') 
 166         if m
.group('type') == 'embed': 
 167             video_id 
= m
.group('id') 
 168             webpage 
= self
._download
_webpage
(url
, video_id
) 
 169             content_video_ids 
= self
._parse
_json
(self
._search
_regex
( 
 170                 r
'ustream\.vars\.offAirContentVideoIds=([^;]+);', webpage
, 
 171                 'content video IDs'), video_id
) 
 172             return self
.playlist_result( 
 173                 map(lambda u
: self
.url_result('http://www.ustream.tv/recorded/' + u
, 'Ustream'), content_video_ids
), 
 176         params 
= self
._download
_json
( 
 177             'https://api.ustream.tv/videos/%s.json' % video_id
, video_id
) 
 179         error 
= params
.get('error') 
 181             raise ExtractorError( 
 182                 '%s returned error: %s' % (self
.IE_NAME
, error
), expected
=True) 
 184         video 
= params
['video'] 
 186         title 
= video
['title'] 
 187         filesize 
= float_or_none(video
.get('file_size')) 
 193             'filesize': filesize
, 
 194         } for format_id
, video_url 
in video
['media_urls'].items() if video_url
] 
 197             hls_streams 
= self
._get
_streams
(url
, video_id
, app_id_ver
=(11, 2)) 
 199                 # m3u8_native leads to intermittent ContentTooShortError 
 200                 formats
.extend(self
._extract
_m
3u8_formats
( 
 201                     hls_streams
[0]['url'], video_id
, ext
='mp4', m3u8_id
='hls')) 
 204             # DASH streams handling is incomplete as 'url' is missing 
 205             dash_streams = self._get_streams(url, video_id, app_id_ver=(3, 1)) 
 207                 formats.extend(self._parse_segmented_mp4(dash_streams)) 
 210         self
._sort
_formats
(formats
) 
 212         description 
= video
.get('description') 
 213         timestamp 
= int_or_none(video
.get('created_at')) 
 214         duration 
= float_or_none(video
.get('length')) 
 215         view_count 
= int_or_none(video
.get('views')) 
 217         uploader 
= video
.get('owner', {}).get('username') 
 218         uploader_id 
= video
.get('owner', {}).get('id') 
 222             'url': thumbnail_url
, 
 223         } for thumbnail_id
, thumbnail_url 
in video
.get('thumbnail', {}).items()] 
 228             'description': description
, 
 229             'thumbnails': thumbnails
, 
 230             'timestamp': timestamp
, 
 231             'duration': duration
, 
 232             'view_count': view_count
, 
 233             'uploader': uploader
, 
 234             'uploader_id': uploader_id
, 
 239 class UstreamChannelIE(InfoExtractor
): 
 240     _VALID_URL 
= r
'https?://(?:www\.)?ustream\.tv/channel/(?P<slug>.+)' 
 241     IE_NAME 
= 'ustream:channel' 
 243         'url': 'http://www.ustream.tv/channel/channeljapan', 
 247         'playlist_mincount': 17, 
 250     def _real_extract(self
, url
): 
 251         m 
= re
.match(self
._VALID
_URL
, url
) 
 252         display_id 
= m
.group('slug') 
 253         webpage 
= self
._download
_webpage
(url
, display_id
) 
 254         channel_id 
= self
._html
_search
_meta
('ustream:channel_id', webpage
) 
 256         BASE 
= 'http://www.ustream.tv' 
 257         next_url 
= '/ajax/socialstream/videos/%s/1.json' % channel_id
 
 260             reply 
= self
._download
_json
( 
 261                 compat_urlparse
.urljoin(BASE
, next_url
), display_id
, 
 262                 note
='Downloading video information (next: %d)' % (len(video_ids
) + 1)) 
 263             video_ids
.extend(re
.findall(r
'data-content-id="(\d.*)"', reply
['data'])) 
 264             next_url 
= reply
['nextUrl'] 
 267             self
.url_result('http://www.ustream.tv/recorded/' + vid
, 'Ustream') 
 268             for vid 
in video_ids
] 
 272             'display_id': display_id
,