]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/bloomberg.py
1 from __future__
import unicode_literals
5 from .common
import InfoExtractor
8 class BloombergIE(InfoExtractor
):
9 _VALID_URL
= r
'https?://www\.bloomberg\.com/news/[^/]+/[^/]+/(?P<id>[^/?#]+)'
12 'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2',
13 # The md5 checksum changes
15 'id': 'qurhIVlJSB6hzkVi229d8g',
17 'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
18 'description': 'md5:a8ba0302912d03d246979735c17d2761',
21 'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
22 'only_matching': True,
25 def _real_extract(self
, url
):
26 name
= self
._match
_id
(url
)
27 webpage
= self
._download
_webpage
(url
, name
)
28 video_id
= self
._search
_regex
(r
'"bmmrId":"(.+?)"', webpage
, 'id')
29 title
= re
.sub(': Video$', '', self
._og
_search
_title
(webpage
))
31 embed_info
= self
._download
_json
(
32 'http://www.bloomberg.com/api/embed?id=%s' % video_id
, video_id
)
34 for stream
in embed_info
['streams']:
35 if stream
["muxing_format"] == "TS":
36 formats
.extend(self
._extract
_m
3u8_formats
(stream
['url'], video_id
))
38 formats
.extend(self
._extract
_f
4m
_formats
(stream
['url'], video_id
))
39 self
._sort
_formats
(formats
)
45 'description': self
._og
_search
_description
(webpage
),
46 'thumbnail': self
._og
_search
_thumbnail
(webpage
),