]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/newstube.py
2 from __future__
import unicode_literals
7 from .common
import InfoExtractor
8 from ..aes
import aes_cbc_decrypt
18 class NewstubeIE(InfoExtractor
):
19 _VALID_URL
= r
'https?://(?:www\.)?newstube\.ru/media/(?P<id>.+)'
21 'url': 'http://www.newstube.ru/media/telekanal-cnn-peremestil-gorod-slavyansk-v-krym',
22 'md5': '9d10320ad473444352f72f746ccb8b8c',
24 'id': '728e0ef2-e187-4012-bac0-5a081fdcb1f6',
26 'title': 'Телеканал CNN переместил город Славянск в Крым',
27 'description': 'md5:419a8c9f03442bc0b0a794d689360335',
32 def _real_extract(self
, url
):
33 video_id
= self
._match
_id
(url
)
35 page
= self
._download
_webpage
(url
, video_id
)
36 title
= self
._html
_search
_meta
(['og:title', 'twitter:title'], page
, fatal
=True)
38 video_guid
= self
._html
_search
_regex
(
39 r
'<meta\s+property="og:video(?::(?:(?:secure_)?url|iframe))?"\s+content="https?://(?:www\.)?newstube\.ru/embed/(?P<guid>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
42 enc_data
= base64
.b64decode(self
._download
_webpage
(
43 'https://www.newstube.ru/embed/api/player/getsources2',
48 key
= hashlib
.pbkdf2_hmac(
49 'sha1', video_guid
.replace('-', '').encode(), enc_data
[:16], 1)[:16]
50 dec_data
= aes_cbc_decrypt(
51 bytes_to_intlist(enc_data
[32:]), bytes_to_intlist(key
),
52 bytes_to_intlist(enc_data
[16:32]))
53 sources
= self
._parse
_json
(intlist_to_bytes(dec_data
[:-dec_data
[-1]]), video_guid
)
56 for source
in sources
:
57 source_url
= source
.get('Src')
60 height
= int_or_none(source
.get('Height'))
62 'format_id': 'http' + ('-%dp' % height
if height
else ''),
64 'width': int_or_none(source
.get('Width')),
67 source_type
= source
.get('Type')
69 f
.update(parse_codecs(self
._search
_regex
(
70 r
'codecs="([^"]+)"', source_type
, 'codecs', fatal
=False)))
73 self
._check
_formats
(formats
, video_guid
)
74 self
._sort
_formats
(formats
)
79 'description': self
._html
_search
_meta
(['description', 'og:description'], page
),
80 'thumbnail': self
._html
_search
_meta
(['og:image:secure_url', 'og:image', 'twitter:image'], page
),
81 'duration': parse_duration(self
._html
_search
_meta
('duration', page
)),