]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/hrti.py
   2 from __future__ 
import unicode_literals
 
   7 from .common 
import InfoExtractor
 
   8 from ..compat 
import compat_HTTPError
 
  19 class HRTiBaseIE(InfoExtractor
): 
  21         Base Information Extractor for Croatian Radiotelevision 
  22         video on demand site https://hrti.hrt.hr 
  23         Reverse engineered from the JavaScript app in app.min.js 
  25     _NETRC_MACHINE 
= 'hrti' 
  29     _APP_PUBLICATION_ID 
= 'all_in_one' 
  30     _API_URL 
= 'http://clientapi.hrt.hr/client_api.php/config/identify/format/json' 
  32     def _initialize_api(self
): 
  34             'application_publication_id': self
._APP
_PUBLICATION
_ID
 
  37         uuid 
= self
._download
_json
( 
  38             self
._API
_URL
, None, note
='Downloading uuid', 
  39             errnote
='Unable to download uuid', 
  40             data
=json
.dumps(init_data
).encode('utf-8'))['uuid'] 
  44             'application_publication_id': self
._APP
_PUBLICATION
_ID
, 
  45             'application_version': self
._APP
_VERSION
 
  48         req 
= sanitized_Request(self
._API
_URL
, data
=json
.dumps(app_data
).encode('utf-8')) 
  49         req
.get_method 
= lambda: 'PUT' 
  51         resources 
= self
._download
_json
( 
  52             req
, None, note
='Downloading session information', 
  53             errnote
='Unable to download session information') 
  55         self
._session
_id 
= resources
['session_id'] 
  57         modules 
= resources
['modules'] 
  59         self
._search
_url 
= modules
['vod_catalog']['resources']['search']['uri'].format( 
  60             language
=self
._APP
_LANGUAGE
, 
  61             application_id
=self
._APP
_PUBLICATION
_ID
) 
  63         self
._login
_url 
= (modules
['user']['resources']['login']['uri'] + 
  64                            '/format/json').format(session_id
=self
._session
_id
) 
  66         self
._logout
_url 
= modules
['user']['resources']['logout']['uri'] 
  69         (username
, password
) = self
._get
_login
_info
() 
  70         # TODO: figure out authentication with cookies 
  71         if username 
is None or password 
is None: 
  72             self
.raise_login_required() 
  80             auth_info 
= self
._download
_json
( 
  81                 self
._login
_url
, None, note
='Logging in', errnote
='Unable to log in', 
  82                 data
=json
.dumps(auth_data
).encode('utf-8')) 
  83         except ExtractorError 
as e
: 
  84             if isinstance(e
.cause
, compat_HTTPError
) and e
.cause
.code 
== 406: 
  85                 auth_info 
= self
._parse
_json
(e
.cause
.read().encode('utf-8'), None) 
  89         error_message 
= auth_info
.get('error', {}).get('message') 
  92                 '%s said: %s' % (self
.IE_NAME
, error_message
), 
  95         self
._token 
= auth_info
['secure_streaming_token'] 
  97     def _real_initialize(self
): 
  98         self
._initialize
_api
() 
 102 class HRTiIE(HRTiBaseIE
): 
 103     _VALID_URL 
= r
'''(?x) 
 105                             hrti:(?P<short_id>[0-9]+)| 
 107                                 hrti\.hrt\.hr/\#/video/show/(?P<id>[0-9]+)/(?P<display_id>[^/]+)? 
 111         'url': 'https://hrti.hrt.hr/#/video/show/2181385/republika-dokumentarna-serija-16-hd', 
 114             'display_id': 'republika-dokumentarna-serija-16-hd', 
 116             'title': 'REPUBLIKA, dokumentarna serija (1/6) (HD)', 
 117             'description': 'md5:48af85f620e8e0e1df4096270568544f', 
 120             'average_rating': int, 
 121             'episode_number': int, 
 122             'season_number': int, 
 125         'skip': 'Requires account credentials', 
 127         'url': 'https://hrti.hrt.hr/#/video/show/2181385/', 
 128         'only_matching': True, 
 130         'url': 'hrti:2181385', 
 131         'only_matching': True, 
 134     def _real_extract(self
, url
): 
 135         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 136         video_id 
= mobj
.group('short_id') or mobj
.group('id') 
 137         display_id 
= mobj
.group('display_id') or video_id
 
 139         video 
= self
._download
_json
( 
 140             '%s/video_id/%s/format/json' % (self
._search
_url
, video_id
), 
 141             display_id
, 'Downloading video metadata JSON')['video'][0] 
 143         title_info 
= video
['title'] 
 144         title 
= title_info
['title_long'] 
 146         movie 
= video
['video_assets']['movie'][0] 
 147         m3u8_url 
= movie
['url'].format(TOKEN
=self
._token
) 
 148         formats 
= self
._extract
_m
3u8_formats
( 
 149             m3u8_url
, display_id
, 'mp4', entry_protocol
='m3u8_native', 
 151         self
._sort
_formats
(formats
) 
 153         description 
= clean_html(title_info
.get('summary_long')) 
 154         age_limit 
= parse_age_limit(video
.get('parental_control', {}).get('rating')) 
 155         view_count 
= int_or_none(video
.get('views')) 
 156         average_rating 
= int_or_none(video
.get('user_rating')) 
 157         duration 
= int_or_none(movie
.get('duration')) 
 161             'display_id': display_id
, 
 163             'description': description
, 
 164             'duration': duration
, 
 165             'view_count': view_count
, 
 166             'average_rating': average_rating
, 
 167             'age_limit': age_limit
, 
 172 class HRTiPlaylistIE(HRTiBaseIE
): 
 173     _VALID_URL 
= r
'https?://hrti.hrt.hr/#/video/list/category/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?' 
 175         'url': 'https://hrti.hrt.hr/#/video/list/category/212/ekumena', 
 180         'playlist_mincount': 8, 
 181         'skip': 'Requires account credentials', 
 183         'url': 'https://hrti.hrt.hr/#/video/list/category/212/', 
 184         'only_matching': True, 
 187     def _real_extract(self
, url
): 
 188         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 189         category_id 
= mobj
.group('id') 
 190         display_id 
= mobj
.group('display_id') or category_id
 
 192         response 
= self
._download
_json
( 
 193             '%s/category_id/%s/format/json' % (self
._search
_url
, category_id
), 
 194             display_id
, 'Downloading video metadata JSON') 
 197             response
, lambda x
: x
['video_listings'][0]['alternatives'][0]['list'], 
 198             list) or [video
['id'] for video 
in response
.get('videos', []) if video
.get('id')] 
 200         entries 
= [self
.url_result('hrti:%s' % video_id
) for video_id 
in video_ids
] 
 202         return self
.playlist_result(entries
, category_id
, display_id
)