]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/aftonbladet.py
2 from __future__
import unicode_literals
4 from .common
import InfoExtractor
5 from ..utils
import int_or_none
8 class AftonbladetIE(InfoExtractor
):
9 _VALID_URL
= r
'http://tv\.aftonbladet\.se/abtv/articles/(?P<id>[0-9]+)'
11 'url': 'http://tv.aftonbladet.se/abtv/articles/36015',
15 'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna',
16 'description': 'Jupiters måne mest aktiv av alla himlakroppar',
17 'timestamp': 1394142732,
18 'upload_date': '20140306',
22 def _real_extract(self
, url
):
23 video_id
= self
._match
_id
(url
)
24 webpage
= self
._download
_webpage
(url
, video_id
)
26 # find internal video meta data
27 meta_url
= 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json'
28 player_config
= self
._parse
_json
(self
._html
_search
_regex
(
29 r
'data-player-config="([^"]+)"', webpage
, 'player config'), video_id
)
30 internal_meta_id
= player_config
['videoId']
31 internal_meta_url
= meta_url
% internal_meta_id
32 internal_meta_json
= self
._download
_json
(
33 internal_meta_url
, video_id
, 'Downloading video meta data')
35 # find internal video formats
36 format_url
= 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s'
37 internal_video_id
= internal_meta_json
['videoId']
38 internal_formats_url
= format_url
% internal_video_id
39 internal_formats_json
= self
._download
_json
(
40 internal_formats_url
, video_id
, 'Downloading video formats')
43 for fmt
in internal_formats_json
['formats']['http']['pseudostreaming']['mp4']:
46 'url': 'http://%s:%d/%s/%s' % (p
['address'], p
['port'], p
['path'], p
['filename']),
48 'width': int_or_none(fmt
.get('width')),
49 'height': int_or_none(fmt
.get('height')),
50 'tbr': int_or_none(fmt
.get('bitrate')),
53 self
._sort
_formats
(formats
)
57 'title': internal_meta_json
['title'],
59 'thumbnail': internal_meta_json
.get('imageUrl'),
60 'description': internal_meta_json
.get('shortPreamble'),
61 'timestamp': int_or_none(internal_meta_json
.get('timePublished')),
62 'duration': int_or_none(internal_meta_json
.get('duration')),
63 'view_count': int_or_none(internal_meta_json
.get('views')),