# encoding: utf-8
# Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/aftonbladet.py
# a117502bc0ad7bfec11592ec57da575898cacc3d
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import int_or_none
class AftonbladetIE(InfoExtractor):
    """Extractor for video article pages on tv.aftonbladet.se.

    NOTE(review): this class was rebuilt from a line-wrapped web scrape in
    which some statements were missing.  Every line that had to be
    reconstructed is flagged with its own NOTE(review) comment below; all
    other tokens and string literals are taken verbatim from the scrape.
    """

    # The named group ``id`` captures the ``articleNNN`` part of the URL.
    _VALID_URL = r'http://tv\.aftonbladet\.se/webbtv.+?(?P<id>article[0-9]+)\.ab(?:$|[?#])'

    # NOTE(review): the attribute name ``_TEST``, the ``info_dict`` nesting and
    # the ``id`` / ``ext`` entries were missing from the scrape and are
    # reconstructed by convention -- confirm against the upstream file.
    _TEST = {
        'url': 'http://tv.aftonbladet.se/webbtv/nyheter/vetenskap/rymden/article36015.ab',
        'info_dict': {
            'id': 'article36015',
            'ext': 'mp4',
            'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna',
            'description': 'Jupiters måne mest aktiv av alla himlakroppar',
            'timestamp': 1394142732,
            'upload_date': '20140306',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the video embedded at *url*.

        The lookup takes three requests: the article page (to find the
        ``data-aptomaId`` attribute), the metadata JSON for that id, and the
        formats JSON for the ``videoId`` named in the metadata.

        Raises ``KeyError`` if the metadata lacks ``videoId`` / ``title`` or
        the formats JSON lacks the expected nested keys; errors raised by the
        download/search helpers propagate unchanged.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # find internal video meta data
        meta_url = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json'
        internal_meta_id = self._html_search_regex(
            r'data-aptomaId="([\w\d]+)"', webpage, 'internal_meta_id')
        internal_meta_url = meta_url % internal_meta_id
        internal_meta_json = self._download_json(
            internal_meta_url, video_id, 'Downloading video meta data')

        # find internal video formats
        format_url = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s'
        internal_video_id = internal_meta_json['videoId']
        internal_formats_url = format_url % internal_video_id
        internal_formats_json = self._download_json(
            internal_formats_url, video_id, 'Downloading video formats')

        # NOTE(review): the initialisation of ``formats`` was missing from the
        # scrape; an empty list is the only value consistent with the later
        # ``self._sort_formats(formats)`` call.
        formats = []
        for fmt in internal_formats_json['formats']['http']['pseudostreaming']['mp4']:
            # NOTE(review): the assignment of ``p`` was missing from the
            # scrape.  ``fmt['paths'][0]`` is a reconstruction -- confirm the
            # key name and index against the upstream file / live JSON.
            p = fmt['paths'][0]
            formats.append({
                # ``%d`` requires ``p['port']`` to be numeric.
                'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']),
                # NOTE(review): 'ext' and 'protocol' were missing from the
                # scrape; values reconstructed from the mp4-over-http
                # source path above -- confirm.
                'ext': 'mp4',
                'width': int_or_none(fmt.get('width')),
                'height': int_or_none(fmt.get('height')),
                'tbr': int_or_none(fmt.get('bitrate')),
                'protocol': 'http',
            })
        self._sort_formats(formats)

        # NOTE(review): the ``return {`` opener and the 'id' / 'formats'
        # entries were missing from the scrape and are reconstructed.
        return {
            'id': video_id,
            'title': internal_meta_json['title'],
            'formats': formats,
            'thumbnail': internal_meta_json.get('imageUrl'),
            'description': internal_meta_json.get('shortPreamble'),
            'timestamp': int_or_none(internal_meta_json.get('timePublished')),
            'duration': int_or_none(internal_meta_json.get('duration')),
            'view_count': int_or_none(internal_meta_json.get('views')),
        }