]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/tv2.py
f225ec68448271eabbcca0b63ef367f37e7e908c
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  17 class TV2IE(InfoExtractor
): 
  18     _VALID_URL 
= r
'https?://(?:www\.)?tv2\.no/v/(?P<id>\d+)' 
  20         'url': 'http://www.tv2.no/v/916509/', 
  24             'title': 'Se Frode Gryttens hyllest av Steven Gerrard', 
  25             'description': 'TV 2 Sportens huspoet tar avskjed med Liverpools kaptein Steven Gerrard.', 
  26             'timestamp': 1431715610, 
  27             'upload_date': '20150515', 
  34             'skip_download': True, 
  38     def _real_extract(self
, url
): 
  39         video_id 
= self
._match
_id
(url
) 
  43         for protocol 
in ('HDS', 'HLS'): 
  44             data 
= self
._download
_json
( 
  45                 'http://sumo.tv2.no/api/web/asset/%s/play.json?protocol=%s&videoFormat=SMIL+ISMUSP' % (video_id
, protocol
), 
  46                 video_id
, 'Downloading play JSON')['playback'] 
  47             for item 
in data
['items']['item']: 
  48                 video_url 
= item
.get('url') 
  49                 if not video_url 
or video_url 
in format_urls
: 
  51                 format_id 
= '%s-%s' % (protocol
.lower(), item
.get('mediaFormat')) 
  52                 if not self
._is
_valid
_url
(video_url
, video_id
, format_id
): 
  54                 format_urls
.append(video_url
) 
  55                 ext 
= determine_ext(video_url
) 
  57                     formats
.extend(self
._extract
_f
4m
_formats
( 
  58                         video_url
, video_id
, f4m_id
=format_id
, fatal
=False)) 
  60                     formats
.extend(self
._extract
_m
3u8_formats
( 
  61                         video_url
, video_id
, 'mp4', entry_protocol
='m3u8_native', 
  62                         m3u8_id
=format_id
, fatal
=False)) 
  63                 elif ext 
== 'ism' or video_url
.endswith('.ism/Manifest'): 
  68                         'format_id': format_id
, 
  69                         'tbr': int_or_none(item
.get('bitrate')), 
  70                         'filesize': int_or_none(item
.get('fileSize')), 
  72         self
._sort
_formats
(formats
) 
  74         asset 
= self
._download
_json
( 
  75             'http://sumo.tv2.no/api/web/asset/%s.json' % video_id
, 
  76             video_id
, 'Downloading metadata JSON')['asset'] 
  78         title 
= asset
['title'] 
  79         description 
= asset
.get('description') 
  80         timestamp 
= parse_iso8601(asset
.get('createTime')) 
  81         duration 
= float_or_none(asset
.get('accurateDuration') or asset
.get('duration')) 
  82         view_count 
= int_or_none(asset
.get('views')) 
  83         categories 
= asset
.get('keywords', '').split(',') 
  86             'id': thumbnail
.get('@type'), 
  87             'url': thumbnail
.get('url'), 
  88         } for _
, thumbnail 
in asset
.get('imageVersions', {}).items()] 
  94             'description': description
, 
  95             'thumbnails': thumbnails
, 
  96             'timestamp': timestamp
, 
  98             'view_count': view_count
, 
  99             'categories': categories
, 
 104 class TV2ArticleIE(InfoExtractor
): 
 105     _VALID_URL 
= r
'https?://(?:www\.)?tv2\.no/(?:a|\d{4}/\d{2}/\d{2}(/[^/]+)+)/(?P<id>\d+)' 
 107         'url': 'http://www.tv2.no/2015/05/16/nyheter/alesund/krim/pingvin/6930542', 
 110             'title': 'Russen hetses etter pingvintyveri - innrømmer å ha åpnet luken på buret', 
 111             'description': 'md5:339573779d3eea3542ffe12006190954', 
 115         'url': 'http://www.tv2.no/a/6930542', 
 116         'only_matching': True, 
 119     def _real_extract(self
, url
): 
 120         playlist_id 
= self
._match
_id
(url
) 
 122         webpage 
= self
._download
_webpage
(url
, playlist_id
) 
 124         # Old embed pattern (looks unused nowadays) 
 125         assets 
= re
.findall(r
'data-assetid=["\'](\d
+)', webpage) 
 129             for v in re.findall('TV2ContentboxVideo\
(({.+?
})\
)', webpage): 
 130                 video = self._parse_json( 
 131                     v, playlist_id, transform_source=js_to_json, fatal=False) 
 134                 asset = video.get('assetId
') 
 139             self.url_result('http
://www
.tv2
.no
/v
/%s' % asset_id, 'TV2
') 
 140             for asset_id in assets] 
 142         title = remove_end(self._og_search_title(webpage), ' - TV2
.no
') 
 143         description = remove_end(self._og_search_description(webpage), ' - TV2
.no
') 
 145         return self.playlist_result(entries, playlist_id, title, description)