]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/tv2.py
fa338b936de7d3fef15cf24bccc05255bc928ee6
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  16 class TV2IE(InfoExtractor
): 
  17     _VALID_URL 
= 'http://(?:www\.)?tv2\.no/v/(?P<id>\d+)' 
  19         'url': 'http://www.tv2.no/v/916509/', 
  20         'md5': '9cb9e3410b18b515d71892f27856e9b1', 
  24             'title': 'Se Gryttens hyllest av Steven Gerrard', 
  25             'description': 'TV 2 Sportens huspoet tar avskjed med Liverpools kaptein Steven Gerrard.', 
  26             'timestamp': 1431715610, 
  27             'upload_date': '20150515', 
  34     def _real_extract(self
, url
): 
  35         video_id 
= self
._match
_id
(url
) 
  39         for protocol 
in ('HDS', 'HLS'): 
  40             data 
= self
._download
_json
( 
  41                 'http://sumo.tv2.no/api/web/asset/%s/play.json?protocol=%s&videoFormat=SMIL+ISMUSP' % (video_id
, protocol
), 
  42                 video_id
, 'Downloading play JSON')['playback'] 
  43             for item 
in data
['items']['item']: 
  44                 video_url 
= item
.get('url') 
  45                 if not video_url 
or video_url 
in format_urls
: 
  47                 format_id 
= '%s-%s' % (protocol
.lower(), item
.get('mediaFormat')) 
  48                 if not self
._is
_valid
_url
(video_url
, video_id
, format_id
): 
  50                 format_urls
.append(video_url
) 
  51                 ext 
= determine_ext(video_url
) 
  53                     formats
.extend(self
._extract
_f
4m
_formats
( 
  54                         video_url
, video_id
, f4m_id
=format_id
)) 
  56                     formats
.extend(self
._extract
_m
3u8_formats
( 
  57                         video_url
, video_id
, 'mp4', m3u8_id
=format_id
)) 
  58                 elif ext 
== 'ism' or video_url
.endswith('.ism/Manifest'): 
  63                         'format_id': format_id
, 
  64                         'tbr': int_or_none(item
.get('bitrate')), 
  65                         'filesize': int_or_none(item
.get('fileSize')), 
  67         self
._sort
_formats
(formats
) 
  69         asset 
= self
._download
_json
( 
  70             'http://sumo.tv2.no/api/web/asset/%s.json' % video_id
, 
  71             video_id
, 'Downloading metadata JSON')['asset'] 
  73         title 
= asset
['title'] 
  74         description 
= asset
.get('description') 
  75         timestamp 
= parse_iso8601(asset
.get('createTime')) 
  76         duration 
= float_or_none(asset
.get('accurateDuration') or asset
.get('duration')) 
  77         view_count 
= int_or_none(asset
.get('views')) 
  78         categories 
= asset
.get('keywords', '').split(',') 
  81             'id': thumbnail
.get('@type'), 
  82             'url': thumbnail
.get('url'), 
  83         } for _
, thumbnail 
in asset
.get('imageVersions', {}).items()] 
  89             'description': description
, 
  90             'thumbnails': thumbnails
, 
  91             'timestamp': timestamp
, 
  93             'view_count': view_count
, 
  94             'categories': categories
, 
  99 class TV2ArticleIE(InfoExtractor
): 
 100     _VALID_URL 
= 'http://(?:www\.)?tv2\.no/(?:a|\d{4}/\d{2}/\d{2}(/[^/]+)+)/(?P<id>\d+)' 
 102         'url': 'http://www.tv2.no/2015/05/16/nyheter/alesund/krim/pingvin/6930542', 
 105             'title': 'Russen hetses etter pingvintyveri – innrømmer å ha åpnet luken på buret', 
 106             'description': 'md5:339573779d3eea3542ffe12006190954', 
 110         'url': 'http://www.tv2.no/a/6930542', 
 111         'only_matching': True, 
 114     def _real_extract(self
, url
): 
 115         playlist_id 
= self
._match
_id
(url
) 
 117         webpage 
= self
._download
_webpage
(url
, playlist_id
) 
 120             self
.url_result('http://www.tv2.no/v/%s' % video_id
, 'TV2') 
 121             for video_id 
in re
.findall(r
'data-assetid="(\d+)"', webpage
)] 
 123         title 
= remove_end(self
._og
_search
_title
(webpage
), ' - TV2.no') 
 124         description 
= remove_end(self
._og
_search
_description
(webpage
), ' - TV2.no') 
 126         return self
.playlist_result(entries
, playlist_id
, title
, description
)