]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/vier.py
from __future__ import unicode_literals

import re

from .common import InfoExtractor
 
   9 class VierIE(InfoExtractor
): 
  11     _VALID_URL 
= r
'https?://(?:www\.)?vier\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))' 
  13         'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129', 
  16             'display_id': 'het-wordt-warm-de-moestuin', 
  18             'title': 'Het wordt warm in De Moestuin', 
  19             'description': 'De vele uren werk eisen hun tol. Wim droomt van assistentie...', 
  23             'skip_download': True, 
  26         'url': 'http://www.vier.be/planb/videos/mieren-herders-van-de-bladluizen', 
  27         'only_matching': True, 
  29         'url': 'http://www.vier.be/video/v3/embed/16129', 
  30         'only_matching': True, 
  33     def _real_extract(self
, url
): 
  34         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  35         embed_id 
= mobj
.group('embed_id') 
  36         display_id 
= mobj
.group('display_id') or embed_id
 
  38         webpage 
= self
._download
_webpage
(url
, display_id
) 
  40         video_id 
= self
._search
_regex
( 
  41             r
'"nid"\s*:\s*"(\d+)"', webpage
, 'video id') 
  42         application 
= self
._search
_regex
( 
  43             r
'"application"\s*:\s*"([^"]+)"', webpage
, 'application', default
='vier_vod') 
  44         filename 
= self
._search
_regex
( 
  45             r
'"filename"\s*:\s*"([^"]+)"', webpage
, 'filename') 
  47         playlist_url 
= 'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8' % (application
, filename
) 
  48         formats 
= self
._extract
_m
3u8_formats
(playlist_url
, display_id
, 'mp4') 
  50         title 
= self
._og
_search
_title
(webpage
, default
=display_id
) 
  51         description 
= self
._og
_search
_description
(webpage
, default
=None) 
  52         thumbnail 
= self
._og
_search
_thumbnail
(webpage
, default
=None) 
  56             'display_id': display_id
, 
  58             'description': description
, 
  59             'thumbnail': thumbnail
, 
  64 class VierVideosIE(InfoExtractor
): 
  65     IE_NAME 
= 'vier:videos' 
  66     _VALID_URL 
= r
'https?://(?:www\.)?vier\.be/(?P<program>[^/]+)/videos(?:\?.*\bpage=(?P<page>\d+)|$)' 
  68         'url': 'http://www.vier.be/demoestuin/videos', 
  72         'playlist_mincount': 153, 
  74         'url': 'http://www.vier.be/demoestuin/videos?page=6', 
  76             'id': 'demoestuin-page6', 
  78         'playlist_mincount': 20, 
  80         'url': 'http://www.vier.be/demoestuin/videos?page=7', 
  82             'id': 'demoestuin-page7', 
  84         'playlist_mincount': 13, 
  87     def _real_extract(self
, url
): 
  88         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  89         program 
= mobj
.group('program') 
  91         webpage 
= self
._download
_webpage
(url
, program
) 
  93         page_id 
= mobj
.group('page') 
  95             page_id 
= int(page_id
) 
  97             last_page 
= start_page 
+ 1 
  98             playlist_id 
= '%s-page%d' % (program
, page_id
) 
 101             last_page 
= int(self
._search
_regex
( 
 102                 r
'videos\?page=(\d+)">laatste</a>', 
 103                 webpage
, 'last page', default
=0)) + 1 
 104             playlist_id 
= program
 
 107         for current_page_id 
in range(start_page
, last_page
): 
 108             current_page 
= self
._download
_webpage
( 
 109                 'http://www.vier.be/%s/videos?page=%d' % (program
, current_page_id
), 
 111                 'Downloading page %d' % (current_page_id 
+ 1)) if current_page_id 
!= page_id 
else webpage
 
 113                 self
.url_result('http://www.vier.be' + video_url
, 'Vier') 
 114                 for video_url 
in re
.findall( 
 115                     r
'<h3><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page
)] 
 116             entries
.extend(page_entries
) 
 118         return self
.playlist_result(entries
, playlist_id
)