]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/vier.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   9 class VierIE(InfoExtractor
): 
  11     _VALID_URL 
= r
'https?://(?:www\.)?vier\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))' 
  13         'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129', 
  16             'display_id': 'het-wordt-warm-de-moestuin', 
  18             'title': 'Het wordt warm in De Moestuin', 
  19             'description': 'De vele uren werk eisen hun tol. Wim droomt van assistentie...', 
  23             'skip_download': True, 
  26         'url': 'http://www.vier.be/planb/videos/mieren-herders-van-de-bladluizen', 
  27         'only_matching': True, 
  29         'url': 'http://www.vier.be/video/v3/embed/16129', 
  30         'only_matching': True, 
  33     def _real_extract(self
, url
): 
  34         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  35         embed_id 
= mobj
.group('embed_id') 
  36         display_id 
= mobj
.group('display_id') or embed_id
 
  38         webpage 
= self
._download
_webpage
(url
, display_id
) 
  40         video_id 
= self
._search
_regex
( 
  41             [r
'data-nid="(\d+)"', r
'"nid"\s*:\s*"(\d+)"'], 
  43         application 
= self
._search
_regex
( 
  44             [r
'data-application="([^"]+)"', r
'"application"\s*:\s*"([^"]+)"'], 
  45             webpage
, 'application', default
='vier_vod') 
  46         filename 
= self
._search
_regex
( 
  47             [r
'data-filename="([^"]+)"', r
'"filename"\s*:\s*"([^"]+)"'], 
  50         playlist_url 
= 'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8' % (application
, filename
) 
  51         formats 
= self
._extract
_m
3u8_formats
(playlist_url
, display_id
, 'mp4') 
  53         title 
= self
._og
_search
_title
(webpage
, default
=display_id
) 
  54         description 
= self
._og
_search
_description
(webpage
, default
=None) 
  55         thumbnail 
= self
._og
_search
_thumbnail
(webpage
, default
=None) 
  59             'display_id': display_id
, 
  61             'description': description
, 
  62             'thumbnail': thumbnail
, 
  67 class VierVideosIE(InfoExtractor
): 
  68     IE_NAME 
= 'vier:videos' 
  69     _VALID_URL 
= r
'https?://(?:www\.)?vier\.be/(?P<program>[^/]+)/videos(?:\?.*\bpage=(?P<page>\d+)|$)' 
  71         'url': 'http://www.vier.be/demoestuin/videos', 
  75         'playlist_mincount': 153, 
  77         'url': 'http://www.vier.be/demoestuin/videos?page=6', 
  79             'id': 'demoestuin-page6', 
  81         'playlist_mincount': 20, 
  83         'url': 'http://www.vier.be/demoestuin/videos?page=7', 
  85             'id': 'demoestuin-page7', 
  87         'playlist_mincount': 13, 
  90     def _real_extract(self
, url
): 
  91         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  92         program 
= mobj
.group('program') 
  94         webpage 
= self
._download
_webpage
(url
, program
) 
  96         page_id 
= mobj
.group('page') 
  98             page_id 
= int(page_id
) 
 100             last_page 
= start_page 
+ 1 
 101             playlist_id 
= '%s-page%d' % (program
, page_id
) 
 104             last_page 
= int(self
._search
_regex
( 
 105                 r
'videos\?page=(\d+)">laatste</a>', 
 106                 webpage
, 'last page', default
=0)) + 1 
 107             playlist_id 
= program
 
 110         for current_page_id 
in range(start_page
, last_page
): 
 111             current_page 
= self
._download
_webpage
( 
 112                 'http://www.vier.be/%s/videos?page=%d' % (program
, current_page_id
), 
 114                 'Downloading page %d' % (current_page_id 
+ 1)) if current_page_id 
!= page_id 
else webpage
 
 116                 self
.url_result('http://www.vier.be' + video_url
, 'Vier') 
 117                 for video_url 
in re
.findall( 
 118                     r
'<h3><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page
)] 
 119             entries
.extend(page_entries
) 
 121         return self
.playlist_result(entries
, playlist_id
)