]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/vier.py
6645c6186dbff315e850f22ae793677803cbbf9b
   2 from __future__ 
import unicode_literals
 
   7 from .common 
import InfoExtractor
 
  10 class VierIE(InfoExtractor
): 
  12     _VALID_URL 
= r
'https?://(?:www\.)?vier\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))' 
  14         'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129', 
  17             'display_id': 'het-wordt-warm-de-moestuin', 
  19             'title': 'Het wordt warm in De Moestuin', 
  20             'description': 'De vele uren werk eisen hun tol. Wim droomt van assistentie...', 
  24             'skip_download': True, 
  27         'url': 'http://www.vier.be/planb/videos/mieren-herders-van-de-bladluizen', 
  28         'only_matching': True, 
  30         'url': 'http://www.vier.be/video/v3/embed/16129', 
  31         'only_matching': True, 
  34     def _real_extract(self
, url
): 
  35         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  36         embed_id 
= mobj
.group('embed_id') 
  37         display_id 
= mobj
.group('display_id') or embed_id
 
  39         webpage 
= self
._download
_webpage
(url
, display_id
) 
  41         video_id 
= self
._search
_regex
( 
  42             [r
'data-nid="(\d+)"', r
'"nid"\s*:\s*"(\d+)"'], 
  44         application 
= self
._search
_regex
( 
  45             [r
'data-application="([^"]+)"', r
'"application"\s*:\s*"([^"]+)"'], 
  46             webpage
, 'application', default
='vier_vod') 
  47         filename 
= self
._search
_regex
( 
  48             [r
'data-filename="([^"]+)"', r
'"filename"\s*:\s*"([^"]+)"'], 
  51         playlist_url 
= 'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8' % (application
, filename
) 
  52         formats 
= self
._extract
_m
3u8_formats
(playlist_url
, display_id
, 'mp4') 
  53         self
._sort
_formats
(formats
) 
  55         title 
= self
._og
_search
_title
(webpage
, default
=display_id
) 
  56         description 
= self
._og
_search
_description
(webpage
, default
=None) 
  57         thumbnail 
= self
._og
_search
_thumbnail
(webpage
, default
=None) 
  61             'display_id': display_id
, 
  63             'description': description
, 
  64             'thumbnail': thumbnail
, 
  69 class VierVideosIE(InfoExtractor
): 
  70     IE_NAME 
= 'vier:videos' 
  71     _VALID_URL 
= r
'https?://(?:www\.)?vier\.be/(?P<program>[^/]+)/videos(?:\?.*\bpage=(?P<page>\d+)|$)' 
  73         'url': 'http://www.vier.be/demoestuin/videos', 
  77         'playlist_mincount': 153, 
  79         'url': 'http://www.vier.be/demoestuin/videos?page=6', 
  81             'id': 'demoestuin-page6', 
  83         'playlist_mincount': 20, 
  85         'url': 'http://www.vier.be/demoestuin/videos?page=7', 
  87             'id': 'demoestuin-page7', 
  89         'playlist_mincount': 13, 
  92     def _real_extract(self
, url
): 
  93         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  94         program 
= mobj
.group('program') 
  96         page_id 
= mobj
.group('page') 
  98             page_id 
= int(page_id
) 
 100             playlist_id 
= '%s-page%d' % (program
, page_id
) 
 103             playlist_id 
= program
 
 106         for current_page_id 
in itertools
.count(start_page
): 
 107             current_page 
= self
._download
_webpage
( 
 108                 'http://www.vier.be/%s/videos?page=%d' % (program
, current_page_id
), 
 110                 'Downloading page %d' % (current_page_id 
+ 1)) 
 112                 self
.url_result('http://www.vier.be' + video_url
, 'Vier') 
 113                 for video_url 
in re
.findall( 
 114                     r
'<h3><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page
)] 
 115             entries
.extend(page_entries
) 
 116             if page_id 
or '>Meer<' not in current_page
: 
 119         return self
.playlist_result(entries
, playlist_id
)