# Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/vier.py
# 5086f591e56b6b6a9266e96b3c26541963757120
from __future__ import unicode_literals

import itertools
import re

from .common import InfoExtractor
class VierIE(InfoExtractor):
    """Extractor for single video pages and embeds on vier.be / vijf.be."""

    # NOTE(review): the extracted source lost one line between the class
    # header and _VALID_URL (possibly an IE_NAME declaration, by analogy with
    # VierVideosIE's 'vier:videos') -- confirm against upstream.
    _VALID_URL = r'https?://(?:www\.)?(?P<site>vier|vijf)\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))'
    # NOTE(review): the 'info_dict' / 'params' nesting and the 'id' / 'ext'
    # fields were lost in extraction; they are reconstructed here from the
    # test URLs and the m3u8/mp4 playlist built in _real_extract -- confirm
    # against upstream.
    _TESTS = [{
        'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129',
        'info_dict': {
            'id': '16129',
            'display_id': 'het-wordt-warm-de-moestuin',
            'ext': 'mp4',
            'title': 'Het wordt warm in De Moestuin',
            'description': 'De vele uren werk eisen hun tol. Wim droomt van assistentie...',
        },
        'params': {
            # The media is served through an m3u8 playlist.
            'skip_download': True,
        },
    }, {
        'url': 'http://www.vijf.be/temptationisland/videos/zo-grappig-temptation-island-hosts-moeten-kiezen-tussen-onmogelijke-dilemmas/2561614',
        'info_dict': {
            'id': '2561614',
            'display_id': 'zo-grappig-temptation-island-hosts-moeten-kiezen-tussen-onmogelijke-dilemmas',
            'ext': 'mp4',
            'title': 'ZO grappig: Temptation Island hosts moeten kiezen tussen onmogelijke dilemma\'s',
            'description': 'Het spel is simpel: Annelien Coorevits en Rick Brandsteder krijgen telkens 2 dilemma\'s voorgeschoteld en ze MOETEN een keuze maken.',
        },
        'params': {
            # The media is served through an m3u8 playlist.
            'skip_download': True,
        },
    }, {
        'url': 'http://www.vier.be/planb/videos/mieren-herders-van-de-bladluizen',
        'only_matching': True,
    }, {
        'url': 'http://www.vier.be/video/v3/embed/16129',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for a single video page or embed URL.

        The page is downloaded and scanned for the node id, the streaming
        application name and the media filename; those are combined into a
        playlist URL from which the formats are extracted.

        Returns a dict with 'id', 'display_id', 'title', 'description',
        'thumbnail' and 'formats'.
        """
        mobj = re.match(self._VALID_URL, url)
        embed_id = mobj.group('embed_id')
        # Embed URLs have no slug, so the numeric embed id doubles as the
        # display id in that case.
        display_id = mobj.group('display_id') or embed_id
        site = mobj.group('site')

        webpage = self._download_webpage(url, display_id)

        # Each value may appear either as an HTML data-* attribute or as a
        # JSON-style "key": "value" pair, hence two patterns per lookup.
        # NOTE(review): the trailing (webpage, <label>) arguments of the
        # 'video id' and 'filename' lookups were lost in extraction and are
        # restored to mirror the intact 'application' lookup -- confirm.
        video_id = self._search_regex(
            [r'data-nid="(\d+)"', r'"nid"\s*:\s*"(\d+)"'],
            webpage, 'video id')
        application = self._search_regex(
            [r'data-application="([^"]+)"', r'"application"\s*:\s*"([^"]+)"'],
            webpage, 'application', default=site + '_vod')
        filename = self._search_regex(
            [r'data-filename="([^"]+)"', r'"filename"\s*:\s*"([^"]+)"'],
            webpage, 'filename')

        playlist_url = 'http://vod.streamcloud.be/%s/_definst_/mp4:%s.mp4/playlist.m3u8' % (application, filename)
        formats = self._extract_wowza_formats(
            playlist_url, display_id, skip_protocols=['dash'])
        self._sort_formats(formats)

        # The title falls back to the display id; description and thumbnail
        # are optional and become None when the page has no such metadata.
        title = self._og_search_title(webpage, default=display_id)
        description = self._og_search_description(webpage, default=None)
        thumbnail = self._og_search_thumbnail(webpage, default=None)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'formats': formats,
        }
class VierVideosIE(InfoExtractor):
    """Playlist extractor for a program's video listing on vier.be / vijf.be."""

    IE_NAME = 'vier:videos'
    _VALID_URL = r'https?://(?:www\.)?(?P<site>vier|vijf)\.be/(?P<program>[^/]+)/videos(?:\?.*\bpage=(?P<page>\d+)|$)'
    # NOTE(review): the 'info_dict' nesting (and the first test's 'id') were
    # lost in extraction and are reconstructed from the surviving entries --
    # confirm against upstream.
    _TESTS = [{
        'url': 'http://www.vier.be/demoestuin/videos',
        'info_dict': {
            'id': 'demoestuin',
        },
        'playlist_mincount': 153,
    }, {
        'url': 'http://www.vijf.be/temptationisland/videos',
        'info_dict': {
            'id': 'temptationisland',
        },
        'playlist_mincount': 159,
    }, {
        'url': 'http://www.vier.be/demoestuin/videos?page=6',
        'info_dict': {
            'id': 'demoestuin-page6',
        },
        'playlist_mincount': 20,
    }, {
        'url': 'http://www.vier.be/demoestuin/videos?page=7',
        'info_dict': {
            'id': 'demoestuin-page7',
        },
        'playlist_mincount': 13,
    }]

    def _real_extract(self, url):
        """Collect the video entries of a program listing into a playlist.

        When the URL names an explicit ``page`` only that listing page is
        fetched and the playlist id becomes ``<program>-page<N>``; otherwise
        listing pages are fetched one after another until a page no longer
        contains the '>Meer<' marker, and the playlist id is the program name.

        Returns the result of ``self.playlist_result(entries, playlist_id)``.
        """
        mobj = re.match(self._VALID_URL, url)
        program = mobj.group('program')
        site = mobj.group('site')

        page_id = mobj.group('page')
        # Decide on "was a page requested?" before converting to int, so that
        # an explicit page=0 is still treated as a single-page request
        # (int('0') is falsy and would otherwise trigger a full crawl).
        single_page = page_id is not None
        if single_page:
            page_id = int(page_id)
            start_page = page_id
            playlist_id = '%s-page%d' % (program, page_id)
        else:
            # NOTE(review): the start index for a full crawl was lost in
            # extraction; 0 is assumed because the progress note below
            # reports current_page_id + 1 -- confirm against upstream.
            start_page = 0
            playlist_id = program

        entries = []
        for current_page_id in itertools.count(start_page):
            # NOTE(review): the id argument passed to _download_webpage was
            # lost in extraction; the program name is assumed -- confirm.
            current_page = self._download_webpage(
                'http://www.%s.be/%s/videos?page=%d' % (site, program, current_page_id),
                program,
                'Downloading page %d' % (current_page_id + 1))
            # Every matching heading link on the listing is delegated to the
            # single-video extractor.
            entries.extend(
                self.url_result('http://www.' + site + '.be' + video_url, 'Vier')
                for video_url in re.findall(
                    r'<h[23]><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page))
            # Stop after the one requested page, or once the page lacks the
            # '>Meer<' marker (presumably the "more" pagination link).
            if single_page or '>Meer<' not in current_page:
                break

        return self.playlist_result(entries, playlist_id)