# -*- coding: utf-8 -*-
# Source: youtubedl repository, youtube_dl/extractor/rutube.py
from __future__ import unicode_literals

import itertools
import re

from .common import InfoExtractor
# NOTE(review): compat_str, determine_ext and unified_strdate are used by the
# extractors below, but their import lines are missing from this copy of the
# file. Restore them from the package's helper modules (presumably ..compat
# and ..utils -- confirm against the repository).
class RutubeIE(InfoExtractor):
    """Extractor for a single Rutube video addressed by its 32-character id.

    Handles ``/video/<id>``, ``/embed/<id>`` and ``/play/embed/<id>`` URLs
    (see ``_VALID_URL``) and builds the result from two JSON endpoints:
    the video metadata and the playback options.
    """

    IE_DESC = 'Rutube videos'
    _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/(?P<id>[\da-z]{32})'

    _TESTS = [{
        'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
        'info_dict': {
            'id': '3eac3b4561676c17df9132a9a1e62e3e',
            'title': 'Раненный кенгуру забежал в аптеку',
            'description': 'http://www.ntdtv.ru ',
            'uploader': 'NTDRussian',
            'uploader_id': '29790',
            'upload_date': '20131016',
        },
        'params': {
            # It requires ffmpeg (m3u8 download)
            'skip_download': True,
        },
    }, {
        'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661',
        'only_matching': True,
    }, {
        'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the ``src`` URL of every Rutube embed iframe in *webpage*.

        The URL may be absolute or protocol-relative; the ``\\1``
        backreference makes the closing quote match the opening one.
        """
        return [mobj.group('url') for mobj in re.finditer(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/embed/[\da-z]{32}.*?)\1',
            webpage)]

    def _real_extract(self, url):
        """Download metadata and playback options for *url*; return an info dict.

        Indexes the JSON responses directly, so a missing mandatory field
        (e.g. ``title`` or ``video_balancer``) raises ``KeyError``.
        """
        video_id = self._match_id(url)
        video = self._download_json(
            'http://rutube.ru/api/video/%s/?format=json' % video_id,
            video_id, 'Downloading video JSON')

        # Some videos don't have the author field
        author = video.get('author') or {}

        options = self._download_json(
            'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
            video_id, 'Downloading options JSON')

        formats = []
        for format_id, format_url in options['video_balancer'].items():
            ext = determine_ext(format_url)
            if ext == 'm3u8':
                # fatal=False: a broken manifest should not abort the whole
                # extraction while other balancer entries may still work.
                formats.extend(self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
            elif ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    format_url, video_id, f4m_id=format_id, fatal=False))
            else:
                # Anything else is offered as a plain URL.
                formats.append({
                    'url': format_url,
                    'format_id': format_id,
                })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video['title'],
            'description': video['description'],
            'duration': video['duration'],
            'view_count': video['hits'],
            'formats': formats,
            'thumbnail': video['thumbnail_url'],
            'uploader': author.get('name'),
            'uploader_id': compat_str(author['id']) if author else None,
            'upload_date': unified_strdate(video['created_ts']),
            'age_limit': 18 if video['is_adult'] else 0,
        }
class RutubeEmbedIE(InfoExtractor):
    """Extractor for Rutube embed pages addressed by a numeric embed id.

    The embed page is downloaded only to find its canonical link, which is
    then handed over to the 'Rutube' extractor.
    """

    IE_NAME = 'rutube:embed'
    IE_DESC = 'Rutube embedded videos'
    _VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)'

    _TESTS = [{
        'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
        'info_dict': {
            'id': 'a10e53b86e8f349080f718582ce4c661',
            'upload_date': '20131223',
            'uploader_id': '297833',
            'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89',
            'uploader': 'subziro89 ILya',
            'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89',
        },
        'params': {
            'skip_download': 'Requires ffmpeg',
        },
    }, {
        'url': 'http://rutube.ru/play/embed/8083783',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the embed page at *url* to its canonical video URL."""
        embed_id = self._match_id(url)
        webpage = self._download_webpage(url, embed_id)

        # The numeric embed id is not the video id; the canonical link
        # carries the URL that the 'Rutube' extractor is asked to handle.
        canonical_url = self._html_search_regex(
            r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage,
            'Canonical URL')
        return self.url_result(canonical_url, 'Rutube')
class RutubeChannelIE(InfoExtractor):
    """Extractor for Rutube channel (tag) listings, returned as a playlist.

    Subclasses reuse ``_extract_videos`` and override ``_PAGE_TEMPLATE``
    (and ``_VALID_URL``) to point at a different paginated JSON listing.
    """

    IE_NAME = 'rutube:channel'
    IE_DESC = 'Rutube channels'
    _VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://rutube.ru/tags/video/1800/',
        'info_dict': {
            'id': '1800',
        },
        'playlist_mincount': 68,
    }]

    # Filled with (listing id, 1-based page number).
    _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json'

    def _extract_videos(self, channel_id, channel_title=None):
        """Walk the paginated listing for *channel_id* and build a playlist.

        Pages are requested from 1 upwards until a page has no results or
        reports ``has_next`` as false. Each result's ``video_url`` becomes
        an entry delegated to the 'Rutube' extractor.
        """
        entries = []
        for pagenum in itertools.count(1):
            page = self._download_json(
                self._PAGE_TEMPLATE % (channel_id, pagenum),
                channel_id, 'Downloading page %s' % pagenum)
            results = page['results']
            if not results:
                break
            entries.extend(
                self.url_result(result['video_url'], 'Rutube')
                for result in results)
            if not page['has_next']:
                break
        return self.playlist_result(entries, channel_id, channel_title)

    def _real_extract(self, url):
        """Extract the channel id from *url* and return its playlist."""
        mobj = re.match(self._VALID_URL, url)
        channel_id = mobj.group('id')
        return self._extract_videos(channel_id)
class RutubeMovieIE(RutubeChannelIE):
    """Extractor for Rutube movie (``/metainfo/tv/<id>``) listings.

    Fetches the movie's JSON record for its name, then reuses the parent's
    paginated listing walk with a movie-specific page template.
    """

    IE_NAME = 'rutube:movie'
    IE_DESC = 'Rutube movies'
    _VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)'
    # Explicitly empty: otherwise the parent's channel test would be
    # inherited, and its URL does not match this class's _VALID_URL.
    _TESTS = []

    _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json'
    _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json'

    def _real_extract(self, url):
        """Return a playlist of the movie's videos, titled with its name."""
        movie_id = self._match_id(url)
        movie = self._download_json(
            self._MOVIE_TEMPLATE % movie_id, movie_id,
            'Downloading movie JSON')
        movie_name = movie['name']
        return self._extract_videos(movie_id, movie_name)
class RutubePersonIE(RutubeChannelIE):
    """Extractor for the video listing of a Rutube person page.

    Only the URL pattern and the page template differ from the parent;
    extraction itself is inherited.
    """

    IE_NAME = 'rutube:person'
    IE_DESC = 'Rutube person videos'
    _VALID_URL = r'https?://rutube\.ru/video/person/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://rutube.ru/video/person/313878/',
        'info_dict': {
            'id': '313878',
        },
        'playlist_mincount': 37,
    }]

    # Filled with (person id, 1-based page number).
    _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'