Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/walla.py
Annotate changelog with bug being closed.
[youtubedl] / youtube_dl / extractor / walla.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8 xpath_text,
9 int_or_none,
10 )
11
12
class WallaIE(InfoExtractor):
    """Extractor for video pages under ``vod.walla.co.il``.

    The page URL supplies a numeric video id and a display id (slug); the
    actual metadata, subtitle list and RTMP qualities are read from an XML
    document requested from ``video2.walla.co.il``.
    """

    # Named groups: ``id`` (digits) and ``display_id`` (rest of the path).
    _VALID_URL = r'https?://vod\.walla\.co\.il/[^/]+/(?P<id>\d+)/(?P<display_id>.+)'
    _TEST = {
        'url': 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one',
        'info_dict': {
            'id': '2642630',
            'display_id': 'one-direction-all-for-one',
            'ext': 'flv',
            'title': 'וואן דיירקשן: ההיסטריה',
            'description': 'md5:de9e2512a92442574cdb0913c49bc4d8',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 3600,
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }

    # Maps a subtitle track title as it appears in the XML to the key used
    # in the returned ``subtitles`` dict; unknown titles are used verbatim.
    _SUBTITLE_LANGS = {
        'עברית': 'heb',
    }

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        *url* is matched against ``_VALID_URL`` to obtain the video id and
        display id, the per-video XML is requested through
        ``self._download_xml``, and the first ``./items/item`` element is
        read for title, synopsis, preview picture, duration, subtitles and
        qualities.

        Returns a dict with the keys ``id``, ``display_id``, ``title``,
        ``description``, ``thumbnail``, ``duration``, ``formats`` and
        ``subtitles``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        video = self._download_xml(
            'http://video2.walla.co.il/?w=null/null/%s/@@/video/flv_pl' % video_id,
            display_id)

        item = video.find('./items/item')

        title = xpath_text(item, './title', 'title')
        description = xpath_text(item, './synopsis', 'description')
        thumbnail = xpath_text(item, './preview_pic', 'thumbnail')
        duration = int_or_none(xpath_text(item, './duration', 'duration'))

        subtitles = {}
        for subtitle in item.findall('./subtitles/subtitle'):
            lang = xpath_text(subtitle, './title')
            # One single-entry list per track, keyed by the mapped language.
            subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
                'ext': 'srt',
                'url': xpath_text(subtitle, './src'),
            }]

        formats = []
        for quality in item.findall('./qualities/quality'):
            # Looked up once and reused for both the format id and the
            # height detection below.
            format_id = xpath_text(quality, './title')
            fmt = {
                # Every quality shares the same RTMP endpoint; the stream
                # itself is selected through ``play_path``.
                'url': 'rtmp://wafla.walla.co.il/vod',
                'play_path': xpath_text(quality, './src'),
                'player_url': 'http://isc.walla.co.il/w9/swf/video_swf/vod/WallaMediaPlayerAvod.swf',
                'page_url': url,
                'ext': 'flv',
                'format_id': format_id,
            }
            # NOTE(review): xpath_text is called without a fatal flag here,
            # so it presumably yields None when <title> is absent; guard so
            # that re.search never receives a non-string and raises
            # TypeError for the whole extraction.
            m = re.search(r'^(?P<height>\d+)[Pp]', format_id) if format_id else None
            if m:
                # Titles such as "720p" carry the vertical resolution.
                fmt['height'] = int(m.group('height'))
            formats.append(fmt)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles,
        }