Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/ynet.py
d/p/disable-autoupdate-mechanism.patch: Extend to clean up errant import and README...
[youtubedl] / youtube_dl / extractor / ynet.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5 import json
6
7 from .common import InfoExtractor
8 from ..compat import compat_urllib_parse_unquote_plus
9
10
class YnetIE(InfoExtractor):
    """Information extractor for video pages hosted on ynet.co.il.

    The video id is the ``L-<digits>[-<digits>...]`` token of the page URL.
    Formats are read from the f4m manifest referenced by the player
    configuration embedded in the page's Open Graph video URL.
    """

    _VALID_URL = r'https?://(?:.+?\.)?ynet\.co\.il/(?:.+?/)?0,7340,(?P<id>L(?:-[0-9]+)+),00\.html'
    _TESTS = [
        {
            'url': 'http://hot.ynet.co.il/home/0,7340,L-11659-99244,00.html',
            'info_dict': {
                'id': 'L-11659-99244',
                'ext': 'flv',
                'title': 'איש לא יודע מאיפה באנו',
                'thumbnail': r're:^https?://.*\.jpg',
            },
        },
        {
            'url': 'http://hot.ynet.co.il/home/0,7340,L-8859-84418,00.html',
            'info_dict': {
                'id': 'L-8859-84418',
                'ext': 'flv',
                'title': "צפו: הנשיקה הלוהטת של תורגי' ויוליה פלוטקין",
                'thumbnail': r're:^https?://.*\.jpg',
            },
        },
    ]

    def _real_extract(self, url):
        """Build the info dict (id, title, formats, thumbnail) for *url*."""
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        # The Open Graph video URL is URL-encoded and ends with a
        # ``config={...}`` JSON object describing the clip.
        og_video_url = self._og_search_video_url(page)
        decoded_url = compat_urllib_parse_unquote_plus(og_video_url)
        raw_config = self._search_regex(
            r'config=({.+?})$', decoded_url, 'video config')
        player_config = json.loads(raw_config)
        manifest_url = player_config['clip']['url']

        # Keep only the quoted part of titles shaped like
        # ``ynet - HOT -- "<title>"``; any other title is used unchanged.
        title = self._og_search_title(page)
        quoted = re.search(r'ynet - HOT -- (["\']+)(?P<title>.+?)\1', title)
        if quoted is not None:
            title = quoted.group('title')

        formats = self._extract_f4m_formats(manifest_url, video_id)
        self._sort_formats(formats)

        thumbnail = self._og_search_thumbnail(page)
        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
        }