Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/firsttv.py
debian/changelog: Fix spelling of Re-enable to please lintian.
[youtubedl] / youtube_dl / extractor / firsttv.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..compat import compat_urlparse
6 from ..utils import (
7 int_or_none,
8 qualities,
9 unified_strdate,
10 )
11
12
class FirstTVIE(InfoExtractor):
    """Extractor for video pages on 1tv.ru.

    The page embeds a ``data-playlist-url`` attribute pointing at a JSON
    playlist; the first playlist entry supplies the video id, the poster
    and the list of media sources, while the remaining metadata is scraped
    from the HTML page itself.
    """
    IE_NAME = '1tv'
    IE_DESC = 'Первый канал'
    _VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>[^/?#]+)'

    _TESTS = [{
        # single format
        'url': 'http://www.1tv.ru/shows/naedine-so-vsemi/vypuski/gost-lyudmila-senchina-naedine-so-vsemi-vypusk-ot-12-02-2015',
        'md5': 'a1b6b60d530ebcf8daacf4565762bbaf',
        'info_dict': {
            'id': '40049',
            'ext': 'mp4',
            'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015',
            'description': 'md5:36a39c1d19618fec57d12efe212a8370',
            # Raw string: '\.' is not a valid escape in a regular literal.
            'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$',
            'upload_date': '20150212',
            'duration': 2694,
        },
    }, {
        # multiple formats
        'url': 'http://www.1tv.ru/shows/dobroe-utro/pro-zdorove/vesennyaya-allergiya-dobroe-utro-fragment-vypuska-ot-07042016',
        'info_dict': {
            'id': '364746',
            'ext': 'mp4',
            'title': 'Весенняя аллергия. Доброе утро. Фрагмент выпуска от 07.04.2016',
            'description': 'md5:a242eea0031fd180a4497d52640a9572',
            # Raw string: '\.' is not a valid escape in a regular literal.
            'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$',
            'upload_date': '20160407',
            'duration': 179,
            'formats': 'mincount:3',
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        Downloads the page, resolves the playlist URL found in its
        ``data-playlist-url`` attribute against *url*, fetches the playlist
        JSON and uses its first entry. Returns a dict with the keys ``id``,
        ``thumbnail``, ``title``, ``description``, ``upload_date``,
        ``duration`` and ``formats``.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)
        # The attribute value may be relative, hence the join with the
        # page URL.
        playlist_url = compat_urlparse.urljoin(url, self._search_regex(
            r'data-playlist-url="([^"]+)', webpage, 'playlist url'))

        # Only the first playlist entry is used.
        item = self._download_json(playlist_url, display_id)[0]
        video_id = item['id']

        # Format names handed to the qualities helper (presumably ordered
        # from lowest to highest preference -- confirm against utils).
        quality = qualities(('ld', 'sd', 'hd'))
        formats = []
        # 'or []' also covers an explicit null 'mbr' value, which
        # item.get('mbr', []) would pass through as None.
        for f in item.get('mbr') or []:
            src = f.get('src')
            if not src:
                # An entry without a source URL cannot be downloaded.
                continue
            fname = f.get('name')
            formats.append({
                'url': src,
                'format_id': fname,
                'quality': quality(fname),
            })
        self._sort_formats(formats)

        # Prefer a title scraped from the page; fall back to the playlist
        # entry when neither pattern matches.
        title = self._html_search_regex(
            (r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
             r"'title'\s*:\s*'([^']+)'"),
            webpage, 'title', default=None) or item['title']
        # Prefer the on-page description block; fall back to the
        # 'description' meta tag.
        description = self._html_search_regex(
            r'<div class="descr">\s*<div>&nbsp;</div>\s*<p>([^<]*)</p></div>',
            webpage, 'description', default=None) or self._html_search_meta(
            'description', webpage, 'description')
        duration = int_or_none(self._html_search_meta(
            'video:duration', webpage, 'video duration', fatal=False))
        upload_date = unified_strdate(self._html_search_meta(
            'ya:ovs:upload_date', webpage, 'upload date', fatal=False))

        return {
            'id': video_id,
            'thumbnail': item.get('poster') or self._og_search_thumbnail(webpage),
            'title': title,
            'description': description,
            'upload_date': upload_date,
            # Already normalised by int_or_none above.
            'duration': duration,
            'formats': formats,
        }