]> Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/azmedien.py
New upstream version 2017.02.07
[youtubedl] / youtube_dl / extractor / azmedien.py
1 from __future__ import unicode_literals
2
3 import re
4
5 from .common import InfoExtractor
6 from .kaltura import KalturaIE
7 from ..utils import (
8 get_element_by_id,
9 strip_or_none,
10 urljoin,
11 )
12
13
class AZMedienBaseIE(InfoExtractor):
    """Common base of the AZ Medien extractors: provides the Kaltura hand-off."""

    def _kaltura_video(self, partner_id, entry_id):
        """Return a url_result that delegates one entry to the Kaltura extractor.

        partner_id and entry_id are interpolated into a
        'kaltura:<partner_id>:<entry_id>' URL; entry_id is additionally
        passed along as the video_id of the result.
        """
        kaltura_url = 'kaltura:%s:%s' % (partner_id, entry_id)
        return self.url_result(
            kaltura_url, ie=KalturaIE.ie_key(), video_id=entry_id)
19
20
class AZMedienIE(AZMedienBaseIE):
    """Extractor for single video pages of telezueri.ch, telebaern.tv and telem1.ch.

    The 'id' group of _VALID_URL captures the trailing slug of the URL: either
    the text following the 'NNN-segment-' prefix or, when present, the URL
    fragment after '#'.
    """

    IE_DESC = 'AZ Medien videos'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?
                        (?:
                            telezueri\.ch|
                            telebaern\.tv|
                            telem1\.ch
                        )/
                        [0-9]+-show-[^/\#]+
                        (?:
                            /[0-9]+-episode-[^/\#]+
                            (?:
                                /[0-9]+-segment-(?:[^/\#]+\#)?|
                                \#
                            )|
                            \#
                        )
                        (?P<id>[^\#]+)
                    '''

    _TESTS = [{
        # URL with 'segment'
        'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom',
        'info_dict': {
            'id': '1_2444peh4',
            'ext': 'mov',
            'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom',
            'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8',
            'uploader_id': 'TeleZ?ri',
            'upload_date': '20161218',
            'timestamp': 1482084490,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # URL with 'segment' and fragment:
        'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger',
        'only_matching': True
    }, {
        # URL with 'episode' and fragment:
        'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz',
        'only_matching': True
    }, {
        # URL with 'show' and fragment:
        'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch',
        'only_matching': True
    }]

    def _real_extract(self, url):
        """Resolve a video page to its Kaltura entry and delegate to KalturaIE.

        Both lookups below are mandatory: no default is supplied, so a page
        lacking either the Kaltura script or a matching anchor is left to the
        search helpers' own failure handling.
        """
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        # The partner id is read from the src URL of the Kaltura <script>
        # tag ('/p/<digits>' or '/partner_id/<digits>').
        partner_id = self._search_regex(
            r'<script[^>]+src=["\'](?:https?:)?//(?:[^/]+\.)?kaltura\.com(?:/[^/]+)*/(?:p|partner_id)/([0-9]+)',
            page, 'kaltura partner id')

        # The entry id is the data-id of the <a> whose data-slug starts with
        # the slug taken from the URL.
        # NOTE(review): the pattern does not require the closing quote after
        # the slug, so this is a prefix match - confirm that is intended.
        entry_pattern = (
            r'<a[^>]+data-id=(["\'])(?P<id>(?:(?!\1).)+)\1[^>]+data-slug=["\']%s'
            % re.escape(slug))
        entry_id = self._html_search_regex(
            entry_pattern, page, 'kaltura entry id', group='id')

        return self._kaltura_video(partner_id, entry_id)
85
86
class AZMedienPlaylistIE(AZMedienBaseIE):
    """Extractor for show, topic, 'themen' and episode listing pages.

    _VALID_URL only matches URLs without a fragment (it is anchored with '$'
    and the path components exclude '#'); the whole path after the host is
    captured as the playlist id.
    """

    IE_DESC = 'AZ Medien playlists'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?
                        (?:
                            telezueri\.ch|
                            telebaern\.tv|
                            telem1\.ch
                        )/
                        (?P<id>[0-9]+-
                            (?:
                                show|
                                topic|
                                themen
                            )-[^/\#]+
                            (?:
                                /[0-9]+-episode-[^/\#]+
                            )?
                        )$
                    '''

    _TESTS = [{
        # URL with 'episode'
        'url': 'http://www.telebaern.tv/118-show-news/13735-episode-donnerstag-15-dezember-2016',
        'info_dict': {
            'id': '118-show-news/13735-episode-donnerstag-15-dezember-2016',
            'title': 'News - Donnerstag, 15. Dezember 2016',
        },
        'playlist_count': 9,
    }, {
        # URL with 'themen'
        'url': 'http://www.telem1.ch/258-themen-tele-m1-classics',
        'info_dict': {
            'id': '258-themen-tele-m1-classics',
            'title': 'Tele M1 Classics',
        },
        'playlist_mincount': 15,
    }, {
        # URL with 'topic', contains nested playlists
        'url': 'http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen',
        'only_matching': True,
    }, {
        # URL with 'show' only
        'url': 'http://www.telezueri.ch/86-show-talktaeglich',
        'only_matching': True
    }]

    def _real_extract(self, url):
        """Collect the playlist entries of a listing page.

        Entries are gathered by the first of three strategies that yields
        anything:
          1. Kaltura entries built from every data-id attribute (only when a
             Kaltura partner id is found on the page);
          2. absolute URLs in <a data-real="http..."> handed to AZMedienIE;
          3. site-relative hrefs of <a name=... href="/..."> resolved against
             the page URL, with no explicit extractor.
        The result is a playlist keyed by the id matched from the URL.
        """
        show_id = self._match_id(url)
        webpage = self._download_webpage(url, show_id)

        entries = []

        # The subdomain before kaltura.com is optional, matching the host
        # pattern AZMedienIE uses for the same embed script; previously a
        # subdomain was mandatory here only. The partner id itself stays
        # optional (default=None) because listing pages may lack the embed.
        partner_id = self._search_regex(
            r'src=["\'](?:https?:)?//(?:[^/]+\.)?kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
            webpage, 'kaltura partner id', default=None)

        if partner_id:
            entries = [
                self._kaltura_video(partner_id, m.group('id'))
                for m in re.finditer(
                    r'data-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage)]

        if not entries:
            entries = [
                self.url_result(m.group('url'), ie=AZMedienIE.ie_key())
                for m in re.finditer(
                    r'<a[^>]+data-real=(["\'])(?P<url>http.+?)\1', webpage)]

        if not entries:
            entries = [
                # May contain nested playlists (e.g. [1]) thus no explicit
                # ie_key
                # 1. http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen
                self.url_result(urljoin(url, m.group('url')))
                for m in re.finditer(
                    r'<a[^>]+name=[^>]+href=(["\'])(?P<url>/.+?)\1', webpage)]

        # Prefer the episodeShareTitle script variable; otherwise fall back
        # to the stripped text of the element with id 'video-title'. The
        # fallback is computed up front because it is passed as the default.
        fallback_title = strip_or_none(
            get_element_by_id('video-title', webpage))
        title = self._search_regex(
            r'episodeShareTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
            webpage, 'title', default=fallback_title, group='title')

        return self.playlist_result(entries, show_id, title)