Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/philharmoniedeparis.py
debian/patches: Add patch from upstream to fix extraction from youtube.
[youtubedl] / youtube_dl / extractor / philharmoniedeparis.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import (
6 float_or_none,
7 int_or_none,
8 parse_iso8601,
9 xpath_text,
10 )
11
12
class PhilharmonieDeParisIE(InfoExtractor):
    """Extractor for concert pages on live.philharmoniedeparis.fr.

    Matches both the concert page URLs and the direct playlist endpoint
    (``misc/Playlist.ashx?id=...``); the numeric id captured from the URL is
    used to query the playlist XML.
    """

    IE_DESC = 'Philharmonie de Paris'
    _VALID_URL = r'https?://live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://live.philharmoniedeparis.fr/concert/1032066.html',
        'info_dict': {
            'id': '1032066',
            'ext': 'flv',
            'title': 'md5:d1f5585d87d041d07ce9434804bc8425',
            'timestamp': 1428179400,
            'upload_date': '20150404',
            'duration': 6592.278,
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }, {
        'url': 'http://live.philharmoniedeparis.fr/Concert/1030324.html',
        'only_matching': True,
    }, {
        'url': 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=1030324&track=&lang=fr',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the playlist XML for *url* and build the info dict.

        The returned dict always carries ``id``, ``title`` and ``formats``.
        ``duration`` is set when at least one ``fichier`` element exists, and
        either ``timestamp`` (date and hour both present) or ``upload_date``
        (date only) is added from the ``concert`` element's attributes.
        """
        video_id = self._match_id(url)

        playlist = self._download_xml(
            'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=%s' % video_id,
            video_id)
        concert = playlist.find('./concert')

        # The title is mandatory: xpath_text is asked to fail hard without it.
        title = xpath_text(concert, './titre', 'title', fatal=True)

        formats = []
        info_dict = {
            'id': video_id,
            'title': title,
            'formats': formats,
        }

        files_node = concert.find('./fichiers')
        # Every format shares the server address; only the play path differs.
        stream_server = files_node.attrib['serveurstream']

        # (format_id, attribute suffix) pairs, ordered from lowest to highest
        # quality so that the enumerate() index can serve as the quality rank.
        variants = [('lq', ''), ('hq', '_hd')]

        for file_node in files_node.findall('./fichier'):
            # Assigned on every iteration, so the last file's value is kept.
            info_dict['duration'] = float_or_none(file_node.get('timecodefin'))
            for rank, (format_id, suffix) in enumerate(variants):
                play_path = file_node.get('url%s' % suffix)
                if play_path:
                    formats.append({
                        'url': stream_server,
                        'play_path': play_path,
                        'ext': 'flv',
                        'format_id': format_id,
                        'width': int_or_none(concert.get('largeur%s' % suffix)),
                        'height': int_or_none(concert.get('hauteur%s' % suffix)),
                        'quality': rank,
                    })
        self._sort_formats(formats)

        date = concert.get('date')
        hour = concert.get('heure')
        if date:
            if hour:
                # Slice the compact date into YYYY-MM-DD and append the hour
                # to form an ISO 8601 string for parse_iso8601.
                info_dict['timestamp'] = parse_iso8601(
                    '%s-%s-%sT%s:00' % (date[0:4], date[4:6], date[6:8], hour))
            else:
                info_dict['upload_date'] = date

        return info_dict
78 return info_dict