]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/dreisat.py
1 from __future__
import unicode_literals
5 from .common
import InfoExtractor
16 class DreiSatIE(InfoExtractor
):
18 _GEO_COUNTRIES
= ['DE']
19 _VALID_URL
= r
'https?://(?:www\.)?3sat\.de/mediathek/(?:(?:index|mediathek)\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)'
22 'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
23 'md5': 'be37228896d30a88f315b638900a026e',
27 'title': 'Waidmannsheil',
28 'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
29 'uploader': 'SCHWEIZWEIT',
30 'uploader_id': '100000210',
31 'upload_date': '20140913'
34 'skip_download': True, # m3u8 downloads
38 'url': 'http://www.3sat.de/mediathek/mediathek.php?mode=play&obj=51066',
39 'only_matching': True,
43 def _parse_smil_formats(self
, smil
, smil_url
, video_id
, namespace
=None, f4m_params
=None, transform_rtmp_url
=None):
45 for param_group
in smil
.findall(self
._xpath
_ns
('./head/paramGroup', namespace
)):
46 group_id
= param_group
.get(self
._xpath
_ns
(
47 'id', 'http://www.w3.org/XML/1998/namespace'))
49 for param
in param_group
:
50 params
[param
.get('name')] = param
.get('value')
51 param_groups
[group_id
] = params
54 for video
in smil
.findall(self
._xpath
_ns
('.//video', namespace
)):
55 src
= video
.get('src')
58 bitrate
= int_or_none(self
._search
_regex
(r
'_(\d+)k', src
, 'bitrate', None)) or float_or_none(video
.get('system-bitrate') or video
.get('systemBitrate'), 1000)
59 group_id
= video
.get('paramGroup')
60 param_group
= param_groups
[group_id
]
61 for proto
in param_group
['protocols'].split(','):
63 'url': '%s://%s' % (proto
, param_group
['host']),
64 'app': param_group
['app'],
67 'format_id': '%s-%d' % (proto
, bitrate
),
70 self
._sort
_formats
(formats
)
73 def extract_from_xml_url(self
, video_id
, xml_url
):
74 doc
= self
._download
_xml
(
76 note
='Downloading video info',
77 errnote
='Failed to download video info')
79 status_code
= xpath_text(doc
, './status/statuscode')
80 if status_code
and status_code
!= 'ok':
81 if status_code
== 'notVisibleAnymore':
82 message
= 'Video %s is not available' % video_id
84 message
= '%s returned error: %s' % (self
.IE_NAME
, status_code
)
85 raise ExtractorError(message
, expected
=True)
87 title
= xpath_text(doc
, './/information/title', 'title', True)
91 for fnode
in doc
.findall('.//formitaeten/formitaet'):
92 video_url
= xpath_text(fnode
, 'url')
93 if not video_url
or video_url
in urls
:
95 urls
.append(video_url
)
97 is_available
= 'http://www.metafilegenerator' not in video_url
98 geoloced
= 'static_geoloced_online' in video_url
99 if not is_available
or geoloced
:
102 format_id
= fnode
.attrib
['basetype']
103 format_m
= re
.match(r
'''(?x)
104 (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
105 (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
108 ext
= determine_ext(video_url
, None) or format_m
.group('container')
113 formats
.extend(self
._extract
_smil
_formats
(
114 video_url
, video_id
, fatal
=False))
116 # the certificates are misconfigured (see
117 # https://github.com/ytdl-org/youtube-dl/issues/8665)
118 if video_url
.startswith('https://'):
120 formats
.extend(self
._extract
_m
3u8_formats
(
121 video_url
, video_id
, 'mp4', 'm3u8_native',
122 m3u8_id
=format_id
, fatal
=False))
124 formats
.extend(self
._extract
_f
4m
_formats
(
125 video_url
, video_id
, f4m_id
=format_id
, fatal
=False))
127 quality
= xpath_text(fnode
, './quality')
129 format_id
+= '-' + quality
131 abr
= int_or_none(xpath_text(fnode
, './audioBitrate'), 1000)
132 vbr
= int_or_none(xpath_text(fnode
, './videoBitrate'), 1000)
134 tbr
= int_or_none(self
._search
_regex
(
135 r
'_(\d+)k', video_url
, 'bitrate', None))
136 if tbr
and vbr
and not abr
:
140 'format_id': format_id
,
143 'acodec': format_m
.group('acodec'),
144 'vcodec': format_m
.group('vcodec'),
148 'width': int_or_none(xpath_text(fnode
, './width')),
149 'height': int_or_none(xpath_text(fnode
, './height')),
150 'filesize': int_or_none(xpath_text(fnode
, './filesize')),
151 'protocol': format_m
.group('proto').lower(),
154 geolocation
= xpath_text(doc
, './/details/geolocation')
155 if not formats
and geolocation
and geolocation
!= 'none':
156 self
.raise_geo_restricted(countries
=self
._GEO
_COUNTRIES
)
158 self
._sort
_formats
(formats
)
161 for node
in doc
.findall('.//teaserimages/teaserimage'):
162 thumbnail_url
= node
.text
163 if not thumbnail_url
:
166 'url': thumbnail_url
,
168 thumbnail_key
= node
.get('key')
170 m
= re
.match('^([0-9]+)x([0-9]+)$', thumbnail_key
)
172 thumbnail
['width'] = int(m
.group(1))
173 thumbnail
['height'] = int(m
.group(2))
174 thumbnails
.append(thumbnail
)
176 upload_date
= unified_strdate(xpath_text(doc
, './/details/airtime'))
181 'description': xpath_text(doc
, './/information/detail'),
182 'duration': int_or_none(xpath_text(doc
, './/details/lengthSec')),
183 'thumbnails': thumbnails
,
184 'uploader': xpath_text(doc
, './/details/originChannelTitle'),
185 'uploader_id': xpath_text(doc
, './/details/originChannelId'),
186 'upload_date': upload_date
,
190 def _real_extract(self
, url
):
191 video_id
= self
._match
_id
(url
)
192 details_url
= 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?id=%s' % video_id
193 return self
.extract_from_xml_url(video_id
, details_url
)