Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/mpora.py
387935d4db784641377b72f5be9ec5e7649f5908
[youtubedl] / youtube_dl / extractor / mpora.py
1 from __future__ import unicode_literals
2
3 import json
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import int_or_none
8
9
class MporaIE(InfoExtractor):
    """Information extractor for videos hosted on mpora.com / mpora.de.

    The video page embeds the player configuration as a JSON object passed
    to ``new FM.Player(...)``.  Title, uploader, duration, thumbnail and the
    list of available encodings are all read from that object.
    """

    _VALID_URL = r'^https?://(www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)'
    IE_NAME = 'MPORA'

    _TEST = {
        'url': 'http://mpora.de/videos/AAdo8okx4wiz/embed?locale=de',
        'file': 'AAdo8okx4wiz.mp4',
        'md5': 'a7a228473eedd3be741397cf452932eb',
        'info_dict': {
            'title': 'Katy Curd - Winter in the Forest',
            'duration': 416,
            'uploader': 'Peter Newman Media',
        },
    }

    def _real_extract(self, url):
        """Download the video page and build the info dict for *url*.

        Returns a dict with the keys ``id``, ``title``, ``formats``,
        ``uploader``, ``duration`` and ``thumbnail``.  ``title`` and the
        format sources are required (a missing key raises ``KeyError``);
        ``uploader``, ``duration`` and ``thumbnail`` are optional and are
        ``None`` when the page does not provide them.
        """
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')

        webpage = self._download_webpage(url, video_id)
        # The dot before "player" is escaped so that only a literal
        # ").player;" terminates the JSON capture.
        data_json = self._search_regex(
            r"new FM\.Player\('[^']+',\s*(\{.*?)\)\.player;", webpage, 'json')

        data = json.loads(data_json)
        video = data['video']
        encodings = video['encodings']
        info_overlay = data['info_overlay']

        title = info_overlay['title']
        uploader = info_overlay.get('username')

        # Optional metadata: a missing value must not abort the extraction.
        # The raw value is divided by 1000 (presumably milliseconds ->
        # seconds; the test above expects 416).
        duration_raw = int_or_none(video.get('duration'))
        duration = None if duration_raw is None else duration_raw // 1000
        # The poster is only looked up on the 'sd' encoding, which may be
        # absent.
        thumbnail = encodings.get('sd', {}).get('poster')

        formats = []
        for encoding_id, edata in encodings.items():
            for src in edata['sources']:
                # Use the trailing "_<digits>.<ext>" part of the source URL,
                # when present, as the width.
                width_str = self._search_regex(
                    r'_([0-9]+)\.[a-zA-Z0-9]+$', src['src'],
                    'width', default=None)
                # Everything after the first '/' of the source type string.
                vcodec = src['type'].partition('/')[2]

                formats.append({
                    'format_id': encoding_id + '-' + vcodec,
                    'url': src['src'],
                    'vcodec': vcodec,
                    'width': int_or_none(width_str),
                })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'uploader': uploader,
            'duration': duration,
            'thumbnail': thumbnail,
        }