]> Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/mpora.py
6a8e2cc442c25327d9d91acebb47d4e109750731
[youtubedl] / youtube_dl / extractor / mpora.py
1 from __future__ import unicode_literals
2
3 import json
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8 int_or_none,
9 )
10
11
class MporaIE(InfoExtractor):
    """Extractor for video pages on mpora.com / mpora.de.

    The video page passes a JSON configuration object to a
    ``new FM.Player('<element>', {...});`` call; all metadata and the
    media source URLs are read from that object.
    """

    _VALID_URL = r'^https?://(www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)'
    IE_NAME = 'MPORA'

    _TEST = {
        'url': 'http://mpora.de/videos/AAdo8okx4wiz/embed?locale=de',
        'file': 'AAdo8okx4wiz.mp4',
        'md5': 'a7a228473eedd3be741397cf452932eb',
        'info_dict': {
            'title': 'Katy Curd - Winter in the Forest',
            'duration': 416,
            'uploader': 'petenewman',
        },
    }

    def _real_extract(self, url):
        """Download the video page and build the info dict for *url*.

        Returns a dict with the keys ``id``, ``title``, ``formats``,
        ``uploader``, ``duration`` and ``thumbnail``.  ``uploader``,
        ``duration`` and ``thumbnail`` are ``None`` when the player
        configuration does not provide them.

        Raises ``KeyError`` when the configuration lacks the title, the
        ``video``/``encodings`` section or the per-encoding ``sources``
        list, since without those there is nothing to download.
        """
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')

        webpage = self._download_webpage(url, video_id)
        data_json = self._search_regex(
            r"new FM\.Player\('[^']+',\s*(\{.*?)\);\n", webpage, 'json')

        data = json.loads(data_json)

        info_overlay = data['info_overlay']
        video = data['video']
        encodings = video['encodings']

        # The title is mandatory; everything else below is optional
        # metadata and must not abort extraction when it is absent.
        title = info_overlay['title']
        uploader = info_overlay.get('username')

        # Presumably the site reports the duration in milliseconds (the
        # value is floor-divided by 1000) -- TODO confirm.
        raw_duration = video.get('duration')
        duration = None if raw_duration is None else raw_duration // 1000

        # Not every video necessarily has an 'sd' encoding or a poster.
        thumbnail = (encodings.get('sd') or {}).get('poster')

        formats = []
        for encoding_id, edata in encodings.items():
            for src in edata['sources']:
                # The width is taken from a trailing "_<digits>.<ext>" in
                # the source URL when present; otherwise it stays None.
                width_str = self._search_regex(
                    r'_([0-9]+)\.[a-zA-Z0-9]+$', src['src'],
                    'width', default=None)
                # MIME type "video/<codec>" -> "<codec>"
                vcodec = src['type'].partition('/')[2]

                formats.append({
                    'format_id': encoding_id + '-' + vcodec,
                    'url': src['src'],
                    'vcodec': vcodec,
                    'width': int_or_none(width_str),
                })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'uploader': uploader,
            'duration': duration,
            'thumbnail': thumbnail,
        }