Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/dailymotion.py
Imported Upstream version 2013.07.02
[youtubedl] / youtube_dl / extractor / dailymotion.py
1 import re
2
3 from .common import InfoExtractor
4 from ..utils import (
5 compat_urllib_request,
6 compat_urllib_parse,
7
8 ExtractorError,
9 unescapeHTML,
10 )
11
class DailymotionIE(InfoExtractor):
    """Information Extractor for Dailymotion"""

    # Group 1 captures the path segment that follows "/video/".
    _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)'
    IE_NAME = u'dailymotion'
    _TEST = {
        u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
        u'file': u'x33vw9.mp4',
        u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
        u'info_dict': {
            u"uploader": u"Alex and Van .",
            u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
        }
    }

    # flashvars keys that may carry a media URL, tried in this order; the
    # first one with a usable value wins (presumably ordered from highest
    # to lowest quality -- confirm against the site's player config).
    _QUALITY_KEYS = ['hd1080URL', 'hd720URL', 'hqURL', 'sdURL', 'ldURL', 'video_url']

    def _extract_video_url(self, flashvars):
        """Return the media URL of the first quality key usable in flashvars.

        A key only counts when it is followed by a non-empty quoted value, so
        an entry such as "hd720URL":null or "hd720URL":"" is skipped and the
        next key is tried instead of aborting the extraction.

        Raises ExtractorError when no key yields a URL.
        """
        for key in self._QUALITY_KEYS:
            mobj = re.search(r'"' + key + r'":"([^"]+)"', flashvars)
            if mobj is None:
                continue
            self.to_screen(u'Using %s' % key)
            # The value is percent-decoded a second time and its
            # backslash-escaped slashes ("\/") are turned into plain ones.
            return compat_urllib_parse.unquote(mobj.group(1)).replace('\\/', '/')
        raise ExtractorError(u'Unable to extract video URL')

    def _real_extract(self, url):
        """Download the video page at url and extract its metadata.

        Returns a one-element list holding a dict with the keys 'id', 'url',
        'uploader', 'upload_date', 'title' and 'ext'.

        Raises ExtractorError when url does not match _VALID_URL, or when the
        flashvars assignment, a media URL or the title cannot be found in the
        downloaded page.
        """
        # Extract id and simplified title from URL
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Keep only what precedes the first '_' and the first '?',
        # e.g. "x33vw9_some-title?foo=bar" -> "x33vw9".
        video_id = mobj.group(1).split('_')[0].split('?')[0]

        video_extension = 'mp4'

        # Retrieve video webpage to extract further information
        request = compat_urllib_request.Request(url)
        # NOTE(review): presumably switches off the site's family filter so
        # that filtered videos are still served -- confirm.
        request.add_header('Cookie', 'family_filter=off')
        webpage = self._download_webpage(request, video_id)

        # Extract URL, uploader and title from webpage
        self.report_extraction(video_id)
        mobj = re.search(r'\s*var flashvars = (.*)', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract media URL')
        flashvars = compat_urllib_parse.unquote(mobj.group(1))

        # TODO: support choosing qualities
        video_url = self._extract_video_url(flashvars)

        mobj = re.search(r'<meta property="og:title" content="(?P<title>[^"]*)" />', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract title')
        video_title = unescapeHTML(mobj.group('title'))

        video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
                                             # Looking for official user
                                             r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
                                            webpage, 'video uploader')

        # The upload date is optional: it stays None when the page has no
        # matching element.
        video_upload_date = None
        mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
        if mobj is not None:
            # Concatenate the captured fields as 4-digit field first, then
            # the second and first 2-digit fields (assumes the page shows
            # DD-MM-YYYY, giving YYYYMMDD -- TODO confirm).
            video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)

        return [{
            'id': video_id,
            'url': video_url,
            'uploader': video_uploader,
            'upload_date': video_upload_date,
            'title': video_title,
            'ext': video_extension,
        }]