]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/dailymotion.py
3 from .common
import InfoExtractor
12 class DailymotionIE(InfoExtractor
):
13 """Information Extractor for Dailymotion"""
15 _VALID_URL
= r
'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)'
16 IE_NAME
= u
'dailymotion'
18 u
'url': u
'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
19 u
'file': u
'x33vw9.mp4',
20 u
'md5': u
'392c4b85a60a90dc4792da41ce3144eb',
22 u
"uploader": u
"Alex and Van .",
23 u
"title": u
"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
27 def _real_extract(self
, url
):
28 # Extract id and simplified title from URL
29 mobj
= re
.match(self
._VALID
_URL
, url
)
31 video_id
= mobj
.group(1).split('_')[0].split('?')[0]
33 video_extension
= 'mp4'
35 # Retrieve video webpage to extract further information
36 request
= compat_urllib_request
.Request(url
)
37 request
.add_header('Cookie', 'family_filter=off')
38 webpage
= self
._download
_webpage
(request
, video_id
)
40 # Extract URL, uploader and title from webpage
41 self
.report_extraction(video_id
)
42 mobj
= re
.search(r
'\s*var flashvars = (.*)', webpage
)
44 raise ExtractorError(u
'Unable to extract media URL')
45 flashvars
= compat_urllib_parse
.unquote(mobj
.group(1))
47 for key
in ['hd1080URL', 'hd720URL', 'hqURL', 'sdURL', 'ldURL', 'video_url']:
50 self
.to_screen(u
'Using %s' % key
)
53 raise ExtractorError(u
'Unable to extract video URL')
55 mobj
= re
.search(r
'"' + max_quality
+ r
'":"(.+?)"', flashvars
)
57 raise ExtractorError(u
'Unable to extract video URL')
59 video_url
= compat_urllib_parse
.unquote(mobj
.group(1)).replace('\\/', '/')
61 # TODO: support choosing qualities
63 mobj
= re
.search(r
'<meta property="og:title" content="(?P<title>[^"]*)" />', webpage
)
65 raise ExtractorError(u
'Unable to extract title')
66 video_title
= unescapeHTML(mobj
.group('title'))
69 video_uploader
= self
._search
_regex
([r
'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
70 # Looking for official user
71 r
'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
72 webpage
, 'video uploader')
74 video_upload_date
= None
75 mobj
= re
.search(r
'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage
)
77 video_upload_date
= mobj
.group(3) + mobj
.group(2) + mobj
.group(1)
82 'uploader': video_uploader
,
83 'upload_date': video_upload_date
,
85 'ext': video_extension
,