]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/dailymotion.py
fa8c630d053168bf30d835952debd67536555c0c
   5 from .common 
import InfoExtractor
 
   8     get_element_by_attribute
, 
  14 class DailymotionIE(InfoExtractor
): 
  15     """Information Extractor for Dailymotion""" 
  17     _VALID_URL 
= r
'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)' 
  18     IE_NAME 
= u
'dailymotion' 
  20         u
'url': u
'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech', 
  21         u
'file': u
'x33vw9.mp4', 
  22         u
'md5': u
'392c4b85a60a90dc4792da41ce3144eb', 
  24             u
"uploader": u
"Alex and Van .",  
  25             u
"title": u
"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\"" 
  29     def _real_extract(self
, url
): 
  30         # Extract id and simplified title from URL 
  31         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  33         video_id 
= mobj
.group(1).split('_')[0].split('?')[0] 
  35         video_extension 
= 'mp4' 
  37         # Retrieve video webpage to extract further information 
  38         request 
= compat_urllib_request
.Request(url
) 
  39         request
.add_header('Cookie', 'family_filter=off') 
  40         webpage 
= self
._download
_webpage
(request
, video_id
) 
  42         # Extract URL, uploader and title from webpage 
  43         self
.report_extraction(video_id
) 
  45         video_uploader 
= self
._search
_regex
([r
'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>', 
  46                                              # Looking for official user 
  47                                              r
'<(?:span|a) .*?rel="author".*?>([^<]+?)</'], 
  48                                             webpage
, 'video uploader') 
  50         video_upload_date 
= None 
  51         mobj 
= re
.search(r
'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage
) 
  53             video_upload_date 
= mobj
.group(3) + mobj
.group(2) + mobj
.group(1) 
  55         embed_url 
= 'http://www.dailymotion.com/embed/video/%s' % video_id
 
  56         embed_page 
= self
._download
_webpage
(embed_url
, video_id
, 
  57                                             u
'Downloading embed page') 
  58         info 
= self
._search
_regex
(r
'var info = ({.*?}),', embed_page
, 'video info') 
  59         info 
= json
.loads(info
) 
  61         # TODO: support choosing qualities 
  63         for key 
in ['stream_h264_hd1080_url','stream_h264_hd_url', 
  64                     'stream_h264_hq_url','stream_h264_url', 
  65                     'stream_h264_ld_url']: 
  66             if info
.get(key
):#key in info and info[key]: 
  68                 self
.to_screen(u
'Using %s' % key
) 
  71             raise ExtractorError(u
'Unable to extract video URL') 
  72         video_url 
= info
[max_quality
] 
  77             'uploader': video_uploader
, 
  78             'upload_date':  video_upload_date
, 
  79             'title':    self
._og
_search
_title
(webpage
), 
  80             'ext':      video_extension
, 
  81             'thumbnail': info
['thumbnail_url'] 
  85 class DailymotionPlaylistIE(InfoExtractor
): 
  86     _VALID_URL 
= r
'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/' 
  87     _MORE_PAGES_INDICATOR 
= r
'<div class="next">.*?<a.*?href="/playlist/.+?".*?>.*?</a>.*?</div>' 
  89     def _real_extract(self
, url
): 
  90         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  91         playlist_id 
=  mobj
.group('id') 
  94         for pagenum 
in itertools
.count(1): 
  95             webpage 
= self
._download
_webpage
('https://www.dailymotion.com/playlist/%s/%s' % (playlist_id
, pagenum
), 
  96                                              playlist_id
, u
'Downloading page %s' % pagenum
) 
  98             playlist_el 
= get_element_by_attribute(u
'class', u
'video_list', webpage
) 
  99             video_ids
.extend(re
.findall(r
'data-id="(.+?)" data-ext-id', playlist_el
)) 
 101             if re
.search(self
._MORE
_PAGES
_INDICATOR
, webpage
, re
.DOTALL
) is None: 
 104         entries 
= [self
.url_result('http://www.dailymotion.com/video/%s' % video_id
, 'Dailymotion') 
 105                    for video_id 
in video_ids
] 
 106         return {'_type': 'playlist', 
 108                 'title': get_element_by_id(u
'playlist_name', webpage
),