]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/arte.py
   4 from .common 
import InfoExtractor
 
   6     # This is used by the not implemented extractLiveStream method 
  13 class ArteTvIE(InfoExtractor
): 
  14     _VALID_URL 
= r
'(?:http://)?www\.arte.tv/guide/(?:fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?' 
  15     _LIVE_URL 
= r
'index-[0-9]+\.html$' 
  19     # TODO implement Live Stream 
  20     # def extractLiveStream(self, url): 
  21     #     video_lang = url.split('/')[-4] 
  22     #     info = self.grep_webpage( 
  24     #         r'src="(.*?/videothek_js.*?\.js)', 
  27     #             (1, 'url', u'Invalid URL: %s' % url) 
  30     #     http_host = url.split('/')[2] 
  31     #     next_url = 'http://%s%s' % (http_host, compat_urllib_parse.unquote(info.get('url'))) 
  32     #     info = self.grep_webpage( 
  34     #         r'(s_artestras_scst_geoFRDE_' + video_lang + '.*?)\'.*?' + 
  35     #             '(http://.*?\.swf).*?' + 
  39     #             (1, 'path',   u'could not extract video path: %s' % url), 
  40     #             (2, 'player', u'could not extract video player: %s' % url), 
  41     #             (3, 'url',    u'could not extract video url: %s' % url) 
  44     #     video_url = u'%s/%s' % (info.get('url'), info.get('path')) 
  46     def _real_extract(self
, url
): 
  47         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  48         name 
= mobj
.group('name') 
  49         # This is not a real id, it can be for example AJT for the news 
  50         # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal 
  51         video_id 
= mobj
.group('id') 
  53         if re
.search(self
._LIVE
_URL
, video_id
) is not None: 
  54             raise ExtractorError(u
'Arte live streams are not yet supported, sorry') 
  55             # self.extractLiveStream(url) 
  58         webpage 
= self
._download
_webpage
(url
, video_id
) 
  59         json_url 
= self
._html
_search
_regex
(r
'arte_vp_url="(.*?)"', webpage
, 'json url') 
  61         json_info 
= self
._download
_webpage
(json_url
, video_id
, 'Downloading info json') 
  62         self
.report_extraction(video_id
) 
  63         info 
= json
.loads(json_info
) 
  64         player_info 
= info
['videoJsonPlayer'] 
  66         info_dict 
= {'id': player_info
['VID'], 
  67                      'title': player_info
['VTI'], 
  68                      'description': player_info
['VDE'], 
  69                      'upload_date': unified_strdate(player_info
['VDA'].split(' ')[0]), 
  70                      'thumbnail': player_info
['programImage'], 
  73         formats 
= player_info
['VSR'].values() 
  74         # We order the formats by quality 
  75         formats 
= sorted(formats
, key
=lambda f
: int(f
['height'])) 
  76         # Pick the best quality 
  77         format_info 
= formats
[-1] 
  78         if format_info
['mediaType'] == u
'rtmp': 
  79             info_dict
['url'] = format_info
['streamer'] 
  80             info_dict
['play_path'] = 'mp4:' + format_info
['url'] 
  81             info_dict
['ext'] = 'mp4' 
  83             info_dict
['url'] = format_info
['url'] 
  84             info_dict
['ext'] = 'mp4'