]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/zapiks.py
22a9a57e882be49109c00036fa3559410b4e334f
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  16 class ZapiksIE(InfoExtractor
): 
  17     _VALID_URL 
= r
'https?://(?:www\.)?zapiks\.(?:fr|com)/(?:(?:[a-z]{2}/)?(?P<display_id>.+?)\.html|index\.php\?.*\bmedia_id=(?P<id>\d+))' 
  20             'url': 'http://www.zapiks.fr/ep2s3-bon-appetit-eh-be-viva.html', 
  21             'md5': 'aeb3c473b2d564b2d46d664d28d5f050', 
  25                 'title': 'EP2S3 - Bon Appétit - Eh bé viva les pyrénées con!', 
  26                 'description': 'md5:7054d6f6f620c6519be1fe710d4da847', 
  27                 'thumbnail': 're:^https?://.*\.jpg$', 
  29                 'timestamp': 1359044972, 
  30                 'upload_date': '20130124', 
  36             'url': 'http://www.zapiks.com/ep3s5-bon-appetit-baqueira-m-1.html', 
  37             'only_matching': True, 
  40             'url': 'http://www.zapiks.com/nl/ep3s5-bon-appetit-baqueira-m-1.html', 
  41             'only_matching': True, 
  44             'url': 'http://www.zapiks.fr/index.php?action=playerIframe&media_id=118046&width=640&height=360&autoStart=false&language=fr', 
  45             'only_matching': True, 
  49     def _real_extract(self
, url
): 
  50         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  51         video_id 
= mobj
.group('id') 
  52         display_id 
= mobj
.group('display_id') or video_id
 
  54         webpage 
= self
._download
_webpage
(url
, display_id
) 
  57             video_id 
= self
._search
_regex
( 
  58                 r
'data-media-id="(\d+)"', webpage
, 'video id') 
  60         playlist 
= self
._download
_xml
( 
  61             'http://www.zapiks.fr/view/index.php?action=playlist&media_id=%s&lang=en' % video_id
, 
  65             'jwplayer': 'http://rss.jwpcdn.com/' 
  69             return xpath_with_ns(path
, NS_MAP
) 
  71         item 
= playlist
.find('./channel/item') 
  73         title 
= xpath_text(item
, 'title', 'title') or self
._og
_search
_title
(webpage
) 
  74         description 
= self
._og
_search
_description
(webpage
, default
=None) 
  75         thumbnail 
= xpath_text( 
  76             item
, ns('./jwplayer:image'), 'thumbnail') or self
._og
_search
_thumbnail
(webpage
, default
=None) 
  77         duration 
= parse_duration(self
._html
_search
_meta
( 
  78             'duration', webpage
, 'duration', default
=None)) 
  79         timestamp 
= parse_iso8601(self
._html
_search
_meta
( 
  80             'uploadDate', webpage
, 'upload date', default
=None), ' ') 
  82         view_count 
= int_or_none(self
._search
_regex
( 
  83             r
'UserPlays:(\d+)', webpage
, 'view count', default
=None)) 
  84         comment_count 
= int_or_none(self
._search
_regex
( 
  85             r
'UserComments:(\d+)', webpage
, 'comment count', default
=None)) 
  88         for source 
in item
.findall(ns('./jwplayer:source')): 
  89             format_id 
= source
.attrib
['label'] 
  91                 'url': source
.attrib
['file'], 
  92                 'format_id': format_id
, 
  94             m 
= re
.search(r
'^(?P<height>\d+)[pP]', format_id
) 
  96                 f
['height'] = int(m
.group('height')) 
  98         self
._sort
_formats
(formats
) 
 103             'description': description
, 
 104             'thumbnail': thumbnail
, 
 105             'duration': duration
, 
 106             'timestamp': timestamp
, 
 107             'view_count': view_count
, 
 108             'comment_count': comment_count
,