]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/zapiks.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  16 class ZapiksIE(InfoExtractor
): 
  17     _VALID_URL 
= r
'https?://(?:www\.)?zapiks\.(?:fr|com)/(?:(?:[a-z]{2}/)?(?P<display_id>.+?)\.html|index\.php\?.*\bmedia_id=(?P<id>\d+))' 
  20             'url': 'http://www.zapiks.fr/ep2s3-bon-appetit-eh-be-viva.html', 
  21             'md5': 'aeb3c473b2d564b2d46d664d28d5f050', 
  25                 'title': 'EP2S3 - Bon Appétit - Eh bé viva les pyrénées con!', 
  26                 'description': 'md5:7054d6f6f620c6519be1fe710d4da847', 
  27                 'thumbnail': r
're:^https?://.*\.jpg$', 
  29                 'timestamp': 1359044972, 
  30                 'upload_date': '20130124', 
  35             'url': 'http://www.zapiks.com/ep3s5-bon-appetit-baqueira-m-1.html', 
  36             'only_matching': True, 
  39             'url': 'http://www.zapiks.com/nl/ep3s5-bon-appetit-baqueira-m-1.html', 
  40             'only_matching': True, 
  43             'url': 'http://www.zapiks.fr/index.php?action=playerIframe&media_id=118046&width=640&height=360&autoStart=false&language=fr', 
  44             'only_matching': True, 
  48     def _real_extract(self
, url
): 
  49         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  50         video_id 
= mobj
.group('id') 
  51         display_id 
= mobj
.group('display_id') or video_id
 
  53         webpage 
= self
._download
_webpage
(url
, display_id
) 
  56             video_id 
= self
._search
_regex
( 
  57                 r
'data-media-id="(\d+)"', webpage
, 'video id') 
  59         playlist 
= self
._download
_xml
( 
  60             'http://www.zapiks.fr/view/index.php?action=playlist&media_id=%s&lang=en' % video_id
, 
  64             'jwplayer': 'http://rss.jwpcdn.com/' 
  68             return xpath_with_ns(path
, NS_MAP
) 
  70         item 
= playlist
.find('./channel/item') 
  72         title 
= xpath_text(item
, 'title', 'title') or self
._og
_search
_title
(webpage
) 
  73         description 
= self
._og
_search
_description
(webpage
, default
=None) 
  74         thumbnail 
= xpath_text( 
  75             item
, ns('./jwplayer:image'), 'thumbnail') or self
._og
_search
_thumbnail
(webpage
, default
=None) 
  76         duration 
= parse_duration(self
._html
_search
_meta
( 
  77             'duration', webpage
, 'duration', default
=None)) 
  78         timestamp 
= parse_iso8601(self
._html
_search
_meta
( 
  79             'uploadDate', webpage
, 'upload date', default
=None), ' ') 
  81         view_count 
= int_or_none(self
._search
_regex
( 
  82             r
'UserPlays:(\d+)', webpage
, 'view count', default
=None)) 
  83         comment_count 
= int_or_none(self
._search
_regex
( 
  84             r
'UserComments:(\d+)', webpage
, 'comment count', default
=None)) 
  87         for source 
in item
.findall(ns('./jwplayer:source')): 
  88             format_id 
= source
.attrib
['label'] 
  90                 'url': source
.attrib
['file'], 
  91                 'format_id': format_id
, 
  93             m 
= re
.search(r
'^(?P<height>\d+)[pP]', format_id
) 
  95                 f
['height'] = int(m
.group('height')) 
  97         self
._sort
_formats
(formats
) 
 102             'description': description
, 
 103             'thumbnail': thumbnail
, 
 104             'duration': duration
, 
 105             'timestamp': timestamp
, 
 106             'view_count': view_count
, 
 107             'comment_count': comment_count
,