]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/allocine.py
   2 from __future__ 
import unicode_literals
 
   4 from .common 
import InfoExtractor
 
   5 from ..compat 
import compat_str
 
  16 class AllocineIE(InfoExtractor
): 
  17     _VALID_URL 
= r
'https?://(?:www\.)?allocine\.fr/(?:article|video|film)/(?:fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P<id>[0-9]+)(?:\.html)?' 
  20         'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html', 
  21         'md5': '0c9fcf59a841f65635fa300ac43d8269', 
  24             'display_id': '18635087', 
  26             'title': 'Astérix - Le Domaine des Dieux Teaser VF', 
  27             'description': 'md5:4a754271d9c6f16c72629a8a993ee884', 
  28             'thumbnail': r
're:http://.*\.jpg', 
  30             'timestamp': 1404273600, 
  31             'upload_date': '20140702', 
  35         'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html', 
  36         'md5': 'd0cdce5d2b9522ce279fdfec07ff16e0', 
  39             'display_id': '19540403', 
  41             'title': 'Planes 2 Bande-annonce VF', 
  42             'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway', 
  43             'thumbnail': r
're:http://.*\.jpg', 
  45             'timestamp': 1385659800, 
  46             'upload_date': '20131128', 
  50         'url': 'http://www.allocine.fr/video/player_gen_cmedia=19544709&cfilm=181290.html', 
  51         'md5': '101250fb127ef9ca3d73186ff22a47ce', 
  54             'display_id': '19544709', 
  56             'title': 'Dragons 2 - Bande annonce finale VF', 
  57             'description': 'md5:6cdd2d7c2687d4c6aafe80a35e17267a', 
  58             'thumbnail': r
're:http://.*\.jpg', 
  60             'timestamp': 1397589900, 
  61             'upload_date': '20140415', 
  65         'url': 'http://www.allocine.fr/video/video-19550147/', 
  66         'md5': '3566c0668c0235e2d224fd8edb389f67', 
  70             'title': 'Faux Raccord N°123 - Les gaffes de Cliffhanger', 
  71             'description': 'md5:bc734b83ffa2d8a12188d9eb48bb6354', 
  72             'thumbnail': r
're:http://.*\.jpg', 
  76     def _real_extract(self
, url
): 
  77         display_id 
= self
._match
_id
(url
) 
  79         webpage 
= self
._download
_webpage
(url
, display_id
) 
  82         quality 
= qualities(['ld', 'md', 'hd']) 
  84         model 
= self
._html
_search
_regex
( 
  85             r
'data-model="([^"]+)"', webpage
, 'data model', default
=None) 
  87             model_data 
= self
._parse
_json
(model
, display_id
) 
  88             video 
= model_data
['videos'][0] 
  89             title 
= video
['title'] 
  90             for video_url 
in video
['sources'].values(): 
  91                 video_id
, format_id 
= url_basename(video_url
).split('_')[:2] 
  93                     'format_id': format_id
, 
  94                     'quality': quality(format_id
), 
  97             duration 
= int_or_none(video
.get('duration')) 
  98             view_count 
= int_or_none(video
.get('view_count')) 
  99             timestamp 
= unified_timestamp(try_get( 
 100                 video
, lambda x
: x
['added_at']['date'], compat_str
)) 
 102             video_id 
= display_id
 
 103             media_data 
= self
._download
_json
( 
 104                 'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id
, display_id
) 
 106                 self
._html
_search
_regex
( 
 107                     r
'(?s)<title>(.+?)</title>', webpage
, 'title').strip(), 
 109             for key
, value 
in media_data
['video'].items(): 
 110                 if not key
.endswith('Path'): 
 112                 format_id 
= key
[:-len('Path')] 
 114                     'format_id': format_id
, 
 115                     'quality': quality(format_id
), 
 118             duration
, view_count
, timestamp 
= [None] * 3 
 120         self
._sort
_formats
(formats
) 
 124             'display_id': display_id
, 
 126             'description': self
._og
_search
_description
(webpage
), 
 127             'thumbnail': self
._og
_search
_thumbnail
(webpage
), 
 128             'duration': duration
, 
 129             'timestamp': timestamp
, 
 130             'view_count': view_count
,