import json
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
- compat_str,
qualities,
- determine_ext,
+ unescapeHTML,
+ xpath_element,
)
class AllocineIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?P<typ>article|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=)(?P<id>[0-9]+)(?:\.html)?'
+ _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?P<typ>article|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P<id>[0-9]+)(?:\.html)?'
_TESTS = [{
'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html',
'id': '19546517',
'ext': 'mp4',
'title': 'Astérix - Le Domaine des Dieux Teaser VF',
- 'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
+ 'description': 'md5:abcd09ce503c6560512c14ebfdb720d2',
'thumbnail': 're:http://.*\.jpg',
},
}, {
'id': '19540403',
'ext': 'mp4',
'title': 'Planes 2 Bande-annonce VF',
- 'description': 'md5:c4b1f7bd682a91de6491ada267ec0f4d',
+ 'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway',
'thumbnail': 're:http://.*\.jpg',
},
}, {
'id': '19544709',
'ext': 'mp4',
'title': 'Dragons 2 - Bande annonce finale VF',
- 'description': 'md5:e74a4dc750894bac300ece46c7036490',
+ 'description': 'md5:601d15393ac40f249648ef000720e7e3',
'thumbnail': 're:http://.*\.jpg',
},
+ }, {
+ 'url': 'http://www.allocine.fr/video/video-19550147/',
+ 'only_matching': True,
}]
def _real_extract(self, url):
if typ == 'film':
video_id = self._search_regex(r'href="/video/player_gen_cmedia=([0-9]+).+"', webpage, 'video id')
else:
- player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player')
-
- player_data = json.loads(player)
- video_id = compat_str(player_data['refMedia'])
+ player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player', default=None)
+ if player:
+ player_data = json.loads(player)
+ video_id = compat_str(player_data['refMedia'])
+ else:
+ model = self._search_regex(r'data-model="([^"]+)">', webpage, 'data model')
+ model_data = self._parse_json(unescapeHTML(model), display_id)
+ video_id = compat_str(model_data['id'])
xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id)
- video = xml.find('.//AcVisionVideo').attrib
+ video = xpath_element(xml, './/AcVisionVideo').attrib
quality = qualities(['ld', 'md', 'hd'])
formats = []
'format_id': format_id,
'quality': quality(format_id),
'url': v,
- 'ext': determine_ext(v),
})
-
self._sort_formats(formats)
return {