X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/46113edab215c2211a604c06245c16d5d4e57dcf..5ea61d104de9ca8a7d19d63d83173eb7391081d2:/youtube_dl/extractor/francetv.py?ds=sidebyside diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 35d7d15..edf555b 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -6,13 +6,17 @@ import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( + compat_urllib_parse_urlparse, compat_urlparse, - ExtractorError, +) +from ..utils import ( clean_html, - parse_duration, - compat_urllib_parse_urlparse, + ExtractorError, int_or_none, + float_or_none, + parse_duration, + determine_ext, ) @@ -26,6 +30,19 @@ class FranceTVBaseInfoExtractor(InfoExtractor): if info.get('status') == 'NOK': raise ExtractorError( '%s returned error: %s' % (self.IE_NAME, info['message']), expected=True) + allowed_countries = info['videos'][0].get('geoblocage') + if allowed_countries: + georestricted = True + geo_info = self._download_json( + 'http://geo.francetv.fr/ws/edgescape.json', video_id, + 'Downloading geo restriction info') + country = geo_info['reponse']['geo_info']['country_code'] + if country not in allowed_countries: + raise ExtractorError( + 'The video is not available from your location', + expected=True) + else: + georestricted = False formats = [] for video in info['videos']: @@ -35,18 +52,20 @@ class FranceTVBaseInfoExtractor(InfoExtractor): if not video_url: continue format_id = video['format'] - if video_url.endswith('.f4m'): + ext = determine_ext(video_url) + if ext == 'f4m': + if georestricted: + # See https://github.com/rg3/youtube-dl/issues/3963 + # m3u8 urls work fine + continue video_url_parsed = compat_urllib_parse_urlparse(video_url) f4m_url = self._download_webpage( 'http://hdfauth.francetv.fr/esi/urltokengen2.html?url=%s' % video_url_parsed.path, video_id, 'Downloading f4m manifest token', fatal=False) if f4m_url: - 
f4m_formats = self._extract_f4m_formats(f4m_url, video_id) - for f4m_format in f4m_formats: - f4m_format['preference'] = 1 - formats.extend(f4m_formats) - elif video_url.endswith('.m3u8'): - formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4')) + formats.extend(self._extract_f4m_formats(f4m_url, video_id, 1, format_id)) + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4', m3u8_id=format_id)) elif video_url.startswith('rtmp'): formats.append({ 'url': video_url, @@ -67,7 +86,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor): 'title': info['titre'], 'description': clean_html(info['synopsis']), 'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']), - 'duration': parse_duration(info['duree']), + 'duration': float_or_none(info.get('real_duration'), 1000) or parse_duration(info['duree']), 'timestamp': int_or_none(info['diffusion']['timestamp']), 'formats': formats, } @@ -211,12 +230,13 @@ class FranceTVIE(FranceTVBaseInfoExtractor): class GenerationQuoiIE(InfoExtractor): IE_NAME = 'france2.fr:generation-quoi' - _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<name>.*)(\?|$)' + _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<id>[^/?#]+)' _TEST = { 'url': 'http://generation-quoi.france2.fr/portrait/garde-a-vous', - 'file': 'k7FJX8VBcvvLmX4wA5Q.mp4', 'info_dict': { + 'id': 'k7FJX8VBcvvLmX4wA5Q', + 'ext': 'mp4', 'title': 'Génération Quoi - Garde à Vous', 'uploader': 'Génération Quoi', }, @@ -224,17 +244,15 @@ class GenerationQuoiIE(InfoExtractor): # It uses Dailymotion 'skip_download': True, }, - 'skip': 'Only available from France', } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - name = mobj.group('name') - info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % name) - info_json = self._download_webpage(info_url, name) + display_id = self._match_id(url) + info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % 
display_id) + info_json = self._download_webpage(info_url, display_id) info = json.loads(info_json) return self.url_result('http://www.dailymotion.com/video/%s' % info['id'], - ie='Dailymotion') + ie='Dailymotion') class CultureboxIE(FranceTVBaseInfoExtractor): @@ -242,22 +260,28 @@ class CultureboxIE(FranceTVBaseInfoExtractor): _VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)' _TEST = { - 'url': 'http://culturebox.francetvinfo.fr/festivals/dans-les-jardins-de-william-christie/dans-les-jardins-de-william-christie-le-camus-162553', - 'md5': '5ad6dec1ffb2a3fbcb20cc4b744be8d6', + 'url': 'http://culturebox.francetvinfo.fr/live/musique/musique-classique/le-livre-vermeil-de-montserrat-a-la-cathedrale-delne-214511', + 'md5': '9b88dc156781c4dbebd4c3e066e0b1d6', 'info_dict': { - 'id': 'EV_22853', + 'id': 'EV_50111', 'ext': 'flv', - 'title': 'Dans les jardins de William Christie - Le Camus', - 'description': 'md5:4710c82315c40f0c865ca8b9a68b5299', - 'upload_date': '20140829', - 'timestamp': 1409317200, + 'title': "Le Livre Vermeil de Montserrat à la Cathédrale d'Elne", + 'description': 'md5:f8a4ad202e8fe533e2c493cc12e739d9', + 'upload_date': '20150320', + 'timestamp': 1426892400, + 'duration': 2760.9, }, } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) name = mobj.group('name') + webpage = self._download_webpage(url, name) + + if ">Ce live n'est plus disponible en replay<" in webpage: + raise ExtractorError('Video %s is not available' % name, expected=True) + video_id, catalogue = self._search_regex( r'"http://videos\.francetv\.fr/video/([^@]+@[^"]+)"', webpage, 'video id').split('@')