]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/tv5mondeplus.py
2 from __future__
import unicode_literals
4 from .common
import InfoExtractor
13 class TV5MondePlusIE(InfoExtractor
):
15 _VALID_URL
= r
'https?://(?:www\.)?(?:tv5mondeplus|revoir\.tv5monde)\.com/toutes-les-videos/[^/]+/(?P<id>[^/?#]+)'
18 'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/rendez-vous-a-atlit',
19 'md5': '8cbde5ea7b296cf635073e27895e227f',
21 'id': '822a4756-0712-7329-1859-a13ac7fd1407',
22 'display_id': 'rendez-vous-a-atlit',
24 'title': 'Rendez-vous à Atlit',
25 'description': 'md5:2893a4c5e1dbac3eedff2d87956e4efb',
26 'upload_date': '20200130',
30 'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/c-est-la-vie-ennemie-juree',
32 'id': '0df7007c-4900-3936-c601-87a13a93a068',
33 'display_id': 'c-est-la-vie-ennemie-juree',
35 'title': "C'est la vie - Ennemie jurée",
36 'description': 'md5:dfb5c63087b6f35fe0cc0af4fe44287e',
37 'upload_date': '20200130',
38 'series': "C'est la vie",
39 'episode': 'Ennemie jurée',
42 'skip_download': True,
45 'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/neuf-jours-en-hiver-neuf-jours-en-hiver',
46 'only_matching': True,
48 'url': 'https://revoir.tv5monde.com/toutes-les-videos/info-societe/le-journal-de-la-rts-edition-du-30-01-20-19h30',
49 'only_matching': True,
53 def _real_extract(self
, url
):
54 display_id
= self
._match
_id
(url
)
55 webpage
= self
._download
_webpage
(url
, display_id
)
57 if ">Ce programme n'est malheureusement pas disponible pour votre zone géographique.<" in webpage
:
58 self
.raise_geo_restricted(countries
=['FR'])
60 title
= episode
= self
._html
_search
_regex
(r
'<h1>([^<]+)', webpage
, 'title')
61 vpl_data
= extract_attributes(self
._search
_regex
(
62 r
'(<[^>]+class="video_player_loader"[^>]+>)',
63 webpage
, 'video player loader'))
65 video_files
= self
._parse
_json
(
66 vpl_data
['data-broadcast'], display_id
).get('files', [])
68 for video_file
in video_files
:
69 v_url
= video_file
.get('url')
72 video_format
= video_file
.get('format') or determine_ext(v_url
)
73 if video_format
== 'm3u8':
74 formats
.extend(self
._extract
_m
3u8_formats
(
75 v_url
, display_id
, 'mp4', 'm3u8_native',
76 m3u8_id
='hls', fatal
=False))
80 'format_id': video_format
,
82 self
._sort
_formats
(formats
)
84 description
= self
._html
_search
_regex
(
85 r
'(?s)<div[^>]+class=["\']episode
-texte
[^
>]+>(.+?
)</div
>', webpage,
86 'description
', fatal=False)
88 series = self._html_search_regex(
89 r'<p
[^
>]+class=["\']episode-emission[^>]+>([^<]+)', webpage,
90 'series', default=None)
92 if series and series != title:
93 title = '%s - %s' % (series, title)
95 upload_date = self._search_regex(
96 r'(?:date_publication|publish_date)["\']\s
*:\s
*["\'](\d{4}_\d{2}_\d{2})',
97 webpage, 'upload date', default=None)
99 upload_date = upload_date.replace('_', '')
101 video_id = self._search_regex(
102 (r'data-guid=["\']([\da
-f
]{8}
-[\da
-f
]{4}
-[\da
-f
]{4}
-[\da
-f
]{4}
-[\da
-f
]{12}
)',
103 r'id_contenu
["\']\s:\s*(\d+)'), webpage, 'video id',
108 'display_id': display_id,
110 'description': description,
111 'thumbnail': vpl_data.get('data-image'),
112 'duration': int_or_none(vpl_data.get('data-duration')) or parse_duration(self._html_search_meta('duration', webpage)),
113 'upload_date': upload_date,