]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/tvnet.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   7 from ..compat 
import compat_str
 
  14 class TVNetIE(InfoExtractor
): 
  15     _VALID_URL 
= r
'https?://(?:[^/]+)\.tvnet\.gov\.vn/[^/]+/(?:\d+/)?(?P<id>\d+)(?:/|$)' 
  18         'url': 'http://de.tvnet.gov.vn/video/109788/vtv1---bac-tuyet-tai-lao-cai-va-ha-giang/tin-nong-24h', 
  19         'md5': 'b4d7abe0252c9b47774760b7519c7558', 
  23             'title': 'VTV1 - Bắc tuyết tại Lào Cai và Hà Giang', 
  24             'thumbnail': r
're:(?i)https?://.*\.(?:jpg|png)', 
  30         'url': 'http://vn.tvnet.gov.vn/radio/27017/vov1---ban-tin-chieu-10062018/doi-song-va-xa-hoi', 
  31         'md5': 'b5875ce9b0a2eecde029216d0e6db2ae', 
  35             'title': 'VOV1 - Bản tin chiều (10/06/2018)', 
  36             'thumbnail': r
're:(?i)https?://.*\.(?:jpg|png)', 
  40         'url': 'http://us.tvnet.gov.vn/video/118023/129999/ngay-0705', 
  44             'title': 'VTV1 - Quốc hội với cử tri (11/06/2018)', 
  45             'thumbnail': r
're:(?i)https?://.*\.(?:jpg|png)', 
  49             'skip_download': True, 
  53         'url': 'http://us.tvnet.gov.vn/kenh-truyen-hinh/1011/vtv1', 
  57             'title': r
're:^VTV1 \| LiveTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 
  58             'thumbnail': r
're:(?i)https?://.*\.(?:jpg|png)', 
  62             'skip_download': True, 
  66         'url': 'http://vn.tvnet.gov.vn/kenh-truyen-hinh/1014', 
  70             'title': r
're:VOV1 \| LiveTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 
  71             'thumbnail': r
're:(?i)https?://.*\.(?:jpg|png)', 
  75             'skip_download': True, 
  78         'url': 'http://us.tvnet.gov.vn/phim/6136/25510/vtv3---ca-mot-doi-an-oan-tap-1-50/phim-truyen-hinh', 
  79         'only_matching': True, 
  82     def _real_extract(self
, url
): 
  83         video_id 
= self
._match
_id
(url
) 
  85         webpage 
= self
._download
_webpage
(url
, video_id
) 
  87         title 
= self
._og
_search
_title
( 
  88             webpage
, default
=None) or self
._html
_search
_meta
( 
  89             'title', webpage
, default
=None) or self
._search
_regex
( 
  90             r
'<title>([^<]+)<', webpage
, 'title') 
  91         title 
= re
.sub(r
'\s*-\s*TV Net\s*$', '', title
) 
  93         if '/video/' in url 
or '/radio/' in url
: 
  95         elif '/kenh-truyen-hinh/' in url
: 
 100         data_file 
= unescapeHTML(self
._search
_regex
( 
 101             r
'data-file=(["\'])(?P
<url
>(?
:https?
:)?
//.+?
)\
1', webpage, 
 102             'data 
file', group='url
')) 
 106         for stream in self._download_json(data_file, video_id): 
 107             if not isinstance(stream, dict): 
 109             stream_url = stream.get('url
') 
 110             if (stream_url in stream_urls or not stream_url or 
 111                     not isinstance(stream_url, compat_str)): 
 113             stream_urls.add(stream_url) 
 114             formats.extend(self._extract_m3u8_formats( 
 115                 stream_url, video_id, 'mp4
', 
 116                 entry_protocol='m3u8
' if is_live else 'm3u8_native
', 
 117                 m3u8_id='hls
', fatal=False)) 
 118         self._sort_formats(formats) 
 120         # better support for radio streams 
 121         if title.startswith('VOV
'): 
 128         thumbnail = self._og_search_thumbnail( 
 129             webpage, default=None) or unescapeHTML( 
 131                 r'data
-image
=(["\'])(?P<url>(?:https?:)?//.+?)\1', webpage, 
 132                 'thumbnail', default=None, group='url')) 
 135             title = self._live_title(title) 
 137         view_count = int_or_none(self._search_regex( 
 138             r'(?s)<div[^>]+\bclass=["\'].*?view
-count
[^
>]+>.*?
(\d
+).*?
</div
>', 
 139             webpage, 'view count
', default=None)) 
 144             'thumbnail
': thumbnail, 
 146             'view_count
': view_count,