]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/lrt.py
2 from __future__
import unicode_literals
4 from .common
import InfoExtractor
12 class LRTIE(InfoExtractor
):
14 _VALID_URL
= r
'https?://(?:www\.)?lrt\.lt/mediateka/irasas/(?P<id>[0-9]+)'
16 'url': 'http://www.lrt.lt/mediateka/irasas/54391/',
20 'title': 'Septynios Kauno dienos',
21 'description': 'md5:24d84534c7dc76581e59f5689462411a',
27 'skip_download': True, # m3u8 download
31 def _real_extract(self
, url
):
32 video_id
= self
._match
_id
(url
)
33 webpage
= self
._download
_webpage
(url
, video_id
)
35 title
= remove_end(self
._og
_search
_title
(webpage
), ' - LRT')
36 m3u8_url
= self
._search
_regex
(
37 r
'file\s*:\s*(["\'])(?P
<url
>.+?
)\
1\s
*\
+\s
*location\
.hash\
.substring\
(1\
)',
38 webpage, 'm3u8 url
', group='url
')
39 formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4
')
40 self._sort_formats(formats)
42 thumbnail = self._og_search_thumbnail(webpage)
43 description = self._og_search_description(webpage)
44 duration = parse_duration(self._search_regex(
45 r'var\s
+record_len\s
*=\s
*(["\'])(?P<duration>[0-9]+:[0-9]+:[0-9]+)\1',
46 webpage, 'duration', default=None, group='duration'))
48 view_count = int_or_none(self._html_search_regex(
49 r'<div[^>]+class=(["\']).*?record
-desc
-seen
.*?\
1[^
>]*>(?P
<count
>.+?
)</div
>',
50 webpage, 'view count
', fatal=False, group='count
'))
51 like_count = int_or_none(self._search_regex(
52 r'<span
[^
>]+id=(["\'])flikesCount.*?\1>(?P<count>\d+)<',
53 webpage, 'like count', fatal=False, group='count'))
59 'thumbnail': thumbnail,
60 'description': description,
62 'view_count': view_count,
63 'like_count': like_count,