1 from __future__
import unicode_literals
5 from .common
import InfoExtractor
6 from ..utils
import parse_iso8601
9 class Abc7NewsIE(InfoExtractor
):
10 _VALID_URL
= r
'https?://abc7news\.com(?:/[^/]+/(?P<display_id>[^/]+))?/(?P<id>\d+)'
13 'url': 'http://abc7news.com/entertainment/east-bay-museum-celebrates-vintage-synthesizers/472581/',
16 'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
18 'title': 'East Bay museum celebrates history of synthesized music',
19 'description': 'md5:a4f10fb2f2a02565c1749d4adbab4b10',
20 'thumbnail': 're:^https?://.*\.jpg$',
21 'timestamp': 1421123075,
22 'upload_date': '20150113',
23 'uploader': 'Jonathan Bloom',
27 'skip_download': True,
31 'url': 'http://abc7news.com/472581',
32 'only_matching': True,
36 def _real_extract(self
, url
):
37 mobj
= re
.match(self
._VALID
_URL
, url
)
38 video_id
= mobj
.group('id')
39 display_id
= mobj
.group('display_id') or video_id
41 webpage
= self
._download
_webpage
(url
, display_id
)
43 m3u8
= self
._html
_search
_meta
(
44 'contentURL', webpage
, 'm3u8 url', fatal
=True)
46 formats
= self
._extract
_m
3u8_formats
(m3u8
, display_id
, 'mp4')
47 self
._sort
_formats
(formats
)
49 title
= self
._og
_search
_title
(webpage
).strip()
50 description
= self
._og
_search
_description
(webpage
).strip()
51 thumbnail
= self
._og
_search
_thumbnail
(webpage
)
52 timestamp
= parse_iso8601(self
._search
_regex
(
53 r
'<div class="meta">\s*<time class="timeago" datetime="([^"]+)">',
54 webpage
, 'upload date', fatal
=False))
55 uploader
= self
._search
_regex
(
56 r
'rel="author">([^<]+)</a>',
57 webpage
, 'uploader', default
=None)
61 'display_id': display_id
,
63 'description': description
,
64 'thumbnail': thumbnail
,
65 'timestamp': timestamp
,