]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/abc7news.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
   6 from ..utils 
import parse_iso8601
 
   9 class Abc7NewsIE(InfoExtractor
): 
  10     _VALID_URL 
= r
'https?://abc7news\.com(?:/[^/]+/(?P<display_id>[^/]+))?/(?P<id>\d+)' 
  13             'url': 'http://abc7news.com/entertainment/east-bay-museum-celebrates-vintage-synthesizers/472581/', 
  16                 'display_id': 'east-bay-museum-celebrates-vintage-synthesizers', 
  18                 'title': 'East Bay museum celebrates history of synthesized music', 
  19                 'description': 'md5:a4f10fb2f2a02565c1749d4adbab4b10', 
  20                 'thumbnail': 're:^https?://.*\.jpg$', 
  21                 'timestamp': 1421123075, 
  22                 'upload_date': '20150113', 
  23                 'uploader': 'Jonathan Bloom', 
  27                 'skip_download': True, 
  31             'url': 'http://abc7news.com/472581', 
  32             'only_matching': True, 
  36     def _real_extract(self
, url
): 
  37         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  38         video_id 
= mobj
.group('id') 
  39         display_id 
= mobj
.group('display_id') or video_id
 
  41         webpage 
= self
._download
_webpage
(url
, display_id
) 
  43         m3u8 
= self
._html
_search
_meta
( 
  44             'contentURL', webpage
, 'm3u8 url', fatal
=True) 
  46         formats 
= self
._extract
_m
3u8_formats
(m3u8
, display_id
, 'mp4') 
  47         self
._sort
_formats
(formats
) 
  49         title 
= self
._og
_search
_title
(webpage
).strip() 
  50         description 
= self
._og
_search
_description
(webpage
).strip() 
  51         thumbnail 
= self
._og
_search
_thumbnail
(webpage
) 
  52         timestamp 
= parse_iso8601(self
._search
_regex
( 
  53             r
'<div class="meta">\s*<time class="timeago" datetime="([^"]+)">', 
  54             webpage
, 'upload date', fatal
=False)) 
  55         uploader 
= self
._search
_regex
( 
  56             r
'rel="author">([^<]+)</a>', 
  57             webpage
, 'uploader', default
=None) 
  61             'display_id': display_id
, 
  63             'description': description
, 
  64             'thumbnail': thumbnail
, 
  65             'timestamp': timestamp
,