]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/abcnews.py
   2 from __future__ 
import unicode_literals
 
   9 from .common 
import InfoExtractor
 
  10 from .youtube 
import YoutubeIE
 
  11 from ..compat 
import compat_urlparse
 
  14 class AbcNewsVideoIE(AMPIE
): 
  15     IE_NAME 
= 'abcnews:video' 
  20                             [^/]+/video/(?P<display_id>[0-9a-z-]+)-| 
  27         'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932', 
  31             'display_id': 'week-exclusive-irans-foreign-minister-zarif', 
  32             'title': '\'This Week\' Exclusive: Iran\'s Foreign Minister Zarif', 
  33             'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.', 
  35             'thumbnail': r
're:^https?://.*\.jpg$', 
  39             'skip_download': True, 
  42         'url': 'http://abcnews.go.com/video/embed?id=46979033', 
  43         'only_matching': True, 
  45         'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478', 
  46         'only_matching': True, 
  49     def _real_extract(self
, url
): 
  50         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  51         display_id 
= mobj
.group('display_id') 
  52         video_id 
= mobj
.group('id') 
  53         info_dict 
= self
._extract
_feed
_info
( 
  54             'http://abcnews.go.com/video/itemfeed?id=%s' % video_id
) 
  57             'display_id': display_id
, 
  62 class AbcNewsIE(InfoExtractor
): 
  64     _VALID_URL 
= r
'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)' 
  67         'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY', 
  71             'display_id': 'dramatic-video-rare-death-job-america', 
  72             'title': 'Occupational Hazards', 
  73             'description': 'Nightline investigates the dangers that lurk at various jobs.', 
  74             'thumbnail': r
're:^https?://.*\.jpg$', 
  75             'upload_date': '20100428', 
  76             'timestamp': 1272412800, 
  78         'add_ie': ['AbcNewsVideo'], 
  80         'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818', 
  84             'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016', 
  85             'title': 'Justin Timberlake Drops Hints For Secret Single', 
  86             'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.', 
  87             'upload_date': '20160515', 
  88             'timestamp': 1463329500, 
  92             'skip_download': True, 
  93             # The embedded YouTube video is blocked due to copyright issues 
  94             'playlist_items': '1', 
  96         'add_ie': ['AbcNewsVideo'], 
  98         'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343', 
  99         'only_matching': True, 
 102     def _real_extract(self
, url
): 
 103         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 104         display_id 
= mobj
.group('display_id') 
 105         video_id 
= mobj
.group('id') 
 107         webpage 
= self
._download
_webpage
(url
, video_id
) 
 108         video_url 
= self
._search
_regex
( 
 109             r
'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage
, 'video URL') 
 110         full_video_url 
= compat_urlparse
.urljoin(url
, video_url
) 
 112         youtube_url 
= YoutubeIE
._extract
_url
(webpage
) 
 115         date_str 
= self
._html
_search
_regex
( 
 116             r
'<span[^>]+class="timestamp">([^<]+)</span>', 
 117             webpage
, 'timestamp', fatal
=False) 
 120             if date_str
.endswith(' ET'):  # Eastern Time 
 122                 date_str 
= date_str
[:-3] 
 123             date_formats 
= ['%b. %d, %Y', '%b %d, %Y, %I:%M %p'] 
 124             for date_format 
in date_formats
: 
 126                     timestamp 
= calendar
.timegm(time
.strptime(date_str
.strip(), date_format
)) 
 129             if timestamp 
is not None: 
 130                 timestamp 
-= tz_offset 
* 3600 
 133             '_type': 'url_transparent', 
 134             'ie_key': AbcNewsVideoIE
.ie_key(), 
 135             'url': full_video_url
, 
 137             'display_id': display_id
, 
 138             'timestamp': timestamp
, 
 142             entries 
= [entry
, self
.url_result(youtube_url
, ie
=YoutubeIE
.ie_key())] 
 143             return self
.playlist_result(entries
)