]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/abcnews.py
   2 from __future__ 
import unicode_literals
 
   9 from .common 
import InfoExtractor
 
  10 from ..compat 
import compat_urlparse
 
  13 class AbcNewsVideoIE(AMPIE
): 
  14     IE_NAME 
= 'abcnews:video' 
  15     _VALID_URL 
= r
'https?://abcnews\.go\.com/[^/]+/video/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)' 
  18         'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932', 
  22             'display_id': 'week-exclusive-irans-foreign-minister-zarif', 
  23             'title': '\'This Week\' Exclusive: Iran\'s Foreign Minister Zarif', 
  24             'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.', 
  26             'thumbnail': r
're:^https?://.*\.jpg$', 
  30             'skip_download': True, 
  33         'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478', 
  34         'only_matching': True, 
  37     def _real_extract(self
, url
): 
  38         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  39         display_id 
= mobj
.group('display_id') 
  40         video_id 
= mobj
.group('id') 
  41         info_dict 
= self
._extract
_feed
_info
( 
  42             'http://abcnews.go.com/video/itemfeed?id=%s' % video_id
) 
  45             'display_id': display_id
, 
  50 class AbcNewsIE(InfoExtractor
): 
  52     _VALID_URL 
= r
'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)' 
  55         'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY', 
  59             'display_id': 'dramatic-video-rare-death-job-america', 
  60             'title': 'Occupational Hazards', 
  61             'description': 'Nightline investigates the dangers that lurk at various jobs.', 
  62             'thumbnail': r
're:^https?://.*\.jpg$', 
  63             'upload_date': '20100428', 
  64             'timestamp': 1272412800, 
  66         'add_ie': ['AbcNewsVideo'], 
  68         'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818', 
  72             'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016', 
  73             'title': 'Justin Timberlake Drops Hints For Secret Single', 
  74             'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.', 
  75             'upload_date': '20160515', 
  76             'timestamp': 1463329500, 
  80             'skip_download': True, 
  81             # The embedded YouTube video is blocked due to copyright issues 
  82             'playlist_items': '1', 
  84         'add_ie': ['AbcNewsVideo'], 
  86         'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343', 
  87         'only_matching': True, 
  90     def _real_extract(self
, url
): 
  91         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  92         display_id 
= mobj
.group('display_id') 
  93         video_id 
= mobj
.group('id') 
  95         webpage 
= self
._download
_webpage
(url
, video_id
) 
  96         video_url 
= self
._search
_regex
( 
  97             r
'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage
, 'video URL') 
  98         full_video_url 
= compat_urlparse
.urljoin(url
, video_url
) 
 100         youtube_url 
= self
._html
_search
_regex
( 
 101             r
'<iframe[^>]+src="(https://www\.youtube\.com/embed/[^"]+)"', 
 102             webpage
, 'YouTube URL', default
=None) 
 105         date_str 
= self
._html
_search
_regex
( 
 106             r
'<span[^>]+class="timestamp">([^<]+)</span>', 
 107             webpage
, 'timestamp', fatal
=False) 
 110             if date_str
.endswith(' ET'):  # Eastern Time 
 112                 date_str 
= date_str
[:-3] 
 113             date_formats 
= ['%b. %d, %Y', '%b %d, %Y, %I:%M %p'] 
 114             for date_format 
in date_formats
: 
 116                     timestamp 
= calendar
.timegm(time
.strptime(date_str
.strip(), date_format
)) 
 119             if timestamp 
is not None: 
 120                 timestamp 
-= tz_offset 
* 3600 
 123             '_type': 'url_transparent', 
 124             'ie_key': AbcNewsVideoIE
.ie_key(), 
 125             'url': full_video_url
, 
 127             'display_id': display_id
, 
 128             'timestamp': timestamp
, 
 132             entries 
= [entry
, self
.url_result(youtube_url
, 'Youtube')] 
 133             return self
.playlist_result(entries
)