Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/cbslocal.py
Update changelog.
[youtubedl] / youtube_dl / extractor / cbslocal.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .anvato import AnvatoIE
5 from .sendtonews import SendtoNewsIE
6 from ..compat import compat_urlparse
7 from ..utils import unified_timestamp
8
9
class CBSLocalIE(AnvatoIE):
    """Extractor for dated article pages on ``*.cbslocal.com``.

    A page that carries a SendtoNews embed is delegated to ``SendtoNewsIE``;
    any other page is processed with the Anvato helper inherited from
    ``AnvatoIE``, and the result is completed with the page's display id
    and the timestamp parsed from its ``entry-date`` element.
    """

    _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'

    _TESTS = [{
        # Anvato backend
        'url': 'http://losangeles.cbslocal.com/2016/05/16/safety-advocates-say-fatal-car-seat-failures-are-public-health-crisis',
        'md5': 'f0ee3081e3843f575fccef901199b212',
        'info_dict': {
            'id': '3401037',
            'ext': 'mp4',
            'title': 'Safety Advocates Say Fatal Car Seat Failures Are \'Public Health Crisis\'',
            'description': 'Collapsing seats have been the focus of scrutiny for decades, though experts say remarkably little has been done to address the issue. Randy Paige reports.',
            'thumbnail': 're:^https?://.*',
            'timestamp': 1463440500,
            'upload_date': '20160516',
            'subtitles': {
                'en': 'mincount:5',
            },
            'categories': [
                'Stations\\Spoken Word\\KCBSTV',
                'Syndication\\MSN',
                'Syndication\\NDN',
                'Syndication\\AOL',
                'Syndication\\Yahoo',
                'Syndication\\Tribune',
                'Syndication\\Curb.tv',
                'Content\\News'
            ],
        },
    }, {
        # SendtoNews embed
        'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/',
        'info_dict': {
            'id': 'GxfCe0Zo7D-175909-5588',
        },
        'playlist_count': 9,
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Return the extraction result for the article page at *url*.

        The slug matched by ``_VALID_URL`` serves as the display id. If
        ``SendtoNewsIE._extract_url`` finds an embed in the page, a URL
        result pointing at it is returned; otherwise the info dict produced
        by ``_extract_anvato_videos`` is returned with ``display_id`` and
        ``timestamp`` set.
        """
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        embed_url = SendtoNewsIE._extract_url(page)
        if embed_url:
            # Resolve the embed URL against the page URL before delegating.
            target = compat_urlparse.urljoin(url, embed_url)
            return self.url_result(target, ie=SendtoNewsIE.ie_key())

        result = self._extract_anvato_videos(page, slug)

        # The date lookup is made with fatal=False, so the value handed to
        # unified_timestamp may not be a usable date string.
        released = self._html_search_regex(
            r'class="entry-date">([^<]+)<', page, 'released date',
            fatal=False)

        extra_fields = {
            'display_id': slug,
            'timestamp': unified_timestamp(released),
        }
        result.update(extra_fields)
        return result