]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/breakcom.py
70d16767f19966fe625f8496f334fde1a15929f8
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
   6 from .youtube 
import YoutubeIE
 
   7 from ..compat 
import compat_str
 
   8 from ..utils 
import int_or_none
 
  11 class BreakIE(InfoExtractor
): 
  12     _VALID_URL 
= r
'https?://(?:www\.)?break\.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)' 
  14         'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056', 
  18             'title': 'When Girls Act Like D-Bags', 
  23         'url': 'http://www.break.com/video/someone-forgot-boat-brakes-work', 
  27             'title': 'Whale Watching Boat Crashing Into San Diego Dock', 
  28             'description': 'md5:afc1b2772f0a8468be51dd80eb021069', 
  29             'upload_date': '20160331', 
  30             'uploader': 'Steve Holden', 
  31             'uploader_id': 'sdholden07', 
  34             'skip_download': True, 
  37         'url': 'http://www.break.com/video/ugc/baby-flex-2773063', 
  38         'only_matching': True, 
  41     def _real_extract(self
, url
): 
  42         display_id
, video_id 
= re
.match(self
._VALID
_URL
, url
).groups() 
  44         webpage 
= self
._download
_webpage
(url
, display_id
) 
  46         youtube_url 
= YoutubeIE
._extract
_url
(webpage
) 
  48             return self
.url_result(youtube_url
, ie
=YoutubeIE
.ie_key()) 
  50         content 
= self
._parse
_json
( 
  52                 r
'(?s)content["\']\s
*:\s
*(\
[.+?\
])\s
*[,\n]', webpage, 
  58             video_url = video.get('url
') 
  59             if not video_url or not isinstance(video_url, compat_str): 
  61             bitrate = int_or_none(self._search_regex( 
  62                 r'(\d
+)_kbps
', video_url, 'tbr
', default=None)) 
  65                 'format_id
': 'http
-%d' % bitrate if bitrate else 'http
', 
  68         self._sort_formats(formats) 
  70         title = self._search_regex( 
  71             (r'title
["\']\s*:\s*(["\'])(?P
<value
>(?
:(?
!\
1).)+)\
1', 
  72              r'<h1
[^
>]*>(?P
<value
>[^
<]+)'), webpage, 'title
', group='value
') 
  75             return int_or_none(self._search_regex( 
  76                 r'%s["\']\s*:\s*["\'](\d
+)' % key, webpage, name, 
  79         age_limit = get('ratings
', 'age limit
') 
  80         video_id = video_id or get('pid
', 'video 
id') or display_id 
  84             'display_id
': display_id, 
  86             'thumbnail
': self._og_search_thumbnail(webpage), 
  87             'age_limit
': age_limit,