]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/funnyordie.py
f85e7de1496b07848e19b491b7ef5312652a0d7c
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
  14 class FunnyOrDieIE(InfoExtractor
): 
  15     _VALID_URL 
= r
'https?://(?:www\.)?funnyordie\.com/(?P<type>embed|articles|videos)/(?P<id>[0-9a-f]+)(?:$|[?#/])' 
  17         'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version', 
  18         'md5': 'bcd81e0c4f26189ee09be362ad6e6ba9', 
  22             'title': 'Heart-Shaped Box: Literal Video Version', 
  23             'description': 'md5:ea09a01bc9a1c46d9ab696c01747c338', 
  24             'thumbnail': r
're:^http:.*\.jpg$', 
  26             'timestamp': 1317904928, 
  27             'upload_date': '20111006', 
  31         'url': 'http://www.funnyordie.com/embed/e402820827', 
  35             'title': 'Please Use This Song (Jon Lajoie)', 
  36             'description': 'Please use this to sell something.  www.jonlajoie.com', 
  37             'thumbnail': r
're:^http:.*\.jpg$', 
  38             'timestamp': 1398988800, 
  39             'upload_date': '20140502', 
  42             'skip_download': True, 
  45         'url': 'http://www.funnyordie.com/articles/ebf5e34fc8/10-hours-of-walking-in-nyc-as-a-man', 
  46         'only_matching': True, 
  49     def _real_extract(self
, url
): 
  50         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  52         video_id 
= mobj
.group('id') 
  53         webpage 
= self
._download
_webpage
(url
, video_id
) 
  55         links 
= re
.findall(r
'<source src="([^"]+/v)[^"]+\.([^"]+)" type=\'video
', webpage) 
  57             raise ExtractorError('No media links available 
for %s' % video_id) 
  59         links.sort(key=lambda link: 1 if link[1] == 'mp4
' else 0) 
  61         m3u8_url = self._search_regex( 
  62             r'<source
[^
>]+src
=(["\'])(?P<url>.+?/master\.m3u8[^"\']*)\
1', 
  63             webpage, 'm3u8 url
', group='url
') 
  67         m3u8_formats = self._extract_m3u8_formats( 
  68             m3u8_url, video_id, 'mp4
', 'm3u8_native
', 
  69             m3u8_id='hls
', fatal=False) 
  70         source_formats = list(filter( 
  71             lambda f: f.get('vcodec
') != 'none
', m3u8_formats)) 
  73         bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d
+)(?
=[,/])', m3u8_url)] 
  77             self._sort_formats(source_formats) 
  79         for bitrate, f in zip(bitrates, source_formats or [{}] * len(bitrates)): 
  80             for path, ext in links: 
  85                             [(k, v) for k, v in ff.items() 
  86                              if k in ('height
', 'width
', 'format_id
')]) 
  88                         'format_id
': ff['format_id
'].replace('hls
', ext), 
  94                         'format_id
': '%s-%d' % (ext, bitrate), 
  97                 ff['url
'] = self._proto_relative_url( 
  98                     '%s%d.%s' % (path, bitrate, ext)) 
 100         self._check_formats(formats, video_id) 
 102         formats.extend(m3u8_formats) 
 104             formats, field_preference=('height
', 'width
', 'tbr
', 'format_id
')) 
 107         for src, src_lang in re.findall(r'<track kind
="captions" src
="([^"]+)" srclang="([^
"]+)"', webpage): 
 108             subtitles[src_lang] = [{ 
 109                 'ext
': src.split('/')[-1], 
 110                 'url
': 'http
://www
.funnyordie
.com
%s' % src, 
 113         timestamp = unified_timestamp(self._html_search_meta( 
 114             'uploadDate
', webpage, 'timestamp
', default=None)) 
 116         uploader = self._html_search_regex( 
 117             r'<h\d
[^
>]+\bclass
=["\']channel-preview-name[^>]+>(.+?)</h', 
 118             webpage, 'uploader', default=None) 
 120         title, description, thumbnail, duration = [None] * 4 
 122         medium = self._parse_json( 
 124                 r'jsonMedium\s*=\s*({.+?});', webpage, 'JSON medium', 
 126             video_id, fatal=False) 
 128             title = medium.get('title') 
 129             duration = float_or_none(medium.get('duration')) 
 131                 timestamp = unified_timestamp(medium.get('publishDate')) 
 133         post = self._parse_json( 
 135                 r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details', 
 137             video_id, fatal=False) 
 140                 title = post.get('name') 
 141             description = post.get('description') 
 142             thumbnail = post.get('picture') 
 145             title = self._og_search_title(webpage) 
 147             description = self._og_search_description(webpage) 
 149             duration = int_or_none(self._html_search_meta( 
 150                 ('video:duration', 'duration'), webpage, 'duration', default=False)) 
 155             'description': description, 
 156             'thumbnail': thumbnail, 
 157             'uploader': uploader, 
 158             'timestamp': timestamp, 
 159             'duration': duration, 
 161             'subtitles': subtitles,