]> Raphaƫl G. Git Repositories - youtubedl/blobdiff - youtube_dl/extractor/funnyordie.py
debian/control: Remove trailing whitespace at EOF.
[youtubedl] / youtube_dl / extractor / funnyordie.py
index f2928b5fecb68df5429a90ca079faac6faf93bb2..f85e7de1496b07848e19b491b7ef5312652a0d7c 100644 (file)
@@ -1,10 +1,14 @@
 from __future__ import unicode_literals
 
 from __future__ import unicode_literals
 
-import json
 import re
 
 from .common import InfoExtractor
 import re
 
 from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+    ExtractorError,
+    float_or_none,
+    int_or_none,
+    unified_timestamp,
+)
 
 
 class FunnyOrDieIE(InfoExtractor):
 
 
 class FunnyOrDieIE(InfoExtractor):
@@ -17,7 +21,11 @@ class FunnyOrDieIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Heart-Shaped Box: Literal Video Version',
             'description': 'md5:ea09a01bc9a1c46d9ab696c01747c338',
             'ext': 'mp4',
             'title': 'Heart-Shaped Box: Literal Video Version',
             'description': 'md5:ea09a01bc9a1c46d9ab696c01747c338',
-            'thumbnail': 're:^http:.*\.jpg$',
+            'thumbnail': r're:^http:.*\.jpg$',
+            'uploader': 'DASjr',
+            'timestamp': 1317904928,
+            'upload_date': '20111006',
+            'duration': 318.3,
         },
     }, {
         'url': 'http://www.funnyordie.com/embed/e402820827',
         },
     }, {
         'url': 'http://www.funnyordie.com/embed/e402820827',
@@ -26,7 +34,9 @@ class FunnyOrDieIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Please Use This Song (Jon Lajoie)',
             'description': 'Please use this to sell something.  www.jonlajoie.com',
             'ext': 'mp4',
             'title': 'Please Use This Song (Jon Lajoie)',
             'description': 'Please use this to sell something.  www.jonlajoie.com',
-            'thumbnail': 're:^http:.*\.jpg$',
+            'thumbnail': r're:^http:.*\.jpg$',
+            'timestamp': 1398988800,
+            'upload_date': '20140502',
         },
         'params': {
             'skip_download': True,
         },
         'params': {
             'skip_download': True,
@@ -58,8 +68,7 @@ class FunnyOrDieIE(InfoExtractor):
             m3u8_url, video_id, 'mp4', 'm3u8_native',
             m3u8_id='hls', fatal=False)
         source_formats = list(filter(
             m3u8_url, video_id, 'mp4', 'm3u8_native',
             m3u8_id='hls', fatal=False)
         source_formats = list(filter(
-            lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
-            m3u8_formats))
+            lambda f: f.get('vcodec') != 'none', m3u8_formats))
 
         bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)(?=[,/])', m3u8_url)]
         bitrates.sort()
 
         bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)(?=[,/])', m3u8_url)]
         bitrates.sort()
@@ -101,15 +110,53 @@ class FunnyOrDieIE(InfoExtractor):
                 'url': 'http://www.funnyordie.com%s' % src,
             }]
 
                 'url': 'http://www.funnyordie.com%s' % src,
             }]
 
-        post_json = self._search_regex(
-            r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details')
-        post = json.loads(post_json)
+        timestamp = unified_timestamp(self._html_search_meta(
+            'uploadDate', webpage, 'timestamp', default=None))
+
+        uploader = self._html_search_regex(
+            r'<h\d[^>]+\bclass=["\']channel-preview-name[^>]+>(.+?)</h',
+            webpage, 'uploader', default=None)
+
+        title, description, thumbnail, duration = [None] * 4
+
+        medium = self._parse_json(
+            self._search_regex(
+                r'jsonMedium\s*=\s*({.+?});', webpage, 'JSON medium',
+                default='{}'),
+            video_id, fatal=False)
+        if medium:
+            title = medium.get('title')
+            duration = float_or_none(medium.get('duration'))
+            if not timestamp:
+                timestamp = unified_timestamp(medium.get('publishDate'))
+
+        post = self._parse_json(
+            self._search_regex(
+                r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details',
+                default='{}'),
+            video_id, fatal=False)
+        if post:
+            if not title:
+                title = post.get('name')
+            description = post.get('description')
+            thumbnail = post.get('picture')
+
+        if not title:
+            title = self._og_search_title(webpage)
+        if not description:
+            description = self._og_search_description(webpage)
+        if not duration:
+            duration = int_or_none(self._html_search_meta(
+                ('video:duration', 'duration'), webpage, 'duration', default=False))
 
         return {
             'id': video_id,
 
         return {
             'id': video_id,
-            'title': post['name'],
-            'description': post.get('description'),
-            'thumbnail': post.get('picture'),
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'timestamp': timestamp,
+            'duration': duration,
             'formats': formats,
             'subtitles': subtitles,
         }
             'formats': formats,
             'subtitles': subtitles,
         }