]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/theonion.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   9 class TheOnionIE(InfoExtractor
): 
  10     _VALID_URL 
= r
'https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<id>[0-9]+)/?' 
  12         'url': 'http://www.theonion.com/video/man-wearing-mm-jacket-gods-image,36918/', 
  13         'md5': '19eaa9a39cf9b9804d982e654dc791ee', 
  17             'title': 'Man Wearing M&M Jacket Apparently Made In God\'s Image', 
  18             'description': 'md5:cc12448686b5600baae9261d3e180910', 
  19             'thumbnail': 're:^https?://.*\.jpg\?\d+$', 
  23     def _real_extract(self
, url
): 
  24         display_id 
= self
._match
_id
(url
) 
  25         webpage 
= self
._download
_webpage
(url
, display_id
) 
  27         video_id 
= self
._search
_regex
( 
  28             r
'"videoId":\s(\d+),', webpage
, 'video ID') 
  29         title 
= self
._og
_search
_title
(webpage
) 
  30         description 
= self
._og
_search
_description
(webpage
) 
  31         thumbnail 
= self
._og
_search
_thumbnail
(webpage
) 
  33         sources 
= re
.findall(r
'<source src="([^"]+)" type="([^"]+)"', webpage
) 
  35         for src
, type_ 
in sources
: 
  36             if type_ 
== 'video/mp4': 
  38                     'format_id': 'mp4_sd', 
  42             elif type_ 
== 'video/webm': 
  44                     'format_id': 'webm_sd', 
  48             elif type_ 
== 'application/x-mpegURL': 
  50                     self
._extract
_m
3u8_formats
(src
, display_id
, preference
=-1)) 
  53                     'Encountered unexpected format: %s' % type_
) 
  54         self
._sort
_formats
(formats
) 
  58             'display_id': display_id
, 
  61             'thumbnail': thumbnail
, 
  62             'description': description
,