]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/theonion.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   7 from ..utils 
import ExtractorError
 
  10 class TheOnionIE(InfoExtractor
): 
  11     _VALID_URL 
= r
'(?x)https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<article_id>[0-9]+)/?' 
  13         'url': 'http://www.theonion.com/video/man-wearing-mm-jacket-gods-image,36918/', 
  14         'md5': '19eaa9a39cf9b9804d982e654dc791ee', 
  18             'title': 'Man Wearing M&M Jacket Apparently Made In God\'s Image', 
  19             'description': 'md5:cc12448686b5600baae9261d3e180910', 
  20             'thumbnail': 're:^https?://.*\.jpg\?\d+$', 
  24     def _real_extract(self
, url
): 
  25         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  26         article_id 
= mobj
.group('article_id') 
  28         webpage 
= self
._download
_webpage
(url
, article_id
) 
  30         video_id 
= self
._search
_regex
( 
  31             r
'"videoId":\s(\d+),', webpage
, 'video ID') 
  32         title 
= self
._og
_search
_title
(webpage
) 
  33         description 
= self
._og
_search
_description
(webpage
) 
  34         thumbnail 
= self
._og
_search
_thumbnail
(webpage
) 
  36         sources 
= re
.findall(r
'<source src="([^"]+)" type="([^"]+)"', webpage
) 
  39                 'No sources found for video %s' % video_id
, expected
=True) 
  42         for src
, type_ 
in sources
: 
  43             if type_ 
== 'video/mp4': 
  45                     'format_id': 'mp4_sd', 
  49             elif type_ 
== 'video/webm': 
  51                     'format_id': 'webm_sd', 
  55             elif type_ 
== 'application/x-mpegURL': 
  57                     self
._extract
_m
3u8_formats
(src
, video_id
, preference
=-1)) 
  60                     'Encountered unexpected format: %s' % type_
) 
  62         self
._sort
_formats
(formats
) 
  68             'thumbnail': thumbnail
, 
  69             'description': description
,