# Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/theonion.py
2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
7 from ..utils
import ExtractorError
10 class TheOnionIE(InfoExtractor
):
# URL pattern for The Onion video pages.  After ``/video/`` the slug is every
# character up to the first comma; the digits following that comma are
# captured as the named group ``article_id``.  A trailing slash is optional.
# The pattern carries the inline verbose flag ``(?x)`` but contains no
# whitespace or ``#`` characters, so the flag does not alter what it matches.
_VALID_URL = r'(?x)https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<article_id>[0-9]+)/?'
13 'url': 'http://www.theonion.com/video/man-wearing-mm-jacket-gods-image,36918/',
14 'md5': '19eaa9a39cf9b9804d982e654dc791ee',
18 'title': 'Man Wearing M&M Jacket Apparently Made In God\'s Image',
19 'description': 'md5:cc12448686b5600baae9261d3e180910',
20 'thumbnail': 're:^https?://.*\.jpg\?\d+$',
24 def _real_extract(self
, url
):
25 mobj
= re
.match(self
._VALID
_URL
, url
)
26 article_id
= mobj
.group('article_id')
28 webpage
= self
._download
_webpage
(url
, article_id
)
30 video_id
= self
._search
_regex
(
31 r
'"videoId":\s(\d+),', webpage
, 'video ID')
32 title
= self
._og
_search
_title
(webpage
)
33 description
= self
._og
_search
_description
(webpage
)
34 thumbnail
= self
._og
_search
_thumbnail
(webpage
)
36 sources
= re
.findall(r
'<source src="([^"]+)" type="([^"]+)"', webpage
)
39 'No sources found for video %s' % video_id
, expected
=True)
42 for src
, type_
in sources
:
43 if type_
== 'video/mp4':
45 'format_id': 'mp4_sd',
49 elif type_
== 'video/webm':
51 'format_id': 'webm_sd',
55 elif type_
== 'application/x-mpegURL':
57 self
._extract
_m
3u8_formats
(src
, video_id
, preference
=-1))
60 'Encountered unexpected format: %s' % type_
)
62 self
._sort
_formats
(formats
)
68 'thumbnail': thumbnail
,
69 'description': description
,