]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/theonion.py
10239c906201e460ed288386709dffc5b7f6efbc
2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
9 class TheOnionIE(InfoExtractor
):
10 _VALID_URL
= r
'https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<id>[0-9]+)/?'
12 'url': 'http://www.theonion.com/video/man-wearing-mm-jacket-gods-image,36918/',
13 'md5': '19eaa9a39cf9b9804d982e654dc791ee',
17 'title': 'Man Wearing M&M Jacket Apparently Made In God\'s Image',
18 'description': 'md5:cc12448686b5600baae9261d3e180910',
19 'thumbnail': 're:^https?://.*\.jpg\?\d+$',
23 def _real_extract(self
, url
):
24 display_id
= self
._match
_id
(url
)
25 webpage
= self
._download
_webpage
(url
, display_id
)
27 video_id
= self
._search
_regex
(
28 r
'"videoId":\s(\d+),', webpage
, 'video ID')
29 title
= self
._og
_search
_title
(webpage
)
30 description
= self
._og
_search
_description
(webpage
)
31 thumbnail
= self
._og
_search
_thumbnail
(webpage
)
33 sources
= re
.findall(r
'<source src="([^"]+)" type="([^"]+)"', webpage
)
35 for src
, type_
in sources
:
36 if type_
== 'video/mp4':
38 'format_id': 'mp4_sd',
42 elif type_
== 'video/webm':
44 'format_id': 'webm_sd',
48 elif type_
== 'application/x-mpegURL':
50 self
._extract
_m
3u8_formats
(src
, display_id
, preference
=-1))
53 'Encountered unexpected format: %s' % type_
)
54 self
._sort
_formats
(formats
)
58 'display_id': display_id
,
61 'thumbnail': thumbnail
,
62 'description': description
,