]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/gaskrank.py
2 from __future__
import unicode_literals
5 from .common
import InfoExtractor
14 class GaskrankIE(InfoExtractor
):
15 """InfoExtractor for gaskrank.tv"""
16 _VALID_URL
= r
'https?://(?:www\.)?gaskrank\.tv/tv/(?P<categories>[^/]+)/(?P<id>[^/]+)\.html?'
19 'url': 'http://www.gaskrank.tv/tv/motorrad-fun/strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden.htm',
20 'md5': '1ae88dbac97887d85ebd1157a95fc4f9',
24 'title': 'Strike! Einparken können nur Männer - Flurschaden hält sich in Grenzen *lol*',
25 'thumbnail': r
're:^https?://.*\.jpg$',
26 'categories': ['motorrad-fun'],
27 'display_id': 'strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden',
28 'uploader_id': 'Bikefun',
29 'upload_date': '20170110',
34 'url': 'http://www.gaskrank.tv/tv/racing/isle-of-man-tt-2011-michael-du-15920.htm',
35 'md5': 'c33ee32c711bc6c8224bfcbe62b23095',
39 'title': 'Isle of Man - Michael Dunlop vs Guy Martin - schwindelig kucken',
40 'thumbnail': r
're:^https?://.*\.jpg$',
41 'categories': ['racing'],
42 'display_id': 'isle-of-man-tt-2011-michael-du-15920',
44 'upload_date': '20160506',
45 'uploader_url': 'www.iomtt.com',
50 def _real_extract(self
, url
):
51 """extract information from gaskrank.tv"""
53 """Removes trailing comma in json: {{},} --> {{}}"""
54 return re
.sub(r
',\s*}', r
'}', js_to_json(code
))
56 display_id
= self
._match
_id
(url
)
57 webpage
= self
._download
_webpage
(url
, display_id
)
58 categories
= [re
.match(self
._VALID
_URL
, url
).group('categories')]
59 title
= self
._search
_regex
(
60 r
'movieName\s*:\s*\'([^
\']*)\'',
62 thumbnail = self._search_regex(
63 r'poster\s
*:\s
*\'([^
\']*)\'',
64 webpage, 'thumbnail
', default=None)
67 r'Video von
:\s
*(?P
<uploader_id
>[^|
]*?
)\s
*\|\s
*vom
:\s
*(?P
<upload_date
>[0-9][0-9]\
.[0-9][0-9]\
.[0-9][0-9][0-9][0-9])',
70 uploader_id = mobj.groupdict().get('uploader_id
')
71 upload_date = unified_strdate(mobj.groupdict().get('upload_date
'))
73 uploader_url = self._search_regex(
74 r'Homepage
:\s
*<[^
>]*>(?P
<uploader_url
>[^
<]*)',
75 webpage, 'uploader_url
', default=None)
77 r'/tv
/tags
/[^
/]+/"\s*>(?P<tag>[^<]*?)<',
80 view_count = self._search_regex(
81 r'class\s*=\s*"gkRight
"(?:[^>]*>\s*<[^>]*)*icon-eye-open(?:[^>]*>\s*<[^>]*)*>\s*(?P<view_count>[0-9\.]*)',
82 webpage, 'view_count', default=None)
84 view_count = int_or_none(view_count.replace('.', ''))
86 average_rating = self._search_regex(
87 r'itemprop\s*=\s*"ratingValue
"[^>]*>\s*(?P<average_rating>[0-9,]+)',
88 webpage, 'average_rating')
90 average_rating = float_or_none(average_rating.replace(',', '.'))
92 playlist = self._parse_json(
94 r'playlist\s*:\s*\[([^\]]*)\]',
95 webpage, 'playlist', default='{}'),
96 display_id, transform_source=fix_json, fatal=False)
98 video_id = self._search_regex(
99 r'https?://movies\.gaskrank\.tv/([^-]*?)(-[^\.]*)?\.mp4',
100 playlist.get('0').get('src'), 'video id')
105 'url': playlist[key]['src'],
107 'quality': playlist[key].get('quality')})
108 self._sort_formats(formats, field_preference=['format_id'])
114 'thumbnail': thumbnail,
115 'categories': categories,
116 'display_id': display_id,
117 'uploader_id': uploader_id,
118 'upload_date': upload_date,
119 'uploader_url': uploader_url,
121 'view_count': view_count,
122 'average_rating': average_rating,