Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/cartoonnetwork.py
debian/copyright: use spaces rather than tabs to start continuation lines.
[youtubedl] / youtube_dl / extractor / cartoonnetwork.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .turner import TurnerBaseIE
5 from ..utils import int_or_none
6
7
class CartoonNetworkIE(TurnerBaseIE):
    """Extractor for clip and episode pages under cartoonnetwork.com/video/."""

    # The last path component, minus its "-clip"/"-episode" suffix and the
    # ".html" extension, is captured as the `id` group.
    _VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P<id>[^/?#]+)-(?:clip|episode)\.html'
    _TEST = {
        'url': 'https://www.cartoonnetwork.com/video/ben-10/how-to-draw-upgrade-episode.html',
        'info_dict': {
            'id': '6e3375097f63874ebccec7ef677c1c3845fa850e',
            'ext': 'mp4',
            'title': 'How to Draw Upgrade',
            'description': 'md5:2061d83776db7e8be4879684eefe8c0f',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for a single Cartoon Network video page.

        The page is downloaded and scanned for JavaScript string assignments
        that carry the media id, title, auth type, series and season/episode
        numbers. Media extraction itself is delegated to
        ``_extract_ngtv_info`` (presumably provided by ``TurnerBaseIE`` --
        it is not defined in this class).
        """
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        def js_string(global_key, label, content_key=None, value_re='[^"]+', fatal=False):
            # Match `_cnglobal.currentVideo.<global_key> = "<value>";` and,
            # when a content key is supplied, also accept
            # `video_metadata.content_<content_key> = "<value>";`.
            alternative = (r'|video_metadata\.content_' + content_key) if content_key else ''
            pattern = r'(?:_cnglobal\.currentVideo\.%s%s)\s*=\s*"(%s)";' % (
                global_key, alternative, value_re)
            return self._search_regex(pattern, page, label, fatal=fatal)

        # Both of these are mandatory lookups (fatal=True).
        media_id = js_string('mediaId', 'media id', 'id', '[0-9a-f]{40}', True)
        title = js_string('episodeTitle', 'title', '(?:episodeName|name)', fatal=True)

        # Any auth type other than the literal 'unauth' flags the video as
        # requiring authentication.
        auth_type = js_string('authType', 'auth type')
        info = self._extract_ngtv_info(
            media_id,
            {'networkId': 'cartoonnetwork'},
            {
                'url': url,
                'site_name': 'CartoonNetwork',
                'auth_required': auth_type != 'unauth',
            })

        # Prefer the inline JS value; fall back to the page's meta tag.
        series = js_string('propertyName', 'series', 'showName')
        if not series:
            series = self._html_search_meta('partOfSeries', page)
        description = self._html_search_meta('description', page)

        info.update({
            'id': media_id,
            'display_id': slug,
            'title': title,
            'description': description,
            'series': series,
            'episode': title,
        })

        # seasonNumber / episodeNumber: inline JS first, meta tag second.
        for kind in ('season', 'episode'):
            source_key = kind + 'Number'
            raw_number = js_string(source_key, kind + ' number', value_re=r'\d+')
            if not raw_number:
                raw_number = self._html_search_meta(source_key, page)
            info[kind + '_number'] = int_or_none(raw_number)

        return info