]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/telecinco.py
d37e1b0557cf3ba241a25e7e56d28c8dc679b1d0
   2 from __future__ 
import unicode_literals
 
   7 from .common 
import InfoExtractor
 
   8 from .ooyala 
import OoyalaIE
 
  18 class TelecincoIE(InfoExtractor
): 
  19     IE_DESC 
= 'telecinco.es, cuatro.com and mediaset.es' 
  20     _VALID_URL 
= r
'https?://(?:www\.)?(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html' 
  23         'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html', 
  26             'title': 'Bacalao con kokotxas al pil-pil', 
  27             'description': 'md5:1382dacd32dd4592d478cbdca458e5bb', 
  30             'md5': 'adb28c37238b675dad0f042292f209a7', 
  32                 'id': 'JEA5ijCnF6p5W08A1rNKn7', 
  34                 'title': 'Con Martín Berasategui, hacer un bacalao al pil-pil es fácil y divertido', 
  39         'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html', 
  40         'md5': '9468140ebc300fbb8b9d65dc6e5c4b43', 
  42             'id': 'jn24Od1zGLG4XUZcnUnZB6', 
  44             'title': '¿Quién es este ex futbolista con el que hablan Leo Messi y Luis Suárez?', 
  45             'description': 'md5:a62ecb5f1934fc787107d7b9a2262805', 
  49         'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html', 
  50         'md5': 'ae2dc6b7b50b2392076a51c0f70e01f6', 
  52             'id': 'aywerkD2Sv1vGNqq9b85Q2', 
  54             'title': '#DOYLACARA. Con la trata no hay trato', 
  55             'description': 'md5:2771356ff7bfad9179c5f5cd954f1477', 
  59         'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html', 
  60         'only_matching': True, 
  62         'url': 'http://www.telecinco.es/espanasinirmaslejos/Espana-gran-destino-turistico_2_1240605043.html', 
  63         'only_matching': True, 
  66         'url': 'http://www.cuatro.com/chesterinlove/a-carta/chester-chester_in_love-chester_edu_2_2331030022.html', 
  67         'only_matching': True, 
  70     def _parse_content(self
, content
, url
): 
  71         video_id 
= content
['dataMediaId'] 
  72         if content
.get('dataCmsId') == 'ooyala': 
  73             return self
.url_result( 
  74                 'ooyala:%s' % video_id
, OoyalaIE
.ie_key(), video_id
) 
  75         config_url 
= urljoin(url
, content
['dataConfig']) 
  76         config 
= self
._download
_json
( 
  77             config_url
, video_id
, 'Downloading config JSON') 
  78         title 
= config
['info']['title'] 
  80         def mmc_url(mmc_type
): 
  82                 r
'/(?:flash|html5)\.json', '/%s.json' % mmc_type
, 
  83                 config
['services']['mmc']) 
  87         for mmc_type 
in ('flash', 'html5'): 
  88             mmc 
= self
._download
_json
( 
  89                 mmc_url(mmc_type
), video_id
, 
  90                 'Downloading %s mmc JSON' % mmc_type
, fatal
=False) 
  94                 duration 
= int_or_none(mmc
.get('duration')) 
  95             for location 
in mmc
['locations']: 
  96                 gat 
= self
._proto
_relative
_url
(location
.get('gat'), 'http:') 
  97                 gcp 
= location
.get('gcp') 
  98                 ogn 
= location
.get('ogn') 
  99                 if None in (gat
, gcp
, ogn
): 
 106                 media 
= self
._download
_json
( 
 107                     gat
, video_id
, data
=json
.dumps(token_data
).encode('utf-8'), 
 109                         'Content-Type': 'application/json;charset=utf-8', 
 111                     }, fatal
=False) or {} 
 112                 stream 
= media
.get('stream') or media
.get('file') 
 115                 ext 
= determine_ext(stream
) 
 117                     formats
.extend(self
._extract
_f
4m
_formats
( 
 118                         stream 
+ '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18', 
 119                         video_id
, f4m_id
='hds', fatal
=False)) 
 121                     formats
.extend(self
._extract
_m
3u8_formats
( 
 122                         stream
, video_id
, 'mp4', 'm3u8_native', 
 123                         m3u8_id
='hls', fatal
=False)) 
 124         self
._sort
_formats
(formats
) 
 130             'thumbnail': content
.get('dataPoster') or config
.get('poster', {}).get('imageUrl'), 
 131             'duration': duration
, 
 134     def _real_extract(self
, url
): 
 135         display_id 
= self
._match
_id
(url
) 
 136         webpage 
= self
._download
_webpage
(url
, display_id
) 
 137         article 
= self
._parse
_json
(self
._search
_regex
( 
 138             r
'window\.\$REACTBASE_STATE\.article\s*=\s*({.+})', 
 139             webpage
, 'article'), display_id
)['article'] 
 140         title 
= article
.get('title') 
 141         description 
= clean_html(article
.get('leadParagraph')) 
 142         if article
.get('editorialType') != 'VID': 
 144             for p 
in article
.get('body', []): 
 145                 content 
= p
.get('content') 
 146                 if p
.get('type') != 'video' or not content
: 
 148                 entries
.append(self
._parse
_content
(content
, url
)) 
 149             return self
.playlist_result( 
 150                 entries
, str_or_none(article
.get('id')), title
, description
) 
 151         content 
= article
['opening']['content'] 
 152         info 
= self
._parse
_content
(content
, url
) 
 154             'description': description
,