]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/telecinco.py
   2 from __future__ 
import unicode_literals
 
   7 from .common 
import InfoExtractor
 
   8 from .ooyala 
import OoyalaIE
 
  19 class TelecincoIE(InfoExtractor
): 
  20     IE_DESC 
= 'telecinco.es, cuatro.com and mediaset.es' 
  21     _VALID_URL 
= r
'https?://(?:www\.)?(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html' 
  24         'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html', 
  27             'title': 'Bacalao con kokotxas al pil-pil', 
  28             'description': 'md5:716caf5601e25c3c5ab6605b1ae71529', 
  31             'md5': 'adb28c37238b675dad0f042292f209a7', 
  33                 'id': 'JEA5ijCnF6p5W08A1rNKn7', 
  35                 'title': 'Con Martín Berasategui, hacer un bacalao al pil-pil es fácil y divertido', 
  40         'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html', 
  41         'md5': '9468140ebc300fbb8b9d65dc6e5c4b43', 
  43             'id': 'jn24Od1zGLG4XUZcnUnZB6', 
  45             'title': '¿Quién es este ex futbolista con el que hablan Leo Messi y Luis Suárez?', 
  46             'description': 'md5:a62ecb5f1934fc787107d7b9a2262805', 
  50         'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html', 
  51         'md5': 'ae2dc6b7b50b2392076a51c0f70e01f6', 
  53             'id': 'aywerkD2Sv1vGNqq9b85Q2', 
  55             'title': '#DOYLACARA. Con la trata no hay trato', 
  56             'description': 'md5:2771356ff7bfad9179c5f5cd954f1477', 
  60         # video in opening's content 
  61         'url': 'https://www.telecinco.es/vivalavida/fiorella-sobrina-edmundo-arrocet-entrevista_18_2907195140.html', 
  64             'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"', 
  65             'description': 'md5:73f340a7320143d37ab895375b2bf13a', 
  68             'md5': 'adb28c37238b675dad0f042292f209a7', 
  70                 'id': 'TpI2EttSDAReWpJ1o0NVh2', 
  72                 'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"', 
  77             'skip_download': True, 
  80         'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html', 
  81         'only_matching': True, 
  83         'url': 'http://www.telecinco.es/espanasinirmaslejos/Espana-gran-destino-turistico_2_1240605043.html', 
  84         'only_matching': True, 
  87         'url': 'http://www.cuatro.com/chesterinlove/a-carta/chester-chester_in_love-chester_edu_2_2331030022.html', 
  88         'only_matching': True, 
  91     def _parse_content(self
, content
, url
): 
  92         video_id 
= content
['dataMediaId'] 
  93         if content
.get('dataCmsId') == 'ooyala': 
  94             return self
.url_result( 
  95                 'ooyala:%s' % video_id
, OoyalaIE
.ie_key(), video_id
) 
  96         config_url 
= urljoin(url
, content
['dataConfig']) 
  97         config 
= self
._download
_json
( 
  98             config_url
, video_id
, 'Downloading config JSON') 
  99         title 
= config
['info']['title'] 
 101         def mmc_url(mmc_type
): 
 103                 r
'/(?:flash|html5)\.json', '/%s.json' % mmc_type
, 
 104                 config
['services']['mmc']) 
 108         for mmc_type 
in ('flash', 'html5'): 
 109             mmc 
= self
._download
_json
( 
 110                 mmc_url(mmc_type
), video_id
, 
 111                 'Downloading %s mmc JSON' % mmc_type
, fatal
=False) 
 115                 duration 
= int_or_none(mmc
.get('duration')) 
 116             for location 
in mmc
['locations']: 
 117                 gat 
= self
._proto
_relative
_url
(location
.get('gat'), 'http:') 
 118                 gcp 
= location
.get('gcp') 
 119                 ogn 
= location
.get('ogn') 
 120                 if None in (gat
, gcp
, ogn
): 
 127                 media 
= self
._download
_json
( 
 128                     gat
, video_id
, data
=json
.dumps(token_data
).encode('utf-8'), 
 130                         'Content-Type': 'application/json;charset=utf-8', 
 132                     }, fatal
=False) or {} 
 133                 stream 
= media
.get('stream') or media
.get('file') 
 136                 ext 
= determine_ext(stream
) 
 138                     formats
.extend(self
._extract
_f
4m
_formats
( 
 139                         stream 
+ '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18', 
 140                         video_id
, f4m_id
='hds', fatal
=False)) 
 142                     formats
.extend(self
._extract
_m
3u8_formats
( 
 143                         stream
, video_id
, 'mp4', 'm3u8_native', 
 144                         m3u8_id
='hls', fatal
=False)) 
 145         self
._sort
_formats
(formats
) 
 151             'thumbnail': content
.get('dataPoster') or config
.get('poster', {}).get('imageUrl'), 
 152             'duration': duration
, 
 155     def _real_extract(self
, url
): 
 156         display_id 
= self
._match
_id
(url
) 
 157         webpage 
= self
._download
_webpage
(url
, display_id
) 
 158         article 
= self
._parse
_json
(self
._search
_regex
( 
 159             r
'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=\s*({.+})', 
 160             webpage
, 'article'), display_id
)['article'] 
 161         title 
= article
.get('title') 
 162         description 
= clean_html(article
.get('leadParagraph')) or '' 
 163         if article
.get('editorialType') != 'VID': 
 165             body 
= [article
.get('opening')] 
 166             body
.extend(try_get(article
, lambda x
: x
['body'], list) or []) 
 168                 if not isinstance(p
, dict): 
 170                 content 
= p
.get('content') 
 173                 type_ 
= p
.get('type') 
 174                 if type_ 
== 'paragraph': 
 175                     content_str 
= str_or_none(content
) 
 177                         description 
+= content_str
 
 179                 if type_ 
== 'video' and isinstance(content
, dict): 
 180                     entries
.append(self
._parse
_content
(content
, url
)) 
 181             return self
.playlist_result( 
 182                 entries
, str_or_none(article
.get('id')), title
, description
) 
 183         content 
= article
['opening']['content'] 
 184         info 
= self
._parse
_content
(content
, url
) 
 186             'description': description
,