2 from __future__ 
import unicode_literals
 
  10 from .common 
import InfoExtractor
 
  11 from ..compat 
import ( 
  24 class GloboIE(InfoExtractor
): 
  25     _VALID_URL 
= r
'(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P<id>\d{7,})' 
  26     _NETRC_MACHINE 
= 'globo' 
  28         'url': 'http://g1.globo.com/carros/autoesporte/videos/t/exclusivos-do-g1/v/mercedes-benz-gla-passa-por-teste-de-colisao-na-europa/3607726/', 
  29         'md5': 'b3ccc801f75cd04a914d51dadb83a78d', 
  33             'title': 'Mercedes-Benz GLA passa por teste de colisão na Europa', 
  35             'uploader': 'Globo.com', 
  39         'url': 'http://globoplay.globo.com/v/4581987/', 
  40         'md5': 'f36a1ecd6a50da1577eee6dd17f67eff', 
  44             'title': 'Acidentes de trânsito estão entre as maiores causas de queda de energia em SP', 
  46             'uploader': 'Rede Globo', 
  50         'url': 'http://canalbrasil.globo.com/programas/sangue-latino/videos/3928201.html', 
  51         'only_matching': True, 
  53         'url': 'http://globosatplay.globo.com/globonews/v/4472924/', 
  54         'only_matching': True, 
  56         'url': 'http://globotv.globo.com/t/programa/v/clipe-sexo-e-as-negas-adeus/3836166/', 
  57         'only_matching': True, 
  59         'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/', 
  60         'only_matching': True, 
  62         'url': 'http://canaloff.globo.com/programas/desejar-profundo/videos/4518560.html', 
  63         'only_matching': True, 
  65         'url': 'globo:3607726', 
  66         'only_matching': True, 
  69     def _real_initialize(self
): 
  70         email
, password 
= self
._get
_login
_info
() 
  75             glb_id 
= (self
._download
_json
( 
  76                 'https://login.globo.com/api/authentication', None, data
=json
.dumps({ 
  82                 }).encode(), headers
={ 
  83                     'Content-Type': 'application/json; charset=utf-8', 
  84                 }) or {}).get('glbId') 
  86                 self
._set
_cookie
('.globo.com', 'GLBID', glb_id
) 
  87         except ExtractorError 
as e
: 
  88             if isinstance(e
.cause
, compat_HTTPError
) and e
.cause
.code 
== 401: 
  89                 resp 
= self
._parse
_json
(e
.cause
.read(), None) 
  90                 raise ExtractorError(resp
.get('userMessage') or resp
['id'], expected
=True) 
  93     def _real_extract(self
, url
): 
  94         video_id 
= self
._match
_id
(url
) 
  96         video 
= self
._download
_json
( 
  97             'http://api.globovideos.com/videos/%s/playlist' % video_id
, 
  98             video_id
)['videos'][0] 
  99         if video
.get('encrypted') is True: 
 100             raise ExtractorError('This video is DRM protected.', expected
=True) 
 102         title 
= video
['title'] 
 106         for resource 
in video
['resources']: 
 107             resource_id 
= resource
.get('_id') 
 108             resource_url 
= resource
.get('url') 
 109             resource_type 
= resource
.get('type') 
 110             if not resource_url 
or (resource_type 
== 'media' and not resource_id
) or resource_type 
not in ('subtitle', 'media'): 
 113             if resource_type 
== 'subtitle': 
 114                 subtitles
.setdefault(resource
.get('language') or 'por', []).append({ 
 119             security 
= self
._download
_json
( 
 120                 'http://security.video.globo.com/videos/%s/hash' % video_id
, 
 121                 video_id
, 'Downloading security hash for %s' % resource_id
, query
={ 
 124                     'resource_id': resource_id
, 
 127             security_hash 
= security
.get('hash') 
 128             if not security_hash
: 
 129                 message 
= security
.get('message') 
 131                     raise ExtractorError( 
 132                         '%s returned error: %s' % (self
.IE_NAME
, message
), expected
=True) 
 135             hash_code 
= security_hash
[:2] 
 136             padding 
= '%010d' % random
.randint(1, 10000000000) 
 137             if hash_code 
in ('04', '14'): 
 138                 received_time 
= security_hash
[3:13] 
 139                 received_md5 
= security_hash
[24:] 
 140                 hash_prefix 
= security_hash
[:23] 
 141             elif hash_code 
in ('02', '12', '03', '13'): 
 142                 received_time 
= security_hash
[2:12] 
 143                 received_md5 
= security_hash
[22:] 
 145                 hash_prefix 
= '05' + security_hash
[:22] 
 147             padded_sign_time 
= compat_str(int(received_time
) + 86400) + padding
 
 148             md5_data 
= (received_md5 
+ padded_sign_time 
+ '0xAC10FD').encode() 
 149             signed_md5 
= base64
.urlsafe_b64encode(hashlib
.md5(md5_data
).digest()).decode().strip('=') 
 150             signed_hash 
= hash_prefix 
+ padded_sign_time 
+ signed_md5
 
 151             signed_url 
= '%s?h=%s&k=html5&a=%s&u=%s' % (resource_url
, signed_hash
, 'F' if video
.get('subscriber_only') else 'A', security
.get('user') or '') 
 153             if resource_id
.endswith('m3u8') or resource_url
.endswith('.m3u8'): 
 154                 formats
.extend(self
._extract
_m
3u8_formats
( 
 155                     signed_url
, resource_id
, 'mp4', entry_protocol
='m3u8_native', 
 156                     m3u8_id
='hls', fatal
=False)) 
 157             elif resource_id
.endswith('mpd') or resource_url
.endswith('.mpd'): 
 158                 formats
.extend(self
._extract
_mpd
_formats
( 
 159                     signed_url
, resource_id
, mpd_id
='dash', fatal
=False)) 
 160             elif resource_id
.endswith('manifest') or resource_url
.endswith('/manifest'): 
 161                 formats
.extend(self
._extract
_ism
_formats
( 
 162                     signed_url
, resource_id
, ism_id
='mss', fatal
=False)) 
 166                     'format_id': 'http-%s' % resource_id
, 
 167                     'height': int_or_none(resource
.get('height')), 
 170         self
._sort
_formats
(formats
) 
 172         duration 
= float_or_none(video
.get('duration'), 1000) 
 173         uploader 
= video
.get('channel') 
 174         uploader_id 
= str_or_none(video
.get('channel_id')) 
 179             'duration': duration
, 
 180             'uploader': uploader
, 
 181             'uploader_id': uploader_id
, 
 183             'subtitles': subtitles
, 
 187 class GloboArticleIE(InfoExtractor
): 
 188     _VALID_URL 
= r
'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/.]+)(?:\.html)?' 
 191         r
'\bdata-video-id=["\'](\d
{7,})', 
 192         r'\bdata
-player
-videosids
=["\'](\d{7,})', 
 193         r'\bvideosIDs\s*:\s*["\']?
(\d
{7,})', 
 194         r'\bdata
-id=["\'](\d{7,})', 
 195         r'<div[^>]+\bid=["\'](\d
{7,})', 
 199         'url
': 'http
://g1
.globo
.com
/jornal
-nacional
/noticia
/2014/09/novidade
-na
-fiscalizacao
-de
-bagagem
-pela
-receita
-provoca
-discussoes
.html
', 
 201             'id': 'novidade
-na
-fiscalizacao
-de
-bagagem
-pela
-receita
-provoca
-discussoes
', 
 202             'title
': 'Novidade na fiscalização de bagagem pela Receita provoca discussões
', 
 203             'description
': 'md5
:c3c4b4d4c30c32fce460040b1ac46b12
', 
 207         'url
': 'http
://g1
.globo
.com
/pr
/parana
/noticia
/2016/09/mpf
-denuncia
-lula
-marisa
-e
-mais
-seis
-na
-operacao
-lava
-jato
.html
', 
 209             'id': 'mpf
-denuncia
-lula
-marisa
-e
-mais
-seis
-na
-operacao
-lava
-jato
', 
 210             'title
': "Lula era o 'comandante máximo
' do esquema da Lava Jato, diz MPF", 
 211             'description
': 'md5
:8aa7cc8beda4dc71cc8553e00b77c54c
', 
 215         'url
': 'http
://gq
.globo
.com
/Prazeres
/Poder
/noticia
/2015/10/all
-o
-desafio
-assista
-ao
-segundo
-capitulo
-da
-serie
.html
', 
 216         'only_matching
': True, 
 218         'url
': 'http
://gshow
.globo
.com
/programas
/tv
-xuxa
/O
-Programa
/noticia
/2014/01/xuxa
-e
-junno
-namoram
-muuuito
-em
-luau
-de
-zeze
-di
-camargo
-e
-luciano
.html
', 
 219         'only_matching
': True, 
 221         'url
': 'http
://oglobo
.globo
.com
/rio
/a
-amizade
-entre
-um
-entregador
-de
-farmacia
-um
-piano
-19946271', 
 222         'only_matching
': True, 
 226     def suitable(cls, url): 
 227         return False if GloboIE.suitable(url) else super(GloboArticleIE, cls).suitable(url) 
 229     def _real_extract(self, url): 
 230         display_id = self._match_id(url) 
 231         webpage = self._download_webpage(url, display_id) 
 233         for video_regex in self._VIDEOID_REGEXES: 
 234             video_ids.extend(re.findall(video_regex, webpage)) 
 236             self.url_result('globo
:%s' % video_id, GloboIE.ie_key()) 
 237             for video_id in orderedSet(video_ids)] 
 238         title = self._og_search_title(webpage, fatal=False) 
 239         description = self._html_search_meta('description
', webpage) 
 240         return self.playlist_result(entries, display_id, title, description)