2 from __future__ 
import unicode_literals
 
  10 from .common 
import InfoExtractor
 
  11 from ..compat 
import ( 
  24 class GloboIE(InfoExtractor
): 
  25     _VALID_URL 
= r
'(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P<id>\d{7,})' 
  26     _NETRC_MACHINE 
= 'globo' 
  28         'url': 'http://g1.globo.com/carros/autoesporte/videos/t/exclusivos-do-g1/v/mercedes-benz-gla-passa-por-teste-de-colisao-na-europa/3607726/', 
  29         'md5': 'b3ccc801f75cd04a914d51dadb83a78d', 
  33             'title': 'Mercedes-Benz GLA passa por teste de colisão na Europa', 
  35             'uploader': 'Globo.com', 
  39         'url': 'http://globoplay.globo.com/v/4581987/', 
  40         'md5': 'f36a1ecd6a50da1577eee6dd17f67eff', 
  44             'title': 'Acidentes de trânsito estão entre as maiores causas de queda de energia em SP', 
  46             'uploader': 'Rede Globo', 
  50         'url': 'http://canalbrasil.globo.com/programas/sangue-latino/videos/3928201.html', 
  51         'only_matching': True, 
  53         'url': 'http://globosatplay.globo.com/globonews/v/4472924/', 
  54         'only_matching': True, 
  56         'url': 'http://globotv.globo.com/t/programa/v/clipe-sexo-e-as-negas-adeus/3836166/', 
  57         'only_matching': True, 
  59         'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/', 
  60         'only_matching': True, 
  62         'url': 'http://canaloff.globo.com/programas/desejar-profundo/videos/4518560.html', 
  63         'only_matching': True, 
  65         'url': 'globo:3607726', 
  66         'only_matching': True, 
  69     def _real_initialize(self
): 
  70         email
, password 
= self
._get
_login
_info
() 
  76                 'https://login.globo.com/api/authentication', None, data
=json
.dumps({ 
  82                 }).encode(), headers
={ 
  83                     'Content-Type': 'application/json; charset=utf-8', 
  85         except ExtractorError 
as e
: 
  86             if isinstance(e
.cause
, compat_HTTPError
) and e
.cause
.code 
== 401: 
  87                 resp 
= self
._parse
_json
(e
.cause
.read(), None) 
  88                 raise ExtractorError(resp
.get('userMessage') or resp
['id'], expected
=True) 
  91     def _real_extract(self
, url
): 
  92         video_id 
= self
._match
_id
(url
) 
  94         video 
= self
._download
_json
( 
  95             'http://api.globovideos.com/videos/%s/playlist' % video_id
, 
  96             video_id
)['videos'][0] 
  98         title 
= video
['title'] 
 101         for resource 
in video
['resources']: 
 102             resource_id 
= resource
.get('_id') 
 103             resource_url 
= resource
.get('url') 
 104             if not resource_id 
or not resource_url
: 
 107             security 
= self
._download
_json
( 
 108                 'http://security.video.globo.com/videos/%s/hash' % video_id
, 
 109                 video_id
, 'Downloading security hash for %s' % resource_id
, query
={ 
 111                     'version': '17.0.0.132', 
 112                     'resource_id': resource_id
, 
 115             security_hash 
= security
.get('hash') 
 116             if not security_hash
: 
 117                 message 
= security
.get('message') 
 119                     raise ExtractorError( 
 120                         '%s returned error: %s' % (self
.IE_NAME
, message
), expected
=True) 
 123             hash_code 
= security_hash
[:2] 
 124             received_time 
= security_hash
[2:12] 
 125             received_random 
= security_hash
[12:22] 
 126             received_md5 
= security_hash
[22:] 
 128             sign_time 
= compat_str(int(received_time
) + 86400) 
 129             padding 
= '%010d' % random
.randint(1, 10000000000) 
 131             md5_data 
= (received_md5 
+ sign_time 
+ padding 
+ '0xFF01DD').encode() 
 132             signed_md5 
= base64
.urlsafe_b64encode(hashlib
.md5(md5_data
).digest()).decode().strip('=') 
 133             signed_hash 
= hash_code 
+ received_time 
+ received_random 
+ sign_time 
+ padding 
+ signed_md5
 
 135             signed_url 
= '%s?h=%s&k=%s' % (resource_url
, signed_hash
, 'flash') 
 136             if resource_id
.endswith('m3u8') or resource_url
.endswith('.m3u8'): 
 137                 formats
.extend(self
._extract
_m
3u8_formats
( 
 138                     signed_url
, resource_id
, 'mp4', entry_protocol
='m3u8_native', 
 139                     m3u8_id
='hls', fatal
=False)) 
 140             elif resource_id
.endswith('mpd') or resource_url
.endswith('.mpd'): 
 141                 formats
.extend(self
._extract
_mpd
_formats
( 
 142                     signed_url
, resource_id
, mpd_id
='dash', fatal
=False)) 
 143             elif resource_id
.endswith('manifest') or resource_url
.endswith('/manifest'): 
 144                 formats
.extend(self
._extract
_ism
_formats
( 
 145                     signed_url
, resource_id
, ism_id
='mss', fatal
=False)) 
 149                     'format_id': 'http-%s' % resource_id
, 
 150                     'height': int_or_none(resource
.get('height')), 
 153         self
._sort
_formats
(formats
) 
 155         duration 
= float_or_none(video
.get('duration'), 1000) 
 156         uploader 
= video
.get('channel') 
 157         uploader_id 
= str_or_none(video
.get('channel_id')) 
 162             'duration': duration
, 
 163             'uploader': uploader
, 
 164             'uploader_id': uploader_id
, 
 169 class GloboArticleIE(InfoExtractor
): 
 170     _VALID_URL 
= r
'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/.]+)(?:\.html)?' 
 173         r
'\bdata-video-id=["\'](\d
{7,})', 
 174         r'\bdata
-player
-videosids
=["\'](\d{7,})', 
 175         r'\bvideosIDs\s*:\s*["\']?
(\d
{7,})', 
 176         r'\bdata
-id=["\'](\d{7,})', 
 177         r'<div[^>]+\bid=["\'](\d
{7,})', 
 181         'url
': 'http
://g1
.globo
.com
/jornal
-nacional
/noticia
/2014/09/novidade
-na
-fiscalizacao
-de
-bagagem
-pela
-receita
-provoca
-discussoes
.html
', 
 183             'id': 'novidade
-na
-fiscalizacao
-de
-bagagem
-pela
-receita
-provoca
-discussoes
', 
 184             'title
': 'Novidade na fiscalização de bagagem pela Receita provoca discussões
', 
 185             'description
': 'md5
:c3c4b4d4c30c32fce460040b1ac46b12
', 
 189         'url
': 'http
://g1
.globo
.com
/pr
/parana
/noticia
/2016/09/mpf
-denuncia
-lula
-marisa
-e
-mais
-seis
-na
-operacao
-lava
-jato
.html
', 
 191             'id': 'mpf
-denuncia
-lula
-marisa
-e
-mais
-seis
-na
-operacao
-lava
-jato
', 
 192             'title
': "Lula era o 'comandante máximo
' do esquema da Lava Jato, diz MPF", 
 193             'description
': 'md5
:8aa7cc8beda4dc71cc8553e00b77c54c
', 
 197         'url
': 'http
://gq
.globo
.com
/Prazeres
/Poder
/noticia
/2015/10/all
-o
-desafio
-assista
-ao
-segundo
-capitulo
-da
-serie
.html
', 
 198         'only_matching
': True, 
 200         'url
': 'http
://gshow
.globo
.com
/programas
/tv
-xuxa
/O
-Programa
/noticia
/2014/01/xuxa
-e
-junno
-namoram
-muuuito
-em
-luau
-de
-zeze
-di
-camargo
-e
-luciano
.html
', 
 201         'only_matching
': True, 
 203         'url
': 'http
://oglobo
.globo
.com
/rio
/a
-amizade
-entre
-um
-entregador
-de
-farmacia
-um
-piano
-19946271', 
 204         'only_matching
': True, 
 208     def suitable(cls, url): 
 209         return False if GloboIE.suitable(url) else super(GloboArticleIE, cls).suitable(url) 
 211     def _real_extract(self, url): 
 212         display_id = self._match_id(url) 
 213         webpage = self._download_webpage(url, display_id) 
 215         for video_regex in self._VIDEOID_REGEXES: 
 216             video_ids.extend(re.findall(video_regex, webpage)) 
 218             self.url_result('globo
:%s' % video_id, GloboIE.ie_key()) 
 219             for video_id in orderedSet(video_ids)] 
 220         title = self._og_search_title(webpage, fatal=False) 
 221         description = self._html_search_meta('description
', webpage) 
 222         return self.playlist_result(entries, display_id, title, description)