2 from __future__ 
import unicode_literals
 
  10 from .common 
import InfoExtractor
 
  11 from ..compat 
import ( 
  24 class GloboIE(InfoExtractor
): 
  25     _VALID_URL 
= r
'(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P<id>\d{7,})' 
  26     _NETRC_MACHINE 
= 'globo' 
  28         'url': 'http://g1.globo.com/carros/autoesporte/videos/t/exclusivos-do-g1/v/mercedes-benz-gla-passa-por-teste-de-colisao-na-europa/3607726/', 
  29         'md5': 'b3ccc801f75cd04a914d51dadb83a78d', 
  33             'title': 'Mercedes-Benz GLA passa por teste de colisão na Europa', 
  35             'uploader': 'Globo.com', 
  39         'url': 'http://globoplay.globo.com/v/4581987/', 
  40         'md5': 'f36a1ecd6a50da1577eee6dd17f67eff', 
  44             'title': 'Acidentes de trânsito estão entre as maiores causas de queda de energia em SP', 
  46             'uploader': 'Rede Globo', 
  50         'url': 'http://canalbrasil.globo.com/programas/sangue-latino/videos/3928201.html', 
  51         'only_matching': True, 
  53         'url': 'http://globosatplay.globo.com/globonews/v/4472924/', 
  54         'only_matching': True, 
  56         'url': 'http://globotv.globo.com/t/programa/v/clipe-sexo-e-as-negas-adeus/3836166/', 
  57         'only_matching': True, 
  59         'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/', 
  60         'only_matching': True, 
  62         'url': 'http://canaloff.globo.com/programas/desejar-profundo/videos/4518560.html', 
  63         'only_matching': True, 
  65         'url': 'globo:3607726', 
  66         'only_matching': True, 
  69     def _real_initialize(self
): 
  70         email
, password 
= self
._get
_login
_info
() 
  75             glb_id 
= (self
._download
_json
( 
  76                 'https://login.globo.com/api/authentication', None, data
=json
.dumps({ 
  82                 }).encode(), headers
={ 
  83                     'Content-Type': 'application/json; charset=utf-8', 
  84                 }) or {}).get('glbId') 
  86                 self
._set
_cookie
('.globo.com', 'GLBID', glb_id
) 
  87         except ExtractorError 
as e
: 
  88             if isinstance(e
.cause
, compat_HTTPError
) and e
.cause
.code 
== 401: 
  89                 resp 
= self
._parse
_json
(e
.cause
.read(), None) 
  90                 raise ExtractorError(resp
.get('userMessage') or resp
['id'], expected
=True) 
  93     def _real_extract(self
, url
): 
  94         video_id 
= self
._match
_id
(url
) 
  96         video 
= self
._download
_json
( 
  97             'http://api.globovideos.com/videos/%s/playlist' % video_id
, 
  98             video_id
)['videos'][0] 
 100         title 
= video
['title'] 
 103         for resource 
in video
['resources']: 
 104             resource_id 
= resource
.get('_id') 
 105             resource_url 
= resource
.get('url') 
 106             if not resource_id 
or not resource_url
: 
 109             security 
= self
._download
_json
( 
 110                 'http://security.video.globo.com/videos/%s/hash' % video_id
, 
 111                 video_id
, 'Downloading security hash for %s' % resource_id
, query
={ 
 113                     'version': '17.0.0.132', 
 114                     'resource_id': resource_id
, 
 117             security_hash 
= security
.get('hash') 
 118             if not security_hash
: 
 119                 message 
= security
.get('message') 
 121                     raise ExtractorError( 
 122                         '%s returned error: %s' % (self
.IE_NAME
, message
), expected
=True) 
 125             hash_code 
= security_hash
[:2] 
 126             received_time 
= security_hash
[2:12] 
 127             received_random 
= security_hash
[12:22] 
 128             received_md5 
= security_hash
[22:] 
 130             sign_time 
= compat_str(int(received_time
) + 86400) 
 131             padding 
= '%010d' % random
.randint(1, 10000000000) 
 133             md5_data 
= (received_md5 
+ sign_time 
+ padding 
+ '0xFF01DD').encode() 
 134             signed_md5 
= base64
.urlsafe_b64encode(hashlib
.md5(md5_data
).digest()).decode().strip('=') 
 135             signed_hash 
= hash_code 
+ received_time 
+ received_random 
+ sign_time 
+ padding 
+ signed_md5
 
 137             signed_url 
= '%s?h=%s&k=%s' % (resource_url
, signed_hash
, 'flash') 
 138             if resource_id
.endswith('m3u8') or resource_url
.endswith('.m3u8'): 
 139                 formats
.extend(self
._extract
_m
3u8_formats
( 
 140                     signed_url
, resource_id
, 'mp4', entry_protocol
='m3u8_native', 
 141                     m3u8_id
='hls', fatal
=False)) 
 142             elif resource_id
.endswith('mpd') or resource_url
.endswith('.mpd'): 
 143                 formats
.extend(self
._extract
_mpd
_formats
( 
 144                     signed_url
, resource_id
, mpd_id
='dash', fatal
=False)) 
 145             elif resource_id
.endswith('manifest') or resource_url
.endswith('/manifest'): 
 146                 formats
.extend(self
._extract
_ism
_formats
( 
 147                     signed_url
, resource_id
, ism_id
='mss', fatal
=False)) 
 151                     'format_id': 'http-%s' % resource_id
, 
 152                     'height': int_or_none(resource
.get('height')), 
 155         self
._sort
_formats
(formats
) 
 157         duration 
= float_or_none(video
.get('duration'), 1000) 
 158         uploader 
= video
.get('channel') 
 159         uploader_id 
= str_or_none(video
.get('channel_id')) 
 164             'duration': duration
, 
 165             'uploader': uploader
, 
 166             'uploader_id': uploader_id
, 
 171 class GloboArticleIE(InfoExtractor
): 
 172     _VALID_URL 
= r
'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/.]+)(?:\.html)?' 
 175         r
'\bdata-video-id=["\'](\d
{7,})', 
 176         r'\bdata
-player
-videosids
=["\'](\d{7,})', 
 177         r'\bvideosIDs\s*:\s*["\']?
(\d
{7,})', 
 178         r'\bdata
-id=["\'](\d{7,})', 
 179         r'<div[^>]+\bid=["\'](\d
{7,})', 
 183         'url
': 'http
://g1
.globo
.com
/jornal
-nacional
/noticia
/2014/09/novidade
-na
-fiscalizacao
-de
-bagagem
-pela
-receita
-provoca
-discussoes
.html
', 
 185             'id': 'novidade
-na
-fiscalizacao
-de
-bagagem
-pela
-receita
-provoca
-discussoes
', 
 186             'title
': 'Novidade na fiscalização de bagagem pela Receita provoca discussões
', 
 187             'description
': 'md5
:c3c4b4d4c30c32fce460040b1ac46b12
', 
 191         'url
': 'http
://g1
.globo
.com
/pr
/parana
/noticia
/2016/09/mpf
-denuncia
-lula
-marisa
-e
-mais
-seis
-na
-operacao
-lava
-jato
.html
', 
 193             'id': 'mpf
-denuncia
-lula
-marisa
-e
-mais
-seis
-na
-operacao
-lava
-jato
', 
 194             'title
': "Lula era o 'comandante máximo
' do esquema da Lava Jato, diz MPF", 
 195             'description
': 'md5
:8aa7cc8beda4dc71cc8553e00b77c54c
', 
 199         'url
': 'http
://gq
.globo
.com
/Prazeres
/Poder
/noticia
/2015/10/all
-o
-desafio
-assista
-ao
-segundo
-capitulo
-da
-serie
.html
', 
 200         'only_matching
': True, 
 202         'url
': 'http
://gshow
.globo
.com
/programas
/tv
-xuxa
/O
-Programa
/noticia
/2014/01/xuxa
-e
-junno
-namoram
-muuuito
-em
-luau
-de
-zeze
-di
-camargo
-e
-luciano
.html
', 
 203         'only_matching
': True, 
 205         'url
': 'http
://oglobo
.globo
.com
/rio
/a
-amizade
-entre
-um
-entregador
-de
-farmacia
-um
-piano
-19946271', 
 206         'only_matching
': True, 
 210     def suitable(cls, url): 
 211         return False if GloboIE.suitable(url) else super(GloboArticleIE, cls).suitable(url) 
 213     def _real_extract(self, url): 
 214         display_id = self._match_id(url) 
 215         webpage = self._download_webpage(url, display_id) 
 217         for video_regex in self._VIDEOID_REGEXES: 
 218             video_ids.extend(re.findall(video_regex, webpage)) 
 220             self.url_result('globo
:%s' % video_id, GloboIE.ie_key()) 
 221             for video_id in orderedSet(video_ids)] 
 222         title = self._og_search_title(webpage, fatal=False) 
 223         description = self._html_search_meta('description
', webpage) 
 224         return self.playlist_result(entries, display_id, title, description)