2 from __future__ 
import unicode_literals
 
  10 from .common 
import InfoExtractor
 
  13     compat_urllib_request
, 
  19 class SmotriIE(InfoExtractor
): 
  20     IE_DESC 
= 'Smotri.com' 
  22     _VALID_URL 
= r
'^https?://(?:www\.)?(?P<url>smotri\.com/video/view/\?id=(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4}))' 
  23     _NETRC_MACHINE 
= 'smotri' 
  26         # real video id 2610366 
  28             'url': 'http://smotri.com/video/view/?id=v261036632ab', 
  29             'file': 'v261036632ab.mp4', 
  30             'md5': '2a7b08249e6f5636557579c368040eb9', 
  32                 'title': 'катастрофа с камер видеонаблюдения', 
  33                 'uploader': 'rbc2008', 
  34                 'uploader_id': 'rbc08', 
  35                 'upload_date': '20131118', 
  36                 'description': 'катастрофа с камер видеонаблюдения, видео катастрофа с камер видеонаблюдения', 
  37                 'thumbnail': 'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg', 
  42             'url': 'http://smotri.com/video/view/?id=v57591cb20', 
  43             'file': 'v57591cb20.flv', 
  44             'md5': '830266dfc21f077eac5afd1883091bcd', 
  47                 'uploader': 'Support Photofile@photofile', 
  48                 'uploader_id': 'support-photofile', 
  49                 'upload_date': '20070704', 
  50                 'description': 'test, видео test', 
  51                 'thumbnail': 'http://frame4.loadup.ru/03/ed/57591.2.3.jpg', 
  56             'url': 'http://smotri.com/video/view/?id=v1390466a13c', 
  57             'file': 'v1390466a13c.mp4', 
  58             'md5': 'f6331cef33cad65a0815ee482a54440b', 
  60                 'title': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1', 
  61                 'uploader': 'timoxa40', 
  62                 'uploader_id': 'timoxa40', 
  63                 'upload_date': '20100404', 
  64                 'thumbnail': 'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg', 
  65                 'description': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1, видео TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1', 
  68                 'videopassword': 'qwerty', 
  71         # age limit + video-password 
  73             'url': 'http://smotri.com/video/view/?id=v15408898bcf', 
  74             'file': 'v15408898bcf.flv', 
  75             'md5': '91e909c9f0521adf5ee86fbe073aad70', 
  77                 'title': 'этот ролик не покажут по ТВ', 
  79                 'uploader_id': 'ueggb', 
  80                 'upload_date': '20101001', 
  81                 'thumbnail': 'http://frame3.loadup.ru/75/75/1540889.1.3.jpg', 
  83                 'description': 'этот ролик не покажут по ТВ, видео этот ролик не покажут по ТВ', 
  86                 'videopassword': '333' 
  92     _PASSWORD_NOT_VERIFIED 
= 1 
  93     _PASSWORD_DETECTED 
= 2 
  96     def _search_meta(self
, name
, html
, display_name
=None): 
  97         if display_name 
is None: 
  99         return self
._html
_search
_regex
( 
 100             r
'<meta itemprop="%s" content="([^"]+)" />' % re
.escape(name
), 
 101             html
, display_name
, fatal
=False) 
 102         return self
._html
_search
_meta
(name
, html
, display_name
) 
 104     def _real_extract(self
, url
): 
 105         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 106         video_id 
= mobj
.group('videoid') 
 107         real_video_id 
= mobj
.group('realvideoid') 
 109         # Download video JSON data 
 110         video_json_url 
= 'http://smotri.com/vt.php?id=%s' % real_video_id
 
 111         video_json_page 
= self
._download
_webpage
(video_json_url
, video_id
, 'Downloading video JSON') 
 112         video_json 
= json
.loads(video_json_page
) 
 114         status 
= video_json
['status'] 
 115         if status 
== self
._VIDEO
_NOT
_FOUND
: 
 116             raise ExtractorError('Video %s does not exist' % video_id
, expected
=True) 
 117         elif status 
== self
._PASSWORD
_DETECTED
: # The video is protected by a password, retry with 
 119             video_password 
= self
._downloader
.params
.get('videopassword', None) 
 120             if not video_password
: 
 121                 raise ExtractorError('This video is protected by a password, use the --video-password option', expected
=True) 
 122             video_json_url 
+= '&md5pass=%s' % hashlib
.md5(video_password
.encode('utf-8')).hexdigest() 
 123             video_json_page 
= self
._download
_webpage
(video_json_url
, video_id
, 'Downloading video JSON (video-password set)') 
 124             video_json 
= json
.loads(video_json_page
) 
 125             status 
= video_json
['status'] 
 126             if status 
== self
._PASSWORD
_NOT
_VERIFIED
: 
 127                 raise ExtractorError('Video password is invalid', expected
=True) 
 129         if status 
!= self
._SUCCESS
: 
 130             raise ExtractorError('Unexpected status value %s' % status
) 
 132         # Extract the URL of the video 
 133         video_url 
= video_json
['file_data'] 
 135         # Video JSON does not provide enough meta data 
 136         # We will extract some from the video web page instead 
 137         video_page_url 
= 'http://' + mobj
.group('url') 
 138         video_page 
= self
._download
_webpage
(video_page_url
, video_id
, 'Downloading video page') 
 140         # Warning if video is unavailable 
 141         warning 
= self
._html
_search
_regex
( 
 142             r
'<div class="videoUnModer">(.*?)</div>', video_page
, 
 143             'warning message', default
=None) 
 144         if warning 
is not None: 
 145             self
._downloader
.report_warning( 
 146                 'Video %s may not be available; smotri said: %s ' % 
 150         if re
.search('EroConfirmText">', video_page
) is not None: 
 151             self
.report_age_confirmation() 
 152             confirm_string 
= self
._html
_search
_regex
( 
 153                 r
'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id
, 
 154                 video_page
, 'confirm string') 
 155             confirm_url 
= video_page_url 
+ '&confirm=%s' % confirm_string
 
 156             video_page 
= self
._download
_webpage
(confirm_url
, video_id
, 'Downloading video page (age confirmed)') 
 159             adult_content 
= False 
 161         # Extract the rest of meta data 
 162         video_title 
= self
._search
_meta
('name', video_page
, 'title') 
 164             video_title 
= os
.path
.splitext(url_basename(video_url
))[0] 
 166         video_description 
= self
._search
_meta
('description', video_page
) 
 167         END_TEXT 
= ' на сайте Smotri.com' 
 168         if video_description 
and video_description
.endswith(END_TEXT
): 
 169             video_description 
= video_description
[:-len(END_TEXT
)] 
 170         START_TEXT 
= 'Смотреть онлайн ролик ' 
 171         if video_description 
and video_description
.startswith(START_TEXT
): 
 172             video_description 
= video_description
[len(START_TEXT
):] 
 173         video_thumbnail 
= self
._search
_meta
('thumbnail', video_page
) 
 175         upload_date_str 
= self
._search
_meta
('uploadDate', video_page
, 'upload date') 
 177             upload_date_m 
= re
.search(r
'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str
) 
 178             video_upload_date 
= ( 
 180                     upload_date_m
.group('year') + 
 181                     upload_date_m
.group('month') + 
 182                     upload_date_m
.group('day') 
 184                 if upload_date_m 
else None 
 187             video_upload_date 
= None 
 189         duration_str 
= self
._search
_meta
('duration', video_page
) 
 191             duration_m 
= re
.search(r
'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str
) 
 194                     (int(duration_m
.group('hours')) * 60 * 60) + 
 195                     (int(duration_m
.group('minutes')) * 60) + 
 196                     int(duration_m
.group('seconds')) 
 198                 if duration_m 
else None 
 201             video_duration 
= None 
 203         video_uploader 
= self
._html
_search
_regex
( 
 204             '<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>', 
 205             video_page
, 'uploader', fatal
=False, flags
=re
.MULTILINE|re
.DOTALL
) 
 207         video_uploader_id 
= self
._html
_search
_regex
( 
 208             '<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info\\(.*?\'([^\']+)\'\\);">', 
 209             video_page
, 'uploader id', fatal
=False, flags
=re
.MULTILINE|re
.DOTALL
) 
 211         video_view_count 
= self
._html
_search
_regex
( 
 212             'Общее количество просмотров.*?<span class="Number">(\\d+)</span>', 
 213             video_page
, 'view count', fatal
=False, flags
=re
.MULTILINE|re
.DOTALL
) 
 218             'title': video_title
, 
 219             'thumbnail': video_thumbnail
, 
 220             'description': video_description
, 
 221             'uploader': video_uploader
, 
 222             'upload_date': video_upload_date
, 
 223             'uploader_id': video_uploader_id
, 
 224             'duration': video_duration
, 
 225             'view_count': video_view_count
, 
 226             'age_limit': 18 if adult_content 
else 0, 
 227             'video_page_url': video_page_url
 
 231 class SmotriCommunityIE(InfoExtractor
): 
 232     IE_DESC 
= 'Smotri.com community videos' 
 233     IE_NAME 
= 'smotri:community' 
 234     _VALID_URL 
= r
'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)' 
 236     def _real_extract(self, url): 
 237         mobj = re.match(self._VALID_URL, url) 
 238         community_id = mobj.group('communityid
') 
 240         url = 'http
://smotri
.com
/export
/rss
/video
/by
/community
/-/%s/video
.xml
' % community_id 
 241         rss = self._download_xml(url, community_id, 'Downloading community RSS
') 
 243         entries = [self.url_result(video_url.text, 'Smotri
') 
 244                    for video_url in rss.findall('./channel
/item
/link
')] 
 246         description_text = rss.find('./channel
/description
').text 
 247         community_title = self._html_search_regex( 
 248             '^Видео сообщества 
"([^"]+)"$', description_text, 'community title') 
 250         return self.playlist_result(entries, community_id, community_title) 
 253 class SmotriUserIE(InfoExtractor): 
 254     IE_DESC = 'Smotri.com user videos' 
 255     IE_NAME = 'smotri:user' 
 256     _VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)' 
 258     def _real_extract(self, url): 
 259         mobj = re.match(self._VALID_URL, url) 
 260         user_id = mobj.group('userid') 
 262         url = 'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id 
 263         rss = self._download_xml(url, user_id, 'Downloading user RSS') 
 265         entries = [self.url_result(video_url.text, 'Smotri') 
 266                    for video_url in rss.findall('./channel/item/link')] 
 268         description_text = rss.find('./channel/description').text 
 269         user_nickname = self._html_search_regex( 
 270             '^Видео режиссера (.*)$', description_text, 
 273         return self.playlist_result(entries, user_id, user_nickname) 
 276 class SmotriBroadcastIE(InfoExtractor): 
 277     IE_DESC = 'Smotri.com broadcasts' 
 278     IE_NAME = 'smotri:broadcast' 
 279     _VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<broadcastid>[^/]+))/?.*' 
 281     def _real_extract(self, url): 
 282         mobj = re.match(self._VALID_URL, url) 
 283         broadcast_id = mobj.group('broadcastid') 
 285         broadcast_url = 'http://' + mobj.group('url') 
 286         broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page') 
 288         if re.search('>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None: 
 289             raise ExtractorError('Broadcast %s does not exist' % broadcast_id, expected=True) 
 292         if re.search('EroConfirmText">', broadcast_page) is not None: 
 294             (username, password) = self._get_login_info() 
 296                 raise ExtractorError('Erotic broadcasts allowed only 
for registered users
, ' 
 297                     'use 
--username 
and --password options to provide account credentials
.', expected=True) 
 301                 'confirm_erotic
': '1', 
 303                 'password
': password, 
 306             request = compat_urllib_request.Request(broadcast_url + '/?no_redirect
=1', compat_urllib_parse.urlencode(login_form)) 
 307             request.add_header('Content
-Type
', 'application
/x
-www
-form
-urlencoded
') 
 308             broadcast_page = self._download_webpage(request, broadcast_id, 'Logging 
in and confirming age
') 
 310             if re.search('>Неверный логин или пароль
<', broadcast_page) is not None: 
 311                 raise ExtractorError('Unable to log 
in: bad username 
or password
', expected=True) 
 315             adult_content = False 
 317         ticket = self._html_search_regex( 
 318             'window\
.broadcast_control\
.addFlashVar
\\(\'file\', \'([^
\']+)\'\\);', 
 319             broadcast_page, 'broadcast ticket
') 
 321         url = 'http
://smotri
.com
/broadcast
/view
/url
/?ticket
=%s' % ticket 
 323         broadcast_password = self._downloader.params.get('videopassword
', None) 
 324         if broadcast_password: 
 325             url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf
-8')).hexdigest() 
 327         broadcast_json_page = self._download_webpage(url, broadcast_id, 'Downloading broadcast JSON
') 
 330             broadcast_json = json.loads(broadcast_json_page) 
 332             protected_broadcast = broadcast_json['_pass_protected
'] == 1 
 333             if protected_broadcast and not broadcast_password: 
 334                 raise ExtractorError('This broadcast 
is protected by a password
, use the 
--video
-password option
', expected=True) 
 336             broadcast_offline = broadcast_json['is_play
'] == 0 
 337             if broadcast_offline: 
 338                 raise ExtractorError('Broadcast 
%s is offline
' % broadcast_id, expected=True) 
 340             rtmp_url = broadcast_json['_server
'] 
 341             if not rtmp_url.startswith('rtmp
://'): 
 342                 raise ExtractorError('Unexpected broadcast rtmp URL
') 
 344             broadcast_playpath = broadcast_json['_streamName
'] 
 345             broadcast_thumbnail = broadcast_json['_imgURL
'] 
 346             broadcast_title = broadcast_json['title
'] 
 347             broadcast_description = broadcast_json['description
'] 
 348             broadcaster_nick = broadcast_json['nick
'] 
 349             broadcaster_login = broadcast_json['login
'] 
 350             rtmp_conn = 'S
:%s' % uuid.uuid4().hex 
 352             if protected_broadcast: 
 353                 raise ExtractorError('Bad broadcast password
', expected=True) 
 354             raise ExtractorError('Unexpected broadcast JSON
') 
 359             'title
': broadcast_title, 
 360             'thumbnail
': broadcast_thumbnail, 
 361             'description
': broadcast_description, 
 362             'uploader
': broadcaster_nick, 
 363             'uploader_id
': broadcaster_login, 
 364             'age_limit
': 18 if adult_content else 0, 
 366             'play_path
': broadcast_playpath, 
 368             'rtmp_conn
': rtmp_conn