2 from __future__ 
import unicode_literals
 
  10 from .common 
import InfoExtractor
 
  13     compat_urllib_request
, 
  20 class SmotriIE(InfoExtractor
): 
  21     IE_DESC 
= 'Smotri.com' 
  23     _VALID_URL 
= r
'^https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})' 
  24     _NETRC_MACHINE 
= 'smotri' 
  27         # real video id 2610366 
  29             'url': 'http://smotri.com/video/view/?id=v261036632ab', 
  30             'md5': '2a7b08249e6f5636557579c368040eb9', 
  34                 'title': 'катастрофа с камер видеонаблюдения', 
  35                 'uploader': 'rbc2008', 
  36                 'uploader_id': 'rbc08', 
  37                 'upload_date': '20131118', 
  38                 'description': 'катастрофа с камер видеонаблюдения, видео катастрофа с камер видеонаблюдения', 
  39                 'thumbnail': 'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg', 
  44             'url': 'http://smotri.com/video/view/?id=v57591cb20', 
  45             'md5': '830266dfc21f077eac5afd1883091bcd', 
  50                 'uploader': 'Support Photofile@photofile', 
  51                 'uploader_id': 'support-photofile', 
  52                 'upload_date': '20070704', 
  53                 'description': 'test, видео test', 
  54                 'thumbnail': 'http://frame4.loadup.ru/03/ed/57591.2.3.jpg', 
  59             'url': 'http://smotri.com/video/view/?id=v1390466a13c', 
  60             'md5': 'f6331cef33cad65a0815ee482a54440b', 
  64                 'title': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1', 
  65                 'uploader': 'timoxa40', 
  66                 'uploader_id': 'timoxa40', 
  67                 'upload_date': '20100404', 
  68                 'thumbnail': 'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg', 
  69                 'description': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1, видео TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1', 
  72                 'videopassword': 'qwerty', 
  75         # age limit + video-password 
  77             'url': 'http://smotri.com/video/view/?id=v15408898bcf', 
  78             'md5': '91e909c9f0521adf5ee86fbe073aad70', 
  82                 'title': 'этот ролик не покажут по ТВ', 
  84                 'uploader_id': 'ueggb', 
  85                 'upload_date': '20101001', 
  86                 'thumbnail': 'http://frame3.loadup.ru/75/75/1540889.1.3.jpg', 
  88                 'description': 'этот ролик не покажут по ТВ, видео этот ролик не покажут по ТВ', 
  91                 'videopassword': '333' 
  96             'url': 'http://pics.smotri.com/scrubber_custom8.swf?file=v9188090500', 
  97             'md5': '4d47034979d9390d14acdf59c4935bc2', 
 101                 'title': 'Shakira - Don\'t Bother', 
 102                 'uploader': 'HannahL', 
 103                 'uploader_id': 'lisaha95', 
 104                 'upload_date': '20090331', 
 105                 'description': 'Shakira - Don\'t Bother, видео Shakira - Don\'t Bother', 
 106                 'thumbnail': 'http://frame8.loadup.ru/44/0b/918809.7.3.jpg', 
 112     _PASSWORD_NOT_VERIFIED 
= 1 
 113     _PASSWORD_DETECTED 
= 2 
 117     def _extract_url(cls
, webpage
): 
 119             r
'<embed[^>]src=(["\'])(?P
<url
>http
://pics\
.smotri\
.com
/(?
:player|scrubber_custom8
)\
.swf
\?file=v
.+?\
1)', 
 122             return mobj.group('url
') 
 125             r'''(?x)<div\s+class="video_file">http://smotri\.com/video/download/file/[^<]+</div>\s* 
 126                     <div\s+class="video_image">[^<]+</div>\s* 
 127                     <div\s+class="video_id">(?P<id>[^<]+)</div>''', webpage) 
 129             return 'http
://smotri
.com
/video
/view
/?
id=%s' % mobj.group('id') 
 131     def _search_meta(self, name, html, display_name=None): 
 132         if display_name is None: 
 134         return self._html_search_regex( 
 135             r'<meta itemprop
="%s" content
="([^"]+)" />' % re.escape(name), 
 136             html, display_name, fatal=False) 
 137         return self._html_search_meta(name, html, display_name) 
 139     def _real_extract(self, url): 
 140         mobj = re.match(self._VALID_URL, url) 
 141         video_id = mobj.group('videoid') 
 142         real_video_id = mobj.group('realvideoid') 
 144         # Download video JSON data 
 145         video_json_url = 'http://smotri.com/vt.php?id=%s' % real_video_id 
 146         video_json_page = self._download_webpage(video_json_url, video_id, 'Downloading video JSON') 
 147         video_json = json.loads(video_json_page) 
 149         status = video_json['status'] 
 150         if status == self._VIDEO_NOT_FOUND: 
 151             raise ExtractorError('Video %s does not exist' % video_id, expected=True) 
 152         elif status == self._PASSWORD_DETECTED: # The video is protected by a password, retry with 
 154             video_password = self._downloader.params.get('videopassword', None) 
 155             if not video_password: 
 156                 raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True) 
 157             video_json_url += '&md5pass=%s' % hashlib.md5(video_password.encode('utf-8')).hexdigest() 
 158             video_json_page = self._download_webpage(video_json_url, video_id, 'Downloading video JSON (video-password set)') 
 159             video_json = json.loads(video_json_page) 
 160             status = video_json['status'] 
 161             if status == self._PASSWORD_NOT_VERIFIED: 
 162                 raise ExtractorError('Video password is invalid', expected=True) 
 164         if status != self._SUCCESS: 
 165             raise ExtractorError('Unexpected status value %s' % status) 
 167         # Extract the URL of the video 
 168         video_url = video_json['file_data'] 
 170         # Video JSON does not provide enough meta data 
 171         # We will extract some from the video web page instead 
 172         video_page_url = 'http://smotri.com/video/view/?id=%s' % video_id 
 173         video_page = self._download_webpage(video_page_url, video_id, 'Downloading video page') 
 175         # Warning if video is unavailable 
 176         warning = self._html_search_regex( 
 177             r'<div class="videoUnModer
">(.*?)</div>', video_page, 
 178             'warning message', default=None) 
 179         if warning is not None: 
 180             self._downloader.report_warning( 
 181                 'Video %s may not be available; smotri said: %s ' % 
 185         if re.search('EroConfirmText">', video_page) is not None: 
 186             self.report_age_confirmation() 
 187             confirm_string = self._html_search_regex( 
 188                 r'<a href
="/video/view/\?id=%s&confirm=([^"]+)" title="[^
"]+">' % video_id, 
 189                 video_page, 'confirm string
') 
 190             confirm_url = video_page_url + '&confirm
=%s' % confirm_string 
 191             video_page = self._download_webpage(confirm_url, video_id, 'Downloading video 
page (age confirmed
)') 
 194             adult_content = False 
 196         # Extract the rest of meta data 
 197         video_title = self._search_meta('name
', video_page, 'title
') 
 199             video_title = os.path.splitext(url_basename(video_url))[0] 
 201         video_description = self._search_meta('description
', video_page) 
 202         END_TEXT = ' на сайте Smotri
.com
' 
 203         if video_description and video_description.endswith(END_TEXT): 
 204             video_description = video_description[:-len(END_TEXT)] 
 205         START_TEXT = 'Смотреть онлайн ролик 
' 
 206         if video_description and video_description.startswith(START_TEXT): 
 207             video_description = video_description[len(START_TEXT):] 
 208         video_thumbnail = self._search_meta('thumbnail
', video_page) 
 210         upload_date_str = self._search_meta('uploadDate
', video_page, 'upload date
') 
 212             upload_date_m = re.search(r'(?P
<year
>\d{4}
)\
.(?P
<month
>\d{2}
)\
.(?P
<day
>\d{2}
)T
', upload_date_str) 
 213             video_upload_date = ( 
 215                     upload_date_m.group('year
') + 
 216                     upload_date_m.group('month
') + 
 217                     upload_date_m.group('day
') 
 219                 if upload_date_m else None 
 222             video_upload_date = None 
 224         duration_str = self._search_meta('duration
', video_page) 
 226             duration_m = re.search(r'T(?P
<hours
>[0-9]{2}
)H(?P
<minutes
>[0-9]{2}
)M(?P
<seconds
>[0-9]{2}
)S
', duration_str) 
 229                     (int(duration_m.group('hours
')) * 60 * 60) + 
 230                     (int(duration_m.group('minutes
')) * 60) + 
 231                     int(duration_m.group('seconds
')) 
 233                 if duration_m else None 
 236             video_duration = None 
 238         video_uploader = self._html_search_regex( 
 239             '<div 
class="DescrUser"><div
>Автор
.*?onmouseover
="popup_user_info[^"]+">(.*?)</a>', 
 240             video_page, 'uploader', fatal=False, flags=re.MULTILINE|re.DOTALL) 
 242         video_uploader_id = self._html_search_regex( 
 243             '<div class="DescrUser
"><div>Автор.*?onmouseover="popup_user_info
\\(.*?
\'([^
\']+)\'\\);">', 
 244             video_page, 'uploader id', fatal=False, flags=re.MULTILINE|re.DOTALL) 
 246         video_view_count = self._html_search_regex( 
 247             'Общее количество просмотров.*?<span class="Number
">(\\d+)</span>', 
 248             video_page, 'view count', fatal=False, flags=re.MULTILINE|re.DOTALL) 
 253             'title': video_title, 
 254             'thumbnail': video_thumbnail, 
 255             'description': video_description, 
 256             'uploader': video_uploader, 
 257             'upload_date': video_upload_date, 
 258             'uploader_id': video_uploader_id, 
 259             'duration': video_duration, 
 260             'view_count': int_or_none(video_view_count), 
 261             'age_limit': 18 if adult_content else 0, 
 262             'video_page_url': video_page_url 
 266 class SmotriCommunityIE(InfoExtractor): 
 267     IE_DESC = 'Smotri.com community videos' 
 268     IE_NAME = 'smotri:community' 
 269     _VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)' 
 271     def _real_extract(self, url): 
 272         mobj = re.match(self._VALID_URL, url) 
 273         community_id = mobj.group('communityid') 
 275         url = 'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id 
 276         rss = self._download_xml(url, community_id, 'Downloading community RSS') 
 278         entries = [self.url_result(video_url.text, 'Smotri') 
 279                    for video_url in rss.findall('./channel/item/link')] 
 281         description_text = rss.find('./channel/description').text 
 282         community_title = self._html_search_regex( 
 283             '^Видео сообщества "([^
"]+)"$
', description_text, 'community title
') 
 285         return self.playlist_result(entries, community_id, community_title) 
 288 class SmotriUserIE(InfoExtractor): 
 289     IE_DESC = 'Smotri
.com user videos
' 
 290     IE_NAME = 'smotri
:user
' 
 291     _VALID_URL = r'^https?
://(?
:www\
.)?smotri\
.com
/user
/(?P
<userid
>[0-9A
-Za
-z_
\'-]+)' 
 293     def _real_extract(self, url): 
 294         mobj = re.match(self._VALID_URL, url) 
 295         user_id = mobj.group('userid
') 
 297         url = 'http
://smotri
.com
/export
/rss
/user
/video
/-/%s/video
.xml
' % user_id 
 298         rss = self._download_xml(url, user_id, 'Downloading user RSS
') 
 300         entries = [self.url_result(video_url.text, 'Smotri
') 
 301                    for video_url in rss.findall('./channel
/item
/link
')] 
 303         description_text = rss.find('./channel
/description
').text 
 304         user_nickname = self._html_search_regex( 
 305             '^Видео режиссера 
(.*)$
', description_text, 
 308         return self.playlist_result(entries, user_id, user_nickname) 
 311 class SmotriBroadcastIE(InfoExtractor): 
 312     IE_DESC = 'Smotri
.com broadcasts
' 
 313     IE_NAME = 'smotri
:broadcast
' 
 314     _VALID_URL = r'^https?
://(?
:www\
.)?
(?P
<url
>smotri\
.com
/live
/(?P
<broadcastid
>[^
/]+))/?
.*' 
 316     def _real_extract(self, url): 
 317         mobj = re.match(self._VALID_URL, url) 
 318         broadcast_id = mobj.group('broadcastid
') 
 320         broadcast_url = 'http
://' + mobj.group('url
') 
 321         broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page
') 
 323         if re.search('>Режиссер с логином 
<br
/>"%s"<br
/> <span
>не существует
<' % broadcast_id, broadcast_page) is not None: 
 324             raise ExtractorError('Broadcast 
%s does 
not exist
' % broadcast_id, expected=True) 
 327         if re.search('EroConfirmText
">', broadcast_page) is not None: 
 329             (username, password) = self._get_login_info() 
 331                 raise ExtractorError('Erotic broadcasts allowed only for registered users, ' 
 332                     'use --username and --password options to provide account credentials.', expected=True) 
 336                 'confirm_erotic': '1', 
 338                 'password': password, 
 341             request = compat_urllib_request.Request(broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form)) 
 342             request.add_header('Content-Type', 'application/x-www-form-urlencoded') 
 343             broadcast_page = self._download_webpage(request, broadcast_id, 'Logging in and confirming age') 
 345             if re.search('>Неверный логин или пароль<', broadcast_page) is not None: 
 346                 raise ExtractorError('Unable to log in: bad username or password', expected=True) 
 350             adult_content = False 
 352         ticket = self._html_search_regex( 
 353             'window\.broadcast_control\.addFlashVar\\(\'file\', \'([^\']+)\'\\);', 
 354             broadcast_page, 'broadcast ticket') 
 356         url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket 
 358         broadcast_password = self._downloader.params.get('videopassword', None) 
 359         if broadcast_password: 
 360             url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest() 
 362         broadcast_json_page = self._download_webpage(url, broadcast_id, 'Downloading broadcast JSON') 
 365             broadcast_json = json.loads(broadcast_json_page) 
 367             protected_broadcast = broadcast_json['_pass_protected'] == 1 
 368             if protected_broadcast and not broadcast_password: 
 369                 raise ExtractorError('This broadcast is protected by a password, use the --video-password option', expected=True) 
 371             broadcast_offline = broadcast_json['is_play'] == 0 
 372             if broadcast_offline: 
 373                 raise ExtractorError('Broadcast %s is offline' % broadcast_id, expected=True) 
 375             rtmp_url = broadcast_json['_server'] 
 376             if not rtmp_url.startswith('rtmp://'): 
 377                 raise ExtractorError('Unexpected broadcast rtmp URL') 
 379             broadcast_playpath = broadcast_json['_streamName'] 
 380             broadcast_thumbnail = broadcast_json['_imgURL'] 
 381             broadcast_title = broadcast_json['title'] 
 382             broadcast_description = broadcast_json['description'] 
 383             broadcaster_nick = broadcast_json['nick'] 
 384             broadcaster_login = broadcast_json['login'] 
 385             rtmp_conn = 'S:%s' % uuid.uuid4().hex 
 387             if protected_broadcast: 
 388                 raise ExtractorError('Bad broadcast password', expected=True) 
 389             raise ExtractorError('Unexpected broadcast JSON') 
 394             'title': broadcast_title, 
 395             'thumbnail': broadcast_thumbnail, 
 396             'description': broadcast_description, 
 397             'uploader': broadcaster_nick, 
 398             'uploader_id': broadcaster_login, 
 399             'age_limit': 18 if adult_content else 0, 
 401             'play_path': broadcast_playpath, 
 403             'rtmp_conn': rtmp_conn