7 from .common 
import InfoExtractor
 
  14 class SmotriIE(InfoExtractor
): 
  15     IE_DESC 
= u
'Smotri.com' 
  17     _VALID_URL 
= r
'^https?://(?:www\.)?(?P<url>smotri\.com/video/view/\?id=(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4}))' 
  20         # real video id 2610366 
  22             u
'url': u
'http://smotri.com/video/view/?id=v261036632ab', 
  23             u
'file': u
'v261036632ab.mp4', 
  24             u
'md5': u
'2a7b08249e6f5636557579c368040eb9', 
  26                 u
'title': u
'катастрофа с камер видеонаблюдения', 
  27                 u
'uploader': u
'rbc2008', 
  28                 u
'uploader_id': u
'rbc08', 
  29                 u
'upload_date': u
'20131118', 
  30                 u
'description': u
'катастрофа с камер видеонаблюдения, видео катастрофа с камер видеонаблюдения', 
  31                 u
'thumbnail': u
'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg', 
  36             u
'url': u
'http://smotri.com/video/view/?id=v57591cb20', 
  37             u
'file': u
'v57591cb20.flv', 
  38             u
'md5': u
'830266dfc21f077eac5afd1883091bcd', 
  41                 u
'uploader': u
'Support Photofile@photofile', 
  42                 u
'uploader_id': u
'support-photofile', 
  43                 u
'upload_date': u
'20070704', 
  44                 u
'description': u
'test, видео test', 
  45                 u
'thumbnail': u
'http://frame4.loadup.ru/03/ed/57591.2.3.jpg', 
  50             u
'url': u
'http://smotri.com/video/view/?id=v1390466a13c', 
  51             u
'file': u
'v1390466a13c.mp4', 
  52             u
'md5': u
'f6331cef33cad65a0815ee482a54440b', 
  54                 u
'title': u
'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1', 
  55                 u
'uploader': u
'timoxa40', 
  56                 u
'uploader_id': u
'timoxa40', 
  57                 u
'upload_date': u
'20100404', 
  58                 u
'thumbnail': u
'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg', 
  59                 u
'description': u
'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1, видео TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1', 
  62                 u
'videopassword': u
'qwerty', 
  65         # age limit + video-password 
  67             u
'url': u
'http://smotri.com/video/view/?id=v15408898bcf', 
  68             u
'file': u
'v15408898bcf.flv', 
  69             u
'md5': u
'91e909c9f0521adf5ee86fbe073aad70', 
  71                 u
'title': u
'этот ролик не покажут по ТВ', 
  72                 u
'uploader': u
'zzxxx', 
  73                 u
'uploader_id': u
'ueggb', 
  74                 u
'upload_date': u
'20101001', 
  75                 u
'thumbnail': u
'http://frame3.loadup.ru/75/75/1540889.1.3.jpg', 
  77                 u
'description': u
'этот ролик не покажут по ТВ, видео этот ролик не покажут по ТВ', 
  80                 u
'videopassword': u
'333' 
  86     _PASSWORD_NOT_VERIFIED 
= 1 
  87     _PASSWORD_DETECTED 
= 2 
  90     def _search_meta(self
, name
, html
, display_name
=None): 
  91         if display_name 
is None: 
  93         return self
._html
_search
_regex
( 
  94             r
'<meta itemprop="%s" content="([^"]+)" />' % re
.escape(name
), 
  95             html
, display_name
, fatal
=False) 
  96         return self
._html
_search
_meta
(name
, html
, display_name
) 
  98     def _real_extract(self
, url
): 
  99         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 100         video_id 
= mobj
.group('videoid') 
 101         real_video_id 
= mobj
.group('realvideoid') 
 103         # Download video JSON data 
 104         video_json_url 
= 'http://smotri.com/vt.php?id=%s' % real_video_id
 
 105         video_json_page 
= self
._download
_webpage
(video_json_url
, video_id
, u
'Downloading video JSON') 
 106         video_json 
= json
.loads(video_json_page
) 
 108         status 
= video_json
['status'] 
 109         if status 
== self
._VIDEO
_NOT
_FOUND
: 
 110             raise ExtractorError(u
'Video %s does not exist' % video_id
, expected
=True) 
 111         elif status 
== self
._PASSWORD
_DETECTED
:  # The video is protected by a password, retry with 
 113             video_password 
= self
._downloader
.params
.get('videopassword', None) 
 114             if not video_password
: 
 115                 raise ExtractorError(u
'This video is protected by a password, use the --video-password option', expected
=True) 
 116             video_json_url 
+= '&md5pass=%s' % hashlib
.md5(video_password
.encode('utf-8')).hexdigest() 
 117             video_json_page 
= self
._download
_webpage
(video_json_url
, video_id
, u
'Downloading video JSON (video-password set)') 
 118             video_json 
= json
.loads(video_json_page
) 
 119             status 
= video_json
['status'] 
 120             if status 
== self
._PASSWORD
_NOT
_VERIFIED
: 
 121                 raise ExtractorError(u
'Video password is invalid', expected
=True) 
 123         if status 
!= self
._SUCCESS
: 
 124             raise ExtractorError(u
'Unexpected status value %s' % status
) 
 126         # Extract the URL of the video 
 127         video_url 
= video_json
['file_data'] 
 129         # Video JSON does not provide enough meta data 
 130         # We will extract some from the video web page instead 
 131         video_page_url 
= 'http://' + mobj
.group('url') 
 132         video_page 
= self
._download
_webpage
(video_page_url
, video_id
, u
'Downloading video page') 
 135         if re
.search(u
'EroConfirmText">', video_page
) is not None: 
 136             self
.report_age_confirmation() 
 137             confirm_string 
= self
._html
_search
_regex
( 
 138                 r
'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id
, 
 139                 video_page
, u
'confirm string') 
 140             confirm_url 
= video_page_url 
+ '&confirm=%s' % confirm_string
 
 141             video_page 
= self
._download
_webpage
(confirm_url
, video_id
, u
'Downloading video page (age confirmed)') 
 144             adult_content 
= False 
 146         # Extract the rest of meta data 
 147         video_title 
= self
._search
_meta
(u
'name', video_page
, u
'title') 
 149             video_title 
= video_url
.rsplit('/', 1)[-1] 
 151         video_description 
= self
._search
_meta
(u
'description', video_page
) 
 152         END_TEXT 
= u
' на сайте Smotri.com' 
 153         if video_description
.endswith(END_TEXT
): 
 154             video_description 
= video_description
[:-len(END_TEXT
)] 
 155         START_TEXT 
= u
'Смотреть онлайн ролик ' 
 156         if video_description
.startswith(START_TEXT
): 
 157             video_description 
= video_description
[len(START_TEXT
):] 
 158         video_thumbnail 
= self
._search
_meta
(u
'thumbnail', video_page
) 
 160         upload_date_str 
= self
._search
_meta
(u
'uploadDate', video_page
, u
'upload date') 
 161         upload_date_m 
= re
.search(r
'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str
) 
 162         video_upload_date 
= ( 
 164                 upload_date_m
.group('year') + 
 165                 upload_date_m
.group('month') + 
 166                 upload_date_m
.group('day') 
 168             if upload_date_m 
else None 
 171         duration_str 
= self
._search
_meta
(u
'duration', video_page
) 
 172         duration_m 
= re
.search(r
'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str
) 
 175                 (int(duration_m
.group('hours')) * 60 * 60) + 
 176                 (int(duration_m
.group('minutes')) * 60) + 
 177                 int(duration_m
.group('seconds')) 
 179             if duration_m 
else None 
 182         video_uploader 
= self
._html
_search
_regex
( 
 183             u
'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>', 
 184             video_page
, u
'uploader', fatal
=False, flags
=re
.MULTILINE|re
.DOTALL
) 
 186         video_uploader_id 
= self
._html
_search
_regex
( 
 187             u
'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info\\(.*?\'([^\']+)\'\\);">', 
 188             video_page
, u
'uploader id', fatal
=False, flags
=re
.MULTILINE|re
.DOTALL
) 
 190         video_view_count 
= self
._html
_search
_regex
( 
 191             u
'Общее количество просмотров.*?<span class="Number">(\\d+)</span>', 
 192             video_page
, u
'view count', fatal
=False, flags
=re
.MULTILINE|re
.DOTALL
) 
 197             'title': video_title
, 
 198             'thumbnail': video_thumbnail
, 
 199             'description': video_description
, 
 200             'uploader': video_uploader
, 
 201             'upload_date': video_upload_date
, 
 202             'uploader_id': video_uploader_id
, 
 203             'video_duration': video_duration
, 
 204             'view_count': video_view_count
, 
 205             'age_limit': 18 if adult_content 
else 0, 
 206             'video_page_url': video_page_url
 
 210 class SmotriCommunityIE(InfoExtractor
): 
 211     IE_DESC 
= u
'Smotri.com community videos' 
 212     IE_NAME 
= u
'smotri:community' 
 213     _VALID_URL 
= r
'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)' 
 215     def _real_extract(self, url): 
 216         mobj = re.match(self._VALID_URL, url) 
 217         community_id = mobj.group('communityid
') 
 219         url = 'http
://smotri
.com
/export
/rss
/video
/by
/community
/-/%s/video
.xml
' % community_id 
 220         rss = self._download_xml(url, community_id, u'Downloading community RSS
') 
 222         entries = [self.url_result(video_url.text, 'Smotri
') 
 223                    for video_url in rss.findall('./channel
/item
/link
')] 
 225         description_text = rss.find('./channel
/description
').text 
 226         community_title = self._html_search_regex( 
 227             u'^Видео сообщества 
"([^"]+)"$', description_text, u'community title') 
 229         return self.playlist_result(entries, community_id, community_title) 
 232 class SmotriUserIE(InfoExtractor): 
 233     IE_DESC = u'Smotri.com user videos' 
 234     IE_NAME = u'smotri:user' 
 235     _VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)' 
 237     def _real_extract(self, url): 
 238         mobj = re.match(self._VALID_URL, url) 
 239         user_id = mobj.group('userid') 
 241         url = 'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id 
 242         rss = self._download_xml(url, user_id, u'Downloading user RSS') 
 244         entries = [self.url_result(video_url.text, 'Smotri') 
 245                    for video_url in rss.findall('./channel/item/link')] 
 247         description_text = rss.find('./channel/description').text 
 248         user_nickname = self._html_search_regex( 
 249             u'^Видео режиссера (.*)$', description_text, 
 252         return self.playlist_result(entries, user_id, user_nickname)