2 from __future__ 
import unicode_literals
 
   4 from .common 
import InfoExtractor
 
   6     compat_etree_fromstring
, 
   8     compat_urllib_parse_unquote
, 
   9     compat_urllib_parse_urlparse
, 
  20 class OdnoklassnikiIE(InfoExtractor
): 
  21     _VALID_URL 
= r
'https?://(?:(?:www|m|mobile)\.)?(?:odnoklassniki|ok)\.ru/(?:video(?:embed)?|web-api/video/moviePlayer)/(?P<id>[\d-]+)' 
  24         'url': 'http://ok.ru/video/20079905452', 
  25         'md5': '6ba728d85d60aa2e6dd37c9e70fdc6bc', 
  29             'title': 'Культура меняет нас (прекрасный ролик!))', 
  31             'upload_date': '20141207', 
  32             'uploader_id': '330537914540', 
  33             'uploader': 'Виталий Добровольский', 
  37         'skip': 'Video has been blocked', 
  40         'url': 'http://ok.ru/video/63567059965189-0?fromTime=5', 
  41         'md5': '6ff470ea2dd51d5d18c295a355b0b6bc', 
  43             'id': '63567059965189-0', 
  45             'title': 'Девушка без комплексов ...', 
  47             'upload_date': '20150518', 
  48             'uploader_id': '534380003155', 
  49             'uploader': '☭ Андрей Мещанинов ☭', 
  55         # YouTube embed (metadataUrl, provider == USER_YOUTUBE) 
  56         'url': 'http://ok.ru/video/64211978996595-1', 
  57         'md5': '2f206894ffb5dbfcce2c5a14b909eea5', 
  59             'id': '64211978996595-1', 
  61             'title': 'Космическая среда от 26 августа 2015', 
  62             'description': 'md5:848eb8b85e5e3471a3a803dae1343ed0', 
  64             'upload_date': '20150826', 
  65             'uploader_id': 'tvroscosmos', 
  66             'uploader': 'Телестудия Роскосмоса', 
  70         # YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field) 
  71         'url': 'http://ok.ru/video/62036049272859-0', 
  73             'id': '62036049272859-0', 
  75             'title': 'МУЗЫКА     ДОЖДЯ .', 
  76             'description': 'md5:6f1867132bd96e33bf53eda1091e8ed0', 
  77             'upload_date': '20120106', 
  78             'uploader_id': '473534735899', 
  79             'uploader': 'МARINA D', 
  83             'skip_download': True, 
  85         'skip': 'Video has not been found', 
  87         'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452', 
  88         'only_matching': True, 
  90         'url': 'http://www.ok.ru/video/20648036891', 
  91         'only_matching': True, 
  93         'url': 'http://www.ok.ru/videoembed/20648036891', 
  94         'only_matching': True, 
  96         'url': 'http://m.ok.ru/video/20079905452', 
  97         'only_matching': True, 
  99         'url': 'http://mobile.ok.ru/video/20079905452', 
 100         'only_matching': True, 
 103     def _real_extract(self
, url
): 
 104         start_time 
= int_or_none(compat_parse_qs( 
 105             compat_urllib_parse_urlparse(url
).query
).get('fromTime', [None])[0]) 
 107         video_id 
= self
._match
_id
(url
) 
 109         webpage 
= self
._download
_webpage
( 
 110             'http://ok.ru/video/%s' % video_id
, video_id
) 
 112         error 
= self
._search
_regex
( 
 113             r
'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<', 
 114             webpage
, 'error', default
=None) 
 116             raise ExtractorError(error
, expected
=True) 
 118         player 
= self
._parse
_json
( 
 119             unescapeHTML(self
._search
_regex
( 
 120                 r
'data-options=(?P<quote>["\'])(?P
<player
>{.+?
%s.+?
})(?P
=quote
)' % video_id, 
 121                 webpage, 'player
', group='player
')), 
 124         flashvars = player['flashvars
'] 
 126         metadata = flashvars.get('metadata
') 
 128             metadata = self._parse_json(metadata, video_id) 
 130             metadata = self._download_json( 
 131                 compat_urllib_parse_unquote(flashvars['metadataUrl
']), 
 132                 video_id, 'Downloading metadata JSON
') 
 134         movie = metadata['movie
'] 
 136         # Some embedded videos may not contain title in movie dict (e.g. 
 137         # http://ok.ru/video/62036049272859-0) thus we allow missing title 
 138         # here and it's going to be extracted later by an extractor that
 
 139         # will process the actual embed. 
 140         provider 
= metadata
.get('provider') 
 141         title 
= movie
['title'] if provider 
== 'UPLOADED_ODKL' else movie
.get('title') 
 143         thumbnail 
= movie
.get('poster') 
 144         duration 
= int_or_none(movie
.get('duration')) 
 146         author 
= metadata
.get('author', {}) 
 147         uploader_id 
= author
.get('id') 
 148         uploader 
= author
.get('name') 
 150         upload_date 
= unified_strdate(self
._html
_search
_meta
( 
 151             'ya:ovs:upload_date', webpage
, 'upload date', default
=None)) 
 154         adult 
= self
._html
_search
_meta
( 
 155             'ya:ovs:adult', webpage
, 'age limit', default
=None) 
 157             age_limit 
= 18 if adult 
== 'true' else 0 
 159         like_count 
= int_or_none(metadata
.get('likeCount')) 
 164             'thumbnail': thumbnail
, 
 165             'duration': duration
, 
 166             'upload_date': upload_date
, 
 167             'uploader': uploader
, 
 168             'uploader_id': uploader_id
, 
 169             'like_count': like_count
, 
 170             'age_limit': age_limit
, 
 171             'start_time': start_time
, 
 174         if provider 
== 'USER_YOUTUBE': 
 176                 '_type': 'url_transparent', 
 177                 'url': movie
['contentId'], 
 181         quality 
= qualities(('4', '0', '1', '2', '3', '5')) 
 186             'format_id': f
['name'], 
 187         } for f 
in metadata
['videos']] 
 189         m3u8_url 
= metadata
.get('hlsManifestUrl') 
 191             formats
.extend(self
._extract
_m
3u8_formats
( 
 192                 m3u8_url
, video_id
, 'mp4', 'm3u8_native', 
 193                 m3u8_id
='hls', fatal
=False)) 
 195         dash_manifest 
= metadata
.get('metadataEmbedded') 
 197             formats
.extend(self
._parse
_mpd
_formats
( 
 198                 compat_etree_fromstring(dash_manifest
), 'mpd')) 
 201             fmt_type 
= self
._search
_regex
( 
 202                 r
'\btype[/=](\d)', fmt
['url'], 
 203                 'format type', default
=None) 
 205                 fmt
['quality'] = quality(fmt_type
) 
 207         self
._sort
_formats
(formats
) 
 209         info
['formats'] = formats