1 from __future__ 
import unicode_literals
 
   3 from .common 
import InfoExtractor
 
   4 from ..compat 
import compat_urllib_parse_unquote
 
  13 class BetIE(InfoExtractor
): 
  14     _VALID_URL 
= r
'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html' 
  17             'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html', 
  19                 'id': 'news/national/2014/a-conversation-with-president-obama', 
  20                 'display_id': 'in-bet-exclusive-obama-talks-race-and-racism', 
  22                 'title': 'A Conversation With President Obama', 
  23                 'description': 'md5:699d0652a350cf3e491cd15cc745b5da', 
  25                 'timestamp': 1418075340, 
  26                 'upload_date': '20141208', 
  28                 'thumbnail': 're:(?i)^https?://.*\.jpg$', 
  32                 'skip_download': True, 
  36             'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html', 
  38                 'id': 'news/national/2014/justice-for-ferguson-a-community-reacts', 
  39                 'display_id': 'justice-for-ferguson-a-community-reacts', 
  41                 'title': 'Justice for Ferguson: A Community Reacts', 
  42                 'description': 'A BET News special.', 
  44                 'timestamp': 1416942360, 
  45                 'upload_date': '20141125', 
  47                 'thumbnail': 're:(?i)^https?://.*\.jpg$', 
  51                 'skip_download': True, 
  56     def _real_extract(self
, url
): 
  57         display_id 
= self
._match
_id
(url
) 
  58         webpage 
= self
._download
_webpage
(url
, display_id
) 
  60         media_url 
= compat_urllib_parse_unquote(self
._search
_regex
( 
  61             [r
'mediaURL\s*:\s*"([^"]+)"', r
"var\s+mrssMediaUrl\s*=\s*'([^']+)'"], 
  62             webpage
, 'media URL')) 
  64         video_id 
= self
._search
_regex
( 
  65             r
'/video/(.*)/_jcr_content/', media_url
, 'video id') 
  67         mrss 
= self
._download
_xml
(media_url
, display_id
) 
  69         item 
= mrss
.find('./channel/item') 
  72             'dc': 'http://purl.org/dc/elements/1.1/', 
  73             'media': 'http://search.yahoo.com/mrss/', 
  74             'ka': 'http://kickapps.com/karss', 
  77         title 
= xpath_text(item
, './title', 'title') 
  78         description 
= xpath_text( 
  79             item
, './description', 'description', fatal
=False) 
  81         timestamp 
= parse_iso8601(xpath_text( 
  82             item
, xpath_with_ns('./dc:date', NS_MAP
), 
  83             'upload date', fatal
=False)) 
  84         uploader 
= xpath_text( 
  85             item
, xpath_with_ns('./dc:creator', NS_MAP
), 
  86             'uploader', fatal
=False) 
  88         media_content 
= item
.find( 
  89             xpath_with_ns('./media:content', NS_MAP
)) 
  90         duration 
= int_or_none(media_content
.get('duration')) 
  91         smil_url 
= media_content
.get('url') 
  93         thumbnail 
= media_content
.find( 
  94             xpath_with_ns('./media:thumbnail', NS_MAP
)).get('url') 
  96         formats 
= self
._extract
_smil
_formats
(smil_url
, display_id
) 
 100             'display_id': display_id
, 
 102             'description': description
, 
 103             'thumbnail': thumbnail
, 
 104             'timestamp': timestamp
, 
 105             'uploader': uploader
, 
 106             'duration': duration
,