]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/bet.py
   1 from __future__ 
import unicode_literals
 
   3 from .common 
import InfoExtractor
 
   4 from ..compat 
import compat_urllib_parse
 
  13 class BetIE(InfoExtractor
): 
  14     _VALID_URL 
= r
'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html' 
  17             'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html', 
  19                 'id': '740ab250-bb94-4a8a-8787-fe0de7c74471', 
  20                 'display_id': 'in-bet-exclusive-obama-talks-race-and-racism', 
  22                 'title': 'BET News Presents: A Conversation With President Obama', 
  23                 'description': 'md5:5a88d8ae912c1b33e090290af7ec33c6', 
  25                 'timestamp': 1418075340, 
  26                 'upload_date': '20141208', 
  28                 'thumbnail': 're:(?i)^https?://.*\.jpg$', 
  32                 'skip_download': True, 
  36             'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html', 
  38                 'id': 'bcd1b1df-673a-42cf-8d01-b282db608f2d', 
  39                 'display_id': 'justice-for-ferguson-a-community-reacts', 
  41                 'title': 'Justice for Ferguson: A Community Reacts', 
  42                 'description': 'A BET News special.', 
  44                 'timestamp': 1416942360, 
  45                 'upload_date': '20141125', 
  47                 'thumbnail': 're:(?i)^https?://.*\.jpg$', 
  51                 'skip_download': True, 
  56     def _real_extract(self
, url
): 
  57         display_id 
= self
._match
_id
(url
) 
  58         webpage 
= self
._download
_webpage
(url
, display_id
) 
  60         media_url 
= compat_urllib_parse
.unquote(self
._search
_regex
( 
  61             [r
'mediaURL\s*:\s*"([^"]+)"', r
"var\s+mrssMediaUrl\s*=\s*'([^']+)'"], 
  62             webpage
, 'media URL')) 
  64         mrss 
= self
._download
_xml
(media_url
, display_id
) 
  66         item 
= mrss
.find('./channel/item') 
  69             'dc': 'http://purl.org/dc/elements/1.1/', 
  70             'media': 'http://search.yahoo.com/mrss/', 
  71             'ka': 'http://kickapps.com/karss', 
  74         title 
= xpath_text(item
, './title', 'title') 
  75         description 
= xpath_text( 
  76             item
, './description', 'description', fatal
=False) 
  78         video_id 
= xpath_text(item
, './guid', 'video id', fatal
=False) 
  80         timestamp 
= parse_iso8601(xpath_text( 
  81             item
, xpath_with_ns('./dc:date', NS_MAP
), 
  82             'upload date', fatal
=False)) 
  83         uploader 
= xpath_text( 
  84             item
, xpath_with_ns('./dc:creator', NS_MAP
), 
  85             'uploader', fatal
=False) 
  87         media_content 
= item
.find( 
  88             xpath_with_ns('./media:content', NS_MAP
)) 
  89         duration 
= int_or_none(media_content
.get('duration')) 
  90         smil_url 
= media_content
.get('url') 
  92         thumbnail 
= media_content
.find( 
  93             xpath_with_ns('./media:thumbnail', NS_MAP
)).get('url') 
  95         formats 
= self
._extract
_smil
_formats
(smil_url
, display_id
) 
  99             'display_id': display_id
, 
 101             'description': description
, 
 102             'thumbnail': thumbnail
, 
 103             'timestamp': timestamp
, 
 104             'uploader': uploader
, 
 105             'duration': duration
,