]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/jove.py
cf73cd7533177d028cee83a2a013914b93f64b15
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
  12 class JoveIE(InfoExtractor
): 
  13     _VALID_URL 
= r
'https?://(?:www\.)?jove\.com/video/(?P<id>[0-9]+)' 
  14     _CHAPTERS_URL 
= 'http://www.jove.com/video-chapters?videoid={video_id:}' 
  17             'url': 'http://www.jove.com/video/2744/electrode-positioning-montage-transcranial-direct-current', 
  18             'md5': '93723888d82dbd6ba8b3d7d0cd65dd2b', 
  22                 'title': 'Electrode Positioning and Montage in Transcranial Direct Current Stimulation', 
  23                 'description': 'md5:015dd4509649c0908bc27f049e0262c6', 
  24                 'thumbnail': 're:^https?://.*\.png$', 
  25                 'upload_date': '20110523', 
  29             'url': 'http://www.jove.com/video/51796/culturing-caenorhabditis-elegans-axenic-liquid-media-creation', 
  30             'md5': '914aeb356f416811d911996434811beb', 
  34                 'title': 'Culturing Caenorhabditis elegans in Axenic Liquid Media and Creation of Transgenic Worms by Microparticle Bombardment', 
  35                 'description': 'md5:35ff029261900583970c4023b70f1dc9', 
  36                 'thumbnail': 're:^https?://.*\.png$', 
  37                 'upload_date': '20140802', 
  43     def _real_extract(self
, url
): 
  44         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  45         video_id 
= mobj
.group('id') 
  47         webpage 
= self
._download
_webpage
(url
, video_id
) 
  49         chapters_id 
= self
._html
_search
_regex
( 
  50             r
'/video-chapters\?videoid=([0-9]+)', webpage
, 'chapters id') 
  52         chapters_xml 
= self
._download
_xml
( 
  53             self
._CHAPTERS
_URL
.format(video_id
=chapters_id
), 
  54             video_id
, note
='Downloading chapters XML', 
  55             errnote
='Failed to download chapters XML') 
  57         video_url 
= chapters_xml
.attrib
.get('video') 
  59             raise ExtractorError('Failed to get the video URL') 
  61         title 
= self
._html
_search
_meta
('citation_title', webpage
, 'title') 
  62         thumbnail 
= self
._og
_search
_thumbnail
(webpage
) 
  63         description 
= self
._html
_search
_regex
( 
  64             r
'<div id="section_body_summary"><p class="jove_content">(.+?)</p>', 
  65             webpage
, 'description', fatal
=False) 
  66         publish_date 
= unified_strdate(self
._html
_search
_meta
( 
  67             'citation_publication_date', webpage
, 'publish date', fatal
=False)) 
  68         comment_count 
= self
._html
_search
_regex
( 
  69             r
'<meta name="num_comments" content="(\d+) Comments?"', 
  70             webpage
, 'comment count', fatal
=False) 
  76             'thumbnail': thumbnail
, 
  77             'description': description
, 
  78             'upload_date': publish_date
, 
  79             'comment_count': comment_count
,