]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/jove.py
1 from __future__
import unicode_literals
5 from .common
import InfoExtractor
12 class JoveIE(InfoExtractor
):
13 _VALID_URL
= r
'https?://(?:www\.)?jove\.com/video/(?P<id>[0-9]+)'
14 _CHAPTERS_URL
= 'http://www.jove.com/video-chapters?videoid={video_id:}'
17 'url': 'http://www.jove.com/video/2744/electrode-positioning-montage-transcranial-direct-current',
18 'md5': '93723888d82dbd6ba8b3d7d0cd65dd2b',
22 'title': 'Electrode Positioning and Montage in Transcranial Direct Current Stimulation',
23 'description': 'md5:015dd4509649c0908bc27f049e0262c6',
24 'thumbnail': r
're:^https?://.*\.png$',
25 'upload_date': '20110523',
29 'url': 'http://www.jove.com/video/51796/culturing-caenorhabditis-elegans-axenic-liquid-media-creation',
30 'md5': '914aeb356f416811d911996434811beb',
34 'title': 'Culturing Caenorhabditis elegans in Axenic Liquid Media and Creation of Transgenic Worms by Microparticle Bombardment',
35 'description': 'md5:35ff029261900583970c4023b70f1dc9',
36 'thumbnail': r
're:^https?://.*\.png$',
37 'upload_date': '20140802',
43 def _real_extract(self
, url
):
44 mobj
= re
.match(self
._VALID
_URL
, url
)
45 video_id
= mobj
.group('id')
47 webpage
= self
._download
_webpage
(url
, video_id
)
49 chapters_id
= self
._html
_search
_regex
(
50 r
'/video-chapters\?videoid=([0-9]+)', webpage
, 'chapters id')
52 chapters_xml
= self
._download
_xml
(
53 self
._CHAPTERS
_URL
.format(video_id
=chapters_id
),
54 video_id
, note
='Downloading chapters XML',
55 errnote
='Failed to download chapters XML')
57 video_url
= chapters_xml
.attrib
.get('video')
59 raise ExtractorError('Failed to get the video URL')
61 title
= self
._html
_search
_meta
('citation_title', webpage
, 'title')
62 thumbnail
= self
._og
_search
_thumbnail
(webpage
)
63 description
= self
._html
_search
_regex
(
64 r
'<div id="section_body_summary"><p class="jove_content">(.+?)</p>',
65 webpage
, 'description', fatal
=False)
66 publish_date
= unified_strdate(self
._html
_search
_meta
(
67 'citation_publication_date', webpage
, 'publish date', fatal
=False))
68 comment_count
= int(self
._html
_search
_regex
(
69 r
'<meta name="num_comments" content="(\d+) Comments?"',
70 webpage
, 'comment count', fatal
=False))
76 'thumbnail': thumbnail
,
77 'description': description
,
78 'upload_date': publish_date
,
79 'comment_count': comment_count
,