Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/collegehumor.py
New test release.
[youtubedl] / youtube_dl / extractor / collegehumor.py
1 import re
2 import socket
3 import xml.etree.ElementTree
4
5 from .common import InfoExtractor
6 from ..utils import (
7 compat_http_client,
8 compat_str,
9 compat_urllib_error,
10 compat_urllib_parse_urlparse,
11 compat_urllib_request,
12
13 ExtractorError,
14 )
15
16
class CollegeHumorIE(InfoExtractor):
    """Information extractor for collegehumor.com video pages.

    Extraction takes two HTTP round trips: first the "moogaloop" metadata
    XML for the video id found in the page URL, then the f4m manifest that
    the metadata points to.  The final media URL is assembled from pieces
    of both documents.
    """

    # NOTE(review): presumably read by the base class / test-suite to mark
    # this extractor as currently broken -- confirm against InfoExtractor.
    _WORKING = False
    _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/video/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'

    def report_manifest(self, video_id):
        """Report that the XML manifest is being downloaded."""
        self.to_screen(u'%s: Downloading XML manifest' % video_id)

    def _download_xml(self, url, errnote):
        """Fetch *url* and return the raw response body.

        Network-level failures (URLError, HTTPException, socket.error) are
        converted into an ExtractorError whose message starts with *errnote*.
        The response handle is always closed, even if reading fails.
        """
        try:
            handle = compat_urllib_request.urlopen(url)
            try:
                return handle.read()
            finally:
                handle.close()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)))

    def _real_extract(self, url):
        """Extract video information for a collegehumor.com video URL.

        Returns a one-element list holding a dict with the keys 'id',
        'uploader', 'upload_date', 'description', 'title', 'thumbnail',
        'url' and 'ext'.

        Raises ExtractorError if the URL does not match _VALID_URL, if
        either XML document cannot be downloaded, or if either document
        lacks the elements/attributes this extractor relies on.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('videoid')

        info = {
            'id': video_id,
            'uploader': None,
            'upload_date': None,
        }

        # Step 1: metadata XML (description, title, thumbnail, manifest URL).
        self.report_extraction(video_id)
        meta_url = 'http://www.collegehumor.com/moogaloop/video/' + video_id
        meta_xml = self._download_xml(
            meta_url, u'Unable to download video info XML')

        mdoc = xml.etree.ElementTree.fromstring(meta_xml)
        try:
            video_node = mdoc.findall('./video')[0]
            info['description'] = video_node.findall('./description')[0].text
            info['title'] = video_node.findall('./caption')[0].text
            info['thumbnail'] = video_node.findall('./thumbnail')[0].text
            manifest_url = video_node.findall('./file')[0].text
        except IndexError:
            raise ExtractorError(u'Invalid metadata XML file')
        # An empty <file/> element yields text None; without this guard the
        # string concatenation below would fail with a bare TypeError.
        if not manifest_url:
            raise ExtractorError(u'Invalid metadata XML file')

        # Step 2: f4m manifest referenced by the metadata.
        manifest_url += '?hdcore=2.10.3'
        self.report_manifest(video_id)
        manifest_xml = self._download_xml(
            manifest_url, u'Unable to download XML manifest')

        f4m_ns = '{http://ns.adobe.com/f4m/1.0}'
        adoc = xml.etree.ElementTree.fromstring(manifest_xml)
        try:
            media_node = adoc.findall('./' + f4m_ns + 'media')[0]
            # attrib is a plain dict: a missing 'url' attribute raises
            # KeyError, not IndexError, so both must be caught here.
            node_id = media_node.attrib['url']
            manifest_id = adoc.findall('./' + f4m_ns + 'id')[0].text
        except (IndexError, KeyError):
            raise ExtractorError(u'Invalid manifest file')
        # An empty <id/> element yields text None, which cannot be sliced.
        if manifest_id is None:
            raise ExtractorError(u'Invalid manifest file')

        # The media URL reuses the manifest's scheme and host.  The manifest
        # id is used with its last two characters dropped.
        # NOTE(review): the meaning of those two characters and of the fixed
        # 'Seg1-Frag1' suffix (presumably the first stream fragment) is not
        # visible in this file -- confirm before changing.
        url_pr = compat_urllib_parse_urlparse(manifest_url)
        media_url = (url_pr.scheme + '://' + url_pr.netloc + '/z' +
                     manifest_id[:-2] + '/' + node_id + 'Seg1-Frag1')

        info['url'] = media_url
        info['ext'] = 'f4f'
        return [info]