3 import xml
.etree
.ElementTree
 
   5 from .common 
import InfoExtractor
 
  10     compat_urllib_parse_urlparse
, 
  11     compat_urllib_request
, 
  17 class CollegeHumorIE(InfoExtractor
): 
  19     _VALID_URL 
= r
'^(?:https?://)?(?:www\.)?collegehumor\.com/video/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$' 
  def report_manifest(self, video_id):
      """Announce that the XML manifest for a video is being downloaded.

      The status line is "<video_id>: Downloading XML manifest" and is
      emitted through ``self.to_screen``. Nothing is returned.
      """
      message = u'%s: Downloading XML manifest' % video_id
      self.to_screen(message)
  25     def _real_extract(self
, url
): 
  26         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  28             raise ExtractorError(u
'Invalid URL: %s' % url
) 
  29         video_id 
= mobj
.group('videoid') 
  37         self
.report_extraction(video_id
) 
  38         xmlUrl 
= 'http://www.collegehumor.com/moogaloop/video/' + video_id
 
  40             metaXml 
= compat_urllib_request
.urlopen(xmlUrl
).read() 
  41         except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
: 
  42             raise ExtractorError(u
'Unable to download video info XML: %s' % compat_str(err
)) 
  44         mdoc 
= xml
.etree
.ElementTree
.fromstring(metaXml
) 
  46             videoNode 
= mdoc
.findall('./video')[0] 
  47             info
['description'] = videoNode
.findall('./description')[0].text
 
  48             info
['title'] = videoNode
.findall('./caption')[0].text
 
  49             info
['thumbnail'] = videoNode
.findall('./thumbnail')[0].text
 
  50             manifest_url 
= videoNode
.findall('./file')[0].text
 
  52             raise ExtractorError(u
'Invalid metadata XML file') 
  54         manifest_url 
+= '?hdcore=2.10.3' 
  55         self
.report_manifest(video_id
) 
  57             manifestXml 
= compat_urllib_request
.urlopen(manifest_url
).read() 
  58         except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
: 
  59             raise ExtractorError(u
'Unable to download video info XML: %s' % compat_str(err
)) 
  61         adoc 
= xml
.etree
.ElementTree
.fromstring(manifestXml
) 
  63             media_node 
= adoc
.findall('./{http://ns.adobe.com/f4m/1.0}media')[0] 
  64             node_id 
= media_node
.attrib
['url'] 
  65             video_id 
= adoc
.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
 
  66         except IndexError as err
: 
  67             raise ExtractorError(u
'Invalid manifest file') 
  69         url_pr 
= compat_urllib_parse_urlparse(manifest_url
) 
  70         url 
= url_pr
.scheme 
+ '://' + url_pr
.netloc 
+ '/z' + video_id
[:-2] + '/' + node_id 
+ 'Seg1-Frag1'