Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/myspass.py

   1 import os.path
   2 import xml.etree.ElementTree
   3
   4 from .common import InfoExtractor
   5 from ..utils import (
   6     compat_urllib_parse_urlparse,
   7
   8     ExtractorError,
   9 )
  10
  11
  12 class MySpassIE(InfoExtractor):
  13     _VALID_URL = r'http://www.myspass.de/.*'
  14     _TEST = {
  15         u'url': u'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
  16         u'file': u'11741.mp4',
  17         u'md5': u'0b49f4844a068f8b33f4b7c88405862b',
  18         u'info_dict': {
  19             u"description": u"Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?",
  20             u"title": u"Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2"
  21         }
  22     }
  23
  24     def _real_extract(self, url):
  25         META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s'
  26
  27         # video id is the last path element of the URL
  28         # usually there is a trailing slash, so also try the second but last
  29         url_path = compat_urllib_parse_urlparse(url).path
  30         url_parent_path, video_id = os.path.split(url_path)
  31         if not video_id:
  32             _, video_id = os.path.split(url_parent_path)
  33
  34         # get metadata
  35         metadata_url = META_DATA_URL_TEMPLATE % video_id
  36         metadata_text = self._download_webpage(metadata_url, video_id)
  37         metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8'))
  38
  39         # extract values from metadata
  40         url_flv_el = metadata.find('url_flv')
  41         if url_flv_el is None:
  42             raise ExtractorError(u'Unable to extract download url')
  43         video_url = url_flv_el.text
  44         extension = os.path.splitext(video_url)[1][1:]
  45         title_el = metadata.find('title')
  46         if title_el is None:
  47             raise ExtractorError(u'Unable to extract title')
  48         title = title_el.text
  49         format_id_el = metadata.find('format_id')
  50         if format_id_el is None:
  51             format = 'mp4'
  52         else:
  53             format = format_id_el.text
  54         description_el = metadata.find('description')
  55         if description_el is not None:
  56             description = description_el.text
  57         else:
  58             description = None
  59         imagePreview_el = metadata.find('imagePreview')
  60         if imagePreview_el is not None:
  61             thumbnail = imagePreview_el.text
  62         else:
  63             thumbnail = None
  64         info = {
  65             'id': video_id,
  66             'url': video_url,
  67             'title': title,
  68             'ext': extension,
  69             'format': format,
  70             'thumbnail': thumbnail,
  71             'description': description
  72         }
  73         return [info]