Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/m6.py

   1 # encoding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5
   6 from .common import InfoExtractor
   7
   8
   9 class M6IE(InfoExtractor):
  10     IE_NAME = 'm6'
  11     _VALID_URL = r'https?://(?:www\.)?m6\.fr/[^/]+/videos/(?P<id>\d+)-[^\.]+\.html'
  12
  13     _TEST = {
  14         'url': 'http://www.m6.fr/emission-les_reines_du_shopping/videos/11323908-emeline_est_la_reine_du_shopping_sur_le_theme_ma_fete_d_8217_anniversaire.html',
  15         'md5': '242994a87de2c316891428e0176bcb77',
  16         'info_dict': {
  17             'id': '11323908',
  18             'ext': 'mp4',
  19             'title': 'Emeline est la Reine du Shopping sur le thème « Ma fête d’anniversaire ! »',
  20             'description': 'md5:1212ae8fb4b7baa4dc3886c5676007c2',
  21             'duration': 100,
  22         }
  23     }
  24
  25     def _real_extract(self, url):
  26         mobj = re.match(self._VALID_URL, url)
  27         video_id = mobj.group('id')
  28
  29         rss = self._download_xml('http://ws.m6.fr/v1/video/info/m6/bonus/%s' % video_id, video_id,
  30                                  'Downloading video RSS')
  31
  32         title = rss.find('./channel/item/title').text
  33         description = rss.find('./channel/item/description').text
  34         thumbnail = rss.find('./channel/item/visuel_clip_big').text
  35         duration = int(rss.find('./channel/item/duration').text)
  36         view_count = int(rss.find('./channel/item/nombre_vues').text)
  37
  38         formats = []
  39         for format_id in ['lq', 'sd', 'hq', 'hd']:
  40             video_url = rss.find('./channel/item/url_video_%s' % format_id)
  41             if video_url is None:
  42                 continue
  43             formats.append({
  44                 'url': video_url.text,
  45                 'format_id': format_id,
  46             })
  47
  48         return {
  49             'id': video_id,
  50             'title': title,
  51             'description': description,
  52             'thumbnail': thumbnail,
  53             'duration': duration,
  54             'view_count': view_count,
  55             'formats': formats,
  56         }