# Recovered from a gitweb "blobdiff_plain" dump of the new file
# youtube_dl/extractor/playvid.py (index 0000000..b1322f1):
# X-Git-Url: https://git.rapsys.eu/.gitweb.cgi/youtubedl/blobdiff_plain/af478477605bdf3f5d57562035885cfee905f379..c512650955de0b16d37e7fa7fb29ea0985e415bb:/youtube_dl/extractor/playvid.py?ds=sidebyside
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse,
)


class PlayvidIE(InfoExtractor):
    """Extractor for playvid.com watch pages.

    Handles URLs of the form ``/watch/<id>`` and ``/watch?v=<id>`` and
    reads the video metadata from the ``flashvars`` attribute embedded in
    the watch page.
    """

    # The named group ``id`` is read back in _real_extract().
    _VALID_URL = r'^https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)'
    _TEST = {
        'url': 'http://www.playvid.com/watch/agbDDi7WZTV',
        'md5': '44930f8afa616efdf9482daf4fe53e1e',
        'info_dict': {
            'id': 'agbDDi7WZTV',
            'ext': 'mp4',
            'title': 'Michelle Lewin in Miami Beach',
            'duration': 240,
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Download the watch page for *url* and build its info dict.

        Returns a dict with the keys ``id``, ``formats``, ``title``,
        ``thumbnail``, ``duration``, ``description`` (always ``None``) and
        ``age_limit`` (always 18).
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        video_title = None
        duration = None
        video_thumbnail = None
        formats = []

        # Most of the information is stored in the flashvars.
        flashvars = self._html_search_regex(
            r'flashvars="(.+?)"', webpage, 'flashvars')

        # The attribute is URL-quoted as a whole; after unquoting it is a
        # '&'-separated list of "video_vars[<key>]=<value>" entries.
        infos = compat_urllib_parse.unquote(flashvars).split('&')
        for info in infos:
            videovars_match = re.match(r'^video_vars\[(.+?)\]=(.+?)$', info)
            if not videovars_match:
                continue
            key = videovars_match.group(1)
            val = videovars_match.group(2)

            if key == 'title':
                # The title is decoded a second time (presumably it is
                # double-encoded on the page - confirm against a live page).
                video_title = compat_urllib_parse.unquote_plus(val)
            elif key == 'duration':
                try:
                    duration = int(val)
                except ValueError:
                    # Best effort: a non-numeric duration is left as None.
                    pass
            elif key == 'big_thumb':
                video_thumbnail = val
            else:
                # Because the key group above is non-greedy up to "]=",
                # "video_vars[video_urls][480p]=..." yields the key
                # "video_urls][480p"; the number before "p" is the height.
                videourl_match = re.match(
                    r'^video_urls\]\[(?P<resolution>[0-9]+)p', key)
                if videourl_match:
                    height = int(videourl_match.group('resolution'))
                    formats.append({
                        'height': height,
                        'url': val,
                    })
        self._sort_formats(formats)

        # Extract title - should be in the flashvars; if not, look elsewhere
        if video_title is None:
            video_title = self._html_search_regex(
                r'<title>(.*?)</title', webpage, 'title')

        return {
            'id': video_id,
            'formats': formats,
            'title': video_title,
            'thumbnail': video_thumbnail,
            'duration': duration,
            'description': None,
            'age_limit': 18
        }