Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/appleconnect.py
debian/control: Update list of extractors in long description.
[youtubedl] / youtube_dl / extractor / appleconnect.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import (
6 str_to_int,
7 ExtractorError
8 )
9
10
class AppleConnectIE(InfoExtractor):
    """Extractor for videos embedded in Apple Connect posts on itunes.apple.com.

    The post page carries a JSON object right after a ``class="auc-video-data"``
    attribute; the media URL and most metadata are read from that object, while
    the timestamp and like count are scraped from the surrounding HTML.
    """

    # An optional path segment of up to two word characters (e.g. "us") may
    # precede "/post/"; the post id is everything after "idsa.".
    _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P<id>[\w-]+)'
    _TEST = {
        'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
        'md5': 'e7c38568a01ea45402570e6029206723',
        'info_dict': {
            'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
            'ext': 'm4v',
            'title': 'Energy',
            'uploader': 'Drake',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20150710',
            'timestamp': 1436545535,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the video contained in the post at ``url``.

        Only ``id``, ``url`` and ``title`` are treated as mandatory; every
        other field is best-effort and is ``None`` when the page does not
        provide it.

        Raises:
            ExtractorError: (with ``expected=True``) when the page contains
                no ``auc-video-data`` JSON, i.e. the post has no video.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # default=None makes a failed search return None instead of raising,
        # so the "no video" case can be reported with a user-facing message.
        video_json = self._html_search_regex(
            r'class="auc-video-data">(\{.*?\})', webpage, 'json', default=None)
        if video_json is None:
            raise ExtractorError('This post doesn\'t contain a video', expected=True)

        video_data = self._parse_json(video_json, video_id)

        # Optional metadata: a missing value must not abort an otherwise
        # successful extraction. fatal=False still emits a warning, which is
        # useful for the timestamp since it is normally expected on the page.
        # NOTE(review): relies on str_to_int passing None through unchanged --
        # confirm against ..utils.
        timestamp = str_to_int(self._html_search_regex(
            r'data-timestamp="(\d+)"', webpage, 'timestamp', fatal=False))
        # A post may simply have no "Loves" counter, so stay silent here.
        like_count = str_to_int(self._html_search_regex(
            r'(\d+) Loves', webpage, 'like count', default=None))

        return {
            'id': video_id,
            # Mandatory fields: fail loudly if the JSON layout changed.
            'url': video_data['sslSrc'],
            'title': video_data['title'],
            # Optional fields: tolerate absent keys.
            'description': video_data.get('description'),
            'uploader': video_data.get('artistName'),
            'thumbnail': video_data.get('artworkUrl'),
            'timestamp': timestamp,
            'like_count': like_count,
        }