Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/iconosquare.py
debian/copyright: Use HTTPS for upstream's source.
[youtubedl] / youtube_dl / extractor / iconosquare.py
1 from __future__ import unicode_literals
2
3 from .common import InfoExtractor
4 from ..utils import (
5 int_or_none,
6 get_element_by_id,
7 remove_end,
8 )
9
10
class IconosquareIE(InfoExtractor):
    """Extractor for ``/p/<id>`` media pages on iconosquare.com and statigr.am.

    The page embeds a JSON document in the element with id ``mediaJson``;
    formats, thumbnails, comments and the remaining metadata are read from it.
    """

    _VALID_URL = r'https?://(?:www\.)?(?:iconosquare\.com|statigr\.am)/p/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://statigr.am/p/522207370455279102_24101272',
        'md5': '6eb93b882a3ded7c378ee1d6884b1814',
        'info_dict': {
            'id': '522207370455279102_24101272',
            'ext': 'mp4',
            'title': 'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
            'description': 'md5:644406a9ec27457ed7aa7a9ebcd4ce3d',
            'timestamp': 1376471991,
            'upload_date': '20130814',
            'uploader': 'aguynamedpatrick',
            'uploader_id': '24101272',
            'comment_count': int,
            'like_count': int,
        },
    }

    def _real_extract(self, url):
        """Download the media page at *url* and return its info dict.

        The returned dict carries ``id``, ``title``, ``description``,
        ``thumbnails``, ``timestamp``, ``uploader``, ``uploader_id``,
        ``comment_count``, ``like_count``, ``formats`` and ``comments``.

        Raises ``KeyError`` if the embedded JSON has no ``videos`` entry or a
        video/thumbnail entry lacks ``url`` (unchanged from the original
        behaviour).
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        media = self._parse_json(
            get_element_by_id('mediaJson', webpage),
            video_id)

        formats = [{
            'url': f['url'],
            'format_id': format_id,
            'width': int_or_none(f.get('width')),
            'height': int_or_none(f.get('height')),
        } for format_id, f in media['videos'].items()]
        self._sort_formats(formats)

        title = remove_end(self._og_search_title(webpage), ' - via Iconosquare')

        # ``dict.get(key, {})`` only falls back when the key is absent; a key
        # that is present with a JSON ``null`` would yield None and break the
        # chained ``.get``. Normalise every nested object with ``or {}``.
        caption = media.get('caption') or {}
        user = media.get('user') or {}
        comments_info = media.get('comments') or {}
        likes = media.get('likes') or {}
        images = media.get('images') or {}

        timestamp = int_or_none(
            media.get('created_time') or caption.get('created_time'))
        description = caption.get('text')

        uploader = user.get('username')
        uploader_id = user.get('id')

        comment_count = int_or_none(comments_info.get('count'))
        like_count = int_or_none(likes.get('count'))

        thumbnails = [{
            'url': t['url'],
            'id': thumbnail_id,
            'width': int_or_none(t.get('width')),
            'height': int_or_none(t.get('height')),
        } for thumbnail_id, t in images.items()]

        comments = []
        for comment in comments_info.get('data') or []:
            # Entries without a text body are skipped.
            if 'text' not in comment:
                continue
            author = comment.get('from') or {}
            comments.append({
                'id': comment.get('id'),
                'text': comment['text'],
                'timestamp': int_or_none(comment.get('created_time')),
                'author': author.get('full_name'),
                'author_id': author.get('username'),
            })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnails': thumbnails,
            'timestamp': timestamp,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'comment_count': comment_count,
            'like_count': like_count,
            'formats': formats,
            'comments': comments,
        }