X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/c512650955de0b16d37e7fa7fb29ea0985e415bb..7191504793f46c6f42db0b391d05903c99dc7406:/youtube_dl/extractor/iconosquare.py diff --git a/youtube_dl/extractor/iconosquare.py b/youtube_dl/extractor/iconosquare.py index 1d5a10a..70e4c0d 100644 --- a/youtube_dl/extractor/iconosquare.py +++ b/youtube_dl/extractor/iconosquare.py @@ -1,40 +1,75 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor +from ..utils import int_or_none class IconosquareIE(InfoExtractor): - _VALID_URL = r'https?://(www\.)?(?:iconosquare\.com|statigr\.am)/p/(?P<id>[^/]+)' + _VALID_URL = r'https?://(?:www\.)?(?:iconosquare\.com|statigr\.am)/p/(?P<id>[^/]+)' _TEST = { 'url': 'http://statigr.am/p/522207370455279102_24101272', 'md5': '6eb93b882a3ded7c378ee1d6884b1814', 'info_dict': { 'id': '522207370455279102_24101272', 'ext': 'mp4', - 'uploader_id': 'aguynamedpatrick', - 'title': 'Instagram photo by @aguynamedpatrick (Patrick Janelle)', + 'title': 'Instagram media by @aguynamedpatrick (Patrick Janelle)', 'description': 'md5:644406a9ec27457ed7aa7a9ebcd4ce3d', + 'timestamp': 1376471991, + 'upload_date': '20130814', + 'uploader': 'aguynamedpatrick', + 'uploader_id': '24101272', + 'comment_count': int, + 'like_count': int, }, } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) - html_title = self._html_search_regex( - r'<title>(.+?)</title>', + + media = self._parse_json( + self._search_regex( + r'window\.media\s*=\s*({.+?});\n', webpage, 'media'), + video_id) + + formats = [{ + 'url': f['url'], + 'format_id': format_id, + 'width': int_or_none(f.get('width')), + 'height': int_or_none(f.get('height')) + } for format_id, f in media['videos'].items()] + self._sort_formats(formats) + + title = self._html_search_regex( + r'<title>(.+?)(?: *\(Videos?\))? \| (?:Iconosquare|Statigram)</title>', webpage, 'title') - title = re.sub(r'(?: *\(Videos?\))? \| (?:Iconosquare|Statigram)$', '', html_title) - uploader_id = self._html_search_regex( - r'@([^ ]+)', title, 'uploader name', fatal=False) + + timestamp = int_or_none(media.get('created_time') or media.get('caption', {}).get('created_time')) + description = media.get('caption', {}).get('text') + + uploader = media.get('user', {}).get('username') + uploader_id = media.get('user', {}).get('id') + + comment_count = int_or_none(media.get('comments', {}).get('count')) + like_count = int_or_none(media.get('likes', {}).get('count')) + + thumbnails = [{ + 'url': t['url'], + 'id': thumbnail_id, + 'width': int_or_none(t.get('width')), + 'height': int_or_none(t.get('height')) + } for thumbnail_id, t in media.get('images', {}).items()] return { 'id': video_id, - 'url': self._og_search_video_url(webpage), 'title': title, - 'description': self._og_search_description(webpage), - 'thumbnail': self._og_search_thumbnail(webpage), - 'uploader_id': uploader_id + 'description': description, + 'thumbnails': thumbnails, + 'timestamp': timestamp, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'comment_count': comment_count, + 'like_count': like_count, + 'formats': formats, }