Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/instagram.py
6ae704efddce7a1b636cc9bd81b5244bbad95b2d
[youtubedl] / youtube_dl / extractor / instagram.py
1 import re
2
3 from .common import InfoExtractor
4
class InstagramIE(InfoExtractor):
    """Information extractor for single Instagram video posts.

    Handles URLs of the form ``instagram.com/p/<post id>``. The post page is
    downloaded and the video URL, thumbnail, title and uploader are scraped
    from its HTML (Open Graph ``<meta>`` tags and the ``<title>`` element).
    """

    # The scheme stays optional, as it always was. Both http and https are
    # accepted, as is an optional "www." prefix, and the dots are escaped so
    # they only match a literal ".". Group 1 is the post id: everything up to
    # the next "/", "?", "#" or "&" (a trailing slash is no longer required).
    _VALID_URL = r'(?:https?://)?(?:www\.)?instagram\.com/p/([^/?#&]+)'
    _TEST = {
        u'url': u'http://instagram.com/p/aye83DjauH/#',
        u'file': u'aye83DjauH.mp4',
        u'md5': u'0d2da106a9d2631273e192b372806516',
        u'info_dict': {
            u"uploader_id": u"naomipq",
            u"title": u"Video by naomipq",
        },
    }

    def _real_extract(self, url):
        """Scrape the metadata of one Instagram video post.

        Args:
            url: Post URL; it must match ``_VALID_URL``.

        Returns:
            A one-element list holding a dict with the keys ``id``, ``url``,
            ``ext``, ``title``, ``thumbnail`` and ``uploader_id``.

        NOTE(review): the thumbnail and uploader lookups pass ``fatal=False``,
        so they are presumably allowed to come back empty, while the video URL
        and title lookups presumably raise on failure -- confirm against
        ``InfoExtractor._html_search_regex``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)
        webpage = self._download_webpage(url, video_id)

        video_url = self._html_search_regex(
            r'<meta property="og:video" content="(.+?)"',
            webpage, u'video URL')
        thumbnail_url = self._html_search_regex(
            r'<meta property="og:image" content="(.+?)" />',
            webpage, u'thumbnail URL', fatal=False)
        html_title = self._html_search_regex(
            r'<title>(.+?)</title>',
            webpage, u'title', flags=re.DOTALL)

        # Strip the trailing " (Video) \u2022 Instagram" / " \u2022 Instagram"
        # suffix from the page title. The literal cannot be a raw string
        # because \u2022 must be interpreted as a string escape (Python 2's
        # `re` does not understand \u), so the regex backslashes are doubled
        # rather than relying on the invalid string escapes "\(" and "\)".
        title = re.sub(
            u'(?: *\\(Videos?\\))? \u2022 Instagram$', '', html_title).strip()

        uploader_id = self._html_search_regex(
            r'content="(.*?)\'s video on Instagram',
            webpage, u'uploader name', fatal=False)

        # The extension is hard-coded to mp4; nothing on the page is consulted.
        ext = 'mp4'

        return [{
            'id': video_id,
            'url': video_url,
            'ext': ext,
            'title': title,
            'thumbnail': thumbnail_url,
            'uploader_id': uploader_id,
        }]