]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/googledrive.py
fec36cbbb7f43d2b8b37370aec270f543e8f257d
1 from __future__
import unicode_literals
5 from .common
import InfoExtractor
13 class GoogleDriveIE(InfoExtractor
):
14 _VALID_URL
= r
'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})'
16 'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
17 'md5': 'd109872761f7e7ecf353fa108c0dbe1e',
19 'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
21 'title': 'Big Buck Bunny.mp4',
25 # video id is longer than 28 characters
26 'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
27 'only_matching': True,
49 def _extract_url(webpage
):
51 r
'<iframe[^>]+src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28,})',
54 return 'https://drive.google.com/file/d/%s' % mobj
.group('id')
56 def _real_extract(self
, url
):
57 video_id
= self
._match
_id
(url
)
58 webpage
= self
._download
_webpage
(
59 'http://docs.google.com/file/d/%s' % video_id
, video_id
)
61 reason
= self
._search
_regex
(r
'"reason"\s*,\s*"([^"]+)', webpage
, 'reason', default
=None)
63 raise ExtractorError(reason
)
65 title
= self
._search
_regex
(r
'"title"\s*,\s*"([^"]+)', webpage
, 'title')
66 duration
= int_or_none(self
._search
_regex
(
67 r
'"length_seconds"\s*,\s*"([^"]+)', webpage
, 'length seconds', default
=None))
68 fmt_stream_map
= self
._search
_regex
(
69 r
'"fmt_stream_map"\s*,\s*"([^"]+)', webpage
, 'fmt stream map').split(',')
70 fmt_list
= self
._search
_regex
(r
'"fmt_list"\s*,\s*"([^"]+)', webpage
, 'fmt_list').split(',')
73 for fmt
, fmt_stream
in zip(fmt_list
, fmt_stream_map
):
74 fmt_id
, fmt_url
= fmt_stream
.split('|')
75 resolution
= fmt
.split('/')[1]
76 width
, height
= resolution
.split('x')
78 'url': lowercase_escape(fmt_url
),
80 'resolution': resolution
,
81 'width': int_or_none(width
),
82 'height': int_or_none(height
),
83 'ext': self
._FORMATS
_EXT
[fmt_id
],
85 self
._sort
_formats
(formats
)
90 'thumbnail': self
._og
_search
_thumbnail
(webpage
, default
=None),