]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/sapo.py
2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
13 class SapoIE(InfoExtractor
):
14 IE_DESC
= 'SAPO Vídeos'
15 _VALID_URL
= r
'https?://(?:(?:v2|www)\.)?videos\.sapo\.(?:pt|cv|ao|mz|tl)/(?P<id>[\da-zA-Z]{20})'
19 'url': 'http://videos.sapo.pt/UBz95kOtiWYUMTA5Ghfi',
20 'md5': '79ee523f6ecb9233ac25075dee0eda83',
23 'id': 'UBz95kOtiWYUMTA5Ghfi',
25 'title': 'Benfica - Marcas na Hitória',
26 'description': 'md5:c9082000a128c3fd57bf0299e1367f22',
28 'uploader': 'tiago_1988',
29 'upload_date': '20080229',
30 'categories': ['benfica', 'cabral', 'desporto', 'futebol', 'geovanni', 'hooijdonk', 'joao', 'karel', 'lisboa', 'miccoli'],
34 'url': 'http://videos.sapo.pt/IyusNAZ791ZdoCY5H5IF',
35 'md5': '90a2f283cfb49193fe06e861613a72aa',
38 'id': 'IyusNAZ791ZdoCY5H5IF',
40 'title': 'Codebits VII - Report',
41 'description': 'md5:6448d6fd81ce86feac05321f354dbdc8',
43 'uploader': 'codebits',
44 'upload_date': '20140427',
45 'categories': ['codebits', 'codebits2014'],
49 'url': 'http://v2.videos.sapo.pt/yLqjzPtbTimsn2wWBKHz',
50 'md5': 'e5aa7cc0bdc6db9b33df1a48e49a15ac',
53 'id': 'yLqjzPtbTimsn2wWBKHz',
55 'title': 'Hipnose Condicionativa 4',
56 'description': 'md5:ef0481abf8fb4ae6f525088a6dadbc40',
58 'uploader': 'sapozen',
59 'upload_date': '20090609',
60 'categories': ['condicionativa', 'heloisa', 'hipnose', 'miranda', 'sapo', 'zen'],
65 def _real_extract(self
, url
):
66 mobj
= re
.match(self
._VALID
_URL
, url
)
67 video_id
= mobj
.group('id')
69 item
= self
._download
_xml
(
70 'http://rd3.videos.sapo.pt/%s/rss2' % video_id
, video_id
).find('./channel/item')
72 title
= item
.find('./title').text
73 description
= item
.find('./{http://videos.sapo.pt/mrss/}synopse').text
74 thumbnail
= item
.find('./{http://search.yahoo.com/mrss/}content').get('url')
75 duration
= parse_duration(item
.find('./{http://videos.sapo.pt/mrss/}time').text
)
76 uploader
= item
.find('./{http://videos.sapo.pt/mrss/}author').text
77 upload_date
= unified_strdate(item
.find('./pubDate').text
)
78 view_count
= int(item
.find('./{http://videos.sapo.pt/mrss/}views').text
)
79 comment_count
= int(item
.find('./{http://videos.sapo.pt/mrss/}comment_count').text
)
80 tags
= item
.find('./{http://videos.sapo.pt/mrss/}tags').text
81 categories
= tags
.split() if tags
else []
82 age_limit
= 18 if item
.find('./{http://videos.sapo.pt/mrss/}m18').text
== 'true' else 0
84 video_url
= item
.find('./{http://videos.sapo.pt/mrss/}videoFile').text
85 video_size
= item
.find('./{http://videos.sapo.pt/mrss/}videoSize').text
.split('x')
91 'width': int(video_size
[0]),
92 'height': int(video_size
[1]),
95 if item
.find('./{http://videos.sapo.pt/mrss/}HD').text
== 'true':
97 'url': re
.sub(r
'/mov/1$', '/mov/39', video_url
),
104 self
._sort
_formats
(formats
)
109 'description': description
,
110 'thumbnail': thumbnail
,
111 'duration': duration
,
112 'uploader': uploader
,
113 'upload_date': upload_date
,
114 'view_count': view_count
,
115 'comment_count': comment_count
,
116 'categories': categories
,
117 'age_limit': age_limit
,