Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/postprocessor/metadatafromtitle.py
Merge tag 'upstream/2017.05.18.1'
[youtubedl] / youtube_dl / postprocessor / metadatafromtitle.py
1 from __future__ import unicode_literals
2
3 import re
4
5 from .common import PostProcessor
6
7
class MetadataFromTitlePP(PostProcessor):
    """Post-processor that fills ``info`` fields by parsing the video title.

    ``titleformat`` is either a template containing ``%(name)s``
    placeholders (converted to a regex by :meth:`format_to_regex`) or, when
    it contains no such placeholder, a regular expression used as-is. Every
    named group of the resulting regex is copied into the ``info`` dict.
    """

    def __init__(self, downloader, titleformat):
        """Store the title format and precompute the regex used by ``run``.

        downloader  -- passed through to ``PostProcessor.__init__``
        titleformat -- ``%(name)s``-style template, or a raw regex with
                       named groups when no placeholder is present
        """
        super(MetadataFromTitlePP, self).__init__(downloader)
        self._titleformat = titleformat
        # A format without any %(name)s placeholder is treated as a
        # ready-made regular expression and is not escaped.
        self._titleregex = (self.format_to_regex(titleformat)
                            if re.search(r'%\(\w+\)s', titleformat)
                            else titleformat)

    def format_to_regex(self, fmt):
        r"""
        Converts a string like
           '%(title)s - %(artist)s'
        to a regex like
           '(?P<title>.+)\ \-\ (?P<artist>.+)'
        """
        lastpos = 0
        regex = ''
        # replace %(..)s with regex group and escape other string parts
        for match in re.finditer(r'%\((\w+)\)s', fmt):
            regex += re.escape(fmt[lastpos:match.start()])
            regex += r'(?P<' + match.group(1) + '>.+)'
            lastpos = match.end()
        # escape whatever literal text follows the last placeholder
        if lastpos < len(fmt):
            regex += re.escape(fmt[lastpos:])
        return regex

    def run(self, info):
        """Match ``info['title']`` against the title regex and copy the
        named groups into ``info``.

        Returns a ``([], info)`` tuple in every case. If the title does not
        match, a message is printed and ``info`` is returned unchanged.
        NOTE(review): the empty list is presumably the PostProcessor
        "files to delete" slot -- confirm against .common.
        """
        title = info['title']
        # re.match anchors at the start of the title only
        match = re.match(self._titleregex, title)
        if match is None:
            self._downloader.to_screen(
                '[fromtitle] Could not interpret title of video as "%s"'
                % self._titleformat)
            return [], info
        for attribute, value in match.groupdict().items():
            info[attribute] = value
            # A user-supplied regex may contain optional groups that did not
            # participate in the match; their value is None, so format it
            # explicitly instead of concatenating (which raised TypeError).
            self._downloader.to_screen(
                '[fromtitle] parsed %s: %s'
                % (attribute, value if value is not None else 'NA'))

        return [], info