From e2042a8a80ab598bbb8d06cd7f3078923c7c77e3 Mon Sep 17 00:00:00 2001 From: Valentin Rothberg Date: Thu, 15 Oct 2015 10:37:47 +0200 Subject: checkkconfigsymbols.py: multiprocessing of files Distribute the parsing of source and Kconfig files on all available cores to speed up processing. Signed-off-by: Valentin Rothberg Signed-off-by: Greg Kroah-Hartman --- scripts/checkkconfigsymbols.py | 128 ++++++++++++++++++++++++++++++++--------- 1 file changed, 102 insertions(+), 26 deletions(-) (limited to 'scripts/checkkconfigsymbols.py') diff --git a/scripts/checkkconfigsymbols.py b/scripts/checkkconfigsymbols.py index 2f4b7ffd5570..cfe397b61c48 100755 --- a/scripts/checkkconfigsymbols.py +++ b/scripts/checkkconfigsymbols.py @@ -10,9 +10,11 @@ import os import re +import signal import sys -from subprocess import Popen, PIPE, STDOUT +from multiprocessing import Pool, cpu_count from optparse import OptionParser +from subprocess import Popen, PIPE, STDOUT # regex expressions @@ -26,7 +28,7 @@ SOURCE_FEATURE = r"(?:\W|\b)+[D]{,1}CONFIG_(" + FEATURE + r")" # regex objects REGEX_FILE_KCONFIG = re.compile(r".*Kconfig[\.\w+\-]*$") -REGEX_FEATURE = re.compile(r'(?!\B"[^"]*)' + FEATURE + r'(?![^"]*"\B)') +REGEX_FEATURE = re.compile(r'(?!\B)' + FEATURE + r'(?!\B)') REGEX_SOURCE_FEATURE = re.compile(SOURCE_FEATURE) REGEX_KCONFIG_DEF = re.compile(DEF) REGEX_KCONFIG_EXPR = re.compile(EXPR) @@ -34,6 +36,7 @@ REGEX_KCONFIG_STMT = re.compile(STMT) REGEX_KCONFIG_HELP = re.compile(r"^\s+(help|---help---)\s*$") REGEX_FILTER_FEATURES = re.compile(r"[A-Za-z0-9]$") REGEX_NUMERIC = re.compile(r"0[xX][0-9a-fA-F]+|[0-9]+") +REGEX_QUOTES = re.compile("(\"(.*?)\")") def parse_options(): @@ -209,14 +212,36 @@ def get_head(): return stdout.strip('\n') +def partition(lst, size): + """Partition list @lst into eveni-sized lists of size @size.""" + return [lst[i::size] for i in xrange(size)] + + +def init_worker(): + """Set signal handler to ignore SIGINT.""" + signal.signal(signal.SIGINT, signal.SIG_IGN) + + def check_symbols(ignore): """Find undefined Kconfig symbols and return a dict with the symbol as key and a list of referencing files as value. Files matching %ignore are not checked for undefined symbols.""" + pool = Pool(cpu_count(), init_worker) + try: + return check_symbols_helper(pool, ignore) + except KeyboardInterrupt: + pool.terminate() + pool.join() + sys.exit(1) + + +def check_symbols_helper(pool, ignore): + """Helper method for check_symbols(). Used to catch keyboard interrupts in + check_symbols() in order to properly terminate running worker processes.""" source_files = [] kconfig_files = [] - defined_features = set() - referenced_features = dict() # {feature: [files]} + defined_features = [] + referenced_features = dict() # {file: [features]} # use 'git ls-files' to get the worklist stdout = execute("git ls-files") @@ -231,21 +256,33 @@ def check_symbols(ignore): if REGEX_FILE_KCONFIG.match(gitfile): kconfig_files.append(gitfile) else: - # all non-Kconfig files are checked for consistency + if ignore and not re.match(ignore, gitfile): + continue + # add source files that do not match the ignore pattern source_files.append(gitfile) - for sfile in source_files: - if ignore and re.match(ignore, sfile): - # do not check files matching %ignore - continue - parse_source_file(sfile, referenced_features) + # parse source files + arglist = partition(source_files, cpu_count()) + for res in pool.map(parse_source_files, arglist): + referenced_features.update(res) - for kfile in kconfig_files: - if ignore and re.match(ignore, kfile): - # do not collect references for files matching %ignore - parse_kconfig_file(kfile, defined_features, dict()) - else: - parse_kconfig_file(kfile, defined_features, referenced_features) + + # parse kconfig files + arglist = [] + for part in partition(kconfig_files, cpu_count()): + arglist.append((part, ignore)) + for res in pool.map(parse_kconfig_files, arglist): + defined_features.extend(res[0]) + referenced_features.update(res[1]) + defined_features = set(defined_features) + + # inverse mapping of referenced_features to dict(feature: [files]) + inv_map = dict() + for _file, features in referenced_features.iteritems(): + for feature in features: + inv_map[feature] = inv_map.get(feature, set()) + inv_map[feature].add(_file) + referenced_features = inv_map undefined = {} # {feature: [files]} for feature in sorted(referenced_features): @@ -262,9 +299,23 @@ def check_symbols(ignore): return undefined -def parse_source_file(sfile, referenced_features): - """Parse @sfile for referenced Kconfig features.""" +def parse_source_files(source_files): + """Parse each source file in @source_files and return dictionary with source + files as keys and lists of references Kconfig symbols as values.""" + referenced_features = dict() + for sfile in source_files: + referenced_features[sfile] = parse_source_file(sfile) + return referenced_features + + +def parse_source_file(sfile): + """Parse @sfile and return a list of referenced Kconfig features.""" lines = [] + references = [] + + if not os.path.exists(sfile): + return references + with open(sfile, "r") as stream: lines = stream.readlines() @@ -275,9 +326,9 @@ def parse_source_file(sfile, referenced_features): for feature in features: if not REGEX_FILTER_FEATURES.search(feature): continue - sfiles = referenced_features.get(feature, set()) - sfiles.add(sfile) - referenced_features[feature] = sfiles + references.append(feature) + + return references def get_features_in_line(line): @@ -285,11 +336,35 @@ def get_features_in_line(line): return REGEX_FEATURE.findall(line) -def parse_kconfig_file(kfile, defined_features, referenced_features): +def parse_kconfig_files(args): + """Parse kconfig files and return tuple of defined and references Kconfig + symbols. Note, @args is a tuple of a list of files and the @ignore + pattern.""" + kconfig_files = args[0] + ignore = args[1] + defined_features = [] + referenced_features = dict() + + for kfile in kconfig_files: + defined, references = parse_kconfig_file(kfile) + defined_features.extend(defined) + if ignore and re.match(ignore, kfile): + # do not collect references for files that match the ignore pattern + continue + referenced_features[kfile] = references + return (defined_features, referenced_features) + + +def parse_kconfig_file(kfile): """Parse @kfile and update feature definitions and references.""" lines = [] + defined = [] + references = [] skip = False + if not os.path.exists(kfile): + return defined, references + with open(kfile, "r") as stream: lines = stream.readlines() @@ -300,7 +375,7 @@ def parse_kconfig_file(kfile, defined_features, referenced_features): if REGEX_KCONFIG_DEF.match(line): feature_def = REGEX_KCONFIG_DEF.findall(line) - defined_features.add(feature_def[0]) + defined.append(feature_def[0]) skip = False elif REGEX_KCONFIG_HELP.match(line): skip = True @@ -308,6 +383,7 @@ def parse_kconfig_file(kfile, defined_features, referenced_features): # ignore content of help messages pass elif REGEX_KCONFIG_STMT.match(line): + line = REGEX_QUOTES.sub("", line) features = get_features_in_line(line) # multi-line statements while line.endswith("\\"): @@ -319,9 +395,9 @@ def parse_kconfig_file(kfile, defined_features, referenced_features): if REGEX_NUMERIC.match(feature): # ignore numeric values continue - paths = referenced_features.get(feature, set()) - paths.add(kfile) - referenced_features[feature] = paths + references.append(feature) + + return defined, references if __name__ == "__main__": -- cgit v1.2.3 From 1b2c841467515425636c15799ac90f6c4821acc0 Mon Sep 17 00:00:00 2001 From: Valentin Rothberg Date: Thu, 26 Nov 2015 14:17:15 +0100 Subject: checkkconfigsymbols.py: find similar symbols Add support to find string-similar symbols. When option --sim SYM is specified, checkkconfigsymbols.py will print at most 10 symbols defined in Kconfig that are string similar to SYM in the following format: Similar symbols: $COMMA_SEPARATED_LIST_OF_SYMBOLS Note, if no similar symbols are found it is indicated as follows: Similar symbols: no similar symbols found Since the implemented functionality is also useful when searching the entire source or when diffing two commits, a list of similar symbols is printed unconditionally with the other data. In order to make the output more readable, the format now looks as follows: $UNDEFINED_SYMBOL Referencing files: $COMMA_SEPARATED_LIST_OF_FILES Similar symbols: $COMMA_SEPARATED_LIST_OF_SYMBOLS [Optional with '--find'] Commits changing symbol: - $COMMIT_1_HASH ("$COMMIT_1_MESSAGE") - $COMMIT_2_HASH ("$COMMIT_2_MESSAGE") or - no commit found Signed-off-by: Valentin Rothberg Signed-off-by: Greg Kroah-Hartman --- scripts/checkkconfigsymbols.py | 114 +++++++++++++++++++++++++++++++---------- 1 file changed, 88 insertions(+), 26 deletions(-) (limited to 'scripts/checkkconfigsymbols.py') diff --git a/scripts/checkkconfigsymbols.py b/scripts/checkkconfigsymbols.py index cfe397b61c48..d8f6c094cce5 100755 --- a/scripts/checkkconfigsymbols.py +++ b/scripts/checkkconfigsymbols.py @@ -8,6 +8,7 @@ # Licensed under the terms of the GNU GPL License version 2 +import difflib import os import re import signal @@ -74,6 +75,9 @@ def parse_options(): "the pattern needs to be a Python regex. To " "ignore defconfigs, specify -i '.*defconfig'.") + parser.add_option('-s', '--sim', dest='sim', action='store', default="", + help="Print a list of maximum 10 string-similar symbols.") + parser.add_option('', '--force', dest='force', action='store_true', default=False, help="Reset current Git tree even when it's dirty.") @@ -112,6 +116,18 @@ def main(): """Main function of this module.""" opts = parse_options() + if opts.sim and not opts.commit and not opts.diff: + sims = find_sims(opts.sim, opts.ignore) + if sims: + print "%s: %s" % (yel("Similar symbols"), ', '.join(sims)) + else: + print "%s: no similar symbols found" % yel("Similar symbols") + sys.exit(0) + + # dictionary of (un)defined symbols + defined = {} + undefined = {} + if opts.commit or opts.diff: head = get_head() @@ -130,40 +146,56 @@ def main(): # get undefined items before the commit execute("git reset --hard %s" % commit_a) - undefined_a = check_symbols(opts.ignore) + undefined_a, _ = check_symbols(opts.ignore) # get undefined items for the commit execute("git reset --hard %s" % commit_b) - undefined_b = check_symbols(opts.ignore) + undefined_b, defined = check_symbols(opts.ignore) # report cases that are present for the commit but not before for feature in sorted(undefined_b): # feature has not been undefined before if not feature in undefined_a: files = sorted(undefined_b.get(feature)) - print "%s\t%s" % (yel(feature), ", ".join(files)) - if opts.find: - commits = find_commits(feature, opts.diff) - print red(commits) + undefined[feature] = files # check if there are new files that reference the undefined feature else: files = sorted(undefined_b.get(feature) - undefined_a.get(feature)) if files: - print "%s\t%s" % (yel(feature), ", ".join(files)) - if opts.find: - commits = find_commits(feature, opts.diff) - print red(commits) + undefined[feature] = files # reset to head execute("git reset --hard %s" % head) # default to check the entire tree else: - undefined = check_symbols(opts.ignore) - for feature in sorted(undefined): - files = sorted(undefined.get(feature)) - print "%s\t%s" % (yel(feature), ", ".join(files)) + undefined, defined = check_symbols(opts.ignore) + + # now print the output + for feature in sorted(undefined): + print red(feature) + + files = sorted(undefined.get(feature)) + print "%s: %s" % (yel("Referencing files"), ", ".join(files)) + + sims = find_sims(feature, opts.ignore, defined) + sims_out = yel("Similar symbols") + if sims: + print "%s: %s" % (sims_out, ', '.join(sims)) + else: + print "%s: %s" % (sims_out, "no similar symbols found") + + if opts.find: + print "%s:" % yel("Commits changing symbol") + commits = find_commits(feature, opts.diff) + if commits: + for commit in commits: + commit = commit.split(" ", 1) + print "\t- %s (\"%s\")" % (yel(commit[0]), commit[1]) + else: + print "\t- no commit found" + print # new line def yel(string): @@ -193,7 +225,7 @@ def find_commits(symbol, diff): """Find commits changing %symbol in the given range of %diff.""" commits = execute("git log --pretty=oneline --abbrev-commit -G %s %s" % (symbol, diff)) - return commits + return [x for x in commits.split("\n") if x] def tree_is_dirty(): @@ -222,6 +254,45 @@ def init_worker(): signal.signal(signal.SIGINT, signal.SIG_IGN) +def find_sims(symbol, ignore, defined = []): + """Return a list of max. ten Kconfig symbols that are string-similar to + @symbol.""" + if defined: + return sorted(difflib.get_close_matches(symbol, set(defined), 10)) + + pool = Pool(cpu_count(), init_worker) + kfiles = [] + for gitfile in get_files(): + if REGEX_FILE_KCONFIG.match(gitfile): + kfiles.append(gitfile) + + arglist = [] + for part in partition(kfiles, cpu_count()): + arglist.append((part, ignore)) + + for res in pool.map(parse_kconfig_files, arglist): + defined.extend(res[0]) + + return sorted(difflib.get_close_matches(symbol, set(defined), 10)) + + +def get_files(): + """Return a list of all files in the current git directory.""" + # use 'git ls-files' to get the worklist + stdout = execute("git ls-files") + if len(stdout) > 0 and stdout[-1] == "\n": + stdout = stdout[:-1] + + files = [] + for gitfile in stdout.rsplit("\n"): + if ".git" in gitfile or "ChangeLog" in gitfile or \ + ".log" in gitfile or os.path.isdir(gitfile) or \ + gitfile.startswith("tools/"): + continue + files.append(gitfile) + return files + + def check_symbols(ignore): """Find undefined Kconfig symbols and return a dict with the symbol as key and a list of referencing files as value. Files matching %ignore are not @@ -243,16 +314,7 @@ def check_symbols_helper(pool, ignore): defined_features = [] referenced_features = dict() # {file: [features]} - # use 'git ls-files' to get the worklist - stdout = execute("git ls-files") - if len(stdout) > 0 and stdout[-1] == "\n": - stdout = stdout[:-1] - - for gitfile in stdout.rsplit("\n"): - if ".git" in gitfile or "ChangeLog" in gitfile or \ - ".log" in gitfile or os.path.isdir(gitfile) or \ - gitfile.startswith("tools/"): - continue + for gitfile in get_files(): if REGEX_FILE_KCONFIG.match(gitfile): kconfig_files.append(gitfile) else: @@ -296,7 +358,7 @@ def check_symbols_helper(pool, ignore): if feature[:-len("_MODULE")] in defined_features: continue undefined[feature] = referenced_features.get(feature) - return undefined + return undefined, defined_features def parse_source_files(source_files): -- cgit v1.2.3