# -*- coding: utf-8 -*-
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
""" This module is responsible for parsing a compiler invocation. """

import re
import os
import collections

__all__ = ['split_command', 'classify_source', 'compiler_language']

# Ignored compiler options map for compilation database creation.
# The map is used in the `split_command` function (which ignores and
# classifies parameters). Please note that these are not the only parameters
# which might be ignored.
#
# Keys are the option names, values are the number of following arguments
# to skip together with the option.
IGNORED_FLAGS = {
    # compile-only flag, ignored because the creator of the compilation
    # database will explicitly set it.
    '-c': 0,
    # preprocessor (dependency generation) flags, ignored because they would
    # cause duplicate entries in the output (the only difference would be
    # these flags). this is an actual finding from users, who suffered longer
    # execution time caused by the duplicates.
    '-MD': 0,
    '-MMD': 0,
    '-MG': 0,
    '-MP': 0,
    '-MF': 1,
    '-MT': 1,
    '-MQ': 1,
    # linker options, ignored because the compilation database will contain
    # compilation commands only, so the compiler would ignore these flags
    # anyway. the benefit of getting rid of them is a more readable output.
    '-static': 0,
    '-shared': 0,
    '-s': 0,
    '-rdynamic': 0,
    '-l': 1,
    '-L': 1,
    '-u': 1,
    '-z': 1,
    '-T': 1,
    '-Xlinker': 1
}

# Known C/C++ compiler executable name patterns
COMPILER_PATTERNS = frozenset([
    re.compile(r'^(intercept-|analyze-|)c(c|\+\+)$'),
    re.compile(r'^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$'),
    re.compile(r'^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$'),
    re.compile(r'^llvm-g(cc|\+\+)$'),
])

# Result type of `split_command`. Defined once here, instead of creating a
# fresh class on every call.
Compilation = collections.namedtuple('Compilation',
                                     ['compiler', 'flags', 'files'])

# Flags which mean the command does not run a compilation pass.
_NON_COMPILE_FLAGS = frozenset(['-E', '-S', '-cc1', '-M', '-MM', '-###'])

# Flags whose value is passed as a separate argument and might look like a
# file name; the flag and its value are both kept as compile options.
_FLAGS_WITH_VALUE = frozenset(['-D', '-I'])

# Linker related arguments where the value is glued to the flag
# (e.g. `-lfoo`, `-L/usr/lib`, `-Wl,--as-needed`).
_GLUED_LINKER_FLAG = re.compile(r'^-(l|L|Wl,).+')

# An argument of at least two characters which does not start with a dash.
_NON_OPTION = re.compile(r'^[^-].+')

# Executable names which denote a C++ compiler (e.g. `g++`, `clang++-3.8`).
_CPLUSPLUS_EXECUTABLE = re.compile(r'^(.+)(\+\+)(-.+|)$')


def split_command(command):
    """ Returns a value when the command is a compilation, None otherwise.

    command: the compiler invocation as a list of strings, where the first
             element is the executable.

    The value on success is a named tuple with the following attributes:

        compiler:   string value of 'c' or 'c++'
        flags:      list of compile options
        files:      list of source files

    None is returned when the executable is not a known C/C++ compiler,
    when the command does not run a compilation pass (e.g. `-E`, `-M`),
    or when no source file was found among the arguments. """

    # quit right now, if the program was not a C/C++ compiler
    compiler = compiler_language(command)
    if not compiler:
        return None

    flags = []
    files = []
    # iterate on the compile options
    args = iter(command[1:])
    for arg in args:
        # quit when compilation pass is not involved
        if arg in _NON_COMPILE_FLAGS:
            return None
        # ignore some flags, together with their values
        elif arg in IGNORED_FLAGS:
            for _ in range(IGNORED_FLAGS[arg]):
                # the default value keeps a truncated command line (flag
                # given as the very last argument) from raising
                # StopIteration, which would be especially harmful if the
                # caller is a generator (see PEP 479).
                next(args, None)
        elif _GLUED_LINKER_FLAG.match(arg):
            pass
        # some parameters could look like a file name, take them as compile
        # options
        elif arg in _FLAGS_WITH_VALUE:
            flags.append(arg)
            value = next(args, None)
            # a dangling flag without value is kept alone
            if value is not None:
                flags.append(value)
        # a parameter which looks like a source file is taken...
        elif _NON_OPTION.match(arg) and classify_source(arg):
            files.append(arg)
        # and everything else is considered as compile option.
        else:
            flags.append(arg)

    # a compilation must have at least one source file
    return Compilation(compiler, flags, files) if files else None


def classify_source(filename, c_compiler=True):
    """ Return the language from file name extension.

    filename:   name (or path) of the file to classify.
    c_compiler: when False, `.c` and `.i` files are treated as C++ input.

    Returns the language name as string, or None when the extension is
    not a known source file extension. """

    mapping = {
        '.c': 'c' if c_compiler else 'c++',
        '.i': 'c-cpp-output' if c_compiler else 'c++-cpp-output',
        '.ii': 'c++-cpp-output',
        '.m': 'objective-c',
        '.mi': 'objective-c-cpp-output',
        '.mm': 'objective-c++',
        '.mii': 'objective-c++-cpp-output',
        '.C': 'c++',
        '.cc': 'c++',
        '.CC': 'c++',
        '.cp': 'c++',
        '.cpp': 'c++',
        '.cxx': 'c++',
        '.c++': 'c++',
        '.C++': 'c++',
        '.txx': 'c++'
    }

    __, extension = os.path.splitext(os.path.basename(filename))
    return mapping.get(extension)


def compiler_language(command):
    """ A predicate to decide whether the command is a compiler call or not.

    command: the invocation as a list of strings, where the first element
             is the executable (only its base name is examined).

    Returns 'c' or 'c++' when it matches a known compiler name, None
    otherwise (also for an empty command). """

    if command:
        executable = os.path.basename(command[0])
        if any(pattern.match(executable) for pattern in COMPILER_PATTERNS):
            return 'c++' if _CPLUSPLUS_EXECUTABLE.match(executable) else 'c'
    return None