blob: 954040e1e037084aa3654bac9f5af5c6f29c79ce [file] [log] [blame]
# SPDX-License-Identifier: Apache-2.0
# Copyright 2012-2017 The Meson development team
from __future__ import annotations
import enum
import os.path
import string
import typing as T
from .. import coredata
from .. import options
from .. import mlog
from ..mesonlib import (
EnvironmentException, Popen_safe,
is_windows, LibType, version_compare,
)
from .compilers import Compiler
if T.TYPE_CHECKING:
from .compilers import CompileCheckMode
from ..build import BuildTarget
from ..coredata import MutableKeyedOptionDictType, KeyedOptionDictType
from ..dependencies import Dependency
from ..environment import Environment # noqa: F401
from ..envconfig import MachineInfo
from ..linkers.linkers import DynamicLinker
from ..mesonlib import MachineChoice
# NVCC arguments for each optimization level; indexed by the
# ``optimization_level`` string handed to CudaCompiler.get_optimization_args().
cuda_optimization_args: T.Dict[str, T.List[str]] = {
    'plain': [],
    '0': ['-G'],  # -G is the short form of NVCC's --device-debug
    'g': ['-O0'],
    '1': ['-O1'],
    '2': ['-O2', '-lineinfo'],  # -lineinfo is the short form of --generate-line-info
    '3': ['-O3'],
    's': ['-O3']  # same arguments as level '3'
}
# NVCC arguments indexed by the ``is_debug`` boolean handed to
# CudaCompiler.get_debug_args().
cuda_debug_args: T.Dict[bool, T.List[str]] = {
    False: [],
    True: ['-g']
}
@enum.unique
class _Phase(enum.Enum):
    """
    Build phase a translated argument is meant for.

    The member value is spliced into the NVCC wrapper option ``-X<value>=``
    by ``CudaCompiler._to_host_flags``.
    """

    COMPILER = 'compiler'
    LINKER = 'linker'
class CudaCompiler(Compiler):
# Glued form of NVCC's -Xlinker (--linker-options) option.
LINKER_PREFIX = '-Xlinker='
language = 'cuda'
# NVCC flags taking no arguments.
# _to_host_flags() forwards every flag found in this set to NVCC unchanged.
_FLAG_PASSTHRU_NOARGS = {
    # NVCC --long-option, NVCC -short-option CUDA Toolkit 11.2.1 Reference
    '--objdir-as-tempdir', '-objtemp', # 4.2.1.2
    '--generate-dependency-targets', '-MP', # 4.2.1.12
    '--allow-unsupported-compiler', '-allow-unsupported-compiler', # 4.2.1.14
    '--link', # 4.2.2.1
    '--lib', '-lib', # 4.2.2.2
    '--device-link', '-dlink', # 4.2.2.3
    '--device-c', '-dc', # 4.2.2.4
    '--device-w', '-dw', # 4.2.2.5
    '--cuda', '-cuda', # 4.2.2.6
    '--compile', '-c', # 4.2.2.7
    '--fatbin', '-fatbin', # 4.2.2.8
    '--cubin', '-cubin', # 4.2.2.9
    '--ptx', '-ptx', # 4.2.2.10
    '--preprocess', '-E', # 4.2.2.11
    '--generate-dependencies', '-M', # 4.2.2.12
    '--generate-nonsystem-dependencies', '-MM', # 4.2.2.13
    '--generate-dependencies-with-compile', '-MD', # 4.2.2.14
    '--generate-nonsystem-dependencies-with-compile', '-MMD', # 4.2.2.15
    '--run', # 4.2.2.16
    '--profile', '-pg', # 4.2.3.1
    '--debug', '-g', # 4.2.3.2
    '--device-debug', '-G', # 4.2.3.3
    '--extensible-whole-program', '-ewp', # 4.2.3.4
    '--generate-line-info', '-lineinfo', # 4.2.3.5
    '--dlink-time-opt', '-dlto', # 4.2.3.8
    '--no-exceptions', '-noeh', # 4.2.3.11
    '--shared', '-shared', # 4.2.3.12
    '--no-host-device-initializer-list', '-nohdinitlist', # 4.2.3.15
    '--expt-relaxed-constexpr', '-expt-relaxed-constexpr', # 4.2.3.16
    '--extended-lambda', '-extended-lambda', # 4.2.3.17
    '--expt-extended-lambda', '-expt-extended-lambda', # 4.2.3.18
    '--m32', '-m32', # 4.2.3.20
    '--m64', '-m64', # 4.2.3.21
    '--forward-unknown-to-host-compiler', '-forward-unknown-to-host-compiler', # 4.2.5.1
    '--forward-unknown-to-host-linker', '-forward-unknown-to-host-linker', # 4.2.5.2
    '--dont-use-profile', '-noprof', # 4.2.5.3
    '--dryrun', '-dryrun', # 4.2.5.5
    '--verbose', '-v', # 4.2.5.6
    '--keep', '-keep', # 4.2.5.7
    '--save-temps', '-save-temps', # 4.2.5.9
    '--clean-targets', '-clean', # 4.2.5.10
    '--no-align-double', # 4.2.5.16
    '--no-device-link', '-nodlink', # 4.2.5.17
    '--allow-unsupported-compiler', '-allow-unsupported-compiler', # 4.2.5.18 (repeats the 4.2.1.14 entry above; harmless in a set literal)
    '--use_fast_math', '-use_fast_math', # 4.2.7.7
    '--extra-device-vectorization', '-extra-device-vectorization', # 4.2.7.12
    '--compile-as-tools-patch', '-astoolspatch', # 4.2.7.13
    '--keep-device-functions', '-keep-device-functions', # 4.2.7.14
    '--disable-warnings', '-w', # 4.2.8.1
    '--source-in-ptx', '-src-in-ptx', # 4.2.8.2
    '--restrict', '-restrict', # 4.2.8.3
    '--Wno-deprecated-gpu-targets', '-Wno-deprecated-gpu-targets', # 4.2.8.4
    '--Wno-deprecated-declarations', '-Wno-deprecated-declarations', # 4.2.8.5
    '--Wreorder', '-Wreorder', # 4.2.8.6
    '--Wdefault-stream-launch', '-Wdefault-stream-launch', # 4.2.8.7
    '--Wext-lambda-captures-this', '-Wext-lambda-captures-this', # 4.2.8.8
    '--display-error-number', '-err-no', # 4.2.8.10
    '--resource-usage', '-res-usage', # 4.2.8.14
    '--help', '-h', # 4.2.8.15
    '--version', '-V', # 4.2.8.16
    '--list-gpu-code', '-code-ls', # 4.2.8.20
    '--list-gpu-arch', '-arch-ls', # 4.2.8.21
}
# Dictionary of NVCC flags taking either one argument or a comma-separated list.
# Maps --long to -short options, because the short options are more GCC-like.
# _to_host_flags() consults both this table and its reverse to recognise
# value-taking flags, and normalises long spellings to the short ones.
_FLAG_LONG2SHORT_WITHARGS = {
    '--output-file': '-o', # 4.2.1.1
    '--pre-include': '-include', # 4.2.1.3
    '--library': '-l', # 4.2.1.4
    '--define-macro': '-D', # 4.2.1.5
    '--undefine-macro': '-U', # 4.2.1.6
    '--include-path': '-I', # 4.2.1.7
    '--system-include': '-isystem', # 4.2.1.8
    '--library-path': '-L', # 4.2.1.9
    '--output-directory': '-odir', # 4.2.1.10
    '--dependency-output': '-MF', # 4.2.1.11
    '--compiler-bindir': '-ccbin', # 4.2.1.13
    '--archiver-binary': '-arbin', # 4.2.1.15
    '--cudart': '-cudart', # 4.2.1.16
    '--cudadevrt': '-cudadevrt', # 4.2.1.17
    '--libdevice-directory': '-ldir', # 4.2.1.18
    '--target-directory': '-target-dir', # 4.2.1.19
    '--optimization-info': '-opt-info', # 4.2.3.6
    '--optimize': '-O', # 4.2.3.7
    '--ftemplate-backtrace-limit': '-ftemplate-backtrace-limit', # 4.2.3.9
    '--ftemplate-depth': '-ftemplate-depth', # 4.2.3.10
    '--x': '-x', # 4.2.3.13
    '--std': '-std', # 4.2.3.14
    '--machine': '-m', # 4.2.3.19
    '--compiler-options': '-Xcompiler', # 4.2.4.1
    '--linker-options': '-Xlinker', # 4.2.4.2
    '--archive-options': '-Xarchive', # 4.2.4.3
    '--ptxas-options': '-Xptxas', # 4.2.4.4
    '--nvlink-options': '-Xnvlink', # 4.2.4.5
    '--threads': '-t', # 4.2.5.4
    '--keep-dir': '-keep-dir', # 4.2.5.8
    '--run-args': '-run-args', # 4.2.5.11
    '--input-drive-prefix': '-idp', # 4.2.5.12
    '--dependency-drive-prefix': '-ddp', # 4.2.5.13
    '--drive-prefix': '-dp', # 4.2.5.14
    '--dependency-target-name': '-MT', # 4.2.5.15
    '--default-stream': '-default-stream', # 4.2.6.1
    '--gpu-architecture': '-arch', # 4.2.7.1
    '--gpu-code': '-code', # 4.2.7.2
    '--generate-code': '-gencode', # 4.2.7.3
    '--relocatable-device-code': '-rdc', # 4.2.7.4
    '--entries': '-e', # 4.2.7.5
    '--maxrregcount': '-maxrregcount', # 4.2.7.6
    '--ftz': '-ftz', # 4.2.7.8
    '--prec-div': '-prec-div', # 4.2.7.9
    '--prec-sqrt': '-prec-sqrt', # 4.2.7.10
    '--fmad': '-fmad', # 4.2.7.11
    '--Werror': '-Werror', # 4.2.8.9
    '--diag-error': '-diag-error', # 4.2.8.11
    '--diag-suppress': '-diag-suppress', # 4.2.8.12
    '--diag-warn': '-diag-warn', # 4.2.8.13
    '--options-file': '-optf', # 4.2.8.17
    '--time': '-time', # 4.2.8.18
    '--qpp-config': '-qpp-config', # 4.2.8.19
}
# Reverse map -short to --long options.
_FLAG_SHORT2LONG_WITHARGS = {v: k for k, v in _FLAG_LONG2SHORT_WITHARGS.items()}
# Compiler identifier string for this class.
id = 'nvcc'
def __init__(self, ccache: T.List[str], exelist: T.List[str], version: str, for_machine: MachineChoice,
             is_cross: bool,
             host_compiler: Compiler, info: 'MachineInfo',
             linker: T.Optional['DynamicLinker'] = None,
             full_version: T.Optional[str] = None):
    """
    Set up the NVCC wrapper around ``host_compiler``.

    Besides forwarding the common arguments to the base class, this borrows
    the host compiler's base options and pre-translates its warning and
    -Werror arguments into NVCC form.
    """
    super().__init__(ccache, exelist, version, for_machine, info,
                     linker=linker, full_version=full_version, is_cross=is_cross)
    self.host_compiler = host_compiler
    self.base_options = host_compiler.base_options

    # -Wpedantic generates useless churn due to nvcc's dual compilation model producing
    # a temporary host C++ file that includes gcc-style line directives:
    # https://stackoverflow.com/a/31001220
    translated_warn_args = {}
    for level, host_flags in host_compiler.warn_args.items():
        kept_flags = [f for f in host_flags if f != '-Wpedantic']
        translated_warn_args[level] = self._to_host_flags(kept_flags)
    self.warn_args = translated_warn_args

    host_werror = self.host_compiler.get_werror_args()
    self.host_werror_args = ['-Xcompiler=' + arg for arg in host_werror]
@classmethod
def _shield_nvcc_list_arg(cls, arg: str, listmode: bool = True) -> str:
    r"""
    Shield an argument against both splitting by NVCC's list-argument
    parse logic, and interpretation by any shell.

    NVCC seems to consider every comma , that is neither escaped by \ nor inside
    a double-quoted string a split-point. Single-quotes do not provide protection
    against splitting; In fact, after splitting they are \-escaped. Unfortunately,
    double-quotes don't protect against shell expansion. What follows is a
    complex dance to accommodate everybody.

    ``arg`` is the raw argument text. ``listmode`` says whether unprotected
    commas in ``arg`` are to be treated as NVCC list separators (and emitted
    as ``\,``); with ``listmode=False`` only shell quoting is applied.
    Returns the shielded string.
    """
    SQ = "'"
    DQ = '"'
    CM = ","
    BS = "\\"
    DQSQ = DQ+SQ+DQ
    quotable = set(string.whitespace+'"$`\\')

    if CM not in arg or not listmode:
        if SQ not in arg:
            # If any of the special characters "$`\ or whitespace are present, single-quote.
            # Otherwise return bare.
            if set(arg).intersection(quotable):
                return SQ+arg+SQ
            return arg  # Easy case: no splits, no quoting.

        # There are single quotes. Double-quote them, and single-quote the
        # strings between them. Joining with "'" keeps every segment,
        # including the text after the last single quote (which a
        # pair-then-slice interleave would drop).
        segments = [cls._shield_nvcc_list_arg(s) for s in arg.split(SQ)]
        return DQSQ.join(segments)

    # A comma is present, and list mode was active.
    # We apply (what we guess is) the (primitive) NVCC splitting rule:
    l = ['']
    instring = False
    argit = iter(arg)
    for c in argit:
        if c == CM and not instring:
            l.append('')
        elif c == DQ:
            l[-1] += c
            instring = not instring
        elif c == BS:
            # A backslash escapes the next character; a trailing lone
            # backslash simply ends the scan.
            try:
                l[-1] += next(argit)
            except StopIteration:
                break
        else:
            l[-1] += c

    # Shield individual strings, without listmode, then return them with
    # escaped commas between them.
    l = [cls._shield_nvcc_list_arg(s, listmode=False) for s in l]
    return r'\,'.join(l)
@classmethod
def _merge_flags(cls, flags: T.List[str]) -> T.List[str]:
    r"""
    Collapse runs of consecutive -Xcompiler arguments into a single one.

    NVCC command lines become exceedingly verbose and unreadable when many
    arguments are individually shielded with -Xcompiler. Both spellings are
    recognised: the isolated ``-Xcompiler VALUE`` pair and the glued
    ``-Xcompiler=VALUE``. Values of adjacent occurrences are joined with
    commas onto the first occurrence of the run; every other flag is copied
    through and ends the current run.

    A list of zero or one element is returned as-is (same object).
    """
    if len(flags) <= 1:
        return flags

    glued_prefix = '-Xcompiler='
    merged: T.List[str] = []
    remaining = iter(flags)
    in_group = False
    for flag in remaining:
        glued = flag.startswith(glued_prefix)
        isolated = flag == '-Xcompiler'
        if not (glued or isolated):
            in_group = False
            merged.append(flag)
            continue

        # The value is either the text after '=' or the following list
        # element (empty if the list ends right after an isolated flag).
        if glued:
            value = flag[len(glued_prefix):]
        else:
            value = next(remaining, "")

        if in_group:
            merged[-1] += ',' + value
        elif isolated:
            in_group = True
            merged.extend((flag, value))
        else:
            in_group = True
            merged.append(flag)
    return merged
def _to_host_flags(self, flags: T.List[str], phase: _Phase = _Phase.COMPILER) -> T.List[str]:
    """
    Translate generic "GCC-speak" plus particular "NVCC-speak" flags to NVCC flags.

    NVCC's "short" flags have broad similarities to the GCC standard, but have
    gratuitous, irritating differences.

    ``flags`` is the argument list to translate. ``phase`` selects which
    ``-X<phase>=`` wrapper is applied to arguments that begin with ``/``.
    Returns a new list that has been post-processed by ``_merge_flags``.

    Raises AssertionError if a value-taking flag is the last element of
    ``flags`` and therefore has no value.
    """
    xflags = []
    flagit = iter(flags)
    for flag in flagit:
        # The CUDA Toolkit Documentation, in 4.1. Command Option Types and Notation,
        # specifies that NVCC does not parse the standard flags as GCC does. It has
        # its own strategy, to wit:
        #
        #     nvcc recognizes three types of command options: boolean options, single
        #     value options, and list options.
        #
        #     Boolean options do not have an argument; they are either specified on a
        #     command line or not. Single value options must be specified at most once,
        #     and list options may be repeated. Examples of each of these option types
        #     are, respectively: --verbose (switch to verbose mode), --output-file
        #     (specify output file), and --include-path (specify include path).
        #
        #     Single value options and list options must have arguments, which must
        #     follow the name of the option itself by either one or more spaces or an
        #     equals character. When a one-character short name such as -I, -l, and -L
        #     is used, the value of the option may also immediately follow the option
        #     itself without being separated by spaces or an equal character. The
        #     individual values of list options may be separated by commas in a single
        #     instance of the option, or the option may be repeated, or any
        #     combination of these two cases.
        #
        # One strange consequence of this choice is that directory and filenames that
        # contain commas (',') cannot be passed to NVCC (at least, not as easily as
        # in GCC). Another strange consequence is that it is legal to supply flags
        # such as
        #
        #     -lpthread,rt,dl,util
        #     -l pthread,rt,dl,util
        #     -l=pthread,rt,dl,util
        #
        # and each of the above alternatives is equivalent to GCC-speak
        #
        #     -lpthread -lrt -ldl -lutil
        #     -l pthread -l rt -l dl -l util
        #     -l=pthread -l=rt -l=dl -l=util
        #
        # *With the exception of commas in the name*, GCC-speak for these list flags
        # is a strict subset of NVCC-speak, so we passthrough those flags.
        #
        # The -D macro-define flag is documented as somehow shielding commas from
        # splitting a definition. Balanced parentheses, braces and single-quotes
        # around the comma are not sufficient, but balanced double-quotes are. The
        # shielding appears to work with -l, -I, -L flags as well, for instance.
        #
        # Since our goal is to replicate GCC-speak as much as possible, we check for
        # commas in all list-arguments and shield them with double-quotes. We make
        # an exception for -D (where this would be value-changing) and -U (because
        # it isn't possible to define a macro with a comma in the name).

        if flag in self._FLAG_PASSTHRU_NOARGS:
            xflags.append(flag)
            continue

        # Reset for every flag, so that a flag missing its argument trips the
        # assertion below instead of raising UnboundLocalError or silently
        # reusing the value parsed for an earlier flag.
        val: T.Optional[str] = None

        # Handle breakup of flag-values into a flag-part and value-part.
        if flag[:1] not in '-/':
            # This is not a flag. It's probably a file input. Pass it through.
            xflags.append(flag)
            continue
        elif flag[:1] == '/':
            # This is ambiguously either an MSVC-style /switch or an absolute path
            # to a file. For some magical reason the following works acceptably in
            # both cases.
            # We only want to prefix arguments that are NOT static archives, since
            # the latter could contain relocatable device code (-dc/-rdc=true).
            prefix = '' if flag.endswith('.a') else f'-X{phase.value}='
            wrap = '"' if ',' in flag else ''
            xflags.append(f'{prefix}{wrap}{flag}{wrap}')
            continue
        elif len(flag) >= 2 and flag[0] == '-' and flag[1] in 'IDULlmOxte':
            # This is a single-letter short option. These options (with the
            # exception of -o) are allowed to receive their argument with neither
            # space nor = sign before them. Detect and separate them in that event.
            if flag[2:3] == '':            # -I something
                val = next(flagit, None)
            elif flag[2:3] == '=':         # -I=something
                val = flag[3:]
            else:                          # -Isomething
                val = flag[2:]
            flag = flag[:2]                # -I
        elif flag in self._FLAG_LONG2SHORT_WITHARGS or \
                flag in self._FLAG_SHORT2LONG_WITHARGS:
            # This is either -o or a multi-letter flag, and it is receiving its
            # value isolated.
            val = next(flagit, None)       # -o something
        elif flag.split('=', 1)[0] in self._FLAG_LONG2SHORT_WITHARGS or \
                flag.split('=', 1)[0] in self._FLAG_SHORT2LONG_WITHARGS:
            # This is either -o or a multi-letter flag, and it is receiving its
            # value after an = sign.
            flag, val = flag.split('=', 1)  # -o=something
        # Some dependencies (e.g., BoostDependency) add unspaced "-isystem/usr/include" arguments
        elif flag.startswith('-isystem'):
            val = flag[8:].strip()
            flag = flag[:8]
        else:
            # This is a flag, and it's foreign to NVCC.
            #
            # We do not know whether this GCC-speak flag takes an isolated
            # argument. Assuming it does not (the vast majority indeed don't),
            # wrap this argument in an -Xcompiler flag and send it down to NVCC.
            if flag == '-ffast-math':
                xflags.append('-use_fast_math')
                xflags.append('-Xcompiler='+flag)
            elif flag == '-fno-fast-math':
                xflags.append('-ftz=false')
                xflags.append('-prec-div=true')
                xflags.append('-prec-sqrt=true')
                xflags.append('-Xcompiler='+flag)
            elif flag == '-freciprocal-math':
                xflags.append('-prec-div=false')
                xflags.append('-Xcompiler='+flag)
            elif flag == '-fno-reciprocal-math':
                xflags.append('-prec-div=true')
                xflags.append('-Xcompiler='+flag)
            else:
                xflags.append('-Xcompiler='+self._shield_nvcc_list_arg(flag))
                # The above should securely handle GCC's -Wl, -Wa, -Wp, arguments.
            continue

        # Should only trip if there is a missing argument.
        assert val is not None, f'NVCC flag {flag!r} is missing its argument'

        # Take care of the various NVCC-supported flags that need special handling.
        flag = self._FLAG_LONG2SHORT_WITHARGS.get(flag, flag)

        if flag in {'-include', '-isystem', '-I', '-L', '-l'}:
            # These flags are known to GCC, but list-valued in NVCC. They potentially
            # require double-quoting to prevent NVCC interpreting the flags as lists
            # when GCC would not have done so.
            #
            # We avoid doing this quoting for -D to avoid redefining macros and for
            # -U because it isn't possible to define a macro with a comma in the name.
            # -U with comma arguments is impossible in GCC-speak (and thus unambiguous
            # in NVCC-speak, albeit unportable).
            if len(flag) == 2:
                xflags.append(flag+self._shield_nvcc_list_arg(val))
            elif flag == '-isystem' and val in self.host_compiler.get_default_include_dirs():
                # like GnuLikeCompiler, we have to filter out include directories specified
                # with -isystem that overlap with the host compiler's search path
                pass
            else:
                xflags.append(flag)
                xflags.append(self._shield_nvcc_list_arg(val))
        elif flag == '-O':
            # Handle optimization levels GCC knows about that NVCC does not.
            if val == 'fast':
                xflags.append('-O3')
                xflags.append('-use_fast_math')
                xflags.append('-Xcompiler')
                xflags.append(flag+val)
            elif val in {'s', 'g', 'z'}:
                xflags.append('-Xcompiler')
                xflags.append(flag+val)
            else:
                xflags.append(flag+val)
        elif flag in {'-D', '-U', '-m', '-t'}:
            xflags.append(flag+val)       # For style, keep glued.
        elif flag in {'-std'}:
            xflags.append(flag+'='+val)   # For style, keep glued.
        else:
            xflags.append(flag)
            xflags.append(val)

    return self._merge_flags(xflags)
def needs_static_linker(self) -> bool:
    """Report that no static linker is needed for this compiler (always False)."""
    required = False
    return required
def thread_link_flags(self, environment: 'Environment') -> T.List[str]:
    """Return the host compiler's thread link flags, translated for NVCC's link phase."""
    host_flags = self.host_compiler.thread_link_flags(environment)
    return self._to_host_flags(host_flags, _Phase.LINKER)
def sanity_check(self, work_dir: str, env: 'Environment') -> None:
    """
    Compile, and when possible run, a small CUDA program.

    The program source is written to ``sanitycheckcuda.cu`` inside
    ``work_dir`` and compiled there. If an exe wrapper is needed but not
    available, the program is only compiled (not linked or run). Otherwise
    it is run, either through the exe wrapper or through ``nvcc --run``.

    On a successful run with empty stderr, the program's stdout is stored
    in ``self.detected_cc``; otherwise ``self.detected_cc`` stays ``''``.

    Raises EnvironmentException if compilation fails or the run exits
    with a non-zero return code.
    """
    mlog.debug('Sanity testing ' + self.get_display_language() + ' compiler:', ' '.join(self.exelist))
    mlog.debug('Is cross compiler: %s.' % str(self.is_cross))

    sname = 'sanitycheckcuda.cu'
    # Test program: prints "major.minor" for each device on stdout, or the
    # cudaGetDeviceCount() error code on stderr.
    code = r'''
#include <cuda_runtime.h>
#include <stdio.h>
__global__ void kernel (void) {}
int main(void){
struct cudaDeviceProp prop;
int count, i;
cudaError_t ret = cudaGetDeviceCount(&count);
if(ret != cudaSuccess){
fprintf(stderr, "%d\n", (int)ret);
}else{
for(i=0;i<count;i++){
if(cudaGetDeviceProperties(&prop, i) == cudaSuccess){
fprintf(stdout, "%d.%d\n", prop.major, prop.minor);
}
}
}
fflush(stderr);
fflush(stdout);
return 0;
}
'''
    binname = sname.rsplit('.', 1)[0]
    binname += '_cross' if self.is_cross else ''
    source_name = os.path.join(work_dir, sname)
    binary_name = os.path.join(work_dir, binname + '.exe')
    with open(source_name, 'w', encoding='utf-8') as ofile:
        ofile.write(code)

    # The Sanity Test for CUDA language will serve as both a sanity test
    # and a native-build GPU architecture detection test, useful later.
    #
    # For this second purpose, NVCC has very handy flags, --run and
    # --run-args, that allow one to run an application with the
    # environment set up properly. Of course, this only works for native
    # builds; For cross builds we must still use the exe_wrapper (if any).
    self.detected_cc = ''
    flags = []

    # Disable warnings, compile with statically-linked runtime for minimum
    # reliance on the system.
    flags += ['-w', '-cudart', 'static', source_name]

    # Use the -ccbin option, if available, even during sanity checking.
    # Otherwise, on systems where CUDA does not support the default compiler,
    # NVCC becomes unusable.
    flags += self.get_ccbin_args(env.coredata.options)

    # If cross-compiling, we can't run the sanity check, only compile it.
    if env.need_exe_wrapper(self.for_machine) and not env.has_exe_wrapper():
        # Linking cross built apps is painful. You can't really
        # tell if you should use -nostdlib or not and for example
        # on OSX the compiler binary is the same but you need
        # a ton of compiler flags to differentiate between
        # arm and x86_64. So just compile.
        flags += self.get_compile_only_args()

    flags += self.get_output_args(binary_name)

    # Compile sanity check
    cmdlist = self.exelist + flags
    mlog.debug('Sanity check compiler command line: ', ' '.join(cmdlist))
    pc, stdo, stde = Popen_safe(cmdlist, cwd=work_dir)
    mlog.debug('Sanity check compile stdout: ')
    mlog.debug(stdo)
    mlog.debug('-----\nSanity check compile stderr:')
    mlog.debug(stde)
    mlog.debug('-----')
    if pc.returncode != 0:
        raise EnvironmentException(f'Compiler {self.name_string()} cannot compile programs.')

    # Run sanity check (if possible)
    if env.need_exe_wrapper(self.for_machine):
        if not env.has_exe_wrapper():
            # Nothing can execute the binary; the compile-only check is all we get.
            return
        else:
            cmdlist = env.exe_wrapper.get_command() + [binary_name]
    else:
        # Let NVCC itself launch the binary; the path is passed wrapped in
        # literal double quotes.
        cmdlist = self.exelist + ['--run', '"' + binary_name + '"']
    mlog.debug('Sanity check run command line: ', ' '.join(cmdlist))
    pe, stdo, stde = Popen_safe(cmdlist, cwd=work_dir)
    mlog.debug('Sanity check run stdout: ')
    mlog.debug(stdo)
    mlog.debug('-----\nSanity check run stderr:')
    mlog.debug(stde)
    mlog.debug('-----')
    pe.wait()
    if pe.returncode != 0:
        raise EnvironmentException(f'Executables created by {self.language} compiler {self.name_string()} are not runnable.')

    # Interpret the result of the sanity test.
    # As mentioned above, it is not only a sanity test but also a GPU
    # architecture detection test.
    if stde == '':
        self.detected_cc = stdo
    else:
        mlog.debug('cudaGetDeviceCount() returned ' + stde)
def has_header_symbol(self, hname: str, symbol: str, prefix: str,
                      env: 'Environment', *,
                      extra_args: T.Union[None, T.List[str], T.Callable[[CompileCheckMode], T.List[str]]] = None,
                      dependencies: T.Optional[T.List['Dependency']] = None) -> T.Tuple[bool, bool]:
    """
    Check whether header ``hname`` provides ``symbol``.

    Two probe programs are compiled in turn: the first treats the symbol as
    a macro or a plain C-like symbol, the second names it in a ``using``
    declaration (class or template). ``prefix`` is emitted verbatim before
    the ``#include``.

    Returns the ``(found, cached)`` pair produced by ``self.compiles``.
    """
    checked_args = [] if extra_args is None else extra_args

    # Probe 1: C-like symbol (or macro).
    c_like_probe = '''{prefix}
#include <{header}>
int main(void) {{
/* If it's not defined as a macro, try to use as a symbol */
#ifndef {symbol}
{symbol};
#endif
return 0;
}}'''
    c_like_source = c_like_probe.format(prefix=prefix, header=hname, symbol=symbol)
    found, cached = self.compiles(c_like_source, env, extra_args=checked_args, dependencies=dependencies)
    if found:
        return True, cached

    # Probe 2: class or template.
    cxx_probe = '''{prefix}
#include <{header}>
using {symbol};
int main(void) {{
return 0;
}}'''
    cxx_source = cxx_probe.format(prefix=prefix, header=hname, symbol=symbol)
    return self.compiles(cxx_source, env, extra_args=checked_args, dependencies=dependencies)
# Version requirements, in the syntax accepted by version_compare(), that
# self.version must satisfy before get_options() offers the matching C++ standard.
_CPP14_VERSION = '>=9.0'
_CPP17_VERSION = '>=11.0'
_CPP20_VERSION = '>=12.0'
def get_options(self) -> 'MutableKeyedOptionDictType':
    """
    Return the base-class options extended with the CUDA ``std`` and ``ccbindir`` options.

    The list of selectable C++ standards grows with the NVCC version
    according to the ``_CPP14_VERSION``/``_CPP17_VERSION``/``_CPP20_VERSION``
    requirements.
    """
    cpp_stds = ['none', 'c++03', 'c++11']
    gated_stds = (
        (self._CPP14_VERSION, 'c++14'),
        (self._CPP17_VERSION, 'c++17'),
        (self._CPP20_VERSION, 'c++20'),
    )
    for requirement, std in gated_stds:
        if version_compare(self.version, requirement):
            cpp_stds.append(std)

    base_options = super().get_options()
    std_option = self.create_option(options.UserComboOption,
                                    self.form_langopt_key('std'),
                                    'C++ language standard to use with CUDA',
                                    cpp_stds,
                                    'none')
    ccbindir_option = self.create_option(options.UserStringOption,
                                         self.form_langopt_key('ccbindir'),
                                         'CUDA non-default toolchain directory to use (-ccbin)',
                                         '')
    return self.update_options(base_options, std_option, ccbindir_option)
def _to_host_compiler_options(self, options: 'KeyedOptionDictType') -> 'KeyedOptionDictType':
    """
    Convert an NVCC Option set to a host compiler's option set.

    Each option the host compiler declares is taken from ``options`` when
    present there, otherwise the host compiler's own option object is used.
    """
    host_options = {}
    for key, default_opt in self.host_compiler.get_options().items():
        host_options[key] = options.get(key, default_opt)

    # We must strip the -std option from the host compiler option set, as NVCC has
    # its own -std flag that may not agree with the host compiler's.
    overrides = {self.form_langopt_key('std'): 'none'}
    return coredata.OptionsView(host_options, overrides=overrides)
def get_option_compile_args(self, options: 'KeyedOptionDictType') -> T.List[str]:
    """
    Build the option-derived compile arguments.

    The result is the -ccbin arguments, then ``--std=...`` (non-Windows
    only, and only if a standard was selected), then the host compiler's
    own option arguments translated for NVCC.
    """
    args = self.get_ccbin_args(options)

    # On Windows, the version of the C++ standard used by nvcc is dictated by
    # the combination of CUDA version and MSVC version; the --std= is thus ignored
    # and attempting to use it will result in a warning: https://stackoverflow.com/a/51272091/741027
    if not is_windows():
        std = options[self.form_langopt_key('std')]
        if std.value != 'none':
            args.append('--std=' + std.value)

    host_options = self._to_host_compiler_options(options)
    host_args = self.host_compiler.get_option_compile_args(host_options)
    return args + self._to_host_flags(host_args)
def get_option_link_args(self, options: 'KeyedOptionDictType') -> T.List[str]:
    """Return the -ccbin arguments followed by the host compiler's option link arguments, translated for NVCC's link phase."""
    ccbin_args = self.get_ccbin_args(options)
    host_options = self._to_host_compiler_options(options)
    host_args = self.host_compiler.get_option_link_args(host_options)
    return ccbin_args + self._to_host_flags(host_args, _Phase.LINKER)
def get_soname_args(self, env: 'Environment', prefix: str, shlib_name: str,
                    suffix: str, soversion: str,
                    darwin_versions: T.Tuple[str, str]) -> T.List[str]:
    """Return the host compiler's soname arguments, translated for NVCC's link phase."""
    host_args = self.host_compiler.get_soname_args(
        env, prefix, shlib_name, suffix, soversion, darwin_versions)
    return self._to_host_flags(host_args, _Phase.LINKER)
def get_compile_only_args(self) -> T.List[str]:
    """Return the argument list that makes NVCC compile without linking (``-c``)."""
    compile_only = ['-c']
    return compile_only
def get_no_optimization_args(self) -> T.List[str]:
    """Return the argument list that disables optimization (``-O0``)."""
    no_opt = ['-O0']
    return no_opt
def get_optimization_args(self, optimization_level: str) -> T.List[str]:
    """
    Look up the NVCC arguments for ``optimization_level`` in ``cuda_optimization_args``.

    Raises KeyError for a level that is not in the table.
    """
    # alternatively, consider simply redirecting this to the host compiler, which would
    # give us more control over options like "optimize for space" (which nvcc doesn't support):
    # return self._to_host_flags(self.host_compiler.get_optimization_args(optimization_level))
    nvcc_args = cuda_optimization_args[optimization_level]
    return nvcc_args
def sanitizer_compile_args(self, value: str) -> T.List[str]:
    """Return the host compiler's sanitizer compile arguments, translated for NVCC."""
    host_args = self.host_compiler.sanitizer_compile_args(value)
    return self._to_host_flags(host_args)
def sanitizer_link_args(self, value: str) -> T.List[str]:
    """
    Return the host compiler's sanitizer link arguments, translated for NVCC.

    The translation is done for the link phase, like every other
    ``*_link_args`` method of this class, so that ``/``-style arguments are
    wrapped with ``-Xlinker=`` rather than ``-Xcompiler=``.
    """
    host_args = self.host_compiler.sanitizer_link_args(value)
    return self._to_host_flags(host_args, _Phase.LINKER)
def get_debug_args(self, is_debug: bool) -> T.List[str]:
    """Look up the NVCC debug arguments for ``is_debug`` in ``cuda_debug_args``."""
    debug_args = cuda_debug_args[is_debug]
    return debug_args
def get_werror_args(self) -> T.List[str]:
    """Return NVCC's device-side -Werror argument followed by the pre-translated host -Werror arguments."""
    werror_args = ['-Werror=cross-execution-space-call,deprecated-declarations,reorder']
    return werror_args + self.host_werror_args
def get_warn_args(self, level: str) -> T.List[str]:
    """Return the warning arguments stored in ``self.warn_args`` for ``level``."""
    level_args = self.warn_args[level]
    return level_args
def get_include_args(self, path: str, is_system: bool) -> T.List[str]:
    """
    Return the include argument for ``path``.

    An empty path stands for the current directory. System includes use
    ``-isystem=``; all others use ``-I``.
    """
    directory = path if path != '' else '.'
    if is_system:
        return ['-isystem=' + directory]
    return ['-I' + directory]
def get_compile_debugfile_args(self, rel_obj: str, pch: bool = False) -> T.List[str]:
    """Return the host compiler's compile debug-file arguments, translated for NVCC."""
    host_args = self.host_compiler.get_compile_debugfile_args(rel_obj, pch)
    return self._to_host_flags(host_args)
def get_link_debugfile_args(self, targetfile: str) -> T.List[str]:
    """Return the host compiler's link debug-file arguments, translated for NVCC's link phase."""
    host_args = self.host_compiler.get_link_debugfile_args(targetfile)
    return self._to_host_flags(host_args, _Phase.LINKER)
def get_depfile_suffix(self) -> str:
    """Return the dependency-file suffix (``d``)."""
    suffix = 'd'
    return suffix
def get_optimization_link_args(self, optimization_level: str) -> T.List[str]:
    """Return the host compiler's optimization link arguments, translated for NVCC's link phase."""
    host_args = self.host_compiler.get_optimization_link_args(optimization_level)
    return self._to_host_flags(host_args, _Phase.LINKER)
def build_rpath_args(self, env: 'Environment', build_dir: str, from_dir: str,
                     rpath_paths: T.Tuple[str, ...], build_rpath: str,
                     install_rpath: str) -> T.Tuple[T.List[str], T.Set[bytes]]:
    """
    Delegate rpath computation to the host compiler.

    The returned arguments are translated for NVCC's link phase; the set of
    rpath directories to remove is passed through untouched.
    """
    host_result = self.host_compiler.build_rpath_args(
        env, build_dir, from_dir, rpath_paths, build_rpath, install_rpath)
    host_args, dirs_to_remove = host_result
    return self._to_host_flags(host_args, _Phase.LINKER), dirs_to_remove
def linker_to_compiler_args(self, args: T.List[str]) -> T.List[str]:
    """Return ``args`` unchanged (the very same list object)."""
    unchanged = args
    return unchanged
def get_pic_args(self) -> T.List[str]:
    """Return the host compiler's PIC arguments, translated for NVCC."""
    host_args = self.host_compiler.get_pic_args()
    return self._to_host_flags(host_args)
def compute_parameters_with_absolute_paths(self, parameter_list: T.List[str],
                                           build_dir: str) -> T.List[str]:
    """Return an empty list; both arguments are ignored."""
    no_parameters: T.List[str] = []
    return no_parameters
def get_output_args(self, target: str) -> T.List[str]:
    """Return the arguments that name ``target`` as the output file (``-o target``)."""
    output_args = ['-o']
    output_args.append(target)
    return output_args
def get_dependency_gen_args(self, outtarget: str, outfile: str) -> T.List[str]:
    """
    Return the dependency-generation arguments, or an empty list for NVCC older than 10.2.

    ``outtarget`` is passed to ``-MT`` and ``outfile`` to ``-MF``.
    """
    # According to nvcc Documentation, `-MD` option is added after 10.2
    # Reference: [CUDA 10.1](https://docs.nvidia.com/cuda/archive/10.1/cuda-compiler-driver-nvcc/index.html#options-for-specifying-compilation-phase-generate-nonsystem-dependencies)
    # Reference: [CUDA 10.2](https://docs.nvidia.com/cuda/archive/10.2/cuda-compiler-driver-nvcc/index.html#options-for-specifying-compilation-phase-generate-nonsystem-dependencies)
    if not version_compare(self.version, '>= 10.2'):
        return []
    return ['-MD', '-MT', outtarget, '-MF', outfile]
def get_std_exe_link_args(self) -> T.List[str]:
    """Return the host compiler's standard executable link arguments, translated for NVCC's link phase."""
    host_args = self.host_compiler.get_std_exe_link_args()
    return self._to_host_flags(host_args, _Phase.LINKER)
def find_library(self, libname: str, env: 'Environment', extra_dirs: T.List[str],
                 libtype: LibType = LibType.PREFER_SHARED, lib_prefix_warning: bool = True) -> T.Optional[T.List[str]]:
    """Delegate the library lookup to the host compiler and return its result unchanged."""
    host = self.host_compiler
    return host.find_library(libname, env, extra_dirs, libtype, lib_prefix_warning)
def get_crt_compile_args(self, crt_val: str, buildtype: str) -> T.List[str]:
    """Return the host compiler's CRT compile arguments, translated for NVCC."""
    host_args = self.host_compiler.get_crt_compile_args(crt_val, buildtype)
    return self._to_host_flags(host_args)
def get_crt_link_args(self, crt_val: str, buildtype: str) -> T.List[str]:
    """
    Return the CRT link arguments, translated for NVCC's link phase.

    When the host compiler's CRT compile arguments select ``/MDd``, ``/MD``
    or ``/MTd``, ``/NODEFAULTLIB:LIBCMT.lib`` is placed in front of the
    host compiler's CRT link arguments.
    """
    # nvcc defaults to static, release version of msvc runtime and provides no
    # native option to override it; override it with /NODEFAULTLIB
    non_default_crts = {'/MDd', '/MD', '/MTd'}
    host_crt_compile_args = self.host_compiler.get_crt_compile_args(crt_val, buildtype)
    overrides = []
    if not non_default_crts.isdisjoint(host_crt_compile_args):
        overrides.append('/NODEFAULTLIB:LIBCMT.lib')
    host_link_args = self.host_compiler.get_crt_link_args(crt_val, buildtype)
    return self._to_host_flags(overrides + host_link_args, _Phase.LINKER)
def get_target_link_args(self, target: 'BuildTarget') -> T.List[str]:
    """Return the base class's target link arguments, translated for NVCC's link phase."""
    generic_args = super().get_target_link_args(target)
    return self._to_host_flags(generic_args, _Phase.LINKER)
def get_dependency_compile_args(self, dep: 'Dependency') -> T.List[str]:
    """Return the base class's compile arguments for ``dep``, translated for NVCC."""
    generic_args = super().get_dependency_compile_args(dep)
    return self._to_host_flags(generic_args)
def get_dependency_link_args(self, dep: 'Dependency') -> T.List[str]:
    """Return the base class's link arguments for ``dep``, translated for NVCC's link phase."""
    generic_args = super().get_dependency_link_args(dep)
    return self._to_host_flags(generic_args, _Phase.LINKER)
def get_ccbin_args(self, options: 'KeyedOptionDictType') -> T.List[str]:
    """
    Return the ``-ccbin=<dir>`` argument for the ``ccbindir`` option.

    An empty list is returned when the option value is not a string or is
    the empty string. The argument is shell-shielded with list mode off.
    """
    ccbindir = options[self.form_langopt_key('ccbindir')].value
    if not isinstance(ccbindir, str) or ccbindir == '':
        return []
    return [self._shield_nvcc_list_arg('-ccbin='+ccbindir, False)]
def get_profile_generate_args(self) -> T.List[str]:
    """Return the host compiler's profile-generate arguments, each prefixed with ``-Xcompiler=``."""
    host_args = self.host_compiler.get_profile_generate_args()
    return ['-Xcompiler=' + arg for arg in host_args]
def get_profile_use_args(self) -> T.List[str]:
    """Return the host compiler's profile-use arguments, each prefixed with ``-Xcompiler=``."""
    host_args = self.host_compiler.get_profile_use_args()
    return ['-Xcompiler=' + arg for arg in host_args]
def get_assert_args(self, disable: bool, env: 'Environment') -> T.List[str]:
    """Return the host compiler's assert arguments unchanged (no NVCC translation is applied)."""
    host = self.host_compiler
    return host.get_assert_args(disable, env)