| # Copyright 2012-2017 The Meson development team |
| |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| import enum |
| import os.path |
| import string |
| import typing as T |
| |
| from .. import coredata |
| from .. import mlog |
| from ..mesonlib import ( |
| EnvironmentException, MachineChoice, Popen_safe, OptionOverrideProxy, |
| is_windows, LibType, OptionKey, |
| ) |
| from .compilers import (Compiler, cuda_buildtype_args, cuda_optimization_args, |
| cuda_debug_args, CompileCheckMode) |
| |
| if T.TYPE_CHECKING: |
| from ..build import BuildTarget |
| from ..coredata import KeyedOptionDictType |
| from ..dependencies import Dependency |
| from ..environment import Environment # noqa: F401 |
| from ..envconfig import MachineInfo |
| from ..linkers import DynamicLinker |
| from ..programs import ExternalProgram |
| |
| |
| class _Phase(enum.Enum): |
| |
| COMPILER = 'compiler' |
| LINKER = 'linker' |
| |
| |
class CudaCompiler(Compiler):
    """Compiler wrapper driving NVIDIA's NVCC on top of a host compiler.

    Most queries are answered by asking ``self.host_compiler`` for its
    GCC/MSVC-style flags and translating them into something NVCC accepts
    via :meth:`_to_host_flags`.
    """

    LINKER_PREFIX = '-Xlinker='
    language = 'cuda'

    # NVCC flags taking no arguments.
    _FLAG_PASSTHRU_NOARGS = {
        # NVCC --long-option,                   NVCC -short-option              CUDA Toolkit 11.2.1 Reference
        '--objdir-as-tempdir',                  '-objtemp',                     # 4.2.1.2
        '--generate-dependency-targets',        '-MP',                          # 4.2.1.12
        '--allow-unsupported-compiler',         '-allow-unsupported-compiler',  # 4.2.1.14, 4.2.5.18
        '--link',                                                               # 4.2.2.1
        '--lib',                                '-lib',                         # 4.2.2.2
        '--device-link',                        '-dlink',                       # 4.2.2.3
        '--device-c',                           '-dc',                          # 4.2.2.4
        '--device-w',                           '-dw',                          # 4.2.2.5
        '--cuda',                               '-cuda',                        # 4.2.2.6
        '--compile',                            '-c',                           # 4.2.2.7
        '--fatbin',                             '-fatbin',                      # 4.2.2.8
        '--cubin',                              '-cubin',                       # 4.2.2.9
        '--ptx',                                '-ptx',                         # 4.2.2.10
        '--preprocess',                         '-E',                           # 4.2.2.11
        '--generate-dependencies',              '-M',                           # 4.2.2.12
        '--generate-nonsystem-dependencies',    '-MM',                          # 4.2.2.13
        '--generate-dependencies-with-compile', '-MD',                          # 4.2.2.14
        '--generate-nonsystem-dependencies-with-compile', '-MMD',               # 4.2.2.15
        '--run',                                                                # 4.2.2.16
        '--profile',                            '-pg',                          # 4.2.3.1
        '--debug',                              '-g',                           # 4.2.3.2
        '--device-debug',                       '-G',                           # 4.2.3.3
        '--extensible-whole-program',           '-ewp',                         # 4.2.3.4
        '--generate-line-info',                 '-lineinfo',                    # 4.2.3.5
        '--dlink-time-opt',                     '-dlto',                        # 4.2.3.8
        '--no-exceptions',                      '-noeh',                        # 4.2.3.11
        '--shared',                             '-shared',                      # 4.2.3.12
        '--no-host-device-initializer-list',    '-nohdinitlist',                # 4.2.3.15
        '--expt-relaxed-constexpr',             '-expt-relaxed-constexpr',      # 4.2.3.16
        '--extended-lambda',                    '-extended-lambda',             # 4.2.3.17
        '--expt-extended-lambda',               '-expt-extended-lambda',        # 4.2.3.18
        '--m32',                                '-m32',                         # 4.2.3.20
        '--m64',                                '-m64',                         # 4.2.3.21
        '--forward-unknown-to-host-compiler',   '-forward-unknown-to-host-compiler',  # 4.2.5.1
        '--forward-unknown-to-host-linker',     '-forward-unknown-to-host-linker',    # 4.2.5.2
        '--dont-use-profile',                   '-noprof',                      # 4.2.5.3
        '--dryrun',                             '-dryrun',                      # 4.2.5.5
        '--verbose',                            '-v',                           # 4.2.5.6
        '--keep',                               '-keep',                        # 4.2.5.7
        '--save-temps',                         '-save-temps',                  # 4.2.5.9
        '--clean-targets',                      '-clean',                       # 4.2.5.10
        '--no-align-double',                                                    # 4.2.5.16
        '--no-device-link',                     '-nodlink',                     # 4.2.5.17
        '--use_fast_math',                      '-use_fast_math',               # 4.2.7.7
        '--extra-device-vectorization',         '-extra-device-vectorization',  # 4.2.7.12
        '--compile-as-tools-patch',             '-astoolspatch',                # 4.2.7.13
        '--keep-device-functions',              '-keep-device-functions',       # 4.2.7.14
        '--disable-warnings',                   '-w',                           # 4.2.8.1
        '--source-in-ptx',                      '-src-in-ptx',                  # 4.2.8.2
        '--restrict',                           '-restrict',                    # 4.2.8.3
        '--Wno-deprecated-gpu-targets',         '-Wno-deprecated-gpu-targets',  # 4.2.8.4
        '--Wno-deprecated-declarations',        '-Wno-deprecated-declarations', # 4.2.8.5
        '--Wreorder',                           '-Wreorder',                    # 4.2.8.6
        '--Wdefault-stream-launch',             '-Wdefault-stream-launch',      # 4.2.8.7
        '--Wext-lambda-captures-this',          '-Wext-lambda-captures-this',   # 4.2.8.8
        '--display-error-number',               '-err-no',                      # 4.2.8.10
        '--resource-usage',                     '-res-usage',                   # 4.2.8.14
        '--help',                               '-h',                           # 4.2.8.15
        '--version',                            '-V',                           # 4.2.8.16
        '--list-gpu-code',                      '-code-ls',                     # 4.2.8.20
        '--list-gpu-arch',                      '-arch-ls',                     # 4.2.8.21
    }
    # Dictionary of NVCC flags taking either one argument or a comma-separated list.
    # Maps --long to -short options, because the short options are more GCC-like.
    _FLAG_LONG2SHORT_WITHARGS = {
        '--output-file':                '-o',                       # 4.2.1.1
        '--pre-include':                '-include',                 # 4.2.1.3
        '--library':                    '-l',                       # 4.2.1.4
        '--define-macro':               '-D',                       # 4.2.1.5
        '--undefine-macro':             '-U',                       # 4.2.1.6
        '--include-path':               '-I',                       # 4.2.1.7
        '--system-include':             '-isystem',                 # 4.2.1.8
        '--library-path':               '-L',                       # 4.2.1.9
        '--output-directory':           '-odir',                    # 4.2.1.10
        '--dependency-output':          '-MF',                      # 4.2.1.11
        '--compiler-bindir':            '-ccbin',                   # 4.2.1.13
        '--archiver-binary':            '-arbin',                   # 4.2.1.15
        '--cudart':                     '-cudart',                  # 4.2.1.16
        '--cudadevrt':                  '-cudadevrt',               # 4.2.1.17
        '--libdevice-directory':        '-ldir',                    # 4.2.1.18
        '--target-directory':           '-target-dir',              # 4.2.1.19
        '--optimization-info':          '-opt-info',                # 4.2.3.6
        '--optimize':                   '-O',                       # 4.2.3.7
        '--ftemplate-backtrace-limit':  '-ftemplate-backtrace-limit',  # 4.2.3.9
        '--ftemplate-depth':            '-ftemplate-depth',         # 4.2.3.10
        '--x':                          '-x',                       # 4.2.3.13
        '--std':                        '-std',                     # 4.2.3.14
        '--machine':                    '-m',                       # 4.2.3.19
        '--compiler-options':           '-Xcompiler',               # 4.2.4.1
        '--linker-options':             '-Xlinker',                 # 4.2.4.2
        '--archive-options':            '-Xarchive',                # 4.2.4.3
        '--ptxas-options':              '-Xptxas',                  # 4.2.4.4
        '--nvlink-options':             '-Xnvlink',                 # 4.2.4.5
        '--threads':                    '-t',                       # 4.2.5.4
        '--keep-dir':                   '-keep-dir',                # 4.2.5.8
        '--run-args':                   '-run-args',                # 4.2.5.11
        '--input-drive-prefix':         '-idp',                     # 4.2.5.12
        '--dependency-drive-prefix':    '-ddp',                     # 4.2.5.13
        '--drive-prefix':               '-dp',                      # 4.2.5.14
        '--dependency-target-name':     '-MT',                      # 4.2.5.15
        '--default-stream':             '-default-stream',          # 4.2.6.1
        '--gpu-architecture':           '-arch',                    # 4.2.7.1
        '--gpu-code':                   '-code',                    # 4.2.7.2
        '--generate-code':              '-gencode',                 # 4.2.7.3
        '--relocatable-device-code':    '-rdc',                     # 4.2.7.4
        '--entries':                    '-e',                       # 4.2.7.5
        '--maxrregcount':               '-maxrregcount',            # 4.2.7.6
        '--ftz':                        '-ftz',                     # 4.2.7.8
        '--prec-div':                   '-prec-div',                # 4.2.7.9
        '--prec-sqrt':                  '-prec-sqrt',               # 4.2.7.10
        '--fmad':                       '-fmad',                    # 4.2.7.11
        '--Werror':                     '-Werror',                  # 4.2.8.9
        '--diag-error':                 '-diag-error',              # 4.2.8.11
        '--diag-suppress':              '-diag-suppress',           # 4.2.8.12
        '--diag-warn':                  '-diag-warn',               # 4.2.8.13
        '--options-file':               '-optf',                    # 4.2.8.17
        '--time':                       '-time',                    # 4.2.8.18
        '--qpp-config':                 '-qpp-config',              # 4.2.8.19
    }
    # Reverse map -short to --long options.
    _FLAG_SHORT2LONG_WITHARGS = {v: k for k, v in _FLAG_LONG2SHORT_WITHARGS.items()}

    def __init__(self, exelist: T.List[str], version: str, for_machine: MachineChoice,
                 is_cross: bool, exe_wrapper: T.Optional['ExternalProgram'],
                 host_compiler: Compiler, info: 'MachineInfo',
                 linker: T.Optional['DynamicLinker'] = None,
                 full_version: T.Optional[str] = None):
        """Set up the NVCC wrapper around an already-detected host compiler.

        The base options are taken over from ``host_compiler`` and its
        warning flags are pre-translated into NVCC form once, here.
        """
        super().__init__(exelist, version, for_machine, info, linker=linker, full_version=full_version, is_cross=is_cross)
        self.exe_wrapper = exe_wrapper
        self.host_compiler = host_compiler
        self.base_options = host_compiler.base_options
        self.id = 'nvcc'
        self.warn_args = {level: self._to_host_flags(flags) for level, flags in host_compiler.warn_args.items()}

    @classmethod
    def _shield_nvcc_list_arg(cls, arg: str, listmode: bool = True) -> str:
        r"""
        Shield an argument against both splitting by NVCC's list-argument
        parse logic, and interpretation by any shell.

        NVCC seems to consider every comma , that is neither escaped by \ nor inside
        a double-quoted string a split-point. Single-quotes do not provide protection
        against splitting; In fact, after splitting they are \-escaped. Unfortunately,
        double-quotes don't protect against shell expansion. What follows is a
        complex dance to accommodate everybody.

        :param arg: the raw argument value.
        :param listmode: when True, unprotected commas in ``arg`` are treated
            as NVCC list separators and re-emitted as ``\,``; when False
            commas get no special treatment.
        :return: the shielded argument.
        """

        SQ = "'"
        DQ = '"'
        CM = ","
        BS = "\\"
        DQSQ = DQ+SQ+DQ
        quotable = set(string.whitespace+'"$`\\')

        if CM not in arg or not listmode:
            if SQ not in arg:
                # If any of the special characters "$`\ or whitespace are present, single-quote.
                # Otherwise return bare.
                if set(arg).intersection(quotable):
                    return SQ+arg+SQ
                else:
                    return arg  # Easy case: no splits, no quoting.
            else:
                # There are single quotes. Double-quote them, and single-quote the
                # strings between them.
                #
                # The pieces are interleaved with "'" by joining on it. The
                # previous implementation sliced the list of [piece, "'"]
                # pairs *before* flattening it, which threw away the final
                # piece together with its separator (it's -> it"'").
                pieces = [cls._shield_nvcc_list_arg(s) for s in arg.split(SQ)]
                return DQSQ.join(pieces)
        else:
            # A comma is present, and list mode was active.
            # We apply (what we guess is) the (primitive) NVCC splitting rule:
            l = ['']
            instring = False
            argit = iter(arg)
            for c in argit:
                if c == CM and not instring:
                    l.append('')
                elif c == DQ:
                    l[-1] += c
                    instring = not instring
                elif c == BS:
                    # A backslash escapes the next character; keep only that
                    # character. A trailing lone backslash ends the scan.
                    try:
                        l[-1] += next(argit)
                    except StopIteration:
                        break
                else:
                    l[-1] += c

            # Shield individual strings, without listmode, then return them with
            # escaped commas between them.
            l = [cls._shield_nvcc_list_arg(s, listmode=False) for s in l]
            return r'\,'.join(l)

    @classmethod
    def _merge_flags(cls, flags: T.List[str]) -> T.List[str]:
        r"""
        The flags to NVCC gets exceedingly verbose and unreadable when too many of them
        are shielded with -Xcompiler. Merge consecutive -Xcompiler-wrapped arguments
        into one.

        Both the isolated form (``-Xcompiler`` followed by its value) and the
        glued form (``-Xcompiler=value``) are recognised. The first flag of a
        run keeps its form and the values of the following ones are appended
        to it, comma-separated.

        :param flags: already translated NVCC flags.
        :return: ``flags`` itself when it has at most one element, otherwise
            a new list with runs of -Xcompiler flags collapsed.
        """
        if len(flags) <= 1:
            return flags
        flagit = iter(flags)
        xflags: T.List[str] = []

        def is_xcompiler_flag_isolated(flag: str) -> bool:
            return flag == '-Xcompiler'

        def is_xcompiler_flag_glued(flag: str) -> bool:
            return flag.startswith('-Xcompiler=')

        def is_xcompiler_flag(flag: str) -> bool:
            return is_xcompiler_flag_isolated(flag) or is_xcompiler_flag_glued(flag)

        def get_xcompiler_val(flag: str, flagit: T.Iterator[str]) -> str:
            # Glued form carries its value; the isolated form consumes the
            # next element (or yields '' if the list ends right after it).
            if is_xcompiler_flag_glued(flag):
                return flag[len('-Xcompiler='):]
            else:
                try:
                    return next(flagit)
                except StopIteration:
                    return ""

        ingroup = False
        for flag in flagit:
            if not is_xcompiler_flag(flag):
                ingroup = False
                xflags.append(flag)
            elif ingroup:
                # Continue the current run: extend the last emitted element.
                xflags[-1] += ','
                xflags[-1] += get_xcompiler_val(flag, flagit)
            elif is_xcompiler_flag_isolated(flag):
                ingroup = True
                xflags.append(flag)
                xflags.append(get_xcompiler_val(flag, flagit))
            elif is_xcompiler_flag_glued(flag):
                ingroup = True
                xflags.append(flag)
            else:
                raise ValueError("-Xcompiler flag merging failed, unknown argument form!")
        return xflags

    def _to_host_flags(self, flags: T.List[str], phase: _Phase = _Phase.COMPILER) -> T.List[str]:
        """
        Translate generic "GCC-speak" plus particular "NVCC-speak" flags to NVCC flags.

        NVCC's "short" flags have broad similarities to the GCC standard, but have
        gratuitous, irritating differences.

        :param flags: flags as a GCC-like (or MSVC-like) tool would take them.
        :param phase: which host tool ``/``-prefixed arguments are forwarded
            to (``-Xcompiler=`` or ``-Xlinker=``).
        :return: the translated flags, with consecutive -Xcompiler flags merged.
        :raises EnvironmentException: if a flag that requires a value is the
            last element of ``flags``.
        """

        xflags: T.List[str] = []
        flagit = iter(flags)
        val: T.Optional[str]

        for flag in flagit:
            # The CUDA Toolkit Documentation, in 4.1. Command Option Types and Notation,
            # specifies that NVCC does not parse the standard flags as GCC does. It has
            # its own strategy, to wit:
            #
            #     nvcc recognizes three types of command options: boolean options, single
            #     value options, and list options.
            #
            #     Boolean options do not have an argument; they are either specified on a
            #     command line or not. Single value options must be specified at most once,
            #     and list options may be repeated. Examples of each of these option types
            #     are, respectively: --verbose (switch to verbose mode), --output-file
            #     (specify output file), and --include-path (specify include path).
            #
            #     Single value options and list options must have arguments, which must
            #     follow the name of the option itself by either one or more spaces or an
            #     equals character. When a one-character short name such as -I, -l, and -L
            #     is used, the value of the option may also immediately follow the option
            #     itself without being separated by spaces or an equal character. The
            #     individual values of list options may be separated by commas in a single
            #     instance of the option, or the option may be repeated, or any
            #     combination of these two cases.
            #
            # One strange consequence of this choice is that directory and filenames that
            # contain commas (',') cannot be passed to NVCC (at least, not as easily as
            # in GCC). Another strange consequence is that it is legal to supply flags
            # such as
            #
            #     -lpthread,rt,dl,util
            #     -l pthread,rt,dl,util
            #     -l=pthread,rt,dl,util
            #
            # and each of the above alternatives is equivalent to GCC-speak
            #
            #     -lpthread -lrt -ldl -lutil
            #     -l pthread -l rt -l dl -l util
            #     -l=pthread -l=rt -l=dl -l=util
            #
            # *With the exception of commas in the name*, GCC-speak for these list flags
            # is a strict subset of NVCC-speak, so we passthrough those flags.
            #
            # The -D macro-define flag is documented as somehow shielding commas from
            # splitting a definition. Balanced parentheses, braces and single-quotes
            # around the comma are not sufficient, but balanced double-quotes are. The
            # shielding appears to work with -l, -I, -L flags as well, for instance.
            #
            # Since our goal is to replicate GCC-speak as much as possible, we check for
            # commas in all list-arguments and shield them with double-quotes. We make
            # an exception for -D (where this would be value-changing) and -U (because
            # it isn't possible to define a macro with a comma in the name).

            # Reset for every flag: otherwise a value-taking flag at the very
            # end of the list would silently reuse the previous flag's value.
            val = None

            if flag in self._FLAG_PASSTHRU_NOARGS:
                xflags.append(flag)
                continue

            # Handle breakup of flag-values into a flag-part and value-part.
            if flag[:1] not in '-/':
                # This is not a flag. It's probably a file input. Pass it through.
                xflags.append(flag)
                continue
            elif flag[:1] == '/':
                # This is ambiguously either an MSVC-style /switch or an absolute path
                # to a file. For some magical reason the following works acceptably in
                # both cases.
                wrap = '"' if ',' in flag else ''
                xflags.append(f'-X{phase.value}={wrap}{flag}{wrap}')
                continue
            elif len(flag) >= 2 and flag[0] == '-' and flag[1] in 'IDULlmOxte':
                # This is a single-letter short option. These options (with the
                # exception of -o) are allowed to receive their argument with neither
                # space nor = sign before them. Detect and separate them in that event.
                if flag[2:3] == '':            # -I something
                    try:
                        val = next(flagit)
                    except StopIteration:
                        pass                   # Reported below as a missing argument.
                elif flag[2:3] == '=':         # -I=something
                    val = flag[3:]
                else:                          # -Isomething
                    val = flag[2:]
                flag = flag[:2]                # -I
            elif flag in self._FLAG_LONG2SHORT_WITHARGS or \
                    flag in self._FLAG_SHORT2LONG_WITHARGS:
                # This is either -o or a multi-letter flag, and it is receiving its
                # value isolated.
                try:
                    val = next(flagit)         # -o something
                except StopIteration:
                    pass                       # Reported below as a missing argument.
            elif flag.split('=', 1)[0] in self._FLAG_LONG2SHORT_WITHARGS or \
                    flag.split('=', 1)[0] in self._FLAG_SHORT2LONG_WITHARGS:
                # This is either -o or a multi-letter flag, and it is receiving its
                # value after an = sign.
                flag, val = flag.split('=', 1)    # -o=something
            # Some dependencies (e.g., BoostDependency) add unspaced "-isystem/usr/include" arguments
            elif flag.startswith('-isystem'):
                val = flag[8:].strip()
                flag = flag[:8]
            else:
                # This is a flag, and it's foreign to NVCC.
                #
                # We do not know whether this GCC-speak flag takes an isolated
                # argument. Assuming it does not (the vast majority indeed don't),
                # wrap this argument in an -Xcompiler flag and send it down to NVCC.
                if flag == '-ffast-math':
                    xflags.append('-use_fast_math')
                    xflags.append('-Xcompiler='+flag)
                elif flag == '-fno-fast-math':
                    xflags.append('-ftz=false')
                    xflags.append('-prec-div=true')
                    xflags.append('-prec-sqrt=true')
                    xflags.append('-Xcompiler='+flag)
                elif flag == '-freciprocal-math':
                    xflags.append('-prec-div=false')
                    xflags.append('-Xcompiler='+flag)
                elif flag == '-fno-reciprocal-math':
                    xflags.append('-prec-div=true')
                    xflags.append('-Xcompiler='+flag)
                else:
                    xflags.append('-Xcompiler='+self._shield_nvcc_list_arg(flag))
                    # The above should securely handle GCC's -Wl, -Wa, -Wp, arguments.
                continue

            # An explicit check instead of an assert, so that the missing
            # argument is also caught when Python runs with -O.
            if val is None:
                raise EnvironmentException(f'Missing argument for compiler flag {flag!r}.')

            # Take care of the various NVCC-supported flags that need special handling.
            flag = self._FLAG_LONG2SHORT_WITHARGS.get(flag, flag)

            if flag in {'-include', '-isystem', '-I', '-L', '-l'}:
                # These flags are known to GCC, but list-valued in NVCC. They potentially
                # require double-quoting to prevent NVCC interpreting the flags as lists
                # when GCC would not have done so.
                #
                # We avoid doing this quoting for -D to avoid redefining macros and for
                # -U because it isn't possible to define a macro with a comma in the name.
                # -U with comma arguments is impossible in GCC-speak (and thus unambiguous
                # in NVCC-speak, albeit unportable).
                if len(flag) == 2:
                    xflags.append(flag+self._shield_nvcc_list_arg(val))
                elif flag == '-isystem' and val in self.host_compiler.get_default_include_dirs():
                    # like GnuLikeCompiler, we have to filter out include directories specified
                    # with -isystem that overlap with the host compiler's search path
                    pass
                else:
                    xflags.append(flag)
                    xflags.append(self._shield_nvcc_list_arg(val))
            elif flag == '-O':
                # Handle optimization levels GCC knows about that NVCC does not.
                if val == 'fast':
                    xflags.append('-O3')
                    xflags.append('-use_fast_math')
                    xflags.append('-Xcompiler')
                    xflags.append(flag+val)
                elif val in {'s', 'g', 'z'}:
                    xflags.append('-Xcompiler')
                    xflags.append(flag+val)
                else:
                    xflags.append(flag+val)
            elif flag in {'-D', '-U', '-m', '-t'}:
                xflags.append(flag+val)       # For style, keep glued.
            elif flag in {'-std'}:
                xflags.append(flag+'='+val)   # For style, keep glued.
            else:
                xflags.append(flag)
                xflags.append(val)

        return self._merge_flags(xflags)

    def needs_static_linker(self) -> bool:
        """Return False: this compiler reports no need for a static linker."""
        return False

    def thread_link_flags(self, environment: 'Environment') -> T.List[str]:
        """Return the host compiler's thread link flags, translated for NVCC."""
        return self._to_host_flags(self.host_compiler.thread_link_flags(environment), _Phase.LINKER)

    def sanity_check(self, work_dir: str, env: 'Environment') -> None:
        """Compile (and, when possible, run) a tiny CUDA program.

        Besides validating the toolchain, a successful run stores the
        program's stdout (one ``major.minor`` line per device) in
        ``self.detected_cc``.

        :param work_dir: directory in which source and binary are created.
        :param env: environment whose coredata supplies the ccbin option.
        :raises EnvironmentException: if compiling fails, or if the resulting
            executable is run and exits with a non-zero status.
        """
        mlog.debug('Sanity testing ' + self.get_display_language() + ' compiler:', ' '.join(self.exelist))
        mlog.debug('Is cross compiler: %s.' % str(self.is_cross))

        sname = 'sanitycheckcuda.cu'
        code = r'''
        #include <cuda_runtime.h>
        #include <stdio.h>

        __global__ void kernel (void) {}

        int main(void){
            struct cudaDeviceProp prop;
            int count, i;
            cudaError_t ret = cudaGetDeviceCount(&count);
            if(ret != cudaSuccess){
                fprintf(stderr, "%d\n", (int)ret);
            }else{
                for(i=0;i<count;i++){
                    if(cudaGetDeviceProperties(&prop, i) == cudaSuccess){
                        fprintf(stdout, "%d.%d\n", prop.major, prop.minor);
                    }
                }
            }
            fflush(stderr);
            fflush(stdout);
            return 0;
        }
        '''
        binname = sname.rsplit('.', 1)[0]
        binname += '_cross' if self.is_cross else ''
        source_name = os.path.join(work_dir, sname)
        binary_name = os.path.join(work_dir, binname + '.exe')
        with open(source_name, 'w', encoding='utf-8') as ofile:
            ofile.write(code)

        # The Sanity Test for CUDA language will serve as both a sanity test
        # and a native-build GPU architecture detection test, useful later.
        #
        # For this second purpose, NVCC has very handy flags, --run and
        # --run-args, that allow one to run an application with the
        # environment set up properly. Of course, this only works for native
        # builds; For cross builds we must still use the exe_wrapper (if any).
        self.detected_cc = ''
        flags = []

        # Disable warnings, compile with statically-linked runtime for minimum
        # reliance on the system.
        flags += ['-w', '-cudart', 'static', source_name]

        # Use the -ccbin option, if available, even during sanity checking.
        # Otherwise, on systems where CUDA does not support the default compiler,
        # NVCC becomes unusable.
        flags += self.get_ccbin_args(env.coredata.options)

        # If cross-compiling, we can't run the sanity check, only compile it.
        if self.is_cross and self.exe_wrapper is None:
            # Linking cross built apps is painful. You can't really
            # tell if you should use -nostdlib or not and for example
            # on OSX the compiler binary is the same but you need
            # a ton of compiler flags to differentiate between
            # arm and x86_64. So just compile.
            flags += self.get_compile_only_args()
        flags += self.get_output_args(binary_name)

        # Compile sanity check
        cmdlist = self.exelist + flags
        mlog.debug('Sanity check compiler command line: ', ' '.join(cmdlist))
        pc, stdo, stde = Popen_safe(cmdlist, cwd=work_dir)
        mlog.debug('Sanity check compile stdout: ')
        mlog.debug(stdo)
        mlog.debug('-----\nSanity check compile stderr:')
        mlog.debug(stde)
        mlog.debug('-----')
        if pc.returncode != 0:
            raise EnvironmentException(f'Compiler {self.name_string()} can not compile programs.')

        # Run sanity check (if possible)
        if self.is_cross:
            if self.exe_wrapper is None:
                return
            else:
                cmdlist = self.exe_wrapper.get_command() + [binary_name]
        else:
            cmdlist = self.exelist + ['--run', '"' + binary_name + '"']
        mlog.debug('Sanity check run command line: ', ' '.join(cmdlist))
        pe, stdo, stde = Popen_safe(cmdlist, cwd=work_dir)
        mlog.debug('Sanity check run stdout: ')
        mlog.debug(stdo)
        mlog.debug('-----\nSanity check run stderr:')
        mlog.debug(stde)
        mlog.debug('-----')
        pe.wait()
        if pe.returncode != 0:
            raise EnvironmentException(f'Executables created by {self.language} compiler {self.name_string()} are not runnable.')

        # Interpret the result of the sanity test.
        # As mentioned above, it is not only a sanity test but also a GPU
        # architecture detection test.
        if stde == '':
            self.detected_cc = stdo
        else:
            mlog.debug('cudaGetDeviceCount() returned ' + stde)

    def has_header_symbol(self, hname: str, symbol: str, prefix: str,
                          env: 'Environment', *,
                          extra_args: T.Union[None, T.List[str], T.Callable[[CompileCheckMode], T.List[str]]] = None,
                          dependencies: T.Optional[T.List['Dependency']] = None) -> T.Tuple[bool, bool]:
        """Check whether ``symbol`` is available after including ``hname``.

        Two probes are compiled: first the symbol used as a macro or plain
        expression, then (if that fails) the symbol named in a
        using-declaration, which covers classes and templates.

        :return: ``(found, cached)`` as reported by ``self.compiles``.
        """
        if extra_args is None:
            extra_args = []
        fargs = {'prefix': prefix, 'header': hname, 'symbol': symbol}
        # Check if it's a C-like symbol
        t = '''{prefix}
        #include <{header}>
        int main(void) {{
            /* If it's not defined as a macro, try to use as a symbol */
            #ifndef {symbol}
                {symbol};
            #endif
            return 0;
        }}'''
        found, cached = self.compiles(t.format_map(fargs), env, extra_args=extra_args, dependencies=dependencies)
        if found:
            return True, cached
        # Check if it's a class or a template
        t = '''{prefix}
        #include <{header}>
        using {symbol};
        int main(void) {{
            return 0;
        }}'''
        return self.compiles(t.format_map(fargs), env, extra_args=extra_args, dependencies=dependencies)

    def get_options(self) -> 'KeyedOptionDictType':
        """Return the base options extended with the CUDA ``std`` and ``ccbindir`` options."""
        opts = super().get_options()
        std_key = OptionKey('std', machine=self.for_machine, lang=self.language)
        ccbindir_key = OptionKey('ccbindir', machine=self.for_machine, lang=self.language)
        opts.update({
            std_key:      coredata.UserComboOption('C++ language standard to use with CUDA',
                                                   ['none', 'c++03', 'c++11', 'c++14', 'c++17'], 'none'),
            ccbindir_key: coredata.UserStringOption('CUDA non-default toolchain directory to use (-ccbin)',
                                                    ''),
        })
        return opts

    def _to_host_compiler_options(self, options: 'KeyedOptionDictType') -> 'KeyedOptionDictType':
        """
        Convert an NVCC Option set to a host compiler's option set.
        """

        # We must strip the -std option from the host compiler option set, as NVCC has
        # its own -std flag that may not agree with the host compiler's.
        overrides = {name: opt.value for name, opt in options.items()}
        overrides.pop(OptionKey('std', machine=self.for_machine,
                                lang=self.host_compiler.language), None)
        host_options = self.host_compiler.get_options().copy()
        # NOTE(review): the keys here appear to be OptionKey objects (see
        # get_options), so this string membership test presumably never
        # matches; it is kept unchanged pending confirmation.
        if 'std' in host_options:
            del host_options['std']  # type: ignore
        return OptionOverrideProxy(overrides, host_options)

    def get_option_compile_args(self, options: 'KeyedOptionDictType') -> T.List[str]:
        """Return compile arguments derived from ``options``.

        The result is the ccbin argument (if any), NVCC's own ``--std=``
        (except on Windows) and the translated host compiler option arguments.
        """
        args = self.get_ccbin_args(options)
        # On Windows, the version of the C++ standard used by nvcc is dictated by
        # the combination of CUDA version and MSVC version; the --std= is thus ignored
        # and attempting to use it will result in a warning: https://stackoverflow.com/a/51272091/741027
        if not is_windows():
            key = OptionKey('std', machine=self.for_machine, lang=self.language)
            std = options[key]
            if std.value != 'none':
                args.append('--std=' + std.value)

        return args + self._to_host_flags(self.host_compiler.get_option_compile_args(self._to_host_compiler_options(options)))

    def get_option_link_args(self, options: 'KeyedOptionDictType') -> T.List[str]:
        """Return the ccbin argument (if any) plus the translated host option link arguments."""
        args = self.get_ccbin_args(options)
        return args + self._to_host_flags(self.host_compiler.get_option_link_args(self._to_host_compiler_options(options)), _Phase.LINKER)

    def get_soname_args(self, env: 'Environment', prefix: str, shlib_name: str,
                        suffix: str, soversion: str,
                        darwin_versions: T.Tuple[str, str]) -> T.List[str]:
        """Return the host compiler's soname arguments, translated for NVCC."""
        return self._to_host_flags(self.host_compiler.get_soname_args(
            env, prefix, shlib_name, suffix, soversion, darwin_versions), _Phase.LINKER)

    def get_compile_only_args(self) -> T.List[str]:
        """Return the flag that makes NVCC compile without linking."""
        return ['-c']

    def get_no_optimization_args(self) -> T.List[str]:
        """Return the flag that disables optimization."""
        return ['-O0']

    def get_optimization_args(self, optimization_level: str) -> T.List[str]:
        """Look up the flags for ``optimization_level`` in ``cuda_optimization_args``."""
        # alternatively, consider simply redirecting this to the host compiler, which would
        # give us more control over options like "optimize for space" (which nvcc doesn't support):
        # return self._to_host_flags(self.host_compiler.get_optimization_args(optimization_level))
        return cuda_optimization_args[optimization_level]

    def sanitizer_compile_args(self, value: str) -> T.List[str]:
        """Return the host compiler's sanitizer compile arguments, translated for NVCC."""
        return self._to_host_flags(self.host_compiler.sanitizer_compile_args(value))

    def sanitizer_link_args(self, value: str) -> T.List[str]:
        """Return the host compiler's sanitizer link arguments, translated for NVCC.

        Translated in the linker phase, like every other link-argument
        method of this class, so that ``/``-style arguments are forwarded
        with ``-Xlinker=`` rather than ``-Xcompiler=``.
        """
        return self._to_host_flags(self.host_compiler.sanitizer_link_args(value), _Phase.LINKER)

    def get_debug_args(self, is_debug: bool) -> T.List[str]:
        """Look up the debug flags for ``is_debug`` in ``cuda_debug_args``."""
        return cuda_debug_args[is_debug]

    def get_werror_args(self) -> T.List[str]:
        """Return the flag turning the listed NVCC warning kinds into errors."""
        return ['-Werror=cross-execution-space-call,deprecated-declarations,reorder']

    def get_warn_args(self, level: str) -> T.List[str]:
        """Return the warning flags for ``level``, pre-translated in ``__init__``."""
        return self.warn_args[level]

    def get_buildtype_args(self, buildtype: str) -> T.List[str]:
        """Return ``cuda_buildtype_args`` for ``buildtype`` plus the translated host flags."""
        # nvcc doesn't support msvc's "Edit and Continue" PDB format; "downgrade" to
        # a regular PDB to avoid cl's warning to that effect (D9025 : overriding '/ZI' with '/Zi')
        host_args = ['/Zi' if arg == '/ZI' else arg for arg in self.host_compiler.get_buildtype_args(buildtype)]
        return cuda_buildtype_args[buildtype] + self._to_host_flags(host_args)

    def get_include_args(self, path: str, is_system: bool) -> T.List[str]:
        """Return the include flag for ``path``; an empty path means ``.``."""
        if path == '':
            path = '.'
        return ['-isystem=' + path] if is_system else ['-I' + path]

    def get_compile_debugfile_args(self, rel_obj: str, pch: bool = False) -> T.List[str]:
        """Return the host compiler's compile debug-file arguments, translated for NVCC."""
        return self._to_host_flags(self.host_compiler.get_compile_debugfile_args(rel_obj, pch))

    def get_link_debugfile_args(self, targetfile: str) -> T.List[str]:
        """Return the host compiler's link debug-file arguments, translated for NVCC."""
        return self._to_host_flags(self.host_compiler.get_link_debugfile_args(targetfile), _Phase.LINKER)

    def get_depfile_suffix(self) -> str:
        """Return the dependency file suffix."""
        return 'd'

    def get_buildtype_linker_args(self, buildtype: str) -> T.List[str]:
        """Return the host compiler's build-type linker arguments, translated for NVCC."""
        return self._to_host_flags(self.host_compiler.get_buildtype_linker_args(buildtype), _Phase.LINKER)

    def build_rpath_args(self, env: 'Environment', build_dir: str, from_dir: str,
                         rpath_paths: str, build_rpath: str,
                         install_rpath: str) -> T.Tuple[T.List[str], T.Set[bytes]]:
        """Return the host compiler's rpath arguments translated for NVCC.

        The second element of the host compiler's result is passed through
        unchanged.
        """
        (rpath_args, rpath_dirs_to_remove) = self.host_compiler.build_rpath_args(
            env, build_dir, from_dir, rpath_paths, build_rpath, install_rpath)
        return (self._to_host_flags(rpath_args, _Phase.LINKER), rpath_dirs_to_remove)

    def linker_to_compiler_args(self, args: T.List[str]) -> T.List[str]:
        """Return ``args`` unchanged."""
        return args

    def get_pic_args(self) -> T.List[str]:
        """Return the host compiler's PIC arguments, translated for NVCC."""
        return self._to_host_flags(self.host_compiler.get_pic_args())

    def compute_parameters_with_absolute_paths(self, parameter_list: T.List[str],
                                               build_dir: str) -> T.List[str]:
        """Return an empty list; the inputs are not used."""
        return []

    def get_output_args(self, target: str) -> T.List[str]:
        """Return the arguments that name ``target`` as the output file."""
        return ['-o', target]

    def get_std_exe_link_args(self) -> T.List[str]:
        """Return the host compiler's standard executable link arguments, translated for NVCC."""
        return self._to_host_flags(self.host_compiler.get_std_exe_link_args(), _Phase.LINKER)

    def find_library(self, libname: str, env: 'Environment', extra_dirs: T.List[str],
                     libtype: LibType = LibType.PREFER_SHARED) -> T.Optional[T.List[str]]:
        """Return ``-l<libname>`` without searching; the other parameters are unused."""
        return ['-l' + libname]  # FIXME

    def get_crt_compile_args(self, crt_val: str, buildtype: str) -> T.List[str]:
        """Return the host compiler's CRT compile arguments, translated for NVCC."""
        return self._to_host_flags(self.host_compiler.get_crt_compile_args(crt_val, buildtype))

    def get_crt_link_args(self, crt_val: str, buildtype: str) -> T.List[str]:
        """Return the host compiler's CRT link arguments, translated for NVCC.

        ``/NODEFAULTLIB:LIBCMT.lib`` is prepended when the host compiler's
        CRT compile arguments select ``/MDd``, ``/MD`` or ``/MTd``.
        """
        # nvcc defaults to static, release version of msvc runtime and provides no
        # native option to override it; override it with /NODEFAULTLIB
        host_link_arg_overrides = []
        host_crt_compile_args = self.host_compiler.get_crt_compile_args(crt_val, buildtype)
        if any(arg in ['/MDd', '/MD', '/MTd'] for arg in host_crt_compile_args):
            host_link_arg_overrides += ['/NODEFAULTLIB:LIBCMT.lib']
        return self._to_host_flags(host_link_arg_overrides + self.host_compiler.get_crt_link_args(crt_val, buildtype), _Phase.LINKER)

    def get_target_link_args(self, target: 'BuildTarget') -> T.List[str]:
        """Return the base class's target link arguments, translated for NVCC."""
        return self._to_host_flags(super().get_target_link_args(target), _Phase.LINKER)

    def get_dependency_compile_args(self, dep: 'Dependency') -> T.List[str]:
        """Return the base class's dependency compile arguments, translated for NVCC."""
        return self._to_host_flags(super().get_dependency_compile_args(dep))

    def get_dependency_link_args(self, dep: 'Dependency') -> T.List[str]:
        """Return the base class's dependency link arguments, translated for NVCC."""
        return self._to_host_flags(super().get_dependency_link_args(dep), _Phase.LINKER)

    def get_ccbin_args(self, options: 'KeyedOptionDictType') -> T.List[str]:
        """Return ``-ccbin=<dir>`` if the ``ccbindir`` option is a non-empty string, else ``[]``."""
        key = OptionKey('ccbindir', machine=self.for_machine, lang=self.language)
        ccbindir = options[key].value
        if isinstance(ccbindir, str) and ccbindir != '':
            return [self._shield_nvcc_list_arg('-ccbin='+ccbindir, False)]
        else:
            return []

    def get_profile_generate_args(self) -> T.List[str]:
        """Return the host compiler's profile-generation flags, each prefixed with ``-Xcompiler=``."""
        return ['-Xcompiler=' + x for x in self.host_compiler.get_profile_generate_args()]

    def get_profile_use_args(self) -> T.List[str]:
        """Return the host compiler's profile-use flags, each prefixed with ``-Xcompiler=``."""
        return ['-Xcompiler=' + x for x in self.host_compiler.get_profile_use_args()]

    def get_disable_assert_args(self) -> T.List[str]:
        """Return the host compiler's assert-disabling flags, untranslated."""
        return self.host_compiler.get_disable_assert_args()