|  | # SPDX-License-Identifier: Apache-2.0 | 
|  | # Copyright 2012-2017 The Meson development team | 
|  |  | 
|  | from __future__ import annotations | 
|  |  | 
|  | import enum | 
|  | import os.path | 
|  | import string | 
|  | import typing as T | 
|  |  | 
|  | from .. import coredata | 
|  | from .. import options | 
|  | from .. import mlog | 
|  | from ..mesonlib import ( | 
|  | EnvironmentException, Popen_safe, | 
|  | is_windows, LibType, version_compare, | 
|  | ) | 
|  | from .compilers import Compiler | 
|  |  | 
|  | if T.TYPE_CHECKING: | 
|  | from .compilers import CompileCheckMode | 
|  | from ..build import BuildTarget | 
|  | from ..coredata import MutableKeyedOptionDictType, KeyedOptionDictType | 
|  | from ..dependencies import Dependency | 
|  | from ..environment import Environment  # noqa: F401 | 
|  | from ..envconfig import MachineInfo | 
|  | from ..linkers.linkers import DynamicLinker | 
|  | from ..mesonlib import MachineChoice | 
|  |  | 
|  |  | 
# NVCC command-line flags selected for each optimization-level key.
cuda_optimization_args: T.Dict[str, T.List[str]] = {
    'plain': [],                    # no flags at all
    '0': ['-G'],                    # -G is NVCC's --device-debug
    'g': ['-O0'],
    '1': ['-O1'],
    '2': ['-O2', '-lineinfo'],      # -lineinfo is NVCC's --generate-line-info
    '3': ['-O3'],
    's': ['-O3'],                   # same flags as level '3'
}
|  |  | 
# NVCC flags keyed by whether debug information is requested.
cuda_debug_args: T.Dict[bool, T.List[str]] = {False: [], True: ['-g']}
|  |  | 
|  |  | 
|  | class _Phase(enum.Enum): | 
|  |  | 
|  | COMPILER = 'compiler' | 
|  | LINKER = 'linker' | 
|  |  | 
|  |  | 
|  | class CudaCompiler(Compiler): | 
|  |  | 
# Glued-form NVCC option ('-Xlinker' is the short name of --linker-options).
LINKER_PREFIX = '-Xlinker='
language = 'cuda'

# NVCC flags taking no arguments.
# Flags found in this set are forwarded to NVCC unchanged by _to_host_flags().
# Each row lists the long spelling and, where one exists, the short spelling.
# '--allow-unsupported-compiler' appears under two reference sections; as this
# is a set literal, the repeated entry has no effect.
_FLAG_PASSTHRU_NOARGS = {
    # NVCC --long-option,                   NVCC -short-option              CUDA Toolkit 11.2.1 Reference
    '--objdir-as-tempdir',                  '-objtemp',                     # 4.2.1.2
    '--generate-dependency-targets',        '-MP',                          # 4.2.1.12
    '--allow-unsupported-compiler',         '-allow-unsupported-compiler',  # 4.2.1.14
    '--link',                                                               # 4.2.2.1
    '--lib',                                '-lib',                         # 4.2.2.2
    '--device-link',                        '-dlink',                       # 4.2.2.3
    '--device-c',                           '-dc',                          # 4.2.2.4
    '--device-w',                           '-dw',                          # 4.2.2.5
    '--cuda',                               '-cuda',                        # 4.2.2.6
    '--compile',                            '-c',                           # 4.2.2.7
    '--fatbin',                             '-fatbin',                      # 4.2.2.8
    '--cubin',                              '-cubin',                       # 4.2.2.9
    '--ptx',                                '-ptx',                         # 4.2.2.10
    '--preprocess',                         '-E',                           # 4.2.2.11
    '--generate-dependencies',              '-M',                           # 4.2.2.12
    '--generate-nonsystem-dependencies',    '-MM',                          # 4.2.2.13
    '--generate-dependencies-with-compile', '-MD',                          # 4.2.2.14
    '--generate-nonsystem-dependencies-with-compile', '-MMD',               # 4.2.2.15
    '--run',                                                                # 4.2.2.16
    '--profile',                            '-pg',                          # 4.2.3.1
    '--debug',                              '-g',                           # 4.2.3.2
    '--device-debug',                       '-G',                           # 4.2.3.3
    '--extensible-whole-program',           '-ewp',                         # 4.2.3.4
    '--generate-line-info',                 '-lineinfo',                    # 4.2.3.5
    '--dlink-time-opt',                     '-dlto',                        # 4.2.3.8
    '--no-exceptions',                      '-noeh',                        # 4.2.3.11
    '--shared',                             '-shared',                      # 4.2.3.12
    '--no-host-device-initializer-list',    '-nohdinitlist',                # 4.2.3.15
    '--expt-relaxed-constexpr',             '-expt-relaxed-constexpr',      # 4.2.3.16
    '--extended-lambda',                    '-extended-lambda',             # 4.2.3.17
    '--expt-extended-lambda',               '-expt-extended-lambda',        # 4.2.3.18
    '--m32',                                '-m32',                         # 4.2.3.20
    '--m64',                                '-m64',                         # 4.2.3.21
    '--forward-unknown-to-host-compiler',   '-forward-unknown-to-host-compiler', # 4.2.5.1
    '--forward-unknown-to-host-linker',     '-forward-unknown-to-host-linker',   # 4.2.5.2
    '--dont-use-profile',                   '-noprof',                      # 4.2.5.3
    '--dryrun',                             '-dryrun',                      # 4.2.5.5
    '--verbose',                            '-v',                           # 4.2.5.6
    '--keep',                               '-keep',                        # 4.2.5.7
    '--save-temps',                         '-save-temps',                  # 4.2.5.9
    '--clean-targets',                      '-clean',                       # 4.2.5.10
    '--no-align-double',                                                    # 4.2.5.16
    '--no-device-link',                     '-nodlink',                     # 4.2.5.17
    '--allow-unsupported-compiler',         '-allow-unsupported-compiler',  # 4.2.5.18
    '--use_fast_math',                      '-use_fast_math',               # 4.2.7.7
    '--extra-device-vectorization',         '-extra-device-vectorization',  # 4.2.7.12
    '--compile-as-tools-patch',             '-astoolspatch',                # 4.2.7.13
    '--keep-device-functions',              '-keep-device-functions',       # 4.2.7.14
    '--disable-warnings',                   '-w',                           # 4.2.8.1
    '--source-in-ptx',                      '-src-in-ptx',                  # 4.2.8.2
    '--restrict',                           '-restrict',                    # 4.2.8.3
    '--Wno-deprecated-gpu-targets',         '-Wno-deprecated-gpu-targets',  # 4.2.8.4
    '--Wno-deprecated-declarations',        '-Wno-deprecated-declarations', # 4.2.8.5
    '--Wreorder',                           '-Wreorder',                    # 4.2.8.6
    '--Wdefault-stream-launch',             '-Wdefault-stream-launch',      # 4.2.8.7
    '--Wext-lambda-captures-this',          '-Wext-lambda-captures-this',   # 4.2.8.8
    '--display-error-number',               '-err-no',                      # 4.2.8.10
    '--resource-usage',                     '-res-usage',                   # 4.2.8.14
    '--help',                               '-h',                           # 4.2.8.15
    '--version',                            '-V',                           # 4.2.8.16
    '--list-gpu-code',                      '-code-ls',                     # 4.2.8.20
    '--list-gpu-arch',                      '-arch-ls',                     # 4.2.8.21
}
# Dictionary of NVCC flags taking either one argument or a comma-separated list.
# Maps --long to -short options, because the short options are more GCC-like.
# _to_host_flags() uses it to normalise a long spelling to its short one before
# applying per-flag handling. The trailing comment on each row is the section
# number in the CUDA Toolkit reference.
_FLAG_LONG2SHORT_WITHARGS = {
    '--output-file':                        '-o',                           # 4.2.1.1
    '--pre-include':                        '-include',                     # 4.2.1.3
    '--library':                            '-l',                           # 4.2.1.4
    '--define-macro':                       '-D',                           # 4.2.1.5
    '--undefine-macro':                     '-U',                           # 4.2.1.6
    '--include-path':                       '-I',                           # 4.2.1.7
    '--system-include':                     '-isystem',                     # 4.2.1.8
    '--library-path':                       '-L',                           # 4.2.1.9
    '--output-directory':                   '-odir',                        # 4.2.1.10
    '--dependency-output':                  '-MF',                          # 4.2.1.11
    '--compiler-bindir':                    '-ccbin',                       # 4.2.1.13
    '--archiver-binary':                    '-arbin',                       # 4.2.1.15
    '--cudart':                             '-cudart',                      # 4.2.1.16
    '--cudadevrt':                          '-cudadevrt',                   # 4.2.1.17
    '--libdevice-directory':                '-ldir',                        # 4.2.1.18
    '--target-directory':                   '-target-dir',                  # 4.2.1.19
    '--optimization-info':                  '-opt-info',                    # 4.2.3.6
    '--optimize':                           '-O',                           # 4.2.3.7
    '--ftemplate-backtrace-limit':          '-ftemplate-backtrace-limit',   # 4.2.3.9
    '--ftemplate-depth':                    '-ftemplate-depth',             # 4.2.3.10
    '--x':                                  '-x',                           # 4.2.3.13
    '--std':                                '-std',                         # 4.2.3.14
    '--machine':                            '-m',                           # 4.2.3.19
    '--compiler-options':                   '-Xcompiler',                   # 4.2.4.1
    '--linker-options':                     '-Xlinker',                     # 4.2.4.2
    '--archive-options':                    '-Xarchive',                    # 4.2.4.3
    '--ptxas-options':                      '-Xptxas',                      # 4.2.4.4
    '--nvlink-options':                     '-Xnvlink',                     # 4.2.4.5
    '--threads':                            '-t',                           # 4.2.5.4
    '--keep-dir':                           '-keep-dir',                    # 4.2.5.8
    '--run-args':                           '-run-args',                    # 4.2.5.11
    '--input-drive-prefix':                 '-idp',                         # 4.2.5.12
    '--dependency-drive-prefix':            '-ddp',                         # 4.2.5.13
    '--drive-prefix':                       '-dp',                          # 4.2.5.14
    '--dependency-target-name':             '-MT',                          # 4.2.5.15
    '--default-stream':                     '-default-stream',              # 4.2.6.1
    '--gpu-architecture':                   '-arch',                        # 4.2.7.1
    '--gpu-code':                           '-code',                        # 4.2.7.2
    '--generate-code':                      '-gencode',                     # 4.2.7.3
    '--relocatable-device-code':            '-rdc',                         # 4.2.7.4
    '--entries':                            '-e',                           # 4.2.7.5
    '--maxrregcount':                       '-maxrregcount',                # 4.2.7.6
    '--ftz':                                '-ftz',                         # 4.2.7.8
    '--prec-div':                           '-prec-div',                    # 4.2.7.9
    '--prec-sqrt':                          '-prec-sqrt',                   # 4.2.7.10
    '--fmad':                               '-fmad',                        # 4.2.7.11
    '--Werror':                             '-Werror',                      # 4.2.8.9
    '--diag-error':                         '-diag-error',                  # 4.2.8.11
    '--diag-suppress':                      '-diag-suppress',               # 4.2.8.12
    '--diag-warn':                          '-diag-warn',                   # 4.2.8.13
    '--options-file':                       '-optf',                        # 4.2.8.17
    '--time':                               '-time',                        # 4.2.8.18
    '--qpp-config':                         '-qpp-config',                  # 4.2.8.19
}
# Reverse map -short to --long options.
# Used by _to_host_flags() only for membership tests on the short spellings.
_FLAG_SHORT2LONG_WITHARGS = {v: k for k, v in _FLAG_LONG2SHORT_WITHARGS.items()}

# Compiler identifier string for this class.
id = 'nvcc'
|  |  | 
def __init__(self, ccache: T.List[str], exelist: T.List[str], version: str, for_machine: MachineChoice,
             is_cross: bool,
             host_compiler: Compiler, info: 'MachineInfo',
             linker: T.Optional['DynamicLinker'] = None,
             full_version: T.Optional[str] = None):
    """Set up the NVCC wrapper around an already-detected host compiler.

    Everything except ``host_compiler`` is forwarded to the base class
    constructor. The host compiler is stored, its ``base_options`` are
    reused, and its warning and werror arguments are re-expressed in NVCC
    form.
    """
    super().__init__(ccache, exelist, version, for_machine, info, linker=linker, full_version=full_version, is_cross=is_cross)
    self.host_compiler = host_compiler
    self.base_options = host_compiler.base_options

    # -Wpedantic generates useless churn due to nvcc's dual compilation model producing
    # a temporary host C++ file that includes gcc-style line directives:
    # https://stackoverflow.com/a/31001220
    translated_warn_args = {}
    for level, host_flags in host_compiler.warn_args.items():
        without_pedantic = [f for f in host_flags if f != '-Wpedantic']
        translated_warn_args[level] = self._to_host_flags(without_pedantic)
    self.warn_args = translated_warn_args

    # Each host werror argument is handed to the host compiler via -Xcompiler=.
    self.host_werror_args = ['-Xcompiler=' + werror_arg
                             for werror_arg in self.host_compiler.get_werror_args()]
|  |  | 
|  | @classmethod | 
|  | def _shield_nvcc_list_arg(cls, arg: str, listmode: bool = True) -> str: | 
|  | r""" | 
|  | Shield an argument against both splitting by NVCC's list-argument | 
|  | parse logic, and interpretation by any shell. | 
|  |  | 
|  | NVCC seems to consider every comma , that is neither escaped by \ nor inside | 
|  | a double-quoted string a split-point. Single-quotes do not provide protection | 
|  | against splitting; In fact, after splitting they are \-escaped. Unfortunately, | 
|  | double-quotes don't protect against shell expansion. What follows is a | 
|  | complex dance to accommodate everybody. | 
|  | """ | 
|  |  | 
|  | SQ = "'" | 
|  | DQ = '"' | 
|  | CM = "," | 
|  | BS = "\\" | 
|  | DQSQ = DQ+SQ+DQ | 
|  | quotable = set(string.whitespace+'"$`\\') | 
|  |  | 
|  | if CM not in arg or not listmode: | 
|  | if SQ not in arg: | 
|  | # If any of the special characters "$`\ or whitespace are present, single-quote. | 
|  | # Otherwise return bare. | 
|  | if set(arg).intersection(quotable): | 
|  | return SQ+arg+SQ | 
|  | else: | 
|  | return arg # Easy case: no splits, no quoting. | 
|  | else: | 
|  | # There are single quotes. Double-quote them, and single-quote the | 
|  | # strings between them. | 
|  | l = [cls._shield_nvcc_list_arg(s) for s in arg.split(SQ)] | 
|  | l = sum([[s, DQSQ] for s in l][:-1], [])  # Interleave l with DQSQs | 
|  | return ''.join(l) | 
|  | else: | 
|  | # A comma is present, and list mode was active. | 
|  | # We apply (what we guess is) the (primitive) NVCC splitting rule: | 
|  | l = [''] | 
|  | instring = False | 
|  | argit = iter(arg) | 
|  | for c in argit: | 
|  | if c == CM and not instring: | 
|  | l.append('') | 
|  | elif c == DQ: | 
|  | l[-1] += c | 
|  | instring = not instring | 
|  | elif c == BS: | 
|  | try: | 
|  | l[-1] += next(argit) | 
|  | except StopIteration: | 
|  | break | 
|  | else: | 
|  | l[-1] += c | 
|  |  | 
|  | # Shield individual strings, without listmode, then return them with | 
|  | # escaped commas between them. | 
|  | l = [cls._shield_nvcc_list_arg(s, listmode=False) for s in l] | 
|  | return r'\,'.join(l) | 
|  |  | 
|  | @classmethod | 
|  | def _merge_flags(cls, flags: T.List[str]) -> T.List[str]: | 
|  | r""" | 
|  | The flags to NVCC gets exceedingly verbose and unreadable when too many of them | 
|  | are shielded with -Xcompiler. Merge consecutive -Xcompiler-wrapped arguments | 
|  | into one. | 
|  | """ | 
|  | if len(flags) <= 1: | 
|  | return flags | 
|  | flagit = iter(flags) | 
|  | xflags = [] | 
|  |  | 
|  | def is_xcompiler_flag_isolated(flag: str) -> bool: | 
|  | return flag == '-Xcompiler' | 
|  |  | 
|  | def is_xcompiler_flag_glued(flag: str) -> bool: | 
|  | return flag.startswith('-Xcompiler=') | 
|  |  | 
|  | def is_xcompiler_flag(flag: str) -> bool: | 
|  | return is_xcompiler_flag_isolated(flag) or is_xcompiler_flag_glued(flag) | 
|  |  | 
|  | def get_xcompiler_val(flag: str, flagit: T.Iterator[str]) -> str: | 
|  | if is_xcompiler_flag_glued(flag): | 
|  | return flag[len('-Xcompiler='):] | 
|  | else: | 
|  | try: | 
|  | return next(flagit) | 
|  | except StopIteration: | 
|  | return "" | 
|  |  | 
|  | ingroup = False | 
|  | for flag in flagit: | 
|  | if not is_xcompiler_flag(flag): | 
|  | ingroup = False | 
|  | xflags.append(flag) | 
|  | elif ingroup: | 
|  | xflags[-1] += ',' | 
|  | xflags[-1] += get_xcompiler_val(flag, flagit) | 
|  | elif is_xcompiler_flag_isolated(flag): | 
|  | ingroup = True | 
|  | xflags.append(flag) | 
|  | xflags.append(get_xcompiler_val(flag, flagit)) | 
|  | elif is_xcompiler_flag_glued(flag): | 
|  | ingroup = True | 
|  | xflags.append(flag) | 
|  | else: | 
|  | raise ValueError("-Xcompiler flag merging failed, unknown argument form!") | 
|  | return xflags | 
|  |  | 
def _to_host_flags(self, flags: T.List[str], phase: _Phase = _Phase.COMPILER) -> T.List[str]:
    """
    Translate generic "GCC-speak" plus particular "NVCC-speak" flags to NVCC flags.

    NVCC's "short" flags have broad similarities to the GCC standard, but have
    gratuitous, irritating differences.

    :param flags: arguments in GCC or NVCC spelling, in command-line order.
    :param phase: selects the ``-X<phase>=`` prefix applied to arguments that
        start with ``/``; it has no effect on any other argument.
    :return: the translated argument list, with adjacent -Xcompiler arguments merged.
    :raises AssertionError: if a flag that requires a value is the last
        element of ``flags`` and therefore has none.
    """

    xflags: T.List[str] = []
    flagit = iter(flags)

    for flag in flagit:
        # Reset the value for every flag. Without this, a value-taking flag at
        # the very end of the list would silently reuse the value parsed for an
        # earlier flag (or hit an unbound local on the first flag) instead of
        # tripping the missing-argument check below.
        val: T.Optional[str] = None

        # The CUDA Toolkit Documentation, in 4.1. Command Option Types and Notation,
        # specifies that NVCC does not parse the standard flags as GCC does. It has
        # its own strategy, to wit:
        #
        #     nvcc recognizes three types of command options: boolean options, single
        #     value options, and list options.
        #
        #     Boolean options do not have an argument; they are either specified on a
        #     command line or not. Single value options must be specified at most once,
        #     and list options may be repeated. Examples of each of these option types
        #     are, respectively: --verbose (switch to verbose mode), --output-file
        #     (specify output file), and --include-path (specify include path).
        #
        #     Single value options and list options must have arguments, which must
        #     follow the name of the option itself by either one of more spaces or an
        #     equals character. When a one-character short name such as -I, -l, and -L
        #     is used, the value of the option may also immediately follow the option
        #     itself without being separated by spaces or an equal character. The
        #     individual values of list options may be separated by commas in a single
        #     instance of the option, or the option may be repeated, or any
        #     combination of these two cases.
        #
        # One strange consequence of this choice is that directory and filenames that
        # contain commas (',') cannot be passed to NVCC (at least, not as easily as
        # in GCC). Another strange consequence is that it is legal to supply flags
        # such as
        #
        #     -lpthread,rt,dl,util
        #     -l pthread,rt,dl,util
        #     -l=pthread,rt,dl,util
        #
        # and each of the above alternatives is equivalent to GCC-speak
        #
        #     -lpthread -lrt -ldl -lutil
        #     -l pthread -l rt -l dl -l util
        #     -l=pthread -l=rt -l=dl -l=util
        #
        # *With the exception of commas in the name*, GCC-speak for these list flags
        # is a strict subset of NVCC-speak, so we passthrough those flags.
        #
        # The -D macro-define flag is documented as somehow shielding commas from
        # splitting a definition. Balanced parentheses, braces and single-quotes
        # around the comma are not sufficient, but balanced double-quotes are. The
        # shielding appears to work with -l, -I, -L flags as well, for instance.
        #
        # Since our goal is to replicate GCC-speak as much as possible, we check for
        # commas in all list-arguments and shield them with double-quotes. We make
        # an exception for -D (where this would be value-changing) and -U (because
        # it isn't possible to define a macro with a comma in the name).

        if flag in self._FLAG_PASSTHRU_NOARGS:
            xflags.append(flag)
            continue

        # Handle breakup of flag-values into a flag-part and value-part.
        if flag[:1] not in '-/':
            # This is not a flag. It's probably a file input. Pass it through.
            xflags.append(flag)
            continue
        elif flag[:1] == '/':
            # This is ambiguously either an MSVC-style /switch or an absolute path
            # to a file. For some magical reason the following works acceptably in
            # both cases.
            # We only want to prefix arguments that are NOT static archives, since
            # the latter could contain relocatable device code (-dc/-rdc=true).
            prefix = '' if flag.endswith('.a') else f'-X{phase.value}='
            wrap = '"' if ',' in flag else ''
            xflags.append(f'{prefix}{wrap}{flag}{wrap}')
            continue
        elif len(flag) >= 2 and flag[0] == '-' and flag[1] in 'IDULlmOxmte':
            # This is a single-letter short option. These options (with the
            # exception of -o) are allowed to receive their argument with neither
            # space nor = sign before them. Detect and separate them in that event.
            if flag[2:3] == '':            # -I something
                try:
                    val = next(flagit)
                except StopIteration:
                    pass                     # val stays None; reported below
            elif flag[2:3] == '=':           # -I=something
                val = flag[3:]
            else:                            # -Isomething
                val = flag[2:]
            flag = flag[:2]                  # -I
        elif flag in self._FLAG_LONG2SHORT_WITHARGS or \
                flag in self._FLAG_SHORT2LONG_WITHARGS:
            # This is either -o or a multi-letter flag, and it is receiving its
            # value isolated.
            try:
                val = next(flagit)           # -o something
            except StopIteration:
                pass                         # val stays None; reported below
        elif flag.split('=', 1)[0] in self._FLAG_LONG2SHORT_WITHARGS or \
                flag.split('=', 1)[0] in self._FLAG_SHORT2LONG_WITHARGS:
            # This is either -o or a multi-letter flag, and it is receiving its
            # value after an = sign.
            flag, val = flag.split('=', 1)    # -o=something
        # Some dependencies (e.g., BoostDependency) add unspaced "-isystem/usr/include" arguments
        elif flag.startswith('-isystem'):
            val = flag[8:].strip()
            flag = flag[:8]
        else:
            # This is a flag, and it's foreign to NVCC.
            #
            # We do not know whether this GCC-speak flag takes an isolated
            # argument. Assuming it does not (the vast majority indeed don't),
            # wrap this argument in an -Xcompiler flag and send it down to NVCC.
            if flag == '-ffast-math':
                xflags.append('-use_fast_math')
                xflags.append('-Xcompiler='+flag)
            elif flag == '-fno-fast-math':
                xflags.append('-ftz=false')
                xflags.append('-prec-div=true')
                xflags.append('-prec-sqrt=true')
                xflags.append('-Xcompiler='+flag)
            elif flag == '-freciprocal-math':
                xflags.append('-prec-div=false')
                xflags.append('-Xcompiler='+flag)
            elif flag == '-fno-reciprocal-math':
                xflags.append('-prec-div=true')
                xflags.append('-Xcompiler='+flag)
            else:
                xflags.append('-Xcompiler='+self._shield_nvcc_list_arg(flag))
                # The above should securely handle GCC's -Wl, -Wa, -Wp, arguments.
            continue

        # Trips only if there is a missing argument.
        assert val is not None, f'flag {flag!r} is missing its argument'

        # Take care of the various NVCC-supported flags that need special handling.
        flag = self._FLAG_LONG2SHORT_WITHARGS.get(flag, flag)

        if flag in {'-include', '-isystem', '-I', '-L', '-l'}:
            # These flags are known to GCC, but list-valued in NVCC. They potentially
            # require double-quoting to prevent NVCC interpreting the flags as lists
            # when GCC would not have done so.
            #
            # We avoid doing this quoting for -D to avoid redefining macros and for
            # -U because it isn't possible to define a macro with a comma in the name.
            # -U with comma arguments is impossible in GCC-speak (and thus unambiguous
            # in NVCC-speak, albeit unportable).
            if len(flag) == 2:
                xflags.append(flag+self._shield_nvcc_list_arg(val))
            elif flag == '-isystem' and val in self.host_compiler.get_default_include_dirs():
                # like GnuLikeCompiler, we have to filter out include directories specified
                # with -isystem that overlap with the host compiler's search path
                pass
            else:
                xflags.append(flag)
                xflags.append(self._shield_nvcc_list_arg(val))
        elif flag == '-O':
            # Handle optimization levels GCC knows about that NVCC does not.
            if val == 'fast':
                xflags.append('-O3')
                xflags.append('-use_fast_math')
                xflags.append('-Xcompiler')
                xflags.append(flag+val)
            elif val in {'s', 'g', 'z'}:
                xflags.append('-Xcompiler')
                xflags.append(flag+val)
            else:
                xflags.append(flag+val)
        elif flag in {'-D', '-U', '-m', '-t'}:
            xflags.append(flag+val)       # For style, keep glued.
        elif flag in {'-std'}:
            xflags.append(flag+'='+val)   # For style, keep glued.
        else:
            xflags.append(flag)
            xflags.append(val)

    return self._merge_flags(xflags)
|  |  | 
def needs_static_linker(self) -> bool:
    """Return whether a static linker is needed; always ``False`` here."""
    return False
|  |  | 
def thread_link_flags(self, environment: 'Environment') -> T.List[str]:
    """Return the host compiler's thread link flags, translated for NVCC.

    The host flags are passed through ``_to_host_flags`` in the linker phase.
    """
    host_thread_flags = self.host_compiler.thread_link_flags(environment)
    return self._to_host_flags(host_thread_flags, _Phase.LINKER)
|  |  | 
def sanity_check(self, work_dir: str, env: 'Environment') -> None:
    """Compile, and where possible run, a small CUDA program.

    A test source file is written into ``work_dir`` and compiled with this
    compiler. If the result can be run (natively through ``nvcc --run``, or
    through the configured exe wrapper), it is executed and its standard
    output is stored in ``self.detected_cc`` when standard error is empty.

    :param work_dir: directory that receives the source file and the binary,
        and that is used as the working directory of both subprocesses.
    :param env: environment consulted for the exe wrapper and the option store.
    :raises EnvironmentException: if the compile step or the run step exits
        with a non-zero return code.
    """
    mlog.debug('Sanity testing ' + self.get_display_language() + ' compiler:', ' '.join(self.exelist))
    mlog.debug('Is cross compiler: %s.' % str(self.is_cross))

    sname = 'sanitycheckcuda.cu'
    # The program prints one "major.minor" line per device to stdout, or the
    # cudaGetDeviceCount() error code to stderr; it always returns 0.
    code = r'''
#include <cuda_runtime.h>
#include <stdio.h>

__global__ void kernel (void) {}

int main(void){
struct cudaDeviceProp prop;
int count, i;
cudaError_t ret = cudaGetDeviceCount(&count);
if(ret != cudaSuccess){
fprintf(stderr, "%d\n", (int)ret);
}else{
for(i=0;i<count;i++){
if(cudaGetDeviceProperties(&prop, i) == cudaSuccess){
fprintf(stdout, "%d.%d\n", prop.major, prop.minor);
}
}
}
fflush(stderr);
fflush(stdout);
return 0;
}
'''
    # Binary name is the source stem, with a '_cross' suffix for cross builds.
    binname = sname.rsplit('.', 1)[0]
    binname += '_cross' if self.is_cross else ''
    source_name = os.path.join(work_dir, sname)
    binary_name = os.path.join(work_dir, binname + '.exe')
    with open(source_name, 'w', encoding='utf-8') as ofile:
        ofile.write(code)

    # The Sanity Test for CUDA language will serve as both a sanity test
    # and a native-build GPU architecture detection test, useful later.
    #
    # For this second purpose, NVCC has very handy flags, --run and
    # --run-args, that allow one to run an application with the
    # environment set up properly. Of course, this only works for native
    # builds; For cross builds we must still use the exe_wrapper (if any).
    self.detected_cc = ''
    flags = []

    # Disable warnings, compile with statically-linked runtime for minimum
    # reliance on the system.
    flags += ['-w', '-cudart', 'static', source_name]

    # Use the -ccbin option, if available, even during sanity checking.
    # Otherwise, on systems where CUDA does not support the default compiler,
    # NVCC becomes unusable.
    flags += self.get_ccbin_args(env.coredata.optstore)

    # If cross-compiling, we can't run the sanity check, only compile it.
    if env.need_exe_wrapper(self.for_machine) and not env.has_exe_wrapper():
        # Linking cross built apps is painful. You can't really
        # tell if you should use -nostdlib or not and for example
        # on OSX the compiler binary is the same but you need
        # a ton of compiler flags to differentiate between
        # arm and x86_64. So just compile.
        flags += self.get_compile_only_args()
    flags += self.get_output_args(binary_name)

    # Compile sanity check
    cmdlist = self.exelist + flags
    mlog.debug('Sanity check compiler command line: ', ' '.join(cmdlist))
    pc, stdo, stde = Popen_safe(cmdlist, cwd=work_dir)
    mlog.debug('Sanity check compile stdout: ')
    mlog.debug(stdo)
    mlog.debug('-----\nSanity check compile stderr:')
    mlog.debug(stde)
    mlog.debug('-----')
    if pc.returncode != 0:
        raise EnvironmentException(f'Compiler {self.name_string()} cannot compile programs.')

    # Run sanity check (if possible)
    if env.need_exe_wrapper(self.for_machine):
        if not env.has_exe_wrapper():
            # Nothing can execute the binary; the compile-only check is the result.
            return
        else:
            cmdlist = env.exe_wrapper.get_command() + [binary_name]
    else:
        # Native build: let NVCC itself launch the binary via --run.
        cmdlist = self.exelist + ['--run', '"' + binary_name + '"']
    mlog.debug('Sanity check run command line: ', ' '.join(cmdlist))
    pe, stdo, stde = Popen_safe(cmdlist, cwd=work_dir)
    mlog.debug('Sanity check run stdout: ')
    mlog.debug(stdo)
    mlog.debug('-----\nSanity check run stderr:')
    mlog.debug(stde)
    mlog.debug('-----')
    pe.wait()
    if pe.returncode != 0:
        raise EnvironmentException(f'Executables created by {self.language} compiler {self.name_string()} are not runnable.')

    # Interpret the result of the sanity test.
    # As mentioned above, it is not only a sanity test but also a GPU
    # architecture detection test.
    if stde == '':
        self.detected_cc = stdo
    else:
        # The test program writes the cudaGetDeviceCount() error code to stderr.
        mlog.debug('cudaGetDeviceCount() returned ' + stde)
|  |  | 
def has_header_symbol(self, hname: str, symbol: str, prefix: str,
                      env: 'Environment', *,
                      extra_args: T.Union[None, T.List[str], T.Callable[[CompileCheckMode], T.List[str]]] = None,
                      dependencies: T.Optional[T.List['Dependency']] = None) -> T.Tuple[bool, bool]:
    """Check whether ``symbol`` is usable after including ``hname``.

    Two probes are compiled in turn. The first treats the symbol as a macro
    or a plain expression; the second, tried only if the first fails, names
    it in a ``using`` declaration so that classes and templates are found.

    :return: the ``(found, cached)`` pair from ``self.compiles`` for the
        probe that decided the answer.
    """
    extra_args = [] if extra_args is None else extra_args
    substitutions = {'prefix': prefix, 'header': hname, 'symbol': symbol}

    # Probe 1: a C-like symbol (macro, function, variable, ...).
    c_like_probe = '''{prefix}
#include <{header}>
int main(void) {{
/* If it's not defined as a macro, try to use as a symbol */
#ifndef {symbol}
{symbol};
#endif
return 0;
}}'''
    # Probe 2: a class or a template.
    using_probe = '''{prefix}
#include <{header}>
using {symbol};
int main(void) {{
return 0;
}}'''

    found, cached = self.compiles(c_like_probe.format_map(substitutions), env,
                                  extra_args=extra_args, dependencies=dependencies)
    if not found:
        return self.compiles(using_probe.format_map(substitutions), env,
                             extra_args=extra_args, dependencies=dependencies)
    return True, cached
|  |  | 
# Version constraints, compared against self.version in get_options(), that
# gate whether 'c++14', 'c++17' and 'c++20' are offered as 'std' choices.
_CPP14_VERSION = '>=9.0'
_CPP17_VERSION = '>=11.0'
_CPP20_VERSION = '>=12.0'
|  |  | 
|  | def get_options(self) -> 'MutableKeyedOptionDictType': | 
|  | cpp_stds = ['none', 'c++03', 'c++11'] | 
|  | if version_compare(self.version, self._CPP14_VERSION): | 
|  | cpp_stds += ['c++14'] | 
|  | if version_compare(self.version, self._CPP17_VERSION): | 
|  | cpp_stds += ['c++17'] | 
|  | if version_compare(self.version, self._CPP20_VERSION): | 
|  | cpp_stds += ['c++20'] | 
|  |  | 
|  | return self.update_options( | 
|  | super().get_options(), | 
|  | self.create_option(options.UserComboOption, | 
|  | self.form_langopt_key('std'), | 
|  | 'C++ language standard to use with CUDA', | 
|  | cpp_stds, | 
|  | 'none'), | 
|  | self.create_option(options.UserStringOption, | 
|  | self.form_langopt_key('ccbindir'), | 
|  | 'CUDA non-default toolchain directory to use (-ccbin)', | 
|  | ''), | 
|  | ) | 
|  |  | 
def _to_host_compiler_options(self, options: 'KeyedOptionDictType') -> 'KeyedOptionDictType':
    """
    Convert an NVCC Option set to a host compiler's option set.

    The result contains one entry per option the host compiler declares,
    taking the value from ``options`` when present and the host compiler's
    own default otherwise. The host compiler's ``std`` option is forced to
    ``'none'``.

    :param options: the option set handed to the CUDA compiler.
    :return: an ``OptionsView`` over the host compiler's options.
    """

    # We must strip the -std option from the host compiler option set, as NVCC has
    # its own -std flag that may not agree with the host compiler's.
    host_options = {key: options.get(key, opt) for key, opt in self.host_compiler.get_options().items()}
    # The override has to be keyed on the *host* compiler's language. A key
    # built from self would name the CUDA `std` option, which is not part of
    # host_options, so the host's own -std would not be stripped.
    std_key = self.host_compiler.form_langopt_key('std')
    overrides = {std_key: 'none'}
    return coredata.OptionsView(host_options, overrides=overrides)
|  |  | 
def get_option_compile_args(self, options: 'KeyedOptionDictType') -> T.List[str]:
    """Build the option-driven compile arguments.

    The result is the ``-ccbin`` arguments, then ``--std=...`` when
    applicable, then the host compiler's option arguments translated by
    ``self._to_host_flags``.
    """
    nvcc_args = self.get_ccbin_args(options)
    # On Windows, the version of the C++ standard used by nvcc is dictated by
    # the combination of CUDA version and MSVC version; the --std= is thus ignored
    # and attempting to use it will result in a warning: https://stackoverflow.com/a/51272091/741027
    if not is_windows():
        std = options[self.form_langopt_key('std')]
        if std.value != 'none':
            nvcc_args.append('--std=' + std.value)

    host_view = self._to_host_compiler_options(options)
    host_args = self.host_compiler.get_option_compile_args(host_view)
    return nvcc_args + self._to_host_flags(host_args)
|  |  | 
def get_option_link_args(self, options: 'KeyedOptionDictType') -> T.List[str]:
    """Build the option-driven link arguments.

    The result is the ``-ccbin`` arguments followed by the host compiler's
    option link arguments, translated for the linker phase.
    """
    ccbin_args = self.get_ccbin_args(options)
    host_view = self._to_host_compiler_options(options)
    host_link_args = self.host_compiler.get_option_link_args(host_view)
    return ccbin_args + self._to_host_flags(host_link_args, _Phase.LINKER)
|  |  | 
def get_soname_args(self, env: 'Environment', prefix: str, shlib_name: str,
                    suffix: str, soversion: str,
                    darwin_versions: T.Tuple[str, str]) -> T.List[str]:
    """Return the host compiler's soname arguments, translated for the linker phase.

    All parameters are forwarded unchanged to the host compiler.
    """
    host_soname_args = self.host_compiler.get_soname_args(
        env, prefix, shlib_name, suffix, soversion, darwin_versions)
    return self._to_host_flags(host_soname_args, _Phase.LINKER)
|  |  | 
def get_compile_only_args(self) -> T.List[str]:
    """Return the argument list ``['-c']``."""
    compile_only = ['-c']
    return compile_only
|  |  | 
def get_no_optimization_args(self) -> T.List[str]:
    """Return the argument list ``['-O0']``."""
    no_optimization = ['-O0']
    return no_optimization
|  |  | 
def get_optimization_args(self, optimization_level: str) -> T.List[str]:
    """Look up the arguments for ``optimization_level`` in ``cuda_optimization_args``.

    An unknown level raises ``KeyError``.
    """
    # alternatively, consider simply redirecting this to the host compiler, which would
    # give us more control over options like "optimize for space" (which nvcc doesn't support):
    # return self._to_host_flags(self.host_compiler.get_optimization_args(optimization_level))
    level_args = cuda_optimization_args[optimization_level]
    return level_args
|  |  | 
def sanitizer_compile_args(self, value: str) -> T.List[str]:
    """Return the host compiler's sanitizer compile arguments for ``value``,
    translated by ``self._to_host_flags``.
    """
    host_args = self.host_compiler.sanitizer_compile_args(value)
    return self._to_host_flags(host_args)
|  |  | 
def sanitizer_link_args(self, value: str) -> T.List[str]:
    """Return the host compiler's sanitizer link arguments for ``value``,
    translated by ``self._to_host_flags`` with its default phase.
    """
    host_args = self.host_compiler.sanitizer_link_args(value)
    return self._to_host_flags(host_args)
|  |  | 
def get_debug_args(self, is_debug: bool) -> T.List[str]:
    """Look up the arguments for ``is_debug`` in ``cuda_debug_args``."""
    debug_args = cuda_debug_args[is_debug]
    return debug_args
|  |  | 
def get_werror_args(self) -> T.List[str]:
    """Return the device-side ``-Werror=...`` argument followed by
    ``self.host_werror_args``.
    """
    werror_args = ['-Werror=cross-execution-space-call,deprecated-declarations,reorder']
    return werror_args + self.host_werror_args
|  |  | 
def get_warn_args(self, level: str) -> T.List[str]:
    """Look up the arguments for ``level`` in ``self.warn_args``.

    An unknown level raises ``KeyError``.
    """
    level_args = self.warn_args[level]
    return level_args
|  |  | 
def get_include_args(self, path: str, is_system: bool) -> T.List[str]:
    """Return the include argument for ``path``.

    An empty ``path`` is replaced by ``'.'``. System includes use
    ``-isystem=<path>``, all others ``-I<path>``.
    """
    include_dir = '.' if path == '' else path
    if is_system:
        return ['-isystem=' + include_dir]
    return ['-I' + include_dir]
|  |  | 
def get_compile_debugfile_args(self, rel_obj: str, pch: bool = False) -> T.List[str]:
    """Return the host compiler's compile-time debug-file arguments,
    translated by ``self._to_host_flags``.
    """
    host_args = self.host_compiler.get_compile_debugfile_args(rel_obj, pch)
    return self._to_host_flags(host_args)
|  |  | 
def get_link_debugfile_args(self, targetfile: str) -> T.List[str]:
    """Return the host compiler's link-time debug-file arguments,
    translated for the linker phase.
    """
    host_args = self.host_compiler.get_link_debugfile_args(targetfile)
    return self._to_host_flags(host_args, _Phase.LINKER)
|  |  | 
def get_depfile_suffix(self) -> str:
    """Return the dependency-file suffix, ``'d'``."""
    suffix = 'd'
    return suffix
|  |  | 
def get_optimization_link_args(self, optimization_level: str) -> T.List[str]:
    """Return the host compiler's optimization link arguments for
    ``optimization_level``, translated for the linker phase.
    """
    host_args = self.host_compiler.get_optimization_link_args(optimization_level)
    return self._to_host_flags(host_args, _Phase.LINKER)
|  |  | 
def build_rpath_args(self, env: 'Environment', build_dir: str, from_dir: str,
                     rpath_paths: T.Tuple[str, ...], build_rpath: str,
                     install_rpath: str) -> T.Tuple[T.List[str], T.Set[bytes]]:
    """Delegate rpath computation to the host compiler.

    :return: a pair of the host's rpath arguments translated for the linker
        phase, and the host's second return value passed through unchanged.
    """
    host_result = self.host_compiler.build_rpath_args(
        env, build_dir, from_dir, rpath_paths, build_rpath, install_rpath)
    host_rpath_args, dirs_to_remove = host_result
    translated = self._to_host_flags(host_rpath_args, _Phase.LINKER)
    return (translated, dirs_to_remove)
|  |  | 
def linker_to_compiler_args(self, args: T.List[str]) -> T.List[str]:
    """Return ``args`` itself, unmodified and uncopied."""
    unchanged = args
    return unchanged
|  |  | 
def get_pic_args(self) -> T.List[str]:
    """Return the host compiler's PIC arguments, translated by
    ``self._to_host_flags``.
    """
    host_pic_args = self.host_compiler.get_pic_args()
    return self._to_host_flags(host_pic_args)
|  |  | 
def compute_parameters_with_absolute_paths(self, parameter_list: T.List[str],
                                           build_dir: str) -> T.List[str]:
    """Return an empty list; both arguments are ignored."""
    nothing: T.List[str] = []
    return nothing
|  |  | 
def get_output_args(self, target: str) -> T.List[str]:
    """Return ``['-o', target]``."""
    output_args = ['-o']
    output_args.append(target)
    return output_args
|  |  | 
def get_dependency_gen_args(self, outtarget: str, outfile: str) -> T.List[str]:
    """Return the dependency-generation arguments.

    :param outtarget: value passed after ``-MT``.
    :param outfile: value passed after ``-MF``.
    :return: ``['-MD', '-MT', outtarget, '-MF', outfile]`` when
        ``self.version`` satisfies ``>= 10.2``, otherwise an empty list.
    """
    # According to nvcc Documentation, `-MD` option is added after 10.2
    # Reference: [CUDA 10.1](https://docs.nvidia.com/cuda/archive/10.1/cuda-compiler-driver-nvcc/index.html#options-for-specifying-compilation-phase-generate-nonsystem-dependencies)
    # Reference: [CUDA 10.2](https://docs.nvidia.com/cuda/archive/10.2/cuda-compiler-driver-nvcc/index.html#options-for-specifying-compilation-phase-generate-nonsystem-dependencies)
    if not version_compare(self.version, '>= 10.2'):
        return []
    return ['-MD', '-MT', outtarget, '-MF', outfile]
|  |  | 
def get_std_exe_link_args(self) -> T.List[str]:
    """Return the host compiler's standard executable link arguments,
    translated for the linker phase.
    """
    host_args = self.host_compiler.get_std_exe_link_args()
    return self._to_host_flags(host_args, _Phase.LINKER)
|  |  | 
def find_library(self, libname: str, env: 'Environment', extra_dirs: T.List[str],
                 libtype: LibType = LibType.PREFER_SHARED, lib_prefix_warning: bool = True) -> T.Optional[T.List[str]]:
    """Delegate the library lookup to the host compiler.

    All arguments are forwarded positionally and the host compiler's result
    is returned without translation.
    """
    host_result = self.host_compiler.find_library(
        libname, env, extra_dirs, libtype, lib_prefix_warning)
    return host_result
|  |  | 
def get_crt_compile_args(self, crt_val: str, buildtype: str) -> T.List[str]:
    """Return the host compiler's CRT compile arguments, translated by
    ``self._to_host_flags``.
    """
    host_crt_args = self.host_compiler.get_crt_compile_args(crt_val, buildtype)
    return self._to_host_flags(host_crt_args)
|  |  | 
def get_crt_link_args(self, crt_val: str, buildtype: str) -> T.List[str]:
    """Return the CRT link arguments, translated for the linker phase.

    When the host compiler's CRT *compile* arguments contain ``/MDd``,
    ``/MD`` or ``/MTd``, ``/NODEFAULTLIB:LIBCMT.lib`` is placed ahead of the
    host compiler's CRT link arguments.
    """
    # nvcc defaults to static, release version of msvc runtime and provides no
    # native option to override it; override it with /NODEFAULTLIB
    host_compile_crt = self.host_compiler.get_crt_compile_args(crt_val, buildtype)
    needs_override = not {'/MDd', '/MD', '/MTd'}.isdisjoint(host_compile_crt)
    link_overrides = ['/NODEFAULTLIB:LIBCMT.lib'] if needs_override else []
    host_link_crt = self.host_compiler.get_crt_link_args(crt_val, buildtype)
    return self._to_host_flags(link_overrides + host_link_crt, _Phase.LINKER)
|  |  | 
def get_target_link_args(self, target: 'BuildTarget') -> T.List[str]:
    """Return the parent class's link arguments for ``target``, translated
    for the linker phase.
    """
    inherited_args = super().get_target_link_args(target)
    return self._to_host_flags(inherited_args, _Phase.LINKER)
|  |  | 
def get_dependency_compile_args(self, dep: 'Dependency') -> T.List[str]:
    """Return the parent class's compile arguments for ``dep``, translated
    by ``self._to_host_flags``.
    """
    inherited_args = super().get_dependency_compile_args(dep)
    return self._to_host_flags(inherited_args)
|  |  | 
def get_dependency_link_args(self, dep: 'Dependency') -> T.List[str]:
    """Return the parent class's link arguments for ``dep``, translated for
    the linker phase.
    """
    inherited_args = super().get_dependency_link_args(dep)
    return self._to_host_flags(inherited_args, _Phase.LINKER)
|  |  | 
def get_ccbin_args(self, options: 'KeyedOptionDictType') -> T.List[str]:
    """Return the ``-ccbin=...`` argument derived from the ``ccbindir`` option.

    :return: a one-element list when the option value is a non-empty
        string, otherwise an empty list.
    """
    ccbindir = options[self.form_langopt_key('ccbindir')].value
    if not isinstance(ccbindir, str) or ccbindir == '':
        return []
    shielded = self._shield_nvcc_list_arg('-ccbin='+ccbindir, False)
    return [shielded]
|  |  | 
def get_profile_generate_args(self) -> T.List[str]:
    """Return the host compiler's profile-generation arguments, each
    prefixed with ``-Xcompiler=``.
    """
    host_args = self.host_compiler.get_profile_generate_args()
    return ['-Xcompiler=' + flag for flag in host_args]
|  |  | 
def get_profile_use_args(self) -> T.List[str]:
    """Return the host compiler's profile-use arguments, each prefixed with
    ``-Xcompiler=``.
    """
    host_args = self.host_compiler.get_profile_use_args()
    return ['-Xcompiler=' + flag for flag in host_args]
|  |  | 
def get_assert_args(self, disable: bool, env: 'Environment') -> T.List[str]:
    """Return the host compiler's assert arguments, passed through without
    translation.
    """
    host_assert_args = self.host_compiler.get_assert_args(disable, env)
    return host_assert_args