// cmake/Tests/Cuda/ProperDeviceLibraries/main.cu


#include <iostream>

#include <cublas_v2.h>
#include <cuda_runtime.h>

#if defined(USE_THREADS_POSIX) && defined(HAVE_PTHREAD_H)

#  include <pthread.h>
// Calls pthread_self() and returns the calling thread's id converted to int.
// Judging by its name, the function exists so that this translation unit
// references a pthread symbol (presumably to confirm pthread ends up on the
// link line); the returned value itself carries no further meaning here.
static int verify_linking_to_pthread()
{
  const pthread_t self = pthread_self();
  return static_cast<int>(self);
}
#endif

// This test only makes sense for versions of CUDA that ship static
// libraries containing separable-compilation device symbols.
#if __CUDACC_VER_MAJOR__ <= 9
// Kernel that drives cuBLAS from device code: it creates a cuBLAS handle,
// issues a single cublasSgemm (no transposes, all dimensions and leading
// dimensions equal to n) and destroys the handle again.
//
//   n            matrix dimension passed for m, n, k and every leading dim
//   alpha, beta  scalars forwarded to cublasSgemm by address
//   d_A, d_B     input matrix pointers forwarded to cublasSgemm
//   d_C          output matrix pointer forwarded to cublasSgemm
//
// There is no thread indexing: every launched thread runs the whole
// sequence. If the handle cannot be created the kernel does nothing, and
// the status returned by cublasSgemm is not inspected.
__global__ void deviceCublasSgemm(int n, float alpha, float beta,
                                  const float* d_A, const float* d_B,
                                  float* d_C)
{
  cublasHandle_t handle;
  const cublasStatus_t created = cublasCreate(&handle);

  if (created == CUBLAS_STATUS_SUCCESS) {
    // cublasSgemm lives in the cublas_device system static library, so
    // referencing it here checks that system libraries are properly passed
    // to the device link line.
    (void)cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, n, n, n, &alpha, d_A,
                      n, d_B, n, &beta, d_C, n);

    cublasDestroy(handle);
  }
}
#endif

// Makes the first CUDA device with compute capability >= 3.5 that can be
// selected the current device.
//
// Returns 0 once cudaSetDevice succeeds for such a device. Returns 1 when
// the device count cannot be queried, when the properties of any visited
// device cannot be read, or when no suitable device could be selected.
// A device that qualifies but fails cudaSetDevice is reported and skipped
// so that later devices still get a chance.
int choose_cuda_device()
{
  int deviceCount = 0;
  if (cudaGetDeviceCount(&deviceCount) != cudaSuccess) {
    std::cerr << "Failed to retrieve the number of CUDA enabled devices"
              << std::endl;
    return 1;
  }

  for (int device = 0; device < deviceCount; ++device) {
    cudaDeviceProp props;
    if (cudaGetDeviceProperties(&props, device) != cudaSuccess) {
      std::cerr << "Could not retrieve properties from CUDA device " << device
                << std::endl;
      return 1;
    }

    // Only devices with compute capability 3.5 or newer are candidates.
    const bool hasCompute35 =
      props.major > 3 || (props.major == 3 && props.minor >= 5);
    if (!hasCompute35) {
      continue;
    }

    if (cudaSetDevice(device) == cudaSuccess) {
      return 0;
    }
    std::cout << "Could not select CUDA device " << device << std::endl;
  }

  std::cout << "Could not find a CUDA enabled card supporting compute >=3.5"
            << std::endl;
  return 1;
}

// Test entry point.
//
// Selects a CUDA device via choose_cuda_device(); when none can be selected
// the program exits with 0 without launching anything, so machines without
// a suitable GPU do not fail the test. When compiled with CUDA <= 9 it then
// launches deviceCublasSgemm on a single thread and waits for it.
//
// Returns 0 on success or when no device was selected, and 1 when the
// kernel launch or its execution reports a CUDA error.
int main(int argc, char** argv)
{
  int ret = choose_cuda_device();
  if (ret) {
    return 0;
  }

#if __CUDACC_VER_MAJOR__ <= 9
  // initial values that will make sure that the cublasSgemm won't actually
  // do any work
  int n = 0;
  float alpha = 1.0f;
  float beta = 1.0f;
  float* d_A = nullptr;
  float* d_B = nullptr;
  float* d_C = nullptr;
  deviceCublasSgemm<<<1, 1>>>(n, alpha, beta, d_A, d_B, d_C);

  // A kernel launch returns no status and runs asynchronously. Pick up
  // launch errors with cudaGetLastError() and wait for the kernel with
  // cudaDeviceSynchronize() so execution errors are observed and the
  // process does not exit while the kernel is still pending.
  cudaError_t launchStatus = cudaGetLastError();
  if (launchStatus == cudaSuccess) {
    launchStatus = cudaDeviceSynchronize();
  }
  if (launchStatus != cudaSuccess) {
    std::cerr << "deviceCublasSgemm kernel failed: "
              << cudaGetErrorString(launchStatus) << std::endl;
    return 1;
  }
#endif

  return 0;
}
New upstream version 3.12.1 6 years ago
New upstream version 3.16.3 5 years ago			`#include <iostream>`

New upstream version 3.12.1 6 years ago			`#include <cublas_v2.h>`
			`#include <cuda_runtime.h>`

New upstream version 3.13.0~rc2 6 years ago			`#if defined(USE_THREADS_POSIX) && defined(HAVE_PTHREAD_H)`

			`# include <pthread.h>`
			`static int verify_linking_to_pthread()`
			`{`
			`return static_cast<int>(pthread_self());`
			`}`
			`#endif`

New upstream version 3.12.3 6 years ago			`// this test only makes sense for versions of CUDA that ships`
			`// static libraries that have separable compilation device symbols`
			`#if __CUDACC_VER_MAJOR__ <= 9`
New upstream version 3.12.1 6 years ago			`__global__ void deviceCublasSgemm(int n, float alpha, float beta,`
			`const float* d_A, const float* d_B,`
			`float* d_C)`
			`{`
			`cublasHandle_t cnpHandle;`
			`cublasStatus_t status = cublasCreate(&cnpHandle);`

			`if (status != CUBLAS_STATUS_SUCCESS) {`
			`return;`
			`}`

			`// Call function defined in the cublas_device system static library.`
			`// This way we can verify that we properly pass system libraries to the`
			`// device link line`
			`status = cublasSgemm(cnpHandle, CUBLAS_OP_N, CUBLAS_OP_N, n, n, n, &alpha,`
			`d_A, n, d_B, n, &beta, d_C, n);`

			`cublasDestroy(cnpHandle);`
			`}`
New upstream version 3.12.3 6 years ago			`#endif`
New upstream version 3.12.1 6 years ago
			`int choose_cuda_device()`
			`{`
			`int nDevices = 0;`
			`cudaError_t err = cudaGetDeviceCount(&nDevices);`
			`if (err != cudaSuccess) {`
			`std::cerr << "Failed to retrieve the number of CUDA enabled devices"`
			`<< std::endl;`
			`return 1;`
			`}`
			`for (int i = 0; i < nDevices; ++i) {`
			`cudaDeviceProp prop;`
			`cudaError_t err = cudaGetDeviceProperties(&prop, i);`
			`if (err != cudaSuccess) {`
			`std::cerr << "Could not retrieve properties from CUDA device " << i`
			`<< std::endl;`
			`return 1;`
			`}`

			`if (prop.major > 3 \|\| (prop.major == 3 && prop.minor >= 5)) {`
			`err = cudaSetDevice(i);`
			`if (err != cudaSuccess) {`
			`std::cout << "Could not select CUDA device " << i << std::endl;`
			`} else {`
			`return 0;`
			`}`
			`}`
			`}`

			`std::cout << "Could not find a CUDA enabled card supporting compute >=3.5"`
			`<< std::endl;`
			`return 1;`
			`}`

			`int main(int argc, char** argv)`
			`{`
			`int ret = choose_cuda_device();`
			`if (ret) {`
			`return 0;`
			`}`

New upstream version 3.12.3 6 years ago			`#if __CUDACC_VER_MAJOR__ <= 9`
New upstream version 3.12.1 6 years ago			`// initial values that will make sure that the cublasSgemm won't actually`
			`// do any work`
			`int n = 0;`
			`float alpha = 1;`
			`float beta = 1;`
			`float* d_A = nullptr;`
			`float* d_B = nullptr;`
			`float* d_C = nullptr;`
			`deviceCublasSgemm<<<1, 1>>>(n, alpha, beta, d_A, d_B, d_C);`
New upstream version 3.12.3 6 years ago			`#endif`
New upstream version 3.12.1 6 years ago
			`return 0;`
			`}`