CUDA 程序在教程中大多直接用 nvcc 编译,但在实际项目中,CUDA 代码往往和 C++ 代码混在一起,所以用 CMake 来构建会更方便。
CMake 目前已经可以较好地支持 CUDA 编译,本文只列出其中一种方法,仅供参考。
CMakeLists.txt
# First-class CUDA language support exists since CMake 3.8, so 3.15 is enough.
cmake_minimum_required(VERSION 3.15)

# Enable CUDA as a regular project language. This replaces the FindCUDA
# module (find_package(CUDA) / cuda_add_executable), which has been
# deprecated since CMake 3.10.
project(test LANGUAGES CXX CUDA)

# .cu sources are compiled by the CUDA compiler automatically. C++ sources can
# be listed in the same call when CUDA and C++ code are mixed in one target.
add_executable(test test.cu)
test.cu
#include <iostream>
/**
 * Element-wise integer addition: c[i] = a[i] + b[i].
 *
 * Launch layout: 1D grid of 1D blocks, one thread per element. The element
 * handled by a thread is its global index
 * (blockIdx.x * blockDim.x + threadIdx.x), so multi-block launches address
 * distinct elements; for a single-block launch this equals threadIdx.x.
 * No shared memory is used.
 *
 * @param a  device-accessible input array
 * @param b  device-accessible input array
 * @param c  device-accessible output array
 * @param n  optional element count; threads whose index is >= n do nothing.
 *           The negative default disables the bounds check, in which case
 *           the launch must supply exactly one thread per element.
 */
void __global__ add(const int* a, const int* b, int* c, int n = -1)
{
    const int index = blockIdx.x * blockDim.x + threadIdx.x;

    // Guard the grid tail when the caller provided an element count.
    if (n >= 0 && index >= n)
    {
        return;
    }

    c[index] = a[index] + b[index];
}
// Returns true when `status` is cudaSuccess; otherwise reports the failed
// step (`what`) and the CUDA error string on stderr and returns false.
static bool checkCuda(cudaError_t status, const char* what)
{
    if (status == cudaSuccess)
    {
        return true;
    }
    std::cerr << "CUDA error in " << what << ": "
              << cudaGetErrorString(status) << std::endl;
    return false;
}

/**
 * Adds two N-element host arrays on the GPU and prints each result element
 * on its own line.
 *
 * Every CUDA runtime call and the kernel launch are checked; on the first
 * failure the remaining GPU steps are skipped, nothing is printed to stdout,
 * all resources are released and the process exits with status 1.
 *
 * @return 0 on success, 1 if any CUDA operation failed.
 */
int main(void)
{
    const int N = 10;
    const size_t bytes = sizeof(int) * N;

    // Host buffers: two inputs and one output.
    int *h_a = new int[N];
    int *h_b = new int[N];
    int *h_c = new int[N];
    for (int i = 0; i < N; ++i)
    {
        h_a[i] = 1;
        h_b[i] = 2;
    }

    // Device buffers start as null so the cleanup below is safe even when an
    // allocation fails part-way through (cudaFree on a null pointer is a no-op).
    int *d_a = NULL;
    int *d_b = NULL;
    int *d_c = NULL;

    // && short-circuits, so the first failing step stops the chain.
    bool ok = checkCuda(cudaMalloc((void**)&d_a, bytes), "cudaMalloc(d_a)")
           && checkCuda(cudaMalloc((void**)&d_b, bytes), "cudaMalloc(d_b)")
           && checkCuda(cudaMalloc((void**)&d_c, bytes), "cudaMalloc(d_c)")
           && checkCuda(cudaMemcpy(d_a, h_a, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(h_a -> d_a)")
           && checkCuda(cudaMemcpy(d_b, h_b, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(h_b -> d_b)");

    if (ok)
    {
        // One block of N threads: one thread per element.
        add<<<1, N>>>(d_a, d_b, d_c);

        // A launch returns no status itself: cudaGetLastError() reports bad
        // launch configurations, the synchronize reports execution faults.
        ok = checkCuda(cudaGetLastError(), "add kernel launch")
          && checkCuda(cudaDeviceSynchronize(), "add kernel execution")
          && checkCuda(cudaMemcpy(h_c, d_c, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(d_c -> h_c)");
    }

    if (ok)
    {
        for (int i = 0; i < N; ++i)
        {
            std::cout << h_c[i] << std::endl;
        }
    }

    // Release everything regardless of success or failure.
    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_c);
    delete [] h_a;
    delete [] h_b;
    delete [] h_c;

    return ok ? 0 : 1;
}
无情的摸鱼机器