CUDA program structure
use CUDA API to query for compatible devices
cudaError_t cudaGetDeviceProperties(cudaDeviceProp* prop, int dev)
Example:
int device=1;
cudaDeviceProp props;
cudaGetDeviceProperties(&props, device);
allocate memory on the CUDA device
cudaError_t cudaMalloc(void** devPtr, size_t size)
Example:
size_t E_bytes = N_pts * 3 * sizeof(std::complex<double>);
HANDLE_ERROR(cudaMalloc((void**)&gpu_E, E_bytes));
copy data from main memory to the CUDA device
cudaError_t cudaMemcpy(void* dest, const void* source, size_t size, cudaMemcpyKind type)
Example:
HANDLE_ERROR(cudaMemcpy((void*)gpu_F, (void*)F, F_bytes, cudaMemcpyHostToDevice));
launch the kernel
(To be added in future blog...)
copy data from device to main memory
Example:
HANDLE_ERROR(cudaMemcpy((void*)E, (void*)gpu_E, E_bytes, cudaMemcpyDeviceToHost));
More explanation is shown below:
Device Properties
Get the number of devices:
cudaError_t cudaGetDeviceCount(int* num)
Control what devices execute code
cudaError_t cudaGetDevice(int* d) // get the current device
cudaError_t cudaSetDevice(int d) // send future commands to this device
Error Checking
Almost all CUDA functions return a cudaError_t value. We need to check it every time to see whether the CUDA operation succeeded.
cudaError_t =
cudaSuccess
cudaErrorMemoryAllocation // not enough memory
cudaErrorInvalidValue // a parameter is wrong
cudaErrorInvalidDevicePointer // the device pointer is wrong; maybe the memory was never allocated
A useful macro
HANDLE_ERROR is defined in STIM_CUDA_ERROR_H. It displays a string describing the error, along with the file and line number where the error occurred.
// Forwards the status returned by the wrapped call to cuHandleError, together
// with the __FILE__ and __LINE__ of the place where the macro is expanded.
// NOTE(review): cuHandleError is not defined in this section; it is presumably
// the cudaError_t counterpart of cufftHandleError below -- confirm.
#define HANDLE_ERROR( err ) (cuHandleError( err, __FILE__, __LINE__ ))
// Reports a failed cuFFT call on standard output.
//
// Parameters:
//   err  - status code returned by a cuFFT call.
//   file - name of the source file containing the call (typically __FILE__);
//          a null pointer is reported as "(unknown file)".
//   line - line number of the call (typically __LINE__).
//
// Does nothing when err is CUFFT_SUCCESS. Otherwise prints a description of
// the error followed by the call-site location. The function only reports:
// it neither aborts nor throws, so execution continues in the caller.
static void cufftHandleError( cufftResult err, const char* file, int line )
{
    if (err == CUFFT_SUCCESS)
        return;

    switch (err)
    {
    case CUFFT_INVALID_PLAN:
        std::cout<<"The plan parameter is not a valid handle."<<std::endl;
        break;
    case CUFFT_ALLOC_FAILED:
        std::cout<<"Allocation failed."<<std::endl;
        break;
    case CUFFT_INVALID_VALUE:
        std::cout<<"At least one of the parameters idata, odata, and direction is not valid."<<std::endl;
        break;
    case CUFFT_INTERNAL_ERROR:
        std::cout<<"An internal driver error was detected."<<std::endl;
        break;
    case CUFFT_EXEC_FAILED:
        std::cout<<"CUFFT failed to execute the transform on the GPU."<<std::endl;
        break;
    case CUFFT_SETUP_FAILED:
        std::cout<<"The CUFFT library failed to initialize."<<std::endl;
        break;
    default:
        // Any status without a dedicated message is shown by its numeric value.
        std::cout<<"Unknown error: "<<err<<std::endl;
        break;
    }

    // Previously file and line were accepted but never printed, so the report
    // could not be traced back to the failing call. Streaming a null
    // const char* is undefined behaviour, hence the guard.
    std::cout<<"in file "<<(file ? file : "(unknown file)")<<" at line "<<line<<std::endl;
}