1. cuda-tuntime的初始化
#include <cuda_runtime.h>
#include <cuda.h>
#include <stdio.h>
#include <string.h>
#define checkRuntime(op) __check_cuda_runtime((op), #op, __FILE__, __LINE__)
bool __check_cuda_runtime(cudaError_t code, const char* op, const char* file, int line){
if(code != cudaSuccess){
const char* err_name = cudaGetErrorName(code);
const char* err_message = cudaGetErrorString(code);
printf("runtime error %s:%d %s failed. \n code = %s, message = %s\n", file, line, op, err_name, err_message);
return false;
}
return true;
}
int main(){
CUcontext context = nullptr;
cuCtxGetCurrent(&context);
printf("Current context = %p,当前无context\n", context);
int device_count = 0;
checkRuntime(cudaGetDeviceCount(&device_count));
printf("device_count = %d\n", device_count);
int device_id = 0;
printf("set current device to : %d,这个API依赖CUcontext,触发创建并设置\n", device_id);
checkRuntime(cudaSetDevice(device_id));
cuCtxGetCurrent(&context);
printf("SetDevice after, Current context = %p,获取当前context\n", context);
int current_device = 0;
checkRuntime(cudaGetDevice(¤t_device));
printf("current_device = %d\n", current_device);
return 0;
}
2. GPU的内存分配
#include <cuda_runtime.h>
#include <stdio.h>
#include <string.h>
#define checkRuntime(op) __check_cuda_runtime((op), #op, __FILE__, __LINE__)
bool __check_cuda_runtime(cudaError_t code, const char* op, const char* file, int line){
if(code != cudaSuccess){
const char* err_name = cudaGetErrorName(code);
const char* err_message = cudaGetErrorString(code);
printf("runtime error %s:%d %s failed. \n code = %s, message = %s\n", file, line, op, err_name, err_message);
return false;
}
return true;
}
int main(){
int device_id = 0;
checkRuntime(cudaSetDevice(device_id));
float* memory_device = nullptr;
checkRuntime(cudaMalloc(&memory_device, 100 * sizeof(float)));
float* memory_host = new float[100];
memory_host[2] = 520.25;
checkRuntime(cudaMemcpy(memory_device, memory_host, sizeof(float) * 100, cudaMemcpyHostToDevice));
float* memory_page_locked = nullptr;
checkRuntime(cudaMallocHost(&memory_page_locked, 100 * sizeof(float)));
checkRuntime(cudaMemcpy(memory_page_locked, memory_device, sizeof(float) * 100, cudaMemcpyDeviceToHost));
printf("%f\n", memory_page_locked[2]);
checkRuntime(cudaFreeHost(memory_page_locked));
checkRuntime(cudaFree(memory_device));
delete [] memory_host;
return 0;
}
3. cudastream流管理,函数的异步控制
#include <cuda_runtime.h>
#include <stdio.h>
#include <string.h>
#define checkRuntime(op) __check_cuda_runtime((op), #op, __FILE__, __LINE__)
bool __check_cuda_runtime(cudaError_t code, const char* op, const char* file, int line){
if(code != cudaSuccess){
const char* err_name = cudaGetErrorName(code);
const char* err_message = cudaGetErrorString(code);
printf("runtime error %s:%d %s failed. \n code = %s, message = %s\n", file, line, op, err_name, err_message);
return false;
}
return true;
}
int main(){
int device_id = 0;
checkRuntime(cudaSetDevice(device_id));
cudaStream_t stream = nullptr;
checkRuntime(cudaStreamCreate(&stream));
float* memory_device = nullptr;
checkRuntime(cudaMalloc(&memory_device, 100 * sizeof(float)));
float* memory_host = new float[100];
memory_host[2] = 520.25;
checkRuntime(cudaMemcpyAsync(memory_device, memory_host, sizeof(float) * 100, cudaMemcpyHostToDevice, stream));
float* memory_page_locked = nullptr;
checkRuntime(cudaMallocHost(&memory_page_locked, 100 * sizeof(float)));
checkRuntime(cudaMemcpyAsync(memory_page_locked, memory_device, sizeof(float) * 100, cudaMemcpyDeviceToHost, stream));
checkRuntime(cudaStreamSynchronize(stream));
printf("%f\n", memory_page_locked[2]);
checkRuntime(cudaFreeHost(memory_page_locked));
checkRuntime(cudaFree(memory_device));
checkRuntime(cudaStreamDestroy(stream));
delete [] memory_host;
return 0;
}