// NOTE(review): the header names of the three #include directives were missing
// from this file as received (likely stripped as "<...>" during extraction).
// The set below is a reconstruction -- <iostream> is presumed from the
// `using namespace std;` that followed the first include; confirm against the
// original before merging.
#include <iostream>
using namespace std;
#include <cstdio>
#include <cuda_runtime.h>

// Declarations of wrapper routines (prefixed with gpu_)
void gpu_VectAdd(float *A, float *B, float *C);

// Declarations of GPU routines (prefixed with kernel_)
__global__ void kernel_VectAdd(float *A, float *B, float *C);

// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise, so calls can be
// chained with && and stop at the first failure.
static bool gpu_VectAdd_ok(cudaError_t status, const char *what)
{
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "gpu_VectAdd: %s failed: %s\n", what,
            cudaGetErrorString(status));
    return false;
}

//-----------------------
// gpu_VectAdd
//-----------------------
// Host wrapper: computes C[i] = A[i] + B[i] for the first 3 elements on the GPU.
//
//   A, B : host arrays holding at least 3 floats each (read).
//   C    : host array with room for at least 3 floats (written on success).
//
// Every CUDA runtime call is checked. On the first failure a message is
// written to stderr, the remaining steps are skipped, and C is left untouched.
// The device buffers are released on every path.
void gpu_VectAdd(float *A, float *B, float *C)
{
    // Single source of truth for the vector length: it drives both the
    // buffer size and the number of threads launched, so they cannot drift.
    const int numElems = 3;
    const size_t size = numElems * sizeof(float);

    // Start as NULL so the cleanup below is safe even if an allocation fails.
    float *Agpu = NULL;
    float *Bgpu = NULL;
    float *Cgpu = NULL;

    // Allocate device buffers and upload the inputs; && short-circuits at the
    // first call that fails.
    bool ok = gpu_VectAdd_ok(cudaMalloc(&Agpu, size), "cudaMalloc(A)")
           && gpu_VectAdd_ok(cudaMalloc(&Bgpu, size), "cudaMalloc(B)")
           && gpu_VectAdd_ok(cudaMalloc(&Cgpu, size), "cudaMalloc(C)")
           && gpu_VectAdd_ok(cudaMemcpy(Agpu, A, size, cudaMemcpyHostToDevice),
                             "cudaMemcpy(A -> device)")
           && gpu_VectAdd_ok(cudaMemcpy(Bgpu, B, size, cudaMemcpyHostToDevice),
                             "cudaMemcpy(B -> device)");

    if (ok) {
        // One block with one thread per element; the kernel indexes by
        // threadIdx.x only and has no bounds check.
        kernel_VectAdd<<<1, numElems>>>(Agpu, Bgpu, Cgpu);

        // A launch does not return a status itself: cudaGetLastError() picks
        // up launch-configuration errors, and the blocking device-to-host copy
        // reports any fault raised while the kernel ran.
        ok = gpu_VectAdd_ok(cudaGetLastError(), "kernel_VectAdd launch")
          && gpu_VectAdd_ok(cudaMemcpy(C, Cgpu, size, cudaMemcpyDeviceToHost),
                            "cudaMemcpy(C -> host)");
    }

    // Release the device buffers (the original leaked all three on every
    // call). cudaFree on a NULL pointer performs no operation.
    (void)cudaFree(Agpu);
    (void)cudaFree(Bgpu);
    (void)cudaFree(Cgpu);
}

//-----------------------
// kernel_VectAdd
//-----------------------
// Element-wise addition: thread i of the block writes C[i] = A[i] + B[i].
//
// Launch layout: gpu_VectAdd launches this as a single block whose thread
// count equals the number of elements. Only threadIdx.x is used (blockIdx is
// ignored) and there is no bounds check, so each of A, B and C must hold at
// least blockDim.x elements. The pointers are dereferenced on the device;
// gpu_VectAdd passes cudaMalloc'd buffers.
__global__ void kernel_VectAdd(float *A, float *B, float *C)
{
    int i = threadIdx.x;
    C[i] = A[i] + B[i];
}