// Memory-access speed test over a B-field map (host vs. CUDA device).
//
// What the text below establishes:
//   * BR_MIN/BR_MAX/BR_N and BZ_MIN/BZ_MAX/BZ_N are the bounds and point
//     counts used to derive delta_r and delta_z as (MAX-MIN)/(N-1).
//   * Bfield_Map_t is one map entry made of eight floats: the coordinates
//     r, z, the field components Br, Bz, and dBrdr, dBzdr, dBrdz, dBzdz.
//   * main() starts the prof_time timers, exits early when
//     cudaGetDeviceCount() reports fewer than one device, fills map entries
//     from analytic sin/cos expressions of r and z, issues one launch with
//     100 accesses followed by a launch with Naccesses, synchronizes,
//     records the elapsed time under the key "Device", copies one
//     Bfield_Map_t of checksums back from the device, and prints the
//     checksums and the collected timings.
//   * TestAccessSpeedOnHost and the __global__ TestAccessSpeedOnDevice are
//     declared with the same parameter list (bfield, Naccesses, checksums).
//     The visible bodies zero all eight fields of *checksums and then, in a
//     loop, add r, z and the six field members of a local my_B to them.
//
// NOTE(review): as it stands this text cannot compile. All statements sit
// on two physical lines, so every "//" comment also comments out whatever
// code follows it on that line. In addition, several spans that began with
// '<' look like they were stripped: the #include targets, the template
// arguments of `map`, the code between the "No CUDA devices available!"
// message and `r = r;`, the kernel name and launch configuration in front
// of `>>(bfield_d, ...)`, and the loop condition plus the start of the loop
// body in `for(int i=0; ir += r;`. Restore these from the original source;
// do not guess them.
//
// NOTE(review): no threadIdx/blockIdx use is visible in the kernel and it
// writes straight through `checksums`; presumably it is meant to be
// launched with a single thread -- confirm once the launch configuration is
// restored.
//
// NOTE(review): the results of the cudaGetDeviceCount, cudaMemcpy and
// cudaDeviceSynchronize calls visible here are not inspected, and no
// cudaGetLastError() follows the visible launches -- TODO confirm whether
// the lost spans performed any error checking.
#include using namespace std; #include #include #include "prof_time.h" // This example tests memory access speeds using a B-field map #define BZ_MIN -50.0 #define BZ_MAX 650.0 #define BZ_N 700 #define BR_MIN 0.0 #define BR_MAX 100.0 #define BR_N 100 typedef struct{ float r, z; float Br, Bz; float dBrdr, dBzdr; float dBrdz, dBzdz; }Bfield_Map_t; void TestAccessSpeedOnHost(Bfield_Map_t* bfield, int Naccesses, Bfield_Map_t *checksums); __global__ void TestAccessSpeedOnDevice(Bfield_Map_t* bfield, int Naccesses, Bfield_Map_t *checksums); //-------------------------- // main //-------------------------- int main(int narg, char *argv[]) { // Start hi-res timers prof_time::StartTimers(); map prof_times; // Get number of CUDA devices to make sure one is available to run on int deviceCount=0; cudaGetDeviceCount(&deviceCount); if(deviceCount<1){ cout<<"No CUDA devices available!"<r = r; B->z = z; B->Br = sin(3.14159265*r/100.0); B->Bz = -2.0 + 0.2*cos(3.14159265*z/200.0); B->dBrdr = -cos(3.14159265*r/100.0); B->dBzdr = 0.0; B->dBrdz = 0.0; B->dBzdz = 0.2*sin(3.14159265*z/200.0); } } // Copy map to device cout<<"Copying map to device ..."<>>(bfield_d, 100, checksums_d); // Test access speed on device cout<<"Testing "<>>(bfield_d, Naccesses, checksums_d); cudaDeviceSynchronize(); start_time_device.TimeDiffNow(prof_times, "Device"); // Copy results from device checksum to host Bfield_Map_t checksums_d_h; cudaMemcpy(&checksums_d_h, checksums_d, sizeof(Bfield_Map_t), cudaMemcpyDeviceToHost); // Print checksum info to screen cout<<"Checksums (first is host, second device): ----"<r<z<Br<Bz<::iterator iter = prof_times.begin(); for(; iter!=prof_times.end(); iter++){ cout<< " "<first<r = 0.0; checksums->z = 0.0; checksums->Br = 0.0; checksums->Bz = 0.0; checksums->dBrdr = 0.0; checksums->dBzdr = 0.0; checksums->dBrdz = 0.0; checksums->dBzdz = 0.0; float delta_r = (BR_MAX-BR_MIN)/(float)(BR_N-1); float delta_z = (BZ_MAX-BZ_MIN)/(float)(BZ_N-1); for(int i=0; ir += r; checksums->z += z; 
checksums->Br += my_B.Br; checksums->Bz += my_B.Bz; checksums->dBrdr += my_B.dBrdr; checksums->dBzdr += my_B.dBzdr; checksums->dBrdz += my_B.dBrdz; checksums->dBzdz += my_B.dBzdz; } } //-------------------------- // TestAccessSpeedOnDevice //-------------------------- __global__ void TestAccessSpeedOnDevice(Bfield_Map_t* bfield, int Naccesses, Bfield_Map_t *checksums) { //printf("Hello from device!\n"); checksums->r = 0.0; checksums->z = 0.0; checksums->Br = 0.0; checksums->Bz = 0.0; checksums->dBrdr = 0.0; checksums->dBzdr = 0.0; checksums->dBrdz = 0.0; checksums->dBzdz = 0.0; float delta_r = (BR_MAX-BR_MIN)/(float)(BR_N-1); float delta_z = (BZ_MAX-BZ_MIN)/(float)(BZ_N-1); for(int i=0; ir += r; checksums->z += z; checksums->Br += my_B.Br; checksums->Bz += my_B.Bz; checksums->dBrdr += my_B.dBrdr; checksums->dBzdr += my_B.dBzdr; checksums->dBrdz += my_B.dBrdz; checksums->dBzdz += my_B.dBzdz; } }