#include #include #include #include #include #include #include #include using namespace std; #include #include // Declare our texture reference. This MUST be a global variable! // float2 = texel element is 2-D value using CUDA array type float2 // cudaTextureType2D = indexing will use 2-dimensions // cudaReadModeElementType = Values returned will be same type as stored (no // transformation to normalized values) static texture bfield_GPU_texref; float2* ReadBfield(int &Nzbins, float &zmin, float &zmax, int &Nrbins, float &rmin, float &rmax); //...................... // SimpleException //...................... class SimpleException:public std::exception { public: SimpleException(string str){ msg = str;} virtual ~SimpleException() throw(){} virtual const char* what(void) const throw() { return msg.c_str(); } private: string msg; }; //---------------- // InitCUDA //---------------- void InitCUDA(bool quiet=false) { // Verify that a CUDA device exists and optionally print some // info about it. int deviceCount=0; cudaGetDeviceCount(&deviceCount); if(deviceCount<1){ cout<<"No CUDA devices available!"<(); // Bind texture object to allocated memory on device cout << "Binding B-field to texture object ...." << endl; err = cudaBindTexture2D(NULL, &bfield_GPU_texref, d_bfield_table, &desc, Nzbins, Nrbins, pitch); if(err != cudaSuccess) throw SimpleException(string("CUDA ERROR: ") + cudaGetErrorString(err)); #if 0 // This commented out section tries to allocate and copy the map // as a cuda3D array object which in principle, is supposed to be // a better way of doing it (saving us from having to worry about // pitched memory for one thing). However, it never worked and most // of the web documentation centered on pitched pointer allocation // so I reverted to that. Leaving this here temporarily in case // additional inspiration arises to have another run at this method. // Create a channel descriptor that says the data will be stored as float2 cudaChannelFormatDesc desc = cudaCreateChannelDesc(); // Allocate the array on the GPU device cudaArray_t bfield_GPU; err = cudaMalloc3DArray(&bfield_GPU, &desc, extent); if(err != cudaSuccess) throw SimpleException(string("CUDA ERROR: ") + cudaGetErrorString(err)); // A brief note on CUDA pitched pointers: // A CUDA pitched pointer is used to describe the format of the // data in the memory allocated on the host. The "pitch" is how // many bytes are allocated for each row and the xsz and ysz // parameters tell how many elements per row and rows in the // table repectively. This system allows one to ensure that the // rows are properly byte aligned on the host (in case there is // some need for that). We use the "make_cudaPitchedPtr" routine // to do this below. // Copy the Bfield from host memory to device memory cout << "Copying Bfield map to GPU ..." << endl; cudaMemcpy3DParms myparms = {0}; myparms.srcPtr = make_cudaPitchedPtr((void*)bfield_table, sizeof(float2)*Nzbins, Nzbins, Nrbins); myparms.dstArray = bfield_GPU; myparms.extent = extent; myparms.kind = cudaMemcpyHostToDevice; err = cudaMemcpy3D(&myparms); if(err != cudaSuccess) throw SimpleException(string("CUDA ERROR: ") + cudaGetErrorString(err)); // Create texture object bfield_GPU_texref.addressMode[0] = bfield_GPU_texref.addressMode[1] = cudaAddressModeBorder; //bfield_GPU_texref.filterMode = cudaFilterModeLinear; bfield_GPU_texref.filterMode = cudaFilterModePoint; bfield_GPU_texref.normalized = true; // Bind texture object to allocated array on device cout << "Binding B-field to texture object ...." << endl; err = cudaBindTextureToArray(&bfield_GPU_texref, bfield_GPU, &desc); if(err != cudaSuccess) throw SimpleException(string("CUDA ERROR: ") + cudaGetErrorString(err)); #endif cout << "B-field ready on GPU." << endl; } //---------------- // ReadBfield //---------------- float2* ReadBfield(int &Nzbins, float &zmin, float &zmax, int &Nrbins, float &rmin, float &rmax) { // Open the input file and read in all values to figure out how // many bins we have in z and r. string fname = "solenoid_1500_poisson_20090814_01"; ifstream ifs(fname.c_str()); if(!ifs.is_open()) throw SimpleException(string("Unable to open B-field map file: " + fname)); // Read all elements into a vector, storing x(=r) and z coordinates in // separate set containers so we can know how many elements and what their // range is for each dimension. set z_vals; set r_vals; vector map_vals; // .x=r .y=z .z=Br .w=Bz while(ifs.good()){ char line[1024]; ifs.getline(line, 1024); if(!ifs.good()) break; if(line[0] == '#') continue; stringstream ss(line); float x,y,z, Bx, By, Bz; ss>>x>>y>>z>>Bx>>By>>Bz; x *= 2.54; // convert to inches z *= 2.54; // convert to inches z_vals.insert(z); r_vals.insert(x); float4 tmp = {x, z, Bx, Bz}; map_vals.push_back(tmp); } // Copy ranges to caller's variables and report map stats to user Nzbins = z_vals.size(); Nrbins = r_vals.size(); rmin = *(r_vals.begin()); // assume set orders by ascending value! rmax = *(r_vals.rbegin()); // assume set orders by ascending value! zmin = *(z_vals.begin()); // assume set orders by ascending value! zmax = *(z_vals.rbegin()); // assume set orders by ascending value! cout << " Found "< indexes; for(unsigned int i=0; i points; for(int i=0; i<10; i++){ float z = -100.0 + 700.0*(float)random()/(float)RAND_MAX; float r = 250.0*(float)random()/(float)RAND_MAX; z=(float)random()/(float)RAND_MAX; r=(float)random()/(float)RAND_MAX/4.0; float2 tmp = {z,r}; points.push_back(tmp); } cout << " " << points.size() << " points generated for testing" <>>(points.size(), d_coordinates, d_B); // Copy memory back to host float2 *h_B = (float2*)malloc(Nbytes); cudaMemcpy(h_B, d_B, Nbytes, cudaMemcpyDeviceToHost); // Print results to screen cout << " Results:" << endl; cout << " ---------------" << endl; for(int i=0; i<10; i++){ float z = -100.0 + 700.0* points[i].x; float r = 0.0 + 250.0* points[i].y; float Bz = h_B[i].x; float Br = h_B[i].y; cout << " (z,r)=(" << z << ", " << r << ")"; cout << " (Bz,Br)=(" << Bz << ", " << Br << ")"; cout << endl; } cout << " Done." <