#include <stdio.h>
#include <cuda.h>

// Plain record describing a single wire.
// Field order is (Xo, Yo, Zo, X, Y, Z, magnitude, id), followed by an
// untyped pointer slot; keep this order, since the struct is copied as
// raw bytes and initialized positionally.
struct Wire
{
    // Origin of the wire (Xo, Yo, Zo).
    float x;
    float y;
    float z;

    // Second triple (X, Y, Z) -- presumably the direction components;
    // confirm against the code that fills them in.
    float i;
    float j;
    float k;

    float magnitude;

    // Integer identifier of the wire.
    int id;

    // Untyped pointer carried with the record; its meaning is not
    // documented in this file.
    void *ptr;
};
/*
__device__ __constant__ Wire *a_dd;

void printWire(Wire *tempH);
// Kernel that executes on the CUDA device
__global__ void returnWire(Wire *a, int y)
{
	*a = a_dd[y];
        a->ptr = (void*)a_dd;
}

int main(void)
{
  Wire *a_h, *a_d, *tempD, *tempH;  // Pointer to host & device arrays
  const int N = 2400;  // Number of elements in array
  size_t size = N * sizeof(Wire);


  a_h = (Wire *)malloc(size);        // Allocate array on host
  tempH = (Wire *)malloc(sizeof(Wire)); 
  cudaMalloc((void **) &a_d, size);   // Allocate array on device
  cudaMalloc((void **) &tempD, sizeof(Wire));
   
  int index;
  // Initialize wire arrays
  for(int i = 0; i < 24; i++) //Planes
  {
	  if(i % 2 == 0) //Check if even
	  {
		  for (int j = 0; j < 100; j++) //Wires vertical
		  {
		   index = j + (i * 100);
			a_h[index].id = j + i*100; //Measurements in cm
			a_h[index].x = j + 1;
			a_h[index].y = 0;
			a_h[index].z = i * 5;
			a_h[index].i = 0;
			a_h[index].j = 1;
			a_h[index].k = 0;
			a_h[index].magnitude = 1;
			a_h[index].ptr = NULL;
		  }
	  }
	  else
	  {
		  for (int j = 0; j < 100; j++) //Wires horizontal
		  {
		   index = j + (i * 100);
			a_h[index].id = j + i*100; //Measurements in cm
			a_h[index].x = 0;
			a_h[index].y = j + 1;
			a_h[index].z = i * 5;
			a_h[index].i = 1;
			a_h[index].j = 0;
			a_h[index].k = 0;
			a_h[index].magnitude = 1;
			a_h[index].ptr = NULL;
		  }
	  }
  }
  cudaMemcpy(a_d, a_h, size, cudaMemcpyHostToDevice); //Copy to device
  cudaError_t error = cudaMemcpyToSymbol(a_dd, &a_d, sizeof(Wire*), size_t(0),cudaMemcpyHostToDevice);
  printf("CUDA Error: %s\n", cudaGetErrorString(error));

  // Do calculation on device:
  int threadCount = 1;
  int block_size = 4;
  int n_blocks = threadCount/block_size + (threadCount%block_size == 0 ? 0:1);
  returnWire <<< n_blocks, block_size >>> (tempD, 3); //ID of 3 for testing
  cudaMemcpy(tempH, tempD, sizeof(Wire), cudaMemcpyDeviceToHost);
  printWire(tempH);
  returnWire <<< n_blocks, block_size >>> (tempD, 4); //ID of 4 for testing
  cudaMemcpy(tempH, tempD, sizeof(Wire), cudaMemcpyDeviceToHost);
  printWire(tempH);
  returnWire <<< n_blocks, block_size >>> (tempD, 5); //ID of 5 for testing
  cudaMemcpy(tempH, tempD, sizeof(Wire), cudaMemcpyDeviceToHost);
  printWire(tempH);
  returnWire <<< n_blocks, block_size >>> (tempD, 500); //ID of 500 for testing
  cudaMemcpy(tempH, tempD, sizeof(Wire), cudaMemcpyDeviceToHost);
  printWire(tempH);
  printf("0x%x\n", a_d);
  // Cleanup
  free(a_h);
  free(tempH);
  cudaFree(a_d);
  cudaFree(tempH);
  return cudaThreadExit();
}
void printWire(Wire *tempH)
{
	printf("%d\n", tempH->id);
	printf("%f\n", tempH->x);
	printf("%f\n", tempH->y);
	printf("%f\n", tempH->z);
	printf("%f\n", tempH->i);
	printf("%f\n", tempH->j);
	printf("%f\n", tempH->k);
	printf("%f\n", tempH->magnitude);
	printf("0x%x\n", tempH->ptr);
}
*/