// CUDA Example 1
#include <stdio.h>
#include <stdlib.h>
// Number of float elements in the host buffer and in the device buffer.
const int N = 16;
// Threads per block (x dimension) used for the kernel launch. Each thread of
// the single block writes exactly one element, so keep this equal to N.
const int blocksize = 16;
// Kernel: each thread stores its own x-index, converted to float, into
// c[threadIdx.x].
//
// Launch layout: indexing uses threadIdx.x only, so the kernel is meant for a
// single one-dimensional block; additional blocks would merely rewrite the
// same elements.
//
// c: device pointer with room for at least N floats.
__global__
void simple(float *c)
{
    const int i = threadIdx.x;
    // Guard so a block wider than the N-element buffer cannot write past its end.
    if (i < N)
        c[i] = static_cast<float>(i);
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;
    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Allocates an N-float buffer on the device, launches `simple` on one block of
// `blocksize` threads, copies the result back to the host and prints it.
// Returns EXIT_SUCCESS when every CUDA call succeeded, EXIT_FAILURE otherwise.
int main()
{
    int i;
    float *c = new float[N];
    float *cd = NULL;
    const int size = N*sizeof(float);

    bool ok = cudaOk(cudaMalloc((void**)&cd, size), "cudaMalloc");
    if (ok) {
        dim3 dimBlock(blocksize, 1);
        dim3 dimGrid(1, 1);
        simple<<<dimGrid, dimBlock>>>(cd);
        // A launch does not return a status itself; a bad configuration is
        // reported through cudaGetLastError(). The blocking cudaMemcpy that
        // follows surfaces any error raised while the kernel was running.
        ok = cudaOk(cudaGetLastError(), "simple launch")
            && cudaOk(cudaMemcpy(c, cd, size, cudaMemcpyDeviceToHost), "cudaMemcpy");
    }

    // Release the device buffer on both the success and the failure path.
    if (cd != NULL)
        ok = cudaOk(cudaFree(cd), "cudaFree") && ok;

    if (ok) {
        for (i = 0; i < N; i++)
            printf("%f ", c[i]);
        printf("\n");
    }

    delete[] c;
    if (!ok)
        return EXIT_FAILURE;  // host buffer was never filled; nothing valid to print

    printf("done\n");
    return EXIT_SUCCESS;
}
// Modifiers for code:
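// __device__ (runs on the device, called from device code),
// __global__ (kernel: runs on the device, launched from the host),
// __host__ (runs on the host).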