#include #include #include #define BLOCKSIZE 8 // Size of Block #define FILENAMEMAT "NewA.txt" //Name of file for matrix #define FILENAMEVECIN "Newx.txt" //Name of file for vector #define FILENAMEVECOUT "Ax.txt" //Name of file for product vector #define FILENAMEVECOUTGPU "AX.txt" //Name of file for GPU product vector #define NIter 100 //Number of runs for timing //Kernel definition __global__ void GPUSPMV1(int * StartPoints_d, int * ColIndices_d, float * MatVals_d, float * x_d, float * Ax_d){ int i, j; i=threadIdx.x + BLOCKSIZE*blockIdx.x; float sum=0.0f; for(j=StartPoints_d[i];j>>(StartPoints_d, ColIndices_d, MatVals_d, x_d, Ax_d); } //Retrieving the device product vector cudaMemcpy(Ax, Ax_d, sizeof(float)*n, cudaMemcpyDeviceToHost); //Dumping the GPU answer to a file fp=fopen(FILENAMEVECOUTGPU,"w"); for(i=0;i