c++ - CUDA invalid argument when trying to copy struct to device's memory (cudaMemcpy) -
I'm trying to figure out how I should create a struct/class that is destined to be sent to the device, but I keep getting an "invalid argument" CUDA error. I wrote a small example that shows the error:
#include <iostream>
#include <cstdio>
using namespace std;

// Reports a failed CUDA runtime call on cerr together with the source line.
// The device synchronize sits outside the `if`, so it runs after every wrapped
// call, whether or not that call failed.
// NOTE(review): `xxx` is expanded twice, so a failing call is executed a second
// time just to obtain its error string.
// NOTE(review): no `do` precedes the opening brace, so the trailing `while (0)`
// is a separate empty loop rather than part of a do/while wrapper.
// NOTE(review): the CUDA identifiers in this listing look lower-cased by the
// page extraction; the runtime API spells them cudaSuccess, cudaGetErrorString,
// cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaMemcpyHostToDevice and
// cudaMemcpyDeviceToHost, and the line macro is __LINE__ -- confirm against
// the original post.
#define cuda_warn(xxx) \
{ if (xxx != cudasuccess) cerr << "cuda error: " << \
cudageterrorstring(xxx) << ", @ line " << __line__ \
<< endl; cudadevicesynchronize(); } while (0)

// Pair of doubles; init() is marked callable from both host and device code.
struct p {
    double x,y;
    __host__ __device__ void init(const double &a, const double &b) { x = a; y = b; }
};

// Failing reproduction: dp is used as if it referred to device memory, but it
// is an ordinary host object.
int main(int argc, char **argv) {
    // hp, hq and dp are all plain host objects; dp is NOT a pointer here.
    p hp, hq, dp;
    cout << "size of p: " << sizeof(p) << endl;
    // &dp is the host address of dp. cudaMalloc stores the device pointer it
    // returns at that address, i.e. inside the host struct dp itself.
    cuda_warn(cudamalloc((void**) &dp, sizeof(p)));
    printf("dp: %p\n", &dp); // prints the address of the host variable dp, not a device address
    hp.init(1.2,-2.1);
    hq.init(0.,0.);
    // Both copies pass &dp (a host address) where a device pointer is expected.
    cuda_warn(cudamemcpy(&dp, &hp, sizeof(p), cudamemcpyhosttodevice));
    cuda_warn(cudamemcpy(&hq, &dp, sizeof(p), cudamemcpydevicetohost));
    cout << "copy back: " << hq.x << "\t" << hq.y << endl;
    // init() is invoked on the host object dp; no device code runs here.
    dp.init(3.,3.);
    // Same problem again: &dp is passed as the device-side source.
    cuda_warn(cudamemcpy(&hp, &dp, sizeof(p), cudamemcpydevicetohost));
    cout << "copy new: " << hp.x << "\t" << hp.y << endl;
    return 0;
}
i'm compiling (my card tesla c2050):
nvcc -arch sm_20 -o exec file.cu
The result I'm getting is:
size of p: 16
dp: 0x7fff82d4b7b0
cuda error: invalid argument, @ line 24
cuda error: invalid argument, @ line 25
copy back: 0 0
cuda error: invalid argument, @ line 28
copy new: 1.2 -2.1
------------------
(program exited with code: 0)
Press return to continue
Thanks, guys, if you can help me with this!
====== after comments of @talonmies, @jackolantern, @robert crovella =======
Thanks, guys! That helped! Based on your comments, I corrected the code, and now it is working. For the record, here is the final solution:
#include <iostream>
#include <cstdio>
using namespace std;

// Runs one CUDA runtime call and, if it failed, prints the error string and
// the source line on cerr. The call is evaluated exactly once and its status
// is kept in a local, so a failing call is never re-executed just to fetch its
// message. The device is synchronized after every wrapped call (success or
// failure), which keeps this demo strictly sequential.
// This macro only warns: execution continues after an error.
#define cuda_warn(xxx)                                                     \
    do {                                                                   \
        cudaError_t cuda_warn_status_ = (xxx);                             \
        if (cuda_warn_status_ != cudaSuccess)                              \
            cerr << "cuda error: "                                         \
                 << cudaGetErrorString(cuda_warn_status_)                  \
                 << ", @ line " << __LINE__ << endl;                       \
        cudaDeviceSynchronize();                                           \
    } while (0)

// Pair of doubles that can be initialised from host or device code.
struct p {
    double x, y;

    // Stores a in x and b in y.
    __host__ __device__ void init(const double &a, const double &b) {
        x = a;
        y = b;
    }
};

/* included kernel function */
// Calls target->init(a, b) on the device. `target` must point to device
// memory. Every launched thread performs the same write, so the kernel is
// meant to be launched as <<<1, 1>>>.
__global__ void dev_p_init(p *target, double a, double b) {
    target->init(a, b);
}

// Round-trips a struct through device memory and then modifies it on the
// device with a kernel.
int main(int argc, char **argv) {
    // dp is a host variable that HOLDS a device address; hp and hq live on
    // the host. dp starts as NULL so a failed allocation leaves it defined.
    p hp, hq, *dp = NULL; //*changed*
    cout << "size of p: " << sizeof(p) << endl;
    // cudaMalloc writes the device address into dp through &dp.
    cuda_warn(cudaMalloc((void**) &dp, sizeof(p)));
    // Print the value of dp (the device address), not &dp, which is only the
    // host address of the pointer variable.
    printf("dp: %p\n", (void*) dp); // print dp's address on device
    hp.init(1.2, -2.1);
    hq.init(0., 0.);
    cuda_warn(cudaMemcpy(dp, &hp, sizeof(p), cudaMemcpyHostToDevice)); //*changed*
    cuda_warn(cudaMemcpy(&hq, dp, sizeof(p), cudaMemcpyDeviceToHost)); //*changed*
    cout << "copy back: " << hq.x << "\t" << hq.y << endl;
    dev_p_init<<< 1, 1 >>>(dp, 3., 3.); //*call kernel*
    // A kernel launch returns no status; launch errors must be fetched
    // explicitly.
    cuda_warn(cudaGetLastError());
    cuda_warn(cudaMemcpy(&hp, dp, sizeof(p), cudaMemcpyDeviceToHost)); //*changed*
    cout << "copy new: " << hp.x << "\t" << hp.y << endl;
    // Release the device allocation before exiting.
    cuda_warn(cudaFree(dp));
    return 0;
}

And the corrected output:
size of p: 16 dp: 0x7fff6fa2e498 copy back: 1.2 -2.1 copy new: 3 3 ------------------ (program exited code: 0) press return continue
As noticed by @talonmies, &dp is not a valid device pointer. Indeed, when dp is a plain variable it resides on the host, so its address points into host memory space. In contrast, when dp is declared as a pointer, cudaMalloc receives its address (&dp) as a parameter and stores in dp a value that points into device memory space.
This is the correct version of the code:
#include <iostream>
#include <cstdio>
using namespace std;

// Runs one CUDA runtime call and, if it failed, prints the error string and
// the source line on cerr. The call is evaluated exactly once and its status
// is kept in a local, so a failing call is never re-executed just to fetch its
// message. The device is synchronized after every wrapped call (success or
// failure). This macro only warns: execution continues after an error.
#define cuda_warn(xxx)                                                     \
    do {                                                                   \
        cudaError_t cuda_warn_status_ = (xxx);                             \
        if (cuda_warn_status_ != cudaSuccess)                              \
            cerr << "cuda error: "                                         \
                 << cudaGetErrorString(cuda_warn_status_)                  \
                 << ", @ line " << __LINE__ << endl;                       \
        cudaDeviceSynchronize();                                           \
    } while (0)

// Pair of doubles that can be initialised from host or device code.
struct p {
    double x, y;

    // Stores a in x and b in y.
    __host__ __device__ void init(const double &a, const double &b) {
        x = a;
        y = b;
    }
};

// Minimal host -> device -> host round trip of one struct.
int main(int argc, char **argv) {
    // dp is a host variable that holds a device address. It starts as NULL so
    // a failed allocation leaves it with a defined value.
    p *dp = NULL;
    p hp, hq;
    // Give the host objects defined contents before they are copied, so the
    // transfer does not move indeterminate bytes.
    hp.init(1.2, -2.1);
    hq.init(0., 0.);
    // cudaMalloc writes the device address into dp through &dp.
    cuda_warn(cudaMalloc((void**) &dp, sizeof(p)));
    // dp itself (not &dp) is the device-side argument of every copy.
    cuda_warn(cudaMemcpy(dp, &hp, sizeof(p), cudaMemcpyHostToDevice));
    cuda_warn(cudaMemcpy(&hq, dp, sizeof(p), cudaMemcpyDeviceToHost));
    cuda_warn(cudaMemcpy(&hp, dp, sizeof(p), cudaMemcpyDeviceToHost));
    // Release the device allocation before exiting.
    cuda_warn(cudaFree(dp));
    return 0;
}
Comments
Post a Comment