Merge pull request #3552 from fgfuchs/cuda_improvements

cusparseSolver: fix wrong element type and element count for the cols and rows device buffers
This commit is contained in:
Atgeirr Flø Rasmussen 2021-09-28 11:41:17 +02:00 committed by GitHub
commit e5c85e88ce
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 4 additions and 4 deletions

View File

@ -227,8 +227,8 @@ void cusparseSolverBackend<block_size>::initialize(int N, int nnz, int dim) {
cudaMalloc((void**)&d_t, sizeof(double) * N);
cudaMalloc((void**)&d_v, sizeof(double) * N);
cudaMalloc((void**)&d_bVals, sizeof(double) * nnz);
cudaMalloc((void**)&d_bCols, sizeof(double) * nnz);
cudaMalloc((void**)&d_bRows, sizeof(double) * (Nb + 1));
cudaMalloc((void**)&d_bCols, sizeof(int) * nnzb);
cudaMalloc((void**)&d_bRows, sizeof(int) * (Nb + 1));
cudaMalloc((void**)&d_mVals, sizeof(double) * nnz);
cudaCheckLastError("Could not allocate enough memory on GPU");
@ -295,7 +295,7 @@ void cusparseSolverBackend<block_size>::copy_system_to_gpu(double *vals, int *ro
cudaMemcpyAsync(d_bVals, vals, nnz * sizeof(double), cudaMemcpyHostToDevice, stream);
#endif
cudaMemcpyAsync(d_bCols, cols, nnz * sizeof(int), cudaMemcpyHostToDevice, stream);
cudaMemcpyAsync(d_bCols, cols, nnzb * sizeof(int), cudaMemcpyHostToDevice, stream);
cudaMemcpyAsync(d_bRows, rows, (Nb + 1) * sizeof(int), cudaMemcpyHostToDevice, stream);
cudaMemcpyAsync(d_b, b, N * sizeof(double), cudaMemcpyHostToDevice, stream);
cudaMemsetAsync(d_x, 0, sizeof(double) * N, stream);

View File

@ -84,7 +84,7 @@ private:
/// Copy linear system to GPU
/// \param[in] vals array of nonzeroes, each block is stored row-wise, contains nnz values
/// \param[in] rows array of rowPointers, contains N/dim+1 values
/// \param[in] cols array of columnIndices, contains nnz values
/// \param[in] cols array of columnIndices, contains nnzb values
/// \param[in] b input vector, contains N values
void copy_system_to_gpu(double *vals, int *rows, int *cols, double *b);