/*
  Copyright 2020 Equinor ASA

  This file is part of the Open Porous Media project (OPM).

  OPM is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  OPM is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/

#include <config.h> // CMake

#include <opm/common/OpmLog/OpmLog.hpp>
#include <opm/common/ErrorMacros.hpp>

#include <opm/simulators/linalg/bda/openclKernels.hpp>
#include <opm/simulators/linalg/bda/WellContributions.hpp>
#include <opm/simulators/linalg/bda/MultisegmentWellContribution.hpp>

namespace Opm
{

#if HAVE_OPENCL
using Opm::Accelerator::OpenclKernels;
#endif

WellContributions::WellContributions(std::string accelerator_mode, bool useWellConn)
{
    if (accelerator_mode.compare("cusparse") == 0) {
        cuda_gpu = true;
    }
    else if (accelerator_mode.compare("opencl") == 0) {
        opencl_gpu = true;
    }
    else if (accelerator_mode.compare("fpga") == 0) {
        // unused for FPGA, but must be defined to avoid error
    }
    else if (accelerator_mode.compare("amgcl") == 0) {
        if (!useWellConn) {
            OPM_THROW(std::logic_error, "Error amgcl requires --matrix-add-well-contributions=true");
        }
    }
    else {
        OPM_THROW(std::logic_error, "Invalid accelerator mode");
    }
}

WellContributions::~WellContributions()
{
    multisegments.clear();

#if HAVE_CUDA
    if (cuda_gpu) {
        freeCudaMemory(); // should come before 'delete[] h_x'
    }
#endif

    if (num_std_wells > 0) {
        delete[] val_pointers;
    }

#if HAVE_OPENCL
    if (opencl_gpu) {
        if (num_ms_wells > 0) {
            delete[] h_x;
            delete[] h_y;
        }
    }
#endif
}

#if HAVE_OPENCL
void WellContributions::setOpenCLEnv(cl::Context *context_, cl::CommandQueue *queue_)
{
    this->context = context_;
    this->queue = queue_;
}

void WellContributions::setKernel(Opm::Accelerator::stdwell_apply_kernel_type *kernel_,
                                  Opm::Accelerator::stdwell_apply_no_reorder_kernel_type *kernel_no_reorder_)
{
    this->kernel = kernel_;
    this->kernel_no_reorder = kernel_no_reorder_;
}

void WellContributions::setReordering(int *h_toOrder_, bool reorder_)
{
    this->h_toOrder = h_toOrder_;
    this->reorder = reorder_;
}

void WellContributions::apply_stdwells(cl::Buffer d_x, cl::Buffer d_y, cl::Buffer d_toOrder)
{
    if (reorder) {
        OpenclKernels::apply_stdwells_reorder(*d_Cnnzs_ocl, *d_Dnnzs_ocl, *d_Bnnzs_ocl, *d_Ccols_ocl, *d_Bcols_ocl,
                d_x, d_y, d_toOrder, dim, dim_wells, *d_val_pointers_ocl, num_std_wells);
    } else {
        OpenclKernels::apply_stdwells_no_reorder(*d_Cnnzs_ocl, *d_Dnnzs_ocl, *d_Bnnzs_ocl, *d_Ccols_ocl, *d_Bcols_ocl,
                d_x, d_y, dim, dim_wells, *d_val_pointers_ocl, num_std_wells);
    }
}

void WellContributions::apply_mswells(cl::Buffer d_x, cl::Buffer d_y)
{
    if (h_x == nullptr) {
        h_x = new double[N];
        h_y = new double[N];
    }

    // copy vectors x and y from GPU to CPU
    events.resize(2);
    queue->enqueueReadBuffer(d_x, CL_FALSE, 0, sizeof(double) * N, h_x, nullptr, &events[0]);
    queue->enqueueReadBuffer(d_y, CL_FALSE, 0, sizeof(double) * N, h_y, nullptr, &events[1]);
    cl::WaitForEvents(events);
    events.clear();

    // actually apply MultisegmentWells
    for (auto& well : multisegments) {
        well->setReordering(h_toOrder, reorder);
        well->apply(h_x, h_y);
    }

    // copy vector y from CPU to GPU
    events.resize(1);
    queue->enqueueWriteBuffer(d_y, CL_FALSE, 0, sizeof(double) * N, h_y, nullptr, &events[0]);
    events[0].wait();
    events.clear();
}

void WellContributions::apply(cl::Buffer d_x, cl::Buffer d_y, cl::Buffer d_toOrder)
{
    if (num_std_wells > 0) {
        apply_stdwells(d_x, d_y, d_toOrder);
    }

    if (num_ms_wells > 0) {
        apply_mswells(d_x, d_y);
    }
}
#endif

void
WellContributions::addMatrix([[maybe_unused]] MatrixType type, [[maybe_unused]] int *colIndices,
                             [[maybe_unused]] double *values, [[maybe_unused]] unsigned int val_size)
{
    if (!allocated) {
        OPM_THROW(std::logic_error, "Error cannot add well contribution before allocating memory in WellContributions");
    }

#if HAVE_CUDA
    if (cuda_gpu) {
        addMatrixGpu(type, colIndices, values, val_size);
    }
#endif

#if HAVE_OPENCL
    if (opencl_gpu) {
        switch (type) {
        case MatrixType::C:
            events.resize(2);
            queue->enqueueWriteBuffer(*d_Cnnzs_ocl, CL_FALSE, sizeof(double) * num_blocks_so_far * dim * dim_wells,
                                      sizeof(double) * val_size * dim * dim_wells, values, nullptr, &events[0]);
            queue->enqueueWriteBuffer(*d_Ccols_ocl, CL_FALSE, sizeof(int) * num_blocks_so_far,
                                      sizeof(int) * val_size, colIndices, nullptr, &events[1]);
            cl::WaitForEvents(events);
            events.clear();
            break;

        case MatrixType::D:
            events.resize(1);
            queue->enqueueWriteBuffer(*d_Dnnzs_ocl, CL_FALSE, sizeof(double) * num_std_wells_so_far * dim_wells * dim_wells,
                                      sizeof(double) * dim_wells * dim_wells, values, nullptr, &events[0]);
            events[0].wait();
            events.clear();
            break;

        case MatrixType::B:
            events.resize(2);
            queue->enqueueWriteBuffer(*d_Bnnzs_ocl, CL_FALSE, sizeof(double) * num_blocks_so_far * dim * dim_wells,
                                      sizeof(double) * val_size * dim * dim_wells, values, nullptr, &events[0]);
            queue->enqueueWriteBuffer(*d_Bcols_ocl, CL_FALSE, sizeof(int) * num_blocks_so_far,
                                      sizeof(int) * val_size, colIndices, nullptr, &events[1]);
            cl::WaitForEvents(events);
            events.clear();

            val_pointers[num_std_wells_so_far] = num_blocks_so_far;
            if (num_std_wells_so_far == num_std_wells - 1) {
                val_pointers[num_std_wells] = num_blocks;
                events.resize(1);
                queue->enqueueWriteBuffer(*d_val_pointers_ocl, CL_FALSE, 0,
                                          sizeof(unsigned int) * (num_std_wells + 1), val_pointers, nullptr, &events[0]);
                events[0].wait();
                events.clear();
            }
            break;

        default:
            OPM_THROW(std::logic_error, "Error unsupported matrix ID for WellContributions::addMatrix()");
        }
    }
#endif

    if (MatrixType::B == type) {
        num_blocks_so_far += val_size;
        num_std_wells_so_far++;
    }

#if !HAVE_CUDA && !HAVE_OPENCL
    OPM_THROW(std::logic_error, "Error cannot add StandardWell matrix on GPU because neither CUDA nor OpenCL were found by cmake");
#endif
}

void WellContributions::setBlockSize(unsigned int dim_, unsigned int dim_wells_)
{
    dim = dim_;
    dim_wells = dim_wells_;

    if (dim != 3 || dim_wells != 4) {
        std::ostringstream oss;
        oss << "WellContributions::setBlockSize error: dim and dim_wells must be equal to 3 and 4, respectively, otherwise the add well contributions kernel won't work.\n";
        OPM_THROW(std::logic_error, oss.str());
    }
}

void WellContributions::addNumBlocks(unsigned int numBlocks)
{
    if (allocated) {
        OPM_THROW(std::logic_error, "Error cannot add more sizes after allocated in WellContributions");
    }
    num_blocks += numBlocks;
    num_std_wells++;
}

void WellContributions::alloc()
{
    if (num_std_wells > 0) {
        val_pointers = new unsigned int[num_std_wells + 1];

#if HAVE_CUDA
        if (cuda_gpu) {
            allocStandardWells();
        }
#endif

#if HAVE_OPENCL
        if (opencl_gpu) {
            d_Cnnzs_ocl = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(double) * num_blocks * dim * dim_wells);
            d_Dnnzs_ocl = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(double) * num_std_wells * dim_wells * dim_wells);
            d_Bnnzs_ocl = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(double) * num_blocks * dim * dim_wells);
            d_Ccols_ocl = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(int) * num_blocks);
            d_Bcols_ocl = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(int) * num_blocks);
            d_val_pointers_ocl = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(unsigned int) * (num_std_wells + 1));
        }
#endif

        allocated = true;
    }
}

void WellContributions::addMultisegmentWellContribution(unsigned int dim_, unsigned int dim_wells_,
        unsigned int Mb,
        std::vector<double>& Bvalues, std::vector<unsigned int>& BcolIndices, std::vector<unsigned int>& BrowPointers,
        unsigned int DnumBlocks, double* Dvalues, UMFPackIndex* DcolPointers, UMFPackIndex* DrowIndices,
        std::vector<double>& Cvalues)
{
    assert(dim == dim_);
    multisegments.push_back(std::make_unique<MultisegmentWellContribution>(dim_, dim_wells_, Mb, Bvalues, BcolIndices,
                                                                           BrowPointers, DnumBlocks, Dvalues,
                                                                           DcolPointers, DrowIndices, Cvalues));
    ++num_ms_wells;
}

} //namespace Opm
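
// Usage sketch: the call order implied by the guards above ("cannot add more
// sizes after allocated", "cannot add well contribution before allocating").
// Illustrative only; the real driver lives in the bda bridge and the well
// model, and the names wc/context/queue/cols/Cvals/Dvals/Bvals/nBlocks below
// are hypothetical.
//
//   WellContributions wc("opencl", /*useWellConn=*/false);
//   wc.setOpenCLEnv(&context, &queue);   // OpenCL backend only
//   wc.setBlockSize(3, 4);               // only dim = 3, dim_wells = 4 is supported
//   wc.addNumBlocks(nBlocks);            // once per standard well, before alloc()
//   wc.alloc();                          // allocates val_pointers and the GPU buffers
//   wc.addMatrix(WellContributions::MatrixType::C, cols, Cvals, nBlocks);
//   wc.addMatrix(WellContributions::MatrixType::D, cols, Dvals, nBlocks);
//   wc.addMatrix(WellContributions::MatrixType::B, cols, Bvals, nBlocks);
//       // B must come last for each well: its case advances num_blocks_so_far
//       // and num_std_wells_so_far, which the C and D writes use as offsets.
//   wc.apply(d_x, d_y, d_toOrder);       // during the solve: y -= C^T * (D^-1 * (B * x))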