mirror of
https://github.com/OPM/opm-simulators.git
synced 2024-12-01 13:29:08 -06:00
Fix stdwell_apply opencl kernel
This commit is contained in:
parent
994260aaea
commit
3decb73561
@ -80,11 +80,10 @@ __global__ void apply_well_contributions(
|
||||
}
|
||||
|
||||
// merge all blocks into 1 dim*dim_wells block
|
||||
// since NORNE has only 2 parallel blocks, do not use a loop
|
||||
// since 3*4 blocks give 2 parallel blocks, do not use a loop
|
||||
// 0x00ffffff contains 24 ones, representing the two blocks that are added
|
||||
temp += __shfl_down_sync(0x00ffffff, temp, dim * dim_wells);
|
||||
|
||||
b = idx_t / vals_per_block + val_pointers[idx_b];
|
||||
|
||||
// merge all (dim) columns of 1 block, results in a single 1*dim_wells vector, which is used to multiply with invD
|
||||
if (idx_t < vals_per_block) {
|
||||
// should be a loop as well, now only works for dim == 3
|
||||
|
@ -37,15 +37,17 @@ __kernel void stdwell_apply(
|
||||
b += numBlocksPerWarp;
|
||||
}
|
||||
|
||||
// merge all blocks in this workgroup into 1 block
|
||||
// if numBlocksPerWarp >= 3, should use loop
|
||||
if(wiId < valsPerBlock){
|
||||
localSum[wiId] += localSum[wiId + valsPerBlock];
|
||||
for (int i = 1; i < numBlocksPerWarp; ++i) {
|
||||
localSum[wiId] += localSum[wiId + i*valsPerBlock];
|
||||
}
|
||||
}
|
||||
|
||||
b = wiId/valsPerBlock + val_pointers[wgId];
|
||||
|
||||
if(c == 0 && wiId < valsPerBlock){
|
||||
for(unsigned int stride = 2; stride > 0; stride >>= 1){
|
||||
localSum[wiId] += localSum[wiId + stride];
|
||||
for(unsigned int i = dim - 1; i > 0; --i){
|
||||
localSum[wiId] += localSum[wiId + i];
|
||||
}
|
||||
z1[r] = localSum[wiId];
|
||||
}
|
||||
|
@ -35,15 +35,17 @@ __kernel void stdwell_apply_no_reorder(
|
||||
b += numBlocksPerWarp;
|
||||
}
|
||||
|
||||
// merge all blocks in this workgroup into 1 block
|
||||
// if numBlocksPerWarp >= 3, should use loop
|
||||
if(wiId < valsPerBlock){
|
||||
localSum[wiId] += localSum[wiId + valsPerBlock];
|
||||
for (int i = 1; i < numBlocksPerWarp; ++i) {
|
||||
localSum[wiId] += localSum[wiId + i*valsPerBlock];
|
||||
}
|
||||
}
|
||||
|
||||
b = wiId/valsPerBlock + val_pointers[wgId];
|
||||
|
||||
if(c == 0 && wiId < valsPerBlock){
|
||||
for(unsigned int stride = 2; stride > 0; stride >>= 1){
|
||||
localSum[wiId] += localSum[wiId + stride];
|
||||
for(unsigned int i = dim - 1; i > 0; --i){
|
||||
localSum[wiId] += localSum[wiId + i];
|
||||
}
|
||||
z1[r] = localSum[wiId];
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user