[GPU] Fixed a couple of issues in clDNN found by static analysis (#8706)
This commit is contained in:
committed by
GitHub
parent
d248bdc91a
commit
24675fd8ea
@@ -599,8 +599,6 @@ struct tensor {
|
||||
|
||||
private:
|
||||
value_type _sizes[tensor_dim_max];
|
||||
value_type _dimOffset;
|
||||
value_type _dimSize;
|
||||
|
||||
public:
|
||||
explicit tensor(value_type default_size = 0) :
|
||||
@@ -884,7 +882,7 @@ public:
|
||||
for (size_t i = 0; i < sizes.size(); ++i) {
|
||||
auto c = output_order[i];
|
||||
auto pos = internal_order.find(c);
|
||||
if (pos == internal_order.npos)
|
||||
if (pos == std::string::npos)
|
||||
throw std::domain_error(std::string("Unknown coord type: ") + c);
|
||||
|
||||
sizes[i] = _sizes[pos];
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
#pragma once
|
||||
#include "common_types.h"
|
||||
#include <type_traits>
|
||||
#include <stdexcept>
|
||||
|
||||
namespace kernel_selector {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@@ -27,7 +28,7 @@ inline uint32_t BytesPerElement(Datatype dt) {
|
||||
case Datatype::INT64:
|
||||
return 8;
|
||||
default:
|
||||
return 0;
|
||||
throw std::runtime_error("[GPU] BytesPerElement doesn't support given precision");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -42,7 +43,7 @@ inline uint32_t BytesPerElement(WeightsType wt) {
|
||||
case WeightsType::BINARY:
|
||||
return 4;
|
||||
default:
|
||||
return 0;
|
||||
throw std::runtime_error("[GPU] BytesPerElement doesn't support given precision");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -537,10 +537,12 @@ NDims WeightsTensor::GetSimpleDims(const std::vector<size_t>& d, WeightsLayout l
|
||||
case g_os_is_zyx_osa4_isa8_osv8_isv4:
|
||||
newDims[4] = RoundUp(newDims[4], 32);
|
||||
newDims[3] = RoundUp(newDims[3], 32);
|
||||
break;
|
||||
case os_is_zyx_osa4_isa8_osv8_isv2:
|
||||
case g_os_is_zyx_osa4_isa8_osv8_isv2:
|
||||
newDims[4] = RoundUp(newDims[4], 32);
|
||||
newDims[3] = RoundUp(newDims[3], 16);
|
||||
break;
|
||||
case os_is_yx_osa4_isa8_osv8_isv2:
|
||||
case g_os_is_yx_osa4_isa8_osv8_isv2:
|
||||
case os_is_yx_osa2_isa8_osv16_isv2:
|
||||
|
||||
@@ -151,7 +151,7 @@ JitConstants DeconvolutionKernel_imad_along_f_tile_bfx::GetJitConstants(const de
|
||||
input_tile_ifm_pitch = zyx_pitch_factor * 16;
|
||||
}
|
||||
input_in_tile_batch_pitch = Align(in.Feature().LogicalDimPadded(), 16) * zyx_pitch_factor;
|
||||
} else if (in_layout == DataLayout::b_fs_yx_fsv32 || in_layout == DataLayout::b_fs_yx_fsv32) {
|
||||
} else if (in_layout == DataLayout::b_fs_yx_fsv32 || in_layout == DataLayout::b_fs_zyx_fsv32) {
|
||||
if (tile_ifm == 32) {
|
||||
input_tile_ifm_pitch = zyx_pitch_factor * 32;
|
||||
}
|
||||
|
||||
@@ -13,7 +13,7 @@ Datatype NonMaxSuppressionKernelRef::GetAccumulatorType(const non_max_suppressio
|
||||
auto out_dt = params.output.GetDType();
|
||||
|
||||
auto smaller_fp_type = [](const Datatype& current, const Datatype& candidate) -> Datatype {
|
||||
if (candidate != Datatype::F32 || candidate != Datatype::F16)
|
||||
if (candidate != Datatype::F32 && candidate != Datatype::F16)
|
||||
return current;
|
||||
|
||||
return BytesPerElement(candidate) < BytesPerElement(current) ? candidate : current;
|
||||
|
||||
@@ -155,10 +155,6 @@ bool PoolingKernel_b_fs_yx_fsv16::Validate(const Params& p, const optional_param
|
||||
}
|
||||
|
||||
KernelsData PoolingKernel_b_fs_yx_fsv16::GetKernelsData(const Params& params, const optional_params& options) const {
|
||||
const auto& pooling_p = static_cast<const pooling_params&>(params);
|
||||
if (pooling_p.output.Batch().v == 1)
|
||||
return GetCommonKernelsData(params, options);
|
||||
else
|
||||
return GetCommonKernelsData(params, options);
|
||||
return GetCommonKernelsData(params, options);
|
||||
}
|
||||
} // namespace kernel_selector
|
||||
|
||||
@@ -171,12 +171,12 @@ struct quantize_fuse_params : fuse_params {
|
||||
float out_scale;
|
||||
float out_shift;
|
||||
|
||||
size_t in_range_lo_idx;
|
||||
size_t in_range_hi_idx;
|
||||
size_t in_scale_idx;
|
||||
size_t in_shift_idx;
|
||||
size_t out_scale_idx;
|
||||
size_t out_shift_idx;
|
||||
size_t in_range_lo_idx = 0;
|
||||
size_t in_range_hi_idx = 0;
|
||||
size_t in_scale_idx = 0;
|
||||
size_t in_shift_idx = 0;
|
||||
size_t out_scale_idx = 0;
|
||||
size_t out_shift_idx = 0;
|
||||
};
|
||||
|
||||
} // namespace kernel_selector
|
||||
|
||||
@@ -251,23 +251,11 @@ KernelsData ResampleKernelBase::GetCommonKernelsData(const Params& params, const
|
||||
|
||||
Datatype ResampleKernelBase::GetAccumulatorType(const resample_params& params) const {
|
||||
auto in_dt = params.inputs[0].GetDType();
|
||||
auto out_dt = params.output.GetDType();
|
||||
|
||||
if (params.resampleType == ResampleType::NEAREST_NEIGHBOR)
|
||||
return in_dt;
|
||||
|
||||
auto smaller_fp_type = [](const Datatype& current, const Datatype& candidate) -> Datatype {
|
||||
if (candidate != Datatype::F32 || candidate != Datatype::F16)
|
||||
return current;
|
||||
|
||||
return BytesPerElement(candidate) < BytesPerElement(current) ? candidate : current;
|
||||
};
|
||||
|
||||
Datatype fp_type = Datatype::F32;
|
||||
fp_type = smaller_fp_type(fp_type, in_dt);
|
||||
fp_type = smaller_fp_type(fp_type, out_dt);
|
||||
|
||||
return fp_type;
|
||||
return Datatype::F32;
|
||||
}
|
||||
|
||||
} // namespace kernel_selector
|
||||
|
||||
@@ -73,7 +73,10 @@ static std::vector<unsigned char> loadBinaryFromFile(std::string path) {
|
||||
|
||||
if (fp) {
|
||||
fseek(fp, 0, SEEK_END);
|
||||
size_t nsize = (size_t)ftell(fp);
|
||||
auto sz = ftell(fp);
|
||||
if (sz < 0)
|
||||
return {};
|
||||
auto nsize = static_cast<size_t>(sz);
|
||||
|
||||
fseek(fp, 0, SEEK_SET);
|
||||
|
||||
|
||||
@@ -32,7 +32,9 @@ memory::memory(engine* engine, const layout& layout, allocation_type type, bool
|
||||
|
||||
memory::~memory() {
|
||||
if (!_reused && _engine) {
|
||||
_engine->subtract_memory_used(_bytes_count, _type);
|
||||
try {
|
||||
_engine->subtract_memory_used(_bytes_count, _type);
|
||||
} catch (...) {}
|
||||
GPU_DEBUG_GET_INSTANCE(debug_config);
|
||||
GPU_DEBUG_IF(debug_config->verbose >= 1) {
|
||||
GPU_DEBUG_COUT << "Free " << _bytes_count << " bytes of " << _type << " allocation type"
|
||||
|
||||
@@ -149,6 +149,7 @@ bool get_imad_support(const cl::Device& device) {
|
||||
|
||||
bool is_local_block_io_supported(const cl::Device& device) {
|
||||
try {
|
||||
cl_int status = CL_SUCCESS;
|
||||
cl::Context ctx(device);
|
||||
std::string kernel_code =
|
||||
"__attribute__((intel_reqd_sub_group_size(8)))"
|
||||
@@ -167,16 +168,23 @@ bool is_local_block_io_supported(const cl::Device& device) {
|
||||
return false;
|
||||
cl::Buffer buffer(ctx, CL_MEM_READ_WRITE, sizeof(uint8_t) * 8);
|
||||
cl::Kernel kernel(program, "is_local_block_io_supported");
|
||||
kernel.setArg(0, buffer);
|
||||
status = kernel.setArg(0, buffer);
|
||||
|
||||
if (status != CL_SUCCESS)
|
||||
return false;
|
||||
|
||||
cl::Event ev;
|
||||
cl::CommandQueue queue(ctx, device);
|
||||
queue.enqueueNDRangeKernel(kernel, cl::NDRange(), cl::NDRange(8), cl::NDRange(8), nullptr, &ev);
|
||||
status = queue.enqueueNDRangeKernel(kernel, cl::NDRange(), cl::NDRange(8), cl::NDRange(8), nullptr, &ev);
|
||||
if (status != CL_SUCCESS)
|
||||
return false;
|
||||
ev.wait();
|
||||
|
||||
uint8_t result[8];
|
||||
uint8_t expected[8] = { 1, 3, 5, 7, 9, 11, 13, 15 };
|
||||
queue.enqueueReadBuffer(buffer, CL_TRUE, 0, sizeof(uint8_t) * 8, &result);
|
||||
status = queue.enqueueReadBuffer(buffer, CL_TRUE, 0, sizeof(uint8_t) * 8, &result);
|
||||
if (status != CL_SUCCESS)
|
||||
return false;
|
||||
for (int i = 0; i < 8; ++i) {
|
||||
if (result[i] != expected[i])
|
||||
return false;
|
||||
|
||||
@@ -643,7 +643,11 @@ public:
|
||||
UsmHolder(const cl::UsmHelper& usmHelper, void* ptr) : _usmHelper(usmHelper), _ptr(ptr) { }
|
||||
void* ptr() { return _ptr; }
|
||||
~UsmHolder() {
|
||||
_usmHelper.free_mem(_ptr);
|
||||
try {
|
||||
_usmHelper.free_mem(_ptr);
|
||||
} catch (...) {
|
||||
// Exception may happen only when clMemFreeINTEL function is unavailable, thus can't free memory properly
|
||||
}
|
||||
}
|
||||
private:
|
||||
const cl::UsmHelper& _usmHelper;
|
||||
|
||||
@@ -441,7 +441,7 @@ void ocl_stream::wait_for_events(const std::vector<event::ptr>& events) {
|
||||
|
||||
std::vector<cl::Event> clevents;
|
||||
for (auto& ev : events) {
|
||||
if (auto ocl_base_ev = dynamic_cast<ocl_base_event*>(ev.get()))
|
||||
if (auto ocl_base_ev = downcast<ocl_base_event>(ev.get()))
|
||||
clevents.push_back(ocl_base_ev->get());
|
||||
}
|
||||
|
||||
|
||||
@@ -41,7 +41,8 @@ public:
|
||||
back_edges(this->get_primitive()->back_edges),
|
||||
use_current_iteration(!this->get_primitive()->current_iteration_id.empty()),
|
||||
use_execution_condition(!this->get_primitive()->condition_id.empty()),
|
||||
max_iteration(this->get_primitive()->max_iteration < 0 ? DEFAULT_MAX_NUM_ITERATION : this->get_primitive()->max_iteration) {}
|
||||
max_iteration(this->get_primitive()->max_iteration < 0 ? DEFAULT_MAX_NUM_ITERATION : this->get_primitive()->max_iteration),
|
||||
iteration_axis(0) {}
|
||||
|
||||
mutable size_t iteration_axis;
|
||||
int64_t max_iteration;
|
||||
|
||||
@@ -368,7 +368,7 @@ network::output_chains_map::iterator network::add_output_chain(std::shared_ptr<p
|
||||
|
||||
// find all dependencies that are 'optimized'
|
||||
while (!candidates.empty()) {
|
||||
auto& cand = candidates.top();
|
||||
auto cand = candidates.top();
|
||||
candidates.pop();
|
||||
const auto& mem_cand = cand->output_memory();
|
||||
if (eng.is_the_same_buffer(mem_orig, mem_cand)) {
|
||||
|
||||
@@ -647,7 +647,7 @@ struct resample_random_test : testing::TestWithParam<resample_random_test_params
|
||||
auto output_coords = tensor(batch(bi), feature(fi), spatial(xi, yi, 0, 0));
|
||||
auto output_val = out_ptr[output_lay.get_linear_offset(output_coords)];
|
||||
|
||||
EXPECT_NEAR(static_cast<float>(output_val), final_val, 1.e-1f)
|
||||
ASSERT_NEAR(static_cast<float>(output_val), final_val, 1.e-1f)
|
||||
<< " at bi=" << bi << ", fi=" << fi << ", xi=" << xi << ", yi=" << yi;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user