[IE CLDNN] Eltwise b_fs_yx_fsv16 mixed precision support (#3734)
This commit is contained in:
parent
559b509b15
commit
f7e0d90292
@ -24,6 +24,8 @@ ParamsKey EltwiseKernel_b_fs_yx_fsv16::GetSupportedKey() const {
|
|||||||
ParamsKey k;
|
ParamsKey k;
|
||||||
k.EnableInputDataType(Datatype::F16);
|
k.EnableInputDataType(Datatype::F16);
|
||||||
k.EnableInputDataType(Datatype::F32);
|
k.EnableInputDataType(Datatype::F32);
|
||||||
|
k.EnableInputDataType(Datatype::INT8);
|
||||||
|
k.EnableInputDataType(Datatype::UINT8);
|
||||||
k.EnableOutputDataType(Datatype::F16);
|
k.EnableOutputDataType(Datatype::F16);
|
||||||
k.EnableOutputDataType(Datatype::F32);
|
k.EnableOutputDataType(Datatype::F32);
|
||||||
k.EnableOutputDataType(Datatype::INT8);
|
k.EnableOutputDataType(Datatype::INT8);
|
||||||
@ -78,9 +80,12 @@ JitConstants EltwiseKernel_b_fs_yx_fsv16::MakeLoadJitConstants(const eltwise_par
|
|||||||
"input" + std::to_string(input.index) +
|
"input" + std::to_string(input.index) +
|
||||||
"[0]"));
|
"[0]"));
|
||||||
} else {
|
} else {
|
||||||
|
std::string block_read_str = "BLOCK_READN(INPUT" + std::to_string(input.index) + "_TYPE, " +
|
||||||
|
"BLOCK_SIZE, " +
|
||||||
|
"input" + std::to_string(input.index) + ", " +
|
||||||
|
"INPUT" + std::to_string(input.index) + "_GET_INDEX(b, f_block*16, y, x))";
|
||||||
jit.AddConstant(MakeJitConstant(name,
|
jit.AddConstant(MakeJitConstant(name,
|
||||||
"READ_FUNC(input" + std::to_string(input.index) +
|
"TO_TYPE(MAKE_VECTOR_TYPE(ACCUMULATOR_TYPE, BLOCK_SIZE), " + block_read_str + ")"));
|
||||||
", INPUT"+std::to_string(input.index)+"_GET_INDEX(b, f_block*16, y, x))"));
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case EltwiseInputMode::OUTPUT_BUFFER:
|
case EltwiseInputMode::OUTPUT_BUFFER:
|
||||||
@ -176,20 +181,33 @@ bool EltwiseKernel_b_fs_yx_fsv16::Validate(const Params& params, const optional_
|
|||||||
|
|
||||||
auto input0 = ewParams.inputs[0];
|
auto input0 = ewParams.inputs[0];
|
||||||
|
|
||||||
for (size_t i = 1; i < ewParams.inputs.size(); i++) {
|
|
||||||
if (input0.GetDType() != ewParams.inputs[i].GetDType()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check that padding before features doesn't misalign the blocks
|
// Check that padding before features doesn't misalign the blocks
|
||||||
auto feature_block_size = 16;
|
auto feature_block_size = 16;
|
||||||
if (input0.Feature().pad.before % feature_block_size != 0 || output.Feature().pad.before % feature_block_size != 0) {
|
if (input0.Feature().pad.before % feature_block_size != 0 || output.Feature().pad.before % feature_block_size != 0) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto compareTensors = [](const DataTensor& input0, const DataTensor& input1) -> bool {
|
||||||
|
// Check all parameters except DataType
|
||||||
|
auto& input0_dims = input0.GetDims();
|
||||||
|
auto& input1_dims = input1.GetDims();
|
||||||
|
bool same = input0.GetLayout() == input1.GetLayout() &&
|
||||||
|
input0.GetPaddedVal() == input1.GetPaddedVal() &&
|
||||||
|
input0.GetViewOffset() == input1.GetViewOffset() &&
|
||||||
|
input0_dims.size() == input1_dims.size();
|
||||||
|
if (same) {
|
||||||
|
for (size_t i = 0; i < input0_dims.size(); i++) {
|
||||||
|
same &= input0_dims[i].v == input1_dims[i].v &&
|
||||||
|
input0_dims[i].pad.before == input1_dims[i].pad.before &&
|
||||||
|
input0_dims[i].pad.after == input1_dims[i].pad.after &&
|
||||||
|
input0_dims[i].pitch == input1_dims[i].pitch;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return same;
|
||||||
|
};
|
||||||
|
|
||||||
for (size_t i = 1; i < ewParams.inputs.size(); i++) {
|
for (size_t i = 1; i < ewParams.inputs.size(); i++) {
|
||||||
if (ewParams.inputs[i].LogicalSize() == input0.LogicalSize() && !(ewParams.inputs[i] == input0))
|
if (ewParams.inputs[i].LogicalSize() == input0.LogicalSize() && !(compareTensors(ewParams.inputs[i], input0)))
|
||||||
return false;
|
return false;
|
||||||
if (ewParams.inputs[i].Feature().pad.before % feature_block_size != 0) {
|
if (ewParams.inputs[i].Feature().pad.before % feature_block_size != 0) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -21,7 +21,7 @@
|
|||||||
#define FEATURE_SLICE_SIZE 16
|
#define FEATURE_SLICE_SIZE 16
|
||||||
|
|
||||||
#define OUTPUT_TYPE_BLOCK MAKE_VECTOR_TYPE(OUTPUT_TYPE, BLOCK_SIZE)
|
#define OUTPUT_TYPE_BLOCK MAKE_VECTOR_TYPE(OUTPUT_TYPE, BLOCK_SIZE)
|
||||||
#define TO_OUTPUT_TYPE_BLOCK(val) CAT(convert_, OUTPUT_TYPE_BLOCK)(val)
|
#define TO_TYPE(type, val) CAT(convert_, type)(val)
|
||||||
|
|
||||||
#if BLOCK_SIZE != 1
|
#if BLOCK_SIZE != 1
|
||||||
#define READ_FUNC(ptr, offset) CAT(DT_INPUT_BLOCK_READ, BLOCK_SIZE)(ptr, offset)
|
#define READ_FUNC(ptr, offset) CAT(DT_INPUT_BLOCK_READ, BLOCK_SIZE)(ptr, offset)
|
||||||
@ -68,10 +68,10 @@ KERNEL(eltwise_b_fs_yx_fsv16)(INPUTS_DECLS
|
|||||||
|
|
||||||
#if HAS_FUSED_OPS
|
#if HAS_FUSED_OPS
|
||||||
FUSED_OPS;
|
FUSED_OPS;
|
||||||
OUTPUT_TYPE_BLOCK out = TO_OUTPUT_TYPE_BLOCK(FUSED_OPS_RESULT);
|
OUTPUT_TYPE_BLOCK out = TO_TYPE(MAKE_VECTOR_TYPE(OUTPUT_TYPE, BLOCK_SIZE), FUSED_OPS_RESULT);
|
||||||
#else
|
#else
|
||||||
#if BLOCK_SIZE != 1
|
#if BLOCK_SIZE != 1
|
||||||
OUTPUT_TYPE_BLOCK out = ACTIVATION_TYPED(TO_OUTPUT_TYPE_BLOCK(res), ACTIVATION_PARAMS_TYPED);
|
OUTPUT_TYPE_BLOCK out = ACTIVATION_TYPED(TO_TYPE(MAKE_VECTOR_TYPE(OUTPUT_TYPE, BLOCK_SIZE), res), ACTIVATION_PARAMS_TYPED);
|
||||||
#else
|
#else
|
||||||
OUTPUT_TYPE out = ACTIVATION_TYPED(TO_OUTPUT_TYPE(res), ACTIVATION_PARAMS_TYPED);
|
OUTPUT_TYPE out = ACTIVATION_TYPED(TO_OUTPUT_TYPE(res), ACTIVATION_PARAMS_TYPED);
|
||||||
#endif
|
#endif
|
||||||
|
@ -3214,7 +3214,8 @@ class BaseEltwiseTest : public ::testing::TestWithParam<T> {
|
|||||||
|
|
||||||
class eltwise_test : public BaseEltwiseTest<eltwise_test_params> {
|
class eltwise_test : public BaseEltwiseTest<eltwise_test_params> {
|
||||||
public:
|
public:
|
||||||
VF<float> eltwise_ref(VVVVVVF<float> input0, VVVVVVF<float> input1, tensor input0_size, tensor input1_size, eltwise_mode mode) {
|
template<typename T1, typename T2>
|
||||||
|
VF<float> eltwise_ref(VVVVVVF<T1> input0, VVVVVVF<T2> input1, tensor input0_size, tensor input1_size, eltwise_mode mode) {
|
||||||
auto out_size = tensor::max(input0_size, input1_size);
|
auto out_size = tensor::max(input0_size, input1_size);
|
||||||
|
|
||||||
int output_b = out_size.batch[0];
|
int output_b = out_size.batch[0];
|
||||||
@ -3250,8 +3251,8 @@ public:
|
|||||||
int in1_y = y % input1_size.spatial[1];
|
int in1_y = y % input1_size.spatial[1];
|
||||||
int in1_x = x % input1_size.spatial[0];
|
int in1_x = x % input1_size.spatial[0];
|
||||||
|
|
||||||
auto in0 = input0[in0_b][in0_f][in0_w][in0_z][in0_y][in0_x];
|
auto in0 = static_cast<float>(input0[in0_b][in0_f][in0_w][in0_z][in0_y][in0_x]);
|
||||||
auto in1 = input1[in1_b][in1_f][in1_w][in1_z][in1_y][in1_x];
|
auto in1 = static_cast<float>(input1[in1_b][in1_f][in1_w][in1_z][in1_y][in1_x]);
|
||||||
output[b][f][w][z][y][x] = eltwise_execute<float>(mode, in0, in1);
|
output[b][f][w][z][y][x] = eltwise_execute<float>(mode, in0, in1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -3448,3 +3449,84 @@ INSTANTIATE_TEST_CASE_P(eltwise, eltwise_test_6d,
|
|||||||
::testing::ValuesIn(types),
|
::testing::ValuesIn(types),
|
||||||
::testing::ValuesIn(inputs_6d)
|
::testing::ValuesIn(inputs_6d)
|
||||||
), );
|
), );
|
||||||
|
|
||||||
|
class eltwise_test_mixed_precision : public eltwise_test {};
|
||||||
|
TEST_P(eltwise_test_mixed_precision, fsv16) {
|
||||||
|
auto p = GetParam();
|
||||||
|
|
||||||
|
ASSERT_EQ(std::get<2>(p).size(), 2);
|
||||||
|
|
||||||
|
auto mode = std::get<0>(p);
|
||||||
|
auto input0_dt = data_types::f16;
|
||||||
|
auto input1_dt = std::get<1>(p);
|
||||||
|
auto input0_size = std::get<2>(p)[0];
|
||||||
|
auto input1_size = std::get<2>(p)[1];
|
||||||
|
|
||||||
|
int b0 = input0_size[0];
|
||||||
|
int f0 = input0_size[1];
|
||||||
|
int z0 = input0_size.size() == 4 ? 1 : input0_size[2];
|
||||||
|
int y0 = input0_size[input0_size.size() == 4 ? 2 : 3];
|
||||||
|
int x0 = input0_size[input0_size.size() == 4 ? 3 : 4];
|
||||||
|
|
||||||
|
int b1 = input1_size[0];
|
||||||
|
int f1 = input1_size[1];
|
||||||
|
int z1 = input1_size.size() == 4 ? 1 : input1_size[2];
|
||||||
|
int y1 = input1_size[input1_size.size() == 4 ? 2 : 3];
|
||||||
|
int x1 = input1_size[input1_size.size() == 4 ? 3 : 4];
|
||||||
|
|
||||||
|
int min_random = input1_dt == data_types::u8 ? 0 : -2;
|
||||||
|
int max_random = input1_dt == data_types::u8 ? 4 : 2;
|
||||||
|
VVVVVVF<float> input1_rnd = generate_random_6d<float>(b0, f0, 1, z0, y0, x0, min_random, max_random);
|
||||||
|
VVVVVVF<int> input2_rnd = generate_random_6d<int>(b1, f1, 1, z1, y1, x1, min_random, max_random);
|
||||||
|
VF<float> input1_rnd_vec = flatten_6d<float>(format::bfwzyx, input1_rnd);
|
||||||
|
VF<int> input2_rnd_vec = flatten_6d<int>(format::bfwzyx, input2_rnd);
|
||||||
|
|
||||||
|
const auto& engine = get_test_engine();
|
||||||
|
auto fmt_pln = input0_size.size() == 4 ? format::bfyx : format::bfzyx;
|
||||||
|
auto fmt_fsv16 = input0_size.size() == 4 ? format::b_fs_yx_fsv16 : format::b_fs_zyx_fsv16;
|
||||||
|
|
||||||
|
auto in0_size = tensor(fmt_pln, input0_size);
|
||||||
|
auto in1_size = tensor(fmt_pln, input1_size);
|
||||||
|
|
||||||
|
auto input1 = memory::allocate(engine, { data_types::f32, fmt_pln, in0_size });
|
||||||
|
auto input2 = memory::allocate(engine, { data_types::i32, fmt_pln, in1_size });
|
||||||
|
set_values(input1, input1_rnd_vec);
|
||||||
|
set_values(input2, input2_rnd_vec);
|
||||||
|
|
||||||
|
topology topology;
|
||||||
|
topology.add(input_layout("input1", input1.get_layout()));
|
||||||
|
topology.add(input_layout("input2", input2.get_layout()));
|
||||||
|
topology.add(reorder("reorder1", "input1", fmt_fsv16, input0_dt));
|
||||||
|
topology.add(reorder("reorder2", "input2", fmt_fsv16, input1_dt));
|
||||||
|
topology.add(eltwise("eltwise", {"reorder1", "reorder2"}, mode));
|
||||||
|
topology.add(reorder("out", "eltwise", fmt_pln, data_types::f32));
|
||||||
|
primitive_id out_id = "out";
|
||||||
|
|
||||||
|
build_options bo;
|
||||||
|
bo.set_option(build_option::optimize_data(true));
|
||||||
|
network network(engine, topology, bo);
|
||||||
|
|
||||||
|
network.set_input_data("input1", input1);
|
||||||
|
network.set_input_data("input2", input2);
|
||||||
|
auto outputs = network.execute();
|
||||||
|
EXPECT_EQ(outputs.size(), size_t(1));
|
||||||
|
EXPECT_EQ(outputs.begin()->first, out_id);
|
||||||
|
|
||||||
|
auto output_memory = outputs.at(out_id).get_memory();
|
||||||
|
auto output_ptr = output_memory.pointer<float>();
|
||||||
|
|
||||||
|
VF<float> output_cpu_vec = eltwise_ref(input1_rnd, input2_rnd, in0_size, in1_size, mode);
|
||||||
|
for (size_t i = 0; i < output_cpu_vec.size(); ++i) {
|
||||||
|
EXPECT_TRUE(!(std::isnan((float)output_cpu_vec[i]) && std::isnan((float)output_ptr[i])));
|
||||||
|
ASSERT_FLOAT_EQ(output_cpu_vec[i], output_ptr[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::vector<data_types> mixed_types = {data_types::i8, data_types::u8};
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(eltwise, eltwise_test_mixed_precision,
|
||||||
|
::testing::Combine(
|
||||||
|
::testing::ValuesIn(modes),
|
||||||
|
::testing::ValuesIn(mixed_types),
|
||||||
|
::testing::ValuesIn(inputs)
|
||||||
|
), );
|
||||||
|
Loading…
Reference in New Issue
Block a user