[CPU] Fixed integer compute for arithmetic operations (#13556)

Authored by Gorokhov Dmitriy on 2022-10-24 15:54:52 +04:00, committed by GitHub
parent be1b72d1e9
commit 9f40eb7196
8 changed files with 246 additions and 107 deletions
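The changes below give the arithmetic JIT emitters a dedicated I32 path (uni_vpaddd, uni_vpsubd, uni_vpmulld alongside the existing uni_vaddps, uni_vsubps, uni_vmulps), so integer tensors are no longer computed through FP32. A standalone sketch of the rounding error this avoids (illustrative values chosen for this example, not taken from the change):

#include <cstdint>
#include <cstdio>

int main() {
    // 2^24 + 1 is the first integer a 32-bit float cannot represent exactly.
    int32_t a = 16777217;
    int32_t b = 1;
    int32_t exact = a + b;  // 16777218: what an exact I32 path computes
    // float(a) rounds to 16777216, and adding 1.0f rounds back to 16777216,
    // so an FP32 fallback would return 16777216 instead of 16777218.
    int32_t via_fp32 = static_cast<int32_t>(static_cast<float>(a) + static_cast<float>(b));
    std::printf("exact=%d via_fp32=%d\n", exact, via_fp32);
    return 0;
}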

View File

@@ -120,7 +120,7 @@ auto is_supported_op(const std::shared_ptr<const Node> &n) -> bool {
auto has_supported_in_out(const std::shared_ptr<const Node> &n) -> bool {
auto supported = [](descriptor::Tensor& t) -> bool {
static const std::set<ngraph::element::Type> supported_data_types =
{ ngraph::element::f32, ngraph::element::i32, ngraph::element::bf16, ngraph::element::i8, ngraph::element::u8 };
{ ngraph::element::f32, ngraph::element::bf16, ngraph::element::i8, ngraph::element::u8 };
return t.get_partial_shape().is_static() && supported_data_types.count(t.get_element_type()) != 0;
};
const auto & inputs = n->inputs();

View File

@@ -46,14 +46,26 @@ void jit_add_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std
Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
Vmm vmm_dst = Vmm(out_vec_idxs[0]);
auto uni_vadd = [this](Vmm vmm_dst, Vmm vmm_src0, Vmm vmm_src1) {
switch (exec_prc_) {
case Precision::FP32: h->uni_vaddps(vmm_dst, vmm_src0, vmm_src1); break;
case Precision::I32: h->uni_vpaddd(vmm_dst, vmm_src0, vmm_src1); break;
default: assert(!"unsupported precision");
}
};
if (isa == cpu::x64::sse41) {
h->uni_vmovups(vmm_dst, vmm_src0);
h->uni_vaddps(vmm_dst, vmm_dst, vmm_src1);
uni_vadd(vmm_dst, vmm_dst, vmm_src1);
} else {
h->uni_vaddps(vmm_dst, vmm_src0, vmm_src1);
uni_vadd(vmm_dst, vmm_src0, vmm_src1);
}
}
std::set<InferenceEngine::Precision> jit_add_emitter::get_supported_precisions() {
return {Precision::FP32, Precision::I32};
}
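// Note (editorial, not part of the change): uni_vaddps lowers to addps/vaddps (packed
// single-precision add), while uni_vpaddd lowers to paddd/vpaddd (packed 32-bit integer
// add with two's-complement wrap-around), so selecting on exec_prc_ keeps I32 addition
// exact instead of routing it through the FP32 rounding path.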
/// MUL_ADD ///
jit_mul_add_emitter::jit_mul_add_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& node, Precision exec_prc)
: jit_emitter(host, host_isa, node, exec_prc) {}
@@ -85,30 +97,57 @@ void jit_mul_add_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const
Vmm vmm_aux0 = Vmm(aux_vec_idxs[0]);
Vmm vmm_dst = Vmm(out_vec_idxs[0]);
if (isa == cpu::x64::sse41) {
auto uni_vfmadd231_xmm = [this](Xmm vmm_dst, Xmm vmm_src0, Xmm vmm_src1, Xmm vmm_src2) {
h->uni_vmovups(vmm_dst, vmm_src0);
h->uni_vmulps(vmm_dst, vmm_dst, vmm_src1);
h->uni_vaddps(vmm_dst, vmm_dst, vmm_src2);
switch (exec_prc_) {
case Precision::FP32: {
h->uni_vmulps(vmm_dst, vmm_dst, vmm_src1);
h->uni_vaddps(vmm_dst, vmm_dst, vmm_src2);
} break;
case Precision::I32: {
h->uni_vpmulld(vmm_dst, vmm_dst, vmm_src1);
h->uni_vpaddd(vmm_dst, vmm_dst, vmm_src2);
} break;
default: assert(!"unsupported precision");
}
};
auto uni_vfmadd231_vmm = [this, vmm_aux0](Vmm vmm_dst, Vmm vmm_src0, Vmm vmm_src1, Vmm vmm_src2) {
switch (exec_prc_) {
case Precision::FP32: {
Vmm vmm_mul0;
if (vmm_dst.getIdx() == vmm_src0.getIdx()) {
h->uni_vmovups(vmm_aux0, vmm_src0);
vmm_mul0 = vmm_aux0;
} else {
vmm_mul0 = vmm_src0;
}
Vmm vmm_mul1;
if (vmm_dst.getIdx() == vmm_src1.getIdx()) {
h->uni_vmovups(vmm_aux0, vmm_src1);
vmm_mul1 = vmm_aux0;
} else {
vmm_mul1 = vmm_src1;
}
if (vmm_dst.getIdx() != vmm_src2.getIdx())
h->uni_vmovups(vmm_dst, vmm_src2);
h->uni_vfmadd231ps(vmm_dst, vmm_mul0, vmm_mul1);
} break;
case Precision::I32: {
h->uni_vpmulld(vmm_dst, vmm_src0, vmm_src1);
h->uni_vpaddd(vmm_dst, vmm_dst, vmm_src2);
} break;
default: assert(!"unsupported precision");
}
};
if (isa == cpu::x64::sse41) {
uni_vfmadd231_xmm(vmm_dst, vmm_src0, vmm_src1, vmm_src2);
} else {
Vmm vmm_mul0;
if (vmm_dst.getIdx() == vmm_src0.getIdx()) {
h->uni_vmovups(vmm_aux0, vmm_src0);
vmm_mul0 = vmm_aux0;
} else {
vmm_mul0 = vmm_src0;
}
Vmm vmm_mul1;
if (vmm_dst.getIdx() == vmm_src1.getIdx()) {
h->uni_vmovups(vmm_aux0, vmm_src1);
vmm_mul1 = vmm_aux0;
} else {
vmm_mul1 = vmm_src1;
}
if (vmm_dst.getIdx() != vmm_src2.getIdx())
h->uni_vmovups(vmm_dst, vmm_src2);
h->uni_vfmadd231ps(vmm_dst, vmm_mul0, vmm_mul1);
uni_vfmadd231_vmm(vmm_dst, vmm_src0, vmm_src1, vmm_src2);
}
}
@@ -116,6 +155,10 @@ size_t jit_mul_add_emitter::aux_vecs_count() const {
return 1;
}
std::set<InferenceEngine::Precision> jit_mul_add_emitter::get_supported_precisions() {
return {Precision::FP32, Precision::I32};
}
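// Note (editorial, not part of the change): SSE4.1/AVX2 provide no fused multiply-add for
// packed 32-bit integers, so the I32 path above emits uni_vpmulld followed by uni_vpaddd.
// Unlike FP32, splitting the multiply and the add costs no accuracy here, since integer
// lanes wrap modulo 2^32 either way.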
/// SUB ///
jit_subtract_emitter::jit_subtract_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& node, Precision exec_prc)
: jit_emitter(host, host_isa, node, exec_prc) {}
@@ -145,14 +188,25 @@ void jit_subtract_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, cons
Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
Vmm vmm_dst = Vmm(out_vec_idxs[0]);
auto uni_vsub = [this](Vmm vmm_dst, Vmm vmm_src0, Vmm vmm_src1) {
switch (exec_prc_) {
case Precision::FP32: h->uni_vsubps(vmm_dst, vmm_src0, vmm_src1); break;
case Precision::I32: h->uni_vpsubd(vmm_dst, vmm_src0, vmm_src1); break;
default: assert(!"unsupported precision");
}
};
if (isa == cpu::x64::sse41) {
h->uni_vmovups(vmm_dst, vmm_src0);
h->uni_vsubps(vmm_dst, vmm_dst, vmm_src1);
uni_vsub(vmm_dst, vmm_dst, vmm_src1);
} else {
h->uni_vsubps(vmm_dst, vmm_src0, vmm_src1);
uni_vsub(vmm_dst, vmm_src0, vmm_src1);
}
}
std::set<InferenceEngine::Precision> jit_subtract_emitter::get_supported_precisions() {
return {Precision::FP32, Precision::I32};
}
/// MULTIPLY ///
jit_multiply_emitter::jit_multiply_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& node, Precision exec_prc)
@@ -183,14 +237,25 @@ void jit_multiply_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, cons
Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
Vmm vmm_dst = Vmm(out_vec_idxs[0]);
auto uni_vmul = [this](Vmm vmm_dst, Vmm vmm_src0, Vmm vmm_src1) {
switch (exec_prc_) {
case Precision::FP32: h->uni_vmulps(vmm_dst, vmm_src0, vmm_src1); break;
case Precision::I32: h->uni_vpmulld(vmm_dst, vmm_src0, vmm_src1); break;
default: assert(!"unsupported precision");
}
};
if (isa == cpu::x64::sse41) {
h->uni_vmovups(vmm_dst, vmm_src0);
h->uni_vmulps(vmm_dst, vmm_dst, vmm_src1);
uni_vmul(vmm_dst, vmm_dst, vmm_src1);
} else {
h->uni_vmulps(vmm_dst, vmm_src0, vmm_src1);
uni_vmul(vmm_dst, vmm_src0, vmm_src1);
}
}
std::set<InferenceEngine::Precision> jit_multiply_emitter::get_supported_precisions() {
return {Precision::FP32, Precision::I32};
}
/// DIVIDE ///
jit_divide_emitter::jit_divide_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& node, Precision exec_prc)
@@ -554,17 +619,32 @@ void jit_squared_difference_emitter::emit_isa(const std::vector<size_t> &in_vec_
Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
Vmm vmm_dst = Vmm(out_vec_idxs[0]);
auto uni_vsqdiff = [this](Vmm vmm_dst, Vmm vmm_src0, Vmm vmm_src1) {
switch (exec_prc_) {
case Precision::FP32: {
h->uni_vsubps(vmm_dst, vmm_src0, vmm_src1);
h->uni_vmulps(vmm_dst, vmm_dst, vmm_dst);
} break;
case Precision::I32: {
h->uni_vpsubd(vmm_dst, vmm_src0, vmm_src1);
h->uni_vpmulld(vmm_dst, vmm_dst, vmm_dst);
} break;
default: assert(!"unsupported precision");
}
};
if (isa == cpu::x64::sse41) {
if (vmm_src0.getIdx() != vmm_dst.getIdx())
h->uni_vmovups(vmm_dst, vmm_src0);
h->uni_vsubps(vmm_dst, vmm_dst, vmm_src1);
h->uni_vmulps(vmm_dst, vmm_dst, vmm_dst);
uni_vsqdiff(vmm_dst, vmm_dst, vmm_src1);
} else {
h->uni_vsubps(vmm_dst, vmm_src0, vmm_src1);
h->uni_vmulps(vmm_dst, vmm_dst, vmm_dst);
uni_vsqdiff(vmm_dst, vmm_src0, vmm_src1);
}
}
std::set<InferenceEngine::Precision> jit_squared_difference_emitter::get_supported_precisions() {
return {Precision::FP32, Precision::I32};
}
/// POWER_DYNAMIC ///
jit_power_dynamic_emitter::jit_power_dynamic_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& node, Precision exec_prc)

View File

@@ -18,6 +18,7 @@ public:
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
static std::set<InferenceEngine::Precision> get_supported_precisions();
private:
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
@@ -36,6 +37,7 @@ public:
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
static std::set<InferenceEngine::Precision> get_supported_precisions();
private:
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
@@ -57,6 +59,7 @@ public:
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
static std::set<InferenceEngine::Precision> get_supported_precisions();
private:
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
@@ -76,6 +79,7 @@ public:
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
static std::set<InferenceEngine::Precision> get_supported_precisions();
private:
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
@@ -232,6 +236,7 @@ public:
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
static std::set<InferenceEngine::Precision> get_supported_precisions();
private:
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,

View File

@@ -2252,16 +2252,19 @@ void Eltwise::appendBinPostOps(dnnl::post_ops& ops, const VectorDims& postOpDims
}
bool Eltwise::canFuse(const NodePtr& node) const {
auto isSuitableNode = [this](const Eltwise* node) {
// [WA] Since execution precision change from I32 to FP32 for Divide operation may lead to incorrect results
// we disable its fusing, otherwise there is no guarantee it will be executed in I32
// [TODO] We need to rewrite support for different precisions at all to avoid implicit conversions to FP32
// (all should be handled via explicit convert operations)
if (node->getAlgorithm() == Algorithm::EltwiseDivide) {
for (const auto &originalInputPrecision : getOriginalInputPrecisions()) {
if (originalInputPrecision == Precision::I32) {
return false;
}
auto isIntegerComputeSupported = [this](const Node* node) {
if (!one_of(node->getAlgorithm(), Algorithm::EltwiseAdd,
Algorithm::EltwiseMultiply,
Algorithm::EltwiseMulAdd,
Algorithm::EltwiseSubtract,
Algorithm::EltwiseDivide,
Algorithm::EltwiseSquaredDifference)) {
return false;
}
for (const auto &originalInputPrecision : node->getOriginalInputPrecisions()) {
if (originalInputPrecision != Precision::I32) {
return false;
}
}
@@ -2271,9 +2274,10 @@ bool Eltwise::canFuse(const NodePtr& node) const {
if (!mayiuse(x64::sse41) || getInputShapeAtPort(0).getRank() > MAX_ELTWISE_DIM_RANK)
return false;
if (!isSuitableNode(this)) {
bool isIntegerNode = isIntegerComputeSupported(this);
if (isIntegerNode && node->getType() != Type::Eltwise)
return false;
}
// FQ inputs with quantization parameters will be hidden inside the post_op object, so they will not increase the number of inputs
size_t addedInputEdgesNum = node->getType() != Type::FakeQuantize ? (node->getParentEdges().size() - 1) : 0;
@@ -2281,6 +2285,16 @@
return false;
if (node->getType() == Type::Eltwise) {
// [WA] Since changing the execution precision from I32 to FP32 for arithmetic operations may lead to incorrect results,
// we disable fusing in cases which could cause invalid precision conversions inside the kernel.
// [TODO] Support for different precisions needs to be reworked entirely to avoid implicit conversions to FP32
// (everything should be handled via explicit Convert operations).
bool isIntegerFusingNode = isIntegerComputeSupported(node.get());
if (isIntegerNode && !isIntegerFusingNode ||
!isIntegerNode && isIntegerFusingNode) {
return false;
}
if (node->getParentEdgesAtPort(0)[0]->getParent().get() != this) {
// Eltwise jitter doesn't respect the commutative property, so fusing is disabled when it is not applied to the 0-th port.
if (one_of(node->getAlgorithm(), Algorithm::EltwiseSubtract,
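Condensed, the new fusing rule above amounts to a precision-agreement check; a minimal sketch (names are illustrative, not from the change):

// Integer-compute Eltwise nodes only fuse with other integer-compute Eltwise nodes,
// so the generated kernel never mixes the exact I32 path with FP32 post-ops implicitly.
bool precisionsAgreeForFusing(bool thisIsInteger, bool fusingIsEltwise, bool fusingIsInteger) {
    if (thisIsInteger && !fusingIsEltwise)
        return false;                                   // non-Eltwise post-ops would force FP32
    if (fusingIsEltwise && (thisIsInteger != fusingIsInteger))
        return false;                                   // both sides must pick the same compute path
    return true;
}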

View File

@@ -13,23 +13,19 @@ namespace snippets {
namespace {
const std::vector<std::pair<std::vector<ov::element::Type>, std::vector<ov::element::Type>>> types_Convert = {
{ { ov::element::f32 }, { ov::element::i32 } },
{ { ov::element::f32 }, { ov::element::bf16 } },
{ { ov::element::f32 }, { ov::element::u8 } },
{ { ov::element::f32 }, { ov::element::i8 } },
{ { ov::element::bf16 }, { ov::element::f32 } },
{ { ov::element::bf16 }, { ov::element::i32 } },
{ { ov::element::bf16 }, { ov::element::i8 } },
{ { ov::element::bf16 }, { ov::element::u8 } },
{ { ov::element::i8 }, { ov::element::f32 } },
{ { ov::element::i8 }, { ov::element::i32 } },
{ { ov::element::i8 }, { ov::element::bf16 } },
{ { ov::element::i8 }, { ov::element::u8 } },
{ { ov::element::u8 }, { ov::element::f32 } },
{ { ov::element::u8 }, { ov::element::i32 } },
{ { ov::element::u8 }, { ov::element::bf16 } },
{ { ov::element::u8 }, { ov::element::i8 } },
};
@@ -50,17 +46,14 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_Convert, Convert,
Convert::getTestCaseName);
const std::vector<std::pair<std::vector<ov::element::Type>, std::vector<ov::element::Type>>> types_ConvertInput = {
{ { ov::element::f32 }, { ov::element::i32 } },
{ { ov::element::f32 }, { ov::element::bf16 } },
{ { ov::element::bf16 }, { ov::element::f32 } },
{ { ov::element::i8 }, { ov::element::f32 } },
{ { ov::element::i8 }, { ov::element::i32 } },
{ { ov::element::i8 }, { ov::element::bf16 } },
{ { ov::element::u8 }, { ov::element::f32 } },
{ { ov::element::u8 }, { ov::element::i32 } },
{ { ov::element::u8 }, { ov::element::bf16 } },
};
@@ -98,8 +91,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_ConvertStub, ConvertStub,
Convert::getTestCaseName);
const std::vector<std::pair<std::vector<ov::element::Type>, std::vector<ov::element::Type>>> types_ConvertPartialInputsAndResults = {
{ { ov::element::i8, ov::element::i32, ov::element::f32 }, { ov::element::f32, ov::element::i8 } },
{ { ov::element::bf16, ov::element::u8, ov::element::i32 }, { ov::element::i32, ov::element::bf16 } },
{ { ov::element::i8, ov::element::i8, ov::element::f32 }, { ov::element::f32, ov::element::i8 } },
};
const std::vector<std::vector<ov::Shape>> inputShapes_ConvertPartialInputsAndResults = {
@@ -118,10 +110,9 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_ConvertPartialInputsAndResults, ConvertP
Convert::getTestCaseName);
const std::vector<std::pair<std::vector<ov::element::Type>, std::vector<ov::element::Type>>> types_ConvertMany = {
{ { ov::element::i32, ov::element::u8}, {} },
{ { ov::element::i32, ov::element::u8, ov::element::i32 }, {} },
{ { ov::element::i32, ov::element::f32, ov::element::i32, ov::element::i8 }, {} },
{ { ov::element::i32, ov::element::i8, ov::element::i32, ov::element::f32 }, {} },
{ { ov::element::f32, ov::element::u8}, {} },
{ { ov::element::f32, ov::element::u8, ov::element::i8 }, {} },
{ { ov::element::f32, ov::element::f32, ov::element::i8, ov::element::i8 }, {} },
};
INSTANTIATE_TEST_SUITE_P(smoke_Snippets_ConvertManyOnInputs, ConvertManyOnInputs,
@@ -137,21 +128,21 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_ConvertManyOnOutputs, ConvertManyOnOutpu
::testing::Combine(
::testing::Values(std::vector<ov::Shape>{ov::Shape{5, 5, 5, 5}}),
::testing::ValuesIn(types_ConvertMany),
::testing::Values(5), // sinh + subgraph + reorders for sinh
::testing::Values(2), // sinh + subgraph
::testing::Values(1),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
Convert::getTestCaseName);
const std::vector<std::pair<std::vector<ov::element::Type>, std::vector<ov::element::Type>>> types_ConvertManyIO = {
{ { ov::element::i32, ov::element::u8}, {ov::element::i32} },
{ { ov::element::i32, ov::element::u8, ov::element::i32 }, { ov::element::i32, ov::element::i8, ov::element::i32, ov::element::f32 } },
{ { ov::element::f32, ov::element::u8}, {ov::element::i8} },
{ { ov::element::f32, ov::element::u8, ov::element::i8 }, { ov::element::u8, ov::element::i8, ov::element::f32, ov::element::f32 } },
};
INSTANTIATE_TEST_SUITE_P(smoke_Snippets_ConvertManyOnInputOutput, ConvertManyOnInputOutput,
::testing::Combine(
::testing::Values(std::vector<ov::Shape>{ov::Shape{5, 5, 5, 5}}),
::testing::ValuesIn(types_ConvertManyIO),
::testing::Values(5), // sinh + subgraph + reorders for sinh
::testing::Values(2), // sinh + subgraph
::testing::Values(1),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
Convert::getTestCaseName);

View File

@@ -37,38 +37,47 @@ public:
}
protected:
ov::Tensor generate_eltwise_input(const ov::element::Type& type, const ngraph::Shape& shape) {
struct gen_params {
uint32_t range;
int32_t start_from;
int32_t resolution;
gen_params(uint32_t range = 10, int32_t start_from = 0, int32_t resolution = 1)
: range(range), start_from(start_from), resolution(resolution) {}
};
gen_params params = gen_params();
if (type.is_real()) {
switch (eltwiseType) {
case ngraph::helpers::EltwiseTypes::POWER:
case ngraph::helpers::EltwiseTypes::MOD:
case ngraph::helpers::EltwiseTypes::FLOOR_MOD:
params = gen_params(2, 2, 8);
break;
case ngraph::helpers::EltwiseTypes::DIVIDE:
params = gen_params(2, 2, 8);
break;
case ngraph::helpers::EltwiseTypes::ERF:
params = gen_params(6, -3);
break;
default:
params = gen_params(80, 0, 8);
break;
}
} else {
params = gen_params(INT32_MAX, INT32_MIN);
}
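// Note (editorial): integer element types get much wider generator settings than the FP32
// cases (range INT32_MAX starting from INT32_MIN), so the exact I32 kernels are exercised
// at magnitudes beyond float's 2^24 exact-integer range.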
return ov::test::utils::create_and_fill_tensor(type, shape, params.range, params.start_from, params.resolution);
}
void generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) override {
inputs.clear();
const auto& funcInputs = function->inputs();
for (int i = 0; i < funcInputs.size(); ++i) {
const auto& funcInput = funcInputs[i];
ov::Tensor tensor;
bool isReal = funcInput.get_element_type().is_real();
switch (eltwiseType) {
case ngraph::helpers::EltwiseTypes::POWER:
case ngraph::helpers::EltwiseTypes::MOD:
case ngraph::helpers::EltwiseTypes::FLOOR_MOD:
tensor = isReal ?
ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 2, 2, 8) :
ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 4, 2);
break;
case ngraph::helpers::EltwiseTypes::DIVIDE:
tensor = isReal ?
ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 2, 2, 8) :
ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 100, 101);
break;
case ngraph::helpers::EltwiseTypes::ERF:
tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 6, -3);
break;
default:
if (funcInput.get_element_type().is_real()) {
tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 80, 0, 8);
} else {
tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i]);
}
break;
}
inputs.insert({funcInput.get_node_shared_ptr(), tensor});
inputs.insert({funcInput.get_node_shared_ptr(), generate_eltwise_input(funcInput.get_element_type(), targetInputStaticShapes[i])});
}
}
@@ -88,6 +97,8 @@ protected:
if (ElementType::bf16 == netType) {
rel_threshold = 2e-2f;
} else if (ElementType::i32 == netType) {
abs_threshold = 0;
}
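// Note (editorial): results of the exact I32 path are expected to match the reference
// exactly, hence the zero absolute threshold (paired with the inclusive comparison added
// to the comparator in the last file section below).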
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
@@ -134,29 +145,21 @@ protected:
}
}
}
if (eltwiseType == ngraph::helpers::EltwiseTypes::DIVIDE ||
eltwiseType == ngraph::helpers::EltwiseTypes::MOD) {
std::vector<float> data(ngraph::shape_size(shape));
data = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(ngraph::shape_size(shape), 10, 2);
if (netType == ElementType::i32) {
auto data_tensor = generate_eltwise_input(ElementType::i32, shape);
auto data_ptr = reinterpret_cast<int32_t*>(data_tensor.data());
std::vector<int32_t> data(data_ptr, data_ptr + ngraph::shape_size(shape));
secondaryInput = ngraph::builder::makeConstant(netType, shape, data);
} else if (eltwiseType == ngraph::helpers::EltwiseTypes::FLOOR_MOD) {
auto negative_data_size = ngraph::shape_size(shape) / 2;
auto positive_data_size = ngraph::shape_size(shape) - negative_data_size;
std::vector<float> negative_data(negative_data_size);
std::vector<float> data(positive_data_size);
negative_data = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(negative_data_size, -10, -2);
data = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(positive_data_size, 10, 2);
data.insert(data.end(), negative_data.begin(), negative_data.end());
secondaryInput = ngraph::builder::makeConstant(netType, shape, data);
} else if (eltwiseType == ngraph::helpers::EltwiseTypes::POWER) {
secondaryInput = ngraph::builder::makeConstant<float>(netType, shape, {}, true, 3);
} else {
secondaryInput = ngraph::builder::makeConstant<float>(netType, shape, {}, true);
auto data_tensor = generate_eltwise_input(ElementType::f32, shape);
auto data_ptr = reinterpret_cast<float*>(data_tensor.data());
std::vector<float> data(data_ptr, data_ptr + ngraph::shape_size(shape));
secondaryInput = ngraph::builder::makeConstant(netType, shape, data);
}
}
auto eltwise = ngraph::builder::makeEltwise(parameters[0], secondaryInput, eltwiseType);
function = makeNgraphFunction(netType, parameters, eltwise, "Eltwise");
}
@@ -325,6 +328,36 @@ const auto params_5D_emptyCPUSpec = ::testing::Combine(
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_5D, EltwiseLayerCPUTest, params_5D_emptyCPUSpec, EltwiseLayerCPUTest::getTestCaseName);
std::vector<ngraph::helpers::EltwiseTypes> eltwiseOpTypesI32 = {
ngraph::helpers::EltwiseTypes::ADD,
ngraph::helpers::EltwiseTypes::MULTIPLY,
ngraph::helpers::EltwiseTypes::SUBTRACT,
ngraph::helpers::EltwiseTypes::DIVIDE,
ngraph::helpers::EltwiseTypes::SQUARED_DIFF,
};
const std::vector<fusingSpecificParams> fusingParamsSetI32{
emptyFusingSpec,
fusingMultiplyAddPerChannel,
};
const auto params_5D_emptyCPUSpec_I32 = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(static_shapes_to_test_representation(inShapes_5D)),
::testing::ValuesIn(eltwiseOpTypesI32),
::testing::ValuesIn(secondaryInputTypes),
::testing::ValuesIn(opTypes),
::testing::Values(ElementType::i32),
::testing::Values(ElementType::i32),
::testing::Values(ElementType::i32),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(additional_config)),
::testing::Values(emptyCPUSpec),
::testing::ValuesIn(fusingParamsSetI32));
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_5D_I32, EltwiseLayerCPUTest, params_5D_emptyCPUSpec_I32, EltwiseLayerCPUTest::getTestCaseName);
std::vector<std::vector<ov::Shape>> inShapes_4D_Blocked_Planar = {
{{2, 17, 31, 3}, {2, 1, 31, 3}},
{{2, 17, 5, 1}, {2, 1, 1, 4}},

View File

@@ -361,6 +361,18 @@ const auto fusingMultiplyPerChannel = fusingSpecificParams{std::make_shared<post
return std::make_shared<ngraph::opset1::Multiply>(cfg.input, secondMultInput);
}, "Multiply(PerChannel)"}}), {"Multiply"}};
const auto fusingMultiplyAddPerChannel = fusingSpecificParams{std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
{[](postNodeConfig& cfg) {
ngraph::Shape newShape = generatePerChannelShape(cfg.input);
auto constNode = ngraph::builder::makeConstant(cfg.type, newShape, std::vector<float>{}, true);
return std::make_shared<ngraph::opset1::Multiply>(cfg.input, constNode);
}, "Multiply(PerChannel)"},
{[](postNodeConfig& cfg) {
ngraph::Shape newShape = generatePerChannelShape(cfg.input);
auto constNode = ngraph::builder::makeConstant(cfg.type, newShape, std::vector<float>{}, true);
return std::make_shared<ngraph::opset1::Add>(cfg.input, constNode);
}, "Add(PerChannel)"}}), {"Add"} };
const auto fusingAddPerTensor = fusingSpecificParams{std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
{[](postNodeConfig& cfg){
ngraph::Shape secondMultInShape(1, 1);

View File

@@ -230,6 +230,10 @@ void compare(const ov::Tensor& expected,
auto eps = std::numeric_limits<double>::epsilon();
return (b - a) > (std::fmax(std::fabs(a), std::fabs(b)) * eps);
};
auto less_or_equal = [] (double a, double b) {
auto eps = std::numeric_limits<double>::epsilon();
return (b - a) >= (std::fmax(std::fabs(a), std::fabs(b)) * eps);
};
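// Note (editorial): with abs_threshold set to 0 for exact I32 comparisons (see above),
// a strict check can never pass even when abs_error.max is exactly 0, because
// less(0, 0) evaluates to false; the inclusive variant lets a zero error meet a zero threshold.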
for (size_t i = 0; i < shape_size(expected_shape); i++) {
double expected_value = expected_data[i];
double actual_value = actual_data[i];
@@ -258,7 +262,7 @@
}
abs_error.mean /= shape_size(expected_shape);
rel_error.mean /= shape_size(expected_shape);
if (!(less(abs_error.max, abs_threshold) && less(rel_error.max, rel_threshold))) {
if (!(less_or_equal(abs_error.max, abs_threshold) && less_or_equal(rel_error.max, rel_threshold))) {
std::ostringstream out_stream;
out_stream << "abs_max < abs_threshold && rel_max < rel_threshold" <<
"\n\t abs_max: " << abs_error.max <<