diff --git a/src/common/snippets/src/pass/collapse_subgraph.cpp b/src/common/snippets/src/pass/collapse_subgraph.cpp
index b69d6c85d35..57c737f992b 100644
--- a/src/common/snippets/src/pass/collapse_subgraph.cpp
+++ b/src/common/snippets/src/pass/collapse_subgraph.cpp
@@ -120,7 +120,7 @@ auto is_supported_op(const std::shared_ptr<const Node> &n) -> bool {
 auto has_supported_in_out(const std::shared_ptr<const Node> &n) -> bool {
     auto supported = [](descriptor::Tensor& t) -> bool {
         static const std::set<ngraph::element::Type> supported_data_types =
-            { ngraph::element::f32, ngraph::element::i32, ngraph::element::bf16, ngraph::element::i8, ngraph::element::u8 };
+            { ngraph::element::f32, ngraph::element::bf16, ngraph::element::i8, ngraph::element::u8 };
         return t.get_partial_shape().is_static() && supported_data_types.count(t.get_element_type()) != 0;
     };
     const auto & inputs = n->inputs();
diff --git a/src/plugins/intel_cpu/src/emitters/jit_eltwise_emitters.cpp b/src/plugins/intel_cpu/src/emitters/jit_eltwise_emitters.cpp
index 1592c814bb8..20d48e3ac18 100644
--- a/src/plugins/intel_cpu/src/emitters/jit_eltwise_emitters.cpp
+++ b/src/plugins/intel_cpu/src/emitters/jit_eltwise_emitters.cpp
@@ -46,14 +46,26 @@ void jit_add_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
     Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
     Vmm vmm_dst = Vmm(out_vec_idxs[0]);
 
+    auto uni_vadd = [this](Vmm vmm_dst, Vmm vmm_src0, Vmm vmm_src1) {
+        switch (exec_prc_) {
+            case Precision::FP32: h->uni_vaddps(vmm_dst, vmm_src0, vmm_src1); break;
+            case Precision::I32: h->uni_vpaddd(vmm_dst, vmm_src0, vmm_src1); break;
+            default: assert(!"unsupported precision");
+        }
+    };
+
     if (isa == cpu::x64::sse41) {
         h->uni_vmovups(vmm_dst, vmm_src0);
-        h->uni_vaddps(vmm_dst, vmm_dst, vmm_src1);
+        uni_vadd(vmm_dst, vmm_dst, vmm_src1);
     } else {
-        h->uni_vaddps(vmm_dst, vmm_src0, vmm_src1);
+        uni_vadd(vmm_dst, vmm_src0, vmm_src1);
     }
 }
 
+std::set<InferenceEngine::Precision> jit_add_emitter::get_supported_precisions() {
+    return {Precision::FP32, Precision::I32};
+}
+
 /// MUL_ADD ///
 jit_mul_add_emitter::jit_mul_add_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& node, Precision exec_prc)
 : jit_emitter(host, host_isa, node, exec_prc) {}
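A note on the dispatch introduced above: uni_vaddps is the packed single-precision floating-point add, while uni_vpaddd is the packed 32-bit integer add, so the uni_vadd lambda simply selects the instruction that matches exec_prc_ instead of always emitting the FP32 form. A scalar reference for the per-lane semantics of the two branches (an illustration only, not code from this patch):

#include <cstdint>
#include <cstdio>

// FP32 branch (uni_vaddps): IEEE-754 single-precision addition per lane.
static float add_fp32(float a, float b) { return a + b; }

// I32 branch (uni_vpaddd): 32-bit integer addition per lane, wrapping on overflow.
static int32_t add_i32(int32_t a, int32_t b) {
    return static_cast<int32_t>(static_cast<uint32_t>(a) + static_cast<uint32_t>(b));
}

int main() {
    std::printf("fp32: %.2f\n", add_fp32(1.5f, 2.25f));   // 3.75
    std::printf("i32 : %d\n", add_i32(2147483647, 1));    // wraps, as vpaddd does
    return 0;
}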
@@ -85,30 +97,57 @@ void jit_mul_add_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
     Vmm vmm_aux0 = Vmm(aux_vec_idxs[0]);
     Vmm vmm_dst = Vmm(out_vec_idxs[0]);
 
-    if (isa == cpu::x64::sse41) {
+    auto uni_vfmadd231_xmm = [this](Xmm vmm_dst, Xmm vmm_src0, Xmm vmm_src1, Xmm vmm_src2) {
         h->uni_vmovups(vmm_dst, vmm_src0);
-        h->uni_vmulps(vmm_dst, vmm_dst, vmm_src1);
-        h->uni_vaddps(vmm_dst, vmm_dst, vmm_src2);
+        switch (exec_prc_) {
+            case Precision::FP32: {
+                h->uni_vmulps(vmm_dst, vmm_dst, vmm_src1);
+                h->uni_vaddps(vmm_dst, vmm_dst, vmm_src2);
+            } break;
+            case Precision::I32: {
+                h->uni_vpmulld(vmm_dst, vmm_dst, vmm_src1);
+                h->uni_vpaddd(vmm_dst, vmm_dst, vmm_src2);
+            } break;
+            default: assert(!"unsupported precision");
+        }
+    };
+
+    auto uni_vfmadd231_vmm = [this, vmm_aux0](Vmm vmm_dst, Vmm vmm_src0, Vmm vmm_src1, Vmm vmm_src2) {
+        switch (exec_prc_) {
+            case Precision::FP32: {
+                Vmm vmm_mul0;
+                if (vmm_dst.getIdx() == vmm_src0.getIdx()) {
+                    h->uni_vmovups(vmm_aux0, vmm_src0);
+                    vmm_mul0 = vmm_aux0;
+                } else {
+                    vmm_mul0 = vmm_src0;
+                }
+
+                Vmm vmm_mul1;
+                if (vmm_dst.getIdx() == vmm_src1.getIdx()) {
+                    h->uni_vmovups(vmm_aux0, vmm_src1);
+                    vmm_mul1 = vmm_aux0;
+                } else {
+                    vmm_mul1 = vmm_src1;
+                }
+
+                if (vmm_dst.getIdx() != vmm_src2.getIdx())
+                    h->uni_vmovups(vmm_dst, vmm_src2);
+
+                h->uni_vfmadd231ps(vmm_dst, vmm_mul0, vmm_mul1);
+            } break;
+            case Precision::I32: {
+                h->uni_vpmulld(vmm_dst, vmm_src0, vmm_src1);
+                h->uni_vpaddd(vmm_dst, vmm_dst, vmm_src2);
+            } break;
+            default: assert(!"unsupported precision");
+        }
+    };
+
+    if (isa == cpu::x64::sse41) {
+        uni_vfmadd231_xmm(vmm_dst, vmm_src0, vmm_src1, vmm_src2);
     } else {
-        Vmm vmm_mul0;
-        if (vmm_dst.getIdx() == vmm_src0.getIdx()) {
-            h->uni_vmovups(vmm_aux0, vmm_src0);
-            vmm_mul0 = vmm_aux0;
-        } else {
-            vmm_mul0 = vmm_src0;
-        }
-
-        Vmm vmm_mul1;
-        if (vmm_dst.getIdx() == vmm_src1.getIdx()) {
-            h->uni_vmovups(vmm_aux0, vmm_src1);
-            vmm_mul1 = vmm_aux0;
-        } else {
-            vmm_mul1 = vmm_src1;
-        }
-
-        if (vmm_dst.getIdx() != vmm_src2.getIdx())
-            h->uni_vmovups(vmm_dst, vmm_src2);
-        h->uni_vfmadd231ps(vmm_dst, vmm_mul0, vmm_mul1);
+        uni_vfmadd231_vmm(vmm_dst, vmm_src0, vmm_src1, vmm_src2);
     }
 }
 
@@ -116,6 +155,10 @@ size_t jit_mul_add_emitter::aux_vecs_count() const {
     return 1;
 }
 
+std::set<InferenceEngine::Precision> jit_mul_add_emitter::get_supported_precisions() {
+    return {Precision::FP32, Precision::I32};
+}
+
 /// SUB ///
 jit_subtract_emitter::jit_subtract_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& node, Precision exec_prc)
 : jit_emitter(host, host_isa, node, exec_prc) {}
 
@@ -145,14 +188,25 @@ void jit_subtract_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
     Vmm vmm_src0 = Vmm(in_vec_idxs[0]);
     Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
     Vmm vmm_dst = Vmm(out_vec_idxs[0]);
 
+    auto uni_vsub = [this](Vmm vmm_dst, Vmm vmm_src0, Vmm vmm_src1) {
+        switch (exec_prc_) {
+            case Precision::FP32: h->uni_vsubps(vmm_dst, vmm_src0, vmm_src1); break;
+            case Precision::I32: h->uni_vpsubd(vmm_dst, vmm_src0, vmm_src1); break;
+            default: assert(!"unsupported precision");
+        }
+    };
+
     if (isa == cpu::x64::sse41) {
         h->uni_vmovups(vmm_dst, vmm_src0);
-        h->uni_vsubps(vmm_dst, vmm_dst, vmm_src1);
+        uni_vsub(vmm_dst, vmm_dst, vmm_src1);
     } else {
-        h->uni_vsubps(vmm_dst, vmm_src0, vmm_src1);
+        uni_vsub(vmm_dst, vmm_src0, vmm_src1);
     }
 }
+std::set<InferenceEngine::Precision> jit_subtract_emitter::get_supported_precisions() {
+    return {Precision::FP32, Precision::I32};
+}
 
 /// MULTIPLY ///
 jit_multiply_emitter::jit_multiply_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& node, Precision exec_prc)
 
@@ -183,14 +237,25 @@ void jit_multiply_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
     Vmm vmm_src0 = Vmm(in_vec_idxs[0]);
     Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
     Vmm vmm_dst = Vmm(out_vec_idxs[0]);
 
+    auto uni_vmul = [this](Vmm vmm_dst, Vmm vmm_src0, Vmm vmm_src1) {
+        switch (exec_prc_) {
+            case Precision::FP32: h->uni_vmulps(vmm_dst, vmm_src0, vmm_src1); break;
+            case Precision::I32: h->uni_vpmulld(vmm_dst, vmm_src0, vmm_src1); break;
+            default: assert(!"unsupported precision");
+        }
+    };
+
     if (isa == cpu::x64::sse41) {
         h->uni_vmovups(vmm_dst, vmm_src0);
-        h->uni_vmulps(vmm_dst, vmm_dst, vmm_src1);
+        uni_vmul(vmm_dst, vmm_dst, vmm_src1);
     } else {
-        h->uni_vmulps(vmm_dst, vmm_src0, vmm_src1);
+        uni_vmul(vmm_dst, vmm_src0, vmm_src1);
     }
 }
+std::set<InferenceEngine::Precision> jit_multiply_emitter::get_supported_precisions() {
+    return {Precision::FP32, Precision::I32};
+}
 
 /// DIVIDE ///
 jit_divide_emitter::jit_divide_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& node, Precision exec_prc)
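The integer branches above lean on packed 32-bit integer instructions: vpaddd/vpsubd have existed since SSE2, while vpmulld (packed 32-bit multiply keeping the low halves) requires SSE4.1, which matches the kernel's existing mayiuse(x64::sse41) floor. Roughly equivalent intrinsics, shown only to illustrate the per-lane behaviour (not code from this patch):

#include <smmintrin.h>  // SSE4.1 for _mm_mullo_epi32
#include <cstdint>
#include <cstdio>

int main() {
    alignas(16) int32_t a[4] = {1, -2, 3, 2147483647};
    alignas(16) int32_t b[4] = {5,  6, 7, 2};
    __m128i va = _mm_load_si128(reinterpret_cast<const __m128i*>(a));
    __m128i vb = _mm_load_si128(reinterpret_cast<const __m128i*>(b));

    __m128i sum  = _mm_add_epi32(va, vb);    // roughly what uni_vpaddd emits (SSE2)
    __m128i diff = _mm_sub_epi32(va, vb);    // roughly what uni_vpsubd emits (SSE2)
    __m128i prod = _mm_mullo_epi32(va, vb);  // roughly what uni_vpmulld emits (SSE4.1)

    alignas(16) int32_t s[4], d[4], p[4];
    _mm_store_si128(reinterpret_cast<__m128i*>(s), sum);
    _mm_store_si128(reinterpret_cast<__m128i*>(d), diff);
    _mm_store_si128(reinterpret_cast<__m128i*>(p), prod);
    for (int i = 0; i < 4; ++i)
        std::printf("%d + %d = %d, %d - %d = %d, %d * %d = %d\n",
                    a[i], b[i], s[i], a[i], b[i], d[i], a[i], b[i], p[i]);
    return 0;
}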
@@ -554,17 +619,32 @@ void jit_squared_difference_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
     Vmm vmm_src0 = Vmm(in_vec_idxs[0]);
     Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
     Vmm vmm_dst = Vmm(out_vec_idxs[0]);
 
+    auto uni_vsqdiff = [this](Vmm vmm_dst, Vmm vmm_src0, Vmm vmm_src1) {
+        switch (exec_prc_) {
+            case Precision::FP32: {
+                h->uni_vsubps(vmm_dst, vmm_src0, vmm_src1);
+                h->uni_vmulps(vmm_dst, vmm_dst, vmm_dst);
+            } break;
+            case Precision::I32: {
+                h->uni_vpsubd(vmm_dst, vmm_src0, vmm_src1);
+                h->uni_vpmulld(vmm_dst, vmm_dst, vmm_dst);
+            } break;
+            default: assert(!"unsupported precision");
+        }
+    };
+
     if (isa == cpu::x64::sse41) {
         if (vmm_src0.getIdx() != vmm_dst.getIdx())
             h->uni_vmovups(vmm_dst, vmm_src0);
-        h->uni_vsubps(vmm_dst, vmm_dst, vmm_src1);
-        h->uni_vmulps(vmm_dst, vmm_dst, vmm_dst);
+        uni_vsqdiff(vmm_dst, vmm_dst, vmm_src1);
     } else {
-        h->uni_vsubps(vmm_dst, vmm_src0, vmm_src1);
-        h->uni_vmulps(vmm_dst, vmm_dst, vmm_dst);
+        uni_vsqdiff(vmm_dst, vmm_src0, vmm_src1);
    }
 }
+std::set<InferenceEngine::Precision> jit_squared_difference_emitter::get_supported_precisions() {
+    return {Precision::FP32, Precision::I32};
+}
 
 /// POWER_DYNAMIC ///
 jit_power_dynamic_emitter::jit_power_dynamic_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& node, Precision exec_prc)
diff --git a/src/plugins/intel_cpu/src/emitters/jit_eltwise_emitters.hpp b/src/plugins/intel_cpu/src/emitters/jit_eltwise_emitters.hpp
index 349bafe5d43..0a374a418f8 100644
--- a/src/plugins/intel_cpu/src/emitters/jit_eltwise_emitters.hpp
+++ b/src/plugins/intel_cpu/src/emitters/jit_eltwise_emitters.hpp
@@ -18,6 +18,7 @@ public:
                     InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
 
     size_t get_inputs_num() const override;
+    static std::set<InferenceEngine::Precision> get_supported_precisions();
 
 private:
     void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
@@ -36,6 +37,7 @@ public:
                         InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
 
     size_t get_inputs_num() const override;
+    static std::set<InferenceEngine::Precision> get_supported_precisions();
 
 private:
     void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
@@ -57,6 +59,7 @@ public:
                          InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
 
     size_t get_inputs_num() const override;
+    static std::set<InferenceEngine::Precision> get_supported_precisions();
 
 private:
     void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
@@ -76,6 +79,7 @@ public:
                          InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
 
     size_t get_inputs_num() const override;
+    static std::set<InferenceEngine::Precision> get_supported_precisions();
 
 private:
     void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
@@ -232,6 +236,7 @@ public:
                                    InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
 
     size_t get_inputs_num() const override;
+    static std::set<InferenceEngine::Precision> get_supported_precisions();
 
 private:
     void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
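The get_supported_precisions() hooks added to the header are static on purpose: the plugin can ask an emitter class which execution precisions it can generate code for before any emitter instance (and hence any exec_prc_) exists. A hypothetical selection helper in the same spirit; the enum, names and fallback policy here are illustrative only and are not taken from the plugin:

#include <cstdio>
#include <set>
#include <vector>

// Stand-in for InferenceEngine::Precision, just for this sketch.
enum class Prec { FP32, I32, BF16 };

// Keep the requested precision only if every emitter in a fused chain
// reports it as supported; otherwise fall back to FP32 (assumed default).
Prec choose_exec_precision(Prec requested, const std::vector<std::set<Prec>>& per_op_supported) {
    for (const auto& supported : per_op_supported) {
        if (supported.count(requested) == 0)
            return Prec::FP32;
    }
    return requested;
}

int main() {
    std::vector<std::set<Prec>> chain = {{Prec::FP32, Prec::I32}, {Prec::FP32}};
    Prec chosen = choose_exec_precision(Prec::I32, chain);
    std::printf("exec precision: %s\n", chosen == Prec::I32 ? "I32" : "FP32");  // FP32
    return 0;
}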
diff --git a/src/plugins/intel_cpu/src/nodes/eltwise.cpp b/src/plugins/intel_cpu/src/nodes/eltwise.cpp
index c6691e2d843..5737b2e30d3 100644
--- a/src/plugins/intel_cpu/src/nodes/eltwise.cpp
+++ b/src/plugins/intel_cpu/src/nodes/eltwise.cpp
@@ -2252,16 +2252,19 @@ void Eltwise::appendBinPostOps(dnnl::post_ops& ops, const VectorDims& postOpDims
 }
 
 bool Eltwise::canFuse(const NodePtr& node) const {
-    auto isSuitableNode = [this](const Eltwise* node) {
-        // [WA] Since execution precision change from I32 to FP32 for Divide operation may lead to incorrect results
-        // we disable its fusing otherwise there is no guarantee it will be executed it I32
-        // [TODO] We need to rewrite support for different precisions at all to avoid implicit conversions to FP32
-        // (all should be handled via explicit convert operations)
-        if (node->getAlgorithm() == Algorithm::EltwiseDivide) {
-            for (const auto &originalInputPrecision : getOriginalInputPrecisions()) {
-                if (originalInputPrecision == Precision::I32) {
-                    return false;
-                }
+    auto isIntegerComputeSupported = [this](const Node* node) {
+        if (!one_of(node->getAlgorithm(), Algorithm::EltwiseAdd,
+                                          Algorithm::EltwiseMultiply,
+                                          Algorithm::EltwiseMulAdd,
+                                          Algorithm::EltwiseSubtract,
+                                          Algorithm::EltwiseDivide,
+                                          Algorithm::EltwiseSquaredDifference)) {
+            return false;
+        }
+
+        for (const auto &originalInputPrecision : node->getOriginalInputPrecisions()) {
+            if (originalInputPrecision != Precision::I32) {
+                return false;
             }
         }
 
@@ -2271,9 +2274,10 @@ bool Eltwise::canFuse(const NodePtr& node) const {
     if (!mayiuse(x64::sse41) || getInputShapeAtPort(0).getRank() > MAX_ELTWISE_DIM_RANK)
         return false;
 
-    if (!isSuitableNode(this)) {
+
+    bool isIntegerNode = isIntegerComputeSupported(this);
+    if (isIntegerNode && node->getType() != Type::Eltwise)
         return false;
-    }
 
     // FQ inputs with quantization parameters will be hided inside post_op object, so will not increase inputs number
     size_t addedInputEdgesNum = node->getType() != Type::FakeQuantize ? (node->getParentEdges().size() - 1) : 0;
@@ -2281,6 +2285,16 @@ bool Eltwise::canFuse(const NodePtr& node) const {
         return false;
 
     if (node->getType() == Type::Eltwise) {
+        // [WA] Since execution precision change from I32 to FP32 for arithmetic operations may lead to incorrect results
+        // we disable fusing cases which may lead to invalid precision conversions inside the kernel
+        // [TODO] We need to rewrite support for different precisions at all to avoid implicit conversions to FP32
+        // (all should be handled via explicit convert operations)
+        bool isIntegerFusingNode = isIntegerComputeSupported(node.get());
+        if (isIntegerNode && !isIntegerFusingNode ||
+            !isIntegerNode && isIntegerFusingNode) {
+            return false;
+        }
+
         if (node->getParentEdgesAtPort(0)[0]->getParent().get() != this) {
             // Eltwise jitter doesn't respect commutative property, so fusing is disabled in case it applied not for 0-th port.
             if (one_of(node->getAlgorithm(), Algorithm::EltwiseSubtract,
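The [WA] comment above describes a real numeric hazard, not a style concern: if a fused kernel silently evaluates an I32 subgraph in FP32, results can change because float carries only 24 bits of mantissa. A standalone illustration of the effect (not code from this patch):

#include <cstdint>
#include <cstdio>

int main() {
    int32_t a = 16777217;  // 2^24 + 1, not exactly representable as float
    int32_t b = 1;

    int32_t exact = a + b;  // 16777218, what the I32 kernel computes
    int32_t via_fp32 = static_cast<int32_t>(static_cast<float>(a) + static_cast<float>(b));
    // 16777216: 'a' already rounds down to 2^24 as a float, and adding 1.0f
    // is then lost to round-to-nearest-even, so the FP32 path is off by 2.

    std::printf("I32 path:  %d\nFP32 path: %d\n", exact, via_fp32);
    return 0;
}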
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/convert.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/convert.cpp
index 5c074239077..323e069ebc0 100644
--- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/convert.cpp
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/convert.cpp
@@ -13,23 +13,19 @@ namespace snippets {
 namespace {
 
 const std::vector<std::pair<std::vector<ov::element::Type>, std::vector<ov::element::Type>>> types_Convert = {
-        { { ov::element::f32 }, { ov::element::i32 } },
         { { ov::element::f32 }, { ov::element::bf16 } },
         { { ov::element::f32 }, { ov::element::u8 } },
         { { ov::element::f32 }, { ov::element::i8 } },
 
         { { ov::element::bf16 }, { ov::element::f32 } },
-        { { ov::element::bf16 }, { ov::element::i32 } },
         { { ov::element::bf16 }, { ov::element::i8 } },
         { { ov::element::bf16 }, { ov::element::u8 } },
 
         { { ov::element::i8 }, { ov::element::f32 } },
-        { { ov::element::i8 }, { ov::element::i32 } },
         { { ov::element::i8 }, { ov::element::bf16 } },
         { { ov::element::i8 }, { ov::element::u8 } },
 
         { { ov::element::u8 }, { ov::element::f32 } },
-        { { ov::element::u8 }, { ov::element::i32 } },
         { { ov::element::u8 }, { ov::element::bf16 } },
         { { ov::element::u8 }, { ov::element::i8 } },
 };
@@ -50,17 +46,14 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_Convert, Convert,
                          Convert::getTestCaseName);
 
 const std::vector<std::pair<std::vector<ov::element::Type>, std::vector<ov::element::Type>>> types_ConvertInput = {
-        { { ov::element::f32 }, { ov::element::i32 } },
         { { ov::element::f32 }, { ov::element::bf16 } },
 
         { { ov::element::bf16 }, { ov::element::f32 } },
 
         { { ov::element::i8 }, { ov::element::f32 } },
-        { { ov::element::i8 }, { ov::element::i32 } },
         { { ov::element::i8 }, { ov::element::bf16 } },
 
         { { ov::element::u8 }, { ov::element::f32 } },
-        { { ov::element::u8 }, { ov::element::i32 } },
         { { ov::element::u8 }, { ov::element::bf16 } },
 };
 
@@ -98,8 +91,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_ConvertStub, ConvertStub,
                          Convert::getTestCaseName);
 
 const std::vector<std::pair<std::vector<ov::element::Type>, std::vector<ov::element::Type>>> types_ConvertPartialInputsAndResults = {
-        { { ov::element::i8, ov::element::i32, ov::element::f32 }, { ov::element::f32, ov::element::i8 } },
-        { { ov::element::bf16, ov::element::u8, ov::element::i32 }, { ov::element::i32, ov::element::bf16 } },
+        { { ov::element::i8, ov::element::i8, ov::element::f32 }, { ov::element::f32, ov::element::i8 } },
 };
 
 const std::vector<std::vector<ov::Shape>> inputShapes_ConvertPartialInputsAndResults = {
@@ -118,10 +110,9 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_ConvertPartialInputsAndResults, ConvertPartialInputsAndResults,
                          Convert::getTestCaseName);
 
 const std::vector<std::pair<std::vector<ov::element::Type>, std::vector<ov::element::Type>>> types_ConvertMany = {
-        { { ov::element::i32, ov::element::u8}, {} },
-        { { ov::element::i32, ov::element::u8, ov::element::i32 }, {} },
-        { { ov::element::i32, ov::element::f32, ov::element::i32, ov::element::i8 }, {} },
-        { { ov::element::i32, ov::element::i8, ov::element::i32, ov::element::f32 }, {} },
+        { { ov::element::f32, ov::element::u8}, {} },
+        { { ov::element::f32, ov::element::u8, ov::element::i8 }, {} },
+        { { ov::element::f32, ov::element::f32, ov::element::i8, ov::element::i8 }, {} },
 };
 
 INSTANTIATE_TEST_SUITE_P(smoke_Snippets_ConvertManyOnInputs, ConvertManyOnInputs,
@@ -137,21 +128,21 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_ConvertManyOnOutputs, ConvertManyOnOutputs,
                          ::testing::Combine(
                                  ::testing::Values(std::vector<ov::Shape>{ov::Shape{5, 5, 5, 5}}),
                                  ::testing::ValuesIn(types_ConvertMany),
-                                 ::testing::Values(5), // sinh + subgraph + reorders for sinh
+                                 ::testing::Values(2), // sinh + subgraph
                                  ::testing::Values(1),
                                  ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                          Convert::getTestCaseName);
 
 const std::vector<std::pair<std::vector<ov::element::Type>, std::vector<ov::element::Type>>> types_ConvertManyIO = {
-        { { ov::element::i32, ov::element::u8}, {ov::element::i32} },
-        { { ov::element::i32, ov::element::u8, ov::element::i32 }, { ov::element::i32, ov::element::i8, ov::element::i32, ov::element::f32 } },
+        { { ov::element::f32, ov::element::u8}, {ov::element::i8} },
+        { { ov::element::f32, ov::element::u8, ov::element::i8 }, { ov::element::u8, ov::element::i8, ov::element::f32, ov::element::f32 } },
 };
 
 INSTANTIATE_TEST_SUITE_P(smoke_Snippets_ConvertManyOnInputOutput, ConvertManyOnInputOutput,
                          ::testing::Combine(
                                  ::testing::Values(std::vector<ov::Shape>{ov::Shape{5, 5, 5, 5}}),
                                  ::testing::ValuesIn(types_ConvertManyIO),
-                                 ::testing::Values(5), // sinh + subgraph + reorders for sinh
+                                 ::testing::Values(2), // sinh + subgraph
                                  ::testing::Values(1),
                                  ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                          Convert::getTestCaseName);
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/eltwise.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/eltwise.cpp
index 4e051b2e715..73c5fd5a86a 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/eltwise.cpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/eltwise.cpp
@@ -37,38 +37,47 @@ public:
     }
 
 protected:
+    ov::Tensor generate_eltwise_input(const ov::element::Type& type, const ngraph::Shape& shape) {
+        struct gen_params {
+            uint32_t range;
+            int32_t start_from;
+            int32_t resolution;
+
+            gen_params(uint32_t range = 10, int32_t start_from = 0, int32_t resolution = 1)
+                    : range(range), start_from(start_from), resolution(resolution) {}
+        };
+
+        gen_params params = gen_params();
+        if (type.is_real()) {
+            switch (eltwiseType) {
+                case ngraph::helpers::EltwiseTypes::POWER:
+                case ngraph::helpers::EltwiseTypes::MOD:
+                case ngraph::helpers::EltwiseTypes::FLOOR_MOD:
+                    params = gen_params(2, 2, 8);
+                    break;
+                case ngraph::helpers::EltwiseTypes::DIVIDE:
+                    params = gen_params(2, 2, 8);
+                    break;
+                case ngraph::helpers::EltwiseTypes::ERF:
+                    params = gen_params(6, -3);
+                    break;
+                default:
+                    params = gen_params(80, 0, 8);
+                    break;
+            }
+        } else {
+            params = gen_params(INT32_MAX, INT32_MIN);
+        }
+
+        return ov::test::utils::create_and_fill_tensor(type, shape, params.range, params.start_from, params.resolution);
+    }
+
     void generate_inputs(const std::vector<ov::Shape>& targetInputStaticShapes) override {
         inputs.clear();
         const auto& funcInputs = function->inputs();
         for (int i = 0; i < funcInputs.size(); ++i) {
             const auto& funcInput = funcInputs[i];
-            ov::Tensor tensor;
-            bool isReal = funcInput.get_element_type().is_real();
-            switch (eltwiseType) {
-                case ngraph::helpers::EltwiseTypes::POWER:
-                case ngraph::helpers::EltwiseTypes::MOD:
-                case ngraph::helpers::EltwiseTypes::FLOOR_MOD:
-                    tensor = isReal ?
-                             ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 2, 2, 8) :
-                             ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 4, 2);
-                    break;
-                case ngraph::helpers::EltwiseTypes::DIVIDE:
-                    tensor = isReal ?
-                             ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 2, 2, 8) :
-                             ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 100, 101);
-                    break;
-                case ngraph::helpers::EltwiseTypes::ERF:
-                    tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 6, -3);
-                    break;
-                default:
-                    if (funcInput.get_element_type().is_real()) {
-                        tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 80, 0, 8);
-                    } else {
-                        tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i]);
-                    }
-                    break;
-            }
-            inputs.insert({funcInput.get_node_shared_ptr(), tensor});
+            inputs.insert({funcInput.get_node_shared_ptr(), generate_eltwise_input(funcInput.get_element_type(), targetInputStaticShapes[i])});
         }
     }
 
@@ -88,6 +97,8 @@ protected:
 
         if (ElementType::bf16 == netType) {
             rel_threshold = 2e-2f;
+        } else if (ElementType::i32 == netType) {
+            abs_threshold = 0;
         }
 
         std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
@@ -134,29 +145,21 @@ protected:
                     }
                 }
             }
-            if (eltwiseType == ngraph::helpers::EltwiseTypes::DIVIDE ||
-                eltwiseType == ngraph::helpers::EltwiseTypes::MOD) {
-                std::vector<float> data(ngraph::shape_size(shape));
-                data = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(ngraph::shape_size(shape), 10, 2);
+
+            if (netType == ElementType::i32) {
+                auto data_tensor = generate_eltwise_input(ElementType::i32, shape);
+                auto data_ptr = reinterpret_cast<int32_t*>(data_tensor.data());
+                std::vector<int32_t> data(data_ptr, data_ptr + ngraph::shape_size(shape));
                 secondaryInput = ngraph::builder::makeConstant(netType, shape, data);
-            } else if (eltwiseType == ngraph::helpers::EltwiseTypes::FLOOR_MOD) {
-                auto negative_data_size = ngraph::shape_size(shape) / 2;
-                auto positive_data_size = ngraph::shape_size(shape) - negative_data_size;
-                std::vector<float> negative_data(negative_data_size);
-                std::vector<float> data(positive_data_size);
-                negative_data = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(negative_data_size, -10, -2);
-                data = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(positive_data_size, 10, 2);
-                data.insert(data.end(), negative_data.begin(), negative_data.end());
-                secondaryInput = ngraph::builder::makeConstant(netType, shape, data);
-            } else if (eltwiseType == ngraph::helpers::EltwiseTypes::POWER) {
-                secondaryInput = ngraph::builder::makeConstant(netType, shape, {}, true, 3);
             } else {
-                secondaryInput = ngraph::builder::makeConstant(netType, shape, {}, true);
+                auto data_tensor = generate_eltwise_input(ElementType::f32, shape);
+                auto data_ptr = reinterpret_cast<float*>(data_tensor.data());
+                std::vector<float> data(data_ptr, data_ptr + ngraph::shape_size(shape));
+                secondaryInput = ngraph::builder::makeConstant(netType, shape, data);
             }
         }
         auto eltwise = ngraph::builder::makeEltwise(parameters[0], secondaryInput, eltwiseType);
-
         function = makeNgraphFunction(netType, parameters, eltwise, "Eltwise");
     }
 
@@ -325,6 +328,36 @@ const auto params_5D_emptyCPUSpec = ::testing::Combine(
 
 INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_5D, EltwiseLayerCPUTest, params_5D_emptyCPUSpec, EltwiseLayerCPUTest::getTestCaseName);
 
+std::vector<ngraph::helpers::EltwiseTypes> eltwiseOpTypesI32 = {
+        ngraph::helpers::EltwiseTypes::ADD,
+        ngraph::helpers::EltwiseTypes::MULTIPLY,
+        ngraph::helpers::EltwiseTypes::SUBTRACT,
+        ngraph::helpers::EltwiseTypes::DIVIDE,
+        ngraph::helpers::EltwiseTypes::SQUARED_DIFF,
+};
+
+const std::vector<fusingSpecificParams> fusingParamsSetI32{
+        emptyFusingSpec,
+        fusingMultiplyAddPerChannel,
+};
+
+const auto params_5D_emptyCPUSpec_I32 = ::testing::Combine(
+        ::testing::Combine(
+                ::testing::ValuesIn(static_shapes_to_test_representation(inShapes_5D)),
+                ::testing::ValuesIn(eltwiseOpTypesI32),
+                ::testing::ValuesIn(secondaryInputTypes),
+                ::testing::ValuesIn(opTypes),
+                ::testing::Values(ElementType::i32),
+                ::testing::Values(ElementType::i32),
+                ::testing::Values(ElementType::i32),
+                ::testing::Values(CommonTestUtils::DEVICE_CPU),
+                ::testing::Values(additional_config)),
+        ::testing::Values(emptyCPUSpec),
+        ::testing::ValuesIn(fusingParamsSetI32));
+
+INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_5D_I32, EltwiseLayerCPUTest, params_5D_emptyCPUSpec_I32, EltwiseLayerCPUTest::getTestCaseName);
+
+
 std::vector<std::vector<ov::Shape>> inShapes_4D_Blocked_Planar = {
         {{2, 17, 31, 3}, {2, 1, 31, 3}},
         {{2, 17, 5, 1}, {2, 1, 1, 4}},
diff --git a/src/plugins/intel_cpu/tests/functional/test_utils/fusing_test_utils.hpp b/src/plugins/intel_cpu/tests/functional/test_utils/fusing_test_utils.hpp
index 0369b62a82d..f15442ccef3 100644
--- a/src/plugins/intel_cpu/tests/functional/test_utils/fusing_test_utils.hpp
+++ b/src/plugins/intel_cpu/tests/functional/test_utils/fusing_test_utils.hpp
@@ -361,6 +361,18 @@ const auto fusingMultiplyPerChannel = fusingSpecificParams{std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
             return std::make_shared<ngraph::opset1::Multiply>(cfg.input, secondMultInput);
         }, "Multiply(PerChannel)"}}), {"Multiply"}};
 
+const auto fusingMultiplyAddPerChannel = fusingSpecificParams{std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
+        {[](postNodeConfig& cfg) {
+            ngraph::Shape newShape = generatePerChannelShape(cfg.input);
+            auto constNode = ngraph::builder::makeConstant(cfg.type, newShape, std::vector<float>{}, true);
+            return std::make_shared<ngraph::opset1::Multiply>(cfg.input, constNode);
+        }, "Multiply(PerChannel)"},
+        {[](postNodeConfig& cfg) {
+            ngraph::Shape newShape = generatePerChannelShape(cfg.input);
+            auto constNode = ngraph::builder::makeConstant(cfg.type, newShape, std::vector<float>{}, true);
+            return std::make_shared<ngraph::opset1::Add>(cfg.input, constNode);
+        }, "Add(PerChannel)"}}), {"Add"} };
+
 const auto fusingAddPerTensor = fusingSpecificParams{std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
         {[](postNodeConfig& cfg){
             ngraph::Shape secondMultInShape(1, 1);
diff --git a/src/tests/ie_test_utils/common_test_utils/ov_tensor_utils.cpp b/src/tests/ie_test_utils/common_test_utils/ov_tensor_utils.cpp
index 44051788c42..9b348cc0f92 100644
--- a/src/tests/ie_test_utils/common_test_utils/ov_tensor_utils.cpp
+++ b/src/tests/ie_test_utils/common_test_utils/ov_tensor_utils.cpp
@@ -230,6 +230,10 @@ void compare(const ov::Tensor& expected,
         auto eps = std::numeric_limits<double>::epsilon();
         return (b - a) > (std::fmax(std::fabs(a), std::fabs(b)) * eps);
     };
+    auto less_or_equal = [] (double a, double b) {
+        auto eps = std::numeric_limits<double>::epsilon();
+        return (b - a) >= (std::fmax(std::fabs(a), std::fabs(b)) * eps);
+    };
     for (size_t i = 0; i < shape_size(expected_shape); i++) {
         double expected_value = expected_data[i];
         double actual_value = actual_data[i];
@@ -258,7 +262,7 @@ void compare(const ov::Tensor& expected,
     }
     abs_error.mean /= shape_size(expected_shape);
     rel_error.mean /= shape_size(expected_shape);
-    if (!(less(abs_error.max, abs_threshold) && less(rel_error.max, rel_threshold))) {
+    if (!(less_or_equal(abs_error.max, abs_threshold) && less_or_equal(rel_error.max, rel_threshold))) {
         std::ostringstream out_stream;
         out_stream << "abs_max < abs_threshold && rel_max < rel_threshold"
                    << "\n\t abs_max: " << abs_error.max <<
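The ov_tensor_utils.cpp change and the new abs_threshold = 0 for i32 in the eltwise test belong together: integer kernels are expected to be bit-exact, so the maximum absolute error is checked against a threshold of exactly zero, and the previous strict less(0, 0) comparison would reject even a perfect result. A simplified sketch of that interaction (illustration only; the real helper also folds in a relative-error term and an epsilon-scaled comparison):

#include <cstdio>

int main() {
    double max_abs_error = 0.0;  // i32 eltwise output matched the reference exactly
    double abs_threshold = 0.0;  // bit-exact requirement set by the i32 test

    bool strict_pass    = max_abs_error <  abs_threshold;  // false: exact match rejected
    bool inclusive_pass = max_abs_error <= abs_threshold;  // true:  exact match accepted

    std::printf("strict: %s, inclusive: %s\n",
                strict_pass ? "pass" : "fail",
                inclusive_pass ? "pass" : "fail");
    return 0;
}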