diff --git a/inference-engine/src/mkldnn_plugin/nodes/jit_eltwise_emitters.cpp b/inference-engine/src/mkldnn_plugin/nodes/jit_eltwise_emitters.cpp
index cca2e6e53e9..77a74bf91b6 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/jit_eltwise_emitters.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/jit_eltwise_emitters.cpp
@@ -204,14 +204,43 @@ void jit_divide_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const
     Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
     Vmm vmm_dst = Vmm(out_vec_idxs[0]);
 
+    auto uni_vdiv = [this](Vmm vmm_dst, Vmm vmm_src0, Vmm vmm_src1) {
+        switch (exec_prc_) {
+            case Precision::FP32: {
+                h->uni_vdivps(vmm_dst, vmm_src0, vmm_src1);
+                break;
+            }
+            case Precision::I32: {
+                Vmm vmm_aux0 = Vmm(aux_vec_idxs[0]);
+
+                // The opset doesn't contain vector instruction for integer divide operation
+                // As WA we emulate its behavior via fp divide followed by rounding to zero
+                h->uni_vcvtdq2ps(vmm_dst, vmm_src0);
+                h->uni_vcvtdq2ps(vmm_aux0, vmm_src1);
+                h->uni_vdivps(vmm_dst, vmm_dst, vmm_aux0);
+                h->uni_vroundps(vmm_dst, vmm_dst, 3); // rounding to zero
+                h->uni_vcvtps2dq(vmm_dst, vmm_dst);
+                break;
+            }
+            default: assert(!"unsupported precision");
+        }
+    };
+
     if (isa == cpu::sse42) {
         h->uni_vmovups(vmm_dst, vmm_src0);
-        h->uni_vdivps(vmm_dst, vmm_dst, vmm_src1);
+        uni_vdiv(vmm_dst, vmm_dst, vmm_src1);
     } else {
-        h->uni_vdivps(vmm_dst, vmm_src0, vmm_src1);
+        uni_vdiv(vmm_dst, vmm_src0, vmm_src1);
     }
 }
 
+std::set<InferenceEngine::Precision> jit_divide_emitter::get_supported_precisions() {
+    return {Precision::FP32, Precision::I32};
+}
+
+size_t jit_divide_emitter::aux_vecs_count() const {
+    return exec_prc_ == Precision::I32 ? 1 : 0;
+}
+
 /// FLOOR_MOD ///
 jit_floor_mod_emitter::jit_floor_mod_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, Precision exec_prc)
diff --git a/inference-engine/src/mkldnn_plugin/nodes/jit_eltwise_emitters.hpp b/inference-engine/src/mkldnn_plugin/nodes/jit_eltwise_emitters.hpp
index 4bf427e6f49..910d6a1b08a 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/jit_eltwise_emitters.hpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/jit_eltwise_emitters.hpp
@@ -81,6 +81,7 @@ public:
                        InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
 
     size_t get_inputs_num() override;
+    static std::set<InferenceEngine::Precision> get_supported_precisions();
 
 private:
     void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
@@ -88,6 +89,7 @@ private:
     template <mkldnn::impl::cpu::cpu_isa_t isa>
     void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
 
+    size_t aux_vecs_count() const override;
 };
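Note on the I32 branch added above: x86 SIMD has no packed integer-divide instruction the emitter could call, so the workaround converts both operands to FP32, divides, truncates toward zero (uni_vroundps with immediate 3), and converts back. The following standalone scalar sketch models the same emulation; it is illustrative only, not part of the patch, and the function name is made up for the example:

    // Scalar model of the emitter's I32 divide workaround: FP32 divide + round toward zero.
    #include <cassert>
    #include <cmath>
    #include <cstdint>

    int32_t div_i32_via_fp32(int32_t a, int32_t b) {
        float q = static_cast<float>(a) / static_cast<float>(b);
        return static_cast<int32_t>(std::trunc(q));  // same rounding as uni_vroundps(..., 3)
    }

    int main() {
        assert(div_i32_via_fp32(7, 2) == 3);     // matches C++ integer division
        assert(div_i32_via_fp32(-7, 2) == -3);   // truncation toward zero, not floor
        // Caveat: FP32 has a 24-bit mantissa, so the emulation can drift from exact
        // integer division once operand magnitudes approach 2^24 and beyond.
        return 0;
    }

The same truncation/precision caveat is why the node changes below refuse to fuse an I32 Divide: once a fused chain is executed in FP32, the exact integer quotient is no longer guaranteed.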
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp
index 958219794fa..df6fd85f3c2 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp
@@ -70,9 +70,12 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener
         for (int i = 0; i < eltwiseNode.getFusedWith().size(); i++) {
             if (eltwiseNode.getFusedWith()[i].get()->getType() == Eltwise) {
                 std::set<Precision> prcs = get_supported_precisions(*eltwiseNode.getFusedWith()[i].get());
+                std::set<Precision> prcs_intersect = {};
+
                 std::set_intersection(supported_precision_intersection.begin(), supported_precision_intersection.end(),
-                                      prcs.begin(), prcs.end(), std::inserter(supported_precision_intersection, supported_precision_intersection.begin()));
+                                      prcs.begin(), prcs.end(), std::inserter(prcs_intersect, prcs_intersect.begin()));
+
+                supported_precision_intersection = prcs_intersect;
             }
         }
 
@@ -1723,9 +1726,29 @@ bool MKLDNNEltwiseNode::canFuse(const MKLDNNNodePtr& node) const {
         return false;
     };
 
+    auto isSuitableNode = [](const MKLDNNEltwiseNode* node) {
+        // [WA] Since execution precision change from I32 to FP32 for Divide operation may lead to incorrect results
+        // we disable its fusing, otherwise there is no guarantee it will be executed in I32
+        // [TODO] We need to rewrite the support for different precisions entirely to avoid implicit conversions to FP32
+        // (all should be handled via explicit convert operations)
+        if (node->getOpType() == Divide) {
+            for (int i = 0; i < node->getCnnLayer()->insData.size(); i++) {
+                if (node->getCnnLayer()->insData[i].lock()->getPrecision() == Precision::I32) {
+                    return false;
+                }
+            }
+        }
+
+        return true;
+    };
+
     if (!mayiuse(cpu::sse42))
         return false;
 
+    if (!isSuitableNode(this)) {
+        return false;
+    }
+
     // FQ inputs with quantization parameters will be hided inside post_op object, so will not increase inputs number
     size_t addedInputEdgesNum = node->getType() != Quantize ? (node->getParentEdges().size() - 1) : 0;
     if (getParentEdges().size() + addedInputEdgesNum > MAX_ELTWISE_INPUTS)
@@ -1734,6 +1757,10 @@ bool MKLDNNEltwiseNode::canFuse(const MKLDNNNodePtr& node) const {
     if (node->getType() == Eltwise) {
         auto eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
         if (eltwiseNode->getParentEdgesAtPort(0)[0]->getParent().get() != this) {
+            if (!isSuitableNode(this)) {
+                return false;
+            }
+
             // Eltwise jitter doesn't respect commutative property, so fusing is disabled in case it applied not for 0-th port.
             if (isOneOf(eltwiseNode->getOpType(), {Subtract, Divide, FloorMod, Mod, PowerDynamic, Greater, GreaterEqual, Less, LessEqual})) {
                 return false;
diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/eltwise.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/eltwise.cpp
index 66863b9c561..a8cb97e7836 100644
--- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/eltwise.cpp
+++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/eltwise.cpp
@@ -28,6 +28,7 @@ std::vector<std::vector<std::vector<size_t>>> inShapes = {
 std::vector<InferenceEngine::Precision> netPrecisions = {
         InferenceEngine::Precision::FP32,
         InferenceEngine::Precision::FP16,
+        InferenceEngine::Precision::I32,
 };
 
 std::vector<ngraph::helpers::InputLayerType> secondaryInputTypes = {
diff --git a/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/eltwise_chain.cpp b/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/eltwise_chain.cpp
index 5c44c892be5..bcb71b67685 100644
--- a/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/eltwise_chain.cpp
+++ b/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/eltwise_chain.cpp
@@ -136,7 +136,7 @@ std::vector<std::vector<InferenceEngine::Precision>> inputPrecisions = {
 
 std::vector<std::vector<EltwiseTypes>> eltwiseOps = {
         { EltwiseTypes::ADD, EltwiseTypes::MULTIPLY, EltwiseTypes::SUBTRACT },
-        { EltwiseTypes::DIVIDE, EltwiseTypes::POWER, EltwiseTypes::ADD },
+        { EltwiseTypes::DIVIDE, EltwiseTypes::SQUARED_DIFF, EltwiseTypes::ADD },
 };
 
 INSTANTIATE_TEST_CASE_P(smoke_EltwiseChain, EltwiseChainTest,
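One detail worth calling out in the mkldnn_eltwise_node.cpp hunk above: std::set_intersection requires that its output range not overlap either input range, which the old code violated by writing through an inserter into supported_precision_intersection while still reading from it. Below is a standalone sketch of the corrected pattern; it is illustrative only, not part of the patch, and uses plain strings in place of Precision values:

    // Accumulate the intersection into a temporary set, then assign it back,
    // instead of inserting into the range that is still being read.
    #include <algorithm>
    #include <iterator>
    #include <set>
    #include <string>

    int main() {
        std::set<std::string> supported_precision_intersection = {"FP32", "I32"};
        std::set<std::string> prcs = {"BF16", "FP32"};

        std::set<std::string> prcs_intersect;
        std::set_intersection(supported_precision_intersection.begin(), supported_precision_intersection.end(),
                              prcs.begin(), prcs.end(),
                              std::inserter(prcs_intersect, prcs_intersect.begin()));

        supported_precision_intersection = prcs_intersect;  // now {"FP32"}
        return 0;
    }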
diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/base/layer_test_utils.hpp b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/base/layer_test_utils.hpp
index 324805227be..f65f68a8551 100644
--- a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/base/layer_test_utils.hpp
+++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/base/layer_test_utils.hpp
@@ -168,7 +168,8 @@ protected:
             }
 
             const auto max = std::max(CommonTestUtils::ie_abs(res), CommonTestUtils::ie_abs(ref));
-            ASSERT_TRUE(max != 0 && ((absoluteDifference / max) <= threshold))
+            float diff = static_cast<float>(absoluteDifference) / static_cast<float>(max);
+            ASSERT_TRUE(max != 0 && (diff <= static_cast<float>(threshold)))
                     << "Relative comparison of values expected: " << ref << " and actual: " << res
                     << " at index " << i << " with threshold " << threshold
                     << " failed";
diff --git a/ngraph/python/tests/test_onnx/test_backend.py b/ngraph/python/tests/test_onnx/test_backend.py
index 8a67427952a..5c708c78ba9 100644
--- a/ngraph/python/tests/test_onnx/test_backend.py
+++ b/ngraph/python/tests/test_onnx/test_backend.py
@@ -635,8 +635,7 @@ tests_expected_to_fail = [
      "OnnxBackendNodeModelTest.test_adagrad_multiple_cpu",
      "OnnxBackendNodeModelTest.test_adagrad_cpu"),
     (xfail_issue_41894,
-     "OnnxBackendNodeModelTest.test_max_uint16_cpu",
-     "OnnxBackendNodeModelTest.test_mod_int64_fmod_cpu"),
+     "OnnxBackendNodeModelTest.test_max_uint16_cpu"),
     (xfail_issue_43523,
      "OnnxBackendNodeModelTest.test_reduce_sum_do_not_keepdims_example_cpu",
      "OnnxBackendNodeModelTest.test_reduce_sum_do_not_keepdims_random_cpu",