[CPU] IsFinite, IsInf and IsNaN operations JIT implementation. (#14697)

Nikolay Shchegolev 2022-12-29 16:21:25 +04:00 committed by GitHub
parent 36a16c8441
commit 24f1a9270a
6 changed files with 732 additions and 421 deletions

File diff suppressed because it is too large.


@ -4,7 +4,6 @@
#pragma once
#include <cpu/x64/jit_generator.hpp>
#include "jit_emitter.hpp"
namespace ov {
@ -14,7 +13,7 @@ class jit_add_emitter : public jit_emitter {
public:
jit_add_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
jit_add_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
jit_add_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
@ -33,7 +32,7 @@ class jit_mul_add_emitter : public jit_emitter {
public:
jit_mul_add_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
jit_mul_add_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
jit_mul_add_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
@ -55,7 +54,7 @@ class jit_subtract_emitter : public jit_emitter {
public:
jit_subtract_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
jit_subtract_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
jit_subtract_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
@ -75,7 +74,7 @@ class jit_multiply_emitter : public jit_emitter {
public:
jit_multiply_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
jit_multiply_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
jit_multiply_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
@ -95,7 +94,7 @@ class jit_divide_emitter : public jit_emitter {
public:
jit_divide_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
jit_divide_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
jit_divide_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
@ -115,7 +114,7 @@ class jit_floor_emitter : public jit_emitter {
public:
jit_floor_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
jit_floor_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
jit_floor_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
@ -133,7 +132,7 @@ class jit_ceiling_emitter : public jit_emitter {
public:
jit_ceiling_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
jit_ceiling_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
jit_ceiling_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
@ -151,7 +150,7 @@ class jit_floor_mod_emitter : public jit_emitter {
public:
jit_floor_mod_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
jit_floor_mod_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
jit_floor_mod_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
@ -171,7 +170,7 @@ class jit_mod_emitter : public jit_emitter {
public:
jit_mod_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
jit_mod_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
jit_mod_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
@ -191,7 +190,7 @@ class jit_maximum_emitter : public jit_emitter {
public:
jit_maximum_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
jit_maximum_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
jit_maximum_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
@ -211,7 +210,7 @@ class jit_minimum_emitter : public jit_emitter {
public:
jit_minimum_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
jit_minimum_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
jit_minimum_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
@ -232,7 +231,7 @@ public:
jit_squared_difference_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
jit_squared_difference_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
const std::shared_ptr<ngraph::Node>& n,
const std::shared_ptr<ov::Node>& n,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
@ -252,7 +251,7 @@ class jit_power_dynamic_emitter : public jit_emitter {
public:
jit_power_dynamic_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
jit_power_dynamic_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
jit_power_dynamic_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
@ -271,7 +270,7 @@ class jit_equal_emitter : public jit_emitter {
public:
jit_equal_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
jit_equal_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
jit_equal_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
@ -293,7 +292,7 @@ class jit_not_equal_emitter : public jit_emitter {
public:
jit_not_equal_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
jit_not_equal_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
jit_not_equal_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
@ -315,7 +314,7 @@ class jit_greater_emitter : public jit_emitter {
public:
jit_greater_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
jit_greater_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
jit_greater_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
@ -337,7 +336,7 @@ class jit_greater_equal_emitter : public jit_emitter {
public:
jit_greater_equal_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
jit_greater_equal_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
jit_greater_equal_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
@ -359,7 +358,7 @@ class jit_less_emitter : public jit_emitter {
public:
jit_less_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
jit_less_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
jit_less_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
@ -382,7 +381,7 @@ public:
jit_less_equal_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
jit_less_equal_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
jit_less_equal_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
@ -404,7 +403,7 @@ class jit_logical_and_emitter : public jit_emitter {
public:
jit_logical_and_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
jit_logical_and_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
jit_logical_and_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
@ -426,7 +425,7 @@ class jit_logical_or_emitter : public jit_emitter {
public:
jit_logical_or_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
jit_logical_or_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
jit_logical_or_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
@ -448,7 +447,7 @@ class jit_logical_xor_emitter : public jit_emitter {
public:
jit_logical_xor_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
jit_logical_xor_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
jit_logical_xor_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
@ -469,7 +468,7 @@ class jit_logical_not_emitter : public jit_emitter {
public:
jit_logical_not_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
jit_logical_not_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
jit_logical_not_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
@ -491,7 +490,7 @@ public:
jit_power_static_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
float inpPower, float inpScale, float inpShift,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
jit_power_static_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
jit_power_static_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
@ -516,7 +515,7 @@ class jit_prelu_emitter : public jit_emitter {
public:
jit_prelu_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
jit_prelu_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
jit_prelu_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
@ -536,7 +535,7 @@ class jit_sqrt_emitter : public jit_emitter {
public:
jit_sqrt_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
jit_sqrt_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
jit_sqrt_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
@ -552,7 +551,7 @@ private:
class jit_negative_emitter : public jit_emitter {
public:
jit_negative_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
jit_negative_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
@ -571,7 +570,7 @@ public:
jit_erf_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
jit_erf_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
jit_erf_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
@ -595,7 +594,7 @@ class jit_soft_sign_emitter : public jit_emitter {
public:
jit_soft_sign_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
jit_soft_sign_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
jit_soft_sign_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() const override;
@ -611,5 +610,87 @@ private:
void register_table_entries() override;
};
class jit_is_finite_emitter : public jit_emitter {
public:
jit_is_finite_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t hostIsa,
InferenceEngine::Precision execPrc = InferenceEngine::Precision::FP32) : jit_emitter(host, hostIsa, execPrc) {
prepare_table();
}
jit_is_finite_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t hostIsa, const std::shared_ptr<ov::Node>& node,
InferenceEngine::Precision execPrc = InferenceEngine::Precision::FP32) : jit_emitter(host, hostIsa, node, execPrc) {
prepare_table();
}
size_t get_inputs_num() const override { return 1; }
protected:
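// One auxiliary GPR is reserved for the constants-table pointer whenever
// entry_map_ is non-empty, plus one extra scratch register.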
size_t aux_gprs_count() const override { return (entry_map_.empty() ? 0 : 1) + 1; }
void register_table_entries() override;
private:
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs,
const emitter_context *emitContext) const override;
template <dnnl::impl::cpu::x64::cpu_isa_t isa>
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
};
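// A hedged scalar reference (not part of this commit; the real kernel is
// generated by emit_isa() via Xbyak) for the predicate IsFinite evaluates per
// element; assumes <cstdint> and <cstring> are available.
static inline bool is_finite_ref(float x) {
    uint32_t bits;
    std::memcpy(&bits, &x, sizeof(bits));        // portable bit-cast
    return (bits & 0x7F800000u) != 0x7F800000u;  // finite iff exponent != all ones
}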
class jit_is_inf_emitter : public jit_emitter {
public:
jit_is_inf_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t hostIsa,
InferenceEngine::Precision execPrc = InferenceEngine::Precision::FP32, bool detect_negative = true, bool detect_positive = true)
: jit_emitter(host, hostIsa, execPrc), detect_negative(detect_negative), detect_positive(detect_positive) {
prepare_table();
}
jit_is_inf_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t hostIsa, const std::shared_ptr<ov::Node>& node,
InferenceEngine::Precision execPrc = InferenceEngine::Precision::FP32): jit_emitter(host, hostIsa, node, execPrc) {
prepare_table();
}
size_t get_inputs_num() const override { return 1; }
protected:
size_t aux_gprs_count() const override { return (entry_map_.empty() ? 0 : 1) + 1; }
void register_table_entries() override;
private:
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs,
const emitter_context *emitContext) const override;
template <dnnl::impl::cpu::x64::cpu_isa_t isa>
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
// Default-initialized to IsInf's default attributes (both true) so the
// node-based constructor above, which does not set these flags, still leaves
// them with defined values.
bool detect_negative = true;
bool detect_positive = true;
};
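// Scalar reference for the two flags above, a sketch under the same assumptions
// as is_finite_ref: +/-Inf carry an all-ones exponent and a zero mantissa, and
// detect_negative / detect_positive select which sign counts as a match.
static inline bool is_inf_ref(float x, bool detect_negative, bool detect_positive) {
    uint32_t bits;
    std::memcpy(&bits, &x, sizeof(bits));
    if ((bits & 0x7FFFFFFFu) != 0x7F800000u)
        return false;                            // not an infinity of either sign
    return (bits >> 31) ? detect_negative : detect_positive;
}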
class jit_is_nan_emitter : public jit_emitter {
public:
jit_is_nan_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t hostIsa,
InferenceEngine::Precision execPrc = InferenceEngine::Precision::FP32) : jit_emitter(host, hostIsa, execPrc) {
prepare_table();
}
jit_is_nan_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t hostIsa, const std::shared_ptr<ov::Node>& node,
InferenceEngine::Precision execPrc = InferenceEngine::Precision::FP32) : jit_emitter(host, hostIsa, node, execPrc) {
prepare_table();
}
size_t get_inputs_num() const override { return 1; }
protected:
size_t aux_gprs_count() const override { return (entry_map_.empty() ? 0 : 1) + 1; }
void register_table_entries() override;
private:
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs,
const emitter_context *emitContext) const override;
template <dnnl::impl::cpu::x64::cpu_isa_t isa>
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
};
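// Scalar reference (sketch): a NaN has an all-ones exponent and a non-zero
// mantissa; equivalently, it is the only value for which x != x holds.
static inline bool is_nan_ref(float x) {
    return x != x;
}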
} // namespace intel_cpu
} // namespace ov
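The arithmetic all three emitters rely on is ordinary IEEE-754 bit-pattern classification; the JIT kernels themselves live in the suppressed .cpp diff, so the sketch below is only a hedged stand-in using SSE2 intrinsics (the helper names mask_to_01, is_nan_ps, is_inf_ps and is_finite_ps are invented for this illustration). Each helper turns a per-lane comparison mask into the 0.0f/1.0f outputs an eltwise kernel is expected to produce.

#include <emmintrin.h> // SSE2

static inline __m128 mask_to_01(__m128 mask) {
    return _mm_and_ps(mask, _mm_set1_ps(1.0f)); // all-ones lane -> 1.0f, zero lane -> 0.0f
}

static inline __m128 is_nan_ps(__m128 x) {
    return mask_to_01(_mm_cmpunord_ps(x, x)); // only NaN is unordered with itself
}

static inline __m128 is_inf_ps(__m128 x) {
    const __m128 abs_mask = _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF));
    const __m128 pos_inf  = _mm_castsi128_ps(_mm_set1_epi32(0x7F800000));
    return mask_to_01(_mm_cmpeq_ps(_mm_and_ps(x, abs_mask), pos_inf)); // |x| == Inf
}

static inline __m128 is_finite_ps(__m128 x) {
    const __m128 abs_mask = _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF));
    const __m128 pos_inf  = _mm_castsi128_ps(_mm_set1_epi32(0x7F800000));
    return mask_to_01(_mm_cmplt_ps(_mm_and_ps(x, abs_mask), pos_inf)); // |x| < Inf excludes Inf and NaN
}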


@ -94,6 +94,13 @@ struct EltwiseEmitter<jit_power_static_emitter> {
}
};
template<>
struct EltwiseEmitter<jit_is_inf_emitter> {
void operator()(EltwiseEmitterContext & ctx) {
ctx.emitter = std::make_shared<jit_is_inf_emitter>(ctx.host, ctx.host_isa, ctx.exec_prc, ctx.opData.alpha, ctx.opData.beta);
}
};
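// IsInf is the only one of the three new ops that carries attributes: the
// node's detect_negative / detect_positive flags arrive through opData.alpha
// and opData.beta, hence this dedicated specialization. IsFinite and IsNaN can
// rely on the generic EltwiseEmitter template.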
/**
* Implements Eltwise shape inference algorithm. The algorithm is based on broadcasting all the input shapes
* according to the NUMPY broadcast rule. This implementation is more lightweight than the ngraph one.
@ -518,7 +525,10 @@ private:
OV_CASE(Algorithm::EltwisePowerStatic, jit_power_static_emitter),
OV_CASE(Algorithm::EltwisePrelu, jit_prelu_emitter),
OV_CASE(Algorithm::EltwiseErf, jit_erf_emitter),
-        OV_CASE(Algorithm::EltwiseSoftSign, jit_soft_sign_emitter));
+        OV_CASE(Algorithm::EltwiseSoftSign, jit_soft_sign_emitter),
+        OV_CASE(Algorithm::EltwiseIsFinite, jit_is_finite_emitter),
+        OV_CASE(Algorithm::EltwiseIsInf, jit_is_inf_emitter),
+        OV_CASE(Algorithm::EltwiseIsNaN, jit_is_nan_emitter));
if (precisions.empty())
IE_THROW() << "Unsupported operation type for Eltwise emitter";
@ -576,7 +586,10 @@ private:
OV_CASE(Algorithm::EltwisePowerStatic, jit_power_static_emitter),
OV_CASE(Algorithm::EltwisePrelu, jit_prelu_emitter),
OV_CASE(Algorithm::EltwiseErf, jit_erf_emitter),
OV_CASE(Algorithm::EltwiseSoftSign, jit_soft_sign_emitter));
OV_CASE(Algorithm::EltwiseSoftSign, jit_soft_sign_emitter),
OV_CASE(Algorithm::EltwiseIsFinite, jit_is_finite_emitter),
OV_CASE(Algorithm::EltwiseIsInf, jit_is_inf_emitter),
OV_CASE(Algorithm::EltwiseIsNaN, jit_is_nan_emitter));
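// The same three OV_CASE entries are added in two places because precision
// selection and emitter construction use separate dispatch tables; both have
// to recognize the new algorithms, or the IE_THROW checks fire.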
if (!ctx.emitter)
IE_THROW() << "Unsupported operation type for Eltwise emitter";
@ -1750,8 +1763,6 @@ void Eltwise::initSupportedPrimitiveDescriptors() {
// if dim rank is greater than the maximum possible, we should use the reference execution
canUseOptimizedImpl = mayiuse(x64::sse41) && getInputShapeAtPort(0).getRank() <= MAX_ELTWISE_DIM_RANK;
-    // 98206 to add JIT implementation.
-    canUseOptimizedImpl &= !one_of(getAlgorithm(), Algorithm::EltwiseIsFinite, Algorithm::EltwiseIsInf, Algorithm::EltwiseIsNaN);
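// The two removed lines above are the point of this hunk: with JIT emitters
// now available (ticket 98206), IsFinite/IsInf/IsNaN no longer force the
// reference execution path.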
if (!canUseOptimizedImpl && !fusedWith.empty()) {
IE_THROW(Unexpected) << "Eltwise node with name '" << getName() << "' uses reference impl, but unexpectedly fused with other ops";


@ -55,14 +55,31 @@ const auto ComparisonTestParams = ::testing::Combine(
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs, ComparisonLayerTest, ComparisonTestParams, ComparisonLayerTest::getTestCaseName);
std::vector<InputShapesTuple> inputShapesIsOps = {
{{1}, {1}},
{{1, 2}, {1}},
{{3, 1}, {1}},
{{2, 2}, {1}},
{{1, 5, 1}, {1}},
{{2, 1, 1, 3, 1}, {1}},
{{7, 1, 1, 1, 1}, {1}},
{{2, 2, 2}, {1}},
{{3, 1, 3, 3}, {1}},
{{17}, {1}},
{{2, 18}, {1}},
{{1, 3, 20}, {1}},
{{2, 200}, {1}},
{{2, 17, 3, 4}, {1}}
};
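// The second shape in every tuple is a {1} placeholder: the shared comparison
// fixture is built around binary ops, while IsFinite/IsInf/IsNaN only read the
// first input.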
std::vector<ngraph::helpers::ComparisonTypes> comparisonOpTypesIs = {
ngraph::helpers::ComparisonTypes::IS_FINITE,
ngraph::helpers::ComparisonTypes::IS_INF,
ngraph::helpers::ComparisonTypes::IS_NAN
};
const auto ComparisonTestParamsIs = ::testing::Combine(
::testing::ValuesIn(CommonTestUtils::combineParams(inputShapes)),
::testing::ValuesIn(inputShapesIsOps),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::ValuesIn(comparisonOpTypesIs),
::testing::Values(ngraph::helpers::InputLayerType::CONSTANT),
@ -73,4 +90,4 @@ const auto ComparisonTestParamsIs = ::testing::Combine(
INSTANTIATE_TEST_SUITE_P(smoke_IsOp, ComparisonLayerTest, ComparisonTestParamsIs, ComparisonLayerTest::getTestCaseName);
} // namespace
} // namespace


@ -78,24 +78,34 @@ void ComparisonLayerTest::SetUp() {
}
InferenceEngine::Blob::Ptr ComparisonLayerTest::GenerateInput(const InferenceEngine::InputInfo &inputInfo) const {
-    auto blob = LayerTestsUtils::LayerTestsCommon::GenerateInput(inputInfo);
+    InferenceEngine::Blob::Ptr blob;
     if (comparisonOpType == ComparisonTypes::IS_FINITE || comparisonOpType == ComparisonTypes::IS_NAN) {
-        auto *dataPtr = blob->buffer().as<float*>();
-        auto range = blob->size();
+        blob = make_blob_with_precision(inputInfo.getTensorDesc());
+        blob->allocate();
+        auto dataPtr = blob->buffer().as<float*>();
+        auto dataPtrInt = blob->buffer().as<int*>();
+        const auto range = blob->size();
+        const float start = -static_cast<float>(range) / 2.f;
         testing::internal::Random random(1);
-        if (comparisonOpType == ComparisonTypes::IS_FINITE) {
-            for (size_t i = 0; i < range / 2; i++) {
-                dataPtr[random.Generate(range)] =
-                    i % 3 == 0 ? std::numeric_limits<float>::infinity() : i % 3 == 1 ? -std::numeric_limits<float>::infinity() :
-                    std::numeric_limits<double>::quiet_NaN();
-            }
-        } else {
-            for (size_t i = 0; i < range / 2; i++) {
-                dataPtr[random.Generate(range)] = std::numeric_limits<double>::quiet_NaN();
+        for (size_t i = 0; i < range; i++) {
+            if (i % 7 == 0) {
+                dataPtr[i] = std::numeric_limits<float>::infinity();
+            } else if (i % 7 == 1) {
+                dataPtr[i] = -std::numeric_limits<float>::infinity();
+            } else if (i % 7 == 2) {
+                dataPtrInt[i] = 0x7F800000 + random.Generate(range);
+            } else if (i % 7 == 3) {
+                dataPtr[i] = std::numeric_limits<double>::quiet_NaN();
+            } else if (i % 7 == 5) {
+                dataPtr[i] = -std::numeric_limits<double>::quiet_NaN();
+            } else {
+                dataPtr[i] = start + static_cast<float>(random.Generate(range));
             }
         }
-    }
+    } else {
+        blob = LayerTestsUtils::LayerTestsCommon::GenerateInput(inputInfo);
+    }
return blob;
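// A note on the fill pattern above: indices with i % 7 in {0, 1, 3, 5} receive
// +Inf, -Inf and quiet NaNs of both signs, i % 7 == 2 writes a raw
// all-ones-exponent bit pattern (NaN for any non-zero payload), and the
// remaining indices fall through to ordinary finite values, so every input
// class is exercised deterministically on each run.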


@ -76,15 +76,25 @@ void IsInfLayerTest::generate_inputs(const std::vector<ov::Shape>& targetInputSt
const auto& funcInputs = function->inputs();
const auto& input = funcInputs[0];
-    int32_t range = std::accumulate(targetInputStaticShapes[0].begin(), targetInputStaticShapes[0].end(), 1u, std::multiplies<uint32_t>());
-    auto tensor = utils::create_and_fill_tensor(
-        input.get_element_type(), targetInputStaticShapes[0], range, -range / 2, 1);
+    int32_t range = std::accumulate(targetInputStaticShapes[0].begin(), targetInputStaticShapes[0].end(), 1, std::multiplies<uint32_t>());
+    float startFrom = -static_cast<float>(range) / 2.f;
+    auto tensor = ov::Tensor{ input.get_element_type(), targetInputStaticShapes[0]};
     auto pointer = tensor.data<element_type_traits<ov::element::Type_t::f32>::value_type>();
     testing::internal::Random random(1);
-    for (size_t i = 0; i < range / 2; i++) {
-        pointer[random.Generate(range)] = i % 2 == 0 ? std::numeric_limits<float>::infinity() : -std::numeric_limits<float>::infinity();
+    for (size_t i = 0; i < range; i++) {
+        if (i % 7 == 0) {
+            pointer[i] = std::numeric_limits<float>::infinity();
+        } else if (i % 7 == 1) {
+            pointer[i] = std::numeric_limits<double>::quiet_NaN();
+        } else if (i % 7 == 3) {
+            pointer[i] = -std::numeric_limits<float>::infinity();
+        } else if (i % 7 == 5) {
+            pointer[i] = -std::numeric_limits<double>::quiet_NaN();
+        } else {
+            pointer[i] = startFrom + static_cast<float>(random.Generate(range));
+        }
}
inputs.insert({input.get_node_shared_ptr(), tensor});
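// NaNs are deliberately mixed into the IsInf inputs too: the op must report
// false for them, which checks that the kernel separates the two
// all-ones-exponent classes (Inf has a zero mantissa, NaN a non-zero one).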