[CPU] IsFinite, IsInf and IsNaN operations JIT implementation. (#14697)
This commit is contained in:
parent
36a16c8441
commit
24f1a9270a
File diff suppressed because it is too large
Load Diff
@ -4,7 +4,6 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cpu/x64/jit_generator.hpp>
|
||||
#include "jit_emitter.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -14,7 +13,7 @@ class jit_add_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_add_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_add_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
jit_add_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
@ -33,7 +32,7 @@ class jit_mul_add_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_mul_add_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_mul_add_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
jit_mul_add_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
@ -55,7 +54,7 @@ class jit_subtract_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_subtract_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_subtract_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
jit_subtract_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
@ -75,7 +74,7 @@ class jit_multiply_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_multiply_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_multiply_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
jit_multiply_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
@ -95,7 +94,7 @@ class jit_divide_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_divide_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_divide_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
jit_divide_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
@ -115,7 +114,7 @@ class jit_floor_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_floor_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_floor_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
jit_floor_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
@ -133,7 +132,7 @@ class jit_ceiling_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_ceiling_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_ceiling_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
jit_ceiling_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
@ -151,7 +150,7 @@ class jit_floor_mod_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_floor_mod_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_floor_mod_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
jit_floor_mod_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
@ -171,7 +170,7 @@ class jit_mod_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_mod_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_mod_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
jit_mod_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
@ -191,7 +190,7 @@ class jit_maximum_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_maximum_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_maximum_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
jit_maximum_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
@ -211,7 +210,7 @@ class jit_minimum_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_minimum_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_minimum_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
jit_minimum_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
@ -232,7 +231,7 @@ public:
|
||||
jit_squared_difference_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_squared_difference_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
const std::shared_ptr<ngraph::Node>& n,
|
||||
const std::shared_ptr<ov::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
@ -252,7 +251,7 @@ class jit_power_dynamic_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_power_dynamic_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_power_dynamic_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
jit_power_dynamic_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
@ -271,7 +270,7 @@ class jit_equal_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_equal_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_equal_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
jit_equal_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
@ -293,7 +292,7 @@ class jit_not_equal_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_not_equal_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_not_equal_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
jit_not_equal_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
@ -315,7 +314,7 @@ class jit_greater_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_greater_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_greater_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
jit_greater_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
@ -337,7 +336,7 @@ class jit_greater_equal_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_greater_equal_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_greater_equal_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
jit_greater_equal_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
@ -359,7 +358,7 @@ class jit_less_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_less_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_less_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
jit_less_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
@ -382,7 +381,7 @@ public:
|
||||
jit_less_equal_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
jit_less_equal_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
jit_less_equal_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
@ -404,7 +403,7 @@ class jit_logical_and_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_logical_and_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_logical_and_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
jit_logical_and_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
@ -426,7 +425,7 @@ class jit_logical_or_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_logical_or_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_logical_or_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
jit_logical_or_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
@ -448,7 +447,7 @@ class jit_logical_xor_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_logical_xor_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_logical_xor_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
jit_logical_xor_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
@ -469,7 +468,7 @@ class jit_logical_not_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_logical_not_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_logical_not_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
jit_logical_not_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
@ -491,7 +490,7 @@ public:
|
||||
jit_power_static_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
float inpPower, float inpScale, float inpShift,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_power_static_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
jit_power_static_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
@ -516,7 +515,7 @@ class jit_prelu_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_prelu_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_prelu_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
jit_prelu_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
@ -536,7 +535,7 @@ class jit_sqrt_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_sqrt_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_sqrt_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
jit_sqrt_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
@ -552,7 +551,7 @@ private:
|
||||
|
||||
class jit_negative_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_negative_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
jit_negative_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
@ -571,7 +570,7 @@ public:
|
||||
jit_erf_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
jit_erf_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
jit_erf_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
@ -595,7 +594,7 @@ class jit_soft_sign_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_soft_sign_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_soft_sign_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
jit_soft_sign_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ov::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
@ -611,5 +610,87 @@ private:
|
||||
void register_table_entries() override;
|
||||
};
|
||||
|
||||
class jit_is_finite_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_is_finite_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t hostIsa,
|
||||
InferenceEngine::Precision execPrc = InferenceEngine::Precision::FP32) : jit_emitter(host, hostIsa, execPrc) {
|
||||
prepare_table();
|
||||
}
|
||||
jit_is_finite_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t hostIsa, const std::shared_ptr<ov::Node>& node,
|
||||
InferenceEngine::Precision execPrc = InferenceEngine::Precision::FP32) : jit_emitter(host, hostIsa, node, execPrc) {
|
||||
prepare_table();
|
||||
}
|
||||
|
||||
size_t get_inputs_num() const override { return 1; };
|
||||
|
||||
protected:
|
||||
size_t aux_gprs_count() const override { return (entry_map_.empty() ? 0 : 1) + 1; }
|
||||
void register_table_entries() override;
|
||||
|
||||
private:
|
||||
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
|
||||
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs,
|
||||
const emitter_context *emitContext) const override;
|
||||
|
||||
template <dnnl::impl::cpu::x64::cpu_isa_t isa>
|
||||
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
|
||||
};
|
||||
|
||||
class jit_is_inf_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_is_inf_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t hostIsa,
|
||||
InferenceEngine::Precision execPrc = InferenceEngine::Precision::FP32, bool detect_negative = true, bool detect_positive = true)
|
||||
: jit_emitter(host, hostIsa, execPrc), detect_negative(detect_negative), detect_positive(detect_positive) {
|
||||
prepare_table();
|
||||
}
|
||||
jit_is_inf_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t hostIsa, const std::shared_ptr<ov::Node>& node,
|
||||
InferenceEngine::Precision execPrc = InferenceEngine::Precision::FP32): jit_emitter(host, hostIsa, node, execPrc) {
|
||||
prepare_table();
|
||||
}
|
||||
|
||||
size_t get_inputs_num() const override { return 1; };
|
||||
|
||||
protected:
|
||||
size_t aux_gprs_count() const override { return (entry_map_.empty() ? 0 : 1) + 1; }
|
||||
void register_table_entries() override;
|
||||
|
||||
private:
|
||||
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
|
||||
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs,
|
||||
const emitter_context *emitContext) const override;
|
||||
|
||||
template <dnnl::impl::cpu::x64::cpu_isa_t isa>
|
||||
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
|
||||
|
||||
bool detect_negative;
|
||||
bool detect_positive;
|
||||
};
|
||||
|
||||
class jit_is_nan_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_is_nan_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t hostIsa,
|
||||
InferenceEngine::Precision execPrc = InferenceEngine::Precision::FP32) : jit_emitter(host, hostIsa, execPrc) {
|
||||
prepare_table();
|
||||
}
|
||||
jit_is_nan_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t hostIsa, const std::shared_ptr<ov::Node>& node,
|
||||
InferenceEngine::Precision execPrc = InferenceEngine::Precision::FP32) : jit_emitter(host, hostIsa, node, execPrc) {
|
||||
prepare_table();
|
||||
}
|
||||
|
||||
size_t get_inputs_num() const override { return 1; }
|
||||
|
||||
protected:
|
||||
size_t aux_gprs_count() const override { return (entry_map_.empty() ? 0 : 1) + 1; }
|
||||
void register_table_entries() override;
|
||||
|
||||
private:
|
||||
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
|
||||
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs,
|
||||
const emitter_context *emitContext) const override;
|
||||
|
||||
template <dnnl::impl::cpu::x64::cpu_isa_t isa>
|
||||
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
|
||||
};
|
||||
|
||||
} // namespace intel_cpu
|
||||
} // namespace ov
|
||||
|
@ -94,6 +94,13 @@ struct EltwiseEmitter<jit_power_static_emitter> {
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct EltwiseEmitter<jit_is_inf_emitter> {
|
||||
void operator()(EltwiseEmitterContext & ctx) {
|
||||
ctx.emitter = std::make_shared<jit_is_inf_emitter>(ctx.host, ctx.host_isa, ctx.exec_prc, ctx.opData.alpha, ctx.opData.beta);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Implements Eltwise shape inference algorithm. The algorithm is based on broadcasting all the input shapes
|
||||
* according to the NUMPY broadcast rule. This implementation is more lightweight than the ngraph one.
|
||||
@ -518,7 +525,10 @@ private:
|
||||
OV_CASE(Algorithm::EltwisePowerStatic, jit_power_static_emitter),
|
||||
OV_CASE(Algorithm::EltwisePrelu, jit_prelu_emitter),
|
||||
OV_CASE(Algorithm::EltwiseErf, jit_erf_emitter),
|
||||
OV_CASE(Algorithm::EltwiseSoftSign, jit_soft_sign_emitter));
|
||||
OV_CASE(Algorithm::EltwiseSoftSign, jit_soft_sign_emitter),
|
||||
OV_CASE(Algorithm::EltwiseIsFinite, jit_is_finite_emitter),
|
||||
OV_CASE(Algorithm::EltwiseIsInf, jit_is_inf_emitter),
|
||||
OV_CASE(Algorithm::EltwiseIsNaN, jit_is_nan_emitter));
|
||||
|
||||
if (precisions.empty())
|
||||
IE_THROW() << "Unsupported operation type for Eltwise emitter";
|
||||
@ -576,7 +586,10 @@ private:
|
||||
OV_CASE(Algorithm::EltwisePowerStatic, jit_power_static_emitter),
|
||||
OV_CASE(Algorithm::EltwisePrelu, jit_prelu_emitter),
|
||||
OV_CASE(Algorithm::EltwiseErf, jit_erf_emitter),
|
||||
OV_CASE(Algorithm::EltwiseSoftSign, jit_soft_sign_emitter));
|
||||
OV_CASE(Algorithm::EltwiseSoftSign, jit_soft_sign_emitter),
|
||||
OV_CASE(Algorithm::EltwiseIsFinite, jit_is_finite_emitter),
|
||||
OV_CASE(Algorithm::EltwiseIsInf, jit_is_inf_emitter),
|
||||
OV_CASE(Algorithm::EltwiseIsNaN, jit_is_nan_emitter));
|
||||
|
||||
if (!ctx.emitter)
|
||||
IE_THROW() << "Unsupported operation type for Eltwise emitter";
|
||||
@ -1750,8 +1763,6 @@ void Eltwise::initSupportedPrimitiveDescriptors() {
|
||||
|
||||
// if dim rank is greater than the maximum possible, we should use the reference execution
|
||||
canUseOptimizedImpl = mayiuse(x64::sse41) && getInputShapeAtPort(0).getRank() <= MAX_ELTWISE_DIM_RANK;
|
||||
// 98206 to add JIT implementation.
|
||||
canUseOptimizedImpl &= !one_of(getAlgorithm(), Algorithm::EltwiseIsFinite, Algorithm::EltwiseIsInf, Algorithm::EltwiseIsNaN);
|
||||
|
||||
if (!canUseOptimizedImpl && !fusedWith.empty()) {
|
||||
IE_THROW(Unexpected) << "Eltwise node with name '" << getName() << "' uses reference impl, but unexpectedly fused with other ops";
|
||||
|
@ -55,14 +55,31 @@ const auto ComparisonTestParams = ::testing::Combine(
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs, ComparisonLayerTest, ComparisonTestParams, ComparisonLayerTest::getTestCaseName);
|
||||
|
||||
|
||||
std::vector<InputShapesTuple> inputShapesIsOps = {
|
||||
{{1}, {1}},
|
||||
{{1, 2}, {1}},
|
||||
{{3, 1}, {1}},
|
||||
{{2, 2}, {1}},
|
||||
{{1, 5, 1}, {1}},
|
||||
{{2, 1, 1, 3, 1}, {1}},
|
||||
{{7, 1, 1, 1, 1}, {1}},
|
||||
{{2, 2, 2}, {1}},
|
||||
{{3, 1, 3, 3}, {1}},
|
||||
{{17}, {1}},
|
||||
{{2, 18}, {1}},
|
||||
{{1, 3, 20}, {1}},
|
||||
{{2, 200}, {1}},
|
||||
{{2, 17, 3, 4}, {1}}
|
||||
};
|
||||
|
||||
std::vector<ngraph::helpers::ComparisonTypes> comparisonOpTypesIs = {
|
||||
ngraph::helpers::ComparisonTypes::IS_FINITE,
|
||||
ngraph::helpers::ComparisonTypes::IS_INF,
|
||||
ngraph::helpers::ComparisonTypes::IS_NAN
|
||||
};
|
||||
|
||||
const auto ComparisonTestParamsIs = ::testing::Combine(
|
||||
::testing::ValuesIn(CommonTestUtils::combineParams(inputShapes)),
|
||||
::testing::ValuesIn(inputShapesIsOps),
|
||||
::testing::Values(InferenceEngine::Precision::FP32),
|
||||
::testing::ValuesIn(comparisonOpTypesIs),
|
||||
::testing::Values(ngraph::helpers::InputLayerType::CONSTANT),
|
||||
@ -73,4 +90,4 @@ const auto ComparisonTestParamsIs = ::testing::Combine(
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_IsOp, ComparisonLayerTest, ComparisonTestParamsIs, ComparisonLayerTest::getTestCaseName);
|
||||
|
||||
} // namespace
|
||||
} // namespace
|
||||
|
@ -78,24 +78,34 @@ void ComparisonLayerTest::SetUp() {
|
||||
}
|
||||
|
||||
InferenceEngine::Blob::Ptr ComparisonLayerTest::GenerateInput(const InferenceEngine::InputInfo &inputInfo) const {
|
||||
auto blob = LayerTestsUtils::LayerTestsCommon::GenerateInput(inputInfo);
|
||||
InferenceEngine::Blob::Ptr blob;
|
||||
|
||||
if (comparisonOpType == ComparisonTypes::IS_FINITE || comparisonOpType == ComparisonTypes::IS_NAN) {
|
||||
auto *dataPtr = blob->buffer().as<float*>();
|
||||
auto range = blob->size();
|
||||
blob = make_blob_with_precision(inputInfo.getTensorDesc());
|
||||
blob->allocate();
|
||||
auto dataPtr = blob->buffer().as<float*>();
|
||||
auto dataPtrInt = blob->buffer().as<int*>();
|
||||
const auto range = blob->size();
|
||||
const float start = -static_cast<float>(range) / 2.f;
|
||||
testing::internal::Random random(1);
|
||||
|
||||
if (comparisonOpType == ComparisonTypes::IS_FINITE) {
|
||||
for (size_t i = 0; i < range / 2; i++) {
|
||||
dataPtr[random.Generate(range)] =
|
||||
i % 3 == 0 ? std::numeric_limits<float>::infinity() : i % 3 == 1 ? -std::numeric_limits<float>::infinity() :
|
||||
std::numeric_limits<double>::quiet_NaN();
|
||||
}
|
||||
} else {
|
||||
for (size_t i = 0; i < range / 2; i++) {
|
||||
dataPtr[random.Generate(range)] = std::numeric_limits<double>::quiet_NaN();
|
||||
for (size_t i = 0; i < range; i++) {
|
||||
if (i % 7 == 0) {
|
||||
dataPtr[i] = std::numeric_limits<float>::infinity();
|
||||
} else if (i % 7 == 1) {
|
||||
dataPtr[i] = -std::numeric_limits<float>::infinity();
|
||||
} else if (i % 7 == 2) {
|
||||
dataPtrInt[i] = 0x7F800000 + random.Generate(range);
|
||||
} else if (i % 7 == 3) {
|
||||
dataPtr[i] = std::numeric_limits<double>::quiet_NaN();
|
||||
} else if (i % 7 == 5) {
|
||||
dataPtr[i] = -std::numeric_limits<double>::quiet_NaN();
|
||||
} else {
|
||||
dataPtr[i] = start + static_cast<float>(random.Generate(range));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
blob = LayerTestsUtils::LayerTestsCommon::GenerateInput(inputInfo);
|
||||
}
|
||||
|
||||
return blob;
|
||||
|
@ -76,15 +76,25 @@ void IsInfLayerTest::generate_inputs(const std::vector<ov::Shape>& targetInputSt
|
||||
const auto& funcInputs = function->inputs();
|
||||
const auto& input = funcInputs[0];
|
||||
|
||||
int32_t range = std::accumulate(targetInputStaticShapes[0].begin(), targetInputStaticShapes[0].end(), 1u, std::multiplies<uint32_t>());
|
||||
auto tensor = utils::create_and_fill_tensor(
|
||||
input.get_element_type(), targetInputStaticShapes[0], range, -range / 2, 1);
|
||||
int32_t range = std::accumulate(targetInputStaticShapes[0].begin(), targetInputStaticShapes[0].end(), 1, std::multiplies<uint32_t>());
|
||||
float startFrom = -static_cast<float>(range) / 2.f;
|
||||
auto tensor = ov::Tensor{ input.get_element_type(), targetInputStaticShapes[0]};
|
||||
|
||||
auto pointer = tensor.data<element_type_traits<ov::element::Type_t::f32>::value_type>();
|
||||
testing::internal::Random random(1);
|
||||
|
||||
for (size_t i = 0; i < range / 2; i++) {
|
||||
pointer[random.Generate(range)] = i % 2 == 0 ? std::numeric_limits<float>::infinity() : -std::numeric_limits<float>::infinity();
|
||||
for (size_t i = 0; i < range; i++) {
|
||||
if (i % 7 == 0) {
|
||||
pointer[i] = std::numeric_limits<float>::infinity();
|
||||
} else if (i % 7 == 1) {
|
||||
pointer[i] = std::numeric_limits<double>::quiet_NaN();
|
||||
} else if (i % 7 == 3) {
|
||||
pointer[i] = -std::numeric_limits<float>::infinity();
|
||||
} else if (i % 7 == 5) {
|
||||
pointer[i] = -std::numeric_limits<double>::quiet_NaN();
|
||||
} else {
|
||||
pointer[i] = startFrom + static_cast<float>(random.Generate(range));
|
||||
}
|
||||
}
|
||||
|
||||
inputs.insert({input.get_node_shared_ptr(), tensor});
|
||||
|
Loading…
Reference in New Issue
Block a user