[CPU] Code generation: Floor & Ceiling & Round implementation (#10666)
This commit is contained in:
@@ -51,6 +51,7 @@ NGRAPH_OP(Log, ngraph::op::v0)
|
||||
NGRAPH_OP(LogicalNot, ngraph::op::v1)
|
||||
NGRAPH_OP(Negative, ngraph::op::v0)
|
||||
NGRAPH_OP(Relu, ngraph::op::v0)
|
||||
NGRAPH_OP(Round, ngraph::op::v5)
|
||||
NGRAPH_OP(Selu, ngraph::op::v0)
|
||||
NGRAPH_OP(Sign, ngraph::op::v0)
|
||||
NGRAPH_OP(Sigmoid, ngraph::op::v0)
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#include "snippets/op/subgraph.hpp"
|
||||
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
#include <ngraph/opsets/opset5.hpp>
|
||||
#include <ngraph/rt_info.hpp>
|
||||
#include <ngraph/op/loop.hpp>
|
||||
#include "transformations/utils/utils.hpp"
|
||||
@@ -84,12 +85,15 @@ auto is_layout_oblivious(const std::shared_ptr<const Node> &n) -> bool {
|
||||
auto is_layout_oblivious_unary = [](const std::shared_ptr<const Node> &n) -> bool {
|
||||
return ov::is_type<opset1::Abs>(n)
|
||||
|| ov::is_type<opset1::Clamp>(n)
|
||||
|| ov::is_type<opset1::Floor>(n)
|
||||
|| ov::is_type<opset1::Ceiling>(n)
|
||||
|| ov::is_type<opset1::Elu>(n)
|
||||
|| ov::is_type<opset1::Erf>(n)
|
||||
|| ov::is_type<opset1::Exp>(n)
|
||||
|| ov::is_type<opset1::LogicalNot>(n)
|
||||
|| ov::is_type<opset1::Negative>(n)
|
||||
|| ov::is_type<opset1::Relu>(n)
|
||||
|| ov::is_type<opset5::Round>(n)
|
||||
|| ov::is_type<opset1::Sigmoid>(n)
|
||||
|| ov::is_type<opset1::Sqrt>(n)
|
||||
|| ov::is_type<opset1::Tanh>(n)
|
||||
|
||||
@@ -14,6 +14,8 @@
|
||||
#include "jit_mkldnn_emitters.hpp"
|
||||
#include "jit_mkldnn_ext_emitters.hpp"
|
||||
|
||||
#include <ngraph/opsets/opset5.hpp>
|
||||
|
||||
using namespace std;
|
||||
using namespace ngraph::snippets;
|
||||
|
||||
@@ -87,14 +89,15 @@ ov::intel_cpu::CPUTargetMachine::CPUTargetMachine(dnnl::impl::cpu::x64::cpu_isa_
|
||||
// jitters[ngraph::opset1::Acos::get_type_info_static()] = CREATE_EMITTER(); // not supported
|
||||
// jitters[ngraph::opset1::Asin::get_type_info_static()] = CREATE_EMITTER(); // not supported
|
||||
// jitters[ngraph::opset1::Atan::get_type_info_static()] = CREATE_EMITTER(); // not supported
|
||||
// jitters[ngraph::opset1::Ceiling::get_type_info_static()] = CREATE_EMITTER(); // not supported
|
||||
jitters[ngraph::opset1::Ceiling::get_type_info_static()] = CREATE_EMITTER(ov::intel_cpu::jit_ceiling_emitter);
|
||||
jitters[ngraph::opset1::Clamp::get_type_info_static()] = CREATE_EMITTER(ov::intel_cpu::jit_clamp_emitter);
|
||||
// jitters[ngraph::opset1::Cos::get_type_info_static()] = CREATE_EMITTER(); // not supported
|
||||
// jitters[ngraph::opset1::Cosh::get_type_info_static()] = CREATE_EMITTER(); // not supported
|
||||
jitters[ngraph::opset1::Elu::get_type_info_static()] = CREATE_EMITTER(ov::intel_cpu::jit_elu_emitter);
|
||||
jitters[ngraph::opset1::Erf::get_type_info_static()] = CREATE_EMITTER(ov::intel_cpu::jit_erf_emitter);
|
||||
jitters[ngraph::opset1::Exp::get_type_info_static()] = CREATE_EMITTER(ov::intel_cpu::jit_exp_emitter);
|
||||
// jitters[ngraph::opset1::Floor::get_type_info_static()] = CREATE_EMITTER(); // not supported
|
||||
jitters[ngraph::opset1::Floor::get_type_info_static()] = CREATE_EMITTER(ov::intel_cpu::jit_floor_emitter);
|
||||
jitters[ngraph::opset5::Round::get_type_info_static()] = CREATE_EMITTER(ov::intel_cpu::jit_round_emitter);
|
||||
// jitters[ngraph::opset1::Log::get_type_info_static()] = CREATE_EMITTER(); // not supported
|
||||
jitters[ngraph::opset1::LogicalNot::get_type_info_static()] = CREATE_EMITTER(ov::intel_cpu::jit_logical_not_emitter);
|
||||
jitters[ngraph::opset1::Negative::get_type_info_static()] = CREATE_EMITTER(ov::intel_cpu::jit_negative_emitter);
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
#include "jit_eltwise_emitters.hpp"
|
||||
#include <cpu/x64/jit_uni_eltwise.hpp>
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
#include <ngraph/opsets/opset5.hpp>
|
||||
#include <nodes/eltwise.h>
|
||||
|
||||
using namespace InferenceEngine;
|
||||
@@ -258,6 +259,67 @@ size_t jit_divide_emitter::aux_vecs_count() const {
|
||||
return exec_prc_ == Precision::I32 ? 1 : 0;
|
||||
}
|
||||
|
||||
/// FLOOR ///
|
||||
// Node-aware constructor: forwards all arguments, including the ngraph node,
// to the jit_emitter base. No floor-specific state is initialised here.
jit_floor_emitter::jit_floor_emitter(jit_generator *host,
                                     cpu_isa_t host_isa,
                                     const std::shared_ptr<ngraph::Node>& node,
                                     Precision exec_prc)
    : jit_emitter(host, host_isa, node, exec_prc) {
}

// Node-less constructor: forwards the host generator, ISA and execution
// precision to the jit_emitter base.
jit_floor_emitter::jit_floor_emitter(jit_generator *host,
                                     cpu_isa_t host_isa,
                                     Precision exec_prc)
    : jit_emitter(host, host_isa, exec_prc) {
}
|
||||
|
||||
// Floor is a unary operation: exactly one input vector register is consumed.
size_t jit_floor_emitter::get_inputs_num() const {
    return 1;
}
|
||||
|
||||
void jit_floor_emitter::emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
|
||||
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs,
|
||||
const emitter_context *emit_context) const {
|
||||
if (host_isa_ == cpu::x64::sse41) {
|
||||
emit_isa<cpu::x64::sse41>(in_vec_idxs, out_vec_idxs);
|
||||
} else if (host_isa_ == cpu::x64::avx2) {
|
||||
emit_isa<cpu::x64::avx2>(in_vec_idxs, out_vec_idxs);
|
||||
} else if (host_isa_ == cpu::x64::avx512_common) {
|
||||
emit_isa<cpu::x64::avx512_common>(in_vec_idxs, out_vec_idxs);
|
||||
} else {
|
||||
assert(!"unsupported isa");
|
||||
}
|
||||
}
|
||||
|
||||
template <mkldnn::impl::cpu::x64::cpu_isa_t isa>
|
||||
void jit_floor_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
|
||||
using Vmm = typename conditional3<isa == cpu::x64::sse41, Xmm, isa == cpu::x64::avx2, Ymm, Zmm>::type;
|
||||
Vmm vmm_src = Vmm(in_vec_idxs[0]);
|
||||
Vmm vmm_dst = Vmm(out_vec_idxs[0]);
|
||||
h->uni_vroundps(vmm_dst, vmm_src, 1);
|
||||
}
|
||||
|
||||
/// CEILING ///
|
||||
// Node-aware constructor: forwards all arguments, including the ngraph node,
// to the jit_emitter base. No ceiling-specific state is initialised here.
jit_ceiling_emitter::jit_ceiling_emitter(jit_generator *host,
                                         cpu_isa_t host_isa,
                                         const std::shared_ptr<ngraph::Node>& node,
                                         Precision exec_prc)
    : jit_emitter(host, host_isa, node, exec_prc) {
}

// Node-less constructor: forwards the host generator, ISA and execution
// precision to the jit_emitter base.
jit_ceiling_emitter::jit_ceiling_emitter(jit_generator* host,
                                         cpu_isa_t host_isa,
                                         Precision exec_prc)
    : jit_emitter(host, host_isa, exec_prc) {
}
|
||||
|
||||
// Ceiling is a unary operation: exactly one input vector register is consumed.
size_t jit_ceiling_emitter::get_inputs_num() const {
    return 1;
}
|
||||
|
||||
void jit_ceiling_emitter::emit_impl(const std::vector<size_t>& in_vec_idxs,
|
||||
const std::vector<size_t>& out_vec_idxs,
|
||||
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs,
|
||||
const emitter_context *emit_context) const {
|
||||
if (host_isa_ == cpu::x64::sse41) {
|
||||
emit_isa<cpu::x64::sse41>(in_vec_idxs, out_vec_idxs);
|
||||
} else if (host_isa_ == cpu::x64::avx2) {
|
||||
emit_isa<cpu::x64::avx2>(in_vec_idxs, out_vec_idxs);
|
||||
} else if (host_isa_ == cpu::x64::avx512_common) {
|
||||
emit_isa<cpu::x64::avx512_common>(in_vec_idxs, out_vec_idxs);
|
||||
} else {
|
||||
assert(!"unsupported isa");
|
||||
}
|
||||
}
|
||||
|
||||
template <mkldnn::impl::cpu::x64::cpu_isa_t isa>
|
||||
void jit_ceiling_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
|
||||
using Vmm = typename conditional3<isa == cpu::x64::sse41, Xmm, isa == cpu::x64::avx2, Ymm, Zmm>::type;
|
||||
Vmm vmm_src = Vmm(in_vec_idxs[0]);
|
||||
Vmm vmm_dst = Vmm(out_vec_idxs[0]);
|
||||
h->uni_vroundps(vmm_dst, vmm_src, 2);
|
||||
}
|
||||
|
||||
/// FLOOR_MOD ///
|
||||
jit_floor_mod_emitter::jit_floor_mod_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {}
|
||||
|
||||
@@ -107,6 +107,41 @@ private:
|
||||
size_t aux_vecs_count() const override;
|
||||
};
|
||||
|
||||
class jit_floor_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_floor_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_floor_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
|
||||
private:
|
||||
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
|
||||
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs,
|
||||
const emitter_context *emit_context) const override;
|
||||
|
||||
template <mkldnn::impl::cpu::x64::cpu_isa_t isa>
|
||||
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
|
||||
};
|
||||
|
||||
class jit_ceiling_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_ceiling_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_ceiling_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
|
||||
private:
|
||||
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
|
||||
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs,
|
||||
const emitter_context *emit_context) const override;
|
||||
|
||||
template <mkldnn::impl::cpu::x64::cpu_isa_t isa>
|
||||
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
|
||||
};
|
||||
|
||||
class jit_floor_mod_emitter : public jit_emitter {
|
||||
public:
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ngraph/opsets/opset5.hpp"
|
||||
#include "jit_mkldnn_emitters.hpp"
|
||||
|
||||
namespace ov {
|
||||
@@ -142,5 +143,26 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
class jit_round_emitter : public jit_mkldnn_emitter {
|
||||
public:
|
||||
jit_round_emitter(
|
||||
mkldnn::impl::cpu::x64::jit_generator *host,
|
||||
mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
const std::shared_ptr<ngraph::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32) : jit_mkldnn_emitter(host, host_isa, n, exec_prc) {
|
||||
const auto round = getNgraphOpAs<ngraph::op::v5::Round>(n);
|
||||
const auto mode = round->get_mode();
|
||||
if ((mode != ngraph::opset5::Round::RoundMode::HALF_AWAY_FROM_ZERO) &&
|
||||
(mode != ngraph::opset5::Round::RoundMode::HALF_TO_EVEN)) {
|
||||
IE_THROW(NotImplemented) << "Round emitter doesn't support ngraph operation Round with mode: " << static_cast<int>(mode);
|
||||
}
|
||||
|
||||
kind = mode == ngraph::opset5::Round::RoundMode::HALF_AWAY_FROM_ZERO ?
|
||||
dnnl_eltwise_round_half_away_from_zero :
|
||||
dnnl_eltwise_round_half_to_even;
|
||||
set_injector();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace intel_cpu
|
||||
} // namespace ov
|
||||
|
||||
Reference in New Issue
Block a user