[CPU] Code generation: Floor & Ceiling & Round implementation (#10666)

This commit is contained in:
Edward Shogulin
2022-03-23 10:32:25 +03:00
committed by GitHub
parent 9bc7ebda7b
commit cd361ecae1
6 changed files with 129 additions and 2 deletions

View File

@@ -51,6 +51,7 @@ NGRAPH_OP(Log, ngraph::op::v0)
NGRAPH_OP(LogicalNot, ngraph::op::v1)
NGRAPH_OP(Negative, ngraph::op::v0)
NGRAPH_OP(Relu, ngraph::op::v0)
NGRAPH_OP(Round, ngraph::op::v5)
NGRAPH_OP(Selu, ngraph::op::v0)
NGRAPH_OP(Sign, ngraph::op::v0)
NGRAPH_OP(Sigmoid, ngraph::op::v0)

View File

@@ -9,6 +9,7 @@
#include "snippets/op/subgraph.hpp"
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset5.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/op/loop.hpp>
#include "transformations/utils/utils.hpp"
@@ -84,12 +85,15 @@ auto is_layout_oblivious(const std::shared_ptr<const Node> &n) -> bool {
auto is_layout_oblivious_unary = [](const std::shared_ptr<const Node> &n) -> bool {
return ov::is_type<opset1::Abs>(n)
|| ov::is_type<opset1::Clamp>(n)
|| ov::is_type<opset1::Floor>(n)
|| ov::is_type<opset1::Ceiling>(n)
|| ov::is_type<opset1::Elu>(n)
|| ov::is_type<opset1::Erf>(n)
|| ov::is_type<opset1::Exp>(n)
|| ov::is_type<opset1::LogicalNot>(n)
|| ov::is_type<opset1::Negative>(n)
|| ov::is_type<opset1::Relu>(n)
|| ov::is_type<opset5::Round>(n)
|| ov::is_type<opset1::Sigmoid>(n)
|| ov::is_type<opset1::Sqrt>(n)
|| ov::is_type<opset1::Tanh>(n)

View File

@@ -14,6 +14,8 @@
#include "jit_mkldnn_emitters.hpp"
#include "jit_mkldnn_ext_emitters.hpp"
#include <ngraph/opsets/opset5.hpp>
using namespace std;
using namespace ngraph::snippets;
@@ -87,14 +89,15 @@ ov::intel_cpu::CPUTargetMachine::CPUTargetMachine(dnnl::impl::cpu::x64::cpu_isa_
// jitters[ngraph::opset1::Acos::get_type_info_static()] = CREATE_EMITTER(); // not supported
// jitters[ngraph::opset1::Asin::get_type_info_static()] = CREATE_EMITTER(); // not supported
// jitters[ngraph::opset1::Atan::get_type_info_static()] = CREATE_EMITTER(); // not supported
// jitters[ngraph::opset1::Ceiling::get_type_info_static()] = CREATE_EMITTER(); // not supported
jitters[ngraph::opset1::Ceiling::get_type_info_static()] = CREATE_EMITTER(ov::intel_cpu::jit_ceiling_emitter);
jitters[ngraph::opset1::Clamp::get_type_info_static()] = CREATE_EMITTER(ov::intel_cpu::jit_clamp_emitter);
// jitters[ngraph::opset1::Cos::get_type_info_static()] = CREATE_EMITTER(); // not supported
// jitters[ngraph::opset1::Cosh::get_type_info_static()] = CREATE_EMITTER(); // not supported
jitters[ngraph::opset1::Elu::get_type_info_static()] = CREATE_EMITTER(ov::intel_cpu::jit_elu_emitter);
jitters[ngraph::opset1::Erf::get_type_info_static()] = CREATE_EMITTER(ov::intel_cpu::jit_erf_emitter);
jitters[ngraph::opset1::Exp::get_type_info_static()] = CREATE_EMITTER(ov::intel_cpu::jit_exp_emitter);
// jitters[ngraph::opset1::Floor::get_type_info_static()] = CREATE_EMITTER(); // not supported
jitters[ngraph::opset1::Floor::get_type_info_static()] = CREATE_EMITTER(ov::intel_cpu::jit_floor_emitter);
jitters[ngraph::opset5::Round::get_type_info_static()] = CREATE_EMITTER(ov::intel_cpu::jit_round_emitter);
// jitters[ngraph::opset1::Log::get_type_info_static()] = CREATE_EMITTER(); // not supported
jitters[ngraph::opset1::LogicalNot::get_type_info_static()] = CREATE_EMITTER(ov::intel_cpu::jit_logical_not_emitter);
jitters[ngraph::opset1::Negative::get_type_info_static()] = CREATE_EMITTER(ov::intel_cpu::jit_negative_emitter);

View File

@@ -5,6 +5,7 @@
#include "jit_eltwise_emitters.hpp"
#include <cpu/x64/jit_uni_eltwise.hpp>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset5.hpp>
#include <nodes/eltwise.h>
using namespace InferenceEngine;
@@ -258,6 +259,67 @@ size_t jit_divide_emitter::aux_vecs_count() const {
return exec_prc_ == Precision::I32 ? 1 : 0;
}
/// FLOOR ///
// Emitter for ngraph opset1::Floor: elementwise round toward negative infinity.
// Construct from an ngraph node (standard emitter-factory path).
jit_floor_emitter::jit_floor_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& node, Precision exec_prc)
: jit_emitter(host, host_isa, node, exec_prc) {}
// Construct without a node, e.g. when the emitter is created directly by ISA/precision.
jit_floor_emitter::jit_floor_emitter(jit_generator *host, cpu_isa_t host_isa, Precision exec_prc)
: jit_emitter(host, host_isa, exec_prc) {}
// Floor is a unary operation: one input vector register.
size_t jit_floor_emitter::get_inputs_num() const { return 1; }
// Dispatch code generation to the ISA-specialized template. The pool index
// vectors and emitter context are part of the common jit_emitter interface
// and are unused here (floor needs no auxiliary registers).
void jit_floor_emitter::emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
                                  const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs,
                                  const emitter_context *emit_context) const {
    switch (host_isa_) {
        case cpu::x64::sse41:
            emit_isa<cpu::x64::sse41>(in_vec_idxs, out_vec_idxs);
            break;
        case cpu::x64::avx2:
            emit_isa<cpu::x64::avx2>(in_vec_idxs, out_vec_idxs);
            break;
        case cpu::x64::avx512_common:
            emit_isa<cpu::x64::avx512_common>(in_vec_idxs, out_vec_idxs);
            break;
        default:
            assert(!"unsupported isa");
    }
}
// Generate the floor for a single vector register pair, specialized per ISA.
// Vmm resolves to Xmm/Ymm/Zmm for sse41/avx2/avx512 respectively.
template <mkldnn::impl::cpu::x64::cpu_isa_t isa>
void jit_floor_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
using Vmm = typename conditional3<isa == cpu::x64::sse41, Xmm, isa == cpu::x64::avx2, Ymm, Zmm>::type;
Vmm vmm_src = Vmm(in_vec_idxs[0]);
Vmm vmm_dst = Vmm(out_vec_idxs[0]);
// imm8 = 1 (0b01): round toward negative infinity (floor) in the
// ROUNDPS/VROUNDPS rounding-control encoding.
h->uni_vroundps(vmm_dst, vmm_src, 1);
}
/// CEILING ///
// Emitter for ngraph opset1::Ceiling: elementwise round toward positive infinity.
// Construct from an ngraph node (standard emitter-factory path).
jit_ceiling_emitter::jit_ceiling_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& node, Precision exec_prc)
: jit_emitter(host, host_isa, node, exec_prc) {}
// Construct without a node, e.g. when the emitter is created directly by ISA/precision.
jit_ceiling_emitter::jit_ceiling_emitter(jit_generator* host, cpu_isa_t host_isa, Precision exec_prc)
: jit_emitter(host, host_isa, exec_prc) {}
// Ceiling is a unary operation: one input vector register.
size_t jit_ceiling_emitter::get_inputs_num() const { return 1; }
// Dispatch code generation to the ISA-specialized template. The pool index
// vectors and emitter context belong to the common jit_emitter interface and
// are unused here (ceiling needs no auxiliary registers).
void jit_ceiling_emitter::emit_impl(const std::vector<size_t>& in_vec_idxs,
                                    const std::vector<size_t>& out_vec_idxs,
                                    const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs,
                                    const emitter_context *emit_context) const {
    switch (host_isa_) {
        case cpu::x64::sse41:
            emit_isa<cpu::x64::sse41>(in_vec_idxs, out_vec_idxs);
            break;
        case cpu::x64::avx2:
            emit_isa<cpu::x64::avx2>(in_vec_idxs, out_vec_idxs);
            break;
        case cpu::x64::avx512_common:
            emit_isa<cpu::x64::avx512_common>(in_vec_idxs, out_vec_idxs);
            break;
        default:
            assert(!"unsupported isa");
    }
}
// Generate the ceiling for a single vector register pair, specialized per ISA.
// Vmm resolves to Xmm/Ymm/Zmm for sse41/avx2/avx512 respectively.
template <mkldnn::impl::cpu::x64::cpu_isa_t isa>
void jit_ceiling_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
using Vmm = typename conditional3<isa == cpu::x64::sse41, Xmm, isa == cpu::x64::avx2, Ymm, Zmm>::type;
Vmm vmm_src = Vmm(in_vec_idxs[0]);
Vmm vmm_dst = Vmm(out_vec_idxs[0]);
// imm8 = 2 (0b10): round toward positive infinity (ceil) in the
// ROUNDPS/VROUNDPS rounding-control encoding.
h->uni_vroundps(vmm_dst, vmm_src, 2);
}
/// FLOOR_MOD ///
// Construct from an ngraph node; the remaining FloorMod emitter definitions
// follow outside this hunk.
jit_floor_mod_emitter::jit_floor_mod_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& node, Precision exec_prc)
: jit_emitter(host, host_isa, node, exec_prc) {}

View File

@@ -107,6 +107,41 @@ private:
size_t aux_vecs_count() const override;
};
// JIT emitter for ngraph opset1::Floor (elementwise round toward negative
// infinity), implemented via the vector round instruction.
class jit_floor_emitter : public jit_emitter {
public:
jit_floor_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
jit_floor_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
// Floor is unary: always one input.
size_t get_inputs_num() const override;
private:
// Dispatches to emit_isa according to host_isa_.
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs,
const emitter_context *emit_context) const override;
// Emits the ISA-specific rounding instruction.
template <mkldnn::impl::cpu::x64::cpu_isa_t isa>
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
};
// JIT emitter for ngraph opset1::Ceiling (elementwise round toward positive
// infinity), implemented via the vector round instruction.
class jit_ceiling_emitter : public jit_emitter {
public:
jit_ceiling_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
jit_ceiling_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
// Ceiling is unary: always one input.
size_t get_inputs_num() const override;
private:
// Dispatches to emit_isa according to host_isa_.
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs,
const emitter_context *emit_context) const override;
// Emits the ISA-specific rounding instruction.
template <mkldnn::impl::cpu::x64::cpu_isa_t isa>
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
};
class jit_floor_mod_emitter : public jit_emitter {
public:

View File

@@ -4,6 +4,7 @@
#pragma once
#include "ngraph/opsets/opset5.hpp"
#include "jit_mkldnn_emitters.hpp"
namespace ov {
@@ -142,5 +143,26 @@ public:
}
};
// JIT emitter for ngraph opset5::Round, delegating code generation to the
// oneDNN (mkldnn) eltwise injector selected by `kind`. Only the two rounding
// modes that oneDNN provides are supported.
class jit_round_emitter : public jit_mkldnn_emitter {
public:
jit_round_emitter(
mkldnn::impl::cpu::x64::jit_generator *host,
mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
const std::shared_ptr<ngraph::Node>& n,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32) : jit_mkldnn_emitter(host, host_isa, n, exec_prc) {
const auto round = getNgraphOpAs<ngraph::op::v5::Round>(n);
const auto mode = round->get_mode();
// Reject modes oneDNN has no eltwise algorithm for.
if ((mode != ngraph::opset5::Round::RoundMode::HALF_AWAY_FROM_ZERO) &&
(mode != ngraph::opset5::Round::RoundMode::HALF_TO_EVEN)) {
IE_THROW(NotImplemented) << "Round emitter doesn't support ngraph operation Round with mode: " << static_cast<int>(mode);
}
// Map the ngraph rounding mode onto the matching oneDNN eltwise algorithm.
kind = mode == ngraph::opset5::Round::RoundMode::HALF_AWAY_FROM_ZERO ?
dnnl_eltwise_round_half_away_from_zero :
dnnl_eltwise_round_half_to_even;
// Re-create the injector now that `kind` is set (the base ctor cannot know it).
set_injector();
}
};
} // namespace intel_cpu
} // namespace ov