[CPU] Fixed integer compute for arithmetic operations (#13556)
This commit is contained in:
parent
be1b72d1e9
commit
9f40eb7196
@ -120,7 +120,7 @@ auto is_supported_op(const std::shared_ptr<const Node> &n) -> bool {
|
||||
auto has_supported_in_out(const std::shared_ptr<const Node> &n) -> bool {
|
||||
auto supported = [](descriptor::Tensor& t) -> bool {
|
||||
static const std::set<ngraph::element::Type> supported_data_types =
|
||||
{ ngraph::element::f32, ngraph::element::i32, ngraph::element::bf16, ngraph::element::i8, ngraph::element::u8 };
|
||||
{ ngraph::element::f32, ngraph::element::bf16, ngraph::element::i8, ngraph::element::u8 };
|
||||
return t.get_partial_shape().is_static() && supported_data_types.count(t.get_element_type()) != 0;
|
||||
};
|
||||
const auto & inputs = n->inputs();
|
||||
|
@ -46,14 +46,26 @@ void jit_add_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std
|
||||
Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
|
||||
Vmm vmm_dst = Vmm(out_vec_idxs[0]);
|
||||
|
||||
auto uni_vadd = [this](Vmm vmm_dst, Vmm vmm_src0, Vmm vmm_src1) {
|
||||
switch (exec_prc_) {
|
||||
case Precision::FP32: h->uni_vaddps(vmm_dst, vmm_src0, vmm_src1); break;
|
||||
case Precision::I32: h->uni_vpaddd(vmm_dst, vmm_src0, vmm_src1); break;
|
||||
default: assert(!"unsupported precision");
|
||||
}
|
||||
};
|
||||
|
||||
if (isa == cpu::x64::sse41) {
|
||||
h->uni_vmovups(vmm_dst, vmm_src0);
|
||||
h->uni_vaddps(vmm_dst, vmm_dst, vmm_src1);
|
||||
uni_vadd(vmm_dst, vmm_dst, vmm_src1);
|
||||
} else {
|
||||
h->uni_vaddps(vmm_dst, vmm_src0, vmm_src1);
|
||||
uni_vadd(vmm_dst, vmm_src0, vmm_src1);
|
||||
}
|
||||
}
|
||||
|
||||
std::set<InferenceEngine::Precision> jit_add_emitter::get_supported_precisions() {
|
||||
return {Precision::FP32, Precision::I32};
|
||||
}
|
||||
|
||||
/// MUL_ADD ///
|
||||
jit_mul_add_emitter::jit_mul_add_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {}
|
||||
@ -85,30 +97,57 @@ void jit_mul_add_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const
|
||||
Vmm vmm_aux0 = Vmm(aux_vec_idxs[0]);
|
||||
Vmm vmm_dst = Vmm(out_vec_idxs[0]);
|
||||
|
||||
if (isa == cpu::x64::sse41) {
|
||||
auto uni_vfmadd231_xmm = [this](Xmm vmm_dst, Xmm vmm_src0, Xmm vmm_src1, Xmm vmm_src2) {
|
||||
h->uni_vmovups(vmm_dst, vmm_src0);
|
||||
h->uni_vmulps(vmm_dst, vmm_dst, vmm_src1);
|
||||
h->uni_vaddps(vmm_dst, vmm_dst, vmm_src2);
|
||||
switch (exec_prc_) {
|
||||
case Precision::FP32: {
|
||||
h->uni_vmulps(vmm_dst, vmm_dst, vmm_src1);
|
||||
h->uni_vaddps(vmm_dst, vmm_dst, vmm_src2);
|
||||
} break;
|
||||
case Precision::I32: {
|
||||
h->uni_vpmulld(vmm_dst, vmm_dst, vmm_src1);
|
||||
h->uni_vpaddd(vmm_dst, vmm_dst, vmm_src2);
|
||||
} break;
|
||||
default: assert(!"unsupported precision");
|
||||
}
|
||||
};
|
||||
|
||||
auto uni_vfmadd231_vmm = [this, vmm_aux0](Vmm vmm_dst, Vmm vmm_src0, Vmm vmm_src1, Vmm vmm_src2) {
|
||||
switch (exec_prc_) {
|
||||
case Precision::FP32: {
|
||||
Vmm vmm_mul0;
|
||||
if (vmm_dst.getIdx() == vmm_src0.getIdx()) {
|
||||
h->uni_vmovups(vmm_aux0, vmm_src0);
|
||||
vmm_mul0 = vmm_aux0;
|
||||
} else {
|
||||
vmm_mul0 = vmm_src0;
|
||||
}
|
||||
|
||||
Vmm vmm_mul1;
|
||||
if (vmm_dst.getIdx() == vmm_src1.getIdx()) {
|
||||
h->uni_vmovups(vmm_aux0, vmm_src1);
|
||||
vmm_mul1 = vmm_aux0;
|
||||
} else {
|
||||
vmm_mul1 = vmm_src1;
|
||||
}
|
||||
|
||||
if (vmm_dst.getIdx() != vmm_src2.getIdx())
|
||||
h->uni_vmovups(vmm_dst, vmm_src2);
|
||||
|
||||
h->uni_vfmadd231ps(vmm_dst, vmm_mul0, vmm_mul1);
|
||||
} break;
|
||||
case Precision::I32: {
|
||||
h->uni_vpmulld(vmm_dst, vmm_src0, vmm_src1);
|
||||
h->uni_vpaddd(vmm_dst, vmm_dst, vmm_src2);
|
||||
} break;
|
||||
default: assert(!"unsupported precision");
|
||||
}
|
||||
};
|
||||
|
||||
if (isa == cpu::x64::sse41) {
|
||||
uni_vfmadd231_xmm(vmm_dst, vmm_src0, vmm_src1, vmm_src2);
|
||||
} else {
|
||||
Vmm vmm_mul0;
|
||||
if (vmm_dst.getIdx() == vmm_src0.getIdx()) {
|
||||
h->uni_vmovups(vmm_aux0, vmm_src0);
|
||||
vmm_mul0 = vmm_aux0;
|
||||
} else {
|
||||
vmm_mul0 = vmm_src0;
|
||||
}
|
||||
|
||||
Vmm vmm_mul1;
|
||||
if (vmm_dst.getIdx() == vmm_src1.getIdx()) {
|
||||
h->uni_vmovups(vmm_aux0, vmm_src1);
|
||||
vmm_mul1 = vmm_aux0;
|
||||
} else {
|
||||
vmm_mul1 = vmm_src1;
|
||||
}
|
||||
|
||||
if (vmm_dst.getIdx() != vmm_src2.getIdx())
|
||||
h->uni_vmovups(vmm_dst, vmm_src2);
|
||||
h->uni_vfmadd231ps(vmm_dst, vmm_mul0, vmm_mul1);
|
||||
uni_vfmadd231_vmm(vmm_dst, vmm_src0, vmm_src1, vmm_src2);
|
||||
}
|
||||
}
|
||||
|
||||
@ -116,6 +155,10 @@ size_t jit_mul_add_emitter::aux_vecs_count() const {
|
||||
return 1;
|
||||
}
|
||||
|
||||
std::set<InferenceEngine::Precision> jit_mul_add_emitter::get_supported_precisions() {
|
||||
return {Precision::FP32, Precision::I32};
|
||||
}
|
||||
|
||||
/// SUB ///
|
||||
jit_subtract_emitter::jit_subtract_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {}
|
||||
@ -145,14 +188,25 @@ void jit_subtract_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, cons
|
||||
Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
|
||||
Vmm vmm_dst = Vmm(out_vec_idxs[0]);
|
||||
|
||||
auto uni_vsub = [this](Vmm vmm_dst, Vmm vmm_src0, Vmm vmm_src1) {
|
||||
switch (exec_prc_) {
|
||||
case Precision::FP32: h->uni_vsubps(vmm_dst, vmm_src0, vmm_src1); break;
|
||||
case Precision::I32: h->uni_vpsubd(vmm_dst, vmm_src0, vmm_src1); break;
|
||||
default: assert(!"unsupported precision");
|
||||
}
|
||||
};
|
||||
|
||||
if (isa == cpu::x64::sse41) {
|
||||
h->uni_vmovups(vmm_dst, vmm_src0);
|
||||
h->uni_vsubps(vmm_dst, vmm_dst, vmm_src1);
|
||||
uni_vsub(vmm_dst, vmm_dst, vmm_src1);
|
||||
} else {
|
||||
h->uni_vsubps(vmm_dst, vmm_src0, vmm_src1);
|
||||
uni_vsub(vmm_dst, vmm_src0, vmm_src1);
|
||||
}
|
||||
}
|
||||
|
||||
std::set<InferenceEngine::Precision> jit_subtract_emitter::get_supported_precisions() {
|
||||
return {Precision::FP32, Precision::I32};
|
||||
}
|
||||
|
||||
/// MULTIPLY ///
|
||||
jit_multiply_emitter::jit_multiply_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& node, Precision exec_prc)
|
||||
@ -183,14 +237,25 @@ void jit_multiply_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, cons
|
||||
Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
|
||||
Vmm vmm_dst = Vmm(out_vec_idxs[0]);
|
||||
|
||||
auto uni_vmul = [this](Vmm vmm_dst, Vmm vmm_src0, Vmm vmm_src1) {
|
||||
switch (exec_prc_) {
|
||||
case Precision::FP32: h->uni_vmulps(vmm_dst, vmm_src0, vmm_src1); break;
|
||||
case Precision::I32: h->uni_vpmulld(vmm_dst, vmm_src0, vmm_src1); break;
|
||||
default: assert(!"unsupported precision");
|
||||
}
|
||||
};
|
||||
|
||||
if (isa == cpu::x64::sse41) {
|
||||
h->uni_vmovups(vmm_dst, vmm_src0);
|
||||
h->uni_vmulps(vmm_dst, vmm_dst, vmm_src1);
|
||||
uni_vmul(vmm_dst, vmm_dst, vmm_src1);
|
||||
} else {
|
||||
h->uni_vmulps(vmm_dst, vmm_src0, vmm_src1);
|
||||
uni_vmul(vmm_dst, vmm_src0, vmm_src1);
|
||||
}
|
||||
}
|
||||
|
||||
std::set<InferenceEngine::Precision> jit_multiply_emitter::get_supported_precisions() {
|
||||
return {Precision::FP32, Precision::I32};
|
||||
}
|
||||
|
||||
/// DIVIDE ///
|
||||
jit_divide_emitter::jit_divide_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& node, Precision exec_prc)
|
||||
@ -554,17 +619,32 @@ void jit_squared_difference_emitter::emit_isa(const std::vector<size_t> &in_vec_
|
||||
Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
|
||||
Vmm vmm_dst = Vmm(out_vec_idxs[0]);
|
||||
|
||||
auto uni_vsqdiff = [this](Vmm vmm_dst, Vmm vmm_src0, Vmm vmm_src1) {
|
||||
switch (exec_prc_) {
|
||||
case Precision::FP32: {
|
||||
h->uni_vsubps(vmm_dst, vmm_src0, vmm_src1);
|
||||
h->uni_vmulps(vmm_dst, vmm_dst, vmm_dst);
|
||||
} break;
|
||||
case Precision::I32: {
|
||||
h->uni_vpsubd(vmm_dst, vmm_src0, vmm_src1);
|
||||
h->uni_vpmulld(vmm_dst, vmm_dst, vmm_dst);
|
||||
} break;
|
||||
default: assert(!"unsupported precision");
|
||||
}
|
||||
};
|
||||
|
||||
if (isa == cpu::x64::sse41) {
|
||||
if (vmm_src0.getIdx() != vmm_dst.getIdx())
|
||||
h->uni_vmovups(vmm_dst, vmm_src0);
|
||||
h->uni_vsubps(vmm_dst, vmm_dst, vmm_src1);
|
||||
h->uni_vmulps(vmm_dst, vmm_dst, vmm_dst);
|
||||
uni_vsqdiff(vmm_dst, vmm_dst, vmm_src1);
|
||||
} else {
|
||||
h->uni_vsubps(vmm_dst, vmm_src0, vmm_src1);
|
||||
h->uni_vmulps(vmm_dst, vmm_dst, vmm_dst);
|
||||
uni_vsqdiff(vmm_dst, vmm_src0, vmm_src1);
|
||||
}
|
||||
}
|
||||
|
||||
std::set<InferenceEngine::Precision> jit_squared_difference_emitter::get_supported_precisions() {
|
||||
return {Precision::FP32, Precision::I32};
|
||||
}
|
||||
|
||||
/// POWER_DYNAMIC ///
|
||||
jit_power_dynamic_emitter::jit_power_dynamic_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& node, Precision exec_prc)
|
||||
|
@ -18,6 +18,7 @@ public:
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
static std::set<InferenceEngine::Precision> get_supported_precisions();
|
||||
|
||||
private:
|
||||
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
|
||||
@ -36,6 +37,7 @@ public:
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
static std::set<InferenceEngine::Precision> get_supported_precisions();
|
||||
|
||||
private:
|
||||
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
|
||||
@ -57,6 +59,7 @@ public:
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
static std::set<InferenceEngine::Precision> get_supported_precisions();
|
||||
|
||||
private:
|
||||
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
|
||||
@ -76,6 +79,7 @@ public:
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
static std::set<InferenceEngine::Precision> get_supported_precisions();
|
||||
|
||||
private:
|
||||
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
|
||||
@ -232,6 +236,7 @@ public:
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
size_t get_inputs_num() const override;
|
||||
static std::set<InferenceEngine::Precision> get_supported_precisions();
|
||||
|
||||
private:
|
||||
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
|
||||
|
@ -2252,16 +2252,19 @@ void Eltwise::appendBinPostOps(dnnl::post_ops& ops, const VectorDims& postOpDims
|
||||
}
|
||||
|
||||
bool Eltwise::canFuse(const NodePtr& node) const {
|
||||
auto isSuitableNode = [this](const Eltwise* node) {
|
||||
// [WA] Since execution precision change from I32 to FP32 for Divide operation may lead to incorrect results
|
||||
// we disable its fusing otherwise there is no guarantee it will be executed it I32
|
||||
// [TODO] We need to rewrite support for different precisions at all to avoid implicit conversions to FP32
|
||||
// (all should be handled via explicit convert operations)
|
||||
if (node->getAlgorithm() == Algorithm::EltwiseDivide) {
|
||||
for (const auto &originalInputPrecision : getOriginalInputPrecisions()) {
|
||||
if (originalInputPrecision == Precision::I32) {
|
||||
return false;
|
||||
}
|
||||
auto isIntegerComputeSupported = [this](const Node* node) {
|
||||
if (!one_of(node->getAlgorithm(), Algorithm::EltwiseAdd,
|
||||
Algorithm::EltwiseMultiply,
|
||||
Algorithm::EltwiseMulAdd,
|
||||
Algorithm::EltwiseSubtract,
|
||||
Algorithm::EltwiseDivide,
|
||||
Algorithm::EltwiseSquaredDifference)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (const auto &originalInputPrecision : node->getOriginalInputPrecisions()) {
|
||||
if (originalInputPrecision != Precision::I32) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@ -2271,9 +2274,10 @@ bool Eltwise::canFuse(const NodePtr& node) const {
|
||||
if (!mayiuse(x64::sse41) || getInputShapeAtPort(0).getRank() > MAX_ELTWISE_DIM_RANK)
|
||||
return false;
|
||||
|
||||
if (!isSuitableNode(this)) {
|
||||
|
||||
bool isIntegerNode = isIntegerComputeSupported(this);
|
||||
if (isIntegerNode && node->getType() != Type::Eltwise)
|
||||
return false;
|
||||
}
|
||||
|
||||
// FQ inputs with quantization parameters will be hidden inside post_op object, so will not increase inputs number
|
||||
size_t addedInputEdgesNum = node->getType() != Type::FakeQuantize ? (node->getParentEdges().size() - 1) : 0;
|
||||
@ -2281,6 +2285,16 @@ bool Eltwise::canFuse(const NodePtr& node) const {
|
||||
return false;
|
||||
|
||||
if (node->getType() == Type::Eltwise) {
|
||||
// [WA] Since execution precision change from I32 to FP32 for arithmetic operations may lead to incorrect results
|
||||
// we disable fusing cases which may lead to invalid precision conversions inside the kernel
|
||||
// [TODO] We need to rewrite support for different precisions at all to avoid implicit conversions to FP32
|
||||
// (all should be handled via explicit convert operations)
|
||||
bool isIntegerFusingNode = isIntegerComputeSupported(node.get());
|
||||
if (isIntegerNode && !isIntegerFusingNode ||
|
||||
!isIntegerNode && isIntegerFusingNode) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (node->getParentEdgesAtPort(0)[0]->getParent().get() != this) {
|
||||
// Eltwise jitter doesn't respect commutative property, so fusing is disabled in case it applied not for 0-th port.
|
||||
if (one_of(node->getAlgorithm(), Algorithm::EltwiseSubtract,
|
||||
|
@ -13,23 +13,19 @@ namespace snippets {
|
||||
namespace {
|
||||
|
||||
const std::vector<std::pair<std::vector<ov::element::Type>, std::vector<ov::element::Type>>> types_Convert = {
|
||||
{ { ov::element::f32 }, { ov::element::i32 } },
|
||||
{ { ov::element::f32 }, { ov::element::bf16 } },
|
||||
{ { ov::element::f32 }, { ov::element::u8 } },
|
||||
{ { ov::element::f32 }, { ov::element::i8 } },
|
||||
|
||||
{ { ov::element::bf16 }, { ov::element::f32 } },
|
||||
{ { ov::element::bf16 }, { ov::element::i32 } },
|
||||
{ { ov::element::bf16 }, { ov::element::i8 } },
|
||||
{ { ov::element::bf16 }, { ov::element::u8 } },
|
||||
|
||||
{ { ov::element::i8 }, { ov::element::f32 } },
|
||||
{ { ov::element::i8 }, { ov::element::i32 } },
|
||||
{ { ov::element::i8 }, { ov::element::bf16 } },
|
||||
{ { ov::element::i8 }, { ov::element::u8 } },
|
||||
|
||||
{ { ov::element::u8 }, { ov::element::f32 } },
|
||||
{ { ov::element::u8 }, { ov::element::i32 } },
|
||||
{ { ov::element::u8 }, { ov::element::bf16 } },
|
||||
{ { ov::element::u8 }, { ov::element::i8 } },
|
||||
};
|
||||
@ -50,17 +46,14 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_Convert, Convert,
|
||||
Convert::getTestCaseName);
|
||||
|
||||
const std::vector<std::pair<std::vector<ov::element::Type>, std::vector<ov::element::Type>>> types_ConvertInput = {
|
||||
{ { ov::element::f32 }, { ov::element::i32 } },
|
||||
{ { ov::element::f32 }, { ov::element::bf16 } },
|
||||
|
||||
{ { ov::element::bf16 }, { ov::element::f32 } },
|
||||
|
||||
{ { ov::element::i8 }, { ov::element::f32 } },
|
||||
{ { ov::element::i8 }, { ov::element::i32 } },
|
||||
{ { ov::element::i8 }, { ov::element::bf16 } },
|
||||
|
||||
{ { ov::element::u8 }, { ov::element::f32 } },
|
||||
{ { ov::element::u8 }, { ov::element::i32 } },
|
||||
{ { ov::element::u8 }, { ov::element::bf16 } },
|
||||
};
|
||||
|
||||
@ -98,8 +91,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_ConvertStub, ConvertStub,
|
||||
Convert::getTestCaseName);
|
||||
|
||||
const std::vector<std::pair<std::vector<ov::element::Type>, std::vector<ov::element::Type>>> types_ConvertPartialInputsAndResults = {
|
||||
{ { ov::element::i8, ov::element::i32, ov::element::f32 }, { ov::element::f32, ov::element::i8 } },
|
||||
{ { ov::element::bf16, ov::element::u8, ov::element::i32 }, { ov::element::i32, ov::element::bf16 } },
|
||||
{ { ov::element::i8, ov::element::i8, ov::element::f32 }, { ov::element::f32, ov::element::i8 } },
|
||||
};
|
||||
|
||||
const std::vector<std::vector<ov::Shape>> inputShapes_ConvertPartialInputsAndResults = {
|
||||
@ -118,10 +110,9 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_ConvertPartialInputsAndResults, ConvertP
|
||||
Convert::getTestCaseName);
|
||||
|
||||
const std::vector<std::pair<std::vector<ov::element::Type>, std::vector<ov::element::Type>>> types_ConvertMany = {
|
||||
{ { ov::element::i32, ov::element::u8}, {} },
|
||||
{ { ov::element::i32, ov::element::u8, ov::element::i32 }, {} },
|
||||
{ { ov::element::i32, ov::element::f32, ov::element::i32, ov::element::i8 }, {} },
|
||||
{ { ov::element::i32, ov::element::i8, ov::element::i32, ov::element::f32 }, {} },
|
||||
{ { ov::element::f32, ov::element::u8}, {} },
|
||||
{ { ov::element::f32, ov::element::u8, ov::element::i8 }, {} },
|
||||
{ { ov::element::f32, ov::element::f32, ov::element::i8, ov::element::i8 }, {} },
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_Snippets_ConvertManyOnInputs, ConvertManyOnInputs,
|
||||
@ -137,21 +128,21 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_ConvertManyOnOutputs, ConvertManyOnOutpu
|
||||
::testing::Combine(
|
||||
::testing::Values(std::vector<ov::Shape>{ov::Shape{5, 5, 5, 5}}),
|
||||
::testing::ValuesIn(types_ConvertMany),
|
||||
::testing::Values(5), // sinh + subgraph + reorders for sinh
|
||||
::testing::Values(2), // sinh + subgraph
|
||||
::testing::Values(1),
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU)),
|
||||
Convert::getTestCaseName);
|
||||
|
||||
const std::vector<std::pair<std::vector<ov::element::Type>, std::vector<ov::element::Type>>> types_ConvertManyIO = {
|
||||
{ { ov::element::i32, ov::element::u8}, {ov::element::i32} },
|
||||
{ { ov::element::i32, ov::element::u8, ov::element::i32 }, { ov::element::i32, ov::element::i8, ov::element::i32, ov::element::f32 } },
|
||||
{ { ov::element::f32, ov::element::u8}, {ov::element::i8} },
|
||||
{ { ov::element::f32, ov::element::u8, ov::element::i8 }, { ov::element::u8, ov::element::i8, ov::element::f32, ov::element::f32 } },
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_Snippets_ConvertManyOnInputOutput, ConvertManyOnInputOutput,
|
||||
::testing::Combine(
|
||||
::testing::Values(std::vector<ov::Shape>{ov::Shape{5, 5, 5, 5}}),
|
||||
::testing::ValuesIn(types_ConvertManyIO),
|
||||
::testing::Values(5), // sinh + subgraph + reorders for sinh
|
||||
::testing::Values(2), // sinh + subgraph
|
||||
::testing::Values(1),
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU)),
|
||||
Convert::getTestCaseName);
|
||||
|
@ -37,38 +37,47 @@ public:
|
||||
}
|
||||
|
||||
protected:
|
||||
// Creates a filled tensor of the given element type and shape for an eltwise test.
// The fill arguments (range, start_from, resolution) passed to
// ov::test::utils::create_and_fill_tensor depend on whether the type is real and,
// for real types, on the eltwise operation under test (eltwiseType).
ov::Tensor generate_eltwise_input(const ov::element::Type& type, const ngraph::Shape& shape) {
    // Bundle of the three fill arguments forwarded to create_and_fill_tensor.
    struct gen_params {
        uint32_t range;
        int32_t start_from;
        int32_t resolution;

        gen_params(uint32_t range = 10, int32_t start_from = 0, int32_t resolution = 1)
            : range(range), start_from(start_from), resolution(resolution) {}
    };

    const auto select_params = [&]() -> gen_params {
        // Non-real types: range INT32_MAX starting from INT32_MIN, default resolution.
        if (!type.is_real()) {
            return gen_params(INT32_MAX, INT32_MIN);
        }

        switch (eltwiseType) {
            case ngraph::helpers::EltwiseTypes::POWER:
            case ngraph::helpers::EltwiseTypes::MOD:
            case ngraph::helpers::EltwiseTypes::FLOOR_MOD:
            case ngraph::helpers::EltwiseTypes::DIVIDE:
                // start_from = 2 with range = 2; presumably keeps operands away from zero — TODO confirm
                return gen_params(2, 2, 8);
            case ngraph::helpers::EltwiseTypes::ERF:
                return gen_params(6, -3);
            default:
                return gen_params(80, 0, 8);
        }
    };

    const gen_params fill = select_params();
    return ov::test::utils::create_and_fill_tensor(type, shape, fill.range, fill.start_from, fill.resolution);
}
|
||||
|
||||
void generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) override {
|
||||
inputs.clear();
|
||||
const auto& funcInputs = function->inputs();
|
||||
for (int i = 0; i < funcInputs.size(); ++i) {
|
||||
const auto& funcInput = funcInputs[i];
|
||||
ov::Tensor tensor;
|
||||
bool isReal = funcInput.get_element_type().is_real();
|
||||
switch (eltwiseType) {
|
||||
case ngraph::helpers::EltwiseTypes::POWER:
|
||||
case ngraph::helpers::EltwiseTypes::MOD:
|
||||
case ngraph::helpers::EltwiseTypes::FLOOR_MOD:
|
||||
tensor = isReal ?
|
||||
ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 2, 2, 8) :
|
||||
ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 4, 2);
|
||||
break;
|
||||
case ngraph::helpers::EltwiseTypes::DIVIDE:
|
||||
tensor = isReal ?
|
||||
ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 2, 2, 8) :
|
||||
ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 100, 101);
|
||||
break;
|
||||
case ngraph::helpers::EltwiseTypes::ERF:
|
||||
tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 6, -3);
|
||||
break;
|
||||
default:
|
||||
if (funcInput.get_element_type().is_real()) {
|
||||
tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 80, 0, 8);
|
||||
} else {
|
||||
tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
inputs.insert({funcInput.get_node_shared_ptr(), tensor});
|
||||
inputs.insert({funcInput.get_node_shared_ptr(), generate_eltwise_input(funcInput.get_element_type(), targetInputStaticShapes[i])});
|
||||
}
|
||||
}
|
||||
|
||||
@ -88,6 +97,8 @@ protected:
|
||||
|
||||
if (ElementType::bf16 == netType) {
|
||||
rel_threshold = 2e-2f;
|
||||
} else if (ElementType::i32 == netType) {
|
||||
abs_threshold = 0;
|
||||
}
|
||||
|
||||
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
|
||||
@ -134,29 +145,21 @@ protected:
|
||||
}
|
||||
}
|
||||
}
|
||||
if (eltwiseType == ngraph::helpers::EltwiseTypes::DIVIDE ||
|
||||
eltwiseType == ngraph::helpers::EltwiseTypes::MOD) {
|
||||
std::vector<float> data(ngraph::shape_size(shape));
|
||||
data = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(ngraph::shape_size(shape), 10, 2);
|
||||
|
||||
if (netType == ElementType::i32) {
|
||||
auto data_tensor = generate_eltwise_input(ElementType::i32, shape);
|
||||
auto data_ptr = reinterpret_cast<int32_t*>(data_tensor.data());
|
||||
std::vector<int32_t> data(data_ptr, data_ptr + ngraph::shape_size(shape));
|
||||
secondaryInput = ngraph::builder::makeConstant(netType, shape, data);
|
||||
} else if (eltwiseType == ngraph::helpers::EltwiseTypes::FLOOR_MOD) {
|
||||
auto negative_data_size = ngraph::shape_size(shape) / 2;
|
||||
auto positive_data_size = ngraph::shape_size(shape) - negative_data_size;
|
||||
std::vector<float> negative_data(negative_data_size);
|
||||
std::vector<float> data(positive_data_size);
|
||||
negative_data = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(negative_data_size, -10, -2);
|
||||
data = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(positive_data_size, 10, 2);
|
||||
data.insert(data.end(), negative_data.begin(), negative_data.end());
|
||||
secondaryInput = ngraph::builder::makeConstant(netType, shape, data);
|
||||
} else if (eltwiseType == ngraph::helpers::EltwiseTypes::POWER) {
|
||||
secondaryInput = ngraph::builder::makeConstant<float>(netType, shape, {}, true, 3);
|
||||
} else {
|
||||
secondaryInput = ngraph::builder::makeConstant<float>(netType, shape, {}, true);
|
||||
auto data_tensor = generate_eltwise_input(ElementType::f32, shape);
|
||||
auto data_ptr = reinterpret_cast<float*>(data_tensor.data());
|
||||
std::vector<float> data(data_ptr, data_ptr + ngraph::shape_size(shape));
|
||||
secondaryInput = ngraph::builder::makeConstant(netType, shape, data);
|
||||
}
|
||||
}
|
||||
|
||||
auto eltwise = ngraph::builder::makeEltwise(parameters[0], secondaryInput, eltwiseType);
|
||||
|
||||
function = makeNgraphFunction(netType, parameters, eltwise, "Eltwise");
|
||||
}
|
||||
|
||||
@ -325,6 +328,36 @@ const auto params_5D_emptyCPUSpec = ::testing::Combine(
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_5D, EltwiseLayerCPUTest, params_5D_emptyCPUSpec, EltwiseLayerCPUTest::getTestCaseName);
|
||||
|
||||
std::vector<ngraph::helpers::EltwiseTypes> eltwiseOpTypesI32 = {
|
||||
ngraph::helpers::EltwiseTypes::ADD,
|
||||
ngraph::helpers::EltwiseTypes::MULTIPLY,
|
||||
ngraph::helpers::EltwiseTypes::SUBTRACT,
|
||||
ngraph::helpers::EltwiseTypes::DIVIDE,
|
||||
ngraph::helpers::EltwiseTypes::SQUARED_DIFF,
|
||||
};
|
||||
|
||||
const std::vector<fusingSpecificParams> fusingParamsSetI32{
|
||||
emptyFusingSpec,
|
||||
fusingMultiplyAddPerChannel,
|
||||
};
|
||||
|
||||
const auto params_5D_emptyCPUSpec_I32 = ::testing::Combine(
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(static_shapes_to_test_representation(inShapes_5D)),
|
||||
::testing::ValuesIn(eltwiseOpTypesI32),
|
||||
::testing::ValuesIn(secondaryInputTypes),
|
||||
::testing::ValuesIn(opTypes),
|
||||
::testing::Values(ElementType::i32),
|
||||
::testing::Values(ElementType::i32),
|
||||
::testing::Values(ElementType::i32),
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU),
|
||||
::testing::Values(additional_config)),
|
||||
::testing::Values(emptyCPUSpec),
|
||||
::testing::ValuesIn(fusingParamsSetI32));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_5D_I32, EltwiseLayerCPUTest, params_5D_emptyCPUSpec_I32, EltwiseLayerCPUTest::getTestCaseName);
|
||||
|
||||
|
||||
std::vector<std::vector<ov::Shape>> inShapes_4D_Blocked_Planar = {
|
||||
{{2, 17, 31, 3}, {2, 1, 31, 3}},
|
||||
{{2, 17, 5, 1}, {2, 1, 1, 4}},
|
||||
|
@ -361,6 +361,18 @@ const auto fusingMultiplyPerChannel = fusingSpecificParams{std::make_shared<post
|
||||
return std::make_shared<ngraph::opset1::Multiply>(cfg.input, secondMultInput);
|
||||
}, "Multiply(PerChannel)"}}), {"Multiply"}};
|
||||
|
||||
// Fusing spec with two post nodes applied in order: a Multiply and then an Add,
// each taking a constant whose shape comes from generatePerChannelShape(cfg.input).
// The trailing {"Add"} list is passed through to fusingSpecificParams unchanged
// (presumably the expected fused-op names — confirm against fusingSpecificParams).
const auto fusingMultiplyAddPerChannel = fusingSpecificParams{std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
        {[](postNodeConfig& cfg) {
            const ngraph::Shape perChannelShape = generatePerChannelShape(cfg.input);
            auto multiplier = ngraph::builder::makeConstant(cfg.type, perChannelShape, std::vector<float>{}, true);
            return std::make_shared<ngraph::opset1::Multiply>(cfg.input, multiplier);
        }, "Multiply(PerChannel)"},
        {[](postNodeConfig& cfg) {
            const ngraph::Shape perChannelShape = generatePerChannelShape(cfg.input);
            auto addend = ngraph::builder::makeConstant(cfg.type, perChannelShape, std::vector<float>{}, true);
            return std::make_shared<ngraph::opset1::Add>(cfg.input, addend);
        }, "Add(PerChannel)"}}), {"Add"} };
|
||||
|
||||
const auto fusingAddPerTensor = fusingSpecificParams{std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
|
||||
{[](postNodeConfig& cfg){
|
||||
ngraph::Shape secondMultInShape(1, 1);
|
||||
|
@ -230,6 +230,10 @@ void compare(const ov::Tensor& expected,
|
||||
auto eps = std::numeric_limits<double>::epsilon();
|
||||
return (b - a) > (std::fmax(std::fabs(a), std::fabs(b)) * eps);
|
||||
};
|
||||
// Tolerant "a <= b" for doubles.
// Returns true when a is not greater than b, or when a exceeds b by no more than
// a relative machine-epsilon tolerance scaled by the larger magnitude.
// The exact a <= b check comes first so that equal values (including equal
// non-zero values and equal infinities) always compare as less-or-equal.
auto less_or_equal = [](double a, double b) {
    const double tolerance = std::fmax(std::fabs(a), std::fabs(b)) * std::numeric_limits<double>::epsilon();
    return a <= b || (a - b) <= tolerance;
};
|
||||
for (size_t i = 0; i < shape_size(expected_shape); i++) {
|
||||
double expected_value = expected_data[i];
|
||||
double actual_value = actual_data[i];
|
||||
@ -258,7 +262,7 @@ void compare(const ov::Tensor& expected,
|
||||
}
|
||||
abs_error.mean /= shape_size(expected_shape);
|
||||
rel_error.mean /= shape_size(expected_shape);
|
||||
if (!(less(abs_error.max, abs_threshold) && less(rel_error.max, rel_threshold))) {
|
||||
if (!(less_or_equal(abs_error.max, abs_threshold) && less_or_equal(rel_error.max, rel_threshold))) {
|
||||
std::ostringstream out_stream;
|
||||
out_stream << "abs_max < abs_threshold && rel_max < rel_threshold" <<
|
||||
"\n\t abs_max: " << abs_error.max <<
|
||||
|
Loading…
Reference in New Issue
Block a user