[CPU] Reorganize function tests. Remove legacy bfloat16 tests (#17130)
This commit is contained in:
parent
e79db660ce
commit
478725c719
@ -1,213 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <map>
#include <functional>
#include <utility>

#include <ie_core.hpp>
#include <ie_plugin_config.hpp>

#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class BF16NetworkRestore1 : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //             Power1 (FP32)
        //                |
        //             AvgPooling1 (BF16)
        //                |
        //             Convolution1 (BF16)
        //                |
        //             ReLU1 (Fused)
        //                |-----------------------\
        //                |                        \
        //      Convolution2 (BF16)          Convolution3 (BF16)
        //                |                   /            \
        //                |            ReLU2 (FP32)    Normalize (FP32)
        //                 \              /                 |
        //           Eltwise (Fused to Conv2)               |
        //                |                                 |
        //           ReLU3 (Fused to Conv2)                 |
        //                |                                 |
        //           MaxPooling1 (BF16)                     |
        //                 \                               /
        //                  \--------- Eltwise -----------/
        //                                |

        // STAGE1: construction of the GRAPH

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 224, 224});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);
        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("Power1");

        // AvgPooling
        auto avgpoolNode = std::make_shared<opset1::AvgPool>(addNode,
                                                             Strides{1, 1},
                                                             Shape{1, 1},
                                                             Shape{1, 1},
                                                             Shape{2, 2},
                                                             true,
                                                             op::RoundingType::FLOOR);
        avgpoolNode->set_friendly_name("AvgPooling1");

        // convolution1
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { 3, 3, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(3 * 3 * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(3 * 3 * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            avgpoolNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
            ngraph::CoordinateDiff({ 0, 0 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("Convolution1");

        // ReLU1
        auto reluNode = std::make_shared<opset1::Relu>(convNode1);
        reluNode->set_friendly_name("ReLU1");

        // convolution2
        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
            reluNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
            ngraph::CoordinateDiff({ 0, 0 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode2->set_friendly_name("Convolution2");

        // convolution3
        std::shared_ptr<ngraph::Node> convNode3 = std::make_shared<ngraph::opset1::Convolution>(
            reluNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
            ngraph::CoordinateDiff({ 0, 0 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode3->set_friendly_name("Convolution3");

        // ReLU2
        auto reluNode2 = std::make_shared<opset1::Relu>(convNode3);
        reluNode2->set_friendly_name("ReLU2");

        // Norm1
        // normalize
        const auto axes = make_shared<op::Constant>(element::i64, Shape{2}, vector<int64_t>{2});
        float eps{1e-6f};
        auto eps_mode = op::EpsMode::ADD;

        auto normNode = std::make_shared<opset1::NormalizeL2>(convNode3, axes, eps, eps_mode);
        normNode->set_friendly_name("Norm1");

        // Eltwise1
        auto eltNode1 = std::make_shared<opset1::Add>(convNode2, reluNode2);
        eltNode1->set_friendly_name("Eltwise1");

        // ReLU3
        auto reluNode3 = std::make_shared<opset1::Relu>(eltNode1);
        reluNode3->set_friendly_name("ReLU3");

        // maxPooling1
        auto maxPoolNode = std::make_shared<opset1::MaxPool>(reluNode3,
                                                             Strides{1, 1},
                                                             Shape{1, 1},
                                                             Shape{0, 0},
                                                             Shape{2, 2},
                                                             op::RoundingType::FLOOR);
        maxPoolNode->set_friendly_name("maxPooling1");

        // Eltwise2
        auto eltNode2 = std::make_shared<opset1::Add>(maxPoolNode, normNode);
        eltNode2->set_friendly_name("Eltwise2");

        return std::make_shared<ngraph::Function>(ngraph::NodeVector{eltNode2}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        threshold = 0.4f;  // max value in the latest tensor for the FP32 network is 10.83

        // STAGE2:
        // filling of the expected precision of layer execution, defined by the precision of the input tensor
        // to the primitive and reflected in the performance counters
        expectedPrecisions["Power1"] = "FP32";
        expectedPrecisions["AvgPooling1"] = "BF16";
        expectedPrecisions["Convolution1"] = "BF16";
        expectedPrecisions["ReLU1"] = "ndef";
        expectedPrecisions["Convolution2"] = "BF16";
        expectedPrecisions["Convolution3"] = "BF16";
        expectedPrecisions["ReLU2"] = "BF16";
        expectedPrecisions["Norm1"] = "BF16";
        expectedPrecisions["Eltwise1"] = "ndef";
        expectedPrecisions["ReLU3"] = "ndef";
        expectedPrecisions["maxPooling1"] = "BF16";
        expectedPrecisions["Eltwise2"] = "BF16";
    }
};

TEST_P(BF16NetworkRestore1, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, BF16NetworkRestore1,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::BF16),
                             ::testing::Values(SizeVector({ 1, 3, 224, 224 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         BF16NetworkRestore1::getTestCaseName);

}  // namespace LayerTestsDefinitions
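Note: the bf16 constants above are produced by bit-twiddling a float. `reducePrecisionBitwiseS` (defined in the helper header in the next hunk) keeps the top 16 bits of the IEEE-754 single-precision encoding and rounds up on the highest discarded bit. A minimal standalone sketch of that conversion, with hypothetical names and example values chosen here purely for illustration, not part of the removed tests:

#include <cstdint>
#include <cstdio>
#include <cstring>

// Keep the top 16 bits of a float, rounding up when the highest discarded
// bit is set, unless the upper mantissa bits are all ones (which would
// carry into the exponent) -- mirroring the helper's logic.
static uint16_t toBf16Bits(float in) {
    uint32_t bits;
    std::memcpy(&bits, &in, sizeof(bits));
    if ((bits & 0x8000u) && (bits & 0x007F0000u) != 0x007F0000u) {
        bits += 0x10000u;
    }
    return static_cast<uint16_t>(bits >> 16);
}

int main() {
    std::printf("1.0f   -> 0x%04X\n", toBf16Bits(1.0f));    // 0x3F80, exact
    std::printf("2.0f   -> 0x%04X\n", toBf16Bits(2.0f));    // 0x4000, exact
    std::printf("1.004f -> 0x%04X\n", toBf16Bits(1.004f));  // 0x3F81, rounded up
    return 0;
}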
@ -1,260 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <inference_engine.hpp>
#include "ie_common.h"
#include <ie_blob.h>
#include <math.h>
#include <map>
#include <string>
#include <utility>
#include <memory>
#include <tuple>
#include <vector>

#include "ngraph/opsets/opset1.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "common_test_utils/common_utils.hpp"
#include "functional_test_utils/blob_utils.hpp"
#include <ie_system_conf.h>

namespace LayerTestsDefinitions {

/**
 * Class providing static helpers for bfloat16 functional tests.
 * Using these functions you can fill tensor content by some periodic law
 * or compare outputs.
 */
class BFloat16Helpers {
public:
    static std::pair<std::string, std::string> matchPerfCountPrecisionVsExpected(
        const std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>& perfCounts,
        const std::map<std::string, std::string>& expected) {
        for (auto e : expected) {
            auto it = perfCounts.find(e.first);
            if (it == perfCounts.end()) {
                return std::pair<std::string, std::string>(e.first, "NOT_FOUND_IN_PERF_COUNTS");
            }
            // compare the last n symbols of the execution type, where n is the length of e.second
            std::string execType = it->second.exec_type;
            std::string pfPrecision = execType.substr(execType.length() - e.second.length(), e.second.length());
            if (pfPrecision != e.second) {
                return std::pair<std::string, std::string>(e.first, pfPrecision);
            }
        }
        return std::pair<std::string, std::string>("", "");
    }

    static float getMaxAbsValue(const float* data, size_t size) {
        float maxVal = 0.f;
        for (size_t i = 0; i < size; i++) {
            if (fabs(data[i]) > maxVal) {
                maxVal = fabs(data[i]);
            }
        }
        return maxVal;
    }

    static float reducePrecisionBitwise(const float in) {
        float f = in;
        int* i = reinterpret_cast<int*>(&f);
        int t2 = *i & 0xFFFF0000;
        float ft1;
        memcpy(&ft1, &t2, sizeof(float));
        if ((*i & 0x8000) && (*i & 0x007F0000) != 0x007F0000) {
            t2 += 0x10000;
            memcpy(&ft1, &t2, sizeof(float));
        }
        return ft1;
    }

    static short reducePrecisionBitwiseS(const float in) {
        float f = reducePrecisionBitwise(in);
        int intf;
        memcpy(&intf, &f, sizeof(int));
        intf = intf >> 16;
        short s = intf;
        return s;
    }
};


typedef std::tuple<
    InferenceEngine::Precision,
    InferenceEngine::Precision,
    InferenceEngine::SizeVector,
    InferenceEngine::SizeVector,
    std::string> basicParams;


/**
 * Base class for bf16 tests.
 * The flow in these tests is to load the network in FP32 and in BF16 modes and verify:
 * 1. the difference between the output tensors within some threshold;
 * 2. which precision was selected for the layers described in the runtime info of the performance counters.
 *
 * To develop a new test you need to:
 * 1. Define a class inherited from BasicBF16Test and implement SetUp(). For example:
 *
 * class ScaleshiftConv_x3_Eltwise : public BasicBF16Test {
 * protected:
 *     void SetUp() override {
 *         fnPtr = std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode3}, ngraph::ParameterVector{input1});
 *
 *         // STAGE1:
 *         threshold = 9e-1;
 *
 *         // STAGE2:
 *         // filling of the expected precision of layer execution, defined by the precision of the input tensor
 *         // to the primitive and reflected in the performance counters
 *         expectedPrecisions["Add_4"] = "FP32";
 *         expectedPrecisions["Convolution_6"] = "BF16";
 *         expectedPrecisions["Convolution_7"] = "BF16";
 *         expectedPrecisions["Add_8"] = "ndef";
 *         expectedPrecisions["Convolution_10"] = "BF16";
 *     }
 * };
 *
 * 2. Define the test:
 * TEST_P(ScaleshiftConv_x3_Eltwise, CompareWithRefImpl) {
 *     test();
 * };
 *
 * 3. INSTANTIATE_TEST_SUITE_P(smoke_bfloat16_NoReshape, ScaleshiftConv_x3_Eltwise,
 *                             ::testing::Combine(
 *                                 ::testing::Values(Precision::FP32),
 *                                 ::testing::Values(Precision::FP32),
 *                                 ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
 *                                 ::testing::Values(SizeVector()),
 *                                 ::testing::Values(CommonTestUtils::DEVICE_CPU)),
 *                             ScaleshiftConv_x3_Eltwise::getTestCaseName);
 *
 * In the 3rd stage do not forget the bfloat16 prefix!
 */
class BasicBF16Test : public testing::WithParamInterface<basicParams>,
                      public CommonTestUtils::TestsCommon {
protected:
    virtual std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) = 0;

public:
    std::shared_ptr<ngraph::Function> fnPtr;
    std::string targetDevice;
    InferenceEngine::SizeVector inputShapes, newInputShapes;
    InferenceEngine::Precision inputPrecision, netPrecision;
    std::map<std::string, std::string> expectedPrecisions;
    float threshold = 2e-2f;  // is enough for tensors having absolute maximum values less than 1

    static std::string getTestCaseName(testing::TestParamInfo<basicParams> obj) {
        InferenceEngine::Precision inputPrecision, netPrecision;
        InferenceEngine::SizeVector inputShapes, newInputShapes;
        std::string targetDevice;
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = obj.param;

        std::ostringstream result;
        if (!newInputShapes.empty()) {
            result << "Reshape_From=" << CommonTestUtils::vec2str(inputShapes);
            result << "_To=" << CommonTestUtils::vec2str(newInputShapes) << "_";
        } else {
            result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
        }
        result << "inPRC=" << inputPrecision.name() << "_";
        result << "netPRC=" << netPrecision.name() << "_";
        result << "targetDevice=" << targetDevice;
        return result.str();
    }

    static void setNetInOutPrecision(InferenceEngine::CNNNetwork &cnnNet, InferenceEngine::Precision inPrc,
                                     InferenceEngine::Precision outPrc = InferenceEngine::Precision::UNSPECIFIED) {
        if (inPrc != InferenceEngine::Precision::UNSPECIFIED) {
            for (const auto &inputItem : cnnNet.getInputsInfo()) {
                inputItem.second->setPrecision(inPrc);
            }
        }
        if (outPrc != InferenceEngine::Precision::UNSPECIFIED) {
            for (const auto &output : cnnNet.getOutputsInfo()) {
                output.second->setPrecision(outPrc);
            }
        }
    }

    void test() {
        if (!InferenceEngine::with_cpu_x86_avx512_core()) {
            // We enable bf16 tests on platforms with native bfloat16 support and on platforms with the AVX512 ISA.
            // On platforms with the AVX512 ISA but without native bfloat16 support, computations are done via a simulation mode.
            GTEST_SKIP();
        }
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        InferenceEngine::CNNNetwork cnnNet(fnPtr);

        setNetInOutPrecision(cnnNet, inputPrecision);
        std::string inputName = cnnNet.getInputsInfo().begin()->first;
        std::string outputName = cnnNet.getOutputsInfo().begin()->first;
        auto ie = InferenceEngine::Core();
        // BF16 inference
        std::map<std::string, std::string> options;
        if (netPrecision == InferenceEngine::Precision::FP32) {
            options[InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16] = InferenceEngine::PluginConfigParams::YES;
        } else {
            options[InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16] = InferenceEngine::PluginConfigParams::NO;
        }
        options[InferenceEngine::PluginConfigParams::KEY_PERF_COUNT] = InferenceEngine::PluginConfigParams::YES;

        auto exec_net1 = ie.LoadNetwork(cnnNet, targetDevice, options);
        auto req1 = exec_net1.CreateInferRequest();

        InferenceEngine::Blob::Ptr inBlob1 = req1.GetBlob(inputName);
        FuncTestUtils::fillInputsBySinValues(inBlob1);

        req1.Infer();
        auto outBlobBF16 = req1.GetBlob(outputName);
        InferenceEngine::MemoryBlob::CPtr mout1 = InferenceEngine::as<InferenceEngine::MemoryBlob>(outBlobBF16);
        ASSERT_NE(mout1, nullptr);
        auto lm1 = mout1->rmap();

        // FP32 inference
        // if netPrecision is not equal to FP32, change the network precision and recreate the network
        InferenceEngine::CNNNetwork cnnNetFP32(createGraph(InferenceEngine::Precision::FP32));
        std::string inputNameFP32 = cnnNetFP32.getInputsInfo().begin()->first;
        std::string outputNameFP32 = cnnNetFP32.getOutputsInfo().begin()->first;
        setNetInOutPrecision(cnnNetFP32, inputPrecision);
        auto exec_net2 = ie.LoadNetwork(cnnNetFP32, targetDevice,
                                        { { InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::NO } });
        auto req2 = exec_net2.CreateInferRequest();

        req2.SetBlob(inputNameFP32, inBlob1);

        req2.Infer();
        auto outBlobFP32 = req2.GetBlob(outputNameFP32);
        InferenceEngine::MemoryBlob::CPtr mout2 = InferenceEngine::as<InferenceEngine::MemoryBlob>(outBlobFP32);
        ASSERT_NE(mout2, nullptr);
        auto lm2 = mout2->rmap();

        // debug output to figure out the maximum value in the output tensors:
        // std::cout << "Max in bfloat16 network by output " << outputName << ": " <<
        //     BFloat16Helpers::getMaxAbsValue(lm1.as<const float *>(), mout1->size()) << std::endl;
        // std::cout << "Max in fp32 network by output " << outputNameFP32 << ": " <<
        //     BFloat16Helpers::getMaxAbsValue(lm2.as<const float *>(), mout2->size()) << std::endl;
        FuncTestUtils::compareRawBuffers(lm1.as<const float *>(),
                                         lm2.as<const float *>(),
                                         mout1->size(), mout2->size(),
                                         FuncTestUtils::CompareType::ABS,
                                         threshold);
        // Stage2: verification of the performance counters
        std::pair<std::string, std::string> wrongLayer =
            BFloat16Helpers::matchPerfCountPrecisionVsExpected(req1.GetPerformanceCounts(), expectedPrecisions);
        if (wrongLayer.first != std::string("")) {
            std::string layerInPerfCounts = wrongLayer.first + " " + wrongLayer.second;
            std::string layerExpected = wrongLayer.first + " " + expectedPrecisions[wrongLayer.first];
            ASSERT_EQ(layerInPerfCounts, layerExpected);
        }
        fnPtr.reset();
    }
};

}  // namespace LayerTestsDefinitions
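Note: `matchPerfCountPrecisionVsExpected` above decides a layer's execution precision by comparing the trailing characters of the profiling entry's `exec_type` (for example `jit_avx512_BF16`) against the expected string. A minimal sketch of that suffix comparison, using hypothetical function and execution-type names for illustration:

#include <iostream>
#include <string>

// Return true when execType ends with the expected precision suffix --
// the same comparison the helper performs on perf-counter entries.
static bool precisionSuffixMatches(const std::string& execType, const std::string& expected) {
    if (execType.length() < expected.length())
        return false;
    return execType.substr(execType.length() - expected.length()) == expected;
}

int main() {
    std::cout << precisionSuffixMatches("jit_avx512_BF16", "BF16") << "\n";  // 1
    std::cout << precisionSuffixMatches("jit_avx512_FP32", "BF16") << "\n";  // 0
    return 0;
}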
@ -1,160 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <functional>
#include <map>
#include <utility>

#include <ie_core.hpp>

#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {
namespace {
static const size_t inputSize = 2, concatAxe = 1;
static std::vector<SizeVector> paramVector = {
    SizeVector({ 1, 1, inputSize, inputSize }),
    SizeVector({ 1, 2, inputSize, inputSize }),
    SizeVector({ 1, 3, inputSize, inputSize }),
    SizeVector({ 1, 4, inputSize, inputSize }),
    SizeVector({ 1, 5, inputSize, inputSize }),
    SizeVector({ 1, 6, inputSize, inputSize }),
    SizeVector({ 1, 7, inputSize, inputSize }),
    SizeVector({ 1, 8, inputSize, inputSize }),
    SizeVector({ 1, 9, inputSize, inputSize }),
    SizeVector({ 1, 10, inputSize, inputSize })};
}  // namespace

class Concat_in_place : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //      scaleshift
        //       /      \
        //     Conv    Conv
        //       \      /
        //        concat
        //          |
        //         relu

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("ADD_1");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        auto channelsCount = inputShapes[1];
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("CONV_1");

        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode2->set_friendly_name("CONV_2");

        // Concat
        ngraph::NodeVector concInputNodes = { convNode1, convNode2 };

        auto concNode = std::make_shared<opset1::Concat>(concInputNodes, concatAxe);
        concNode->set_friendly_name("CONC_1_TEST");

        // ReLU
        auto reluNode = std::make_shared<opset1::Relu>(concNode);
        reluNode->set_friendly_name("RELU_1");

        return std::make_shared<ngraph::Function>(ngraph::NodeVector{reluNode}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE1:
        threshold = 10e-1;
        // STAGE2:
        // filling of the expected precision of layer execution, defined by the precision of the input tensor
        // to the primitive and reflected in the performance counters
        expectedPrecisions["ADD_1"] = netPrecision.name();
        expectedPrecisions["CONV_1"] = "BF16";
        expectedPrecisions["CONV_2"] = "BF16";
    }
};

TEST_P(Concat_in_place, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, Concat_in_place,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::ValuesIn(paramVector),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         Concat_in_place::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, Concat_in_place,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::BF16),
                             ::testing::ValuesIn(paramVector),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         Concat_in_place::getTestCaseName);
}  // namespace LayerTestsDefinitions
@ -1,142 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <map>
#include <functional>
#include <utility>

#include <ie_core.hpp>
#include <ie_plugin_config.hpp>

#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class ConvAdd : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //   Power (FP32)
        //     |
        //   Conv (BF16)
        //     |
        //   Eltwise (SUM) (BF16)
        //     |
        //   Conv (BF16)

        auto channelsCount = inputShapes[1];

        // STAGE1: construction of the GRAPH
        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        // add
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> eltConst0 = nullptr, eltConst1 = nullptr;
        if (netPrecision == Precision::FP32) {
            eltConst0 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
            eltConst1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            eltConst0 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
            eltConst1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto addNode0 = std::make_shared<opset1::Multiply>(input1, eltConst0);
        addNode0->set_friendly_name("Add_0");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode0 = nullptr, weightsNode1 = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode0 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
            weightsNode1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode0 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
            weightsNode1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode0 = std::make_shared<ngraph::opset1::Convolution>(
            addNode0, weightsNode0,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode0->set_friendly_name("Convolution_0");

        // eltwise, i.e. sum
        auto eltSumNode = std::make_shared<opset1::Add>(convNode0, eltConst1);
        eltSumNode->set_friendly_name("Elt_sum");

        // convolution
        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            eltSumNode, weightsNode1,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("Convolution_1");

        return std::make_shared<ngraph::Function>(convNode1, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE2: set up a safe threshold <= 5% of the maximum value of the output tensor

        // 256 channels
        // threshold = 0.26f;  // Max in fp32 network by output: 5.26852

        // 3 channels
        threshold = 0.2f;  // Max in fp32 network by output: 4.90418

        // STAGE3:
        // filling of the expected precision of layer execution, defined by the precision of the input tensor
        // to the primitive and reflected in the performance counters
        expectedPrecisions["Elt_sum"] = "ndef";
    }
};

TEST_P(ConvAdd, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, ConvAdd,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({1, 3, 38, 38})),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         ConvAdd::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, ConvAdd,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::BF16),
                             ::testing::Values(SizeVector({1, 3, 38, 38})),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         ConvAdd::getTestCaseName);

}  // namespace LayerTestsDefinitions
@ -1,132 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <functional>
#include <map>
#include <utility>

#include <ie_core.hpp>

#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class ConvConv : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //   ScaleShift (FP32)
        //     |
        //   Conv (BF16)
        //     |
        //   Conv (BF16)

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ngraph::element::f32, ngraph::Shape{inputShapes});
        auto const1 = opset1::Constant::create(ngraph::element::f32, Shape{1}, { 2.0f });
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        auto const2 = opset1::Constant::create(ngraph::element::f32, Shape{1}, { 1.0f });
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("ADD_1");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;

        auto channelsCount = inputShapes[1];

        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("CONV_1");

        // Convolution
        ngraph::Shape convFilterShape2 = { channelsCount, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        std::vector<float> weightValues2;
        weightValues2.resize(channelsCount * channelsCount * 3 * 3);
        FuncTestUtils::fillInputsBySinValues(weightValues2.data(), weightValues2.size());
        auto weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ngraph::element::f32, convFilterShape2, weightValues2);
        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
            convNode1, weightsNode2,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
            ngraph::CoordinateDiff({ 0, 0 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode2->set_friendly_name("CONV_2");

        return std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode2}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE1:
        threshold = 1.0f;  // Max in fp32 network by output CONV_2: 49.3427
        // STAGE2:
        // filling of the expected precision of layer execution, defined by the precision of the input tensor
        // to the primitive and reflected in the performance counters
        expectedPrecisions["ADD_1"] = netPrecision.name();
        expectedPrecisions["CONV_1"] = "BF16";
        expectedPrecisions["CONV_2"] = "BF16";
    }
};

TEST_P(ConvConv, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, ConvConv,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         ConvConv::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, ConvConv,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         ConvConv::getTestCaseName);

}  // namespace LayerTestsDefinitions
@ -1,151 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <vector>
#include <string>

#include <ie_core.hpp>
#include "functional_test_utils/blob_utils.hpp"
#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class ConvDWConvReLU : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //   scaleshift (FP32)
        //     |
        //   Conv (BF16)
        //     |
        //   Depthwise Conv (BF16, assuming explicit separate execution of the kernel, not fused into the previous convolution)
        //     |
        //   ReLU (Fused into DW convolution)

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("ADD_1");

        // convolution
        auto channelsCount = inputShapes[1];

        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("CONV_1");

        // DW convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode2 = nullptr;
        ngraph::Shape convFilterShape2 = { channelsCount, 1, 1, 3, 3 };
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValues2FP32;
            weightValues2FP32.resize(channelsCount * 1 * 1 * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValues2FP32.data(), weightValues2FP32.size());
            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2FP32);
        } else {
            std::vector<short> weightValues2BF16;
            weightValues2BF16.resize(channelsCount * 1 * 1 * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValues2BF16.data(), weightValues2BF16.size());
            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2BF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::GroupConvolution>(
            convNode1, weightsNode2,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode2->set_friendly_name("CONV_2");

        // ReLU
        auto reluNode2 = std::make_shared<opset1::Relu>(convNode2);
        reluNode2->set_friendly_name("RELU");

        return std::make_shared<ngraph::Function>(reluNode2, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE1:
        threshold = 0.4f;  // the maximum value in the tensor is 54.89
        // STAGE2:
        // filling of the expected precision of layer execution, defined by the precision of the input tensor
        // to the primitive and reflected in the performance counters
        expectedPrecisions["ADD_1"] = netPrecision.name();
        expectedPrecisions["CONV_1"] = "BF16";
        expectedPrecisions["RELU"] = "ndef";
    }
};

TEST_P(ConvDWConvReLU, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, ConvDWConvReLU,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         ConvDWConvReLU::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, ConvDWConvReLU,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::BF16),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         ConvDWConvReLU::getTestCaseName);

}  // namespace LayerTestsDefinitions
@ -1,276 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <functional>
#include <map>
#include <utility>

#include <ie_core.hpp>

#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/common_utils.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {
typedef std::tuple<Precision, SizeVector, string, size_t, CoordinateDiff, string> convEltwiseDepthwiseTestParamsSet;

class ConvEltwiseDepthwise :
    public testing::WithParamInterface<convEltwiseDepthwiseTestParamsSet>, virtual public LayerTestsUtils::LayerTestsCommon {
public:
    std::shared_ptr<Function> fnPtr;
    SizeVector inputShapes;
    std::map<string, string> expectedPrecisions;
    float threshold = 7e-2f;
    Precision netPrecision;
    size_t kernel;
    CoordinateDiff pads;
    std::string dnnlPrimitive;

protected:
    std::shared_ptr<Function> createGraph(InferenceEngine::Precision netPrecision) {
        //   scaleshift (FP32)
        //     |
        //   Conv (BF16)
        //     |
        //   Relu (Eltwise fused into Conv)
        //     |
        //   scaleshift (Depthwise fused into Conv)

        element::Type ntype = (netPrecision == Precision::FP32) ? element::f32 : element::bf16;
        size_t chCnt = inputShapes[1];

        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, Shape{ inputShapes });
        input1->set_friendly_name("Input_1");
        std::shared_ptr<opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{ 1 }, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{ 1 }, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);
        mulNode->set_friendly_name("SS_1");

        // add
        std::shared_ptr<opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{ 1 }, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{ 1 }, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);

        // convolution
        std::shared_ptr<opset1::Constant> weightsNode = nullptr;
        Shape convFilterShape = { chCnt, chCnt, kernel, kernel };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(chCnt * chCnt * kernel * kernel);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(chCnt * chCnt * kernel * kernel);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<Node> convNode1 = std::make_shared<opset1::Convolution>(
            addNode, weightsNode, Strides({ 1, 1 }), pads, pads, Strides({ 1, 1 }), op::PadType::EXPLICIT);
        convNode1->set_friendly_name("CONV");

        // Eltwise, i.e. Relu
        auto reluNode = std::make_shared<opset1::Relu>(convNode1);
        reluNode->set_friendly_name("RELU");

        // multiply
        std::shared_ptr<opset1::Constant> const3 = nullptr;
        if (netPrecision == Precision::FP32) {
            const3 = opset1::Constant::create(ntype, Shape{ 1, chCnt, 1, 1 }, { 3.0f });
        } else {
            const3 = opset1::Constant::create(ntype, Shape{ 1, chCnt, 1, 1 },
                                              { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(3.0f)) });
        }
        auto mulNode2 = std::make_shared<opset1::Multiply>(reluNode, const3);
        mulNode2->set_friendly_name("SS_2");

        // add
        std::shared_ptr<opset1::Constant> const4 = nullptr;
        if (netPrecision == Precision::FP32) {
            const4 = opset1::Constant::create(ntype, Shape{ 1, chCnt, 1, 1 }, { 2.0f });
        } else {
            const4 = opset1::Constant::create(ntype, Shape{ 1, chCnt, 1, 1 },
                                              { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto addNode2 = std::make_shared<opset1::Add>(mulNode2, const4);

        return std::make_shared<Function>(NodeVector{ addNode2 }, ParameterVector{ input1 });
    }

public:
    static string getTestCaseName(testing::TestParamInfo<convEltwiseDepthwiseTestParamsSet> obj) {
        Precision netPrecision;
        SizeVector inputShapes;
        string targetDevice;
        size_t kernel;
        CoordinateDiff pads;
        string dnnlPrimitive;
        std::tie(netPrecision, inputShapes, targetDevice, kernel, pads, dnnlPrimitive) = obj.param;

        std::ostringstream result;
        result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
        result << "netPRC=" << netPrecision.name() << "_";
        result << "dnnlPrimitive=" << dnnlPrimitive << "_";
        result << "targetDevice=" << targetDevice;
        return result.str();
    }

    void Run_test() {
        if (!InferenceEngine::with_cpu_x86_bfloat16()) {
            // On platforms which do not support bfloat16 we disable the bf16 tests, since there are no bf16 primitives;
            // the tests are useless on such platforms.
            return;
        }
        std::tie(netPrecision, inputShapes, targetDevice, kernel, pads, dnnlPrimitive) = this->GetParam();
        InferenceEngine::CNNNetwork cnnNet(fnPtr);

        for (const auto& inputItem : cnnNet.getInputsInfo()) {
            inputItem.second->setPrecision(Precision::FP32);
        }

        string inputName = cnnNet.getInputsInfo().begin()->first;
        string outputName = cnnNet.getOutputsInfo().begin()->first;
        auto ie = InferenceEngine::Core();
        // BF16 inference
        std::map<string, string> options;
        if (netPrecision == InferenceEngine::Precision::FP32) {
            options[InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16] = InferenceEngine::PluginConfigParams::YES;
        } else {
            options[InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16] = InferenceEngine::PluginConfigParams::NO;
        }
        options[InferenceEngine::PluginConfigParams::KEY_PERF_COUNT] = InferenceEngine::PluginConfigParams::YES;

        auto exec_net1 = ie.LoadNetwork(cnnNet, targetDevice, options);
        auto req1 = exec_net1.CreateInferRequest();

        InferenceEngine::Blob::Ptr inBlob1 = req1.GetBlob(inputName);
        FuncTestUtils::fillInputsBySinValues(inBlob1);

        req1.Infer();
        auto outBlobBF16 = req1.GetBlob(outputName);
        InferenceEngine::MemoryBlob::CPtr mout1 = InferenceEngine::as<InferenceEngine::MemoryBlob>(outBlobBF16);
        ASSERT_NE(mout1, nullptr);
        auto lm1 = mout1->rmap();

        // FP32 inference
        // if netPrecision is not equal to FP32, change the network precision and recreate the network
        InferenceEngine::CNNNetwork cnnNetFP32(createGraph(InferenceEngine::Precision::FP32));
        string inputNameFP32 = cnnNetFP32.getInputsInfo().begin()->first;
        string outputNameFP32 = cnnNetFP32.getOutputsInfo().begin()->first;
        for (const auto& inputItem : cnnNetFP32.getInputsInfo()) {
            inputItem.second->setPrecision(Precision::FP32);
        }
        auto exec_net2 = ie.LoadNetwork(cnnNetFP32, targetDevice,
                                        { { InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::NO } });
        auto req2 = exec_net2.CreateInferRequest();

        req2.SetBlob(inputNameFP32, inBlob1);

        req2.Infer();
        auto outBlobFP32 = req2.GetBlob(outputNameFP32);
        InferenceEngine::MemoryBlob::CPtr mout2 = InferenceEngine::as<InferenceEngine::MemoryBlob>(outBlobFP32);
        ASSERT_NE(mout2, nullptr);
        auto lm2 = mout2->rmap();

        FuncTestUtils::compareRawBuffers(lm1.as<const float*>(), lm2.as<const float*>(), mout1->size(), mout2->size(),
                                         FuncTestUtils::CompareType::ABS_AND_REL,
                                         threshold, threshold);

        // Stage2: verification of the performance counters
        const auto& perf_counts = req1.GetPerformanceCounts();
        std::pair<string, string> wrongLayer =
            BFloat16Helpers::matchPerfCountPrecisionVsExpected(perf_counts, expectedPrecisions);
        if (wrongLayer.first != string("")) {
            string layerInPerfCounts = wrongLayer.first + " " + wrongLayer.second;
            string layerExpected = wrongLayer.first + " " + expectedPrecisions[wrongLayer.first];
            ASSERT_EQ(layerInPerfCounts, layerExpected);
        }
        // onednn enabled the brgemm kernel, so the kernel name changed to:
        //   brgconv_avx512_(1x1)_bf16           isa: AVX512
        //   brgconv/jit_avx512_amx_(1x1)_bf16   isa: AMX
        // check the avx512 part only
        if (perf_counts.count("CONV")) {
            const std::string exec_type = perf_counts.at("CONV").exec_type;
            if (exec_type.find("avx512") == std::string::npos) {
                EXPECT_TRUE(false) << "CONV was expected to select AVX512 but actually selected: " << exec_type;
            }
        } else {
            EXPECT_TRUE(false) << "CONV NOT_FOUND_IN_PERF_COUNTS";
        }
        fnPtr.reset();
    }

    void SetUp() override {
        std::vector<size_t> inputShape;
        std::tie(netPrecision, inputShapes, targetDevice, kernel, pads, dnnlPrimitive) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        expectedPrecisions["SS_1"] = "FP32";
        expectedPrecisions["RELU"] = "ndef";
        expectedPrecisions["SS_2"] = "ndef";
    }
};

TEST_P(ConvEltwiseDepthwise, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    Run_test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_1x1_depthwise_BF16, ConvEltwiseDepthwise,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             // If the input were 1,5,1,1 it would match the postops shape (1,5,1,1).
                             // The newly enabled binary postops would then consider the shapes equal and set the
                             // broadcast strategy to 'no broadcast'. The postops layout would be nchw, while the conv
                             // output layout would be nhwc or nChw16c, neither of which matches the postops layout.
                             // So change the input size to be different from the postops'.
                             ::testing::Values(SizeVector({ 1, 5, 2, 1 })),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU),
                             ::testing::Values(size_t(1)),
                             ::testing::Values(CoordinateDiff({ 0, 0 })),
                             ::testing::Values(std::string("jit_avx512_1x1_BF16"))),
                         ConvEltwiseDepthwise::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_gemm_depthwise_BF16, ConvEltwiseDepthwise,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({ 1, 3, 10, 10 })),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU),
                             ::testing::Values(size_t(3)),
                             ::testing::Values(CoordinateDiff({ 1, 1 })),
                             ::testing::Values(std::string("jit_avx512_BF16"))),
                         ConvEltwiseDepthwise::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_conv_depthwise_BF16, ConvEltwiseDepthwise,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({ 1, 5, 10, 10 })),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU),
                             ::testing::Values(size_t(3)),
                             ::testing::Values(CoordinateDiff({ 0, 0 })),
                             ::testing::Values(std::string("jit_avx512_BF16"))),
                         ConvEltwiseDepthwise::getTestCaseName);

}  // namespace LayerTestsDefinitions
@ -1,200 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <map>
#include <functional>
#include <utility>

#include <ie_core.hpp>
#include <ie_plugin_config.hpp>

#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class ConvReLUPoolConvReLUPool : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        // Convolution1 (FP32)
        //        |
        //   ReLU1 (Fused)
        //        |
        //  Pooling1 (BF16)
        //        |
        // Convolution2 (BF16)
        //        |
        //   ReLU2 (Fused)
        //        |
        //  Pooling2 (BF16)
        //        |
        // Convolution3 (BF16)

        // STAGE1: construction of the GRAPH
        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto channelsCount = inputShapes[1];

        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");

        // convolution1
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode = std::make_shared<ngraph::opset1::Convolution>(
            input1, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
            ngraph::CoordinateDiff({ 0, 0 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode->set_friendly_name("Convolution_1");

        // ReLU
        auto reluNode = std::make_shared<opset1::Relu>(convNode);
        reluNode->set_friendly_name("ReLU_1");

        // Pooling
        auto avgpoolNode = std::make_shared<opset1::AvgPool>(reluNode,
                                                             Strides{1, 1},
                                                             Shape{1, 1},
                                                             Shape{1, 1},
                                                             Shape{2, 2},
                                                             true,
                                                             op::RoundingType::FLOOR);
        avgpoolNode->set_friendly_name("AvgPool_1");

        // convolution2
        std::shared_ptr<ngraph::opset1::Constant> weightsNode2 = nullptr;
        ngraph::Shape convFilterShape2 = { channelsCount, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
            avgpoolNode, weightsNode2,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
            ngraph::CoordinateDiff({ 0, 0 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode2->set_friendly_name("Convolution_2");

        // ReLU
        auto reluNode2 = std::make_shared<opset1::Relu>(convNode2);
        reluNode2->set_friendly_name("ReLU_2");

        // Pooling
        auto maxpoolNode2 = std::make_shared<opset1::MaxPool>(reluNode2,
                                                              Strides{1, 1},
                                                              Shape{1, 1},
                                                              Shape{0, 0},
                                                              Shape{2, 2},
                                                              op::RoundingType::FLOOR);
        maxpoolNode2->set_friendly_name("MaxPool_2");

        // convolution3
        std::shared_ptr<ngraph::opset1::Constant> weightsNode3 = nullptr;
        ngraph::Shape convFilterShape3 = { channelsCount, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape3, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape3, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode3 = std::make_shared<ngraph::opset1::Convolution>(
            maxpoolNode2, weightsNode3,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
            ngraph::CoordinateDiff({ 0, 0 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode3->set_friendly_name("Convolution_3");

        return std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode3}, ngraph::ParameterVector{input1});
    }
    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        threshold = 0.2f;  // max value in the final tensor of the FP32 network is 9.8

        // STAGE2:
        // fill in the expected precision of layer execution, defined by the precision of the input tensor to the
        // primitive and reflected in the performance counters
        expectedPrecisions["Convolution_1"] = "BF16";
        expectedPrecisions["ReLU_1"] = "ndef";
        expectedPrecisions["AvgPool_1"] = "BF16";
        expectedPrecisions["Convolution_2"] = "BF16";
        expectedPrecisions["ReLU_2"] = "ndef";
        expectedPrecisions["MaxPool_2"] = "BF16";
        expectedPrecisions["Convolution_3"] = "BF16";
    }
};

TEST_P(ConvReLUPoolConvReLUPool, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, ConvReLUPoolConvReLUPool,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         ConvReLUPoolConvReLUPool::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, ConvReLUPoolConvReLUPool,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::BF16),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         ConvReLUPoolConvReLUPool::getTestCaseName);

} // namespace LayerTestsDefinitions
@ -1,152 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <map>
#include <functional>
#include <utility>

#include <ie_core.hpp>
#include <ie_plugin_config.hpp>

#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class Elt_max : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //  Power (FP32)
        //       |
        //  Conv(BF16)   Const(FP32)
        //       |      /
        //  Eltwise(MAX)(FP32)
        //       |
        //  Conv(BF16)

        // STAGE1: construction of the GRAPH
        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto channelsCount = inputShapes[1];
        const size_t conv0OutputChannels = 1;

        // add
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> powerConst = nullptr;
        if (netPrecision == Precision::FP32) {
            powerConst = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            powerConst = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto powerNode0 = std::make_shared<opset1::Multiply>(input1, powerConst);
        powerNode0->set_friendly_name("Power_0");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode0 = nullptr, weightsNode1 = nullptr;
        ngraph::Shape convFilterShape0 = { conv0OutputChannels, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        ngraph::Shape convFilterShape1 = { 1, conv0OutputChannels, 3, 3 };              // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32_0, weightValuesFP32_1;
            weightValuesFP32_0.resize(conv0OutputChannels * channelsCount * 3 * 3);
            weightValuesFP32_1.resize(1 * conv0OutputChannels * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32_0.data(), weightValuesFP32_0.size());
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32_1.data(), weightValuesFP32_1.size());
            weightsNode0 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape0, weightValuesFP32_0);
            weightsNode1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape1, weightValuesFP32_1);
        } else {
            std::vector<short> weightValuesBF16_0, weightValuesBF16_1;
            weightValuesBF16_0.resize(conv0OutputChannels * channelsCount * 3 * 3);
            weightValuesBF16_1.resize(1 * conv0OutputChannels * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16_0.data(), weightValuesBF16_0.size());
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16_1.data(), weightValuesBF16_1.size());
            weightsNode0 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape0, weightValuesBF16_0.data());
            weightsNode1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape1, weightValuesBF16_1.data());
        }

        std::shared_ptr<ngraph::Node> convNode0 = std::make_shared<ngraph::opset1::Convolution>(
            powerNode0, weightsNode0,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode0->set_friendly_name("Convolution_0");

        // Eltwise, i.e. Max
        std::shared_ptr<ngraph::opset1::Constant> maxConst = nullptr;
        auto batchSize = inputShapes[0];
        auto heightSize = inputShapes[2];
        auto widthSize = inputShapes[3];
        if (netPrecision == Precision::FP32) {
            maxConst = opset1::Constant::create(ntype, Shape{batchSize, conv0OutputChannels, heightSize, widthSize}, { 2.0f });
        } else {
            maxConst = opset1::Constant::create(ntype, Shape{batchSize, conv0OutputChannels, heightSize, widthSize},
                                                { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        maxConst->set_friendly_name("Max_const");
        auto eltMaxNode = std::make_shared<opset1::Maximum>(convNode0, maxConst);
        eltMaxNode->set_friendly_name("Elt_max");

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            eltMaxNode, weightsNode1,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("Convolution_1");

        return std::make_shared<ngraph::Function>(convNode1, ngraph::ParameterVector{input1});
    }
    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE2: set up a safe threshold <= 5% of the maximum value of the output tensor
        threshold = 0.2f;  // Max in fp32 network by output: 20.0761

        // STAGE3:
        // fill in the expected precision of layer execution, defined by the precision of the input tensor to the
        // primitive and reflected in the performance counters
        expectedPrecisions["Convolution_0"] = "BF16";
        expectedPrecisions["Convolution_1"] = "BF16";
    }
};

TEST_P(Elt_max, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, Elt_max,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({1, 3, 40, 40})),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         Elt_max::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, Elt_max,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::BF16),
                             ::testing::Values(SizeVector({1, 3, 40, 40})),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         Elt_max::getTestCaseName);
} // namespace LayerTestsDefinitions
@ -1,211 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <map>
#include <functional>
#include <utility>

#include <ie_core.hpp>
#include <ie_plugin_config.hpp>

#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class Elt_x3 : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        /*              Power (FP32)
         *             /     |      \
         *     Conv(BF16) Conv(BF16) Conv(BF16)
         *            /      |          /
         * ----------------------------------------------
         *  Eltwise(MAX)(FP32)     Eltwise(Mul) (FP32)
         *          |                    |
         *      Conv(BF16)           Conv(BF16)
         *            \              /
         *           Eltwise (SUM)(BF16)
         *                   |
         *              Conv (BF16)
         */

        auto channelsCount = inputShapes[1];

        // STAGE1: construction of the GRAPH
        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        // add
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> addConst = nullptr;
        if (netPrecision == Precision::FP32) {
            addConst = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            addConst = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto addNode0 = std::make_shared<opset1::Multiply>(input1, addConst);
        addNode0->set_friendly_name("Add_0");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode0_1 = nullptr, weightsNode0_2 = nullptr,
                                                  weightsNode0_3 = nullptr, weightsNode1 = nullptr,
                                                  weightsNode2 = nullptr, weightsNode3 = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode0_1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
            weightsNode0_2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
            weightsNode0_3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
            weightsNode1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
            weightsNode3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode0_1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
            weightsNode0_2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
            weightsNode0_3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
            weightsNode1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
            weightsNode3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode0_1 = std::make_shared<ngraph::opset1::Convolution>(
            addNode0, weightsNode0_1,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode0_1->set_friendly_name("Convolution_0_1");

        std::shared_ptr<ngraph::Node> convNode0_2 = std::make_shared<ngraph::opset1::Convolution>(
            addNode0, weightsNode0_2,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode0_2->set_friendly_name("Convolution_0_2");

        std::shared_ptr<ngraph::Node> convNode0_3 = std::make_shared<ngraph::opset1::Convolution>(
            addNode0, weightsNode0_3,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode0_3->set_friendly_name("Convolution_0_3");

        // Eltwise, i.e. Mul
        auto eltMulNode = std::make_shared<opset1::Multiply>(convNode0_1, convNode0_2);
        eltMulNode->set_friendly_name("Elt_mul");

        // Eltwise, i.e. Max
        std::shared_ptr<ngraph::opset1::Constant> maxConst = nullptr;
        if (netPrecision == Precision::FP32) {
            maxConst = opset1::Constant::create(ntype, Shape{inputShapes}, { 2.0f });
        } else {
            maxConst = opset1::Constant::create(ntype, Shape{inputShapes},
                                                { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto eltMaxNode = std::make_shared<opset1::Maximum>(convNode0_3, maxConst);
        eltMaxNode->set_friendly_name("Elt_max");

        // convolution
        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            eltMulNode, weightsNode1,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("Convolution_1");

        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
            eltMaxNode, weightsNode2,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode2->set_friendly_name("Convolution_2");

        // eltwise, i.e. sum
        auto eltSumNode = std::make_shared<opset1::Add>(convNode1, convNode2);
        eltSumNode->set_friendly_name("Elt_sum");

        // convolution
        std::shared_ptr<ngraph::Node> convNode3 = std::make_shared<ngraph::opset1::Convolution>(
            eltSumNode, weightsNode3,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode3->set_friendly_name("Convolution_3");

        return std::make_shared<ngraph::Function>(convNode3, ngraph::ParameterVector{input1});
    }
    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE2: set up a safe threshold <= 5% of the maximum value of the output tensor

        // 256 channels, 38 x 38 size
        // threshold = 0.6f;  // Max in fp32 network by output: 12.0983

        // 3 channels, 4 x 4 size
        threshold = 30.6f;  // Max in fp32 network by output: 879.077

        // STAGE3:
        // fill in the expected precision of layer execution, defined by the precision of the input tensor to the
        // primitive and reflected in the performance counters
        expectedPrecisions["Convolution_1"] = "BF16";
        expectedPrecisions["Convolution_2"] = "BF16";
        expectedPrecisions["Convolution_3"] = "BF16";
        expectedPrecisions["Elt_sum"] = "ndef";
    }
};

TEST_P(Elt_x3, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, Elt_x3,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({1, 3, 4, 4})),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         Elt_x3::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, Elt_x3,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::BF16),
                             ::testing::Values(SizeVector({1, 3, 4, 4})),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         Elt_x3::getTestCaseName);

} // namespace LayerTestsDefinitions
@ -1,139 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <map>
#include <functional>
#include <utility>

#include <ie_core.hpp>
#include <ie_plugin_config.hpp>

#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class Faster100_5_1_1_Conv : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //  Power (FP32)
        //       |
        //  Convolution (BF16)

        // STAGE1: construction of the GRAPH
        auto channelsCount = inputShapes[1];

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("Add_4");

        // problematic convolution: 100x5x1x1
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 1, 1 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValues;
            weightValues.resize(channelsCount * channelsCount * 1 * 1, 0.f);
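            // a few individual taps set to 1.0f make a sparse 1x1 kernel; presumably this keeps the
            // reference result simple while still exercising the problematic 1x1 convolution named above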
            weightValues[0] = 1.0f;
            weightValues[7] = 1.0f;
            weightValues[11] = 1.0f;
            weightValues[19] = 1.0f;
            weightValues[23] = 1.0f;
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ngraph::element::f32, convFilterShape, weightValues);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 1 * 1, FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(0.0f));
            weightValuesBF16[0] = FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f);
            weightValuesBF16[7] = FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f);
            weightValuesBF16[11] = FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f);
            weightValuesBF16[19] = FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f);
            weightValuesBF16[23] = FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f);
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
            ngraph::CoordinateDiff({ 0, 0 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode->set_friendly_name("Convolution_6");

        // ReLU
        auto reluNode = std::make_shared<opset1::Relu>(convNode);

        return std::make_shared<ngraph::Function>(ngraph::NodeVector{reluNode}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE2:
        // fill in the expected precision of layer execution, defined by the precision of the input tensor to the
        // primitive and reflected in the performance counters
        expectedPrecisions["Add_4"] = netPrecision.name();
        expectedPrecisions["Convolution_6"] = "BF16";
    }
};

TEST_P(Faster100_5_1_1_Conv, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_bfloat16_NoReshape, Faster100_5_1_1_Conv,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({ 10, 5, 1, 1 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         Faster100_5_1_1_Conv::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, Faster100_5_1_1_Conv,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::BF16),
                             ::testing::Values(SizeVector({ 10, 5, 1, 1 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         Faster100_5_1_1_Conv::getTestCaseName);

} // namespace LayerTestsDefinitions
@ -1,130 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <map>
#include <functional>
#include <utility>

#include <ie_core.hpp>
#include <ie_plugin_config.hpp>

#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class Gather_multiply : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //  Add (FP32)
        //      |
        //   FC (BF16)
        //     /
        // -------------------------------------------
        // Gather(BF16)   Const
        //       \       /
        //       Mul(FP32)

        // STAGE1: construction of the GRAPH
        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto inputSize = inputShapes[1];

        // add
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});

        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> addConst = nullptr;
        if (netPrecision == Precision::FP32) {
            addConst = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            addConst = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto addNode0 = std::make_shared<opset1::Multiply>(input1, addConst);
        addNode0->set_friendly_name("Add_1");

        // matmul
        std::shared_ptr<ngraph::opset1::Constant> matmulConst0 = nullptr;
        if (netPrecision == Precision::FP32) {
            matmulConst0 = opset1::Constant::create(ntype, Shape{inputSize, inputSize}, { 2.0f });
        } else {
            matmulConst0 = opset1::Constant::create(ntype, Shape{inputSize, inputSize},
                                                    { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto matmulNode = std::make_shared<opset1::MatMul>(addNode0, matmulConst0);
        matmulNode->set_friendly_name("Matmul_0");

        // gather
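        // gather along axis 1 with identity indices 0..inputSize-1, i.e. a pass-through gather
        // that still exercises the Gather primitive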
        auto axesConst = opset1::Constant::create(ngraph::element::i64, Shape{1}, { 1 });
        std::vector<size_t> gatherArray;
        for (size_t i = 0; i < inputSize; i++) {
            gatherArray.push_back(i);
        }
        auto indexesConst = opset1::Constant::create(ngraph::element::i64, Shape{inputSize}, gatherArray);
        auto gatherNode = std::make_shared<opset1::Gather>(matmulNode, indexesConst, axesConst);
        gatherNode->set_friendly_name("Gather_1");

        // multiply
        std::shared_ptr<ngraph::opset1::Constant> mulConst = nullptr;
        if (netPrecision == Precision::FP32) {
            mulConst = opset1::Constant::create(ntype, Shape{inputShapes}, { 2.0f });
        } else {
            mulConst = opset1::Constant::create(ntype, Shape{inputShapes},
                                                { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(gatherNode, mulConst);
        mulNode->set_friendly_name("Mul_1");

        return std::make_shared<ngraph::Function>(mulNode, ngraph::ParameterVector{input1});
    }
    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE2: set up a safe threshold <= 5% of the maximum value of the output tensor
        threshold = 0.4f;  // Max in fp32 network by output: 9.20144

        // STAGE3:
        // fill in the expected precision of layer execution, defined by the precision of the input tensor to the
        // primitive and reflected in the performance counters
        expectedPrecisions["Matmul_0"] = "BF16";
        expectedPrecisions["Mul_1"] = netPrecision.name();  // tail kept in FP32 precision
    }
};

TEST_P(Gather_multiply, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, Gather_multiply,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::BF16),
                             ::testing::Values(SizeVector({2048, 64})),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         Gather_multiply::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, Gather_multiply,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({2048, 64})),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         Gather_multiply::getTestCaseName);
} // namespace LayerTestsDefinitions
@ -1,158 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <map>
#include <functional>
#include <utility>

#include <ie_core.hpp>
#include <ie_plugin_config.hpp>

#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class Gather_x2_add_mul_relu_concat_matmul : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //  Add (FP32)
        //      |
        //  FullyConnected (BF16)
        //     /       |        \
        // -------------------------------------------
        // Gather(FP32)  Gather(FP32)   Add (FP32)
        //       \       /               /
        //       Mul(FP32)        ReLU(FP32)
        //            \            /
        //           Concat(BF16)      Const
        //                 \          /
        //                 Matmul(BF16)

        // STAGE1: construction of the GRAPH
        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        // add
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        auto inputSize = inputShapes[1];

        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> addConst = nullptr;
        if (netPrecision == Precision::FP32) {
            addConst = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            addConst = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto addNode0 = std::make_shared<opset1::Multiply>(input1, addConst);
        addNode0->set_friendly_name("Add_1");

        // matmul
        std::shared_ptr<ngraph::opset1::Constant> matmulConst0 = nullptr;
        if (netPrecision == Precision::FP32) {
            matmulConst0 = opset1::Constant::create(ntype, Shape{inputSize, inputSize}, { 2.0f });
        } else {
            matmulConst0 = opset1::Constant::create(ntype, Shape{inputSize, inputSize},
                                                    { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto matmulNode = std::make_shared<opset1::MatMul>(addNode0, matmulConst0);
        matmulNode->set_friendly_name("Matmul_0");

        // gather
        std::vector<size_t> gatherArray;
        for (size_t i = 0; i < inputSize; i++) {
            gatherArray.push_back(i);
        }
        auto axesConst = opset1::Constant::create(ngraph::element::i64, Shape{1}, { 1 });
        auto indexesConst = opset1::Constant::create(ngraph::element::i64, Shape{inputSize}, gatherArray);
        auto gatherNode1 = std::make_shared<opset1::Gather>(matmulNode, indexesConst, axesConst);
        gatherNode1->set_friendly_name("Gather_1");

        auto gatherNode2 = std::make_shared<opset1::Gather>(matmulNode, indexesConst, axesConst);
        gatherNode2->set_friendly_name("Gather_2");

        // multiply
        auto mulNode = std::make_shared<opset1::Multiply>(gatherNode1, gatherNode2);
        mulNode->set_friendly_name("Mul_1");

        // add
        auto addNode1 = std::make_shared<opset1::Multiply>(matmulNode, addConst);
        addNode1->set_friendly_name("Add_2");

        // ReLU
        auto reluNode = std::make_shared<opset1::Relu>(addNode1);
        reluNode->set_friendly_name("Relu_1");

        // Concat
        ngraph::NodeVector concInputNodes = {mulNode, reluNode};
        auto concNode = std::make_shared<opset1::Concat>(concInputNodes, 1);
        concNode->set_friendly_name("Conc_1");

        // matmul
        std::shared_ptr<ngraph::opset1::Constant> matmulConst1 = nullptr;
        if (netPrecision == Precision::FP32) {
            matmulConst1 = opset1::Constant::create(ntype, Shape{inputSize * 2, inputSize * 2}, { 2.0f });
        } else {
            matmulConst1 = opset1::Constant::create(ntype, Shape{inputSize * 2, inputSize * 2},
                                                    { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto matmulNode1 = std::make_shared<opset1::MatMul>(concNode, matmulConst1);
        matmulNode1->set_friendly_name("Matmul_1");

        return std::make_shared<ngraph::Function>(matmulNode1, ngraph::ParameterVector{input1});
    }
    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE2: set up a safe threshold <= 5% of the maximum value of the output tensor
        threshold = 177.f;  // Max in fp32 network by output: 3887.11

        // STAGE3:
        // fill in the expected precision of layer execution, defined by the precision of the input tensor to the
        // primitive and reflected in the performance counters
        expectedPrecisions["Matmul_0"] = "BF16";
        expectedPrecisions["Mul_1"] = "BF16";
        expectedPrecisions["Add_1"] = netPrecision.name();  // FP32->BF16 in case of FP32 net, BF16->BF16 in case of BF16 net
        expectedPrecisions["Relu_1"] = "ndef";
        expectedPrecisions["Conc_1"] = "BF16";
        expectedPrecisions["Matmul_1"] = "BF16";
    }
};

TEST_P(Gather_x2_add_mul_relu_concat_matmul, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, Gather_x2_add_mul_relu_concat_matmul,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({ 2048, 64 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         Gather_x2_add_mul_relu_concat_matmul::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, Gather_x2_add_mul_relu_concat_matmul,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::BF16),
                             ::testing::Values(SizeVector({ 2048, 64 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         Gather_x2_add_mul_relu_concat_matmul::getTestCaseName);

} // namespace LayerTestsDefinitions
@ -1,115 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <string>
#include <fstream>

#include "shared_test_classes/base/layer_test_utils.hpp"
#include "ie_system_conf.h"

#include <ngraph/ngraph.hpp>

namespace LayerTestsDefinitions {

using InferenceEngine::Precision;
using InferenceEngine::SizeVector;

class MemoryConv : public testing::WithParamInterface<LayerTestsUtils::basicParams>,
                   public LayerTestsUtils::LayerTestsCommon {
public:
    static std::string getTestCaseName(testing::TestParamInfo<LayerTestsUtils::basicParams> obj) {
        Precision netPrecision;
        SizeVector inputShapes;
        std::string targetDevice;
        std::tie(netPrecision, inputShapes, targetDevice) = obj.param;

        std::ostringstream result;
        result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
        result << "netPRC=" << netPrecision.name() << "_";
        result << "targetDevice=" << targetDevice;
        return result.str();
    }

protected:
    void SetUp() override {
        SizeVector ie_shape;
        std::tie(inPrc, ie_shape, targetDevice) = this->GetParam();

        using namespace ngraph;
        using std::make_shared;

        Shape shape = ie_shape;
        size_t C = shape[1];
        element::Type type = ngraph::element::f32;

        auto input = make_shared<op::v0::Parameter>(type, shape);
        auto mem_i = make_shared<op::v0::Constant>(type, shape, 0);
        auto mem_r = make_shared<op::v3::ReadValue>(mem_i, "id");

        auto mul = make_shared<op::v1::Multiply>(mem_r, input);
        auto sig = make_shared<op::v0::Sigmoid>(mul);

        auto fc1_w = make_shared<op::v0::Constant>(type, Shape{C, C}, 1);
        auto fc1_b = make_shared<op::v0::Constant>(type, Shape{C}, 1);
        auto fc1 = make_shared<op::v0::MatMul>(sig, fc1_w);
        auto bias_1 = make_shared<op::v1::Add>(fc1, fc1_b);

        auto fc2_w = make_shared<op::v0::Constant>(type, Shape{C, C}, 1);
        auto fc2_b = make_shared<op::v0::Constant>(type, Shape{C}, 1);
        auto fc2 = make_shared<op::v0::MatMul>(bias_1, fc2_w);
        auto bias_2 = make_shared<op::v1::Add>(fc2, fc2_b);

        auto mem_w = make_shared<op::v3::Assign>(bias_1, "id");

        // WA for an ngraph limitation: explicit control dependencies are required
        // to keep the ReadValue/Assign pair ordered relative to the rest of the graph.
        mem_w->add_control_dependency(mem_r);
        bias_2->add_control_dependency(mem_w);

        function = std::make_shared<ngraph::Function>(
            ngraph::NodeVector {bias_2},
            ngraph::ParameterVector {input},
            "SimpleNet");
    }
};

TEST_P(MemoryConv, CheckTypeConversion) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    if (!InferenceEngine::with_cpu_x86_bfloat16())
        GTEST_SKIP();

    auto ie = PluginCache::get().ie();
    auto net = InferenceEngine::CNNNetwork(function);
    auto exe_net = ie->LoadNetwork(net, "CPU");
    auto inf_reg = exe_net.CreateInferRequest();

    // check data type via exec graph
    auto exec_graph = exe_net.GetExecGraphInfo();
    auto exec_ops = exec_graph.getFunction()->get_ops();
    std::shared_ptr<ngraph::Node> mem_r, mem_w;
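    // in the CPU plugin's execution graph, the ReadValue/Assign pair surfaces as
    // MemoryInput/MemoryOutput runtime nodes, which is what the loop below looks for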
    for (auto &node : exec_ops) {
        auto var = node->get_rt_info()["layerType"];
        auto s_val = var.as<std::string>();
        if (s_val == "MemoryOutput")
            mem_w = node;
        if (s_val == "MemoryInput")
            mem_r = node;
    }

    ASSERT_NE(nullptr, mem_r);
    ASSERT_EQ(ngraph::element::bf16, mem_r->output(0).get_element_type());

    ASSERT_NE(nullptr, mem_w);
    ASSERT_EQ(ngraph::element::bf16, mem_w->input(0).get_element_type());
}

INSTANTIATE_TEST_SUITE_P(smoke_CPU, MemoryConv,
                         ::testing::Combine(
                             ::testing::Values<Precision>(Precision::BF16, Precision::FP32),
                             ::testing::Values(SizeVector{1, 200}),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         MemoryConv::getTestCaseName);

} // namespace LayerTestsDefinitions
@ -1,185 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <vector>
#include <string>

#include <ie_core.hpp>
#include "functional_test_utils/blob_utils.hpp"
#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class MobileNet_ssd_with_branching : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //      scaleshift
        //          |
        //      Conv1 (FP32)
        //          |      \
        //      Conv2 (BF16)  \
        //          |          |
        //      relu(fused)    |
        //          |        Normalize (not LRN)
        //   Conv (DW)(BF16)   |
        //          |          |
        //     ReLU (Fused)    |
        //           \        /
        //             Concat

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto channelsCount = inputShapes[1];

        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("ADD_1");

        // Conv1
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("CONV_1");

        // Conv2
        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
            convNode1, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode2->set_friendly_name("CONV_2");

        // ReLU
        auto reluNode = std::make_shared<opset1::Relu>(convNode2);
        reluNode->set_friendly_name("RELU_2");

        // DW convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode2 = nullptr;
        ngraph::Shape convFilterShape2 = { channelsCount, 1, 1, 3, 3 };  // groups, out channels per group, input channels per group, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValues2FP32;
            weightValues2FP32.resize(channelsCount * 1 * 1 * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValues2FP32.data(), weightValues2FP32.size());
            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2FP32);
        } else {
            std::vector<short> weightValues2BF16;
            weightValues2BF16.resize(channelsCount * 1 * 1 * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValues2BF16.data(), weightValues2BF16.size());
            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2BF16.data());
        }

        std::shared_ptr<ngraph::Node> dwConvNode = std::make_shared<ngraph::opset1::GroupConvolution>(
            reluNode, weightsNode2,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        dwConvNode->set_friendly_name("DW_CONV");

        // ReLU
        auto reluNode2 = std::make_shared<opset1::Relu>(dwConvNode);
        reluNode2->set_friendly_name("RELU_DW");

        // normalize
        const auto axes = make_shared<op::Constant>(element::i64, Shape{2}, vector<int64_t>{2});
        float eps{1e-6f};
        auto eps_mode = op::EpsMode::ADD;

        auto normNode = std::make_shared<opset1::NormalizeL2>(convNode1, axes, eps, eps_mode);
        normNode->set_friendly_name("NORM_1");

        // Concat
        ngraph::NodeVector concInputNodes = { reluNode2, normNode };
        auto concNode = std::make_shared<opset1::Concat>(concInputNodes, 1);
        concNode->set_friendly_name("CONC_1");

        return std::make_shared<ngraph::Function>(concNode, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE1:
        threshold = 0.85f;  // max value in the final tensor is 87.67
        // STAGE2:
        // fill in the expected precision of layer execution, defined by the precision of the input tensor to the
        // primitive and reflected in the performance counters
        expectedPrecisions["ADD_1"] = "FP32";
        expectedPrecisions["CONV_1"] = "BF16";
        expectedPrecisions["RELU_2"] = "ndef";
        expectedPrecisions["DW_CONV"] = "BF16";
        expectedPrecisions["RELU_DW"] = "ndef";
    }
};

TEST_P(MobileNet_ssd_with_branching, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, MobileNet_ssd_with_branching,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         MobileNet_ssd_with_branching::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, MobileNet_ssd_with_branching,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::BF16),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         MobileNet_ssd_with_branching::getTestCaseName);

} // namespace LayerTestsDefinitions
@ -1,157 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <functional>
#include <map>
#include <utility>

#include <ie_core.hpp>

#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class ScaleshiftConvEltwiseConv : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        // scaleshift (FP32)    Conv (FP32)
        //          \            /
        //      Eltwise (Fused into Conv)
        //               |
        //           Conv (BF16)

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        auto channelsCount = inputShapes[1];

        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("ADD_1");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }
std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
|
||||
input1, weightsNode,
|
||||
ngraph::Strides({ 1, 1 }), // strides
|
||||
ngraph::CoordinateDiff({ 1, 1 }), // pad begin
|
||||
ngraph::CoordinateDiff({ 1, 1 }), // pad end
|
||||
ngraph::Strides({ 1, 1 }), // dilation
|
||||
ngraph::op::PadType::EXPLICIT); // pad type
|
||||
convNode1->set_friendly_name("CONV_1");
|
||||
|
||||
// Eltwise, i.e. Add
|
||||
auto eltNode = std::make_shared<opset1::Add>(addNode, convNode1);
|
||||
eltNode->set_friendly_name("ELT_1");
|
||||
|
||||
// Convolution
|
||||
std::shared_ptr<ngraph::opset1::Constant> weightsNode2 = nullptr;
|
||||
ngraph::Shape convFilterShape2 = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
|
||||
if (netPrecision == Precision::FP32) {
|
||||
std::vector<float> weightValues2;
|
||||
weightValues2.resize(channelsCount * channelsCount * 3 * 3);
|
||||
FuncTestUtils::fillInputsBySinValues(weightValues2.data(), weightValues2.size());
|
||||
weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2);
|
||||
} else {
|
||||
std::vector<short> weightValues2BF16;
|
||||
weightValues2BF16.resize(channelsCount * channelsCount * 3 * 3);
|
||||
FuncTestUtils::fillInputsBySinValues(weightValues2BF16.data(), weightValues2BF16.size());
|
||||
weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2BF16.data());
|
||||
}
|
||||
|
||||
std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
|
||||
eltNode, weightsNode2,
|
||||
ngraph::Strides({ 1, 1 }), // strides
|
||||
ngraph::CoordinateDiff({ 0, 0 }), // pad begin
|
||||
ngraph::CoordinateDiff({ 0, 0 }), // pad end
|
||||
ngraph::Strides({ 1, 1 }), // dilation
|
||||
ngraph::op::PadType::EXPLICIT); // pad type
|
||||
convNode2->set_friendly_name("CONV_2");
|
||||
|
||||
return std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode2}, ngraph::ParameterVector{input1});
|
||||
}
|
||||
|
||||
void SetUp() override {
|
||||
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
|
||||
fnPtr = createGraph(netPrecision);
|
||||
|
||||
// STAGE1:
|
||||
threshold = 1.0f; // max value in the latest tensor for FP32 network is 37.77
|
||||
// STAGE2:
|
||||
// filling of expected precision of layer execution defined by precisoin of input tensor to the primitive and reflected in
|
||||
// performance counters
|
||||
expectedPrecisions["ADD_1"] = netPrecision.name();
|
||||
expectedPrecisions["CONV_1"] = "BF16";
|
||||
expectedPrecisions["CONV_2"] = "BF16";
|
||||
expectedPrecisions["ELT_1"] = "ndef";
|
||||
}
|
||||
};
|
||||
|
||||
TEST_P(ScaleshiftConvEltwiseConv, CompareWithRefImpl) {
|
||||
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
||||
|
||||
test();
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, ScaleshiftConvEltwiseConv,
|
||||
::testing::Combine(
|
||||
::testing::Values(Precision::FP32),
|
||||
::testing::Values(Precision::FP32),
|
||||
::testing::Values(SizeVector({ 1, 3, 40, 40 })),
|
||||
::testing::Values(SizeVector()),
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU)),
|
||||
ScaleshiftConvEltwiseConv::getTestCaseName);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, ScaleshiftConvEltwiseConv,
|
||||
::testing::Combine(
|
||||
::testing::Values(Precision::FP32),
|
||||
::testing::Values(Precision::BF16),
|
||||
::testing::Values(SizeVector({ 1, 3, 40, 40 })),
|
||||
::testing::Values(SizeVector()),
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU)),
|
||||
ScaleshiftConvEltwiseConv::getTestCaseName);
|
||||
|
||||
} // namespace LayerTestsDefinitions
|
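// The FP32/BF16 instantiation pattern repeated across these files, reduced to a minimal
// self-contained GoogleTest sketch. The tuple layout mirrors the tests above (input precision,
// net precision, input shape, reshape shape, device); the suite and type names here are
// hypothetical, not part of the removed tests.
#include <gtest/gtest.h>
#include <string>
#include <tuple>
#include <vector>

using DemoShape  = std::vector<size_t>;
using DemoParams = std::tuple<std::string, std::string, DemoShape, DemoShape, std::string>;

class PrecisionSuite : public ::testing::TestWithParam<DemoParams> {};

TEST_P(PrecisionSuite, HasDevice) {
    // Unpack the parameter tuple exactly as the SetUp() overrides above do.
    std::string inPrc, netPrc, device;
    DemoShape shape, reshape;
    std::tie(inPrc, netPrc, shape, reshape, device) = GetParam();
    EXPECT_FALSE(device.empty());
}

// One Combine() per precision mode yields the smoke_FP32_* / smoke_BF16_* pairs seen above.
INSTANTIATE_TEST_SUITE_P(smoke_Demo, PrecisionSuite,
    ::testing::Combine(
        ::testing::Values(std::string("FP32")),
        ::testing::Values(std::string("FP32"), std::string("BF16")),
        ::testing::Values(DemoShape({ 1, 3, 40, 40 })),
        ::testing::Values(DemoShape()),
        ::testing::Values(std::string("CPU"))));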
@ -1,163 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <functional>
#include <map>
#include <utility>

#include <ie_core.hpp>

#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class ScaleshiftConvEltwiseReluConv : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        // scaleshift (FP32)   Conv (BF16)
        //          \             /
        //        Eltwise (Fused into Conv)
        //               |
        //        ReLU (Fused into Conv)
        //               |
        //           Conv (BF16)

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto channelsCount = inputShapes[1];

        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("ADD_1");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // output channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            input1, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("CONV_1");

        // Eltwise, i.e. Add
        auto eltNode = std::make_shared<opset1::Add>(addNode, convNode1);
        eltNode->set_friendly_name("ELT_1");

        // ReLU
        auto reluNode = std::make_shared<opset1::Relu>(eltNode);
        reluNode->set_friendly_name("RELU_1");

        // Convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode2 = nullptr;
        ngraph::Shape convFilterShape2 = { channelsCount, channelsCount, 3, 3 };  // output channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValues2;
            weightValues2.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValues2.data(), weightValues2.size());
            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2);
        } else {
            std::vector<short> weightValues2BF16;
            weightValues2BF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValues2BF16.data(), weightValues2BF16.size());
            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2BF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
            reluNode, weightsNode2,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
            ngraph::CoordinateDiff({ 0, 0 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode2->set_friendly_name("CONV_2");

        return std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode2}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE1:
        threshold = 1.0f;  // max in the FP32 network by output of CONV_2: 30.1374
        // STAGE2:
        // filling of the expected precision of layer execution, defined by the precision of the input
        // tensor to the primitive and reflected in performance counters
        expectedPrecisions["ADD_1"] = netPrecision.name();
        expectedPrecisions["CONV_1"] = "BF16";
        expectedPrecisions["CONV_2"] = "BF16";
        expectedPrecisions["RELU_1"] = "ndef";
        expectedPrecisions["ELT_1"] = "ndef";
    }
};

TEST_P(ScaleshiftConvEltwiseReluConv, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, ScaleshiftConvEltwiseReluConv,
    ::testing::Combine(
        ::testing::Values(Precision::FP32),
        ::testing::Values(Precision::FP32),
        ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
        ::testing::Values(SizeVector()),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    ScaleshiftConvEltwiseReluConv::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, ScaleshiftConvEltwiseReluConv,
    ::testing::Combine(
        ::testing::Values(Precision::FP32),
        ::testing::Values(Precision::BF16),
        ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
        ::testing::Values(SizeVector()),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    ScaleshiftConvEltwiseReluConv::getTestCaseName);

} // namespace LayerTestsDefinitions
@ -1,155 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <functional>
#include <map>
#include <utility>

#include <ie_core.hpp>

#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class ScaleshiftConvEltwiseScaleshift : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //          input
        //         /     \
        // scaleshift (FP32)  \
        //        |            |
        //   Conv (BF16)       |
        //         \          /
        //       Eltwise (Fused into Conv)
        //              |
        //            ReLU
        //              |
        //      scaleshift (FP32)

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto channelsCount = inputShapes[1];

        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("ADD_1");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // output channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("CONV_1");

        // Eltwise, i.e. Add
        auto eltNode = std::make_shared<opset1::Add>(input1, convNode1);
        eltNode->set_friendly_name("ELT_1");

        auto reluNode = std::make_shared<opset1::Relu>(eltNode);
        reluNode->set_friendly_name("RELU_1");

        // multiply
        std::shared_ptr<ngraph::opset1::Constant> const3 = nullptr;
        if (netPrecision == Precision::FP32) {
            const3 = opset1::Constant::create(ntype, Shape{1}, { 3.0f });
        } else {
            const3 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(3.0f)) });
        }
        auto mulNode2 = std::make_shared<opset1::Multiply>(reluNode, const3);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const4 = nullptr;
        if (netPrecision == Precision::FP32) {
            const4 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const4 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto addNode2 = std::make_shared<opset1::Add>(mulNode2, const4);
        addNode2->set_friendly_name("ADD_2");

        return std::make_shared<ngraph::Function>(ngraph::NodeVector{addNode2}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE1:
        threshold = 0.4f;
        // STAGE2:
        // filling of the expected precision of layer execution, defined by the precision of the input
        // tensor to the primitive and reflected in performance counters
        expectedPrecisions["ADD_1"] = netPrecision.name();
        expectedPrecisions["CONV_1"] = "BF16";
        expectedPrecisions["ELT_1"] = "ndef";
    }
};

TEST_P(ScaleshiftConvEltwiseScaleshift, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, ScaleshiftConvEltwiseScaleshift,
    ::testing::Combine(
        ::testing::Values(Precision::FP32),
        ::testing::Values(Precision::FP32),
        ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
        ::testing::Values(SizeVector()),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    ScaleshiftConvEltwiseScaleshift::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, ScaleshiftConvEltwiseScaleshift,
    ::testing::Combine(
        ::testing::Values(Precision::FP32),
        ::testing::Values(Precision::BF16),
        ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
        ::testing::Values(SizeVector()),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    ScaleshiftConvEltwiseScaleshift::getTestCaseName);

} // namespace LayerTestsDefinitions
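// How the expectedPrecisions maps filled in the SetUp() overrides above are typically consumed:
// compare each layer's recorded execution precision against the expectation, where "ndef" marks
// a layer fused away (no standalone performance counter). A sketch only; the real check lives in
// BasicBF16Test in bfloat16_helpers.hpp, which this diff does not show, and precisionsMatch is
// a hypothetical name.
#include <map>
#include <string>
#include <cassert>

static bool precisionsMatch(const std::map<std::string, std::string>& expected,
                            const std::map<std::string, std::string>& measured) {
    for (const auto& kv : expected) {
        if (kv.second == "ndef")
            continue;  // fused layers have no counter of their own
        auto it = measured.find(kv.first);
        if (it == measured.end() || it->second != kv.second)
            return false;
    }
    return true;
}

int main() {
    std::map<std::string, std::string> expected{{"CONV_1", "BF16"}, {"ELT_1", "ndef"}};
    std::map<std::string, std::string> measured{{"CONV_1", "BF16"}};  // ELT_1 fused, so absent
    assert(precisionsMatch(expected, measured));
    return 0;
}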
@ -1,143 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <functional>
#include <map>
#include <utility>

#include <ie_core.hpp>

#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class ScaleshiftConvEluConv : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        // scaleshift (FP32)
        //        |
        //   Conv (BF16)
        //        |
        //  Elu (FP32 for now; this must be fixed, and it must be fused into Conv)
        //        |
        //   Conv (BF16)

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto channelsCount = inputShapes[1];

        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("ADD_1");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // output channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("CONV_1");

        // Elu
        auto eluNode = std::make_shared<opset1::Elu>(convNode1, 2);
        eluNode->set_friendly_name("ELU_1");

        // Conv
        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
            eluNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode2->set_friendly_name("CONV_2");

        return std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode2}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE1:
        threshold = 1.0f;
        // STAGE2:
        // filling of the expected precision of layer execution, defined by the precision of the input
        // tensor to the primitive and reflected in performance counters
        expectedPrecisions["ADD_1"] = netPrecision.name();
        expectedPrecisions["CONV_1"] = "BF16";
        expectedPrecisions["CONV_2"] = "BF16";
    }
};

TEST_P(ScaleshiftConvEluConv, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, ScaleshiftConvEluConv,
    ::testing::Combine(
        ::testing::Values(Precision::FP32),
        ::testing::Values(Precision::FP32),
        ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
        ::testing::Values(SizeVector()),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    ScaleshiftConvEluConv::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, ScaleshiftConvEluConv,
    ::testing::Combine(
        ::testing::Values(Precision::FP32),
        ::testing::Values(Precision::BF16),
        ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
        ::testing::Values(SizeVector()),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    ScaleshiftConvEluConv::getTestCaseName);

} // namespace LayerTestsDefinitions
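// Reference semantics of the Elu node above (constructed with alpha = 2), useful when reasoning
// about the comparison thresholds: f(x) = x for x > 0, alpha * (exp(x) - 1) otherwise. A minimal
// standalone sketch, not the plugin implementation.
#include <cmath>
#include <cstdio>

static float elu(float x, float alpha) {
    return x > 0.0f ? x : alpha * (std::exp(x) - 1.0f);
}

int main() {
    printf("elu(1.5, 2)  = %f\n", elu(1.5f, 2.0f));   // positive inputs pass through: 1.5
    printf("elu(-1.0, 2) = %f\n", elu(-1.0f, 2.0f));  // 2 * (e^-1 - 1) ~= -1.2642
    return 0;
}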
@ -1,130 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <functional>
#include <map>
#include <utility>

#include <ie_core.hpp>

#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class ScaleshiftConvRelu : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        // scaleshift (FP32)
        //        |
        //   Conv (BF16)
        //        |
        //  relu (Fused into convolution)

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto channelsCount = inputShapes[1];

        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("ADD_1");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // output channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("CONV_1");

        // ReLU
        auto reluNode = std::make_shared<opset1::Relu>(convNode1);
        reluNode->set_friendly_name("RELU_1");

        return std::make_shared<ngraph::Function>(ngraph::NodeVector{reluNode}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE1:
        threshold = 1e-1f;
        // STAGE2:
        // filling of the expected precision of layer execution, defined by the precision of the input
        // tensor to the primitive and reflected in performance counters
        expectedPrecisions["ADD_1"] = netPrecision.name();
        expectedPrecisions["CONV_1"] = "BF16";
        expectedPrecisions["RELU_1"] = "ndef";
    }
};

TEST_P(ScaleshiftConvRelu, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, ScaleshiftConvRelu,
    ::testing::Combine(
        ::testing::Values(Precision::FP32),
        ::testing::Values(Precision::FP32),
        ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
        ::testing::Values(SizeVector()),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    ScaleshiftConvRelu::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, ScaleshiftConvRelu,
    ::testing::Combine(
        ::testing::Values(Precision::FP32),
        ::testing::Values(Precision::BF16),
        ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
        ::testing::Values(SizeVector()),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    ScaleshiftConvRelu::getTestCaseName);

} // namespace LayerTestsDefinitions
@ -1,148 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <functional>
#include <map>
#include <utility>

#include <ie_core.hpp>

#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class ScaleshiftConv_x2_ConcatRelu : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        // scaleshift
        //    /   \
        //  Conv   Conv
        //    \   /
        //   concat
        //      |
        //    relu

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto channelsCount = inputShapes[1];

        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("ADD_1");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // output channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("CONV_1");

        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode2->set_friendly_name("CONV_2");

        // Concat
        ngraph::NodeVector concInputNodes = { convNode1, convNode2 };
        auto concNode = std::make_shared<opset1::Concat>(concInputNodes, 1);
        concNode->set_friendly_name("CONC_1");

        // ReLU
        auto reluNode = std::make_shared<opset1::Relu>(concNode);
        reluNode->set_friendly_name("RELU_1");

        return std::make_shared<ngraph::Function>(ngraph::NodeVector{reluNode}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE1:
        threshold = 1.0f;
        // STAGE2:
        // filling of the expected precision of layer execution, defined by the precision of the input
        // tensor to the primitive and reflected in performance counters
        expectedPrecisions["ADD_1"] = netPrecision.name();
        expectedPrecisions["CONV_1"] = "BF16";
        expectedPrecisions["CONV_2"] = "BF16";
    }
};

TEST_P(ScaleshiftConv_x2_ConcatRelu, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, ScaleshiftConv_x2_ConcatRelu,
    ::testing::Combine(
        ::testing::Values(Precision::FP32),
        ::testing::Values(Precision::FP32),
        ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
        ::testing::Values(SizeVector()),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    ScaleshiftConv_x2_ConcatRelu::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, ScaleshiftConv_x2_ConcatRelu,
    ::testing::Combine(
        ::testing::Values(Precision::FP32),
        ::testing::Values(Precision::BF16),
        ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
        ::testing::Values(SizeVector()),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    ScaleshiftConv_x2_ConcatRelu::getTestCaseName);

} // namespace LayerTestsDefinitions
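// Shape bookkeeping behind the Concat above: concatenation along axis 1 sums the channel
// dimension and requires every other dimension to match, so the two 1x3x40x40 convolution
// outputs produce a 1x6x40x40 result. A shape-only sketch; concatShape is a hypothetical
// helper, and validation of the non-axis dimensions is assumed done elsewhere.
#include <vector>
#include <cstdio>

static std::vector<size_t> concatShape(const std::vector<std::vector<size_t>>& inputs, size_t axis) {
    std::vector<size_t> out = inputs.front();
    for (size_t i = 1; i < inputs.size(); ++i)
        out[axis] += inputs[i][axis];  // only the concat axis grows
    return out;
}

int main() {
    auto out = concatShape({{1, 3, 40, 40}, {1, 3, 40, 40}}, 1);
    printf("%zux%zux%zux%zu\n", out[0], out[1], out[2], out[3]);  // prints 1x6x40x40
    return 0;
}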
@ -1,142 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <functional>
#include <map>
#include <utility>

#include <ie_core.hpp>

#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class ScaleshiftConv_x2_Eltwise : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        /*     scaleshift (FP32)
         *        /       \
         *  Conv1 (BF16)   Conv2 (BF16)
         *        \       /
         *   eltwise (Fused into Conv1), produces FP32 output
         */

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto channelsCount = inputShapes[1];

        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("ADD_1");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // output channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("CONV_1");

        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode2->set_friendly_name("CONV_2");

        // Eltwise, i.e. Add
        auto eltNode = std::make_shared<opset1::Add>(convNode1, convNode2);
        eltNode->set_friendly_name("ELT_1");
        return std::make_shared<ngraph::Function>(ngraph::NodeVector{eltNode}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE1:
        threshold = 2e-1f;
        // STAGE2:
        // filling of the expected precision of layer execution, defined by the precision of the input
        // tensor to the primitive and reflected in performance counters
        expectedPrecisions["ADD_1"] = netPrecision.name();
        expectedPrecisions["CONV_1"] = "BF16";
        expectedPrecisions["CONV_2"] = "BF16";
        expectedPrecisions["ELT_1"] = "ndef";
    }
};

TEST_P(ScaleshiftConv_x2_Eltwise, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, ScaleshiftConv_x2_Eltwise,
    ::testing::Combine(
        ::testing::Values(Precision::FP32),
        ::testing::Values(Precision::FP32),
        ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
        ::testing::Values(SizeVector()),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    ScaleshiftConv_x2_Eltwise::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, ScaleshiftConv_x2_Eltwise,
    ::testing::Combine(
        ::testing::Values(Precision::FP32),
        ::testing::Values(Precision::BF16),
        ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
        ::testing::Values(SizeVector()),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    ScaleshiftConv_x2_Eltwise::getTestCaseName);

} // namespace LayerTestsDefinitions
@ -1,141 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <functional>
#include <map>
#include <utility>

#include <ie_core.hpp>

#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class ScaleshiftConv_x2_mixed1_Eltwise : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //          input
        //         /     \
        // scaleshift (FP32)  \
        //        |            \
        //  Conv1 (BF16)   Conv2 (BF16)
        //         \          /
        //      eltwise (Fused into Conv1)

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto channelsCount = inputShapes[1];

        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("ADD_1");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // output channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("CONV_1");

        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
            input1, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode2->set_friendly_name("CONV_2");

        // Eltwise, i.e. Add
        auto eltNode = std::make_shared<opset1::Add>(convNode1, convNode2);
        eltNode->set_friendly_name("ELT_1");
        return std::make_shared<ngraph::Function>(ngraph::NodeVector{eltNode}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE1:
        threshold = 2e-1f;
        // STAGE2:
        // filling of the expected precision of layer execution, defined by the precision of the input
        // tensor to the primitive and reflected in performance counters
        expectedPrecisions["ADD_1"] = netPrecision.name();
        expectedPrecisions["CONV_1"] = "BF16";
        expectedPrecisions["CONV_2"] = "BF16";
        expectedPrecisions["ELT_1"] = "ndef";
    }
};

TEST_P(ScaleshiftConv_x2_mixed1_Eltwise, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, ScaleshiftConv_x2_mixed1_Eltwise,
    ::testing::Combine(
        ::testing::Values(Precision::FP32),
        ::testing::Values(Precision::FP32),
        ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
        ::testing::Values(SizeVector()),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    ScaleshiftConv_x2_mixed1_Eltwise::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, ScaleshiftConv_x2_mixed1_Eltwise,
    ::testing::Combine(
        ::testing::Values(Precision::FP32),
        ::testing::Values(Precision::BF16),
        ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
        ::testing::Values(SizeVector()),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    ScaleshiftConv_x2_mixed1_Eltwise::getTestCaseName);

} // namespace LayerTestsDefinitions
@ -1,142 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <functional>
#include <map>
#include <utility>

#include <ie_core.hpp>

#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class ScaleshiftConv_x2_mixed2_Eltwise : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //          input
        //         /     \
        //        |   scaleshift (FP32)
        //        |        |
        //  Conv1 (BF16)   Conv2 (BF16)
        //         \      /
        //      eltwise (Fused into Conv1)

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto channelsCount = inputShapes[1];

        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // output channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            input1, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("CONV_1");

        // multiply
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("ADD_2");

        // convolution
        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode2->set_friendly_name("CONV_2");

        // Eltwise, i.e. Add
        auto eltNode = std::make_shared<opset1::Add>(convNode1, convNode2);
        eltNode->set_friendly_name("ELT_1");

        return std::make_shared<ngraph::Function>(ngraph::NodeVector{eltNode}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE1:
        threshold = 2e-1f;
        // STAGE2:
        // filling of the expected precision of layer execution, defined by the precision of the input
        // tensor to the primitive and reflected in performance counters
        expectedPrecisions["CONV_1"] = "BF16";
        expectedPrecisions["ADD_2"] = netPrecision.name();
        expectedPrecisions["CONV_2"] = "BF16";
        expectedPrecisions["ELT_1"] = "ndef";
    }
};

TEST_P(ScaleshiftConv_x2_mixed2_Eltwise, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, ScaleshiftConv_x2_mixed2_Eltwise,
    ::testing::Combine(
        ::testing::Values(Precision::FP32),
        ::testing::Values(Precision::FP32),
        ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
        ::testing::Values(SizeVector()),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    ScaleshiftConv_x2_mixed2_Eltwise::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, ScaleshiftConv_x2_mixed2_Eltwise,
    ::testing::Combine(
        ::testing::Values(Precision::FP32),
        ::testing::Values(Precision::BF16),
        ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
        ::testing::Values(SizeVector()),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    ScaleshiftConv_x2_mixed2_Eltwise::getTestCaseName);

} // namespace LayerTestsDefinitions
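// Output spatial size for the explicitly padded convolutions in these graphs follows the usual
// arithmetic: out = (in + padBegin + padEnd - ((kernel - 1) * dilation + 1)) / stride + 1. A
// standalone sketch (convOutDim is a hypothetical helper, not from the test harness):
#include <cstdio>

static size_t convOutDim(size_t in, size_t kernel, size_t stride,
                         size_t padBegin, size_t padEnd, size_t dilation) {
    size_t effKernel = (kernel - 1) * dilation + 1;  // dilated kernel extent
    return (in + padBegin + padEnd - effKernel) / stride + 1;
}

int main() {
    // The pad {1,1} convolutions above keep 40x40; the pad {0,0} ones shrink to 38x38.
    printf("%zu\n", convOutDim(40, 3, 1, 1, 1, 1));  // 40
    printf("%zu\n", convOutDim(40, 3, 1, 0, 0, 1));  // 38
    return 0;
}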
@ -1,175 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <functional>
#include <map>
#include <utility>

#include <ie_core.hpp>

#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class ScaleshiftConv_x3_Eltwise : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //        scaleshift (FP32)
        //           /         \
        //   Conv1 (BF16)   Conv2 (BF16)
        //           \         /
        //     Eltwise (Fused to Conv1)
        //              |
        //         Conv3 (BF16)

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto channelsCount = inputShapes[1];
        const size_t outChannels = 16;

        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("Add_1");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { outChannels, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(outChannels * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(outChannels * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
            ngraph::CoordinateDiff({ 0, 0 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("Convolution_1");

        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
            ngraph::CoordinateDiff({ 0, 0 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode2->set_friendly_name("Convolution_2");

        // Eltwise, i.e. Add
        auto eltNode = std::make_shared<opset1::Add>(convNode1, convNode2);
        eltNode->set_friendly_name("ELT_1");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode3 = nullptr;
        ngraph::Shape convFilterShape3 = { outChannels, outChannels, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(outChannels * outChannels * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape3, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(outChannels * outChannels * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape3, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode3 = std::make_shared<ngraph::opset1::Convolution>(
            eltNode, weightsNode3,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
            ngraph::CoordinateDiff({ 0, 0 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode3->set_friendly_name("Convolution_3");

        return std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode3}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE1:
        threshold = 2.0f;  // max value in the latest tensor for the FP32 network is 93.3

        // STAGE2: expected precision of each layer's execution, defined by the precision of the
        // input tensor to the primitive and reflected in the performance counters
        expectedPrecisions["Add_1"] = netPrecision.name();
        expectedPrecisions["Convolution_1"] = "BF16";
        expectedPrecisions["Convolution_2"] = "BF16";
        expectedPrecisions["ELT_1"] = "ndef";
        expectedPrecisions["Convolution_3"] = "BF16";
    }
};

TEST_P(ScaleshiftConv_x3_Eltwise, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, ScaleshiftConv_x3_Eltwise,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         ScaleshiftConv_x3_Eltwise::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, ScaleshiftConv_x3_Eltwise,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::BF16),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         ScaleshiftConv_x3_Eltwise::getTestCaseName);
} // namespace LayerTestsDefinitions
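Note: the expectedPrecisions map drives a comparison against per-layer performance counters, with "ndef" marking nodes expected to disappear through fusion. A hypothetical sketch of such a check follows; the perfCounters container and its contents here are assumptions for illustration, not the BasicBF16Test internals:

// Illustrative check of an expected-precision map against perf counters.
#include <iostream>
#include <map>
#include <string>

int main() {
    std::map<std::string, std::string> expectedPrecisions = {
        {"Add_1", "FP32"}, {"Convolution_1", "BF16"}, {"ELT_1", "ndef"}};
    std::map<std::string, std::string> perfCounters = {  // layer -> exec precision
        {"Add_1", "FP32"}, {"Convolution_1", "BF16"}};   // ELT_1 was fused away
    for (const auto& kv : expectedPrecisions) {
        if (kv.second == "ndef") {
            // "ndef" layers are expected to be fused into a neighbour,
            // so no standalone execution precision is defined for them.
            continue;
        }
        auto it = perfCounters.find(kv.first);
        if (it == perfCounters.end() || it->second != kv.second)
            std::cout << "precision mismatch for " << kv.first << "\n";
    }
}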
@ -1,162 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <functional>
#include <map>
#include <utility>

#include <ie_core.hpp>

#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class Scaleshift_x2_Conv_x2_Eltwise : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //   scaleshift (FP32)    scaleshift (FP32)
        //            \            /        \
        //           Eltwise (FP32)      Conv (BF16)
        //                |                  |
        //                               Conv (BF16)
        //                                   |

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto channelsCount = inputShapes[1];

        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("Add_1");

        // multiply
        std::shared_ptr<ngraph::opset1::Constant> const3 = nullptr;
        if (netPrecision == Precision::FP32) {
            const3 = opset1::Constant::create(ntype, Shape{1}, { 3.0f });
        } else {
            const3 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(3.0f)) });
        }
        auto mulNode2 = std::make_shared<opset1::Multiply>(input1, const3);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const4 = nullptr;
        if (netPrecision == Precision::FP32) {
            const4 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const4 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto addNode2 = std::make_shared<opset1::Add>(mulNode2, const4);
        addNode2->set_friendly_name("Add_2");

        // Eltwise, i.e. Add
        auto eltNode = std::make_shared<opset1::Add>(addNode, addNode2);
        eltNode->set_friendly_name("ELT_1");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            addNode2, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("Convolution_1");

        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
            convNode1, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode2->set_friendly_name("Convolution_2");

        return std::make_shared<ngraph::Function>(ngraph::NodeVector{eltNode, convNode2}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE1:
        threshold = 1;

        // STAGE2: expected precision of each layer's execution, defined by the precision of the
        // input tensor to the primitive and reflected in the performance counters
        expectedPrecisions["Add_2"] = netPrecision.name();
        expectedPrecisions["Convolution_1"] = "BF16";
        expectedPrecisions["ELT_1"] = netPrecision.name();
    }
};

TEST_P(Scaleshift_x2_Conv_x2_Eltwise, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, Scaleshift_x2_Conv_x2_Eltwise,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         Scaleshift_x2_Conv_x2_Eltwise::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, Scaleshift_x2_Conv_x2_Eltwise,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::BF16),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         Scaleshift_x2_Conv_x2_Eltwise::getTestCaseName);
} // namespace LayerTestsDefinitions
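Note: weight tensors in these graphs are filled with FuncTestUtils::fillInputsBySinValues, which yields deterministic, bounded, non-constant data. A minimal sketch in the same spirit (the exact formula inside the helper is an assumption here):

// Sine-based deterministic weight fill, illustrative only.
#include <cmath>
#include <cstddef>
#include <iostream>
#include <vector>

void fill_by_sin(float* data, size_t size) {
    for (size_t i = 0; i < size; ++i)
        data[i] = std::sin(static_cast<float>(i));  // values stay in [-1, 1]
}

int main() {
    std::vector<float> weights(3 * 3 * 3 * 3);  // outC x inC x kH x kW
    fill_by_sin(weights.data(), weights.size());
    std::cout << weights[1] << "\n";  // sin(1) ~ 0.8415
}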
@ -1,184 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <functional>
#include <map>
#include <utility>

#include <ie_core.hpp>

#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class Scaleshift_x3_ConvEltwiseRelu : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //      scaleshift (FP32)
        //             |
        //        Conv (BF16)    scaleshift (FP32)
        //             \            /
        //        Eltwise (Fused to Conv)
        //                |
        //         ReLU (Fused to Conv)
        //                |
        //         scaleshift (FP32)

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto channelsCount = inputShapes[1];

        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("Add_1");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("Convolution_1");

        // multiply
        std::shared_ptr<ngraph::opset1::Constant> const3 = nullptr;
        if (netPrecision == Precision::FP32) {
            const3 = opset1::Constant::create(ntype, Shape{1}, { 3.0f });
        } else {
            const3 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(3.0f)) });
        }
        auto mulNode2 = std::make_shared<opset1::Multiply>(input1, const3);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const4 = nullptr;
        if (netPrecision == Precision::FP32) {
            const4 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const4 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto addNode2 = std::make_shared<opset1::Add>(mulNode2, const4);
        addNode2->set_friendly_name("Add_2");

        // Eltwise, i.e. Add
        auto eltNode = std::make_shared<opset1::Add>(convNode1, addNode2);
        eltNode->set_friendly_name("ELT_1");

        // ReLU
        auto reluNode = std::make_shared<opset1::Relu>(eltNode);
        reluNode->set_friendly_name("RELU_1");

        // multiply
        std::shared_ptr<ngraph::opset1::Constant> const5 = nullptr;
        if (netPrecision == Precision::FP32) {
            const5 = opset1::Constant::create(ntype, Shape{1}, { 4.0f });
        } else {
            const5 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(4.0f)) });
        }
        auto mulNode3 = std::make_shared<opset1::Multiply>(reluNode, const5);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const6 = nullptr;
        if (netPrecision == Precision::FP32) {
            const6 = opset1::Constant::create(ntype, Shape{1}, { 3.0f });
        } else {
            const6 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(3.0f)) });
        }
        auto addNode3 = std::make_shared<opset1::Add>(mulNode3, const6);
        addNode3->set_friendly_name("Add_3");

        return std::make_shared<ngraph::Function>(ngraph::NodeVector{addNode3}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE1:
        threshold = 5e-1;

        // STAGE2: expected precision of each layer's execution, defined by the precision of the
        // input tensor to the primitive and reflected in the performance counters
        expectedPrecisions["Add_1"] = netPrecision.name();
        expectedPrecisions["Convolution_1"] = "BF16";
        expectedPrecisions["Add_2"] = netPrecision.name();
        expectedPrecisions["ELT_1"] = "ndef";
        expectedPrecisions["RELU_1"] = "ndef";
    }
};

TEST_P(Scaleshift_x3_ConvEltwiseRelu, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, Scaleshift_x3_ConvEltwiseRelu,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         Scaleshift_x3_ConvEltwiseRelu::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, Scaleshift_x3_ConvEltwiseRelu,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::BF16),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         Scaleshift_x3_ConvEltwiseRelu::getTestCaseName);
} // namespace LayerTestsDefinitions
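Note: ELT_1 and RELU_1 are expected to report "ndef" above because the graph comment assumes the plugin folds the Add and ReLU into the preceding convolution as a fused epilogue, so they never execute as standalone primitives. A toy per-element sketch of that epilogue (illustrative only, not the plugin's fusion code):

// Toy fused Add+ReLU epilogue applied to a convolution result.
#include <algorithm>
#include <iostream>

float fused_epilogue(float conv_result, float eltwise_operand) {
    float sum = conv_result + eltwise_operand;  // fused Eltwise (Add)
    return std::max(sum, 0.0f);                 // fused ReLU
}

int main() {
    std::cout << fused_epilogue(-1.5f, 2.0f) << "\n";  // 0.5
    std::cout << fused_epilogue(-3.0f, 1.0f) << "\n";  // 0
}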
@ -1,145 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <map>
#include <functional>
#include <utility>

#include <ie_core.hpp>
#include <ie_plugin_config.hpp>

#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class PoolingAfterConv : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //  Scaleshift (FP32)
        //         |
        //  Convolution (BF16)
        //         |
        //    ReLU (Fused)
        //         |
        //   Pooling (FP32)  <- this layer can be executed in bf16 if it passes data
        //                      to the next bf16 layer; otherwise the tail
        //                      optimization returns Pooling to FP32

        // STAGE1: construction of the GRAPH

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto channelsCount = inputShapes[1];
        const size_t outChannels = 16;

        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("Add_4");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { outChannels, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(outChannels * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(outChannels * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
            ngraph::CoordinateDiff({ 0, 0 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode->set_friendly_name("Convolution_6");

        // ReLU
        auto reluNode = std::make_shared<opset1::Relu>(convNode);

        // Pooling
        auto avgpoolNode = std::make_shared<opset1::AvgPool>(reluNode,
                                                             Strides{1, 1},
                                                             Shape{1, 1},
                                                             Shape{1, 1},
                                                             Shape{2, 2},
                                                             true,
                                                             op::RoundingType::FLOOR);
        avgpoolNode->set_friendly_name("AvgPool_8");

        return std::make_shared<ngraph::Function>(ngraph::NodeVector{avgpoolNode}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        threshold = 0.14f;  // max value in the latest tensor for the FP32 network is 14.6448

        // STAGE2: expected precision of each layer's execution, defined by the precision of the
        // input tensor to the primitive and reflected in the performance counters
        expectedPrecisions["Add_4"] = netPrecision.name();
        expectedPrecisions["Convolution_6"] = "BF16";
    }
};

TEST_P(PoolingAfterConv, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, PoolingAfterConv,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         PoolingAfterConv::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, PoolingAfterConv,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::BF16),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         PoolingAfterConv::getTestCaseName);
} // namespace LayerTestsDefinitions
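Note: the comment in PoolingAfterConv describes the tail rule: a precision-agnostic layer may stay in bf16 only while it feeds another bf16 layer; at the graph tail it is rolled back to FP32. A hypothetical sketch of that decision follows; the Layer type and traversal are assumptions for illustration, not the plugin's real code:

// Toy precision assignment mirroring the tail rule described above.
#include <iostream>
#include <string>
#include <vector>

struct Layer {
    std::string name;
    bool bf16_capable;
    std::vector<Layer*> consumers;
    std::string exec_precision = "FP32";
};

void assign_precision(Layer& l) {
    bool feeds_bf16 = false;
    for (Layer* c : l.consumers)
        feeds_bf16 = feeds_bf16 || c->bf16_capable;
    // Stay in bf16 only when a bf16 consumer follows; otherwise fall back.
    l.exec_precision = (l.bf16_capable && feeds_bf16) ? "BF16" : "FP32";
}

int main() {
    Layer conv2{"NextConv", true, {}};
    Layer poolMid{"Pool_mid", true, {&conv2}};  // feeds a bf16 layer
    Layer poolTail{"AvgPool_8", true, {}};      // graph tail, nothing follows
    assign_precision(poolMid);
    assign_precision(poolTail);
    std::cout << poolMid.name << " -> " << poolMid.exec_precision << "\n";   // BF16
    std::cout << poolTail.name << " -> " << poolTail.exec_precision << "\n"; // FP32
}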
@ -1,167 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <map>
#include <functional>
#include <utility>

#include <ie_core.hpp>
#include <ie_plugin_config.hpp>

#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class TopKInputsI32 : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //         Power (FP32)
        //              |
        //   Convolution1 (BF16)   Const (I32)
        //              \           /
        //              TopK (FP32)
        //        (BF16) /       \ (I32)
        //              |
        //        Convolution_2

        // STAGE1: construction of the GRAPH

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto channelsCount = inputShapes[1];
        const size_t intermediateChannelsCount = 16;

        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("Add_4");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { intermediateChannelsCount, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(intermediateChannelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(intermediateChannelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
            ngraph::CoordinateDiff({ 0, 0 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode->set_friendly_name("Convolution_1");

        // TopK
        const auto k = make_shared<op::Constant>(element::i32, Shape{}, vector<int>{1});
        size_t axis = 1;
        ngraph::op::v1::TopK::Mode mode = ngraph::op::v1::TopK::Mode::MAX;
        ngraph::op::v1::TopK::SortType sort = ngraph::op::v1::TopK::SortType::NONE;
        auto argmaxNode = std::make_shared<opset1::TopK>(convNode, k, axis, mode, sort);
        argmaxNode->set_friendly_name("TopK_1");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode2 = nullptr;
        ngraph::Shape convFilterShape2 = { 1, 1, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(1 * 1 * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(1 * 1 * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
            argmaxNode->output(0), weightsNode2->output(0),
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
            ngraph::CoordinateDiff({ 0, 0 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode2->set_friendly_name("Convolution_2");

        return std::make_shared<ngraph::Function>(ngraph::OutputVector{convNode2->output(0), argmaxNode->output(1)}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        threshold = 0.5f;  // max value in the latest tensor for the FP32 network is 22.6

        // STAGE2: expected precision of each layer's execution, defined by the precision of the
        // input tensor to the primitive and reflected in the performance counters
        expectedPrecisions["Add_4"] = netPrecision.name();
        expectedPrecisions["Convolution_1"] = "BF16";
        expectedPrecisions["Convolution_2"] = "BF16";
        expectedPrecisions["TopK_1"] = netPrecision.name();  // tail kept in FP32 precision
    }
};

TEST_P(TopKInputsI32, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, TopKInputsI32,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         TopKInputsI32::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, TopKInputsI32,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::BF16),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         TopKInputsI32::getTestCaseName);
} // namespace LayerTestsDefinitions
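Note: TopK in this last test yields two outputs of different element types, values in the data precision and indices in I32, and both are kept as Function results. A standalone top-1 sketch over one axis (illustrative only, not the ngraph kernel):

// Toy top-1 over a flat axis, returning a value/index pair like TopK's
// two outputs (value in float, index in int32).
#include <cstdint>
#include <iostream>
#include <vector>

struct Top1 { float value; int32_t index; };

Top1 top1(const std::vector<float>& channel_values) {
    Top1 best{channel_values[0], 0};  // assumes a non-empty input
    for (int32_t i = 1; i < static_cast<int32_t>(channel_values.size()); ++i)
        if (channel_values[i] > best.value) best = {channel_values[i], i};
    return best;
}

int main() {
    Top1 r = top1({0.1f, 2.5f, -1.0f});  // k = 1, Mode::MAX
    std::cout << r.value << " at index " << r.index << "\n";  // 2.5 at index 1
}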