auto-batching hetero test (subgraph with DetectionOutput)

This commit is contained in:
myshevts 2021-11-25 17:37:23 +03:00
parent 17d84b990c
commit 1866847c83
9 changed files with 180 additions and 143 deletions

View File

@ -7,7 +7,9 @@ const std::vector<bool> get_vs_set{ true, false };
const std::vector<size_t> num_streams{ 1, 2 };
const std::vector<size_t> num_requests{ 1, 3, 8, 9, 16, 64 };
const std::vector<size_t> num_batch{ 1, 4, 8, 16, 32, 64, 128, 256 };
using namespace AutoBatchingTests;
namespace {
INSTANTIATE_TEST_SUITE_P(smoke_AutoBatching_CPU, AutoBatching_Test,
::testing::Combine(
::testing::Values(CommonTestUtils::DEVICE_CPU),
@ -15,4 +17,15 @@ INSTANTIATE_TEST_SUITE_P(smoke_AutoBatching_CPU, AutoBatching_Test,
::testing::ValuesIn(num_streams),
::testing::ValuesIn(num_requests),
::testing::ValuesIn(num_batch)),
AutoBatching_Test::getTestCaseName);
AutoBatching_Test::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_AutoBatching_CPU, AutoBatching_Test_DetectionOutput,
::testing::Combine(
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::ValuesIn(get_vs_set),
::testing::ValuesIn(num_streams),
::testing::ValuesIn(num_requests),
::testing::ValuesIn(num_batch)),
AutoBatching_Test_DetectionOutput::getTestCaseName);
} // namespace

View File

@ -57,8 +57,6 @@ const auto params3Inputs = ::testing::Combine(
);
INSTANTIATE_TEST_SUITE_P(smoke_DetectionOutput3In, DetectionOutputLayerTest, params3Inputs, DetectionOutputLayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_DetectionOutput3In_Hetero, DetectionOutputLayerTestWithAutoBatching,
params3Inputs, DetectionOutputLayerTestWithAutoBatching::getTestCaseName);
/* =============== 5 inputs cases =============== */
@ -83,7 +81,5 @@ const auto params5Inputs = ::testing::Combine(
);
INSTANTIATE_TEST_SUITE_P(smoke_DetectionOutput5In, DetectionOutputLayerTest, params5Inputs, DetectionOutputLayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_DetectionOutput5In_Hetero, DetectionOutputLayerTestWithAutoBatching,
params5Inputs, DetectionOutputLayerTestWithAutoBatching::getTestCaseName);
} // namespace

View File

@ -7,12 +7,25 @@ const std::vector<size_t> num_streams{ 1, 2 };
const std::vector<bool> get_vs_set{ true, false };
const std::vector<size_t> num_requests{ 1, 8, 16, 64 };
const std::vector<size_t> num_batch{ 1, 4, 8, 16, 32, 64, 128, 256 };
using namespace AutoBatchingTests;
namespace AutoBatchingTests {
INSTANTIATE_TEST_SUITE_P(smoke_AutoBatching_GPU, AutoBatching_Test,
::testing::Combine(
::testing::Values(CommonTestUtils::DEVICE_GPU),
::testing::ValuesIn(get_vs_set),
::testing::ValuesIn(num_streams),
::testing::ValuesIn(num_requests),
::testing::ValuesIn(num_batch)),
AutoBatching_Test::getTestCaseName);
::testing::Combine(
::testing::Values(CommonTestUtils::DEVICE_GPU),
::testing::ValuesIn(get_vs_set),
::testing::ValuesIn(num_streams),
::testing::ValuesIn(num_requests),
::testing::ValuesIn(num_batch)),
AutoBatching_Test::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_AutoBatching_GPU, AutoBatching_Test_DetectionOutput,
::testing::Combine(
::testing::Values(CommonTestUtils::DEVICE_GPU),
::testing::ValuesIn(get_vs_set),
::testing::ValuesIn(num_streams),
::testing::ValuesIn(num_requests),
::testing::ValuesIn(num_batch)),
AutoBatching_Test_DetectionOutput::getTestCaseName);
} // namespace AutoBatchingTests

View File

@ -57,8 +57,6 @@ const auto params3Inputs = ::testing::Combine(
);
INSTANTIATE_TEST_SUITE_P(smoke_DetectionOutput3In, DetectionOutputLayerTest, params3Inputs, DetectionOutputLayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_DetectionOutput3In_HETERO, DetectionOutputLayerTestWithAutoBatching,
params3Inputs, DetectionOutputLayerTestWithAutoBatching::getTestCaseName);
/* =============== 5 inputs cases =============== */
@ -83,7 +81,5 @@ const auto params5Inputs = ::testing::Combine(
);
INSTANTIATE_TEST_SUITE_P(smoke_DetectionOutput5In, DetectionOutputLayerTest, params5Inputs, DetectionOutputLayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_DetectionOutput5In_Hetero, DetectionOutputLayerTestWithAutoBatching,
params5Inputs, DetectionOutputLayerTestWithAutoBatching::getTestCaseName);
} // namespace

View File

@ -17,6 +17,7 @@
using namespace ::testing;
using namespace InferenceEngine;
namespace AutoBatchingTests {
using AutoBatchTwoNetsParams = std::tuple<
std::string, // device name
bool, // get or set blob
@ -24,101 +25,135 @@ using AutoBatchTwoNetsParams = std::tuple<
size_t, // number of requests
size_t>; // batch size>
// Parameterized fixture that loads two small networks through the BATCH
// virtual device and compares inference results against the interpreter
// reference. Parameters: device name, get-vs-set blob, #streams, #requests, batch size.
// NOTE(review): this span is a rendered diff without +/- markers — several
// statements below appear twice (old hunk line followed by the reflowed new
// one); confirm against the repository before treating this as compilable source.
class AutoBatching_Test : public CommonTestUtils::TestsCommon,
public testing::WithParamInterface<AutoBatchTwoNetsParams> {
void SetUp() override {
std::tie(device_name, use_get_blob, num_streams, num_requests, num_batch) = this->GetParam();
// NOTE(review): duplicated assignment below is diff residue (same statement reflowed) — confirm.
fn_ptrs = {ngraph::builder::subgraph::makeSingleConv(), ngraph::builder::subgraph::makeMultiSingleConv()};
fn_ptrs = {ngraph::builder::subgraph::makeSingleConv(),
ngraph::builder::subgraph::makeMultiSingleConv()};
};
public:
// Builds a human-readable test name from the tuple parameters.
// NOTE(review): duplicated signature/return lines below are diff residue — confirm.
static std::string getTestCaseName(const testing::TestParamInfo<AutoBatchTwoNetsParams>& obj) {
static std::string getTestCaseName(const testing::TestParamInfo<AutoBatchTwoNetsParams> &obj) {
size_t streams, requests, batch;
bool use_get_blob;
std::string device_name;
std::tie(device_name, use_get_blob, streams, requests, batch) = obj.param;
return device_name + std::string(use_get_blob ? "_get_blob" : "_set_blob") + "_batch_size_" + std::to_string(batch) +
return device_name + std::string(use_get_blob ? "_get_blob" : "_set_blob") + "_batch_size_" +
std::to_string(batch) +
"_num_streams_" + std::to_string(streams) + "_num_req_" + std::to_string(requests);
}
protected:
std::string device_name;
// NOTE(review): member declared twice — diff residue, confirm.
bool use_get_blob;
bool use_get_blob;
size_t num_streams;
size_t num_requests;
size_t num_batch;
// Networks under test; filled by SetUp (overridden by derived fixtures).
std::vector<std::shared_ptr<ngraph::Function>> fn_ptrs;
// Core of the test: load each network on "BATCH:<device>(<num_batch>)",
// create num_requests infer requests per network, run them asynchronously,
// and compare every output buffer against the ngraph interpreter reference.
void TestAutoBatch() {
std::vector<InferenceEngine::CNNNetwork> nets;
for (auto &fn_ptr : fn_ptrs) {
nets.push_back(CNNNetwork(fn_ptr));
}
auto ie = InferenceEngine::Core();
std::vector<std::string> outputs;
std::vector<InferRequest> irs;
std::vector<std::vector<uint8_t>> ref;
std::vector<int> outElementsCount;
for (size_t i = 0; i < nets.size(); ++i) {
auto net = nets[i];
auto inputs = net.getInputsInfo();
for (auto n : inputs) {
n.second->setPrecision(Precision::FP32);
}
// Per-device stream count so auto-batching is exercised with multi-stream configs.
std::map<std::string, std::string> config;
if (device_name.find("GPU") != std::string::npos)
config[CONFIG_KEY(GPU_THROUGHPUT_STREAMS)] = std::to_string(num_streams);
if (device_name.find("CPU") != std::string::npos)
config[CONFIG_KEY(CPU_THROUGHPUT_STREAMS)] = std::to_string(num_streams);
// Load through the BATCH virtual device with an explicit batch size.
auto exec_net_ref = ie.LoadNetwork(net, std::string(CommonTestUtils::DEVICE_BATCH) + ":" +
device_name + "(" + std::to_string(num_batch) + ")",
config);
for (int j = 0; j < num_requests; j++) {
outputs.push_back(net.getOutputsInfo().begin()->first); //single output
outElementsCount.push_back(
std::accumulate(begin(fn_ptrs[i]->get_output_shape(0)), end(fn_ptrs[i]->get_output_shape(0)), 1,
std::multiplies<size_t>()));
auto inf_req = exec_net_ref.CreateInferRequest();
irs.push_back(inf_req);
std::vector<std::vector<uint8_t>> inData;
for (auto n : inputs) {
auto blob = FuncTestUtils::createAndFillBlob(n.second->getTensorDesc());
// Exercise both input paths: copy into the request's own blob (GetBlob)
// or hand the external blob over (SetBlob).
if (use_get_blob)
memcpy(reinterpret_cast<void *>(inf_req.GetBlob(n.first)->buffer().as<uint8_t*>()),
reinterpret_cast<const void *>(blob->cbuffer().as<uint8_t*>()), blob->byteSize());
else
inf_req.SetBlob(n.first, blob);
// Snapshot the actual input bytes for the reference run below.
const auto inBlob = inf_req.GetBlob(n.first);
const auto blobSize = inBlob->byteSize();
const auto inBlobBuf = inBlob->cbuffer().as<uint8_t *>();
inData.push_back(std::vector<uint8_t>(inBlobBuf, inBlobBuf + blobSize));
}
// Reference result from the ngraph interpreter on the same inputs.
auto refOutData = ngraph::helpers::interpreterFunction(fn_ptrs[i], {inData}).front().second;
ref.push_back(refOutData);
}
}
const int niter = 1;
for (int i = 0; i < niter; i++) {
// Kick off every request, then wait for all — auto-batching collects
// concurrent requests into a batch.
for (auto ir : irs) {
ir.StartAsync();
}
for (auto ir : irs) {
ir.Wait(InferRequest::RESULT_READY);
}
}
auto thr = FuncTestUtils::GetComparisonThreshold(InferenceEngine::Precision::FP32);
for (size_t i = 0; i < irs.size(); ++i) {
const auto &refBuffer = ref[i].data();
ASSERT_EQ(outElementsCount[i], irs[i].GetBlob(outputs[i])->size());
FuncTestUtils::compareRawBuffers(irs[i].GetBlob(outputs[i])->buffer().as<float *>(),
reinterpret_cast<const float *>(refBuffer), outElementsCount[i],
outElementsCount[i],
thr);
}
}
};
TEST_P(AutoBatching_Test, compareAutoBatchingToBatch1) {
std::vector<InferenceEngine::CNNNetwork> nets;
for (auto &fn_ptr : fn_ptrs) {
nets.push_back(CNNNetwork(fn_ptr));
// Variant of AutoBatching_Test whose networks contain a DetectionOutput
// subgraph, so the auto-batching plugin's HETERO split code-path
// (DetectionOutput vs. the rest of the network) is exercised.
class AutoBatching_Test_DetectionOutput : public AutoBatching_Test {
public:
void SetUp() override {
std::tie(device_name, use_get_blob, num_streams, num_requests, num_batch) = this->GetParam();
// Two identical DetectionOutput-based functions, mirroring the base
// fixture's two-networks setup.
fn_ptrs = {ngraph::builder::subgraph::makeEltwisePlusDetectionOutput(),
ngraph::builder::subgraph::makeEltwisePlusDetectionOutput()};
}
// Test-name generator: same fields as the base fixture, prefixed so these
// runs are distinguishable in reports.
static std::string getTestCaseName(const testing::TestParamInfo<AutoBatchTwoNetsParams> &obj) {
std::string device;
bool get_blob = false;
size_t n_streams = 0, n_requests = 0, n_batch = 0;
std::tie(device, get_blob, n_streams, n_requests, n_batch) = obj.param;
std::string name = "DetectionOutput_HETERO_";
name += device;
name += get_blob ? "_get_blob" : "_set_blob";
name += "_batch_size_" + std::to_string(n_batch);
name += "_num_streams_" + std::to_string(n_streams) + "_num_req_" + std::to_string(n_requests);
return name;
}
};
auto ie = InferenceEngine::Core();
std::vector<std::string> outputs;
std::vector<InferRequest> irs;
std::vector<std::vector<uint8_t>> ref;
std::vector<int> outElementsCount;
// Plain-convolution networks: auto-batched results must match the batch-1 reference.
TEST_P(AutoBatching_Test, compareAutoBatchingToSingleBatch) {
TestAutoBatch();
}
for (size_t i = 0; i < nets.size(); ++i) {
auto net = nets[i];
// DetectionOutput networks: exercises the HETERO split inside auto-batching.
TEST_P(AutoBatching_Test_DetectionOutput, compareAutoBatchingToSingleBatch) {
TestAutoBatch();
}
// we test single inputs networks only
auto inp = net.getInputsInfo().begin()->second;
inp->setLayout(Layout::NCHW);
inp->setPrecision(Precision::FP32);
std::map<std::string, std::string> config;
if (device_name.find("GPU") != std::string::npos)
config[CONFIG_KEY(GPU_THROUGHPUT_STREAMS)] = std::to_string(num_streams);
if (device_name.find("CPU") != std::string::npos)
config[CONFIG_KEY(CPU_THROUGHPUT_STREAMS)] = std::to_string(num_streams);
auto exec_net_ref = ie.LoadNetwork(net, std::string(CommonTestUtils::DEVICE_BATCH) + ":" +
device_name + "(" + std::to_string(num_batch) + ")",
config);
for (int j = 0; j < num_requests; j++) {
outputs.push_back(net.getOutputsInfo().begin()->first);
auto inf_req = exec_net_ref.CreateInferRequest();
irs.push_back(inf_req);
auto blob = FuncTestUtils::createAndFillBlob(net.getInputsInfo().begin()->second->getTensorDesc());
if (use_get_blob)
InferenceEngine::blob_copy(blob, inf_req.GetBlob(net.getInputsInfo().begin()->first));
else
inf_req.SetBlob(net.getInputsInfo().begin()->first, blob);
outElementsCount.push_back(
std::accumulate(begin(fn_ptrs[i]->get_output_shape(0)), end(fn_ptrs[i]->get_output_shape(0)), 1,
std::multiplies<size_t>()));
const auto inBlob = inf_req.GetBlob(net.getInputsInfo().begin()->first);
const auto blobSize = inBlob->byteSize();
const auto inBlobBuf = inBlob->cbuffer().as<uint8_t *>();
std::vector<uint8_t> inData(inBlobBuf, inBlobBuf + blobSize);
auto refOutData = ngraph::helpers::interpreterFunction(fn_ptrs[i], {inData}).front().second;
ref.push_back(refOutData);
}
}
const int niter = 1;
for (int i = 0; i < niter; i++) {
for (auto ir : irs) {
ir.StartAsync();
}
for (auto ir : irs) {
ir.Wait(InferRequest::RESULT_READY);
}
}
auto thr = FuncTestUtils::GetComparisonThreshold(InferenceEngine::Precision::FP32);
for (size_t i = 0; i < irs.size(); ++i) {
const auto &refBuffer = ref[i].data();
ASSERT_EQ(outElementsCount[i], irs[i].GetBlob(outputs[i])->size());
FuncTestUtils::compareRawBuffers(irs[i].GetBlob(outputs[i])->buffer().as<float *>(),
reinterpret_cast<const float *>(refBuffer), outElementsCount[i],
outElementsCount[i],
thr);
}
}
} // namespace AutoBatchingTests

View File

@ -11,8 +11,4 @@ namespace LayerTestsDefinitions {
TEST_P(DetectionOutputLayerTest, CompareWithRefs) {
Run();
};
TEST_P(DetectionOutputLayerTestWithAutoBatching, CompareWithRefs) {
Run();
};
} // namespace LayerTestsDefinitions

View File

@ -71,10 +71,4 @@ class DetectionOutputLayerTest : public testing::WithParamInterface<DetectionOut
void SetUp() override;
};
class DetectionOutputLayerTestWithAutoBatching : public DetectionOutputLayerTest {
public:
static std::string getTestCaseName(const testing::TestParamInfo<DetectionOutputParams>& obj);
protected:
void SetUp() override;
};
} // namespace LayerTestsDefinitions

View File

@ -157,48 +157,4 @@ void DetectionOutputLayerTest::SetUp() {
ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(detOut)};
function = std::make_shared<ngraph::Function>(results, params, "DetectionOutput");
}
// Reuse the base-class test name and tag it so auto-batching runs are
// distinguishable from the plain DetectionOutput suite.
std::string DetectionOutputLayerTestWithAutoBatching::getTestCaseName(const testing::TestParamInfo<DetectionOutputParams>& obj) {
std::string name = DetectionOutputLayerTest::getTestCaseName(obj);
name += "_WITH_AUTO_BATCH";
return name;
}
// Builds the network under test: per-input Eltwise(ADD) nodes feeding a
// DetectionOutput, loaded on the BATCH virtual device so the auto-batching
// HETERO split between DetectionOutput and the rest is exercised.
void DetectionOutputLayerTestWithAutoBatching::SetUp() {
DetectionOutputAttributes commonAttrs;
ParamsWhichSizeDepends specificAttrs;
size_t batch;
std::tie(commonAttrs, specificAttrs, batch, attrs.objectness_score, targetDevice) = this->GetParam();
// Creating the Auto-batching device with the specific batch size, e.g. "BATCH:CPU(4)".
targetDevice = std::string(CommonTestUtils::DEVICE_BATCH) + ":" + targetDevice + "(" + std::to_string(batch) + ")";
std::tie(attrs.num_classes, attrs.background_label_id, attrs.top_k, attrs.keep_top_k, attrs.code_type, attrs.nms_threshold, attrs.confidence_threshold,
attrs.clip_after_nms, attrs.clip_before_nms, attrs.decrease_label_id) = commonAttrs;
inShapes.resize(numInputs);
std::tie(attrs.variance_encoded_in_target, attrs.share_location, attrs.normalized, attrs.input_height, attrs.input_width,
inShapes[idxLocation], inShapes[idxConfidence], inShapes[idxPriors], inShapes[idxArmConfidence], inShapes[idxArmLocation]) = specificAttrs;
// 3-input flavor: drop the (empty) ARM confidence/location shapes.
if (inShapes[idxArmConfidence].empty()) {
inShapes.resize(3);
}
for (size_t i = 0; i < inShapes.size(); i++) {
inShapes[i][0] = 1; //auto-batching will do the batching transparently
}
// Adding Eltwise so that we can test Auto-Batching's HETERO code-path that
// splits the DetectionOutput from the rest of the network.
auto params = ngraph::builder::makeParams(ngraph::element::f32, inShapes);
auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::opset3::Parameter>(params));
ngraph::OutputVector outs;
// For each original input, add a second Parameter and an ADD node in front of DetectionOutput.
for (int i = 0; i < inShapes.size(); i++) {
auto shape = inShapes[i];
auto p = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32, ngraph::Shape{shape});
auto add = ngraph::builder::makeEltwise(paramOuts[i], p , ngraph::helpers::EltwiseTypes::ADD);
params.push_back(p);
outs.push_back(add->output(0));
}
auto detOut = ngraph::builder::makeDetectionOutput(outs, attrs);
ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(detOut)};
function = std::make_shared<ngraph::Function>(results, params, "EltWiseWithDetectionOutput");
}
} // namespace LayerTestsDefinitions

View File

@ -242,6 +242,44 @@ inline std::shared_ptr<ngraph::Function> makeSingleConv(std::vector<size_t> inpu
return fn_ptr;
}
inline std::shared_ptr<ngraph::Function> makeEltwisePlusDetectionOutput(std::vector<std::vector<size_t>> inShapes =
{{1, 60}, {1, 165}, {1, 1, 75}},
ngraph::element::Type_t type = ngraph::element::Type_t::f32) {
// adding Eltwise so that we can tests Auto-Batching's HETERO code-path that splits the DetectionOutput and the rest of the network
auto params = ngraph::builder::makeParams(ngraph::element::f32, inShapes);
auto paramOuts = ngraph::helpers::convert2OutputVector(
ngraph::helpers::castOps2Nodes<ngraph::opset3::Parameter>(params));
ngraph::OutputVector outs;
for (int i = 0; i < inShapes.size(); i++) {
auto shape = inShapes[i];
auto p = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32, ngraph::Shape{shape});
auto add = ngraph::builder::makeEltwise(paramOuts[i], p, ngraph::helpers::EltwiseTypes::ADD);
params.push_back(p);
outs.push_back(add->output(0));
}
ngraph::op::DetectionOutput::Attributes attr;
attr.num_classes = 11;
attr.background_label_id = 0;
attr.top_k = 75;
attr.variance_encoded_in_target = true;
attr.keep_top_k = {50};
attr.code_type = std::string{"caffe.PriorBoxParameter.CORNER"};
attr.share_location = true;
attr.nms_threshold = 0.5f;
attr.confidence_threshold = 0.5f;
attr.clip_after_nms = false;
attr.clip_before_nms = false;
attr.decrease_label_id = false;
attr.normalized = false;
attr.input_height = 1;
attr.input_width = 1;
attr.objectness_score = 0.4f;
auto detOut = ngraph::builder::makeDetectionOutput(outs, attr);
ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(detOut)};
return std::make_shared<ngraph::Function>(results, params, "EltWiseWithDetectionOutput");
}
inline std::shared_ptr<ngraph::Function> makeMultiSingleConv(std::vector<size_t> inputShape = {1, 3, 24, 24},
ngraph::element::Type type = ngraph::element::Type_t::f32) {
auto param0 = std::make_shared<ngraph::opset1::Parameter>(type, ngraph::Shape(inputShape));