[CPU] Parallel copy for output in case data doesn't fit L2 cache capacity (#10340)

This commit is contained in:
Zhang Yi
2022-02-18 18:16:51 +08:00
committed by GitHub
parent a18c8076cc
commit ba9d18f181
2 changed files with 143 additions and 129 deletions

View File

@@ -15,6 +15,7 @@
#include <type_traits>
#include <tuple>
#include <cmath>
#include "mkldnn/ie_mkldnn.h"
using namespace ov::intel_cpu;
using namespace InferenceEngine;
@@ -530,7 +531,19 @@ void cpu_convert(const void *srcPtr,
IE_THROW() << "cpu_convert has null data pointer";
if (srcPrc == dstPrc && srcPrc == interimPrc) {
cpu_memcpy(dstPtr, srcPtr, size * dstPrc.size());
const size_t L2_cache_size = mkldnn::utils::get_cache_size(2, true);
const size_t totalSize = size * dstPrc.size();
if (totalSize >= L2_cache_size) {
auto src = static_cast<const uint8_t *>(srcPtr);
auto dst = static_cast<uint8_t *>(dstPtr);
parallel_nt(0, [&](const size_t ithr, const size_t nthr) {
size_t start = 0, end = 0;
splitter(totalSize, nthr, ithr, start, end);
cpu_memcpy(dst + start, src + start, end - start);
});
} else {
cpu_memcpy(dstPtr, srcPtr, size * dstPrc.size());
}
} else {
ConvertContext ctx = {
srcPtr,

View File

@@ -1,129 +1,130 @@
//// Copyright (C) 2018-2022 Intel Corporation
//// SPDX-License-Identifier: Apache-2.0
////
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
//#include "shared_test_classes/base/layer_test_utils.hpp"
//#include "test_utils/cpu_test_utils.hpp"
//#include "ngraph_functions/builders.hpp"
//
//using namespace InferenceEngine;
//using namespace ngraph;
//using namespace CPUTestUtils;
//
//namespace CPULayerTestsDefinitions {
//
//using convertLayerShapeDefinition = std::pair<std::vector<ngraph::PartialShape>, std::vector<ngraph::Shape>>;
//
//using convertLayerTestParamsSet = std::tuple<convertLayerShapeDefinition, // input shapes
// InferenceEngine::Precision, // input precision
// InferenceEngine::Precision, // output precision
// CPUSpecificParams>;
//
//class ConvertCPULayerTest : public testing::WithParamInterface<convertLayerTestParamsSet>,
// virtual public LayerTestsUtils::LayerTestsCommon, public CPUTestsBase {
//public:
// static std::string getTestCaseName(testing::TestParamInfo<convertLayerTestParamsSet> obj) {
// convertLayerShapeDefinition shapes;
// InferenceEngine::Precision inPrc, outPrc;
// CPUSpecificParams cpuParams;
// std::tie(shapes, inPrc, outPrc, cpuParams) = obj.param;
//
// std::ostringstream result;
// if (!shapes.first.empty()) {
// result << "IS=" << CommonTestUtils::partialShape2str(shapes.first) << "_";
// }
// result << "TS=";
// for (const auto& shape : shapes.second) {
// result << CommonTestUtils::vec2str(shape) << "_";
// }
// result << "inputPRC=" << inPrc.name() << "_";
// result << "targetPRC=" << outPrc.name() << "_";
// result << CPUTestsBase::getTestCaseName(cpuParams);
//
// return result.str();
// }
//
//protected:
// void SetUp() override {
// targetDevice = CommonTestUtils::DEVICE_CPU;
//
// convertLayerShapeDefinition shapes;
// InferenceEngine::Precision inPrc, outPrc;
// CPUSpecificParams cpuParams;
// std::tie(shapes, inPrc, outPrc, cpuParams) = GetParam();
//
// std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
//
// selectedType = std::string("unknown_") + (inPrc == InferenceEngine::Precision::U8 ? "I8" : inPrc.name());
//
// for (size_t i = 0; i < shapes.second.size(); i++) {
// targetStaticShapes.push_back(std::vector<ngraph::Shape>{shapes.second[i]});
// }
// inputDynamicShapes = shapes.first;
//
// auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrc);
// auto targetPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(outPrc);
// auto params = ngraph::builder::makeParams(ngPrc, {targetStaticShapes[0][0]});
// auto conversion = ngraph::builder::makeConversion(params.front(), targetPrc, helpers::ConversionTypes::CONVERT);
//
// function = makeNgraphFunction(ngPrc, params, conversion, "ConversionCPU");
// }
//};
//
//TEST_P(ConvertCPULayerTest, CompareWithRefs) {
// SKIP_IF_CURRENT_TEST_IS_DISABLED()
//
// Run();
//
// CheckPluginRelatedResults(executableNetwork, "Convert");
//}
//
//std::vector<convertLayerShapeDefinition> inShapes_4D = {
// {{}, {{1, 2, 3, 4}}},
// {
// // dynamic
// {{-1, -1, -1, -1}},
// // target
// {
// {2, 4, 4, 1},
// {2, 17, 5, 4},
// {1, 2, 3, 4}
// }
// },
// {
// // dynamic
// {{{1, 5}, {2, 22}, {2, 9}, {1, 4}}},
// // target
// {
// {2, 17, 5, 4},
// {5, 2, 3, 2},
// {1, 10, 4, 1},
// }
// }
//};
//
//// List of precisions natively supported by mkldnn.
//const std::vector<Precision> precisions = {
// Precision::U8,
// Precision::I8,
// Precision::I32,
// Precision::FP32,
// Precision::BF16
//};
//
//std::vector<CPUSpecificParams> memForm4D = {
// CPUSpecificParams({nchw}, {nchw}, {}, {}),
// CPUSpecificParams({nhwc}, {nhwc}, {}, {}),
// CPUSpecificParams({nChw8c}, {nChw8c}, {}, {}),
// CPUSpecificParams({nChw16c}, {nChw16c}, {}, {})
//};
//
//INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest, ConvertCPULayerTest,
// ::testing::Combine(
// ::testing::ValuesIn(inShapes_4D),
// ::testing::ValuesIn(precisions),
// ::testing::ValuesIn(precisions),
// ::testing::ValuesIn(memForm4D)),
// ConvertCPULayerTest::getTestCaseName);
//
//} // namespace CPULayerTestsDefinitions
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "test_utils/cpu_test_utils.hpp"
#include "ngraph_functions/builders.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"
using namespace InferenceEngine;
using namespace ngraph;
using namespace CPUTestUtils;
using namespace ov::test;
namespace CPULayerTestsDefinitions {
using convertLayerTestParamsSet = std::tuple<InputShape, // input shapes
InferenceEngine::Precision, // input precision
InferenceEngine::Precision, // output precision
CPUSpecificParams>;
// Value-parameterized test fixture for the Convert operation. Each parameter
// set provides the input shapes, the input and output precisions and the
// CPU-specific parameters (see convertLayerTestParamsSet).
class ConvertCPULayerTest : public testing::WithParamInterface<convertLayerTestParamsSet>,
                            virtual public SubgraphBaseTest, public CPUTestsBase {
public:
    // Builds the test-case name from the shapes, both precisions and the
    // name produced by CPUTestsBase for the CPU-specific parameters.
    static std::string getTestCaseName(testing::TestParamInfo<convertLayerTestParamsSet> obj) {
        const InputShape& inputShape = std::get<0>(obj.param);
        const InferenceEngine::Precision& inPrc = std::get<1>(obj.param);
        const InferenceEngine::Precision& outPrc = std::get<2>(obj.param);
        const CPUSpecificParams& cpuParams = std::get<3>(obj.param);

        std::ostringstream name;
        name << "IS=" << CommonTestUtils::partialShape2str({inputShape.first}) << "_";
        name << "TS=";
        for (const auto& targetShape : inputShape.second) {
            name << CommonTestUtils::vec2str(targetShape) << "_";
        }
        name << "inputPRC=" << inPrc.name() << "_"
             << "targetPRC=" << outPrc.name() << "_"
             << CPUTestsBase::getTestCaseName(cpuParams);
        return name.str();
    }

protected:
    // Unpacks the test parameters, fills in the shape members and builds a
    // function consisting of dynamic parameters followed by a CONVERT node.
    void SetUp() override {
        targetDevice = CommonTestUtils::DEVICE_CPU;

        const auto& testParams = GetParam();
        const InputShape& shapes = std::get<0>(testParams);
        const InferenceEngine::Precision& inPrc = std::get<1>(testParams);
        const InferenceEngine::Precision& outPrc = std::get<2>(testParams);

        std::tie(inFmts, outFmts, priority, selectedType) = std::get<3>(testParams);

        // The selectedType taken from the CPU params is replaced here:
        // a U8 input is reported as "I8", any other precision by its own name.
        selectedType = "unknown_";
        if (inPrc == InferenceEngine::Precision::U8) {
            selectedType += "I8";
        } else {
            selectedType += inPrc.name();
        }

        // Every target shape becomes its own single-element shape list.
        for (const auto& staticShape : shapes.second) {
            targetStaticShapes.push_back(std::vector<ngraph::Shape>{staticShape});
        }
        inputDynamicShapes.push_back(shapes.first);

        const auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrc);
        const auto targetPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(outPrc);
        auto params = builder::makeDynamicParams(ngPrc, inputDynamicShapes);
        auto conversion = ngraph::builder::makeConversion(params.front(), targetPrc, helpers::ConversionTypes::CONVERT);

        function = makeNgraphFunction(ngPrc, params, conversion, "ConversionCPU");
    }
};
// Parameterized test body: runs the Convert test case, then performs the
// plugin-related result check for "Convert".
TEST_P(ConvertCPULayerTest, CompareWithRefs) {
// NOTE(review): judging by its name this macro skips disabled tests - confirm against its definition.
SKIP_IF_CURRENT_TEST_IS_DISABLED()
// run() is not defined in this file; presumably inherited from SubgraphBaseTest - TODO confirm.
run();
// NOTE(review): the helper is defined elsewhere; presumably it validates how the plugin
// executed nodes of type "Convert" - confirm against CPUTestsBase.
CheckPluginRelatedResults(executableNetwork, "Convert");
}
// 4D input shapes for the Convert tests. Each entry pairs a shape description
// (first) with the list of concrete target shapes to run (second).
std::vector<InputShape> inShapes_4D = {
    // static shape: the only target equals the declared shape
    {{1, 2, 3, 4}, {{1, 2, 3, 4}}},
    // large static shape - presumably added to exercise the parallel copy path
    // for data exceeding the L2 cache; TODO confirm
    {{1, 1, 1080, 1920}, {{1, 1, 1080, 1920}}},
    // dynamic shape, every dimension given as -1, followed by its targets
    {{{-1, -1, -1, -1}}, {{2, 4, 4, 1}, {2, 17, 5, 4}, {1, 2, 3, 4}}},
    // dynamic shape, every dimension given as a pair of values, followed by its targets
    {{{{1, 5}, {2, 22}, {2, 9}, {1, 4}}}, {{2, 17, 5, 4}, {5, 2, 3, 2}, {1, 10, 4, 1}}}
};
// Precisions with native mkldnn support; used for both the input and the
// output side of the conversion.
const std::vector<Precision> precisions = {
    Precision::U8, Precision::I8, Precision::I32, Precision::FP32, Precision::BF16
};
// CPU-specific parameter sets for 4D tensors: the same layout is requested for
// input and output, the remaining two fields are left empty.
std::vector<CPUSpecificParams> memForm4D = {
    CPUSpecificParams({nchw},    {nchw},    {}, {}),
    CPUSpecificParams({nhwc},    {nhwc},    {}, {}),
    CPUSpecificParams({nChw8c},  {nChw8c},  {}, {}),
    CPUSpecificParams({nChw16c}, {nChw16c}, {}, {})
};
INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest, ConvertCPULayerTest,
::testing::Combine(
::testing::ValuesIn(inShapes_4D),
::testing::ValuesIn(precisions),
::testing::ValuesIn(precisions),
::testing::ValuesIn(memForm4D)),
ConvertCPULayerTest::getTestCaseName);
} // namespace CPULayerTestsDefinitions