[CPU] Parallel copy for output in case data doesn't fit L2 cache capacity (#10340)
This commit is contained in:
@@ -15,6 +15,7 @@
|
||||
#include <type_traits>
|
||||
#include <tuple>
|
||||
#include <cmath>
|
||||
#include "mkldnn/ie_mkldnn.h"
|
||||
|
||||
using namespace ov::intel_cpu;
|
||||
using namespace InferenceEngine;
|
||||
@@ -530,7 +531,19 @@ void cpu_convert(const void *srcPtr,
|
||||
IE_THROW() << "cpu_convert has null data pointer";
|
||||
|
||||
if (srcPrc == dstPrc && srcPrc == interimPrc) {
|
||||
cpu_memcpy(dstPtr, srcPtr, size * dstPrc.size());
|
||||
const size_t L2_cache_size = mkldnn::utils::get_cache_size(2, true);
|
||||
const size_t totalSize = size * dstPrc.size();
|
||||
if (totalSize >= L2_cache_size) {
|
||||
auto src = static_cast<const uint8_t *>(srcPtr);
|
||||
auto dst = static_cast<uint8_t *>(dstPtr);
|
||||
parallel_nt(0, [&](const size_t ithr, const size_t nthr) {
|
||||
size_t start = 0, end = 0;
|
||||
splitter(totalSize, nthr, ithr, start, end);
|
||||
cpu_memcpy(dst + start, src + start, end - start);
|
||||
});
|
||||
} else {
|
||||
cpu_memcpy(dstPtr, srcPtr, size * dstPrc.size());
|
||||
}
|
||||
} else {
|
||||
ConvertContext ctx = {
|
||||
srcPtr,
|
||||
|
||||
@@ -1,129 +1,130 @@
|
||||
//// Copyright (C) 2018-2022 Intel Corporation
|
||||
//// SPDX-License-Identifier: Apache-2.0
|
||||
////
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
//#include "shared_test_classes/base/layer_test_utils.hpp"
|
||||
//#include "test_utils/cpu_test_utils.hpp"
|
||||
//#include "ngraph_functions/builders.hpp"
|
||||
//
|
||||
//using namespace InferenceEngine;
|
||||
//using namespace ngraph;
|
||||
//using namespace CPUTestUtils;
|
||||
//
|
||||
//namespace CPULayerTestsDefinitions {
|
||||
//
|
||||
//using convertLayerShapeDefinition = std::pair<std::vector<ngraph::PartialShape>, std::vector<ngraph::Shape>>;
|
||||
//
|
||||
//using convertLayerTestParamsSet = std::tuple<convertLayerShapeDefinition, // input shapes
|
||||
// InferenceEngine::Precision, // input precision
|
||||
// InferenceEngine::Precision, // output precision
|
||||
// CPUSpecificParams>;
|
||||
//
|
||||
//class ConvertCPULayerTest : public testing::WithParamInterface<convertLayerTestParamsSet>,
|
||||
// virtual public LayerTestsUtils::LayerTestsCommon, public CPUTestsBase {
|
||||
//public:
|
||||
// static std::string getTestCaseName(testing::TestParamInfo<convertLayerTestParamsSet> obj) {
|
||||
// convertLayerShapeDefinition shapes;
|
||||
// InferenceEngine::Precision inPrc, outPrc;
|
||||
// CPUSpecificParams cpuParams;
|
||||
// std::tie(shapes, inPrc, outPrc, cpuParams) = obj.param;
|
||||
//
|
||||
// std::ostringstream result;
|
||||
// if (!shapes.first.empty()) {
|
||||
// result << "IS=" << CommonTestUtils::partialShape2str(shapes.first) << "_";
|
||||
// }
|
||||
// result << "TS=";
|
||||
// for (const auto& shape : shapes.second) {
|
||||
// result << CommonTestUtils::vec2str(shape) << "_";
|
||||
// }
|
||||
// result << "inputPRC=" << inPrc.name() << "_";
|
||||
// result << "targetPRC=" << outPrc.name() << "_";
|
||||
// result << CPUTestsBase::getTestCaseName(cpuParams);
|
||||
//
|
||||
// return result.str();
|
||||
// }
|
||||
//
|
||||
//protected:
|
||||
// void SetUp() override {
|
||||
// targetDevice = CommonTestUtils::DEVICE_CPU;
|
||||
//
|
||||
// convertLayerShapeDefinition shapes;
|
||||
// InferenceEngine::Precision inPrc, outPrc;
|
||||
// CPUSpecificParams cpuParams;
|
||||
// std::tie(shapes, inPrc, outPrc, cpuParams) = GetParam();
|
||||
//
|
||||
// std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
|
||||
//
|
||||
// selectedType = std::string("unknown_") + (inPrc == InferenceEngine::Precision::U8 ? "I8" : inPrc.name());
|
||||
//
|
||||
// for (size_t i = 0; i < shapes.second.size(); i++) {
|
||||
// targetStaticShapes.push_back(std::vector<ngraph::Shape>{shapes.second[i]});
|
||||
// }
|
||||
// inputDynamicShapes = shapes.first;
|
||||
//
|
||||
// auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrc);
|
||||
// auto targetPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(outPrc);
|
||||
// auto params = ngraph::builder::makeParams(ngPrc, {targetStaticShapes[0][0]});
|
||||
// auto conversion = ngraph::builder::makeConversion(params.front(), targetPrc, helpers::ConversionTypes::CONVERT);
|
||||
//
|
||||
// function = makeNgraphFunction(ngPrc, params, conversion, "ConversionCPU");
|
||||
// }
|
||||
//};
|
||||
//
|
||||
//TEST_P(ConvertCPULayerTest, CompareWithRefs) {
|
||||
// SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
||||
//
|
||||
// Run();
|
||||
//
|
||||
// CheckPluginRelatedResults(executableNetwork, "Convert");
|
||||
//}
|
||||
//
|
||||
//std::vector<convertLayerShapeDefinition> inShapes_4D = {
|
||||
// {{}, {{1, 2, 3, 4}}},
|
||||
// {
|
||||
// // dynamic
|
||||
// {{-1, -1, -1, -1}},
|
||||
// // target
|
||||
// {
|
||||
// {2, 4, 4, 1},
|
||||
// {2, 17, 5, 4},
|
||||
// {1, 2, 3, 4}
|
||||
// }
|
||||
// },
|
||||
// {
|
||||
// // dynamic
|
||||
// {{{1, 5}, {2, 22}, {2, 9}, {1, 4}}},
|
||||
// // target
|
||||
// {
|
||||
// {2, 17, 5, 4},
|
||||
// {5, 2, 3, 2},
|
||||
// {1, 10, 4, 1},
|
||||
// }
|
||||
// }
|
||||
//};
|
||||
//
|
||||
//// List of precisions natively supported by mkldnn.
|
||||
//const std::vector<Precision> precisions = {
|
||||
// Precision::U8,
|
||||
// Precision::I8,
|
||||
// Precision::I32,
|
||||
// Precision::FP32,
|
||||
// Precision::BF16
|
||||
//};
|
||||
//
|
||||
//std::vector<CPUSpecificParams> memForm4D = {
|
||||
// CPUSpecificParams({nchw}, {nchw}, {}, {}),
|
||||
// CPUSpecificParams({nhwc}, {nhwc}, {}, {}),
|
||||
// CPUSpecificParams({nChw8c}, {nChw8c}, {}, {}),
|
||||
// CPUSpecificParams({nChw16c}, {nChw16c}, {}, {})
|
||||
//};
|
||||
//
|
||||
//INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest, ConvertCPULayerTest,
|
||||
// ::testing::Combine(
|
||||
// ::testing::ValuesIn(inShapes_4D),
|
||||
// ::testing::ValuesIn(precisions),
|
||||
// ::testing::ValuesIn(precisions),
|
||||
// ::testing::ValuesIn(memForm4D)),
|
||||
// ConvertCPULayerTest::getTestCaseName);
|
||||
//
|
||||
//} // namespace CPULayerTestsDefinitions
|
||||
|
||||
#include "shared_test_classes/base/layer_test_utils.hpp"
|
||||
#include "test_utils/cpu_test_utils.hpp"
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
#include "shared_test_classes/base/ov_subgraph.hpp"
|
||||
|
||||
using namespace InferenceEngine;
|
||||
using namespace ngraph;
|
||||
using namespace CPUTestUtils;
|
||||
using namespace ov::test;
|
||||
|
||||
namespace CPULayerTestsDefinitions {
|
||||
|
||||
using convertLayerTestParamsSet = std::tuple<InputShape, // input shapes
|
||||
InferenceEngine::Precision, // input precision
|
||||
InferenceEngine::Precision, // output precision
|
||||
CPUSpecificParams>;
|
||||
|
||||
// Value-parameterized fixture that builds a single Convert operation
// (input precision -> target precision) for the CPU device.
class ConvertCPULayerTest : public testing::WithParamInterface<convertLayerTestParamsSet>,
                            virtual public SubgraphBaseTest,
                            public CPUTestsBase {
public:
    // Builds the test-case name from the parameter tuple:
    // "IS=<partial shape>_TS=<static shapes>_inputPRC=<in>_targetPRC=<out>_<cpu params>".
    static std::string getTestCaseName(testing::TestParamInfo<convertLayerTestParamsSet> obj) {
        const InputShape& inputShape = std::get<0>(obj.param);
        const InferenceEngine::Precision& inPrc = std::get<1>(obj.param);
        const InferenceEngine::Precision& outPrc = std::get<2>(obj.param);
        const CPUSpecificParams& cpuParams = std::get<3>(obj.param);

        std::ostringstream name;
        name << "IS=" << CommonTestUtils::partialShape2str({inputShape.first}) << "_"
             << "TS=";
        for (const auto& staticShape : inputShape.second) {
            name << CommonTestUtils::vec2str(staticShape) << "_";
        }
        name << "inputPRC=" << inPrc.name() << "_"
             << "targetPRC=" << outPrc.name() << "_"
             << CPUTestsBase::getTestCaseName(cpuParams);

        return name.str();
    }

protected:
    // Unpacks the test parameters, registers the static/dynamic shapes and
    // assembles the function under test.
    void SetUp() override {
        targetDevice = CommonTestUtils::DEVICE_CPU;

        const auto& testParams = GetParam();
        const InputShape& shapes = std::get<0>(testParams);
        const InferenceEngine::Precision& inPrc = std::get<1>(testParams);
        const InferenceEngine::Precision& outPrc = std::get<2>(testParams);
        const CPUSpecificParams& cpuParams = std::get<3>(testParams);

        std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;

        // The selected type from cpuParams is overridden: "unknown_" followed by the
        // input precision name, with U8 reported as "I8".
        const std::string prcSuffix = inPrc == InferenceEngine::Precision::U8 ? "I8" : inPrc.name();
        selectedType = std::string("unknown_") + prcSuffix;

        // Every target shape becomes its own single-input entry.
        for (const auto& staticShape : shapes.second) {
            targetStaticShapes.push_back(std::vector<ngraph::Shape>{staticShape});
        }

        inputDynamicShapes.push_back(shapes.first);

        const auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrc);
        const auto targetPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(outPrc);

        ParameterVector params = builder::makeDynamicParams(ngPrc, inputDynamicShapes);
        auto conversion = ngraph::builder::makeConversion(params.front(),
                                                          targetPrc,
                                                          helpers::ConversionTypes::CONVERT);

        function = makeNgraphFunction(ngPrc, params, conversion, "ConversionCPU");
    }
};
|
||||
|
||||
// Executes the parameterized Convert test via run() (unless the test is disabled)
// and then checks the plugin-related results for the "Convert" node type.
TEST_P(ConvertCPULayerTest, CompareWithRefs) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    run();
    CheckPluginRelatedResults(executableNetwork, "Convert");
}
|
||||
|
||||
std::vector<InputShape> inShapes_4D = {
|
||||
{{1, 2, 3, 4}, {{1, 2, 3, 4}}},
|
||||
{{1, 1, 1080, 1920}, {{1, 1, 1080, 1920}}},
|
||||
{
|
||||
// dynamic
|
||||
{{-1, -1, -1, -1}},
|
||||
// target
|
||||
{
|
||||
{2, 4, 4, 1},
|
||||
{2, 17, 5, 4},
|
||||
{1, 2, 3, 4}
|
||||
}
|
||||
},
|
||||
{
|
||||
// dynamic
|
||||
{{{1, 5}, {2, 22}, {2, 9}, {1, 4}}},
|
||||
// target
|
||||
{
|
||||
{2, 17, 5, 4},
|
||||
{5, 2, 3, 2},
|
||||
{1, 10, 4, 1},
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// List of precisions natively supported by mkldnn.
|
||||
const std::vector<Precision> precisions = {
|
||||
Precision::U8,
|
||||
Precision::I8,
|
||||
Precision::I32,
|
||||
Precision::FP32,
|
||||
Precision::BF16
|
||||
};
|
||||
|
||||
// CPU-specific parameter sets for 4D tensors. Only the first two fields (the
// input and output memory formats) are set; the remaining two are left empty.
std::vector<CPUSpecificParams> memForm4D = {
    CPUSpecificParams({nchw}, {nchw}, {}, {}),
    CPUSpecificParams({nhwc}, {nhwc}, {}, {}),
    CPUSpecificParams({nChw8c}, {nChw8c}, {}, {}),
    CPUSpecificParams({nChw16c}, {nChw16c}, {}, {})
};
|
||||
|
||||
// Instantiates the Convert test over the full cross product of
// shapes x input precisions x output precisions x memory formats.
INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest,
                         ConvertCPULayerTest,
                         ::testing::Combine(::testing::ValuesIn(inShapes_4D),
                                            ::testing::ValuesIn(precisions),
                                            ::testing::ValuesIn(precisions),
                                            ::testing::ValuesIn(memForm4D)),
                         ConvertCPULayerTest::getTestCaseName);
|
||||
|
||||
} // namespace CPULayerTestsDefinitions
|
||||
Reference in New Issue
Block a user