[CPU] Parallel copy for output in case data doesn't fit L2 cache capacity (#10340)

2022-02-18 18:16:51 +08:00
parent a18c8076cc
commit ba9d18f181
2 changed files with 143 additions and 129 deletions
--- a/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp
+++ b/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp
@@ -15,6 +15,7 @@
 #include <type_traits>
 #include <tuple>
 #include <cmath>
+#include "mkldnn/ie_mkldnn.h"

 using namespace ov::intel_cpu;
 using namespace InferenceEngine;
@@ -530,7 +531,19 @@ void cpu_convert(const void *srcPtr,
        IE_THROW() << "cpu_convert has null data pointer";

    if (srcPrc == dstPrc && srcPrc == interimPrc) {
-        cpu_memcpy(dstPtr, srcPtr, size * dstPrc.size());
+        const size_t L2_cache_size = mkldnn::utils::get_cache_size(2, true);  // NOTE(review): presumably the per-core L2 size in bytes — confirm against the mkldnn utils
+        const size_t totalSize = size * dstPrc.size();  // same length expression the plain cpu_memcpy below uses
+        if (totalSize >= L2_cache_size) {  // buffer is at least as large as the queried cache size: split the copy across threads
+            auto src = static_cast<const uint8_t *>(srcPtr);  // byte-typed views so the offsets below are byte offsets
+            auto dst = static_cast<uint8_t *>(dstPtr);
+            parallel_nt(0, [&](const size_t ithr, const size_t nthr) {  // NOTE(review): 0 presumably lets the runtime choose the thread count — verify
+                size_t start = 0, end = 0;
+                splitter(totalSize, nthr, ithr, start, end);  // assumed to set [start, end) to thread ithr's share of totalSize — TODO confirm
+                cpu_memcpy(dst + start, src + start, end - start);  // each thread copies only its own sub-range
+            });
+        } else {  // smaller buffer: a single cpu_memcpy call, as before this change
+            cpu_memcpy(dstPtr, srcPtr, size * dstPrc.size());
+        }
    } else {
        ConvertContext ctx = {
            srcPtr,
--- a/src/tests/functional/plugin/cpu/single_layer_tests/conversion.cpp
+++ b/src/tests/functional/plugin/cpu/single_layer_tests/conversion.cpp
@@ -1,129 +1,130 @@
-//// Copyright (C) 2018-2022 Intel Corporation
-//// SPDX-License-Identifier: Apache-2.0
-////
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
 //
-//#include "shared_test_classes/base/layer_test_utils.hpp"
-//#include "test_utils/cpu_test_utils.hpp"
-//#include "ngraph_functions/builders.hpp"
-//
-//using namespace InferenceEngine;
-//using namespace ngraph;
-//using namespace CPUTestUtils;
-//
-//namespace CPULayerTestsDefinitions {
-//
-//using convertLayerShapeDefinition = std::pair<std::vector<ngraph::PartialShape>, std::vector<ngraph::Shape>>;
-//
-//using convertLayerTestParamsSet = std::tuple<convertLayerShapeDefinition,  // input shapes
-//                                        InferenceEngine::Precision,        // input precision
-//                                        InferenceEngine::Precision,        // output precision
-//                                        CPUSpecificParams>;
-//
-//class ConvertCPULayerTest : public testing::WithParamInterface<convertLayerTestParamsSet>,
-//                            virtual public LayerTestsUtils::LayerTestsCommon, public CPUTestsBase {
-//public:
-//    static std::string getTestCaseName(testing::TestParamInfo<convertLayerTestParamsSet> obj) {
-//        convertLayerShapeDefinition shapes;
-//        InferenceEngine::Precision inPrc, outPrc;
-//        CPUSpecificParams cpuParams;
-//        std::tie(shapes, inPrc, outPrc, cpuParams) = obj.param;
-//
-//        std::ostringstream result;
-//        if (!shapes.first.empty()) {
-//            result << "IS=" << CommonTestUtils::partialShape2str(shapes.first) << "_";
-//        }
-//        result << "TS=";
-//        for (const auto& shape : shapes.second) {
-//            result << CommonTestUtils::vec2str(shape) << "_";
-//        }
-//        result << "inputPRC=" << inPrc.name() << "_";
-//        result << "targetPRC=" << outPrc.name() << "_";
-//        result << CPUTestsBase::getTestCaseName(cpuParams);
-//
-//        return result.str();
-//    }
-//
-//protected:
-//    void SetUp() override {
-//        targetDevice = CommonTestUtils::DEVICE_CPU;
-//
-//        convertLayerShapeDefinition shapes;
-//        InferenceEngine::Precision inPrc, outPrc;
-//        CPUSpecificParams cpuParams;
-//        std::tie(shapes, inPrc, outPrc, cpuParams) = GetParam();
-//
-//        std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
-//
-//        selectedType = std::string("unknown_") + (inPrc == InferenceEngine::Precision::U8 ? "I8" : inPrc.name());
-//
-//        for (size_t i = 0; i < shapes.second.size(); i++) {
-//            targetStaticShapes.push_back(std::vector<ngraph::Shape>{shapes.second[i]});
-//        }
-//        inputDynamicShapes = shapes.first;
-//
-//        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrc);
-//        auto targetPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(outPrc);
-//        auto params = ngraph::builder::makeParams(ngPrc, {targetStaticShapes[0][0]});
-//        auto conversion = ngraph::builder::makeConversion(params.front(), targetPrc, helpers::ConversionTypes::CONVERT);
-//
-//        function = makeNgraphFunction(ngPrc, params, conversion, "ConversionCPU");
-//    }
-//};
-//
-//TEST_P(ConvertCPULayerTest, CompareWithRefs) {
-//    SKIP_IF_CURRENT_TEST_IS_DISABLED()
-//
-//    Run();
-//
-//    CheckPluginRelatedResults(executableNetwork, "Convert");
-//}
-//
-//std::vector<convertLayerShapeDefinition> inShapes_4D = {
-//        {{}, {{1, 2, 3, 4}}},
-//        {
-//            // dynamic
-//            {{-1, -1, -1, -1}},
-//            // target
-//            {
-//                {2, 4, 4, 1},
-//                {2, 17, 5, 4},
-//                {1, 2, 3, 4}
-//            }
-//        },
-//        {
-//            // dynamic
-//            {{{1, 5}, {2, 22}, {2, 9}, {1, 4}}},
-//            // target
-//            {
-//                {2, 17, 5, 4},
-//                {5, 2, 3, 2},
-//                {1, 10, 4, 1},
-//            }
-//        }
-//};
-//
-//// List of precisions natively supported by mkldnn.
-//const std::vector<Precision> precisions = {
-//        Precision::U8,
-//        Precision::I8,
-//        Precision::I32,
-//        Precision::FP32,
-//        Precision::BF16
-//};
-//
-//std::vector<CPUSpecificParams> memForm4D = {
-//        CPUSpecificParams({nchw}, {nchw}, {}, {}),
-//        CPUSpecificParams({nhwc}, {nhwc}, {}, {}),
-//        CPUSpecificParams({nChw8c}, {nChw8c}, {}, {}),
-//        CPUSpecificParams({nChw16c}, {nChw16c}, {}, {})
-//};
-//
-//INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest, ConvertCPULayerTest,
-//                        ::testing::Combine(
-//                                ::testing::ValuesIn(inShapes_4D),
-//                                ::testing::ValuesIn(precisions),
-//                                ::testing::ValuesIn(precisions),
-//                                ::testing::ValuesIn(memForm4D)),
-//                        ConvertCPULayerTest::getTestCaseName);
-//
-//} // namespace CPULayerTestsDefinitions
+
+#include "shared_test_classes/base/layer_test_utils.hpp"
+#include "test_utils/cpu_test_utils.hpp"
+#include "ngraph_functions/builders.hpp"
+#include "shared_test_classes/base/ov_subgraph.hpp"
+
+using namespace InferenceEngine;
+using namespace ngraph;
+using namespace CPUTestUtils;
+using namespace ov::test;
+
+namespace CPULayerTestsDefinitions {
+
+using convertLayerTestParamsSet = std::tuple<InputShape,  // input shape: .first is the (possibly dynamic) shape, .second the target static shapes
+                                        InferenceEngine::Precision,        // input precision
+                                        InferenceEngine::Precision,        // output precision
+                                        CPUSpecificParams>;  // unpacked in SetUp into inFmts, outFmts, priority, selectedType
+
+class ConvertCPULayerTest : public testing::WithParamInterface<convertLayerTestParamsSet>,  // parameterized fixture for the CPU Convert layer test
+                            virtual public SubgraphBaseTest, public CPUTestsBase {
+public:
+    static std::string getTestCaseName(testing::TestParamInfo<convertLayerTestParamsSet> obj) {  // builds the test-name string from the parameter tuple
+        InputShape inputShape;
+        InferenceEngine::Precision inPrc, outPrc;
+        CPUSpecificParams cpuParams;
+        std::tie(inputShape, inPrc, outPrc, cpuParams) = obj.param;
+
+        std::ostringstream result;
+
+        result << "IS=" << CommonTestUtils::partialShape2str({inputShape.first}) << "_";  // IS: the partial shape from .first
+        result << "TS=";
+        for (const auto& shape : inputShape.second) {  // TS: every target static shape, each followed by "_"
+            result << CommonTestUtils::vec2str(shape) << "_";
+        }
+        result << "inputPRC=" << inPrc.name() << "_";
+        result << "targetPRC=" << outPrc.name() << "_";
+        result << CPUTestsBase::getTestCaseName(cpuParams);  // suffix produced by the CPU test base from cpuParams
+
+        return result.str();
+    }
+
+protected:
+    void SetUp() override {  // configures the fixture and builds a function containing one Convert op
+        targetDevice = CommonTestUtils::DEVICE_CPU;
+
+        InputShape shapes;
+        InferenceEngine::Precision inPrc, outPrc;
+        CPUSpecificParams cpuParams;
+        std::tie(shapes, inPrc, outPrc, cpuParams) = GetParam();
+
+        std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
+
+        selectedType = std::string("unknown_") + (inPrc == InferenceEngine::Precision::U8 ? "I8" : inPrc.name());  // overrides the value from cpuParams: "unknown_<prc>", with U8 mapped to "I8"
+
+        for (size_t i = 0; i < shapes.second.size(); i++) {  // one single-element shape list per target shape (the graph has one input)
+            targetStaticShapes.push_back(std::vector<ngraph::Shape>{shapes.second[i]});
+        }
+
+        inputDynamicShapes.push_back(shapes.first);  // the only input's (possibly dynamic) shape
+
+        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrc);
+        auto targetPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(outPrc);
+        ParameterVector params = builder::makeDynamicParams(ngPrc, inputDynamicShapes);  // parameters are created from the dynamic shapes, not from a fixed static shape
+        auto conversion = ngraph::builder::makeConversion(params.front(), targetPrc, helpers::ConversionTypes::CONVERT);  // Convert of the first parameter to targetPrc
+
+        function = makeNgraphFunction(ngPrc, params, conversion, "ConversionCPU");
+    }
+};
+
+TEST_P(ConvertCPULayerTest, CompareWithRefs) {  // runs the Convert subgraph, then checks plugin-side results
+    SKIP_IF_CURRENT_TEST_IS_DISABLED()
+
+    run();  // NOTE(review): presumably the SubgraphBaseTest entry point that infers and compares with references — confirm
+
+    CheckPluginRelatedResults(executableNetwork, "Convert");  // NOTE(review): presumably validates the executed "Convert" node against the fixture's expected formats/type — confirm in CPUTestsBase
+}
+
+std::vector<InputShape> inShapes_4D = {  // 4D input shapes; each entry is {shape, {target static shapes}}
+        {{1, 2, 3, 4}, {{1, 2, 3, 4}}},  // small fully static shape
+        {{1, 1, 1080, 1920}, {{1, 1, 1080, 1920}}},  // large static shape (2,073,600 elements); presumably added to reach the parallel-copy path in cpu_convert — TODO confirm
+        {
+            // fully dynamic shape: every dimension is -1
+            {{-1, -1, -1, -1}},
+            // target static shapes used with the dynamic shape above
+            {
+                {2, 4, 4, 1},
+                {2, 17, 5, 4},
+                {1, 2, 3, 4}
+            }
+        },
+        {
+            // dynamic shape with a {min, max} pair per dimension
+            {{{1, 5}, {2, 22}, {2, 9}, {1, 4}}},
+            // target static shapes used with the bounded shape above
+            {
+                {2, 17, 5, 4},
+                {5, 2, 3, 2},
+                {1, 10, 4, 1},
+            }
+        }
+};
+
+// List of precisions natively supported by mkldnn.
+const std::vector<Precision> precisions = {  // passed twice to the instantiation below: once as input and once as output precision
+        Precision::U8,
+        Precision::I8,
+        Precision::I32,
+        Precision::FP32,
+        Precision::BF16
+};
+
+std::vector<CPUSpecificParams> memForm4D = {  // each entry: {input formats}, {output formats}, then empty priority and selectedType
+        CPUSpecificParams({nchw}, {nchw}, {}, {}),  // same format on input and output in every entry
+        CPUSpecificParams({nhwc}, {nhwc}, {}, {}),
+        CPUSpecificParams({nChw8c}, {nChw8c}, {}, {}),
+        CPUSpecificParams({nChw16c}, {nChw16c}, {}, {})
+};
+
+INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest, ConvertCPULayerTest,  // every combination of shape, input precision, output precision and memory format
+                        ::testing::Combine(
+                                ::testing::ValuesIn(inShapes_4D),  // tuple slot 0: input shape
+                                ::testing::ValuesIn(precisions),  // tuple slot 1: input precision
+                                ::testing::ValuesIn(precisions),  // tuple slot 2: output precision
+                                ::testing::ValuesIn(memForm4D)),  // tuple slot 3: CPU-specific params
+                        ConvertCPULayerTest::getTestCaseName);
+
+} // namespace CPULayerTestsDefinitions