[CPU] [MSVC] memcpy and memcpy_s were unified for all layers and SIMD functionality was enabled on VS2019 (#1374)

2020-08-27 13:49:31 +03:00 · 2020-08-27 13:49:31 +03:00 · b6b536f23c
commit b6b536f23c
parent 675e1c821e
37 changed files with 89 additions and 70 deletions
--- a/inference-engine/src/mkldnn_plugin/mean_image.cpp
+++ b/inference-engine/src/mkldnn_plugin/mean_image.cpp
@ -4,7 +4,7 @@

 #include "mean_image.h"
 #include "ie_parallel.hpp"
-#include "ie_memcpy.h"
+#include "nodes/common/cpu_memcpy.h"

 using namespace MKLDNNPlugin;
 using namespace InferenceEngine;
@ -53,7 +53,7 @@ void MeanImage::Load(const MKLDNNDims& inputDims, InputInfo::Ptr inputInfo) {
                    THROW_IE_EXCEPTION << "mean image size does not match expected network input, expecting " << meanWidth << " x " << meanHeight;
                }
                // todo: cast to TBlob and make sure it is floats
-                ie_memcpy(meanBuffer->data() + channel*meanBlob->size(), meanBuffer->byteSize() - channel*meanBlob->byteSize(),
+                cpu_memcpy_s(meanBuffer->data() + channel*meanBlob->size(), meanBuffer->byteSize() - channel*meanBlob->byteSize(),
                          meanBlob->buffer(), meanBlob->byteSize());
            }
        }
--- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp
@ -28,7 +28,7 @@
 #include <blob_factory.hpp>
 #include <legacy/net_pass.h>
 #include <legacy/details/ie_cnn_network_tools.h>
-#include <ie_memcpy.h>
+#include "nodes/common/cpu_memcpy.h"

 #include "precision_utils.h"
 #include <ie_plugin_config.hpp>
@ -775,7 +775,7 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) {
            MB_to_process = std::min<int>(config.batchLimit, MB_to_process);
        size_t size_to_copy = intr_blob.GetSize() * MB_to_process / MB;

-        ie_memcpy(ext_blob_ptr, ext_blob->byteSize(), intr_blob_ptr, size_to_copy);
+        cpu_memcpy_s(ext_blob_ptr, ext_blob->byteSize(), intr_blob_ptr, size_to_copy);
    }
 }

--- a/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp
@ -13,6 +13,7 @@
 #include "mkldnn_memory.h"
 #include "mkldnn_node.h"
 #include "mkldnn_extension_utils.h"
+#include "nodes/common/cpu_memcpy.h"

 using namespace InferenceEngine;
 using namespace mkldnn;
@ -110,7 +111,7 @@ void MKLDNNMemory::SetData(memory::data_type dataType, memory::format format, co
        uint8_t* dataPtr = static_cast<uint8_t*>(GetData());
        // We cannot support strides for i/o blobs because it affects performance.
        dataPtr += itemSize * prim->get_primitive_desc().desc().data.layout_desc.blocking.offset_padding;
-        memcpy(dataPtr, data, size);
+        cpu_memcpy(dataPtr, data, size);
    }

    if (ftz && dataType == mkldnn_f32) {
--- a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp
@ -47,7 +47,7 @@
 #include <mkldnn_types.h>
 #include "mkldnn_extension_utils.h"

-#include "ie_memcpy.h"
+#include "nodes/common/cpu_memcpy.h"
 #include "mkldnn_debug.h"

 using namespace mkldnn;
@ -698,7 +698,7 @@ InferenceEngine::Blob::Ptr MKLDNNNode::createInternalBlob(InferenceEngine::SizeV
    auto fillInternalBlob = [&](char *data, size_t intBuffSize) {
        size_t offset = blb->byteSize();
        checkSize(intBuffSize, offset);
-        ie_memcpy(data, intBuffSize, blb->buffer(), blb->byteSize());
+        cpu_memcpy_s(data, intBuffSize, blb->buffer(), blb->byteSize());
        data += blb->byteSize();
        for (const auto &merged : getMergeWith()) {
            wLayer = dynamic_cast<InferenceEngine::WeightableLayer*>(merged->getCnnLayer().get());
@ -711,7 +711,7 @@ InferenceEngine::Blob::Ptr MKLDNNNode::createInternalBlob(InferenceEngine::SizeV
                THROW_IE_EXCEPTION << "Cannot get internal blob layer for node " << getName() << ".";
            offset += blb->byteSize();
            checkSize(intBuffSize, offset);
-            ie_memcpy(data, intBuffSize, blb->buffer(), blb->byteSize());
+            cpu_memcpy_s(data, intBuffSize, blb->buffer(), blb->byteSize());
            data += blb->byteSize();
        }
    };
--- a/inference-engine/src/mkldnn_plugin/nodes/broadcast.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/broadcast.cpp
@ -9,7 +9,7 @@
 #include <vector>
 #include <cassert>
 #include "ie_parallel.hpp"
-#include "common/simple_copy.h"
+#include "common/cpu_memcpy.h"

 namespace InferenceEngine {
 namespace Extensions {
@ -111,7 +111,7 @@ public:
                for (i = 0, src_idx = 0; i < dst_dims.size(); ++i)
                    src_idx += counters[i] ? ((counters[i] % src_aligned[i]) * srcStrides_aligned[i]) : 0;

-                simple_copy(&dst_data[iwork], data_size, &src_data[src_idx * data_size], data_size);
+                cpu_memcpy(&dst_data[iwork], &src_data[src_idx * data_size], data_size);

                for (int j = dst_dims.size() - 1; j >= 0; j--) {
                    counters[j] = (counters[j] + 1) % dst_dims[j];
--- a/inference-engine/src/mkldnn_plugin/nodes/bucketize.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/bucketize.cpp
@ -12,7 +12,6 @@
 #include <algorithm>
 #include <limits>
 #include "ie_parallel.hpp"
-#include "common/simple_copy.h"

 namespace InferenceEngine {
 namespace Extensions {
--- a/inference-engine/src/mkldnn_plugin/nodes/common/simple_copy.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/common/simple_copy.h
@ -11,9 +11,9 @@
 * @brief Copies bytes between buffers with security enhancements
 * Copies count bytes from src to dest. If the source and destination
 * overlap, the behavior is undefined.
- * @param dest
+ * @param dst
 * pointer to the object to copy to
- * @param destsz
+ * @param dst_size
 * max number of bytes to modify in the destination (typically the size
 * of the destination object)
 * @param src
@ -23,16 +23,28 @@
 @return zero on success and non-zero value on error.
 */

-inline int simple_copy(void* dest, size_t destsz, void const* src, size_t count) {
+inline void cpu_memcpy(void* dst, const void* src, size_t count) {  // Unchecked copy of `count` bytes from src to dst; returns nothing to the caller.
+#ifdef _WIN32  // Windows builds route the copy through memcpy_s.
+    memcpy_s(dst, count, src, count);  // Destination size is passed as `count`, so no separate capacity limit applies; the result is discarded.
+#else  // All other platforms call std::memcpy directly.
+    std::memcpy(dst, src, count);
+#endif
+}
+
+inline int cpu_memcpy_s(void* dst, size_t dst_size, const void* src, size_t count) {  // Checked copy: returns 0 on success, -1 (after zeroing dst) when a check below fails.
    size_t i;
-    if (!src || count > destsz ||
-        count > (dest > src ? ((uintptr_t)dest - (uintptr_t)src)
-                            : ((uintptr_t)src - (uintptr_t)dest))) {
+    if (!src ||  // Fail on a null source, ...
+        count > dst_size ||  // ... on a copy larger than the stated destination capacity, ...
+        count > (dst > src ? ((uintptr_t)dst - (uintptr_t)src) : ((uintptr_t)src - (uintptr_t)dst))) {  // ... or when count exceeds the pointer distance, i.e. the two ranges would overlap.
        // zero out dest if error detected
-        memset(dest, 0, destsz);
+        std::memset(dst, 0, dst_size);  // NOTE(review): dst is not null-checked before this write - confirm callers never pass a null dst.
        return -1;
    }

-    for (i = 0; i < count; ++i) (reinterpret_cast<uint8_t*>(dest))[i] = (reinterpret_cast<const uint8_t*>(src))[i];
+#ifdef _WIN32  // Replaces the removed byte-by-byte loop; NOTE(review): the local `i` declared above is no longer used here.
+    memcpy_s(dst, dst_size, src, count);  // Result is ignored; null src and count > dst_size were already rejected above.
+#else  // Non-Windows builds use std::memcpy once the checks have passed.
+    std::memcpy(dst, src, count);
+#endif
    return 0;
 }
--- a/inference-engine/src/mkldnn_plugin/nodes/embedding_bag_packed_sum.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/embedding_bag_packed_sum.cpp
@ -3,6 +3,7 @@
 //

 #include "embedding_bag_sum.hpp"
+#include "common/cpu_memcpy.h"

 namespace InferenceEngine {
 namespace Extensions {
@ -38,7 +39,7 @@ public:
        } else if (inputs[INDICES_IDX]->getTensorDesc().getPrecision().size() == sizeof(UINT64)) {
            const UINT64* src = inputs[INDICES_IDX]->cbuffer().as<const UINT64*>();
            for (size_t i = 0lu; i < bagsNum; i++) {
-                memcpy(_indices[i].data(), src + i * batch, batch * sizeof(UINT64));
+                cpu_memcpy(_indices[i].data(), src + i * batch, batch * sizeof(UINT64));
            }
        }
    }
--- a/inference-engine/src/mkldnn_plugin/nodes/embedding_segments_sum.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/embedding_segments_sum.cpp
@ -3,6 +3,7 @@
 //

 #include "embedding_bag_sum.hpp"
+#include "common/cpu_memcpy.h"

 namespace InferenceEngine {
 namespace Extensions {
@ -51,7 +52,7 @@ public:
                _indices[i] = static_cast<size_t>(src[i]);
        } else if (inputs[INDICES_IDX]->getTensorDesc().getPrecision().size() == sizeof(UINT64)) {
            const UINT64* src = inputs[INDICES_IDX]->cbuffer().as<const UINT64*>();
-            memcpy(_indices.data(), src, inputs[INDICES_IDX]->byteSize());
+            cpu_memcpy(_indices.data(), src, inputs[INDICES_IDX]->byteSize());
        }

        // Initialize segments ids
@ -61,7 +62,7 @@ public:
                _segmentIds[i] = static_cast<size_t>(src[i]);
        } else if (inputs[SEGMENT_ID_IDX]->getTensorDesc().getPrecision().size() == sizeof(UINT64)) {
            const UINT64* src = inputs[SEGMENT_ID_IDX]->cbuffer().as<const UINT64*>();
-            memcpy(_segmentIds.data(), src, inputs[SEGMENT_ID_IDX]->byteSize());
+            cpu_memcpy(_segmentIds.data(), src, inputs[SEGMENT_ID_IDX]->byteSize());
        }

        if (inputs.size() > NUM_SEGMENTS_IDX) {
--- a/inference-engine/src/mkldnn_plugin/nodes/gather.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/gather.cpp
@ -11,7 +11,7 @@
 #include <algorithm>
 #include <limits>
 #include "ie_parallel.hpp"
-#include "common/simple_copy.h"
+#include "common/cpu_memcpy.h"
 #include "common/fp16_utils.h"

 namespace InferenceEngine {
@ -125,7 +125,7 @@ private:
            if (idx < indexRange) {
                //  Copying data to destination from Dictionary
                for (size_t j = 0; j < numDictionaries; j++) {
-                    simple_copy(&dst_data[len * (i + j * src_indexSize)],
+                    cpu_memcpy_s(&dst_data[len * (i + j * src_indexSize)],
                                output->byteSize() - (len * (i + j * src_indexSize)),
                                &src_dataDict[len * (idx + j * indexRange)],
                                len);
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.cpp
@ -5,7 +5,7 @@
 #include "mkldnn_batchnorm_node.h"
 #include "mkldnn_depthwise_node.h"
 #include <mkldnn_extension_utils.h>
-#include "ie_memcpy.h"
+#include "common/cpu_memcpy.h"

 using namespace mkldnn;
 using namespace MKLDNNPlugin;
@ -81,7 +81,7 @@ void MKLDNNBatchNormalizationNode::getSupportedDescriptors() {
            THROW_IE_EXCEPTION << "Cannot get weights blob for node " << getName() << ".";

        size_t weightsByteSize = blb->byteSize();
-        ie_memcpy(data, internalBlob->byteSize(), blb->buffer(), weightsByteSize);
+        cpu_memcpy_s(data, internalBlob->byteSize(), blb->buffer(), weightsByteSize);
        data += blb->size();
        blb = scshLayer->_biases;

@ -90,7 +90,7 @@ void MKLDNNBatchNormalizationNode::getSupportedDescriptors() {
        } else {
            if (weightsByteSize != blb->byteSize())
                THROW_IE_EXCEPTION << "ScaleShift has incorrect weights!";
-            ie_memcpy(data, internalBlob->byteSize(), blb->buffer(), weightsByteSize);
+            cpu_memcpy_s(data, internalBlob->byteSize(), blb->buffer(), weightsByteSize);
        }
        internalBlobs.push_back(internalBlob);
    }
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp
@ -21,6 +21,7 @@
 #include "mkldnn_quantize_node.h"
 #include "mkldnn_pooling_node.h"
 #include <limits>
+#include "common/cpu_memcpy.h"

 using namespace mkldnn;
 using namespace MKLDNNPlugin;
@ -646,7 +647,7 @@ void MKLDNNConcatNode::execute(mkldnn::stream strm) {
        parallel_for(iter_count, [&](int i) {
            const size_t dst_off = i * channels_size;
            for (int j = 0; j < num_src; j++) {
-                memcpy(dst_ptrs[j] + dst_off, src_ptrs[j] + i * channels[j], channels[j]);
+                cpu_memcpy(dst_ptrs[j] + dst_off, src_ptrs[j] + i * channels[j], channels[j]);
            }
        });
    } else {
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_crop_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_crop_node.cpp
@ -9,6 +9,7 @@
 #include <mkldnn_types.h>
 #include <mkldnn_extension_utils.h>
 #include "ie_parallel.hpp"
+#include "common/cpu_memcpy.h"

 using namespace mkldnn;
 using namespace MKLDNNPlugin;
@ -162,7 +163,7 @@ void MKLDNNCropNode::execute(mkldnn::stream strm) {
 #ifdef _WIN32
    if (OD == 1 && OH == 1 && OW == 1 && ID == 1 && IH == 1 && IW == 1) {
        for (int n = 0; n < ON; ++n) {
-            memcpy(&dst_data[n*OC], &src_data[(n+OFFSET_N)*IC + OFFSET_C], OC * sizeof(float));
+            cpu_memcpy(&dst_data[n*OC], &src_data[(n+OFFSET_N)*IC + OFFSET_C], OC * sizeof(float));
        }
    } else {
        for (int n = 0; n < ON; ++n) {
@ -180,7 +181,7 @@ void MKLDNNCropNode::execute(mkldnn::stream strm) {
                                (h+OFFSET_H)*IW*m_block_size +
                                OFFSET_W*m_block_size;

-                        memcpy(dst_data + dst_ind, src_data + src_ind, m_inner_dim * sizeof(float));
+                        cpu_memcpy(dst_data + dst_ind, src_data + src_ind, m_inner_dim * sizeof(float));
                    }
                }
            }
@ -189,7 +190,7 @@ void MKLDNNCropNode::execute(mkldnn::stream strm) {
 #else
    if (OD == 1 && OH == 1 && OW == 1 && ID == 1 && IH == 1 && IW == 1) {
        parallel_for(ON, [&](int n) {
-            memcpy(&dst_data[n*OC], &src_data[(n+OFFSET_N)*IC + OFFSET_C], OC * sizeof(float));
+            cpu_memcpy(&dst_data[n*OC], &src_data[(n+OFFSET_N)*IC + OFFSET_C], OC * sizeof(float));
        });
    } else {
        parallel_for2d(ON, (OC / m_block_size), [&](int n, int c) {
@ -200,7 +201,7 @@ void MKLDNNCropNode::execute(mkldnn::stream strm) {
                              ((d+OFFSET_D)*IH*IW + OFFSET_H*IW + OFFSET_W)*m_block_size;

                for (int h = 0; h < OH; ++h) {
-                    memcpy(dst_data + dst_ind, src_data + src_ind, m_inner_dim * sizeof(float));
+                    cpu_memcpy(dst_data + dst_ind, src_data + src_ind, m_inner_dim * sizeof(float));

                    src_ind += IW * m_block_size;
                    dst_ind += OW * m_block_size;
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gemm_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gemm_node.cpp
@ -12,6 +12,7 @@
 #include <mkldnn_types.h>
 #include <mkldnn_extension_utils.h>
 #include "ie_parallel.hpp"
+#include "common/cpu_memcpy.h"

 using namespace mkldnn;
 using namespace MKLDNNPlugin;
@ -262,7 +263,7 @@ void MKLDNNGemmNode::process_data() {

        for (int b2 = 0; b2 < MB2; b2++) {
            if (isThreeInputs) {
-                memcpy(d_ptr, c_ptr, M * N * sizeof(float));
+                cpu_memcpy(d_ptr, c_ptr, M * N * sizeof(float));
                c_ptr += cOffsets[0];
            }

--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp
@ -8,7 +8,7 @@
 #include <tuple>
 #include <algorithm>
 #include "caseless.hpp"
-#include "ie_memcpy.h"
+#include "common/cpu_memcpy.h"

 using namespace mkldnn;
 using namespace MKLDNNPlugin;
@ -154,7 +154,7 @@ void MKLDNNInputNode::execute(mkldnn::stream strm) {
        const int8_t *srcData = constBlob->cbuffer().as<int8_t *>();
        int8_t *dstData = dstBlob->buffer();

-        ie_memcpy(dstData, dstBlob->byteSize(), srcData, constBlob->byteSize());
+        cpu_memcpy_s(dstData, dstBlob->byteSize(), srcData, constBlob->byteSize());
    } else {
        switch (precision.size()) {
            case 1: {
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp
@ -6,6 +6,7 @@
 #include <mkldnn_types.h>
 #include <mkldnn_extension_utils.h>
 #include "mkldnn_memory_node.hpp"
+#include "common/cpu_memcpy.h"

 using namespace mkldnn;
 using namespace MKLDNNPlugin;
@ -88,7 +89,7 @@ static void simple_copy(MKLDNNMemory& dst, const MKLDNNMemory& src) {

    IE_ASSERT(srcSizeInByte == dstSizeInByte) << "Memory objects are not compatible. Has different sizes.";

-    memcpy(dstPtr, srcPtr, srcSizeInByte);
+    cpu_memcpy(dstPtr, srcPtr, srcSizeInByte);
 }

 MKLDNNMemoryInputNode::~MKLDNNMemoryInputNode() {
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp
@ -12,7 +12,7 @@
 #include "jit_uni_depthwise.hpp"
 #include "jit_uni_quantization.hpp"
 #include "bf16transformer.h"
-
+#include "common/cpu_memcpy.h"
 #include "mkldnn_normalize_node.h"

 using namespace mkldnn;
@ -1317,7 +1317,7 @@ void MKLDNNNormalizeNode::normalize_blk(const in_data_t* src_data, out_data_t* d
    // post ops for tails: post-ops params is padding.
    std::vector<float> weights_padding(CB * blk_size);
    if (!channel_shared) {
-        memcpy(static_cast<float*>(&weights_padding[0]), weights, C * sizeof(float));
+        cpu_memcpy(static_cast<float*>(&weights_padding[0]), weights, C * sizeof(float));
    }

    for (size_t b = 0lu; b < B; b++) {
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp
@ -9,7 +9,6 @@
 #include <vector>
 #include <mkldnn_types.h>
 #include <mkldnn_extension_utils.h>
-#include <ie_memcpy.h>
 #include <algorithm>
 #include <set>
 #include <cmath>
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_resample_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_resample_node.cpp
@ -21,7 +21,7 @@
 #include "jit_uni_eltwise.hpp"
 #include "jit_uni_depthwise.hpp"
 #include "jit_uni_quantization.hpp"
-#include "common/simple_copy.h"
+#include "common/cpu_memcpy.h"

 using namespace mkldnn;
 using namespace MKLDNNPlugin;
@ -697,7 +697,7 @@ void MKLDNNResampleNode::NearestNeighbor_BLK(const in_data_t *in_ptr_, out_data_
                            out_data_t *out_ptr_dhw = out_ptr_dh + C * ox;
                            const in_data_t *in_ptr_dhw = in_ptr_dh + C * index_w[ox];
                            if (fusedWith.empty() && output_prec == input_prec) {
-                                memcpy(out_ptr_dhw + tail, in_ptr_dhw + tail, (C - tail) * sizeof(in_data_t));
+                                cpu_memcpy(out_ptr_dhw + tail, in_ptr_dhw + tail, (C - tail) * sizeof(in_data_t));
                            } else {
                                for (int c = tail; c < C; c++) {
                                    float dst_value = static_cast<float>(in_ptr_dhw[c]);
@ -722,7 +722,7 @@ void MKLDNNResampleNode::NearestNeighbor_BLK(const in_data_t *in_ptr_, out_data_
                        out_data_t *out_ptr_dhw = out_ptr_dh + C * ox;
                        const in_data_t *in_ptr_dhw = in_ptr_dh + C * index_w[ox];
                        if (fusedWith.empty() && output_prec == input_prec) {
-                            memcpy(out_ptr_dhw, in_ptr_dhw, C * sizeof(in_data_t));
+                            cpu_memcpy(out_ptr_dhw, in_ptr_dhw, C * sizeof(in_data_t));
                        } else {
                            for (int c = 0; c < C; c++) {
                                float dst_value = static_cast<float>(in_ptr_dhw[c]);
@ -774,7 +774,7 @@ void MKLDNNResampleNode::NearestNeighbor_BLK(const in_data_t *in_ptr_, out_data_
                            out_data_t *out_ptr_cbdhw = out_ptr_cbdh + blk_size * w;
                            const in_data_t *in_ptr_cbdhw = in_ptr_cbdh + blk_size * index_w[w];
                            if (fusedWith.empty()) {
-                                memcpy(out_ptr_cbdhw, in_ptr_cbdhw, blk_size * sizeof(in_data_t));
+                                cpu_memcpy(out_ptr_cbdhw, in_ptr_cbdhw, blk_size * sizeof(in_data_t));
                            } else {
                                for (int blk = 0; blk < blk_size; blk++) {
                                    float dst_value = static_cast<float>(in_ptr_cbdhw[blk]);
@ -808,7 +808,7 @@ void MKLDNNResampleNode::LinearInterpolation(const in_data_t *in_ptr_, out_data_
        if (input_prec == Precision::FP32) {
            size *= sizeof(float);
        }
-        simple_copy(out_ptr_, size, in_ptr_, size);
+        cpu_memcpy(out_ptr_, in_ptr_, size);
        return;
    }

--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp
@ -21,7 +21,7 @@
 #include "jit_uni_eltwise.hpp"
 #include "jit_uni_depthwise.hpp"
 #include "jit_uni_quantization.hpp"
-#include "common/simple_copy.h"
+#include "common/cpu_memcpy.h"

 using namespace mkldnn;
 using namespace MKLDNNPlugin;
@ -352,7 +352,7 @@ void MKLDNNScatterUpdateNode::execute(mkldnn::stream strm) {
            splitter(srcBlockND[0], nthr, ithr, start, end);
            size_t size = (end - start) * dataSize;
            start *= dataSize;
-            simple_copy(dstPtr + start, size, srcPtr + start, size);
+            cpu_memcpy(dstPtr + start, srcPtr + start, size);
        });
    }

@ -405,7 +405,7 @@ void MKLDNNScatterUpdateNode::scatterUpdate(uint8_t *indices, uint8_t *update, i
        int64_t idxValue = getIndicesValue(indices, idx);
        uint8_t *dstEntry = dstData + (b * srcBlockND[axis] + idxValue * blockToUpdate) * dataSize;
        uint8_t *updateEntry = update + (b * updateBlockND[axis] + idx * blockToUpdate) * dataSize;
-        simple_copy(dstEntry, blockToUpdateSize, updateEntry, blockToUpdateSize);
+        cpu_memcpy(dstEntry, updateEntry, blockToUpdateSize);
    });
 }

@ -435,7 +435,7 @@ void MKLDNNScatterUpdateNode::scatterNDUpdate(uint8_t *indices, uint8_t *update,
        }
        dstOffset *= dataSize;
        size_t updateOffset = tupleIdx * sizeToUpdate;
-        simple_copy(dstData + dstOffset, sizeToUpdate, update + updateOffset, sizeToUpdate);
+        cpu_memcpy(dstData + dstOffset, update + updateOffset, sizeToUpdate);
    });
 }

@ -470,7 +470,7 @@ void MKLDNNScatterUpdateNode::scatterElementsUpdate(uint8_t *indices, uint8_t *u
        for (size_t iwork = start; iwork < end; iwork++) {
            int64_t idxValue = getIndicesValue(indices, iwork);
            if (idxValue < srcDataDim[axis])
-            simple_copy(dstData + dataSize * (dst_idx + idxValue * srcBlockND[axis + 1]), dataSize,
+                cpu_memcpy(dstData + dataSize * (dst_idx + idxValue * srcBlockND[axis + 1]),
                            update + iwork * dataSize, dataSize);

            for (j = updateRank - 1; j >= 0; j--) {
@ -497,4 +497,4 @@ bool MKLDNNScatterUpdateNode::created() const {

 REG_MKLDNN_PRIM_FOR(MKLDNNScatterUpdateNode, ScatterUpdate);
 REG_MKLDNN_PRIM_FOR(MKLDNNScatterUpdateNode, ScatterElementsUpdate);
-REG_MKLDNN_PRIM_FOR(MKLDNNScatterUpdateNode, ScatterNDUpdate);
+REG_MKLDNN_PRIM_FOR(MKLDNNScatterUpdateNode, ScatterNDUpdate);
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp
@ -11,7 +11,6 @@
 #include <map>
 #include <mkldnn_types.h>
 #include <mkldnn_extension_utils.h>
-#include <ie_memcpy.h>
 #include <legacy/graph_transformer.h>

 using namespace mkldnn;
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.cpp
@ -7,6 +7,7 @@
 #include <string>
 #include <mkldnn_types.h>
 #include <mkldnn_extension_utils.h>
+#include "common/cpu_memcpy.h"

 using namespace mkldnn;
 using namespace MKLDNNPlugin;
@ -126,7 +127,7 @@ void MKLDNNTileNode::execute(mkldnn::stream strm) {

    for (int i = 0; i < m_outer_dim; ++i) {
        for (int t = 0; t < tiles; ++t) {
-            memcpy(dst_ptr, src_ptr, m_inner_dim* sizeof(float));
+            cpu_memcpy(dst_ptr, src_ptr, m_inner_dim* sizeof(float));
            dst_ptr += m_inner_dim;
        }
        src_ptr += m_inner_dim;
--- a/inference-engine/src/mkldnn_plugin/nodes/roifeatureextractor_onnx.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/roifeatureextractor_onnx.cpp
@ -14,6 +14,7 @@
 #include <string>
 #include <algorithm>
 #include "ie_parallel.hpp"
+#include "common/cpu_memcpy.h"

 namespace InferenceEngine {
 namespace Extensions {
@ -263,7 +264,7 @@ void reorder(const float* src_data, const int* ranks, const int n, const int ste
    for (int i = 0; i < n; ++i) {
        const int j = dst_mapping[i];
        assert(0 <= j && j < n);
-        std::memcpy(dst_data + i * step, src_data + j * step, sizeof(float) * step);
+        cpu_memcpy(dst_data + i * step, src_data + j * step, sizeof(float) * step);
    }
 }

@ -386,7 +387,7 @@ public:
        reorder(&output_rois_features_temp[0], &original_rois_mapping[0], num_rois, feaxels_per_roi,
                output_rois_features, &dummy_mapping[0]);
        if (output_rois != nullptr) {
-            std::memcpy(output_rois, input_rois, 4 * num_rois * sizeof(float));
+            cpu_memcpy(output_rois, input_rois, 4 * num_rois * sizeof(float));
        }

        return OK;
--- a/inference-engine/src/mkldnn_plugin/nodes/shuffle_channels.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/shuffle_channels.cpp
@ -10,6 +10,7 @@
 #include <set>
 #include <cassert>
 #include "ie_parallel.hpp"
+#include "common/cpu_memcpy.h"

 namespace InferenceEngine {
 namespace Extensions {
@ -157,7 +158,7 @@ public:
                splitter(work_amount_dst, nthr, ithr, start, end);
                src_idx = initter(start, CNTR_SIZE, counters, own_dims, ownStrides);
                for (size_t iwork = start, dst_idx = start * dataLength; iwork < end; ++iwork, dst_idx += dataLength) {
-                    memcpy(&dst_data[dst_idx], &src_data[dataLength * src_idx], sizeof(T) * dataLength);
+                    cpu_memcpy(&dst_data[dst_idx], &src_data[dataLength * src_idx], sizeof(T) * dataLength);
                    src_idx = updater(src_idx, CNTR_SIZE, counters, own_dims, ownStrides);
                }
            });
--- a/inference-engine/src/mkldnn_plugin/nodes/sparse_fill_empty_rows.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/sparse_fill_empty_rows.cpp
@ -12,7 +12,6 @@
 #include <algorithm>
 #include <limits>
 #include "ie_parallel.hpp"
-#include "common/simple_copy.h"

 namespace InferenceEngine {
 namespace Extensions {
--- a/inference-engine/src/mkldnn_plugin/nodes/sparse_segment_reduce.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/sparse_segment_reduce.cpp
@ -12,7 +12,6 @@
 #include <limits>
 #include <functional>
 #include "ie_parallel.hpp"
-#include "common/simple_copy.h"

 namespace InferenceEngine {
 namespace Extensions {
--- a/inference-engine/src/mkldnn_plugin/nodes/sparse_to_dense.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/sparse_to_dense.cpp
@ -12,7 +12,6 @@
 #include <algorithm>
 #include <limits>
 #include "ie_parallel.hpp"
-#include "common/simple_copy.h"

 namespace InferenceEngine {
 namespace Extensions {
--- a/inference-engine/src/mkldnn_plugin/nodes/sparse_weighted_reduce.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/sparse_weighted_reduce.cpp
@ -12,7 +12,6 @@
 #include <algorithm>
 #include <limits>
 #include "ie_parallel.hpp"
-#include "common/simple_copy.h"

 namespace InferenceEngine {
 namespace Extensions {
--- a/inference-engine/src/mkldnn_plugin/nodes/squeeze.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/squeeze.cpp
@ -9,7 +9,7 @@
 #include <vector>
 #include <cassert>
 #include "ie_parallel.hpp"
-#include "common/simple_copy.h"
+#include "common/cpu_memcpy.h"

 namespace InferenceEngine {
 namespace Extensions {
@ -49,7 +49,7 @@ public:
        if (src != dst) {
            size_t srcSize = inputs[0]->byteSize();
            size_t dstSize = outputs[0]->byteSize();
-            simple_copy(dst, dstSize, src, srcSize);
+            cpu_memcpy_s(dst, dstSize, src, srcSize);
        }

        return OK;
--- a/inference-engine/src/mkldnn_plugin/nodes/strided_slice.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/strided_slice.cpp
@ -10,6 +10,7 @@
 #include <cassert>
 #include <algorithm>
 #include "ie_parallel.hpp"
+#include "common/cpu_memcpy.h"

 namespace InferenceEngine {
 namespace Extensions {
@ -323,7 +324,7 @@ void StridedSliceImpl::strided_slice_vp(const float *src_data, float* dst_data)
        }

        for (size_t iwork = start, dst_idx = start * dataLength, i = 1; iwork < end; ++iwork, dst_idx += dataLength) {
-            memcpy(&dst_data[dst_idx], &src_data[src_idx], sizeof(float) * dataLength);
+            cpu_memcpy(&dst_data[dst_idx], &src_data[src_idx], sizeof(float) * dataLength);
            for (int j = dims_size_1 - 1; j >= 0; j--) {
                counters[j]++;
                if (counters[j] < dst_dims[j]) {
--- a/inference-engine/src/mkldnn_plugin/nodes/topkrois_onnx.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/topkrois_onnx.cpp
@ -6,6 +6,7 @@
 #include <algorithm>
 #include <cassert>
 #include <vector>
+#include "common/cpu_memcpy.h"


 namespace InferenceEngine {
@ -60,7 +61,7 @@ public:
        sort(idx.begin(), idx.end(), [&input_probs](size_t i1, size_t i2) {return input_probs[i1] > input_probs[i2];});

        for (int i = 0; i < top_rois_num; ++i) {
-            std::memcpy(output_rois + 4 * i, input_rois + 4 * idx[i], 4 * sizeof(float));
+            cpu_memcpy(output_rois + 4 * i, input_rois + 4 * idx[i], 4 * sizeof(float));
        }

        return OK;
--- a/inference-engine/src/mkldnn_plugin/nodes/unique.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/unique.cpp
@ -14,7 +14,6 @@
 #include <limits>
 #include <utility>
 #include "ie_parallel.hpp"
-#include "common/simple_copy.h"

 namespace InferenceEngine {
 namespace Extensions {
--- a/inference-engine/src/mkldnn_plugin/nodes/unsqueeze.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/unsqueeze.cpp
@ -9,7 +9,7 @@
 #include <vector>
 #include <cassert>
 #include "ie_parallel.hpp"
-#include "common/simple_copy.h"
+#include "common/cpu_memcpy.h"

 namespace InferenceEngine {
 namespace Extensions {
@ -44,7 +44,7 @@ public:
        if (src != dst) {
            size_t srcSize = inputs[0]->byteSize();
            size_t dstSize = outputs[0]->byteSize();
-            simple_copy(dst, dstSize, src, srcSize);
+            cpu_memcpy_s(dst, dstSize, src, srcSize);
        }

        return OK;
--- a/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp
+++ b/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp
@ -4,8 +4,6 @@

 #include "ie_ir_parser.hpp"

-#include <ie_memcpy.h>
-
 #include <typeinfo>
 #include <unordered_set>
 #include <algorithm>
--- a/inference-engine/tests/unit/inference_engine/ie_blob_test.cpp
+++ b/inference-engine/tests/unit/inference_engine/ie_blob_test.cpp
@ -8,7 +8,7 @@

 #include "unit_test_utils/mocks/mock_allocator.hpp"

-#ifdef WIN32
+#ifdef _WIN32
 #define UNUSED
 #else
 #define UNUSED  __attribute__((unused))
--- a/inference-engine/thirdparty/CMakeLists.txt
+++ b/inference-engine/thirdparty/CMakeLists.txt
@ -9,6 +9,11 @@ elseif(CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQ
    # On g++ 9.3.0 (Ubuntu 20.04) the ADE library raises "redundant-move" warnings
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=redundant-move")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-error=redundant-move")
+elseif(("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") AND ("${MSVC_VERSION}" VERSION_GREATER_EQUAL "1920"))
+    # 1920 is the MSVC_VERSION value of MSVC 2019.
+    # This flag is needed to enable SIMD vectorization with the '#pragma omp simd' directive.
+    # Compiling with the '/openmp:experimental' option allows us to enable the vectorization capability in MSVC.
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /openmp:experimental")
 endif()

 function(build_with_lto)
--- a/inference-engine/thirdparty/mkl-dnn
+++ b/inference-engine/thirdparty/mkl-dnn
@ -1 +1 @@
-Subproject commit eb54063189a33a10c4aa90311788e6fbb4cdf2f6
+Subproject commit b73474c80c21ae170b112803a1fc315e1549bdab