[CPU] [MSVC] memcpy and memcpy_s were unified for all layers and SIMD functionality was enabled on VS2019 (#1374)
This commit is contained in:
parent
675e1c821e
commit
b6b536f23c
@ -4,7 +4,7 @@
|
||||
|
||||
#include "mean_image.h"
|
||||
#include "ie_parallel.hpp"
|
||||
#include "ie_memcpy.h"
|
||||
#include "nodes/common/cpu_memcpy.h"
|
||||
|
||||
using namespace MKLDNNPlugin;
|
||||
using namespace InferenceEngine;
|
||||
@ -53,7 +53,7 @@ void MeanImage::Load(const MKLDNNDims& inputDims, InputInfo::Ptr inputInfo) {
|
||||
THROW_IE_EXCEPTION << "mean image size does not match expected network input, expecting " << meanWidth << " x " << meanHeight;
|
||||
}
|
||||
// todo: cast to TBlob and make sure it is floats
|
||||
ie_memcpy(meanBuffer->data() + channel*meanBlob->size(), meanBuffer->byteSize() - channel*meanBlob->byteSize(),
|
||||
cpu_memcpy_s(meanBuffer->data() + channel*meanBlob->size(), meanBuffer->byteSize() - channel*meanBlob->byteSize(),
|
||||
meanBlob->buffer(), meanBlob->byteSize());
|
||||
}
|
||||
}
|
||||
|
@ -28,7 +28,7 @@
|
||||
#include <blob_factory.hpp>
|
||||
#include <legacy/net_pass.h>
|
||||
#include <legacy/details/ie_cnn_network_tools.h>
|
||||
#include <ie_memcpy.h>
|
||||
#include "nodes/common/cpu_memcpy.h"
|
||||
|
||||
#include "precision_utils.h"
|
||||
#include <ie_plugin_config.hpp>
|
||||
@ -775,7 +775,7 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) {
|
||||
MB_to_process = std::min<int>(config.batchLimit, MB_to_process);
|
||||
size_t size_to_copy = intr_blob.GetSize() * MB_to_process / MB;
|
||||
|
||||
ie_memcpy(ext_blob_ptr, ext_blob->byteSize(), intr_blob_ptr, size_to_copy);
|
||||
cpu_memcpy_s(ext_blob_ptr, ext_blob->byteSize(), intr_blob_ptr, size_to_copy);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include "mkldnn_memory.h"
|
||||
#include "mkldnn_node.h"
|
||||
#include "mkldnn_extension_utils.h"
|
||||
#include "nodes/common/cpu_memcpy.h"
|
||||
|
||||
using namespace InferenceEngine;
|
||||
using namespace mkldnn;
|
||||
@ -110,7 +111,7 @@ void MKLDNNMemory::SetData(memory::data_type dataType, memory::format format, co
|
||||
uint8_t* dataPtr = static_cast<uint8_t*>(GetData());
|
||||
// We cannot support strides for i/o blobs because it affects performance.
|
||||
dataPtr += itemSize * prim->get_primitive_desc().desc().data.layout_desc.blocking.offset_padding;
|
||||
memcpy(dataPtr, data, size);
|
||||
cpu_memcpy(dataPtr, data, size);
|
||||
}
|
||||
|
||||
if (ftz && dataType == mkldnn_f32) {
|
||||
|
@ -47,7 +47,7 @@
|
||||
#include <mkldnn_types.h>
|
||||
#include "mkldnn_extension_utils.h"
|
||||
|
||||
#include "ie_memcpy.h"
|
||||
#include "nodes/common/cpu_memcpy.h"
|
||||
#include "mkldnn_debug.h"
|
||||
|
||||
using namespace mkldnn;
|
||||
@ -698,7 +698,7 @@ InferenceEngine::Blob::Ptr MKLDNNNode::createInternalBlob(InferenceEngine::SizeV
|
||||
auto fillInternalBlob = [&](char *data, size_t intBuffSize) {
|
||||
size_t offset = blb->byteSize();
|
||||
checkSize(intBuffSize, offset);
|
||||
ie_memcpy(data, intBuffSize, blb->buffer(), blb->byteSize());
|
||||
cpu_memcpy_s(data, intBuffSize, blb->buffer(), blb->byteSize());
|
||||
data += blb->byteSize();
|
||||
for (const auto &merged : getMergeWith()) {
|
||||
wLayer = dynamic_cast<InferenceEngine::WeightableLayer*>(merged->getCnnLayer().get());
|
||||
@ -711,7 +711,7 @@ InferenceEngine::Blob::Ptr MKLDNNNode::createInternalBlob(InferenceEngine::SizeV
|
||||
THROW_IE_EXCEPTION << "Cannot get internal blob layer for node " << getName() << ".";
|
||||
offset += blb->byteSize();
|
||||
checkSize(intBuffSize, offset);
|
||||
ie_memcpy(data, intBuffSize, blb->buffer(), blb->byteSize());
|
||||
cpu_memcpy_s(data, intBuffSize, blb->buffer(), blb->byteSize());
|
||||
data += blb->byteSize();
|
||||
}
|
||||
};
|
||||
|
@ -9,7 +9,7 @@
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
#include "ie_parallel.hpp"
|
||||
#include "common/simple_copy.h"
|
||||
#include "common/cpu_memcpy.h"
|
||||
|
||||
namespace InferenceEngine {
|
||||
namespace Extensions {
|
||||
@ -111,7 +111,7 @@ public:
|
||||
for (i = 0, src_idx = 0; i < dst_dims.size(); ++i)
|
||||
src_idx += counters[i] ? ((counters[i] % src_aligned[i]) * srcStrides_aligned[i]) : 0;
|
||||
|
||||
simple_copy(&dst_data[iwork], data_size, &src_data[src_idx * data_size], data_size);
|
||||
cpu_memcpy(&dst_data[iwork], &src_data[src_idx * data_size], data_size);
|
||||
|
||||
for (int j = dst_dims.size() - 1; j >= 0; j--) {
|
||||
counters[j] = (counters[j] + 1) % dst_dims[j];
|
||||
|
@ -12,7 +12,6 @@
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include "ie_parallel.hpp"
|
||||
#include "common/simple_copy.h"
|
||||
|
||||
namespace InferenceEngine {
|
||||
namespace Extensions {
|
||||
|
28
inference-engine/src/mkldnn_plugin/nodes/common/simple_copy.h → inference-engine/src/mkldnn_plugin/nodes/common/cpu_memcpy.h
Normal file → Executable file
28
inference-engine/src/mkldnn_plugin/nodes/common/simple_copy.h → inference-engine/src/mkldnn_plugin/nodes/common/cpu_memcpy.h
Normal file → Executable file
@ -11,9 +11,9 @@
|
||||
* @brief Copies bytes between buffers with security enhancements
|
||||
* Copies count bytes from src to dest. If the source and destination
|
||||
* overlap, the behavior is undefined.
|
||||
* @param dest
|
||||
* @param dst
|
||||
* pointer to the object to copy to
|
||||
* @param destsz
|
||||
* @param dst_size
|
||||
* max number of bytes to modify in the destination (typically the size
|
||||
* of the destination object)
|
||||
* @param src
|
||||
@ -23,16 +23,28 @@
|
||||
@return zero on success and non-zero value on error.
|
||||
*/
|
||||
|
||||
inline int simple_copy(void* dest, size_t destsz, void const* src, size_t count) {
|
||||
// Copies `count` bytes from `src` to `dst` without any destination-size argument from the caller.
// On Windows builds (_WIN32) the copy goes through memcpy_s; elsewhere it goes through std::memcpy.
inline void cpu_memcpy(void* dst, const void* src, size_t count) {
|
||||
#ifdef _WIN32
|
||||
// The destination capacity passed to memcpy_s is `count` itself, and the returned status is discarded.
memcpy_s(dst, count, src, count);
|
||||
#else
|
||||
std::memcpy(dst, src, count);
|
||||
#endif
|
||||
}
|
||||
|
||||
inline int cpu_memcpy_s(void* dst, size_t dst_size, const void* src, size_t count) {
|
||||
size_t i;
|
||||
if (!src || count > destsz ||
|
||||
count > (dest > src ? ((uintptr_t)dest - (uintptr_t)src)
|
||||
: ((uintptr_t)src - (uintptr_t)dest))) {
|
||||
if (!src ||
|
||||
count > dst_size ||
|
||||
count > (dst > src ? ((uintptr_t)dst - (uintptr_t)src) : ((uintptr_t)src - (uintptr_t)dst))) {
|
||||
// zero out dest if error detected
|
||||
memset(dest, 0, destsz);
|
||||
std::memset(dst, 0, dst_size);
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (i = 0; i < count; ++i) (reinterpret_cast<uint8_t*>(dest))[i] = (reinterpret_cast<const uint8_t*>(src))[i];
|
||||
#ifdef _WIN32
|
||||
memcpy_s(dst, dst_size, src, count);
|
||||
#else
|
||||
std::memcpy(dst, src, count);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
@ -3,6 +3,7 @@
|
||||
//
|
||||
|
||||
#include "embedding_bag_sum.hpp"
|
||||
#include "common/cpu_memcpy.h"
|
||||
|
||||
namespace InferenceEngine {
|
||||
namespace Extensions {
|
||||
@ -38,7 +39,7 @@ public:
|
||||
} else if (inputs[INDICES_IDX]->getTensorDesc().getPrecision().size() == sizeof(UINT64)) {
|
||||
const UINT64* src = inputs[INDICES_IDX]->cbuffer().as<const UINT64*>();
|
||||
for (size_t i = 0lu; i < bagsNum; i++) {
|
||||
memcpy(_indices[i].data(), src + i * batch, batch * sizeof(UINT64));
|
||||
cpu_memcpy(_indices[i].data(), src + i * batch, batch * sizeof(UINT64));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -3,6 +3,7 @@
|
||||
//
|
||||
|
||||
#include "embedding_bag_sum.hpp"
|
||||
#include "common/cpu_memcpy.h"
|
||||
|
||||
namespace InferenceEngine {
|
||||
namespace Extensions {
|
||||
@ -51,7 +52,7 @@ public:
|
||||
_indices[i] = static_cast<size_t>(src[i]);
|
||||
} else if (inputs[INDICES_IDX]->getTensorDesc().getPrecision().size() == sizeof(UINT64)) {
|
||||
const UINT64* src = inputs[INDICES_IDX]->cbuffer().as<const UINT64*>();
|
||||
memcpy(_indices.data(), src, inputs[INDICES_IDX]->byteSize());
|
||||
cpu_memcpy(_indices.data(), src, inputs[INDICES_IDX]->byteSize());
|
||||
}
|
||||
|
||||
// Initialize segments ids
|
||||
@ -61,7 +62,7 @@ public:
|
||||
_segmentIds[i] = static_cast<size_t>(src[i]);
|
||||
} else if (inputs[SEGMENT_ID_IDX]->getTensorDesc().getPrecision().size() == sizeof(UINT64)) {
|
||||
const UINT64* src = inputs[SEGMENT_ID_IDX]->cbuffer().as<const UINT64*>();
|
||||
memcpy(_segmentIds.data(), src, inputs[SEGMENT_ID_IDX]->byteSize());
|
||||
cpu_memcpy(_segmentIds.data(), src, inputs[SEGMENT_ID_IDX]->byteSize());
|
||||
}
|
||||
|
||||
if (inputs.size() > NUM_SEGMENTS_IDX) {
|
||||
|
@ -11,7 +11,7 @@
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include "ie_parallel.hpp"
|
||||
#include "common/simple_copy.h"
|
||||
#include "common/cpu_memcpy.h"
|
||||
#include "common/fp16_utils.h"
|
||||
|
||||
namespace InferenceEngine {
|
||||
@ -125,7 +125,7 @@ private:
|
||||
if (idx < indexRange) {
|
||||
// Copying data to destination from Dictionary
|
||||
for (size_t j = 0; j < numDictionaries; j++) {
|
||||
simple_copy(&dst_data[len * (i + j * src_indexSize)],
|
||||
cpu_memcpy_s(&dst_data[len * (i + j * src_indexSize)],
|
||||
output->byteSize() - (len * (i + j * src_indexSize)),
|
||||
&src_dataDict[len * (idx + j * indexRange)],
|
||||
len);
|
||||
|
@ -5,7 +5,7 @@
|
||||
#include "mkldnn_batchnorm_node.h"
|
||||
#include "mkldnn_depthwise_node.h"
|
||||
#include <mkldnn_extension_utils.h>
|
||||
#include "ie_memcpy.h"
|
||||
#include "common/cpu_memcpy.h"
|
||||
|
||||
using namespace mkldnn;
|
||||
using namespace MKLDNNPlugin;
|
||||
@ -81,7 +81,7 @@ void MKLDNNBatchNormalizationNode::getSupportedDescriptors() {
|
||||
THROW_IE_EXCEPTION << "Cannot get weights blob for node " << getName() << ".";
|
||||
|
||||
size_t weightsByteSize = blb->byteSize();
|
||||
ie_memcpy(data, internalBlob->byteSize(), blb->buffer(), weightsByteSize);
|
||||
cpu_memcpy_s(data, internalBlob->byteSize(), blb->buffer(), weightsByteSize);
|
||||
data += blb->size();
|
||||
blb = scshLayer->_biases;
|
||||
|
||||
@ -90,7 +90,7 @@ void MKLDNNBatchNormalizationNode::getSupportedDescriptors() {
|
||||
} else {
|
||||
if (weightsByteSize != blb->byteSize())
|
||||
THROW_IE_EXCEPTION << "ScaleShift has incorrect weights!";
|
||||
ie_memcpy(data, internalBlob->byteSize(), blb->buffer(), weightsByteSize);
|
||||
cpu_memcpy_s(data, internalBlob->byteSize(), blb->buffer(), weightsByteSize);
|
||||
}
|
||||
internalBlobs.push_back(internalBlob);
|
||||
}
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "mkldnn_quantize_node.h"
|
||||
#include "mkldnn_pooling_node.h"
|
||||
#include <limits>
|
||||
#include "common/cpu_memcpy.h"
|
||||
|
||||
using namespace mkldnn;
|
||||
using namespace MKLDNNPlugin;
|
||||
@ -646,7 +647,7 @@ void MKLDNNConcatNode::execute(mkldnn::stream strm) {
|
||||
parallel_for(iter_count, [&](int i) {
|
||||
const size_t dst_off = i * channels_size;
|
||||
for (int j = 0; j < num_src; j++) {
|
||||
memcpy(dst_ptrs[j] + dst_off, src_ptrs[j] + i * channels[j], channels[j]);
|
||||
cpu_memcpy(dst_ptrs[j] + dst_off, src_ptrs[j] + i * channels[j], channels[j]);
|
||||
}
|
||||
});
|
||||
} else {
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <mkldnn_types.h>
|
||||
#include <mkldnn_extension_utils.h>
|
||||
#include "ie_parallel.hpp"
|
||||
#include "common/cpu_memcpy.h"
|
||||
|
||||
using namespace mkldnn;
|
||||
using namespace MKLDNNPlugin;
|
||||
@ -162,7 +163,7 @@ void MKLDNNCropNode::execute(mkldnn::stream strm) {
|
||||
#ifdef _WIN32
|
||||
if (OD == 1 && OH == 1 && OW == 1 && ID == 1 && IH == 1 && IW == 1) {
|
||||
for (int n = 0; n < ON; ++n) {
|
||||
memcpy(&dst_data[n*OC], &src_data[(n+OFFSET_N)*IC + OFFSET_C], OC * sizeof(float));
|
||||
cpu_memcpy(&dst_data[n*OC], &src_data[(n+OFFSET_N)*IC + OFFSET_C], OC * sizeof(float));
|
||||
}
|
||||
} else {
|
||||
for (int n = 0; n < ON; ++n) {
|
||||
@ -180,7 +181,7 @@ void MKLDNNCropNode::execute(mkldnn::stream strm) {
|
||||
(h+OFFSET_H)*IW*m_block_size +
|
||||
OFFSET_W*m_block_size;
|
||||
|
||||
memcpy(dst_data + dst_ind, src_data + src_ind, m_inner_dim * sizeof(float));
|
||||
cpu_memcpy(dst_data + dst_ind, src_data + src_ind, m_inner_dim * sizeof(float));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -189,7 +190,7 @@ void MKLDNNCropNode::execute(mkldnn::stream strm) {
|
||||
#else
|
||||
if (OD == 1 && OH == 1 && OW == 1 && ID == 1 && IH == 1 && IW == 1) {
|
||||
parallel_for(ON, [&](int n) {
|
||||
memcpy(&dst_data[n*OC], &src_data[(n+OFFSET_N)*IC + OFFSET_C], OC * sizeof(float));
|
||||
cpu_memcpy(&dst_data[n*OC], &src_data[(n+OFFSET_N)*IC + OFFSET_C], OC * sizeof(float));
|
||||
});
|
||||
} else {
|
||||
parallel_for2d(ON, (OC / m_block_size), [&](int n, int c) {
|
||||
@ -200,7 +201,7 @@ void MKLDNNCropNode::execute(mkldnn::stream strm) {
|
||||
((d+OFFSET_D)*IH*IW + OFFSET_H*IW + OFFSET_W)*m_block_size;
|
||||
|
||||
for (int h = 0; h < OH; ++h) {
|
||||
memcpy(dst_data + dst_ind, src_data + src_ind, m_inner_dim * sizeof(float));
|
||||
cpu_memcpy(dst_data + dst_ind, src_data + src_ind, m_inner_dim * sizeof(float));
|
||||
|
||||
src_ind += IW * m_block_size;
|
||||
dst_ind += OW * m_block_size;
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include <mkldnn_types.h>
|
||||
#include <mkldnn_extension_utils.h>
|
||||
#include "ie_parallel.hpp"
|
||||
#include "common/cpu_memcpy.h"
|
||||
|
||||
using namespace mkldnn;
|
||||
using namespace MKLDNNPlugin;
|
||||
@ -262,7 +263,7 @@ void MKLDNNGemmNode::process_data() {
|
||||
|
||||
for (int b2 = 0; b2 < MB2; b2++) {
|
||||
if (isThreeInputs) {
|
||||
memcpy(d_ptr, c_ptr, M * N * sizeof(float));
|
||||
cpu_memcpy(d_ptr, c_ptr, M * N * sizeof(float));
|
||||
c_ptr += cOffsets[0];
|
||||
}
|
||||
|
||||
|
@ -8,7 +8,7 @@
|
||||
#include <tuple>
|
||||
#include <algorithm>
|
||||
#include "caseless.hpp"
|
||||
#include "ie_memcpy.h"
|
||||
#include "common/cpu_memcpy.h"
|
||||
|
||||
using namespace mkldnn;
|
||||
using namespace MKLDNNPlugin;
|
||||
@ -154,7 +154,7 @@ void MKLDNNInputNode::execute(mkldnn::stream strm) {
|
||||
const int8_t *srcData = constBlob->cbuffer().as<int8_t *>();
|
||||
int8_t *dstData = dstBlob->buffer();
|
||||
|
||||
ie_memcpy(dstData, dstBlob->byteSize(), srcData, constBlob->byteSize());
|
||||
cpu_memcpy_s(dstData, dstBlob->byteSize(), srcData, constBlob->byteSize());
|
||||
} else {
|
||||
switch (precision.size()) {
|
||||
case 1: {
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <mkldnn_types.h>
|
||||
#include <mkldnn_extension_utils.h>
|
||||
#include "mkldnn_memory_node.hpp"
|
||||
#include "common/cpu_memcpy.h"
|
||||
|
||||
using namespace mkldnn;
|
||||
using namespace MKLDNNPlugin;
|
||||
@ -88,7 +89,7 @@ static void simple_copy(MKLDNNMemory& dst, const MKLDNNMemory& src) {
|
||||
|
||||
IE_ASSERT(srcSizeInByte == dstSizeInByte) << "Memory objects are not compatible. Has different sizes.";
|
||||
|
||||
memcpy(dstPtr, srcPtr, srcSizeInByte);
|
||||
cpu_memcpy(dstPtr, srcPtr, srcSizeInByte);
|
||||
}
|
||||
|
||||
MKLDNNMemoryInputNode::~MKLDNNMemoryInputNode() {
|
||||
|
@ -12,7 +12,7 @@
|
||||
#include "jit_uni_depthwise.hpp"
|
||||
#include "jit_uni_quantization.hpp"
|
||||
#include "bf16transformer.h"
|
||||
|
||||
#include "common/cpu_memcpy.h"
|
||||
#include "mkldnn_normalize_node.h"
|
||||
|
||||
using namespace mkldnn;
|
||||
@ -1317,7 +1317,7 @@ void MKLDNNNormalizeNode::normalize_blk(const in_data_t* src_data, out_data_t* d
|
||||
// post ops for tails: post-ops params is padding.
|
||||
std::vector<float> weights_padding(CB * blk_size);
|
||||
if (!channel_shared) {
|
||||
memcpy(static_cast<float*>(&weights_padding[0]), weights, C * sizeof(float));
|
||||
cpu_memcpy(static_cast<float*>(&weights_padding[0]), weights, C * sizeof(float));
|
||||
}
|
||||
|
||||
for (size_t b = 0lu; b < B; b++) {
|
||||
|
@ -9,7 +9,6 @@
|
||||
#include <vector>
|
||||
#include <mkldnn_types.h>
|
||||
#include <mkldnn_extension_utils.h>
|
||||
#include <ie_memcpy.h>
|
||||
#include <algorithm>
|
||||
#include <set>
|
||||
#include <cmath>
|
||||
|
@ -21,7 +21,7 @@
|
||||
#include "jit_uni_eltwise.hpp"
|
||||
#include "jit_uni_depthwise.hpp"
|
||||
#include "jit_uni_quantization.hpp"
|
||||
#include "common/simple_copy.h"
|
||||
#include "common/cpu_memcpy.h"
|
||||
|
||||
using namespace mkldnn;
|
||||
using namespace MKLDNNPlugin;
|
||||
@ -697,7 +697,7 @@ void MKLDNNResampleNode::NearestNeighbor_BLK(const in_data_t *in_ptr_, out_data_
|
||||
out_data_t *out_ptr_dhw = out_ptr_dh + C * ox;
|
||||
const in_data_t *in_ptr_dhw = in_ptr_dh + C * index_w[ox];
|
||||
if (fusedWith.empty() && output_prec == input_prec) {
|
||||
memcpy(out_ptr_dhw + tail, in_ptr_dhw + tail, (C - tail) * sizeof(in_data_t));
|
||||
cpu_memcpy(out_ptr_dhw + tail, in_ptr_dhw + tail, (C - tail) * sizeof(in_data_t));
|
||||
} else {
|
||||
for (int c = tail; c < C; c++) {
|
||||
float dst_value = static_cast<float>(in_ptr_dhw[c]);
|
||||
@ -722,7 +722,7 @@ void MKLDNNResampleNode::NearestNeighbor_BLK(const in_data_t *in_ptr_, out_data_
|
||||
out_data_t *out_ptr_dhw = out_ptr_dh + C * ox;
|
||||
const in_data_t *in_ptr_dhw = in_ptr_dh + C * index_w[ox];
|
||||
if (fusedWith.empty() && output_prec == input_prec) {
|
||||
memcpy(out_ptr_dhw, in_ptr_dhw, C * sizeof(in_data_t));
|
||||
cpu_memcpy(out_ptr_dhw, in_ptr_dhw, C * sizeof(in_data_t));
|
||||
} else {
|
||||
for (int c = 0; c < C; c++) {
|
||||
float dst_value = static_cast<float>(in_ptr_dhw[c]);
|
||||
@ -774,7 +774,7 @@ void MKLDNNResampleNode::NearestNeighbor_BLK(const in_data_t *in_ptr_, out_data_
|
||||
out_data_t *out_ptr_cbdhw = out_ptr_cbdh + blk_size * w;
|
||||
const in_data_t *in_ptr_cbdhw = in_ptr_cbdh + blk_size * index_w[w];
|
||||
if (fusedWith.empty()) {
|
||||
memcpy(out_ptr_cbdhw, in_ptr_cbdhw, blk_size * sizeof(in_data_t));
|
||||
cpu_memcpy(out_ptr_cbdhw, in_ptr_cbdhw, blk_size * sizeof(in_data_t));
|
||||
} else {
|
||||
for (int blk = 0; blk < blk_size; blk++) {
|
||||
float dst_value = static_cast<float>(in_ptr_cbdhw[blk]);
|
||||
@ -808,7 +808,7 @@ void MKLDNNResampleNode::LinearInterpolation(const in_data_t *in_ptr_, out_data_
|
||||
if (input_prec == Precision::FP32) {
|
||||
size *= sizeof(float);
|
||||
}
|
||||
simple_copy(out_ptr_, size, in_ptr_, size);
|
||||
cpu_memcpy(out_ptr_, in_ptr_, size);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -21,7 +21,7 @@
|
||||
#include "jit_uni_eltwise.hpp"
|
||||
#include "jit_uni_depthwise.hpp"
|
||||
#include "jit_uni_quantization.hpp"
|
||||
#include "common/simple_copy.h"
|
||||
#include "common/cpu_memcpy.h"
|
||||
|
||||
using namespace mkldnn;
|
||||
using namespace MKLDNNPlugin;
|
||||
@ -352,7 +352,7 @@ void MKLDNNScatterUpdateNode::execute(mkldnn::stream strm) {
|
||||
splitter(srcBlockND[0], nthr, ithr, start, end);
|
||||
size_t size = (end - start) * dataSize;
|
||||
start *= dataSize;
|
||||
simple_copy(dstPtr + start, size, srcPtr + start, size);
|
||||
cpu_memcpy(dstPtr + start, srcPtr + start, size);
|
||||
});
|
||||
}
|
||||
|
||||
@ -405,7 +405,7 @@ void MKLDNNScatterUpdateNode::scatterUpdate(uint8_t *indices, uint8_t *update, i
|
||||
int64_t idxValue = getIndicesValue(indices, idx);
|
||||
uint8_t *dstEntry = dstData + (b * srcBlockND[axis] + idxValue * blockToUpdate) * dataSize;
|
||||
uint8_t *updateEntry = update + (b * updateBlockND[axis] + idx * blockToUpdate) * dataSize;
|
||||
simple_copy(dstEntry, blockToUpdateSize, updateEntry, blockToUpdateSize);
|
||||
cpu_memcpy(dstEntry, updateEntry, blockToUpdateSize);
|
||||
});
|
||||
}
|
||||
|
||||
@ -435,7 +435,7 @@ void MKLDNNScatterUpdateNode::scatterNDUpdate(uint8_t *indices, uint8_t *update,
|
||||
}
|
||||
dstOffset *= dataSize;
|
||||
size_t updateOffset = tupleIdx * sizeToUpdate;
|
||||
simple_copy(dstData + dstOffset, sizeToUpdate, update + updateOffset, sizeToUpdate);
|
||||
cpu_memcpy(dstData + dstOffset, update + updateOffset, sizeToUpdate);
|
||||
});
|
||||
}
|
||||
|
||||
@ -470,7 +470,7 @@ void MKLDNNScatterUpdateNode::scatterElementsUpdate(uint8_t *indices, uint8_t *u
|
||||
for (size_t iwork = start; iwork < end; iwork++) {
|
||||
int64_t idxValue = getIndicesValue(indices, iwork);
|
||||
if (idxValue < srcDataDim[axis])
|
||||
simple_copy(dstData + dataSize * (dst_idx + idxValue * srcBlockND[axis + 1]), dataSize,
|
||||
cpu_memcpy(dstData + dataSize * (dst_idx + idxValue * srcBlockND[axis + 1]),
|
||||
update + iwork * dataSize, dataSize);
|
||||
|
||||
for (j = updateRank - 1; j >= 0; j--) {
|
||||
@ -497,4 +497,4 @@ bool MKLDNNScatterUpdateNode::created() const {
|
||||
|
||||
REG_MKLDNN_PRIM_FOR(MKLDNNScatterUpdateNode, ScatterUpdate);
|
||||
REG_MKLDNN_PRIM_FOR(MKLDNNScatterUpdateNode, ScatterElementsUpdate);
|
||||
REG_MKLDNN_PRIM_FOR(MKLDNNScatterUpdateNode, ScatterNDUpdate);
|
||||
REG_MKLDNN_PRIM_FOR(MKLDNNScatterUpdateNode, ScatterNDUpdate);
|
||||
|
@ -11,7 +11,6 @@
|
||||
#include <map>
|
||||
#include <mkldnn_types.h>
|
||||
#include <mkldnn_extension_utils.h>
|
||||
#include <ie_memcpy.h>
|
||||
#include <legacy/graph_transformer.h>
|
||||
|
||||
using namespace mkldnn;
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <string>
|
||||
#include <mkldnn_types.h>
|
||||
#include <mkldnn_extension_utils.h>
|
||||
#include "common/cpu_memcpy.h"
|
||||
|
||||
using namespace mkldnn;
|
||||
using namespace MKLDNNPlugin;
|
||||
@ -126,7 +127,7 @@ void MKLDNNTileNode::execute(mkldnn::stream strm) {
|
||||
|
||||
for (int i = 0; i < m_outer_dim; ++i) {
|
||||
for (int t = 0; t < tiles; ++t) {
|
||||
memcpy(dst_ptr, src_ptr, m_inner_dim* sizeof(float));
|
||||
cpu_memcpy(dst_ptr, src_ptr, m_inner_dim* sizeof(float));
|
||||
dst_ptr += m_inner_dim;
|
||||
}
|
||||
src_ptr += m_inner_dim;
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
#include "ie_parallel.hpp"
|
||||
#include "common/cpu_memcpy.h"
|
||||
|
||||
namespace InferenceEngine {
|
||||
namespace Extensions {
|
||||
@ -263,7 +264,7 @@ void reorder(const float* src_data, const int* ranks, const int n, const int ste
|
||||
for (int i = 0; i < n; ++i) {
|
||||
const int j = dst_mapping[i];
|
||||
assert(0 <= j && j < n);
|
||||
std::memcpy(dst_data + i * step, src_data + j * step, sizeof(float) * step);
|
||||
cpu_memcpy(dst_data + i * step, src_data + j * step, sizeof(float) * step);
|
||||
}
|
||||
}
|
||||
|
||||
@ -386,7 +387,7 @@ public:
|
||||
reorder(&output_rois_features_temp[0], &original_rois_mapping[0], num_rois, feaxels_per_roi,
|
||||
output_rois_features, &dummy_mapping[0]);
|
||||
if (output_rois != nullptr) {
|
||||
std::memcpy(output_rois, input_rois, 4 * num_rois * sizeof(float));
|
||||
cpu_memcpy(output_rois, input_rois, 4 * num_rois * sizeof(float));
|
||||
}
|
||||
|
||||
return OK;
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <set>
|
||||
#include <cassert>
|
||||
#include "ie_parallel.hpp"
|
||||
#include "common/cpu_memcpy.h"
|
||||
|
||||
namespace InferenceEngine {
|
||||
namespace Extensions {
|
||||
@ -157,7 +158,7 @@ public:
|
||||
splitter(work_amount_dst, nthr, ithr, start, end);
|
||||
src_idx = initter(start, CNTR_SIZE, counters, own_dims, ownStrides);
|
||||
for (size_t iwork = start, dst_idx = start * dataLength; iwork < end; ++iwork, dst_idx += dataLength) {
|
||||
memcpy(&dst_data[dst_idx], &src_data[dataLength * src_idx], sizeof(T) * dataLength);
|
||||
cpu_memcpy(&dst_data[dst_idx], &src_data[dataLength * src_idx], sizeof(T) * dataLength);
|
||||
src_idx = updater(src_idx, CNTR_SIZE, counters, own_dims, ownStrides);
|
||||
}
|
||||
});
|
||||
|
@ -12,7 +12,6 @@
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include "ie_parallel.hpp"
|
||||
#include "common/simple_copy.h"
|
||||
|
||||
namespace InferenceEngine {
|
||||
namespace Extensions {
|
||||
|
@ -12,7 +12,6 @@
|
||||
#include <limits>
|
||||
#include <functional>
|
||||
#include "ie_parallel.hpp"
|
||||
#include "common/simple_copy.h"
|
||||
|
||||
namespace InferenceEngine {
|
||||
namespace Extensions {
|
||||
|
@ -12,7 +12,6 @@
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include "ie_parallel.hpp"
|
||||
#include "common/simple_copy.h"
|
||||
|
||||
namespace InferenceEngine {
|
||||
namespace Extensions {
|
||||
|
@ -12,7 +12,6 @@
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include "ie_parallel.hpp"
|
||||
#include "common/simple_copy.h"
|
||||
|
||||
namespace InferenceEngine {
|
||||
namespace Extensions {
|
||||
|
@ -9,7 +9,7 @@
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
#include "ie_parallel.hpp"
|
||||
#include "common/simple_copy.h"
|
||||
#include "common/cpu_memcpy.h"
|
||||
|
||||
namespace InferenceEngine {
|
||||
namespace Extensions {
|
||||
@ -49,7 +49,7 @@ public:
|
||||
if (src != dst) {
|
||||
size_t srcSize = inputs[0]->byteSize();
|
||||
size_t dstSize = outputs[0]->byteSize();
|
||||
simple_copy(dst, dstSize, src, srcSize);
|
||||
cpu_memcpy_s(dst, dstSize, src, srcSize);
|
||||
}
|
||||
|
||||
return OK;
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <cassert>
|
||||
#include <algorithm>
|
||||
#include "ie_parallel.hpp"
|
||||
#include "common/cpu_memcpy.h"
|
||||
|
||||
namespace InferenceEngine {
|
||||
namespace Extensions {
|
||||
@ -323,7 +324,7 @@ void StridedSliceImpl::strided_slice_vp(const float *src_data, float* dst_data)
|
||||
}
|
||||
|
||||
for (size_t iwork = start, dst_idx = start * dataLength, i = 1; iwork < end; ++iwork, dst_idx += dataLength) {
|
||||
memcpy(&dst_data[dst_idx], &src_data[src_idx], sizeof(float) * dataLength);
|
||||
cpu_memcpy(&dst_data[dst_idx], &src_data[src_idx], sizeof(float) * dataLength);
|
||||
for (int j = dims_size_1 - 1; j >= 0; j--) {
|
||||
counters[j]++;
|
||||
if (counters[j] < dst_dims[j]) {
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <vector>
|
||||
#include "common/cpu_memcpy.h"
|
||||
|
||||
|
||||
namespace InferenceEngine {
|
||||
@ -60,7 +61,7 @@ public:
|
||||
sort(idx.begin(), idx.end(), [&input_probs](size_t i1, size_t i2) {return input_probs[i1] > input_probs[i2];});
|
||||
|
||||
for (int i = 0; i < top_rois_num; ++i) {
|
||||
std::memcpy(output_rois + 4 * i, input_rois + 4 * idx[i], 4 * sizeof(float));
|
||||
cpu_memcpy(output_rois + 4 * i, input_rois + 4 * idx[i], 4 * sizeof(float));
|
||||
}
|
||||
|
||||
return OK;
|
||||
|
@ -14,7 +14,6 @@
|
||||
#include <limits>
|
||||
#include <utility>
|
||||
#include "ie_parallel.hpp"
|
||||
#include "common/simple_copy.h"
|
||||
|
||||
namespace InferenceEngine {
|
||||
namespace Extensions {
|
||||
|
@ -9,7 +9,7 @@
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
#include "ie_parallel.hpp"
|
||||
#include "common/simple_copy.h"
|
||||
#include "common/cpu_memcpy.h"
|
||||
|
||||
namespace InferenceEngine {
|
||||
namespace Extensions {
|
||||
@ -44,7 +44,7 @@ public:
|
||||
if (src != dst) {
|
||||
size_t srcSize = inputs[0]->byteSize();
|
||||
size_t dstSize = outputs[0]->byteSize();
|
||||
simple_copy(dst, dstSize, src, srcSize);
|
||||
cpu_memcpy_s(dst, dstSize, src, srcSize);
|
||||
}
|
||||
|
||||
return OK;
|
||||
|
@ -4,8 +4,6 @@
|
||||
|
||||
#include "ie_ir_parser.hpp"
|
||||
|
||||
#include <ie_memcpy.h>
|
||||
|
||||
#include <typeinfo>
|
||||
#include <unordered_set>
|
||||
#include <algorithm>
|
||||
|
@ -8,7 +8,7 @@
|
||||
|
||||
#include "unit_test_utils/mocks/mock_allocator.hpp"
|
||||
|
||||
#ifdef WIN32
|
||||
#ifdef _WIN32
|
||||
#define UNUSED
|
||||
#else
|
||||
#define UNUSED __attribute__((unused))
|
||||
|
5
inference-engine/thirdparty/CMakeLists.txt
vendored
5
inference-engine/thirdparty/CMakeLists.txt
vendored
@ -9,6 +9,11 @@ elseif(CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQ
|
||||
# On g++ 9.3.0 (Ubuntu 20.04) the ADE library raises "redundant-move" warnings
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=redundant-move")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-error=redundant-move")
|
||||
elseif(("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") AND ("${MSVC_VERSION}" VERSION_GREATER_EQUAL "1920"))
|
||||
# MSVC_VERSION 1920 corresponds to Visual Studio 2019
|
||||
# This flag is needed to enable SIMD vectorization via the '#pragma omp simd' directive.
|
||||
# Compiling with the '/openmp:experimental' option enables this vectorization capability in MSVC.
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /openmp:experimental")
|
||||
endif()
|
||||
|
||||
function(build_with_lto)
|
||||
|
2
inference-engine/thirdparty/mkl-dnn
vendored
2
inference-engine/thirdparty/mkl-dnn
vendored
@ -1 +1 @@
|
||||
Subproject commit eb54063189a33a10c4aa90311788e6fbb4cdf2f6
|
||||
Subproject commit b73474c80c21ae170b112803a1fc315e1549bdab
|
Loading…
Reference in New Issue
Block a user