[CPU] [MSVC] memcpy and memcpy_s were unified for all layers and SIMD functionality was enabled on VS2019 (#1374)

This commit is contained in:
Alexey Varyzgin 2020-08-27 13:49:31 +03:00 committed by GitHub
parent 675e1c821e
commit b6b536f23c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
37 changed files with 89 additions and 70 deletions

View File

@ -4,7 +4,7 @@
#include "mean_image.h"
#include "ie_parallel.hpp"
#include "ie_memcpy.h"
#include "nodes/common/cpu_memcpy.h"
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
@ -53,7 +53,7 @@ void MeanImage::Load(const MKLDNNDims& inputDims, InputInfo::Ptr inputInfo) {
THROW_IE_EXCEPTION << "mean image size does not match expected network input, expecting " << meanWidth << " x " << meanHeight;
}
// todo: cast to TBlob and make sure it is floats
ie_memcpy(meanBuffer->data() + channel*meanBlob->size(), meanBuffer->byteSize() - channel*meanBlob->byteSize(),
cpu_memcpy_s(meanBuffer->data() + channel*meanBlob->size(), meanBuffer->byteSize() - channel*meanBlob->byteSize(),
meanBlob->buffer(), meanBlob->byteSize());
}
}

View File

@ -28,7 +28,7 @@
#include <blob_factory.hpp>
#include <legacy/net_pass.h>
#include <legacy/details/ie_cnn_network_tools.h>
#include <ie_memcpy.h>
#include "nodes/common/cpu_memcpy.h"
#include "precision_utils.h"
#include <ie_plugin_config.hpp>
@ -775,7 +775,7 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) {
MB_to_process = std::min<int>(config.batchLimit, MB_to_process);
size_t size_to_copy = intr_blob.GetSize() * MB_to_process / MB;
ie_memcpy(ext_blob_ptr, ext_blob->byteSize(), intr_blob_ptr, size_to_copy);
cpu_memcpy_s(ext_blob_ptr, ext_blob->byteSize(), intr_blob_ptr, size_to_copy);
}
}

View File

@ -13,6 +13,7 @@
#include "mkldnn_memory.h"
#include "mkldnn_node.h"
#include "mkldnn_extension_utils.h"
#include "nodes/common/cpu_memcpy.h"
using namespace InferenceEngine;
using namespace mkldnn;
@ -110,7 +111,7 @@ void MKLDNNMemory::SetData(memory::data_type dataType, memory::format format, co
uint8_t* dataPtr = static_cast<uint8_t*>(GetData());
// We cannot support strides for i/o blobs because it affects performance.
dataPtr += itemSize * prim->get_primitive_desc().desc().data.layout_desc.blocking.offset_padding;
memcpy(dataPtr, data, size);
cpu_memcpy(dataPtr, data, size);
}
if (ftz && dataType == mkldnn_f32) {

View File

@ -47,7 +47,7 @@
#include <mkldnn_types.h>
#include "mkldnn_extension_utils.h"
#include "ie_memcpy.h"
#include "nodes/common/cpu_memcpy.h"
#include "mkldnn_debug.h"
using namespace mkldnn;
@ -698,7 +698,7 @@ InferenceEngine::Blob::Ptr MKLDNNNode::createInternalBlob(InferenceEngine::SizeV
auto fillInternalBlob = [&](char *data, size_t intBuffSize) {
size_t offset = blb->byteSize();
checkSize(intBuffSize, offset);
ie_memcpy(data, intBuffSize, blb->buffer(), blb->byteSize());
cpu_memcpy_s(data, intBuffSize, blb->buffer(), blb->byteSize());
data += blb->byteSize();
for (const auto &merged : getMergeWith()) {
wLayer = dynamic_cast<InferenceEngine::WeightableLayer*>(merged->getCnnLayer().get());
@ -711,7 +711,7 @@ InferenceEngine::Blob::Ptr MKLDNNNode::createInternalBlob(InferenceEngine::SizeV
THROW_IE_EXCEPTION << "Cannot get internal blob layer for node " << getName() << ".";
offset += blb->byteSize();
checkSize(intBuffSize, offset);
ie_memcpy(data, intBuffSize, blb->buffer(), blb->byteSize());
cpu_memcpy_s(data, intBuffSize, blb->buffer(), blb->byteSize());
data += blb->byteSize();
}
};

View File

@ -9,7 +9,7 @@
#include <vector>
#include <cassert>
#include "ie_parallel.hpp"
#include "common/simple_copy.h"
#include "common/cpu_memcpy.h"
namespace InferenceEngine {
namespace Extensions {
@ -111,7 +111,7 @@ public:
for (i = 0, src_idx = 0; i < dst_dims.size(); ++i)
src_idx += counters[i] ? ((counters[i] % src_aligned[i]) * srcStrides_aligned[i]) : 0;
simple_copy(&dst_data[iwork], data_size, &src_data[src_idx * data_size], data_size);
cpu_memcpy(&dst_data[iwork], &src_data[src_idx * data_size], data_size);
for (int j = dst_dims.size() - 1; j >= 0; j--) {
counters[j] = (counters[j] + 1) % dst_dims[j];

View File

@ -12,7 +12,6 @@
#include <algorithm>
#include <limits>
#include "ie_parallel.hpp"
#include "common/simple_copy.h"
namespace InferenceEngine {
namespace Extensions {

View File

@ -11,9 +11,9 @@
* @brief Copies bytes between buffers with security enhancements
* Copies count bytes from src to dest. If the source and destination
* overlap, the behavior is undefined.
* @param dest
* @param dst
* pointer to the object to copy to
* @param destsz
* @param dst_size
* max number of bytes to modify in the destination (typically the size
* of the destination object)
* @param src
@ -23,16 +23,28 @@
@return zero on success and non-zero value on error.
*/
inline int simple_copy(void* dest, size_t destsz, void const* src, size_t count) {
/**
 * @brief Unchecked byte copy shared by the CPU plugin layers.
 * Copies count bytes from src to dst. No destination capacity is passed in,
 * so the caller is responsible for dst being able to hold count bytes.
 * On Windows builds the copy is routed through memcpy_s (with count used as
 * the destination size); on every other platform std::memcpy is called.
 * @param dst
 * pointer to the object to copy to
 * @param src
 * pointer to the object to copy from
 * @param count
 * number of bytes to copy
 */
inline void cpu_memcpy(void* dst, const void* src, size_t count) {
#ifndef _WIN32
    // Non-Windows toolchains: plain standard-library copy.
    std::memcpy(dst, src, count);
#else
    // Windows toolchains: same copy through the bounds-aware CRT variant.
    memcpy_s(dst, count, src, count);
#endif
}
inline int cpu_memcpy_s(void* dst, size_t dst_size, const void* src, size_t count) {
size_t i;
if (!src || count > destsz ||
count > (dest > src ? ((uintptr_t)dest - (uintptr_t)src)
: ((uintptr_t)src - (uintptr_t)dest))) {
if (!src ||
count > dst_size ||
count > (dst > src ? ((uintptr_t)dst - (uintptr_t)src) : ((uintptr_t)src - (uintptr_t)dst))) {
// zero out the destination buffer if an error is detected
memset(dest, 0, destsz);
std::memset(dst, 0, dst_size);
return -1;
}
for (i = 0; i < count; ++i) (reinterpret_cast<uint8_t*>(dest))[i] = (reinterpret_cast<const uint8_t*>(src))[i];
#ifdef _WIN32
memcpy_s(dst, dst_size, src, count);
#else
std::memcpy(dst, src, count);
#endif
return 0;
}

View File

@ -3,6 +3,7 @@
//
#include "embedding_bag_sum.hpp"
#include "common/cpu_memcpy.h"
namespace InferenceEngine {
namespace Extensions {
@ -38,7 +39,7 @@ public:
} else if (inputs[INDICES_IDX]->getTensorDesc().getPrecision().size() == sizeof(UINT64)) {
const UINT64* src = inputs[INDICES_IDX]->cbuffer().as<const UINT64*>();
for (size_t i = 0lu; i < bagsNum; i++) {
memcpy(_indices[i].data(), src + i * batch, batch * sizeof(UINT64));
cpu_memcpy(_indices[i].data(), src + i * batch, batch * sizeof(UINT64));
}
}
}

View File

@ -3,6 +3,7 @@
//
#include "embedding_bag_sum.hpp"
#include "common/cpu_memcpy.h"
namespace InferenceEngine {
namespace Extensions {
@ -51,7 +52,7 @@ public:
_indices[i] = static_cast<size_t>(src[i]);
} else if (inputs[INDICES_IDX]->getTensorDesc().getPrecision().size() == sizeof(UINT64)) {
const UINT64* src = inputs[INDICES_IDX]->cbuffer().as<const UINT64*>();
memcpy(_indices.data(), src, inputs[INDICES_IDX]->byteSize());
cpu_memcpy(_indices.data(), src, inputs[INDICES_IDX]->byteSize());
}
// Initialize segments ids
@ -61,7 +62,7 @@ public:
_segmentIds[i] = static_cast<size_t>(src[i]);
} else if (inputs[SEGMENT_ID_IDX]->getTensorDesc().getPrecision().size() == sizeof(UINT64)) {
const UINT64* src = inputs[SEGMENT_ID_IDX]->cbuffer().as<const UINT64*>();
memcpy(_segmentIds.data(), src, inputs[SEGMENT_ID_IDX]->byteSize());
cpu_memcpy(_segmentIds.data(), src, inputs[SEGMENT_ID_IDX]->byteSize());
}
if (inputs.size() > NUM_SEGMENTS_IDX) {

View File

@ -11,7 +11,7 @@
#include <algorithm>
#include <limits>
#include "ie_parallel.hpp"
#include "common/simple_copy.h"
#include "common/cpu_memcpy.h"
#include "common/fp16_utils.h"
namespace InferenceEngine {
@ -125,7 +125,7 @@ private:
if (idx < indexRange) {
// Copying data to destination from Dictionary
for (size_t j = 0; j < numDictionaries; j++) {
simple_copy(&dst_data[len * (i + j * src_indexSize)],
cpu_memcpy_s(&dst_data[len * (i + j * src_indexSize)],
output->byteSize() - (len * (i + j * src_indexSize)),
&src_dataDict[len * (idx + j * indexRange)],
len);

View File

@ -5,7 +5,7 @@
#include "mkldnn_batchnorm_node.h"
#include "mkldnn_depthwise_node.h"
#include <mkldnn_extension_utils.h>
#include "ie_memcpy.h"
#include "common/cpu_memcpy.h"
using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -81,7 +81,7 @@ void MKLDNNBatchNormalizationNode::getSupportedDescriptors() {
THROW_IE_EXCEPTION << "Cannot get weights blob for node " << getName() << ".";
size_t weightsByteSize = blb->byteSize();
ie_memcpy(data, internalBlob->byteSize(), blb->buffer(), weightsByteSize);
cpu_memcpy_s(data, internalBlob->byteSize(), blb->buffer(), weightsByteSize);
data += blb->size();
blb = scshLayer->_biases;
@ -90,7 +90,7 @@ void MKLDNNBatchNormalizationNode::getSupportedDescriptors() {
} else {
if (weightsByteSize != blb->byteSize())
THROW_IE_EXCEPTION << "ScaleShift has incorrect weights!";
ie_memcpy(data, internalBlob->byteSize(), blb->buffer(), weightsByteSize);
cpu_memcpy_s(data, internalBlob->byteSize(), blb->buffer(), weightsByteSize);
}
internalBlobs.push_back(internalBlob);
}

View File

@ -21,6 +21,7 @@
#include "mkldnn_quantize_node.h"
#include "mkldnn_pooling_node.h"
#include <limits>
#include "common/cpu_memcpy.h"
using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -646,7 +647,7 @@ void MKLDNNConcatNode::execute(mkldnn::stream strm) {
parallel_for(iter_count, [&](int i) {
const size_t dst_off = i * channels_size;
for (int j = 0; j < num_src; j++) {
memcpy(dst_ptrs[j] + dst_off, src_ptrs[j] + i * channels[j], channels[j]);
cpu_memcpy(dst_ptrs[j] + dst_off, src_ptrs[j] + i * channels[j], channels[j]);
}
});
} else {

View File

@ -9,6 +9,7 @@
#include <mkldnn_types.h>
#include <mkldnn_extension_utils.h>
#include "ie_parallel.hpp"
#include "common/cpu_memcpy.h"
using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -162,7 +163,7 @@ void MKLDNNCropNode::execute(mkldnn::stream strm) {
#ifdef _WIN32
if (OD == 1 && OH == 1 && OW == 1 && ID == 1 && IH == 1 && IW == 1) {
for (int n = 0; n < ON; ++n) {
memcpy(&dst_data[n*OC], &src_data[(n+OFFSET_N)*IC + OFFSET_C], OC * sizeof(float));
cpu_memcpy(&dst_data[n*OC], &src_data[(n+OFFSET_N)*IC + OFFSET_C], OC * sizeof(float));
}
} else {
for (int n = 0; n < ON; ++n) {
@ -180,7 +181,7 @@ void MKLDNNCropNode::execute(mkldnn::stream strm) {
(h+OFFSET_H)*IW*m_block_size +
OFFSET_W*m_block_size;
memcpy(dst_data + dst_ind, src_data + src_ind, m_inner_dim * sizeof(float));
cpu_memcpy(dst_data + dst_ind, src_data + src_ind, m_inner_dim * sizeof(float));
}
}
}
@ -189,7 +190,7 @@ void MKLDNNCropNode::execute(mkldnn::stream strm) {
#else
if (OD == 1 && OH == 1 && OW == 1 && ID == 1 && IH == 1 && IW == 1) {
parallel_for(ON, [&](int n) {
memcpy(&dst_data[n*OC], &src_data[(n+OFFSET_N)*IC + OFFSET_C], OC * sizeof(float));
cpu_memcpy(&dst_data[n*OC], &src_data[(n+OFFSET_N)*IC + OFFSET_C], OC * sizeof(float));
});
} else {
parallel_for2d(ON, (OC / m_block_size), [&](int n, int c) {
@ -200,7 +201,7 @@ void MKLDNNCropNode::execute(mkldnn::stream strm) {
((d+OFFSET_D)*IH*IW + OFFSET_H*IW + OFFSET_W)*m_block_size;
for (int h = 0; h < OH; ++h) {
memcpy(dst_data + dst_ind, src_data + src_ind, m_inner_dim * sizeof(float));
cpu_memcpy(dst_data + dst_ind, src_data + src_ind, m_inner_dim * sizeof(float));
src_ind += IW * m_block_size;
dst_ind += OW * m_block_size;

View File

@ -12,6 +12,7 @@
#include <mkldnn_types.h>
#include <mkldnn_extension_utils.h>
#include "ie_parallel.hpp"
#include "common/cpu_memcpy.h"
using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -262,7 +263,7 @@ void MKLDNNGemmNode::process_data() {
for (int b2 = 0; b2 < MB2; b2++) {
if (isThreeInputs) {
memcpy(d_ptr, c_ptr, M * N * sizeof(float));
cpu_memcpy(d_ptr, c_ptr, M * N * sizeof(float));
c_ptr += cOffsets[0];
}

View File

@ -8,7 +8,7 @@
#include <tuple>
#include <algorithm>
#include "caseless.hpp"
#include "ie_memcpy.h"
#include "common/cpu_memcpy.h"
using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -154,7 +154,7 @@ void MKLDNNInputNode::execute(mkldnn::stream strm) {
const int8_t *srcData = constBlob->cbuffer().as<int8_t *>();
int8_t *dstData = dstBlob->buffer();
ie_memcpy(dstData, dstBlob->byteSize(), srcData, constBlob->byteSize());
cpu_memcpy_s(dstData, dstBlob->byteSize(), srcData, constBlob->byteSize());
} else {
switch (precision.size()) {
case 1: {

View File

@ -6,6 +6,7 @@
#include <mkldnn_types.h>
#include <mkldnn_extension_utils.h>
#include "mkldnn_memory_node.hpp"
#include "common/cpu_memcpy.h"
using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -88,7 +89,7 @@ static void simple_copy(MKLDNNMemory& dst, const MKLDNNMemory& src) {
IE_ASSERT(srcSizeInByte == dstSizeInByte) << "Memory objects are not compatible. Has different sizes.";
memcpy(dstPtr, srcPtr, srcSizeInByte);
cpu_memcpy(dstPtr, srcPtr, srcSizeInByte);
}
MKLDNNMemoryInputNode::~MKLDNNMemoryInputNode() {

View File

@ -12,7 +12,7 @@
#include "jit_uni_depthwise.hpp"
#include "jit_uni_quantization.hpp"
#include "bf16transformer.h"
#include "common/cpu_memcpy.h"
#include "mkldnn_normalize_node.h"
using namespace mkldnn;
@ -1317,7 +1317,7 @@ void MKLDNNNormalizeNode::normalize_blk(const in_data_t* src_data, out_data_t* d
// post ops for tails: post-ops params is padding.
std::vector<float> weights_padding(CB * blk_size);
if (!channel_shared) {
memcpy(static_cast<float*>(&weights_padding[0]), weights, C * sizeof(float));
cpu_memcpy(static_cast<float*>(&weights_padding[0]), weights, C * sizeof(float));
}
for (size_t b = 0lu; b < B; b++) {

View File

@ -9,7 +9,6 @@
#include <vector>
#include <mkldnn_types.h>
#include <mkldnn_extension_utils.h>
#include <ie_memcpy.h>
#include <algorithm>
#include <set>
#include <cmath>

View File

@ -21,7 +21,7 @@
#include "jit_uni_eltwise.hpp"
#include "jit_uni_depthwise.hpp"
#include "jit_uni_quantization.hpp"
#include "common/simple_copy.h"
#include "common/cpu_memcpy.h"
using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -697,7 +697,7 @@ void MKLDNNResampleNode::NearestNeighbor_BLK(const in_data_t *in_ptr_, out_data_
out_data_t *out_ptr_dhw = out_ptr_dh + C * ox;
const in_data_t *in_ptr_dhw = in_ptr_dh + C * index_w[ox];
if (fusedWith.empty() && output_prec == input_prec) {
memcpy(out_ptr_dhw + tail, in_ptr_dhw + tail, (C - tail) * sizeof(in_data_t));
cpu_memcpy(out_ptr_dhw + tail, in_ptr_dhw + tail, (C - tail) * sizeof(in_data_t));
} else {
for (int c = tail; c < C; c++) {
float dst_value = static_cast<float>(in_ptr_dhw[c]);
@ -722,7 +722,7 @@ void MKLDNNResampleNode::NearestNeighbor_BLK(const in_data_t *in_ptr_, out_data_
out_data_t *out_ptr_dhw = out_ptr_dh + C * ox;
const in_data_t *in_ptr_dhw = in_ptr_dh + C * index_w[ox];
if (fusedWith.empty() && output_prec == input_prec) {
memcpy(out_ptr_dhw, in_ptr_dhw, C * sizeof(in_data_t));
cpu_memcpy(out_ptr_dhw, in_ptr_dhw, C * sizeof(in_data_t));
} else {
for (int c = 0; c < C; c++) {
float dst_value = static_cast<float>(in_ptr_dhw[c]);
@ -774,7 +774,7 @@ void MKLDNNResampleNode::NearestNeighbor_BLK(const in_data_t *in_ptr_, out_data_
out_data_t *out_ptr_cbdhw = out_ptr_cbdh + blk_size * w;
const in_data_t *in_ptr_cbdhw = in_ptr_cbdh + blk_size * index_w[w];
if (fusedWith.empty()) {
memcpy(out_ptr_cbdhw, in_ptr_cbdhw, blk_size * sizeof(in_data_t));
cpu_memcpy(out_ptr_cbdhw, in_ptr_cbdhw, blk_size * sizeof(in_data_t));
} else {
for (int blk = 0; blk < blk_size; blk++) {
float dst_value = static_cast<float>(in_ptr_cbdhw[blk]);
@ -808,7 +808,7 @@ void MKLDNNResampleNode::LinearInterpolation(const in_data_t *in_ptr_, out_data_
if (input_prec == Precision::FP32) {
size *= sizeof(float);
}
simple_copy(out_ptr_, size, in_ptr_, size);
cpu_memcpy(out_ptr_, in_ptr_, size);
return;
}

View File

@ -21,7 +21,7 @@
#include "jit_uni_eltwise.hpp"
#include "jit_uni_depthwise.hpp"
#include "jit_uni_quantization.hpp"
#include "common/simple_copy.h"
#include "common/cpu_memcpy.h"
using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -352,7 +352,7 @@ void MKLDNNScatterUpdateNode::execute(mkldnn::stream strm) {
splitter(srcBlockND[0], nthr, ithr, start, end);
size_t size = (end - start) * dataSize;
start *= dataSize;
simple_copy(dstPtr + start, size, srcPtr + start, size);
cpu_memcpy(dstPtr + start, srcPtr + start, size);
});
}
@ -405,7 +405,7 @@ void MKLDNNScatterUpdateNode::scatterUpdate(uint8_t *indices, uint8_t *update, i
int64_t idxValue = getIndicesValue(indices, idx);
uint8_t *dstEntry = dstData + (b * srcBlockND[axis] + idxValue * blockToUpdate) * dataSize;
uint8_t *updateEntry = update + (b * updateBlockND[axis] + idx * blockToUpdate) * dataSize;
simple_copy(dstEntry, blockToUpdateSize, updateEntry, blockToUpdateSize);
cpu_memcpy(dstEntry, updateEntry, blockToUpdateSize);
});
}
@ -435,7 +435,7 @@ void MKLDNNScatterUpdateNode::scatterNDUpdate(uint8_t *indices, uint8_t *update,
}
dstOffset *= dataSize;
size_t updateOffset = tupleIdx * sizeToUpdate;
simple_copy(dstData + dstOffset, sizeToUpdate, update + updateOffset, sizeToUpdate);
cpu_memcpy(dstData + dstOffset, update + updateOffset, sizeToUpdate);
});
}
@ -470,7 +470,7 @@ void MKLDNNScatterUpdateNode::scatterElementsUpdate(uint8_t *indices, uint8_t *u
for (size_t iwork = start; iwork < end; iwork++) {
int64_t idxValue = getIndicesValue(indices, iwork);
if (idxValue < srcDataDim[axis])
simple_copy(dstData + dataSize * (dst_idx + idxValue * srcBlockND[axis + 1]), dataSize,
cpu_memcpy(dstData + dataSize * (dst_idx + idxValue * srcBlockND[axis + 1]),
update + iwork * dataSize, dataSize);
for (j = updateRank - 1; j >= 0; j--) {
@ -497,4 +497,4 @@ bool MKLDNNScatterUpdateNode::created() const {
REG_MKLDNN_PRIM_FOR(MKLDNNScatterUpdateNode, ScatterUpdate);
REG_MKLDNN_PRIM_FOR(MKLDNNScatterUpdateNode, ScatterElementsUpdate);
REG_MKLDNN_PRIM_FOR(MKLDNNScatterUpdateNode, ScatterNDUpdate);
REG_MKLDNN_PRIM_FOR(MKLDNNScatterUpdateNode, ScatterNDUpdate);

View File

@ -11,7 +11,6 @@
#include <map>
#include <mkldnn_types.h>
#include <mkldnn_extension_utils.h>
#include <ie_memcpy.h>
#include <legacy/graph_transformer.h>
using namespace mkldnn;

View File

@ -7,6 +7,7 @@
#include <string>
#include <mkldnn_types.h>
#include <mkldnn_extension_utils.h>
#include "common/cpu_memcpy.h"
using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -126,7 +127,7 @@ void MKLDNNTileNode::execute(mkldnn::stream strm) {
for (int i = 0; i < m_outer_dim; ++i) {
for (int t = 0; t < tiles; ++t) {
memcpy(dst_ptr, src_ptr, m_inner_dim* sizeof(float));
cpu_memcpy(dst_ptr, src_ptr, m_inner_dim* sizeof(float));
dst_ptr += m_inner_dim;
}
src_ptr += m_inner_dim;

View File

@ -14,6 +14,7 @@
#include <string>
#include <algorithm>
#include "ie_parallel.hpp"
#include "common/cpu_memcpy.h"
namespace InferenceEngine {
namespace Extensions {
@ -263,7 +264,7 @@ void reorder(const float* src_data, const int* ranks, const int n, const int ste
for (int i = 0; i < n; ++i) {
const int j = dst_mapping[i];
assert(0 <= j && j < n);
std::memcpy(dst_data + i * step, src_data + j * step, sizeof(float) * step);
cpu_memcpy(dst_data + i * step, src_data + j * step, sizeof(float) * step);
}
}
@ -386,7 +387,7 @@ public:
reorder(&output_rois_features_temp[0], &original_rois_mapping[0], num_rois, feaxels_per_roi,
output_rois_features, &dummy_mapping[0]);
if (output_rois != nullptr) {
std::memcpy(output_rois, input_rois, 4 * num_rois * sizeof(float));
cpu_memcpy(output_rois, input_rois, 4 * num_rois * sizeof(float));
}
return OK;

View File

@ -10,6 +10,7 @@
#include <set>
#include <cassert>
#include "ie_parallel.hpp"
#include "common/cpu_memcpy.h"
namespace InferenceEngine {
namespace Extensions {
@ -157,7 +158,7 @@ public:
splitter(work_amount_dst, nthr, ithr, start, end);
src_idx = initter(start, CNTR_SIZE, counters, own_dims, ownStrides);
for (size_t iwork = start, dst_idx = start * dataLength; iwork < end; ++iwork, dst_idx += dataLength) {
memcpy(&dst_data[dst_idx], &src_data[dataLength * src_idx], sizeof(T) * dataLength);
cpu_memcpy(&dst_data[dst_idx], &src_data[dataLength * src_idx], sizeof(T) * dataLength);
src_idx = updater(src_idx, CNTR_SIZE, counters, own_dims, ownStrides);
}
});

View File

@ -12,7 +12,6 @@
#include <algorithm>
#include <limits>
#include "ie_parallel.hpp"
#include "common/simple_copy.h"
namespace InferenceEngine {
namespace Extensions {

View File

@ -12,7 +12,6 @@
#include <limits>
#include <functional>
#include "ie_parallel.hpp"
#include "common/simple_copy.h"
namespace InferenceEngine {
namespace Extensions {

View File

@ -12,7 +12,6 @@
#include <algorithm>
#include <limits>
#include "ie_parallel.hpp"
#include "common/simple_copy.h"
namespace InferenceEngine {
namespace Extensions {

View File

@ -12,7 +12,6 @@
#include <algorithm>
#include <limits>
#include "ie_parallel.hpp"
#include "common/simple_copy.h"
namespace InferenceEngine {
namespace Extensions {

View File

@ -9,7 +9,7 @@
#include <vector>
#include <cassert>
#include "ie_parallel.hpp"
#include "common/simple_copy.h"
#include "common/cpu_memcpy.h"
namespace InferenceEngine {
namespace Extensions {
@ -49,7 +49,7 @@ public:
if (src != dst) {
size_t srcSize = inputs[0]->byteSize();
size_t dstSize = outputs[0]->byteSize();
simple_copy(dst, dstSize, src, srcSize);
cpu_memcpy_s(dst, dstSize, src, srcSize);
}
return OK;

View File

@ -10,6 +10,7 @@
#include <cassert>
#include <algorithm>
#include "ie_parallel.hpp"
#include "common/cpu_memcpy.h"
namespace InferenceEngine {
namespace Extensions {
@ -323,7 +324,7 @@ void StridedSliceImpl::strided_slice_vp(const float *src_data, float* dst_data)
}
for (size_t iwork = start, dst_idx = start * dataLength, i = 1; iwork < end; ++iwork, dst_idx += dataLength) {
memcpy(&dst_data[dst_idx], &src_data[src_idx], sizeof(float) * dataLength);
cpu_memcpy(&dst_data[dst_idx], &src_data[src_idx], sizeof(float) * dataLength);
for (int j = dims_size_1 - 1; j >= 0; j--) {
counters[j]++;
if (counters[j] < dst_dims[j]) {

View File

@ -6,6 +6,7 @@
#include <algorithm>
#include <cassert>
#include <vector>
#include "common/cpu_memcpy.h"
namespace InferenceEngine {
@ -60,7 +61,7 @@ public:
sort(idx.begin(), idx.end(), [&input_probs](size_t i1, size_t i2) {return input_probs[i1] > input_probs[i2];});
for (int i = 0; i < top_rois_num; ++i) {
std::memcpy(output_rois + 4 * i, input_rois + 4 * idx[i], 4 * sizeof(float));
cpu_memcpy(output_rois + 4 * i, input_rois + 4 * idx[i], 4 * sizeof(float));
}
return OK;

View File

@ -14,7 +14,6 @@
#include <limits>
#include <utility>
#include "ie_parallel.hpp"
#include "common/simple_copy.h"
namespace InferenceEngine {
namespace Extensions {

View File

@ -9,7 +9,7 @@
#include <vector>
#include <cassert>
#include "ie_parallel.hpp"
#include "common/simple_copy.h"
#include "common/cpu_memcpy.h"
namespace InferenceEngine {
namespace Extensions {
@ -44,7 +44,7 @@ public:
if (src != dst) {
size_t srcSize = inputs[0]->byteSize();
size_t dstSize = outputs[0]->byteSize();
simple_copy(dst, dstSize, src, srcSize);
cpu_memcpy_s(dst, dstSize, src, srcSize);
}
return OK;

View File

@ -4,8 +4,6 @@
#include "ie_ir_parser.hpp"
#include <ie_memcpy.h>
#include <typeinfo>
#include <unordered_set>
#include <algorithm>

View File

@ -8,7 +8,7 @@
#include "unit_test_utils/mocks/mock_allocator.hpp"
#ifdef WIN32
#ifdef _WIN32
#define UNUSED
#else
#define UNUSED __attribute__((unused))

View File

@ -9,6 +9,11 @@ elseif(CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQ
# On g++ 9.3.0 (Ubuntu 20.04) the ADE library raises "redundant-move" warnings
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=redundant-move")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-error=redundant-move")
elseif(("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") AND ("${MSVC_VERSION}" VERSION_GREATER_EQUAL "1920"))
# 1920 version of MSVC 2019
# This flag is needed to enable SIMD vectorization with the '#pragma omp simd' directive.
# Compiling with the '/openmp:experimental' key allows us to enable the vectorization capability in MSVC.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /openmp:experimental")
endif()
function(build_with_lto)

@ -1 +1 @@
Subproject commit eb54063189a33a10c4aa90311788e6fbb4cdf2f6
Subproject commit b73474c80c21ae170b112803a1fc315e1549bdab