diff --git a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
index 6eddc5575cc..9fab04f8bed 100644
--- a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
+++ b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
@@ -1283,8 +1283,9 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
     auto input_data = HasTo2DReshapeData(layer) ? Get2DReshapedData(inputs, 8) : inputs;
     auto in_dims = input_data->getDims();
-    uint32_t num_rows_in = InferenceEngine::details::product(in_dims) / in_dims.front();
-    uint32_t num_columns_in = in_dims.front();
+    auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front();
+    uint32_t num_rows_in = InferenceEngine::details::product(in_dims) / batch_size;
+    uint32_t num_columns_in = batch_size;
     uint32_t num_rows_out = isDiag ? num_rows_in : FROM_IR_DIM(outputs, 1);
     uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
     uint32_t num_padding_out = isDiag ? num_padding : 0;
diff --git a/inference-engine/src/gna_plugin/gna_groups.hpp b/inference-engine/src/gna_plugin/gna_groups.hpp
index 2635dc1b95c..b2f3f106e70 100644
--- a/inference-engine/src/gna_plugin/gna_groups.hpp
+++ b/inference-engine/src/gna_plugin/gna_groups.hpp
@@ -31,7 +31,8 @@ inline InferenceEngine::DataPtr Get2DReshapedData(InferenceEngine::DataPtr input
         }
     }
-    InferenceEngine::SizeVector newDims(dims.size(), 1);
+    size_t newDimsSize = (dims.size() > 1) ? dims.size() : 2;
+    InferenceEngine::SizeVector newDims(newDimsSize, 1);
     newDims[0] = numColumnsIn;
     newDims[1] = numRowsIn;
     return std::make_shared<InferenceEngine::Data>(input->getName(),
diff --git a/inference-engine/src/gna_plugin/gna_plugin.cpp b/inference-engine/src/gna_plugin/gna_plugin.cpp
index c4e6b267e38..6e830276e31 100644
--- a/inference-engine/src/gna_plugin/gna_plugin.cpp
+++ b/inference-engine/src/gna_plugin/gna_plugin.cpp
@@ -1190,8 +1190,9 @@ uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap &inputs, Infer
     int inputNum = 0;
     for (auto &input : inputs) {
         auto inputLayout = input.second->getTensorDesc().getLayout();
-        if (inputLayout != Layout::NC && inputLayout != Layout::CN && inputLayout != Layout::CHW && inputLayout != Layout::NCHW) {
-            THROW_GNA_EXCEPTION << "Expected input blob to have Layout::NC or Layout::CN, but was: "
+        if (inputLayout != Layout::C && inputLayout != Layout::NC && inputLayout != Layout::CN &&
+            inputLayout != Layout::CHW && inputLayout != Layout::NCHW) {
+            THROW_GNA_EXCEPTION << "Expected input blob to have Layout::C, Layout::NC, Layout::CN, Layout::NCHW or Layout::CHW. But was: "
                 << input.second->getTensorDesc().getLayout();
         }
@@ -1206,7 +1207,7 @@ uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap &inputs, Infer
             inputLayout = Layout::NC;
         }
-        auto is2D = input.second->getTensorDesc().getLayout() == Layout::NC || input.second->getTensorDesc().getLayout() == Layout::CN;
+        auto is1D = input.second->getTensorDesc().getLayout() == Layout::C;
         auto is3D = input.second->getTensorDesc().getLayout() == Layout::CHW;

         if (!inputsDesc->ptr_inputs_global_id.count(input.first)) {
@@ -1232,9 +1233,9 @@ uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap &inputs, Infer
             }
         }
-        auto importedElements = is2D ? dims[dims.size() - 1] : dims[dims.size() - 1] * dims[dims.size() - 2] * dims[dims.size() - 3];
-        auto importedFrames = is3D ? 1 : dims[0];
-        auto targetGroups = is2D ? dims[dims.size() - 2] : dims[0]; // TODO: no proper support for groups yet
+        auto importedElements = is1D ? dims[0] : details::product(++std::begin(dims), std::end(dims));
+        auto importedFrames = (is3D || is1D) ? 1 : dims[0];
+        auto targetGroups = is1D ? 1 : dims[0]; // TODO: no proper support for groups yet
         auto importedElementSizeBytes = gnaFlags->sw_fp32 ? 4 : 2;
         auto importedBytes = importedElements * importedFrames * importedElementSizeBytes;
@@ -1257,8 +1258,8 @@ uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap &inputs, Infer
         auto transpose_info = transpose_inputs_info.find(input.first);
         if (transpose_info != std::end(transpose_inputs_info)) {
-            size_t batchSize = dims[0];
-            size_t elementsPerBatch = InferenceEngine::details::product(dims) / dims[0];
+            size_t batchSize = (dims.size() > 1) ? dims[0] : 1;
+            size_t elementsPerBatch = (dims.size() > 1) ? InferenceEngine::details::product(dims) / dims[0] : dims[0];
             size_t transposed_data_size = 0;
             for (const auto &part_transposition_info : transpose_info->second) {
                 transposed_data_size += part_transposition_info.num_transpose_rows * part_transposition_info.num_transpose_columns;
@@ -1345,89 +1346,87 @@ GnaWaitStatus GNAPlugin::WaitFor(uint32_t request_idx, int64_t millisTimeout) {
     for (auto && outputBlobIt : request) {
         auto & outputBlob = outputBlobIt.second;
         auto & outputDesc = outputsDesc[output_idx];
-        if (outputBlob->getTensorDesc().getLayout() == Layout::NC || outputBlob->getTensorDesc().getLayout() == Layout::CN
-            || outputBlob->getTensorDesc().getLayout() == Layout::NCHW || outputBlob->getTensorDesc().getLayout() == Layout::CHW) {
-            auto dims = outputBlob->getTensorDesc().getDims();
-            auto is2D = outputBlob->getTensorDesc().getLayout() == Layout::NC || outputBlob->getTensorDesc().getLayout() == Layout::CN;
-            auto is3D = outputBlob->getTensorDesc().getLayout() == Layout::CHW;
-            auto& exportOutputDims = outputBlob->getTensorDesc().getDims();
-            auto batchSize = is3D ? 1 : exportOutputDims[0];
-            auto elementsPerBatch = is2D ? exportOutputDims[exportOutputDims.size() - 1]
-                                         : exportOutputDims[exportOutputDims.size() - 1]
-                                           * exportOutputDims[exportOutputDims.size() - 2]
-                                           * exportOutputDims[exportOutputDims.size() - 3];
-
-            auto transpose_output_info = transpose_outputs_info.find(outputBlobIt.first);
-            if (transpose_output_info != std::end(transpose_outputs_info)) {
-                size_t transposed_data_size = 0;
-                for (const auto &part_transposition_info : transpose_output_info->second) {
-                    transposed_data_size += part_transposition_info.num_transpose_rows * part_transposition_info.num_transpose_columns;
-                }
-                if (elementsPerBatch != transposed_data_size) {
-                    THROW_GNA_EXCEPTION << "Transposed data size (" << transposed_data_size
-                                        << ") do not match output buffer length of " << elementsPerBatch;
-                }
-                TransposeTensorFromNCHWToNHWC(outputDesc.num_bytes_per_element,
-                                              batchSize,
-                                              elementsPerBatch,
-                                              reinterpret_cast<uint8_t*>(outputDesc.ptrs[request_idx]),
-                                              true,
-                                              transpose_output_info->second);
-            }
-
-            ExportScores(outputBlob->buffer(),
-                         outputDesc.ptrs[request_idx],
-                         outputDesc.orientation,
-                         batchSize,
-                         batchSize,
-                         elementsPerBatch,
-                         elementsPerBatch,
-                         elementsPerBatch,
-                         outputDesc.num_bytes_per_element,
-                         sizeof(float));
-
-            if (gnadevice) {
-#ifdef PLOT
-                FILE* f = nullptr;
-                static int num_infers = 0;
-                {
-                    f = fopen("ex_scores.txt", "w");
-                }
-                num_infers++;
-                if (f) {
-                    auto dims = outputBlob->getTensorDesc().getDims();
-                    for (int i = 0; i < dims[dims.size() - 2]; i++) {
-                        for (int j = 0; j < dims[dims.size() - 1]; j++) {
-                            fprintf(f, "%d ", outputBlob->cbuffer().as<int32_t*>()[dims[dims.size() - 1] * i + j]);
-                        }
-                        fprintf(f, "\n");
-                    }
-                    fprintf(f, "\n\n");
-                }
-#endif
-                ConvertToFloat(outputBlob->buffer(),
-                               outputBlob->buffer(),
-                               elementsPerBatch,
-                               batchSize,
-                               outputDesc.scale_factor);
-#ifdef PLOT
-                if (f) {
-                    auto dims = outputBlob->getTensorDesc().getDims();
-                    for (int i = 0; i < dims[dims.size() - 2]; i++) {
-                        for (int j = 0; j < dims[dims.size() - 1]; j++) {
-                            fprintf(f, "%.2f ", outputBlob->cbuffer().as<float*>()[dims[dims.size() - 1] * i + j]);
-                        }
-                        fprintf(f, "\n");
-                    }
-                    fclose(f);
-                }
-#endif
-            }
-        } else {
-            THROW_GNA_EXCEPTION << "Expected output blob to have Layout::NC, Layout::CN, Layout::NCHW or Layout::CHW. But was "
+        if (outputBlob->getTensorDesc().getLayout() != Layout::C && outputBlob->getTensorDesc().getLayout() != Layout::NC &&
+            outputBlob->getTensorDesc().getLayout() != Layout::CN && outputBlob->getTensorDesc().getLayout() != Layout::NCHW &&
+            outputBlob->getTensorDesc().getLayout() != Layout::CHW) {
+            THROW_GNA_EXCEPTION << "Expected output blob to have Layout::C, Layout::NC, Layout::CN, Layout::NCHW or Layout::CHW. But was "
                 << outputBlob->getTensorDesc().getLayout();
         }
+        auto dims = outputBlob->getTensorDesc().getDims();
+        auto is1D = outputBlob->getTensorDesc().getLayout() == Layout::C;
+        auto is3D = outputBlob->getTensorDesc().getLayout() == Layout::CHW;
+        auto& exportOutputDims = outputBlob->getTensorDesc().getDims();
+        auto batchSize = (is1D || is3D) ? 1 : exportOutputDims[0];
+        auto elementsPerBatch = is1D ? exportOutputDims.front() :
+            details::product(++std::begin(exportOutputDims), std::end(exportOutputDims));
+
+        auto transpose_output_info = transpose_outputs_info.find(outputBlobIt.first);
+        if (transpose_output_info != std::end(transpose_outputs_info)) {
+            size_t transposed_data_size = 0;
+            for (const auto &part_transposition_info : transpose_output_info->second) {
+                transposed_data_size += part_transposition_info.num_transpose_rows * part_transposition_info.num_transpose_columns;
+            }
+            if (elementsPerBatch != transposed_data_size) {
+                THROW_GNA_EXCEPTION << "Transposed data size (" << transposed_data_size
+                                    << ") do not match output buffer length of " << elementsPerBatch;
+            }
+            TransposeTensorFromNCHWToNHWC(outputDesc.num_bytes_per_element,
+                                          batchSize,
+                                          elementsPerBatch,
+                                          reinterpret_cast<uint8_t*>(outputDesc.ptrs[request_idx]),
+                                          true,
+                                          transpose_output_info->second);
+        }
+
+        ExportScores(outputBlob->buffer(),
+                     outputDesc.ptrs[request_idx],
+                     outputDesc.orientation,
+                     batchSize,
+                     batchSize,
+                     elementsPerBatch,
+                     elementsPerBatch,
+                     elementsPerBatch,
+                     outputDesc.num_bytes_per_element,
+                     sizeof(float));
+
+        if (gnadevice) {
+#ifdef PLOT
+            FILE* f = nullptr;
+            static int num_infers = 0;
+            {
+                f = fopen("ex_scores.txt", "w");
+            }
+            num_infers++;
+            if (f) {
+                for (int i = 0; i < batchSize; i++) {
+                    for (int j = 0; j < dims[dims.size() - 1]; j++) {
+                        fprintf(f, "%d ", outputBlob->cbuffer().as<int32_t*>()[dims[dims.size() - 1] * i + j]);
+                    }
+                    fprintf(f, "\n");
+                }
+                fprintf(f, "\n\n");
+            }
+#endif
+            ConvertToFloat(outputBlob->buffer(),
+                           outputBlob->buffer(),
+                           elementsPerBatch,
+                           batchSize,
+                           outputDesc.scale_factor);
+#ifdef PLOT
+            if (f) {
+                auto dims = outputBlob->getTensorDesc().getDims();
+                for (int i = 0; i < batchSize; i++) {
+                    for (int j = 0; j < dims[dims.size() - 1]; j++) {
+                        fprintf(f, "%.2f ", outputBlob->cbuffer().as<float*>()[dims[dims.size() - 1] * i + j]);
+                    }
+                    fprintf(f, "\n");
+                }
+                fclose(f);
+            }
+#endif
+        }
+
         output_idx++;
     }
     return GNA_REQUEST_COMPLETED;
@@ -1455,11 +1454,22 @@ bool GNAPlugin::Infer(const InferenceEngine::BlobMap &input, InferenceEngine::Bl
     return Wait(QueueInference(input, result));
 }

+static InferenceEngine::Layout GetLayoutForDims(const InferenceEngine::SizeVector &dims) {
+    switch (dims.size()) {
+    case 1: return C;
+    case 2: return NC;
+    case 3: return CHW;
+    case 4: return NCHW;
+    default:
+        THROW_GNA_EXCEPTION << "Unsupported dimensions size in GNA: " << dims.size();
+    }
+}
+
 Blob::Ptr GNAPlugin::GetOutputBlob(const std::string& name, InferenceEngine::Precision precision) {
     // need to have intermediate blob for interleave conversion
     InferenceEngine::Blob::Ptr outputBlob;
     auto outputDims = outputsDataMap[name]->getTensorDesc().getDims();
-    outputBlob = make_blob_with_precision(TensorDesc(precision, outputDims, outputDims.size() == 2 ? NC : (outputDims.size() == 3 ? CHW : NCHW)));
+    outputBlob = make_blob_with_precision(TensorDesc(precision, outputDims, GetLayoutForDims(outputDims)));
     outputBlob->allocate();
     return outputBlob;
 }
@@ -1469,7 +1479,7 @@ Blob::Ptr GNAPlugin::GetInputBlob(const std::string& name, InferenceEngine::Prec
     // need to have intermediate blob for interleave conversion
     // TODO: NCHW format support is experimental = c++ MO did insert reshape, while TF mo - not
     auto inputDims = inputsDataMap[name]->getTensorDesc().getDims();
-    inputBlob = make_blob_with_precision(TensorDesc(precision, inputDims, inputDims.size() == 2 ? NC : (inputDims.size() == 3 ? CHW : NCHW)));
+    inputBlob = make_blob_with_precision(TensorDesc(precision, inputDims, GetLayoutForDims(inputDims)));
     inputBlob->allocate();
     return inputBlob;
 }
diff --git a/inference-engine/tests/unit/gna/gna_wait_test.cpp b/inference-engine/tests/unit/gna/gna_wait_test.cpp
index 790e7c9baaa..68173a67f96 100644
--- a/inference-engine/tests/unit/gna/gna_wait_test.cpp
+++ b/inference-engine/tests/unit/gna/gna_wait_test.cpp
@@ -25,7 +25,7 @@ class GNAPluginForGNAWaitTest : public GNAPlugin {
 public:
     // Prepare underlining object to enable GNAInferRequest::Wait() working
     GNAPluginForGNAWaitTest() {
-        InferenceEngine::TensorDesc td{ InferenceEngine::Precision::FP32, {}, InferenceEngine::Layout::HW };
+        InferenceEngine::TensorDesc td{ InferenceEngine::Precision::FP32, {1, 1}, InferenceEngine::Layout::HW };
         auto fakeInfo = std::make_shared<InferenceEngine::InputInfo>();
         auto fakePtr = std::make_shared<InferenceEngine::Data>("fakeName", td);
         fakeInfo->setInputData(fakePtr);