[GNA] Added SW_FP32 mode w/o SF for BasicLSTM (#10115)

* [GNA] Added SW_FP32 mode w/o SF for BasicLSTM

* deleted additional test
 added sw_fp32 mode for existing test
 changed reference output for new mode

* [GNA] Fixed according to review

* [GNA] Parametrized weights range

* fixed after review

Co-authored-by: Mikhail Ryzhov <mikhail.ryzhov@intel.com>
This commit is contained in:
Andrey Noskov 2022-03-17 11:32:46 +03:00 committed by GitHub
parent bfa0e3e1a4
commit 0091d52c78
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 30 additions and 71 deletions

View File

@ -19,6 +19,10 @@ const std::vector<std::map<std::string, std::string>> configs = {
{
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
{"GNA_SCALE_FACTOR_0", "1638.4"}
},
{
{"GNA_DEVICE_MODE", "GNA_SW_FP32"},
{"GNA_SCALE_FACTOR_0", "1638.4"}
}
};
@ -31,6 +35,8 @@ size_t small_num_cells = 10;
size_t big_num_cells = 49;
std::pair<float, float> weights_range = {0.f, 0.02f};
const std::vector<bool> decompose = { false, true };
INSTANTIATE_TEST_SUITE_P(smoke_BasicLSTM, Basic_LSTM_S,
@ -40,7 +46,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_BasicLSTM, Basic_LSTM_S,
::testing::ValuesIn(configs),
::testing::ValuesIn(size_params),
::testing::Values(small_num_cells),
::testing::ValuesIn(decompose)),
::testing::ValuesIn(decompose),
::testing::Values(weights_range)),
Basic_LSTM_S::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_BasicLSTM_big_cells_num, Basic_LSTM_S,
@ -50,6 +57,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_BasicLSTM_big_cells_num, Basic_LSTM_S,
::testing::ValuesIn(configs),
::testing::Values(size_params[0]),
::testing::Values(big_num_cells),
::testing::ValuesIn(decompose)),
::testing::ValuesIn(decompose),
::testing::Values(weights_range)),
Basic_LSTM_S::getTestCaseName);
} // namespace

View File

@ -43,15 +43,9 @@ TEST_P(Basic_LSTM_S, CompareWithRefImpl_LowLatencyTransformation) {
// Generate inputs
GenerateInputs();
// Calculate References for the network before transformation passes
auto referenceOutputs = CalculateRefs();
// Apply LowLatency and UnrollTensorIterator transformations
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::LowLatency2>(); // LowLatency enables UnrollTI
manager.run_passes(function);
functionRefs = ngraph::clone_function(*function);
LoadNetwork();
auto referenceOutputs = CalculateRefs();
auto states = inferRequest.QueryState();
for (auto& state : states) {
auto name = state.GetName();

View File

@ -21,7 +21,8 @@ typedef std::tuple<
std::map<std::string, std::string>, // Configuration
std::pair<size_t, size_t>, // Third dimension and hidden size
size_t, // Number of Cells
bool // Decompose LSTMCell
bool, // Decompose LSTMCell
std::pair<float, float> // Input and weights range
> basicLstmParams;
class Basic_LSTM_S : public testing::WithParamInterface<basicLstmParams>,
@ -33,6 +34,7 @@ public:
static std::shared_ptr<ngraph::Function> GetNetwork(size_t thirdDimOut,
size_t hiddenSize,
size_t num_cells = 10,
std::pair<float, float> weights_range = {0.f, 10.f},
const InferenceEngine::Precision& netPrecission = InferenceEngine::Precision::FP32,
std::vector<float>* hidden_memory_init_out = nullptr,
std::vector<float>* cell_memory_init_out = nullptr);
@ -43,10 +45,11 @@ protected:
size_t hidden_size;
size_t third_dim;
std::pair<float, float> weights_range;
std::vector<float> hidden_memory_init;
std::vector<float> cell_memory_init;
void SetUp() override;
std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>> CalculateRefs() override;
InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const override;
};
} // namespace SubgraphTestsDefinitions

View File

@ -17,7 +17,8 @@ std::string Basic_LSTM_S::getTestCaseName(const testing::TestParamInfo<basicLstm
std::pair<size_t, size_t> size_params;
size_t num_cells;
bool decompose;
std::tie(netPrecision, targetDevice, configuration, size_params, num_cells, decompose) = obj.param;
std::pair<float, float> weights;
std::tie(netPrecision, targetDevice, configuration, size_params, num_cells, decompose, weights) = obj.param;
std::ostringstream result;
result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
@ -39,12 +40,12 @@ void Basic_LSTM_S::SetUp() {
std::pair<size_t, size_t> size_params;
size_t num_cells;
bool decompose;
std::tie(netPrecision, targetDevice, configuration, size_params, num_cells, decompose) = this->GetParam();
std::tie(netPrecision, targetDevice, configuration, size_params, num_cells, decompose, weights_range) = this->GetParam();
third_dim = size_params.first;
hidden_size = size_params.second;
outPrc = InferenceEngine::Precision::FP32;
function = GetNetwork(size_params.first, size_params.second, num_cells, netPrecision, &hidden_memory_init, &cell_memory_init);
function = GetNetwork(size_params.first, size_params.second, num_cells, weights_range, netPrecision, &hidden_memory_init, &cell_memory_init);
if (decompose) {
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::LSTMCellDecomposition>();
@ -55,6 +56,7 @@ void Basic_LSTM_S::SetUp() {
std::shared_ptr<ngraph::Function> Basic_LSTM_S::GetNetwork(size_t thirdDimOut,
size_t hiddenSize,
size_t num_cells,
std::pair<float, float> weights_range,
const InferenceEngine::Precision& netPrecission,
std::vector<float>* hidden_memory_init_out,
std::vector<float>* cell_memory_init_out) {
@ -70,8 +72,8 @@ std::shared_ptr<ngraph::Function> Basic_LSTM_S::GetNetwork(size_t thirdDimOut,
auto reshape1 = std::make_shared<ngraph::opset1::Reshape>(params[0], pattern1, false);
auto reshape1_shape = reshape1->output(0).get_shape();
auto H_init = ngraph::builder::makeConstant<float>(ngPrc, { batch_size, hiddenSize }, {}, true);
auto C_init = ngraph::builder::makeConstant<float>(ngPrc, { batch_size, hiddenSize }, {}, true);
auto H_init = ngraph::builder::makeConstant<float>(ngPrc, { batch_size, hiddenSize }, {}, true, weights_range.second, weights_range.first);
auto C_init = ngraph::builder::makeConstant<float>(ngPrc, { batch_size, hiddenSize }, {}, true, weights_range.second, weights_range.first);
if (hidden_memory_init_out != nullptr) {
*hidden_memory_init_out = std::static_pointer_cast<ngraph::opset1::Constant>(H_init)->cast_vector<float>();
}
@ -84,8 +86,9 @@ std::shared_ptr<ngraph::Function> Basic_LSTM_S::GetNetwork(size_t thirdDimOut,
C_t->set_friendly_name("cell_state_1");
//Body
auto X = std::make_shared<ngraph::opset1::Parameter>(ngPrc, ngraph::Shape{ batch_size, 1, reshape1_shape[2] });
auto weightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, reshape1_shape[2] }, {}, true);
auto reccurrenceWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, {}, true);
auto weightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, reshape1_shape[2] }, {}, true, weights_range.second, weights_range.first);
auto reccurrenceWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, {}, true, weights_range.second,
weights_range.first);
//lstm [1, 10], [1, 118], [1, 118] -> [1, 118], [1, 118]
outFormShapes1 = { batch_size, reshape1_shape[2] };
@ -112,7 +115,7 @@ std::shared_ptr<ngraph::Function> Basic_LSTM_S::GetNetwork(size_t thirdDimOut,
auto out0 = tensor_iterator->get_iter_value(H_o, -1);
const size_t output_size = 12;
auto fc1 = ngraph::builder::makeFullyConnected(out0, ngPrc, output_size, true, { hiddenSize, output_size }, { 1 }, { 1 });
auto fc1 = ngraph::builder::makeFullyConnected(out0, ngPrc, output_size, true, { hiddenSize, output_size }, { weights_range.second }, { 0.f });
ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(fc1) };
return std::make_shared<ngraph::Function>(results, params, "Basic_LSTM_S");
@ -146,60 +149,11 @@ void Basic_LSTM_S::Run() {
const auto& actualOutputs = GetOutputs();
auto referenceOutputs = CalculateRefs();
Compare(referenceOutputs, actualOutputs);
}
std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>> Basic_LSTM_S::CalculateRefs() {
//For now TensorIterator is not implemented in ngraph interpreter so it is needed to validate with another reference
auto reference_model = ngraph::clone_function(*function);
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
manager.run_passes(reference_model);
auto refCnnNetwork = InferenceEngine::CNNNetwork{ reference_model };
auto refExecutableNetwork = core->LoadNetwork(refCnnNetwork, targetDevice);
auto refInferRequest = refExecutableNetwork.CreateInferRequest();
std::vector<InferenceEngine::InputInfo::Ptr> refInfos;
for (const auto& input : refCnnNetwork.getInputsInfo()) {
const auto& info = input.second;
refInfos.push_back(info);
}
for (std::size_t i = 0; i < inputs.size(); ++i) {
const auto& input = inputs[i];
const auto& info = refInfos[i];
refInferRequest.SetBlob(info->name(), input);
}
refInferRequest.Infer();
auto refOutputs = std::vector<InferenceEngine::Blob::Ptr>{};
for (const auto& output : refCnnNetwork.getOutputsInfo()) {
const auto& name = output.first;
refOutputs.push_back(refInferRequest.GetBlob(name));
}
auto referenceOutputs = std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>>(refOutputs.size());
for (std::size_t i = 0; i < refOutputs.size(); ++i) {
const auto& reference = refOutputs[i];
const auto refSize = reference->byteSize();
referenceOutputs[i].first = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(reference->getTensorDesc().getPrecision());
auto& expectedOutput = referenceOutputs[i].second;
expectedOutput.resize(refSize);
auto refMemory = InferenceEngine::as<InferenceEngine::MemoryBlob>(reference);
IE_ASSERT(refMemory);
const auto refLockedMemory = refMemory->wmap();
const auto referenceBuffer = refLockedMemory.as<const std::uint8_t*>();
std::copy(referenceBuffer, referenceBuffer + refSize, expectedOutput.data());
}
return referenceOutputs;
// Fills the network input blob with random values bounded by the test's
// parametrized weights_range. Arguments passed as (range = weights_range.second,
// start_from = weights_range.first, resolution = 1) — presumably matching the
// (upTo, startFrom) order used for makeConstant in GetNetwork; TODO confirm
// against the FuncTestUtils::createAndFillBlob signature.
InferenceEngine::Blob::Ptr Basic_LSTM_S::GenerateInput(const InferenceEngine::InputInfo& info) const {
return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), weights_range.second, weights_range.first, 1);
}
} // namespace SubgraphTestsDefinitions