[IE][VPU]: CMX limit compile option (#1268)

In some networks, mvTensor would request a large CMX-DMA transfer (<512K). This starves the DMA engine for other timing-critical tasks such as SIPP. Add an option to myriad_compile that limits the CMX-DMA request size:
* Add compile option TILING_CMX_LIMIT_KB
* Declare compile option TILING_CMX_LIMIT_KB in IE tools (compile_tool and vpu_compile)
* Add tests for compile option TILING_CMX_LIMIT_KB. Small naming fix in the behavior tests.
This commit is contained in:
Nikita Kudriavtsev 2020-07-16 19:54:53 +03:00 committed by GitHub
parent d9927a9f35
commit 73ee68afb2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
21 changed files with 85 additions and 26 deletions

View File

@ -107,14 +107,8 @@ protected:
}
// Converts the textual option value to an int via std::stoi.
// Any exception thrown by std::stoi propagates to the caller unchanged.
static int parseInt(const std::string& src) {
    const int parsed = std::stoi(src);
    return parsed;
}
// Parses `src` as an int and accepts only strictly positive values.
//
// Throws std::invalid_argument when the parsed value is zero or negative.
// Exceptions raised by std::stoi itself (non-numeric input, out-of-range
// value) propagate to the caller unchanged.
static int parsePositiveInt(const std::string& src) {
    const auto val = std::stoi(src);
    // Zero is not a positive value: reject it together with negatives,
    // as both the function name and the error text promise.
    if (val <= 0) {
        throw std::invalid_argument("Zero/negative value");
    }
    return val;
}

View File

@ -21,6 +21,7 @@ struct DefaultAllocation {
static int numStreams(const Platform& platform, const CompilationConfig& configuration);
static int numSlices(const Platform& platform, int numStreams);
static int numShaves(const Platform& platform, int numStreams, int numSlices);
// Default CMX limit for tiling, computed from the number of CMX slices.
// NOTE(review): appears to be expressed in bytes - CompileEnv::init compares it
// against `tilingCMXLimitKB * 1024`.
static int tilingCMXLimit(int numSlices);
};
struct CompileEnv final {

View File

@ -43,6 +43,7 @@ struct CompilationConfig final {
int numSHAVEs = -1;
int numCMXSlices = -1;
int numExecutors = -1;
// CMX limit for tiling in KB (TILING_CMX_LIMIT_KB option); -1 selects the default limit.
int tilingCMXLimitKB = -1;
bool hwOptimization = true;
bool hwExtraSplit = false;

View File

@ -136,6 +136,5 @@ void printTo(DotLabel& lbl, const HwPaddingInfo& hwPad);
//
int calculateHwBufferSize(const DimValues& dims, const DimsOrder& order = DimsOrder());
int tilingCMXLimit(int numSlices);
} // namespace vpu

View File

@ -32,6 +32,7 @@ struct Resources final {
int numCMXSlices = 0;
int numSHAVEs = 0;
int numExecutors = 0;
// Effective CMX limit read by the HW tiling passes; assigned in CompileEnv::init.
int tilingCMXLimit = 0;
};
void printTo(std::ostream& os, const Resources& res);

View File

@ -17,6 +17,7 @@ namespace VPUConfigParams {
DECLARE_VPU_CONFIG_KEY(NUMBER_OF_SHAVES);
DECLARE_VPU_CONFIG_KEY(NUMBER_OF_CMX_SLICES);
DECLARE_VPU_CONFIG_KEY(TILING_CMX_LIMIT_KB);
DECLARE_VPU_CONFIG_KEY(TENSOR_STRIDES);

View File

@ -98,6 +98,12 @@ void CompileEnv::init(Platform platform, const CompilationConfig& config, const
R"(Value of configuration option ("{}") must be in the range [{}, {}], actual is "{}")",
VPU_CONFIG_KEY(NUMBER_OF_CMX_SLICES), 1, DeviceResources::numSlices(platform), numSlices);
// Default tiling limit derived from the number of CMX slices in use.
const int defaultCmxLimit = DefaultAllocation::tilingCMXLimit(numSlices);

// Validate the user-provided value itself (in KB): -1 means "use the default",
// any other negative value is rejected. Report the option value as given,
// not the derived byte count.
VPU_THROW_UNLESS(config.tilingCMXLimitKB >= -1,
    R"(Value of configuration option ("{}") must be greater than or equal to {}, actual is "{}")",
    VPU_CONFIG_KEY(TILING_CMX_LIMIT_KB), -1, config.tilingCMXLimitKB);

// Convert KB to bytes and cap by the default limit. The multiplication is done
// in `long long` so that a large KB value cannot overflow `int`; the result of
// std::min never exceeds `defaultCmxLimit`, so the narrowing cast is safe.
const int tilingCMXLimit = config.tilingCMXLimitKB == -1
    ? defaultCmxLimit
    : static_cast<int>(std::min<long long>(config.tilingCMXLimitKB * 1024LL, defaultCmxLimit));
const auto numShaves = config.numSHAVEs != -1 ? config.numSHAVEs : DefaultAllocation::numShaves(platform, numExecutors, numSlices);
VPU_THROW_UNLESS(numShaves >= 1 && numShaves <= DeviceResources::numShaves(platform),
R"(Value of configuration option ("{}") must be in the range [{}, {}], actual is "{}")",
@ -114,6 +120,7 @@ void CompileEnv::init(Platform platform, const CompilationConfig& config, const
g_compileEnv->resources.numSHAVEs = numShaves;
g_compileEnv->resources.numCMXSlices = numSlices;
g_compileEnv->resources.numExecutors = numExecutors;
g_compileEnv->resources.tilingCMXLimit = tilingCMXLimit;
g_compileEnv->initialized = true;
}
@ -299,4 +306,8 @@ int DefaultAllocation::numShaves(const Platform& platform, int numStreams, int n
return numShavesForAllocation / numStreams;
}
// Returns the default CMX tiling limit for `numSlices` slices:
// half of the slice count (integer division) worth of full slices,
// plus half of one slice.
int DefaultAllocation::tilingCMXLimit(int numSlices) {
    const auto halfSlice = CMX_SLICE_SIZE / 2;
    const auto pairedSlices = numSlices / 2;
    return halfSlice + pairedSlices * CMX_SLICE_SIZE;
}
} // namespace vpu

View File

@ -640,8 +640,7 @@ std::vector<TilingOption> HWConvolutionTilingSearcher::selectBetterTiling() cons
const auto& splitOver = dirTiling.splitOverTensorDims();
const auto direction = dirTiling.getDirection();
const auto cmxLimit = tilingCMXLimit(env.resources.numCMXSlices);
const auto cmxLimit = env.resources.tilingCMXLimit;
// split over Input tensor for the Channel dimension always
for (int numChannelTiles = 1; numChannelTiles <= maxNumChannelTiles; numChannelTiles++) {

View File

@ -315,8 +315,7 @@ std::vector<TilingOption> HWPoolingTilingSearcher::selectBetterTiling() const {
const auto& splitOver = dirTiling.splitOverTensorDims();
const auto direction = dirTiling.getDirection();
const auto cmxLimit = tilingCMXLimit(env.resources.numCMXSlices);
const auto cmxLimit = env.resources.tilingCMXLimit;
for (int numBatchTiles = 1; numBatchTiles <= maxNumBatchTiles; numBatchTiles++) {
//

View File

@ -88,8 +88,4 @@ int calculateHwBufferSize(const DimValues& dims, const DimsOrder& order) {
return calcTotalByteSize(desc, strides);
}
// Computes the CMX tiling limit for the given slice count:
// (numSlices / 2) full slices plus half of a slice.
int tilingCMXLimit(int numSlices) {
    const auto wholeSlicesPart = (numSlices / 2) * CMX_SLICE_SIZE;
    const auto halfSlicePart = CMX_SLICE_SIZE / 2;
    return wholeSlicesPart + halfSlicePart;
}
} // namespace vpu

View File

@ -292,7 +292,7 @@ public:
VPU_PROFILE(hwFullyConnectedTiling);
const auto& env = CompileEnv::get();
const auto cmxLimit = tilingCMXLimit(env.resources.numCMXSlices);
const auto cmxLimit = env.resources.tilingCMXLimit;
for (const auto& origStage : model->getStages()) {
if (origStage->type() != StageType::StubFullyConnected) {

View File

@ -31,7 +31,7 @@ void PassImpl::run(const Model& model) {
VPU_PROFILE(splitHwConvAndPool);
const auto& env = CompileEnv::get();
const auto cmxLimit = tilingCMXLimit(env.resources.numCMXSlices);
const auto cmxLimit = env.resources.tilingCMXLimit;
for (const auto& convStage : model->getStages()) {
if (convStage == nullptr) {

View File

@ -28,6 +28,7 @@ namespace vpu {
void printTo(std::ostream& os, const Resources& res) {
os << "[" << std::endl;
os << "tilingCMXLimit=" << res.tilingCMXLimit << std::endl;
os << "numCMXSlices=" << res.numCMXSlices << std::endl;
os << "numSHAVEs=" << res.numSHAVEs << std::endl;
@ -36,6 +37,7 @@ void printTo(std::ostream& os, const Resources& res) {
void printTo(DotLabel& lbl, const Resources& res) {
DotLabel subLbl(lbl);
subLbl.appendPair("tilingCMXLimit", res.tilingCMXLimit);
subLbl.appendPair("numCMXSlices", res.numCMXSlices);
subLbl.appendPair("numSHAVEs", res.numSHAVEs);
}

View File

@ -44,6 +44,7 @@ IE_SUPPRESS_DEPRECATED_START
VPU_CONFIG_KEY(NUMBER_OF_SHAVES),
VPU_CONFIG_KEY(NUMBER_OF_CMX_SLICES),
VPU_CONFIG_KEY(TILING_CMX_LIMIT_KB),
VPU_CONFIG_KEY(TENSOR_STRIDES),
@ -180,9 +181,28 @@ void ParsedConfig::parse(const std::map<std::string, std::string>& config) {
setOption(_compileConfig.customLayers, config, CONFIG_KEY(CONFIG_FILE));
}
setOption(_compileConfig.numSHAVEs, config, VPU_CONFIG_KEY(NUMBER_OF_SHAVES), parseInt);
setOption(_compileConfig.numCMXSlices, config, VPU_CONFIG_KEY(NUMBER_OF_CMX_SLICES), parseInt);
setOption(_compileConfig.numExecutors, config, VPU_MYRIAD_CONFIG_KEY(THROUGHPUT_STREAMS), parseInt);
// True for zero and above (zero passes this check, so "non-negative" is the accurate term).
const auto isNonNegative = [](int value) {
    return value >= 0;
};
// -1 is the sentinel that keeps the option at its default.
const auto isDefaultValue = [](int value) {
    return value == -1;
};
// Parses an integer compile option and accepts only non-negative values or
// the -1 default marker; anything else raises std::invalid_argument.
// Exceptions thrown by parseInt propagate unchanged.
const auto preprocessCompileOption = [&](const std::string& src) {
    const int value = parseInt(src);
    if (isNonNegative(value) || isDefaultValue(value)) {
        return value;
    }
    throw std::invalid_argument("Value must be non-negative or default(-1).");
};
setOption(_compileConfig.numSHAVEs, config, VPU_CONFIG_KEY(NUMBER_OF_SHAVES), preprocessCompileOption);
setOption(_compileConfig.numCMXSlices, config, VPU_CONFIG_KEY(NUMBER_OF_CMX_SLICES), preprocessCompileOption);
setOption(_compileConfig.numExecutors, config, VPU_MYRIAD_CONFIG_KEY(THROUGHPUT_STREAMS), preprocessCompileOption);
setOption(_compileConfig.tilingCMXLimitKB, config, VPU_CONFIG_KEY(TILING_CMX_LIMIT_KB), preprocessCompileOption);
if ((_compileConfig.numSHAVEs < 0 && _compileConfig.numCMXSlices >= 0) ||
(_compileConfig.numSHAVEs >= 0 && _compileConfig.numCMXSlices < 0)) {
@ -214,7 +234,10 @@ IE_SUPPRESS_DEPRECATED_END
_compileConfig.dumpAllPasses = std::stoi(envVar) != 0;
}
if (const auto envVar = std::getenv("IE_VPU_NUMBER_OF_SHAVES_AND_CMX_SLICES")) {
_compileConfig.numSHAVEs = _compileConfig.numCMXSlices = std::stoi(envVar);
_compileConfig.numSHAVEs = _compileConfig.numCMXSlices = preprocessCompileOption(envVar);
}
if (const auto envVar = std::getenv("IE_VPU_TILING_CMX_LIMIT_KB")) {
_compileConfig.tilingCMXLimitKB = preprocessCompileOption(envVar);
}
#endif
}

View File

@ -27,6 +27,10 @@ namespace {
{{VPU_CONFIG_KEY(HW_STAGES_OPTIMIZATION), CONFIG_VALUE(YES)}},
{{VPU_CONFIG_KEY(HW_STAGES_OPTIMIZATION), CONFIG_VALUE(NO)}},
{{VPU_CONFIG_KEY(TILING_CMX_LIMIT_KB), "-1"}},
{{VPU_CONFIG_KEY(TILING_CMX_LIMIT_KB), "0"}},
{{VPU_CONFIG_KEY(TILING_CMX_LIMIT_KB), "10"}},
{{VPU_CONFIG_KEY(PRINT_RECEIVE_TENSOR_TIME), CONFIG_VALUE(YES)}},
{{VPU_CONFIG_KEY(PRINT_RECEIVE_TENSOR_TIME), CONFIG_VALUE(NO)}},
{{VPU_MYRIAD_CONFIG_KEY(PROTOCOL), VPU_MYRIAD_CONFIG_VALUE(USB)}},
@ -73,6 +77,8 @@ namespace {
{{VPU_MYRIAD_CONFIG_KEY(PLATFORM), "0"}},
{{VPU_MYRIAD_CONFIG_KEY(PLATFORM), "1"}},
{{VPU_CONFIG_KEY(TILING_CMX_LIMIT_KB), "-10"}},
{{VPU_CONFIG_KEY(PRINT_RECEIVE_TENSOR_TIME), "ON"}},
{{VPU_CONFIG_KEY(PRINT_RECEIVE_TENSOR_TIME), "OFF"}}
};

View File

@ -20,7 +20,7 @@ namespace {
{{ MULTI_CONFIG_KEY(DEVICE_PRIORITIES) , CommonTestUtils::DEVICE_MYRIAD}}
};
const std::vector<std::map<std::string, std::string>> Inconfigs = {
const std::vector<std::map<std::string, std::string>> inferConfigs = {
{},
{{VPU_MYRIAD_CONFIG_KEY(FORCE_RESET), CONFIG_VALUE(YES)}},
@ -33,6 +33,10 @@ namespace {
{{CONFIG_KEY(LOG_LEVEL), CONFIG_VALUE(LOG_DEBUG)}},
{{CONFIG_KEY(LOG_LEVEL), CONFIG_VALUE(LOG_TRACE)}},
{{VPU_CONFIG_KEY(TILING_CMX_LIMIT_KB), "-1"}},
{{VPU_CONFIG_KEY(TILING_CMX_LIMIT_KB), "0"}},
{{VPU_CONFIG_KEY(TILING_CMX_LIMIT_KB), "1"}},
{{VPU_CONFIG_KEY(HW_STAGES_OPTIMIZATION), CONFIG_VALUE(YES)}},
{{VPU_CONFIG_KEY(HW_STAGES_OPTIMIZATION), CONFIG_VALUE(NO)}},
@ -40,7 +44,7 @@ namespace {
{{VPU_CONFIG_KEY(PRINT_RECEIVE_TENSOR_TIME), CONFIG_VALUE(NO)}}
};
const std::vector<std::map<std::string, std::string>> InmultiConfigs = {
const std::vector<std::map<std::string, std::string>> inferMultiConfigs = {
{{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, CommonTestUtils::DEVICE_MYRIAD},
{CONFIG_KEY(LOG_LEVEL), CONFIG_VALUE(LOG_DEBUG)}},
{{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, CommonTestUtils::DEVICE_MYRIAD},
@ -65,13 +69,13 @@ namespace {
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_MYRIAD),
::testing::ValuesIn(Inconfigs)),
::testing::ValuesIn(inferConfigs)),
InferConfigInTests::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_Multi_BehaviorTests, InferConfigInTests,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_MULTI),
::testing::ValuesIn(InmultiConfigs)),
::testing::ValuesIn(inferMultiConfigs)),
InferConfigInTests::getTestCaseName);
} // namespace

View File

@ -26,6 +26,7 @@
#include "ngraph_functions/subgraph_builders.hpp"
namespace BehaviorTestsDefinitions {
// TODO: rename to SetupInferWithConfigTests
using InferConfigTests = BehaviorTestsUtils::BehaviorTestsBasic;
TEST_P(InferConfigTests, canSetExclusiveAsyncRequests) {
@ -83,6 +84,7 @@ TEST_P(InferConfigTests, withoutExclusiveAsyncRequests) {
}
}
// TODO: rename to InferWithConfigTests
using InferConfigInTests = BehaviorTestsUtils::BehaviorTestsBasic;
TEST_P(InferConfigInTests, CanInferWithConfig) {

View File

@ -43,6 +43,7 @@ compile_tool [OPTIONS]
This option must be used in order to compile blob without a connected Myriad device.
-VPU_NUMBER_OF_SHAVES <value> Optional. Specifies number of shaves. Should be set with "VPU_NUMBER_OF_CMX_SLICES". Overwrites value from config.
-VPU_NUMBER_OF_CMX_SLICES <value> Optional. Specifies number of CMX slices. Should be set with "VPU_NUMBER_OF_SHAVES". Overwrites value from config.
-VPU_TILING_CMX_LIMIT_KB <value> Optional. Specifies CMX limit for data tiling in kB. Value should be equal or greater than -1, where -1 means default value of limit. Overwrites value from config.
DLA options:
-DLA_ARCH_NAME <value> Optional. Specify architecture name used to compile executable network for FPGA device.

View File

@ -39,6 +39,9 @@ static constexpr char number_of_shaves_message[] = "Optional. Specifies number o
static constexpr char number_of_cmx_slices_message[] = "Optional. Specifies number of CMX slices."
" Should be set with \"VPU_NUMBER_OF_SHAVES\"."
" Overwrites value from config.";
// Help text for -VPU_TILING_CMX_LIMIT_KB. States the unit (kB) and the meaning
// of -1 so that it matches the usage text in the tool's README.
static constexpr char tiling_cmx_limit_message[] = "Optional. Specifies CMX limit for data tiling in kB."
                                                   " Value should be equal or greater than -1, where -1 means default value of limit."
                                                   " Overwrites value from config.";
static constexpr char inputs_precision_message[] = "Optional. Specifies precision for all input layers of the network."
" Supported values: FP32, FP16, U8. Default value: FP16.";
static constexpr char outputs_precision_message[] = "Optional. Specifies precision for all output layers of the network."
@ -70,6 +73,7 @@ DEFINE_string(ol, "", outputs_layout_message);
DEFINE_string(VPU_MYRIAD_PLATFORM, "", platform_message);
DEFINE_string(VPU_NUMBER_OF_SHAVES, "", number_of_shaves_message);
DEFINE_string(VPU_NUMBER_OF_CMX_SLICES, "", number_of_cmx_slices_message);
DEFINE_string(VPU_TILING_CMX_LIMIT_KB, "", tiling_cmx_limit_message);
DEFINE_string(DLA_ARCH_NAME, "", dla_arch_name);
static void showUsage() {
@ -91,6 +95,7 @@ static void showUsage() {
std::cout << " -VPU_MYRIAD_PLATFORM <value> " << platform_message << std::endl;
std::cout << " -VPU_NUMBER_OF_SHAVES <value> " << number_of_shaves_message << std::endl;
std::cout << " -VPU_NUMBER_OF_CMX_SLICES <value> " << number_of_cmx_slices_message << std::endl;
std::cout << " -VPU_TILING_CMX_LIMIT_KB <value> " << tiling_cmx_limit_message << std::endl;
std::cout << " DLA options: " << std::endl;
std::cout << " -DLA_ARCH_NAME <value> " << dla_arch_name << std::endl;
std::cout << std::endl;
@ -164,6 +169,10 @@ static std::map<std::string, std::string> configure(const std::string &configFil
config[VPU_CONFIG_KEY(NUMBER_OF_CMX_SLICES)] = FLAGS_VPU_NUMBER_OF_CMX_SLICES;
}
if (!FLAGS_VPU_TILING_CMX_LIMIT_KB.empty()) {
config[VPU_CONFIG_KEY(TILING_CMX_LIMIT_KB)] = FLAGS_VPU_TILING_CMX_LIMIT_KB;
}
if (!FLAGS_DLA_ARCH_NAME.empty()) {
config["DLIA_ARCH_NAME"] = FLAGS_DLA_ARCH_NAME;
}

View File

@ -36,6 +36,7 @@ myriad_compile [OPTIONS]
This option must be used in order to compile blob without a connected Myriad device.
-VPU_NUMBER_OF_SHAVES <value> Optional. Specifies number of shaves. Should be set with "VPU_NUMBER_OF_CMX_SLICES". Overwrites value from config.
-VPU_NUMBER_OF_CMX_SLICES <value> Optional. Specifies number of CMX slices. Should be set with "VPU_NUMBER_OF_SHAVES". Overwrites value from config.
-VPU_TILING_CMX_LIMIT_KB <value> Optional. Specifies CMX limit for data tiling in kB. Value should be equal or greater than -1, where -1 means default value of limit. Overwrites value from config.
```
Running the application with the empty list of options yields an error message.

View File

@ -36,6 +36,9 @@ static constexpr char number_of_shaves_message[] = "Optional. Specifies number o
static constexpr char number_of_cmx_slices_message[] = "Optional. Specifies number of CMX slices."
" Should be set with \"VPU_NUMBER_OF_SHAVES\"."
" Overwrites value from config.";
// Help text for -VPU_TILING_CMX_LIMIT_KB. States the unit (kB) and the meaning
// of -1 so that it matches the usage text in the tool's README.
static constexpr char tiling_cmx_limit_message[] = "Optional. Specifies CMX limit for data tiling in kB."
                                                   " Value should be equal or greater than -1, where -1 means default value of limit."
                                                   " Overwrites value from config.";
static constexpr char inputs_precision_message[] = "Optional. Specifies precision for all input layers of network."
" Supported values: FP32, FP16, U8. Default value: FP16.";
static constexpr char outputs_precision_message[] = "Optional. Specifies precision for all output layers of network."
@ -58,6 +61,7 @@ DEFINE_string(iop, "", iop_message);
DEFINE_string(VPU_MYRIAD_PLATFORM, "", platform_message);
DEFINE_string(VPU_NUMBER_OF_SHAVES, "", number_of_shaves_message);
DEFINE_string(VPU_NUMBER_OF_CMX_SLICES, "", number_of_cmx_slices_message);
DEFINE_string(VPU_TILING_CMX_LIMIT_KB, "", tiling_cmx_limit_message);
static void showUsage() {
std::cout << std::endl;
@ -74,6 +78,7 @@ static void showUsage() {
std::cout << " -VPU_MYRIAD_PLATFORM <value> " << platform_message << std::endl;
std::cout << " -VPU_NUMBER_OF_SHAVES <value> " << number_of_shaves_message << std::endl;
std::cout << " -VPU_NUMBER_OF_CMX_SLICES <value> " << number_of_cmx_slices_message << std::endl;
std::cout << " -VPU_TILING_CMX_LIMIT_KB <value> " << tiling_cmx_limit_message << std::endl;
std::cout << std::endl;
}
@ -119,6 +124,10 @@ static std::map<std::string, std::string> configure(const std::string &configFil
config[VPU_CONFIG_KEY(NUMBER_OF_CMX_SLICES)] = FLAGS_VPU_NUMBER_OF_CMX_SLICES;
}
if (!FLAGS_VPU_TILING_CMX_LIMIT_KB.empty()) {
config[VPU_CONFIG_KEY(TILING_CMX_LIMIT_KB)] = FLAGS_VPU_TILING_CMX_LIMIT_KB;
}
return config;
}