[GPU] Apply cached_blob to make engine (#13781)

+ Updated oneDNN to use cache_blob
+ Updated oneDNN to fix group conv failure
+ Added g_os_iyx_osv8 weights format and the corresponding reorder support required by the oneDNN update
+ Used cache_blob to create the oneDNN engine when the cache_dir config is set

Signed-off-by: Min, Byungil <byungil.min@intel.com>
This commit is contained in:
Min, Byungil 2022-11-21 22:52:18 +09:00 committed by GitHub
parent 0846bdb67e
commit 0cf319f855
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 59 additions and 6 deletions

View File

@ -222,6 +222,7 @@ struct format {
gyxio, ///< format used for weights for 2D convolution
goizyx, ///< format used for weights for 3D convolution
giozyx, ///< format used for weights for 3D deconvolution
g_os_iyx_osv8, ///< format used for weights for 2D convolution
g_os_iyx_osv16, ///< format used for weights for 2D convolution
g_os_iyx_osv32, ///< format used for weights for 2D convolution
gs_oiyx_gsv16, ///< format used for weights for 2D convolution

View File

@ -445,6 +445,7 @@ static cldnn::format convert_format(dnnl::memory::format_tag fmt, bool is_groupe
case dnnl::memory::format_tag::aBCde4b8c8b2c: return cldnn::format::g_os_is_yx_osa4_isa8_osv8_isv2;
case dnnl::memory::format_tag::aBCde8b2c: return cldnn::format::g_os_is_yx_osv8_isv2;
case dnnl::memory::format_tag::aBCde8b4c: return cldnn::format::g_os_is_yx_osv8_isv4;
case dnnl::memory::format_tag::aBcde8b: return cldnn::format::g_os_iyx_osv8;
case dnnl::memory::format_tag::aBCd2b8c16b4c: return cldnn::format::g_os_is_yx_osa2_isa8_osv16_isv4;
case dnnl::memory::format_tag::aBCd2b8c16b2c: return cldnn::format::g_os_is_yx_osa2_isa8_osv16_isv2;
case dnnl::memory::format_tag::aBCdef16c16b: return cldnn::format::g_os_is_zyx_isv16_osv16;

View File

@ -421,6 +421,8 @@ kernel_selector::weights_layout to_weights_layout(format f, bool is_grouped) {
return kernel_selector::weights_layout::goizyx;
case format::giozyx:
return kernel_selector::weights_layout::giozyx;
case format::g_os_iyx_osv8:
return kernel_selector::weights_layout::g_os_iyx_osv8;
case format::g_os_iyx_osv16:
return kernel_selector::weights_layout::g_os_iyx_osv16;
case format::g_os_iyx_osv32:
@ -691,6 +693,8 @@ cldnn::format::type from_weights_layout(kernel_selector::weights_layout l) {
return cldnn::format::goiyx;
case kernel_selector::weights_layout::goizyx:
return cldnn::format::goizyx;
case kernel_selector::weights_layout::g_os_iyx_osv8:
return cldnn::format::g_os_iyx_osv8;
case kernel_selector::weights_layout::g_os_iyx_osv16:
return cldnn::format::g_os_iyx_osv16;
case kernel_selector::weights_layout::g_os_iyx_osv32:

View File

@ -123,6 +123,8 @@ inline uint FUNC(get_input_index)(uint g, uint o, uint i, uint z, uint y, uint x
return GET_FILTER_GOIZYX(INPUT0, g, o, i, z, y, x);
#elif defined INPUT0_LAYOUT_GIOZYX
return GET_FILTER_GIOZYX(INPUT0, g, o, i, z, y, x);
#elif defined INPUT0_LAYOUT_G_OS_IYX_OSV8
return GET_FILTER_G_OS_IYX_OSV16(INPUT0, g, o, i, y, x, 8);
#elif defined INPUT0_LAYOUT_G_OS_IYX_OSV16
return GET_FILTER_G_OS_IYX_OSV16(INPUT0, g, o, i, y, x, 16);
#elif defined INPUT0_LAYOUT_G_OS_IYX_OSV32
@ -371,6 +373,8 @@ inline uint FUNC(get_output_index)(uint g, uint o, uint i, uint z, uint y, uint
return GET_FILTER_OS_ZY_IS_X_OSV8_ISV4_INDEX(OUTPUT, o, i, z, y, x);
#elif defined OUTPUT_LAYOUT_GOIZYX || defined OUTPUT_LAYOUT_GIOZYX
return GET_FILTER_INDEX_5D(OUTPUT, g, o, i, z, y, x);
#elif defined OUTPUT_LAYOUT_G_OS_IYX_OSV8
return GET_FILTER_G_OS_IYX_OSV16(OUTPUT, g, o, i, y, x, 8);
#elif defined OUTPUT_LAYOUT_G_OS_IYX_OSV16
return GET_FILTER_G_OS_IYX_OSV16(OUTPUT, g, o, i, y, x, 16);
#elif defined OUTPUT_LAYOUT_G_OS_IYX_OSV32

View File

@ -649,7 +649,7 @@ class WeightTensorJitConstant : public TensorBaseTJitConstant<WeightsType, Weigh
)
)V0G0N";
} else if (l == WeightsLayout::os_iyx_osv16 || l == WeightsLayout::os_iyx_osv32 ||
l == WeightsLayout::os_iyx_osv32__ai32 || l == WeightsLayout::g_os_iyx_osv16 ||
l == WeightsLayout::os_iyx_osv32__ai32 || l == WeightsLayout::g_os_iyx_osv8 || l == WeightsLayout::g_os_iyx_osv16 ||
l == WeightsLayout::g_os_iyx_osv32) {
args macroNameArgs = {"prefix", "g", "o", "i", "y", "x", "sub_group_size"};
this->calcFunction = FuncBody(layout_name);
@ -881,6 +881,8 @@ JitDefinitions WeightTensorJitConstant::GetDefinitions() const {
index_func_val = called_func_name + "(" + _name + ", g, o, i, 0, y, x)";
else if (layout == WeightsLayout::g_os_is_yx_isv16_osv16)
index_func_val = called_func_name + "(" + _name + ", g, o, i, 0, y, x, 16)";
else if (layout == WeightsLayout::g_os_iyx_osv8)
index_func_val = called_func_name + "(" + _name + ", g, o, i, y, x, 8)";
else if (layout == WeightsLayout::g_os_iyx_osv16)
index_func_val = called_func_name + "(" + _name + ", g, o, i, y, x, 16)";
else if (layout == WeightsLayout::g_is_os_yx_isv16_osv16)

View File

@ -379,6 +379,7 @@ std::string toString(WeightsLayout layout) {
case WeightsLayout::gyxio: return "GYXIO";
case WeightsLayout::goizyx: return "GOIZYX";
case WeightsLayout::giozyx: return "GIOZYX";
case WeightsLayout::g_os_iyx_osv8: return "G_OS_IYX_OSV8";
case WeightsLayout::g_os_iyx_osv16: return "G_OS_IYX_OSV16";
case WeightsLayout::g_os_iyx_osv32: return "G_OS_IYX_OSV32";
case WeightsLayout::gs_oiyx_gsv16: return "GS_OIYX_GSV16";

View File

@ -48,6 +48,7 @@ inline uint32_t SubGroupSize(WeightsLayout l) {
case WeightsLayout::os_i_osv8__ai8:
case WeightsLayout::iy_xs_os_xsv2_osv8__ao32:
case WeightsLayout::giy_xs_os_xsv2_osv8__ao32:
case WeightsLayout::g_os_iyx_osv8:
return 8;
default:
return 1;

View File

@ -166,6 +166,7 @@ WeightsTensor::WeightsChannelArray WeightsTensor::weightsChannelArray {{
{ WeightsLayout::gioyx, { 0, 1, -1, 3, 2, 4 } },
{ WeightsLayout::goizyx, { 0, 1, 2, 3, 4, 5 } },
{ WeightsLayout::giozyx, { 0, 1, 2, 4, 3, 5 } },
{ WeightsLayout::g_os_iyx_osv8, { 0, 1, -1, 2, 3, 4 } },
{ WeightsLayout::g_os_iyx_osv16, { 0, 1, -1, 2, 3, 4 } },
{ WeightsLayout::g_os_iyx_osv32, { 0, 1, -1, 2, 3, 4 } },
{ WeightsLayout::gs_oiyx_gsv16, { 0, 1, -1, 2, 3, 4 } },
@ -766,6 +767,10 @@ NDims WeightsTensor::GetSimpleDims(const std::vector<size_t>& d, WeightsLayout l
case os_i_yxs_osv4_yxsv4:
newDims[3] = RoundUp(newDims[3], 4);
break;
case g_os_iyx_osv8:
assert(newDims.size() == 5);
newDims[3] = RoundUp(newDims[3], 8);
break;
case g_os_iyx_osv16:
case g_os_iyx_osv16_rotate_180:
assert(newDims.size() == 5);

View File

@ -184,6 +184,7 @@ enum WeightsLayout {
goizyx,
giozyx,
gyxio,
g_os_iyx_osv8,
g_os_iyx_osv16,
g_os_iyx_osv32,
gs_oiyx_gsv16,

View File

@ -87,7 +87,6 @@ static const std::map<format::type, format_traits> format_traits_map {
FMT_TRAITS(image_2d_weights_c4_fyx_b, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {}),
FMT_TRAITS(image_2d_weights_c1_b_fyx, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {}),
FMT_TRAITS(lstm_weights_dio, 1, 1, 2, 0, {0, 1, 3, 2}, "oixy", "oixy?", {}),
FMT_TRAITS(os_is_yx_isa8_osv8_isv4, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {}),
FMT_TRAITS(os_is_yx_isa8_osv16_isv4, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {}),
FMT_TRAITS(os_is_yx_isa8_osv8_isv4_swizzled_by_4, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {}),
FMT_TRAITS(os_is_yx_osa4_isa8_osv8_isv2, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {{0, 32}, {1, 16}}),
@ -98,8 +97,6 @@ static const std::map<format::type, format_traits> format_traits_map {
FMT_TRAITS(os_is_yx_osa2_isa8_osv16_isv4, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy", {{0, 32}, {1, 32}}),
FMT_TRAITS(os_is_yx_osa2_isa8_osv8_isv2, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy", {{0, 16}, {1, 16}}),
FMT_TRAITS(os_is_zyx_osa2_isa8_osv8_isv2, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "oizyx", "oixyz", {{0, 16}, {1, 16}}),
FMT_TRAITS(os_is_zyx_isa8_osv8_isv4, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "oizyx", "oixyz", {{1, 8}, {0, 8}, {1, 4}}),
FMT_TRAITS(os_is_zyx_isa8_osv16_isv4, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "oizyx", "oixyz", {{1, 8}, {0, 16}, {1, 4}}),
FMT_TRAITS(os_is_yx_osa4_isa8_osv8_isv4_swizzled_by_4, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {{0, 32}, {1, 32}}),
FMT_TRAITS(os_is_zyx_osa4_isa8_osv8_isv4_swizzled_by_4, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "oizyx", "oixyz", {{0, 32}, {1, 32}}),
FMT_TRAITS(is_os_yx_osa4_isa8_osv8_isv4, 1, 1, 2, 0, {1, 0, 2, 3}, "ioyx", "ioxy", {{0, 32}, {1, 32}}),
@ -125,9 +122,12 @@ static const std::map<format::type, format_traits> format_traits_map {
FMT_TRAITS(is_os_yx_isv16_osv8, 1, 1, 2, 0, {1, 0, 2, 3, 4}, "ioyx", "oixy", {{1, 16}, {0, 8}}),
FMT_TRAITS(is_os_zyx_isa8_osv8_isv2, 1, 1, 3, 0, {1, 0, 2, 3, 4}, "iozyx", "ioxyz", {{1, 8}, {0, 8}, {1, 2}}),
FMT_TRAITS(is_os_zyx_isa8_osv8_isv4, 1, 1, 3, 0, {1, 0, 2, 3, 4}, "iozyx", "ioxyz", {{1, 8}, {0, 8}, {1, 4}}),
FMT_TRAITS(os_is_zyx_isa8_osv8_isv4, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "oizyx", "oixyz", {{1, 8}, {0, 8}, {1, 4}}),
FMT_TRAITS(os_is_zyx_isa8_osv8_isv2, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "oizyx", "oixyz", {{1, 8}, {0, 8}, {1, 2}}),
FMT_TRAITS(os_is_zyx_isa8_osv16_isv4, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "oizyx", "oixyz", {{1, 8}, {0, 16}, {1, 4}}),
FMT_TRAITS(is_os_yx_isa8_osv8_isv2, 1, 1, 2, 0, {1, 0, 2, 3}, "ioyx", "ioxy?", {{1, 8}, {0, 8}, {1, 2}}),
FMT_TRAITS(is_os_yx_isa8_osv8_isv4, 1, 1, 2, 0, {1, 0, 2, 3}, "ioyx", "ioxy?", {{1, 8}, {0, 8}, {1, 4}}),
FMT_TRAITS(os_is_yx_isa8_osv8_isv4, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {{1, 8}, {0, 8}, {1, 4}}),
FMT_TRAITS(os_is_yx_isa8_osv8_isv2, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {{1, 8}, {0, 8}, {1, 2}}),
FMT_TRAITS(os_is_osv32_isv32_swizzled_by_4, 1, 1, 0, 0, {0, 1, 2, 3}, "oixy", "oixy?", {{0, 32}, {1, 32}}),
FMT_TRAITS(os_is_zyx_isv8_osv16_isv2, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "oizyx", "oixyz", {{1, 8}, {0, 16}, {1, 2}}),
@ -156,6 +156,7 @@ static const std::map<format::type, format_traits> format_traits_map {
FMT_TRAITS(gioyx, 1, 1, 2, 1, {0, 2, 1, 3, 4}, "gioyx", "oixy??g", {}),
FMT_TRAITS(goizyx, 1, 1, 3, 1, {0, 1, 2, 3, 4, 5}, "goizyx", "oixyz?g", {}),
FMT_TRAITS(giozyx, 1, 1, 3, 1, {0, 2, 1, 3, 4, 5}, "giozyx", "oixyz?g", {}),
FMT_TRAITS(g_os_iyx_osv8, 1, 1, 2, 1, {0, 1, 2, 3, 4}, "goiyx", "oixy??g", {{0, 8}}),
FMT_TRAITS(g_os_iyx_osv16, 1, 1, 2, 1, {0, 1, 2, 3, 4}, "goiyx", "oixy??g", {{0, 16}}),
FMT_TRAITS(g_os_iyx_osv32, 1, 1, 2, 1, {0, 1, 2, 3, 4}, "goiyx", "oixy??g", {{0, 32}}),
FMT_TRAITS(gs_oiyx_gsv16, 1, 1, 2, 1, {0, 1, 2, 3, 4}, "goiyx", "oixy??g", {{6, 16}}),

View File

@ -32,6 +32,7 @@ cl::PFN_clCreateFromD3D11Buffer cl::BufferDX::pfn_clCreateFromD3D11Buffer = NULL
#ifdef ENABLE_ONEDNN_FOR_GPU
#include <oneapi/dnnl/dnnl_ocl.hpp>
#include "openvino/util/file_util.hpp"
#endif
namespace cldnn {
@ -64,7 +65,38 @@ dnnl::engine& ocl_engine::get_onednn_engine() const {
if (!casted)
throw ov::Exception("[GPU] Invalid device type stored in ocl_engine");
_onednn_engine = std::make_shared<dnnl::engine>(dnnl::ocl_interop::make_engine(casted->get_device().get(), casted->get_context().get()));
auto config = this->configuration();
if (config.kernels_cache_path.empty()) {
_onednn_engine = std::make_shared<dnnl::engine>(dnnl::ocl_interop::make_engine(casted->get_device().get(), casted->get_context().get()));
} else {
// Use cached blob
auto path = config.kernels_cache_path;
if (path.back() != '/' && path.back() != '\\') {
path += "/";
}
auto blob_id = dnnl::ocl_interop::get_engine_cache_blob_id(casted->get_device().get());
if (blob_id.empty()) {
// Create engine without cache_blob
_onednn_engine = std::make_shared<dnnl::engine>(dnnl::ocl_interop::make_engine(casted->get_device().get(), casted->get_context().get()));
return *_onednn_engine;
}
std::string id_str(blob_id.begin(), blob_id.end());
size_t hash = std::hash<std::string>()(id_str);
path = path + std::to_string(hash) + ".onednn.cl_cache";
auto onednn_cache_blob = ov::util::load_binary(path);
if (onednn_cache_blob.empty()) {
_onednn_engine = std::make_shared<dnnl::engine>(dnnl::ocl_interop::make_engine(casted->get_device().get(), casted->get_context().get()));
onednn_cache_blob = dnnl::ocl_interop::get_engine_cache_blob(*_onednn_engine);
ov::util::save_binary(path, onednn_cache_blob);
} else {
_onednn_engine = std::make_shared<dnnl::engine>(dnnl::ocl_interop::make_engine(casted->get_device().get(), casted->get_context().get(),
onednn_cache_blob));
}
}
}
return *_onednn_engine;

@ -1 +1 @@
Subproject commit b5faa77a4a651f1e44fa77348eded54ea3ec3eef
Subproject commit e5a70f43639ba968869a99931d77116791ace355