[GPU] Apply cached_blob to make engine (#13781)
+ Updated oneDNN to use cache_blob
+ Updated oneDNN to fix group conv failure
+ Added g_os_iyx_osv8 format and relevant reorder to support oneDNN update
+ Used cached_blob to make engine if cache_dir config is used

Signed-off-by: Min, Byungil <byungil.min@intel.com>
This commit is contained in:
parent
0846bdb67e
commit
0cf319f855
@ -222,6 +222,7 @@ struct format {
|
||||
gyxio, ///< format used for weights for 2D convolution
|
||||
goizyx, ///< format used for weights for 3D convolution
|
||||
giozyx, ///< format used for weights for 3D deconvolution
|
||||
g_os_iyx_osv8, ///< format used for weights for 2D convolution
|
||||
g_os_iyx_osv16, ///< format used for weights for 2D convolution
|
||||
g_os_iyx_osv32, ///< format used for weights for 2D convolution
|
||||
gs_oiyx_gsv16, ///< format used for weights for 2D convolution
|
||||
|
@ -445,6 +445,7 @@ static cldnn::format convert_format(dnnl::memory::format_tag fmt, bool is_groupe
|
||||
case dnnl::memory::format_tag::aBCde4b8c8b2c: return cldnn::format::g_os_is_yx_osa4_isa8_osv8_isv2;
|
||||
case dnnl::memory::format_tag::aBCde8b2c: return cldnn::format::g_os_is_yx_osv8_isv2;
|
||||
case dnnl::memory::format_tag::aBCde8b4c: return cldnn::format::g_os_is_yx_osv8_isv4;
|
||||
case dnnl::memory::format_tag::aBcde8b: return cldnn::format::g_os_iyx_osv8;
|
||||
case dnnl::memory::format_tag::aBCd2b8c16b4c: return cldnn::format::g_os_is_yx_osa2_isa8_osv16_isv4;
|
||||
case dnnl::memory::format_tag::aBCd2b8c16b2c: return cldnn::format::g_os_is_yx_osa2_isa8_osv16_isv2;
|
||||
case dnnl::memory::format_tag::aBCdef16c16b: return cldnn::format::g_os_is_zyx_isv16_osv16;
|
||||
|
@ -421,6 +421,8 @@ kernel_selector::weights_layout to_weights_layout(format f, bool is_grouped) {
|
||||
return kernel_selector::weights_layout::goizyx;
|
||||
case format::giozyx:
|
||||
return kernel_selector::weights_layout::giozyx;
|
||||
case format::g_os_iyx_osv8:
|
||||
return kernel_selector::weights_layout::g_os_iyx_osv8;
|
||||
case format::g_os_iyx_osv16:
|
||||
return kernel_selector::weights_layout::g_os_iyx_osv16;
|
||||
case format::g_os_iyx_osv32:
|
||||
@ -691,6 +693,8 @@ cldnn::format::type from_weights_layout(kernel_selector::weights_layout l) {
|
||||
return cldnn::format::goiyx;
|
||||
case kernel_selector::weights_layout::goizyx:
|
||||
return cldnn::format::goizyx;
|
||||
case kernel_selector::weights_layout::g_os_iyx_osv8:
|
||||
return cldnn::format::g_os_iyx_osv8;
|
||||
case kernel_selector::weights_layout::g_os_iyx_osv16:
|
||||
return cldnn::format::g_os_iyx_osv16;
|
||||
case kernel_selector::weights_layout::g_os_iyx_osv32:
|
||||
|
@ -123,6 +123,8 @@ inline uint FUNC(get_input_index)(uint g, uint o, uint i, uint z, uint y, uint x
|
||||
return GET_FILTER_GOIZYX(INPUT0, g, o, i, z, y, x);
|
||||
#elif defined INPUT0_LAYOUT_GIOZYX
|
||||
return GET_FILTER_GIOZYX(INPUT0, g, o, i, z, y, x);
|
||||
#elif defined INPUT0_LAYOUT_G_OS_IYX_OSV8
|
||||
return GET_FILTER_G_OS_IYX_OSV16(INPUT0, g, o, i, y, x, 8);
|
||||
#elif defined INPUT0_LAYOUT_G_OS_IYX_OSV16
|
||||
return GET_FILTER_G_OS_IYX_OSV16(INPUT0, g, o, i, y, x, 16);
|
||||
#elif defined INPUT0_LAYOUT_G_OS_IYX_OSV32
|
||||
@ -371,6 +373,8 @@ inline uint FUNC(get_output_index)(uint g, uint o, uint i, uint z, uint y, uint
|
||||
return GET_FILTER_OS_ZY_IS_X_OSV8_ISV4_INDEX(OUTPUT, o, i, z, y, x);
|
||||
#elif defined OUTPUT_LAYOUT_GOIZYX || defined OUTPUT_LAYOUT_GIOZYX
|
||||
return GET_FILTER_INDEX_5D(OUTPUT, g, o, i, z, y, x);
|
||||
#elif defined OUTPUT_LAYOUT_G_OS_IYX_OSV8
|
||||
return GET_FILTER_G_OS_IYX_OSV16(OUTPUT, g, o, i, y, x, 8);
|
||||
#elif defined OUTPUT_LAYOUT_G_OS_IYX_OSV16
|
||||
return GET_FILTER_G_OS_IYX_OSV16(OUTPUT, g, o, i, y, x, 16);
|
||||
#elif defined OUTPUT_LAYOUT_G_OS_IYX_OSV32
|
||||
|
@ -649,7 +649,7 @@ class WeightTensorJitConstant : public TensorBaseTJitConstant<WeightsType, Weigh
|
||||
)
|
||||
)V0G0N";
|
||||
} else if (l == WeightsLayout::os_iyx_osv16 || l == WeightsLayout::os_iyx_osv32 ||
|
||||
l == WeightsLayout::os_iyx_osv32__ai32 || l == WeightsLayout::g_os_iyx_osv16 ||
|
||||
l == WeightsLayout::os_iyx_osv32__ai32 || l == WeightsLayout::g_os_iyx_osv8 || l == WeightsLayout::g_os_iyx_osv16 ||
|
||||
l == WeightsLayout::g_os_iyx_osv32) {
|
||||
args macroNameArgs = {"prefix", "g", "o", "i", "y", "x", "sub_group_size"};
|
||||
this->calcFunction = FuncBody(layout_name);
|
||||
@ -881,6 +881,8 @@ JitDefinitions WeightTensorJitConstant::GetDefinitions() const {
|
||||
index_func_val = called_func_name + "(" + _name + ", g, o, i, 0, y, x)";
|
||||
else if (layout == WeightsLayout::g_os_is_yx_isv16_osv16)
|
||||
index_func_val = called_func_name + "(" + _name + ", g, o, i, 0, y, x, 16)";
|
||||
else if (layout == WeightsLayout::g_os_iyx_osv8)
|
||||
index_func_val = called_func_name + "(" + _name + ", g, o, i, y, x, 8)";
|
||||
else if (layout == WeightsLayout::g_os_iyx_osv16)
|
||||
index_func_val = called_func_name + "(" + _name + ", g, o, i, y, x, 16)";
|
||||
else if (layout == WeightsLayout::g_is_os_yx_isv16_osv16)
|
||||
|
@ -379,6 +379,7 @@ std::string toString(WeightsLayout layout) {
|
||||
case WeightsLayout::gyxio: return "GYXIO";
|
||||
case WeightsLayout::goizyx: return "GOIZYX";
|
||||
case WeightsLayout::giozyx: return "GIOZYX";
|
||||
case WeightsLayout::g_os_iyx_osv8: return "G_OS_IYX_OSV8";
|
||||
case WeightsLayout::g_os_iyx_osv16: return "G_OS_IYX_OSV16";
|
||||
case WeightsLayout::g_os_iyx_osv32: return "G_OS_IYX_OSV32";
|
||||
case WeightsLayout::gs_oiyx_gsv16: return "GS_OIYX_GSV16";
|
||||
|
@ -48,6 +48,7 @@ inline uint32_t SubGroupSize(WeightsLayout l) {
|
||||
case WeightsLayout::os_i_osv8__ai8:
|
||||
case WeightsLayout::iy_xs_os_xsv2_osv8__ao32:
|
||||
case WeightsLayout::giy_xs_os_xsv2_osv8__ao32:
|
||||
case WeightsLayout::g_os_iyx_osv8:
|
||||
return 8;
|
||||
default:
|
||||
return 1;
|
||||
|
@ -166,6 +166,7 @@ WeightsTensor::WeightsChannelArray WeightsTensor::weightsChannelArray {{
|
||||
{ WeightsLayout::gioyx, { 0, 1, -1, 3, 2, 4 } },
|
||||
{ WeightsLayout::goizyx, { 0, 1, 2, 3, 4, 5 } },
|
||||
{ WeightsLayout::giozyx, { 0, 1, 2, 4, 3, 5 } },
|
||||
{ WeightsLayout::g_os_iyx_osv8, { 0, 1, -1, 2, 3, 4 } },
|
||||
{ WeightsLayout::g_os_iyx_osv16, { 0, 1, -1, 2, 3, 4 } },
|
||||
{ WeightsLayout::g_os_iyx_osv32, { 0, 1, -1, 2, 3, 4 } },
|
||||
{ WeightsLayout::gs_oiyx_gsv16, { 0, 1, -1, 2, 3, 4 } },
|
||||
@ -766,6 +767,10 @@ NDims WeightsTensor::GetSimpleDims(const std::vector<size_t>& d, WeightsLayout l
|
||||
case os_i_yxs_osv4_yxsv4:
|
||||
newDims[3] = RoundUp(newDims[3], 4);
|
||||
break;
|
||||
case g_os_iyx_osv8:
|
||||
assert(newDims.size() == 5);
|
||||
newDims[3] = RoundUp(newDims[3], 8);
|
||||
break;
|
||||
case g_os_iyx_osv16:
|
||||
case g_os_iyx_osv16_rotate_180:
|
||||
assert(newDims.size() == 5);
|
||||
|
@ -184,6 +184,7 @@ enum WeightsLayout {
|
||||
goizyx,
|
||||
giozyx,
|
||||
gyxio,
|
||||
g_os_iyx_osv8,
|
||||
g_os_iyx_osv16,
|
||||
g_os_iyx_osv32,
|
||||
gs_oiyx_gsv16,
|
||||
|
@ -87,7 +87,6 @@ static const std::map<format::type, format_traits> format_traits_map {
|
||||
FMT_TRAITS(image_2d_weights_c4_fyx_b, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {}),
|
||||
FMT_TRAITS(image_2d_weights_c1_b_fyx, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {}),
|
||||
FMT_TRAITS(lstm_weights_dio, 1, 1, 2, 0, {0, 1, 3, 2}, "oixy", "oixy?", {}),
|
||||
FMT_TRAITS(os_is_yx_isa8_osv8_isv4, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {}),
|
||||
FMT_TRAITS(os_is_yx_isa8_osv16_isv4, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {}),
|
||||
FMT_TRAITS(os_is_yx_isa8_osv8_isv4_swizzled_by_4, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {}),
|
||||
FMT_TRAITS(os_is_yx_osa4_isa8_osv8_isv2, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {{0, 32}, {1, 16}}),
|
||||
@ -98,8 +97,6 @@ static const std::map<format::type, format_traits> format_traits_map {
|
||||
FMT_TRAITS(os_is_yx_osa2_isa8_osv16_isv4, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy", {{0, 32}, {1, 32}}),
|
||||
FMT_TRAITS(os_is_yx_osa2_isa8_osv8_isv2, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy", {{0, 16}, {1, 16}}),
|
||||
FMT_TRAITS(os_is_zyx_osa2_isa8_osv8_isv2, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "oizyx", "oixyz", {{0, 16}, {1, 16}}),
|
||||
FMT_TRAITS(os_is_zyx_isa8_osv8_isv4, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "oizyx", "oixyz", {{1, 8}, {0, 8}, {1, 4}}),
|
||||
FMT_TRAITS(os_is_zyx_isa8_osv16_isv4, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "oizyx", "oixyz", {{1, 8}, {0, 16}, {1, 4}}),
|
||||
FMT_TRAITS(os_is_yx_osa4_isa8_osv8_isv4_swizzled_by_4, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {{0, 32}, {1, 32}}),
|
||||
FMT_TRAITS(os_is_zyx_osa4_isa8_osv8_isv4_swizzled_by_4, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "oizyx", "oixyz", {{0, 32}, {1, 32}}),
|
||||
FMT_TRAITS(is_os_yx_osa4_isa8_osv8_isv4, 1, 1, 2, 0, {1, 0, 2, 3}, "ioyx", "ioxy", {{0, 32}, {1, 32}}),
|
||||
@ -125,9 +122,12 @@ static const std::map<format::type, format_traits> format_traits_map {
|
||||
FMT_TRAITS(is_os_yx_isv16_osv8, 1, 1, 2, 0, {1, 0, 2, 3, 4}, "ioyx", "oixy", {{1, 16}, {0, 8}}),
|
||||
FMT_TRAITS(is_os_zyx_isa8_osv8_isv2, 1, 1, 3, 0, {1, 0, 2, 3, 4}, "iozyx", "ioxyz", {{1, 8}, {0, 8}, {1, 2}}),
|
||||
FMT_TRAITS(is_os_zyx_isa8_osv8_isv4, 1, 1, 3, 0, {1, 0, 2, 3, 4}, "iozyx", "ioxyz", {{1, 8}, {0, 8}, {1, 4}}),
|
||||
FMT_TRAITS(os_is_zyx_isa8_osv8_isv4, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "oizyx", "oixyz", {{1, 8}, {0, 8}, {1, 4}}),
|
||||
FMT_TRAITS(os_is_zyx_isa8_osv8_isv2, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "oizyx", "oixyz", {{1, 8}, {0, 8}, {1, 2}}),
|
||||
FMT_TRAITS(os_is_zyx_isa8_osv16_isv4, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "oizyx", "oixyz", {{1, 8}, {0, 16}, {1, 4}}),
|
||||
FMT_TRAITS(is_os_yx_isa8_osv8_isv2, 1, 1, 2, 0, {1, 0, 2, 3}, "ioyx", "ioxy?", {{1, 8}, {0, 8}, {1, 2}}),
|
||||
FMT_TRAITS(is_os_yx_isa8_osv8_isv4, 1, 1, 2, 0, {1, 0, 2, 3}, "ioyx", "ioxy?", {{1, 8}, {0, 8}, {1, 4}}),
|
||||
FMT_TRAITS(os_is_yx_isa8_osv8_isv4, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {{1, 8}, {0, 8}, {1, 4}}),
|
||||
FMT_TRAITS(os_is_yx_isa8_osv8_isv2, 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {{1, 8}, {0, 8}, {1, 2}}),
|
||||
FMT_TRAITS(os_is_osv32_isv32_swizzled_by_4, 1, 1, 0, 0, {0, 1, 2, 3}, "oixy", "oixy?", {{0, 32}, {1, 32}}),
|
||||
FMT_TRAITS(os_is_zyx_isv8_osv16_isv2, 1, 1, 3, 0, {0, 1, 2, 3, 4}, "oizyx", "oixyz", {{1, 8}, {0, 16}, {1, 2}}),
|
||||
@ -156,6 +156,7 @@ static const std::map<format::type, format_traits> format_traits_map {
|
||||
FMT_TRAITS(gioyx, 1, 1, 2, 1, {0, 2, 1, 3, 4}, "gioyx", "oixy??g", {}),
|
||||
FMT_TRAITS(goizyx, 1, 1, 3, 1, {0, 1, 2, 3, 4, 5}, "goizyx", "oixyz?g", {}),
|
||||
FMT_TRAITS(giozyx, 1, 1, 3, 1, {0, 2, 1, 3, 4, 5}, "giozyx", "oixyz?g", {}),
|
||||
FMT_TRAITS(g_os_iyx_osv8, 1, 1, 2, 1, {0, 1, 2, 3, 4}, "goiyx", "oixy??g", {{0, 8}}),
|
||||
FMT_TRAITS(g_os_iyx_osv16, 1, 1, 2, 1, {0, 1, 2, 3, 4}, "goiyx", "oixy??g", {{0, 16}}),
|
||||
FMT_TRAITS(g_os_iyx_osv32, 1, 1, 2, 1, {0, 1, 2, 3, 4}, "goiyx", "oixy??g", {{0, 32}}),
|
||||
FMT_TRAITS(gs_oiyx_gsv16, 1, 1, 2, 1, {0, 1, 2, 3, 4}, "goiyx", "oixy??g", {{6, 16}}),
|
||||
|
@ -32,6 +32,7 @@ cl::PFN_clCreateFromD3D11Buffer cl::BufferDX::pfn_clCreateFromD3D11Buffer = NULL
|
||||
|
||||
#ifdef ENABLE_ONEDNN_FOR_GPU
|
||||
#include <oneapi/dnnl/dnnl_ocl.hpp>
|
||||
#include "openvino/util/file_util.hpp"
|
||||
#endif
|
||||
|
||||
namespace cldnn {
|
||||
@ -64,7 +65,38 @@ dnnl::engine& ocl_engine::get_onednn_engine() const {
|
||||
if (!casted)
|
||||
throw ov::Exception("[GPU] Invalid device type stored in ocl_engine");
|
||||
|
||||
_onednn_engine = std::make_shared<dnnl::engine>(dnnl::ocl_interop::make_engine(casted->get_device().get(), casted->get_context().get()));
|
||||
auto config = this->configuration();
|
||||
if (config.kernels_cache_path.empty()) {
|
||||
_onednn_engine = std::make_shared<dnnl::engine>(dnnl::ocl_interop::make_engine(casted->get_device().get(), casted->get_context().get()));
|
||||
} else {
|
||||
// Use cached blob
|
||||
auto path = config.kernels_cache_path;
|
||||
if (path.back() != '/' && path.back() != '\\') {
|
||||
path += "/";
|
||||
}
|
||||
|
||||
auto blob_id = dnnl::ocl_interop::get_engine_cache_blob_id(casted->get_device().get());
|
||||
if (blob_id.empty()) {
|
||||
// Create engine without cache_blob
|
||||
_onednn_engine = std::make_shared<dnnl::engine>(dnnl::ocl_interop::make_engine(casted->get_device().get(), casted->get_context().get()));
|
||||
return *_onednn_engine;
|
||||
}
|
||||
|
||||
std::string id_str(blob_id.begin(), blob_id.end());
|
||||
size_t hash = std::hash<std::string>()(id_str);
|
||||
path = path + std::to_string(hash) + ".onednn.cl_cache";
|
||||
|
||||
auto onednn_cache_blob = ov::util::load_binary(path);
|
||||
if (onednn_cache_blob.empty()) {
|
||||
_onednn_engine = std::make_shared<dnnl::engine>(dnnl::ocl_interop::make_engine(casted->get_device().get(), casted->get_context().get()));
|
||||
|
||||
onednn_cache_blob = dnnl::ocl_interop::get_engine_cache_blob(*_onednn_engine);
|
||||
ov::util::save_binary(path, onednn_cache_blob);
|
||||
} else {
|
||||
_onednn_engine = std::make_shared<dnnl::engine>(dnnl::ocl_interop::make_engine(casted->get_device().get(), casted->get_context().get(),
|
||||
onednn_cache_blob));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return *_onednn_engine;
|
||||
|
2
src/plugins/intel_gpu/thirdparty/onednn_gpu
vendored
2
src/plugins/intel_gpu/thirdparty/onednn_gpu
vendored
@ -1 +1 @@
|
||||
Subproject commit b5faa77a4a651f1e44fa77348eded54ea3ec3eef
|
||||
Subproject commit e5a70f43639ba968869a99931d77116791ace355
|
Loading…
Reference in New Issue
Block a user