[GPU] Added int32 weights support for reorder_weights kernels (#20015)
This commit is contained in:
parent
c983b464ba
commit
2e88aa0770
@ -20,7 +20,7 @@ enum class reorder_mean_mode {
|
|||||||
};
|
};
|
||||||
|
|
||||||
struct WeightsReorderParams {
|
struct WeightsReorderParams {
|
||||||
WeightsReorderParams(const layout& in_layout, const layout& out_layout, bool transposed, bool grouped = false)
|
WeightsReorderParams(const layout& in_layout, const layout& out_layout, bool transposed = false, bool grouped = false)
|
||||||
: _in_layout(in_layout),
|
: _in_layout(in_layout),
|
||||||
_out_layout(out_layout),
|
_out_layout(out_layout),
|
||||||
_transposed(transposed),
|
_transposed(transposed),
|
||||||
|
@ -136,8 +136,7 @@ kernel_selector::data_type to_data_type(data_types dt) {
|
|||||||
case cldnn::data_types::f32:
|
case cldnn::data_types::f32:
|
||||||
return kernel_selector::data_type::F32;
|
return kernel_selector::data_type::F32;
|
||||||
default:
|
default:
|
||||||
assert(0);
|
OPENVINO_THROW("[GPU] Unable to convert cldnn data type ", dt, " to kernel_selector data type");
|
||||||
return kernel_selector::data_type::F16;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -158,8 +157,7 @@ data_types from_data_type(kernel_selector::data_type dt) {
|
|||||||
case kernel_selector::data_type::F32:
|
case kernel_selector::data_type::F32:
|
||||||
return cldnn::data_types::f32;
|
return cldnn::data_types::f32;
|
||||||
default:
|
default:
|
||||||
assert(0);
|
OPENVINO_THROW("[GPU] Unable to convert kernel_selector data type ", kernel_selector::toString(dt), " to cldnn data type");
|
||||||
return cldnn::data_types::f16;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -175,9 +173,10 @@ kernel_selector::weights_type to_weights_type(data_types dt) {
|
|||||||
return kernel_selector::weights_type::F16;
|
return kernel_selector::weights_type::F16;
|
||||||
case cldnn::data_types::f32:
|
case cldnn::data_types::f32:
|
||||||
return kernel_selector::weights_type::F32;
|
return kernel_selector::weights_type::F32;
|
||||||
|
case cldnn::data_types::i32:
|
||||||
|
return kernel_selector::weights_type::INT32;
|
||||||
default:
|
default:
|
||||||
assert(0);
|
OPENVINO_THROW("[GPU] Unable to convert cldnn data type ", dt, " to kernel_selector weights type");
|
||||||
return kernel_selector::weights_type::F16;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -193,9 +192,10 @@ data_types from_weights_type(kernel_selector::weights_type dt) {
|
|||||||
return data_types::f16;
|
return data_types::f16;
|
||||||
case kernel_selector::weights_type::F32:
|
case kernel_selector::weights_type::F32:
|
||||||
return data_types::f32;
|
return data_types::f32;
|
||||||
|
case kernel_selector::weights_type::INT32:
|
||||||
|
return data_types::i32;
|
||||||
default:
|
default:
|
||||||
assert(0);
|
OPENVINO_THROW("[GPU] Unable to convert kernel_selector weights type ", kernel_selector::toString(dt), " to cldnn data type");
|
||||||
return data_types::f16;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -41,6 +41,7 @@ inline uint32_t BytesPerElement(WeightsType wt) {
|
|||||||
return 2;
|
return 2;
|
||||||
case WeightsType::F32:
|
case WeightsType::F32:
|
||||||
case WeightsType::BINARY:
|
case WeightsType::BINARY:
|
||||||
|
case WeightsType::INT32:
|
||||||
return 4;
|
return 4;
|
||||||
default:
|
default:
|
||||||
throw std::runtime_error("[GPU] BytesPerElement doesn't support given precision");
|
throw std::runtime_error("[GPU] BytesPerElement doesn't support given precision");
|
||||||
|
@ -125,6 +125,7 @@ enum class WeightsType {
|
|||||||
F32,
|
F32,
|
||||||
INT8,
|
INT8,
|
||||||
UINT8,
|
UINT8,
|
||||||
|
INT32
|
||||||
};
|
};
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -123,6 +123,8 @@ std::string toCLType(WeightsType wType) {
|
|||||||
return "half";
|
return "half";
|
||||||
case WeightsType::F32:
|
case WeightsType::F32:
|
||||||
return GetTypeName<float>();
|
return GetTypeName<float>();
|
||||||
|
case WeightsType::INT32:
|
||||||
|
return GetTypeName<int32_t>();
|
||||||
default:
|
default:
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
@ -1505,6 +1507,8 @@ JitConstants MakeTypeJitConstants(WeightsType weightsType, const std::string& ma
|
|||||||
return MakeTypeJitConstants(Datatype::UINT8, macroName);
|
return MakeTypeJitConstants(Datatype::UINT8, macroName);
|
||||||
case WeightsType::BINARY:
|
case WeightsType::BINARY:
|
||||||
return MakeTypeJitConstants(Datatype::UINT32, macroName);
|
return MakeTypeJitConstants(Datatype::UINT32, macroName);
|
||||||
|
case WeightsType::INT32:
|
||||||
|
return MakeTypeJitConstants(Datatype::INT32, macroName);
|
||||||
}
|
}
|
||||||
assert(false || "Unreachable!");
|
assert(false || "Unreachable!");
|
||||||
// FIXME: Is there some builtin_unreachable available?
|
// FIXME: Is there some builtin_unreachable available?
|
||||||
|
@ -157,6 +157,7 @@ std::string toString(WeightsType wType) {
|
|||||||
case WeightsType::F32: return "F32";
|
case WeightsType::F32: return "F32";
|
||||||
case WeightsType::INT8: return "INT8";
|
case WeightsType::INT8: return "INT8";
|
||||||
case WeightsType::UINT8: return "UINT8";
|
case WeightsType::UINT8: return "UINT8";
|
||||||
|
case WeightsType::INT32: return "INT32";
|
||||||
default: return "";
|
default: return "";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -137,6 +137,8 @@ void ParamsKey::EnableInputWeightsType(WeightsType wt) {
|
|||||||
case WeightsType::BINARY:
|
case WeightsType::BINARY:
|
||||||
key.inputWeightsType.val.binary = 1;
|
key.inputWeightsType.val.binary = 1;
|
||||||
break;
|
break;
|
||||||
|
case WeightsType::INT32:
|
||||||
|
key.inputWeightsType.val.int32 = 1;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -158,6 +160,8 @@ void ParamsKey::EnableOutputWeightsType(WeightsType wt) {
|
|||||||
case WeightsType::BINARY:
|
case WeightsType::BINARY:
|
||||||
key.outputWeightsType.val.binary = 1;
|
key.outputWeightsType.val.binary = 1;
|
||||||
break;
|
break;
|
||||||
|
case WeightsType::INT32:
|
||||||
|
key.outputWeightsType.val.int32 = 1;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -26,6 +26,8 @@ static WeightsType DataTypeToWeightsType(Datatype t) {
|
|||||||
return WeightsType::F32;
|
return WeightsType::F32;
|
||||||
case Datatype::BINARY:
|
case Datatype::BINARY:
|
||||||
return WeightsType::BINARY;
|
return WeightsType::BINARY;
|
||||||
|
case Datatype::INT32:
|
||||||
|
return WeightsType::INT32;
|
||||||
default:
|
default:
|
||||||
return WeightsType::UNSUPPORTED;
|
return WeightsType::UNSUPPORTED;
|
||||||
}
|
}
|
||||||
|
@ -12,9 +12,11 @@ ParamsKey ReorderWeightsKernel::GetSupportedKey() const {
|
|||||||
k.EnableInputWeightsType(WeightsType::INT8);
|
k.EnableInputWeightsType(WeightsType::INT8);
|
||||||
k.EnableInputWeightsType(WeightsType::F16);
|
k.EnableInputWeightsType(WeightsType::F16);
|
||||||
k.EnableInputWeightsType(WeightsType::F32);
|
k.EnableInputWeightsType(WeightsType::F32);
|
||||||
|
k.EnableInputWeightsType(WeightsType::INT32);
|
||||||
k.EnableOutputWeightsType(WeightsType::INT8);
|
k.EnableOutputWeightsType(WeightsType::INT8);
|
||||||
k.EnableOutputWeightsType(WeightsType::F16);
|
k.EnableOutputWeightsType(WeightsType::F16);
|
||||||
k.EnableOutputWeightsType(WeightsType::F32);
|
k.EnableOutputWeightsType(WeightsType::F32);
|
||||||
|
k.EnableOutputWeightsType(WeightsType::INT32);
|
||||||
k.EnableAllInputWeightsLayout();
|
k.EnableAllInputWeightsLayout();
|
||||||
k.EnableAllOutputWeightsLayout();
|
k.EnableAllOutputWeightsLayout();
|
||||||
k.EnableDifferentTypes();
|
k.EnableDifferentTypes();
|
||||||
|
@ -13,9 +13,11 @@ ParamsKey ReorderWeightsOpt::GetSupportedKey() const {
|
|||||||
k.EnableInputWeightsType(WeightsType::INT8);
|
k.EnableInputWeightsType(WeightsType::INT8);
|
||||||
k.EnableInputWeightsType(WeightsType::F16);
|
k.EnableInputWeightsType(WeightsType::F16);
|
||||||
k.EnableInputWeightsType(WeightsType::F32);
|
k.EnableInputWeightsType(WeightsType::F32);
|
||||||
|
k.EnableInputWeightsType(WeightsType::INT32);
|
||||||
k.EnableOutputWeightsType(WeightsType::INT8);
|
k.EnableOutputWeightsType(WeightsType::INT8);
|
||||||
k.EnableOutputWeightsType(WeightsType::F16);
|
k.EnableOutputWeightsType(WeightsType::F16);
|
||||||
k.EnableOutputWeightsType(WeightsType::F32);
|
k.EnableOutputWeightsType(WeightsType::F32);
|
||||||
|
k.EnableOutputWeightsType(WeightsType::INT32);
|
||||||
k.EnableInputWeightsLayout(WeightsLayout::oiyx);
|
k.EnableInputWeightsLayout(WeightsLayout::oiyx);
|
||||||
k.EnableInputWeightsLayout(WeightsLayout::ioyx);
|
k.EnableInputWeightsLayout(WeightsLayout::ioyx);
|
||||||
k.EnableInputWeightsLayout(WeightsLayout::oizyx);
|
k.EnableInputWeightsLayout(WeightsLayout::oizyx);
|
||||||
|
@ -1928,6 +1928,104 @@ TEST(reorder_gpu_i32, basic)
|
|||||||
ASSERT_EQ(*(a_ptr++), val);
|
ASSERT_EQ(*(a_ptr++), val);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Runs a weights reorder from f32 (bfyx) to i32 (oiyx) with the "reorder_weights"
// OCL implementation forced, and compares the result element by element against
// a reference in which every input value has its fractional part dropped.
TEST(reorder_weights_gpu_i32, reorder_weights)
{
    auto& test_engine = get_test_engine();

    // Source and destination descriptions share the same 2x2x2x2 shape;
    // only the data type and the format differ.
    layout src_layout(data_types::f32, format::bfyx, { 2, 2, 2, 2 });
    layout dst_layout(data_types::i32, format::oiyx, { 2, 2, 2, 2 });
    auto reorder_params = std::make_shared<WeightsReorderParams>(src_layout, dst_layout);

    auto input_mem = test_engine.allocate_memory(src_layout);
    set_values(input_mem, {
        1.f, 0.f, 5.f, 1.5f,
        2.f, 0.f, 6.f, 5.2f,
        3.f, 0.5f, 7.f, 12.f,
        4.f, -0.5f, 8.f, 8.f
    });

    topology topo {
        input_layout("input", src_layout),
        reorder("reorder", input_info("input"), reorder_params)
    };

    // Pin the implementation so the test exercises the intended kernel.
    ExecutionConfig config = get_test_default_config(test_engine);
    ov::intel_gpu::ImplementationDesc forced_impl = { format::oiyx, "reorder_weights", impl_types::ocl };
    config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"reorder", forced_impl} }));

    network net(test_engine, topo, config);
    net.set_input_data("input", input_mem);

    auto outputs = net.execute();
    ASSERT_EQ(outputs.size(), size_t(1));
    auto first_output = outputs.begin();
    ASSERT_EQ(first_output->first, "reorder");

    std::vector<int32_t> expected = {
        1, 0, 5, 1,
        2, 0, 6, 5,
        3, 0, 7, 12,
        4, 0, 8, 8
    };

    auto output_mem = first_output->second.get_memory();
    cldnn::mem_lock<int32_t> actual(output_mem, get_test_stream());

    ASSERT_EQ(actual.size(), expected.size());
    for (size_t idx = 0; idx < expected.size(); ++idx) {
        ASSERT_EQ(actual[idx], expected[idx]);
    }
}
|
||||||
|
|
||||||
|
// Runs a weights reorder from f32 (bfyx) to i32 (os_iyx_osv16) with the
// "reorder_weights_opt" OCL implementation forced, and compares the result
// element by element against the reference listed below.
TEST(reorder_weights_gpu_i32, reorder_weights_opt)
{
    auto& test_engine = get_test_engine();

    // Source and destination descriptions share the same shape;
    // only the data type and the format differ.
    layout src_layout(data_types::f32, format::bfyx, { 16, 1, 2, 1 });
    layout dst_layout(data_types::i32, format::os_iyx_osv16, { 16, 1, 2, 1 });
    auto reorder_params = std::make_shared<WeightsReorderParams>(src_layout, dst_layout);

    auto input_mem = test_engine.allocate_memory(src_layout);
    set_values(input_mem, {
        0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f,
        8.f, 9.f, 10.f, 0.5f, 12.f, 13.f, 14.f, 15.f,
        16.f, 17.f, 18.f, 19.f, 20.f, -1.6f, 22.f, 23.f,
        -1.0f, 25.f, 26.f, 27.f, 28.f, 29.f, 30.f, 31.f
    });

    topology topo {
        input_layout("input", src_layout),
        reorder("reorder", input_info("input"), reorder_params)
    };

    // Pin the implementation so the test exercises the intended kernel.
    ExecutionConfig config = get_test_default_config(test_engine);
    ov::intel_gpu::ImplementationDesc forced_impl = { format::os_iyx_osv16, "reorder_weights_opt", impl_types::ocl };
    config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"reorder", forced_impl} }));

    network net(test_engine, topo, config);
    net.set_input_data("input", input_mem);

    auto outputs = net.execute();
    ASSERT_EQ(outputs.size(), size_t(1));
    auto first_output = outputs.begin();
    ASSERT_EQ(first_output->first, "reorder");

    // Even-positioned inputs fill the first half of the reference and
    // odd-positioned inputs the second half, each with the fraction dropped.
    std::vector<int32_t> expected = {
        0, 2, 4, 6, 8, 10, 12, 14,
        16, 18, 20, 22, -1, 26, 28, 30,
        1, 3, 5, 7, 9, 0, 13, 15,
        17, 19, -1, 23, 25, 27, 29, 31
    };

    auto output_mem = first_output->second.get_memory();
    cldnn::mem_lock<int32_t> actual(output_mem, get_test_stream());

    ASSERT_EQ(actual.size(), expected.size());
    for (size_t idx = 0; idx < expected.size(); ++idx) {
        ASSERT_EQ(actual[idx], expected[idx]);
    }
}
|
||||||
|
|
||||||
TEST(reorder_gpu_i64, basic)
|
TEST(reorder_gpu_i64, basic)
|
||||||
{
|
{
|
||||||
// Test for converting data types f32->i64
|
// Test for converting data types f32->i64
|
||||||
|
Loading…
Reference in New Issue
Block a user