[GPU] Added int32 weights support for reorder_weights kernels (#20015)

This commit is contained in:
Roman Lyamin 2023-09-25 09:23:47 +04:00 committed by GitHub
parent c983b464ba
commit 2e88aa0770
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 124 additions and 9 deletions

View File

@ -20,7 +20,7 @@ enum class reorder_mean_mode {
};
struct WeightsReorderParams {
WeightsReorderParams(const layout& in_layout, const layout& out_layout, bool transposed, bool grouped = false)
WeightsReorderParams(const layout& in_layout, const layout& out_layout, bool transposed = false, bool grouped = false)
: _in_layout(in_layout),
_out_layout(out_layout),
_transposed(transposed),

View File

@ -136,8 +136,7 @@ kernel_selector::data_type to_data_type(data_types dt) {
case cldnn::data_types::f32:
return kernel_selector::data_type::F32;
default:
assert(0);
return kernel_selector::data_type::F16;
OPENVINO_THROW("[GPU] Unable to convert cldnn data type ", dt, " to kernel_selector data type");
}
}
@ -158,8 +157,7 @@ data_types from_data_type(kernel_selector::data_type dt) {
case kernel_selector::data_type::F32:
return cldnn::data_types::f32;
default:
assert(0);
return cldnn::data_types::f16;
OPENVINO_THROW("[GPU] Unable to convert kernel_selector data type ", kernel_selector::toString(dt), " to cldnn data type");
}
}
@ -175,9 +173,10 @@ kernel_selector::weights_type to_weights_type(data_types dt) {
return kernel_selector::weights_type::F16;
case cldnn::data_types::f32:
return kernel_selector::weights_type::F32;
case cldnn::data_types::i32:
return kernel_selector::weights_type::INT32;
default:
assert(0);
return kernel_selector::weights_type::F16;
OPENVINO_THROW("[GPU] Unable to convert cldnn data type ", dt, " to kernel_selector weights type");
}
}
@ -193,9 +192,10 @@ data_types from_weights_type(kernel_selector::weights_type dt) {
return data_types::f16;
case kernel_selector::weights_type::F32:
return data_types::f32;
case kernel_selector::weights_type::INT32:
return data_types::i32;
default:
assert(0);
return data_types::f16;
OPENVINO_THROW("[GPU] Unable to convert kernel_selector weights type ", kernel_selector::toString(dt), " to cldnn data type");
}
}

View File

@ -41,6 +41,7 @@ inline uint32_t BytesPerElement(WeightsType wt) {
return 2;
case WeightsType::F32:
case WeightsType::BINARY:
case WeightsType::INT32:
return 4;
default:
throw std::runtime_error("[GPU] BytesPerElement doesn't support given precision");

View File

@ -125,6 +125,7 @@ enum class WeightsType {
F32,
INT8,
UINT8,
INT32
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

View File

@ -123,6 +123,8 @@ std::string toCLType(WeightsType wType) {
return "half";
case WeightsType::F32:
return GetTypeName<float>();
case WeightsType::INT32:
return GetTypeName<int32_t>();
default:
return "";
}
@ -1505,6 +1507,8 @@ JitConstants MakeTypeJitConstants(WeightsType weightsType, const std::string& ma
return MakeTypeJitConstants(Datatype::UINT8, macroName);
case WeightsType::BINARY:
return MakeTypeJitConstants(Datatype::UINT32, macroName);
case WeightsType::INT32:
return MakeTypeJitConstants(Datatype::INT32, macroName);
}
assert(false || "Unreachable!");
// FIXME: Is there some builtin_unreachable available?

View File

@ -157,6 +157,7 @@ std::string toString(WeightsType wType) {
case WeightsType::F32: return "F32";
case WeightsType::INT8: return "INT8";
case WeightsType::UINT8: return "UINT8";
case WeightsType::INT32: return "INT32";
default: return "";
}
}

View File

@ -137,6 +137,8 @@ void ParamsKey::EnableInputWeightsType(WeightsType wt) {
case WeightsType::BINARY:
key.inputWeightsType.val.binary = 1;
break;
case WeightsType::INT32:
key.inputWeightsType.val.int32 = 1;
default:
break;
}
@ -158,6 +160,8 @@ void ParamsKey::EnableOutputWeightsType(WeightsType wt) {
case WeightsType::BINARY:
key.outputWeightsType.val.binary = 1;
break;
case WeightsType::INT32:
key.outputWeightsType.val.int32 = 1;
default:
break;
}

View File

@ -26,6 +26,8 @@ static WeightsType DataTypeToWeightsType(Datatype t) {
return WeightsType::F32;
case Datatype::BINARY:
return WeightsType::BINARY;
case Datatype::INT32:
return WeightsType::INT32;
default:
return WeightsType::UNSUPPORTED;
}

View File

@ -12,9 +12,11 @@ ParamsKey ReorderWeightsKernel::GetSupportedKey() const {
k.EnableInputWeightsType(WeightsType::INT8);
k.EnableInputWeightsType(WeightsType::F16);
k.EnableInputWeightsType(WeightsType::F32);
k.EnableInputWeightsType(WeightsType::INT32);
k.EnableOutputWeightsType(WeightsType::INT8);
k.EnableOutputWeightsType(WeightsType::F16);
k.EnableOutputWeightsType(WeightsType::F32);
k.EnableOutputWeightsType(WeightsType::INT32);
k.EnableAllInputWeightsLayout();
k.EnableAllOutputWeightsLayout();
k.EnableDifferentTypes();

View File

@ -13,9 +13,11 @@ ParamsKey ReorderWeightsOpt::GetSupportedKey() const {
k.EnableInputWeightsType(WeightsType::INT8);
k.EnableInputWeightsType(WeightsType::F16);
k.EnableInputWeightsType(WeightsType::F32);
k.EnableInputWeightsType(WeightsType::INT32);
k.EnableOutputWeightsType(WeightsType::INT8);
k.EnableOutputWeightsType(WeightsType::F16);
k.EnableOutputWeightsType(WeightsType::F32);
k.EnableOutputWeightsType(WeightsType::INT32);
k.EnableInputWeightsLayout(WeightsLayout::oiyx);
k.EnableInputWeightsLayout(WeightsLayout::ioyx);
k.EnableInputWeightsLayout(WeightsLayout::oizyx);

View File

@ -1928,6 +1928,104 @@ TEST(reorder_gpu_i32, basic)
ASSERT_EQ(*(a_ptr++), val);
}
// Runs a weights reorder from an f32 bfyx layout to an i32 oiyx layout with the
// "reorder_weights" OCL implementation forced, and compares the result element by
// element against a reference in which every input has lost its fractional part
// (e.g. 1.5f -> 1, 5.2f -> 5, -0.5f -> 0).
TEST(reorder_weights_gpu_i32, reorder_weights)
{
    auto& engine = get_test_engine();

    // Same 2x2x2x2 dimensions on both sides; only the data type and format differ.
    layout src_layout(data_types::f32, format::bfyx, { 2, 2, 2, 2 });
    layout dst_layout(data_types::i32, format::oiyx, { 2, 2, 2, 2 });
    auto reorder_params = std::make_shared<WeightsReorderParams>(src_layout, dst_layout);

    auto src_mem = engine.allocate_memory(src_layout);
    set_values(src_mem, {
        1.f, 0.f, 5.f, 1.5f,
        2.f, 0.f, 6.f, 5.2f,
        3.f, 0.5f, 7.f, 12.f,
        4.f, -0.5f, 8.f, 8.f
    });

    topology topo {
        input_layout("input", src_layout),
        reorder("reorder", input_info("input"), reorder_params)
    };

    // Pin the primitive named "reorder" to the kernel under test.
    ExecutionConfig config = get_test_default_config(engine);
    ov::intel_gpu::ImplementationDesc forced_impl = { format::oiyx, "reorder_weights", impl_types::ocl };
    config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"reorder", forced_impl} }));

    network net(engine, topo, config);
    net.set_input_data("input", src_mem);

    auto outputs = net.execute();
    ASSERT_EQ(outputs.size(), size_t(1));

    auto first_output = outputs.begin();
    ASSERT_EQ(first_output->first, "reorder");

    std::vector<int32_t> expected = {
        1, 0, 5, 1,
        2, 0, 6, 5,
        3, 0, 7, 12,
        4, 0, 8, 8
    };

    auto result_mem = first_output->second.get_memory();
    cldnn::mem_lock<int32_t> result_ptr(result_mem, get_test_stream());

    ASSERT_EQ(result_ptr.size(), expected.size());

    size_t idx = 0;
    for (const int32_t expected_value : expected) {
        ASSERT_EQ(result_ptr[idx], expected_value);
        ++idx;
    }
}
// Runs a weights reorder from an f32 bfyx layout to an i32 os_iyx_osv16 layout with
// the "reorder_weights_opt" OCL implementation forced. The reference buffer holds the
// even-positioned inputs first and the odd-positioned inputs second, each with its
// fractional part dropped (0.5f -> 0, -1.6f -> -1).
TEST(reorder_weights_gpu_i32, reorder_weights_opt)
{
    auto& engine = get_test_engine();

    // Same 16x1x2x1 dimensions on both sides; only the data type and format differ.
    layout src_layout(data_types::f32, format::bfyx, { 16, 1, 2, 1 });
    layout dst_layout(data_types::i32, format::os_iyx_osv16, { 16, 1, 2, 1 });
    auto reorder_params = std::make_shared<WeightsReorderParams>(src_layout, dst_layout);

    auto src_mem = engine.allocate_memory(src_layout);
    set_values(src_mem, {
        0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f,
        8.f, 9.f, 10.f, 0.5f, 12.f, 13.f, 14.f, 15.f,
        16.f, 17.f, 18.f, 19.f, 20.f, -1.6f, 22.f, 23.f,
        -1.0f, 25.f, 26.f, 27.f, 28.f, 29.f, 30.f, 31.f
    });

    topology topo {
        input_layout("input", src_layout),
        reorder("reorder", input_info("input"), reorder_params)
    };

    // Pin the primitive named "reorder" to the kernel under test.
    ExecutionConfig config = get_test_default_config(engine);
    ov::intel_gpu::ImplementationDesc forced_impl = { format::os_iyx_osv16, "reorder_weights_opt", impl_types::ocl };
    config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"reorder", forced_impl} }));

    network net(engine, topo, config);
    net.set_input_data("input", src_mem);

    auto outputs = net.execute();
    ASSERT_EQ(outputs.size(), size_t(1));

    auto first_output = outputs.begin();
    ASSERT_EQ(first_output->first, "reorder");

    std::vector<int32_t> expected = {
        0, 2, 4, 6, 8, 10, 12, 14,
        16, 18, 20, 22, -1, 26, 28, 30,
        1, 3, 5, 7, 9, 0, 13, 15,
        17, 19, -1, 23, 25, 27, 29, 31
    };

    auto result_mem = first_output->second.get_memory();
    cldnn::mem_lock<int32_t> result_ptr(result_mem, get_test_stream());

    ASSERT_EQ(result_ptr.size(), expected.size());

    size_t idx = 0;
    for (const int32_t expected_value : expected) {
        ASSERT_EQ(result_ptr[idx], expected_value);
        ++idx;
    }
}
TEST(reorder_gpu_i64, basic)
{
// Test for converting data types f32->i64