[GPU] Fixed an eltwise bug to support blocked layouts when broadcasting. (#12770)

+ Support bs_fs_yx_bsv32_fsv32 format
Jade Cho 2022-08-31 23:02:08 +09:00 committed by GitHub
parent 382028e9c2
commit d9d4b6d89b
2 changed files with 27 additions and 1 deletion
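For context: bs_fs_yx_bsv32_fsv32, the layout named in the commit message, is a double-blocked format in which both the batch and feature dimensions are tiled in blocks of 32. Below is a minimal sketch of how a linear offset is computed in such a layout, assuming the usual bs, fs, y, x, bsv, fsv block ordering; the helper name and signature are hypothetical, not part of this patch:

```cpp
#include <cstddef>

// Hypothetical helper: linear offset of element (b, f, y, x) in a
// bs_fs_yx_bsv32_fsv32 tensor. Batch and feature are both tiled in
// blocks of 32; block order is bs, fs, y, x, bsv, fsv.
inline size_t offset_bs_fs_yx_bsv32_fsv32(size_t b, size_t f, size_t y, size_t x,
                                          size_t F, size_t Y, size_t X) {
    const size_t fs_count = (F + 31) / 32;       // number of feature blocks
    const size_t tile = (b / 32) * fs_count * Y * X   // which 32x32 tile
                      + (f / 32) * Y * X
                      + y * X
                      + x;
    return tile * 32 * 32 + (b % 32) * 32 + (f % 32);  // position inside the tile
}
```

Because batch and feature indices interleave inside each 32x32 tile, no single per-dimension pitch describes the layout, which is what the patch below accounts for.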


@@ -287,6 +287,31 @@ inline bool SimpleLayout(DataLayout l) {
     }
 }
+inline bool DoubleBlockedLayout(DataLayout l) {
+    switch (l) {
+        case DataLayout::bs_fs_yx_bsv16_fsv16:
+        case DataLayout::bs_fs_zyx_bsv16_fsv32:
+        case DataLayout::bs_fs_zyx_bsv16_fsv16:
+        case DataLayout::bs_fs_yx_bsv4_fsv4:
+        case DataLayout::bs_fs_yx_bsv8_fsv4:
+        case DataLayout::bs_fs_yx_bsv8_fsv2:
+        case DataLayout::bs_fs_zyx_bsv8_fsv4:
+        case DataLayout::bs_fs_zyx_bsv8_fsv2:
+        case DataLayout::bs_fs_yx_bsv16_fsv4:
+        case DataLayout::bs_fs_zyx_bsv16_fsv4:
+        case DataLayout::bs_fs_yx_bsv16_fsv2:
+        case DataLayout::bs_fs_zyx_bsv16_fsv2:
+        case DataLayout::bs_fs_yx_bsv4_fsv2:
+        case DataLayout::bs_fs_yx_bsv32_fsv32:
+        case DataLayout::bs_fs_yx_bsv32_fsv16:
+        case DataLayout::bs_fs_zyx_bsv32_fsv32:
+        case DataLayout::bs_fs_zyx_bsv32_fsv16:
+            return true;
+        default:
+            return false;
+    }
+}
 inline bool GroupedLayout(WeightsLayout l);
 inline bool GroupedLayout(DataLayout) {
@@ -513,6 +538,7 @@ public:
     uint32_t ElementSize() const override { return BytesPerElement(dtype); }
     size_t Dimentions() const { return dims.size(); }
     bool SimpleLayout() const { return Tensor::SimpleLayout(layout); }
+    bool DoubleBlockedLayout() const { return Tensor::DoubleBlockedLayout(layout); }
     bool GroupedLayout() const { return Tensor::GroupedLayout(layout); }
     bool operator==(const TensorBaseT& t) const {
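As a quick illustration of the intended behaviour of the new predicate (this check is not part of the patch; it assumes the kernel_selector tensor header is on the include path and that DataLayout lives in kernel_selector::Tensor, as the Tensor:: qualifiers above suggest):

```cpp
#include <cassert>

// Hedged sketch: b_fs_yx_fsv16 (feature-only blocking) and bfyx (planar)
// are existing DataLayout values that should NOT count as double-blocked.
void check_double_blocked() {
    using namespace kernel_selector::Tensor;
    assert(DoubleBlockedLayout(DataLayout::bs_fs_yx_bsv32_fsv32));  // batch + feature blocked
    assert(!DoubleBlockedLayout(DataLayout::b_fs_yx_fsv16));        // only feature blocked
    assert(!DoubleBlockedLayout(DataLayout::bfyx));                 // simple planar layout
}
```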


@@ -486,7 +486,7 @@ JitDefinitions DataTensorJitConstant::GetDefinitions() const {
             auto f_size = toCodeString(_tensor.Feature().v);
             definitions.push_back({ safe_index_func_name, "(" + offset + " + ((f) % " + f_size + ") * " + f_pitch + ")" });
             definitions.push_back({ index_func_name, "(" + offset + " + (f) * " + f_pitch + ")" });
-        } else if (_tensor.PitchesDifferFromLogicalDims()) {
+        } else if (_tensor.PitchesDifferFromLogicalDims() || _tensor.DoubleBlockedLayout()) {
            // TODO This should be solved differently, by setting the macro arguments to zero
            definitions.push_back({ safe_index_func_name, safe_index_func_val });
            definitions.push_back({ index_func_name, index_func_val });
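The intent of the changed condition: previously, only tensors whose pitches differ from their logical dimensions were routed onto the full layout-aware index functions (safe_index_func_val / index_func_val), so a double-blocked tensor with "matching" pitches could fall through to a simpler macro further down (not shown in this hunk). That is unsafe because, even with matching pitches, logical neighbours in a double-blocked layout are not contiguous. A self-contained arithmetic check, reusing the hypothetical helper from the earlier sketch:

```cpp
#include <cassert>
#include <cstddef>

// Same hypothetical helper as in the sketch above, repeated so this
// block is self-contained.
static size_t offset_bs_fs_yx_bsv32_fsv32(size_t b, size_t f, size_t y, size_t x,
                                          size_t F, size_t Y, size_t X) {
    const size_t fs_count = (F + 31) / 32;
    const size_t tile = (b / 32) * fs_count * Y * X + (f / 32) * Y * X + y * X + x;
    return tile * 32 * 32 + (b % 32) * 32 + (f % 32);
}

int main() {
    const size_t F = 64, Y = 4, X = 4;
    // Within one feature block, steps along f are contiguous...
    assert(offset_bs_fs_yx_bsv32_fsv32(0, 31, 0, 0, F, Y, X) == 31);
    // ...but crossing the block boundary jumps by a whole 32x32 tile for
    // every remaining y*x position.
    assert(offset_bs_fs_yx_bsv32_fsv32(0, 32, 0, 0, F, Y, X) == 32 * 32 * Y * X);
    return 0;
}
```

Since the step from f = 31 to f = 32 spans a whole tile while the step from f = 0 to f = 1 is a single element, no flat f_pitch expression can describe the layout; the added DoubleBlockedLayout() guard forces such tensors onto the full index function instead.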