Merge remote-tracking branch 'upstream/master' into debian-packages
This commit is contained in:
@@ -2456,7 +2456,8 @@ bool evaluate(const shared_ptr<op::v9::IRDFT>& op, const HostTensorVector& outpu
|
||||
info.axes_data,
|
||||
irfft_result.data(),
|
||||
info.fft_output_shape,
|
||||
info.output_shape);
|
||||
info.output_shape,
|
||||
info.last_signal_size);
|
||||
|
||||
const auto output_type = op->get_input_element_type(0);
|
||||
runtime::reference::fft_postprocessing(outputs, output_type, irfft_result);
|
||||
|
||||
@@ -366,6 +366,40 @@ static const std::vector<float> input_data_6 = {
|
||||
-1.7881389, -1.1409098, -1.8951292, -2.1522717, -7.4092865, -0.38806117,
|
||||
-0.6685039, -1.3767233, -0.8713439, 0.71781945, 3.5203605, 0.6790297};
|
||||
|
||||
static const std::vector<float> input_data_7 = {
|
||||
0.73348462, 0.74833735, 0.40982435, 0.51988197, 0.99384421, 0.12469386,
|
||||
0.47686314, 0.25882564, 0.67028317, 0.58466398, 0.74927361, 0.19614283,
|
||||
0.82593526, 0.41205770, 0.74020169, 0.62222693, 0.33264240, 0.84108156,
|
||||
0.86392366, 0.79030966, 0.79792986, 0.47647899, 0.65967837, 0.92732906,
|
||||
0.90477190, 0.87232389, 0.55734667, 0.75560744, 0.70658521, 0.28530827,
|
||||
0.02554864, 0.14915414, 0.29936996, 0.74239557, 0.38158196, 0.26483291,
|
||||
0.15843351, 0.38703221, 0.79967600, 0.63790851, 0.66191234, 0.19395184,
|
||||
0.34992850, 0.89077723, 0.40746049, 0.01455611, 0.84174579, 0.91950995,
|
||||
0.43402124, 0.76620100, 0.96476467, 0.78331896, 0.48567269, 0.33793230,
|
||||
0.20362115, 0.51710568, 0.55455124, 0.10148728, 0.48229121, 0.58612092,
|
||||
0.91786709, 0.94405867, 0.54302465, 0.24146348, 0.34853454, 0.75880201,
|
||||
0.67781768, 0.29531289, 0.35969526, 0.01040005, 0.63142510, 0.67264276,
|
||||
0.57920180, 0.99608063, 0.91108299, 0.82647166, 0.54134147, 0.79556370,
|
||||
0.18579404, 0.95271365, 0.61918245, 0.17552980, 0.56332554, 0.58036855,
|
||||
0.33756331, 0.69359258, 0.03914420, 0.14962257, 0.26647894, 0.45042564,
|
||||
0.60093050, 0.67657016, 0.12601171, 0.95279680, 0.02868298, 0.82188820,
|
||||
0.17558198, 0.40678849, 0.90804391, 0.21813571, 0.69710526, 0.91450289,
|
||||
0.44277349, 0.70432336, 0.88161566, 0.23739783, 0.02746046, 0.05775890,
|
||||
0.63494471, 0.10963744, 0.68260565, 0.87579980, 0.34451002, 0.01422449,
|
||||
0.44081511, 0.78790226, 0.42010180, 0.62148773, 0.73164358, 0.85657540,
|
||||
0.21649672, 0.93347654, 0.65511518, 0.45192463, 0.57671214, 0.09925586,
|
||||
0.76042901, 0.84041443, 0.91933065, 0.00541233, 0.56194300, 0.71416635,
|
||||
0.15882159, 0.57976451, 0.37377713, 0.48352544, 0.96645849, 0.50040596,
|
||||
0.06060478, 0.21032667, 0.33303769, 0.80884551, 0.97500277, 0.28607026,
|
||||
0.12235457, 0.47764468, 0.09834820, 0.08864630, 0.21728048, 0.92446905,
|
||||
0.53802798, 0.22378462, 0.66087828, 0.64754384, 0.09980577, 0.50331927,
|
||||
0.90966904, 0.67624758, 0.22728569, 0.61184030, 0.66753081, 0.00405466,
|
||||
0.93407600, 0.89524725, 0.34496848, 0.01595642, 0.54338693, 0.65760153,
|
||||
0.69930304, 0.54202591, 0.66030817, 0.74371140, 0.95000083, 0.86475930,
|
||||
0.99826786, 0.85464029, 0.89926621, 0.90551912, 0.89889036, 0.38316505,
|
||||
0.06428984, 0.39342267, 0.40689672, 0.37076883, 0.72720439, 0.05071236,
|
||||
0.01355718, 0.95169120, 0.03623840, 0.05569115, 0.47255274, 0.44040655};
|
||||
|
||||
static const std::vector<float> expected_irdft2d_results_1 = {
|
||||
0.106065355, 0.7454709, 0.5723129, 0.45824066, 0.384706, 0.27398905, 0.6679619, 0.39547434,
|
||||
0.2815724, 0.779919, 0.59909385, 0.122946456, 0.38957337, 0.97498655, 0.46759892, 0.14017127,
|
||||
@@ -413,7 +447,7 @@ static const std::vector<float> expected_irdft2d_results_2 = {
|
||||
0.56639084, 0.01420842, 0.29673067, 0.63477397, 0.68019596, 0.39601113, 0.00000014,
|
||||
0.00000022};
|
||||
|
||||
static const std::vector<float> expected_rdft3d_results_2 = {
|
||||
static const std::vector<float> expected_irdft3d_results_2 = {
|
||||
0.29655575, 0.59799123, 0.22431113, 0.46143103, 0.53208175, 0.32705094, 0.59367000,
|
||||
0.29963828, 0.41763943, 0.24033307, 0.42796425, 0.56577777, 0.37677909, 0.32099129,
|
||||
0.28778578, 0.50527716, 0.39592624, -0.01477019, 0.46390174, 0.48881302, 0.69299017,
|
||||
@@ -450,6 +484,47 @@ static const std::vector<float> expected_rdft3d_results_2 = {
|
||||
0.49906203, 0.53449270, 0.22820431, 0.19888670, 0.56200754, 0.55242130, 0.36939947,
|
||||
0.01671917, 0.60996081};
|
||||
|
||||
static const std::vector<float> expected_irdft3d_results_3 ={
|
||||
0.51795123, 0.01846075, 0.03363710, -0.02286412, -0.00527071, -0.05116411,
|
||||
-0.01142488, -0.01784910, -0.01088149, 0.01049122, -0.00829387, 0.00942086,
|
||||
-0.02915924, 0.05941228, 0.05868882, -0.02329090, 0.06043447, 0.01260666,
|
||||
0.04213929, -0.03578551, -0.00354573, -0.02047438, -0.03469945, -0.02365786,
|
||||
0.00807303, 0.02364844, -0.00346402, -0.00134415, 0.04106979, 0.04961361,
|
||||
-0.01212564, -0.04288128, -0.26157875, -0.01917418, -0.04232584, 0.02477720,
|
||||
0.02514449, 0.04955597, -0.00301304, 0.00663580, 0.01947190, -0.01163269,
|
||||
-0.07920224, -0.01201069, 0.00564843, 0.00283007, -0.05916596, 0.03569793,
|
||||
-0.02454099, -0.01977048, -0.00360401, 0.00924050, -0.01237082, -0.04213287,
|
||||
-0.03306797, -0.01442351, -0.02601594, 0.07406829, -0.02896844, 0.00503278,
|
||||
0.00700455, 0.02915976, 0.01761130, -0.04474307, 0.03632101, 0.00957998,
|
||||
-0.02003984, -0.04022581, 0.03104216, 0.00388626, 0.05861915, 0.01034101,
|
||||
-0.00741989, 0.01010181, 0.01496502, -0.00544559, 0.04015258, -0.00600315,
|
||||
-0.06137903, 0.07850411, -0.00074931, 0.02540785, -0.00166176, 0.02205904,
|
||||
-0.02429718, 0.04010517, 0.02375359, 0.02229406, 0.01806382, -0.06089136,
|
||||
0.00447113, -0.03169147, 0.02836490, -0.05821620, 0.03905417, 0.03987032,
|
||||
0.29899586, -0.02616866, -0.00927641, -0.02134532, -0.02480746, -0.02636082,
|
||||
-0.05009444, -0.02208490, 0.02632000, 0.00493334, -0.00402312, -0.00935831,
|
||||
0.04154630, 0.00849218, 0.00232782, -0.01192997, -0.03309486, 0.01678531,
|
||||
0.03526979, 0.09272132, 0.01420703, -0.01919909, 0.01321082, -0.01661140,
|
||||
0.07861365, -0.02784724, 0.03900426, -0.00096805, -0.02880604, 0.02753764,
|
||||
-0.02092520, -0.01412453};
|
||||
|
||||
static const std::vector<float> expected_irdft3d_results_4 = {
|
||||
0.24882269, -0.00554157, -0.00759689, -0.00413212, 0.01099624, 0.02191469,
|
||||
0.02829072, -0.01410181, 0.04826954, 0.03587530, -0.01151859, 0.03459743,
|
||||
0.03157633, -0.03446264, 0.03595825, -0.01176664, 0.00625817, 0.00981066,
|
||||
-0.11900401, -0.02756717, 0.01933546, 0.03042892, -0.04917013, 0.00048474,
|
||||
-0.01849990, -0.01050222, -0.02433642, -0.08657554, -0.03473007, -0.01486101,
|
||||
0.00137630, -0.01972852, -0.06159696, 0.02284726, -0.03851998, -0.00885092,
|
||||
0.02397606, -0.02071742, -0.00586151, -0.01287085, 0.01713095, -0.07724825,
|
||||
0.05983482, -0.02824272, 0.02959802, 0.04051825, 0.00219584, 0.04053028,
|
||||
0.00415529, 0.02379833, -0.01936524, 0.04350142, 0.02095385, 0.03121966,
|
||||
-0.02675550, 0.01142533, 0.05606331, 0.02115209, 0.00866956, 0.05367358,
|
||||
-0.00479556, 0.05423974, -0.01172735, -0.01203834, 0.00181946, 0.00594081,
|
||||
0.00527473, 0.00781714, 0.07042868, -0.02243115, 0.03207793, -0.04213578,
|
||||
0.14912935, -0.01012542, -0.05799989, -0.02889979, 0.02934662, 0.03385938,
|
||||
0.00951527, -0.01760542, -0.01611288, 0.29838892, -0.01029289, -0.06226702,
|
||||
-0.03670440, 0.03954893, 0.00725941, 0.04219448, -0.03698240, 0.03564729};
|
||||
|
||||
template <element::Type_t ET>
|
||||
std::vector<IRDFTParams> generateParamsForIRDFT() {
|
||||
std::vector<IRDFTParams> params{
|
||||
@@ -684,7 +759,7 @@ std::vector<IRDFTParams> generateParamsForIRDFT() {
|
||||
ET,
|
||||
ET,
|
||||
input_data_6,
|
||||
expected_rdft3d_results_2,
|
||||
expected_irdft3d_results_2,
|
||||
op::v0::Constant::create<int64_t>(element::Type_t::i64, Shape{3}, {0, 1, 2}),
|
||||
op::v0::Constant::create<int64_t>(element::Type_t::i64, Shape{3}, {4, 5, 12})),
|
||||
// irdft3d_eval_2_negative_axes
|
||||
@@ -693,9 +768,45 @@ std::vector<IRDFTParams> generateParamsForIRDFT() {
|
||||
ET,
|
||||
ET,
|
||||
input_data_6,
|
||||
expected_rdft3d_results_2,
|
||||
expected_irdft3d_results_2,
|
||||
op::v0::Constant::create<int64_t>(element::Type_t::i64, Shape{3}, {-3, -2, 2}),
|
||||
op::v0::Constant::create<int64_t>(element::Type_t::i64, Shape{3}, {4, 5, 12})),
|
||||
// irdft3d_reversed_axes
|
||||
IRDFTParams(Shape{3, 4, 8, 2},
|
||||
Shape{4, 4, 8},
|
||||
ET,
|
||||
ET,
|
||||
input_data_7,
|
||||
expected_irdft3d_results_3,
|
||||
op::v0::Constant::create<int64_t>(element::Type_t::i64, Shape{3}, {2, 1, 0}),
|
||||
NULL),
|
||||
// irdft3d_reversed_negative_axes
|
||||
IRDFTParams(Shape{3, 4, 8, 2},
|
||||
Shape{4, 4, 8},
|
||||
ET,
|
||||
ET,
|
||||
input_data_7,
|
||||
expected_irdft3d_results_3,
|
||||
op::v0::Constant::create<int64_t>(element::Type_t::i64, Shape{3}, {-1, -2, -3}),
|
||||
NULL),
|
||||
// irdft3d_reversed_axes_with_signals
|
||||
IRDFTParams(Shape{3, 4, 8, 2},
|
||||
Shape{10, 3, 3},
|
||||
ET,
|
||||
ET,
|
||||
input_data_7,
|
||||
expected_irdft3d_results_4,
|
||||
op::v0::Constant::create<int64_t>(element::Type_t::i64, Shape{3}, {2, 1, 0}),
|
||||
op::v0::Constant::create<int64_t>(element::Type_t::i64, Shape{3}, {3, 3, 10})),
|
||||
// irdft3d_reversed_negative_axes_with_signals
|
||||
IRDFTParams(Shape{3, 4, 8, 2},
|
||||
Shape{10, 3, 3},
|
||||
ET,
|
||||
ET,
|
||||
input_data_7,
|
||||
expected_irdft3d_results_4,
|
||||
op::v0::Constant::create<int64_t>(element::Type_t::i64, Shape{3}, {-1, -2, -3}),
|
||||
op::v0::Constant::create<int64_t>(element::Type_t::i64, Shape{3}, {3, 3, 10})),
|
||||
};
|
||||
|
||||
return params;
|
||||
|
||||
@@ -16,7 +16,8 @@ void irdft(const std::vector<float>& input_data,
|
||||
const std::vector<int64_t>& axes_data,
|
||||
float* irdft_result,
|
||||
const Shape& fft_output_shape,
|
||||
const Shape& irdft_output_shape);
|
||||
const Shape& irdft_output_shape,
|
||||
const int64_t last_signal_size);
|
||||
} // namespace reference
|
||||
} // namespace runtime
|
||||
} // namespace ngraph
|
||||
|
||||
@@ -104,9 +104,9 @@ void irdft(const std::vector<float>& input_data,
|
||||
const std::vector<int64_t>& axes_data,
|
||||
float* irdft_result,
|
||||
const Shape& fft_output_shape,
|
||||
const Shape& irdft_output_shape) {
|
||||
const Shape& irdft_output_shape,
|
||||
const int64_t last_signal_size) {
|
||||
// calculate inverse FFT over the outer axes
|
||||
const int64_t last_signal_size = irdft_output_shape.back();
|
||||
const auto outer_ifft_axes = get_outer_fft_axes(axes_data);
|
||||
auto outer_ifft_shape = input_data_shape;
|
||||
for (const auto& a : outer_ifft_axes) {
|
||||
|
||||
@@ -976,8 +976,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
|
||||
(parents[i]->is_type<depth_to_space>() && dts_supports_fusings(parents[i]->as<depth_to_space>())) ||
|
||||
(parents[i]->is_type<gather>()) ||
|
||||
(parents[i]->is_type<reduce>() && reduce_supports_fusings(parents[i]->as<reduce>())) ||
|
||||
(parents[i]->is_type<lrn>()) ||
|
||||
(parents[i]->is_type<activation>());
|
||||
(parents[i]->is_type<lrn>());
|
||||
}
|
||||
|
||||
// Disable fusion to a node on constant path when second input is in data flow
|
||||
|
||||
@@ -28,8 +28,8 @@ ActivationKernelBase::DispatchData ActivationKernelBase::SetDefault(const activa
|
||||
dispatchData.gws = {out.X().v * out.Y().v, Align(out.Feature().v, 16), Align(out.Batch().v, 16)};
|
||||
dispatchData.lws = {1, 16, 16};
|
||||
} else {
|
||||
dispatchData.gws = {out.X().v * out.W().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v};
|
||||
std::vector<std::vector<Tensor::DataChannelName>> dims_by_gws = {{Tensor::DataChannelName::X, Tensor::DataChannelName::W},
|
||||
dispatchData.gws = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v};
|
||||
std::vector<std::vector<Tensor::DataChannelName>> dims_by_gws = {{Tensor::DataChannelName::X},
|
||||
{Tensor::DataChannelName::Y, Tensor::DataChannelName::Z},
|
||||
{Tensor::DataChannelName::FEATURE, Tensor::DataChannelName::BATCH}};
|
||||
dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, arg.engineInfo, in_layout, out_layout, dims_by_gws);
|
||||
|
||||
@@ -85,8 +85,7 @@ bool ActivationKernelOpt::Validate(const Params& p, const optional_params& o) co
|
||||
return false;
|
||||
|
||||
if (!params.fused_ops.empty() &&
|
||||
(params.outputs[0].GetLayout() != DataLayout::bfyx && params.outputs[0].GetLayout() != DataLayout::bfzyx &&
|
||||
params.outputs[0].GetLayout() != DataLayout::bfwzyx))
|
||||
(params.outputs[0].GetLayout() != DataLayout::bfyx && params.outputs[0].GetLayout() != DataLayout::bfzyx))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
@@ -110,20 +109,11 @@ JitConstants ActivationKernelOpt::GetJitConstants(const activation_params& param
|
||||
"x / OUTPUT_SIZE_X % OUTPUT_SIZE_Y",
|
||||
"x % OUTPUT_SIZE_X"};
|
||||
} else if (params.inputs[0].GetDims().size() == 5) {
|
||||
idx_order = {"x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z * OUTPUT_FEATURE_NUM)",
|
||||
idx_order = {"x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z* OUTPUT_FEATURE_NUM)",
|
||||
"x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z) % OUTPUT_FEATURE_NUM",
|
||||
"x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y) % OUTPUT_SIZE_Z",
|
||||
"x / OUTPUT_SIZE_X % OUTPUT_SIZE_Y",
|
||||
"x % OUTPUT_SIZE_X"};
|
||||
} else if (params.inputs[0].GetDims().size() == 6) {
|
||||
idx_order = {"x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z * OUTPUT_SIZE_W * OUTPUT_FEATURE_NUM)",
|
||||
"x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z * OUTPUT_SIZE_W) % OUTPUT_FEATURE_NUM",
|
||||
"x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z) % OUTPUT_SIZE_W",
|
||||
"x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y) % OUTPUT_SIZE_Z",
|
||||
"x / OUTPUT_SIZE_X % OUTPUT_SIZE_Y",
|
||||
"x % OUTPUT_SIZE_X"};
|
||||
} else {
|
||||
IE_THROW() << "Unknown dimension";
|
||||
}
|
||||
} else {
|
||||
if (params.inputs[0].GetDims().size() <= 4) {
|
||||
@@ -132,20 +122,11 @@ JitConstants ActivationKernelOpt::GetJitConstants(const activation_params& param
|
||||
"(x + i) / OUTPUT_SIZE_X % OUTPUT_SIZE_Y",
|
||||
"(x + i) % OUTPUT_SIZE_X"};
|
||||
} else if (params.inputs[0].GetDims().size() == 5) {
|
||||
idx_order = {"(x + i) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z * OUTPUT_FEATURE_NUM)",
|
||||
idx_order = {"(x + i) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z* OUTPUT_FEATURE_NUM)",
|
||||
"(x + i) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z) % OUTPUT_FEATURE_NUM",
|
||||
"(x + i) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y) % OUTPUT_SIZE_Z",
|
||||
"(x + i) / OUTPUT_SIZE_X % OUTPUT_SIZE_Y",
|
||||
"(x + i) % OUTPUT_SIZE_X"};
|
||||
} else if (params.inputs[0].GetDims().size() == 6) {
|
||||
idx_order = {"(x + i) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z * OUTPUT_SIZE_W * OUTPUT_FEATURE_NUM)",
|
||||
"(x + i) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z * OUTPUT_SIZE_W) % OUTPUT_FEATURE_NUM",
|
||||
"(x + i) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z) % OUTPUT_SIZE_W",
|
||||
"(x + i) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y) % OUTPUT_SIZE_Z",
|
||||
"(x + i) / OUTPUT_SIZE_X % OUTPUT_SIZE_Y",
|
||||
"(x + i) % OUTPUT_SIZE_X"};
|
||||
} else {
|
||||
IE_THROW() << "Unknown dimension";
|
||||
}
|
||||
}
|
||||
FusedOpsConfiguration conf_vector = {"_VECTOR",
|
||||
|
||||
@@ -26,7 +26,7 @@ protected:
|
||||
JitConstants GetJitConstants(const activation_params& params, DispatchData dispatchData) const override;
|
||||
std::vector<FusedOpType> GetSupportedFusedOps() const override {
|
||||
return {FusedOpType::QUANTIZE,
|
||||
FusedOpType::ELTWISE,
|
||||
FusedOpType::SCALE,
|
||||
FusedOpType::ACTIVATION};
|
||||
}
|
||||
};
|
||||
|
||||
@@ -40,10 +40,6 @@ JitConstants ActivationKernelRef::GetJitConstants(const activation_params& param
|
||||
idx_order = {"batch", "feature", "y", "x"};
|
||||
} else if (params.inputs[0].GetDims().size() == 5) {
|
||||
idx_order = {"batch", "feature", "z", "y", "x"};
|
||||
} else if (params.inputs[0].GetDims().size() == 6) {
|
||||
idx_order = {"batch", "feature", "w", "z", "y", "x"};
|
||||
} else {
|
||||
IE_THROW() << "unknown dimension";
|
||||
}
|
||||
FusedOpsConfiguration conf = {"", idx_order, "dst", input_dt, 1};
|
||||
jit.Merge(MakeFusedOpsJitConstants(params, {conf}));
|
||||
|
||||
@@ -23,7 +23,7 @@ public:
|
||||
JitConstants GetJitConstants(const activation_params& params, DispatchData dispatchData) const override;
|
||||
std::vector<FusedOpType> GetSupportedFusedOps() const override {
|
||||
return {FusedOpType::QUANTIZE,
|
||||
FusedOpType::ELTWISE,
|
||||
FusedOpType::SCALE,
|
||||
FusedOpType::ACTIVATION};
|
||||
}
|
||||
|
||||
|
||||
@@ -23,27 +23,13 @@ KERNEL(activation)(
|
||||
#endif
|
||||
)
|
||||
{
|
||||
#if OUTPUT_DIMS == 6
|
||||
#define ORDER batch,feature,w,z,y,x
|
||||
#elif OUTPUT_DIMS == 5
|
||||
#if OUTPUT_DIMS == 5
|
||||
#define ORDER batch,feature,z,y,x
|
||||
#elif OUTPUT_DIMS == 4
|
||||
#define ORDER batch,feature,y,x
|
||||
#endif
|
||||
|
||||
#if OUTPUT_DIMS == 6
|
||||
const uint x = (uint)get_global_id(0) % OUTPUT_SIZE_X;
|
||||
const uint y = (uint)get_global_id(1) % OUTPUT_SIZE_Y;
|
||||
const uint z = (uint)get_global_id(1) / OUTPUT_SIZE_Y;
|
||||
const uint w = (uint)get_global_id(0) / OUTPUT_SIZE_X;
|
||||
#if OUTPUT_BATCH_NUM == 1
|
||||
const unsigned feature = (uint)get_global_id(2);
|
||||
const unsigned batch = 0;
|
||||
#else
|
||||
const unsigned feature = (uint)get_global_id(2) % OUTPUT_FEATURE_NUM;
|
||||
const unsigned batch = (uint)get_global_id(2) / OUTPUT_FEATURE_NUM;
|
||||
#endif
|
||||
#elif OUTPUT_DIMS == 5
|
||||
#if OUTPUT_DIMS == 5
|
||||
const unsigned x = get_global_id(0);
|
||||
const uint y = (uint)get_global_id(1) % OUTPUT_SIZE_Y;
|
||||
const uint z = (uint)get_global_id(1) / OUTPUT_SIZE_Y;
|
||||
|
||||
@@ -158,19 +158,19 @@ INSTANTIATE_TEST_SUITE_P(DISABLED_fusings_gpu, activation_quantize_i8, ::testing
|
||||
activation_test_params{ CASE_ACTIVATION_3D_F32_5, 2, 3, "activation_ref" }, // FIXME - accuracy bug
|
||||
}));
|
||||
|
||||
class activation_eltwise_activation_quantize_u8 : public ActivationFusingTest {};
|
||||
TEST_P(activation_eltwise_activation_quantize_u8, basic) {
|
||||
class activation_scale_activation_quantize_u8 : public ActivationFusingTest {};
|
||||
TEST_P(activation_scale_activation_quantize_u8, basic) {
|
||||
auto p = GetParam();
|
||||
create_topologies(
|
||||
input_layout("input", get_input_layout(p)),
|
||||
activation("act", "input", activation_func::relu),
|
||||
data("eltwise_data", get_mem(get_single_element_layout(p), 1.0f / 255)),
|
||||
data("scale_data", get_mem(get_single_element_layout(p), 1.0f / 255)),
|
||||
data("in_low", get_mem(get_single_element_layout(p), 0)),
|
||||
data("in_high", get_mem(get_single_element_layout(p), 1, max_random)),
|
||||
data("out_low", get_mem(get_single_element_layout(p), -127)),
|
||||
data("out_high", get_mem(get_single_element_layout(p), 127)),
|
||||
eltwise("eltwise", { "act", "eltwise_data" }, eltwise_mode::prod, p.default_type),
|
||||
activation("act2", "eltwise", activation_func::softsign),
|
||||
scale("scale", "act", "scale_data"),
|
||||
activation("act2", "scale", activation_func::softsign),
|
||||
quantize("quant", "act2", "in_low", "in_high", "out_low", "out_high", 256, data_types::u8),
|
||||
reorder("reorder_bfyx", "quant", p.default_format, data_types::f32)
|
||||
);
|
||||
@@ -179,18 +179,18 @@ TEST_P(activation_eltwise_activation_quantize_u8, basic) {
|
||||
execute(p);
|
||||
}
|
||||
|
||||
TEST_P(activation_eltwise_activation_quantize_u8, per_channel) {
|
||||
TEST_P(activation_scale_activation_quantize_u8, per_channel) {
|
||||
auto p = GetParam();
|
||||
create_topologies(
|
||||
input_layout("input", get_input_layout(p)),
|
||||
activation("act", "input", activation_func::relu),
|
||||
data("eltwise_data", get_mem(get_single_element_layout(p), 1.0f / 255)),
|
||||
data("scale_data", get_mem(get_single_element_layout(p), 1.0f / 255)),
|
||||
data("in_low", get_mem(get_per_channel_layout(p), 0)),
|
||||
data("in_high", get_mem(get_per_channel_layout(p), 1, max_random)),
|
||||
data("out_low", get_mem(get_single_element_layout(p), -127)),
|
||||
data("out_high", get_mem(get_single_element_layout(p), 127)),
|
||||
eltwise("eltwise", { "act", "eltwise_data" }, eltwise_mode::prod, p.default_type),
|
||||
activation("act2", "eltwise", activation_func::softsign),
|
||||
scale("scale", "act", "scale_data"),
|
||||
activation("act2", "scale", activation_func::softsign),
|
||||
quantize("quant", "act2", "in_low", "in_high", "out_low", "out_high", 256, data_types::u8),
|
||||
reorder("reorder_bfyx", "quant", p.default_format, data_types::f32)
|
||||
);
|
||||
@@ -199,7 +199,7 @@ TEST_P(activation_eltwise_activation_quantize_u8, per_channel) {
|
||||
execute(p);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(fusings_gpu, activation_eltwise_activation_quantize_u8, ::testing::ValuesIn(std::vector<activation_test_params>{
|
||||
INSTANTIATE_TEST_SUITE_P(fusings_gpu, activation_scale_activation_quantize_u8, ::testing::ValuesIn(std::vector<activation_test_params>{
|
||||
// InputDataType = FP32
|
||||
activation_test_params{ CASE_ACTIVATION_F32_0, 2, 5, "activation_opt" },
|
||||
activation_test_params{ CASE_ACTIVATION_F32_1, 2, 5, "activation_opt" },
|
||||
@@ -219,19 +219,19 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, activation_eltwise_activation_quantize_u8,
|
||||
activation_test_params{ CASE_ACTIVATION_3D_F32_2, 2, 5, "activation_ref" },
|
||||
}));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(DISABLED_fusings_gpu, activation_eltwise_activation_quantize_u8, ::testing::ValuesIn(std::vector<activation_test_params>{
|
||||
INSTANTIATE_TEST_SUITE_P(DISABLED_fusings_gpu, activation_scale_activation_quantize_u8, ::testing::ValuesIn(std::vector<activation_test_params>{
|
||||
activation_test_params{ CASE_ACTIVATION_3D_F32_5, 2, 5, "activation_ref" }, // FIXME - accuracy bug
|
||||
}));
|
||||
|
||||
class activation_eltwise_activation : public ActivationFusingTest {};
|
||||
TEST_P(activation_eltwise_activation, basic) {
|
||||
class activation_scale_activation : public ActivationFusingTest {};
|
||||
TEST_P(activation_scale_activation, basic) {
|
||||
auto p = GetParam();
|
||||
create_topologies(
|
||||
input_layout("input", get_input_layout(p)),
|
||||
activation("act", "input", activation_func::relu),
|
||||
data("eltwise_data", get_mem(get_single_element_layout(p), 1.0f / 255)),
|
||||
eltwise("eltwise", { "act", "eltwise_data" }, eltwise_mode::prod, p.default_type),
|
||||
activation("act2", "eltwise", activation_func::exp),
|
||||
data("scale_data", get_mem(get_single_element_layout(p), 1.0f / 255)),
|
||||
scale("scale", "act", "scale_data"),
|
||||
activation("act2", "scale", activation_func::exp),
|
||||
reorder("reorder_bfyx", "act2", p.default_format, data_types::f32)
|
||||
);
|
||||
|
||||
@@ -239,7 +239,7 @@ TEST_P(activation_eltwise_activation, basic) {
|
||||
execute(p);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(fusings_gpu, activation_eltwise_activation, ::testing::ValuesIn(std::vector<activation_test_params>{
|
||||
INSTANTIATE_TEST_SUITE_P(fusings_gpu, activation_scale_activation, ::testing::ValuesIn(std::vector<activation_test_params>{
|
||||
// InputDataType = FP32
|
||||
activation_test_params{ CASE_ACTIVATION_F32_0, 2, 4, "activation_opt" },
|
||||
activation_test_params{ CASE_ACTIVATION_F32_1, 2, 4, "activation_opt" },
|
||||
@@ -291,28 +291,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, activation_eltwise_activation, ::testing::
|
||||
activation_test_params{ CASE_ACTIVATION_3D_I8_1, 2, 4, "activation_ref" }
|
||||
}));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(DISABLED_fusings_gpu, activation_eltwise_activation, ::testing::ValuesIn(std::vector<activation_test_params>{
|
||||
INSTANTIATE_TEST_SUITE_P(DISABLED_fusings_gpu, activation_scale_activation, ::testing::ValuesIn(std::vector<activation_test_params>{
|
||||
activation_test_params{ CASE_ACTIVATION_3D_F32_4, 2, 4, "activation_ref" }, // FIXME - accuracy bug
|
||||
activation_test_params{ CASE_ACTIVATION_3D_F32_5, 2, 4, "activation_ref" }, // FIXME - accuracy bug
|
||||
}));
|
||||
|
||||
class activation_eltwise : public ActivationFusingTest {};
|
||||
TEST_P(activation_eltwise, basic) {
|
||||
auto p = GetParam();
|
||||
create_topologies(
|
||||
input_layout("input", get_input_layout(p)),
|
||||
activation("act", "input", activation_func::abs),
|
||||
data("eltwise_data", get_mem(get_single_element_layout(p), 10.0f)),
|
||||
eltwise("eltwise", { "act", "eltwise_data" }, eltwise_mode::prod, p.default_type),
|
||||
reorder("reorder_bfyx", "eltwise", p.default_format, data_types::f32)
|
||||
);
|
||||
tolerance = 1e-05f;
|
||||
execute(p);
|
||||
}
|
||||
|
||||
// Test-case descriptor: a six-entry shape followed by f32/bfwzyx for both the input and the default type/format.
// NOTE(review): the name says "4D" but the shape has six entries and the format is bfwzyx — confirm the intended name.
#define CASE_ACTIVATION_4D_F32_0 { 1, 2, 3, 2, 3, 2 }, data_types::f32, format::bfwzyx, data_types::f32, format::bfwzyx
|
||||
// Instantiates the activation_eltwise suite twice with the same case; the trailing string
// presumably selects the kernel under test ("activation_ref" vs "activation_opt") — verify against activation_test_params.
INSTANTIATE_TEST_SUITE_P(fusings_gpu, activation_eltwise, ::testing::ValuesIn(std::vector<activation_test_params>{
|
||||
// InputDataType = FP32
|
||||
activation_test_params{ CASE_ACTIVATION_4D_F32_0, 2, 3, "activation_ref" },
|
||||
activation_test_params{ CASE_ACTIVATION_4D_F32_0, 2, 3, "activation_opt" }
|
||||
}));
|
||||
|
||||
@@ -8,7 +8,6 @@
|
||||
#include <intel_gpu/primitives/activation.hpp>
|
||||
#include <intel_gpu/primitives/data.hpp>
|
||||
#include <intel_gpu/primitives/reorder.hpp>
|
||||
#include <intel_gpu/primitives/eltwise.hpp>
|
||||
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
@@ -16,50 +15,6 @@
|
||||
using namespace cldnn;
|
||||
using namespace ::tests;
|
||||
|
||||
static void test_abs_basic_bfwzyx(const char* kernel_name){
|
||||
auto& engine = get_test_engine();
|
||||
|
||||
std::vector<int> shape = {2, 3, 7, 6, 5, 4};
|
||||
auto input = engine.allocate_memory({data_types::f32, format::bfwzyx, tensor(format::bfwzyx, shape)});
|
||||
auto input_raw = generate_random_1d<float>(std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>()), -9, 9);
|
||||
set_values(input, input_raw);
|
||||
|
||||
auto elt_dat = engine.allocate_memory({data_types::f32, format::bfwzyx, tensor(format::bfwzyx, shape)});
|
||||
auto elt_dat_raw = generate_random_1d<float>(std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>()), -9, 9);
|
||||
set_values(elt_dat, elt_dat_raw);
|
||||
|
||||
topology topo(input_layout("input", input->get_layout()),
|
||||
activation("act", "input", activation_func::abs),
|
||||
data("elt_dat", elt_dat),
|
||||
eltwise("elt", {"act", "elt_dat"}, eltwise_mode::prod));
|
||||
build_options bo;
|
||||
implementation_desc act_impl = { format::bfwzyx, kernel_name };
|
||||
bo.set_option(build_option::force_implementations({{"act",act_impl}}));
|
||||
network net(engine, topo, bo);
|
||||
net.set_input_data("input", input);
|
||||
auto res = net.execute();
|
||||
EXPECT_EQ(res.size(), size_t(1));
|
||||
EXPECT_EQ(res.begin()->first, "elt");
|
||||
|
||||
auto output_memory = res.at("elt").get_memory();
|
||||
auto output_layout = output_memory->get_layout();
|
||||
cldnn::mem_lock<float> output_ptr(output_memory, get_test_stream());
|
||||
|
||||
int w_size = output_layout.spatial(3);
|
||||
int z_size = output_layout.spatial(2);
|
||||
int y_size = output_layout.spatial(1);
|
||||
int x_size = output_layout.spatial(0);
|
||||
int f_size = output_layout.feature();
|
||||
int b_size = output_layout.batch();
|
||||
auto bfwzyx = std::vector<int>{b_size, f_size, w_size, z_size, y_size, x_size};
|
||||
EXPECT_EQ(output_layout.format, format::bfwzyx);
|
||||
EXPECT_EQ(bfwzyx, shape);
|
||||
for (size_t i = 0; i < input_raw.size(); ++i)
|
||||
EXPECT_FLOAT_EQ(abs(input_raw[i])*elt_dat_raw[i], output_ptr[i]);
|
||||
}
|
||||
// Run the bfwzyx abs + eltwise check once per activation kernel. The test-name suffix matches
// the kernel that test_abs_basic_bfwzyx forces for the "act" primitive.
TEST(activation_f32_fw_gpu, abs_basic_bfwzyx_opt) {test_abs_basic_bfwzyx("activation_opt");}
TEST(activation_f32_fw_gpu, abs_basic_bfwzyx_ref) {test_abs_basic_bfwzyx("activation_ref");}
|
||||
|
||||
TEST(activation_f32_fw_gpu, not_basic_yxfb) {
|
||||
// Input:
|
||||
// 1 0 -3 4 5
|
||||
|
||||
@@ -17,7 +17,7 @@ from ...algorithm import Algorithm
|
||||
from ...algorithm_selector import COMPRESSION_ALGORITHMS
|
||||
from ...quantization import fake_quantize as fqut
|
||||
from ....graph.model_utils import save_model
|
||||
from ....samplers.index_sampler import IndexSampler
|
||||
from ....samplers.creator import create_sampler
|
||||
from ....utils.logger import get_logger
|
||||
|
||||
try:
|
||||
@@ -318,7 +318,10 @@ class OptimizationAlgorithm(Algorithm):
|
||||
def calculate_error_on_subset(self, subset_indices, model):
|
||||
def metric_error(subset_indices, model):
|
||||
self._engine.set_model(model)
|
||||
metrics, _ = self._engine.predict(None, IndexSampler(subset_indices))
|
||||
|
||||
index_sampler = create_sampler(self._engine, samples=subset_indices)
|
||||
metrics, _ = self._engine.predict(None , sampler=index_sampler)
|
||||
|
||||
error_rate = 0
|
||||
if self._default_metrics_values is not None:
|
||||
metrics_value = []
|
||||
|
||||
Reference in New Issue
Block a user