From 9af49a7f95cc84ffc51003b8b18822127fbea3e1 Mon Sep 17 00:00:00 2001
From: Bartosz Lesniewski <bartosz.lesniewski@intel.com>
Date: Mon, 21 Sep 2020 14:04:39 +0200
Subject: [PATCH] Reference ROIAlign-3 implementation (#2128)

---
 ngraph/core/include/ngraph/op/roi_align.hpp   |   9 +-
 .../ngraph/runtime/reference/roi_align.hpp    | 244 ++++++++++++++++++
 ngraph/core/src/op/roi_align.cpp              |  95 +++++++
 ngraph/test/CMakeLists.txt                    |   1 +
 .../test/models/onnx/roi_align_f32.prototxt   | 110 ++++++++
 ngraph/test/onnx/onnx_import.in.cpp           |  44 +++-
 ngraph/test/op_eval/roi_align.cpp             | 166 ++++++++++++
 ngraph/test/runtime/ie/unit_test.manifest     |   5 +-
 8 files changed, 669 insertions(+), 5 deletions(-)
 create mode 100644 ngraph/core/reference/include/ngraph/runtime/reference/roi_align.hpp
 create mode 100644 ngraph/test/models/onnx/roi_align_f32.prototxt
 create mode 100644 ngraph/test/op_eval/roi_align.cpp
diff --git a/ngraph/core/include/ngraph/op/roi_align.hpp b/ngraph/core/include/ngraph/op/roi_align.hpp
index 55abcfcd86e..b32bd120504 100644
--- a/ngraph/core/include/ngraph/op/roi_align.hpp
+++ b/ngraph/core/include/ngraph/op/roi_align.hpp
@@ -76,6 +76,9 @@ namespace ngraph
                 int get_sampling_ratio() const { return m_sampling_ratio; }
                 float get_spatial_scale() const { return m_spatial_scale; }
                 PoolingMode get_mode() const { return m_mode; }
+                bool evaluate(const HostTensorVector& outputs,
+                              const HostTensorVector& inputs) const override;
+
             private:
                 PoolingMode mode_from_string(const std::string& mode) const;
 
@@ -86,9 +89,9 @@ namespace ngraph
                 float m_spatial_scale;
                 PoolingMode m_mode;
             };
-        }
+        } // namespace v3
         using v3::ROIAlign;
-    }
+    } // namespace op
 
     std::ostream& operator<<(std::ostream& s, const op::v3::ROIAlign::PoolingMode& mode);
 
@@ -106,4 +109,4 @@ namespace ngraph
             "AttributeAdapter<op::v3::ROIAlign::PoolingMode>", 3};
         const DiscreteTypeInfo& get_type_info() const override { return type_info; }
     };
-}
+} // namespace ngraph
diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/roi_align.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/roi_align.hpp
new file mode 100644
index 00000000000..f586c10c8ee
--- /dev/null
+++ b/ngraph/core/reference/include/ngraph/runtime/reference/roi_align.hpp
@@ -0,0 +1,244 @@
+//*****************************************************************************
+// Copyright 2017-2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+
+#pragma once
+
+#include <algorithm>
+#include "ngraph/coordinate_transform.hpp"
+#include "ngraph/op/roi_align.hpp" // for ROIAlign:PoolingMode
+#include "ngraph/shape.hpp"
+namespace ngraph
+{
+    namespace runtime
+    {
+        namespace reference
+        {
+            using ROIPoolingMode = op::v3::ROIAlign::PoolingMode;
+            // Reference implementation of ROIAlign. feature_maps is indexed as [N, C, H, W],
+            // each ROI is (x1, y1, x2, y2) and batch_indices[i] picks the batch ROI i reads.
+            // Every output bin is sampled on a regular grid (sampling_ratio per axis, or
+            // ceil(bin size) when sampling_ratio == 0); each sample contributes 4 neighbour
+            // points with bilinear weights, reduced per pooling_mode. batch_indices_shape unused.
+            template <typename T>
+            void roi_align(const T* feature_maps,
+                           const T* rois,
+                           const int64_t* batch_indices,
+                           T* out,
+                           const Shape& feature_maps_shape,
+                           const Shape& rois_shape,
+                           const Shape& batch_indices_shape,
+                           const Shape& out_shape,
+                           const int pooled_height,
+                           const int pooled_width,
+                           const int sampling_ratio,
+                           const float spatial_scale,
+                           const ROIPoolingMode& pooling_mode)
+            {
+                auto C = feature_maps_shape[1];
+                auto feature_map_height = feature_maps_shape[2];
+                auto feature_map_width = feature_maps_shape[3];
+                auto num_rois = rois_shape[0];
+
+                CoordinateTransform feature_maps_transform(feature_maps_shape);
+                CoordinateTransform rois_transform(rois_shape);
+                CoordinateTransform out_transform(out_shape);
+
+                for (unsigned int roi_index = 0; roi_index < num_rois; roi_index++)
+                {
+                    // Get ROI's corners, scaled by spatial_scale
+                    T x1 = rois[rois_transform.index({roi_index, 0})] * spatial_scale;
+                    T y1 = rois[rois_transform.index({roi_index, 1})] * spatial_scale;
+                    T x2 = rois[rois_transform.index({roi_index, 2})] * spatial_scale;
+                    T y2 = rois[rois_transform.index({roi_index, 3})] * spatial_scale;
+
+                    T roi_width = std::max(x2 - x1, static_cast<T>(1.0));
+                    T roi_height = std::max(y2 - y1, static_cast<T>(1.0));
+
+                    T bin_width = roi_width / pooled_width;
+                    T bin_height = roi_height / pooled_height;
+
+                    auto sampling_ratio_x =
+                        sampling_ratio == 0 ? static_cast<int>(ceil(bin_width)) : sampling_ratio;
+                    auto sampling_ratio_y =
+                        sampling_ratio == 0 ? static_cast<int>(ceil(bin_height)) : sampling_ratio;
+
+                    uint64_t num_samples_in_bin = sampling_ratio_x * sampling_ratio_y;
+
+                    T sample_distance_x = bin_width / static_cast<T>(sampling_ratio_x);
+                    T sample_distance_y = bin_height / static_cast<T>(sampling_ratio_y);
+
+                    std::vector<std::pair<unsigned int, unsigned int>> pooling_points;
+                    std::vector<T> pooling_weights;
+
+                    pooling_points.reserve(4 * num_samples_in_bin * pooled_height * pooled_width);
+                    pooling_weights.reserve(4 * num_samples_in_bin * pooled_height * pooled_width);
+
+                    // Save the sample coords and weights (4 entries per sample) as they will be
+                    // identical across all channels
+                    for (unsigned int y_bin_ind = 0; y_bin_ind < pooled_height; y_bin_ind++)
+                    {
+                        for (unsigned int x_bin_ind = 0; x_bin_ind < pooled_width; x_bin_ind++)
+                        {
+                            for (unsigned int y_sample_ind = 0; y_sample_ind < sampling_ratio_y;
+                                 y_sample_ind++)
+                            {
+                                T sample_y = y1 + static_cast<T>(y_bin_ind) * bin_height +
+                                             sample_distance_y * (static_cast<T>(y_sample_ind) +
+                                                                  static_cast<T>(0.5f));
+
+                                for (int64_t x_sample_ind = 0; x_sample_ind < sampling_ratio_x;
+                                     x_sample_ind++)
+                                {
+                                    T sample_x = x1 + static_cast<T>(x_bin_ind) * bin_width +
+                                                 sample_distance_x * (static_cast<T>(x_sample_ind) +
+                                                                      static_cast<T>(0.5f));
+
+                                    if (sample_x < -1.0 || sample_x > feature_map_width ||
+                                        sample_y < -1.0 || sample_y > feature_map_height)
+                                    {
+                                        // For this sample we save 4x point (0,0) with weight 0
+                                        pooling_points.insert(pooling_points.end(), 4, {0, 0});
+                                        pooling_weights.insert(pooling_weights.end(), 4, {0});
+                                        continue;
+                                    }
+
+                                    sample_x = std::max(sample_x, T{0});
+                                    sample_y = std::max(sample_y, T{0});
+
+                                    auto sample_y_low = static_cast<unsigned int>(sample_y);
+                                    auto sample_x_low = static_cast<unsigned int>(sample_x);
+                                    unsigned int sample_y_high;
+                                    unsigned int sample_x_high;
+
+                                    if (sample_y_low >= feature_map_height - 1)
+                                    {
+                                        sample_y_high = sample_y_low = feature_map_height - 1;
+                                        sample_y = static_cast<T>(sample_y_low);
+                                    }
+                                    else
+                                    {
+                                        sample_y_high = sample_y_low + 1;
+                                    }
+
+                                    if (sample_x_low >= feature_map_width - 1)
+                                    {
+                                        sample_x_high = sample_x_low = feature_map_width - 1;
+                                        sample_x = static_cast<T>(sample_x_low);
+                                    }
+                                    else
+                                    {
+                                        sample_x_high = sample_x_low + 1;
+                                    }
+                                    pooling_points.push_back({sample_y_low, sample_x_low});
+                                    pooling_points.push_back({sample_y_low, sample_x_high});
+                                    pooling_points.push_back({sample_y_high, sample_x_low});
+                                    pooling_points.push_back({sample_y_high, sample_x_high});
+
+                                    // weight calculation for bilinear interpolation
+                                    auto ly = sample_y - static_cast<T>(sample_y_low);
+                                    auto lx = sample_x - static_cast<T>(sample_x_low);
+                                    auto hy = static_cast<T>(1.) - ly;
+                                    auto hx = static_cast<T>(1.) - lx;
+
+                                    pooling_weights.push_back(hy * hx);
+                                    pooling_weights.push_back(hy * lx);
+                                    pooling_weights.push_back(ly * hx);
+                                    pooling_weights.push_back(ly * lx);
+                                }
+                            }
+                        }
+                    }
+
+                    std::vector<T> tmp_out;
+                    tmp_out.reserve(pooled_height * pooled_width);
+
+                    for (unsigned int channel_index = 0; channel_index < C; channel_index++)
+                    {
+                        unsigned int sample_index = 0;
+                        for (unsigned int y_bin_ind = 0; y_bin_ind < pooled_height; y_bin_ind++)
+                        {
+                            for (unsigned int x_bin_ind = 0; x_bin_ind < pooled_width; x_bin_ind++)
+                            {
+                                T pooled_value = 0;
+                                for (unsigned int bin_sample_ind = 0;
+                                     bin_sample_ind < num_samples_in_bin;
+                                     bin_sample_ind++)
+                                {
+                                    // the four parts are values of the four closest surrounding
+                                    // neighbours of considered sample, then basing on all sampled
+                                    // values in bin we calculate pooled value
+                                    auto sample_part_1 = feature_maps[feature_maps_transform.index(
+                                        {static_cast<unsigned int>(batch_indices[roi_index]),
+                                         channel_index,
+                                         pooling_points[sample_index].first,
+                                         pooling_points[sample_index].second})];
+                                    auto sample_part_2 = feature_maps[feature_maps_transform.index(
+                                        {static_cast<unsigned int>(batch_indices[roi_index]),
+                                         channel_index,
+                                         pooling_points[sample_index + 1].first,
+                                         pooling_points[sample_index + 1].second})];
+                                    auto sample_part_3 = feature_maps[feature_maps_transform.index(
+                                        {static_cast<unsigned int>(batch_indices[roi_index]),
+                                         channel_index,
+                                         pooling_points[sample_index + 2].first,
+                                         pooling_points[sample_index + 2].second})];
+                                    auto sample_part_4 = feature_maps[feature_maps_transform.index(
+                                        {static_cast<unsigned int>(batch_indices[roi_index]),
+                                         channel_index,
+                                         pooling_points[sample_index + 3].first,
+                                         pooling_points[sample_index + 3].second})];
+
+                                    switch (pooling_mode)
+                                    {
+                                    case ROIPoolingMode::MAX:
+                                    {
+                                        T sample_value = std::max(
+                                            {pooling_weights[sample_index] * sample_part_1,
+                                             pooling_weights[sample_index + 1] * sample_part_2,
+                                             pooling_weights[sample_index + 2] * sample_part_3,
+                                             pooling_weights[sample_index + 3] * sample_part_4});
+
+                                        pooled_value = std::max(pooled_value, sample_value);
+                                        break;
+                                    }
+                                    case ROIPoolingMode::AVG:
+                                    default:
+                                    {
+                                        T sample_value =
+                                            pooling_weights[sample_index] * sample_part_1 +
+                                            pooling_weights[sample_index + 1] * sample_part_2 +
+                                            pooling_weights[sample_index + 2] * sample_part_3 +
+                                            pooling_weights[sample_index + 3] * sample_part_4;
+                                        pooled_value += sample_value / (num_samples_in_bin);
+                                    }
+                                    }
+                                    sample_index += 4;
+                                }
+                                tmp_out.push_back(pooled_value);
+                            }
+                        }
+                        // save the calculations for all bins across this channel
+                        auto output_channel_offset =
+                            out_transform.index({roi_index, channel_index, 0, 0});
+                        std::copy(tmp_out.begin(), tmp_out.end(), out + output_channel_offset);
+
+                        tmp_out.clear();
+                    }
+                }
+            }
+        } // namespace reference
+    }     // namespace runtime
+} // namespace ngraph
diff --git a/ngraph/core/src/op/roi_align.cpp b/ngraph/core/src/op/roi_align.cpp
index 4040be2951b..dc5ed660675 100644
--- a/ngraph/core/src/op/roi_align.cpp
+++ b/ngraph/core/src/op/roi_align.cpp
@@ -16,6 +16,10 @@
 
 #include "roi_align.hpp"
 
+#include "ngraph/runtime/host_tensor.hpp"
+#include "ngraph/runtime/reference/roi_align.hpp"
+#include "util.hpp" // for host_tensor_2_vector
+
 using namespace std;
 using namespace ngraph;
 
@@ -67,6 +71,14 @@ void op::v3::ROIAlign::validate_and_infer_types()
         " and: ",
         get_input_element_type(1));
 
+    NODE_VALIDATION_CHECK(
+        this,
+        get_input_element_type(0) == get_input_element_type(1),
+        "Type of feature maps (inputs) and rois is expected to be the same. Got: ",
+        get_input_element_type(0),
+        " and: ",
+        get_input_element_type(1));
+
     NODE_VALIDATION_CHECK(this,
                           get_input_element_type(2).is_integral_number(),
                           "The data type for batch indices is expected to be an integer. Got: ",
@@ -190,4 +202,87 @@ namespace ngraph
     {
         return s << as_string(type);
     }
+} // namespace ngraph
+namespace
+{
+    bool evaluate_roi_align(const HostTensorVector& args, // runs the reference ROIAlign kernel
+                            const HostTensorPtr& out,
+                            const int pooled_height,
+                            const int pooled_width,
+                            const int sampling_ratio,
+                            const float spatial_scale,
+                            const op::v3::ROIAlign::PoolingMode& pooling_mode)
+    {
+        auto feature_maps = args[0]; // its element type selects the kernel instantiation
+        auto rois = args[1];         // read with the same element type as feature_maps
+        auto batch_indices = args[2];
+
+        std::vector<int64_t> batch_indices_vec_scaled_up =
+            host_tensor_2_vector<int64_t>(batch_indices); // the kernel takes const int64_t*
+
+        switch (feature_maps->get_element_type()) // only bf16, f16 and f32 are dispatched
+        {
+        case element::Type_t::bf16:
+        {
+            runtime::reference::roi_align<bfloat16>(feature_maps->get_data_ptr<bfloat16>(),
+                                                    rois->get_data_ptr<bfloat16>(),
+                                                    batch_indices_vec_scaled_up.data(),
+                                                    out->get_data_ptr<bfloat16>(),
+                                                    feature_maps->get_shape(),
+                                                    rois->get_shape(),
+                                                    batch_indices->get_shape(),
+                                                    out->get_shape(),
+                                                    pooled_height,
+                                                    pooled_width,
+                                                    sampling_ratio,
+                                                    spatial_scale,
+                                                    pooling_mode);
+            break;
+        }
+        case element::Type_t::f16:
+        {
+            runtime::reference::roi_align<float16>(feature_maps->get_data_ptr<float16>(),
+                                                   rois->get_data_ptr<float16>(),
+                                                   batch_indices_vec_scaled_up.data(),
+                                                   out->get_data_ptr<float16>(),
+                                                   feature_maps->get_shape(),
+                                                   rois->get_shape(),
+                                                   batch_indices->get_shape(),
+                                                   out->get_shape(),
+                                                   pooled_height,
+                                                   pooled_width,
+                                                   sampling_ratio,
+                                                   spatial_scale,
+                                                   pooling_mode);
+            break;
+        }
+        case element::Type_t::f32:
+        {
+            runtime::reference::roi_align<float>(feature_maps->get_data_ptr<float>(),
+                                                 rois->get_data_ptr<float>(),
+                                                 batch_indices_vec_scaled_up.data(),
+                                                 out->get_data_ptr<float>(),
+                                                 feature_maps->get_shape(),
+                                                 rois->get_shape(),
+                                                 batch_indices->get_shape(),
+                                                 out->get_shape(),
+                                                 pooled_height,
+                                                 pooled_width,
+                                                 sampling_ratio,
+                                                 spatial_scale,
+                                                 pooling_mode);
+            break;
+        }
+        default: NGRAPH_UNREACHABLE("unsupported input type for roi_align");
+        }
+
+        return true; // every dispatched case above breaks out to here
+    }
+} // namespace
+
+bool op::v3::ROIAlign::evaluate(const HostTensorVector& outputs,
+                                const HostTensorVector& inputs) const
+{
+    return evaluate_roi_align( // forwards the op's attributes; the result is written to outputs[0]
+        inputs, outputs[0], m_pooled_h, m_pooled_w, m_sampling_ratio, m_spatial_scale, m_mode);
 }
diff --git a/ngraph/test/CMakeLists.txt b/ngraph/test/CMakeLists.txt
index 8a1caedaaf0..31224ab75a3 100644
--- a/ngraph/test/CMakeLists.txt
+++ b/ngraph/test/CMakeLists.txt
@@ -77,6 +77,7 @@ set(SRC
     op_eval/non_zero.cpp
     op_eval/reduce_l1.cpp
     op_eval/reduce_l2.cpp
+    op_eval/roi_align.cpp
     op_eval/softplus.cpp
     op_eval/split.cpp
     op_eval/swish.cpp
diff --git a/ngraph/test/models/onnx/roi_align_f32.prototxt b/ngraph/test/models/onnx/roi_align_f32.prototxt
new file mode 100644
index 00000000000..4652694c07e
--- /dev/null
+++ b/ngraph/test/models/onnx/roi_align_f32.prototxt
@@ -0,0 +1,110 @@
+ir_version: 3
+producer_name: "onnx-importer-test"
+graph {
+    node{
+        input: "feature_maps"
+        input: "rois"
+        input: "batch_indices"
+        output: "Y"
+        op_type: "RoiAlign"
+        attribute{
+            name: "output_height"
+            i: 3
+            type: INT
+        }
+        attribute{
+            name: "output_width"
+            i: 4
+            type: INT
+        }
+        attribute{
+            name: "sampling_ratio"
+            i: 2
+            type: INT
+        }
+        attribute{
+            name: "spatial_scale"
+            f: 0.0625
+            type: FLOAT
+        }
+        attribute{
+            name: "mode"
+            s: "avg"
+            type: STRING
+        }
+    }
+    name: "test-model"
+    input{
+        name: "feature_maps"
+        type {
+            tensor_type {
+                elem_type: 1
+                shape {
+                    dim {
+                        dim_value: 1
+                    }
+                    dim {
+                        dim_value: 3
+                    }
+                    dim {
+                        dim_value: 5
+                    }
+                    dim {
+                        dim_value: 5
+                    }
+                }
+            }
+        }
+    }
+    input{
+        name: "rois"
+        type {
+            tensor_type {
+                elem_type: 1
+                shape {
+                    dim {
+                        dim_value: 5
+                    }
+                    dim {
+                        dim_value: 4
+                    }
+                }
+            }
+        }
+    }
+    input{
+        name:"batch_indices"
+        type{
+            tensor_type{
+                elem_type: 6
+                shape {
+                    dim {
+                        dim_value: 5
+                    }
+                }
+            }
+        }
+    }
+    output{
+    name: "Y"
+    type {
+        tensor_type {
+            elem_type: 1
+            shape {
+                dim {
+                    dim_value: 5
+                }
+                dim {
+                    dim_value: 3
+                }
+                dim {
+                    dim_value: 3
+                }
+                dim {
+                    dim_value: 4
+                }
+            }
+        }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ngraph/test/onnx/onnx_import.in.cpp b/ngraph/test/onnx/onnx_import.in.cpp
index 779de839d43..774c42e98d6 100644
--- a/ngraph/test/onnx/onnx_import.in.cpp
+++ b/ngraph/test/onnx/onnx_import.in.cpp
@@ -748,7 +748,7 @@ namespace
         -1.21722605, 0.82919357,  0.55765697,  0.12657686,  0.63432172,  0.75425957,  -2.43721014,
         -1.24478184, 2.65316853,  1.19509542,  -0.95523998, 0.5149006,   -0.01151649, 0.68327026,
         -0.4589638,  -0.46554745, 0.21055324,  0.39266729,  2.05098086,  1.83207919};
-}
+} // namespace
 
 NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_0)
 {
@@ -2468,6 +2468,48 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_empty_initializers_handling)
     test_case.run_with_tolerance_as_fp(2.0e-5f);
 }
 
+NGRAPH_TEST(${BACKEND_NAME}, onnx_roi_align_f32) // imports roi_align_f32.prototxt and runs it
+{
+    const auto function = onnx_import::import_onnx_model(
+        file_util::path_join(SERIALIZED_ZOO, "onnx/roi_align_f32.prototxt"));
+
+    auto test_case = test::TestCase<TestEngine>(function);
+    test_case.add_input<float>({0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.,  10., 11., 12.,
+                                13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
+                                26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38.,
+                                39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., 51.,
+                                52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64.,
+                                65., 66., 67., 68., 69., 70., 71., 72., 73., 74.});
+
+    test_case.add_input<float>({7.,   5.,  7.,  5., -15., -15., -15., -15., -10., 21.,
+                                -10., 21., 13., 8., 13.,  8.,   -14., 19.,  -14., 19.});
+
+    test_case.add_input<int32_t>({0, 0, 0, 0, 0}); // batch_indices: one entry per ROI, all 0
+    test_case.add_expected_output<float>(
+        Shape{5, 3, 3, 4}, // matches the output "Y" dims declared in the prototxt
+        {2.95833f, 3.20833f, 3.45833f, 3.70833f, 4.625f,   4.875f,   5.125f,   5.375f,   6.29167f,
+         6.54167f, 6.79167f, 7.04167f, 27.9583f, 28.2083f, 28.4583f, 28.7083f, 29.625f,  29.875f,
+         30.125f,  30.375f,  31.2917f, 31.5417f, 31.7917f, 32.0417f, 52.9583f, 53.2083f, 53.4583f,
+         53.7083f, 54.625f,  54.875f,  55.125f,  55.375f,  56.2917f, 56.5417f, 56.7917f, 57.0417f,
+         0.f,      0.f,      0.f,      0.f,      0.f,      0.f,      0.f,      0.f,      0.f,
+         0.f,      0.f,      0.f,      25.f,     25.f,     25.f,     25.f,     25.f,     25.f,
+         25.f,     25.f,     25.f,     25.f,     25.f,     25.f,     50.f,     50.f,     50.f,
+         50.f,     50.f,     50.f,     50.f,     50.f,     50.f,     50.f,     50.f,     50.f,
+         7.39583f, 7.39583f, 7.42708f, 7.64583f, 9.0625f,  9.0625f,  9.09375f, 9.3125f,  10.7292f,
+         10.7292f, 10.7604f, 10.9792f, 32.3958f, 32.3958f, 32.4271f, 32.6458f, 34.0625f, 34.0625f,
+         34.0938f, 34.3125f, 35.7292f, 35.7292f, 35.7604f, 35.9792f, 57.3958f, 57.3958f, 57.4271f,
+         57.6458f, 59.0625f, 59.0625f, 59.0938f, 59.3125f, 60.7292f, 60.7292f, 60.7604f, 60.9792f,
+         4.27083f, 4.52083f, 4.77083f, 5.02083f, 5.9375f,  6.1875f,  6.4375f,  6.6875f,  7.60417f,
+         7.85417f, 8.10417f, 8.35417f, 29.2708f, 29.5208f, 29.7708f, 30.0208f, 30.9375f, 31.1875f,
+         31.4375f, 31.6875f, 32.6042f, 32.8542f, 33.1042f, 33.3542f, 54.2708f, 54.5208f, 54.7708f,
+         55.0208f, 55.9375f, 56.1875f, 56.4375f, 56.6875f, 57.6042f, 57.8542f, 58.1042f, 58.3542f,
+         6.77083f, 6.77083f, 6.77083f, 6.80208f, 8.4375f,  8.4375f,  8.4375f,  8.46875f, 10.1042f,
+         10.1042f, 10.1042f, 10.1354f, 31.7708f, 31.7708f, 31.7708f, 31.8021f, 33.4375f, 33.4375f,
+         33.4375f, 33.4688f, 35.1042f, 35.1042f, 35.1042f, 35.1354f, 56.7708f, 56.7708f, 56.7708f,
+         56.8021f, 58.4375f, 58.4375f, 58.4375f, 58.4688f, 60.1042f, 60.1042f, 60.1042f, 60.1354f});
+    test_case.run_with_tolerance_as_fp(1.0e-4f);
+}
+
 NGRAPH_TEST(${BACKEND_NAME}, quant_dequant_pattern)
 {
     const auto function = onnx_import::import_onnx_model(
diff --git a/ngraph/test/op_eval/roi_align.cpp b/ngraph/test/op_eval/roi_align.cpp
new file mode 100644
index 00000000000..3e43e1f810d
--- /dev/null
+++ b/ngraph/test/op_eval/roi_align.cpp
@@ -0,0 +1,166 @@
+//*****************************************************************************
+// Copyright 2017-2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+
+#include <numeric>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "ngraph/op/roi_align.hpp"
+#include "ngraph/runtime/host_tensor.hpp"
+#include "ngraph/validation_util.hpp"
+#include "runtime/backend.hpp"
+#include "util/all_close_f.hpp"
+#include "util/test_tools.hpp"
+#include "util/type_prop.hpp"
+
+using namespace std;
+using namespace ngraph;
+
+// ROIAlign-3 "avg" mode: 5 ROIs over a 1x3x5x5 f32 input, each pooled into 3x4 bins.
+TEST(op_eval, roi_align_avg_pool)
+{
+    const int N = 1;
+    const int C = 3;
+    const int H = 5;
+    const int W = 5;
+    const int num_rois = 5;
+    const int pooled_height = 3;
+    const int pooled_width = 4;
+    const auto data_shape = Shape{N, C, H, W};
+    const auto rois_shape = Shape{num_rois, 4};
+
+    const auto data = make_shared<op::Parameter>(element::f32, data_shape);
+    const auto rois = make_shared<op::Parameter>(element::f32, rois_shape);
+    // i64 so the parameter agrees with the int64_t batch-index tensor passed to evaluate().
+    const auto batch_indices = make_shared<op::Parameter>(element::i64, Shape{num_rois});
+
+    auto roi_align = make_shared<op::v3::ROIAlign>(
+        data, rois, batch_indices, pooled_height, pooled_width, 2, 1.0f / 16.0f, "avg");
+    auto f = make_shared<Function>(roi_align, ParameterVector{data, rois, batch_indices});
+
+    // Feature map contents: the ramp 0..74, one value per element of data_shape.
+    std::vector<float> data_vec{0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.,  10., 11., 12.,
+                                13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
+                                26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38.,
+                                39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., 51.,
+                                52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64.,
+                                65., 66., 67., 68., 69., 70., 71., 72., 73., 74.};
+    std::vector<float> rois_vec{7.,   5.,  7.,  5., -15., -15., -15., -15., -10., 21.,
+                                -10., 21., 13., 8., 13.,  8.,   -14., 19.,  -14., 19.};
+    std::vector<int64_t> batch_indices_vec{0, 0, 0, 0, 0};
+    // Supply the batch indices as tensor data; a shape-only host tensor would not carry them.
+    const HostTensorVector inputs{
+        make_host_tensor<element::Type_t::f32>(data_shape, data_vec),
+        make_host_tensor<element::Type_t::f32>(rois_shape, rois_vec),
+        make_host_tensor<element::Type_t::i64>(Shape{num_rois}, batch_indices_vec)};
+    auto result = make_shared<HostTensor>();
+    ASSERT_TRUE(f->evaluate({result}, inputs));
+
+    std::vector<float> expected_vec{
+        2.95833f, 3.20833f, 3.45833f, 3.70833f, 4.625f,   4.875f,   5.125f,   5.375f,   6.29167f,
+        6.54167f, 6.79167f, 7.04167f, 27.9583f, 28.2083f, 28.4583f, 28.7083f, 29.625f,  29.875f,
+        30.125f,  30.375f,  31.2917f, 31.5417f, 31.7917f, 32.0417f, 52.9583f, 53.2083f, 53.4583f,
+        53.7083f, 54.625f,  54.875f,  55.125f,  55.375f,  56.2917f, 56.5417f, 56.7917f, 57.0417f,
+        0.f,      0.f,      0.f,      0.f,      0.f,      0.f,      0.f,      0.f,      0.f,
+        0.f,      0.f,      0.f,      25.f,     25.f,     25.f,     25.f,     25.f,     25.f,
+        25.f,     25.f,     25.f,     25.f,     25.f,     25.f,     50.f,     50.f,     50.f,
+        50.f,     50.f,     50.f,     50.f,     50.f,     50.f,     50.f,     50.f,     50.f,
+        7.39583f, 7.39583f, 7.42708f, 7.64583f, 9.0625f,  9.0625f,  9.09375f, 9.3125f,  10.7292f,
+        10.7292f, 10.7604f, 10.9792f, 32.3958f, 32.3958f, 32.4271f, 32.6458f, 34.0625f, 34.0625f,
+        34.0938f, 34.3125f, 35.7292f, 35.7292f, 35.7604f, 35.9792f, 57.3958f, 57.3958f, 57.4271f,
+        57.6458f, 59.0625f, 59.0625f, 59.0938f, 59.3125f, 60.7292f, 60.7292f, 60.7604f, 60.9792f,
+        4.27083f, 4.52083f, 4.77083f, 5.02083f, 5.9375f,  6.1875f,  6.4375f,  6.6875f,  7.60417f,
+        7.85417f, 8.10417f, 8.35417f, 29.2708f, 29.5208f, 29.7708f, 30.0208f, 30.9375f, 31.1875f,
+        31.4375f, 31.6875f, 32.6042f, 32.8542f, 33.1042f, 33.3542f, 54.2708f, 54.5208f, 54.7708f,
+        55.0208f, 55.9375f, 56.1875f, 56.4375f, 56.6875f, 57.6042f, 57.8542f, 58.1042f, 58.3542f,
+        6.77083f, 6.77083f, 6.77083f, 6.80208f, 8.4375f,  8.4375f,  8.4375f,  8.46875f, 10.1042f,
+        10.1042f, 10.1042f, 10.1354f, 31.7708f, 31.7708f, 31.7708f, 31.8021f, 33.4375f, 33.4375f,
+        33.4375f, 33.4688f, 35.1042f, 35.1042f, 35.1042f, 35.1354f, 56.7708f, 56.7708f, 56.7708f,
+        56.8021f, 58.4375f, 58.4375f, 58.4375f, 58.4688f, 60.1042f, 60.1042f, 60.1042f, 60.1354f};
+    const auto expected_shape = Shape{num_rois, C, pooled_height, pooled_width};
+
+    EXPECT_EQ(result->get_element_type(), element::f32);
+    EXPECT_EQ(result->get_shape(), expected_shape);
+    ASSERT_TRUE(test::all_close_f(read_vector<float>(result), expected_vec, 6, 0.001));
+}
+// ROIAlign-3 "max" mode: 5 ROIs over a 1x3x5x5 f32 input, each pooled into 3x4 bins.
+TEST(op_eval, roi_align_max_pool)
+{
+    const int N = 1;
+    const int C = 3;
+    const int H = 5;
+    const int W = 5;
+    const int num_rois = 5;
+    const int pooled_height = 3;
+    const int pooled_width = 4;
+    const auto data_shape = Shape{N, C, H, W};
+    const auto rois_shape = Shape{num_rois, 4};
+
+    const auto data = make_shared<op::Parameter>(element::f32, data_shape);
+    const auto rois = make_shared<op::Parameter>(element::f32, rois_shape);
+    // i64 so the parameter agrees with the int64_t batch-index tensor passed to evaluate().
+    const auto batch_indices = make_shared<op::Parameter>(element::i64, Shape{num_rois});
+
+    auto roi_align = make_shared<op::v3::ROIAlign>(
+        data, rois, batch_indices, pooled_height, pooled_width, 2, 1.0f / 16.0f, "max");
+    auto f = make_shared<Function>(roi_align, ParameterVector{data, rois, batch_indices});
+
+    // Feature map contents: the ramp 0..74, one value per element of data_shape.
+    std::vector<float> data_vec{0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.,  10., 11., 12.,
+                                13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
+                                26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38.,
+                                39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., 51.,
+                                52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64.,
+                                65., 66., 67., 68., 69., 70., 71., 72., 73., 74.};
+    std::vector<float> rois_vec{7.,   5.,  7.,  5., -15., -15., -15., -15., -10., 21.,
+                                -10., 21., 13., 8., 13.,  8.,   -14., 19.,  -14., 19.};
+    std::vector<int64_t> batch_indices_vec{0, 0, 0, 0, 0};
+    // Supply the batch indices as tensor data; a shape-only host tensor would not carry them.
+    const HostTensorVector inputs{
+        make_host_tensor<element::Type_t::f32>(data_shape, data_vec),
+        make_host_tensor<element::Type_t::f32>(rois_shape, rois_vec),
+        make_host_tensor<element::Type_t::i64>(Shape{num_rois}, batch_indices_vec)};
+    auto result = make_shared<HostTensor>();
+    ASSERT_TRUE(f->evaluate({result}, inputs));
+
+    std::vector<float> expected_vec{
+        2.10938f,  2.95313f, 3.375f,   2.53125f,  3.35938f, 4.70313f, 5.375f,   4.03125f, 3.51563f,
+        4.92188f,  5.625f,   4.21875f, 10.8984f,  15.2578f, 17.4375f, 13.0781f, 17.3568f, 24.2995f,
+        27.7708f,  20.8281f, 18.1641f, 25.4297f,  29.0625f, 21.7969f, 19.6875f, 27.5625f, 31.5f,
+        23.625f,   31.3542f, 43.8958f, 50.1667f,  37.625f,  32.8125f, 45.9375f, 52.5f,    39.375f,
+        0.f,       0.f,      0.f,      0.f,       0.f,      0.f,      0.f,      0.f,      0.f,
+        0.f,       0.f,      0.f,      25.f,      25.f,     25.f,     25.f,     25.f,     25.f,
+        25.f,      25.f,     25.f,     25.f,      25.f,     25.f,     50.f,     50.f,     50.f,
+        50.f,      50.f,     50.f,     50.f,      50.f,     50.f,     50.f,     50.f,     50.f,
+        5.625f,    5.625f,   5.625f,   4.57031f,  8.95833f, 8.95833f, 8.95833f, 7.27865f, 9.375f,
+        9.375f,    9.375f,   7.61719f, 19.6875f,  19.6875f, 19.6875f, 15.9961f, 31.3542f, 31.3542f,
+        31.3542f,  25.4753f, 32.8125f, 32.8125f,  32.8125f, 26.6602f, 33.75f,   33.75f,   33.75f,
+        27.4219f,  53.75f,   53.75f,   53.75f,    43.6719f, 56.25f,   56.25f,   56.25f,   45.7031f,
+        4.5f,      3.9375f,  2.8125f,  3.9375f,   5.5f,     4.8125f,  3.4375f,  4.8125f,  4.58333f,
+        4.01042f,  2.86458f, 3.9375f,  23.25f,    20.3438f, 14.5313f, 18.f,     28.4167f, 24.86458f,
+        17.76042f, 22.f,     23.25f,   20.3437f,  14.5312f, 18.f,     42.f,     36.75f,   26.25f,
+        32.0625f,  51.3333f, 44.9167f, 32.08333f, 39.1875f, 42.f,     36.75f,   26.25f,   32.0625f,
+        4.375f,    4.375f,   4.375f,   4.375f,    7.70833f, 7.70833f, 7.70833f, 7.70833f, 9.375f,
+        9.375f,    9.375f,   9.375f,   21.875f,   21.875f,  21.875f,  21.875f,  26.9792f, 26.9792f,
+        26.9792f,  26.9792f, 32.8125f, 32.8125f,  32.8125f, 32.8125f, 40.1042f, 40.1042f, 40.1042f,
+        40.1042f,  46.25f,   46.25f,   46.25f,    46.25f,   56.25f,   56.25f,   56.25f,   56.25f};
+    const auto expected_shape = Shape{num_rois, C, pooled_height, pooled_width};
+
+    EXPECT_EQ(result->get_element_type(), element::f32);
+    EXPECT_EQ(result->get_shape(), expected_shape);
+    ASSERT_TRUE(test::all_close_f(read_vector<float>(result), expected_vec, 6, 0.001));
+}
\ No newline at end of file
diff --git a/ngraph/test/runtime/ie/unit_test.manifest b/ngraph/test/runtime/ie/unit_test.manifest
index 29400589f81..c3a0f7a5bd5 100644
--- a/ngraph/test/runtime/ie/unit_test.manifest
+++ b/ngraph/test/runtime/ie/unit_test.manifest
@@ -552,6 +552,9 @@ abc_int64
 # Unsupported primitive of type: SigmoidBackprop
 sigmoid_bprop_n1c1h4
 
+# Unsupported primitive of type: ROIAlign
+IE_CPU.onnx_roi_align_f32
+
 # [NOT_IMPLEMENTED] Input image format BOOL is not supported yet...
 select
 not
@@ -1201,7 +1204,7 @@ IE_GPU.onnx_dyn_shapes_avg_pool_dyn_shape
 IE_GPU.onnx_dyn_shapes_max_pool_dyn_shape
 IE_GPU.onnx_dyn_shapes_global_avg_pool_dyn_shape
 IE_GPU.onnx_dyn_shapes_global_max_pool_dyn_shape
-
+IE_GPU.onnx_roi_align_f32
 IE_GPU.tanh
 IE_GPU.tan
 IE_GPU.sum_to_scalar