From 809f5597923672a8fca1725231c39d2a0e053a0c Mon Sep 17 00:00:00 2001 From: Kristian Bendiksen Date: Wed, 22 May 2024 11:14:12 +0200 Subject: [PATCH] Add test for reading Parquet files using Apache Arrow. --- ApplicationLibCode/UnitTests/CMakeLists.txt | 1 + .../UnitTests/RifParquetReader-Test.cpp | 52 ++++++++++++++++++ .../TestData/RifParquetReader/example.parquet | Bin 0 -> 1478 bytes CMakeLists.txt | 20 +++++++ vcpkg_x64-linux.txt | 1 + vcpkg_x64-osx.txt | 1 + vcpkg_x64-windows.txt | 1 + 7 files changed, 76 insertions(+) create mode 100644 ApplicationLibCode/UnitTests/RifParquetReader-Test.cpp create mode 100644 ApplicationLibCode/UnitTests/TestData/RifParquetReader/example.parquet diff --git a/ApplicationLibCode/UnitTests/CMakeLists.txt b/ApplicationLibCode/UnitTests/CMakeLists.txt index d25c8bd5c9..ac7d8e0de6 100644 --- a/ApplicationLibCode/UnitTests/CMakeLists.txt +++ b/ApplicationLibCode/UnitTests/CMakeLists.txt @@ -102,6 +102,7 @@ set(SOURCE_UNITTEST_FILES ${CMAKE_CURRENT_LIST_DIR}/RifSummaryCalculationIO-Test.cpp ${CMAKE_CURRENT_LIST_DIR}/RimEmReader-Test.cpp ${CMAKE_CURRENT_LIST_DIR}/RifPolygonReader-Test.cpp + ${CMAKE_CURRENT_LIST_DIR}/RifParquetReader-Test.cpp ) if(RESINSIGHT_ENABLE_GRPC) diff --git a/ApplicationLibCode/UnitTests/RifParquetReader-Test.cpp b/ApplicationLibCode/UnitTests/RifParquetReader-Test.cpp new file mode 100644 index 0000000000..bcc0138287 --- /dev/null +++ b/ApplicationLibCode/UnitTests/RifParquetReader-Test.cpp @@ -0,0 +1,52 @@ +#include "gtest/gtest.h" + +#include "RiaTestDataDirectory.h" + +#include +#include +#include +#include +#include + +#include +#include + +TEST( RifParquetReaderTest, ReadValidFile ) +{ + QDir baseFolder( TEST_DATA_DIR ); + + QString filename( "RifParquetReader/example.parquet" ); + QString filePath = baseFolder.absoluteFilePath( filename ); + EXPECT_TRUE( QFile::exists( filePath ) ); + + arrow::MemoryPool* pool = arrow::default_memory_pool(); + + auto openResult = arrow::io::ReadableFile::Open( filePath.toStdString().c_str() ); + EXPECT_TRUE( openResult.ok() ); + + std::shared_ptr input = std::move( openResult ).ValueOrDie(); + + // Open Parquet file reader + std::unique_ptr arrow_reader; + EXPECT_TRUE( parquet::arrow::OpenFile( input, pool, &arrow_reader ).ok() ); + + // Read entire file as a single Arrow table + std::shared_ptr table; + EXPECT_TRUE( arrow_reader->ReadTable( &table ).ok() ); + + // Expect one column named "col1" + EXPECT_EQ( table->columns().size(), 1 ); + std::vector expectedNames = { "col1" }; + EXPECT_EQ( table->ColumnNames(), expectedNames ); + + // Expected the column to contain 100 int64 [0, 99] + auto columnData = table->column( 0 ); + int expectedSize = 100; + EXPECT_EQ( columnData->length(), expectedSize ); + for ( int i = 0; i < expectedSize; i++ ) + { + std::shared_ptr scalar = columnData->GetScalar( i ).ValueOrDie(); + std::shared_ptr intScalar = std::dynamic_pointer_cast( scalar ); + EXPECT_TRUE( scalar->Equals( arrow::Int64Scalar( i ) ) ); + } +} diff --git a/ApplicationLibCode/UnitTests/TestData/RifParquetReader/example.parquet b/ApplicationLibCode/UnitTests/TestData/RifParquetReader/example.parquet new file mode 100644 index 0000000000000000000000000000000000000000..96d4beda6c624bdbbc8ae99c75c7b66f6226fa58 GIT binary patch literal 1478 zcmcK4%Wo1v90%~(7AYzgd@Sy^YsE(qD^RGVR+QS|C4B>0TdURrB1jOB)u5Afy;eCf%H2Y*@SGc&vV=10Q*LNwwKRpPpq zU4vp-5Xla*5dvm)upX-5Ca8uDPy;u^EpRK;!fmh->YyHOhqaP+V!aEB&;WNs3Cgeu zHbVvOfh}+^G{SvwKRf^r!d7U4hoBiAhHdZ&v_LDg!FFhe4%h*Y!cORfUGNy}hR2}` zo`5Hz8}`6cuoqJ3fnMl?ei(qKVG#DgemDRJ;Sd~#BQONRFapoOvv3rigJW!AW=-PQfUQ!8jBNe^bhWg`*eMqFRwOwVP7y-%kw0{Fhypqn>@rg3!=BH#gO5 zmKx{p#NMX5TNdZ9c%F>gmhN4B_r27!yqNN}Js4iOpMIZyJ-ND+2|W9nU3-}Q@bmZW z_2qof(zH;1T=x^&ozZtjNm$E`k4+ zvS89MSx1l+NlPP