mirror of
https://github.com/OPM/ResInsight.git
synced 2025-02-25 18:55:39 -06:00
#5960 Add utility for computing "edit distance" between two strings.
This commit is contained in:
parent
30ee125749
commit
8a0f911856
@ -142,3 +142,51 @@ size_t RiaStdStringTools::findCharMatchCount( const std::string& s, char c )
|
|||||||
}
|
}
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//--------------------------------------------------------------------------------------------------
|
||||||
|
/// Function to find Levenshtein Distance between two strings (x and y).
|
||||||
|
/// Adapted from pseudocode from wikipedia: https://en.wikipedia.org/wiki/Levenshtein_distance
|
||||||
|
/// Implementation is the Wagner-Fischer variant: https://en.wikipedia.org/wiki/Wagner-Fischer_algorithm
|
||||||
|
///
|
||||||
|
/// Return value is higher when strings are more "different", and zero when strings are equal.
|
||||||
|
//--------------------------------------------------------------------------------------------------
|
||||||
|
int RiaStdStringTools::computeEditDistance( const std::string& x, const std::string& y )
|
||||||
|
{
|
||||||
|
// for all i and j, T[i,j] will hold the Levenshtein distance between
|
||||||
|
// the first i characters of x and the first j characters of y
|
||||||
|
int m = x.length();
|
||||||
|
int n = y.length();
|
||||||
|
|
||||||
|
std::vector<std::vector<int>> T( m + 1, std::vector<int>( n + 1, 0 ) );
|
||||||
|
|
||||||
|
// source prefixes can be transformed into empty string by
|
||||||
|
// dropping all characters
|
||||||
|
for ( int i = 1; i <= m; i++ )
|
||||||
|
T[i][0] = i;
|
||||||
|
|
||||||
|
// target prefixes can be reached from empty source prefix
|
||||||
|
// by inserting every character
|
||||||
|
for ( int j = 1; j <= n; j++ )
|
||||||
|
T[0][j] = j;
|
||||||
|
|
||||||
|
// fill the lookup table in bottom-up manner
|
||||||
|
for ( int i = 1; i <= m; i++ )
|
||||||
|
{
|
||||||
|
for ( int j = 1; j <= n; j++ )
|
||||||
|
{
|
||||||
|
int substitutionCost;
|
||||||
|
if ( x[i - 1] == y[j - 1] )
|
||||||
|
substitutionCost = 0;
|
||||||
|
else
|
||||||
|
substitutionCost = 1;
|
||||||
|
|
||||||
|
int deletion = T[i - 1][j] + 1;
|
||||||
|
int insertion = T[i][j - 1] + 1;
|
||||||
|
int replacement = T[i - 1][j - 1] + substitutionCost;
|
||||||
|
T[i][j] = std::min( std::min( deletion, insertion ), replacement );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// The distance between the two full strings as the last value computed.
|
||||||
|
return T[m][n];
|
||||||
|
}
|
||||||
|
@ -43,6 +43,8 @@ public:
|
|||||||
|
|
||||||
static std::vector<std::string> splitStringBySpace( const std::string& s );
|
static std::vector<std::string> splitStringBySpace( const std::string& s );
|
||||||
|
|
||||||
|
// Levenshtein edit distance between x and y; zero when the strings are equal, higher the more they differ
static int computeEditDistance( const std::string& x, const std::string& y);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
template <class Container>
|
template <class Container>
|
||||||
static void splitByDelimiter( const std::string& str, Container& cont, char delimiter = ' ' );
|
static void splitByDelimiter( const std::string& str, Container& cont, char delimiter = ' ' );
|
||||||
|
@ -23,3 +23,21 @@ TEST( RiaStdStringToolsTest, ParseNumbers )
|
|||||||
EXPECT_TRUE( RiaStdStringTools::isNumber( text, decimalPoint ) );
|
EXPECT_TRUE( RiaStdStringTools::isNumber( text, decimalPoint ) );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//--------------------------------------------------------------------------------------------------
|
||||||
|
///
|
||||||
|
//--------------------------------------------------------------------------------------------------
|
||||||
|
TEST( RiaStdStringToolsTest, EditDistance )
|
||||||
|
{
|
||||||
|
// Equal string needs zero edits
|
||||||
|
EXPECT_EQ( 0, RiaStdStringTools::computeEditDistance( "same", "same" ) );
|
||||||
|
|
||||||
|
// Empty strings are also zero edits
|
||||||
|
EXPECT_EQ( 0, RiaStdStringTools::computeEditDistance( "", "" ) );
|
||||||
|
|
||||||
|
// Examples from wikipedia
|
||||||
|
EXPECT_EQ( 3, RiaStdStringTools::computeEditDistance( "kitten", "sitting" ) );
|
||||||
|
EXPECT_EQ( 3, RiaStdStringTools::computeEditDistance( "sitting", "kitten" ) );
|
||||||
|
EXPECT_EQ( 3, RiaStdStringTools::computeEditDistance( "Saturday", "Sunday" ) );
|
||||||
|
EXPECT_EQ( 3, RiaStdStringTools::computeEditDistance( "Sunday", "Saturday" ) );
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user