Hi all,
I have inherited some fuzzy match code (below) which compares two text strings, it works fine and gives me the character similarity I need, but it also gives me 5 columns of other data (which I don't want or need). Example below.
Would anyone know which bit of the below to edit out so that I just get my similarity match answer only?
Thanks
I have inherited some fuzzy match code (below) which compares two text strings, it works fine and gives me the character similarity I need, but it also gives me 5 columns of other data (which I don't want or need). Example below.
Would anyone know which bit of the below to edit out so that I just get my similarity match answer only?
VBA Code:
'UDF
Function CustomFuzzy(rng1 As Range, rng2 As Range) As Variant '(rng1= single value, rng2=comparisons range)
Dim sRng As Range
Dim fArr(1 To 6)
tcount = rng2.Cells.Count
ReDim tArr(1 To tcount, 1 To 2)
i = 0
For Each sRng In rng2 ' = 1 To tcount 'Each tRng In rng2
i = i + 1
tArr(i, 2) = sRng.Value
tArr(i, 1) = Levenshtein(rng1.Value, sRng.Value)
Next
If tcount > 1 Then tArr = BubbleSrt(tArr, True)
If tcount > 3 Then retValCount = 3 Else retValCount = tcount
'returns the top three matches in a HORIZONTAL array
For p = 1 To retValCount '3
fArr((2 * p) - 1) = tArr(p, 1) 'numeric fuzzy score
fArr((2 * p)) = tArr(p, 2) 'text
Next p
CustomFuzzy = fArr
End Function
'Option Base 0 assumed
'POB: fn with byte array is 17 times faster
Function Levenshtein(ByVal string1 As String, ByVal string2 As String) As Long
Dim i As Long, j As Long, bs1() As Byte, bs2() As Byte
Dim string1_length As Long
Dim string2_length As Long
Dim distance() As Long
Dim min1 As Long, min2 As Long, min3 As Long
string1_length = Len(string1)
string2_length = Len(string2)
ReDim distance(string1_length, string2_length)
bs1 = string1
bs2 = string2
For i = 0 To string1_length
distance(i, 0) = i
Next
For j = 0 To string2_length
distance(0, j) = j
Next
For i = 1 To string1_length
For j = 1 To string2_length
'slow way: If Mid$(string1, i, 1) = Mid$(string2, j, 1) Then
If bs1((i - 1) * 2) = bs2((j - 1) * 2) Then ' *2 because Unicode every 2nd byte is 0
distance(i, j) = distance(i - 1, j - 1)
Else
'distance(i, j) = Application.WorksheetFunction.Min _
(distance(i - 1, j) + 1, _
distance(i, j - 1) + 1, _
distance(i - 1, j - 1) + 1)
' spell it out, 50 times faster than worksheetfunction.min
min1 = distance(i - 1, j) + 1
min2 = distance(i, j - 1) + 1
min3 = distance(i - 1, j - 1) + 1
If min1 <= min2 And min1 <= min3 Then
distance(i, j) = min1
ElseIf min2 <= min1 And min2 <= min3 Then
distance(i, j) = min2
Else
distance(i, j) = min3
End If
End If
Next
Next
Levenshtein = distance(string1_length, string2_length)
End Function
Public Function BubbleSrt(ArrayIn, Ascending As Boolean) 'started as 1D, converted to 2D sort
'in 2D, the first array parameter guides the lbound/ubound. I forgot the syntax for array.index to
'make this more flexible.
'The first parameter is the one it is sorted on
Dim SrtTemp As Variant
Dim i As Long
Dim j As Long
If Ascending = True Then
For i = LBound(ArrayIn) To UBound(ArrayIn)
For j = i + 1 To UBound(ArrayIn)
If ArrayIn(i, 1) > ArrayIn(j, 1) Then
'the 2D conversion is just the second half of each line, moving the second parameter
SrtTemp = ArrayIn(j, 1): SrtTemp2 = ArrayIn(j, 2)
ArrayIn(j, 1) = ArrayIn(i, 1): ArrayIn(j, 2) = ArrayIn(i, 2)
ArrayIn(i, 1) = SrtTemp: ArrayIn(i, 2) = SrtTemp2
End If
Next j
Next i
Else
For i = LBound(ArrayIn) To UBound(ArrayIn)
For j = i + 1 To UBound(ArrayIn)
If ArrayIn(i, 1) < ArrayIn(j, 1) Then
'the 2D conversion is just the second half of each line, moving the second parameter
SrtTemp = ArrayIn(j, 1): SrtTemp2 = ArrayIn(j, 2)
ArrayIn(j, 1) = ArrayIn(i, 1): ArrayIn(j, 2) = ArrayIn(i, 2)
ArrayIn(i, 1) = SrtTemp: ArrayIn(i, 2) = SrtTemp2
End If
Next j
Next i
End If
BubbleSrt = ArrayIn
End Function
Thanks