Wrote this mass of VBA code in order to compare sentences for equality. Any recommendations for speeding it up? Not looking to use other programs (though it would be easier) as VBA is the only current one I have access to. Looking for better algorithms for assignment issue as well as removing loops.
Main function:
Supporting Functions:
Thanks!
Main function:
Code:
'Name: sentenceComparison'Functionality: To compare two sentences and measure equality
'Parameters: string1 - String
' string2 - String
' ignoreAlphabetic - Boolean
' ignoreSpecialCharacters - Boolean
' numChar - Integer
' ignoreExtraWords - Boolean
' printConsole - Boolean
'Returns: String
' True
' False
Public Function sentenceComparison(String1 As String, String2 As String, _
Optional ignoreAlphabetic As Boolean = False, Optional ignoreSpecialCharacters = True, _
Optional numChar As Integer = 0, Optional ignoreExtraWords As Boolean = True, Optional printConsole As Boolean = True) As String
'Variable Declaration'
Dim l1 As Long, l2 As Long, equality() As Long, l As Double, Min As Integer
Dim buffS1 As New Collection, buffS2 As New Collection, bestFound() As String, toCheck() As String, equalityNxN As Integer
Dim C As String, bestFoundHungarian As String, str As String
Dim foundAlpha As Boolean
''''''''''''''''''''''
'Default Values Declaration'
If ignoreSpecialCharacters Then
String1 = specialClear(String1)
String2 = specialClear(String2)
End If
String1 = Trim(String1)
String2 = Trim(String2)
foundAlpha = False
If ignoreAlphabetic Then
String1 = LCase(String1)
String2 = LCase(String2)
End If
''''''''''''''''''''''''''''
'If they're equal return true
If (String1 = String2) Then
sentenceComparison = "True"
Exit Function
End If
l1 = Len(String1)
l2 = Len(String2)
'break up string1 into collection
For i = 1 To l1 + 1
If ((Not Mid$(String1, i, 1) = " ") And (i < l1 + 1)) Then
C = C & Mid$(String1, i, 1)
Else
If (Not C = "") Then
buffS1.Add Trim(C)
End If
C = ""
End If
Next
C = ""
'break up string2 into collection
For i = 1 To l2 + 1
If ((Not Mid$(String2, i, 1) = " ") And (i < l2 + 1)) Then
C = C & Mid$(String2, i, 1)
Else
If (Not C = "") Then
buffS2.Add Trim(C)
End If
C = ""
End If
Next
C = ""
ReDim equality(0 To (buffS1.count - 1), 0 To buffS2.count - 1)
If (UBound(equality, 1) - LBound(equality, 1) + 1) < (UBound(equality, 2) - LBound(equality, 2) + 1) Then
equality = transpose(equality)
ReDim Preserve equality(0 To UBound(equality, 1) - LBound(equality, 1), 0 To UBound(equality, 1) - LBound(equality, 1))
equality = transpose(equality)
Else
If (UBound(equality, 1) - LBound(equality, 1) + 1) > (UBound(equality, 2) - LBound(equality, 2) + 1) Then
ReDim Preserve equality(0 To UBound(equality, 1) - LBound(equality, 1), 0 To UBound(equality, 1) - LBound(equality, 1))
End If
End If
'this way see total difference in levensthein value if the extra words dont count
If ignoreExtraWords Then
For i = 0 To buffS1.count - 1
For j = 0 To buffS2.count - 1
equality(i, j) = Levenshtein(buffS1(i + 1), buffS2(j + 1))
Next j
Next i
End If
'this way see total difference in levenshtein value if the extra words compared with ""
If Not ignoreExtraWords Then
For i = 0 To UBound(equality, 1)
For j = 0 To UBound(equality, 2)
'set the equality equal to how close each word is to eachother'
If (i > buffS1.count - 1) Then
If (j > buffS2.count - 1) Then
equality(i, j) = Levenshtein("", "")
Else
equality(i, j) = Levenshtein("", buffS2(j + 1))
End If
Else
If (j > buffS2.count - 1) Then
equality(i, j) = Levenshtein(buffS1(i + 1), "")
Else
equality(i, j) = Levenshtein(buffS1(i + 1), buffS2(j + 1))
End If
End If
Next j
Next i
End If
bestFoundHungarian = hungarianAlgorithm(equality)
bestFound = Split(Trim(bestFoundHungarian), " ")
For i = LBound(bestFound, 1) To UBound(bestFound, 1)
sentenceComparison = sentenceComparison & bestFound(i) & ": " & equality(Left(bestFound(i), InStr(bestFound(i), ",") - 1) - 1, Right(bestFound(i), Len(bestFound(i)) - InStr(bestFound(i), ",")) - 1) & "; "
Next i
'Printing to console
If printConsole Then
Max = 0
For i = 1 To buffS1.count
If Len(buffS1(i)) > Max Then
Max = Len(buffS1(i))
End If
Next i
For i = 1 To buffS2.count
If Len(buffS2(i)) > Max Then
Max = Len(buffS2(i))
End If
Next i
For j = LBound(equality, 2) - 1 To UBound(equality, 2)
str = ""
For i = LBound(equality, 1) To UBound(equality, 1)
If j = LBound(equality, 2) - 1 And i < buffS1.count Then
If i = LBound(equality, 1) Then
str = str & ""
For k = 0 To Max
str = str & " "
Next k
str = str & "|"
End If
str = str & buffS1(i + 1)
For k = Len(buffS1(i + 1)) To Max
str = str & " "
Next k
str = str & "|"
Else
If j = LBound(equality, 2) - 1 Then
For k = 0 To Max
str = str & " "
Next k
str = str & "|"
Else
If i = LBound(equality, 1) And j < buffS2.count Then
str = str & buffS2(j + 1)
For k = Len(buffS2(j + 1)) To Max
str = str & " "
Next k
str = str & "|" & equality(i, j)
C = CLng(equality(i, j))
For k = Len(C) To Max
str = str & " "
Next k
str = str & "|"
Else
If i = LBound(equality, 1) Then
For k = 0 To Max
str = str & " "
Next k
str = str & "|" & equality(i, j)
For k = 1 To Max
str = str & " "
Next k
str = str & "|"
Else
str = str & equality(i, j)
C = CLng(equality(i, j))
For k = Len(C) To Max
str = str & " "
Next k
str = str & "|"
End If
End If
End If
End If
Next i
Debug.Print "|" & str
Next j
End If
If getLevenshteinCount(Trim(sentenceComparison)) > numChar Then
sentenceComparison = "False"
Else
sentenceComparison = "True"
End If
End Function
Supporting Functions:
Code:
Private Function getLevenshteinCount(ofString As String) As Integer
Dim splitOfString() As String
splitOfString = Split(ofString, ";")
For i = LBound(splitOfString) To UBound(splitOfString) - 1
getLevenshteinCount = getLevenshteinCount + Right(Trim(splitOfString(i)), Len(Trim(splitOfString(i))) - InStr(Trim(splitOfString(i)), " "))
Next i
End Function
Private Function transpose(matrixToT() As Long) As Long()
Dim tMatrix() As Long
ReDim tMatrix(0 To UBound(matrixToT, 2) - LBound(matrixToT, 2), 0 To UBound(matrixToT, 1) - LBound(matrixToT, 1))
For j = LBound(matrixToT, 2) To UBound(matrixToT, 2)
For i = LBound(matrixToT, 1) To UBound(matrixToT, 1)
tMatrix(j, i) = matrixToT(i, j)
Next i
Next j
transpose = tMatrix
End Function
'Name: specialClear
'Functionality: To clear any non-alphabetic characters/non-numerical characters from a string
'Parameters: stringToRemove - String
'Returns: String
Public Function specialClear(stringToRemove As String) As String
Dim buff As New Collection
Dim C As String
stringToRemove = Trim(stringToRemove)
For i = 1 To Len(stringToRemove)
C = Mid$(stringToRemove, i, 1)
If (isAlphaChar(C) Or isNumberChar(C)) Then
buff.Add C
Else
If (C = " ") Then
buff.Add "-"
Else
buff.Add "-"
End If
End If
Next
specialClear = Trim(WorksheetFunction.Substitute(Join(CollectionToArray(buff), ""), "-", " "))
End Function
'Name: isAlphaChar
'Functionality: To return whether or not a string value is alphabetic
'Parameters: strValue - String
'Returns: Boolean
Public Function isAlphaChar(strValue As String) As Boolean
Select Case Asc(strValue)
Case 65 To 90, 97 To 122
isAlphaChar = True
Case Else
isAlphaChar = False
End Select
End Function
'Name: isNumberChar
'Functionality: To return whether or not a string value is numerical
'Parameters: strValue - String
'Returns: Boolean
Public Function isNumberChar(strValue As String) As Boolean
Select Case Asc(strValue)
Case 48 To 57
isNumberChar = True
Case Else
isNumberChar = False
End Select
End Function
'Name: CollectionToArray
'Functionality: To Turn A Collection of Data into an Array
'Parameters: myCol - Collection
'Returns: Variant
Public Function CollectionToArray(myCol As Collection) As Variant
Dim result As Variant
Dim cnt As Long
ReDim result(myCol.count - 1)
For cnt = 0 To myCol.count - 1
result(cnt) = Trim(myCol(cnt + 1))
Next cnt
CollectionToArray = result
End Function
'Name: Levenshtein
'Functionality: To measure how many changes it takes to get from one string to the next
'Parameters: s1 - String
' s2 - String
'Returns: Integer
Function Levenshtein(s1 As String, s2 As String)
Dim i As Integer
Dim j As Integer
Dim l1 As Integer
Dim l2 As Integer
Dim d() As Integer
Dim min1 As Integer
Dim min2 As Integer
l1 = Len(s1)
l2 = Len(s2)
ReDim d(l1, l2)
For i = 0 To l1
d(i, 0) = i
Next
For j = 0 To l2
d(0, j) = j
Next
For i = 1 To l1
For j = 1 To l2
If Mid(s1, i, 1) = Mid(s2, j, 1) Then
d(i, j) = d(i - 1, j - 1)
Else
min1 = d(i - 1, j) + 1
min2 = d(i, j - 1) + 1
If min2 < min1 Then
min1 = min2
End If
min2 = d(i - 1, j - 1) + 1
If min2 < min1 Then
min1 = min2
End If
d(i, j) = min1
End If
Next
Next
Levenshtein = d(l1, l2)
End Function
Option Base 1
Public Function hungarianAlgorithm(matrixToSolve As Variant) As String
'================================
'© CREATED BY CAPIBARBAROJA.
'================================
'Resources:
'http://216.249.163.93/bob.pilgrim/445/munkres.html
'================================
'- Declaring for calculating the Hungarian Algorithm
Dim C() As Double 'Matrix
Dim A() As Integer 'Masked matrix
Dim X() As Integer 'Masked matrix for step 5
Dim C_cov() As Integer 'Masked matrix to check if columns are covered
Dim R_cov() As Integer 'Masked matrix to check if rows are covered
Dim saved_col As Integer 'Saving columns that have star in step 5
Dim saved_row As Integer 'Saving rows that have primes in step 5
Dim start_in_row As Boolean 'Done to check if there is a star in row in step 4
Dim Col As Integer 'Step 4
Dim stop_ As Boolean 'To stop step 5
Dim m As Integer 'Columns
Dim n As Integer 'Rows
Dim Min As Double 'Minimum
Dim Max As Double 'Maximum
Dim i As Integer 'Columns in for statments
Dim h As Integer 'Columns in for statments
Dim j As Integer 'Rows in for statements
'----------------------------------------
'- Declaring for debugging
Dim DoDebug As Boolean
Dim str As String
Dim array_ As String
str = ""
stepCounter = 0
'----------------------------------------
'INSTRUCTIONS:
'----------------------------------------
m = UBound(matrixToSolve, 1) - LBound(matrixToSolve, 1) + 1 'state how many columns your matrix will have(see NB:)
n = UBound(matrixToSolve, 2) - LBound(matrixToSolve, 2) + 1 'state how many rows your matrix will have(see NB:)
ReDim C(m, n)
ReDim A(m, n)
ReDim X(m, n)
ReDim C_cov(m)
ReDim R_cov(n)
' Makes space for the arrays/masked arrays to be filled. If
'you delete this statement your code will fail.
'A matrix is created in C() as following
'(Create your matrix here):
For i = LBound(C, 1) To UBound(C, 1)
For j = LBound(C, 2) To UBound(C, 2)
C(i, j) = matrixToSolve(i - 1, j - 1)
Next j
Next i
'First number is column in the matrix, second number
'is row in matrix.
'You can make the matrix as big as you wish, but remind, the
'bigger it is, the longer it will take to be done.
'NB: YOUR MATRIX MUST BE SQUARE(same number of columns
'and rows), ELSE THE CODE MIGHT END IN ERROR.
'You must state which of them is the biggest:
Max = -9999999
For i = LBound(C, 1) To UBound(C, 1)
For j = LBound(C, 2) To UBound(C, 2)
If (C(i, j) > Max) Then
Max = C(i, j)
End If
Next j
Next i
' Change 3 by the biggest number in your matrix.
'----------------------------------------
'------------ GETTING RESULTS -----------
'----------------------------------------
' When you run this code, the result will be shown in
'immediate window(press CTRL + G to show). Objects marked
'with a star are the most effecient.
'----------------------------------------
'=================================
'===== HUNGARIAN ALGORITHM =======
'=================================
'---------------------------------
'Debug? - Set DoDebug as True if error, else set as False:
DoDebug = False
'Debugging values will appear in immediate window(press CTRL + G to show)
'=================================
Step_1:
'For each row of the matrix, find the smallest element and
'subtract it from every element in its row. Go to Step 2.
For j = 1 To n
Min = C(1, j)
For i = 1 To m
If Min > C(i, j) Then
Min = C(i, j)
End If
Next
For i = 1 To m
C(i, j) = C(i, j) - Min
Next
Next
'------------------------------------------
If DoDebug = True Then
stepCounter = stepCounter + 1
Debug.Print stepCounter & ".- Step 1 done. Next Step 2"
End If
'------------------------------------------
Step_2:
'Find a zero (Z) in the resulting matrix. If there is no
'starred zero in its row or column, star Z. Repeat for
'each element in the matrix. Go to Step 3.
For j = 1 To n
For i = 1 To m
If C(i, j) = 0 And C_cov(i) = 0 And R_cov(j) = 0 Then
A(i, j) = 1
C_cov(i) = 1
R_cov(j) = 1
End If
Next
Next
For i = 1 To m
C_cov(i) = 0
Next
For j = 1 To n
R_cov(j) = 0
Next
'------------------------------------------
If DoDebug = True Then
stepCounter = stepCounter + 1
Debug.Print stepCounter & ".- Step 2 done. Next Step 3"
End If
'------------------------------------------
Step_3:
'Cover each column containing a starred zero. If K
'columns are covered, the starred zeros describe a
'complete set of unique assignments. In this case,
'Go to DONE, otherwise, Go to Step 4.
count = 0
For i = 1 To m
For j = 1 To n
If A(i, j) = 1 Then
C_cov(i) = 1
count = count + 1
Exit For
End If
Next
Next
If count >= n Then
GoTo DONE
End If
'------------------------------------------
If DoDebug = True Then
stepCounter = stepCounter + 1
Debug.Print stepCounter & ".- Step 3 done. Next Step 4"
End If
'------------------------------------------
Step_4:
'Find a noncovered zero and prime it. If there is no
'starred zero in the row containing this primed zero,
'Go to Step 5. Otherwise, cover this row and uncover
'the column containing the starred zero. Continue in
'this manner until there are no uncovered zeros left.
'Save the smallest uncovered value and Go to Step 6.
For j = 1 To n
For i = 1 To m
star_in_row = False
If C(i, j) = 0 And R_cov(j) = 0 And C_cov(i) = 0 Then
A(i, j) = 2
save_col = i
For h = 1 To m
If C(h, j) = 0 And A(h, j) = 1 Then
star_in_row = True
Col = h
Exit For
End If
Next
If star_in_row = False Then
X(i, j) = 1
'------------------------------------------
If DoDebug = True Then
stepCounter = stepCounter + 1
Debug.Print stepCounter & ".- Step 4 done. Next Step 5"
End If
'------------------------------------------
GoTo Step_5
Else
'Uncover column containing starred zero
C_cov(Col) = 0
'Cover row containing primed zero
R_cov(j) = 1
End If
End If
Next
Next
'Save smallest uncovered value
Min = Max
For j = 1 To n
For i = 1 To m
If C_cov(i) = 0 And R_cov(j) = 0 Then
If Min > C(i, j) Then
Min = C(i, j)
End If
End If
Next
Next
'------------------------------------------
If DoDebug = True Then
stepCounter = stepCounter + 1
Debug.Print stepCounter & ".- Step 4 done. Next Step 6"
End If
'------------------------------------------
GoTo Step_6
Step_5:
'Construct a series of alternating primed and starred zeros
'as follows. Let Z0 represent the uncovered primed zero
'found in Step 4. Let Z1 denote the starred zero in the
'column of Z0 (if any). Let Z2 denote the primed zero in
'the row of Z1 (there will always be one). Continue until
'the series terminates at a primed zero that has no starred
'zero in its column. Unstar each starred zero of the series,
'star each primed zero of the series, erase all primes and
'uncover every line in the matrix. Return to Step 3.
stop_ = False
Do While stop_ = False
For j = 1 To n
If A(save_col, j) = 1 Then
X(save_col, j) = 1
save_row = j
Exit For
End If
If j = n Then
stop_ = True
GoTo Nextline
End If
Next
For i = 1 To m
If A(i, save_row) = 2 Then
X(i, save_row) = 1
save_col = i
Exit For
End If
Next
Nextline: Loop
For j = 1 To n
For i = 1 To m
If X(i, j) = 1 Then
A(i, j) = A(i, j) - 1
X(i, j) = 0
ElseIf A(i, j) = 2 Then
A(i, j) = 0
End If
C_cov(i) = 0
R_cov(j) = 0
Next
Next
'------------------------------------------
If DoDebug = True Then
stepCounter = stepCounter + 1
Debug.Print stepCounter & ".- Step 5 done. Next Step 3"
End If
'------------------------------------------
GoTo Step_3
Step_6:
'Add the value found in Step 4 to every element of each
'covered row, and subtract it from every element of each
'uncovered column. Return to Step 4 without altering
'any stars, primes, or covered lines.
For i = 1 To m
For j = 1 To n
If R_cov(j) = 1 Then
C(i, j) = C(i, j) + Min
End If
If C_cov(i) = 0 Then
C(i, j) = C(i, j) - Min
End If
Next
Next
'------------------------------------------
If DoDebug = True Then
stepCounter = stepCounter + 1
Debug.Print stepCounter & ".- Step 6 done. Next Step 4"
End If
'------------------------------------------
GoTo Step_4
'==========================================
DONE:
'Writes the result to the Immediate Window.
hungarianAlgorithm = ""
WriteMatrix:
str = ""
For j = 1 To n
For i = 1 To m
array_ = CInt(C(i, j))
If A(i, j) = 1 Then
array_ = array_ & "*"
hungarianAlgorithm = hungarianAlgorithm & i & "," & j & " "
ElseIf A(i, j) = 2 Then
array_ = array_ & "'"
End If
If X(i, j) = 1 Then
array_ = array_ & "x"
End If
If R_cov(j) = 1 Then
array_ = array_ & "-"
End If
If C_cov(i) = 1 Then
array_ = array_ & "-"
End If
str = str & "|" & array_
Next
Debug.Print str & "|"
str = ""
Next
Exit Function
'================ END =====================
End Function
Thanks!