Hi,
I am trying use fuzzy string matching in a search engine with vba. I attached below how it looks like. In the below image, when I type in "fruit" as my keyword, there is a 0% match when there is "fruit juice" in row 16. It is a multifield search engine so there are three search headings so far: KeyWords, Year Founded, and Locations. I am unsure of how this works, and I would appreciate any help. But overall, the code works really well for me, especially when I use multiple search fields at the same time.
I am trying use fuzzy string matching in a search engine with vba. I attached below how it looks like. In the below image, when I type in "fruit" as my keyword, there is a 0% match when there is "fruit juice" in row 16. It is a multifield search engine so there are three search headings so far: KeyWords, Year Founded, and Locations. I am unsure of how this works, and I would appreciate any help. But overall, the code works really well for me, especially when I use multiple search fields at the same time.
VBA Code:
Option Explicit
Const msSheetName As String = "Sheet4" '** Defines the worksheet
Dim msMatchCriteriaCell As String '** Defines the cell containing the match Keywords heading
Const msMatchCriteriaHeading As String = "Search Heading" '** Heading for search criteria headings
Const msFileTitleHeading As String = "Excel File Title" '** Heading for file column
Const msMatchHeading As String = "%Match" '** Heading for results
Const msngMinPercent As Single = 0.5 '** Minimum % Match
Sub FuzzySearch()
Dim lSearchRowPtr As Long
Dim lDataHeadingColumn As Long
Dim lFileDataEndRow As Long
Dim lFileDataRow As Long
Dim lMatchPercentColumn As Long
Dim rDataHeadingRow As Range
Dim sFileAddress As String
Dim sPercentMatchHeadingAddress As String
Dim sCurHeading As String
Dim sCurCriteria As String
Dim sCurFileKeywords As String
Dim sUsedRangeEndAddress As String
Dim sngCurMatch As Single
Dim sngTotalScore As Single
Dim sngMaxScore As Single
Dim vaSearchHeadingCriteria As Variant
Dim vaData As Variant
Dim WS As Worksheet
Set WS = Sheets(msSheetName)
With WS.UsedRange
'** Get all the data in the worksheet **
sUsedRangeEndAddress = WS.Cells(.Rows.Count, .Columns.Count).Address
vaData = WS.Range("A1", sUsedRangeEndAddress).Value
End With
'** Find cell containing File Criteria heading **
msMatchCriteriaCell = FindHeading(msMatchCriteriaHeading, WS.Range("A1:" & sUsedRangeEndAddress))
If msMatchCriteriaCell = "" Then
Exit Sub
End If
'** Get match criteria info **
vaSearchHeadingCriteria = GetMatchCriteria()
'** Find cell containing File Title heading **
sFileAddress = FindHeading(msFileTitleHeading, WS.Range("A1:" & sUsedRangeEndAddress))
If sFileAddress = "" Then
Exit Sub
End If
'** find cell containing '%Match' heading **
sPercentMatchHeadingAddress = FindHeading(msMatchHeading, WS.Range("A1:" & sUsedRangeEndAddress))
If sPercentMatchHeadingAddress = "" Then
Exit Sub
End If
lMatchPercentColumn = WS.Range(sPercentMatchHeadingAddress).Column
Set rDataHeadingRow = WS.Rows(WS.Range(sFileAddress).Row)
lFileDataEndRow = WS.Cells(WS.Rows.Count, WS.Range(sFileAddress).Column).End(xlUp).Row
'** Loop thru file data entries **
For lFileDataRow = WS.Range(sFileAddress).Row + 1 To lFileDataEndRow
sngMaxScore = 0
sngTotalScore = 0
'** Loop thru match criteria **
For lSearchRowPtr = 2 To UBound(vaSearchHeadingCriteria, 1)
'** Get next criteria heading **
sCurHeading = Trim$(vaSearchHeadingCriteria(lSearchRowPtr, 1))
If sCurHeading <> "" Then
'** Get file data column containing the required criteria heading **
lDataHeadingColumn = FindDataHeadingColumn(Heading:=sCurHeading, _
SearchRange:=rDataHeadingRow)
If lDataHeadingColumn > 0 Then
sngMaxScore = sngMaxScore + 1
'** Get criteria keywords **
sCurCriteria = NormaliseKeywords(CStr(vaSearchHeadingCriteria(lSearchRowPtr, 2)))
'** Get matching File keywords **
sCurFileKeywords = NormaliseKeywords(CStr(vaData(lFileDataRow, lDataHeadingColumn)))
sngCurMatch = GetMatchPercent(String1:=sCurCriteria, _
String2:=sCurFileKeywords)
If sngCurMatch >= msngMinPercent Then
sngTotalScore = sngTotalScore + sngCurMatch
End If
End If
End If
Next lSearchRowPtr
sngTotalScore = sngTotalScore / sngMaxScore
WS.Cells(lFileDataRow, lMatchPercentColumn).Value = sngTotalScore
Next lFileDataRow
End Sub
Private Function FindCellHeading()
End Function
Private Function GetMatchPercent(ByVal String1 As String, ByVal String2 As String) As Single
Dim lPtr1 As Long
Dim lPtr2 As Long
Dim sngMatchPercent As Single
Dim sngCurMatchPercent As Single
Dim saString1() As String
Dim saString2() As String
saString1 = Split(String1, ",")
saString2 = Split(String2, ",")
sngMatchPercent = 0
For lPtr1 = 0 To UBound(saString1)
If saString1(lPtr1) <> "" Then
For lPtr2 = 0 To UBound(saString2)
If saString2(lPtr2) <> "" Then
sngCurMatchPercent = GetLevenshteinPercentMatch(String1:=saString1(lPtr1), _
String2:=saString2(lPtr2), _
Normalised:=True)
If sngCurMatchPercent > sngMatchPercent Then
sngMatchPercent = sngCurMatchPercent
End If
End If
Next
End If
Next lPtr1
GetMatchPercent = sngMatchPercent
End Function
Private Function FindDataHeadingColumn(ByVal Heading As String, ByRef SearchRange As Range) As Long
Dim vColumn As Variant
FindDataHeadingColumn = 0
On Error Resume Next
vColumn = Application.Match(Heading, SearchRange, 0)
If Not IsError(vColumn) Then
FindDataHeadingColumn = CLng(vColumn)
End If
On Error GoTo 0
End Function
Private Function GetMatchCriteria() As Variant
Dim lEndRow As Long
Dim lColumn As Long
Dim vaData As Variant
With Sheets(msSheetName)
lColumn = .Range(msMatchCriteriaCell).Column
lEndRow = .Cells(.Rows.Count, lColumn).End(xlUp).Row
vaData = .Range(msMatchCriteriaCell, .Cells(lEndRow, lColumn + 1)).Value
End With
GetMatchCriteria = vaData
End Function
Function FindHeading(ByVal SearchHeading As String, ByRef SearchRange As Range, Optional MessageIfNF As Boolean = True) As String
Dim Rng As Range
FindHeading = ""
If Trim(SearchHeading) <> "" Then
With SearchRange
Set Rng = .Find(What:=SearchHeading, _
After:=.Cells(.Cells.Count), _
LookIn:=xlValues, _
LookAt:=xlWhole, _
SearchOrder:=xlByRows, _
SearchDirection:=xlNext, _
MatchCase:=False)
If Not Rng Is Nothing Then
FindHeading = Rng.Resize(1, 1).Address
End If
End With
End If
If FindHeading = "" _
And MessageIfNF = True Then
MsgBox prompt:="Heading '" & SearchHeading & "' not found!", _
Buttons:=vbCritical + vbOKOnly
End If
End Function
Private Function NormaliseKeywords(ByVal Keywords As String) As String
Dim lPtr As Long
Dim saKeywords() As String
Dim sResult As String
sResult = LCase$(Trim$(Keywords))
If sResult <> "" Then
saKeywords = Split(sResult, ",")
For lPtr = 0 To UBound(saKeywords)
saKeywords(lPtr) = Trim$(saKeywords(lPtr))
Next lPtr
sResult = Join(saKeywords, ",")
End If
NormaliseKeywords = sResult
End Function
Public Function GetLevenshteinPercentMatch(ByVal String1 As String, _
ByVal String2 As String, _
Optional Normalised As Boolean = True) As Single
Dim iLen As Integer
If Normalised = False Then
String1 = UCase$(WorksheetFunction.Trim(String1))
String2 = UCase$(WorksheetFunction.Trim(String2))
End If
iLen = WorksheetFunction.Max(Len(String1), Len(String2))
GetLevenshteinPercentMatch = (iLen - LevenshteinDistance(String1, String2)) / iLen
End Function
Public Function LevenshteinDistance(ByVal s As String, ByVal t As String) As Integer
'********************************
'*** Compute Levenshtein Distance
'********************************
Dim d() As Integer ' matrix
Dim m As Integer ' length of t
Dim N As Integer ' length of s
Dim I As Integer ' iterates through s
Dim j As Integer ' iterates through t
Dim s_i As String ' ith character of s
Dim t_j As String ' jth character of t
Dim cost As Integer ' cost
' Step 1
N = Len(s)
m = Len(t)
If N = 0 Then
LevenshteinDistance = m
Exit Function
End If
If m = 0 Then
LevenshteinDistance = N
Exit Function
End If
ReDim d(0 To N, 0 To m) As Integer
' Step 2
For I = 0 To N
d(I, 0) = I
Next I
For j = 0 To m
d(0, j) = j
Next j
' Step 3
For I = 1 To N
s_i = Mid$(s, I, 1)
' Step 4
For j = 1 To m
t_j = Mid$(t, j, 1)
' Step 5
If s_i = t_j Then
cost = 0
Else
cost = 1
End If
' Step 6
d(I, j) = WorksheetFunction.Min(d(I - 1, j) + 1, d(I, j - 1) + 1, d(I - 1, j - 1) + cost)
Next j
Next I
' Step 7
LevenshteinDistance = d(N, m)
End Function