Option Explicit
Dim msRefWorksheet As String
Dim msRefLookupCol As String
Dim msRefCodeCol As String
Dim msDataWorksheet As String
Dim msDataLookupCol As String
Dim msDataCodeCol As String
Dim msngSoundexMatchPercent As Single
Dim msngMetaphoneMatchPercent As Single
Dim mvParameters As Variant
Dim mvRefWorkbook As Variant
Dim mvDataWorkbook As Variant
Dim mvaDataLookup As Variant
Dim mvaDataCode As Variant
Dim mvaRefLookup As Variant
Dim mvaRefCode As Variant
Dim mwsParameters As Worksheet
Sub ReturnAlphaCodes()
Dim iCol As Integer
Dim lRow As Long
Dim lRowData As Long
Dim lRowDataEnd As Long
Dim lRowRef As Long
Dim lBestMatchRow As Long
Dim sngBestMatchPercent As Single
Dim sngCurMatchPercent As Single
Dim sDataLookupValue As String
Dim sRefLookupValue As String
Dim wbCur As Workbook
Dim wsCur As Worksheet
If GetParameters = False Then Exit Sub
Set wbCur = Workbooks.Open(Filename:=mvRefWorkbook, ReadOnly:=True)
Set wsCur = wbCur.Sheets(msRefWorksheet)
With wsCur
mvaRefLookup = Intersect(.Columns(msRefLookupCol), .UsedRange).Value
mvaRefCode = Intersect(.Columns(msRefCodeCol), .UsedRange).Value
End With
wbCur.Close savechanges:=False
Set wbCur = Workbooks.Open(Filename:=mvDataWorkbook, ReadOnly:=False)
Set wsCur = wbCur.Sheets(msDataWorksheet)
With wsCur
mvaDataLookup = Intersect(.Columns(msDataLookupCol), .UsedRange).Value
ReDim mvaDataCode(1 To UBound(mvaDataLookup, 1), 1 To 1)
End With
lRowDataEnd = UBound(mvaDataLookup, 1)
For lRowData = 2 To lRowDataEnd
Application.StatusBar = "Processing data row " & lRowData & " of " & lRowDataEnd
lBestMatchRow = 0
sngBestMatchPercent = 0
sDataLookupValue = LCase$(WorksheetFunction.Trim(CStr(mvaDataLookup(lRowData, 1))))
For lRowRef = 2 To UBound(mvaRefLookup, 1)
sRefLookupValue = LCase$(WorksheetFunction.Trim(CStr(mvaRefLookup(lRowRef, 1))))
sngCurMatchPercent = FuzzyPercent(String1:=sDataLookupValue, String2:=sRefLookupValue, Algorithm:=2, Normalised:=True)
If sngCurMatchPercent > sngBestMatchPercent Then
If sngCurMatchPercent > 0.5 Then
lBestMatchRow = lRowRef
sngBestMatchPercent = sngCurMatchPercent
If sngBestMatchPercent = 1 Then Exit For 'Exit scan if we've found 100% match
End If
End If
Next lRowRef
If sngBestMatchPercent > 0.1 Then
mvaDataCode(lRowData, 1) = mvaRefCode(lBestMatchRow, 1)
Else
mvaDataCode(lRowData, 1) = "*** NO MATCH FOUND ***"
End If
Next lRowData
mvaDataCode(1, 1) = wsCur.Range(msDataCodeCol & "1").Value
wsCur.Range(msDataCodeCol & "1").Resize(UBound(mvaDataCode, 1)).Value = mvaDataCode
Application.StatusBar = False
End Sub
Private Function GetParameters() As Boolean
Dim lRow As Long
Dim sCurKeyword As String
Dim sCurValue As String
Set mwsParameters = Sheets("Parameters")
GetParameters = True
mvParameters = mwsParameters.UsedRange.Value
For lRow = 2 To UBound(mvParameters, 1)
sCurKeyword = NormaliseKey(CStr(mvParameters(lRow, 1)))
sCurValue = CStr(mvParameters(lRow, 2))
Select Case sCurKeyword
Case "referenceworkbook"
If sCurValue = "" Then
mvRefWorkbook = Application.GetOpenFilename(filefilter:="Excel files (*.xls*), *.xls*", _
Title:="Please select Input Reference workbook")
If mvRefWorkbook = False Then
GetParameters = False
Exit Function
End If
mwsParameters.Cells(lRow, 2).Value = mvRefWorkbook
Else
mvRefWorkbook = sCurValue
End If
Case "referenceworksheet"
msRefWorksheet = sCurValue
Case "referencelookupcolumn"
msRefLookupCol = sCurValue
Case "referencecodecolumn"
msRefCodeCol = sCurValue
Case "dataworkbook"
If sCurValue = "" Then
mvDataWorkbook = Application.GetOpenFilename(filefilter:="Excel files (*.xls*), *.xls*", _
Title:="Please select Data workbook")
If mvDataWorkbook = False Then
GetParameters = False
Exit Function
End If
' mwsParameters.Cells(lRow, 2) = mvDataWorkbook
Else
mvDataWorkbook = sCurValue
End If
Case "dataworksheet"
msDataWorksheet = sCurValue
Case "datalookupcolumn"
msDataLookupCol = sCurValue
Case "datacodecolumn"
msDataCodeCol = sCurValue
Case Else
If sCurKeyword <> "" Then MsgBox "Keyword in row " & lRow & " '" & CStr(mvParameters(lRow, 1)) & "' ignored"
End Select
Next lRow
End Function
Private Function NormaliseKey(ByVal String1 As String) As String
NormaliseKey = Replace(LCase$(String1), " ", "")
End Function
Function FuzzyPercent(ByVal String1 As String, _
ByVal String2 As String, _
Optional Algorithm As Variant = 3, _
Optional Normalised As Boolean = False) As Single
'*************************************
'** Return a % match on two strings **
'*************************************
Dim bSoundex As Boolean
Dim bBasicMetaphone As Boolean
Dim intLen1 As Integer, intLen2 As Integer
Dim intCurLen As Integer
Dim intTo As Integer
Dim intPos As Integer
Dim intPtr As Integer
Dim intScore As Integer
Dim intTotScore As Integer
Dim intStartPos As Integer
Dim lngAlgorithm As Long
Dim sngScore As Single
Dim strWork As String
bSoundex = LCase$(CStr(Algorithm)) = "soundex"
bBasicMetaphone = LCase$(CStr(Algorithm)) = "metaphone"
'-------------------------------------------------------
'-- If strings havent been normalised, normalise them --
'-------------------------------------------------------
If Normalised = False Then
If bSoundex Or bBasicMetaphone Then
String1 = NormaliseStringAtoZ(String1)
String2 = NormaliseStringAtoZ(String2)
Else
String1 = LCase$(WorksheetFunction.Trim(String1))
String2 = LCase$(WorksheetFunction.Trim(String2))
End If
End If
'----------------------------------------------
'-- Give 100% match if strings exactly equal --
'----------------------------------------------
If String1 = String2 Then
FuzzyPercent = 1
Exit Function
End If
If bSoundex Then
String1 = Soundex(Replace(String1, " ", ""))
String2 = Soundex(Replace(String2, " ", ""))
If String1 = String2 Then
FuzzyPercent = msngSoundexMatchPercent
Else
FuzzyPercent = 0
End If
Exit Function
ElseIf bBasicMetaphone Then
String1 = Metaphone1(String1)
String2 = Metaphone1(String2)
If String1 = String2 Then
FuzzyPercent = msngMetaphoneMatchPercent
Else
FuzzyPercent = 0
End If
Exit Function
End If
intLen1 = Len(String1)
intLen2 = Len(String2)
If intLen1 = 0 Or intLen2 = 0 Then
FuzzyPercent = 0
Exit Function
End If
'----------------------------------------
'-- Give 0% match if string length < 2 --
'----------------------------------------
If intLen1 < 2 Then
FuzzyPercent = 0
Exit Function
End If
intTotScore = 0 'initialise total possible score
intScore = 0 'initialise current score
lngAlgorithm = Val(Algorithm)
'--------------------------------------------------------
'-- If Algorithm = 1 or 3, Search for single characters --
'--------------------------------------------------------
If (lngAlgorithm And 1) <> 0 Then
If intLen1 < intLen2 Then
FuzzyAlg1 String1, String2, intScore, intTotScore
Else
FuzzyAlg1 String2, String1, intScore, intTotScore
End If
End If
'-----------------------------------------------------------
'-- If Algorithm = 2 or 3, Search for pairs, triplets etc. --
'-----------------------------------------------------------
If (lngAlgorithm And 2) <> 0 Then
If intLen1 < intLen2 Then
FuzzyAlg2 String1, String2, intScore, intTotScore
Else
FuzzyAlg2 String2, String1, intScore, intTotScore
End If
End If
'-------------------------------------------------------------
'-- If Algorithm = 4,5,6,7, use Levenstein Distance method --
'-- (Algorithm 4 was Dan Ostrander's code) --
'-------------------------------------------------------------
If (lngAlgorithm And 4) <> 0 Then
If intLen1 < intLen2 Then
' sngScore = FuzzyAlg4(String1, String1)
sngScore = GetLevenshteinPercentMatch(String1:=String1, _
String2:=String2, _
Normalised:=True)
Else
' sngScore = FuzzyAlg4(String2, String1)
sngScore = GetLevenshteinPercentMatch(String1:=String2, _
String2:=String1, _
Normalised:=True)
End If
intScore = intScore + (sngScore * 100)
intTotScore = intTotScore + 100
End If
FuzzyPercent = intScore / intTotScore
End Function
Private Sub FuzzyAlg1(ByVal String1 As String, _
ByVal String2 As String, _
ByRef Score As Integer, _
ByRef TotScore As Integer)
Dim intLen1 As Integer, intPos As Integer, intPtr As Integer, intStartPos As Integer
intLen1 = Len(String1)
TotScore = TotScore + intLen1 'update total possible score
intPos = 0
For intPtr = 1 To intLen1
intStartPos = intPos + 1
intPos = InStr(intStartPos, String2, Mid$(String1, intPtr, 1))
If intPos > 0 Then
If intPos > intStartPos + 3 Then 'No match if char is > 3 bytes away
intPos = intStartPos
Else
Score = Score + 1 'Update current score
End If
Else
intPos = intStartPos
End If
Next intPtr
End Sub
Private Sub FuzzyAlg2(ByVal String1 As String, _
ByVal String2 As String, _
ByRef Score As Integer, _
ByRef TotScore As Integer)
Dim intCurLen As Integer, intLen1 As Integer, intTo As Integer, intPtr As Integer, intPos As Integer
Dim strWork As String
intLen1 = Len(String1)
For intCurLen = 1 To intLen1
strWork = String2 'Get a copy of String2
intTo = intLen1 - intCurLen + 1
TotScore = TotScore + Int(intLen1 / intCurLen) 'Update total possible score
For intPtr = 1 To intTo Step intCurLen
intPos = InStr(strWork, Mid$(String1, intPtr, intCurLen))
If intPos > 0 Then
Mid$(strWork, intPos, intCurLen) = String$(intCurLen, &H0) 'corrupt found string
Score = Score + 1 'Update current score
End If
Next intPtr
Next intCurLen
End Sub
Public Function GetLevenshteinPercentMatch(ByVal String1 As String, _
ByVal String2 As String, _
Optional Normalised As Boolean = False) As Single
Dim iLen As Integer
If Normalised = False Then
String1 = UCase$(WorksheetFunction.Trim(String1))
String2 = UCase$(WorksheetFunction.Trim(String2))
End If
iLen = WorksheetFunction.Max(Len(String1), Len(String2))
GetLevenshteinPercentMatch = (iLen - LevenshteinDistance(String1, String2)) / iLen
End Function
'********************************
'*** Compute Levenshtein Distance
'********************************
Public Function LevenshteinDistance(ByVal s As String, ByVal t As String) As Integer
Dim d() As Integer ' matrix
Dim m As Integer ' length of t
Dim N As Integer ' length of s
Dim I As Integer ' iterates through s
Dim j As Integer ' iterates through t
Dim s_i As String ' ith character of s
Dim t_j As String ' jth character of t
Dim cost As Integer ' cost
' Step 1
N = Len(s)
m = Len(t)
If N = 0 Then
LevenshteinDistance = m
Exit Function
End If
If m = 0 Then
LevenshteinDistance = N
Exit Function
End If
ReDim d(0 To N, 0 To m) As Integer
' Step 2
For I = 0 To N
d(I, 0) = I
Next I
For j = 0 To m
d(0, j) = j
Next j
' Step 3
For I = 1 To N
s_i = Mid$(s, I, 1)
' Step 4
For j = 1 To m
t_j = Mid$(t, j, 1)
' Step 5
If s_i = t_j Then
cost = 0
Else
cost = 1
End If
' Step 6
d(I, j) = WorksheetFunction.Min(d(I - 1, j) + 1, d(I, j - 1) + 1, d(I - 1, j - 1) + cost)
Next j
Next I
' Step 7
LevenshteinDistance = d(N, m)
End Function
Function Soundex(Surname As String) As String
' Developed by Richard J. Yanco
' This function follows the Soundex rules given at
' http://home.utah-inter.net/kinsearch/Soundex.html
Dim Result As String, c As String * 1
Dim Location As Integer
Surname = UCase(Surname)
If Surname = "" Then
Soundex = ""
Exit Function
End If
' First character must be a letter
If Asc(Left(Surname, 1)) < 65 Or Asc(Left(Surname, 1)) > 90 Then
Soundex = ""
Exit Function
Else
' St. is converted to Saint
If Left(Surname, 3) = "ST." Then
Surname = "SAINT" & Mid(Surname, 4)
End If
' Convert to Soundex: letters to their appropriate digit,
' A,E,I,O,U,Y ("slash letters") to slashes
' H,W, and everything else to zero-length string
Result = Left(Surname, 1)
For Location = 2 To Len(Surname)
Result = Result & SoundexCategory(Mid(Surname, Location, 1))
Next Location
' Remove double letters
Location = 2
Do While Location < Len(Result)
If Mid(Result, Location, 1) = Mid(Result, Location + 1, 1) Then
Result = Left(Result, Location) & Mid(Result, Location + 2)
Else
Location = Location + 1
End If
Loop
' If SoundexCategory of 1st letter equals 2nd character, remove 2nd character
If SoundexCategory(Left(Result, 1)) = Mid(Result, 2, 1) Then
Result = Left(Result, 1) & Mid(Result, 3)
End If
' Remove slashes
For Location = 2 To Len(Result)
If Mid(Result, Location, 1) = "/" Then
Result = Left(Result, Location - 1) & Mid(Result, Location + 1)
End If
Next
' Trim or pad with zeroes as necessary
Select Case Len(Result)
Case 4
Soundex = Result
Case Is < 4
Soundex = Result & String(4 - Len(Result), "0")
Case Is > 4
Soundex = Left(Result, 4)
End Select
End If
End Function
Private Function SoundexCategory(c) As String
' Returns a Soundex code for a letter
Select Case True
Case c Like "[AEIOUY]"
SoundexCategory = "/"
Case c Like "[BPFV]"
SoundexCategory = "1"
Case c Like "[CSKGJQXZ]"
SoundexCategory = "2"
Case c Like "[DT]"
SoundexCategory = "3"
Case c = "L"
SoundexCategory = "4"
Case c Like "[MN]"
SoundexCategory = "5"
Case c = "R"
SoundexCategory = "6"
Case Else 'This includes H and W, spaces, punctuation, etc.
SoundexCategory = ""
End Select
End Function
Private Function NormaliseStringAtoZ(ByVal String1 As String) As String
'---------------------------------------------------------
'-- Remove all but alpha chars and convert to lowercase --
'---------------------------------------------------------
Dim iPtr As Integer
Dim sChar As String
Dim sResult As String
sResult = ""
For iPtr = 1 To Len(String1)
sChar = LCase$(Mid$(String1, iPtr, 1))
If sChar <> UCase$(sChar) Then sResult = sResult & sChar
Next iPtr
NormaliseStringAtoZ = sResult
End Function
Function Metaphone1(ByVal String1 As String) As String
'-- Metaphone Basic Rules
'-- ** NOTE ** Depending on the order that they are obeyed, these rules can cancel each other out
'-- I have amended the order that the rules are obeyed as I feel appropriate, but uncertain that it is as the
'-- author intended.
'-- These are the rules as specified in Wikipedia entry:
'-- 1.***** Drop duplicate adjacent letters, except for C.
'-- 2.***** If the word begins with 'KN', 'GN', 'PN', 'AE', 'WR', drop the first letter.
'-- 3.***** Drop 'B' if after 'M' at the end of the word.
'-- 4.***** 'C' transforms to 'X' if followed by 'IA' or 'H' (unless in latter case, it is part of '-SCH-', in which case it transforms to 'K'). 'C' transforms to 'S' if followed by 'I', 'E', or 'Y'. Otherwise, 'C' transforms to 'K'.
'-- 5.***** 'D' transforms to 'J' if followed by 'GE', 'GY', or 'GI'. Otherwise, 'D' transforms to 'T'.
'-- 6.***** Drop 'G' if followed by 'H' and 'H' is not at the end or before a vowel. Drop 'G' if followed by 'N' or 'NED' and is at the end.
'-- 7.***** 'G' transforms to 'J' if before 'I', 'E', or 'Y', and it is not in 'GG'. Otherwise, 'G' transforms to 'K'.
'-- 8.***** Drop 'H' if after vowel and not before a vowel.
'-- 9.***** 'CK' transforms to 'K'.
'-- 10.*** 'PH' transforms to 'F'.
'-- 11.*** 'Q' transforms to 'K'.
'-- 12.*** 'S' transforms to 'X' if followed by 'H', 'IO', or 'IA'.
'-- 13.*** 'T' transforms to 'X' if followed by 'IA' or 'IO'. 'TH' transforms to '0'. Drop 'T' if followed by 'CH'.
'-- 14.*** 'V' transforms to 'F'.
'-- 15.*** 'WH' transforms to 'W' if at the beginning. Drop 'W' if not followed by a vowel.
'-- 16.*** 'X' transforms to 'S' if at the beginning. Otherwise, 'X' transforms to 'KS'.
'-- 17.*** Drop 'Y' if not followed by a vowel.
'-- 18.*** 'Z' transforms to 'S'.
'-- 19.*** Drop all vowels unless it is the beginning.
Dim iPtr As Integer
Dim iLen As Integer
Dim sString As String
Dim sResult As String
Dim sChar As String
Dim sLeft As String
Dim sRight As String
Dim sAtoZ As String
Dim vArray As Variant
Dim vCur As Variant
'-- Remove non alphabetics and convert to lowercase --
sString = NormaliseStringAtoZ(String1)
'-- 5.***** 'D' transforms to 'J' if followed by 'GE', 'GY', or 'GI'. Otherwise, 'D' transforms to 'T'.
sString = Replace(sString, "dge", "j")
sString = Replace(sString, "dgy", "j")
sString = Replace(sString, "dgi", "j")
sString = Replace(sString, "d", "t")
'-- 2.*If the word begins with 'KN', 'GN', 'PN', 'AE', 'WR', drop the first letter.
vArray = Array("kn", "gn", "pn", "ae", "wr")
For Each vCur In vArray
If Left$(sString, 2) = CStr(vCur) Then
sString = Right$(sString, Len(sString) - 1)
Exit For
End If
Next vCur
'-- 7.***** 'G' transforms to 'J' if before 'I', 'E', or 'Y', and it is not in 'GG'. Otherwise, 'G' transforms to 'K'.
iPtr = InStr(sString, "g")
iLen = Len(sString)
Do
If iPtr < 1 Or iPtr >= Len(sString) Then Exit Do
If Mid$(sString & " ", iPtr, 2) <> "gg" Then
If InStr("iey", Mid$(sString & " ", iPtr + 1, 1)) > 0 Then
Mid$(sString, iPtr, 1) = "j"
Else
Mid$(sString, iPtr, 1) = "k"
End If
End If
iPtr = InStr(iPtr + 1, sString, "g")
Loop
'-- 1. Drop duplicate adjacent letters, except for C.
sAtoZ = "abdefghijklmnopqrstuvwxyz"
For iPtr = 1 To Len(sAtoZ)
sChar = Mid$(sAtoZ, iPtr, 1)
sString = Replace(sString, sChar & sChar, sChar)
Next iPtr
'-- 3.***** Drop 'B' if after 'M' at the end of the word.
If Right$(sString, 2) = "mb" Then sString = Left$(sString, Len(sString) - 1)
'-- 9.***** 'CK' transforms to 'K'.
sString = Replace(sString, "ck", "k")
'-- 4.***** 'C' transforms to 'X' if followed by 'IA' or 'H' (unless in latter case, it is part of '-SCH-',
'-- in which case it transforms to 'K'). 'C' transforms to 'S' if followed by 'I', 'E', or 'Y'.
'-- Otherwise, 'C' transforms to 'K'.
sString = Replace(sString, "sch", "k")
sString = Replace(sString, "cia", "x")
sString = Replace(sString, "ch", "x")
sString = Replace(sString, "ci", "s")
sString = Replace(sString, "ce", "s")
sString = Replace(sString, "cy", "s")
sString = Replace(sString, "c", "k")
'-- 6.***** Drop 'G' if followed by 'H' and 'H' is not at the end or before a vowel.
'-- Drop 'G' if followed by 'N' or 'NED' and is at the end.
If Right$(sString, 4) = "gned" Then sString = Left$(sString, Len(sString) - 4) & "ned"
If Right$(sString, 2) = "gn" Then sString = Left$(sString, Len(sString) - 2) & "n"
iPtr = InStr(sString, "g")
Do
iLen = Len(sString)
If iPtr >= iLen Or iPtr < 1 Then Exit Do
If Mid$(sString, iPtr, 2) = "gh" Then
If InStr("aeiou", Mid$(sString, iPtr + 2)) < 1 Then
sLeft = Left$(sString, iPtr - 1)
sRight = Mid$(sString, iPtr + 1)
sString = sLeft & sRight
End If
End If
iPtr = InStr(iPtr + 1, sString, "g")
Loop
'-- 13.*** 'T' transforms to 'X' if followed by 'IA' or 'IO'. 'TH' transforms to '0'. Drop 'T' if followed by 'CH'.
sString = Replace(sString, "tio", "xio")
sString = Replace(sString, "tia", "xia")
sString = Replace(sString, "tch", "ch")
sString = Replace(sString, "th", "0")
'-- 8.***** Drop 'H' if after vowel and not before a vowel.
iPtr = InStr(sString, "h")
Do
iLen = Len(sString)
If iPtr < 1 Then Exit Do
If InStr("aeiou", Mid$(sString & " ", iPtr + 1, 1)) < 1 Then
If InStr("aeiou", Mid$(" " & sString, iPtr, 1)) > 0 Then
sLeft = Left$(sString, iPtr - 1)
sRight = Mid$(sString, iPtr + 1)
sString = sLeft & sRight
iPtr = iPtr - 1
End If
End If
iPtr = InStr(iPtr + 1, sString, "h")
Loop
'-- 10.*** 'PH' transforms to 'F'.
sString = Replace(sString, "ph", "f")
'-- 11.*** 'Q' transforms to 'K'.
sString = Replace(sString, "q", "k")
'-- 12.*** 'S' transforms to 'X' if followed by 'H', 'IO', or 'IA'.
sString = Replace(sString, "sio", "xio")
sString = Replace(sString, "sia", "xia")
sString = Replace(sString, "sh", "xh")
'-- 14.*** 'V' transforms to 'F'.
sString = Replace(sString, "v", "f")
'-- 15.*** 'WH' transforms to 'W' if at the beginning. Drop 'W' if not followed by a vowel.
If Left$(sString, 2) = "wh" Then sString = "w" & Mid$(sString, 3)
'-- 16.*** 'X' transforms to 'S' if at the beginning. Otherwise, 'X' transforms to 'KS'.
If Left$(sString, 1) = "x" Then sString = "s" & Mid$(sString, 2)
sString = Replace(sString, "x", "ks")
'-- 17.*** Drop 'Y' if not followed by a vowel.
iPtr = InStr(sString, "y")
Do
If iPtr < 1 Then Exit Do
If InStr("aeiou", Mid$(sString & " ", iPtr + 1, 1)) < 1 Then
sLeft = Left$(sString, iPtr - 1)
sRight = Mid$(sString, iPtr + 1)
sString = sLeft & sRight
iPtr = iPtr - 1
End If
iPtr = InStr(iPtr + 1, sString, "y")
Loop
'-- 18.*** 'Z' transforms to 'S'.
sString = Replace(sString, "z", "s")
'-- 19.*** Drop all vowels unless it is the beginning.
sResult = Left$(sString, 1)
For iPtr = 2 To Len(sString)
sChar = Mid$(sString, iPtr, 1)
If InStr("aeiou", sChar) < 1 Then sResult = sResult & sChar
Next iPtr
Metaphone1 = sResult
End Function