Sub ListBuilder()
Application.ScreenUpdating = False
Dim StrIn As String, StrTmp As String, StrExcl As String, StrOut() As String
Dim i As Long, j As Long
'Define the exlusions list
StrExcl = "A,Am,An,And,Are,As,At,B,Be,But,By,C,Can,Cm,D,Did," & _
"Do,Does,E,Eg,En,Eq,Etc,F,For,G,Get,Go,Got,H,Has,Have," & _
"He,Her,Him,How,I,Ie,If,In,Into,Is,It,Its,J,K,L,M,Me," & _
"Mi,Mm,My,N,Na,Nb,No,Not,O,Of,Off,Ok,On,One,Or,Our,Out," & _
"P,Q,R,Re,S,She,So,T,The,Their,Them,They,This,T,To,U,V," & _
"Via,Vs,W,Was,We,Were,Who,Will,With,Would,X,Y,Yd,You,Your,Z"
With ActiveDocument.Range
'Convert email & web addresses to hyperlinks, then capture & delete
.AutoFormat
While .Hyperlinks.Count > 0
With .Hyperlinks(1)
StrTmp = StrTmp & " " & .Address
.Delete
End With
Wend
'Delete words of 3 characters or less
With .Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = "<[! ]{1,3}>"
.Replacement.Text = ""
.Forward = True
.Wrap = wdFindContinue
.Format = False
.MatchWildcards = True
.Execute Replace:=wdReplaceAll
End With
'ActiveWindow.View.ShowFieldCodes = False
'Capitalise the first letter of each word
.Case = wdTitleWord
'Get the document's text
StrIn = .Text
'Strip out unwanted characters. Amongst others, hyphens and formatted single quotes are retained at this stage
For i = 1 To 255
Select Case i
'To strip out unwanted characters
Case 1 To 38, 40 To 64, 91 To 96, 123 To 144, 147 To 149, 152 To 171, 174 To 191, 247
StrIn = Replace(StrIn, Chr(i), " ")
End Select
Next
'Convert smart single quotes to plain single quotes & delete any at the start/end of a word
StrIn = Replace(Replace(Replace(Replace(StrIn, Chr(145), "'"), Chr(146), "'"), "' ", " "), " '", " ")
'Process the exclusions list
For i = 0 To UBound(Split(StrExcl, ","))
While InStr(StrIn, " " & Split(StrExcl, ",")(i) & " ") > 0
StrIn = Replace(StrIn, " " & Split(StrExcl, ",")(i) & " ", " ")
Wend
Next
'Incorporate email & web addresses
StrIn = StrIn & StrTmp
'Clean up any duplicate spaces
While InStr(StrIn, " ") > 0
StrIn = Replace(StrIn, " ", " ")
Wend
StrIn = " " & Trim(StrIn) & " "
j = UBound(Split(StrIn, " "))
For i = 1 To j
StrTmp = Split(StrIn, " ")(1)
While InStr(StrIn, " " & StrTmp & " ") > 0
StrIn = Replace(StrIn, " " & StrTmp & " ", " ")
Wend
'Update the output array
ReDim Preserve StrOut(i - 1)
StrOut(i - 1) = StrTmp
If UBound(Split(StrIn, " ")) = 1 Then Exit For
Next
WordBasic.SortArray StrOut()
.Text = Join(StrOut, ", ")
'Restore email & web address hyperlinks
.AutoFormat
.Style = wdStyleNormal
End With
Application.ScreenUpdating = True
End Sub