Option Explicit
Const sNumber As String = "1,2,3"
Const xPattern As String = "A-Z0-9_'"
Const NOC As Long = 4
Const xCol As String = "G:XFD"
Const BF As String = "B:F"
Const HC As Long = 10000
Dim VBX
Dim SN As Long
Dim rSW As Range
Sub Word_Phrase_Frequency_v1x()
Dim i As Long, j As Long, n As Long, k As Long
Dim txa As String
Dim z, t, s
t = Timer
Application.ScreenUpdating = False
Range(xCol).Clear
On Error Resume Next
Range(BF).SpecialCells(xlCellTypeFormulas, xlErrors).ClearContents
Range(BF).SpecialCells(xlConstants, xlErrors).ClearContents
On Error GoTo 0
j = Range("A" & Rows.Count).End(xlUp).Row
For i = 2 To j
txa = Join(Application.Transpose(Application.Transpose(Cells(i, "B").Resize(1, 5))), " ")
z = Split(sNumber, ",")
SN = (UBound(z) + 1)
ReDim VBX(1 To 1, 1 To SN * NOC * 2)
With Sheets("Sheet2")
Set rSW = .Range("A1", .Cells(.Rows.Count, "A").End(xlUp))
End With
If rSW.Cells(1) <> "" Then Call stopWord(xPattern, txa)
For k = LBound(z) To UBound(z)
Call toProcessY(CLng(z(k)), txa, xPattern, i)
Next
Cells(i, "G").Resize(1, UBound(VBX, 2)) = VBX
Next
For Each s In z
For k = 1 To NOC
n = n + 1
VBX(1, n) = s & " WORD"
n = n + 1
VBX(1, n) = "count"
Next
Next
Cells(1, "G").Resize(1, UBound(VBX, 2)) = VBX
Range(xCol).Columns.AutoFit
Columns(HC).Resize(, 2).Clear
Application.ScreenUpdating = True
Debug.Print "It's done in: " & Timer - t & " seconds"
End Sub
Sub stopWord(xP As String, tx As String)
Dim n As Long
Dim stW, x
Dim regEx As Object
stW = rSW.Value
Set regEx = CreateObject("VBScript.RegExp")
With regEx
.Global = True
.MultiLine = True
.IgnoreCase = True
End With
tx = " " & tx
For Each x In stW
regEx.Pattern = "[^" & xP & "]" & x & "[^" & xP & "]"
If regEx.Test(tx) Then
tx = regEx.Replace(tx, "|")
End If
Next
End Sub
Sub toProcessY(n As Long, ByVal tx As String, xP As String, rn As Long)
Dim regEx As Object, matches As Object, x As Object, d As Object
Dim i As Long, rc As Long
Dim va, q
Static y As Long
Set regEx = CreateObject("VBScript.RegExp")
With regEx
.Global = True
.MultiLine = True
.IgnoreCase = True
End With
If n > 1 Then
regEx.Pattern = "( ){2,}"
If regEx.Test(tx) Then
tx = regEx.Replace(tx, " ")
End If
tx = Trim(tx)
regEx.Pattern = "[^" & xP & " ]+"
If regEx.Test(tx) Then
tx = regEx.Replace(tx, vbLf)
End If
tx = Replace(tx, vbLf & " ", vbLf & "")
End If
Set d = CreateObject("scripting.dictionary")
d.CompareMode = vbTextCompare
regEx.Pattern = Trim(WorksheetFunction.Rept("[" & xP & "]+ ", n))
Set matches = regEx.Execute(tx)
For Each x In matches
d(CStr(x)) = d(CStr(x)) + 1
Next
For i = 1 To n - 1
regEx.Pattern = "^[" & xP & "]+ "
If regEx.Test(tx) Then
tx = regEx.Replace(tx, "")
regEx.Pattern = Trim(WorksheetFunction.Rept("[" & xP & "]+ ", n))
Set matches = regEx.Execute(tx)
For Each x In matches
d(CStr(x)) = d(CStr(x)) + 1
Next
End If
Next
If d.Count = 0 Then y = y + (NOC * 2): Exit Sub
With Cells(2, HC).Resize(d.Count, 2)
Select Case d.Count
Case Is < 65536
.Value = Application.Transpose(Array(d.Keys, d.Items))
Case Is <= 1048500
ReDim va(1 To d.Count, 1 To 2)
i = 0
For Each q In d.Keys
i = i + 1
va(i, 1) = q: va(i, 2) = d(q)
Next
.Value = va
Case Else
MsgBox "Process is canceled, the result is more than 1048500 rows"
End Select
.Sort Key1:=.Cells(1, 2), Order1:=xlDescending, Key2:=.Cells(1, 1), Order2:=xlAscending, Header:=xlNo
va = .Resize(NOC, 2).Value
Columns(HC).Resize(, 2).Clear
End With
If y >= UBound(VBX, 2) Then y = 0
For i = 1 To UBound(va, 1)
y = y + 1
VBX(1, y) = va(i, 1)
y = y + 1
VBX(1, y) = va(i, 2)
Next
End Sub