Hello,
I have the below code and would like a second pair of eye to make sure that it is going to be running quickly and efficiently. Anything that can be done to remove duplicated actions in the code that is not necessary?
The goal of the code is to open PDF files based on specified location and extract the PDF data into Excel. Once in Excel, scraping report to extract data based on notations in ExtractingData function of code and adding into two separate sheets to then add into format of AuditData sheet per ProcessedAudits function section and then finally highlighting and coding specific text for Scope and Risk at the end of the code. Loop will run for each PDF file found in specified folder. FYI - Data extracted from PDF file is not in a table.
Let me know if more information is needed. I cannot add sample data since it is sensitive data.
I have the below code and would like a second pair of eye to make sure that it is going to be running quickly and efficiently. Anything that can be done to remove duplicated actions in the code that is not necessary?
The goal of the code is to open PDF files based on specified location and extract the PDF data into Excel. Once in Excel, scraping report to extract data based on notations in ExtractingData function of code and adding into two separate sheets to then add into format of AuditData sheet per ProcessedAudits function section and then finally highlighting and coding specific text for Scope and Risk at the end of the code. Loop will run for each PDF file found in specified folder. FYI - Data extracted from PDF file is not in a table.
Let me know if more information is needed. I cannot add sample data since it is sensitive data.
VBA Code:
Sub PDF_Extraction()
'Macro opens PDF Files as an editable Word Document
'Copies the contents of the Word document
'Pastes the Clipboard contents into Excel
'Declare Variables
Dim myWorksheet As Worksheet
Dim wordApp As Object
Dim myWshShell As wshShell
Dim strPath
Dim oDoc As Object
Dim strFile As String
Dim registryKey As String
Dim wordVersion As String
'Set Variables
'Error
On Error Resume Next
Set wordApp = GetObject(, "Word.Application")
If Err Then
Set wordApp = CreateObject("Word.Application")
End If
On Error GoTo 0
Set myWshShell = New wshShell
strPath = "C:\Users\mh15601\Desktop\Projects\Audit Plan\2019\Testing\PDF Files\DO NOT DELETE\"
wordVersion = wordApp.Version
registryKey = "HKCU\SOFTWARE\Microsoft\Office\" & wordVersion & "\Word\Options\"
'Optimize Macro
Application.ScreenUpdating = False
Application.EnableEvents = False
Application.Calculation = xlCalculationManual
'Open and Copy PDF Files
myWshShell.RegWrite registryKey & "DisableConvertPdfWarning", 1, "REG_DWORD"
strFile = Dir$(strPath & "*.pdf")
While strFile <> ""
Set oDoc = wordApp.Documents.Open(fileName:=strPath & strFile, _
ConfirmConversions:=False)
'Copy Data from Word
oDoc.Content.Copy
'Excel
Set myWorksheet = ActiveWorkbook.Worksheets("PDF FILE")
With myWorksheet
'.Range("A1").Activate
'.PasteSpecial Format:="HTML"
.Range("A1").Select
.Paste
End With
'Remove Shapes
Dim shp As Shape
For Each shp In ActiveSheet.Shapes
shp.Delete
Next shp
'Close Word
oDoc.Close SaveChanges:=0
'myWshShell.RegWrite registryKey & "DisableConvertPdfWarning", 0, "REG_DWORD"
'Run Excel Macros
With myWorksheet
Run ExtractingData
Run ProcessedAudits
Run HighlightStringsAuditScope
Run HighlightStringsRiskDescription
End With
'Clear Other Sheet
With Worksheets("Other")
.Range("A2:E31").Delete Shift:=xlUp
.Range("M:O").Delete Shift:=xlToLeft
Range("A2").Select
End With
'Clear ChartData Sheet
Sheets("ChartData").Cells.Delete Shift:=xlUp
Range("A1").Select
'Clear PDF FILE Sheet
Sheets("PDF FILE").Cells.Delete Shift:=xlUp
Sheets("PDF FILE").Select
Range("A1").Select
strFile = Dir$()
Wend
'Clear Word and PDF
Set wordApp = Nothing
Set myWshShell = Nothing
'Message Box when Complete
MsgBox "Complete!"
'Reset Macro Optimization Settings
Application.EnableEvents = True
Application.Calculation = xlCalculationAutomatic
Application.ScreenUpdating = True
End Sub
Function ExtractingData()
' ExtractingData Macro
' Purpose: Extract PDF Audit Files data from Excel to be processed into SharePoint
' Created by Miriam Hamid
' Created on 9/10/2019
'Define Variable
Dim lRow As Long
'Set Variable
lRow = Range("A" & Rows.Count).End(xlUp).Row
Application.EnableEvents = False
Worksheets("PDF FILE").Activate
With Worksheets("PDF FILE")
'Clear FindSettings
Cells.Find(What:="", After:=ActiveCell, LookIn:=xlFormulas, LookAt _
:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:= _
False, SearchFormat:=False).Activate
'Find "Report Issuance Date" Data and Paste into ChartData tab
On Error Resume Next
Cells.Find(What:="Report Issuance Date", After:=ActiveCell, LookIn:=xlFormulas, _
LookAt:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, _
MatchCase:=False, SearchFormat:=False).Activate
On Error GoTo 0
Range(ActiveCell.Offset(1, 0), ActiveCell.Offset(1, 0)).Select
Selection.Copy Destination:=Sheets("Other").Range("A2")
'Find "Audit Reference Number" Data and Paste into ChartData tab
Sheets("PDF FILE").Select
On Error Resume Next
Cells.Find(What:="Audit Reference Number", After:=ActiveCell, LookIn:=xlFormulas, _
LookAt:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, _
MatchCase:=False, SearchFormat:=False).Activate
On Error GoTo 0
Range(ActiveCell.Offset(1, 0), ActiveCell.Offset(2, 0)).Select
Selection.Copy Destination:=Sheets("Other").Range("A3")
'Find "Rating" Data and Paste into ChartData tab
Sheets("PDF FILE").Select
On Error Resume Next
Cells.Find(What:="Rating", After:=ActiveCell, LookIn:=xlFormulas, _
LookAt:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, _
MatchCase:=False, SearchFormat:=False).Activate
On Error GoTo 0
Range(ActiveCell.Offset(0, 0), ActiveCell.Offset(2, 0)).Select
Selection.Copy Destination:=Sheets("Other").Range("A5")
'Find "Executive(s)" Data and Paste into ChartData tab
Sheets("PDF FILE").Select
'Cells.Find(What:="Executive(s)", After:=ActiveCell, LookIn:=xlFormulas, _
LookAt:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, _
MatchCase:=False, SearchFormat:=False).Activate
'ActiveCell.Select
'Selection.Copy Destination:=Sheets("Other").Range("A7")
On Error Resume Next
Cells.Find(What:="Accountable", After:=ActiveCell, LookIn:=xlFormulas, _
LookAt:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, _
MatchCase:=False, SearchFormat:=False).Activate
On Error GoTo 0
Range(Selection, Selection.End(xlDown)).Select
Selection.Copy Destination:=Sheets("Other").Range("M1")
'Selection.Copy
'Worksheets("Other").Range("M1").PasteSpecial Paste:=xlPasteValues
Application.CutCopyMode = False
'Find Responsible and delete cells from Responsible down
Sheets("Other").Select
On Error Resume Next
Cells.Find(What:="Responsible", After:=ActiveCell, LookIn:=xlFormulas, _
LookAt:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, _
MatchCase:=False, SearchFormat:=False).Activate
On Error GoTo 0
Range(Selection, Selection.End(xlDown)).Delete Shift:=xlUp
'Remove "Accountable Execustive(s):" from text
Columns("M:Q").Select
Selection.Replace What:="Accountable ", Replacement:="", LookAt:=xlPart, _
SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _
ReplaceFormat:=False
Selection.Replace What:="Executive(s):", Replacement:="", LookAt:=xlPart _
, SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _
ReplaceFormat:=False
'Concatenation Formula
Range("A8").FormulaR1C1 = "=CONCATENATE(R1C13,R2C13,R3C13,R4C13,R5C13,R6C13,R7C13,R8C13,R9C13,R10C13,R11C13,R12C13,R13C13)"
'Find "Key Measures" Data and Paste into ChartData tab
Sheets("PDF FILE").Select
On Error Resume Next
Cells.Find(What:="Key Measures", After:=ActiveCell, LookIn:=xlFormulas, LookAt _
:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:= _
False, SearchFormat:=False).Activate
On Error GoTo 0
Range(ActiveCell.Offset(0, 0), ActiveCell.Offset(12, 4)).Select
Selection.Copy Destination:=Sheets("Other").Range("A10")
'Formula for above data
Sheets("Other").Select
Range("E2").FormulaR1C1 = "=R2C1"
Range("E3").FormulaR1C1 = "=MID(R3C1,FIND(""Audit ID"",R3C1)+9,99)"
Range("E4").FormulaR1C1 = "=MID(R4C1,FIND(""Report ID"",R4C1)+10,99)"
Range("E5").FormulaR1C1 = "=TEXTJOIN("" "",,MID(R5C1,FIND(""Audit Rating:"",R5C1)+14,99),R6C1)"
Range("E7").FormulaR1C1 = "=TRIM(R7C1)"
Range("E8").FormulaR1C1 = "=TRIM(R8C1)"
Range("E:E").Copy
Range("E:E").PasteSpecial Paste:=xlPasteValues, Operation:=xlNone, SkipBlanks _
:=False, Transpose:=False
Application.CutCopyMode = False
Range("A2").Select
'Find "Risk Description" Data and Paste into ChartData tab
Sheets("PDF FILE").Select
On Error Resume Next
Cells.Find(What:="Risk Description", After:=ActiveCell, LookIn:=xlFormulas, _
LookAt:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, _
MatchCase:=False, SearchFormat:=False).Activate
On Error GoTo 0
ActiveCell.CurrentRegion.Select
Selection.Copy Destination:=Sheets("ChartData").Range("AA1")
'Find "Issue#" Data and Paste into ChartData tab
Sheets("PDF FILE").Select
On Error Resume Next
Cells.Find(What:="Issue #", After:=ActiveCell, LookIn:=xlFormulas, _
LookAt:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, _
MatchCase:=False, SearchFormat:=False).Activate
On Error GoTo 0
ActiveCell.CurrentRegion.Select
Selection.Copy Destination:=Sheets("ChartData").Range("A1")
'Find "IBAM" Data and Paste into ChartData tab
Sheets("PDF FILE").Select
On Error Resume Next
Cells.Find(What:="Issue Relevance", After:=ActiveCell, LookIn:=xlFormulas, LookAt _
:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:= _
False, SearchFormat:=False).Activate
Cells.Find(What:="IBAM", After:=ActiveCell, LookIn:=xlFormulas, LookAt _
:=xlWhole, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:= _
False, SearchFormat:=False).Activate
On Error GoTo 0
'Range(ActiveCell.Offset(2, 0), ActiveCell.Offset(2, 1)).Select
Range(ActiveCell.Offset(0, 0), ActiveCell.Offset(0, 1)).Select
Selection.Copy Destination:=Sheets("ChartData").Range("H1")
'Find "Audit Scope" Data and Paste into ChartData tab
Sheets("PDF FILE").Select
On Error Resume Next
Cells.Find(What:="Audit Scope", After:=ActiveCell, LookIn:=xlFormulas, LookAt _
:=xlWhole, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:= _
False, SearchFormat:=False).Activate
On Error GoTo 0
ActiveCell.CurrentRegion.Select
Selection.Copy Destination:=Sheets("ChartData").Range("AJ1")
'Clear extra data after Audit Scope
Dim StartRange As String
Dim EndRange As String
Cells.Find(What:="List of Audit Entities and Accountable Executives").Select
StartRange = ActiveCell.Address
Selection.Offset(20, 0).Select
EndRange = ActiveCell.Address
ActiveSheet.Range(StartRange & ":" & EndRange).Select
Selection.Clear
Range("AJ1").Select
'Clear FindSettings
Cells.Find(What:="", After:=ActiveCell, LookIn:=xlFormulas, LookAt _
:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:= _
False, SearchFormat:=False).Activate
'Delete non-numeric rows in Column A
Dim LR3 As Long, i3 As Long
With Sheets("ChartData")
LR3 = .Range("A" & .Rows.Count).End(xlUp).Row
For i3 = LR3 To 2 Step -1
If Not IsNumeric(.Range("A" & i3).Value) Then .Rows(i3).Delete
Next i3
End With
'Sheets("ChartData").Select
'With Range("A2", Cells(Rows.Count, 1).End(xlUp))
' .SpecialCells(xlCellTypeConstants, xlTextValues).EntireRow.Delete
'End With
'Convert Text to Column IBAM Data
Sheets("ChartData").Select
Application.DisplayAlerts = False
If Range("I1") = "" Then
Else
Range("I1").TextToColumns Destination:=Range("J1"), DataType:=xlDelimited, _
TextQualifier:=xlDoubleQuote, ConsecutiveDelimiter:=False, Tab:=False, _
Semicolon:=False, Comma:=True, Space:=False, Other:=False, FieldInfo _
:=Array(Array(1, 1), Array(2, 1), Array(3, 1)), TrailingMinusNumbers:=True
Application.DisplayAlerts = True
End If
End With
'IBAM Formula
Range("D2:D" & lRow).Formula = "=IFNA(IF(RC[-3]=HLOOKUP(RC[-3],R1C9:R1C32,1,0),""IBAM"",""""),"""")"
Range("E2:E" & lRow).Formula = "=RC[-2]&"" ""&RC[-1]"
Range("D2:E" & lRow).Copy
Range("D2:E" & lRow).PasteSpecial Paste:=xlPasteValues, Operation:=xlNone, SkipBlanks _
:=False, Transpose:=False
Application.CutCopyMode = False
Range("A1").Select
Application.EnableEvents = True
End Function
Function ProcessedAudits()
'Macro to extract audit data from PDF FILE into Excel
'Created by Miriam Hamid on 9/10/19
'Define Variables
Dim LR As Long, LR2 As Long
Dim LastR As Long
Dim rng As Range
Dim i As String
Dim SourceRange As Range
'Set Variables
LR = Sheets("AuditData").Cells(Rows.Count, "A").End(xlUp).Row + 1
LastR = Range("A" & Rows.Count).End(xlUp).Row
LR2 = Sheets("ChartData").Cells(Rows.Count, "AA").End(xlUp).Row
Set SourceRange = Sheets("ChartData").Range("AA3" & LR2)
Worksheets("AuditData").Activate
'Add Audit Data
With Worksheets("AuditData")
Range("A" & LR).FormulaR1C1 = "=Other!R3C5"
Range("B" & LR).FormulaR1C1 = "=Other!R8C5"
Range("C" & LR).FormulaR1C1 = "=Other!R2C5"
Range("D" & LR).FormulaR1C1 = "=Other!R5C5"
Range("E" & LR).FormulaR1C1 = "=Other!R4C5"
Range("F" & LR).FormulaR1C1 = "=Other!R7C5"
Range("G" & LR).FormulaR1C1 = "=COUNTIF(ChartData!C3,""Level 1"")"
Range("H" & LR).FormulaR1C1 = "=COUNTIF(ChartData!C3,""Level 2"")"
Range("I" & LR).FormulaR1C1 = "=COUNTIF(ChartData!C3,""Level 3"")"
Range("J" & LR).FormulaR1C1 = "=COUNTIF(ChartData!C3,""Level 4"")"
Range("K" & LR).FormulaR1C1 = "=COUNTIF(ChartData!C3,""Level 5"")"
Range("L" & LR).FormulaR1C1 = "=SUM(RC[-5]:RC[-1])"
Range("M" & LR).FormulaR1C1 = "=COUNTIF(ChartData!C5,""Level 1 IBAM"")"
Range("N" & LR).FormulaR1C1 = "=COUNTIF(ChartData!C5,""Level 2 IBAM"")"
Range("O" & LR).FormulaR1C1 = "=COUNTIF(ChartData!C5,""Level 3 IBAM"")"
Range("P" & LR).FormulaR1C1 = "=COUNTIF(ChartData!C5,""Level 4 IBAM"")"
Range("Q" & LR).FormulaR1C1 = "=COUNTIF(ChartData!C5,""Level 5 IBAM"")"
Range("R" & LR).FormulaR1C1 = "=SUM(RC[-5]:RC[-1])"
Range("S" & LR).FormulaR1C1 = "=TRIM(LEFT(SUBSTITUTE('Other'!R17C1,""%"",REPT("" "",100)),100))"
Range("T" & LR).FormulaR1C1 = "=TRIM(LEFT(SUBSTITUTE('Other'!R17C3,""%"",REPT("" "",100)),100))"
Range("U" & LR).FormulaR1C1 = "=TRIM(LEFT(SUBSTITUTE('Other'!R20C3,""%"",REPT("" "",100)),100))"
Range("V" & LR).FormulaR1C1 = "=TRIM(LEFT(SUBSTITUTE('Other'!R22C3,""%"",REPT("" "",100)),100))"
Range("W" & LR).FormulaR1C1 = "=TEXTJOIN("" | "",,ChartData!R[1]C[4]:R[20]C[4])"
Range("X" & LR).FormulaR1C1 = "=TEXTJOIN("" | "",,ChartData!RC[12]:R[1]C[12])"
' For Each rng In SourceRange
' i = i & rng & " "
' Next rng
' Range("Y" & LR).Value = i
'Copy PasteSpecial Values
ActiveCell.CurrentRegion.Select
ActiveCell.CurrentRegion.Copy
ActiveCell.CurrentRegion.PasteSpecial Paste:=xlPasteValues, Operation:=xlNone, SkipBlanks _
:=False, Transpose:=False
Application.CutCopyMode = False
Range("A1").Select
End With
End Function
Function HighlightStringsAuditScope()
Dim wb As Workbook: Set wb = ThisWorkbook
Dim ws2 As Worksheet: Set ws2 = wb.Sheets("Keywords")
Dim ws As Worksheet: Set ws = wb.Sheets("AuditData")
Dim d As Object, RX As Object, M As Object
Dim a As Variant
Dim c As Range
Dim i As Long, Cat As Long
Set d = CreateObject("Scripting.Dictionary")
d.CompareMode = 1
a = ws2.Range("A1", ws2.Range("B" & Rows.Count).End(xlUp)).Value
Cat = 2
For i = 2 To UBound(a)
If a(i, 1) <> a(i - 1, 1) Then Cat = Cat + 1
d(a(i, 2)) = Cat
Next i
Set RX = CreateObject("VBScript.RegExp")
RX.Global = True
RX.IgnoreCase = True
RX.Pattern = Replace(Replace(Join(Application.Transpose(ws2.Range("B2", ws2.Range("B" & Rows.Count).End(xlUp)).Value), "|"), "(", "\("), ")", "\)")
For Each c In ws.Range("X2", ws.Range("X" & Rows.Count).End(xlUp))
With c
If RX.Test(.Value) Then .Interior.Color = vbYellow
For Each M In RX.Execute(.Value)
With .Characters(M.FirstIndex + 1, Len(M)).Font
.Bold = True
.ColorIndex = d(CStr(M))
End With
Next M
End With
Next c
'MsgBox ("Macro is Finished")
End Function
Function HighlightStringsRiskDescription()
Dim wb As Workbook: Set wb = ThisWorkbook
Dim ws2 As Worksheet: Set ws2 = wb.Sheets("Keywords")
Dim ws As Worksheet: Set ws = wb.Sheets("AuditData")
Dim d As Object, RX As Object, M As Object
Dim a As Variant
Dim c As Range
Dim i As Long, Cat As Long
Set d = CreateObject("Scripting.Dictionary")
d.CompareMode = 1
a = ws2.Range("A1", ws2.Range("B" & Rows.Count).End(xlUp)).Value
Cat = 2
For i = 2 To UBound(a)
If a(i, 1) <> a(i - 1, 1) Then Cat = Cat + 1
d(a(i, 2)) = Cat
Next i
Set RX = CreateObject("VBScript.RegExp")
RX.Global = True
RX.IgnoreCase = True
RX.Pattern = Replace(Replace(Join(Application.Transpose(ws2.Range("B2", ws2.Range("B" & Rows.Count).End(xlUp)).Value), "|"), "(", "\("), ")", "\)")
For Each c In ws.Range("W2", ws.Range("W" & Rows.Count).End(xlUp))
With c
If RX.Test(.Value) Then .Interior.Color = vbYellow
For Each M In RX.Execute(.Value)
With .Characters(M.FirstIndex + 1, Len(M)).Font
.Bold = True
.ColorIndex = d(CStr(M))
End With
Next M
End With
Next c
'MsgBox ("Macro is Finished")
End Function
Public Function ConCatRange(ByVal target As Range, Optional delim As String = "") As String
Dim c As Range
For Each c In target
If c.Value <> "" Then ConCatRange = ConCatRange & delim & c.Value
Next c
ConCatRange = Mid(ConCatRange, Len(delim) + 1)
End Function