Hello,
I have the below code that will open PDF file in Word and copy the full document into excel and then do a search in column A, but this does not work for all PDF files. There are some PDF files where the data is in another column and named differently.
Once the PDF file is open in word, could I instead do various word searches and copy that paragraph/table data into excel cell as it is towards the end of my code?
I have the below code that will open PDF file in Word and copy the full document into excel and then do a search in column A, but this does not work for all PDF files. There are some PDF files where the data is in another column and named differently.
Once the PDF file is open in word, could I instead do various word searches and copy that paragraph/table data into excel cell as it is towards the end of my code?
VBA Code:
Sub PDF_Extraction()
'Macro opens PDF Files as an editable Word Document
'Copies the contents of the Word document
'Pastes the Clipboard contents into Excel
'Declare Variables
Dim myWorksheet As Worksheet
Dim wordApp As Object
Dim myWshShell As wshShell
Dim strPath
Dim oDoc As Object
Dim strFile As String
Dim registryKey As String
Dim wordVersion As String
'Set Variables
'Error
On Error Resume Next
Set wordApp = GetObject(, "Word.Application")
If Err Then
Set wordApp = CreateObject("Word.Application")
End If
On Error GoTo 0
Set myWshShell = New wshShell
strPath = "C:\Users\mh15601\Desktop\Projects\Audit Plan\2019\Testing\PDF Files\DO NOT DELETE\"
wordVersion = wordApp.Version
registryKey = "HKCU\SOFTWARE\Microsoft\Office\" & wordVersion & "\Word\Options\"
'Optimize Macro
Application.ScreenUpdating = False
Application.EnableEvents = False
Application.Calculation = xlCalculationManual
'Open and Copy PDF Files
myWshShell.RegWrite registryKey & "DisableConvertPdfWarning", 1, "REG_DWORD"
strFile = Dir$(strPath & "*.pdf")
While strFile <> ""
Set oDoc = wordApp.Documents.Open(fileName:=strPath & strFile, _
ConfirmConversions:=False)
'Copy Data from Word
oDoc.Content.Copy
'Excel
Set myWorksheet = ActiveWorkbook.Worksheets("PDF FILE")
With myWorksheet
'.Range("A1").Activate
'.PasteSpecial Format:="HTML"
.Range("A1").Select
.Paste
End With
'Remove Shapes
Dim shp As Shape
For Each shp In ActiveSheet.Shapes
shp.Delete
Next shp
'Close Word
oDoc.Close SaveChanges:=0
'myWshShell.RegWrite registryKey & "DisableConvertPdfWarning", 0, "REG_DWORD"
'Run Excel Macros
With myWorksheet
Run ExtractingData
Run ProcessedAudits
End With
'Clear Other Sheet
With Worksheets("Other")
.Range("A2:E31").Delete Shift:=xlUp
.Range("M:O").Delete Shift:=xlToLeft
Range("A2").Select
End With
'Clear ChartData Sheet
Sheets("ChartData").Cells.Delete Shift:=xlUp
Range("A1").Select
'Clear PDF FILE Sheet
Sheets("PDF FILE").Cells.Delete Shift:=xlUp
Sheets("PDF FILE").Select
Range("A1").Select
strFile = Dir$()
Wend
'Clear Word and PDF
Set wordApp = Nothing
Set myWshShell = Nothing
'Message Box when Complete
MsgBox "Complete!"
'Reset Macro Optimization Settings
Application.EnableEvents = True
Application.Calculation = xlCalculationAutomatic
Application.ScreenUpdating = True
End Sub
Function ExtractingData()
' ExtractingData Macro
' Purpose: Extract PDF Audit Files data from Excel to be processed into SharePoint
' Created by Miriam Hamid
' Created on 9/10/2019
'Define Variable
Dim lRow As Long
'Set Variable
lRow = Range("A" & Rows.Count).End(xlUp).Row
Application.EnableEvents = False
Worksheets("PDF FILE").Activate
With Worksheets("PDF FILE")
'Clear FindSettings
Cells.Find(What:="", After:=ActiveCell, LookIn:=xlFormulas, LookAt _
:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:= _
False, SearchFormat:=False).Activate
'Find "Report Issuance Date" Data and Paste into ChartData tab
On Error Resume Next
Cells.Find(What:="Report Issuance Date", After:=ActiveCell, LookIn:=xlFormulas, _
LookAt:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, _
MatchCase:=False, SearchFormat:=False).Activate
On Error GoTo 0
Range(ActiveCell.Offset(1, 0), ActiveCell.Offset(1, 0)).Select
Selection.Copy Destination:=Sheets("Other").Range("A2")
'Find "Audit Reference Number" Data and Paste into ChartData tab
Sheets("PDF FILE").Select
On Error Resume Next
Cells.Find(What:="Audit Reference Number", After:=ActiveCell, LookIn:=xlFormulas, _
LookAt:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, _
MatchCase:=False, SearchFormat:=False).Activate
On Error GoTo 0
Range(ActiveCell.Offset(1, 0), ActiveCell.Offset(2, 0)).Select
Selection.Copy Destination:=Sheets("Other").Range("A3")
'Find "Rating" Data and Paste into ChartData tab
Sheets("PDF FILE").Select
On Error Resume Next
Cells.Find(What:="Rating", After:=ActiveCell, LookIn:=xlFormulas, _
LookAt:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, _
MatchCase:=False, SearchFormat:=False).Activate
On Error GoTo 0
Range(ActiveCell.Offset(0, 0), ActiveCell.Offset(2, 0)).Select
Selection.Copy Destination:=Sheets("Other").Range("A5")
'Find "Executive(s)" Data and Paste into ChartData tab
Sheets("PDF FILE").Select
'Cells.Find(What:="Executive(s)", After:=ActiveCell, LookIn:=xlFormulas, _
LookAt:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, _
MatchCase:=False, SearchFormat:=False).Activate
'ActiveCell.Select
'Selection.Copy Destination:=Sheets("Other").Range("A7")
On Error Resume Next
Cells.Find(What:="Accountable", After:=ActiveCell, LookIn:=xlFormulas, _
LookAt:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, _
MatchCase:=False, SearchFormat:=False).Activate
On Error GoTo 0
Range(Selection, Selection.End(xlDown)).Select
Selection.Copy Destination:=Sheets("Other").Range("M1")
'Selection.Copy
'Worksheets("Other").Range("M1").PasteSpecial Paste:=xlPasteValues
Application.CutCopyMode = False
'Find Responsible and delete cells from Responsible down
Sheets("Other").Select
On Error Resume Next
Cells.Find(What:="Responsible", After:=ActiveCell, LookIn:=xlFormulas, _
LookAt:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, _
MatchCase:=False, SearchFormat:=False).Activate
On Error GoTo 0
Range(Selection, Selection.End(xlDown)).Delete Shift:=xlUp
'Remove "Accountable Execustive(s):" from text
Columns("M:Q").Select
Selection.Replace What:="Accountable ", Replacement:="", LookAt:=xlPart, _
SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _
ReplaceFormat:=False
Selection.Replace What:="Executive(s):", Replacement:="", LookAt:=xlPart _
, SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _
ReplaceFormat:=False
'Concatenation Formula
Range("A8").FormulaR1C1 = "=CONCATENATE(R1C13,R2C13,R3C13,R4C13,R5C13,R6C13,R7C13,R8C13,R9C13,R10C13,R11C13,R12C13,R13C13)"
'Find "Key Measures" Data and Paste into ChartData tab
Sheets("PDF FILE").Select
On Error Resume Next
Cells.Find(What:="Key Measures", After:=ActiveCell, LookIn:=xlFormulas, LookAt _
:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:= _
False, SearchFormat:=False).Activate
On Error GoTo 0
Range(ActiveCell.Offset(0, 0), ActiveCell.Offset(12, 4)).Select
Selection.Copy Destination:=Sheets("Other").Range("A10")
'Formula for above data
Sheets("Other").Select
Range("E2").FormulaR1C1 = "=R2C1"
Range("E3").FormulaR1C1 = "=MID(R3C1,FIND(""Audit ID"",R3C1)+9,99)"
Range("E4").FormulaR1C1 = "=MID(R4C1,FIND(""Report ID"",R4C1)+10,99)"
Range("E5").FormulaR1C1 = "=TEXTJOIN("" "",,MID(R5C1,FIND(""Audit Rating:"",R5C1)+14,99),R6C1)"
Range("E7").FormulaR1C1 = "=TRIM(R7C1)"
Range("E8").FormulaR1C1 = "=TRIM(R8C1)"
Range("E:E").Copy
Range("E:E").PasteSpecial Paste:=xlPasteValues, Operation:=xlNone, SkipBlanks _
:=False, Transpose:=False
Application.CutCopyMode = False
Range("A2").Select
'Find "Risk Description" Data and Paste into ChartData tab
Sheets("PDF FILE").Select
On Error Resume Next
Cells.Find(What:="Risk Description", After:=ActiveCell, LookIn:=xlFormulas, _
LookAt:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, _
MatchCase:=False, SearchFormat:=False).Activate
On Error GoTo 0
ActiveCell.CurrentRegion.Select
Selection.Copy Destination:=Sheets("ChartData").Range("AA1")
'Find "Issue#" Data and Paste into ChartData tab
Sheets("PDF FILE").Select
On Error Resume Next
Cells.Find(What:="Issue #", After:=ActiveCell, LookIn:=xlFormulas, _
LookAt:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, _
MatchCase:=False, SearchFormat:=False).Activate
On Error GoTo 0
ActiveCell.CurrentRegion.Select
Selection.Copy Destination:=Sheets("ChartData").Range("A1")
'Find "IBAM" Data and Paste into ChartData tab
Sheets("PDF FILE").Select
On Error Resume Next
Cells.Find(What:="Issue Relevance", After:=ActiveCell, LookIn:=xlFormulas, LookAt _
:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:= _
False, SearchFormat:=False).Activate
Cells.Find(What:="IBAM", After:=ActiveCell, LookIn:=xlFormulas, LookAt _
:=xlWhole, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:= _
False, SearchFormat:=False).Activate
On Error GoTo 0
'Range(ActiveCell.Offset(2, 0), ActiveCell.Offset(2, 1)).Select
Range(ActiveCell.Offset(0, 0), ActiveCell.Offset(0, 1)).Select
Selection.Copy Destination:=Sheets("ChartData").Range("H1")
'Find "Audit Scope" Data and Paste into ChartData tab
Sheets("PDF FILE").Select
On Error Resume Next
Cells.Find(What:="Audit Scope", After:=ActiveCell, LookIn:=xlFormulas, LookAt _
:=xlWhole, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:= _
False, SearchFormat:=False).Activate
On Error GoTo 0
ActiveCell.CurrentRegion.Select
Selection.Copy Destination:=Sheets("ChartData").Range("AJ1")
'Clear FindSettings
Cells.Find(What:="", After:=ActiveCell, LookIn:=xlFormulas, LookAt _
:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:= _
False, SearchFormat:=False).Activate
'Delete non-numeric rows in Column A
Dim LR3 As Long, i3 As Long
With Sheets("ChartData")
LR3 = .Range("A" & .Rows.Count).End(xlUp).Row
For i3 = LR3 To 2 Step -1
If Not IsNumeric(.Range("A" & i3).Value) Then .Rows(i3).Delete
Next i3
End With
'Sheets("ChartData").Select
'With Range("A2", Cells(Rows.Count, 1).End(xlUp))
' .SpecialCells(xlCellTypeConstants, xlTextValues).EntireRow.Delete
'End With
'Convert Text to Column IBAM Data
Sheets("ChartData").Select
Application.DisplayAlerts = False
If Range("I1") = "" Then
Else
Range("I1").TextToColumns Destination:=Range("J1"), DataType:=xlDelimited, _
TextQualifier:=xlDoubleQuote, ConsecutiveDelimiter:=False, Tab:=False, _
Semicolon:=False, Comma:=True, Space:=False, Other:=False, FieldInfo _
:=Array(Array(1, 1), Array(2, 1), Array(3, 1)), TrailingMinusNumbers:=True
Application.DisplayAlerts = True
End If
End With
'IBAM Formula
Range("D2:D" & lRow).Formula = "=IFNA(IF(RC[-3]=HLOOKUP(RC[-3],R1C9:R1C32,1,0),""IBAM"",""""),"""")"
Range("E2:E" & lRow).Formula = "=RC[-2]&"" ""&RC[-1]"
Range("D2:E" & lRow).Copy
Range("D2:E" & lRow).PasteSpecial Paste:=xlPasteValues, Operation:=xlNone, SkipBlanks _
:=False, Transpose:=False
Application.CutCopyMode = False
Range("A1").Select
Application.EnableEvents = True
End Function
Function ProcessedAudits()
'Macro to extract audit data from PDF FILE into Excel
'Created by Miriam Hamid on 9/10/19
'Define Variables
Dim LR As Long, LR2 As Long
Dim LastR As Long
Dim rng As Range
Dim i As String
Dim SourceRange As Range
'Set Variables
LR = Sheets("AuditData").Cells(Rows.Count, "A").End(xlUp).Row + 1
LastR = Range("A" & Rows.Count).End(xlUp).Row
LR2 = Sheets("ChartData").Cells(Rows.Count, "AA").End(xlUp).Row
Set SourceRange = Sheets("ChartData").Range("AA3" & LR2)
Worksheets("AuditData").Activate
'Add Audit Data
With Worksheets("AuditData")
Range("A" & LR).FormulaR1C1 = "=Other!R3C5"
Range("B" & LR).FormulaR1C1 = "=Other!R8C5"
Range("C" & LR).FormulaR1C1 = "=Other!R2C5"
Range("D" & LR).FormulaR1C1 = "=Other!R5C5"
Range("E" & LR).FormulaR1C1 = "=Other!R4C5"
Range("F" & LR).FormulaR1C1 = "=Other!R7C5"
Range("G" & LR).FormulaR1C1 = "=COUNTIF(ChartData!C3,""Level 1"")"
Range("H" & LR).FormulaR1C1 = "=COUNTIF(ChartData!C3,""Level 2"")"
Range("I" & LR).FormulaR1C1 = "=COUNTIF(ChartData!C3,""Level 3"")"
Range("J" & LR).FormulaR1C1 = "=COUNTIF(ChartData!C3,""Level 4"")"
Range("K" & LR).FormulaR1C1 = "=COUNTIF(ChartData!C3,""Level 5"")"
Range("L" & LR).FormulaR1C1 = "=SUM(RC[-5]:RC[-1])"
Range("M" & LR).FormulaR1C1 = "=COUNTIF(ChartData!C5,""Level 1 IBAM"")"
Range("N" & LR).FormulaR1C1 = "=COUNTIF(ChartData!C5,""Level 2 IBAM"")"
Range("O" & LR).FormulaR1C1 = "=COUNTIF(ChartData!C5,""Level 3 IBAM"")"
Range("P" & LR).FormulaR1C1 = "=COUNTIF(ChartData!C5,""Level 4 IBAM"")"
Range("Q" & LR).FormulaR1C1 = "=COUNTIF(ChartData!C5,""Level 5 IBAM"")"
Range("R" & LR).FormulaR1C1 = "=SUM(RC[-5]:RC[-1])"
Range("S" & LR).FormulaR1C1 = "=TRIM(LEFT(SUBSTITUTE('Other'!R17C1,""%"",REPT("" "",100)),100))"
Range("T" & LR).FormulaR1C1 = "=TRIM(LEFT(SUBSTITUTE('Other'!R17C3,""%"",REPT("" "",100)),100))"
Range("U" & LR).FormulaR1C1 = "=TRIM(LEFT(SUBSTITUTE('Other'!R20C3,""%"",REPT("" "",100)),100))"
Range("V" & LR).FormulaR1C1 = "=TRIM(LEFT(SUBSTITUTE('Other'!R22C3,""%"",REPT("" "",100)),100))"
For Each rng In SourceRange
i = i & rng & " "
Next rng
Range("W" & LR).Value = i
'Copy PasteSpecial Values
ActiveCell.CurrentRegion.Select
ActiveCell.CurrentRegion.Copy
ActiveCell.CurrentRegion.PasteSpecial Paste:=xlPasteValues, Operation:=xlNone, SkipBlanks _
:=False, Transpose:=False
'Range("A2:V" & LastR).Copy
'Range("A2:V" & LastR).PasteSpecial Paste:=xlPasteValues, Operation:=xlNone, SkipBlanks _
:=False, Transpose:=False
Application.CutCopyMode = False
Range("A1").Select
End With
End Function
Public Function ConCatRange(ByVal target As Range, Optional delim As String = "") As String
Dim c As Range
For Each c In target
If c.Value <> "" Then ConCatRange = ConCatRange & delim & c.Value
Next c
ConCatRange = Mid(ConCatRange, Len(delim) + 1)
End Function
Thank you,