I have some base code that I found on this site that will parse a PDF based on a word. I need to expand that to parse a multi-page PDF into multiple folders based on a series of words. The words and corresponding folders are on a Sheet, the code should open the Source PDF, start with page 1, word 1 and compare that word with the list from the sheet (column B). If found, it will look at the folder and filename in the corresponding row (offset 3 and 4). It will then extract the page to that folder with that filename (appending date/time to the end for uniqueness). It will then move to the next page on the source PDF. If it finds a word in Sheet1.column B, it will look to the folder and filename and if the file exists, it will insert that page. If it doesnt exist (new folder/filename), it will create it.
I think I have the extracting part figured out, I just need to be able to merge new pages that are found after a PDF has been created .
Sub Search_text_and_splitPages(F_path As String)
Dim Acro_App As Acrobat.AcroApp
Dim Acro_AVDoc As Acrobat.AcroAVDoc
Dim Acro_PDDoc As Acrobat.AcroPDDoc
Dim Acro_JSO As Object
Dim Search_Word As String
Dim Pg_Cntr As Long
Dim Wrd_cntr As Long
Dim word As String
Dim Pg_Extract As Integer
Dim Flag As Boolean
Dim FindRow As Range
Dim strDestFileName As String
Dim dirRootFolder As String
Dim dirDestFolder As String
Dim dirDestFile As String
On Error GoTo errhandler:
'Initialize variables
Set Acro_App = New Acrobat.AcroApp
Set Acro_AVDoc = New Acrobat.AcroAVDoc
'open PDF File
Acro_AVDoc.Open F_path, ""
Set Acro_PDDoc = Acro_AVDoc.GetPDDoc
'Initialize JSO object
Set Acro_JSO = Acro_PDDoc.GetJSObject
For Pg_Cntr = 0 To Acro_JSO.numpages - 1
For Wrd_cntr = 0 To Acro_JSO.getpagenumwords(Pg_Cntr) - 1
word = Acro_JSO.getpagenthword(Pg_Cntr, Wrd_cntr)
If VBA.VarType(word) = vbString And word <> "" Then
Set FindRow = Range("B:B").Find(What:=word, LookIn:=xlValues, LookAt:=xlWhole, SearchOrder:=xlByRows, MatchCase:=True)
If Not FindRow Is Nothing Then ' if findrow is something (Prevents Errors)
dirRootFolder = FindRow.Offset(0, 3).Value
strDestFileName = FindRow.Offset(0, 4).Value
If dirRootFolder = "" Then GoTo nxtpage
dirDestFolder = dirDestRootPath & "\" & dirRootFolder
dirDestFile = dirDestFolder & "\" & strDestFileName & "_" & ParseCreationDateTime & ".pdf"
'select the word by calling JS object
Call Acro_JSO.getpagenthword(Pg_Cntr, Wrd_cntr)
'Extract PDF Page (OR INSERT IF EXISTS)
Acro_JSO.extractpages Pg_Cntr, Pg_Cntr, dirDestFile
flag = True
If Pg_Cntr = Acro_JSO.numpages - 1 Then GoTo exit_program
GoTo nxtpage
End If
End If
DoEvents
Next Wrd_cntr
nxtpage:
Next Pg_Cntr
If Pg_Cntr = Acro_JSO.numpages And flag = False Then GoTo ExitMacro Else GoTo exit_program
errhandler:
MsgBox "Error No: " & Err.Number & ", Error Description: " & Err.Description
GoTo Out
ExitMacro:
MsgBox "No Code Found in PDF"
GoTo Out
exit_program:
ParseCompletionDateTime = Now
MsgBox ("Parse Completion: " & ParseCompletionDateTime)
Out:
A.Close
Acro_AVDoc.Close False
Acro_App.Exit
Set Acro_JSO = Nothing
Set Acro_PDDoc = Nothing
Set Acro_AVDoc = Nothing
Set Acro_App = Nothing
End Sub
I think I have the extracting part figured out, I just need to be able to merge new pages that are found after a PDF has been created .
Sub Search_text_and_splitPages(F_path As String)
Dim Acro_App As Acrobat.AcroApp
Dim Acro_AVDoc As Acrobat.AcroAVDoc
Dim Acro_PDDoc As Acrobat.AcroPDDoc
Dim Acro_JSO As Object
Dim Search_Word As String
Dim Pg_Cntr As Long
Dim Wrd_cntr As Long
Dim word As String
Dim Pg_Extract As Integer
Dim Flag As Boolean
Dim FindRow As Range
Dim strDestFileName As String
Dim dirRootFolder As String
Dim dirDestFolder As String
Dim dirDestFile As String
On Error GoTo errhandler:
'Initialize variables
Set Acro_App = New Acrobat.AcroApp
Set Acro_AVDoc = New Acrobat.AcroAVDoc
'open PDF File
Acro_AVDoc.Open F_path, ""
Set Acro_PDDoc = Acro_AVDoc.GetPDDoc
'Initialize JSO object
Set Acro_JSO = Acro_PDDoc.GetJSObject
For Pg_Cntr = 0 To Acro_JSO.numpages - 1
For Wrd_cntr = 0 To Acro_JSO.getpagenumwords(Pg_Cntr) - 1
word = Acro_JSO.getpagenthword(Pg_Cntr, Wrd_cntr)
If VBA.VarType(word) = vbString And word <> "" Then
Set FindRow = Range("B:B").Find(What:=word, LookIn:=xlValues, LookAt:=xlWhole, SearchOrder:=xlByRows, MatchCase:=True)
If Not FindRow Is Nothing Then ' if findrow is something (Prevents Errors)
dirRootFolder = FindRow.Offset(0, 3).Value
strDestFileName = FindRow.Offset(0, 4).Value
If dirRootFolder = "" Then GoTo nxtpage
dirDestFolder = dirDestRootPath & "\" & dirRootFolder
dirDestFile = dirDestFolder & "\" & strDestFileName & "_" & ParseCreationDateTime & ".pdf"
'select the word by calling JS object
Call Acro_JSO.getpagenthword(Pg_Cntr, Wrd_cntr)
'Extract PDF Page (OR INSERT IF EXISTS)
Acro_JSO.extractpages Pg_Cntr, Pg_Cntr, dirDestFile
flag = True
If Pg_Cntr = Acro_JSO.numpages - 1 Then GoTo exit_program
GoTo nxtpage
End If
End If
DoEvents
Next Wrd_cntr
nxtpage:
Next Pg_Cntr
If Pg_Cntr = Acro_JSO.numpages And flag = False Then GoTo ExitMacro Else GoTo exit_program
errhandler:
MsgBox "Error No: " & Err.Number & ", Error Description: " & Err.Description
GoTo Out
ExitMacro:
MsgBox "No Code Found in PDF"
GoTo Out
exit_program:
ParseCompletionDateTime = Now
MsgBox ("Parse Completion: " & ParseCompletionDateTime)
Out:
A.Close
Acro_AVDoc.Close False
Acro_App.Exit
Set Acro_JSO = Nothing
Set Acro_PDDoc = Nothing
Set Acro_AVDoc = Nothing
Set Acro_App = Nothing
End Sub