brawnystaff
Board Regular
- Joined
- Aug 9, 2012
- Messages
- 109
- Office Version
- 365
Currently have Excel 2016 and Acrobat Pro installed on my computer and using the macro below to extract pages based on list of keywords (actually deletes pages that does not have the keywords in them and saves as a new file, which is basically the same thing).
The only problem is that currently, it extracts only pages that have all the keywords in the selection in them. I need to modify so that if it has one keyword in the list, it will extract the page. See below..
Any ideas? Thanks...
The only problem is that currently, it extracts only pages that have all the keywords in the selection in them. I need to modify so that if it has one keyword in the list, it will extract the page. See below..
Code:
Option Explicit
Sub Extract_PDF_Num_Keyword(control As IRibbonControl)
'extract pages from selected PDF based on selected keywords in sheet
Dim xMsg As String
Dim xInput As String
Dim xOutput As String
Dim xResponse As Long
Dim xErrors As Long
Dim xDeleted As Long
Dim i As Long
Dim j As Long
Dim AcroApp As CAcroApp
Dim AcroPDDoc As CAcroPDDoc
Dim AcroHiliteList As CAcroHiliteList
Dim AcroTextSelect As CAcroPDTextSelect
Dim PageNumber As Variant
Dim PageContent As Variant
Dim xContent As Variant
Dim Cell As Variant
Dim Rng As Range
xInput = Application.GetOpenFilename("PDF Files (*.pdf), *.pdf")
If xInput = "False" Then Exit Sub
xOutput = FolderPart(xInput) & ActiveCell.Value & "_Keyword" & ".pdf"
xResponse = MsgBox("About to extract all pages which contain all keywords from the range A1:A" & Chr(10) _
& Chr(10) & "Input:" & Chr(9) & xInput _
& Chr(10) & "Output:" & Chr(9) & xOutput _
& Chr(10) & Chr(10) & "('OK' to continue, 'Cancel' to quit.)", vbOKCancel, "Delete Pages")
If xResponse = 2 Then
MsgBox "User chose not to continue. Run terminated."
Exit Sub
End If
' Files and data OK?
If Dir(xInput) = "" Then xMsg = "Input file not found - " & xInput & Chr(10)
'If Dir(xOutput) <> "" Then xMsg = "Output file exists - " & xOutput & Chr(10)
Set Rng = Selection
If xMsg <> "" Then
MsgBox (xMsg & Chr(10) & "Run cancelled.")
Exit Sub
End If
' Open the PDF...
Set AcroApp = CreateObject("AcroExch.App")
Set AcroPDDoc = CreateObject("AcroExch.PDDoc")
If AcroPDDoc.Open(xInput) <> True Then
MsgBox (xInput & " couldn't be opened - run cancelled.")
Exit Sub
End If
' Read each page...
For i = AcroPDDoc.GetNumPages - 1 To 0 Step -1
Set PageNumber = AcroPDDoc.AcquirePage(i)
Set PageContent = CreateObject("AcroExch.HiliteList")
'Get up to 9,999 words from page...
If PageContent.Add(0, 9999) <> True Then
Debug.Print "Add Error on Page " & i + 1
xErrors = xErrors + 1
Else
Set AcroTextSelect = PageNumber.CreatePageHilite(PageContent)
If Not AcroTextSelect Is Nothing Then
xContent = ""
For j = 0 To AcroTextSelect.GetNumText - 1
xContent = xContent & AcroTextSelect.GetText(j)
Next j
For Each Cell In Rng
If Not InStr(1, UCase(xContent), UCase(Cell), vbTextCompare) > 0 Then
Debug.Print "Page " & i + 1 & " contains " & Cell & " - " & xContent
' To avoid problems with the delete...
Set AcroTextSelect = Nothing
Set PageContent = Nothing
Set PageNumber = Nothing
If AcroPDDoc.DeletePages(i, i) = False Then
MsgBox ("Error deleting page " & i + 1 & " - run cancelled.")
Exit Sub
End If
xDeleted = xDeleted + 1
Exit For
End If
Next Cell
End If
End If
Next i
If AcroPDDoc.Save(PDSaveFull, xOutput) = False Then
MsgBox "Cannot save the modified document"
Exit Sub
Else
MsgBox (xDeleted & " pages deleted. (" & xErrors & " errors.)")
End If
AcroPDDoc.Close
AcroApp.Exit
End Sub
Function FolderPart(sPath As String) As String
FolderPart = Left(sPath, InStrRev(sPath, "\"))
End Function
Any ideas? Thanks...