Search Page By Page Instead Of Word by Word

fiberboysa

Board Regular
Joined
Apr 25, 2012
Messages
106
Office Version
  1. 365
Platform
  1. Windows
I found the following VBA code here which search file word by word and page by page and returns the page number. I wish that it could be modified to search the file page by page NOT word by word and return page number. Then it will be more fast in my opinion.

VBA Code:
Sub FindTextAcroAll()  Dim FindWord     'Word you want to search
    Dim acroAppObj As Object
    Dim PDFDocObj As Object
    Dim myPDFPageHiliteObj As Object
    Dim Cell As Range, Rng As Range
    Dim PDFJScriptObj As Object
    Dim wordHilite As Object
    Dim Check3 As String
    Dim pAcroPDPage As CAcroPDPage
    Dim iword As Integer, iTotalWords As Integer
    Dim numOfPage As Integer, Nthpage As Integer
    Dim word As String, sPath As String
    Dim PageNumber As Long
    
   Set acroAppObj = CreateObject("AcroExch.App")
   Set PDFDocObj = CreateObject("AcroExch.PDDoc")
   Set myPDFPageHiliteObj = CreateObject("AcroExch.HiliteList")
   Check3 = myPDFPageHiliteObj.Add(0, 32767)


    sPath = Application.GetOpenFilename("PDF Files (*.pdf), *.pdf")       'Path of pdf where you want to search
    If sPath = "False" Then GoTo GetOut
   '  acroAppObj.Show
  Application.ScreenUpdating = False
  Application.Calculation = xlCalculationManual
   Set Rng = Selection
   'x = 0    uncomment if plan to cascade pages
  
   For Each Cell In Rng
      y = 1
    FindWord = Cell


    PDFDocObj.Open (sPath)


    numOfPage = PDFDocObj.GetNumPages


    word = vbNullString
 Set PDFJScriptObj = Nothing


    For Nthpage = 0 To numOfPage - 1  'substitute x for 0 if you want it go back and search past last known word
      Set pAcroPDPage = PDFDocObj.AcquirePage(Nthpage)
      Set wordHilite = pAcroPDPage.CreateWordHilite(myPDFPageHiliteObj)
      Set PDFJScriptObj = PDFDocObj.GetJSObject
       iTotalWords = wordHilite.GetNumText
        iTotalWords = PDFJScriptObj.getPageNumWords(Nthpage)
        ''check the each word
        For iword = 0 To iTotalWords - 1
           word = Trim(CStr(PDFJScriptObj.getPageNthWord(Nthpage, iword)))
            If word <> "" Then
                If LCase(word) = LCase(FindWord) Then   'LCase to make keyword search non-case specific
                  x = Nthpage + 1
                    PageNumber = Nthpage + 1
                  Cell.Offset(0, y).Value = PageNumber
                    y = y + 1
                    GoTo Skip:
                End If
                word = ""
            End If
        Next iword
Skip:
    Next Nthpage
      Next Cell
    MsgBox "Done"
    
  Application.ScreenUpdating = True
  Application.Calculation = xlAutomatic
GetOut:
End Sub
 

Excel Facts

Do you hate GETPIVOTDATA?
Prevent GETPIVOTDATA. Select inside a PivotTable. In the Analyze tab of the ribbon, open the dropown next to Options and turn it off
Hi Allz,
Following code will do the trick and save the page numbers in Column B2 onwards. I have got help from Source 1 and Source 2

VBA Code:
Option Explicit

Sub FindTextInPDF()
 
    'Declaring the necessary variables.
    Dim PDFPath As String
    Dim App As Object
    Dim AVDoc As Object
    Dim pdDoc As Object
    Dim jso As Object
    Dim pageNo As Integer
    Dim No_Of_BLs As Integer
    Dim BL_Loop As Integer
    Dim wb As Workbook
    Dim ws As Worksheet
    Dim mergedPDF As String
    Dim x As Integer
 
    'Specify the destination PDF file name
    mergedPDF = ThisWorkbook.Sheets("Setup").Range("B2").Value
    
    'Delete destination (merged) PDF if it already exists - a new PDF will be created
    If Dir(mergedPDF) <> vbNullString Then Kill mergedPDF
 
    'Specify the path of the sample PDF form.
    PDFPath = ThisWorkbook.Sheets("Setup").Range("A2").Value
    
    'Setiing Worksheet And Sheet
    Set wb = ThisWorkbook
    wb.Activate
    Set ws = wb.Sheets("Extract PDFs")
    Worksheets(ws.Name).Activate
    No_Of_BLs = Cells(Rows.Count, 1).End(xlUp).Row
    
    'Check if the file exists.
    If Dir(PDFPath) = "" Then
        MsgBox "Cannot find the PDF file!" & vbCrLf & "Check the PDF path and retry.", _
                vbCritical, "File Path Error"
        Exit Sub
    End If
 
    'Check if the input file is a PDF file.
    If LCase(Right(PDFPath, 3)) <> "pdf" Then
        MsgBox "The input file is not a PDF file!", vbCritical, "File Type Error"
        Exit Sub
    End If
 
    On Error Resume Next
 
    'Initialize Acrobat by creating the App object.
    Set App = CreateObject("AcroExch.App")
 
    'Check if the object was created. In case of error release the object and exit.
    If Err.Number <> 0 Then
        MsgBox "Could not create the Adobe Application object!", vbCritical, "Object Error"
        Set App = Nothing
        Exit Sub
    End If
 
    'Create the AVDoc object.
    Set AVDoc = CreateObject("AcroExch.AVDoc")
 
    'Check if the object was created. In case of error release the objects and exit.
    If Err.Number <> 0 Then
        MsgBox "Could not create the AVDoc object!", vbCritical, "Object Error"
        Set AVDoc = Nothing
        Set App = Nothing
        Exit Sub
    End If
 
    On Error GoTo 0
 
    'Open the PDF file.
    If AVDoc.Open(PDFPath, "") = True Then
        
        For BL_Loop = 2 To No_Of_BLs
        'Open successful, bring the PDF document to the front.
 
        'Use the FindText method in order to find and highlight the desired text.
        'The FindText method returns true if the text was found or false if it was not.
        If AVDoc.FindText(Cells(BL_Loop, 1).Value, True, True, False) = True Then
            Set pdDoc = AVDoc.GetPDDoc()
            Set jso = pdDoc.GetJSObject
            pageNo = jso.pagenum + 1
            Cells(BL_Loop, 2).Value = pageNo
            End If
        Next BL_Loop
    Else
        'Unable to open the PDF file, close the Acrobat application.
        App.Exit
 
        'Release the objects.
        Set AVDoc = Nothing
        Set App = Nothing
 
        'Inform the user.
        MsgBox "Could not open the PDF file!", vbCritical, "File error"
 
    End If
 
End Sub
 
Upvote 0
Solution

Forum statistics

Threads
1,225,738
Messages
6,186,728
Members
453,368
Latest member
positivemind

We've detected that you are using an adblocker.

We have a great community of people providing Excel help here, but the hosting costs are enormous. You can help keep this site running by allowing ads on MrExcel.com.
Allow Ads at MrExcel

Which adblocker are you using?

Disable AdBlock

Follow these easy steps to disable AdBlock

1)Click on the icon in the browser’s toolbar.
2)Click on the icon in the browser’s toolbar.
2)Click on the "Pause on this site" option.
Go back

Disable AdBlock Plus

Follow these easy steps to disable AdBlock Plus

1)Click on the icon in the browser’s toolbar.
2)Click on the toggle to disable it for "mrexcel.com".
Go back

Disable uBlock Origin

Follow these easy steps to disable uBlock Origin

1)Click on the icon in the browser’s toolbar.
2)Click on the "Power" button.
3)Click on the "Refresh" button.
Go back

Disable uBlock

Follow these easy steps to disable uBlock

1)Click on the icon in the browser’s toolbar.
2)Click on the "Power" button.
3)Click on the "Refresh" button.
Go back
Back
Top