lazarandreiflorin
New Member
- Joined
- Dec 16, 2018
- Messages
- 34
is there a way to search text documents ( pdf and docx) for a combination of words and highlight it using excel ? thank you
Sub ExtractUnderlinedTextFromWord()
'If opening the document in Word requires you to answer a prompt (enable content or some such)
'this code will hang. You can kill the WINWORD process in the Task Manager
Dim appWD As Object
Dim sFilePathName As String
Dim iAnswer As Integer
Dim lNextWriteRow As Long
Dim lDocIndex As Long
Dim sFileNameExt As Variant
Dim aryFilePathNameExt() As Variant
Dim sFilePath As String
Dim lDocCount As Long
If Cells(Rows.Count, 1).End(xlUp).row > 1 Then
iAnswer = MsgBox("There are data in column A. Do you want to erase it?", vbYesNo, "Erase column A?")
If iAnswer <> vbYes Then
MsgBox "Process cancelled."
GoTo End_Sub
End If
Columns(1).Cells.Clear
End If
sFilePath = GetFolder(ThisWorkbook.path)
If sFilePath = "" Then
MsgBox "No directory selected."
GoTo End_Sub
End If
'Build array
sFileNameExt = Dir(sFilePath & "\*.doc?")
Do While sFileNameExt <> vbNullString
lDocIndex = lDocIndex + 1
ReDim Preserve aryFilePathNameExt(1 To lDocIndex)
aryFilePathNameExt(lDocIndex) = sFilePath & "\" & sFileNameExt
sFileNameExt = Dir
Loop
lDocCount = lDocIndex
'Uncomment next paragraph if you want to see document opened
' appWD.Visible = True
' With appWD
' .WindowState = 0 'wdWindowStateNormal
' .Resize Width:=appWD.InchesToPoints(6), Height:=appWD.InchesToPoints(4)
' End With
For lDocIndex = LBound(aryFilePathNameExt) To UBound(aryFilePathNameExt)
Application.StatusBar = lDocIndex & " / " & lDocCount & ": " & aryFilePathNameExt(lDocIndex)
Set appWD = CreateObject("Word.Application")
appWD.Documents.Open fileName:=aryFilePathNameExt(lDocIndex), _
ConfirmConversions:=False, ReadOnly:=True, AddToRecentFiles:=False, _
PasswordDocument:="", PasswordTemplate:="", Revert:=False, _
WritePasswordDocument:="", WritePasswordTemplate:="", Format:= _
0, XMLTransform:="" '0=wdOpenFormatAuto
'Some code extracted from
'http://stackoverflow.com/questions/13465709/repeating-microsoft-word-vba-until-no-search-results-found
lNextWriteRow = lNextWriteRow + 1
ActiveSheet.Cells(lNextWriteRow, 1).Value = aryFilePathNameExt(lDocIndex)
'Start at the top of the document
appWD.Selection.HomeKey Unit:=6 'wdStory
appWD.Selection.ExtendMode = False
'find a footnote to kick it off
appWD.Selection.Find.ClearFormatting
appWD.Selection.Find.Font.Underline = 1 'wdUnderlineSingle
With appWD.Selection.Find
.Text = "*"
.Replacement.Text = ""
.Forward = True
.Wrap = 1 'wdFindContinue
.Format = True
.MatchCase = False
.MatchWholeWord = False
.MatchAllWordForms = False
.MatchSoundsLike = False
.MatchWildcards = True
End With
appWD.Selection.Find.Execute
Do While appWD.Selection.Find.Found = True
'Jump back to the start of the document.
appWD.Selection.Extend
'On the last loop you'll not find a result so check here
If appWD.Selection.Find.Found Then
appWD.Selection.Extend
Do While appWD.ActiveDocument.Characters(appWD.Selection.End).Font.Underline
appWD.Selection.MoveRight Unit:=1 ' wdCharacter
Loop
appWD.Selection.MoveLeft Unit:=1 'wdCharacter
lNextWriteRow = lNextWriteRow + 1
ActiveSheet.Cells(lNextWriteRow, 2).Value = appWD.Selection
appWD.Selection.Collapse 0 'wdCollapseEnd
End If
appWD.Selection.ExtendMode = False
appWD.Selection.Find.Execute
Loop
appWD.Quit
Next
End_Sub:
Application.ScreenUpdating = True
Application.StatusBar = False
Set appWD = Nothing
End Sub
Sub Test(SearchWords As String)
Dim PFWdApp As Object, oPara As Object
'open Word application
On Error Resume Next
Set PFWdApp = GetObject(, "word.application")
If Err.Number <> 0 Then
On Error GoTo 0
Set PFWdApp = CreateObject("Word.Application")
End If
PFWdApp.Visible = True
PFWdApp.Documents.Open Filename:="C:\testfolder\test.doc"
'check each para
For Each oPara In PFWdApp.ActiveDocument.Paragraphs
'if not blank para
If oPara.Range.Text <> Chr(13) Then
If InStr(oPara.Range.Text, SearchWord) Then
With oPara.Range.Find
.Text = SearchWord
.Forward = True
.MatchWholeWord = True
.Execute
If .found = True Then
.Parent.Font.ColorIndex = 6
End If
End With
End If
End If
Next oPara
Set oPara = Nothing
Set PFWdApp = Nothing
End Sub
Call Test("Bob' your uncle")