A common problem when saving a sheet (or multiple sheets or the entire workbook) as a PDF file, using
Notes -
PDF - https://opensource.adobe.com/dc-acrobat-sdk-docs/acrobatsdk/pdfs/acrobatsdk_jsapiref.pdf
HTML - Acrobat JavaScript API Reference — Acrobat-PDFL SDK: JavaScript Reference
ExportAsFixedFormat Type:=xlTypePDF
, is that Excel doesn't preserve the links to cells as links to pages in the PDF. The code below is a solution to this problem.Notes -
- The code uses the Acrobat API and therefore Acrobat Pro must be installed. The Acrobat API is not available if the free Adobe Reader or another PDF reader is installed.
- The code uses late binding of the Adobe Acrobat Type Library (acrobat.tlb) and therefore no references are required in the VBA project.
- The type of link/hyperlink handled by the code are links to 'Place in This Document', inserted via the Insert -> Link dialogue. It doesn't handle cell formula links created with the HYPERLINK function.
- The code expects the links to be on "Sheet1", to cells on the same sheet. It creates 2 PDFs in the same folder as the workbook. "Sheet1.pdf" is simply the sheet saved as a PDF using
ExportAsFixedFormat Type:=xlTypePDF
. The code reads that file, adds the required links (using the Acrobat addLink function) and saves the modified file as "Sheet1 with page links added.pdf". - The code handles horizontal and vertical page breaks on "Sheet1".
PDF - https://opensource.adobe.com/dc-acrobat-sdk-docs/acrobatsdk/pdfs/acrobatsdk_jsapiref.pdf
HTML - Acrobat JavaScript API Reference — Acrobat-PDFL SDK: JavaScript Reference
VBA Code:
Option Explicit
Public Sub Save_Sheet_As_PDF_With_Links_To_Pages()
Dim PDDoc As Object 'Acrobat.AcroPDDoc
Dim PDDocLink As Object 'Acrobat.AcroPDDoc
Dim JSO As Object
Dim JSOlink As Object
Dim quads1 As Variant, quads2 As Variant, linkRect(0 To 3) As Double
Dim PDFlink As Object
Dim link As Hyperlink
Dim linkOnPage As Long, prevLinkOnPage As Long, linkToPage As Long
Dim word As String, wordIndex As Long
Dim pageWordsCount As Long, pageWords() As String
Dim linkWordsCount As Long, linkWords() As String
Dim startPageWordsIndex As Long, p As Long
Dim PDFinputFile As String, PDFoutputFile As String
Dim PDFlinkFile As String
Dim PDFsaveSheet As Worksheet, PDFlinkSheet As Worksheet
With ActiveWorkbook
'Full file names of the 2 PDF files created by this routine
PDFinputFile = .Path & "\Sheet1.pdf"
PDFoutputFile = .Path & "\Sheet1 with page links added.pdf"
'The sheet containing links, to be saved as a PDF
Set PDFsaveSheet = .Worksheets("Sheet1")
'Add temporary sheet to which each link will be copied
Application.ScreenUpdating = False
Set PDFlinkSheet = .Worksheets.Add(After:=.Worksheets(.Worksheets.Count))
PDFsaveSheet.Activate
Application.ScreenUpdating = True
End With
'Reset all hyperlinks on the PDF sheet and save it as a PDF - this is the input file
Reset_All_Hyperlinks PDFsaveSheet
PDFsaveSheet.ExportAsFixedFormat Type:=xlTypePDF, Filename:=PDFinputFile, _
Quality:=xlQualityStandard, IncludeDocProperties:=True, IgnorePrintAreas:=False, OpenAfterPublish:=False
Set PDDoc = CreateObject("AcroExch.PDDoc") 'New Acrobat.AcroPDDoc
If PDDoc.Open(PDFinputFile) Then
Set JSO = PDDoc.GetJSObject
With PDFsaveSheet
prevLinkOnPage = 0
'Loop through links on the sheet
For Each link In .Hyperlinks
If link.SubAddress <> "" Then
'Link is to 'Place in This Document'
'Get the worksheet page number that this link is on and the page number it links to
linkOnPage = GetPageNumber(link.Range)
linkToPage = GetPageNumber(Range(link.SubAddress))
'Debug.Print link.Range.Address; " Page ="; linkOnPage, link.Address, link.SubAddress, link.TextToDisplay, " Target Page ="; linkToPage
'Copy this link to the temporary sheet and save the sheet as a temporary PDF file. This isolates the link's words, allowing the code to look for them
'on the page in the main PDF using the getPageNthWord method
PDFlinkFile = ActiveWorkbook.Path & "tempLink.pdf"
With PDFlinkSheet
.Cells.Clear
link.Range.Copy .Range("A1")
.ExportAsFixedFormat Type:=xlTypePDF, Filename:=PDFlinkFile, _
Quality:=xlQualityStandard, IncludeDocProperties:=False, IgnorePrintAreas:=False, OpenAfterPublish:=False
.Cells.Clear
End With
'For the PDF page that this link is on, extract all the words into the pageWords array, but only if it's a different page number to the previous PDF page,
'to avoid repeating the same extraction
If linkOnPage <> prevLinkOnPage Then
pageWordsCount = JSO.getpageNumWords(linkOnPage - 1)
ReDim pageWords(0 To pageWordsCount - 1)
For wordIndex = 0 To pageWordsCount - 1
word = JSO.getPageNthWord(linkOnPage - 1, wordIndex, False) 'False: don't remove whitespace and punctuation from the word
word = Replace(word, vbCr, "")
word = Trim(Replace(word, vbLf, ""))
'Debug.Print wordIndex; ">" & word & "<"
pageWords(wordIndex) = word
Next
prevLinkOnPage = linkOnPage
End If
'Extract all words from the temporary PDF into the linkWords array
'Set PDDocLink = New Acrobat.AcroPDDoc
Set PDDocLink = CreateObject("AcroExch.PDDoc")
If PDDocLink.Open(PDFlinkFile) Then
Set JSOlink = PDDocLink.GetJSObject
linkWordsCount = JSOlink.getpageNumWords(0)
ReDim linkWords(0 To linkWordsCount - 1)
For wordIndex = 0 To linkWordsCount - 1
word = JSOlink.getPageNthWord(0, wordIndex, False) 'False: don't remove whitespace and punctuation from the word
word = Replace(word, vbCr, "")
word = Trim(Replace(word, vbLf, ""))
'Debug.Print wordIndex; ">" & word & "<"
linkWords(wordIndex) = word
Next
PDDocLink.Close
Kill PDFlinkFile
End If
'Look for this link's words as sequential words in the pageWords array and get the index of the first word
startPageWordsIndex = FindSubArrayInMainArray(pageWords, linkWords)
If startPageWordsIndex >= 0 Then
'Found all this link's words on the PDF page
'Debug.Print "Found at " & startPageWordsIndex
'---------------------------------------------------------------------------------------
'Definitions from "Adobe JavaScript for Acrobat API Reference"
'PDF - https://opensource.adobe.com/dc-acrobat-sdk-docs/acrobatsdk/pdfs/acrobatsdk_jsapiref.pdf
'HTML - https://opensource.adobe.com/dc-acrobat-sdk-docs/library/jsapiref/index.html
'quads
' An array of 8 x n numbers specifying the coordinates of n quadrilaterals in default user space. Each quadrilateral encompasses a word or group of
' contiguous words in the text underlying the annotation. See the PDF Reference version 1.7 for more details. The quads for a word can be obtained
' through calls to the Doc object getPageNthWordQuads method.
'-------
'getPageNthWordQuads
' Gets the quads list for the nth word on the page. The quads property of the Annotation object can be used for constructing text markup, underline,
' strikeout, highlight and squiggly annotations. See also getPageNthWord, getPageNumWords, and selectPageNthWord.
' Note: This method throws an exception if the document security is set to prevent content extraction.
'
'Parameters
' nPage (optional) The 0-based index of the page. The default is 0, the first page in the document.
' nWord (optional) The 0-based index of the word. The default is 0, the first word on the page.
'
'Returns
' The quads list for the nth word on the page.
'-------
'addLink
' Adds a new link to the specified page with the specified coordinates, if the user has permission to add links to the document. See also getLinks,
' removeLinks and the Link object.
'
'Parameters
' nPage The page on which to add the new link.
' oCoords An array of four numbers in rotated user space specifying the size and placement of the link. The numbers are the coordinates of the
' bounding rectangle in the following order: upper-left x, upper-left y, lower-right x and lower-right y.
'
'Returns
' The newly created Link object.
'---------------------------------------------------------------------------------------
'Get quads of first and last words of the display text from the PDF page. quads1 and quads2 are arrays of 8 numbers, specifying the 4 coordinates
'of the bounding box of the specified word index number. If the display text is only 1 word then quads1 and quads2 will contain the same coordinates.
quads1 = JSO.getPageNthWordQuads(linkOnPage - 1, startPageWordsIndex)
quads2 = JSO.getPageNthWordQuads(linkOnPage - 1, startPageWordsIndex + UBound(linkWords))
'Transfer the coordinates of the upper-left of the display text's first word and the lower-right of the display text's last word to the linkRect array
'
'linkRect is an array of 4 numbers specifying the coordinates of the bounding rectangle for the link to be added on the PDF page in the
'following order: upper-left x, upper-left y, lower-right x and lower-right y
'
' ===================PAGE===================
' | |
' | |
' | -------linkRect------- |
' | |(x1,y1) | |
' | |first last| |
' | | (x2,y2)| |
' | ---------------------- |
' | |
' | |
' |(0,0) |
' ===========================================
linkRect(0) = CLng(quads1(0)(0)) 'x1 (upper-left x)
linkRect(1) = CLng(quads1(0)(1)) 'y1 (upper-left y)
linkRect(2) = CLng(quads2(0)(2)) 'x2 (lower-right x)
linkRect(3) = CLng(quads2(0)(5)) 'y2 (lower-right y)
'Debug.Print linkRect(0); linkRect(1), linkRect(2); linkRect(3)
'Add link on the PDF page, around the word(s) bounded by the coordinates in the linkRect array and make it point to the link's destination page
PDFlink.setAction "this.pageNum = " & linkToPage - 1 & ";"
Else
MsgBox "Excel link display text '" & link.TextToDisplay & "' not found on PDF page " & linkOnPage
End If
End If
Next
End With
If PDDoc.Save(1, PDFoutputFile) Then
MsgBox "Successfully saved " & PDFoutputFile, vbInformation, "Add Links to PDF"
ActiveWorkbook.FollowHyperlink PDFoutputFile
Else
MsgBox "Cannot save the output PDF document " & PDFoutputFile, vbExclamation, "Add Links to PDF"
End If
PDDoc.Close
End If
Set PDDocLink = Nothing
Set PDDoc = Nothing
'Delete the temporary sheet
Application.DisplayAlerts = False
PDFlinkSheet.Delete
Application.DisplayAlerts = False
End Sub
Private Sub Reset_All_Hyperlinks(ws As Worksheet)
Dim link As Hyperlink
'Reset colour of all hyperlinks on the specified worksheet to 'not visited'
With ws
'Debug.Print "Hyperlinks count ="; .Hyperlinks.Count
For Each link In .Hyperlinks
Reset_Hyperlink link
Next
End With
End Sub
Private Sub Reset_Hyperlink(link As Hyperlink)
'Reset colour of specified hyperlink to 'not visited'
With link
'Debug.Print .Range.Address, .Address, .SubAddress, .ScreenTip, .TextToDisplay
If .SubAddress <> "" Then
If .ScreenTip <> "" Then
.Parent.Hyperlinks.Add Anchor:=.Range, Address:=.Address, SubAddress:=.SubAddress, ScreenTip:=.ScreenTip, TextToDisplay:=.TextToDisplay
Else
.Parent.Hyperlinks.Add Anchor:=.Range, Address:=.Address, SubAddress:=.SubAddress, TextToDisplay:=.TextToDisplay
End If
Else
If .ScreenTip <> "" Then
.Parent.Hyperlinks.Add Anchor:=.Range, Address:=.Address, ScreenTip:=.ScreenTip, TextToDisplay:=.TextToDisplay
Else
.Parent.Hyperlinks.Add Anchor:=.Range, Address:=.Address, TextToDisplay:=.TextToDisplay
End If
End If
End With
End Sub
'Based on https://www.excelforum.com/excel-programming-vba-macros/559027-returning-the-activecells-page-number.html#post1646565
'with my changes to make it work for all cases
Private Function GetPageNumber(CallerCell As Range) As Long
'Return the page number of the specified cell, accounting for horizontal and vertical page breaks on the cell's worksheet
Dim VPBreak As VPageBreak
Dim HPBreak As HPageBreak
Dim VPBreakCount As Long
Dim HPBreakCount As Long
Dim PageNumber As Long
With CallerCell.Worksheet
VPBreakCount = 0
For Each VPBreak In .VPageBreaks
If VPBreak.Location.Column <= CallerCell.Column Then
VPBreakCount = VPBreakCount + 1
Else
Exit For
End If
Next
HPBreakCount = 0
For Each HPBreak In .HPageBreaks
If HPBreak.Location.Row <= CallerCell.Row Then
HPBreakCount = HPBreakCount + 1
Else
Exit For
End If
Next
Select Case .PageSetup.Order
Case xlDownThenOver
PageNumber = (.HPageBreaks.Count + 1) * VPBreakCount + HPBreakCount + 1
Case xlOverThenDown
PageNumber = .VPageBreaks.Count * HPBreakCount + VPBreakCount + 1
End Select
If .PageSetup.FirstPageNumber <> xlAutomatic Then
PageNumber = PageNumber + .PageSetup.FirstPageNumber
End If
End With
GetPageNumber = PageNumber
End Function
Private Function FindSubArrayInMainArray(mainArray As Variant, subArray As Variant) As Long
'Look for sequence of subArray elements in mainArray elements in the same order and, if found, return the index in mainArray where subArray was found, otherwise return -1
Dim m As Long, s As Long
FindSubArrayInMainArray = -1
m = LBound(mainArray)
s = LBound(subArray)
While m <= UBound(mainArray) And s <= UBound(subArray)
'Debug.Print s; ">" & subArray(s) & "<", m; ">" & mainArray(m) & "<", FindSubArrayInMainArray
If subArray(s) = mainArray(m) Then
If FindSubArrayInMainArray = -1 Then FindSubArrayInMainArray = m
s = s + 1
Else
FindSubArrayInMainArray = -1
s = 0
End If
m = m + 1
Wend
If s <> UBound(subArray) + 1 Then FindSubArrayInMainArray = -1
End Function