idlovetolearn
New Member
- Joined
- Oct 2, 2017
- Messages
- 2
Guys
really hoping someone can help here as I am at my wits end.
Background
My knowledge of writing code is beginner at best, but I am a) very eager to learn & b) in need of some expertise. I am a long time lurker and the knowledge/help provided to others in recent times, have been a God send to me.
The code below was written for me by an ex-member of staff and when I tested it, it worked a treat. However now we are in a place where I can utilize this code, it doesn't seem to work for me.
What do I need?
Well can you tell me where we're going wrong? This code, in a nutshell, was designed to read a list of letters (in PDF) and search for a string for a reference number, nine digits long and will always end in a zero. Once located it should then search a folder for an attachment that is named the same reference number that it has just found. Once done, it will create a new PDF with both the letter and the attachment, and then move on to the next letter and so on and so on.
I am really hoping this is a simple fix one of you amazing people can help with, or at least point me in the right direction - here's hoping.
really hoping someone can help here as I am at my wits end.
Background
My knowledge of writing code is beginner at best, but I am a) very eager to learn & b) in need of some expertise. I am a long time lurker and the knowledge/help provided to others in recent times, have been a God send to me.
The code below was written for me by an ex-member of staff and when I tested it, it worked a treat. However now we are in a place where I can utilize this code, it doesn't seem to work for me.
What do I need?
Well can you tell me where we're going wrong? This code, in a nutshell, was designed to read a list of letters (in PDF) and search for a string for a reference number, nine digits long and will always end in a zero. Once located it should then search a folder for an attachment that is named the same reference number that it has just found. Once done, it will create a new PDF with both the letter and the attachment, and then move on to the next letter and so on and so on.
I am really hoping this is a simple fix one of you amazing people can help with, or at least point me in the right direction - here's hoping.
Code:
Imports iTextSharp.text
Imports iTextSharp.text.pdf
Imports System.IO
Public Class Main
Public ListToRemove(200)
'list of pages from letter file to be removed, used to parse across routines
Public ListCount As Integer
Private Sub Button1_Click(sender As Object, e As EventArgs) Handles btnGo.Click
Analyse()
'separated code from button action as originally was a differnt routine
End Sub
''' <summary>
''' Extract a single page from source pdf to a new pdf
''' </summary>
''' <param name="sourcePdf">the full path to source pdf file</param>
''' <param name="pageNumberToExtract">the page number to extract</param>
''' <param name="outPdf">the full path for the output pdf</param>
''' 'NumberOfPagesToextract = Number of pages to extrace i.e letter is 3 pages will extract 3 pages
''' 'sourcePdf2() array of attchments to extract, all attachments from array will be extracted
''' ' outPdf = sourcepdf(pagenumbertoextract to numberofpagestoextract) + sourcepdf2(all)
''' <remarks></remarks>
Sub ExtractPdfPage(ByVal sourcePdf As String, ByVal pageNumberToExtract As Integer, ByVal NumberOfPagesToExtract As Integer, ByVal outPdf As String, ByVal sourcePdf2() As String)
Dim reader As iTextSharp.text.pdf.PdfReader = Nothing
Dim reader2 As iTextSharp.text.pdf.PdfReader = Nothing
Dim doc As iTextSharp.text.Document = Nothing
Dim pdfCpy As iTextSharp.text.pdf.PdfCopy = Nothing
Dim page As iTextSharp.text.pdf.PdfImportedPage = Nothing
Dim OrigDoc As iTextSharp.text.pdf.PdfImportedPage = Nothing
Try
reader = New iTextSharp.text.pdf.PdfReader(sourcePdf)
doc = New iTextSharp.text.Document(reader.GetPageSizeWithRotation(1))
pdfCpy = New iTextSharp.text.pdf.PdfCopy(doc, New System.IO.FileStream(outPdf, System.IO.FileMode.Append))
doc.Open()
'sets doc to open and new file to open/create/append
For L = 1 To NumberOfPagesToExtract
page = pdfCpy.GetImportedPage(reader, pageNumberToExtract + L - 1)
pdfCpy.AddPage(page)
'number of pages from page number of letter , i.e letter with refernce is on page 3 and has 4 pages
'extract all to new file
Next
Dim m As Integer = 0
For m = 0 To sourcePdf2.Count
If sourcePdf2(m) = "" Then Exit For
reader2 = New iTextSharp.text.pdf.PdfReader(sourcePdf2(m))
For T = 1 To reader2.NumberOfPages
OrigDoc = pdfCpy.GetImportedPage(reader2, T)
pdfCpy.AddPage(OrigDoc)
'loops through nubmer of attachments and number of pages per attachment to add to new file
Next
reader2.Close()
'closes reader
System.IO.File.Move(sourcePdf2(m), txtOutDir.Text.ToString & "\Done" & Mid(sourcePdf2(m), sourcePdf2(m).LastIndexOf("\") + 1, Len(sourcePdf2(m)) - sourcePdf2(m).LastIndexOf("\")))
'moves "Done" attchment to Done folder
Next
doc.Close()
reader.Close()
'close reader and doc
Catch ex As Exception
MsgBox(ex.ToString)
'If the above code fails gives message of exception
End Try
doc.Close()
reader.Close()
reader2.Close()
'closes both readers and doc if not already done so file isnt open next time around as cannot write to open file
End Sub
Sub RemovePdfPages(ByVal sourcePdf As String, ByVal OutPutPDF As String, Count As Integer, List As Array)
'Removes page from a PDF
'sourcepdf = letters file
'outputpdf = output file for no attachments found
'count = number of pages to remove per letter
'list = array of letters to remove
'outputpdf = sourcepdf - pages per list per count
Dim reader As iTextSharp.text.pdf.PdfReader = Nothing
Dim reader2 As iTextSharp.text.pdf.PdfReader = Nothing
Dim doc As iTextSharp.text.Document = Nothing
'Dim doc As PdfManipulation.DocumentEx = Nothing
Dim pdfCpy As iTextSharp.text.pdf.PdfCopy = Nothing
Dim page As iTextSharp.text.pdf.PdfImportedPage = Nothing
Dim OrigDoc As iTextSharp.text.pdf.PdfImportedPage = Nothing
Try
reader = New iTextSharp.text.pdf.PdfReader(sourcePdf)
doc = New iTextSharp.text.Document(reader.GetPageSizeWithRotation(1))
pdfCpy = New iTextSharp.text.pdf.PdfCopy(doc, New System.IO.FileStream(OutPutPDF, System.IO.FileMode.Append))
doc.Open()
Dim noPages As Integer
noPages = reader.NumberOfPages
Dim skip As Boolean
'creates duplocate of letter file without the letters that it has already found attachments for
For T = 0 To noPages
For X = 0 To 200
If T = List(X) Then
skip = True
Exit For
Else
End If
Next
If skip = True Then
skip = False
Else
OrigDoc = pdfCpy.GetImportedPage(reader, T)
pdfCpy.AddPage(OrigDoc)
skip = False
End If
Next
doc.Close()
reader.Close()
Catch ex As Exception
doc.Close()
reader.Close()
'if tries to remove page from empty document then all letters have got an attachemnt therefore file will be empty
If ex.Message.ToString = "The document has no pages." Then
MsgBox("All attachments match up with letters.", vbOKOnly, "As n Ls")
Else
MsgBox(ex.ToString)
End If
End Try
End Sub
Private Sub btnChangeAttDir_Click(sender As Object, e As EventArgs) Handles btnChangeAttDir.Click
OpenFolder.ShowDialog()
txtAttachmentFolder.Text = OpenFolder.SelectedPath
'set/change path for attachment directory
End Sub
Private Sub btnOutDir_Click(sender As Object, e As EventArgs) Handles btnOutDir.Click
OpenFolder.ShowDialog()
txtOutDir.Text = OpenFolder.SelectedPath
'set/change path for output directory
End Sub
Sub Analyse()
ListCount = 0
'nulls listcount if ran more than once
System.IO.Directory.CreateDirectory(txtOutDir.Text.ToString & "\Done\")
'creates output directory for "done" attachments
DataGridView1.Rows.Clear()
'clears datagridview1 if ran more than once
Dim count As Integer = 0
'count = number of individual attachments
Dim AttachmentSources
'list of individual attachments
Dim x As Integer = 0
'counter
Dim LetterSource As String = ""
'lettersource is source of letters file
Dim sOut = ""
'sOut is text string of page of pdf
Dim done As Integer = 0
'doen = number of letters found with matching attachments, used as a count to output - not nescessary for process
Dim SetDir As String = ""
'directory of attachment folder
SetDir = txtAttachmentFolder.Text.ToString & "\"
'setting attachment directory
OpenLetters.FileName = ""
'openletters is openfile dialog
OpenLetters.ShowDialog()
'opening dialog
If OpenLetters.FileName = "" Then
MsgBox("No Letter file has been selected, please select a file.", MsgBoxStyle.OkOnly, "As & Ls")
Exit Sub
End If
'if no file is selected then messagebox and close process leave program open
LetterSource = OpenLetters.FileName
'setting letter source as selected file
AttachmentSources = My.Computer.FileSystem.GetFiles(SetDir)
'setting attachments from list of files in attachments directory
Dim SearchKeyword As String = ""
'keyword to look within letters before assuming reference number
SearchKeyword = txtSearchString.Text.ToString
count = AttachmentSources.Count
'number of attachemnets in directory
Dim Refs(count - 1) As String
'creating array
For x = 0 To count - 1
Refs(x) = (Mid(AttachmentSources(x), AttachmentSources(x).ToString.LastIndexOf("\") + 2, 9))
Next
'ref is array of references found within name of file - assums attachments are names as such DirectoryPath\ReferenceNumber.PDF
'will also work if is any other charters after the reference nubmer as assumes 9 digit ref
Dim oReader As New iTextSharp.text.pdf.PdfReader(LetterSource)
'oreader is pdf reader - reading LetterSource = is reading lettersfile
Dim LetterRefs(oReader.NumberOfPages - 1) As String
'references found on each letter , page number is number in array, used to match attachment to letter
Dim PageNumbers(oReader.NumberOfPages - 1) As Integer
'page number of each letter, will only show first page of letter, used to find beginning of each letter
Dim PagesCount(oReader.NumberOfPages - 1) As Integer
'number of pages for each letter, if array number if not first page is blank
Dim AttachmentsFound(oReader.NumberOfPages - 1) As String
'number of attachments found for each letter
Dim AttachmentsPath(oReader.NumberOfPages - 1) As String
'attachment path, array number relates to page number of first page of letter, this can have multiple attachment comma separated
For i = 0 To oReader.NumberOfPages - 1
'loop for each page in letter source , reader is reading letters file
Dim its As New iTextSharp.text.pdf.parser.SimpleTextExtractionStrategy
sOut = iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(oReader, i + 1, its)
'sout is text on page , loops each page to sout changes with each itteration
If sOut.IndexOf(SearchKeyword) >= 0 Then
'if the keyword is found on the file then
LetterRefs(i) = Mid(sOut, sOut.IndexOf(SearchKeyword) + Len(SearchKeyword) + 1, 9)
'reference for letter is placed into array
PageNumbers(i) = i + 1
'as page number starts on 0 there is no page 0 within file to +1 to correct
PagesCount(i) = 1
'page count for this letter is 1
Else
'if keyword is not found assumes is back of previous letter
Dim P As Integer = 0
Do While P = P
'loop backward to find next not blank page count
If PagesCount(i - P) <> 0 Then
PagesCount(i - P) = PagesCount(i - P) + 1
Exit Do
'increase pagecout of letter by one
Else
P = P + 1
'keep looping backward by increasing number to go back by
End If
Loop
PagesCount(i) = 0
'pages count if blank for non first pages
End If
Next
''
'if the ref matches a ref on an attachment then attachment path = that attachment, builds up multiple attachment string if more than one is found
For i = 0 To oReader.NumberOfPages - 1
For j = 0 To count - 1
If LetterRefs(i) = Refs(j) Then
If AttachmentsFound(i) = "" Then
AttachmentsFound(i) = Refs(j)
AttachmentsPath(i) = AttachmentSources(j)
Else
AttachmentsFound(i) = AttachmentsFound(i) & "," & Refs(j)
AttachmentsPath(i) = AttachmentsPath(i) & "," & AttachmentSources(j)
End If
End If
Next
Next
' for each page creates line in datagrid view, not needed for process
For i = 0 To oReader.NumberOfPages - 1
DataGridView1.Rows.Add({LetterRefs(i), PageNumbers(i), PagesCount(i), Replace(AttachmentsPath(i), txtAttachmentFolder.Text.ToString & "\", "")})
Next
Dim path1 As String
'path 1 is first file to merge file
Dim path2(200) As String
'path2 is array of paths to merge with
'output is path1 + path2(all)
Dim PgCount As Integer
'nubmer of pages in first path to extract - path will be letter file
Dim commas As Integer = 0
'number of commas in attachment path
For i = 0 To oReader.NumberOfPages - 1
'loop each page
If LetterRefs(i) = "" Or AttachmentsPath(i) = "" Then
'if no reference found or no attachment found does nothing otherwise as below
Else
commas = CountCharacter(AttachmentsPath(i), ",")
'number of commas in attachment path string
Dim path As String
path = AttachmentsPath(i)
'attachment path set so if no comma found then path still set
For C = 0 To commas
'loop for each comma found
Dim FirstComma As Integer
Dim NextComma As Integer
FirstComma = path.IndexOf(",")
'position of first comma
If FirstComma < 0 Then FirstComma = Len(path)
'if a comma if found then
NextComma = path.IndexOf(",", FirstComma)
'position of next comma
path2(C) = Mid(path, 1, FirstComma)
'path2 is set to first file in string
path = Mid(path, FirstComma + 2)
'removes first string from path
'loops
Next
'sets paths for each attachment into array
path1 = LetterSource
'letters file
Dim oReadera As New iTextSharp.text.pdf.PdfReader(path1)
'reading letter file
PgCount = PagesCount(i)
'Number of pages to extract
oReadera.Close()
'close reader
ExtractPdfPage(path1, PageNumbers(i), PgCount, txtOutDir.Text.ToString & "\" & LetterRefs(i) & ".pdf", path2)
'creates new PDF with name of Reference found on letter with page ref was found on and pages following without reference and then all attachments found with same ref
For x = 0 To 200
path2(x) = ""
Next
'clears path2 array so when loop no extra attachments appear with letter
done = done + 1
'Number of "done" letters
End If
If PageNumbers(i) <> 0 Then
ListToRemove(i) = PageNumbers(i)
ListCount = ListCount + 1
If PagesCount(i) > 1 Then
ListToRemove(i + 1) = PageNumbers(i) + 1
ListCount = ListCount + 1
End If
End If
'creates list of pages to be removed
Next
oReader.Close()
'close reader if not already done
RemovePdfPages(LetterSource, txtOutDir.Text.ToString & "\No Attachments Found.pdf", ListCount, ListToRemove)
'duplicates letter file if page is not in ListToRemove
MsgBox("Done." & vbNewLine & count & " Attachments found, " & ListCount & "merged with letters." & vbNewLine & "Letters And attachments merged total=" & done, MsgBoxStyle.OkOnly, "As & Ls")
'completion message
End Sub
Public Function CountCharacter(ByVal value As String, ByVal ch As Char) As Integer
Return value.Count(Function(c As Char) c = ch)
'counts number of commas in string
End Function
End Class