Follow along with the video below to see how to install our site as a web app on your home screen.
Note: This feature may not be available in some browsers.
Private Sub getBodyTextPDF()
    'Extracts the text of the PDF named in txtFileName.Text, one page at a
    'time, by calling getPDFTextFromPage for every page, and reports progress
    'in lblStatus.
    '
    'NOTE(review): the variables used here (myPDF, myPDFPageCount, pageCount,
    'pagenumber, filelocation, ...) are not declared in this procedure; they are
    'presumably module-level or implicit - confirm before adding local Dims.
    'NOTE(review): pdfData (appended to by getPDFTextFromPage) is not reset
    'here, so repeated runs presumably accumulate text - confirm this is intended.

    'Pass 1: open the document only to find out how many pages it has.
    'The page count drives the extraction loop further down.
    Set myPDFPageCount = CreateObject("acroexch.pddoc")

    'Open returns True/False
    filelocation = txtFileName.Text
    openResult = myPDFPageCount.Open(filelocation)

    'a little bit of error handling: give up if the file cannot be opened
    If openResult = False Then
        Set myPDFPageCount = Nothing
        MsgBox "Error opening file"
        Exit Sub
    End If

    'get the number of pages
    pageCount = myPDFPageCount.GetNumPages

    'Close returns True/False
    closeResult = myPDFPageCount.Close

    'a little bit of error handling: give up if the file cannot be closed
    If closeResult = False Then
        Set myPDFPageCount = Nothing
        MsgBox "Error closing file"
        Exit Sub
    End If

    'destroy the object, we do not need it anymore
    Set myPDFPageCount = Nothing

    'Pass 2: text is pulled one page at a time, so loop over the pages.
    'Page numbers passed to getPDFTextFromPage are zero-based (0 .. pageCount - 1).
    Set myPDF = CreateObject("acroexch.pddoc")

    'open the file again; this time the result is checked so that a failed
    'open is reported here instead of surfacing as an error inside the loop
    openResult = myPDF.Open(filelocation)
    If openResult = False Then
        Set myPDF = Nothing
        MsgBox "Error opening file"
        Exit Sub
    End If

    For pagenumber = 0 To pageCount - 1
        'keep the UI responsive while extracting
        DoEvents
        getPDFTextFromPage pagenumber
        lblStatus.Caption = "Extracting : " & pagenumber + 1 & " of " & pageCount
    Next

    'close the document before releasing it, mirroring the handling of the
    'first document above
    closeResult = myPDF.Close
    Set myPDF = Nothing
    If closeResult = False Then
        MsgBox "Error closing file"
    End If
End Sub
Private Sub getPDFTextFromPage(pagenumber As Integer)
    'Appends the text of one page of the already opened document myPDF to
    'pdfData, followed by a form feed that marks the end of the page.
    '
    'pagenumber: index of the page to read; the caller in this file passes
    '            0 .. pageCount - 1.
    '
    'NOTE(review): myPDF and pdfData are not declared here; presumably they are
    'module-level variables set up by the caller - confirm.
    'If the page or its text selection cannot be obtained, no text is added for
    'that page, but the form feed is still appended so page separators stay
    'aligned with page numbers.

    'create the pdf page object for the specified page
    Set myPDFPage = myPDF.AcquirePage(pagenumber)

    'guard: a failed AcquirePage yields Nothing, and using it would raise
    'run-time error 91
    If Not (myPDFPage Is Nothing) Then
        'create a hilite list; the highlight is what the text is extracted
        'through - if text can be highlighted, it can be pulled out of the file
        Set myPageHilite = CreateObject("acroexch.hilitelist")

        'Add returns True/False. The range (0, 9000) is meant to cover the
        'entire page.
        'NOTE(review): text beyond this range would presumably not be selected
        'on very dense pages - confirm against the Acrobat IAC reference.
        hiliteResult = myPageHilite.Add(0, 9000)

        'highlight the specified page
        Set pageSelect = myPDFPage.CreatePageHilite(myPageHilite)

        'guard: no selection object means there is nothing to read
        If Not (pageSelect Is Nothing) Then
            'the selection comes back in small chunks, so loop over them and
            'append each one; the same string (pdfData) collects all pages
            For i = 0 To pageSelect.GetNumText - 1
                DoEvents
                pdfData = pdfData & pageSelect.GetText(i)
            Next
        End If
    End If

    'page separator
    pdfData = pdfData & vbFormFeed

    'clean up
    Set myPDFPage = Nothing
    Set myPageHilite = Nothing
    Set pageSelect = Nothing
End Sub