BCL easyPDF SDK
easyPDF SDK User Manual
PDF Creator Programming API  |  Download Free Trial  |  Contact Us to Purchase

ExtractText Method

Extract text from a PDF file.

void ExtractText(string InputFileName,
                 string OutputFileName,
                 string Password,
                 int From,
                 int To,
                 string PageSeparator,
                 int CodePage)

Sub ExtractText(InputFileName As String, _
                OutputFileName As String, _
                Password As String, _
                From As Integer, _
                To As Integer, _
                PageSeparator As String, _
                CodePage As Integer)

def ExtractText(self,
                InputFileName,
                OutputFileName,
                Password,
                From,
                To,
                PageSeparator,
                CodePage)
void ExtractText(String InputFileName,
                 String OutputFileName,
                 String Password,
                 int From,
                 int To,
                 String PageSeparator,
                 int CodePage) throws PDFProcessorException

function ExtractText($InputFileName,
                     $OutputFileName,
                     $Password,
                     $From,
                     $To,
                     $PageSeparator,
                     $CodePage) 

BclPrcResult ExtractText(const BclPDFProcessorSettings* pSettings,
                         const wchar_t* InputFileName,
                         const wchar_t* OutputFileName,
                         const wchar_t* Password,
                         int From,
                         int To,
                         const wchar_t* PageSeparator,
                         int CodePage);
Sub ExtractText(InputFileName As String, _
                OutputFileName As String, _
                [Password], _
                [From], _
                [To], _
                [PageSeparator], _
                [CodePage])

Parameters

Return Values

N/A.

Remarks

  1. This method extracts text from a PDF file, discarding all formatting information.
  2. The extracted text can be useful for text indexing purposes.
  3. Page numbers are zero-based, meaning that the first page is page 0.

Example Usage

' Create the easyPDF PDFProcessor object.
Set oProcessor = CreateObject("easyPDF.PDFProcessor.8")

' Extract using the default options (only the two required arguments are given).
oProcessor.ExtractText "C:\test\input1.pdf", "C:\test\output1.txt"

' Extract the first 5 pages of the input PDF file (page numbers are zero-based,
' so pages 0 through 4). Every continued line must end with " _": a space
' followed by an underscore.
oProcessor.ExtractText "C:\test\input2.pdf", _
                       "C:\test\output2.txt", _
                       From:=0, _
                       To:=4

' Extract using all options.
oProcessor.ExtractText "C:\test\input3.pdf", _
                       "C:\test\output3.txt", _
                       Password:="my_password", _
                       From:=0, _
                       To:=4, _
                       PageSeparator:="[MY_PAGE_SEP]", _
                       CodePage:=PRC_CP_UTF8