Saturday, July 20, 2013

Reading PDF content with the iTextSharp DLL in VB.NET or C#

Links
http://www.dotnetspider.com/forum/156957-read-pdf-content-vb-net.aspx
http://jadn.co.uk/w/ReadPdfUsingCsharp.htm
http://forums.asp.net/p/1408202/3097463.aspx#3097463

The link below contains iTextSharp tutorials.
http://itextsharp.sourceforge.net/tutorial/ch01.html

Code

/// <summary>
/// Extracts the text of every page of a PDF file using iTextSharp.
/// </summary>
/// <param name="Filename">Path of the PDF file to read.</param>
/// <returns>
/// The text of all pages concatenated in page order. No separator is
/// inserted between pages.
/// </returns>
public string ReadFile(string Filename)
{
    // Open the document once and reuse the same reader for every page,
    // instead of re-opening the file on each loop iteration.
    PdfReader reader = new PdfReader(Filename);
    try
    {
        StringBuilder pdfText = new StringBuilder();

        // The loop starts at 1 and runs through NumberOfPages inclusive,
        // matching the page index passed to GetTextFromPage.
        for (int page = 1; page <= reader.NumberOfPages; page++)
        {
            // A new strategy instance is created for each page, as in the original snippet.
            ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

            // The extracted value is already a .NET (UTF-16) string, so it is
            // appended as-is. Round-tripping it through Encoding.Default would
            // only replace characters outside the ANSI code page with '?'.
            pdfText.Append(PdfTextExtractor.GetTextFromPage(reader, page, strategy));
        }

        return pdfText.ToString();
    }
    finally
    {
        // Release the file even when extraction throws part-way through.
        reader.Close();
    }
}

No comments: