<?xml version='1.0' encoding='UTF-8'?><?xml-stylesheet href="http://www.blogger.com/styles/atom.css" type="text/css"?><feed xmlns='http://www.w3.org/2005/Atom' xmlns:openSearch='http://a9.com/-/spec/opensearchrss/1.0/'><id>tag:blogger.com,1999:blog-9003103274808968548.post5398229670737236514..comments</id><updated>2012-01-10T22:22:19.679-08:00</updated><category term='Coding'/><category term='Python'/><category term='Blogger tips and hacks'/><category term='System Administration'/><category term='C'/><category term='Troubleshooting'/><category term='Sun systems'/><category term='Shell Programming'/><category term='ssh'/><category term='XML'/><category term='About this blog'/><category term='Useful tools'/><category term='Windows'/><category term='Howtos'/><category term='Search'/><category term='Java'/><category term='Algorithms'/><category term='Page Rank'/><category term='openoffice'/><category term='GUI'/><category term='Technical Articles'/><category term='SWIG'/><category term='Netbeans'/><category term='Stanford'/><category term='Sun'/><category term='Sun Technologies for Students'/><category term='Investment Science'/><category term='General'/><category term='Jython'/><category term='Linux'/><category term='Socket Programming'/><category term='My Bookshelf'/><category term='Solaris'/><category term='Certifications'/><category term='VNC'/><category term='Hacking'/><title type='text'>Comments on Techtalks: PDF Text Parser: Converting PDF to Text in Java us...</title><link rel='http://schemas.google.com/g/2005#feed' type='application/atom+xml' href='http://www.prasannatech.net/feeds/5398229670737236514/comments/default'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default'/><link rel='alternate' type='text/html' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html'/><link rel='next' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default?start-index=26&amp;max-results=25'/><author><name>Prasanna Seshadri</name><uri>http://www.blogger.com/profile/02028881738236321272</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><generator version='7.00' uri='http://www.blogger.com'>Blogger</generator><openSearch:totalResults>84</openSearch:totalResults><openSearch:startIndex>1</openSearch:startIndex><openSearch:itemsPerPage>25</openSearch:itemsPerPage><entry><id>tag:blogger.com,1999:blog-9003103274808968548.post-8415169790319851901</id><published>2012-01-10T22:22:19.679-08:00</published><updated>2012-01-10T22:22:19.679-08:00</updated><title type='text'>thanx sir it help alot</title><content type='html'>thanx sir it help alot</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/8415169790319851901'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/8415169790319851901'/><link rel='alternate' type='text/html' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html?showComment=1326262939679#c8415169790319851901' title=''/><author><name>priyanka</name><uri>http://www.blogger.com/profile/07535090686359973673</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html' ref='tag:blogger.com,1999:blog-9003103274808968548.post-5398229670737236514' source='http://www.blogger.com/feeds/9003103274808968548/posts/default/5398229670737236514' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-480176441'/></entry><entry><id>tag:blogger.com,1999:blog-9003103274808968548.post-8459963665254769316</id><published>2012-01-07T08:47:02.456-08:00</published><updated>2012-01-07T08:47:02.456-08:00</updated><title type='text'>Works great thanks.

I am using pdfbox 1.6.0 there...</title><content type='html'>Works great thanks.&lt;br /&gt;&lt;br /&gt;I am using pdfbox 1.6.0 there is a class ExtractText.  It does the same thing but a lot simpler.  &lt;br /&gt;&lt;br /&gt;ExtractText.main(new String[]{&amp;quot;your pdf.pdf&amp;quot;});&lt;br /&gt;&lt;br /&gt;It strips everything and gives you a text file.</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/8459963665254769316'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/8459963665254769316'/><link rel='alternate' type='text/html' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html?showComment=1325954822456#c8459963665254769316' title=''/><author><name>Anonymous</name><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img1.blogblog.com/img/blank.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html' ref='tag:blogger.com,1999:blog-9003103274808968548.post-5398229670737236514' source='http://www.blogger.com/feeds/9003103274808968548/posts/default/5398229670737236514' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-268519161'/></entry><entry><id>tag:blogger.com,1999:blog-9003103274808968548.post-190144977719776353</id><published>2011-12-26T05:40:54.764-08:00</published><updated>2011-12-26T05:40:54.764-08:00</updated><title type='text'>Respected Sir,
I need to convert a Pdf containing ...</title><content type='html'>Respected Sir,&lt;br /&gt;I need to convert a Pdf containing hyperlink to a html,in which the hyperlink should be displayed in the html and on click of that page should be redirected to url specified.While extracting the Pdf if I can parse the hyperlink in such a way so that I can identify that the converted text is hyperlink then my job will be done.&lt;br /&gt;If you have any solution please reply me at  this email:  gaurav.das@saggezza.com&lt;br /&gt;&lt;br /&gt;Thanks in advance</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/190144977719776353'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/190144977719776353'/><link rel='alternate' type='text/html' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html?showComment=1324906854764#c190144977719776353' title=''/><author><name>Gaurav</name><uri>http://www.blogger.com/profile/17657904498797854108</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html' ref='tag:blogger.com,1999:blog-9003103274808968548.post-5398229670737236514' source='http://www.blogger.com/feeds/9003103274808968548/posts/default/5398229670737236514' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-1882932213'/></entry><entry><id>tag:blogger.com,1999:blog-9003103274808968548.post-2844580746944553881</id><published>2011-11-01T06:03:20.049-07:00</published><updated>2011-11-01T06:03:20.049-07:00</updated><title type='text'>Hello guys, I have small problem when trying to re...</title><content type='html'>Hello guys, I have small problem when trying to read PDF file, I get this: &lt;br /&gt;&lt;br /&gt;Past v vismaz divas probl mas, kas ir j risina programm t jiem, t s ir plaša projekta &lt;br /&gt;funkcionalit te un t  izveides laiks.&lt;br /&gt;&lt;br /&gt;in place of: &lt;br /&gt;&lt;br /&gt;Pastāv vismaz divas problēmas, kas ir jārisina programmētājiem, tās ir plaša projekta&lt;br /&gt;funkcionalitāte un tā izveides laiks.&lt;br /&gt;&lt;br /&gt;Can someone tell me, how can I add other encodings to text Parser?</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/2844580746944553881'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/2844580746944553881'/><link rel='alternate' type='text/html' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html?showComment=1320152600049#c2844580746944553881' title=''/><author><name>Anonymous</name><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img1.blogblog.com/img/blank.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html' ref='tag:blogger.com,1999:blog-9003103274808968548.post-5398229670737236514' source='http://www.blogger.com/feeds/9003103274808968548/posts/default/5398229670737236514' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-2091381353'/></entry><entry><id>tag:blogger.com,1999:blog-9003103274808968548.post-3232330848540196931</id><published>2011-10-29T21:40:24.864-07:00</published><updated>2011-10-29T21:40:24.864-07:00</updated><title type='text'>Thank you very much!</title><content type='html'>Thank you very much!</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/3232330848540196931'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/3232330848540196931'/><link rel='alternate' type='text/html' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html?showComment=1319949624864#c3232330848540196931' title=''/><author><name>Mariana</name><uri>http://www.blogger.com/profile/08108416191352567066</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html' ref='tag:blogger.com,1999:blog-9003103274808968548.post-5398229670737236514' source='http://www.blogger.com/feeds/9003103274808968548/posts/default/5398229670737236514' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-2078294153'/></entry><entry><id>tag:blogger.com,1999:blog-9003103274808968548.post-609929701774841012</id><published>2011-09-20T00:19:34.793-07:00</published><updated>2011-09-20T00:19:34.793-07:00</updated><title type='text'>There should be a fontbox library added into class...</title><content type='html'>There should be a fontbox library added into classpath</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/609929701774841012'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/609929701774841012'/><link rel='alternate' type='text/html' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html?showComment=1316503174793#c609929701774841012' title=''/><author><name>Chenda</name><uri>http://www.blogger.com/profile/01126769972098992056</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html' ref='tag:blogger.com,1999:blog-9003103274808968548.post-5398229670737236514' source='http://www.blogger.com/feeds/9003103274808968548/posts/default/5398229670737236514' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-385780056'/></entry><entry><id>tag:blogger.com,1999:blog-9003103274808968548.post-702368424586197824</id><published>2011-07-28T22:48:35.008-07:00</published><updated>2011-07-28T22:48:35.008-07:00</updated><title type='text'>Hi Prasanna Seshadri 

Thanks for this post.  I ca...</title><content type='html'>Hi Prasanna Seshadri &lt;br /&gt;&lt;br /&gt;Thanks for this post.  I can covert pdf using this code but getting lots of null null character&lt;br /&gt;&lt;br /&gt;Can anyone suggest me?&lt;br /&gt;&lt;br /&gt;out put ..&lt;br /&gt;Interest nullearinnull loans and &lt;br /&gt;nullorronullinnulls and interest free loans 1nullnull21null 1nullnull2nullnull 11null1nullnull nullnullnullnull1 3nullnull 10null nullnullnullnullnull2&lt;br /&gt;nullinance leases 2null0 2nullnull 13null null null null nullnull3&lt;br /&gt;Trade and other payanullles 3nullnullnull3null 1nullnull null null null null 3nullnullnull32&lt;br /&gt;null1nullnullnull3 1nullnullnull0null 11null32null nullnullnullnull1 3nullnull 10null null3null10null&lt;br /&gt;The tanullle nullelonull summarises the maturity profile of the nullompanynulls financial lianullilities nullincludinnull trade and other payanulllesnull at 30 April 2010 and &lt;br /&gt;30 April 200null nullased on contractual undiscounted paymentsnull&lt;br /&gt;Contractual cash nullows&lt;br /&gt;nullithin  &lt;br /&gt;one year</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/702368424586197824'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/702368424586197824'/><link rel='alternate' type='text/html' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html?showComment=1311918515008#c702368424586197824' title=''/><author><name>vaishali</name><uri>http://www.blogger.com/profile/08494558585061033560</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html' ref='tag:blogger.com,1999:blog-9003103274808968548.post-5398229670737236514' source='http://www.blogger.com/feeds/9003103274808968548/posts/default/5398229670737236514' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-416283858'/></entry><entry><id>tag:blogger.com,1999:blog-9003103274808968548.post-450552587365040077</id><published>2011-07-28T13:27:19.963-07:00</published><updated>2011-07-28T13:27:19.963-07:00</updated><title type='text'>this program runs great on me. Thanks!
However I c...</title><content type='html'>this program runs great on me. Thanks!&lt;br /&gt;However I can not extract any filled-in text in my PDF file. The filled-in text are all missing, so essentially I got a blank PDF converted in text file. &lt;br /&gt;Has anyone experienced the same and what solution you have to get around it? &lt;br /&gt;Thanks a million.!&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Wei</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/450552587365040077'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/450552587365040077'/><link rel='alternate' type='text/html' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html?showComment=1311884839963#c450552587365040077' title=''/><author><name>Wei</name><uri>http://www.blogger.com/profile/15777464544795318998</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html' ref='tag:blogger.com,1999:blog-9003103274808968548.post-5398229670737236514' source='http://www.blogger.com/feeds/9003103274808968548/posts/default/5398229670737236514' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-228885484'/></entry><entry><id>tag:blogger.com,1999:blog-9003103274808968548.post-2665706446337652729</id><published>2011-07-06T00:19:54.305-07:00</published><updated>2011-07-06T00:19:54.305-07:00</updated><title type='text'>Hello dear, I face a problem like the folowing sni...</title><content type='html'>Hello dear, I face a problem like the folowing snippet. I use same jar and jdk as you suggest. But face the following error. There is any suggestion for this. Thank you. I try to extract bangla text from pdf file. Any suggestion? Please help. &lt;br /&gt;&lt;br /&gt;G:\PDFBOX&amp;gt;javac PDFTextParser.java&lt;br /&gt;&lt;br /&gt;G:\PDFBOX&amp;gt;java PDFTextParser book.pdf test.txt&lt;br /&gt;Parsing text from PDF file book.pdf....&lt;br /&gt;An exception occured in parsing the PDF Document.&lt;br /&gt;org.pdfbox.exceptions.WrappedIOException&lt;br /&gt;        at org.pdfbox.util.PDFStreamEngine.(PDFStreamEngine.java:128)&lt;br /&gt;        at org.pdfbox.util.PDFTextStripper.(PDFTextStripper.java:119)&lt;br /&gt;        at PDFTextParser.pdftoText(PDFTextParser.java:59)&lt;br /&gt;        at PDFTextParser.main(PDFTextParser.java:102)&lt;br /&gt;java.lang.NullPointerException&lt;br /&gt;        at org.pdfbox.util.PDFStreamEngine.(PDFStreamEngine.java:117)&lt;br /&gt;        at org.pdfbox.util.PDFTextStripper.(PDFTextStripper.java:119)&lt;br /&gt;        at PDFTextParser.pdftoText(PDFTextParser.java:59)&lt;br /&gt;        at PDFTextParser.main(PDFTextParser.java:102)&lt;br /&gt;PDF to Text Conversion failed.</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/2665706446337652729'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/2665706446337652729'/><link rel='alternate' type='text/html' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html?showComment=1309936794305#c2665706446337652729' title=''/><author><name>Hasan Rahman</name><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img1.blogblog.com/img/blank.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html' ref='tag:blogger.com,1999:blog-9003103274808968548.post-5398229670737236514' source='http://www.blogger.com/feeds/9003103274808968548/posts/default/5398229670737236514' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-115888935'/></entry><entry><id>tag:blogger.com,1999:blog-9003103274808968548.post-4289484330758564233</id><published>2011-06-20T12:51:02.746-07:00</published><updated>2011-06-20T12:51:02.746-07:00</updated><title type='text'>Hi,

I need help develop a tool which can compare ...</title><content type='html'>Hi,&lt;br /&gt;&lt;br /&gt;I need help develop a tool which can compare two pdf files and highlight the non-matched content. Could you please suggest me the process how to do it.&lt;br /&gt;&lt;br /&gt;Please do reply me.&lt;br /&gt;&lt;br /&gt;Thanks in advance.</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/4289484330758564233'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/4289484330758564233'/><link rel='alternate' type='text/html' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html?showComment=1308599462746#c4289484330758564233' title=''/><author><name>shravu</name><uri>http://www.blogger.com/profile/03926233787901360722</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html' ref='tag:blogger.com,1999:blog-9003103274808968548.post-5398229670737236514' source='http://www.blogger.com/feeds/9003103274808968548/posts/default/5398229670737236514' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-560387536'/></entry><entry><id>tag:blogger.com,1999:blog-9003103274808968548.post-2433472998061850744</id><published>2011-06-20T08:24:16.916-07:00</published><updated>2011-06-20T08:24:16.916-07:00</updated><title type='text'>I want to extract the font size of the text...as a...</title><content type='html'>I want to extract the font size of the text...as a single line contains two different fonts...</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/2433472998061850744'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/2433472998061850744'/><link rel='alternate' type='text/html' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html?showComment=1308583456916#c2433472998061850744' title=''/><author><name>Grewal (Nitin)</name><uri>http://www.blogger.com/profile/03480067214786159315</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='24' height='32' src='http://3.bp.blogspot.com/_OGINj_etqjM/S1rx6p8MQmI/AAAAAAAAAT8/_1EQlL1rUMc/S220/IMG_2988.JPG'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html' ref='tag:blogger.com,1999:blog-9003103274808968548.post-5398229670737236514' source='http://www.blogger.com/feeds/9003103274808968548/posts/default/5398229670737236514' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-424620745'/></entry><entry><id>tag:blogger.com,1999:blog-9003103274808968548.post-7708008318565439585</id><published>2011-06-18T22:30:52.334-07:00</published><updated>2011-06-18T22:30:52.334-07:00</updated><title type='text'>Looks like your jar doesn&amp;#39;t have the class jav...</title><content type='html'>Looks like your jar doesn&amp;#39;t have the class java.lang.NoClassDefFoundError: org/fontbox/afm/AFMParser&lt;br /&gt;&lt;br /&gt;Use the version of PDFBox mentioned in this article.</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/7708008318565439585'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/7708008318565439585'/><link rel='alternate' type='text/html' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html?showComment=1308461452334#c7708008318565439585' title=''/><author><name>Prasanna Seshadri</name><uri>http://www.blogger.com/profile/02028881738236321272</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html' ref='tag:blogger.com,1999:blog-9003103274808968548.post-5398229670737236514' source='http://www.blogger.com/feeds/9003103274808968548/posts/default/5398229670737236514' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-1945045701'/></entry><entry><id>tag:blogger.com,1999:blog-9003103274808968548.post-107499669883013728</id><published>2011-06-18T20:03:46.423-07:00</published><updated>2011-06-18T20:03:46.423-07:00</updated><title type='text'>hii..
 i am avinash...
 i am using your code in my...</title><content type='html'>hii..&lt;br /&gt; i am avinash...&lt;br /&gt; i am using your code in my program..but it raises some exceptions..can u tell me how to solve these..exceptions...&lt;br /&gt;inform me as early as possible....&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Parsing text from PDF file c:/simple.pdf....&lt;br /&gt;Exception in thread &amp;quot;main&amp;quot; java.lang.NoClassDefFoundError: org/fontbox/afm/AFMParser&lt;br /&gt;        at org.pdfbox.pdmodel.font.PDFont.getAFM(PDFont.java:350)&lt;br /&gt;        at org.pdfbox.pdmodel.font.PDSimpleFont.getFontHeight(PDSimpleFont.java:104)&lt;br /&gt;        at org.pdfbox.util.PDFStreamEngine.showString(PDFStreamEngine.java:336)&lt;br /&gt;        at org.pdfbox.util.operator.ShowTextGlyph.process(ShowTextGlyph.java:80)&lt;br /&gt;        at org.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:452)&lt;br /&gt;        at org.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:215)&lt;br /&gt;        at org.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:174)&lt;br /&gt;        at org.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:336)&lt;br /&gt;        at org.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:259)&lt;br /&gt;        at org.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:216)&lt;br /&gt;        at org.pdfbox.util.PDFTextStripper.getText(PDFTextStripper.java:149)&lt;br /&gt;        at javaapplication12.PDFTextParser.pdftoText(Main.java:54)&lt;br /&gt;        at javaapplication12.Main.main(Main.java:97)&lt;br /&gt;Caused by: java.lang.ClassNotFoundException: org.fontbox.afm.AFMParser&lt;br /&gt;        at java.net.URLClassLoader$1.run(URLClassLoader.java:202)&lt;br /&gt;        at java.security.AccessController.doPrivileged(Native Method)&lt;br /&gt;        at java.net.URLClassLoader.findClass(URLClassLoader.java:190)&lt;br /&gt;        at java.lang.ClassLoader.loadClass(ClassLoader.java:307)&lt;br /&gt;        at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:301)&lt;br /&gt;        at java.lang.ClassLoader.loadClass(ClassLoader.java:248)&lt;br /&gt;        ... 13 more&lt;br /&gt;&lt;br /&gt;  thank u...</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/107499669883013728'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/107499669883013728'/><link rel='alternate' type='text/html' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html?showComment=1308452626423#c107499669883013728' title=''/><author><name>avi</name><uri>http://www.blogger.com/profile/02843912177045811739</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html' ref='tag:blogger.com,1999:blog-9003103274808968548.post-5398229670737236514' source='http://www.blogger.com/feeds/9003103274808968548/posts/default/5398229670737236514' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-799311978'/></entry><entry><id>tag:blogger.com,1999:blog-9003103274808968548.post-6910173899187868071</id><published>2011-06-18T20:01:35.371-07:00</published><updated>2011-06-18T20:01:35.371-07:00</updated><title type='text'>hiiiii....
i am avinash...
i am using your code in...</title><content type='html'>hiiiii....&lt;br /&gt;i am avinash...&lt;br /&gt;i am using your code in my program...but it shows some errors can u tell me how to solve these error....pls tell me as soon as possible..very urgent...&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Parsing text from PDF file c:/simple.pdf....&lt;br /&gt;Exception in thread &amp;quot;main&amp;quot; java.lang.NoClassDefFoundError: org/fontbox/afm/AFMParser&lt;br /&gt;        at org.pdfbox.pdmodel.font.PDFont.getAFM(PDFont.java:350)&lt;br /&gt;        at org.pdfbox.pdmodel.font.PDSimpleFont.getFontHeight(PDSimpleFont.java:104)&lt;br /&gt;        at org.pdfbox.util.PDFStreamEngine.showString(PDFStreamEngine.java:336)&lt;br /&gt;        at org.pdfbox.util.operator.ShowTextGlyph.process(ShowTextGlyph.java:80)&lt;br /&gt;        at org.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:452)&lt;br /&gt;        at org.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:215)&lt;br /&gt;        at org.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:174)&lt;br /&gt;        at org.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:336)&lt;br /&gt;        at org.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:259)&lt;br /&gt;        at org.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:216)&lt;br /&gt;        at org.pdfbox.util.PDFTextStripper.getText(PDFTextStripper.java:149)&lt;br /&gt;        at javaapplication12.PDFTextParser.pdftoText(Main.java:54)&lt;br /&gt;        at javaapplication12.Main.main(Main.java:97)&lt;br /&gt;Caused by: java.lang.ClassNotFoundException: org.fontbox.afm.AFMParser&lt;br /&gt;        at java.net.URLClassLoader$1.run(URLClassLoader.java:202)&lt;br /&gt;        at java.security.AccessController.doPrivileged(Native Method)&lt;br /&gt;        at java.net.URLClassLoader.findClass(URLClassLoader.java:190)&lt;br /&gt;        at java.lang.ClassLoader.loadClass(ClassLoader.java:307)&lt;br /&gt;        at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:301)&lt;br /&gt;        at java.lang.ClassLoader.loadClass(ClassLoader.java:248)&lt;br /&gt;        ... 13 more&lt;br /&gt;&lt;br /&gt;    ..thank u....</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/6910173899187868071'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/6910173899187868071'/><link rel='alternate' type='text/html' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html?showComment=1308452495371#c6910173899187868071' title=''/><author><name>avi</name><uri>http://www.blogger.com/profile/02843912177045811739</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html' ref='tag:blogger.com,1999:blog-9003103274808968548.post-5398229670737236514' source='http://www.blogger.com/feeds/9003103274808968548/posts/default/5398229670737236514' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-799311978'/></entry><entry><id>tag:blogger.com,1999:blog-9003103274808968548.post-639072671091057214</id><published>2011-06-12T09:20:50.447-07:00</published><updated>2011-06-12T09:20:50.447-07:00</updated><title type='text'>It works with following commands:

compile:
javac ...</title><content type='html'>It works with following commands:&lt;br /&gt;&lt;br /&gt;compile:&lt;br /&gt;javac -classpath ./PDFBox-0.7.3/lib/PDFBox-0.7.3.jar:/ PDFTextParser.java&lt;br /&gt;&lt;br /&gt;execute:&lt;br /&gt;java -classpath ../PDFBox-0.7.3/lib/PDFBox-0.7.3.jar:../PDFBox-0.7.3/external/bcmail-jdk14-132.jar:../PDFBox-0.7.3/external/bcprov-jdk14-132.jar:../PDFBox-0.7.3external/checkstyle-all-4.2.jar:../PDFBox-0.7.3external/junit.jar:../PDFBox-0.7.3external/lucene-demos-2.0.0.jar:../PDFBox-0.7.3/external/ant.jar:../PDFBox-0.7.3/external/FontBox-0.1.0-dev.jar:../PDFBox-0.7.3/external/lucene-core-2.0.0.jar:. PDFTextParser ECN\ 001\ \(LDN120508\).pdf kernelsource1.txt&lt;br /&gt;&lt;br /&gt;Jani Verkkomäki</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/639072671091057214'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/639072671091057214'/><link rel='alternate' type='text/html' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html?showComment=1307895650447#c639072671091057214' title=''/><author><name>Anonymous</name><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img1.blogblog.com/img/blank.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html' ref='tag:blogger.com,1999:blog-9003103274808968548.post-5398229670737236514' source='http://www.blogger.com/feeds/9003103274808968548/posts/default/5398229670737236514' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-2111297680'/></entry><entry><id>tag:blogger.com,1999:blog-9003103274808968548.post-261753076881382983</id><published>2011-05-23T07:02:25.713-07:00</published><updated>2011-05-23T07:02:25.713-07:00</updated><title type='text'>Hi to all

For error message:
Exception in thread ...</title><content type='html'>Hi to all&lt;br /&gt;&lt;br /&gt;For error message:&lt;br /&gt;Exception in thread &amp;quot;main&amp;quot; java.lang.NoClassDefFoundError: org/apache/commons/logging/LogFactory&lt;br /&gt; at org.apache.pdfbox.pdfparser.BaseParser.(BaseParser.java:58)&lt;br /&gt;&lt;br /&gt;jar commons-logging-x.x.x.jar miss&lt;br /&gt;You can find it here:&lt;br /&gt;http://commons.apache.org/logging/download_logging.cgi</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/261753076881382983'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/261753076881382983'/><link rel='alternate' type='text/html' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html?showComment=1306159345713#c261753076881382983' title=''/><author><name>LordMax</name><uri>http://www.blogger.com/profile/18206947955710484744</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='19' height='32' src='http://4.bp.blogspot.com/_vKt6XDzRlhE/SlMmDZf9jKI/AAAAAAAAAEA/xaIXMnSZFbg/S220/cilindro2.jpg'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html' ref='tag:blogger.com,1999:blog-9003103274808968548.post-5398229670737236514' source='http://www.blogger.com/feeds/9003103274808968548/posts/default/5398229670737236514' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-1236919480'/></entry><entry><id>tag:blogger.com,1999:blog-9003103274808968548.post-2902559458130562079</id><published>2011-05-23T05:09:44.718-07:00</published><updated>2011-05-23T05:09:44.718-07:00</updated><title type='text'>Thanks for your blog. I have a problem extracting ...</title><content type='html'>Thanks for your blog. I have a problem extracting text, the bold letters in the pdf file goes to the end of the corresponding line. I have tried by setting pdfTextStripper.setSortByPosition(true), but it makes some other contents misplaced. Is there any other options to make this work?</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/2902559458130562079'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/2902559458130562079'/><link rel='alternate' type='text/html' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html?showComment=1306152584718#c2902559458130562079' title=''/><author><name>Anonymous</name><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img1.blogblog.com/img/blank.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html' ref='tag:blogger.com,1999:blog-9003103274808968548.post-5398229670737236514' source='http://www.blogger.com/feeds/9003103274808968548/posts/default/5398229670737236514' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-1540046259'/></entry><entry><id>tag:blogger.com,1999:blog-9003103274808968548.post-3456833000886809105</id><published>2011-05-10T01:37:13.058-07:00</published><updated>2011-05-10T01:37:13.058-07:00</updated><title type='text'>I want to write text to PDF file with position x,y...</title><content type='html'>I want to write text to PDF file with position x,y,width and height as we do it for read using objTextStripperbyArea.getTextForRegion.</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/3456833000886809105'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/3456833000886809105'/><link rel='alternate' type='text/html' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html?showComment=1305016633058#c3456833000886809105' title=''/><author><name>Rana</name><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img1.blogblog.com/img/blank.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html' ref='tag:blogger.com,1999:blog-9003103274808968548.post-5398229670737236514' source='http://www.blogger.com/feeds/9003103274808968548/posts/default/5398229670737236514' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-1302947690'/></entry><entry><id>tag:blogger.com,1999:blog-9003103274808968548.post-4709878067602555097</id><published>2011-05-09T07:14:30.995-07:00</published><updated>2011-05-09T07:14:30.995-07:00</updated><title type='text'>Exception in thread &amp;quot;main&amp;quot; java.lang.NoC...</title><content type='html'>Exception in thread &amp;quot;main&amp;quot; java.lang.NoClassDefFoundError: org/apache/commons/logging/LogFactory&lt;br /&gt; at org.apache.pdfbox.pdfparser.BaseParser.(BaseParser.java:58)&lt;br /&gt; at de.fhwedel.swp.indexier06.Main.main(Main.java:188)&lt;br /&gt;Caused by: java.lang.ClassNotFoundException: org.apache.commons.logging.LogFactory&lt;br /&gt; at java.net.URLClassLoader$1.run(Unknown Source)&lt;br /&gt; at java.security.AccessController.doPrivileged(Native Method)&lt;br /&gt; at java.net.URLClassLoader.findClass(Unknown Source)&lt;br /&gt; at java.lang.ClassLoader.loadClass(Unknown Source)&lt;br /&gt; at sun.misc.Launcher$AppClassLoader.loadClass(Unknown Source)&lt;br /&gt; at java.lang.ClassLoader.loadClass(Unknown Source)&lt;br /&gt; ... 2 more&lt;br /&gt;&lt;br /&gt;I dont know what the program want???</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/4709878067602555097'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/4709878067602555097'/><link rel='alternate' type='text/html' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html?showComment=1304950470995#c4709878067602555097' title=''/><author><name>Anonymous</name><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img1.blogblog.com/img/blank.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html' ref='tag:blogger.com,1999:blog-9003103274808968548.post-5398229670737236514' source='http://www.blogger.com/feeds/9003103274808968548/posts/default/5398229670737236514' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-780111823'/></entry><entry><id>tag:blogger.com,1999:blog-9003103274808968548.post-2939772400728985570</id><published>2011-03-24T11:40:22.107-07:00</published><updated>2011-03-24T11:40:22.107-07:00</updated><title type='text'>For people who are reporting compilation errors, i...</title><content type='html'>For people who are reporting compilation errors, if you follow the above procedure as it is it should work fine, please make sure that the packages you use are also the ones I used to compile, the jars may have changed with new versions.</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/2939772400728985570'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/2939772400728985570'/><link rel='alternate' type='text/html' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html?showComment=1300992022107#c2939772400728985570' title=''/><author><name>Prasanna Seshadri</name><uri>http://www.blogger.com/profile/02028881738236321272</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html' ref='tag:blogger.com,1999:blog-9003103274808968548.post-5398229670737236514' source='http://www.blogger.com/feeds/9003103274808968548/posts/default/5398229670737236514' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-1945045701'/></entry><entry><id>tag:blogger.com,1999:blog-9003103274808968548.post-4347542661888052466</id><published>2011-01-11T21:31:01.351-08:00</published><updated>2011-01-11T21:31:01.351-08:00</updated><title type='text'>am using netbeans 6.9...pls help me know where to ...</title><content type='html'>am using netbeans 6.9...pls help me know where to include the jar files , to find the jar files in pdfbox library and set the external directory to the classpath</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/4347542661888052466'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/4347542661888052466'/><link rel='alternate' type='text/html' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html?showComment=1294810261351#c4347542661888052466' title=''/><author><name>crys</name><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img1.blogblog.com/img/blank.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html' ref='tag:blogger.com,1999:blog-9003103274808968548.post-5398229670737236514' source='http://www.blogger.com/feeds/9003103274808968548/posts/default/5398229670737236514' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-775662749'/></entry><entry><id>tag:blogger.com,1999:blog-9003103274808968548.post-8221851540000434596</id><published>2010-12-21T00:18:15.071-08:00</published><updated>2010-12-21T00:18:15.071-08:00</updated><title type='text'>Thanks a lot buddy..how can we use the command lin...</title><content type='html'>Thanks a lot buddy..how can we use the command line parameters in this to convert pdf to html</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/8221851540000434596'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/8221851540000434596'/><link rel='alternate' type='text/html' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html?showComment=1292919495071#c8221851540000434596' title=''/><author><name>Vignesh</name><uri>http://www.blogger.com/profile/05922119143672925151</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html' ref='tag:blogger.com,1999:blog-9003103274808968548.post-5398229670737236514' source='http://www.blogger.com/feeds/9003103274808968548/posts/default/5398229670737236514' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-1195426050'/></entry><entry><id>tag:blogger.com,1999:blog-9003103274808968548.post-5371139638625890777</id><published>2010-11-27T03:56:04.807-08:00</published><updated>2010-11-27T03:56:04.807-08:00</updated><title type='text'>Hi....
I have read ur Blog and i ike ur work in pd...</title><content type='html'>Hi....&lt;br /&gt;I have read ur Blog and i ike ur work in pdf and java.&lt;br /&gt;I am new in this field. I want to make a program that take a PDF file in input and as an out put it shows all Headings of the Document.&lt;br /&gt;For example there is a document with 5 pages than my program should show all the different type of headings in that 5 pages.&lt;br /&gt;what i need, &lt;br /&gt;1. that is there any object/tag for heading in pdf heading.&lt;br /&gt;2. Or any other way to do this.&lt;br /&gt;&lt;br /&gt;I will be very thank full to u for ur time</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/5371139638625890777'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/5371139638625890777'/><link rel='alternate' type='text/html' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html?showComment=1290858964807#c5371139638625890777' title=''/><author><name>Coder Xpert</name><uri>http://www.blogger.com/profile/15619005276845871103</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html' ref='tag:blogger.com,1999:blog-9003103274808968548.post-5398229670737236514' source='http://www.blogger.com/feeds/9003103274808968548/posts/default/5398229670737236514' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-288183087'/></entry><entry><id>tag:blogger.com,1999:blog-9003103274808968548.post-1925532346272908106</id><published>2010-11-26T00:00:39.565-08:00</published><updated>2010-11-26T00:00:39.565-08:00</updated><title type='text'>hi, can help me? i am new. using netbean. how do i...</title><content type='html'>hi, can help me? i am new. using netbean. how do i using the source code provided?</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/1925532346272908106'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/1925532346272908106'/><link rel='alternate' type='text/html' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html?showComment=1290758439565#c1925532346272908106' title=''/><author><name>Anonymous</name><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img1.blogblog.com/img/blank.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html' ref='tag:blogger.com,1999:blog-9003103274808968548.post-5398229670737236514' source='http://www.blogger.com/feeds/9003103274808968548/posts/default/5398229670737236514' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-11507919'/></entry><entry><id>tag:blogger.com,1999:blog-9003103274808968548.post-5418362235099829416</id><published>2010-11-25T10:51:29.158-08:00</published><updated>2010-11-25T10:51:29.158-08:00</updated><title type='text'>Not sure about indexing, the idea is to have a sim...</title><content type='html'>Not sure about indexing, the idea is to have a simple text parser for PDF documents.</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/5418362235099829416'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/5398229670737236514/comments/default/5418362235099829416'/><link rel='alternate' type='text/html' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html?showComment=1290711089158#c5418362235099829416' title=''/><author><name>Prasanna Seshadri</name><uri>http://www.blogger.com/profile/02028881738236321272</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://www.prasannatech.net/2009/01/convert-pdf-text-parser-java-api-pdfbox.html' ref='tag:blogger.com,1999:blog-9003103274808968548.post-5398229670737236514' source='http://www.blogger.com/feeds/9003103274808968548/posts/default/5398229670737236514' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-1945045701'/></entry></feed>
