<?xml version='1.0' encoding='UTF-8'?><?xml-stylesheet href="http://www.blogger.com/styles/atom.css" type="text/css"?><feed xmlns='http://www.w3.org/2005/Atom' xmlns:openSearch='http://a9.com/-/spec/opensearchrss/1.0/'><id>tag:blogger.com,1999:blog-9003103274808968548.post7295489955685743411..comments</id><updated>2011-09-20T08:02:53.879-07:00</updated><category term='Coding'/><category term='Python'/><category term='Blogger tips and hacks'/><category term='System Administration'/><category term='C'/><category term='Troubleshooting'/><category term='Sun systems'/><category term='Shell Programming'/><category term='ssh'/><category term='XML'/><category term='About this blog'/><category term='Useful tools'/><category term='Windows'/><category term='Howtos'/><category term='Search'/><category term='Java'/><category term='Algorithms'/><category term='Page Rank'/><category term='openoffice'/><category term='GUI'/><category term='Technical Articles'/><category term='SWIG'/><category term='Netbeans'/><category term='Stanford'/><category term='Sun'/><category term='Sun Technologies for Students'/><category term='Investment Science'/><category term='General'/><category term='Jython'/><category term='Linux'/><category term='Socket Programming'/><category term='My Bookshelf'/><category term='Solaris'/><category term='Certifications'/><category term='VNC'/><category term='Hacking'/><title type='text'>Comments on Techtalks: HTML Text Parser: Converting HTML to Text in Java ...</title><link rel='http://schemas.google.com/g/2005#feed' type='application/atom+xml' href='http://www.prasannatech.net/feeds/7295489955685743411/comments/default'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/7295489955685743411/comments/default'/><link rel='alternate' type='text/html' href='http://www.prasannatech.net/2009/02/convert-html-text-parser-java-api.html'/><author><name>Prasanna Seshadri</name><uri>http://www.blogger.com/profile/02028881738236321272</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><generator version='7.00' uri='http://www.blogger.com'>Blogger</generator><openSearch:totalResults>7</openSearch:totalResults><openSearch:startIndex>1</openSearch:startIndex><openSearch:itemsPerPage>25</openSearch:itemsPerPage><entry><id>tag:blogger.com,1999:blog-9003103274808968548.post-2777802347873877233</id><published>2011-09-20T08:02:53.879-07:00</published><updated>2011-09-20T08:02:53.879-07:00</updated><title type='text'>may i please use your code in an open source proje...</title><content type='html'>may i please use your code in an open source project? &lt;br /&gt;(http://code.google.com/p/orayta/)&lt;br /&gt;thanks.</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/7295489955685743411/comments/default/2777802347873877233'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/7295489955685743411/comments/default/2777802347873877233'/><link rel='alternate' type='text/html' href='http://www.prasannatech.net/2009/02/convert-html-text-parser-java-api.html?showComment=1316530973879#c2777802347873877233' title=''/><author><name>abe.izar</name><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img1.blogblog.com/img/blank.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://www.prasannatech.net/2009/02/convert-html-text-parser-java-api.html' ref='tag:blogger.com,1999:blog-9003103274808968548.post-7295489955685743411' source='http://www.blogger.com/feeds/9003103274808968548/posts/default/7295489955685743411' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-702872013'/></entry><entry><id>tag:blogger.com,1999:blog-9003103274808968548.post-4271550286893675582</id><published>2011-08-16T02:33:41.037-07:00</published><updated>2011-08-16T02:33:41.037-07:00</updated><title type='text'></title><content type='html'>This comment has been removed by the author.</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/7295489955685743411/comments/default/4271550286893675582'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/7295489955685743411/comments/default/4271550286893675582'/><author><name>M412</name><uri>http://www.blogger.com/profile/12775623595340714808</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://www.prasannatech.net/2009/02/convert-html-text-parser-java-api.html' ref='tag:blogger.com,1999:blog-9003103274808968548.post-7295489955685743411' source='http://www.blogger.com/feeds/9003103274808968548/posts/default/7295489955685743411' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.contentRemoved' value='true'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-755748170'/></entry><entry><id>tag:blogger.com,1999:blog-9003103274808968548.post-9203945376184873667</id><published>2011-08-14T00:03:38.577-07:00</published><updated>2011-08-14T00:03:38.577-07:00</updated><title type='text'>hi..

can you plz tell me how to convert html to w...</title><content type='html'>hi..&lt;br /&gt;&lt;br /&gt;can you plz tell me how to convert html to word document using java...&lt;br /&gt;my mail id is aucse_n@yahoo.com</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/7295489955685743411/comments/default/9203945376184873667'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/7295489955685743411/comments/default/9203945376184873667'/><link rel='alternate' type='text/html' href='http://www.prasannatech.net/2009/02/convert-html-text-parser-java-api.html?showComment=1313305418577#c9203945376184873667' title=''/><author><name>Anonymous</name><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img1.blogblog.com/img/blank.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://www.prasannatech.net/2009/02/convert-html-text-parser-java-api.html' ref='tag:blogger.com,1999:blog-9003103274808968548.post-7295489955685743411' source='http://www.blogger.com/feeds/9003103274808968548/posts/default/7295489955685743411' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-905978303'/></entry><entry><id>tag:blogger.com,1999:blog-9003103274808968548.post-4427986223681172576</id><published>2010-10-10T15:55:30.406-07:00</published><updated>2010-10-10T15:55:30.406-07:00</updated><title type='text'>Hello,

The code works great.. Thanks for sharing ...</title><content type='html'>Hello,&lt;br /&gt;&lt;br /&gt;The code works great.. Thanks for sharing it..&lt;br /&gt;Is there any way to get rid of the contents of &amp;lt; script &amp;gt; tags. Although the tag are gotten rid off, the actual script is still visible.&lt;br /&gt;&lt;br /&gt;Thanks.</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/7295489955685743411/comments/default/4427986223681172576'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/7295489955685743411/comments/default/4427986223681172576'/><link rel='alternate' type='text/html' href='http://www.prasannatech.net/2009/02/convert-html-text-parser-java-api.html?showComment=1286751330406#c4427986223681172576' title=''/><author><name>1..2......3...........NILL</name><uri>http://www.blogger.com/profile/04199322416608731119</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://www.prasannatech.net/2009/02/convert-html-text-parser-java-api.html' ref='tag:blogger.com,1999:blog-9003103274808968548.post-7295489955685743411' source='http://www.blogger.com/feeds/9003103274808968548/posts/default/7295489955685743411' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-876952278'/></entry><entry><id>tag:blogger.com,1999:blog-9003103274808968548.post-124268420287362967</id><published>2010-07-14T12:35:47.869-07:00</published><updated>2010-07-14T12:35:47.869-07:00</updated><title type='text'>Hi Prasanna,
Great code. Very clean. I don&amp;#39;t b...</title><content type='html'>Hi Prasanna,&lt;br /&gt;Great code. Very clean. I don&amp;#39;t believe I&amp;#39;ve used so many try-catches in one method.&lt;br /&gt;I&amp;#39;ve been experimenting with Neko for a project, and it seems unreliable at times. The parsing is spot on, but retrieving , not as much.&lt;br /&gt;I&amp;#39;ve been using the DOMParser and when I give the file path as argument, sometimes it parses it and other times simply says unknown protocol. This is what I wrote:&lt;br /&gt;String filePath=&amp;quot;c:\\x.html&amp;quot;&lt;br /&gt;DOMParser p1= new DOMParser();&lt;br /&gt;p1.parse(filePath);&lt;br /&gt;Doc d1= p1.getDocument();&lt;br /&gt;&lt;br /&gt;This code ran well enough in my test bed, but in the final implementation it cant pick up on the file path. Is there a known error or bug or am I doing something wrong?&lt;br /&gt;&lt;br /&gt;I know you have used InputSource, but the DOMParser does mention that you can either use InputSource or pass a file path and it will resolve it as an input source.&lt;br /&gt;&lt;br /&gt;Lastly, a bit unrelated, when opening a file in Java why cant it process file paths with spaces like &amp;quot;c:\\my file\\x.html&amp;quot;. This may be simply restricted to DOM in general.&lt;br /&gt;&lt;br /&gt;Please get back to me if possible at schmuck_dud@yahoo.com</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/7295489955685743411/comments/default/124268420287362967'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/7295489955685743411/comments/default/124268420287362967'/><link rel='alternate' type='text/html' href='http://www.prasannatech.net/2009/02/convert-html-text-parser-java-api.html?showComment=1279136147869#c124268420287362967' title=''/><author><name>Ankit</name><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img1.blogblog.com/img/blank.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://www.prasannatech.net/2009/02/convert-html-text-parser-java-api.html' ref='tag:blogger.com,1999:blog-9003103274808968548.post-7295489955685743411' source='http://www.blogger.com/feeds/9003103274808968548/posts/default/7295489955685743411' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-1282275525'/></entry><entry><id>tag:blogger.com,1999:blog-9003103274808968548.post-3817571678235791794</id><published>2010-06-12T18:17:37.407-07:00</published><updated>2010-06-12T18:17:37.407-07:00</updated><title type='text'>Great code Prasanna! It works like a gem. Thanks f...</title><content type='html'>Great code Prasanna! It works like a gem. Thanks for sharing!</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/7295489955685743411/comments/default/3817571678235791794'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/7295489955685743411/comments/default/3817571678235791794'/><link rel='alternate' type='text/html' href='http://www.prasannatech.net/2009/02/convert-html-text-parser-java-api.html?showComment=1276391857407#c3817571678235791794' title=''/><author><name>Sumit</name><uri>http://www.blogger.com/profile/11568747058604141505</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='24' src='http://bp1.blogger.com/_2D0C2-TYFm4/SGh_VYSCUhI/AAAAAAAADvo/0-bWfJCoyTI/S220/IMG_0366.JPG'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://www.prasannatech.net/2009/02/convert-html-text-parser-java-api.html' ref='tag:blogger.com,1999:blog-9003103274808968548.post-7295489955685743411' source='http://www.blogger.com/feeds/9003103274808968548/posts/default/7295489955685743411' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-2014336401'/></entry><entry><id>tag:blogger.com,1999:blog-9003103274808968548.post-3331683279209233102</id><published>2010-05-30T00:09:47.943-07:00</published><updated>2010-05-30T00:09:47.943-07:00</updated><title type='text'>hello sir,
i am an engineering student 
doing my p...</title><content type='html'>hello sir,&lt;br /&gt;i am an engineering student &lt;br /&gt;doing my project which need a html parser to parse file.&lt;br /&gt;i have read ur code for htmlparsing using nekohtml its good using a recursive function &lt;br /&gt;my problem is actually retrieving the actual content from a wiki site &lt;br /&gt;it works for that also but i am getting the text not related to the context &lt;br /&gt;i mean while pasing a text of a file suppose   http://en.wikipedia.org/wiki/Valley&lt;br /&gt;i am getting text in main body content about valley and also some coding done in source code &lt;br /&gt;can u fix it and modify code to get the exact contents about valley (file)&lt;br /&gt;please post the modified code&lt;br /&gt;plese mail if possible chanikya.cse.vits@gmail.com</content><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/7295489955685743411/comments/default/3331683279209233102'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/9003103274808968548/7295489955685743411/comments/default/3331683279209233102'/><link rel='alternate' type='text/html' href='http://www.prasannatech.net/2009/02/convert-html-text-parser-java-api.html?showComment=1275203387943#c3331683279209233102' title=''/><author><name>chanu</name><uri>http://www.blogger.com/profile/04667508868991146236</uri><email>noreply@blogger.com</email><gd:image xmlns:gd='http://schemas.google.com/g/2005' rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:in-reply-to xmlns:thr='http://purl.org/syndication/thread/1.0' href='http://www.prasannatech.net/2009/02/convert-html-text-parser-java-api.html' ref='tag:blogger.com,1999:blog-9003103274808968548.post-7295489955685743411' source='http://www.blogger.com/feeds/9003103274808968548/posts/default/7295489955685743411' type='text/html'/><gd:extendedProperty xmlns:gd='http://schemas.google.com/g/2005' name='blogger.itemClass' value='pid-1421498027'/></entry></feed>
