Search This Blog

Sunday, December 25, 2011

Getting the HTML content of a webpage - JAVA


import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;

public class GetHTMLContentThroughURLTest {

public static void main(String[] args) throws IOException{

String targetURL = "http://www.shahidbits.blogspot.in";

GetHTMLContentThroughURL obj = new GetHTMLContentThroughURL(targetURL);
String str = obj.getContent();
System.out.println(str);

}
}


/**
 * Reads the textual content behind a URL into a {@code String}.
 */
class GetHTMLContentThroughURL {

    /** Number of chars requested from the reader per read call. */
    private static final int BUFFER_SIZE = 4096;

    private final URL url;

    /**
     * Creates a fetcher for the given address.
     *
     * @param targetURL the address to read from, in any form accepted by {@link URL#URL(String)}
     * @throws MalformedURLException if {@code targetURL} cannot be parsed as a URL
     */
    public GetHTMLContentThroughURL(String targetURL) throws MalformedURLException {
        url = new URL(targetURL);
    }

    /**
     * Opens a stream to the URL, reads it to the end and returns the content
     * decoded as UTF-8.
     *
     * <p>UTF-8 is a fixed default; the charset announced by the server (if any)
     * is not consulted. Use {@link #getContent(java.nio.charset.Charset)} when
     * the resource is known to use another encoding.
     *
     * @return the complete content; an empty string if the resource is empty
     * @throws IOException if the stream cannot be opened or read
     */
    public String getContent() throws IOException {
        return getContent(java.nio.charset.StandardCharsets.UTF_8);
    }

    /**
     * Opens a stream to the URL, reads it to the end and returns the content
     * decoded with the given charset. The stream is always closed before this
     * method returns, whether it completes normally or throws.
     *
     * @param charset the charset used to turn the received bytes into text
     * @return the complete content; an empty string if the resource is empty
     * @throws IOException if the stream cannot be opened or read
     * @throws NullPointerException if {@code charset} is {@code null}
     */
    public String getContent(java.nio.charset.Charset charset) throws IOException {
        // Validate before opening the stream so a bad argument cannot leak a connection.
        java.util.Objects.requireNonNull(charset, "charset");

        // Decoding through a Reader (instead of casting each byte to char) keeps
        // non-ASCII bytes intact and handles multi-byte sequences that straddle
        // two reads.
        try (java.io.Reader reader = new java.io.InputStreamReader(url.openStream(), charset)) {
            StringBuilder content = new StringBuilder();
            char[] chunk = new char[BUFFER_SIZE];
            int charsRead;
            while ((charsRead = reader.read(chunk)) != -1) {
                content.append(chunk, 0, charsRead);
            }
            return content.toString();
        }
    }
}