These .jar files are needed:
commons-httpclient-3.1.jar
commons-logging-api-1.0.4.jar
commons-codec-1.4.jar
I got these .jar files from the Nutch distribution, which I downloaded from the Apache website.
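This example fetches the “http://www.sina.com.cn/” webpage, whose charset is gb2312. The response bytes are decoded from gb2312 into a Java String and printed to standard output, which writes it in the console's encoding (UTF-8 if that is the platform default).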
The code is as below:
package mytryapp;
import org.apache.commons.httpclient.*;
import org.apache.commons.httpclient.methods.*;
import org.apache.commons.httpclient.params.HttpMethodParams;
import java.io.*;
public class MytryApp {
private static String url = “http://www.sina.com.cn/”;
public static void main(String[] args) {
// Create an instance of HttpClient.
HttpClient client = new HttpClient();
// Create a method instance.
GetMethod method = new GetMethod(url);
// Provide custom retry handler is necessary
method.getParams().setParameter(HttpMethodParams.RETRY_HANDLER,
new DefaultHttpMethodRetryHandler(3, false));
try {
// Execute the method.
int statusCode = client.executeMethod(method);
if (statusCode != HttpStatus.SC_OK) {
System.err.println(“Method failed: ” + method.getStatusLine());
}
// Read the response body.
InputStream responseBody = method.getResponseBodyAsStream();
// Deal with the response.
// Use caution: ensure correct character encoding and is not binary data
System.out.println(convertStreamToString(responseBody));
} catch (HttpException e) {
System.err.println(“Fatal protocol violation: ” + e.getMessage());
e.printStackTrace();
} catch (IOException e) {
System.err.println(“Fatal transport error: ” + e.getMessage());
e.printStackTrace();
} finally {
// Release the connection.
method.releaseConnection();
}
}
public static String convertStreamToString(InputStream is)
throws IOException {
if (is != null) {
Writer writer = new StringWriter();
char[] buffer = new char[1024];
try {
Reader reader = new BufferedReader(
new InputStreamReader(is, “gb2312”));
int n;
while ((n = reader.read(buffer)) != -1) {
writer.write(buffer, 0, n);
}
} finally {
is.close();
}
return writer.toString();
} else {
return “”;
}
}
}