#33 ignore 'content-encoding' when redirect

pull/88/head^2
yihua.huang 11 years ago
parent 8f774afc84
commit 00b0a751b4

@ -1,9 +1,8 @@
package us.codecraft.webmagic.downloader;
import org.apache.http.HttpException;
import org.apache.http.HttpRequest;
import org.apache.http.HttpRequestInterceptor;
import org.apache.http.*;
import org.apache.http.client.CookieStore;
import org.apache.http.client.protocol.ResponseContentEncoding;
import org.apache.http.config.Registry;
import org.apache.http.config.RegistryBuilder;
import org.apache.http.conn.socket.ConnectionSocketFactory;
@ -60,31 +59,24 @@ public class HttpClientGenerator {
}
});
}
// httpClientBuilder.disableContentCompression().addInterceptorFirst(new HttpResponseInterceptor() {
//
// public void process(
// final HttpResponse response,
// final HttpContext context) throws HttpException, IOException {
// if (response.getStatusLine().getStatusCode() != 200) {
// return;
// }
// HttpEntity entity = response.getEntity();
// if (entity != null) {
// Header ceheader = entity.getContentEncoding();
// if (ceheader != null) {
// HeaderElement[] codecs = ceheader.getElements();
// for (int i = 0; i < codecs.length; i++) {
// if (codecs[i].getName().equalsIgnoreCase("gzip")) {
// response.setEntity(
// new GzipDecompressingEntity(response.getEntity()));
// return;
// }
// }
// }
// }
// }
//
// });
// HttpClient has a problem handling compressed entities on redirect responses,
// so automatic content decompression is disabled and decoding is done manually.
// https://issues.apache.org/jira/browse/HTTPCLIENT-1432
httpClientBuilder.disableContentCompression();
// Response interceptor that performs content decoding by delegating to
// ResponseContentEncoding, except on redirect responses, which are passed
// through untouched so their 'Content-Encoding' header is ignored.
httpClientBuilder.addInterceptorFirst(new HttpResponseInterceptor() {
    // Delegate that wraps the response entity according to its Content-Encoding.
    private final ResponseContentEncoding contentEncoding = new ResponseContentEncoding();

    /**
     * Decodes the response entity unless the response is a redirect.
     *
     * @param response the response being processed
     * @param context  the execution context, forwarded to the delegate
     * @throws HttpException if the delegate rejects the response
     * @throws IOException   if the delegate fails on I/O
     */
    public void process(
            final HttpResponse response,
            final HttpContext context) throws HttpException, IOException {
        // Read the status once; it decides whether decoding is attempted at all.
        final int statusCode = response.getStatusLine().getStatusCode();
        switch (statusCode) {
            case 301: // Moved Permanently
            case 302: // Found
            case 303: // See Other
            case 307: // Temporary Redirect
                // Redirect response: skip decoding so its Content-Encoding is ignored.
                return;
            default:
                contentEncoding.process(response, context);
        }
    }
});
if (site != null) {
httpClientBuilder.setRetryHandler(new DefaultHttpRequestRetryHandler(site.getRetryTimes(), true));
}

@ -37,7 +37,6 @@ public class BaiduBaikePageProcesser implements PageProcessor {
list.add(String.format(urlTemplate,"风力发电"));
list.add(String.format(urlTemplate,"太阳能"));
list.add(String.format(urlTemplate,"地热发电"));
list.add(String.format(urlTemplate,"众数"));
list.add(String.format(urlTemplate,"地热发电"));
List<ResultItems> resultItemses = spider.getAll(list);
for (ResultItems resultItemse : resultItemses) {

Loading…
Cancel
Save