#33 ignore 'content-encoding' when redirecting

pull/88/head^2
yihua.huang 11 years ago
parent 8f774afc84
commit 00b0a751b4

@ -1,9 +1,8 @@
package us.codecraft.webmagic.downloader; package us.codecraft.webmagic.downloader;
import org.apache.http.HttpException; import org.apache.http.*;
import org.apache.http.HttpRequest;
import org.apache.http.HttpRequestInterceptor;
import org.apache.http.client.CookieStore; import org.apache.http.client.CookieStore;
import org.apache.http.client.protocol.ResponseContentEncoding;
import org.apache.http.config.Registry; import org.apache.http.config.Registry;
import org.apache.http.config.RegistryBuilder; import org.apache.http.config.RegistryBuilder;
import org.apache.http.conn.socket.ConnectionSocketFactory; import org.apache.http.conn.socket.ConnectionSocketFactory;
@ -60,31 +59,24 @@ public class HttpClientGenerator {
} }
}); });
} }
// httpClientBuilder.disableContentCompression().addInterceptorFirst(new HttpResponseInterceptor() { // HttpClient has a problem handling compressed entities on redirect responses,
// // so built-in content compression is disabled and decoding is done manually. See:
// public void process( // https://issues.apache.org/jira/browse/HTTPCLIENT-1432
// final HttpResponse response, httpClientBuilder.disableContentCompression();
// final HttpContext context) throws HttpException, IOException { httpClientBuilder.addInterceptorFirst(new HttpResponseInterceptor() {
// if (response.getStatusLine().getStatusCode() != 200) {
// return; private ResponseContentEncoding contentEncoding = new ResponseContentEncoding();
// }
// HttpEntity entity = response.getEntity(); public void process(
// if (entity != null) { final HttpResponse response,
// Header ceheader = entity.getContentEncoding(); final HttpContext context) throws HttpException, IOException {
// if (ceheader != null) { if (response.getStatusLine().getStatusCode() == 301 || response.getStatusLine().getStatusCode() == 302) {
// HeaderElement[] codecs = ceheader.getElements(); return;
// for (int i = 0; i < codecs.length; i++) { }
// if (codecs[i].getName().equalsIgnoreCase("gzip")) { contentEncoding.process(response, context);
// response.setEntity( }
// new GzipDecompressingEntity(response.getEntity()));
// return; });
// }
// }
// }
// }
// }
//
// });
if (site != null) { if (site != null) {
httpClientBuilder.setRetryHandler(new DefaultHttpRequestRetryHandler(site.getRetryTimes(), true)); httpClientBuilder.setRetryHandler(new DefaultHttpRequestRetryHandler(site.getRetryTimes(), true));
} }

@ -37,7 +37,6 @@ public class BaiduBaikePageProcesser implements PageProcessor {
list.add(String.format(urlTemplate,"风力发电")); list.add(String.format(urlTemplate,"风力发电"));
list.add(String.format(urlTemplate,"太阳能")); list.add(String.format(urlTemplate,"太阳能"));
list.add(String.format(urlTemplate,"地热发电")); list.add(String.format(urlTemplate,"地热发电"));
list.add(String.format(urlTemplate,"众数"));
list.add(String.format(urlTemplate,"地热发电")); list.add(String.format(urlTemplate,"地热发电"));
List<ResultItems> resultItemses = spider.getAll(list); List<ResultItems> resultItemses = spider.getAll(list);
for (ResultItems resultItemse : resultItemses) { for (ResultItems resultItemse : resultItemses) {

Loading…
Cancel
Save