From 0cc0ccee3578ed3ce9a3cd920912bc8b1338da87 Mon Sep 17 00:00:00 2001 From: "yihua.huang" Date: Sun, 25 Aug 2013 15:41:43 +0800 Subject: [PATCH] add charset specific for easy call of HttpClientDownloader --- .../webmagic/downloader/HttpClientDownloader.java | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java index 75634104..7a063298 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java @@ -46,6 +46,17 @@ public class HttpClientDownloader implements Downloader { return (Html) page.getHtml(); } + /** + * A simple method to download a url using the given charset. + * @param url the url to download + * @param charset the charset set on the Site used for this download + * @return the Html of the downloaded page + */ + public Html download(String url,String charset) { + Page page = download(new Request(url), Site.me().setCharset(charset).toTask()); + return (Html) page.getHtml(); + } + @Override public Page download(Request request, Task task) { Site site = null; @@ -87,13 +98,12 @@ public class HttpClientDownloader implements Downloader { } while (retry); int statusCode = httpResponse.getStatusLine().getStatusCode(); if (acceptStatCode.contains(statusCode)) { + handleGzip(httpResponse); //charset if (charset == null) { String value = httpResponse.getEntity().getContentType().getValue(); charset = UrlUtils.getCharset(value); } - // - handleGzip(httpResponse); return handleResponse(request, charset, httpResponse, task); } else { logger.warn("code error " + statusCode + "\t" + request.getUrl());