Rename Request.binarayContent to binaryContent and add Page.charset.

Review notes on this revision of the patch:
  * Request keeps isBinarayContent()/setBinarayContent(boolean) as @Deprecated
    delegates, so callers using the misspelled names still compile.
  * Page.toString() reports only the length of `bytes` instead of expanding the
    whole body with Arrays.toString; the java.util.Arrays import is therefore
    not added, and the later Page.java hunk offsets are shifted by one.
  * The `index` lines for Page.java and Request.java are omitted because their
    post-image blob ids changed with the two points above.
  * NOTE(review): context lines are reproduced as found; indentation was
    reconstructed, and `targetRequests` may carry generic type arguments in the
    real file - refresh the context against the target tree before applying.

diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Page.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Page.java
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/Page.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Page.java
@@ -49,6 +49,8 @@ public class Page {
     private byte[] bytes;
 
     private List targetRequests = new ArrayList();
+
+    private String charset;
 
     public Page() {
     }
@@ -238,6 +240,14 @@ public class Page {
         this.bytes = bytes;
     }
 
+    public String getCharset() {
+        return charset;
+    }
+
+    public void setCharset(String charset) {
+        this.charset = charset;
+    }
+
     @Override
     public String toString() {
         return "Page{" +
@@ -249,8 +259,10 @@ public class Page {
                 ", url=" + url +
                 ", headers=" + headers +
                 ", statusCode=" + statusCode +
-                ", success=" + downloadSuccess +
+                ", downloadSuccess=" + downloadSuccess +
                 ", targetRequests=" + targetRequests +
+                ", charset='" + charset + '\'' +
+                ", bytes=" + (bytes == null ? "null" : "byte[" + bytes.length + "]") +
                 '}';
     }
 }
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java
@@ -49,7 +49,7 @@ public class Request implements Serializable {
      * When it is set to TRUE, the downloader will not try to parse response body to text.
      *
      */
-    private boolean binarayContent = false;
+    private boolean binaryContent = false;
 
     public Request() {
     }
@@ -168,12 +168,30 @@ public class Request implements Serializable {
         this.requestBody = requestBody;
     }
 
-    public boolean isBinarayContent() {
-        return binarayContent;
+    public boolean isBinaryContent() {
+        return binaryContent;
     }
 
-    public void setBinarayContent(boolean binarayContent) {
-        this.binarayContent = binarayContent;
+    public void setBinaryContent(boolean binaryContent) {
+        this.binaryContent = binaryContent;
+    }
+
+    /**
+     * @return same value as {@link #isBinaryContent()}
+     * @deprecated misspelled name kept so existing callers compile; use {@link #isBinaryContent()}.
+     */
+    @Deprecated
+    public boolean isBinarayContent() {
+        return isBinaryContent();
+    }
+
+    /**
+     * @param binaryContent forwarded to {@link #setBinaryContent(boolean)}
+     * @deprecated misspelled name kept so existing callers compile; use {@link #setBinaryContent(boolean)}.
+     */
+    @Deprecated
+    public void setBinarayContent(boolean binaryContent) {
+        setBinaryContent(binaryContent);
     }
 
     @Override
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java
index 5d0b033e..13175fc4 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java
@@ -112,7 +112,7 @@ public class HttpClientDownloader extends AbstractDownloader {
         String contentType = httpResponse.getEntity().getContentType() == null ? "" : httpResponse.getEntity().getContentType().getValue();
         Page page = new Page();
         page.setBytes(bytes);
-        if (!request.isBinarayContent()){
+        if (!request.isBinaryContent()){
             page.setRawText(getResponseContent(charset, contentType, bytes));
         }
         page.setUrl(new PlainText(request.getUrl()));
diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/HttpClientDownloaderTest.java b/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/HttpClientDownloaderTest.java
index cbb7abc0..6a1c8319 100644
--- a/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/HttpClientDownloaderTest.java
+++ b/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/HttpClientDownloaderTest.java
@@ -280,7 +280,7 @@ public class HttpClientDownloaderTest {
             public void run() throws Exception {
                 final HttpClientDownloader httpClientDownloader = new HttpClientDownloader();
                 Request request = new Request();
-                request.setBinarayContent(true);
+                request.setBinaryContent(true);
                 request.setUrl("http://127.0.0.1:13423/");
                 Page page = httpClientDownloader.download(request, Site.me().toTask());
                 assertThat(page.getRawText()).isNull();