Merge branch 'release/1.0.2'

pull/1181/head WebMagic-1.0.2
Joe Zhou 2 months ago
commit 837253cfc9

@ -50,7 +50,7 @@ WebMagic use slf4j with slf4j-log4j12 implementation. If you customized your slf
### First crawler:
Write a class implements PageProcessor. For example, I wrote a crawler of github repository infomation.
Write a class implements PageProcessor. For example, I wrote a crawler of github repository information.
```java
public class GithubRepoPageProcessor implements PageProcessor {
@ -112,7 +112,7 @@ public class GithubRepo {
Documents: [http://webmagic.io/docs/](http://webmagic.io/docs/)
The architecture of webmagic (refered to [Scrapy](http://scrapy.org/))
The architecture of webmagic (referred to [Scrapy](http://scrapy.org/))
![image](http://code4craft.github.io/images/posts/webmagic.png)

@ -12,7 +12,7 @@
<version>2.2.1</version>
</parent>
<groupId>us.codecraft</groupId>
<version>1.0.1</version>
<version>1.0.2</version>
<packaging>pom</packaging>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>

@ -8,7 +8,7 @@
<parent>
<groupId>us.codecraft</groupId>
<artifactId>webmagic</artifactId>
<version>1.0.1</version>
<version>1.0.2</version>
</parent>
<modelVersion>4.0.0</modelVersion>

@ -52,9 +52,44 @@ public class Page {
private String charset;
/**
* Returns a {@link Page} whose {@link #downloadSuccess} is {@code true}
* and whose {@link #request} is the given request.
*
* @param request the request.
* @return a new page flagged as successfully downloaded.
* @since 1.0.2
*/
public static Page ofSuccess(Request request) {
return new Page(request, true);
}
/**
* Returns a {@link Page} whose {@link #downloadSuccess} is {@code false}
* and whose {@link #request} is the given request.
*
* @param request the request.
* @return a new page flagged as a failed download.
* @since 1.0.2
*/
public static Page ofFailure(Request request) {
return new Page(request, false);
}
/**
* Constructs a {@link Page} without assigning any field in the constructor body.
*/
public Page() {
}
/**
* Constructs a {@link Page} with the given {@link #request}
* and {@link #downloadSuccess} flag.
* <p>
* Private: called by the static factories {@link #ofSuccess(Request)}
* and {@link #ofFailure(Request)}.
*
* @param request the request.
* @param downloadSuccess the download success flag.
* @since 1.0.2
*/
private Page(Request request, boolean downloadSuccess) {
this.request = request;
this.downloadSuccess = downloadSuccess;
}
/**
* Returns a {@link Page} with {@link #downloadSuccess} is {@code false}.
*
@ -73,7 +108,9 @@ public class Page {
* @param request the {@link Request}.
* @return the page.
* @since 0.10.0
* @deprecated Use {@link #ofFailure(Request)} instead.
*/
@Deprecated(since = "1.0.2", forRemoval = true)
public static Page fail(Request request){
Page page = new Page();
page.setRequest(request);

@ -76,13 +76,14 @@ public class HttpClientDownloader extends AbstractDownloader {
CloseableHttpClient httpClient = getHttpClient(task.getSite());
Proxy proxy = proxyProvider != null ? proxyProvider.getProxy(request, task) : null;
HttpClientRequestContext requestContext = httpUriRequestConverter.convert(request, task.getSite(), proxy);
Page page = Page.fail(request);
Page page = null;
try {
httpResponse = httpClient.execute(requestContext.getHttpUriRequest(), requestContext.getHttpClientContext());
page = handleResponse(request, request.getCharset() != null ? request.getCharset() : task.getSite().getCharset(), httpResponse, task);
onSuccess(page, task);
return page;
} catch (IOException e) {
page = Page.ofFailure(request);
onError(page, task, e);
return page;
} finally {
@ -105,7 +106,7 @@ public class HttpClientDownloader extends AbstractDownloader {
HttpEntity entity = httpResponse.getEntity();
byte[] bytes = entity != null ? IOUtils.toByteArray(entity.getContent()) : new byte[0];
String contentType = entity != null && entity.getContentType() != null ? entity.getContentType().getValue() : null;
Page page = new Page();
Page page = Page.ofSuccess(request);
page.setBytes(bytes);
if (!request.isBinaryContent()) {
if (charset == null) {
@ -117,7 +118,6 @@ public class HttpClientDownloader extends AbstractDownloader {
page.setUrl(new PlainText(request.getUrl()));
page.setRequest(request);
page.setStatusCode(httpResponse.getStatusLine().getStatusCode());
page.setDownloadSuccess(true);
if (responseHeader) {
page.setHeaders(HttpClientUtils.convertHeaders(httpResponse.getAllHeaders()));
}

@ -10,7 +10,7 @@
<parent>
<groupId>us.codecraft</groupId>
<artifactId>webmagic</artifactId>
<version>1.0.1</version>
<version>1.0.2</version>
</parent>
<artifactId>webmagic-coverage</artifactId>

@ -8,7 +8,7 @@
<parent>
<groupId>us.codecraft</groupId>
<artifactId>webmagic</artifactId>
<version>1.0.1</version>
<version>1.0.2</version>
</parent>
<modelVersion>4.0.0</modelVersion>

@ -8,7 +8,7 @@
<parent>
<groupId>us.codecraft</groupId>
<artifactId>webmagic</artifactId>
<version>1.0.1</version>
<version>1.0.2</version>
</parent>
<modelVersion>4.0.0</modelVersion>

@ -8,7 +8,7 @@
<parent>
<groupId>us.codecraft</groupId>
<artifactId>webmagic</artifactId>
<version>1.0.1</version>
<version>1.0.2</version>
</parent>
<modelVersion>4.0.0</modelVersion>

@ -8,7 +8,7 @@
<parent>
<groupId>us.codecraft</groupId>
<artifactId>webmagic</artifactId>
<version>1.0.1</version>
<version>1.0.2</version>
</parent>
<modelVersion>4.0.0</modelVersion>

@ -8,7 +8,7 @@
<parent>
<groupId>us.codecraft</groupId>
<artifactId>webmagic</artifactId>
<version>1.0.1</version>
<version>1.0.2</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Loading…
Cancel
Save