diff --git a/README.md b/README.md
index 967b9f67..341e0961 100644
--- a/README.md
+++ b/README.md
@@ -38,12 +38,12 @@ webmagic使用maven管理依赖,在项目中添加对应的依赖即可使用w
us.codecraft
webmagic-core
- 0.4.1
+ 0.4.2
us.codecraft
webmagic-extension
- 0.4.1
+ 0.4.2
#### 项目结构
diff --git a/en_docs/README.md b/en_docs/README.md
index 82b82a81..684da90d 100644
--- a/en_docs/README.md
+++ b/en_docs/README.md
@@ -28,12 +28,12 @@ Add dependencies to your project:
us.codecraft
webmagic-core
- 0.4.0
+ 0.4.2
us.codecraft
webmagic-extension
- 0.4.0
+ 0.4.2
## Get Started:
diff --git a/pom.xml b/pom.xml
index a302728e..3d488ae5 100644
--- a/pom.xml
+++ b/pom.xml
@@ -6,7 +6,7 @@
7
us.codecraft
- 0.4.2-SNAPSHOT
+ 0.4.3-SNAPSHOT
4.0.0
pom
diff --git a/user-manual.md b/user-manual.md
index acb955ee..f225c8a7 100644
--- a/user-manual.md
+++ b/user-manual.md
@@ -27,12 +27,12 @@ webmagic使用maven管理依赖,在项目中添加对应的依赖即可使用w
us.codecraft
webmagic-core
- 0.4.0
+ 0.4.2
us.codecraft
webmagic-extension
- 0.4.0
+ 0.4.2
#### 项目结构
diff --git a/webmagic-core/pom.xml b/webmagic-core/pom.xml
index c419ea70..914bfdab 100644
--- a/webmagic-core/pom.xml
+++ b/webmagic-core/pom.xml
@@ -3,7 +3,7 @@
us.codecraft
webmagic-parent
- 0.4.2-SNAPSHOT
+ 0.4.3-SNAPSHOT
4.0.0
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java
index b6baaa7e..da34c2de 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java
@@ -70,6 +70,7 @@ public class HttpClientDownloader implements Downloader {
CloseableHttpClient httpClient = httpClients.get(domain);
if (httpClient == null) {
synchronized (this) {
+ httpClient = httpClients.get(domain);
if (httpClient == null) {
httpClient = httpClientGenerator.getClient(site);
httpClients.put(domain, httpClient);
@@ -104,6 +105,7 @@ public class HttpClientDownloader implements Downloader {
}
RequestConfig.Builder requestConfigBuilder = RequestConfig.custom()
.setConnectionRequestTimeout(site.getTimeOut())
+ .setSocketTimeout(site.getTimeOut())
.setConnectTimeout(site.getTimeOut())
.setCookieSpec(CookieSpecs.BEST_MATCH);
if (site != null && site.getHttpProxy() != null) {
diff --git a/webmagic-extension/pom.xml b/webmagic-extension/pom.xml
index 51b3924e..c6af14f6 100644
--- a/webmagic-extension/pom.xml
+++ b/webmagic-extension/pom.xml
@@ -3,7 +3,7 @@
us.codecraft
webmagic-parent
- 0.4.2-SNAPSHOT
+ 0.4.3-SNAPSHOT
4.0.0
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/example/AppStore.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/example/AppStore.java
index fcc937b5..d0d056f8 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/example/AppStore.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/example/AppStore.java
@@ -23,14 +23,18 @@ public class AppStore {
@ExtractBy(type = ExtractBy.Type.JsonPath, value = "$..userRatingCount")
private int userRatingCount;
- @ExtractBy(type = ExtractBy.Type.JsonPath, value = "$..screenshotUrls",multi = true)
+ @ExtractBy(type = ExtractBy.Type.JsonPath, value = "$..screenshotUrls")
private List screenshotUrls;
+ @ExtractBy(type = ExtractBy.Type.JsonPath, value = "$..supportedDevices")
+ private List supportedDevices;
+
public static void main(String[] args) {
AppStore appStore = OOSpider.create(Site.me(), AppStore.class).get("http://itunes.apple.com/lookup?id=653350791&country=cn&entity=software");
System.out.println(appStore.trackName);
System.out.println(appStore.description);
System.out.println(appStore.userRatingCount);
System.out.println(appStore.screenshotUrls);
+ System.out.println(appStore.supportedDevices);
}
}
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java
index d7da0c9d..62b6de08 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java
@@ -131,7 +131,9 @@ class PageModelExtractor {
if (regexPattern.trim().equals("")) {
regexPattern = ".*";
}
- fieldExtractor = new FieldExtractor(field, new RegexSelector(regexPattern), FieldExtractor.Source.Url, extractByUrl.notNull(), extractByUrl.multi());
+ fieldExtractor = new FieldExtractor(field,
+ new RegexSelector(regexPattern), FieldExtractor.Source.Url, extractByUrl.notNull(),
+ extractByUrl.multi() || List.class.isAssignableFrom(field.getType()));
Method setterMethod = getSetterMethod(clazz, field);
if (setterMethod != null) {
fieldExtractor.setSetterMethod(setterMethod);
@@ -157,7 +159,7 @@ class PageModelExtractor {
selector = new AndSelector(ExtractorUtils.getSelectors(extractBies));
}
fieldExtractor = new FieldExtractor(field, selector, comboExtract.source() == ComboExtract.Source.RawHtml ? FieldExtractor.Source.RawHtml : FieldExtractor.Source.Html,
- comboExtract.notNull(), comboExtract.multi());
+ comboExtract.notNull(), comboExtract.multi() || List.class.isAssignableFrom(field.getType()));
Method setterMethod = getSetterMethod(clazz, field);
if (setterMethod != null) {
fieldExtractor.setSetterMethod(setterMethod);
@@ -172,7 +174,7 @@ class PageModelExtractor {
if (extractBy != null) {
Selector selector = ExtractorUtils.getSelector(extractBy);
fieldExtractor = new FieldExtractor(field, selector, extractBy.source() == ExtractBy.Source.RawHtml ? FieldExtractor.Source.RawHtml : FieldExtractor.Source.Html,
- extractBy.notNull(), extractBy.multi());
+ extractBy.notNull(), extractBy.multi() || List.class.isAssignableFrom(field.getType()));
Method setterMethod = getSetterMethod(clazz, field);
if (setterMethod != null) {
fieldExtractor.setSetterMethod(setterMethod);
@@ -359,7 +361,7 @@ class PageModelExtractor {
}
private void setField(Object o, FieldExtractor fieldExtractor, Object value) throws IllegalAccessException, InvocationTargetException {
- if (value==null){
+ if (value == null) {
return;
}
if (fieldExtractor.getSetterMethod() != null) {
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ComboExtract.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ComboExtract.java
index 5268a254..6d2ce6cd 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ComboExtract.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ComboExtract.java
@@ -75,6 +75,8 @@ public @interface ComboExtract {
* Define whether the extractor return more than one result.
* When set to 'true', the extractor return a list of string (so you should define the field as List).
*
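+ * Deprecated since 0.4.2. This option is now inferred from the field's type: a field declared as a List is treated as multi automatically.
+ * @deprecated since 0.4.2; declare the field as a List instead of setting this option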
* @return whether the extractor return more than one result
*/
boolean multi() default false;
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ExtractBy.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ExtractBy.java
index 8fddccf8..2e23aa00 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ExtractBy.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ExtractBy.java
@@ -67,6 +67,8 @@ public @interface ExtractBy {
* Define whether the extractor return more than one result.
* When set to 'true', the extractor return a list of string (so you should define the field as List).
*
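+ * Deprecated since 0.4.2. This option is now inferred from the field's type: a field declared as a List is treated as multi automatically.
+ * @deprecated since 0.4.2; declare the field as a List instead of setting this option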
* @return whether the extractor return more than one result
*/
boolean multi() default false;
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ExtractByUrl.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ExtractByUrl.java
index 328c0795..6c778629 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ExtractByUrl.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ExtractByUrl.java
@@ -33,6 +33,8 @@ public @interface ExtractByUrl {
* Define whether the extractor return more than one result.
* When set to 'true', the extractor return a list of string (so you should define the field as List).
*
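+ * Deprecated since 0.4.2. This option is now inferred from the field's type: a field declared as a List is treated as multi automatically.
+ * @deprecated since 0.4.2; declare the field as a List instead of setting this option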
* @return whether the extractor return more than one result
*/
boolean multi() default false;
diff --git a/webmagic-lucene/pom.xml b/webmagic-lucene/pom.xml
index 3dcf2b62..223942a9 100644
--- a/webmagic-lucene/pom.xml
+++ b/webmagic-lucene/pom.xml
@@ -5,7 +5,7 @@
webmagic-parent
us.codecraft
- 0.4.0
+ 0.4.2
4.0.0
diff --git a/webmagic-samples/pom.xml b/webmagic-samples/pom.xml
index 8c6b87e2..7b86ba2f 100644
--- a/webmagic-samples/pom.xml
+++ b/webmagic-samples/pom.xml
@@ -5,7 +5,7 @@
webmagic-parent
us.codecraft
- 0.4.1
+ 0.4.2
4.0.0
diff --git a/webmagic-saxon/pom.xml b/webmagic-saxon/pom.xml
index a8841df7..225d1555 100644
--- a/webmagic-saxon/pom.xml
+++ b/webmagic-saxon/pom.xml
@@ -5,7 +5,7 @@
webmagic-parent
us.codecraft
- 0.4.0
+ 0.4.2
4.0.0
diff --git a/webmagic-scripts/pom.xml b/webmagic-scripts/pom.xml
index 29fe1f79..1c65513d 100644
--- a/webmagic-scripts/pom.xml
+++ b/webmagic-scripts/pom.xml
@@ -3,7 +3,7 @@
webmagic-parent
us.codecraft
- 0.4.2-SNAPSHOT
+ 0.4.3-SNAPSHOT
4.0.0
@@ -31,6 +31,11 @@
webmagic-core
${project.version}
+
+ us.codecraft
+ webmagic-extension
+ ${project.version}
+
diff --git a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptConsole.java b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptConsole.java
index 7d3b6365..57a923ef 100644
--- a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptConsole.java
+++ b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptConsole.java
@@ -92,7 +92,8 @@ public class ScriptConsole {
ScriptProcessor pageProcessor = ScriptProcessorBuilder.custom()
.language(params.getLanguage()).scriptFromFile(params.getScriptFileName()).thread(params.getThread()).build();
pageProcessor.getSite().setSleepTime(params.getSleepTime());
- pageProcessor.getSite().setAcceptStatCode(Sets.newHashSet(200, 404, 500));
+ pageProcessor.getSite().setRetryTimes(3);
+ pageProcessor.getSite().setAcceptStatCode(Sets.newHashSet(200, 404,403, 500,502));
Spider spider = Spider.create(pageProcessor).thread(params.getThread());
spider.clearPipeline().addPipeline(new Pipeline() {
@Override
diff --git a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptEnginePool.java b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptEnginePool.java
index 9dc74133..d1e5d7fe 100644
--- a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptEnginePool.java
+++ b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptEnginePool.java
@@ -34,6 +34,7 @@ public class ScriptEnginePool {
public void release(ScriptEngine scriptEngine){
scriptEngines.add(scriptEngine);
+ availableCount.incrementAndGet();
}
}
diff --git a/webmagic-selenium/pom.xml b/webmagic-selenium/pom.xml
index 842d5e16..3026a369 100644
--- a/webmagic-selenium/pom.xml
+++ b/webmagic-selenium/pom.xml
@@ -5,7 +5,7 @@
webmagic-parent
us.codecraft
- 0.4.1
+ 0.4.2
4.0.0
diff --git a/zh_docs/README.md b/zh_docs/README.md
index e6961d8e..c58469a9 100644
--- a/zh_docs/README.md
+++ b/zh_docs/README.md
@@ -34,12 +34,12 @@ webmagic使用maven管理依赖,在项目中添加对应的依赖即可使用w
us.codecraft
webmagic-core
- 0.4.0
+ 0.4.2
us.codecraft
webmagic-extension
- 0.4.0
+ 0.4.2
#### 项目结构