diff --git a/pom.xml b/pom.xml
index cda7ad1e..3774b4b2 100644
--- a/pom.xml
+++ b/pom.xml
@@ -9,7 +9,31 @@
UTF-8
1.8
1.8
+ 3.18.1
+ 1.4
+ 4.4
+ 2.11.0
+ 3.12.0
+ 1.2.75
+ 3.0.10
+ 31.1-jre
+ 2.26
+ 4.5.13
+ 4.4.14
+ 3.7.1
+ 9.2.14.0
+ 2.6.0
+ 4.13.2
+ 2.7.2
+ 1.2.17
+ 1.10.19
+ 1.1.0
+ 1.2.0
+ 10.3
+ 3.141.59
+ 1.7.36
4.0.0.RELEASE
+ 0.3.2
webmagic-parent
webmagic-parent
@@ -58,59 +82,59 @@
junit
junit
- 4.13.1
+ ${junit.version}
test
org.mockito
mockito-all
- 1.10.19
+ ${mockito-all.version}
test
org.apache.httpcomponents
httpclient
- 4.5.13
+ ${httpclient.version}
org.apache.httpcomponents
httpcore
- 4.4.14
+ ${httpcore.version}
com.google.guava
guava
- 30.1-jre
+ ${guava.version}
com.jayway.jsonpath
json-path
- 2.5.0
+ ${json-path.version}
org.slf4j
slf4j-api
- 1.7.30
+ ${slf4j.version}
org.slf4j
slf4j-log4j12
- 1.7.30
+ ${slf4j.version}
us.codecraft
xsoup
- 0.3.2
+ ${xsoup.version}
com.alibaba
fastjson
- 1.2.75
+ ${fastjson.version}
com.github.dreamhead
moco-core
- 1.1.0
+ ${moco.version}
test
@@ -122,73 +146,73 @@
log4j
log4j
- 1.2.17
+ ${log4j.version}
org.assertj
assertj-core
- 3.18.1
+ ${assertj.version}
test
org.apache.commons
commons-lang3
- 3.11
+ ${commons-lang3.version}
- commons-collections
- commons-collections
- 3.2.2
+ org.apache.commons
+ commons-collections4
+ ${commons-collections4.version}
commons-io
commons-io
- 2.8.0
+ ${commons-io.version}
org.codehaus.groovy
groovy-all
- 3.0.7
+ ${groovy-all.version}
org.jruby
jruby
- 9.2.14.0
+ ${jruby.version}
org.python
jython
- 2.7.2
+ ${jython.version}
org.seleniumhq.selenium
selenium-java
- 3.141.59
+ ${selenium-java.version}
net.sf.saxon
Saxon-HE
- 10.3
+ ${saxon-he.version}
net.sourceforge.htmlcleaner
htmlcleaner
- 2.9
+ ${htmlcleaner.version}
com.github.detro
phantomjsdriver
- 1.2.0
+ ${phantomjsdriver.version}
commons-cli
commons-cli
- 1.4
+ ${commons-cli.version}
redis.clients
jedis
- 3.6.0
+ ${jedis.version}
diff --git a/webmagic-core/pom.xml b/webmagic-core/pom.xml
index 049477cb..64b8013f 100644
--- a/webmagic-core/pom.xml
+++ b/webmagic-core/pom.xml
@@ -52,8 +52,8 @@
- commons-collections
- commons-collections
+ org.apache.commons
+ commons-collections4
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java
index bc8bb94c..00091c90 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java
@@ -1,6 +1,20 @@
package us.codecraft.webmagic;
-import org.apache.commons.collections.CollectionUtils;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Date;
+import java.util.List;
+import java.util.UUID;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.locks.Condition;
+import java.util.concurrent.locks.ReentrantLock;
+import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.SerializationUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -17,16 +31,6 @@ import us.codecraft.webmagic.thread.CountableThreadPool;
import us.codecraft.webmagic.utils.UrlUtils;
import us.codecraft.webmagic.utils.WMCollections;
-import java.io.Closeable;
-import java.io.IOException;
-import java.util.*;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.atomic.AtomicLong;
-import java.util.concurrent.locks.Condition;
-import java.util.concurrent.locks.ReentrantLock;
-
/**
* Entrance of a crawler.
* A spider contains four modules: Downloader, Scheduler, PageProcessor and
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/AbstractSelectable.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/AbstractSelectable.java
index e2bb5521..8775af10 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/AbstractSelectable.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/AbstractSelectable.java
@@ -1,9 +1,9 @@
package us.codecraft.webmagic.selector;
-import org.apache.commons.collections.CollectionUtils;
import java.util.ArrayList;
import java.util.List;
+import org.apache.commons.collections4.CollectionUtils;
/**
* @author code4crafer@gmail.com
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/CssSelector.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/CssSelector.java
index 6a638dbf..cfe55472 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/CssSelector.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/CssSelector.java
@@ -1,14 +1,14 @@
package us.codecraft.webmagic.selector;
-import org.apache.commons.collections.CollectionUtils;
+
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.commons.collections4.CollectionUtils;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.Elements;
-import java.util.ArrayList;
-import java.util.List;
-
/**
* CSS selector. Based on Jsoup.
*
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/JsonPathSelector.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/JsonPathSelector.java
index f5c0baeb..aa9a903f 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/JsonPathSelector.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/JsonPathSelector.java
@@ -1,11 +1,11 @@
package us.codecraft.webmagic.selector;
-import com.alibaba.fastjson.JSON;
-import com.jayway.jsonpath.JsonPath;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
+import com.alibaba.fastjson.JSON;
+import com.jayway.jsonpath.JsonPath;
/**
* JsonPath selector.
@@ -16,15 +16,20 @@ import java.util.Map;
*/
public class JsonPathSelector implements Selector {
- private String jsonPathStr;
+ private final String jsonPathStr;
- private JsonPath jsonPath;
+ private final JsonPath jsonPath;
public JsonPathSelector(String jsonPathStr) {
this.jsonPathStr = jsonPathStr;
this.jsonPath = JsonPath.compile(this.jsonPathStr);
}
+ @SuppressWarnings("unused")
+ public String getJsonPathStr() {
+ return jsonPathStr;
+ }
+
@Override
public String select(String text) {
Object object = jsonPath.read(text);
@@ -32,8 +37,8 @@ public class JsonPathSelector implements Selector {
return null;
}
if (object instanceof List) {
- List list = (List) object;
- if (list != null && list.size() > 0) {
+ List<?> list = (List<?>) object;
+ if (list.size() > 0) {
return toString(list.iterator().next());
}
}
@@ -49,8 +54,9 @@ public class JsonPathSelector implements Selector {
}
@Override
+ @SuppressWarnings("unchecked")
public List selectList(String text) {
- List list = new ArrayList();
+ List list = new ArrayList<>();
Object object = jsonPath.read(text);
if (object == null) {
return list;
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/XpathSelector.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/XpathSelector.java
index 8a980a50..4fa14699 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/XpathSelector.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/XpathSelector.java
@@ -1,12 +1,12 @@
package us.codecraft.webmagic.selector;
-import org.apache.commons.collections.CollectionUtils;
+
+import java.util.List;
+import org.apache.commons.collections4.CollectionUtils;
import org.jsoup.nodes.Element;
import us.codecraft.xsoup.XPathEvaluator;
import us.codecraft.xsoup.Xsoup;
-import java.util.List;
-
/**
* XPath selector based on Xsoup.
*
diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/HttpClientDownloaderTest.java b/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/HttpClientDownloaderTest.java
index ece06000..780ca752 100644
--- a/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/HttpClientDownloaderTest.java
+++ b/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/HttpClientDownloaderTest.java
@@ -1,9 +1,10 @@
package us.codecraft.webmagic.downloader;
-import com.github.dreamhead.moco.HttpServer;
-import com.github.dreamhead.moco.Runnable;
-import com.github.dreamhead.moco.Runner;
-import org.apache.commons.collections.map.HashedMap;
+
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.util.Map;
+import org.apache.commons.collections4.map.HashedMap;
import org.apache.commons.io.IOUtils;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpUriRequest;
@@ -11,6 +12,9 @@ import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.junit.Test;
+import com.github.dreamhead.moco.HttpServer;
+import com.github.dreamhead.moco.Runnable;
+import com.github.dreamhead.moco.Runner;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
@@ -21,12 +25,19 @@ import us.codecraft.webmagic.proxy.SimpleProxyProvider;
import us.codecraft.webmagic.selector.Html;
import us.codecraft.webmagic.utils.CharsetUtils;
import us.codecraft.webmagic.utils.HttpConstant;
-
-import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.util.Map;
-
-import static com.github.dreamhead.moco.Moco.*;
+import static com.github.dreamhead.moco.Moco.and;
+import static com.github.dreamhead.moco.Moco.by;
+import static com.github.dreamhead.moco.Moco.cookie;
+import static com.github.dreamhead.moco.Moco.eq;
+import static com.github.dreamhead.moco.Moco.form;
+import static com.github.dreamhead.moco.Moco.header;
+import static com.github.dreamhead.moco.Moco.httpServer;
+import static com.github.dreamhead.moco.Moco.method;
+import static com.github.dreamhead.moco.Moco.not;
+import static com.github.dreamhead.moco.Moco.query;
+import static com.github.dreamhead.moco.Moco.text;
+import static com.github.dreamhead.moco.Moco.uri;
+import static com.github.dreamhead.moco.Moco.with;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/MockGithubDownloader.java b/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/MockGithubDownloader.java
index 3aa742c1..58dd3a6f 100644
--- a/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/MockGithubDownloader.java
+++ b/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/MockGithubDownloader.java
@@ -1,13 +1,15 @@
package us.codecraft.webmagic.downloader;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.Charset;
import org.apache.commons.io.IOUtils;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.selector.PlainText;
-import java.io.IOException;
-import java.io.InputStream;
/**
* @author code4crafter@gmail.com
@@ -19,7 +21,7 @@ public class MockGithubDownloader implements Downloader {
Page page = new Page();
InputStream resourceAsStream = this.getClass().getResourceAsStream("/html/mock-github.html");
try {
- page.setRawText(IOUtils.toString(resourceAsStream));
+ page.setRawText(IOUtils.toString(resourceAsStream, Charset.defaultCharset()));
} catch (IOException e) {
e.printStackTrace();
}
diff --git a/webmagic-extension/src/test/java/us/codecraft/webmagic/model/PageMocker.java b/webmagic-extension/src/test/java/us/codecraft/webmagic/model/PageMocker.java
index 4b0c133c..0451edcf 100644
--- a/webmagic-extension/src/test/java/us/codecraft/webmagic/model/PageMocker.java
+++ b/webmagic-extension/src/test/java/us/codecraft/webmagic/model/PageMocker.java
@@ -1,11 +1,13 @@
package us.codecraft.webmagic.model;
+
+import java.io.IOException;
+import java.nio.charset.Charset;
import org.apache.commons.io.IOUtils;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.selector.PlainText;
-import java.io.IOException;
/**
* @author code4crafter@gmail.com
@@ -16,7 +18,7 @@ public class PageMocker {
public Page getMockJsonPage() throws IOException {
Page page = new Page();
- page.setRawText(IOUtils.toString(PageMocker.class.getClassLoader().getResourceAsStream("json/mock-githubrepo.json")));
+ page.setRawText(IOUtils.toString(PageMocker.class.getClassLoader().getResourceAsStream("json/mock-githubrepo.json"), Charset.defaultCharset()));
page.setRequest(new Request("https://api.github.com/repos/code4craft/webmagic"));
page.setUrl(new PlainText("https://api.github.com/repos/code4craft/webmagic"));
return page;
@@ -24,7 +26,7 @@ public class PageMocker {
public Page getMockPage() throws IOException {
Page page = new Page();
- page.setRawText(IOUtils.toString(PageMocker.class.getClassLoader().getResourceAsStream("html/mock-webmagic.html")));
+ page.setRawText(IOUtils.toString(PageMocker.class.getClassLoader().getResourceAsStream("html/mock-webmagic.html"), Charset.defaultCharset()));
page.setRequest(new Request("http://webmagic.io/list/0"));
page.setUrl(new PlainText("http://webmagic.io/list/0"));
return page;
diff --git a/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/AngularJSProcessor.java b/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/AngularJSProcessor.java
index ab560e45..46476bbc 100644
--- a/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/AngularJSProcessor.java
+++ b/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/AngularJSProcessor.java
@@ -1,14 +1,14 @@
package us.codecraft.webmagic.samples;
-import org.apache.commons.collections.CollectionUtils;
+
+import java.util.List;
+import org.apache.commons.collections4.CollectionUtils;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.JsonPathSelector;
-import java.util.List;
-
/**
* @author code4crafter@gmail.com
* @since 0.5.0
diff --git a/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/InfoQMiniBookProcessor.java b/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/InfoQMiniBookProcessor.java
index 280f8f18..33dd6aa3 100644
--- a/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/InfoQMiniBookProcessor.java
+++ b/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/InfoQMiniBookProcessor.java
@@ -1,6 +1,6 @@
package us.codecraft.webmagic.samples;
-import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.collections4.CollectionUtils;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
diff --git a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptProcessor.java b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptProcessor.java
index 1822318c..78c9d87c 100755
--- a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptProcessor.java
+++ b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptProcessor.java
@@ -1,5 +1,14 @@
package us.codecraft.webmagic.scripts;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.Charset;
+import java.util.Iterator;
+import java.util.Map;
+import javax.script.ScriptContext;
+import javax.script.ScriptEngine;
+import javax.script.ScriptException;
import org.apache.commons.io.IOUtils;
import org.jruby.RubyHash;
import org.python.core.PyDictionary;
@@ -7,14 +16,6 @@ import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;
-import javax.script.ScriptContext;
-import javax.script.ScriptEngine;
-import javax.script.ScriptException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Iterator;
-import java.util.Map;
-
/**
* @author code4crafter@gmail.com
* @since 0.4.1
@@ -39,7 +40,7 @@ public class ScriptProcessor implements PageProcessor {
enginePool = new ScriptEnginePool(language, threadNum);
InputStream resourceAsStream = this.getClass().getClassLoader().getResourceAsStream(language.getDefineFile());
try {
- defines = IOUtils.toString(resourceAsStream);
+ defines = IOUtils.toString(resourceAsStream, Charset.defaultCharset());
} catch (IOException e) {
throw new IllegalArgumentException(e);
}
diff --git a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptProcessorBuilder.java b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptProcessorBuilder.java
index 76b3e864..4691528a 100755
--- a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptProcessorBuilder.java
+++ b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptProcessorBuilder.java
@@ -1,10 +1,12 @@
package us.codecraft.webmagic.scripts;
-import org.apache.commons.io.IOUtils;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.nio.charset.Charset;
+import org.apache.commons.io.IOUtils;
+
/**
* @author code4crafter@gmail.com
@@ -35,7 +37,7 @@ public class ScriptProcessorBuilder {
public ScriptProcessorBuilder scriptFromFile(String fileName) {
try {
InputStream resourceAsStream = new FileInputStream(fileName);
- this.script = IOUtils.toString(resourceAsStream);
+ this.script = IOUtils.toString(resourceAsStream, Charset.defaultCharset());
} catch (IOException e) {
//wrap IOException because I prefer a runtime exception...
throw new IllegalArgumentException(e);
@@ -46,7 +48,7 @@ public class ScriptProcessorBuilder {
public ScriptProcessorBuilder scriptFromClassPathFile(String fileName) {
try {
InputStream resourceAsStream = ScriptProcessor.class.getClassLoader().getResourceAsStream(fileName);
- this.script = IOUtils.toString(resourceAsStream);
+ this.script = IOUtils.toString(resourceAsStream, Charset.defaultCharset());
} catch (IOException e) {
//wrap IOException because I prefer a runtime exception...
throw new IllegalArgumentException(e);