From 54da7af17eaffeb54360b4ed81639d84bd064281 Mon Sep 17 00:00:00 2001 From: David Hsing Date: Tue, 3 May 2022 17:42:42 +0800 Subject: [PATCH] change dependency versions into properties change dependency versions into properties update commons-collections from 3.x to 4.4 --- pom.xml | 78 ++++++++++++------- webmagic-core/pom.xml | 4 +- .../java/us/codecraft/webmagic/Spider.java | 26 ++++--- .../webmagic/selector/AbstractSelectable.java | 2 +- .../webmagic/selector/CssSelector.java | 8 +- .../webmagic/selector/JsonPathSelector.java | 20 +++-- .../webmagic/selector/XpathSelector.java | 6 +- .../downloader/HttpClientDownloaderTest.java | 31 +++++--- .../downloader/MockGithubDownloader.java | 8 +- .../codecraft/webmagic/model/PageMocker.java | 8 +- .../webmagic/samples/AngularJSProcessor.java | 6 +- .../samples/InfoQMiniBookProcessor.java | 2 +- .../webmagic/scripts/ScriptProcessor.java | 19 ++--- .../scripts/ScriptProcessorBuilder.java | 8 +- 14 files changed, 139 insertions(+), 87 deletions(-) diff --git a/pom.xml b/pom.xml index cda7ad1e..3774b4b2 100644 --- a/pom.xml +++ b/pom.xml @@ -9,7 +9,31 @@ UTF-8 1.8 1.8 + 3.18.1 + 1.4 + 4.4 + 2.11.0 + 3.12.0 + 1.2.75 + 3.0.10 + 31.1-jre + 2.26 + 4.5.13 + 4.4.14 + 3.7.1 + 9.2.14.0 + 2.6.0 + 4.13.2 + 2.7.2 + 1.2.17 + 1.10.19 + 1.1.0 + 1.2.0 + 10.3 + 3.141.59 + 1.7.36 4.0.0.RELEASE + 0.3.2 webmagic-parent webmagic-parent @@ -58,59 +82,59 @@ junit junit - 4.13.1 + ${junit.version} test org.mockito mockito-all - 1.10.19 + ${mockito-all.version} test org.apache.httpcomponents httpclient - 4.5.13 + ${httpclient.version} org.apache.httpcomponents httpcore - 4.4.14 + ${httpcore.version} com.google.guava guava - 30.1-jre + ${guava.version} com.jayway.jsonpath json-path - 2.5.0 + ${json-path.version} org.slf4j slf4j-api - 1.7.30 + ${slf4j.version} org.slf4j slf4j-log4j12 - 1.7.30 + ${slf4j.version} us.codecraft xsoup - 0.3.2 + ${xsoup.version} com.alibaba fastjson - 1.2.75 + ${fastjson.version} com.github.dreamhead moco-core - 1.1.0 + ${moco.version} test @@ -122,73 +146,73 @@ log4j log4j - 1.2.17 + ${log4j.version} org.assertj assertj-core - 3.18.1 + ${assertj.version} test org.apache.commons commons-lang3 - 3.11 + ${commons-lang3.version} - commons-collections - commons-collections - 3.2.2 + org.apache.commons + commons-collections4 + ${commons-collections4.version} commons-io commons-io - 2.8.0 + ${commons-io.version} org.codehaus.groovy groovy-all - 3.0.7 + ${groovy-all.version} org.jruby jruby - 9.2.14.0 + ${jruby.version} org.python jython - 2.7.2 + ${jython.version} org.seleniumhq.selenium selenium-java - 3.141.59 + ${selenium-java.version} net.sf.saxon Saxon-HE - 10.3 + ${saxon-he.version} net.sourceforge.htmlcleaner htmlcleaner - 2.9 + ${htmlcleaner.version} com.github.detro phantomjsdriver - 1.2.0 + ${phantomjsdriver.version} commons-cli commons-cli - 1.4 + ${commons-cli.version} redis.clients jedis - 3.6.0 + ${jedis.version} diff --git a/webmagic-core/pom.xml b/webmagic-core/pom.xml index 049477cb..64b8013f 100644 --- a/webmagic-core/pom.xml +++ b/webmagic-core/pom.xml @@ -52,8 +52,8 @@ - commons-collections - commons-collections + org.apache.commons + commons-collections4 diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java index bc8bb94c..00091c90 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java @@ -1,6 +1,20 @@ package us.codecraft.webmagic; -import org.apache.commons.collections.CollectionUtils; + +import java.io.Closeable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Date; +import java.util.List; +import java.util.UUID; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.locks.Condition; +import java.util.concurrent.locks.ReentrantLock; +import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang3.SerializationUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -17,16 +31,6 @@ import us.codecraft.webmagic.thread.CountableThreadPool; import us.codecraft.webmagic.utils.UrlUtils; import us.codecraft.webmagic.utils.WMCollections; -import java.io.Closeable; -import java.io.IOException; -import java.util.*; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicLong; -import java.util.concurrent.locks.Condition; -import java.util.concurrent.locks.ReentrantLock; - /** * Entrance of a crawler.
* A spider contains four modules: Downloader, Scheduler, PageProcessor and diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/AbstractSelectable.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/AbstractSelectable.java index e2bb5521..8775af10 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/AbstractSelectable.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/AbstractSelectable.java @@ -1,9 +1,9 @@ package us.codecraft.webmagic.selector; -import org.apache.commons.collections.CollectionUtils; import java.util.ArrayList; import java.util.List; +import org.apache.commons.collections4.CollectionUtils; /** * @author code4crafer@gmail.com diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/CssSelector.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/CssSelector.java index 6a638dbf..cfe55472 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/CssSelector.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/CssSelector.java @@ -1,14 +1,14 @@ package us.codecraft.webmagic.selector; -import org.apache.commons.collections.CollectionUtils; + +import java.util.ArrayList; +import java.util.List; +import org.apache.commons.collections4.CollectionUtils; import org.jsoup.nodes.Element; import org.jsoup.nodes.Node; import org.jsoup.nodes.TextNode; import org.jsoup.select.Elements; -import java.util.ArrayList; -import java.util.List; - /** * CSS selector. Based on Jsoup. * diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/JsonPathSelector.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/JsonPathSelector.java index f5c0baeb..aa9a903f 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/JsonPathSelector.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/JsonPathSelector.java @@ -1,11 +1,11 @@ package us.codecraft.webmagic.selector; -import com.alibaba.fastjson.JSON; -import com.jayway.jsonpath.JsonPath; import java.util.ArrayList; import java.util.List; import java.util.Map; +import com.alibaba.fastjson.JSON; +import com.jayway.jsonpath.JsonPath; /** * JsonPath selector.
@@ -16,15 +16,20 @@ import java.util.Map; */ public class JsonPathSelector implements Selector { - private String jsonPathStr; + private final String jsonPathStr; - private JsonPath jsonPath; + private final JsonPath jsonPath; public JsonPathSelector(String jsonPathStr) { this.jsonPathStr = jsonPathStr; this.jsonPath = JsonPath.compile(this.jsonPathStr); } + @SuppressWarnings("unused") + public String getJsonPathStr() { + return jsonPathStr; + } + @Override public String select(String text) { Object object = jsonPath.read(text); @@ -32,8 +37,8 @@ public class JsonPathSelector implements Selector { return null; } if (object instanceof List) { - List list = (List) object; - if (list != null && list.size() > 0) { + List list = (List) object; + if (list.size() > 0) { return toString(list.iterator().next()); } } @@ -49,8 +54,9 @@ public class JsonPathSelector implements Selector { } @Override + @SuppressWarnings("unchecked") public List selectList(String text) { - List list = new ArrayList(); + List list = new ArrayList<>(); Object object = jsonPath.read(text); if (object == null) { return list; diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/XpathSelector.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/XpathSelector.java index 8a980a50..4fa14699 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/XpathSelector.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/XpathSelector.java @@ -1,12 +1,12 @@ package us.codecraft.webmagic.selector; -import org.apache.commons.collections.CollectionUtils; + +import java.util.List; +import org.apache.commons.collections4.CollectionUtils; import org.jsoup.nodes.Element; import us.codecraft.xsoup.XPathEvaluator; import us.codecraft.xsoup.Xsoup; -import java.util.List; - /** * XPath selector based on Xsoup.
* diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/HttpClientDownloaderTest.java b/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/HttpClientDownloaderTest.java index ece06000..780ca752 100644 --- a/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/HttpClientDownloaderTest.java +++ b/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/HttpClientDownloaderTest.java @@ -1,9 +1,10 @@ package us.codecraft.webmagic.downloader; -import com.github.dreamhead.moco.HttpServer; -import com.github.dreamhead.moco.Runnable; -import com.github.dreamhead.moco.Runner; -import org.apache.commons.collections.map.HashedMap; + +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.util.Map; +import org.apache.commons.collections4.map.HashedMap; import org.apache.commons.io.IOUtils; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpUriRequest; @@ -11,6 +12,9 @@ import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.util.EntityUtils; import org.junit.Test; +import com.github.dreamhead.moco.HttpServer; +import com.github.dreamhead.moco.Runnable; +import com.github.dreamhead.moco.Runner; import us.codecraft.webmagic.Page; import us.codecraft.webmagic.Request; import us.codecraft.webmagic.Site; @@ -21,12 +25,19 @@ import us.codecraft.webmagic.proxy.SimpleProxyProvider; import us.codecraft.webmagic.selector.Html; import us.codecraft.webmagic.utils.CharsetUtils; import us.codecraft.webmagic.utils.HttpConstant; - -import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.util.Map; - -import static com.github.dreamhead.moco.Moco.*; +import static com.github.dreamhead.moco.Moco.and; +import static com.github.dreamhead.moco.Moco.by; +import static com.github.dreamhead.moco.Moco.cookie; +import static com.github.dreamhead.moco.Moco.eq; +import static com.github.dreamhead.moco.Moco.form; +import static com.github.dreamhead.moco.Moco.header; +import static com.github.dreamhead.moco.Moco.httpServer; +import static com.github.dreamhead.moco.Moco.method; +import static com.github.dreamhead.moco.Moco.not; +import static com.github.dreamhead.moco.Moco.query; +import static com.github.dreamhead.moco.Moco.text; +import static com.github.dreamhead.moco.Moco.uri; +import static com.github.dreamhead.moco.Moco.with; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/MockGithubDownloader.java b/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/MockGithubDownloader.java index 3aa742c1..58dd3a6f 100644 --- a/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/MockGithubDownloader.java +++ b/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/MockGithubDownloader.java @@ -1,13 +1,15 @@ package us.codecraft.webmagic.downloader; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.Charset; import org.apache.commons.io.IOUtils; import us.codecraft.webmagic.Page; import us.codecraft.webmagic.Request; import us.codecraft.webmagic.Task; import us.codecraft.webmagic.selector.PlainText; -import java.io.IOException; -import java.io.InputStream; /** * @author code4crafter@gmail.com @@ -19,7 +21,7 @@ public class MockGithubDownloader implements Downloader { Page page = new Page(); InputStream resourceAsStream = this.getClass().getResourceAsStream("/html/mock-github.html"); try { - page.setRawText(IOUtils.toString(resourceAsStream)); + page.setRawText(IOUtils.toString(resourceAsStream, Charset.defaultCharset())); } catch (IOException e) { e.printStackTrace(); } diff --git a/webmagic-extension/src/test/java/us/codecraft/webmagic/model/PageMocker.java b/webmagic-extension/src/test/java/us/codecraft/webmagic/model/PageMocker.java index 4b0c133c..0451edcf 100644 --- a/webmagic-extension/src/test/java/us/codecraft/webmagic/model/PageMocker.java +++ b/webmagic-extension/src/test/java/us/codecraft/webmagic/model/PageMocker.java @@ -1,11 +1,13 @@ package us.codecraft.webmagic.model; + +import java.io.IOException; +import java.nio.charset.Charset; import org.apache.commons.io.IOUtils; import us.codecraft.webmagic.Page; import us.codecraft.webmagic.Request; import us.codecraft.webmagic.selector.PlainText; -import java.io.IOException; /** * @author code4crafter@gmail.com @@ -16,7 +18,7 @@ public class PageMocker { public Page getMockJsonPage() throws IOException { Page page = new Page(); - page.setRawText(IOUtils.toString(PageMocker.class.getClassLoader().getResourceAsStream("json/mock-githubrepo.json"))); + page.setRawText(IOUtils.toString(PageMocker.class.getClassLoader().getResourceAsStream("json/mock-githubrepo.json"), Charset.defaultCharset())); page.setRequest(new Request("https://api.github.com/repos/code4craft/webmagic")); page.setUrl(new PlainText("https://api.github.com/repos/code4craft/webmagic")); return page; @@ -24,7 +26,7 @@ public class PageMocker { public Page getMockPage() throws IOException { Page page = new Page(); - page.setRawText(IOUtils.toString(PageMocker.class.getClassLoader().getResourceAsStream("html/mock-webmagic.html"))); + page.setRawText(IOUtils.toString(PageMocker.class.getClassLoader().getResourceAsStream("html/mock-webmagic.html"), Charset.defaultCharset())); page.setRequest(new Request("http://webmagic.io/list/0")); page.setUrl(new PlainText("http://webmagic.io/list/0")); return page; diff --git a/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/AngularJSProcessor.java b/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/AngularJSProcessor.java index ab560e45..46476bbc 100644 --- a/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/AngularJSProcessor.java +++ b/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/AngularJSProcessor.java @@ -1,14 +1,14 @@ package us.codecraft.webmagic.samples; -import org.apache.commons.collections.CollectionUtils; + +import java.util.List; +import org.apache.commons.collections4.CollectionUtils; import us.codecraft.webmagic.Page; import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Spider; import us.codecraft.webmagic.processor.PageProcessor; import us.codecraft.webmagic.selector.JsonPathSelector; -import java.util.List; - /** * @author code4crafter@gmail.com * @since 0.5.0 diff --git a/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/InfoQMiniBookProcessor.java b/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/InfoQMiniBookProcessor.java index 280f8f18..33dd6aa3 100644 --- a/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/InfoQMiniBookProcessor.java +++ b/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/InfoQMiniBookProcessor.java @@ -1,6 +1,6 @@ package us.codecraft.webmagic.samples; -import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.collections4.CollectionUtils; import us.codecraft.webmagic.Page; import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Spider; diff --git a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptProcessor.java b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptProcessor.java index 1822318c..78c9d87c 100755 --- a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptProcessor.java +++ b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptProcessor.java @@ -1,5 +1,14 @@ package us.codecraft.webmagic.scripts; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.Charset; +import java.util.Iterator; +import java.util.Map; +import javax.script.ScriptContext; +import javax.script.ScriptEngine; +import javax.script.ScriptException; import org.apache.commons.io.IOUtils; import org.jruby.RubyHash; import org.python.core.PyDictionary; @@ -7,14 +16,6 @@ import us.codecraft.webmagic.Page; import us.codecraft.webmagic.Site; import us.codecraft.webmagic.processor.PageProcessor; -import javax.script.ScriptContext; -import javax.script.ScriptEngine; -import javax.script.ScriptException; -import java.io.IOException; -import java.io.InputStream; -import java.util.Iterator; -import java.util.Map; - /** * @author code4crafter@gmail.com * @since 0.4.1 @@ -39,7 +40,7 @@ public class ScriptProcessor implements PageProcessor { enginePool = new ScriptEnginePool(language, threadNum); InputStream resourceAsStream = this.getClass().getClassLoader().getResourceAsStream(language.getDefineFile()); try { - defines = IOUtils.toString(resourceAsStream); + defines = IOUtils.toString(resourceAsStream, Charset.defaultCharset()); } catch (IOException e) { throw new IllegalArgumentException(e); } diff --git a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptProcessorBuilder.java b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptProcessorBuilder.java index 76b3e864..4691528a 100755 --- a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptProcessorBuilder.java +++ b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptProcessorBuilder.java @@ -1,10 +1,12 @@ package us.codecraft.webmagic.scripts; -import org.apache.commons.io.IOUtils; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.Charset; +import org.apache.commons.io.IOUtils; + /** * @author code4crafter@gmail.com @@ -35,7 +37,7 @@ public class ScriptProcessorBuilder { public ScriptProcessorBuilder scriptFromFile(String fileName) { try { InputStream resourceAsStream = new FileInputStream(fileName); - this.script = IOUtils.toString(resourceAsStream); + this.script = IOUtils.toString(resourceAsStream, Charset.defaultCharset()); } catch (IOException e) { //wrap IOException because I prefer a runtime exception... throw new IllegalArgumentException(e); @@ -46,7 +48,7 @@ public class ScriptProcessorBuilder { public ScriptProcessorBuilder scriptFromClassPathFile(String fileName) { try { InputStream resourceAsStream = ScriptProcessor.class.getClassLoader().getResourceAsStream(fileName); - this.script = IOUtils.toString(resourceAsStream); + this.script = IOUtils.toString(resourceAsStream, Charset.defaultCharset()); } catch (IOException e) { //wrap IOException because I prefer a runtime exception... throw new IllegalArgumentException(e);