diff --git a/.travis.yml b/.travis.yml
index a9f233f3..8f79da0c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,3 +1,3 @@
language: java
jdk:
- - oraclejdk7
+ - openjdk9
diff --git a/README-zh.md b/README-zh.md
index cd1b090c..65d5d172 100644
--- a/README-zh.md
+++ b/README-zh.md
@@ -93,7 +93,7 @@ webmagic还包含两个可用的扩展包,因为这两个包都依赖了比较
PageProcessor是webmagic-core的一部分,定制一个PageProcessor即可实现自己的爬虫逻辑。以下是抓取osc博客的一段代码:
```java
-public class OschinaBlogPageProcesser implements PageProcessor {
+public class OschinaBlogPageProcessor implements PageProcessor {
private Site site = Site.me().setDomain("my.oschina.net");
@@ -113,7 +113,7 @@ public class OschinaBlogPageProcesser implements PageProcessor {
}
public static void main(String[] args) {
- Spider.create(new OschinaBlogPageProcesser()).addUrl("http://my.oschina.net/flashsword/blog")
+ Spider.create(new OschinaBlogPageProcessor()).addUrl("http://my.oschina.net/flashsword/blog")
.addPipeline(new ConsolePipeline()).run();
}
}
diff --git a/pom.xml b/pom.xml
index 0765ae13..d016d0a9 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,10 +1,5 @@
-
- org.sonatype.oss
- oss-parent
- 7
-
us.codecraft
0.7.3
4.0.0
@@ -12,8 +7,8 @@
UTF-8
UTF-8
+ 1.8
4.0.0.RELEASE
-
webmagic-parent
webmagic-parent
@@ -39,7 +34,7 @@
scm:git:git@github.com:code4craft/webmagic.git
git@github.com:code4craft/webmagic.git
webmagic-parent-0.6.1
-
+
Apache License, Version 2.0
@@ -61,7 +56,7 @@
junit
junit
- 4.11
+ 4.13
test
@@ -73,12 +68,17 @@
org.apache.httpcomponents
httpclient
- 4.5.2
+ 4.5.12
+
+
+ org.apache.httpcomponents
+ httpcore
+ 4.4.13
com.google.guava
guava
- 15.0
+ 29.0-jre
com.jayway.jsonpath
@@ -88,12 +88,12 @@
org.slf4j
slf4j-api
- 1.7.6
+ 1.7.30
org.slf4j
slf4j-log4j12
- 1.7.6
+ 1.7.30
us.codecraft
@@ -103,12 +103,12 @@
com.alibaba
fastjson
- 1.2.28
+ 1.2.68
com.github.dreamhead
moco-core
- 0.11.0
+ 1.1.0
test
@@ -125,13 +125,13 @@
org.assertj
assertj-core
- 1.5.0
+ 3.16.1
test
org.apache.commons
commons-lang3
- 3.1
+ 3.10
commons-collections
@@ -139,9 +139,19 @@
3.2.2
- org.apache.commons
- commons-io
- 1.3.2
+ commons-io
+ commons-io
+ 2.7
+
+
+ org.codehaus.groovy
+ groovy-all
+ 2.4.19
+
+
+ org.jruby
+ jruby
+ 9.2.11.1
org.jsoup
@@ -149,20 +159,69 @@
1.10.3
- org.mockito
- mockito-all
- 1.9.5
- test
+ org.python
+ jython
+ 2.7.2
+
+
+ org.seleniumhq.selenium
+ selenium-java
+ 3.141.59
+
+
+ net.sf.saxon
+ Saxon-HE
+ 10.1
+
+
+ net.sourceforge.htmlcleaner
+ htmlcleaner
+ 2.5
+
+
+ com.github.detro
+ phantomjsdriver
+ 1.2.0
+
+
+ commons-cli
+ commons-cli
+ 1.4
+
+
+ redis.clients
+ jedis
+ 2.9.3
+
+ org.apache.maven.plugins
+ maven-enforcer-plugin
+ 3.0.0-M3
+
+
+ enforce-maven
+
+ enforce
+
+
+
+
+ 3.0.5
+
+
+
+
+
+
org.apache.maven.plugins
maven-surefire-plugin
- 2.18
+ 3.0.0-M4
0
@@ -170,11 +229,10 @@
org.apache.maven.plugins
maven-compiler-plugin
- 3.1
+ 3.8.1
- 1.6
- 1.6
- UTF-8
+ ${java.version}
+ ${java.version}
@@ -200,14 +258,12 @@
org.apache.maven.plugins
maven-resources-plugin
- 2.6
-
- UTF-8
-
+ 3.1.0
org.apache.maven.plugins
maven-jar-plugin
+ 3.2.0
log4j.xml
@@ -217,7 +273,7 @@
org.apache.maven.plugins
maven-source-plugin
- 2.2.1
+ 3.2.1
attach-sources
@@ -230,11 +286,15 @@
org.apache.maven.plugins
maven-javadoc-plugin
- 2.10.4
+ 3.2.0
UTF-8
WebMagic 0.7.3
en_US
+
+
+ false
+
@@ -255,7 +315,7 @@
org.apache.maven.plugins
maven-release-plugin
- 2.4.1
+ 3.0.0-M1
@@ -310,7 +370,7 @@
org.sonatype.plugins
nexus-staging-maven-plugin
- 1.6
+ 1.6.8
true
sonatype-nexus-staging
diff --git a/webmagic-core/pom.xml b/webmagic-core/pom.xml
index e889cd49..44fb7fa4 100644
--- a/webmagic-core/pom.xml
+++ b/webmagic-core/pom.xml
@@ -48,6 +48,7 @@
org.slf4j
slf4j-log4j12
+ true
@@ -66,7 +67,7 @@
- org.apache.commons
+ commons-io
commons-io
@@ -82,4 +83,4 @@
-
\ No newline at end of file
+
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java
index eefd91bb..5c26d20d 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java
@@ -78,14 +78,15 @@ public class Request implements Serializable {
return this;
}
- public Object getExtra(String key) {
+ @SuppressWarnings("unchecked")
+ public T getExtra(String key) {
if (extras == null) {
return null;
}
- return extras.get(key);
+ return (T) extras.get(key);
}
- public Request putExtra(String key, Object value) {
+ public Request putExtra(String key, T value) {
if (extras == null) {
extras = new HashMap();
}
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/ResultItems.java b/webmagic-core/src/main/java/us/codecraft/webmagic/ResultItems.java
index 7b543613..488c81e7 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/ResultItems.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/ResultItems.java
@@ -1,6 +1,5 @@
package us.codecraft.webmagic;
-import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
@@ -21,6 +20,7 @@ public class ResultItems {
private boolean skip;
+ @SuppressWarnings("unchecked")
public T get(String key) {
Object o = fields.get(key);
if (o == null) {
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java
index 766d08fc..72695538 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java
@@ -203,7 +203,7 @@ public class Site {
/**
* Set the interval between the processing of two pages.
- * Time unit is micro seconds.
+ * Time unit is milliseconds.
*
* @param sleepTime sleepTime
* @return this
@@ -215,7 +215,7 @@ public class Site {
/**
* Get the interval between the processing of two pages.
- * Time unit is micro seconds.
+ * Time unit is milliseconds.
*
* @return the interval between the processing of two pages,
*/
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java
index fff7c7cf..24889c88 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java
@@ -2,7 +2,6 @@ package us.codecraft.webmagic.downloader;
import org.apache.commons.io.IOUtils;
import org.apache.http.HttpResponse;
-import org.apache.http.annotation.ThreadSafe;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.util.EntityUtils;
@@ -30,7 +29,6 @@ import java.util.Map;
* @author code4crafter@gmail.com
* @since 0.1.0
*/
-@ThreadSafe
public class HttpClientDownloader extends AbstractDownloader {
private Logger logger = LoggerFactory.getLogger(getClass());
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientGenerator.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientGenerator.java
index 0125049b..66697597 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientGenerator.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientGenerator.java
@@ -1,5 +1,18 @@
package us.codecraft.webmagic.downloader;
+import java.io.IOException;
+import java.security.KeyManagementException;
+import java.security.NoSuchAlgorithmException;
+import java.security.cert.CertificateException;
+import java.security.cert.X509Certificate;
+import java.util.Map;
+
+import javax.net.ssl.SSLContext;
+import javax.net.ssl.TrustManager;
+import javax.net.ssl.X509TrustManager;
+
+import org.apache.commons.lang3.JavaVersion;
+import org.apache.commons.lang3.SystemUtils;
import org.apache.http.HttpException;
import org.apache.http.HttpRequest;
import org.apache.http.HttpRequestInterceptor;
@@ -9,34 +22,30 @@ import org.apache.http.config.RegistryBuilder;
import org.apache.http.config.SocketConfig;
import org.apache.http.conn.socket.ConnectionSocketFactory;
import org.apache.http.conn.socket.PlainConnectionSocketFactory;
+import org.apache.http.conn.ssl.DefaultHostnameVerifier;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
-import org.apache.http.impl.client.*;
+import org.apache.http.impl.client.BasicCookieStore;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.DefaultHttpRequestRetryHandler;
+import org.apache.http.impl.client.HttpClientBuilder;
+import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.impl.cookie.BasicClientCookie;
import org.apache.http.protocol.HttpContext;
import org.apache.http.ssl.SSLContexts;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import us.codecraft.webmagic.Site;
-import javax.net.ssl.SSLContext;
-import javax.net.ssl.TrustManager;
-import javax.net.ssl.X509TrustManager;
-import java.io.IOException;
-import java.security.KeyManagementException;
-import java.security.NoSuchAlgorithmException;
-import java.security.cert.CertificateException;
-import java.security.cert.X509Certificate;
-import java.util.Map;
+import us.codecraft.webmagic.Site;
/**
* @author code4crafter@gmail.com
* @since 0.4.0
*/
public class HttpClientGenerator {
-
+
private transient Logger logger = LoggerFactory.getLogger(getClass());
-
+
private PoolingHttpClientConnectionManager connectionManager;
public HttpClientGenerator() {
@@ -48,41 +57,51 @@ public class HttpClientGenerator {
connectionManager.setDefaultMaxPerRoute(100);
}
- private SSLConnectionSocketFactory buildSSLConnectionSocketFactory() {
- try {
- return new SSLConnectionSocketFactory(createIgnoreVerifySSL()); // 优先绕过安全证书
- } catch (KeyManagementException e) {
+ private SSLConnectionSocketFactory buildSSLConnectionSocketFactory() {
+ try {
+ SSLContext sslContext = createIgnoreVerifySSL();
+ String[] supportedProtocols;
+ if (SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_11)) {
+ supportedProtocols = new String[] { "SSLv3", "TLSv1", "TLSv1.1", "TLSv1.2", "TLSv1.3" }; // TLSv1.3 is only offered on Java 11 or later
+ } else {
+ supportedProtocols = new String[] { "SSLv3", "TLSv1", "TLSv1.1", "TLSv1.2" };
+ }
+ logger.debug("supportedProtocols: {}", String.join(", ", supportedProtocols));
+ return new SSLConnectionSocketFactory(sslContext, supportedProtocols,
+ null,
+ new DefaultHostnameVerifier()); // prefer the context from createIgnoreVerifySSL(), which skips certificate checks
+ } catch (KeyManagementException e) {
logger.error("ssl connection fail", e);
} catch (NoSuchAlgorithmException e) {
logger.error("ssl connection fail", e);
}
- return SSLConnectionSocketFactory.getSocketFactory();
+ return SSLConnectionSocketFactory.getSocketFactory(); // fallback: default factory when the trust-all context could not be built
}
- private SSLContext createIgnoreVerifySSL() throws NoSuchAlgorithmException, KeyManagementException {
- // 实现一个X509TrustManager接口,用于绕过验证,不用修改里面的方法
- X509TrustManager trustManager = new X509TrustManager() {
-
- @Override
- public void checkClientTrusted(X509Certificate[] chain, String authType) throws CertificateException {
- }
-
- @Override
- public void checkServerTrusted(X509Certificate[] chain, String authType) throws CertificateException {
- }
-
- @Override
- public X509Certificate[] getAcceptedIssuers() {
- return null;
- }
-
- };
-
- SSLContext sc = SSLContext.getInstance("SSLv3");
- sc.init(null, new TrustManager[] { trustManager }, null);
- return sc;
+ private SSLContext createIgnoreVerifySSL() throws NoSuchAlgorithmException, KeyManagementException {
+ // Trust-all X509TrustManager: the check methods are intentionally empty, so no certificate is ever rejected
+ X509TrustManager trustManager = new X509TrustManager() {
+
+ @Override
+ public void checkClientTrusted(X509Certificate[] chain, String authType) throws CertificateException {
+ }
+
+ @Override
+ public void checkServerTrusted(X509Certificate[] chain, String authType) throws CertificateException {
+ }
+
+ @Override
+ public X509Certificate[] getAcceptedIssuers() {
+ return new X509Certificate[0]; // the X509TrustManager contract requires a non-null (possibly empty) array
+ }
+
+ };
+
+ SSLContext sc = SSLContext.getInstance("SSLv3"); // NOTE(review): legacy context name; confirm "TLS" would not be the better choice
+ sc.init(null, new TrustManager[] { trustManager }, null);
+ return sc;
}
-
+
public HttpClientGenerator setPoolSize(int poolSize) {
connectionManager.setMaxTotal(poolSize);
return this;
@@ -94,7 +113,7 @@ public class HttpClientGenerator {
private CloseableHttpClient generateClient(Site site) {
HttpClientBuilder httpClientBuilder = HttpClients.custom();
-
+
httpClientBuilder.setConnectionManager(connectionManager);
if (site.getUserAgent() != null) {
httpClientBuilder.setUserAgent(site.getUserAgent());
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpUriRequestConverter.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpUriRequestConverter.java
index 28a7ce5e..4baaf4a4 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpUriRequestConverter.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpUriRequestConverter.java
@@ -74,7 +74,7 @@ public class HttpUriRequestConverter {
}
if (proxy != null) {
- requestConfigBuilder.setProxy(new HttpHost(proxy.getHost(), proxy.getPort()));
+ requestConfigBuilder.setProxy(new HttpHost(proxy.getHost(), proxy.getPort(), proxy.getScheme()));
}
requestBuilder.setConfig(requestConfigBuilder.build());
HttpUriRequest httpUriRequest = requestBuilder.build();
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/pipeline/FilePipeline.java b/webmagic-core/src/main/java/us/codecraft/webmagic/pipeline/FilePipeline.java
index 096e1c3a..964e5fd5 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/pipeline/FilePipeline.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/pipeline/FilePipeline.java
@@ -1,10 +1,8 @@
package us.codecraft.webmagic.pipeline;
import org.apache.commons.codec.digest.DigestUtils;
-import org.apache.http.annotation.ThreadSafe;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-
import us.codecraft.webmagic.ResultItems;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.utils.FilePersistentBase;
@@ -24,6 +22,7 @@ import java.util.Map;
@ThreadSafe
public class FilePipeline extends FilePersistentBase implements Pipeline {
+
private Logger logger = LoggerFactory.getLogger(getClass());
/**
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/Proxy.java b/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/Proxy.java
index c5f10073..6554fab5 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/Proxy.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/Proxy.java
@@ -1,73 +1,135 @@
package us.codecraft.webmagic.proxy;
-/**
- *
- */
+import java.io.UnsupportedEncodingException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URLEncoder;
+import java.nio.charset.StandardCharsets;
+
+import org.apache.commons.lang3.StringUtils;
public class Proxy {
- private String host;
- private int port;
- private String username;
- private String password;
+ private String scheme;
+
+ private String host;
+
+ private int port;
+
+ private String username;
+
+ private String password;
- public Proxy(String host, int port) {
- this.host = host;
- this.port = port;
- }
+ /**
+ * Creates a proxy from {@code [scheme:]//[user[:password]@]host[:port]}; empty credentials become {@code null}.
+ */
+ public static Proxy create(final URI uri) {
+ Proxy proxy = new Proxy(uri.getHost(), uri.getPort(), uri.getScheme());
+ String userInfo = uri.getUserInfo();
+ if (userInfo != null) {
+ // limit 2: only the first ':' separates user from password, and trailing empty parts are kept (":" no longer yields an empty array)
+ String[] up = userInfo.split(":", 2);
+ proxy.username = up[0].isEmpty() ? null : up[0];
+ proxy.password = up.length < 2 || up[1].isEmpty() ? null : up[1];
+ }
+ return proxy;
+ }
- public Proxy(String host, int port, String username, String password) {
- this.host = host;
- this.port = port;
- this.username = username;
- this.password = password;
- }
+ public Proxy(String host, int port) {
+ this(host, port, null);
+ }
+
+ public Proxy(String host, int port, String scheme) {
+ this.host = host;
+ this.port = port;
+ this.scheme = scheme;
+ }
+
+ public Proxy(String host, int port, String username, String password) {
+ this.host = host;
+ this.port = port;
+ this.username = username;
+ this.password = password;
+ }
+
+ public String getScheme() {
+ return scheme;
+ }
+
+ public void setScheme(String scheme) {
+ this.scheme = scheme;
+ }
public String getHost() {
- return host;
- }
-
- public int getPort() {
- return port;
- }
-
- public String getUsername() {
- return username;
- }
-
- public String getPassword() {
- return password;
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o) return true;
- if (o == null || getClass() != o.getClass()) return false;
-
- Proxy proxy = (Proxy) o;
-
- if (port != proxy.port) return false;
- if (host != null ? !host.equals(proxy.host) : proxy.host != null) return false;
- if (username != null ? !username.equals(proxy.username) : proxy.username != null) return false;
- return password != null ? password.equals(proxy.password) : proxy.password == null;
- }
-
- @Override
- public int hashCode() {
- int result = host != null ? host.hashCode() : 0;
- result = 31 * result + port;
- result = 31 * result + (username != null ? username.hashCode() : 0);
- result = 31 * result + (password != null ? password.hashCode() : 0);
- return result;
- }
-
- @Override
- public String toString() {
- return "Proxy{" +
- "host='" + host + '\'' +
- ", port=" + port +
- ", username='" + username + '\'' +
- ", password='" + password + '\'' +
- '}';
- }
+ return host;
+ }
+
+ public int getPort() {
+ return port;
+ }
+
+ public String getUsername() {
+ return username;
+ }
+
+ public String getPassword() {
+ return password;
+ }
+
+ /** Renders this proxy as {@code [scheme:]//[user][:password]@host:port}; throws IllegalArgumentException if that is not a valid URI. */
+ public URI toURI() {
+ final StringBuilder userInfoBuffer = new StringBuilder();
+ if (username != null) {
+ userInfoBuffer.append(username);
+ }
+ if (password != null) {
+ userInfoBuffer.append(":").append(password);
+ }
+ final String userInfo = StringUtils.defaultIfEmpty(userInfoBuffer.toString(), null);
+ try {
+ // pass raw credentials: the multi-argument URI constructor quotes illegal characters itself and always re-quotes '%', so pre-encoding double-encodes
+ return new URI(scheme, userInfo, host, port, null, null, null);
+ } catch (URISyntaxException e) {
+ throw new IllegalArgumentException(e.getMessage(), e);
+ }
+ }
+
+ private String urlencode(String s) {
+ String enc = StandardCharsets.UTF_8.name();
+ try {
+ return URLEncoder.encode(s, enc);
+ } catch (UnsupportedEncodingException e) {
+ throw new IllegalArgumentException(e);
+ }
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+
+ Proxy proxy = (Proxy) o;
+
+ if (port != proxy.port) return false;
+ if (host != null ? !host.equals(proxy.host) : proxy.host != null) return false;
+ if (scheme != null ? !scheme.equals(proxy.scheme) : proxy.scheme != null) return false;
+ if (username != null ? !username.equals(proxy.username) : proxy.username != null) return false;
+ return password != null ? password.equals(proxy.password) : proxy.password == null;
+ }
+
+ @Override
+ public int hashCode() {
+ int result = host != null ? host.hashCode() : 0;
+ result = 31 * result + port;
+ result = 31 * result + (scheme != null ? scheme.hashCode() : 0);
+ result = 31 * result + (username != null ? username.hashCode() : 0);
+ result = 31 * result + (password != null ? password.hashCode() : 0);
+ return result;
+ }
+
+ @Override
+ public String toString() {
+ return this.toURI().toString();
+ }
+
}
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/ProxyProvider.java b/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/ProxyProvider.java
index 5b61a993..0cef4ed4 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/ProxyProvider.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/ProxyProvider.java
@@ -25,5 +25,5 @@ public interface ProxyProvider {
* @return proxy
*/
Proxy getProxy(Task task);
-
+
}
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/SimpleProxyProvider.java b/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/SimpleProxyProvider.java
index d8f47fe4..ddef6a88 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/SimpleProxyProvider.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/proxy/SimpleProxyProvider.java
@@ -59,4 +59,5 @@ public class SimpleProxyProvider implements ProxyProvider {
}
return p % size;
}
+
}
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/PriorityScheduler.java b/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/PriorityScheduler.java
index 8fa1b9ea..14cbaff3 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/PriorityScheduler.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/PriorityScheduler.java
@@ -1,6 +1,5 @@
package us.codecraft.webmagic.scheduler;
-import org.apache.http.annotation.ThreadSafe;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.utils.NumberUtils;
@@ -16,7 +15,6 @@ import java.util.concurrent.PriorityBlockingQueue;
* @author code4crafter@gmail.com
* @since 0.2.1
*/
-@ThreadSafe
public class PriorityScheduler extends DuplicateRemovedScheduler implements MonitorableScheduler {
public static final int INITIAL_CAPACITY = 5;
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/QueueScheduler.java b/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/QueueScheduler.java
index 078506c6..f9ad0e98 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/QueueScheduler.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/QueueScheduler.java
@@ -1,6 +1,5 @@
package us.codecraft.webmagic.scheduler;
-import org.apache.http.annotation.ThreadSafe;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Task;
@@ -15,7 +14,6 @@ import java.util.concurrent.LinkedBlockingQueue;
* @author code4crafter@gmail.com
* @since 0.1.0
*/
-@ThreadSafe
public class QueueScheduler extends DuplicateRemovedScheduler implements MonitorableScheduler {
private BlockingQueue queue = new LinkedBlockingQueue();
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/RegexSelector.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/RegexSelector.java
index 9ae538c0..fb0a161d 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/RegexSelector.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/RegexSelector.java
@@ -41,7 +41,7 @@ public class RegexSelector implements Selector {
/**
* Create a RegexSelector. When there is no capture group, the value is set to 0 else set to 1.
- * @param regexStr
+ * @param regexStr the regular expression.
*/
public RegexSelector(String regexStr) {
this.compileRegex(regexStr);
diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/HttpClientDownloaderTest.java b/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/HttpClientDownloaderTest.java
index 04a45a02..ece06000 100644
--- a/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/HttpClientDownloaderTest.java
+++ b/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/HttpClientDownloaderTest.java
@@ -322,4 +322,5 @@ public class HttpClientDownloaderTest {
});
}
+
}
diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/SSLCompatibilityTest.java b/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/SSLCompatibilityTest.java
new file mode 100644
index 00000000..861b315a
--- /dev/null
+++ b/webmagic-core/src/test/java/us/codecraft/webmagic/downloader/SSLCompatibilityTest.java
@@ -0,0 +1,26 @@
+package us.codecraft.webmagic.downloader;
+
+import org.junit.Test;
+import us.codecraft.webmagic.Page;
+import us.codecraft.webmagic.Request;
+import us.codecraft.webmagic.Site;
+import us.codecraft.webmagic.Task;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+/**
+ * @author code4crafter@gmail.com
+ * Date: 2017/11/29
+ * Time: 1:32 PM
+ */
+public class SSLCompatibilityTest {
+
+ @Test
+ public void test_tls12() throws Exception {
+ HttpClientDownloader httpClientDownloader = new HttpClientDownloader();
+ Task task = Site.me().setCycleRetryTimes(5).toTask();
+ Request request = new Request("https://juejin.im/"); // NOTE(review): presumably needs live network access to this host — confirm it is acceptable in CI
+ Page page = httpClientDownloader.download(request, task);
+ assertThat(page.isDownloadSuccess()).isTrue(); // passes only if the HTTPS download succeeded
+ }
+}
diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/proxy/ProxyTest.java b/webmagic-core/src/test/java/us/codecraft/webmagic/proxy/ProxyTest.java
index 86af3672..8e4c8202 100644
--- a/webmagic-core/src/test/java/us/codecraft/webmagic/proxy/ProxyTest.java
+++ b/webmagic-core/src/test/java/us/codecraft/webmagic/proxy/ProxyTest.java
@@ -1,45 +1,97 @@
package us.codecraft.webmagic.proxy;
-import org.apache.http.HttpHost;
-import org.junit.BeforeClass;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import java.net.URI;
import java.util.ArrayList;
import java.util.List;
+import org.apache.http.HttpHost;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
/**
* @author yxssfxwzy@sina.com May 30, 2014
*
*/
public class ProxyTest {
- private static List httpProxyList = new ArrayList();
-
- @BeforeClass
- public static void before() {
- // String[] source = { "0.0.0.1:0", "0.0.0.2:0", "0.0.0.3:0",
- // "0.0.0.4:0" };
- String[] source = { "::0.0.0.1:0", "::0.0.0.2:0", "::0.0.0.3:0", "::0.0.0.4:0" };
- for (String line : source) {
- httpProxyList.add(new String[] {line.split(":")[0], line.split(":")[1], line.split(":")[2], line.split(":")[3] });
- }
- }
-
- class Fetch extends Thread {
- HttpHost hp;
-
- public Fetch(HttpHost hp) {
- this.hp = hp;
- }
-
- @Override
- public void run() {
- try {
- System.out.println("fetch web page use proxy: " + hp.getHostName() + ":" + hp.getPort());
- sleep(500);
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
- }
- }
+ private static List httpProxyList = new ArrayList();
+
+ @BeforeClass
+ public static void before() {
+ // String[] source = { "0.0.0.1:0", "0.0.0.2:0", "0.0.0.3:0",
+ // "0.0.0.4:0" };
+ String[] source = { "::0.0.0.1:0", "::0.0.0.2:0", "::0.0.0.3:0", "::0.0.0.4:0" };
+ for (String line : source) {
+ httpProxyList.add(new String[] {line.split(":")[0], line.split(":")[1], line.split(":")[2], line.split(":")[3] });
+ }
+ }
+
+ class Fetch extends Thread {
+ HttpHost hp;
+
+ public Fetch(HttpHost hp) {
+ this.hp = hp;
+ }
+
+ @Override
+ public void run() {
+ try {
+ System.out.println("fetch web page use proxy: " + hp.getHostName() + ":" + hp.getPort());
+ sleep(500);
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ }
+ }
+
+ @Test
+ public void testCreate() {
+ Proxy proxy = Proxy.create(URI.create("//127.0.0.1:8080"));
+ assertNull(proxy.getScheme());
+ assertNull(proxy.getUsername());
+ assertNull(proxy.getPassword());
+ assertEquals("127.0.0.1", proxy.getHost());
+ assertEquals(8080, proxy.getPort());
+
+ proxy = Proxy.create(URI.create("http://127.0.0.1:8080"));
+ assertEquals("http", proxy.getScheme());
+ assertNull(proxy.getUsername());
+ assertNull(proxy.getPassword());
+ assertEquals("127.0.0.1", proxy.getHost());
+ assertEquals(8080, proxy.getPort());
+
+ proxy = Proxy.create(URI.create("//username:password@127.0.0.1:8080"));
+ assertNull(proxy.getScheme());
+ assertEquals("username", proxy.getUsername());
+ assertEquals("password", proxy.getPassword());
+ assertEquals("127.0.0.1", proxy.getHost());
+ assertEquals(8080, proxy.getPort());
+
+ proxy = Proxy.create(URI.create("//username@127.0.0.1:8080"));
+ assertNull(proxy.getScheme());
+ assertEquals("username", proxy.getUsername());
+ assertNull(proxy.getPassword());
+ assertEquals("127.0.0.1", proxy.getHost());
+ assertEquals(8080, proxy.getPort());
+
+ proxy = Proxy.create(URI.create("//:password@127.0.0.1:8080"));
+ assertNull(proxy.getScheme());
+ assertNull(proxy.getUsername());
+ assertEquals("password", proxy.getPassword());
+ assertEquals("127.0.0.1", proxy.getHost());
+ assertEquals(8080, proxy.getPort());
+ }
+
+ @Test
+ public void testToString() {
+ assertEquals("//127.0.0.1:8080", new Proxy("127.0.0.1", 8080).toString());
+ assertEquals("http://127.0.0.1:8080", new Proxy("127.0.0.1", 8080, "http").toString());
+ assertEquals("//username:password@127.0.0.1:8080", new Proxy("127.0.0.1", 8080, "username", "password").toString());
+ assertEquals("//username@127.0.0.1:8080", new Proxy("127.0.0.1", 8080, "username", null).toString());
+ assertEquals("//:password@127.0.0.1:8080", new Proxy("127.0.0.1", 8080, null, "password").toString());
+ }
}
diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/utils/NumberUtilsTest.java b/webmagic-core/src/test/java/us/codecraft/webmagic/utils/NumberUtilsTest.java
new file mode 100644
index 00000000..f9e725e2
--- /dev/null
+++ b/webmagic-core/src/test/java/us/codecraft/webmagic/utils/NumberUtilsTest.java
@@ -0,0 +1,16 @@
+package us.codecraft.webmagic.utils;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class NumberUtilsTest {
+
+ @Test
+ public void testCompareLong() {
+ Assert.assertEquals(0, NumberUtils.compareLong(0L, 0L));
+ Assert.assertEquals(1, NumberUtils.compareLong(9L, 0L));
+ Assert.assertEquals(-1, NumberUtils.compareLong(0L, 9L));
+ Assert.assertEquals(-1, NumberUtils.compareLong(-9L, 0L));
+ Assert.assertEquals(1, NumberUtils.compareLong(0L, -9L));
+ }
+}
diff --git a/webmagic-extension/pom.xml b/webmagic-extension/pom.xml
index 7e949ca6..bf7ff05d 100644
--- a/webmagic-extension/pom.xml
+++ b/webmagic-extension/pom.xml
@@ -13,16 +13,14 @@
redis.clients
jedis
- 2.9.0
com.google.guava
guava
- 15.0
true
- us.codecraft
+ ${project.groupId}
webmagic-core
${project.version}
@@ -32,4 +30,4 @@
-
\ No newline at end of file
+
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java
index 0fda351b..6055bdb0 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/PhantomJSDownloader.java
@@ -1,6 +1,5 @@
package us.codecraft.webmagic.downloader;
-import org.apache.http.annotation.ThreadSafe;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.Page;
@@ -16,7 +15,6 @@ import java.io.*;
* @author dolphineor@gmail.com
* @version 0.5.3
*/
-@ThreadSafe
public class PhantomJSDownloader extends AbstractDownloader {
private static Logger logger = LoggerFactory.getLogger(PhantomJSDownloader.class);
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/example/PatternProcessorExample.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/example/PatternProcessorExample.java
index 8ecb08fe..9406abfd 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/example/PatternProcessorExample.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/example/PatternProcessorExample.java
@@ -1,6 +1,8 @@
package us.codecraft.webmagic.example;
-import org.apache.log4j.Logger;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
import us.codecraft.webmagic.*;
import us.codecraft.webmagic.handler.CompositePageProcessor;
import us.codecraft.webmagic.handler.CompositePipeline;
@@ -15,7 +17,7 @@ import us.codecraft.webmagic.handler.RequestMatcher;
*/
public class PatternProcessorExample {
- private static Logger log = Logger.getLogger(PatternProcessorExample.class);
+ private static Logger log = LoggerFactory.getLogger(PatternProcessorExample.class);
public static void main(String... args) {
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler.java
index 6ca98285..fec3c1db 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler.java
@@ -1,12 +1,13 @@
package us.codecraft.webmagic.scheduler;
-import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang3.math.NumberUtils;
-import us.codecraft.webmagic.Request;
-import us.codecraft.webmagic.Task;
-import us.codecraft.webmagic.scheduler.component.DuplicateRemover;
-
-import java.io.*;
+import java.io.BufferedReader;
+import java.io.Closeable;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.PrintWriter;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
@@ -17,6 +18,13 @@ import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.math.NumberUtils;
+
+import us.codecraft.webmagic.Request;
+import us.codecraft.webmagic.Task;
+import us.codecraft.webmagic.scheduler.component.DuplicateRemover;
+
/**
* Store urls and cursor in files so that a Spider can resume the status when shutdown.
@@ -141,7 +149,7 @@ public class FileCacheQueueScheduler extends DuplicateRemovedScheduler implement
urls.add(line.trim());
lineReaded++;
if (lineReaded > cursor.get()) {
- queue.add(new Request(line));
+ queue.add(deserializeRequest(line));
}
}
} finally {
@@ -183,7 +191,7 @@ public class FileCacheQueueScheduler extends DuplicateRemovedScheduler implement
init(task);
}
queue.add(request);
- fileUrlWriter.println(request.getUrl());
+ fileUrlWriter.println(serializeRequest(request));
}
@Override
@@ -204,4 +212,13 @@ public class FileCacheQueueScheduler extends DuplicateRemovedScheduler implement
public int getTotalRequestsCount(Task task) {
return getDuplicateRemover().getTotalRequestsCount(task);
}
+
+ protected String serializeRequest(Request request) { // produces the single line of text that the scheduler prints through fileUrlWriter for this request
+ return request.getUrl(); // this implementation keeps only the URL; no other request state ends up in the line
+ }
+
+ protected Request deserializeRequest(String line) { // rebuilds a queued Request from one line read back while the saved url file is replayed
+ return new Request(line); // the line is handed to the constructor as received; the caller trims only the copy it adds to `urls`
+ }
+
}
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java
index ce1111f2..c70d8850 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java
@@ -2,6 +2,7 @@ package us.codecraft.webmagic.scheduler;
import com.alibaba.fastjson.JSON;
import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.lang3.StringUtils;
import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPool;
import redis.clients.jedis.JedisPoolConfig;
@@ -60,14 +61,41 @@ public class RedisScheduler extends DuplicateRemovedScheduler implements Monitor
Jedis jedis = pool.getResource();
try {
jedis.rpush(getQueueKey(task), request.getUrl());
- if (request.getExtras() != null) {
+ if (checkForAdditionalInfo(request)) {
String field = DigestUtils.shaHex(request.getUrl());
String value = JSON.toJSONString(request);
jedis.hset((ITEM_PREFIX + task.getUUID()), field, value);
}
} finally {
- pool.returnResource(jedis);
+ jedis.close();
+ }
+ }
+
+ private boolean checkForAdditionalInfo(Request request) { // true when the request carries anything beyond its URL; the caller then also stores it as JSON in the item hash
+ if (request == null) { // defensive only: the visible caller has already called request.getUrl() before reaching this method
+ return false;
+ }
+
+ if (!request.getHeaders().isEmpty() || !request.getCookies().isEmpty()) { // NOTE(review): no null guard here, unlike getExtras() below — assumes Request never returns null for these; confirm
+ return true;
+ }
+
+ if (StringUtils.isNotBlank(request.getCharset()) || StringUtils.isNotBlank(request.getMethod())) { // a non-blank charset or method counts as extra state
+ return true;
+ }
+
+ if (request.isBinaryContent() || request.getRequestBody() != null) { // binary flag set, or a request body is attached
+ return true;
}
+
+ if (request.getExtras() != null && !request.getExtras().isEmpty()) { // extras is null-checked before isEmpty(); an empty map does not count
+ return true;
+ }
+ if (request.getPriority() != 0L) { // any priority other than 0 counts as extra state
+ return true;
+ }
+
+ return false; // URL-only request: the caller pushes just the URL onto the queue list
}
@Override
@@ -85,7 +113,7 @@ public class RedisScheduler extends DuplicateRemovedScheduler implements Monitor
Request o = JSON.parseObject(new String(bytes), Request.class);
return o;
}
- Request request = new Request(url);
+ Request request = new Request(url);
return request;
} finally {
pool.returnResource(jedis);
@@ -100,8 +128,7 @@ public class RedisScheduler extends DuplicateRemovedScheduler implements Monitor
return QUEUE_PREFIX + task.getUUID();
}
- protected String getItemKey(Task task)
- {
+ protected String getItemKey(Task task) {
return ITEM_PREFIX + task.getUUID();
}
diff --git a/webmagic-samples/pom.xml b/webmagic-samples/pom.xml
index 072bb3fd..44fee7c0 100644
--- a/webmagic-samples/pom.xml
+++ b/webmagic-samples/pom.xml
@@ -11,12 +11,12 @@
- us.codecraft
+ ${project.groupId}
webmagic-core
${project.version}
- us.codecraft
+ ${project.groupId}
webmagic-extension
${project.version}
diff --git a/webmagic-saxon/pom.xml b/webmagic-saxon/pom.xml
index 95f706ed..da0c5f20 100644
--- a/webmagic-saxon/pom.xml
+++ b/webmagic-saxon/pom.xml
@@ -11,19 +11,17 @@
- us.codecraft
+ ${project.groupId}
webmagic-core
${project.version}
net.sourceforge.htmlcleaner
htmlcleaner
- 2.5
net.sf.saxon
Saxon-HE
- 9.5.1-1
junit
@@ -34,7 +32,9 @@
+ org.apache.maven.plugins
maven-deploy-plugin
+ 3.0.0-M1
true
diff --git a/webmagic-saxon/src/main/java/us/codecraft/webmagic/selector/Xpath2Selector.java b/webmagic-saxon/src/main/java/us/codecraft/webmagic/selector/Xpath2Selector.java
index 98b1efe4..d8aab6cc 100644
--- a/webmagic-saxon/src/main/java/us/codecraft/webmagic/selector/Xpath2Selector.java
+++ b/webmagic-saxon/src/main/java/us/codecraft/webmagic/selector/Xpath2Selector.java
@@ -2,11 +2,12 @@ package us.codecraft.webmagic.selector;
import net.sf.saxon.lib.NamespaceConstant;
import net.sf.saxon.xpath.XPathEvaluator;
-import org.apache.log4j.Logger;
import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.DomSerializer;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagNode;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
@@ -40,7 +41,7 @@ public class Xpath2Selector implements Selector {
private XPathExpression xPathExpression;
- private Logger logger = Logger.getLogger(getClass());
+ private Logger logger = LoggerFactory.getLogger(getClass());
public Xpath2Selector(String xpathStr) {
this.xpathStr = xpathStr;
diff --git a/webmagic-scripts/pom.xml b/webmagic-scripts/pom.xml
index 22956cb5..9f4219d6 100755
--- a/webmagic-scripts/pom.xml
+++ b/webmagic-scripts/pom.xml
@@ -7,7 +7,6 @@
4.0.0
- us.codecraft
webmagic-scripts
1.1.2-2
@@ -17,27 +16,23 @@
org.jruby
jruby
- 1.7.6
org.jetbrains.kotlin
kotlin-stdlib
${kotlin.version}
-
org.codehaus.groovy
groovy-all
- 2.1.6
- org.python
+
+ org.python
jython
- 2.5.3
commons-cli
commons-cli
- 1.2
junit
@@ -45,12 +40,16 @@
test
- us.codecraft
+ ${project.groupId}
webmagic-core
${project.version}
- us.codecraft
+ org.slf4j
+ slf4j-log4j12
+
+
+ ${project.groupId}
webmagic-extension
${project.version}
@@ -59,21 +58,6 @@
${project.basedir}/src/main/java
-
- maven-compiler-plugin
-
- 1.6
- 1.6
- UTF-8
-
-
-
- org.apache.maven.plugins
- maven-resources-plugin
-
- UTF-8
-
-
org.apache.maven.plugins
maven-jar-plugin
diff --git a/webmagic-selenium/pom.xml b/webmagic-selenium/pom.xml
index 1cbf5921..dfc4a195 100644
--- a/webmagic-selenium/pom.xml
+++ b/webmagic-selenium/pom.xml
@@ -13,21 +13,16 @@
org.seleniumhq.selenium
selenium-java
- 2.41.0
- us.codecraft
+ ${project.groupId}
webmagic-core
${project.version}
com.github.detro
phantomjsdriver
- 1.2.0
-
-
-
junit
junit
@@ -37,7 +32,9 @@
+ org.apache.maven.plugins
maven-deploy-plugin
+ 3.0.0-M1
true
diff --git a/webmagic-selenium/src/main/java/us/codecraft/webmagic/downloader/selenium/SeleniumDownloader.java b/webmagic-selenium/src/main/java/us/codecraft/webmagic/downloader/selenium/SeleniumDownloader.java
index f45f7e2a..cce293fc 100644
--- a/webmagic-selenium/src/main/java/us/codecraft/webmagic/downloader/selenium/SeleniumDownloader.java
+++ b/webmagic-selenium/src/main/java/us/codecraft/webmagic/downloader/selenium/SeleniumDownloader.java
@@ -1,10 +1,12 @@
package us.codecraft.webmagic.downloader.selenium;
-import org.apache.log4j.Logger;
import org.openqa.selenium.By;
import org.openqa.selenium.Cookie;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
@@ -29,7 +31,7 @@ public class SeleniumDownloader implements Downloader, Closeable {
private volatile WebDriverPool webDriverPool;
- private Logger logger = Logger.getLogger(getClass());
+ private Logger logger = LoggerFactory.getLogger(getClass());
private int sleepTime = 0;
diff --git a/webmagic-selenium/src/main/java/us/codecraft/webmagic/downloader/selenium/WebDriverPool.java b/webmagic-selenium/src/main/java/us/codecraft/webmagic/downloader/selenium/WebDriverPool.java
index 1472cb32..e1d9dd03 100644
--- a/webmagic-selenium/src/main/java/us/codecraft/webmagic/downloader/selenium/WebDriverPool.java
+++ b/webmagic-selenium/src/main/java/us/codecraft/webmagic/downloader/selenium/WebDriverPool.java
@@ -1,6 +1,5 @@
package us.codecraft.webmagic.downloader.selenium;
-import org.apache.log4j.Logger;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.firefox.FirefoxDriver;
@@ -8,6 +7,8 @@ import org.openqa.selenium.phantomjs.PhantomJSDriver;
import org.openqa.selenium.phantomjs.PhantomJSDriverService;
import org.openqa.selenium.remote.DesiredCapabilities;
import org.openqa.selenium.remote.RemoteWebDriver;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.FileReader;
import java.io.IOException;
@@ -27,7 +28,7 @@ import java.util.concurrent.atomic.AtomicInteger;
* Time: 下午1:41
*/
class WebDriverPool {
- private Logger logger = Logger.getLogger(getClass());
+ private Logger logger = LoggerFactory.getLogger(getClass());
private final static int DEFAULT_CAPACITY = 5;