diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java
index 2a8b78fb..a25fd024 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java
@@ -58,6 +58,8 @@ public class Spider implements Runnable, Task {
private ExecutorService executorService;
+ private int threadNum = 1;
+
private AtomicInteger stat = new AtomicInteger(STAT_INIT);
private final static int STAT_INIT = 0;
@@ -144,6 +146,10 @@ public class Spider implements Runnable, Task {
if (downloader == null) {
this.downloader = new HttpClientDownloader();
}
+ if (pipelines.isEmpty()) {
+ pipelines.add(new ConsolePipeline());
+ }
+ downloader.setThread(threadNum);
}
@Override
@@ -158,9 +164,6 @@ public class Spider implements Runnable, Task {
}
}
Request request = scheduler.poll(this);
- if (pipelines.isEmpty()) {
- pipelines.add(new ConsolePipeline());
- }
//singel thread
if (executorService == null) {
while (request != null) {
@@ -211,9 +214,9 @@ public class Spider implements Runnable, Task {
}
}
-    private void destroyEach(Object object){
+    /**
+     * Calls {@code destroy()} on the given object when it implements {@link Destroyable};
+     * any other object is left untouched.
+     *
+     * @param object component to release
+     */
+    private void destroyEach(Object object) {
if (object instanceof Destroyable) {
-            ((Destroyable)object).destroy();
+            ((Destroyable) object).destroy();
}
}
@@ -267,12 +270,10 @@ public class Spider implements Runnable, Task {
*/
public Spider thread(int threadNum) {
checkIfNotRunning();
if (threadNum <= 0) {
throw new IllegalArgumentException("threadNum should be more than one!");
}
-        if (downloader==null || downloader instanceof HttpClientDownloader){
-            downloader = new HttpClientDownloader(threadNum);
-        }
+        // Store the value only after validation, so a rejected thread count is never
+        // kept on the spider and later handed to downloader.setThread(...).
+        this.threadNum = threadNum;
if (threadNum == 1) {
return this;
}
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/Downloader.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/Downloader.java
index c431fc3b..9a7f59a3 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/Downloader.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/Downloader.java
@@ -6,9 +6,10 @@ import us.codecraft.webmagic.Task;
/**
* Downloader是webmagic下载页面的接口。webmagic默认使用了HttpComponent作为下载器,一般情况,你无需自己实现这个接口。
+ *
* @author code4crafter@gmail.com
- * Date: 13-4-21
- * Time: 下午12:14
+ * Date: 13-4-21
+ * Time: 下午12:14
*/
public interface Downloader {
@@ -20,4 +21,12 @@ public interface Downloader {
* @return page
*/
public Page download(Request request, Task task);
+
+    /**
+     * Sets the number of threads. A multi-threaded crawl generally needs support from the Downloader.
+     * Implementations that do not care about multi-threading may leave this method as a no-op.
+     *
+     * @param thread number of threads
+     */
+    public void setThread(int thread);
}
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/FileDownloader.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/FileDownloader.java
index d22bf081..722a2eb7 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/FileDownloader.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/FileDownloader.java
@@ -67,6 +67,11 @@ public class FileDownloader implements Downloader {
return page;
}
+    @Override
+    public void setThread(int thread) {
+        // Intentionally empty: this downloader ignores the thread count.
+    }
+
private String getHtml(BufferedReader bufferedReader) throws IOException {
String line;
StringBuilder htmlBuilder= new StringBuilder();
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java
index d7634198..7956cd1e 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java
@@ -32,14 +32,6 @@ public class HttpClientDownloader implements Downloader {
private int poolSize;
- public HttpClientDownloader(int poolSize) {
- this.poolSize = poolSize;
- }
-
- public HttpClientDownloader() {
- this(5);
- }
-
@Override
public Page download(Request request, Task task) {
Site site = task.getSite();
@@ -90,6 +82,11 @@ public class HttpClientDownloader implements Downloader {
return null;
}
+    /**
+     * Records the given thread count as this downloader's pool size.
+     *
+     * @param thread number of threads
+     */
+    @Override
+    public void setThread(int thread) {
+        this.poolSize = thread;
+    }
+
private void handleGzip(HttpResponse httpResponse) {
Header ceheader = httpResponse.getEntity().getContentEncoding();
if (ceheader != null) {
diff --git a/webmagic-plugin/webmagic-selenium/src/main/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloader.java b/webmagic-plugin/webmagic-selenium/src/main/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloader.java
index 54e3c9c0..76ac0508 100644
--- a/webmagic-plugin/webmagic-selenium/src/main/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloader.java
+++ b/webmagic-plugin/webmagic-selenium/src/main/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloader.java
@@ -27,12 +27,14 @@ import java.util.Map;
*/
public class SeleniumDownloader implements Downloader, Destroyable {
- private WebDriverPool webDriverPool;
+ private volatile WebDriverPool webDriverPool;
private Logger logger = Logger.getLogger(getClass());
private int sleepTime = 0;
+ private int poolSize = 1;
+
/**
* 新建
*
@@ -40,16 +42,11 @@ public class SeleniumDownloader implements Downloader, Destroyable {
*/
public SeleniumDownloader(String chromeDriverPath) {
System.getProperties().setProperty("webdriver.chrome.driver", chromeDriverPath);
- webDriverPool = new WebDriverPool();
- }
-
- public SeleniumDownloader(String chromeDriverPath, int poolSize) {
- System.getProperties().setProperty("webdriver.chrome.driver", chromeDriverPath);
- webDriverPool = new WebDriverPool(poolSize);
}
/**
* set sleep time to wait until load success
+ *
* @param sleepTime
* @return this
*/
@@ -60,6 +57,7 @@ public class SeleniumDownloader implements Downloader, Destroyable {
@Override
public Page download(Request request, Task task) {
+ checkInit();
WebDriver webDriver;
try {
webDriver = webDriverPool.get();
@@ -93,6 +91,19 @@ public class SeleniumDownloader implements Downloader, Destroyable {
return page;
}
+    /**
+     * Lazily creates the {@code WebDriverPool} on first use, sized by the current {@code poolSize}.
+     * <p>
+     * The field is checked again after the lock is acquired, so threads that race through the
+     * first unsynchronized check do not each construct a pool and overwrite one another's.
+     * <p>
+     * NOTE(review): {@code destroy()} calls {@code webDriverPool.closeAll()} unconditionally; now that
+     * the pool is created lazily, that call should be guarded against a still-null pool.
+     */
+    private void checkInit() {
+        if (webDriverPool == null) {
+            synchronized (this) {
+                // Second check: another thread may have created the pool while we waited for the lock.
+                if (webDriverPool == null) {
+                    webDriverPool = new WebDriverPool(poolSize);
+                }
+            }
+        }
+    }
+
+    /**
+     * Stores the pool size that {@code checkInit()} uses when it creates the {@code WebDriverPool}.
+     * Once the pool exists, a later call only updates the stored value and does not resize the pool.
+     *
+     * @param thread number of threads
+     */
+    @Override
+    public void setThread(int thread) {
+        this.poolSize = thread;
+    }
+
@Override
public void destroy() {
webDriverPool.closeAll();