为downloader增加了一个新方法,可设置线程数

pull/17/head
yihua.huang 12 years ago
parent 6a87a778fd
commit e87aabf8fd

@ -58,6 +58,8 @@ public class Spider implements Runnable, Task {
private ExecutorService executorService; private ExecutorService executorService;
private int threadNum = 1;
private AtomicInteger stat = new AtomicInteger(STAT_INIT); private AtomicInteger stat = new AtomicInteger(STAT_INIT);
private final static int STAT_INIT = 0; private final static int STAT_INIT = 0;
@ -144,6 +146,10 @@ public class Spider implements Runnable, Task {
if (downloader == null) { if (downloader == null) {
this.downloader = new HttpClientDownloader(); this.downloader = new HttpClientDownloader();
} }
if (pipelines.isEmpty()) {
pipelines.add(new ConsolePipeline());
}
downloader.setThread(threadNum);
} }
@Override @Override
@ -158,9 +164,6 @@ public class Spider implements Runnable, Task {
} }
} }
Request request = scheduler.poll(this); Request request = scheduler.poll(this);
if (pipelines.isEmpty()) {
pipelines.add(new ConsolePipeline());
}
//singel thread //singel thread
if (executorService == null) { if (executorService == null) {
while (request != null) { while (request != null) {
@ -211,9 +214,9 @@ public class Spider implements Runnable, Task {
} }
} }
private void destroyEach(Object object){ private void destroyEach(Object object) {
if (object instanceof Destroyable) { if (object instanceof Destroyable) {
((Destroyable)object).destroy(); ((Destroyable) object).destroy();
} }
} }
@ -267,12 +270,10 @@ public class Spider implements Runnable, Task {
*/ */
public Spider thread(int threadNum) { public Spider thread(int threadNum) {
checkIfNotRunning(); checkIfNotRunning();
this.threadNum = threadNum;
if (threadNum <= 0) { if (threadNum <= 0) {
throw new IllegalArgumentException("threadNum should be more than one!"); throw new IllegalArgumentException("threadNum should be more than one!");
} }
if (downloader==null || downloader instanceof HttpClientDownloader){
downloader = new HttpClientDownloader(threadNum);
}
if (threadNum == 1) { if (threadNum == 1) {
return this; return this;
} }

@ -6,6 +6,7 @@ import us.codecraft.webmagic.Task;
/** /**
* Downloader是webmagic下载页面的接口。webmagic默认使用了HttpComponent作为下载器。<br> * Downloader是webmagic下载页面的接口。webmagic默认使用了HttpComponent作为下载器。<br>
*
* @author code4crafter@gmail.com <br> * @author code4crafter@gmail.com <br>
* Date: 13-4-21 * Date: 13-4-21
* Time: 12:14 * Time: 12:14
@ -20,4 +21,12 @@ public interface Downloader {
* @return page * @return page
*/ */
public Page download(Request request, Task task); public Page download(Request request, Task task);
/**
 * Tells the downloader how many threads the spider is configured to use.<br>
 * Implementations that keep a pool may use the value to size it; an
 * implementation that has no use for the value may leave this method empty.<br>
 *
 * @param thread the thread count supplied by the caller
 */
public void setThread(int thread);
} }

@ -67,6 +67,11 @@ public class FileDownloader implements Downloader {
return page; return page;
} }
/**
 * No-op: this implementation ignores the supplied thread count.
 *
 * @param thread the thread count supplied by the caller (unused)
 */
@Override
public void setThread(int thread) {
// intentionally empty
}
private String getHtml(BufferedReader bufferedReader) throws IOException { private String getHtml(BufferedReader bufferedReader) throws IOException {
String line; String line;
StringBuilder htmlBuilder= new StringBuilder(); StringBuilder htmlBuilder= new StringBuilder();

@ -32,14 +32,6 @@ public class HttpClientDownloader implements Downloader {
private int poolSize; private int poolSize;
/**
 * Creates a downloader and stores the given pool size.
 *
 * @param poolSize value assigned to the {@code poolSize} field
 */
public HttpClientDownloader(int poolSize) {
this.poolSize = poolSize;
}
/**
 * Creates a downloader with a pool size of 5 by delegating to
 * {@link #HttpClientDownloader(int)}.
 */
public HttpClientDownloader() {
this(5);
}
@Override @Override
public Page download(Request request, Task task) { public Page download(Request request, Task task) {
Site site = task.getSite(); Site site = task.getSite();
@ -90,6 +82,11 @@ public class HttpClientDownloader implements Downloader {
return null; return null;
} }
/**
 * Stores the supplied thread count as this downloader's pool size.
 *
 * @param thread value assigned to the {@code poolSize} field
 */
@Override
public void setThread(int thread) {
    this.poolSize = thread;
}
private void handleGzip(HttpResponse httpResponse) { private void handleGzip(HttpResponse httpResponse) {
Header ceheader = httpResponse.getEntity().getContentEncoding(); Header ceheader = httpResponse.getEntity().getContentEncoding();
if (ceheader != null) { if (ceheader != null) {

@ -27,12 +27,14 @@ import java.util.Map;
*/ */
public class SeleniumDownloader implements Downloader, Destroyable { public class SeleniumDownloader implements Downloader, Destroyable {
private WebDriverPool webDriverPool; private volatile WebDriverPool webDriverPool;
private Logger logger = Logger.getLogger(getClass()); private Logger logger = Logger.getLogger(getClass());
private int sleepTime = 0; private int sleepTime = 0;
private int poolSize = 1;
/** /**
* *
* *
@ -40,16 +42,11 @@ public class SeleniumDownloader implements Downloader, Destroyable {
*/ */
public SeleniumDownloader(String chromeDriverPath) { public SeleniumDownloader(String chromeDriverPath) {
System.getProperties().setProperty("webdriver.chrome.driver", chromeDriverPath); System.getProperties().setProperty("webdriver.chrome.driver", chromeDriverPath);
webDriverPool = new WebDriverPool();
}
public SeleniumDownloader(String chromeDriverPath, int poolSize) {
System.getProperties().setProperty("webdriver.chrome.driver", chromeDriverPath);
webDriverPool = new WebDriverPool(poolSize);
} }
/** /**
* set sleep time to wait until load success * set sleep time to wait until load success
*
* @param sleepTime * @param sleepTime
* @return this * @return this
*/ */
@ -60,6 +57,7 @@ public class SeleniumDownloader implements Downloader, Destroyable {
@Override @Override
public Page download(Request request, Task task) { public Page download(Request request, Task task) {
checkInit();
WebDriver webDriver; WebDriver webDriver;
try { try {
webDriver = webDriverPool.get(); webDriver = webDriverPool.get();
@ -93,6 +91,19 @@ public class SeleniumDownloader implements Downloader, Destroyable {
return page; return page;
} }
/**
 * Lazily creates the {@code WebDriverPool}, sized by {@code poolSize}, the
 * first time it is needed.
 *
 * <p>The null check is repeated inside the {@code synchronized} block.
 * Without it, two threads that both pass the outer check would each create
 * a pool, and the first pool would be overwritten without being closed.
 */
private void checkInit() {
    if (webDriverPool == null) {
        synchronized (this) {
            // Re-check under the lock: another thread may have created the
            // pool between the outer check and acquiring the monitor.
            if (webDriverPool == null) {
                webDriverPool = new WebDriverPool(poolSize);
            }
        }
    }
}
/**
 * Stores the supplied thread count as the pool size. The stored value is
 * passed to the {@code WebDriverPool} constructor when {@code checkInit()}
 * creates the pool.
 *
 * @param thread value assigned to the {@code poolSize} field
 */
@Override
public void setThread(int thread) {
    poolSize = thread;
}
@Override @Override
public void destroy() { public void destroy() {
webDriverPool.closeAll(); webDriverPool.closeAll();

Loading…
Cancel
Save