为downloader增加了一个新方法,可设置线程数

pull/17/head
yihua.huang
parent 6a87a778fd
commit e87aabf8fd

@ -58,6 +58,8 @@ public class Spider implements Runnable, Task {
private ExecutorService executorService;
private int threadNum = 1;
private AtomicInteger stat = new AtomicInteger(STAT_INIT);
private final static int STAT_INIT = 0;
@ -144,6 +146,10 @@ public class Spider implements Runnable, Task {
if (downloader == null) {
this.downloader = new HttpClientDownloader();
}
if (pipelines.isEmpty()) {
pipelines.add(new ConsolePipeline());
}
downloader.setThread(threadNum);
}
@Override
@ -158,9 +164,6 @@ public class Spider implements Runnable, Task {
}
}
Request request = scheduler.poll(this);
if (pipelines.isEmpty()) {
pipelines.add(new ConsolePipeline());
}
//single thread
if (executorService == null) {
while (request != null) {
@ -211,9 +214,9 @@ public class Spider implements Runnable, Task {
}
}
private void destroyEach(Object object){
private void destroyEach(Object object) {
if (object instanceof Destroyable) {
((Destroyable)object).destroy();
((Destroyable) object).destroy();
}
}
@ -267,12 +270,10 @@ public class Spider implements Runnable, Task {
*/
public Spider thread(int threadNum) {
checkIfNotRunning();
this.threadNum = threadNum;
if (threadNum <= 0) {
throw new IllegalArgumentException("threadNum should be more than one!");
}
if (downloader==null || downloader instanceof HttpClientDownloader){
downloader = new HttpClientDownloader(threadNum);
}
if (threadNum == 1) {
return this;
}

@ -6,9 +6,10 @@ import us.codecraft.webmagic.Task;
/**
* Downloader is the interface webmagic uses to download pages; by default webmagic uses HttpComponent.<br>
*
* @author code4crafter@gmail.com <br>
* Date: 13-4-21
* Time: 12:14
* Date: 13-4-21
* Time: 12:14
*/
public interface Downloader {
@ -20,4 +21,12 @@ public interface Downloader {
* @return page
*/
public Page download(Request request, Task task);
/**
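* Tells the Downloader how many threads will be used; multi-threaded crawling generally needs Downloader support.<br>
* Implementations that do not depend on the thread count may leave this method empty.<br>
*
* @param thread the number of threads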
*/
public void setThread(int thread);
}

@ -67,6 +67,11 @@ public class FileDownloader implements Downloader {
return page;
}
/**
 * No-op implementation: the requested thread count is ignored.
 *
 * @param thread requested thread count (unused)
 */
@Override
public void setThread(int thread) {
}
private String getHtml(BufferedReader bufferedReader) throws IOException {
String line;
StringBuilder htmlBuilder= new StringBuilder();

@ -32,14 +32,6 @@ public class HttpClientDownloader implements Downloader {
private int poolSize;
/**
 * Creates a downloader with the given pool size.
 *
 * @param poolSize value stored in the {@code poolSize} field
 */
public HttpClientDownloader(int poolSize) {
this.poolSize = poolSize;
}
/**
 * Creates a downloader with a pool size of 5 by delegating to
 * the {@code HttpClientDownloader(int)} constructor.
 */
public HttpClientDownloader() {
this(5);
}
@Override
public Page download(Request request, Task task) {
Site site = task.getSite();
@ -90,6 +82,11 @@ public class HttpClientDownloader implements Downloader {
return null;
}
/**
 * Records the requested thread count as this downloader's pool size.
 *
 * @param thread thread count to store in {@code poolSize}
 */
@Override
public void setThread(int thread) {
    this.poolSize = thread;
}
private void handleGzip(HttpResponse httpResponse) {
Header ceheader = httpResponse.getEntity().getContentEncoding();
if (ceheader != null) {

@ -27,12 +27,14 @@ import java.util.Map;
*/
public class SeleniumDownloader implements Downloader, Destroyable {
private WebDriverPool webDriverPool;
private volatile WebDriverPool webDriverPool;
private Logger logger = Logger.getLogger(getClass());
private int sleepTime = 0;
private int poolSize = 1;
/**
*
*
@ -40,16 +42,11 @@ public class SeleniumDownloader implements Downloader, Destroyable {
*/
public SeleniumDownloader(String chromeDriverPath) {
// Publish the driver location through the "webdriver.chrome.driver" system property.
System.getProperties().setProperty("webdriver.chrome.driver", chromeDriverPath);
// Build the pool with WebDriverPool's no-argument constructor.
webDriverPool = new WebDriverPool();
}
/**
 * Creates a downloader and eagerly builds its {@code WebDriverPool}.
 *
 * @param chromeDriverPath value written to the {@code webdriver.chrome.driver} system property
 * @param poolSize         argument passed to the {@code WebDriverPool} constructor
 */
public SeleniumDownloader(String chromeDriverPath, int poolSize) {
System.getProperties().setProperty("webdriver.chrome.driver", chromeDriverPath);
webDriverPool = new WebDriverPool(poolSize);
}
/**
* set sleep time to wait until load success
*
* @param sleepTime
* @return this
*/
@ -60,6 +57,7 @@ public class SeleniumDownloader implements Downloader, Destroyable {
@Override
public Page download(Request request, Task task) {
checkInit();
WebDriver webDriver;
try {
webDriver = webDriverPool.get();
@ -93,6 +91,19 @@ public class SeleniumDownloader implements Downloader, Destroyable {
return page;
}
/**
 * Lazily creates the {@code WebDriverPool} the first time it is needed,
 * sized by the current {@code poolSize}.
 *
 * The field is tested once without the lock (fast path once initialized)
 * and once more while holding it, so concurrent first callers build the
 * pool only once instead of each replacing the other's instance.
 */
private void checkInit() {
    if (webDriverPool == null) {
        synchronized (this) {
            // Re-check under the lock: another thread may have created the pool
            // between the unsynchronized test above and acquiring the monitor.
            if (webDriverPool == null) {
                webDriverPool = new WebDriverPool(poolSize);
            }
        }
    }
}
/**
 * Stores the requested thread count as the pool size.
 *
 * @param thread thread count to record in {@code poolSize}
 */
@Override
public void setThread(int thread) {
    poolSize = thread;
}
@Override
public void destroy() {
webDriverPool.closeAll();

Loading…
Cancel
Save