Merge branch 'develop' of github.com:code4craft/webmagic into develop

pull/1006/merge
Sutra Zhou 3 years ago
commit 692605bd75

@ -106,7 +106,7 @@ public class Spider implements Runnable, Task {
private Date startTime; private Date startTime;
private int emptySleepTime = 30000; private long emptySleepTime = 30000;
/** /**
* create a spider with pageProcessor. * create a spider with pageProcessor.
@ -306,15 +306,36 @@ public class Spider implements Runnable, Task {
checkRunningStat(); checkRunningStat();
initComponent(); initComponent();
logger.info("Spider {} started!", getUUID()); logger.info("Spider {} started!", getUUID());
// interrupt won't be necessarily detected
while (!Thread.currentThread().isInterrupted() && stat.get() == STAT_RUNNING) { while (!Thread.currentThread().isInterrupted() && stat.get() == STAT_RUNNING) {
final Request request = scheduler.poll(this); Request poll = scheduler.poll(this);
if (request == null) { if (poll == null) {
if (threadPool.getThreadAlive() == 0 && exitWhenComplete) { if (threadPool.getThreadAlive() == 0) {
//no alive thread anymore, try again
poll = scheduler.poll(this);
if (poll == null) {
if (exitWhenComplete) {
break;
} else {
// wait
try {
Thread.sleep(emptySleepTime);
continue;
} catch (InterruptedException e) {
break; break;
} }
// wait until new url added }
waitNewUrl(); }
} else { } else {
// wait until new url added
if (waitNewUrl())
//if interrupted
break;
continue;
}
}
final Request request = poll;
//this may swallow the interruption
threadPool.execute(new Runnable() { threadPool.execute(new Runnable() {
@Override @Override
public void run() { public void run() {
@ -331,7 +352,6 @@ public class Spider implements Runnable, Task {
} }
}); });
} }
}
stat.set(STAT_STOPPED); stat.set(STAT_STOPPED);
// release some resources // release some resources
if (destroyWhenExit) { if (destroyWhenExit) {
@ -565,16 +585,24 @@ public class Spider implements Runnable, Task {
return this; return this;
} }
private void waitNewUrl() { /**
*
* @return isInterrupted
*/
private boolean waitNewUrl() {
// now there may not be any thread live
newUrlLock.lock(); newUrlLock.lock();
try { try {
//double check //double check, unnecessary, unless very fast concurrent
if (threadPool.getThreadAlive() == 0 && exitWhenComplete) { if (threadPool.getThreadAlive() == 0) {
return; return false;
} }
//wait for amount of time
newUrlCondition.await(emptySleepTime, TimeUnit.MILLISECONDS); newUrlCondition.await(emptySleepTime, TimeUnit.MILLISECONDS);
return false;
} catch (InterruptedException e) { } catch (InterruptedException e) {
logger.warn("waitNewUrl - interrupted, error {}", e); // logger.warn("waitNewUrl - interrupted, error {}", e);
return true;
} finally { } finally {
newUrlLock.unlock(); newUrlLock.unlock();
} }
@ -772,7 +800,10 @@ public class Spider implements Runnable, Task {
* *
* @param emptySleepTime In MILLISECONDS. * @param emptySleepTime In MILLISECONDS.
*/ */
public void setEmptySleepTime(int emptySleepTime) { public void setEmptySleepTime(long emptySleepTime) {
if(emptySleepTime<=0){
throw new IllegalArgumentException("emptySleepTime should be more than zero!");
}
this.emptySleepTime = emptySleepTime; this.emptySleepTime = emptySleepTime;
} }
} }

Loading…
Cancel
Save