pull/121/head
yihua.huang 11 years ago
parent 3a666fcebf
commit 17e95f2a7f

@ -10,7 +10,7 @@ import java.util.concurrent.locks.ReentrantLock;
* Thread pool for workers.<br></br>
* Uses {@link java.util.concurrent.ExecutorService} as the inner implementation. <br></br>
* New feature: <br></br>
* 1. Block when thread pool is full to avoid poll many urls but not process. <br></br>
* 1. Block when the thread pool is full, to avoid polling many urls without processing them. <br></br>
* 2. Count of alive threads, for monitoring.
*
* @author code4crafter@gmail.com

@ -3,12 +3,14 @@ package us.codecraft.webmagic.configurable;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.utils.Experimental;
import java.util.List;
/**
* @author code4crafter@gmail.com <br>
*/
@Experimental
public class ConfigurablePageProcessor implements PageProcessor {
private Site site;

@ -5,8 +5,7 @@ import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.SpiderListener;
import us.codecraft.webmagic.processor.example.GithubRepoPageProcessor;
import us.codecraft.webmagic.processor.example.OschinaBlogPageProcessor;
import us.codecraft.webmagic.utils.Experimental;
import us.codecraft.webmagic.utils.IPUtils;
import javax.management.JMException;
@ -30,6 +29,7 @@ import java.util.concurrent.atomic.AtomicInteger;
* @author code4crafter@gmail.com
* @since 0.5.0
*/
@Experimental
public class SpiderMonitor {
private enum Type {
@ -226,22 +226,4 @@ public class SpiderMonitor {
return this;
}
/**
 * Demo entry point: builds two example spiders, registers both with a
 * {@link SpiderMonitor}, starts the monitor's JMX server and then starts
 * the spiders.
 *
 * @param args command-line arguments (not used)
 * @throws Exception if any of the setup or start calls fails
 */
public static void main(String[] args) throws Exception {
    Spider blogCrawler = Spider.create(new OschinaBlogPageProcessor())
            .addUrl("http://my.oschina.net/flashsword/blog")
            .thread(2);
    Spider repoCrawler = Spider.create(new GithubRepoPageProcessor())
            .addUrl("https://github.com/code4craft");

    SpiderMonitor monitor = new SpiderMonitor();
    monitor.register(blogCrawler, repoCrawler);

    // Calling server().jmxStart() on the monitor is what allows remote connections.
    // ONLY ONE server can start per machine;
    // the others will be registered.
    monitor.server().jmxStart();

    blogCrawler.start();
    // The repo crawler gets thread(10) applied right before it is started.
    repoCrawler.thread(10).start();
}
}

Loading…
Cancel
Save