From 17e95f2a7feb713fcacd61f9ade7b140d9587f87 Mon Sep 17 00:00:00 2001 From: "yihua.huang" Date: Fri, 25 Apr 2014 18:39:01 +0800 Subject: [PATCH] comments --- .../selector/thread/CountableThreadPool.java | 2 +- .../ConfigurablePageProcessor.java | 2 ++ .../webmagic/monitor/SpiderMonitor.java | 22 ++----------------- 3 files changed, 5 insertions(+), 21 deletions(-) diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/thread/CountableThreadPool.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/thread/CountableThreadPool.java index 0121cf25..b20ff155 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/thread/CountableThreadPool.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/thread/CountableThreadPool.java @@ -10,7 +10,7 @@ import java.util.concurrent.locks.ReentrantLock; * Thread pool for workers.

* Use {@link java.util.concurrent.ExecutorService} as inner implement.

* New feature:

- * 1. Block when thread pool is full to avoid poll many urls but not process.

+ * 1. Block when thread pool is full to avoid polling many urls without processing them.

* 2. Count of thread alive for monitor. * * @author code4crafer@gmail.com diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/configurable/ConfigurablePageProcessor.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/configurable/ConfigurablePageProcessor.java index 36615d8d..902dfddb 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/configurable/ConfigurablePageProcessor.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/configurable/ConfigurablePageProcessor.java @@ -3,12 +3,14 @@ package us.codecraft.webmagic.configurable; import us.codecraft.webmagic.Page; import us.codecraft.webmagic.Site; import us.codecraft.webmagic.processor.PageProcessor; +import us.codecraft.webmagic.utils.Experimental; import java.util.List; /** * @author code4crafter@gmail.com
*/ +@Experimental public class ConfigurablePageProcessor implements PageProcessor { private Site site; diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java index 0783b7e9..ea9b374e 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java @@ -5,8 +5,7 @@ import org.slf4j.LoggerFactory; import us.codecraft.webmagic.Request; import us.codecraft.webmagic.Spider; import us.codecraft.webmagic.SpiderListener; -import us.codecraft.webmagic.processor.example.GithubRepoPageProcessor; -import us.codecraft.webmagic.processor.example.OschinaBlogPageProcessor; +import us.codecraft.webmagic.utils.Experimental; import us.codecraft.webmagic.utils.IPUtils; import javax.management.JMException; @@ -30,6 +29,7 @@ import java.util.concurrent.atomic.AtomicInteger; * @author code4crafer@gmail.com * @since 0.5.0 */ +@Experimental public class SpiderMonitor { private enum Type { @@ -226,22 +226,4 @@ public class SpiderMonitor { return this; } - public static void main(String[] args) throws Exception { - - Spider oschinaSpider = Spider.create(new OschinaBlogPageProcessor()) - .addUrl("http://my.oschina.net/flashsword/blog").thread(2); - Spider githubSpider = Spider.create(new GithubRepoPageProcessor()) - .addUrl("https://github.com/code4craft"); - - SpiderMonitor spiderMonitor = new SpiderMonitor(); - spiderMonitor.register(oschinaSpider, githubSpider); - //If you want to connect it from remote, use spiderMonitor.server().jmxStart(); - //ONLY ONE server can start for a machine. - //Others will be registered - spiderMonitor.server().jmxStart(); - oschinaSpider.start(); - githubSpider.thread(10).start(); - - } - }