diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java index aeca08fb..1f8a1947 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java @@ -111,7 +111,7 @@ public class Request implements Serializable { /** * The http method of the request. Get for default. * @return httpMethod - * @see us.codecraft.webmagic.constant.HttpConstant.Method + * @see us.codecraft.webmagic.utils.HttpConstant.Method * @since 0.5.0 */ public String getMethod() { diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java index 25afde97..a7c7bf85 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java @@ -50,7 +50,7 @@ public class Site { private boolean useGzip = true; /** - * @see us.codecraft.webmagic.constant.HttpConstant.Header + * @see us.codecraft.webmagic.utils.HttpConstant.Header * @deprecated */ public static interface HeaderConst { diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java index a03dee17..68b2e113 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java @@ -8,7 +8,6 @@ import org.slf4j.LoggerFactory; import us.codecraft.webmagic.downloader.Downloader; import us.codecraft.webmagic.downloader.HttpClientDownloader; -import us.codecraft.webmagic.monitor.SpiderListener; import us.codecraft.webmagic.pipeline.CollectorPipeline; import us.codecraft.webmagic.pipeline.ConsolePipeline; import us.codecraft.webmagic.pipeline.Pipeline; diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/monitor/SpiderListener.java 
b/webmagic-core/src/main/java/us/codecraft/webmagic/SpiderListener.java similarity index 63% rename from webmagic-core/src/main/java/us/codecraft/webmagic/monitor/SpiderListener.java rename to webmagic-core/src/main/java/us/codecraft/webmagic/SpiderListener.java index 7a6c6876..06781803 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/monitor/SpiderListener.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/SpiderListener.java @@ -1,8 +1,8 @@ -package us.codecraft.webmagic.monitor; - -import us.codecraft.webmagic.Request; +package us.codecraft.webmagic; /** + * Listener of Spider on page processing. Used for monitoring and the like. + * * @author code4crafer@gmail.com * @since 0.5.0 */ diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java index 0e170f44..eeae70e9 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java @@ -18,7 +18,7 @@ import us.codecraft.webmagic.Page; import us.codecraft.webmagic.Request; import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Task; -import us.codecraft.webmagic.constant.HttpConstant; +import us.codecraft.webmagic.utils.HttpConstant; import us.codecraft.webmagic.selector.PlainText; import us.codecraft.webmagic.utils.UrlUtils; diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/LocalDuplicatedRemovedScheduler.java b/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/LocalDuplicatedRemovedScheduler.java index 015aa47b..1ec128b7 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/LocalDuplicatedRemovedScheduler.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/LocalDuplicatedRemovedScheduler.java @@ -5,7 +5,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; 
import us.codecraft.webmagic.Request; import us.codecraft.webmagic.Task; -import us.codecraft.webmagic.monitor.MonitorableScheduler; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/monitor/MonitorableScheduler.java b/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/MonitorableScheduler.java similarity index 77% rename from webmagic-core/src/main/java/us/codecraft/webmagic/monitor/MonitorableScheduler.java rename to webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/MonitorableScheduler.java index 11889acf..ca76dfae 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/monitor/MonitorableScheduler.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/MonitorableScheduler.java @@ -1,7 +1,6 @@ -package us.codecraft.webmagic.monitor; +package us.codecraft.webmagic.scheduler; import us.codecraft.webmagic.Task; -import us.codecraft.webmagic.scheduler.Scheduler; /** * The scheduler whose requests can be counted for monitor. diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/constant/HttpConstant.java b/webmagic-core/src/main/java/us/codecraft/webmagic/utils/HttpConstant.java similarity index 94% rename from webmagic-core/src/main/java/us/codecraft/webmagic/constant/HttpConstant.java rename to webmagic-core/src/main/java/us/codecraft/webmagic/utils/HttpConstant.java index 52f7ecb5..2a76ecca 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/constant/HttpConstant.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/utils/HttpConstant.java @@ -1,4 +1,4 @@ -package us.codecraft.webmagic.constant; +package us.codecraft.webmagic.utils; /** * Some constants of Http protocal. 
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/example/MonitorExample.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/example/MonitorExample.java
new file mode 100644
index 00000000..0ff145e6
--- /dev/null
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/example/MonitorExample.java
@@ -0,0 +1,33 @@
+package us.codecraft.webmagic.example;
+
+import us.codecraft.webmagic.Spider;
+import us.codecraft.webmagic.monitor.SpiderMonitor;
+import us.codecraft.webmagic.processor.example.GithubRepoPageProcessor;
+import us.codecraft.webmagic.processor.example.OschinaBlogPageProcessor;
+
+/**
+ * Example: register two spiders with a single {@link SpiderMonitor} and start them.
+ *
+ * @author code4crafer@gmail.com
+ */
+public class MonitorExample {
+
+    public static void main(String[] args) throws Exception {
+
+        Spider oschinaSpider = Spider.create(new OschinaBlogPageProcessor())
+                .addUrl("http://my.oschina.net/flashsword/blog").thread(2);
+        Spider githubSpider = Spider.create(new GithubRepoPageProcessor())
+                .addUrl("https://github.com/code4craft");
+
+        SpiderMonitor spiderMonitor = new SpiderMonitor();
+        spiderMonitor.register(oschinaSpider, githubSpider);
+        //If you want to connect it from remote, use spiderMonitor.server().jmxStart();
+        //ONLY ONE server can start for a machine.
+        //Others will be registered
+        spiderMonitor.server();
+        spiderMonitor.jmxStart();
+        oschinaSpider.start();
+        githubSpider.start();
+
+    }
+}
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java
similarity index 88%
rename from webmagic-core/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java
rename to webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java
index 4a02db11..ba9baea8 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java
@@ -1,9 +1,13 @@
 package us.codecraft.webmagic.monitor;
 
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import us.codecraft.webmagic.Request;
 import us.codecraft.webmagic.Spider;
+import us.codecraft.webmagic.SpiderListener;
 import us.codecraft.webmagic.processor.example.GithubRepoPageProcessor;
 import us.codecraft.webmagic.processor.example.OschinaBlogPageProcessor;
+import us.codecraft.webmagic.utils.IPUtils;
 
 import javax.management.JMException;
 import javax.management.MBeanServer;
@@ -15,6 +19,7 @@ import java.io.IOException;
 import java.lang.management.ManagementFactory;
 import java.rmi.registry.LocateRegistry;
 import java.rmi.registry.Registry;
+import java.rmi.server.ExportException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
@@ -30,6 +35,8 @@ public class SpiderMonitor {
         Server, Client, Local;
     }
 
+    private final Logger logger = LoggerFactory.getLogger(getClass());
+
     private static final int DEFAULT_SERVER_PORT = 14721;
 
     private static final String DEFAULT_SERVER_HOST = "localhost";
@@ -52,6 +59,7 @@ public class SpiderMonitor {
 
     /**
      * Register spider for monitor.
+     *
      * @param spiders
      * @return
      */
@@ -113,13 +121,18 @@ public class SpiderMonitor {
 
     /**
      * Start monitor as server mode.
+     *
      * @param port
      * @return
      * @throws IOException
      * @throws JMException
      */
     public SpiderMonitor server(int port) throws IOException, JMException {
-        Registry registry = LocateRegistry.createRegistry(port);
+        try {
+            LocateRegistry.createRegistry(port);
+        } catch (ExportException e) {
+            logger.warn("Start server fail, maybe the address is in using.", e);
+        }
         serverPort = port;
         serverHost = "localhost";
         type = Type.Server;
@@ -128,6 +141,7 @@ public class SpiderMonitor {
 
     /**
      * Start monitor as server mode.
+     *
      * @return
      * @throws IOException
      * @throws JMException
@@ -139,6 +153,7 @@ public class SpiderMonitor {
 
     /**
      * Start monitor as client mode.
+     *
      * @param serverHost
      * @param serverPort
      * @return
@@ -154,6 +169,7 @@ public class SpiderMonitor {
 
     /**
      * Start monitor as client mode.
+     *
      * @return
      * @throws IOException
      * @throws JMException
@@ -167,7 +183,7 @@ public class SpiderMonitor {
     }
 
     public SpiderMonitor jmxStart(String jndiServer, int rmiPort) throws IOException, JMException {
-        String jmxServerName = "WebMagic";
+        String jmxServerName = "WebMagic-" + IPUtils.getFirstNoLoopbackIPAddresses();
 
         // start JNDI
         MBeanServer localServer = ManagementFactory.getPlatformMBeanServer();
@@ -199,7 +215,10 @@ public class SpiderMonitor {
 
         SpiderMonitor spiderMonitor = new SpiderMonitor();
         spiderMonitor.register(oschinaSpider, githubSpider);
-        //
+        //If you want to connect it from remote, use spiderMonitor.server().jmxStart();
+        //ONLY ONE server can start for a machine.
+        //Others will be registered
+        spiderMonitor.server();
         spiderMonitor.jmxStart();
         oschinaSpider.start();
         githubSpider.start();
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/monitor/SpiderStatus.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderStatus.java
similarity index 97%
rename from webmagic-core/src/main/java/us/codecraft/webmagic/monitor/SpiderStatus.java
rename to webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderStatus.java
index 889555cf..af08526c 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/monitor/SpiderStatus.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderStatus.java
@@ -3,6 +3,7 @@ package us.codecraft.webmagic.monitor;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import us.codecraft.webmagic.Spider;
+import us.codecraft.webmagic.scheduler.MonitorableScheduler;
 
 import java.util.List;
 
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/monitor/SpiderStatusMXBean.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderStatusMXBean.java
similarity index 100%
rename from webmagic-core/src/main/java/us/codecraft/webmagic/monitor/SpiderStatusMXBean.java
rename to webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderStatusMXBean.java
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java
index 16f91472..cd3a0b65 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java
@@ -7,7 +7,6 @@ import redis.clients.jedis.JedisPool;
 import redis.clients.jedis.JedisPoolConfig;
 import us.codecraft.webmagic.Request;
 import us.codecraft.webmagic.Task;
-import us.codecraft.webmagic.monitor.MonitorableScheduler;
 
 /**
  * Use Redis as url scheduler for distributed crawlers.
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/IPUtils.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/IPUtils.java
new file mode 100644
index 00000000..3d416964
--- /dev/null
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/IPUtils.java
@@ -0,0 +1,53 @@
+package us.codecraft.webmagic.utils;
+
+import java.net.Inet6Address;
+import java.net.InetAddress;
+import java.net.NetworkInterface;
+import java.net.SocketException;
+import java.util.Enumeration;
+
+/**
+ * Helper for picking an IP address of the local machine.
+ *
+ * @author code4crafer@gmail.com
+ * @since 0.5.0
+ */
+public abstract class IPUtils {
+
+    /**
+     * Pick an address of this machine that is not a loopback address.
+     * The first non-loopback address that is not an {@link Inet6Address} is returned;
+     * when there is none, the last non-loopback {@link Inet6Address} seen is used.
+     *
+     * @return the host-address string of the chosen address
+     * @throws SocketException if the interfaces cannot be enumerated, or if the
+     *                         machine has no non-loopback address at all
+     */
+    public static String getFirstNoLoopbackIPAddresses() throws SocketException {
+        Enumeration<NetworkInterface> networkInterfaces = NetworkInterface.getNetworkInterfaces();
+        if (networkInterfaces == null) {
+            throw new SocketException("No network interface found");
+        }
+
+        InetAddress fallbackAddress = null;
+        while (networkInterfaces.hasMoreElements()) {
+            Enumeration<InetAddress> inetAddresses = networkInterfaces.nextElement().getInetAddresses();
+            while (inetAddresses.hasMoreElements()) {
+                InetAddress address = inetAddresses.nextElement();
+                if (address.isLoopbackAddress()) {
+                    continue;
+                }
+                if (!(address instanceof Inet6Address)) {
+                    return address.getHostAddress();
+                }
+                fallbackAddress = address;
+            }
+        }
+
+        if (fallbackAddress == null) {
+            throw new SocketException("No non-loopback address found");
+        }
+        return fallbackAddress.getHostAddress();
+    }
+
+}
diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/monitor/CustomSpiderStatus.java b/webmagic-extension/src/test/java/us/codecraft/webmagic/monitor/CustomSpiderStatus.java
similarity index 100%
rename from webmagic-core/src/test/java/us/codecraft/webmagic/monitor/CustomSpiderStatus.java
rename to webmagic-extension/src/test/java/us/codecraft/webmagic/monitor/CustomSpiderStatus.java
diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/monitor/CustomSpiderStatusMXBean.java b/webmagic-extension/src/test/java/us/codecraft/webmagic/monitor/CustomSpiderStatusMXBean.java
similarity index 100%
rename from webmagic-core/src/test/java/us/codecraft/webmagic/monitor/CustomSpiderStatusMXBean.java
rename to webmagic-extension/src/test/java/us/codecraft/webmagic/monitor/CustomSpiderStatusMXBean.java
diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/monitor/SpiderMonitorTest.java b/webmagic-extension/src/test/java/us/codecraft/webmagic/monitor/SpiderMonitorTest.java
similarity index 100%
rename from webmagic-core/src/test/java/us/codecraft/webmagic/monitor/SpiderMonitorTest.java
rename to webmagic-extension/src/test/java/us/codecraft/webmagic/monitor/SpiderMonitorTest.java
diff --git a/webmagic-extension/src/test/java/us/codecraft/webmagic/utils/IPUtilsTest.java b/webmagic-extension/src/test/java/us/codecraft/webmagic/utils/IPUtilsTest.java
new file mode 100644
index 00000000..9d78fb9a
--- /dev/null
+++ b/webmagic-extension/src/test/java/us/codecraft/webmagic/utils/IPUtilsTest.java
@@ -0,0 +1,14 @@
+package us.codecraft.webmagic.utils;
+
+import org.junit.Test;
+
+/**
+ * @author code4crafer@gmail.com
+ */
+public class IPUtilsTest {
+
+    @Test
+    public void testGetFirstNoLoopbackIPAddresses() throws Exception {
+        System.out.println(IPUtils.getFirstNoLoopbackIPAddresses());
+    }
+}