diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java
index aeca08fb..1f8a1947 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java
@@ -111,7 +111,7 @@ public class Request implements Serializable {
/**
* The http method of the request. Get for default.
* @return httpMethod
- * @see us.codecraft.webmagic.constant.HttpConstant.Method
+ * @see us.codecraft.webmagic.utils.HttpConstant.Method
* @since 0.5.0
*/
public String getMethod() {
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java
index 25afde97..a7c7bf85 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java
@@ -50,7 +50,7 @@ public class Site {
private boolean useGzip = true;
/**
- * @see us.codecraft.webmagic.constant.HttpConstant.Header
+ * @see us.codecraft.webmagic.utils.HttpConstant.Header
* @deprecated
*/
public static interface HeaderConst {
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java
index a03dee17..68b2e113 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java
@@ -8,7 +8,6 @@ import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.downloader.Downloader;
import us.codecraft.webmagic.downloader.HttpClientDownloader;
-import us.codecraft.webmagic.monitor.SpiderListener;
import us.codecraft.webmagic.pipeline.CollectorPipeline;
import us.codecraft.webmagic.pipeline.ConsolePipeline;
import us.codecraft.webmagic.pipeline.Pipeline;
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/monitor/SpiderListener.java b/webmagic-core/src/main/java/us/codecraft/webmagic/SpiderListener.java
similarity index 63%
rename from webmagic-core/src/main/java/us/codecraft/webmagic/monitor/SpiderListener.java
rename to webmagic-core/src/main/java/us/codecraft/webmagic/SpiderListener.java
index 7a6c6876..06781803 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/monitor/SpiderListener.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/SpiderListener.java
@@ -1,8 +1,8 @@
-package us.codecraft.webmagic.monitor;
-
-import us.codecraft.webmagic.Request;
+package us.codecraft.webmagic;
/**
+ * Listener of Spider on page processing. Used for monitoring and similar purposes.
+ *
* @author code4crafer@gmail.com
* @since 0.5.0
*/
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java
index 0e170f44..eeae70e9 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java
@@ -18,7 +18,7 @@ import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Task;
-import us.codecraft.webmagic.constant.HttpConstant;
+import us.codecraft.webmagic.utils.HttpConstant;
import us.codecraft.webmagic.selector.PlainText;
import us.codecraft.webmagic.utils.UrlUtils;
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/LocalDuplicatedRemovedScheduler.java b/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/LocalDuplicatedRemovedScheduler.java
index 015aa47b..1ec128b7 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/LocalDuplicatedRemovedScheduler.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/LocalDuplicatedRemovedScheduler.java
@@ -5,7 +5,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Task;
-import us.codecraft.webmagic.monitor.MonitorableScheduler;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/monitor/MonitorableScheduler.java b/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/MonitorableScheduler.java
similarity index 77%
rename from webmagic-core/src/main/java/us/codecraft/webmagic/monitor/MonitorableScheduler.java
rename to webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/MonitorableScheduler.java
index 11889acf..ca76dfae 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/monitor/MonitorableScheduler.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/scheduler/MonitorableScheduler.java
@@ -1,7 +1,6 @@
-package us.codecraft.webmagic.monitor;
+package us.codecraft.webmagic.scheduler;
import us.codecraft.webmagic.Task;
-import us.codecraft.webmagic.scheduler.Scheduler;
/**
* The scheduler whose requests can be counted for monitor.
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/constant/HttpConstant.java b/webmagic-core/src/main/java/us/codecraft/webmagic/utils/HttpConstant.java
similarity index 94%
rename from webmagic-core/src/main/java/us/codecraft/webmagic/constant/HttpConstant.java
rename to webmagic-core/src/main/java/us/codecraft/webmagic/utils/HttpConstant.java
index 52f7ecb5..2a76ecca 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/constant/HttpConstant.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/utils/HttpConstant.java
@@ -1,4 +1,4 @@
-package us.codecraft.webmagic.constant;
+package us.codecraft.webmagic.utils;
/**
* Some constants of Http protocal.
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/example/MonitorExample.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/example/MonitorExample.java
new file mode 100644
index 00000000..0ff145e6
--- /dev/null
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/example/MonitorExample.java
@@ -0,0 +1,44 @@
+package us.codecraft.webmagic.example;
+
+import us.codecraft.webmagic.Spider;
+import us.codecraft.webmagic.monitor.SpiderMonitor;
+import us.codecraft.webmagic.processor.example.GithubRepoPageProcessor;
+import us.codecraft.webmagic.processor.example.OschinaBlogPageProcessor;
+
+/**
+ * Example showing how to monitor several spiders with {@link SpiderMonitor}.
+ * <p>
+ * Two spiders are created and registered with a single monitor, the monitor is
+ * started in server mode with JMX, and finally both spiders are started.
+ *
+ * @author code4crafer@gmail.com
+ */
+public class MonitorExample {
+
+    /**
+     * Creates the example spiders, registers them for monitoring and starts them.
+     *
+     * @param args command line arguments (unused)
+     * @throws Exception if starting the monitor server or JMX fails
+     */
+    public static void main(String[] args) throws Exception {
+
+        Spider oschinaSpider = Spider.create(new OschinaBlogPageProcessor())
+                .addUrl("http://my.oschina.net/flashsword/blog").thread(2);
+        Spider githubSpider = Spider.create(new GithubRepoPageProcessor())
+                .addUrl("https://github.com/code4craft");
+
+        SpiderMonitor spiderMonitor = new SpiderMonitor();
+        spiderMonitor.register(oschinaSpider, githubSpider);
+        // To allow remote connections, start the monitor in server mode before
+        // starting JMX, i.e. spiderMonitor.server().jmxStart().
+        // ONLY ONE server can start for a machine; others will be registered.
+        // server() is invoked once only: a repeated call would try to create the
+        // registry on the same port again, which just ends in a logged warning.
+        spiderMonitor.server();
+        spiderMonitor.jmxStart();
+        oschinaSpider.start();
+        githubSpider.start();
+
+    }
+}
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java
similarity index 88%
rename from webmagic-core/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java
rename to webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java
index 4a02db11..ba9baea8 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderMonitor.java
@@ -1,9 +1,13 @@
package us.codecraft.webmagic.monitor;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Spider;
+import us.codecraft.webmagic.SpiderListener;
import us.codecraft.webmagic.processor.example.GithubRepoPageProcessor;
import us.codecraft.webmagic.processor.example.OschinaBlogPageProcessor;
+import us.codecraft.webmagic.utils.IPUtils;
import javax.management.JMException;
import javax.management.MBeanServer;
@@ -15,6 +19,7 @@ import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.rmi.registry.LocateRegistry;
import java.rmi.registry.Registry;
+import java.rmi.server.ExportException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
@@ -30,6 +35,8 @@ public class SpiderMonitor {
Server, Client, Local;
}
+ private Logger logger = LoggerFactory.getLogger(getClass());
+
private static final int DEFAULT_SERVER_PORT = 14721;
private static final String DEFAULT_SERVER_HOST = "localhost";
@@ -52,6 +59,7 @@ public class SpiderMonitor {
/**
* Register spider for monitor.
+ *
* @param spiders
* @return
*/
@@ -113,13 +121,18 @@ public class SpiderMonitor {
/**
* Start monitor as server mode.
+ *
* @param port
* @return
* @throws IOException
* @throws JMException
*/
public SpiderMonitor server(int port) throws IOException, JMException {
- Registry registry = LocateRegistry.createRegistry(port);
+ try {
+ Registry registry = LocateRegistry.createRegistry(port);
+ } catch (ExportException e) {
+ logger.warn("Start server fail, maybe the address is in using.", e);
+ }
serverPort = port;
serverHost = "localhost";
type = Type.Server;
@@ -128,6 +141,7 @@ public class SpiderMonitor {
/**
* Start monitor as server mode.
+ *
* @return
* @throws IOException
* @throws JMException
@@ -139,6 +153,7 @@ public class SpiderMonitor {
/**
* Start monitor as client mode.
+ *
* @param serverHost
* @param serverPort
* @return
@@ -154,6 +169,7 @@ public class SpiderMonitor {
/**
* Start monitor as client mode.
+ *
* @return
* @throws IOException
* @throws JMException
@@ -167,7 +183,7 @@ public class SpiderMonitor {
}
public SpiderMonitor jmxStart(String jndiServer, int rmiPort) throws IOException, JMException {
- String jmxServerName = "WebMagic";
+ String jmxServerName = "WebMagic-"+ IPUtils.getFirstNoLoopbackIPAddresses();
// start JNDI
MBeanServer localServer = ManagementFactory.getPlatformMBeanServer();
@@ -199,7 +215,10 @@ public class SpiderMonitor {
SpiderMonitor spiderMonitor = new SpiderMonitor();
spiderMonitor.register(oschinaSpider, githubSpider);
- //
+ //If you want to connect it from remote, use spiderMonitor.server().jmxStart();
+ //ONLY ONE server can start for a machine.
+ //Others will be registered
+ spiderMonitor.server().server();
spiderMonitor.jmxStart();
oschinaSpider.start();
githubSpider.start();
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/monitor/SpiderStatus.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderStatus.java
similarity index 97%
rename from webmagic-core/src/main/java/us/codecraft/webmagic/monitor/SpiderStatus.java
rename to webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderStatus.java
index 889555cf..af08526c 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/monitor/SpiderStatus.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderStatus.java
@@ -3,6 +3,7 @@ package us.codecraft.webmagic.monitor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.Spider;
+import us.codecraft.webmagic.scheduler.MonitorableScheduler;
import java.util.List;
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/monitor/SpiderStatusMXBean.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderStatusMXBean.java
similarity index 100%
rename from webmagic-core/src/main/java/us/codecraft/webmagic/monitor/SpiderStatusMXBean.java
rename to webmagic-extension/src/main/java/us/codecraft/webmagic/monitor/SpiderStatusMXBean.java
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java
index 16f91472..cd3a0b65 100644
--- a/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java
@@ -7,7 +7,6 @@ import redis.clients.jedis.JedisPool;
import redis.clients.jedis.JedisPoolConfig;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Task;
-import us.codecraft.webmagic.monitor.MonitorableScheduler;
/**
* Use Redis as url scheduler for distributed crawlers.
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/IPUtils.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/IPUtils.java
new file mode 100644
index 00000000..3d416964
--- /dev/null
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/utils/IPUtils.java
@@ -0,0 +1,59 @@
+package us.codecraft.webmagic.utils;
+
+import java.net.Inet6Address;
+import java.net.InetAddress;
+import java.net.NetworkInterface;
+import java.net.SocketException;
+import java.util.Enumeration;
+
+/**
+ * Helpers for discovering the IP addresses of the local machine.
+ *
+ * @author code4crafer@gmail.com
+ * @since 0.5.0
+ */
+public abstract class IPUtils {
+
+    /**
+     * Returns the textual form of the first non-loopback address of this machine.
+     * <p>
+     * The first non-loopback, non-IPv6 address found is returned immediately. If
+     * there is none, the last non-loopback IPv6 address seen is used as a fallback.
+     *
+     * @return the host address string, never {@code null}
+     * @throws SocketException if the interfaces cannot be enumerated, or if no
+     *                         non-loopback address exists on this machine
+     */
+    public static String getFirstNoLoopbackIPAddresses() throws SocketException {
+
+        Enumeration<NetworkInterface> networkInterfaces = NetworkInterface.getNetworkInterfaces();
+        if (networkInterfaces == null) {
+            // Some JDK versions report "no interfaces" as null rather than empty.
+            throw new SocketException("No network interface found");
+        }
+
+        InetAddress fallbackAddress = null;
+        while (networkInterfaces.hasMoreElements()) {
+            NetworkInterface networkInterface = networkInterfaces.nextElement();
+            Enumeration<InetAddress> inetAddresses = networkInterface.getInetAddresses();
+            while (inetAddresses.hasMoreElements()) {
+                InetAddress address = inetAddresses.nextElement();
+                if (address.isLoopbackAddress()) {
+                    continue;
+                }
+                if (!(address instanceof Inet6Address)) {
+                    return address.getHostAddress();
+                }
+                // Remember the IPv6 address, but keep looking for a non-IPv6 one.
+                fallbackAddress = address;
+            }
+        }
+
+        if (fallbackAddress == null) {
+            // Without this guard the method dereferenced null (NullPointerException).
+            throw new SocketException("No non-loopback IP address found");
+        }
+        return fallbackAddress.getHostAddress();
+    }
+
+}
diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/monitor/CustomSpiderStatus.java b/webmagic-extension/src/test/java/us/codecraft/webmagic/monitor/CustomSpiderStatus.java
similarity index 100%
rename from webmagic-core/src/test/java/us/codecraft/webmagic/monitor/CustomSpiderStatus.java
rename to webmagic-extension/src/test/java/us/codecraft/webmagic/monitor/CustomSpiderStatus.java
diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/monitor/CustomSpiderStatusMXBean.java b/webmagic-extension/src/test/java/us/codecraft/webmagic/monitor/CustomSpiderStatusMXBean.java
similarity index 100%
rename from webmagic-core/src/test/java/us/codecraft/webmagic/monitor/CustomSpiderStatusMXBean.java
rename to webmagic-extension/src/test/java/us/codecraft/webmagic/monitor/CustomSpiderStatusMXBean.java
diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/monitor/SpiderMonitorTest.java b/webmagic-extension/src/test/java/us/codecraft/webmagic/monitor/SpiderMonitorTest.java
similarity index 100%
rename from webmagic-core/src/test/java/us/codecraft/webmagic/monitor/SpiderMonitorTest.java
rename to webmagic-extension/src/test/java/us/codecraft/webmagic/monitor/SpiderMonitorTest.java
diff --git a/webmagic-extension/src/test/java/us/codecraft/webmagic/utils/IPUtilsTest.java b/webmagic-extension/src/test/java/us/codecraft/webmagic/utils/IPUtilsTest.java
new file mode 100644
index 00000000..9d78fb9a
--- /dev/null
+++ b/webmagic-extension/src/test/java/us/codecraft/webmagic/utils/IPUtilsTest.java
@@ -0,0 +1,25 @@
+package us.codecraft.webmagic.utils;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+
+/**
+ * Tests for {@link IPUtils}.
+ *
+ * @author code4crafer@gmail.com
+ */
+public class IPUtilsTest {
+
+    /**
+     * The lookup must yield a non-empty address string whenever it returns.
+     */
+    @Test
+    public void testGetFirstNoLoopbackIPAddresses() throws Exception {
+        String address = IPUtils.getFirstNoLoopbackIPAddresses();
+        System.out.println(address);
+        assertNotNull(address);
+        assertFalse(address.isEmpty());
+    }
+}