[refactor]move monitor to webmagic-extension #98

pull/121/head
yihua.huang 11 years ago
parent d61f65cef8
commit 11ba5beb42

@ -111,7 +111,7 @@ public class Request implements Serializable {
/**
* The http method of the request. Get for default.
* @return httpMethod
* @see us.codecraft.webmagic.constant.HttpConstant.Method
* @see us.codecraft.webmagic.utils.HttpConstant.Method
* @since 0.5.0
*/
public String getMethod() {

@ -50,7 +50,7 @@ public class Site {
private boolean useGzip = true;
/**
* @see us.codecraft.webmagic.constant.HttpConstant.Header
* @see us.codecraft.webmagic.utils.HttpConstant.Header
* @deprecated
*/
public static interface HeaderConst {

@ -8,7 +8,6 @@ import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.downloader.Downloader;
import us.codecraft.webmagic.downloader.HttpClientDownloader;
import us.codecraft.webmagic.monitor.SpiderListener;
import us.codecraft.webmagic.pipeline.CollectorPipeline;
import us.codecraft.webmagic.pipeline.ConsolePipeline;
import us.codecraft.webmagic.pipeline.Pipeline;

@ -1,8 +1,8 @@
package us.codecraft.webmagic.monitor;
import us.codecraft.webmagic.Request;
package us.codecraft.webmagic;
/**
* Listener of Spider on page processing. Used for monitoring and so on.
*
* @author code4crafer@gmail.com
* @since 0.5.0
*/

@ -18,7 +18,7 @@ import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.constant.HttpConstant;
import us.codecraft.webmagic.utils.HttpConstant;
import us.codecraft.webmagic.selector.PlainText;
import us.codecraft.webmagic.utils.UrlUtils;

@ -5,7 +5,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.monitor.MonitorableScheduler;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

@ -1,7 +1,6 @@
package us.codecraft.webmagic.monitor;
package us.codecraft.webmagic.scheduler;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.scheduler.Scheduler;
/**
* The scheduler whose requests can be counted for monitor.

@ -1,4 +1,4 @@
package us.codecraft.webmagic.constant;
package us.codecraft.webmagic.utils;
/**
* Some constants of the HTTP protocol.

@ -0,0 +1,31 @@
package us.codecraft.webmagic.example;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.monitor.SpiderMonitor;
import us.codecraft.webmagic.processor.example.GithubRepoPageProcessor;
import us.codecraft.webmagic.processor.example.OschinaBlogPageProcessor;
/**
* @author code4crafer@gmail.com
*/
public class MonitorExample {

    /**
     * Builds two example spiders, registers both with a single
     * {@link SpiderMonitor}, starts the monitor and then starts the spiders.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if starting the monitor fails
     */
    public static void main(String[] args) throws Exception {
        // Crawler for the OSChina blog, configured with two threads.
        Spider blogCrawler = Spider.create(new OschinaBlogPageProcessor())
                .addUrl("http://my.oschina.net/flashsword/blog")
                .thread(2);

        // Crawler for the GitHub repositories page.
        Spider repoCrawler = Spider.create(new GithubRepoPageProcessor())
                .addUrl("https://github.com/code4craft");

        SpiderMonitor monitor = new SpiderMonitor();
        monitor.register(blogCrawler, repoCrawler);

        // Author's note: to allow remote connections, use monitor.server().jmxStart().
        // Only one server can be started per machine; the others will be registered.
        // NOTE(review): server() is invoked twice in a row here - presumably to
        // demonstrate that a second start attempt is tolerated; confirm intent.
        monitor.server().server();
        monitor.jmxStart();

        blogCrawler.start();
        repoCrawler.start();
    }
}

@ -1,9 +1,13 @@
package us.codecraft.webmagic.monitor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.SpiderListener;
import us.codecraft.webmagic.processor.example.GithubRepoPageProcessor;
import us.codecraft.webmagic.processor.example.OschinaBlogPageProcessor;
import us.codecraft.webmagic.utils.IPUtils;
import javax.management.JMException;
import javax.management.MBeanServer;
@ -15,6 +19,7 @@ import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.rmi.registry.LocateRegistry;
import java.rmi.registry.Registry;
import java.rmi.server.ExportException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
@ -30,6 +35,8 @@ public class SpiderMonitor {
Server, Client, Local;
}
private Logger logger = LoggerFactory.getLogger(getClass());
private static final int DEFAULT_SERVER_PORT = 14721;
private static final String DEFAULT_SERVER_HOST = "localhost";
@ -52,6 +59,7 @@ public class SpiderMonitor {
/**
* Register spider for monitor.
*
* @param spiders
* @return
*/
@ -113,13 +121,18 @@ public class SpiderMonitor {
/**
* Start monitor as server mode.
*
* @param port
* @return
* @throws IOException
* @throws JMException
*/
public SpiderMonitor server(int port) throws IOException, JMException {
Registry registry = LocateRegistry.createRegistry(port);
try {
Registry registry = LocateRegistry.createRegistry(port);
} catch (ExportException e) {
logger.warn("Start server fail, maybe the address is in using.", e);
}
serverPort = port;
serverHost = "localhost";
type = Type.Server;
@ -128,6 +141,7 @@ public class SpiderMonitor {
/**
* Start monitor as server mode.
*
* @return
* @throws IOException
* @throws JMException
@ -139,6 +153,7 @@ public class SpiderMonitor {
/**
* Start monitor as client mode.
*
* @param serverHost
* @param serverPort
* @return
@ -154,6 +169,7 @@ public class SpiderMonitor {
/**
* Start monitor as client mode.
*
* @return
* @throws IOException
* @throws JMException
@ -167,7 +183,7 @@ public class SpiderMonitor {
}
public SpiderMonitor jmxStart(String jndiServer, int rmiPort) throws IOException, JMException {
String jmxServerName = "WebMagic";
String jmxServerName = "WebMagic-"+ IPUtils.getFirstNoLoopbackIPAddresses();
// start JNDI
MBeanServer localServer = ManagementFactory.getPlatformMBeanServer();
@ -199,7 +215,10 @@ public class SpiderMonitor {
SpiderMonitor spiderMonitor = new SpiderMonitor();
spiderMonitor.register(oschinaSpider, githubSpider);
//
//If you want to connect it from remote, use spiderMonitor.server().jmxStart();
//ONLY ONE server can start for a machine.
//Others will be registered
spiderMonitor.server().server();
spiderMonitor.jmxStart();
oschinaSpider.start();
githubSpider.start();

@ -3,6 +3,7 @@ package us.codecraft.webmagic.monitor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.scheduler.MonitorableScheduler;
import java.util.List;

@ -7,7 +7,6 @@ import redis.clients.jedis.JedisPool;
import redis.clients.jedis.JedisPoolConfig;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.monitor.MonitorableScheduler;
/**
* Use Redis as url scheduler for distributed crawlers.<br>

@ -0,0 +1,36 @@
package us.codecraft.webmagic.utils;
import java.net.Inet6Address;
import java.net.InetAddress;
import java.net.NetworkInterface;
import java.net.SocketException;
import java.util.Enumeration;
/**
* @author code4crafer@gmail.com
* @since 0.5.0
*/
public abstract class IPUtils {

    /**
     * Returns the textual form of the first suitable non-loopback address of
     * this machine.
     * <p>
     * All network interfaces are scanned in the order reported by the
     * platform. The first non-loopback address that is not an IPv6 address is
     * returned immediately. If only non-loopback IPv6 addresses exist, the
     * last one encountered is returned.
     *
     * @return the host address string of the selected address, never {@code null}
     * @throws SocketException if the interfaces cannot be enumerated, or if the
     *                         machine has no non-loopback address at all
     */
    public static String getFirstNoLoopbackIPAddresses() throws SocketException {
        Enumeration<NetworkInterface> networkInterfaces = NetworkInterface.getNetworkInterfaces();
        // Older JDKs document a null return when no interface is found.
        if (networkInterfaces == null) {
            throw new SocketException("No network interface found on this machine");
        }

        // Remembers a non-loopback IPv6 address in case no other kind turns up.
        InetAddress ipv6Fallback = null;
        while (networkInterfaces.hasMoreElements()) {
            Enumeration<InetAddress> inetAddresses = networkInterfaces.nextElement().getInetAddresses();
            while (inetAddresses.hasMoreElements()) {
                InetAddress address = inetAddresses.nextElement();
                if (address.isLoopbackAddress()) {
                    continue;
                }
                if (!(address instanceof Inet6Address)) {
                    return address.getHostAddress();
                }
                ipv6Fallback = address;
            }
        }

        // Previously this dereferenced null and failed with a NullPointerException.
        if (ipv6Fallback == null) {
            throw new SocketException("No non-loopback IP address found on any network interface");
        }
        return ipv6Fallback.getHostAddress();
    }
}

@ -0,0 +1,14 @@
package us.codecraft.webmagic.utils;
import org.junit.Test;
/**
* @author code4crafer@gmail.com
*/
public class IPUtilsTest {

    /**
     * Smoke test: looks up the address via
     * {@link IPUtils#getFirstNoLoopbackIPAddresses()} and prints it.
     * No assertion is made on the value.
     */
    @Test
    public void testGetFirstNoLoopbackIPAddresses() throws Exception {
        String firstAddress = IPUtils.getFirstNoLoopbackIPAddresses();
        System.out.println(firstAddress);
    }
}
Loading…
Cancel
Save