diff --git a/webmagic-plugin/pom.xml b/webmagic-plugin/pom.xml
index 6eb7d615..634f09d3 100644
--- a/webmagic-plugin/pom.xml
+++ b/webmagic-plugin/pom.xml
@@ -25,6 +25,11 @@
freemarker
2.3.15
+
+ redis.clients
+ jedis
+ 2.0.0
+
diff --git a/webmagic-plugin/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java b/webmagic-plugin/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java
new file mode 100644
index 00000000..e87ee335
--- /dev/null
+++ b/webmagic-plugin/src/main/java/us/codecraft/webmagic/scheduler/RedisScheduler.java
@@ -0,0 +1,45 @@
+package us.codecraft.webmagic.scheduler;
+
+import redis.clients.jedis.Jedis;
+import redis.clients.jedis.JedisPool;
+import redis.clients.jedis.JedisPoolConfig;
+import us.codecraft.webmagic.Request;
+import us.codecraft.webmagic.Task;
+import us.codecraft.webmagic.schedular.Scheduler;
+
+/**
+ * 使用redis管理url,构建一个分布式的爬虫。
+ * @author yihua.huang@dianping.com
+ * @date: 13-7-25
+ * Time: 上午7:07
+ */
+public class RedisScheduler implements Scheduler{
+
+ private JedisPool pool;
+
+ private static final String QUEUE_PREFIX = "queue_";
+
+ private static final String SET_PREFIX = "set_";
+
+ public RedisScheduler(String host){
+ pool = new JedisPool(new JedisPoolConfig(), host);
+ }
+
+ @Override
+ public synchronized void push(Request request, Task task) {
+ Jedis jedis = pool.getResource();
+ if (jedis.zrank(SET_PREFIX+task.getUUID(),request.getUrl())==null){
+ jedis.rpush(QUEUE_PREFIX+task.getUUID(),request.getUrl());
+ jedis.zadd(SET_PREFIX+task.getUUID(),System.currentTimeMillis(),request.getUrl());
+ }
+ pool.returnResource(jedis);
+ }
+
+ @Override
+ public synchronized Request poll(Task task) {
+ Jedis jedis = pool.getResource();
+ String url = jedis.lpop(QUEUE_PREFIX+task.getUUID());
+ pool.returnResource(jedis);
+ return new Request(url);
+ }
+}
diff --git a/webmagic-plugin/src/test/java/us/codecraft/webmagic/scheduler/RedisSchedulerTest.java b/webmagic-plugin/src/test/java/us/codecraft/webmagic/scheduler/RedisSchedulerTest.java
new file mode 100644
index 00000000..0f556d28
--- /dev/null
+++ b/webmagic-plugin/src/test/java/us/codecraft/webmagic/scheduler/RedisSchedulerTest.java
@@ -0,0 +1,41 @@
+package us.codecraft.webmagic.scheduler;
+
+import org.junit.Before;
+import org.junit.Test;
+import us.codecraft.webmagic.Request;
+import us.codecraft.webmagic.Site;
+import us.codecraft.webmagic.Task;
+
+/**
+ * @author yihua.huang@dianping.com
+ * @date: 13-7-25
+ * Time: 上午7:51
+ */
+public class RedisSchedulerTest {
+
+ private RedisScheduler redisScheduler;
+
+ @Before
+ public void setUp() {
+ redisScheduler = new RedisScheduler("localhost");
+ }
+
+ @Test
+ public void test() {
+ Task task = new Task() {
+ @Override
+ public String getUUID() {
+ return "1";
+ }
+
+ @Override
+ public Site getSite() {
+ return null;
+ }
+ };
+ redisScheduler.push(new Request("http://www.ibm.com/developerworks/cn/java/j-javadev2-22/"), task);
+ Request poll = redisScheduler.poll(task);
+ System.out.println(poll.getUrl());
+
+ }
+}
diff --git a/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/GlobalProcessor.java b/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/GlobalProcessor.java
index 383422f6..f7c5f7fa 100644
--- a/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/GlobalProcessor.java
+++ b/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/GlobalProcessor.java
@@ -3,11 +3,9 @@ package us.codecraft.webmagic.samples;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
-import us.codecraft.webmagic.downloader.FileDownloader;
-import us.codecraft.webmagic.downloader.HttpClientDownloader;
import us.codecraft.webmagic.pipeline.FilePipeline;
import us.codecraft.webmagic.processor.PageProcessor;
-import us.codecraft.webmagic.schedular.FileCacheQueueScheduler;
+import us.codecraft.webmagic.scheduler.RedisScheduler;
import java.util.List;
@@ -40,9 +38,12 @@ public class GlobalProcessor implements PageProcessor {
public static void main(String[] args) {
Spider.create(new GlobalProcessor()).thread(10)
- .scheduler(new FileCacheQueueScheduler("/data/webmagic/test"))
- .downloader(new FileDownloader("/data/webmagic/test", new HttpClientDownloader()))
- .pipeline(new FilePipeline("/data/webmagic/test"))
+ .scheduler(new RedisScheduler("localhost"))
+ .pipeline(new FilePipeline("/data/webmagic/test/"))
+ .runAsync();
+ Spider.create(new GlobalProcessor()).thread(10)
+ .scheduler(new RedisScheduler("localhost"))
+ .pipeline(new FilePipeline("/data/webmagic/test/"))
.run();
}
}