diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/pipeline/ConsolePipeline.java b/webmagic-core/src/main/java/us/codecraft/webmagic/pipeline/ConsolePipeline.java
index 2ff99c87..e1648fe7 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/pipeline/ConsolePipeline.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/pipeline/ConsolePipeline.java
@@ -7,25 +7,18 @@ import java.util.Map;
/**
* 命令行输出抽取结果。可用于测试。
+ *
* @author code4crafter@gmail.com
- * Date: 13-4-21
- * Time: 下午1:45
+ * Date: 13-4-21
+ * Time: 下午1:45
*/
-public class ConsolePipeline implements Pipeline{
+public class ConsolePipeline implements Pipeline {
@Override
- public void process(ResultItems resultItems,Task task) {
- System.out.println("get page: "+resultItems.getRequest().getUrl());
+ public void process(ResultItems resultItems, Task task) {
+ System.out.println("get page: " + resultItems.getRequest().getUrl()); // header line: URL of the request these results belong to
for (Map.Entry entry : resultItems.getAll().entrySet()) {
- if (entry.getValue() instanceof Iterable) {
- Iterable value = (Iterable) entry.getValue();
- System.out.println(entry.getKey() + ":");
- for (Object o : value) {
- System.out.println(o);
- }
- } else {
- System.out.println(entry.getKey() + ":\t" + entry.getValue());
- }
+ System.out.println(entry.getKey() + ":\t" + entry.getValue()); // one "key:<TAB>value" line per entry; Iterable values are no longer expanded one element per line
}
}
}
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Html.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Html.java
index 114eef99..79d62a01 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Html.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Html.java
@@ -63,6 +63,12 @@ public class Html extends PlainText {
return selectList(xpathSelector, strings);
}
+ @Override
+ public Selectable xpath2(String xpath) {
+ Xpath2Selector xpathSelector = SelectorFactory.getInstatnce().newXpath2Selector(xpath); // NOTE(review): "getInstatnce" looks misspelled; presumably matches the name declared in SelectorFactory - confirm before renaming
+ return selectList(xpathSelector, strings); // same selectList(selector, strings) call shape as the preceding method in this class
+ }
+
@Override
public Selectable $(String selector) {
CssSelector cssSelector = new CssSelector(selector);
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/PlainText.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/PlainText.java
index d06a5310..4fff6da8 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/PlainText.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/PlainText.java
@@ -34,6 +34,11 @@ public class PlainText implements Selectable {
throw new UnsupportedOperationException();
}
+ @Override
+ public Selectable xpath2(String xpath) {
+ throw new UnsupportedOperationException(); // XPath 2.0 selection is not supported on PlainText; the Html subclass overrides xpath2
+ }
+
@Override
public Selectable $(String selector) {
throw new UnsupportedOperationException();
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selectable.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selectable.java
index 42f3d108..cea501dd 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selectable.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selectable.java
@@ -18,6 +18,14 @@ public interface Selectable {
*/
public Selectable xpath(String xpath);
+ /**
+ * Selects a list of results using an XPath 2.0 expression.
+ *
+ * @param xpath the XPath 2.0 expression to apply
+ * @return new Selectable produced by the extraction
+ */
+ public Selectable xpath2(String xpath);
+
/**
* select list with css selector
*
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/SelectorFactory.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/SelectorFactory.java
index 1dd56e01..9abb1ce3 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/SelectorFactory.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/SelectorFactory.java
@@ -34,6 +34,10 @@ public class SelectorFactory {
return newSelector(XpathSelector.class, xpath);
}
+ public Xpath2Selector newXpath2Selector(String xpath) {
+ return newSelector(Xpath2Selector.class, xpath); // same newSelector(Class, String) delegation as the XpathSelector factory method just above
+ }
+
public SmartContentSelector newSmartContentSelector(){
return newSelector(SmartContentSelector.class);
}
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/FileDownloader.java b/webmagic-plugin/webmagic-misc/src/main/java/us/codecraft/webmagic/downloader/FileDownloader.java
similarity index 100%
rename from webmagic-core/src/main/java/us/codecraft/webmagic/downloader/FileDownloader.java
rename to webmagic-plugin/webmagic-misc/src/main/java/us/codecraft/webmagic/downloader/FileDownloader.java
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/schedular/FileCacheQueueScheduler.java b/webmagic-plugin/webmagic-misc/src/main/java/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler.java
similarity index 97%
rename from webmagic-core/src/main/java/us/codecraft/webmagic/schedular/FileCacheQueueScheduler.java
rename to webmagic-plugin/webmagic-misc/src/main/java/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler.java
index f5393a33..d4a3987d 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/schedular/FileCacheQueueScheduler.java
+++ b/webmagic-plugin/webmagic-misc/src/main/java/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler.java
@@ -1,9 +1,10 @@
-package us.codecraft.webmagic.schedular;
+package us.codecraft.webmagic.scheduler;
import org.apache.commons.lang3.math.NumberUtils;
import org.apache.log4j.Logger;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Task;
+import us.codecraft.webmagic.schedular.Scheduler;
import java.io.*;
import java.util.LinkedHashSet;
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/utils/DoubleKeyMap.java b/webmagic-plugin/webmagic-misc/src/main/java/us/codecraft/webmagic/utils/DoubleKeyMap.java
similarity index 100%
rename from webmagic-core/src/main/java/us/codecraft/webmagic/utils/DoubleKeyMap.java
rename to webmagic-plugin/webmagic-misc/src/main/java/us/codecraft/webmagic/utils/DoubleKeyMap.java
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/utils/MultiKeyMapBase.java b/webmagic-plugin/webmagic-misc/src/main/java/us/codecraft/webmagic/utils/MultiKeyMapBase.java
similarity index 100%
rename from webmagic-core/src/main/java/us/codecraft/webmagic/utils/MultiKeyMapBase.java
rename to webmagic-plugin/webmagic-misc/src/main/java/us/codecraft/webmagic/utils/MultiKeyMapBase.java
diff --git a/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/OschinaBlog.java b/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/OschinaBlog.java
index 38cb41f0..817ba448 100644
--- a/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/OschinaBlog.java
+++ b/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/OschinaBlog.java
@@ -4,7 +4,6 @@ import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.model.ExtractBy;
import us.codecraft.webmagic.model.OOSpider;
import us.codecraft.webmagic.model.TargetUrl;
-import us.codecraft.webmagic.pipeline.ConsolePipeline;
import us.codecraft.webmagic.pipeline.JsonFilePipeline;
/**
@@ -30,7 +29,7 @@ public class OschinaBlog implements Blog{
}
public static void main(String[] args) {
- OOSpider.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog"), OschinaBlog.class).pipeline(new ConsolePipeline()).pipeline(new JsonFilePipeline()).run();
+ OOSpider.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog"), OschinaBlog.class).pipeline(new JsonFilePipeline()).run();
}
public String getTitle() {
diff --git a/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/GuoxueProcessor.java b/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/GuoxueProcessor.java
index db00c79c..5d7d3559 100644
--- a/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/GuoxueProcessor.java
+++ b/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/GuoxueProcessor.java
@@ -3,7 +3,7 @@ package us.codecraft.webmagic.samples;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.pipeline.FilePipeline;
import us.codecraft.webmagic.processor.SimplePageProcessor;
-import us.codecraft.webmagic.schedular.FileCacheQueueScheduler;
+import us.codecraft.webmagic.scheduler.FileCacheQueueScheduler;
/**
* @author code4crafter@gmail.com
diff --git a/webmagic-samples/src/test/java/us/codecraft/webmagic/SpiderTest.java b/webmagic-samples/src/test/java/us/codecraft/webmagic/SpiderTest.java
index 76a423fb..dbfa8154 100644
--- a/webmagic-samples/src/test/java/us/codecraft/webmagic/SpiderTest.java
+++ b/webmagic-samples/src/test/java/us/codecraft/webmagic/SpiderTest.java
@@ -5,7 +5,7 @@ import org.junit.Test;
import us.codecraft.webmagic.pipeline.FilePipeline;
import us.codecraft.webmagic.processor.SimplePageProcessor;
import us.codecraft.webmagic.samples.HuxiuProcessor;
-import us.codecraft.webmagic.schedular.FileCacheQueueScheduler;
+import us.codecraft.webmagic.scheduler.FileCacheQueueScheduler;
/**
* @author code4crafter@gmail.com
diff --git a/webmagic-samples/src/test/java/us/codecraft/webmagic/processor/DiandianProcessorTest.java b/webmagic-samples/src/test/java/us/codecraft/webmagic/processor/DiandianProcessorTest.java
index 13910b52..cf587f1d 100644
--- a/webmagic-samples/src/test/java/us/codecraft/webmagic/processor/DiandianProcessorTest.java
+++ b/webmagic-samples/src/test/java/us/codecraft/webmagic/processor/DiandianProcessorTest.java
@@ -6,7 +6,7 @@ import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.pipeline.ConsolePipeline;
import us.codecraft.webmagic.pipeline.FreemarkerPipeline;
import us.codecraft.webmagic.samples.DiandianBlogProcessor;
-import us.codecraft.webmagic.schedular.FileCacheQueueScheduler;
+import us.codecraft.webmagic.scheduler.FileCacheQueueScheduler;
import java.io.IOException;
diff --git a/webmagic-samples/src/test/java/us/codecraft/webmagic/processor/DiaoyuwengProcessorTest.java b/webmagic-samples/src/test/java/us/codecraft/webmagic/processor/DiaoyuwengProcessorTest.java
index 33bcf9c6..69a535c6 100644
--- a/webmagic-samples/src/test/java/us/codecraft/webmagic/processor/DiaoyuwengProcessorTest.java
+++ b/webmagic-samples/src/test/java/us/codecraft/webmagic/processor/DiaoyuwengProcessorTest.java
@@ -6,7 +6,7 @@ import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.pipeline.FilePipeline;
import us.codecraft.webmagic.pipeline.FreemarkerPipeline;
import us.codecraft.webmagic.samples.DiaoyuwengProcessor;
-import us.codecraft.webmagic.schedular.FileCacheQueueScheduler;
+import us.codecraft.webmagic.scheduler.FileCacheQueueScheduler;
import java.io.IOException;
diff --git a/webmagic-samples/src/test/java/us/codecraft/webmagic/processor/SinablogProcessorTest.java b/webmagic-samples/src/test/java/us/codecraft/webmagic/processor/SinablogProcessorTest.java
index a0160e18..a44fe35b 100644
--- a/webmagic-samples/src/test/java/us/codecraft/webmagic/processor/SinablogProcessorTest.java
+++ b/webmagic-samples/src/test/java/us/codecraft/webmagic/processor/SinablogProcessorTest.java
@@ -6,7 +6,7 @@ import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.pipeline.FilePipeline;
import us.codecraft.webmagic.pipeline.FreemarkerPipeline;
import us.codecraft.webmagic.samples.SinaBlogProcesser;
-import us.codecraft.webmagic.schedular.FileCacheQueueScheduler;
+import us.codecraft.webmagic.scheduler.FileCacheQueueScheduler;
import java.io.IOException;