diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/annotation/ConsolePageModelPipeline.java b/webmagic-core/src/main/java/us/codecraft/webmagic/annotation/ConsolePageModelPipeline.java
new file mode 100644
index 00000000..9991b7f7
--- /dev/null
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/annotation/ConsolePageModelPipeline.java
@@ -0,0 +1,16 @@
+package us.codecraft.webmagic.annotation;
+
+import org.apache.commons.lang3.builder.ToStringBuilder;
+import us.codecraft.webmagic.Task;
+
+/**
+ * PageModelPipeline implementation that prints every object it receives to standard output.
+ * @author yihua.huang@dianping.com
+ * @date: 13-8-3, 3:41 PM
+ */
+public class ConsolePageModelPipeline implements PageModelPipeline {
+ @Override
+ public void process(Object o, Task task) {
+ System.out.println(ToStringBuilder.reflectionToString(o)); // task is not used; o is rendered through reflectionToString
+ }
+}
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/annotation/OOSpider.java b/webmagic-core/src/main/java/us/codecraft/webmagic/annotation/OOSpider.java
index c6ae2f31..e5007455 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/annotation/OOSpider.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/annotation/OOSpider.java
@@ -2,28 +2,57 @@ package us.codecraft.webmagic.annotation;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
-import us.codecraft.webmagic.processor.PageProcessor;
+import us.codecraft.webmagic.pipeline.Pipeline;
/**
* @author code4crafter@gmail.com
* @date: 13-8-3
* Time: 上午9:51
*/
-public class OOSpider extends Spider{
+public class OOSpider extends Spider {
/**
- * 使用已定义的抽取规则新建一个Spider。
+ * An OOSpider can only be created from an ObjectPageProcessor.
*
* @param pageProcessor 已定义的抽取规则
*/
- public OOSpider(PageProcessor pageProcessor) {
- super(pageProcessor);
+
+ private ObjectPageProcessor objectPageProcessor;
+
+ private ObjectPipeline objectPipeline;
+
+ protected OOSpider(ObjectPageProcessor objectPageProcessor) {
+ super(objectPageProcessor);
+ this.objectPageProcessor = objectPageProcessor;
+ this.objectPipeline = new ObjectPipeline(); // created on every construction path so addPageModel never dereferences null
+ super.pipeline(objectPipeline); // explicit super call: this class's own pipeline(Pipeline) always throws
+ }
+
+ public OOSpider(Site site, PageModelPipeline pageModelPipeline, Class... pageModels) {
+ this(ObjectPageProcessor.create(site, pageModels)); // delegated constructor also creates and registers objectPipeline
+ for (Class pageModel : pageModels) {
+ this.objectPipeline.put(pageModel, pageModelPipeline);
+ }
+ }
+
+ public static OOSpider create(Site site, Class... pageModels) {
+ return new OOSpider(site, new ConsolePageModelPipeline(), pageModels);
+ }
+
+ public static OOSpider create(Site site, PageModelPipeline pageModelPipeline, Class... pageModels) {
+ return new OOSpider(site, pageModelPipeline, pageModels);
+ }
+
+ public OOSpider addPageModel(PageModelPipeline pageModelPipeline, Class... pageModels) {
+ for (Class pageModel : pageModels) {
+ objectPageProcessor.addPageModel(pageModel);
+ objectPipeline.put(pageModel, pageModelPipeline);
+ }
+ return this;
}
- public static OOSpider create(Site site,Class... pageModels) {
- OOSpider ooSpider = new OOSpider(ObjectPageProcessor.create(site, pageModels));
- ooSpider.pipeline(new ObjectPipeline());
- return ooSpider;
+ public Spider pipeline(Pipeline pipeline) {
+ throw new UnsupportedOperationException("Sorry, OOSpider can only use ObjectPipeline");
}
}
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/annotation/ObjectPageProcessor.java b/webmagic-core/src/main/java/us/codecraft/webmagic/annotation/ObjectPageProcessor.java
index 063dc818..f3758688 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/annotation/ObjectPageProcessor.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/annotation/ObjectPageProcessor.java
@@ -18,30 +18,31 @@ import java.util.regex.Pattern;
*/
public class ObjectPageProcessor implements PageProcessor {
- private List pageModelExtractorList;
+ private List pageModelExtractorList = new ArrayList();
private Site site;
- private Set targetUrlPatterns;
+ private Set targetUrlPatterns = new HashSet();
public static ObjectPageProcessor create(Site site, Class... clazzs) {
- List pageModelExtractorList = new ArrayList();
+ ObjectPageProcessor objectPageProcessor = new ObjectPageProcessor(site);
for (Class clazz : clazzs) {
- PageModelExtractor pageModelExtractor = PageModelExtractor.create(clazz);
- pageModelExtractorList.add(pageModelExtractor);
+ objectPageProcessor.addPageModel(clazz);
}
- ObjectPageProcessor objectPageProcessor = new ObjectPageProcessor(site, pageModelExtractorList);
return objectPageProcessor;
}
- private ObjectPageProcessor(Site site, List pageModelExtractorList) {
+
+ public ObjectPageProcessor addPageModel(Class clazz){
+ PageModelExtractor pageModelExtractor = PageModelExtractor.create(clazz);
+ targetUrlPatterns.addAll(pageModelExtractor.getTargetUrlPatterns());
+ targetUrlPatterns.addAll(pageModelExtractor.getHelpUrlPatterns());
+ pageModelExtractorList.add(pageModelExtractor);
+ return this;
+ }
+
+ private ObjectPageProcessor(Site site) {
this.site = site;
- this.pageModelExtractorList = pageModelExtractorList;
- targetUrlPatterns = new HashSet();
- for (PageModelExtractor pageModelExtractor : pageModelExtractorList) {
- targetUrlPatterns.addAll(pageModelExtractor.getTargetUrlPatterns());
- targetUrlPatterns.addAll(pageModelExtractor.getHelpUrlPatterns());
- }
}
@Override
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/annotation/PageModelExtractor.java b/webmagic-core/src/main/java/us/codecraft/webmagic/annotation/PageModelExtractor.java
index 8ed3b6b8..f91252fa 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/annotation/PageModelExtractor.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/annotation/PageModelExtractor.java
@@ -22,9 +22,9 @@ import java.util.regex.Pattern;
*/
class PageModelExtractor {
- private List targetUrlPatterns;
+ private List targetUrlPatterns = new ArrayList();
- private List helpUrlPatterns;
+ private List helpUrlPatterns = new ArrayList();
private Class clazz;
@@ -106,7 +106,6 @@ class PageModelExtractor {
}
private void initTargetUrlPatterns() {
- targetUrlPatterns = new ArrayList();
Annotation annotation = clazz.getAnnotation(TargetUrl.class);
if (annotation == null) {
targetUrlPatterns.add(Pattern.compile(".*"));
@@ -116,7 +115,6 @@ class PageModelExtractor {
targetUrlPatterns.add(Pattern.compile(s.replace(".", "\\.").replace("*", "[^\"'#]*")));
}
}
- helpUrlPatterns = new ArrayList();
annotation = clazz.getAnnotation(HelpUrl.class);
if (annotation != null) {
String[] value = ((HelpUrl) annotation).value();
diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/annotation/TestFetcher.java b/webmagic-core/src/test/java/us/codecraft/webmagic/annotation/TestFetcher.java
index b29d0533..00264314 100644
--- a/webmagic-core/src/test/java/us/codecraft/webmagic/annotation/TestFetcher.java
+++ b/webmagic-core/src/test/java/us/codecraft/webmagic/annotation/TestFetcher.java
@@ -1,6 +1,5 @@
package us.codecraft.webmagic.annotation;
-import org.junit.Ignore;
import org.junit.Test;
import us.codecraft.webmagic.Site;
@@ -11,13 +10,11 @@ import us.codecraft.webmagic.Site;
*/
public class TestFetcher {
- @Ignore("takes long")
+// @Ignore("takes long")
@Test
public void test() {
- ObjectPipeline objectPipeline = new ObjectPipeline();
OOSpider.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog/145796"), OschinaBlog.class)
- .pipeline(objectPipeline);
- OschinaBlog oschinaBlog = null;
+ .run();
}