diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Page.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Page.java index 40f17f0a..eb2c1321 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/Page.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Page.java @@ -9,7 +9,7 @@ import java.util.List; /** *
- *Page保存了上一次抓取的结果,并可定义待抓取的链接内容。
+ * Page保存了上一次抓取的结果,并可定义待抓取的链接内容。
  *
  *     主要方法:
  *     {@link #getUrl()} 获取页面的Url
@@ -19,6 +19,7 @@ import java.util.List;
  *     {@link #addTargetRequests(java.util.List)} {@link #addTargetRequest(String)} 添加待抓取的链接
  *
  * 
+ * * @author code4crafter@gmail.com
*/ public class Page { @@ -36,9 +37,16 @@ public class Page { public Page() { } + public Page setSkip(boolean skip) { + resultItems.setSkip(skip); + return this; + + } + /** * 保存抽取的结果 - * @param key 结果的key + * + * @param key 结果的key * @param field 结果的value */ public void putField(String key, Object field) { @@ -47,6 +55,7 @@ public class Page { /** * 获取页面的html内容 + * * @return html 页面的html内容 */ public Selectable getHtml() { @@ -63,6 +72,7 @@ public class Page { /** * 添加待抓取的链接 + * * @param requests 待抓取的链接 */ public void addTargetRequests(List requests) { @@ -79,6 +89,7 @@ public class Page { /** * 添加待抓取的链接 + * * @param requestString 待抓取的链接 */ public void addTargetRequest(String requestString) { @@ -93,6 +104,7 @@ public class Page { /** * 添加待抓取的页面,在需要传递附加信息时使用 + * * @param request 待抓取的页面 */ public void addTargetRequest(Request request) { @@ -103,6 +115,7 @@ public class Page { /** * 获取页面的Url + * * @return url 当前页面的url,可用于抽取 */ public Selectable getUrl() { @@ -111,6 +124,7 @@ public class Page { /** * 设置url + * * @param url */ public void setUrl(Selectable url) { @@ -119,6 +133,7 @@ public class Page { /** * 获取抓取请求 + * * @return request 抓取请求 */ public Request getRequest() { diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/AfterExtractor.java b/webmagic-core/src/main/java/us/codecraft/webmagic/model/AfterExtractor.java similarity index 88% rename from webmagic-core/src/main/java/us/codecraft/webmagic/oo/AfterExtractor.java rename to webmagic-core/src/main/java/us/codecraft/webmagic/model/AfterExtractor.java index 79feaaf3..01170811 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/AfterExtractor.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/model/AfterExtractor.java @@ -1,4 +1,4 @@ -package us.codecraft.webmagic.oo; +package us.codecraft.webmagic.model; import us.codecraft.webmagic.Page; diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/ConsolePageModelPipeline.java b/webmagic-core/src/main/java/us/codecraft/webmagic/model/ConsolePageModelPipeline.java similarity index 91% rename from webmagic-core/src/main/java/us/codecraft/webmagic/oo/ConsolePageModelPipeline.java rename to webmagic-core/src/main/java/us/codecraft/webmagic/model/ConsolePageModelPipeline.java index e17f210d..e5485a19 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/ConsolePageModelPipeline.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/model/ConsolePageModelPipeline.java @@ -1,4 +1,4 @@ -package us.codecraft.webmagic.oo; +package us.codecraft.webmagic.model; import org.apache.commons.lang3.builder.ToStringBuilder; import us.codecraft.webmagic.Task; diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/ExtractBy.java b/webmagic-core/src/main/java/us/codecraft/webmagic/model/ExtractBy.java similarity index 93% rename from webmagic-core/src/main/java/us/codecraft/webmagic/oo/ExtractBy.java rename to webmagic-core/src/main/java/us/codecraft/webmagic/model/ExtractBy.java index 71bdc937..4c37c9b9 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/ExtractBy.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/model/ExtractBy.java @@ -1,4 +1,4 @@ -package us.codecraft.webmagic.oo; +package us.codecraft.webmagic.model; import java.lang.annotation.ElementType; import java.lang.annotation.Retention; diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/ExtractByUrl.java b/webmagic-core/src/main/java/us/codecraft/webmagic/model/ExtractByUrl.java similarity index 92% rename from webmagic-core/src/main/java/us/codecraft/webmagic/oo/ExtractByUrl.java rename to webmagic-core/src/main/java/us/codecraft/webmagic/model/ExtractByUrl.java index e86f08f1..9f776765 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/ExtractByUrl.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/model/ExtractByUrl.java @@ -1,4 +1,4 @@ -package us.codecraft.webmagic.oo; +package us.codecraft.webmagic.model; import java.lang.annotation.ElementType; import java.lang.annotation.Retention; diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/Extractor.java b/webmagic-core/src/main/java/us/codecraft/webmagic/model/Extractor.java similarity index 95% rename from webmagic-core/src/main/java/us/codecraft/webmagic/oo/Extractor.java rename to webmagic-core/src/main/java/us/codecraft/webmagic/model/Extractor.java index f0607cfd..c8feef41 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/Extractor.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/model/Extractor.java @@ -1,4 +1,4 @@ -package us.codecraft.webmagic.oo; +package us.codecraft.webmagic.model; import us.codecraft.webmagic.selector.Selector; diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/FieldExtractor.java b/webmagic-core/src/main/java/us/codecraft/webmagic/model/FieldExtractor.java similarity index 96% rename from webmagic-core/src/main/java/us/codecraft/webmagic/oo/FieldExtractor.java rename to webmagic-core/src/main/java/us/codecraft/webmagic/model/FieldExtractor.java index 2a6bcf72..17a55c83 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/FieldExtractor.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/model/FieldExtractor.java @@ -1,4 +1,4 @@ -package us.codecraft.webmagic.oo; +package us.codecraft.webmagic.model; import us.codecraft.webmagic.selector.Selector; diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/HelpUrl.java b/webmagic-core/src/main/java/us/codecraft/webmagic/model/HelpUrl.java similarity index 91% rename from webmagic-core/src/main/java/us/codecraft/webmagic/oo/HelpUrl.java rename to webmagic-core/src/main/java/us/codecraft/webmagic/model/HelpUrl.java index a8ed9950..9dee05b2 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/HelpUrl.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/model/HelpUrl.java @@ -1,4 +1,4 @@ -package us.codecraft.webmagic.oo; +package us.codecraft.webmagic.model; import java.lang.annotation.ElementType; import java.lang.annotation.Retention; diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/OOSpider.java b/webmagic-core/src/main/java/us/codecraft/webmagic/model/OOSpider.java similarity index 97% rename from webmagic-core/src/main/java/us/codecraft/webmagic/oo/OOSpider.java rename to webmagic-core/src/main/java/us/codecraft/webmagic/model/OOSpider.java index 5f523ed3..e008bfe0 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/OOSpider.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/model/OOSpider.java @@ -1,4 +1,4 @@ -package us.codecraft.webmagic.oo; +package us.codecraft.webmagic.model; import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Spider; diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/ObjectPageProcessor.java b/webmagic-core/src/main/java/us/codecraft/webmagic/model/ObjectPageProcessor.java similarity index 98% rename from webmagic-core/src/main/java/us/codecraft/webmagic/oo/ObjectPageProcessor.java rename to webmagic-core/src/main/java/us/codecraft/webmagic/model/ObjectPageProcessor.java index a02e4466..5a707bc1 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/ObjectPageProcessor.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/model/ObjectPageProcessor.java @@ -1,4 +1,4 @@ -package us.codecraft.webmagic.oo; +package us.codecraft.webmagic.model; import us.codecraft.webmagic.Page; import us.codecraft.webmagic.Request; diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/ObjectPipeline.java b/webmagic-core/src/main/java/us/codecraft/webmagic/model/ObjectPipeline.java similarity index 97% rename from webmagic-core/src/main/java/us/codecraft/webmagic/oo/ObjectPipeline.java rename to webmagic-core/src/main/java/us/codecraft/webmagic/model/ObjectPipeline.java index 54ae2ef3..f5903846 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/ObjectPipeline.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/model/ObjectPipeline.java @@ -1,4 +1,4 @@ -package us.codecraft.webmagic.oo; +package us.codecraft.webmagic.model; import us.codecraft.webmagic.ResultItems; import us.codecraft.webmagic.Task; diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/PageModelExtractor.java b/webmagic-core/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java similarity index 99% rename from webmagic-core/src/main/java/us/codecraft/webmagic/oo/PageModelExtractor.java rename to webmagic-core/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java index d3d53350..6ba2c5ee 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/PageModelExtractor.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java @@ -1,4 +1,4 @@ -package us.codecraft.webmagic.oo; +package us.codecraft.webmagic.model; import org.apache.commons.lang3.StringUtils; import us.codecraft.webmagic.Page; diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/PageModelPipeline.java b/webmagic-core/src/main/java/us/codecraft/webmagic/model/PageModelPipeline.java similarity index 85% rename from webmagic-core/src/main/java/us/codecraft/webmagic/oo/PageModelPipeline.java rename to webmagic-core/src/main/java/us/codecraft/webmagic/model/PageModelPipeline.java index 7406cde2..bd3aa95a 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/PageModelPipeline.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/model/PageModelPipeline.java @@ -1,4 +1,4 @@ -package us.codecraft.webmagic.oo; +package us.codecraft.webmagic.model; import us.codecraft.webmagic.Task; diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/TargetUrl.java b/webmagic-core/src/main/java/us/codecraft/webmagic/model/TargetUrl.java similarity index 91% rename from webmagic-core/src/main/java/us/codecraft/webmagic/oo/TargetUrl.java rename to webmagic-core/src/main/java/us/codecraft/webmagic/model/TargetUrl.java index 77b5a822..96ca8648 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/TargetUrl.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/model/TargetUrl.java @@ -1,4 +1,4 @@ -package us.codecraft.webmagic.oo; +package us.codecraft.webmagic.model; import java.lang.annotation.ElementType; import java.lang.annotation.Retention; diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/oo/package.html b/webmagic-core/src/main/java/us/codecraft/webmagic/model/package.html similarity index 100% rename from webmagic-core/src/main/java/us/codecraft/webmagic/oo/package.html rename to webmagic-core/src/main/java/us/codecraft/webmagic/model/package.html diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/oo/OschinaBlog.java b/webmagic-core/src/test/java/us/codecraft/webmagic/model/OschinaBlog.java similarity index 85% rename from webmagic-core/src/test/java/us/codecraft/webmagic/oo/OschinaBlog.java rename to webmagic-core/src/test/java/us/codecraft/webmagic/model/OschinaBlog.java index 728f1433..1bb219f3 100644 --- a/webmagic-core/src/test/java/us/codecraft/webmagic/oo/OschinaBlog.java +++ b/webmagic-core/src/test/java/us/codecraft/webmagic/model/OschinaBlog.java @@ -1,4 +1,4 @@ -package us.codecraft.webmagic.oo; +package us.codecraft.webmagic.model; import us.codecraft.webmagic.Page; import us.codecraft.webmagic.Site; @@ -10,7 +10,7 @@ import java.util.List; * @date: 13-8-1
* Time: 下午10:18
*/ -@TargetUrl(value="http://my.oschina.net/flashsword/blog/*",sourceRegion = "//div[@class='BlogLinks']//a/@href") +@TargetUrl("http://my.oschina.net/flashsword/blog/*") public class OschinaBlog implements AfterExtractor { @ExtractBy("//title") @@ -27,6 +27,7 @@ public class OschinaBlog implements AfterExtractor { System.out.println("title:\t"+title); System.out.println("content:\t"+content); System.out.println("tags:\t" + tags); + page.setSkip(true); } public static void main(String[] args) { diff --git a/webmagic-core/src/test/java/us/codecraft/webmagic/oo/TestFetcher.java b/webmagic-core/src/test/java/us/codecraft/webmagic/model/TestFetcher.java similarity index 91% rename from webmagic-core/src/test/java/us/codecraft/webmagic/oo/TestFetcher.java rename to webmagic-core/src/test/java/us/codecraft/webmagic/model/TestFetcher.java index b7f2d29c..009d53ac 100644 --- a/webmagic-core/src/test/java/us/codecraft/webmagic/oo/TestFetcher.java +++ b/webmagic-core/src/test/java/us/codecraft/webmagic/model/TestFetcher.java @@ -1,4 +1,4 @@ -package us.codecraft.webmagic.oo; +package us.codecraft.webmagic.model; import org.junit.Ignore; import org.junit.Test; diff --git a/webmagic-samples/src/main/java/us/codecraft/webmagic/oo/samples/Blog.java b/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/Blog.java similarity index 80% rename from webmagic-samples/src/main/java/us/codecraft/webmagic/oo/samples/Blog.java rename to webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/Blog.java index e3e53642..484861b9 100644 --- a/webmagic-samples/src/main/java/us/codecraft/webmagic/oo/samples/Blog.java +++ b/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/Blog.java @@ -1,4 +1,4 @@ -package us.codecraft.webmagic.oo.samples; +package us.codecraft.webmagic.model.samples; /** * @author code4crafter@gmail.com
diff --git a/webmagic-samples/src/main/java/us/codecraft/webmagic/oo/samples/IteyeBlog.java b/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/IteyeBlog.java similarity index 82% rename from webmagic-samples/src/main/java/us/codecraft/webmagic/oo/samples/IteyeBlog.java rename to webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/IteyeBlog.java index 39597af6..4d01902c 100644 --- a/webmagic-samples/src/main/java/us/codecraft/webmagic/oo/samples/IteyeBlog.java +++ b/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/IteyeBlog.java @@ -1,9 +1,9 @@ -package us.codecraft.webmagic.oo.samples; +package us.codecraft.webmagic.model.samples; import us.codecraft.webmagic.Site; -import us.codecraft.webmagic.oo.ExtractBy; -import us.codecraft.webmagic.oo.OOSpider; -import us.codecraft.webmagic.oo.TargetUrl; +import us.codecraft.webmagic.model.ExtractBy; +import us.codecraft.webmagic.model.OOSpider; +import us.codecraft.webmagic.model.TargetUrl; /** * @author code4crafter@gmail.com
diff --git a/webmagic-samples/src/main/java/us/codecraft/webmagic/oo/samples/OschinaAnswer.java b/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/OschinaAnswer.java similarity index 90% rename from webmagic-samples/src/main/java/us/codecraft/webmagic/oo/samples/OschinaAnswer.java rename to webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/OschinaAnswer.java index 0a59b7df..df238739 100644 --- a/webmagic-samples/src/main/java/us/codecraft/webmagic/oo/samples/OschinaAnswer.java +++ b/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/OschinaAnswer.java @@ -1,8 +1,8 @@ -package us.codecraft.webmagic.oo.samples; +package us.codecraft.webmagic.model.samples; import us.codecraft.webmagic.Page; import us.codecraft.webmagic.Site; -import us.codecraft.webmagic.oo.*; +import us.codecraft.webmagic.model.*; /** * @author code4crafter@gmail.com
diff --git a/webmagic-samples/src/main/java/us/codecraft/webmagic/oo/samples/OschinaBlog.java b/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/OschinaBlog.java similarity index 82% rename from webmagic-samples/src/main/java/us/codecraft/webmagic/oo/samples/OschinaBlog.java rename to webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/OschinaBlog.java index 5224c853..9f11d0e0 100644 --- a/webmagic-samples/src/main/java/us/codecraft/webmagic/oo/samples/OschinaBlog.java +++ b/webmagic-samples/src/main/java/us/codecraft/webmagic/model/samples/OschinaBlog.java @@ -1,9 +1,9 @@ -package us.codecraft.webmagic.oo.samples; +package us.codecraft.webmagic.model.samples; import us.codecraft.webmagic.Site; -import us.codecraft.webmagic.oo.ExtractBy; -import us.codecraft.webmagic.oo.OOSpider; -import us.codecraft.webmagic.oo.TargetUrl; +import us.codecraft.webmagic.model.ExtractBy; +import us.codecraft.webmagic.model.OOSpider; +import us.codecraft.webmagic.model.TargetUrl; /** * @author code4crafter@gmail.com