diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/model/ModelPageProcessor.java b/webmagic-core/src/main/java/us/codecraft/webmagic/model/ModelPageProcessor.java index 1fd8c10d..12e85d07 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/model/ModelPageProcessor.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/model/ModelPageProcessor.java @@ -14,11 +14,12 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; /** + * 基于PageProcessor的扩展点。
* @author code4crafter@gmail.com
* @date: 13-8-1
* Time: 下午8:46
*/ -public class ModelPageProcessor implements PageProcessor { +class ModelPageProcessor implements PageProcessor { private List pageModelExtractorList = new ArrayList(); diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/model/ModelPipeline.java b/webmagic-core/src/main/java/us/codecraft/webmagic/model/ModelPipeline.java index 439a6291..f9b0015a 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/model/ModelPipeline.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/model/ModelPipeline.java @@ -10,11 +10,12 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; /** + * 基于Pipeline的扩展点。
* @author code4crafter@gmail.com
* @date: 13-8-2
* Time: 上午10:47
*/ -public class ModelPipeline implements Pipeline { +class ModelPipeline implements Pipeline { private Map pageModelPipelines = new ConcurrentHashMap(); diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/model/OOSpider.java b/webmagic-core/src/main/java/us/codecraft/webmagic/model/OOSpider.java index a76144a4..8a3739d1 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/model/OOSpider.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/model/OOSpider.java @@ -4,17 +4,13 @@ import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Spider; /** + * 基于Model的Spider,封装后的入口类。
* @author code4crafter@gmail.com
* @date: 13-8-3
* Time: 上午9:51
*/ public class OOSpider extends Spider { - /** - * OOSpider只能由ObjectPageProcessor创建。 - * - * @param pageProcessor 已定义的抽取规则 - */ private ModelPageProcessor modelPageProcessor; private ModelPipeline modelPipeline; @@ -24,6 +20,12 @@ public class OOSpider extends Spider { this.modelPageProcessor = modelPageProcessor; } + /** + * 创建一个爬虫。
+ * @param site + * @param pageModelPipeline + * @param pageModels + */ public OOSpider(Site site, PageModelPipeline pageModelPipeline, Class... pageModels) { this(ModelPageProcessor.create(site, pageModels)); this.modelPipeline = new ModelPipeline(); diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java b/webmagic-core/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java index 6ba2c5ee..cf0eeacc 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/model/PageModelExtractor.java @@ -13,6 +13,7 @@ import java.util.List; import java.util.regex.Pattern; /** + * Model主要逻辑类。将一个带注解的POJO转换为一个PageModelExtractor。
* @author code4crafter@gmail.com
* @date: 13-8-1
* Time: 下午9:33