From 586d23ef6387e0f5905de2bd53329ae456b0f3b1 Mon Sep 17 00:00:00 2001 From: "yihua.huang" Date: Wed, 19 Jun 2013 08:20:21 +0800 Subject: [PATCH] add package infos --- .../main/java/us/codecraft/webmagic/downloader/package.html | 5 +++++ .../src/main/java/us/codecraft/webmagic/package.html | 5 +++++ .../main/java/us/codecraft/webmagic/pipeline/package.html | 5 +++++ .../main/java/us/codecraft/webmagic/processor/package.html | 5 +++++ .../main/java/us/codecraft/webmagic/schedular/package.html | 5 +++++ .../main/java/us/codecraft/webmagic/selector/Selector.java | 2 +- .../main/java/us/codecraft/webmagic/selector/package.html | 5 +++++ .../src/main/java/us/codecraft/webmagic/utils/package.html | 5 +++++ 8 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 webmagic-core/src/main/java/us/codecraft/webmagic/downloader/package.html create mode 100644 webmagic-core/src/main/java/us/codecraft/webmagic/package.html create mode 100644 webmagic-core/src/main/java/us/codecraft/webmagic/pipeline/package.html create mode 100644 webmagic-core/src/main/java/us/codecraft/webmagic/processor/package.html create mode 100644 webmagic-core/src/main/java/us/codecraft/webmagic/schedular/package.html create mode 100644 webmagic-core/src/main/java/us/codecraft/webmagic/selector/package.html create mode 100644 webmagic-core/src/main/java/us/codecraft/webmagic/utils/package.html diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/package.html b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/package.html new file mode 100644 index 00000000..cae5560e --- /dev/null +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/package.html @@ -0,0 +1,5 @@ + + +包含了页面下载的接口Downloader和实现类HttpClientDownloader,该实现类封装了HttpComponent库。 + + diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/package.html b/webmagic-core/src/main/java/us/codecraft/webmagic/package.html new file mode 100644 index 00000000..d5ff540a --- /dev/null +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/package.html @@ -0,0 +1,5 @@ + + +包括webmagic入口类Spider和一些数据传递的实体类。 + + diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/pipeline/package.html b/webmagic-core/src/main/java/us/codecraft/webmagic/pipeline/package.html new file mode 100644 index 00000000..498183eb --- /dev/null +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/pipeline/package.html @@ -0,0 +1,5 @@ + + +包含了处理页面抽取结果的接口Pipeline和它的几个实现类。 + + diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/processor/package.html b/webmagic-core/src/main/java/us/codecraft/webmagic/processor/package.html new file mode 100644 index 00000000..47274a1f --- /dev/null +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/processor/package.html @@ -0,0 +1,5 @@ + + +包含了封装页面处理逻辑的接口PageProcessor和一个实现类SimplePageProcessor。实现PageProcessor即可定制一个自己的爬虫。 + + diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/schedular/package.html b/webmagic-core/src/main/java/us/codecraft/webmagic/schedular/package.html new file mode 100644 index 00000000..0e35610f --- /dev/null +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/schedular/package.html @@ -0,0 +1,5 @@ + + +包含url管理和调度的接口Schedular及它的几个实现类。 + + diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selector.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selector.java index 35632b3a..f7771cfb 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selector.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selector.java @@ -7,7 +7,7 @@ import java.util.List; * Date: 13-4-20 * Time: 下午8:02 */ -public interface Selector { +interface Selector { public String select(String text); diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/package.html b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/package.html new file mode 100644 index 00000000..3c9ef7b2 --- /dev/null +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/package.html @@ -0,0 +1,5 @@ + + +提供了便捷抽取页面内容的工具,对外核心接口是Selectable,内部抽取则是通过实现Selector来定制。 + + diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/utils/package.html b/webmagic-core/src/main/java/us/codecraft/webmagic/utils/package.html new file mode 100644 index 00000000..bfbe8dfc --- /dev/null +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/utils/package.html @@ -0,0 +1,5 @@ + + +提供一些处理链接的静态工具类。 + +