diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/schedular/Scheduler.java b/webmagic-core/src/main/java/us/codecraft/webmagic/schedular/Scheduler.java index bf440baf..8d9649be 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/schedular/Scheduler.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/schedular/Scheduler.java @@ -22,7 +22,7 @@ public interface Scheduler { /** * 返回下一个要抓取的链接 * @param task 定义的任务,以满足单Scheduler多Task的情况 - * @return + * @return 下一个要抓取的链接 */ public Request poll(Task task); diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Html.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Html.java index 099f5074..114eef99 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Html.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Html.java @@ -4,6 +4,7 @@ import java.util.ArrayList; import java.util.List; /** + * 可抽取的html文本。
* @author code4crafter@gmail.com
* Date: 13-4-21 * Time: 上午7:54 diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/PlainText.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/PlainText.java index 0137de81..9e8d1941 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/PlainText.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/PlainText.java @@ -6,6 +6,7 @@ import java.util.ArrayList; import java.util.List; /** + * 可抽取的纯文本,不包括xpath和css selector实现。
* @author code4crafter@gmail.com
* Date: 13-4-21 * Time: 上午7:54 diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/RegexSelector.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/RegexSelector.java index 49fbffd0..e95138b7 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/RegexSelector.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/RegexSelector.java @@ -9,6 +9,7 @@ import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; /** + * 正则表达式抽取器。
* @author code4crafter@gmail.com
* Date: 13-4-21 * Time: 上午7:09 diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/ReplaceSelector.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/ReplaceSelector.java index 1ce7c4d5..38b95f78 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/ReplaceSelector.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/ReplaceSelector.java @@ -6,6 +6,7 @@ import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; /** + * 对文本进行替换。
* @author code4crafter@gmail.com
* Date: 13-4-21 * Time: 上午7:09 diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selectable.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selectable.java index 932115cd..1b0ba10a 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selectable.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selectable.java @@ -3,6 +3,7 @@ package us.codecraft.webmagic.selector; import java.util.List; /** + * 可进行抽取的文本。
* @author code4crafter@gmail.com
* Date: 13-4-20 * Time: 下午7:51 @@ -20,8 +21,8 @@ public interface Selectable { /** * select list with css selector * - * @param - * @return + * @param selector css selector expression + * @return new Selectable after extraction */ public Selectable $(String selector); diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selector.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selector.java index f7771cfb..845c0b6c 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selector.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selector.java @@ -3,6 +3,7 @@ package us.codecraft.webmagic.selector; import java.util.List; /** + * 抽取器。
* @author code4crafter@gmail.com
* Date: 13-4-20 * Time: 下午8:02 diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/SelectorFactory.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/SelectorFactory.java index 2fa78d32..1dd56e01 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/SelectorFactory.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/SelectorFactory.java @@ -7,6 +7,7 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; /** + * 产生selector的工厂。
* @author code4crafter@gmail.com
* Date: 13-4-21 * Time: 上午7:56 diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/utils/ThreadUtils.java b/webmagic-core/src/main/java/us/codecraft/webmagic/utils/ThreadUtils.java index ebe61198..d6876c71 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/utils/ThreadUtils.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/utils/ThreadUtils.java @@ -6,6 +6,7 @@ import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; /** + * 线程工具类。
* @author code4crafer@gmail.com * Date: 13-6-23 * Time: 下午7:11 diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/utils/UrlUtils.java b/webmagic-core/src/main/java/us/codecraft/webmagic/utils/UrlUtils.java index 0b7201d6..9f038bc8 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/utils/UrlUtils.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/utils/UrlUtils.java @@ -6,6 +6,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; /** + * url及html处理工具类。
* @author code4crafter@gmail.com
* Date: 13-4-21 * Time: 下午1:52 @@ -18,7 +19,7 @@ public class UrlUtils { * 将url想对地址转化为绝对地址 * @param url url地址 * @param refer url地址来自哪个页面 - * @return + * @return url绝对地址 */ public static String canonicalizeUrl(String url, String refer) { if (StringUtils.isBlank(url) || StringUtils.isBlank(refer)) {