From 5f1f4cbc4625c87ecbef5a60e957a05a7b4b7742 Mon Sep 17 00:00:00 2001 From: "yihua.huang" Date: Sat, 17 Aug 2013 20:41:29 +0800 Subject: [PATCH] update comments --- .../main/java/us/codecraft/webmagic/Page.java | 66 ++++++-------- .../java/us/codecraft/webmagic/Request.java | 55 ++++-------- .../us/codecraft/webmagic/ResultItems.java | 23 +++-- .../main/java/us/codecraft/webmagic/Site.java | 86 ++++++++++--------- .../java/us/codecraft/webmagic/Spider.java | 50 ++++++----- .../downloader/HttpClientDownloader.java | 2 +- .../java/us/codecraft/webmagic/package.html | 3 - .../webmagic/utils}/Experimental.java | 2 +- .../us/codecraft/webmagic/MultiPageModel.java | 2 +- .../webmagic/downloader/FileCache.java | 2 +- .../us/codecraft/webmagic/model/HasKey.java | 2 +- .../model/annotation/ComboExtract.java | 7 +- .../webmagic/model/annotation/ExtractBy.java | 10 ++- .../webmagic/pipeline/MultiPagePipeline.java | 2 +- .../scheduler/FileCacheQueueScheduler.java | 2 +- 15 files changed, 152 insertions(+), 162 deletions(-) rename {webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation => webmagic-core/src/main/java/us/codecraft/webmagic/utils}/Experimental.java (71%) diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Page.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Page.java index a894269b..2516dd10 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/Page.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Page.java @@ -8,30 +8,19 @@ import java.util.ArrayList; import java.util.List; /** - *
- * Page保存了上一次抓取的结果,并可定义待抓取的链接内容。
  *
- *     主要方法:
- *     {@link #getUrl()} 获取页面的Url
- *     {@link #getHtml()}  获取页面的html内容
- *     {@link #putField(String, Object)}  保存抽取的结果
- *     {@link #getResultItems()} 获取抽取的结果,在 {@link us.codecraft.webmagic.pipeline.Pipeline} 中调用
- *     {@link #addTargetRequests(java.util.List)} {@link #addTargetRequest(String)} 添加待抓取的链接
- *
- * 
- *
- * Store extracted result and urls to be crawled.
- *
- *     Main method:
- *     {@link #getUrl()} get url of current page
- *     {@link #getHtml()}  get content of current page
- *     {@link #putField(String, Object)}  save extracted result
- *     {@link #getResultItems()} get extract results to be used in {@link us.codecraft.webmagic.pipeline.Pipeline}
- *     {@link #addTargetRequests(java.util.List)} {@link #addTargetRequest(String)} add urls to crawl
- *
- * 
+ * Object storing extracted result and urls to be crawled.
+ * Main method:
+ * {@link #getUrl()} get url of current page
+ * {@link #getHtml()} get content of current page
+ * {@link #putField(String, Object)} save extracted result
+ * {@link #getResultItems()} get extract results to be used in {@link us.codecraft.webmagic.pipeline.Pipeline}
+ * {@link #addTargetRequests(java.util.List)} {@link #addTargetRequest(String)} add urls to crawl
* * @author code4crafter@gmail.com
+ * @since 0.1.0 + * @see us.codecraft.webmagic.downloader.Downloader + * @see us.codecraft.webmagic.processor.PageProcessor */ public class Page { @@ -55,19 +44,19 @@ public class Page { } /** + * store extract results * - * - * @param key 结果的key - * @param field 结果的value + * @param key + * @param field */ public void putField(String key, Object field) { resultItems.put(key, field); } /** - * 获取页面的html内容 + * get html content of page * - * @return html 页面的html内容 + * @return html */ public Selectable getHtml() { return html; @@ -82,9 +71,9 @@ public class Page { } /** - * 添加待抓取的链接 + * add urls to crawl * - * @param requests 待抓取的链接 + * @param requests */ public void addTargetRequests(List requests) { synchronized (targetRequests) { @@ -99,9 +88,9 @@ public class Page { } /** - * 添加待抓取的链接 + * add url to crawl * - * @param requestString 待抓取的链接 + * @param requestString */ public void addTargetRequest(String requestString) { if (StringUtils.isBlank(requestString) || requestString.equals("#")) { @@ -114,9 +103,9 @@ public class Page { } /** - * 添加待抓取的页面,在需要传递附加信息时使用 + * add requests to crawl * - * @param request 待抓取的页面 + * @param request */ public void addTargetRequest(Request request) { synchronized (targetRequests) { @@ -125,27 +114,22 @@ public class Page { } /** - * 获取页面的Url + * get url of current page * - * @return url 当前页面的url,可用于抽取 + * @return url of current page */ public Selectable getUrl() { return url; } - /** - * 设置url - * - * @param url - */ public void setUrl(Selectable url) { this.url = url; } /** - * 获取抓取请求 + * get request of current page * - * @return request 抓取请求 + * @return request */ public Request getRequest() { return request; diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java index b9b8ddf6..fd7f60c9 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java @@ -1,33 +1,17 @@ 
package us.codecraft.webmagic; +import us.codecraft.webmagic.utils.Experimental; + import java.io.Serializable; import java.util.HashMap; import java.util.Map; /** - *
- * Request对象封装了待抓取的url信息。
- * 在PageProcessor中,Request对象可以通过{@link us.codecraft.webmagic.Page#getRequest()} 获取。
- *
- * Request对象包含一个extra属性,可以写入一些必须的上下文,这个特性在某些场合会有用。
- *
- *      Example:
- *          抓取${linktext}时,希望提取链接link,并保存linktext的信息。
- *      在上一个页面:
- *      public void process(Page page){
- *          Request request = new Request(link,linktext);
- *          page.addTargetRequest(request)
- *      }
- *      在下一个页面:
- *      public void process(Page page){
- *          String linktext =  (String)page.getRequest().getExtra()[0];
- *      }
- * 
- *
+ * Object contains url to crawl.
+ * It contains some additional information.
* * @author code4crafter@gmail.com
- * Date: 13-4-21 - * Time: 上午11:37 + * @since 0.1.0 */ public class Request implements Serializable { @@ -36,20 +20,22 @@ public class Request implements Serializable { private String url; /** - * 额外参数,可以保存一些需要的上下文信息 + * Store additional information in extras. */ private Map extras; + /** + * Priority of the request.
+ * The bigger the priority is, the earlier the request will be processed.<br>
+ * Need a scheduler supporting priority.
+ * But no scheduler in webmagic supporting priority now (: + */ + @Experimental private double priority; public Request() { } - /** - * 构建一个request对象 - * - * @param url 必须参数,待抓取的url - */ public Request(String url) { this.url = url; } @@ -59,12 +45,14 @@ public class Request implements Serializable { } /** - * 设置优先级,用于URL队列排序
- * 需扩展Scheduler
- * 目前还没有对应支持优先级的Scheduler实现 =。=
- * @param priority 优先级,越大则越靠前 + * Set the priority of request for sorting.
+ * Need a scheduler supporting priority.
+ * But no scheduler in webmagic supporting priority now (: + * + * @param priority * @return this */ + @Experimental public Request setPriority(double priority) { this.priority = priority; return this; @@ -85,11 +73,6 @@ public class Request implements Serializable { return this; } - /** - * 获取待抓取的url - * - * @return url 待抓取的url - */ public String getUrl() { return url; } diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/ResultItems.java b/webmagic-core/src/main/java/us/codecraft/webmagic/ResultItems.java index 7a8e5c39..e0552709 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/ResultItems.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/ResultItems.java @@ -4,10 +4,13 @@ import java.util.HashMap; import java.util.Map; /** - * 保存抽取结果的类,由PageProcessor处理得到,传递给{@link us.codecraft.webmagic.pipeline.Pipeline}进行持久化。
+ * Object contains extract results.
+ * It is contained in Page and will be processed in pipeline. + * * @author code4crafter@gmail.com
- * Date: 13-7-25
- * Time: 下午12:20
+ * @since 0.1.0 + * @see Page + * @see us.codecraft.webmagic.pipeline.Pipeline */ public class ResultItems { @@ -25,7 +28,7 @@ public class ResultItems { return (T) fields.get(key); } - public Map getAll() { + public Map getAll() { return fields; } @@ -44,8 +47,10 @@ public class ResultItems { } /** - * 是否忽略这个页面,用于pipeline来判断是否对这个页面进行处理 - * @return 是否忽略 true 忽略 + * Whether to skip the result.
+ * Result which is skipped will not be processed by Pipeline. + * + * @return whether to skip the result */ public boolean isSkip() { return skip; @@ -53,8 +58,10 @@ public class ResultItems { /** - * 设置是否忽略这个页面,用于pipeline来判断是否对这个页面进行处理 - * @param skip + * Set whether to skip the result.
+ * Result which is skipped will not be processed by Pipeline. + * + * @param skip whether to skip the result * @return this */ public ResultItems setSkip(boolean skip) { diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java index 9ab97fe8..443f2bba 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Site.java @@ -5,12 +5,11 @@ import us.codecraft.webmagic.utils.UrlUtils; import java.util.*; /** - * Site定义一个待抓取的站点的各种信息。
- * 这个类的所有getter方法,一般都只会被爬虫框架内部进行调用。
+ * Object contains setting for crawler.
* * @author code4crafter@gmail.com
- * Date: 13-4-21 - * Time: 下午12:13 + * @since 0.1.0 + * @see us.codecraft.webmagic.processor.PageProcessor */ public class Site { @@ -22,6 +21,9 @@ public class Site { private String charset; + /** + * startUrls is the urls the crawler to start with. + */ private List startUrls = new ArrayList(); private int sleepTime = 3000; @@ -37,19 +39,19 @@ public class Site { } /** - * 创建一个Site对象,等价于new Site() + * new a Site * - * @return 新建的对象 + * @return new site */ public static Site me() { return new Site(); } /** - * 为这个站点添加一个cookie,可用于抓取某些需要登录访问的站点。这个cookie的域名与{@link #getDomain()}是一致的 + * Add a cookie with domain {@link #getDomain()} * - * @param name cookie的名称 - * @param value cookie的值 + * @param name + * @param value * @return this */ public Site addCookie(String name, String value) { @@ -58,7 +60,7 @@ public class Site { } /** - * 为这个站点设置user-agent,很多网站都对user-agent进行了限制,不设置此选项可能会得到期望之外的结果。 + * set user agent * * @param userAgent userAgent * @return this @@ -69,27 +71,27 @@ public class Site { } /** - * 获取已经设置的所有cookie + * get cookies * - * @return 已经设置的所有cookie + * @return get cookies */ public Map getCookies() { return cookies; } /** - * 获取已设置的user-agent + * get user agent * - * @return 已设置的user-agent + * @return user agent */ public String getUserAgent() { return userAgent; } /** - * 获取已设置的domain + * get domain * - * @return 已设置的domain + * @return get domain */ public String getDomain() { if (domain == null) { @@ -101,10 +103,9 @@ public class Site { } /** - * 设置这个站点所在域名,必须项。
- * 目前不支持多个域名的抓取。抓取多个域名请新建一个Spider。 + * set the domain of site. * - * @param domain 爬虫会抓取的域名 + * @param domain * @return this */ public Site setDomain(String domain) { @@ -113,10 +114,10 @@ public class Site { } /** - * 设置页面编码,若不设置则自动根据Html meta信息获取。
- * 一般无需设置encoding,如果发现下载的结果是乱码,则可以设置此项。
+ * Set charset of page manually.
+ * When charset is not set or set to null, it can be auto detected by Http header. * - * @param charset 编码格式,主要是"utf-8"、"gbk"两种 + * @param charset * @return this */ public Site setCharset(String charset) { @@ -125,20 +126,21 @@ public class Site { } /** - * 获取已设置的编码 + * get charset set manually * - * @return 已设置的domain + * @return charset */ public String getCharset() { return charset; } /** - * 设置可接受的http状态码,仅当状态码在这个集合中时,才会读取页面内容。
- * 默认为200,正常情况下,无须设置此项。
- * 某些站点会错误的返回状态码,此时可以对这个选项进行设置。
+ * Set acceptStatCode.
+ * When status code of http response is in acceptStatCodes, it will be processed.
+ * {200} by default.
+ * It is not necessary to set it.<br>
* - * @param acceptStatCode 可接受的状态码 + * @param acceptStatCode * @return this */ public Site setAcceptStatCode(Set acceptStatCode) { @@ -147,27 +149,27 @@ public class Site { } /** - * 获取可接受的状态码 + * get acceptStatCode * - * @return 可接受的状态码 + * @return acceptStatCode */ public Set getAcceptStatCode() { return acceptStatCode; } /** - * 获取初始页面的地址列表 + * get start urls * - * @return 初始页面的地址列表 + * @return start urls */ public List getStartUrls() { return startUrls; } /** - * 增加初始页面的地址,可反复调用此方法增加多个初始地址。 + * Add a url to start url.
* - * @param startUrl 初始页面的地址 + * @param startUrl * @return this */ public Site addStartUrl(String startUrl) { @@ -176,9 +178,10 @@ public class Site { } /** - * 设置两次抓取之间的间隔,避免对目标站点压力过大(或者避免被防火墙屏蔽...)。 + * Set the interval between the processing of two pages.
+ * Time unit is milliseconds.<br>
* - * @param sleepTime 单位毫秒 + * @param sleepTime * @return this */ public Site setSleepTime(int sleepTime) { @@ -187,25 +190,26 @@ public class Site { } /** - * 获取两次抓取之间的间隔 + * Get the interval between the processing of two pages.
+ * Time unit is milliseconds.<br>
* - * @return 两次抓取之间的间隔,单位毫秒 + * @return the interval between the processing of two pages, */ public int getSleepTime() { return sleepTime; } /** - * 获取重新下载的次数,默认为0 + * Get retry times when download fail, 0 by default.
* - * @return 重新下载的次数 + * @return retry times when download fail */ public int getRetryTimes() { return retryTimes; } /** - * 设置获取重新下载的次数,默认为0 + * Set retry times when download fail, 0 by default.
* * @return this */ diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java index e23a8e70..ade2194c 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java @@ -18,25 +18,30 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.atomic.AtomicInteger; /** - *
- * webmagic爬虫的入口类。
- *
- * 示例:
- * 定义一个最简单的爬虫:
- *      Spider.create(new SimplePageProcessor("http://my.oschina.net/", "http://my.oschina.net/*blog/*")).run();
- *
- * 使用FilePipeline保存结果到文件:
- *      Spider.create(new SimplePageProcessor("http://my.oschina.net/", "http://my.oschina.net/*blog/*"))
- *          .pipeline(new FilePipeline("/data/temp/webmagic/")).run();
- *
- * 使用FileCacheQueueScheduler缓存URL,关闭爬虫后下次自动从停止的页面继续抓取:
- *      Spider.create(new SimplePageProcessor("http://my.oschina.net/", "http://my.oschina.net/*blog/*"))
- *          .scheduler(new FileCacheQueueScheduler("/data/temp/webmagic/cache/")).run();
- * 
+ * Entrance of a crawler.
+ * A spider contains four modules: Downloader, Scheduler, PageProcessor and Pipeline.
+ * Every module is a field of Spider.
+ * The modules are defined in interface.
+ * You can customize a spider with various implementations of them.
+ * Examples:
+ *
+ * A simple crawler:
+ * Spider.create(new SimplePageProcessor("http://my.oschina.net/", "http://my.oschina.net/*blog/*")).run();
+ *
+ * Store results to files by FilePipeline:
+ * Spider.create(new SimplePageProcessor("http://my.oschina.net/", "http://my.oschina.net/*blog/*"))
+ * .pipeline(new FilePipeline("/data/temp/webmagic/")).run();
+ *
+ * Use FileCacheQueueScheduler to store urls and cursor in files, so that a Spider can resume the status when shutdown.
+ * Spider.create(new SimplePageProcessor("http://my.oschina.net/", "http://my.oschina.net/*blog/*"))
+ * .scheduler(new FileCacheQueueScheduler("/data/temp/webmagic/cache/")).run();
* * @author code4crafter@gmail.com
- * Date: 13-4-21 - * Time: 上午6:53 + * @see Downloader + * @see Scheduler + * @see PageProcessor + * @see Pipeline + * @since 0.1.0 */ public class Spider implements Runnable, Task { @@ -222,11 +227,12 @@ public class Spider implements Runnable, Task { /** * 用某些特定URL进行爬虫测试 + * * @param urls 要抓取的url */ - public void test(String... urls){ + public void test(String... urls) { checkComponent(); - if (urls.length>0){ + if (urls.length > 0) { for (String url : urls) { processRequest(new Request(url)); } @@ -241,7 +247,7 @@ public class Spider implements Runnable, Task { } pageProcessor.process(page); addRequest(page); - if (!page.getResultItems().isSkip()){ + if (!page.getResultItems().isSkip()) { for (Pipeline pipeline : pipelines) { pipeline.process(page.getResultItems(), this); } @@ -298,8 +304,8 @@ public class Spider implements Runnable, Task { return this; } - public Spider clearPipeline(){ - pipelines=new ArrayList(); + public Spider clearPipeline() { + pipelines = new ArrayList(); return this; } diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java index d555c5ed..fd680219 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/downloader/HttpClientDownloader.java @@ -38,7 +38,7 @@ public class HttpClientDownloader implements Downloader { * 直接下载页面的简便方法 * * @param url - * @return + * @return html */ public Html download(String url) { Page page = download(new Request(url), null); diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/package.html b/webmagic-core/src/main/java/us/codecraft/webmagic/package.html index 05328dcb..491afd93 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/package.html +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/package.html @@ -2,9 +2,6 @@
Main class "Spider" and models. -
-
-包括webmagic入口类Spider和一些数据传递的实体类。
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/Experimental.java b/webmagic-core/src/main/java/us/codecraft/webmagic/utils/Experimental.java similarity index 71% rename from webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/Experimental.java rename to webmagic-core/src/main/java/us/codecraft/webmagic/utils/Experimental.java index f619d125..265f869f 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/Experimental.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/utils/Experimental.java @@ -1,4 +1,4 @@ -package us.codecraft.webmagic.model.annotation; +package us.codecraft.webmagic.utils; /** * @author code4crafter@gmail.com
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/MultiPageModel.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/MultiPageModel.java index 88caf3ec..9190495c 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/MultiPageModel.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/MultiPageModel.java @@ -1,6 +1,6 @@ package us.codecraft.webmagic; -import us.codecraft.webmagic.model.annotation.Experimental; +import us.codecraft.webmagic.utils.Experimental; import java.util.Collection; diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/FileCache.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/FileCache.java index 163c75ba..154667c7 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/FileCache.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/downloader/FileCache.java @@ -4,7 +4,7 @@ import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.lang3.StringUtils; import org.apache.log4j.Logger; import us.codecraft.webmagic.*; -import us.codecraft.webmagic.model.annotation.Experimental; +import us.codecraft.webmagic.utils.Experimental; import us.codecraft.webmagic.pipeline.Pipeline; import us.codecraft.webmagic.processor.PageProcessor; import us.codecraft.webmagic.processor.SimplePageProcessor; diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/HasKey.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/HasKey.java index 3a8e6e2a..e068d04c 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/HasKey.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/HasKey.java @@ -1,6 +1,6 @@ package us.codecraft.webmagic.model; -import us.codecraft.webmagic.model.annotation.Experimental; +import us.codecraft.webmagic.utils.Experimental; /** * Interface to be implemented by page mode.
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ComboExtract.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ComboExtract.java index 02fa25b4..5268a254 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ComboExtract.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ComboExtract.java @@ -21,7 +21,7 @@ public @interface ComboExtract { */ ExtractBy[] value(); - enum Op { + public static enum Op { /** * All extractors will be arranged as a pipeline.
* The next extractor uses the result of the previous as source. @@ -49,7 +49,10 @@ public @interface ComboExtract { */ boolean notNull() default false; - public enum Source { + /** + * types of source for extracting. + */ + public static enum Source { /** * extract from the content extracted by class extractor */ diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ExtractBy.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ExtractBy.java index 9e0ea18e..4bbebf68 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ExtractBy.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/model/annotation/ExtractBy.java @@ -21,7 +21,10 @@ public @interface ExtractBy { */ String value(); - public enum Type {XPath, Regex, Css} + /** + * types of extractor expressions + */ + public static enum Type {XPath, Regex, Css} /** * Extractor type, support XPath, CSS Selector and regex. @@ -38,7 +41,10 @@ public @interface ExtractBy { */ boolean notNull() default false; - public enum Source { + /** + * types of source for extracting. 
+ */ + public static enum Source { /** * extract from the content extracted by class extractor */ diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/MultiPagePipeline.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/MultiPagePipeline.java index 81c684b6..5806602c 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/MultiPagePipeline.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/pipeline/MultiPagePipeline.java @@ -3,7 +3,7 @@ package us.codecraft.webmagic.pipeline; import us.codecraft.webmagic.MultiPageModel; import us.codecraft.webmagic.ResultItems; import us.codecraft.webmagic.Task; -import us.codecraft.webmagic.model.annotation.Experimental; +import us.codecraft.webmagic.utils.Experimental; import us.codecraft.webmagic.utils.DoubleKeyMap; import java.util.*; diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler.java index 3f691cd2..b646b0f4 100644 --- a/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler.java +++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler.java @@ -16,7 +16,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; /** - * Store urls and cursor in files so that a Spider can resume the status when shutdown。
+ * Store urls and cursor in files so that a Spider can resume the status when shutdown.
* * @author code4crafter@gmail.com
* @since 0.2.0