diff --git a/zh_docs/us/codecraft/webmagic/Page-cmnt.xml b/zh_docs/us/codecraft/webmagic/Page-cmnt.xml new file mode 100644 index 00000000..777f0b03 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/Page-cmnt.xml @@ -0,0 +1,91 @@ + + + +Sat Aug 17 14:14:45 CST 2013 + + + + + Page保存了上一次抓取的结果,并可定义待抓取的链接内容。 + + 主要方法: + {@link #getUrl()} 获取页面的Url + {@link #getHtml()} 获取页面的html内容 + {@link #putField(String, Object)} 保存抽取的结果 + {@link #getResultItems()} 获取抽取的结果,在 {@link us.codecraft.webmagic.pipeline.Pipeline} 中调用 + {@link #addTargetRequests(java.util.List)} {@link #addTargetRequest(String)} 添加待抓取的链接 + + +
+ Store extracted result and urls to be crawled.
+
+     Main methods:
+     {@link #getUrl()} get url of current page
+     {@link #getHtml()}  get content of current page
+     {@link #putField(String, Object)}  save extracted result
+     {@link #getResultItems()} get extracted results to be used in {@link us.codecraft.webmagic.pipeline.Pipeline}
+     {@link #addTargetRequests(java.util.List)} {@link #addTargetRequest(String)} add urls to crawl
+
+ 
+ + @author code4crafter@gmail.com
+]]>
+
+ + + + + + + + + +)]]> + + + + + + + + + + + + + + + + + + + + + + +
diff --git a/zh_docs/us/codecraft/webmagic/PagedModel-cmnt.xml b/zh_docs/us/codecraft/webmagic/PagedModel-cmnt.xml new file mode 100644 index 00000000..a57d07b1 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/PagedModel-cmnt.xml @@ -0,0 +1,14 @@ + + + +Sat Aug 17 14:14:45 CST 2013 + + + + + @author code4crafter@gmail.com
+ Date: 13-8-4
+ Time: 下午5:18
+]]>
+
+
diff --git a/zh_docs/us/codecraft/webmagic/Request-cmnt.xml b/zh_docs/us/codecraft/webmagic/Request-cmnt.xml new file mode 100644 index 00000000..987da7b3 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/Request-cmnt.xml @@ -0,0 +1,56 @@ + + + +Sat Aug 17 14:14:45 CST 2013 + + + + + Request对象封装了待抓取的url信息。
+ 在PageProcessor中,Request对象可以通过{@link us.codecraft.webmagic.Page#getRequest()} 获取。
+
+ Request对象包含一个extra属性,可以写入一些必须的上下文,这个特性在某些场合会有用。
+
+      Example:
+          抓取${linktext}时,希望提取链接link,并保存linktext的信息。
+      在上一个页面:
+      public void process(Page page){
+          Request request = new Request(link,linktext);
+          page.addTargetRequest(request);
+      }
+      在下一个页面:
+      public void process(Page page){
+          String linktext =  (String)page.getRequest().getExtra()[0];
+      }
+ 
+ + + @author code4crafter@gmail.com
+ Date: 13-4-21 + Time: 上午11:37 +]]>
+
+ + + + + + + + 需扩展Scheduler
+ 目前还没有对应支持优先级的Scheduler实现 =。=
+ @param priority 优先级,越大则越靠前 + @return this +]]>
+
+ + + + +
diff --git a/zh_docs/us/codecraft/webmagic/ResultItems-cmnt.xml b/zh_docs/us/codecraft/webmagic/ResultItems-cmnt.xml new file mode 100644 index 00000000..7c7859cb --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/ResultItems-cmnt.xml @@ -0,0 +1,27 @@ + + + +Sat Aug 17 14:14:45 CST 2013 + + + + + @author code4crafter@gmail.com
+ Date: 13-7-25
+ Time: 下午12:20
+]]>
+
+ + + + + + + + +
diff --git a/zh_docs/us/codecraft/webmagic/Site-cmnt.xml b/zh_docs/us/codecraft/webmagic/Site-cmnt.xml new file mode 100644 index 00000000..4f5d6676 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/Site-cmnt.xml @@ -0,0 +1,147 @@ + + + +Sat Aug 17 14:14:45 CST 2013 + + + + + 这个类的所有getter方法,一般都只会被爬虫框架内部进行调用。
+ + @author code4crafter@gmail.com
+ Date: 13-4-21 + Time: 下午12:13 +]]>
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + 目前不支持多个域名的抓取。抓取多个域名请新建一个Spider。 + + @param domain 爬虫会抓取的域名 + @return this +]]> + + + + + 一般无需设置encoding,如果发现下载的结果是乱码,则可以设置此项。
+ + @param charset 编码格式,主要是"utf-8"、"gbk"两种 + @return this +]]>
+
+ + + + + +)]]> + + 默认为200,正常情况下,无须设置此项。
+ 某些站点会错误地返回状态码,此时可以对这个选项进行设置。
+ + @param acceptStatCode 可接受的状态码 + @return this +]]>
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
diff --git a/zh_docs/us/codecraft/webmagic/Spider-cmnt.xml b/zh_docs/us/codecraft/webmagic/Spider-cmnt.xml new file mode 100644 index 00000000..70041375 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/Spider-cmnt.xml @@ -0,0 +1,90 @@ + + + +Sat Aug 17 14:14:45 CST 2013 + + + + + webmagic爬虫的入口类。 + + 示例: + 定义一个最简单的爬虫: + Spider.create(new SimplePageProcessor("http://my.oschina.net/", "http://my.oschina.net/*blog/*")).run(); + + 使用FilePipeline保存结果到文件: + Spider.create(new SimplePageProcessor("http://my.oschina.net/", "http://my.oschina.net/*blog/*")) + .pipeline(new FilePipeline("/data/temp/webmagic/")).run(); + + 使用FileCacheQueueScheduler缓存URL,关闭爬虫后下次自动从停止的页面继续抓取: + Spider.create(new SimplePageProcessor("http://my.oschina.net/", "http://my.oschina.net/*blog/*")) + .scheduler(new FileCacheQueueScheduler("/data/temp/webmagic/cache/")).run(); + + + @author code4crafter@gmail.com
+ Date: 13-4-21 + Time: 上午6:53 +]]>
+
+ + + + + + + + + +)]]> + + + + + + + + + + + + + + + + + + + + + + +
diff --git a/zh_docs/us/codecraft/webmagic/Task-cmnt.xml b/zh_docs/us/codecraft/webmagic/Task-cmnt.xml new file mode 100644 index 00000000..d4f31dc7 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/Task-cmnt.xml @@ -0,0 +1,26 @@ + + + +Sat Aug 17 14:14:45 CST 2013 + + + + + @author code4crafter@gmail.com
+ Date: 13-6-18 + Time: 下午2:57 +]]>
+
+ + + + + + + + +
diff --git a/zh_docs/us/codecraft/webmagic/downloader/Destroyable-cmnt.xml b/zh_docs/us/codecraft/webmagic/downloader/Destroyable-cmnt.xml new file mode 100644 index 00000000..32cb949b --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/downloader/Destroyable-cmnt.xml @@ -0,0 +1,14 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + @author code4crafter@gmail.com
+ Date: 13-7-26
+ Time: 下午3:10
+]]>
+
+
diff --git a/zh_docs/us/codecraft/webmagic/downloader/Downloader-cmnt.xml b/zh_docs/us/codecraft/webmagic/downloader/Downloader-cmnt.xml new file mode 100644 index 00000000..12277dc8 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/downloader/Downloader-cmnt.xml @@ -0,0 +1,32 @@ + + + +Sat Aug 17 14:14:45 CST 2013 + + + + + + @author code4crafter@gmail.com
+ Date: 13-4-21 + Time: 下午12:14 +]]>
+
+ + + + + + + + 如果不考虑多线程的话,可以不实现这个方法
+ + @param thread 线程数量 +]]>
+
+
diff --git a/zh_docs/us/codecraft/webmagic/downloader/FileDownloader-cmnt.xml b/zh_docs/us/codecraft/webmagic/downloader/FileDownloader-cmnt.xml new file mode 100644 index 00000000..a0490623 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/downloader/FileDownloader-cmnt.xml @@ -0,0 +1,14 @@ + + + +Sat Aug 17 14:14:45 CST 2013 + + + + + @author code4crafter@gmail.com + Date: 13-6-24 + Time: 上午7:24 +]]> + + diff --git a/zh_docs/us/codecraft/webmagic/downloader/HttpClientDownloader-cmnt.xml b/zh_docs/us/codecraft/webmagic/downloader/HttpClientDownloader-cmnt.xml new file mode 100644 index 00000000..3a02bc95 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/downloader/HttpClientDownloader-cmnt.xml @@ -0,0 +1,23 @@ + + + +Sat Aug 17 14:14:45 CST 2013 + + + + + + @author code4crafter@gmail.com
+ Date: 13-4-21 + Time: 下午12:15 +]]>
+
+ + + + +
diff --git a/zh_docs/us/codecraft/webmagic/downloader/HttpClientPool-cmnt.xml b/zh_docs/us/codecraft/webmagic/downloader/HttpClientPool-cmnt.xml new file mode 100644 index 00000000..9c7ef38f --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/downloader/HttpClientPool-cmnt.xml @@ -0,0 +1,13 @@ + + + +Sat Aug 17 14:14:45 CST 2013 + + + + + Date: 13-4-21 + Time: 下午12:29 +]]> + + diff --git a/zh_docs/us/codecraft/webmagic/downloader/package.cmnt b/zh_docs/us/codecraft/webmagic/downloader/package.cmnt new file mode 100644 index 00000000..c2e3406f --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/downloader/package.cmnt @@ -0,0 +1,12 @@ + + + +Sat Aug 17 14:14:45 CST 2013 + + + + + + diff --git a/zh_docs/us/codecraft/webmagic/model/AfterExtractor-cmnt.xml b/zh_docs/us/codecraft/webmagic/model/AfterExtractor-cmnt.xml new file mode 100644 index 00000000..8c272e62 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/model/AfterExtractor-cmnt.xml @@ -0,0 +1,15 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + + @author code4crafter@gmail.com
+ Date: 13-8-3
+ Time: 上午9:42
+]]>
+
+
diff --git a/zh_docs/us/codecraft/webmagic/model/ConsolePageModelPipeline-cmnt.xml b/zh_docs/us/codecraft/webmagic/model/ConsolePageModelPipeline-cmnt.xml new file mode 100644 index 00000000..1549be7a --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/model/ConsolePageModelPipeline-cmnt.xml @@ -0,0 +1,13 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + Date: 13-8-3
+ Time: 下午3:41
+]]>
+
+
diff --git a/zh_docs/us/codecraft/webmagic/model/HasKey-cmnt.xml b/zh_docs/us/codecraft/webmagic/model/HasKey-cmnt.xml new file mode 100644 index 00000000..62debaf7 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/model/HasKey-cmnt.xml @@ -0,0 +1,24 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + 实现了这个接口的Model在输出时会使用getKey()作为标志(例如JsonFilePageModelPipeline中持久化的文件名)。
+ 如果持久化的文件名是乱码,请在运行的环境变量里加上LANG=zh_CN.UTF-8 。
+ + @author code4crafter@gmail.com
+ Date: 13-8-10
+ Time: 上午7:39
+]]>
+
+ + + + +
diff --git a/zh_docs/us/codecraft/webmagic/model/OOSpider-cmnt.xml b/zh_docs/us/codecraft/webmagic/model/OOSpider-cmnt.xml new file mode 100644 index 00000000..2fd60a7d --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/model/OOSpider-cmnt.xml @@ -0,0 +1,22 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + @author code4crafter@gmail.com
+ Date: 13-8-3
+ Time: 上午9:51
+]]>
+
+ + + + @param site + @param pageModelPipeline + @param pageModels +]]> + +
diff --git a/zh_docs/us/codecraft/webmagic/model/PageModelPipeline-cmnt.xml b/zh_docs/us/codecraft/webmagic/model/PageModelPipeline-cmnt.xml new file mode 100644 index 00000000..ac657292 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/model/PageModelPipeline-cmnt.xml @@ -0,0 +1,13 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + Date: 13-8-3
+ Time: 上午9:34
+]]>
+
+
diff --git a/zh_docs/us/codecraft/webmagic/model/annotation/ComboExtract-cmnt.xml b/zh_docs/us/codecraft/webmagic/model/annotation/ComboExtract-cmnt.xml new file mode 100644 index 00000000..0c488515 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/model/annotation/ComboExtract-cmnt.xml @@ -0,0 +1,13 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + Date: 13-8-16
+ Time: 下午11:09
+]]>
+
+
diff --git a/zh_docs/us/codecraft/webmagic/model/annotation/ExtractBy-cmnt.xml b/zh_docs/us/codecraft/webmagic/model/annotation/ExtractBy-cmnt.xml new file mode 100644 index 00000000..31d12572 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/model/annotation/ExtractBy-cmnt.xml @@ -0,0 +1,45 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + + @author code4crafter@gmail.com
+ Date: 13-8-1
+ Time: 下午8:40
+]]>
+
+ + + + + + + + + + + + + + + + 用于字段时,需要List来盛放结果
+ 用于类时,表示单页抽取多个对象
+ + @return 是否抽取多个结果 +]]>
+
+
diff --git a/zh_docs/us/codecraft/webmagic/model/annotation/ExtractBy.Type-cmnt.xml b/zh_docs/us/codecraft/webmagic/model/annotation/ExtractBy.Type-cmnt.xml new file mode 100644 index 00000000..f0968beb --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/model/annotation/ExtractBy.Type-cmnt.xml @@ -0,0 +1,6 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + diff --git a/zh_docs/us/codecraft/webmagic/model/annotation/ExtractBy2-cmnt.xml b/zh_docs/us/codecraft/webmagic/model/annotation/ExtractBy2-cmnt.xml new file mode 100644 index 00000000..eaadafde --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/model/annotation/ExtractBy2-cmnt.xml @@ -0,0 +1,15 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + + @author code4crafter@gmail.com
+ Date: 13-8-1
+ Time: 下午8:40
+]]>
+
+
diff --git a/zh_docs/us/codecraft/webmagic/model/annotation/ExtractBy2.Type-cmnt.xml b/zh_docs/us/codecraft/webmagic/model/annotation/ExtractBy2.Type-cmnt.xml new file mode 100644 index 00000000..f0968beb --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/model/annotation/ExtractBy2.Type-cmnt.xml @@ -0,0 +1,6 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + diff --git a/zh_docs/us/codecraft/webmagic/model/annotation/ExtractBy3-cmnt.xml b/zh_docs/us/codecraft/webmagic/model/annotation/ExtractBy3-cmnt.xml new file mode 100644 index 00000000..3bbaacbb --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/model/annotation/ExtractBy3-cmnt.xml @@ -0,0 +1,14 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + @author code4crafter@gmail.com
+ Date: 13-8-1
+ Time: 下午8:40
+]]>
+
+
diff --git a/zh_docs/us/codecraft/webmagic/model/annotation/ExtractBy3.Type-cmnt.xml b/zh_docs/us/codecraft/webmagic/model/annotation/ExtractBy3.Type-cmnt.xml new file mode 100644 index 00000000..f0968beb --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/model/annotation/ExtractBy3.Type-cmnt.xml @@ -0,0 +1,6 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + diff --git a/zh_docs/us/codecraft/webmagic/model/annotation/ExtractByRaw-cmnt.xml b/zh_docs/us/codecraft/webmagic/model/annotation/ExtractByRaw-cmnt.xml new file mode 100644 index 00000000..e266937b --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/model/annotation/ExtractByRaw-cmnt.xml @@ -0,0 +1,44 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + + @author code4crafter@gmail.com
+ Date: 13-8-1
+ Time: 下午8:40
+]]>
+
+ + + + + + + + + + + + + + + + 需要List来盛放结果
+ + @return 是否抽取多个结果 +]]>
+
+
diff --git a/zh_docs/us/codecraft/webmagic/model/annotation/ExtractByRaw.Type-cmnt.xml b/zh_docs/us/codecraft/webmagic/model/annotation/ExtractByRaw.Type-cmnt.xml new file mode 100644 index 00000000..f0968beb --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/model/annotation/ExtractByRaw.Type-cmnt.xml @@ -0,0 +1,6 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + diff --git a/zh_docs/us/codecraft/webmagic/model/annotation/ExtractByUrl-cmnt.xml b/zh_docs/us/codecraft/webmagic/model/annotation/ExtractByUrl-cmnt.xml new file mode 100644 index 00000000..b97ae398 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/model/annotation/ExtractByUrl-cmnt.xml @@ -0,0 +1,37 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + @author code4crafter@gmail.com
+ Date: 13-8-1
+ Time: 下午8:40
+]]>
+
+ + + + + + + + + + + + 用于字段时,需要List来盛放结果
+ 用于类时,表示单页抽取多个对象
+ + @return 是否抽取多个结果 +]]>
+
+
diff --git a/zh_docs/us/codecraft/webmagic/model/annotation/HelpUrl-cmnt.xml b/zh_docs/us/codecraft/webmagic/model/annotation/HelpUrl-cmnt.xml new file mode 100644 index 00000000..e4eccb00 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/model/annotation/HelpUrl-cmnt.xml @@ -0,0 +1,28 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + @author code4crafter@gmail.com
+ Date: 13-8-1
+ Time: 下午8:40
+]]>
+
+ + + + webmagic对正则表达式进行了修改,"."仅表示字符"."而不代表任意字符,而"\*"则代表了".\*",例如"http://\*.oschina.net/\*"代表了oschina所有的二级域名下的URL。
+ + @return 抽取规则 +]]>
+
+ + + + +
diff --git a/zh_docs/us/codecraft/webmagic/model/annotation/TargetUrl-cmnt.xml b/zh_docs/us/codecraft/webmagic/model/annotation/TargetUrl-cmnt.xml new file mode 100644 index 00000000..a2c86cf3 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/model/annotation/TargetUrl-cmnt.xml @@ -0,0 +1,29 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + + @author code4crafter@gmail.com
+ Date: 13-8-1
+ Time: 下午8:40
+]]>
+
+ + + + webmagic对正则表达式进行了修改,"."仅表示字符"."而不代表任意字符,而"\*"则代表了".\*",例如"http://\*.oschina.net/\*"代表了oschina所有的二级域名下的URL。
+ + @return 抽取规则 +]]>
+
+ + + + +
diff --git a/zh_docs/us/codecraft/webmagic/model/annotation/package.cmnt b/zh_docs/us/codecraft/webmagic/model/annotation/package.cmnt new file mode 100644 index 00000000..8405b974 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/model/annotation/package.cmnt @@ -0,0 +1,12 @@ + + + +Sat Aug 17 14:14:45 CST 2013 + + + + + + diff --git a/zh_docs/us/codecraft/webmagic/model/package.cmnt b/zh_docs/us/codecraft/webmagic/model/package.cmnt new file mode 100644 index 00000000..9cffa3af --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/model/package.cmnt @@ -0,0 +1,12 @@ + + + +Sat Aug 17 14:14:45 CST 2013 + + + + + + diff --git a/zh_docs/us/codecraft/webmagic/package.cmnt b/zh_docs/us/codecraft/webmagic/package.cmnt new file mode 100644 index 00000000..3ec8bc1d --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/package.cmnt @@ -0,0 +1,17 @@ + + + +Sat Aug 17 14:14:45 CST 2013 + + + + + Main class "Spider" and models. + +
+包括webmagic入口类Spider和一些数据传递的实体类。 +
+ ]]>
+
+
diff --git a/zh_docs/us/codecraft/webmagic/pipeline/ConsolePipeline-cmnt.xml b/zh_docs/us/codecraft/webmagic/pipeline/ConsolePipeline-cmnt.xml new file mode 100644 index 00000000..e88c9737 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/pipeline/ConsolePipeline-cmnt.xml @@ -0,0 +1,15 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + + @author code4crafter@gmail.com
+ Date: 13-4-21 + Time: 下午1:45 +]]>
+
+
diff --git a/zh_docs/us/codecraft/webmagic/pipeline/FilePipeline-cmnt.xml b/zh_docs/us/codecraft/webmagic/pipeline/FilePipeline-cmnt.xml new file mode 100644 index 00000000..153c9e07 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/pipeline/FilePipeline-cmnt.xml @@ -0,0 +1,27 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + Date: 13-4-21 + Time: 下午6:28 +]]> + + + + + + + + + + diff --git a/zh_docs/us/codecraft/webmagic/pipeline/JsonFilePageModelPipeline-cmnt.xml b/zh_docs/us/codecraft/webmagic/pipeline/JsonFilePageModelPipeline-cmnt.xml new file mode 100644 index 00000000..06509cf3 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/pipeline/JsonFilePageModelPipeline-cmnt.xml @@ -0,0 +1,28 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + 如果持久化的文件名是乱码,请在运行的环境变量里加上LANG=zh_CN.UTF-8。
+ + @author code4crafter@gmail.com
+ Date: 13-4-21 + Time: 下午6:28 +]]>
+
+ + + + + + + + +
diff --git a/zh_docs/us/codecraft/webmagic/pipeline/JsonFilePipeline-cmnt.xml b/zh_docs/us/codecraft/webmagic/pipeline/JsonFilePipeline-cmnt.xml new file mode 100644 index 00000000..23993af0 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/pipeline/JsonFilePipeline-cmnt.xml @@ -0,0 +1,27 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + Date: 13-4-21 + Time: 下午6:28 +]]> + + + + + + + + + + diff --git a/zh_docs/us/codecraft/webmagic/pipeline/PagedPipeline-cmnt.xml b/zh_docs/us/codecraft/webmagic/pipeline/PagedPipeline-cmnt.xml new file mode 100644 index 00000000..eb41808c --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/pipeline/PagedPipeline-cmnt.xml @@ -0,0 +1,16 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + 在使用redis做分布式爬虫时,请不要使用此功能。
+ + @author code4crafter@gmail.com
+ Date: 13-8-4
+ Time: 下午5:15
+]]>
+
+
diff --git a/zh_docs/us/codecraft/webmagic/pipeline/Pipeline-cmnt.xml b/zh_docs/us/codecraft/webmagic/pipeline/Pipeline-cmnt.xml new file mode 100644 index 00000000..ac5552ba --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/pipeline/Pipeline-cmnt.xml @@ -0,0 +1,14 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + Date: 13-4-21 + Time: 下午1:39 +]]> + + diff --git a/zh_docs/us/codecraft/webmagic/pipeline/package.cmnt b/zh_docs/us/codecraft/webmagic/pipeline/package.cmnt new file mode 100644 index 00000000..656607ea --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/pipeline/package.cmnt @@ -0,0 +1,12 @@ + + + +Sat Aug 17 14:14:45 CST 2013 + + + + + + diff --git a/zh_docs/us/codecraft/webmagic/processor/PageProcessor-cmnt.xml b/zh_docs/us/codecraft/webmagic/processor/PageProcessor-cmnt.xml new file mode 100644 index 00000000..95c25ddb --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/processor/PageProcessor-cmnt.xml @@ -0,0 +1,27 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + Extend this class to implement various spiders.
+ @author code4crafter@gmail.com
+ Date: 13-4-21 + Time: 上午11:42 +]]>
+
+ + + + + + + + +
diff --git a/zh_docs/us/codecraft/webmagic/processor/SimplePageProcessor-cmnt.xml b/zh_docs/us/codecraft/webmagic/processor/SimplePageProcessor-cmnt.xml new file mode 100644 index 00000000..f2215ad1 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/processor/SimplePageProcessor-cmnt.xml @@ -0,0 +1,14 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + @author code4crafter@gmail.com
+ Date: 13-4-22 + Time: 下午9:15 +]]>
+
+
diff --git a/zh_docs/us/codecraft/webmagic/processor/package.cmnt b/zh_docs/us/codecraft/webmagic/processor/package.cmnt new file mode 100644 index 00000000..1f79cba3 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/processor/package.cmnt @@ -0,0 +1,12 @@ + + + +Sat Aug 17 14:14:45 CST 2013 + + + + + + diff --git a/zh_docs/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler-cmnt.xml b/zh_docs/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler-cmnt.xml new file mode 100644 index 00000000..539b3596 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/scheduler/FileCacheQueueScheduler-cmnt.xml @@ -0,0 +1,14 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + @author code4crafter@gmail.com
+ Date: 13-4-21 + Time: 下午1:13 +]]>
+
+
diff --git a/zh_docs/us/codecraft/webmagic/scheduler/QueueScheduler-cmnt.xml b/zh_docs/us/codecraft/webmagic/scheduler/QueueScheduler-cmnt.xml new file mode 100644 index 00000000..b217f0be --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/scheduler/QueueScheduler-cmnt.xml @@ -0,0 +1,14 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + @author code4crafter@gmail.com
+ Date: 13-4-21 + Time: 下午1:13 +]]>
+
+
diff --git a/zh_docs/us/codecraft/webmagic/scheduler/RedisScheduler-cmnt.xml b/zh_docs/us/codecraft/webmagic/scheduler/RedisScheduler-cmnt.xml new file mode 100644 index 00000000..41743a79 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/scheduler/RedisScheduler-cmnt.xml @@ -0,0 +1,15 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + + @author code4crafter@gmail.com
+ Date: 13-7-25
+ Time: 上午7:07
+]]>
+
+
diff --git a/zh_docs/us/codecraft/webmagic/scheduler/Scheduler-cmnt.xml b/zh_docs/us/codecraft/webmagic/scheduler/Scheduler-cmnt.xml new file mode 100644 index 00000000..fdf14ddc --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/scheduler/Scheduler-cmnt.xml @@ -0,0 +1,29 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + Scheduler的接口包含一个Task参数,该参数是为单Scheduler多Task预留的(Spider就是一个Task)。
+ @author code4crafter@gmail.com
+ Date: 13-4-21 + Time: 下午1:12 +]]>
+
+ + + + + + + + +
diff --git a/zh_docs/us/codecraft/webmagic/scheduler/package.cmnt b/zh_docs/us/codecraft/webmagic/scheduler/package.cmnt new file mode 100644 index 00000000..8b8a9f1d --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/scheduler/package.cmnt @@ -0,0 +1,12 @@ + + + +Sat Aug 17 14:14:45 CST 2013 + + + + + + diff --git a/zh_docs/us/codecraft/webmagic/selector/AndSelector-cmnt.xml b/zh_docs/us/codecraft/webmagic/selector/AndSelector-cmnt.xml new file mode 100644 index 00000000..c9c4e023 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/selector/AndSelector-cmnt.xml @@ -0,0 +1,13 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + Date: 13-8-3
+ Time: 下午5:29
+]]>
+
+
diff --git a/zh_docs/us/codecraft/webmagic/selector/CssSelector-cmnt.xml b/zh_docs/us/codecraft/webmagic/selector/CssSelector-cmnt.xml new file mode 100644 index 00000000..2092b3b7 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/selector/CssSelector-cmnt.xml @@ -0,0 +1,14 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + @author code4crafter@gmail.com
+ Date: 13-4-21 + Time: 上午9:39 +]]>
+
+
diff --git a/zh_docs/us/codecraft/webmagic/selector/Html-cmnt.xml b/zh_docs/us/codecraft/webmagic/selector/Html-cmnt.xml new file mode 100644 index 00000000..1668cc51 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/selector/Html-cmnt.xml @@ -0,0 +1,14 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + @author code4crafter@gmail.com
+ Date: 13-4-21 + Time: 上午7:54 +]]>
+
+
diff --git a/zh_docs/us/codecraft/webmagic/selector/JsonPathSelector-cmnt.xml b/zh_docs/us/codecraft/webmagic/selector/JsonPathSelector-cmnt.xml new file mode 100644 index 00000000..f34d31b6 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/selector/JsonPathSelector-cmnt.xml @@ -0,0 +1,13 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + Date: 13-8-12
+ Time: 下午12:54
+]]>
+
+
diff --git a/zh_docs/us/codecraft/webmagic/selector/OrSelector-cmnt.xml b/zh_docs/us/codecraft/webmagic/selector/OrSelector-cmnt.xml new file mode 100644 index 00000000..7fb672e3 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/selector/OrSelector-cmnt.xml @@ -0,0 +1,13 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + Date: 13-8-3
+ Time: 下午5:29
+]]>
+
+
diff --git a/zh_docs/us/codecraft/webmagic/selector/PlainText-cmnt.xml b/zh_docs/us/codecraft/webmagic/selector/PlainText-cmnt.xml new file mode 100644 index 00000000..15aba1c5 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/selector/PlainText-cmnt.xml @@ -0,0 +1,14 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + @author code4crafter@gmail.com
+ Date: 13-4-21 + Time: 上午7:54 +]]>
+
+
diff --git a/zh_docs/us/codecraft/webmagic/selector/RegexSelector-cmnt.xml b/zh_docs/us/codecraft/webmagic/selector/RegexSelector-cmnt.xml new file mode 100644 index 00000000..3d82c656 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/selector/RegexSelector-cmnt.xml @@ -0,0 +1,14 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + @author code4crafter@gmail.com
+ Date: 13-4-21 + Time: 上午7:09 +]]>
+
+
diff --git a/zh_docs/us/codecraft/webmagic/selector/ReplaceSelector-cmnt.xml b/zh_docs/us/codecraft/webmagic/selector/ReplaceSelector-cmnt.xml new file mode 100644 index 00000000..e96832f0 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/selector/ReplaceSelector-cmnt.xml @@ -0,0 +1,14 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + @author code4crafter@gmail.com
+ Date: 13-4-21 + Time: 上午7:09 +]]>
+
+
diff --git a/zh_docs/us/codecraft/webmagic/selector/Selectable-cmnt.xml b/zh_docs/us/codecraft/webmagic/selector/Selectable-cmnt.xml new file mode 100644 index 00000000..73342fc1 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/selector/Selectable-cmnt.xml @@ -0,0 +1,75 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + @author code4crafter@gmail.com
+ Date: 13-4-20 + Time: 下午7:51 +]]>
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
diff --git a/zh_docs/us/codecraft/webmagic/selector/Selector-cmnt.xml b/zh_docs/us/codecraft/webmagic/selector/Selector-cmnt.xml new file mode 100644 index 00000000..ee77b38c --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/selector/Selector-cmnt.xml @@ -0,0 +1,14 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + @author code4crafter@gmail.com
+ Date: 13-4-20 + Time: 下午8:02 +]]>
+
+
diff --git a/zh_docs/us/codecraft/webmagic/selector/SelectorFactory-cmnt.xml b/zh_docs/us/codecraft/webmagic/selector/SelectorFactory-cmnt.xml new file mode 100644 index 00000000..9741bc40 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/selector/SelectorFactory-cmnt.xml @@ -0,0 +1,14 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + @author code4crafter@gmail.com
+ Date: 13-4-21 + Time: 上午7:56 +]]>
+
+
diff --git a/zh_docs/us/codecraft/webmagic/selector/SmartContentSelector-cmnt.xml b/zh_docs/us/codecraft/webmagic/selector/SmartContentSelector-cmnt.xml new file mode 100644 index 00000000..c235de37 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/selector/SmartContentSelector-cmnt.xml @@ -0,0 +1,15 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + Date: 13-4-21 + Time: 下午4:42 +]]> + + diff --git a/zh_docs/us/codecraft/webmagic/selector/XpathSelector-cmnt.xml b/zh_docs/us/codecraft/webmagic/selector/XpathSelector-cmnt.xml new file mode 100644 index 00000000..e8343b9a --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/selector/XpathSelector-cmnt.xml @@ -0,0 +1,14 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + @author code4crafter@gmail.com
+ Date: 13-4-21 + Time: 上午9:39 +]]>
+
+
diff --git a/zh_docs/us/codecraft/webmagic/selector/package.cmnt b/zh_docs/us/codecraft/webmagic/selector/package.cmnt new file mode 100644 index 00000000..dbcebb87 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/selector/package.cmnt @@ -0,0 +1,12 @@ + + + +Sat Aug 17 14:14:45 CST 2013 + + + + + + diff --git a/zh_docs/us/codecraft/webmagic/utils/DoubleKeyMap-cmnt.xml b/zh_docs/us/codecraft/webmagic/utils/DoubleKeyMap-cmnt.xml new file mode 100644 index 00000000..c8d2802c --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/utils/DoubleKeyMap-cmnt.xml @@ -0,0 +1,60 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + + +>, java.lang.Class)]]> + + + + + + + + + + + +)]]> + + + + + + + + + + + + + + + diff --git a/zh_docs/us/codecraft/webmagic/utils/FilePersistentBase-cmnt.xml b/zh_docs/us/codecraft/webmagic/utils/FilePersistentBase-cmnt.xml new file mode 100644 index 00000000..62a48b90 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/utils/FilePersistentBase-cmnt.xml @@ -0,0 +1,15 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + + @author code4crafter@gmail.com
+ Date: 13-8-11
+ Time: 下午4:21
+]]>
+
+
diff --git a/zh_docs/us/codecraft/webmagic/utils/MultiKeyMapBase-cmnt.xml b/zh_docs/us/codecraft/webmagic/utils/MultiKeyMapBase-cmnt.xml new file mode 100644 index 00000000..12162ebf --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/utils/MultiKeyMapBase-cmnt.xml @@ -0,0 +1,13 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + + diff --git a/zh_docs/us/codecraft/webmagic/utils/ThreadUtils-cmnt.xml b/zh_docs/us/codecraft/webmagic/utils/ThreadUtils-cmnt.xml new file mode 100644 index 00000000..fdf7aab7 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/utils/ThreadUtils-cmnt.xml @@ -0,0 +1,14 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + @author code4crafter@gmail.com + Date: 13-6-23 + Time: 下午7:11 +]]> + + diff --git a/zh_docs/us/codecraft/webmagic/utils/UrlUtils-cmnt.xml b/zh_docs/us/codecraft/webmagic/utils/UrlUtils-cmnt.xml new file mode 100644 index 00000000..019523ea --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/utils/UrlUtils-cmnt.xml @@ -0,0 +1,22 @@ + + + +Sat Aug 17 14:14:46 CST 2013 + + + + + @author code4crafter@gmail.com
+ Date: 13-4-21 + Time: 下午1:52 +]]>
+
+ + + + +
diff --git a/zh_docs/us/codecraft/webmagic/utils/package.cmnt b/zh_docs/us/codecraft/webmagic/utils/package.cmnt new file mode 100644 index 00000000..39b97037 --- /dev/null +++ b/zh_docs/us/codecraft/webmagic/utils/package.cmnt @@ -0,0 +1,12 @@ + + + +Sat Aug 17 14:14:45 CST 2013 + + + + + +