update comments
parent
59aad6a7f4
commit
c59c1fe80d
@ -1,21 +1,45 @@
|
||||
package us.codecraft.webmagic;
|
||||
|
||||
import us.codecraft.webmagic.model.annotation.Experimental;
|
||||
|
||||
import java.util.Collection;
|
||||
|
||||
/**
|
||||
* 实现此接口以进行支持爬虫分页抓取。<br>
|
||||
* Extract an object that spans more than one page, such as news and articles.<br>
|
||||
*
|
||||
* @author code4crafter@gmail.com <br>
|
||||
* Date: 13-8-4 <br>
|
||||
* Time: 下午5:18 <br>
|
||||
*/
|
||||
@Experimental
|
||||
public interface MultiPageModel {
|
||||
|
||||
/**
|
||||
* Page key is the identifier for the object.
|
||||
*
|
||||
* @return page key
|
||||
*/
|
||||
public String getPageKey();
|
||||
|
||||
public Collection<String> getOtherPages();
|
||||
|
||||
/**
|
||||
* Page is the identifier of a single page among the pages of one object.
|
||||
*
|
||||
* @return page
|
||||
*/
|
||||
public String getPage();
|
||||
|
||||
/**
|
||||
* Other pages to be extracted.<br>
|
||||
* It is used to judge whether an object contains more than one page, and whether the pages of the object are all extracted.
|
||||
*
|
||||
* @return other pages
|
||||
*/
|
||||
public Collection<String> getOtherPages();
|
||||
|
||||
/**
|
||||
* Combine multiPageModels into a whole object.
|
||||
*
|
||||
* @param multiPageModel the multiPageModel to combine with this one
|
||||
* @return multiPageModel combined
|
||||
*/
|
||||
public MultiPageModel combine(MultiPageModel multiPageModel);
|
||||
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
<html>
|
||||
<body>
|
||||
webmagic对抓取器编写的面向模型(称为PageModel)的封装。基于POJO及注解即可实现一个PageProcessor。
|
||||
Page model and annotations used to customize a crawler.
|
||||
</body>
|
||||
</html>
|
||||
|
Loading…
Reference in New Issue