|
|
|
@ -9,6 +9,8 @@ import us.codecraft.webmagic.scheduler.RedisScheduler;
|
|
|
|
|
import us.codecraft.webmagic.selenium.downloader.SeleniumDownloader;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* 花瓣网抽取器。<br>
|
|
|
|
|
* 使用Selenium做页面动态渲染。<br>
|
|
|
|
|
* @author yihua.huang@dianping.com <br>
|
|
|
|
|
* @date: 13-7-26 <br>
|
|
|
|
|
* Time: 下午4:08 <br>
|
|
|
|
@ -30,7 +32,7 @@ public class HuabanProcessor implements PageProcessor {
|
|
|
|
|
@Override
|
|
|
|
|
public Site getSite() {
|
|
|
|
|
if (site == null) {
|
|
|
|
|
site = Site.me().setDomain("huaban.com").addStartUrl("http://huaban.com/").setSleepTime(1000);
|
|
|
|
|
site = Site.me().setDomain("huaban.com").addStartUrl("http://huaban.com/").setSleepTime(0);
|
|
|
|
|
}
|
|
|
|
|
return site;
|
|
|
|
|
}
|
|
|
|
|