|
|
|
@ -20,18 +20,22 @@ import java.util.Map;
|
|
|
|
|
/**
|
|
|
|
|
* 使用Selenium调用浏览器进行渲染。目前仅支持chrome。<br>
|
|
|
|
|
* 需要下载Selenium driver支持。<br>
|
|
|
|
|
*
|
|
|
|
|
* @author yihua.huang@dianping.com <br>
|
|
|
|
|
* @date: 13-7-26 <br>
|
|
|
|
|
* Time: 下午1:37 <br>
|
|
|
|
|
*/
|
|
|
|
|
public class SeleniumDownloader implements Downloader,Destroyable {
|
|
|
|
|
public class SeleniumDownloader implements Downloader, Destroyable {
|
|
|
|
|
|
|
|
|
|
/** Pool of browser drivers; created in the constructor with the configured pool size. */
private WebDriverPool webDriverPool;
|
|
|
|
|
|
|
|
|
|
/** Logger for this downloader; used to record each URL as it is downloaded. */
private Logger logger = Logger.getLogger(getClass());
|
|
|
|
|
|
|
|
|
|
/**
 * Pause, in milliseconds, applied via Thread.sleep after the browser is told to load a page.
 * Defaults to 0; changed through setSleepTime.
 */
private int sleepTime = 0;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* 新建
|
|
|
|
|
*
|
|
|
|
|
* @param chromeDriverPath
|
|
|
|
|
*/
|
|
|
|
|
public SeleniumDownloader(String chromeDriverPath) {
|
|
|
|
@ -44,6 +48,16 @@ public class SeleniumDownloader implements Downloader,Destroyable {
|
|
|
|
|
webDriverPool = new WebDriverPool(poolSize);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* set sleep time to wait until load success
|
|
|
|
|
* @param sleepTime
|
|
|
|
|
* @return this
|
|
|
|
|
*/
|
|
|
|
|
public SeleniumDownloader setSleepTime(int sleepTime) {
|
|
|
|
|
this.sleepTime = sleepTime;
|
|
|
|
|
return this;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
|
public Page download(Request request, Task task) {
|
|
|
|
|
WebDriver webDriver;
|
|
|
|
@ -55,6 +69,11 @@ public class SeleniumDownloader implements Downloader,Destroyable {
|
|
|
|
|
}
|
|
|
|
|
logger.info("downloading page " + request.getUrl());
|
|
|
|
|
webDriver.get(request.getUrl());
|
|
|
|
|
try {
|
|
|
|
|
Thread.sleep(sleepTime);
|
|
|
|
|
} catch (InterruptedException e) {
|
|
|
|
|
e.printStackTrace();
|
|
|
|
|
}
|
|
|
|
|
WebDriver.Options manage = webDriver.manage();
|
|
|
|
|
Site site = task.getSite();
|
|
|
|
|
if (site.getCookies() != null) {
|
|
|
|
@ -65,6 +84,7 @@ public class SeleniumDownloader implements Downloader,Destroyable {
|
|
|
|
|
}
|
|
|
|
|
WebElement webElement = webDriver.findElement(By.xpath("/html"));
|
|
|
|
|
String content = webElement.getAttribute("outerHTML");
|
|
|
|
|
//
|
|
|
|
|
Page page = new Page();
|
|
|
|
|
page.setHtml(new Html(UrlUtils.fixAllRelativeHrefs(content, request.getUrl())));
|
|
|
|
|
page.setUrl(new PlainText(request.getUrl()));
|
|
|
|
|