From 6a87a778fd3f1f0f1f941e698e16bdcb8c0b0c15 Mon Sep 17 00:00:00 2001 From: "yihua.huang" Date: Mon, 29 Jul 2013 18:00:12 +0800 Subject: [PATCH] add selenium download timeout --- .../downloader/SeleniumDownloader.java | 22 ++++++++++++++++++- .../downloader/SeleniumDownloaderTest.java | 3 ++- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/webmagic-plugin/webmagic-selenium/src/main/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloader.java b/webmagic-plugin/webmagic-selenium/src/main/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloader.java index b4dd3720..54e3c9c0 100644 --- a/webmagic-plugin/webmagic-selenium/src/main/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloader.java +++ b/webmagic-plugin/webmagic-selenium/src/main/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloader.java @@ -20,18 +20,22 @@ import java.util.Map; /** * 使用Selenium调用浏览器进行渲染。目前仅支持chrome。
* 需要下载Selenium driver支持。
+ * * @author yihua.huang@dianping.com
* @date: 13-7-26
* Time: 下午1:37
*/ -public class SeleniumDownloader implements Downloader,Destroyable { +public class SeleniumDownloader implements Downloader, Destroyable { private WebDriverPool webDriverPool; private Logger logger = Logger.getLogger(getClass()); + private int sleepTime = 0; + /** * 新建 + * * @param chromeDriverPath */ public SeleniumDownloader(String chromeDriverPath) { @@ -44,6 +48,16 @@ public class SeleniumDownloader implements Downloader,Destroyable { webDriverPool = new WebDriverPool(poolSize); } + /** + * set sleep time to wait until load success + * @param sleepTime + * @return this + */ + public SeleniumDownloader setSleepTime(int sleepTime) { + this.sleepTime = sleepTime; + return this; + } + @Override public Page download(Request request, Task task) { WebDriver webDriver; @@ -55,6 +69,11 @@ public class SeleniumDownloader implements Downloader,Destroyable { } logger.info("downloading page " + request.getUrl()); webDriver.get(request.getUrl()); + try { + Thread.sleep(sleepTime); + } catch (InterruptedException e) { + e.printStackTrace(); + } WebDriver.Options manage = webDriver.manage(); Site site = task.getSite(); if (site.getCookies() != null) { @@ -65,6 +84,7 @@ public class SeleniumDownloader implements Downloader,Destroyable { } WebElement webElement = webDriver.findElement(By.xpath("/html")); String content = webElement.getAttribute("outerHTML"); + // Page page = new Page(); page.setHtml(new Html(UrlUtils.fixAllRelativeHrefs(content, request.getUrl()))); page.setUrl(new PlainText(request.getUrl())); diff --git a/webmagic-plugin/webmagic-selenium/src/test/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloaderTest.java b/webmagic-plugin/webmagic-selenium/src/test/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloaderTest.java index 615ad867..4aa99195 100644 --- a/webmagic-plugin/webmagic-selenium/src/test/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloaderTest.java +++ b/webmagic-plugin/webmagic-selenium/src/test/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloaderTest.java @@ -14,7 +14,7 @@ import us.codecraft.webmagic.Task; */ public class SeleniumDownloaderTest { - private String chromeDriverPath = ""; + private String chromeDriverPath = "/Users/yihua/Downloads/chromedriver"; @Ignore("need chrome driver") @Test @@ -37,4 +37,5 @@ public class SeleniumDownloaderTest { } System.out.println(System.currentTimeMillis() - time1); } + }