From 644a90c2d85dab09b7e367377a46de92522cd692 Mon Sep 17 00:00:00 2001 From: "yihua.huang" Date: Fri, 26 Jul 2013 15:05:29 +0800 Subject: [PATCH] complete selenium --- .../downloader/SeleniumDownloader.java | 24 +++++++++++-- .../downloader/SeleniumDownloaderTest.java | 36 +++++++++++++++++++ .../downloader/WebDriverPoolTest.java | 9 ++--- 3 files changed, 63 insertions(+), 6 deletions(-) create mode 100644 webmagic-plugin/webmagic-selenium/src/test/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloaderTest.java diff --git a/webmagic-plugin/webmagic-selenium/src/main/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloader.java b/webmagic-plugin/webmagic-selenium/src/main/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloader.java index 8fd1c6a2..171ca44f 100644 --- a/webmagic-plugin/webmagic-selenium/src/main/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloader.java +++ b/webmagic-plugin/webmagic-selenium/src/main/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloader.java @@ -2,16 +2,20 @@ package us.codecraft.webmagic.selenium.downloader; import org.apache.log4j.Logger; import org.openqa.selenium.By; +import org.openqa.selenium.Cookie; import org.openqa.selenium.WebDriver; import org.openqa.selenium.WebElement; import us.codecraft.webmagic.Page; import us.codecraft.webmagic.Request; +import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Task; import us.codecraft.webmagic.downloader.Downloader; import us.codecraft.webmagic.selector.Html; import us.codecraft.webmagic.selector.PlainText; import us.codecraft.webmagic.utils.UrlUtils; +import java.util.Map; + /** * @author yihua.huang@dianping.com
* @date: 13-7-26
@@ -25,24 +29,40 @@ public class SeleniumDownloader implements Downloader { public SeleniumDownloader(String chromeDriverPath) { System.getProperties().setProperty("webdriver.chrome.driver", chromeDriverPath); + webDriverPool = new WebDriverPool(); + } + + public SeleniumDownloader(String chromeDriverPath, int poolSize) { + System.getProperties().setProperty("webdriver.chrome.driver", chromeDriverPath); + webDriverPool = new WebDriverPool(poolSize); } @Override public Page download(Request request, Task task) { - WebDriver webDriver = null; + WebDriver webDriver; try { webDriver = webDriverPool.get(); } catch (InterruptedException e) { - logger.warn("interrupted",e); + logger.warn("interrupted", e); return null; } webDriver.get(request.getUrl()); + WebDriver.Options manage = webDriver.manage(); + Site site = task.getSite(); + if (site.getCookies() != null) { + for (Map.Entry cookieEntry : site.getCookies().entrySet()) { + Cookie cookie = new Cookie(cookieEntry.getKey(), cookieEntry.getValue()); + manage.addCookie(cookie); + } + } WebElement webElement = webDriver.findElement(By.xpath("/html")); String content = webElement.getAttribute("outerHTML"); Page page = new Page(); page.setHtml(new Html(UrlUtils.fixAllRelativeHrefs(content, request.getUrl()))); page.setUrl(new PlainText(request.getUrl())); page.setRequest(request); + webDriverPool.returnToPool(webDriver); return page; } + } diff --git a/webmagic-plugin/webmagic-selenium/src/test/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloaderTest.java b/webmagic-plugin/webmagic-selenium/src/test/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloaderTest.java new file mode 100644 index 00000000..d2106845 --- /dev/null +++ b/webmagic-plugin/webmagic-selenium/src/test/java/us/codecraft/webmagic/selenium/downloader/SeleniumDownloaderTest.java @@ -0,0 +1,36 @@ +package us.codecraft.webmagic.selenium.downloader; + +import org.junit.Ignore; +import org.junit.Test; +import us.codecraft.webmagic.Page; +import us.codecraft.webmagic.Request; +import us.codecraft.webmagic.Site; +import us.codecraft.webmagic.Task; + +/** + * @author yihua.huang@dianping.com
+ * @date: 13-7-26
+ * Time: 下午2:46
+ */ +public class SeleniumDownloaderTest { + + private String chromeDriverPath = "/Users/yihua/Downloads/chromedriver"; + + @Ignore("need chrome driver") + @Test + public void test(){ + SeleniumDownloader seleniumDownloader = new SeleniumDownloader(chromeDriverPath); + Page page = seleniumDownloader.download(new Request("http://huaban.com/"), new Task() { + @Override + public String getUUID() { + return "huaban.com"; + } + + @Override + public Site getSite() { + return Site.me(); + } + }); + System.out.println(page.getHtml().$("#waterfall").links().regex(".*pins.*").all()); + } +} diff --git a/webmagic-plugin/webmagic-selenium/src/test/java/us/codecraft/webmagic/selenium/downloader/WebDriverPoolTest.java b/webmagic-plugin/webmagic-selenium/src/test/java/us/codecraft/webmagic/selenium/downloader/WebDriverPoolTest.java index d38216f8..38e4f86f 100644 --- a/webmagic-plugin/webmagic-selenium/src/test/java/us/codecraft/webmagic/selenium/downloader/WebDriverPoolTest.java +++ b/webmagic-plugin/webmagic-selenium/src/test/java/us/codecraft/webmagic/selenium/downloader/WebDriverPoolTest.java @@ -10,12 +10,13 @@ import org.openqa.selenium.WebDriver; */ public class WebDriverPoolTest { + private String chromeDriverPath = "/Users/yihua/Downloads/chromedriver"; + @Test - public void test(){ - String chromeDriverPath = "/Users/yihua/Downloads/chromedriver"; + public void test() { System.getProperties().setProperty("webdriver.chrome.driver", chromeDriverPath); - WebDriverPool webDriverPool =new WebDriverPool(5); - for (int i=0;i<5;i++){ + WebDriverPool webDriverPool = new WebDriverPool(5); + for (int i = 0; i < 5; i++) { try { WebDriver webDriver = webDriverPool.get(); System.out.println(i);