invite selenium
parent
74bd74a005
commit
fcb09f2e08
@ -0,0 +1,48 @@
|
||||
package us.codecraft.webmagic.selenium.downloader;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.openqa.selenium.By;
|
||||
import org.openqa.selenium.WebDriver;
|
||||
import org.openqa.selenium.WebElement;
|
||||
import us.codecraft.webmagic.Page;
|
||||
import us.codecraft.webmagic.Request;
|
||||
import us.codecraft.webmagic.Task;
|
||||
import us.codecraft.webmagic.downloader.Downloader;
|
||||
import us.codecraft.webmagic.selector.Html;
|
||||
import us.codecraft.webmagic.selector.PlainText;
|
||||
import us.codecraft.webmagic.utils.UrlUtils;
|
||||
|
||||
/**
|
||||
* @author yihua.huang@dianping.com <br>
|
||||
* @date: 13-7-26 <br>
|
||||
* Time: 下午1:37 <br>
|
||||
*/
|
||||
public class SeleniumDownloader implements Downloader {
|
||||
|
||||
private WebDriverPool webDriverPool;
|
||||
|
||||
private Logger logger = Logger.getLogger(getClass());
|
||||
|
||||
public SeleniumDownloader(String chromeDriverPath) {
|
||||
System.getProperties().setProperty("webdriver.chrome.driver", chromeDriverPath);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Page download(Request request, Task task) {
|
||||
WebDriver webDriver = null;
|
||||
try {
|
||||
webDriver = webDriverPool.get();
|
||||
} catch (InterruptedException e) {
|
||||
logger.warn("interrupted",e);
|
||||
return null;
|
||||
}
|
||||
webDriver.get(request.getUrl());
|
||||
WebElement webElement = webDriver.findElement(By.xpath("/html"));
|
||||
String content = webElement.getAttribute("outerHTML");
|
||||
Page page = new Page();
|
||||
page.setHtml(new Html(UrlUtils.fixAllRelativeHrefs(content, request.getUrl())));
|
||||
page.setUrl(new PlainText(request.getUrl()));
|
||||
page.setRequest(request);
|
||||
return page;
|
||||
}
|
||||
}
|
@ -0,0 +1,82 @@
|
||||
package us.codecraft.webmagic.selenium.downloader;
|
||||
|
||||
import org.openqa.selenium.WebDriver;
|
||||
import org.openqa.selenium.chrome.ChromeDriver;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.BlockingDeque;
|
||||
import java.util.concurrent.LinkedBlockingDeque;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
/**
|
||||
* @author yihua.huang@dianping.com <br>
|
||||
* @date: 13-7-26 <br>
|
||||
* Time: 下午1:41 <br>
|
||||
*/
|
||||
class WebDriverPool {
|
||||
|
||||
private final static int DEFAULT_CAPACITY = 5;
|
||||
|
||||
private final int capacity;
|
||||
|
||||
private final static int STAT_RUNNING = 1;
|
||||
|
||||
private final static int STAT_CLODED = 2;
|
||||
|
||||
private AtomicInteger stat = new AtomicInteger(STAT_RUNNING);
|
||||
|
||||
private List<WebDriver> webDriverList = Collections.synchronizedList(new ArrayList<WebDriver>());
|
||||
|
||||
public WebDriverPool(int capacity) {
|
||||
this.capacity = capacity;
|
||||
}
|
||||
|
||||
public WebDriverPool() {
|
||||
this(DEFAULT_CAPACITY);
|
||||
}
|
||||
|
||||
private BlockingDeque<WebDriver> innerQueue = new LinkedBlockingDeque<WebDriver>();
|
||||
|
||||
public WebDriver get() throws InterruptedException {
|
||||
checkRunning();
|
||||
WebDriver poll = innerQueue.poll();
|
||||
if (poll != null) {
|
||||
return poll;
|
||||
}
|
||||
if (webDriverList.size() < capacity) {
|
||||
synchronized (webDriverList) {
|
||||
if (webDriverList.size() < capacity) {
|
||||
ChromeDriver e = new ChromeDriver();
|
||||
innerQueue.add(e);
|
||||
webDriverList.add(e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
return innerQueue.take();
|
||||
}
|
||||
|
||||
public void returnToPool(WebDriver webDriver) {
|
||||
checkRunning();
|
||||
innerQueue.add(webDriver);
|
||||
}
|
||||
|
||||
protected void checkRunning() {
|
||||
if (!stat.compareAndSet(STAT_RUNNING, STAT_RUNNING)) {
|
||||
throw new IllegalStateException("Already closed!");
|
||||
}
|
||||
}
|
||||
|
||||
public void closeAll() {
|
||||
boolean b = stat.compareAndSet(STAT_RUNNING, STAT_CLODED);
|
||||
if (!b) {
|
||||
throw new IllegalStateException("Already closed!");
|
||||
}
|
||||
for (WebDriver webDriver : webDriverList) {
|
||||
webDriver.close();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
@ -0,0 +1,29 @@
|
||||
package us.codecraft.webmagic.selenium;
|
||||
|
||||
import org.junit.Test;
|
||||
import org.openqa.selenium.By;
|
||||
import org.openqa.selenium.WebDriver;
|
||||
import org.openqa.selenium.WebElement;
|
||||
import org.openqa.selenium.chrome.ChromeDriver;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @author yihua.huang@dianping.com <br>
|
||||
* @date: 13-7-26 <br>
|
||||
* Time: 下午12:27 <br>
|
||||
*/
|
||||
public class SeleniumTest {
|
||||
|
||||
@Test
|
||||
public void test(){
|
||||
System.getProperties().setProperty("webdriver.chrome.driver","/Users/yihua/Downloads/chromedriver");
|
||||
WebDriver webDriver = new ChromeDriver();
|
||||
webDriver.get("http://huaban.com/");
|
||||
List<WebElement> elements = webDriver.findElements(By.xpath("/html"));
|
||||
for (WebElement element : elements) {
|
||||
System.out.println(element.getAttribute("outerHTML"));
|
||||
}
|
||||
webDriver.close();
|
||||
}
|
||||
}
|
@ -0,0 +1,28 @@
|
||||
package us.codecraft.webmagic.selenium.downloader;
|
||||
|
||||
import org.junit.Test;
|
||||
import org.openqa.selenium.WebDriver;
|
||||
|
||||
/**
|
||||
* @author yihua.huang@dianping.com <br>
|
||||
* @date: 13-7-26 <br>
|
||||
* Time: 下午2:12 <br>
|
||||
*/
|
||||
public class WebDriverPoolTest {
|
||||
|
||||
@Test
|
||||
public void test(){
|
||||
String chromeDriverPath = "/Users/yihua/Downloads/chromedriver";
|
||||
System.getProperties().setProperty("webdriver.chrome.driver", chromeDriverPath);
|
||||
WebDriverPool webDriverPool =new WebDriverPool(5);
|
||||
for (int i=0;i<5;i++){
|
||||
try {
|
||||
WebDriver webDriver = webDriverPool.get();
|
||||
System.out.println(i);
|
||||
} catch (InterruptedException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
webDriverPool.closeAll();
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue