commit
9d0eeb9000
@ -0,0 +1,12 @@
|
||||
# What WebDriver to use for the tests
|
||||
driver=phantomjs
|
||||
#driver=firefox
|
||||
#driver=chrome
|
||||
#driver=http://localhost:8910
|
||||
#driver=http://localhost:4444/wd/hub
|
||||
|
||||
# PhantomJS specific config (change according to your installation)
|
||||
#phantomjs_exec_path=/Users/Bingo/bin/phantomjs-qt5
|
||||
phantomjs_exec_path=/Users/Bingo/Downloads/phantomjs-1.9.8-macosx/bin/phantomjs
|
||||
#phantomjs_driver_path=/Users/Bingo/Documents/workspace/webmagic/webmagic-selenium/src/main.js
|
||||
phantomjs_driver_loglevel=DEBUG
|
@ -1,40 +1,49 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<parent>
|
||||
<artifactId>webmagic-parent</artifactId>
|
||||
<groupId>us.codecraft</groupId>
|
||||
<version>0.5.2</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<parent>
|
||||
<artifactId>webmagic-parent</artifactId>
|
||||
<groupId>us.codecraft</groupId>
|
||||
<version>0.5.2</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<artifactId>webmagic-selenium</artifactId>
|
||||
<artifactId>webmagic-selenium</artifactId>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.seleniumhq.selenium</groupId>
|
||||
<artifactId>selenium-java</artifactId>
|
||||
<version>2.33.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>us.codecraft</groupId>
|
||||
<artifactId>webmagic-core</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.seleniumhq.selenium</groupId>
|
||||
<artifactId>selenium-java</artifactId>
|
||||
<version>2.46.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>us.codecraft</groupId>
|
||||
<artifactId>webmagic-core</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.github.detro</groupId>
|
||||
<artifactId>phantomjsdriver</artifactId>
|
||||
<version>1.2.0</version>
|
||||
</dependency>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<artifactId>maven-deploy-plugin</artifactId>
|
||||
<configuration>
|
||||
<skip>true</skip>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<artifactId>maven-deploy-plugin</artifactId>
|
||||
<configuration>
|
||||
<skip>true</skip>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
</project>
|
@ -0,0 +1,46 @@
|
||||
package us.codecraft.webmagic.samples;
|
||||
|
||||
import us.codecraft.webmagic.Page;
|
||||
import us.codecraft.webmagic.Site;
|
||||
import us.codecraft.webmagic.Spider;
|
||||
import us.codecraft.webmagic.downloader.selenium.SeleniumDownloader;
|
||||
import us.codecraft.webmagic.pipeline.FilePipeline;
|
||||
import us.codecraft.webmagic.processor.PageProcessor;
|
||||
|
||||
/**
|
||||
*
|
||||
* Using Selenium with PhantomJS to fetch web-page with JS<br>
|
||||
*
|
||||
* @author bob.li.0718@gmail.com <br>
|
||||
* Date: 15-7-11 <br>
|
||||
*/
|
||||
public class GooglePlayProcessor implements PageProcessor {
|
||||
|
||||
private Site site;
|
||||
|
||||
@Override
|
||||
public void process(Page page) {
|
||||
|
||||
page.putField("whole-html", page.getHtml().toString());
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public Site getSite() {
|
||||
if (null == site) {
|
||||
site = Site.me().setDomain("play.google.com").setSleepTime(300);
|
||||
}
|
||||
return site;
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
Spider.create(new GooglePlayProcessor())
|
||||
.thread(5)
|
||||
.addPipeline(
|
||||
new FilePipeline(
|
||||
"/Users/Bingo/Documents/workspace/webmagic/webmagic-selenium/data/"))
|
||||
.setDownloader(new SeleniumDownloader())
|
||||
.addUrl("https://play.google.com/store/apps/details?id=com.tencent.mm")
|
||||
.runAsync();
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue