diff --git a/pom.xml b/pom.xml
index 39f068c5..f0b3a7d7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -3,18 +3,65 @@
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
us.codecraft
- 0.0.1-SNAPSHOT
+ 0.1.0
4.0.0
pom
webmagic
- ./webmagic-core
- ./webmagic-plugin/
- ./webmagic-samples/
-
+ webmagic-core
+ webmagic-plugin/
+ webmagic-samples/
+ webmagic-selenium
+
-
+
+
+
+ junit
+ junit
+ 4.7
+ test
+
+
+ org.apache.httpcomponents
+ httpclient
+ 4.2.4
+
+
+ log4j
+ log4j
+ 1.2.17
+
+
+ org.apache.commons
+ commons-lang3
+ 3.1
+
+
+ commons-collections
+ commons-collections
+ 3.2.1
+
+
+ net.sourceforge.htmlcleaner
+ htmlcleaner
+ 2.4
+
+
+ org.apache.commons
+ commons-io
+ 1.3.2
+
+
+ org.jsoup
+ jsoup
+ 1.7.2
+
+
+
+
+
org.apache.maven.plugins
@@ -25,6 +72,61 @@
1.6
+
+ org.apache.maven.plugins
+ maven-dependency-plugin
+
+
+ copy-dependencies
+ package
+
+ copy-dependencies
+
+
+ ${project.build.directory}/lib
+ false
+ false
+ true
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-resources-plugin
+
+ UTF-8
+
+
+
+ org.apache.maven.plugins
+ maven-source-plugin
+
+
+ attach-sources
+
+ jar
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-javadoc-plugin
+
+
+ attach-javadocs
+
+ jar
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-release-plugin
+ 2.0-beta-7
+
diff --git a/webmagic-core/pom.xml b/webmagic-core/pom.xml
index b0de214c..60c37c02 100644
--- a/webmagic-core/pom.xml
+++ b/webmagic-core/pom.xml
@@ -2,8 +2,11 @@
- us.codecraft
- 0.1.0
+
+ us.codecraft
+ webmagic
+ 0.1.0
+
4.0.0
webmagic-core
@@ -12,109 +15,43 @@
org.apache.httpcomponents
httpclient
- 4.2.4
junit
junit
- 4.7
- test
-
-
-
- com.google.guava
- guava
- 13.0.1
org.apache.commons
commons-lang3
- 3.1
log4j
log4j
- 1.2.17
commons-collections
commons-collections
- 3.2.1
net.sourceforge.htmlcleaner
htmlcleaner
- 2.4
org.jsoup
jsoup
- 1.7.2
org.apache.commons
commons-io
- 1.3.2
-
-
-
- org.apache.maven.plugins
- maven-compiler-plugin
- 3.1
-
-
- 1.6
-
-
-
- org.apache.maven.plugins
- maven-resources-plugin
-
- UTF-8
-
-
-
- org.apache.maven.plugins
- maven-source-plugin
-
-
- attach-sources
-
- jar
-
-
-
-
-
- org.apache.maven.plugins
- maven-javadoc-plugin
-
-
- attach-javadocs
-
- jar
-
-
-
-
-
- org.apache.maven.plugins
- maven-release-plugin
- 2.0-beta-7
-
-
-
-
-
\ No newline at end of file
diff --git a/webmagic-plugin/pom.xml b/webmagic-plugin/pom.xml
index 634f09d3..b75dc9e7 100644
--- a/webmagic-plugin/pom.xml
+++ b/webmagic-plugin/pom.xml
@@ -2,8 +2,11 @@
- us.codecraft
- 0.1.0
+
+ us.codecraft
+ webmagic
+ 0.1.0
+
4.0.0
webmagic-plugin
@@ -12,13 +15,11 @@
us.codecraft
webmagic-core
- 0.1.0
+ ${project.version}
junit
junit
- 4.7
- test
org.freemarker
@@ -32,55 +33,4 @@
-
-
-
- org.apache.maven.plugins
- maven-compiler-plugin
- 3.1
-
-
- 1.6
-
-
-
- org.apache.maven.plugins
- maven-resources-plugin
-
- UTF-8
-
-
-
- org.apache.maven.plugins
- maven-source-plugin
-
-
- attach-sources
-
- jar
-
-
-
-
-
- org.apache.maven.plugins
- maven-javadoc-plugin
-
-
- attach-javadocs
-
- jar
-
-
-
-
-
- org.apache.maven.plugins
- maven-release-plugin
- 2.0-beta-7
-
-
-
-
-
\ No newline at end of file
diff --git a/webmagic-samples/pom.xml b/webmagic-samples/pom.xml
index ac2092f6..8af7672d 100644
--- a/webmagic-samples/pom.xml
+++ b/webmagic-samples/pom.xml
@@ -2,9 +2,11 @@
-
- us.codecraft
- 0.1.0
+
+ us.codecraft
+ webmagic
+ 0.1.0
+
4.0.0
webmagic-samples
@@ -13,102 +15,17 @@
us.codecraft
webmagic-core
- 0.1.0
+ ${project.version}
us.codecraft
webmagic-plugin
- 0.1.0
+ ${project.version}
junit
junit
- 4.7
- test
-
-
-
- org.apache.maven.plugins
- maven-compiler-plugin
- 3.1
-
-
- 1.6
-
-
-
- org.apache.maven.plugins
- maven-dependency-plugin
-
-
- copy-dependencies
- package
-
- copy-dependencies
-
-
- ${project.build.directory}/lib
- false
- false
- true
-
-
-
-
-
- org.apache.maven.plugins
- maven-resources-plugin
-
- UTF-8
-
-
-
- org.apache.maven.plugins
- maven-source-plugin
-
-
- attach-sources
-
- jar
-
-
-
-
-
- org.apache.maven.plugins
- maven-javadoc-plugin
-
-
- attach-javadocs
-
- jar
-
-
-
-
-
- org.apache.maven.plugins
- maven-jar-plugin
-
-
-
- true
- ./lib/
- us.codecraft.webmagic.samples.DianpingIndexProcessor
-
-
-
-
-
- org.apache.maven.plugins
- maven-release-plugin
- 2.0-beta-7
-
-
-
-
-
\ No newline at end of file
diff --git a/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/IteyeBlogProcessor.java b/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/IteyeBlogProcessor.java
index 188f3a1f..76f9cc30 100644
--- a/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/IteyeBlogProcessor.java
+++ b/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/IteyeBlogProcessor.java
@@ -26,13 +26,12 @@ public class IteyeBlogProcessor implements PageProcessor {
public Site getSite() {
if (site == null) {
site = Site.me().setDomain("yanghaoli.iteye.com").addStartUrl("http://yanghaoli.iteye.com/").
- setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31")
- .setSleepTime(100).setRetryTimes(3);
+ setSleepTime(100).setRetryTimes(3);
}
return site;
}
public static void main(String[] args) {
- Spider.create(new IteyeBlogProcessor()).thread(5).pipeline(new FilePipeline()).run();
+ Spider.create(new IteyeBlogProcessor()).thread(5).pipeline(new FilePipeline("/data/webmagic/")).run();
}
}
diff --git a/webmagic-selenium/pom.xml b/webmagic-selenium/pom.xml
new file mode 100644
index 00000000..209fbe8f
--- /dev/null
+++ b/webmagic-selenium/pom.xml
@@ -0,0 +1,37 @@
+
+
+
+
+ us.codecraft
+ webmagic
+ 0.1.0
+
+ 4.0.0
+ webmagic-selenium
+
+
+
+ us.codecraft
+ webmagic-core
+ ${project.version}
+
+
+ us.codecraft
+ webmagic-plugin
+ ${project.version}
+
+
+ junit
+ junit
+
+
+ org.seleniumhq.selenium
+ selenium-java
+ 2.33.0
+
+
+
+
+
\ No newline at end of file