diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java index 84beccba..22ca2763 100644 --- a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java +++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java @@ -189,7 +189,7 @@ public class Spider implements Runnable, Task { * * @param pipeline * @return this - * @see #setPipeline(us.codecraft.webmagic.pipeline.Pipeline) + * @see #addPipeline(us.codecraft.webmagic.pipeline.Pipeline) * @deprecated */ public Spider pipeline(Pipeline pipeline) { @@ -210,6 +210,20 @@ public class Spider implements Runnable, Task { return this; } + /** + * set pipelines for Spider + * + * @param pipelines + * @return this + * @see Pipeline + * @since 0.4.1 + */ + public Spider setPipelines(List pipelines) { + checkIfRunning(); + this.pipelines = pipelines; + return this; + } + /** * clear the pipelines set * diff --git a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptConsole.java b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptConsole.java index 3b26d33d..7d3b6365 100644 --- a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptConsole.java +++ b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptConsole.java @@ -118,6 +118,7 @@ public class ScriptConsole { options.addOption(new Option("l", "language", true, "language")); options.addOption(new Option("t", "thread", true, "thread")); options.addOption(new Option("f", "file", true, "script file")); + options.addOption(new Option("i", "input", true, "input file")); options.addOption(new Option("s", "sleep", true, "sleep time")); options.addOption(new Option("g", "logger", true, "sleep time")); CommandLineParser commandLineParser = new PosixParser(); diff --git a/webmagic-scripts/src/main/resources/js/oschina.js b/webmagic-scripts/src/main/resources/js/oschina.js index b3fc11ab..0a11ade3 100644 --- 
a/webmagic-scripts/src/main/resources/js/oschina.js +++ b/webmagic-scripts/src/main/resources/js/oschina.js @@ -6,4 +6,6 @@ var config = { ua: '', sleepTime : 20 } +title = $("div.BlogTitle h1"), +content = $("div.BlogContent") urls("http://my\\.oschina\\.net/flashsword/blog/\\d+") \ No newline at end of file diff --git a/webmagic-selenium/pom.xml b/webmagic-selenium/pom.xml index 6551e4ef..c6b5bffe 100644 --- a/webmagic-selenium/pom.xml +++ b/webmagic-selenium/pom.xml @@ -5,7 +5,7 @@ webmagic-parent us.codecraft - 0.4.0-SNAPSHOT + 0.4.1-SNAPSHOT 4.0.0