diff --git a/webmagic-scripts/pom.xml b/webmagic-scripts/pom.xml
index 7a294e18..aa5a4798 100644
--- a/webmagic-scripts/pom.xml
+++ b/webmagic-scripts/pom.xml
@@ -53,6 +53,12 @@
webmagic-extension
${project.version}
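+        <dependency>
+            <groupId>org.projectlombok</groupId>
+            <artifactId>lombok</artifactId>
+            <version>1.18.32</version>
+            <scope>provided</scope>
+        </dependency>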
diff --git a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/Params.java b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/Params.java
new file mode 100644
index 00000000..873176e6
--- /dev/null
+++ b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/Params.java
@@ -0,0 +1,68 @@
+package us.codecraft.webmagic.scripts;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import lombok.Getter;
+import lombok.Setter;
+import us.codecraft.webmagic.scripts.languages.JRuby;
+import us.codecraft.webmagic.scripts.languages.Javascript;
+import us.codecraft.webmagic.scripts.languages.Language;
+import us.codecraft.webmagic.utils.WMCollections;
+
+/**
+ * Settings collected from the command line for one script-driven run.
+ *
+ * <p>Accessors are generated by Lombok; {@code language} has no generated setter and is
+ * only changed through {@link #setLanguagefromArg(String)}.
+ */
+public class Params {
+    /**
+     * Spellings accepted on the command line for each supported language. Filled once in
+     * the static initializer instead of the constructor, so creating another
+     * {@code Params} does not replace the table shared by every instance.
+     */
+    private static final Map<Language, Set<String>> ALIAS = new HashMap<Language, Set<String>>();
+
+    static {
+        ALIAS.put(new Javascript(), WMCollections.newHashSet("js", "javascript", "JavaScript", "JS"));
+        ALIAS.put(new JRuby(), WMCollections.newHashSet("ruby", "jruby", "Ruby", "JRuby"));
+    }
+
+    /** Script language; a {@link Javascript} instance unless an alias selects another one. */
+    @Getter
+    Language language = new Javascript();
+
+    /** Name of the script file. */
+    @Getter @Setter
+    String scriptFileName;
+
+    /** URLs supplied for the run. */
+    @Getter @Setter
+    List<String> urls;
+
+    /** Thread count; defaults to 1. */
+    @Getter @Setter
+    int thread = 1;
+
+    /** Sleep time; defaults to 1000 (presumably milliseconds - confirm against the consumer). */
+    @Getter @Setter
+    int sleepTime = 1000;
+
+    /**
+     * Selects the language whose alias set contains {@code arg}. Matching is exact
+     * (case-sensitive); an unrecognised value leaves the current language unchanged.
+     *
+     * @param arg language name as typed on the command line
+     */
+    public void setLanguagefromArg(String arg) {
+        for (Map.Entry<Language, Set<String>> languageSetEntry : ALIAS.entrySet()) {
+            if (languageSetEntry.getValue().contains(arg)) {
+                this.language = languageSetEntry.getKey();
+                return;
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptConsole.java b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptConsole.java
index 2ccfe7f4..c60b3ec3 100755
--- a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptConsole.java
+++ b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptConsole.java
@@ -1,90 +1,21 @@
package us.codecraft.webmagic.scripts;
import org.apache.commons.cli.*;
-import org.apache.logging.log4j.Level;
-import org.apache.logging.log4j.core.Logger;
-import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.ResultItems;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.pipeline.Pipeline;
+import us.codecraft.webmagic.scripts.config.CommandLineOption;
import us.codecraft.webmagic.utils.WMCollections;
-import java.util.HashMap;
import java.util.List;
-import java.util.Map;
-import java.util.Set;
/**
- * @author code4crafter@gmail.com
+ * @author code4crafter@gmail.com / FrancoisGib
* @since 0.4.1
*/
public class ScriptConsole {
-
- private static class Params {
- Language language = Language.JavaScript;
- String scriptFileName;
- List urls;
- int thread = 1;
- int sleepTime = 1000;
- private static Map> alias = new HashMap>();
-
- static {
- alias.put(Language.JavaScript, WMCollections.newHashSet("js", "javascript", "JavaScript", "JS"));
- alias.put(Language.JRuby, WMCollections.newHashSet("ruby", "jruby", "Ruby", "JRuby"));
- }
-
- public void setLanguagefromArg(String arg) {
- for (Map.Entry> languageSetEntry : alias.entrySet()) {
- if (languageSetEntry.getValue().contains(arg)) {
- this.language = languageSetEntry.getKey();
- return;
- }
- }
- }
-
- private Language getLanguage() {
- return language;
- }
-
- private void setLanguage(Language language) {
- this.language = language;
- }
-
- private String getScriptFileName() {
- return scriptFileName;
- }
-
- private void setScriptFileName(String scriptFileName) {
- this.scriptFileName = scriptFileName;
- }
-
- private List getUrls() {
- return urls;
- }
-
- private void setUrls(List urls) {
- this.urls = urls;
- }
-
- private int getThread() {
- return thread;
- }
-
- private void setThread(int thread) {
- this.thread = thread;
- }
-
- private int getSleepTime() {
- return sleepTime;
- }
-
- private void setSleepTime(int sleepTime) {
- this.sleepTime = sleepTime;
- }
- }
-
public static void main(String[] args) {
Params params = parseCommand(args);
startSpider(params);
@@ -142,45 +73,21 @@ public class ScriptConsole {
+    /**
+     * Builds the run parameters from a parsed command line.
+     *
+     * @param commandLine parsed command-line arguments
+     * @return the parameters for this run
+     */
     private static Params readOptions(CommandLine commandLine) {
         Params params = new Params();
-        if (commandLine.hasOption("l")) {
-            String language = commandLine.getOptionValue("l");
-            params.setLanguagefromArg(language);
-        }
-        if (commandLine.hasOption("f")) {
-            String scriptFilename = commandLine.getOptionValue("f");
-            params.setScriptFileName(scriptFilename);
-        } else {
-            exit();
-        }
-        if (commandLine.hasOption("s")) {
-            Integer sleepTime = Integer.parseInt(commandLine.getOptionValue("s"));
-            params.setSleepTime(sleepTime);
-        }
-        if (commandLine.hasOption("t")) {
-            Integer thread = Integer.parseInt(commandLine.getOptionValue("t"));
-            params.setThread(thread);
-        }
-        if (commandLine.hasOption("g")) {
-            configLogger(commandLine.getOptionValue("g"));
-        }
-        params.setUrls(commandLine.getArgList());
+        // A missing -f must still abort: OptionF only stores the file name it is given.
+        if (!commandLine.hasOption("f")) {
+            exit();
+        }
+        for (CommandLineOption option : CommandLineOption.getAllOptions()) {
+            option.addParamOptionIfInCommandLine(params, commandLine);
+        }
+        // Remaining (non-option) arguments are the URLs; no option handler stores them.
+        params.setUrls(commandLine.getArgList());
         return params;
     }
-
- private static void configLogger(String value) {
- Logger rootLogger = (Logger) LoggerFactory.getLogger(org.slf4j.Logger.ROOT_LOGGER_NAME);
- if ("debug".equalsIgnoreCase(value)) {
- rootLogger.setLevel(Level.DEBUG);
- } else if ("info".equalsIgnoreCase(value)) {
- rootLogger.setLevel(Level.INFO);
- } else if ("warn".equalsIgnoreCase(value)) {
- rootLogger.setLevel(Level.WARN);
- } else if ("trace".equalsIgnoreCase(value)) {
- rootLogger.setLevel(Level.TRACE);
- } else if ("off".equalsIgnoreCase(value)) {
- rootLogger.setLevel(Level.OFF);
- } else if ("error".equalsIgnoreCase(value)) {
- rootLogger.setLevel(Level.ERROR);
- }
- }
-}
+}
\ No newline at end of file
diff --git a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptEnginePool.java b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptEnginePool.java
index d1e5d7fe..bdfbbaed 100755
--- a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptEnginePool.java
+++ b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/ScriptEnginePool.java
@@ -2,6 +2,9 @@ package us.codecraft.webmagic.scripts;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
+
+import us.codecraft.webmagic.scripts.languages.Language;
+
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.atomic.AtomicInteger;
@@ -11,14 +14,11 @@ import java.util.concurrent.atomic.AtomicInteger;
*/
public class ScriptEnginePool {
- private final int size;
-
private final AtomicInteger availableCount;
private final LinkedBlockingQueue scriptEngines = new LinkedBlockingQueue();
public ScriptEnginePool(Language language,int size) {
- this.size = size;
this.availableCount = new AtomicInteger(size);
for (int i=0;i getAllOptions() {
+ return List.of(new OptionL(), new OptionF(), new OptionS(), new OptionT(), new OptionG());
+ }
+}
+
+/**
+ * Option registered under the character {@code 'l'}; passes the value of {@code -l}
+ * to {@code Params.setLanguagefromArg}.
+ */
+class OptionL extends CommandLineOption {
+    public OptionL() {
+        super('l');
+    }
+
+    protected void addParamOption(Params params, CommandLine commandLine) {
+        params.setLanguagefromArg(commandLine.getOptionValue("l"));
+    }
+}
+
+/**
+ * Option registered under the character {@code 'f'}; stores the value of {@code -f}
+ * as the script file name.
+ */
+class OptionF extends CommandLineOption {
+    public OptionF() {
+        super('f');
+    }
+
+    protected void addParamOption(Params params, CommandLine commandLine) {
+        params.setScriptFileName(commandLine.getOptionValue("f"));
+    }
+}
+
+/**
+ * Option registered under the character {@code 's'}; parses the value of {@code -s}
+ * as an integer and stores it as the sleep time.
+ */
+class OptionS extends CommandLineOption {
+    public OptionS() {
+        super('s');
+    }
+
+    protected void addParamOption(Params params, CommandLine commandLine) {
+        String rawSleepTime = commandLine.getOptionValue("s");
+        // Integer.parseInt throws NumberFormatException on a non-numeric value; not caught here.
+        params.setSleepTime(Integer.parseInt(rawSleepTime));
+    }
+}
+
+/**
+ * Option registered under the character {@code 't'}; parses the value of {@code -t}
+ * as an integer and stores it as the thread count.
+ */
+class OptionT extends CommandLineOption {
+    public OptionT() {
+        super('t');
+    }
+
+    protected void addParamOption(Params params, CommandLine commandLine) {
+        String rawThreadCount = commandLine.getOptionValue("t");
+        // Integer.parseInt throws NumberFormatException on a non-numeric value; not caught here.
+        params.setThread(Integer.parseInt(rawThreadCount));
+    }
+}
+
+/**
+ * Option registered under the character {@code 'g'}; hands the value of {@code -g}
+ * to {@code ConfigLogger.configLogger}. {@code params} is not modified.
+ */
+class OptionG extends CommandLineOption {
+    public OptionG() {
+        super('g');
+    }
+
+    protected void addParamOption(Params params, CommandLine commandLine) {
+        String levelName = commandLine.getOptionValue("g");
+        ConfigLogger.configLogger(levelName);
+    }
+}
\ No newline at end of file
diff --git a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/config/ConfigLogger.java b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/config/ConfigLogger.java
new file mode 100644
index 00000000..9e81ea6c
--- /dev/null
+++ b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/config/ConfigLogger.java
@@ -0,0 +1,38 @@
+package us.codecraft.webmagic.scripts.config;
+
+import java.util.List;
+
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.logging.log4j.Level;
+import org.apache.logging.log4j.core.Logger;
+import org.slf4j.LoggerFactory;
+
+public class ConfigLogger {
+    /**
+     * Sets the root logger level from a level name.
+     *
+     * <p>Recognised names are debug, info, warn, trace, off and error, compared
+     * case-insensitively. Any other value (including {@code null}) leaves the logging
+     * configuration untouched.
+     *
+     * @param value the requested level name
+     */
+    public static void configLogger(String value) {
+        List<Pair<String, Level>> options = List.of(
+                Pair.of("debug", Level.DEBUG),
+                Pair.of("info", Level.INFO),
+                Pair.of("warn", Level.WARN),
+                Pair.of("trace", Level.TRACE),
+                Pair.of("off", Level.OFF),
+                Pair.of("error", Level.ERROR));
+        // A for-each visits every entry, so the final one ("error") is matched like the
+        // others; an index that doubles as the "found" flag cannot tell it from "no match".
+        for (Pair<String, Level> option : options) {
+            if (option.getLeft().equalsIgnoreCase(value)) {
+                Logger rootLogger = (Logger) LoggerFactory.getLogger(org.slf4j.Logger.ROOT_LOGGER_NAME);
+                rootLogger.setLevel(option.getRight());
+                return;
+            }
+        }
+    }
+}
diff --git a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/languages/JRuby.java b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/languages/JRuby.java
new file mode 100644
index 00000000..b3a3209a
--- /dev/null
+++ b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/languages/JRuby.java
@@ -0,0 +1,36 @@
+package us.codecraft.webmagic.scripts.languages;
+
+import java.util.Iterator;
+import java.util.Map;
+
+import javax.script.ScriptEngine;
+import javax.script.ScriptException;
+
+import org.jruby.RubyHash;
+
+import us.codecraft.webmagic.Page;
+
+/**
+ * {@link Language} implementation configured with the engine name "jruby".
+ */
+public class JRuby extends Language {
+    public JRuby() {
+        super("jruby","ruby/defines.rb","");
+    }
+
+    /**
+     * Evaluates {@code defines} followed by {@code script}, casts the evaluation result to a
+     * {@link RubyHash} and copies each entry into the page's result items, keyed by the
+     * string form of the entry key.
+     *
+     * @throws ScriptException declared for failures of the engine's {@code eval}
+     */
+    @Override
+    public void process(ScriptEngine engine, String defines, String script, Page page) throws ScriptException {
+        String source = defines + "\n" + script;
+        RubyHash rubyResult = (RubyHash) engine.eval(source, engine.getContext());
+        for (Iterator entries = rubyResult.entrySet().iterator(); entries.hasNext();) {
+            Map.Entry entry = (Map.Entry) entries.next();
+            page.getResultItems().put(entry.getKey().toString(), entry.getValue());
+        }
+    }
+}
\ No newline at end of file
diff --git a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/languages/Javascript.java b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/languages/Javascript.java
new file mode 100644
index 00000000..b0f7b647
--- /dev/null
+++ b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/languages/Javascript.java
@@ -0,0 +1,27 @@
+package us.codecraft.webmagic.scripts.languages;
+
+import javax.script.ScriptEngine;
+import javax.script.ScriptException;
+
+import us.codecraft.webmagic.Page;
+
+/**
+ * {@link Language} implementation configured with the engine name "javascript".
+ */
+public class Javascript extends Language {
+    public Javascript() {
+        super("javascript","js/defines.js","");
+    }
+
+    /**
+     * Evaluates {@code defines} followed by {@code script} in the engine's own context.
+     * The evaluation result is discarded and {@code page} is not used by this method.
+     *
+     * @throws ScriptException declared for failures of the engine's {@code eval}
+     */
+    @Override
+    public void process(ScriptEngine engine, String defines, String script, Page page) throws ScriptException {
+        String source = defines + "\n" + script;
+        engine.eval(source, engine.getContext());
+    }
+}
\ No newline at end of file
diff --git a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/languages/Jython.java b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/languages/Jython.java
new file mode 100644
index 00000000..9124d2db
--- /dev/null
+++ b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/languages/Jython.java
@@ -0,0 +1,37 @@
+package us.codecraft.webmagic.scripts.languages;
+
+import java.util.Iterator;
+import java.util.Map;
+
+import javax.script.ScriptEngine;
+import javax.script.ScriptException;
+
+import org.python.core.PyDictionary;
+
+import us.codecraft.webmagic.Page;
+
+/**
+ * {@link Language} implementation configured with the engine name "jython".
+ */
+public class Jython extends Language {
+    public Jython() {
+        super("jython","python/defines.py","");
+    }
+
+    /**
+     * Evaluates {@code defines} followed by {@code script}, then reads the engine variable
+     * {@code result}, casts it to a {@link PyDictionary} and copies each entry into the
+     * page's result items, keyed by the string form of the entry key.
+     *
+     * @throws ScriptException declared for failures of the engine's {@code eval}
+     */
+    @Override
+    public void process(ScriptEngine engine, String defines, String script, Page page) throws ScriptException {
+        String source = defines + "\n" + script;
+        engine.eval(source, engine.getContext());
+        PyDictionary pythonResult = (PyDictionary) engine.get("result");
+        for (Iterator entries = pythonResult.entrySet().iterator(); entries.hasNext();) {
+            Map.Entry entry = (Map.Entry) entries.next();
+            page.getResultItems().put(entry.getKey().toString(), entry.getValue());
+        }
+    }
+}
\ No newline at end of file
diff --git a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/Language.java b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/languages/Language.java
old mode 100755
new mode 100644
similarity index 51%
rename from webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/Language.java
rename to webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/languages/Language.java
index 2f9d22d5..44e6ba0a
--- a/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/Language.java
+++ b/webmagic-scripts/src/main/java/us/codecraft/webmagic/scripts/languages/Language.java
@@ -1,15 +1,18 @@
-package us.codecraft.webmagic.scripts;
+package us.codecraft.webmagic.scripts.languages;
+
+import javax.script.ScriptEngine;
+import javax.script.ScriptException;
+import us.codecraft.webmagic.Page;
/**
- * @author code4crafter@gmail.com
+ * @author FrancoisGib
*/
-public enum Language {
-
- JavaScript("javascript","js/defines.js",""),
-
- JRuby("jruby","ruby/defines.rb",""),
-
- Jython("jython","python/defines.py","");
+public abstract class Language {
+ public Language(String engineName, String defineFile, String gatherFile) {
+ this.engineName = engineName;
+ this.defineFile = defineFile;
+ this.gatherFile = gatherFile;
+ }
private String engineName;
@@ -17,12 +20,6 @@ public enum Language {
private String gatherFile;
- Language(String engineName, String defineFile, String gatherFile) {
- this.engineName = engineName;
- this.defineFile = defineFile;
- this.gatherFile = gatherFile;
- }
-
public String getEngineName() {
return engineName;
}
@@ -34,4 +31,6 @@ public enum Language {
public String getGatherFile() {
return gatherFile;
}
+
+ public abstract void process(ScriptEngine engine, String defines, String script, Page page) throws ScriptException;
}
diff --git a/webmagic-scripts/src/test/java/us/codecraft/webmagic/scripts/ScriptProcessorTest.java b/webmagic-scripts/src/test/java/us/codecraft/webmagic/scripts/ScriptProcessorTest.java
index ffeb9c99..b4c28521 100755
--- a/webmagic-scripts/src/test/java/us/codecraft/webmagic/scripts/ScriptProcessorTest.java
+++ b/webmagic-scripts/src/test/java/us/codecraft/webmagic/scripts/ScriptProcessorTest.java
@@ -2,7 +2,11 @@ package us.codecraft.webmagic.scripts;
import org.junit.Ignore;
import org.junit.Test;
+
import us.codecraft.webmagic.Spider;
+import us.codecraft.webmagic.scripts.languages.JRuby;
+import us.codecraft.webmagic.scripts.languages.Javascript;
+import us.codecraft.webmagic.scripts.languages.Jython;
/**
* @author code4crafter@gmail.com
@@ -13,14 +17,14 @@ public class ScriptProcessorTest {
@Test
public void testJavaScriptProcessor() {
- ScriptProcessor pageProcessor = ScriptProcessorBuilder.custom().language(Language.JavaScript).scriptFromClassPathFile("js/oschina.js").build();
+ ScriptProcessor pageProcessor = ScriptProcessorBuilder.custom().language(new Javascript()).scriptFromClassPathFile("js/oschina.js").build();
pageProcessor.getSite().setSleepTime(0);
Spider.create(pageProcessor).addUrl("http://my.oschina.net/flashsword/blog").setSpawnUrl(false).run();
}
@Test
public void testRubyProcessor() {
- ScriptProcessor pageProcessor = ScriptProcessorBuilder.custom().language(Language.JRuby).scriptFromClassPathFile("ruby/oschina.rb").build();
+ ScriptProcessor pageProcessor = ScriptProcessorBuilder.custom().language(new JRuby()).scriptFromClassPathFile("ruby/oschina.rb").build();
pageProcessor.getSite().setSleepTime(0);
Spider.create(pageProcessor).addUrl("http://my.oschina.net/flashsword/blog").setSpawnUrl(false).run();
}
@@ -28,7 +32,7 @@ public class ScriptProcessorTest {
@Test
public void testPythonProcessor() {
- ScriptProcessor pageProcessor = ScriptProcessorBuilder.custom().language(Language.Jython).scriptFromClassPathFile("python/oschina.py").build();
+ ScriptProcessor pageProcessor = ScriptProcessorBuilder.custom().language(new Jython()).scriptFromClassPathFile("python/oschina.py").build();
pageProcessor.getSite().setSleepTime(0);
Spider.create(pageProcessor).addUrl("http://my.oschina.net/flashsword/blog").setSpawnUrl(false).run();
}