diff --git a/asserts/logo-simple.jpg b/asserts/logo-simple.jpg
new file mode 100644
index 00000000..366aa627
Binary files /dev/null and b/asserts/logo-simple.jpg differ
diff --git a/pom.xml b/pom.xml
index 2aa3df7a..eba68c85 100644
--- a/pom.xml
+++ b/pom.xml
@@ -48,6 +48,7 @@
webmagic-core
webmagic-extension/
+ webmagic-scripts
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/PlainText.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/PlainText.java
index 9406f3ab..bb1b8688 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/PlainText.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/PlainText.java
@@ -45,6 +45,16 @@ public class PlainText implements Selectable {
throw new UnsupportedOperationException();
}
+ /**
+ * CSS-named alias for the one-argument {@code $} selector method: the selector is
+ * forwarded unchanged and whatever {@code $(selector)} returns (or throws) is propagated.
+ *
+ * @param selector css selector expression
+ * @return the result of {@code $(selector)}
+ */
+ @Override
+ public Selectable css(String selector) {
+ return $(selector);
+ }
+
+ /**
+ * CSS-named alias for the two-argument {@code $} selector method: both arguments are
+ * forwarded unchanged and whatever {@code $(selector, attrName)} returns (or throws)
+ * is propagated.
+ *
+ * @param selector css selector expression
+ * @param attrName attribute name passed along with the selector
+ * @return the result of {@code $(selector, attrName)}
+ */
+ @Override
+ public Selectable css(String selector, String attrName) {
+ return $(selector, attrName);
+ }
+
@Override
public Selectable smartContent() {
throw new UnsupportedOperationException();
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selectable.java b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selectable.java
index 66df5d5b..6b4410e1 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selectable.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/selector/Selectable.java
@@ -35,6 +35,23 @@ public interface Selectable {
*/
public Selectable $(String selector, String attrName);
+ /**
+ * Select a list of results using a CSS selector expression.
+ *
+ * @param selector css selector expression
+ * @return new Selectable after extract
+ */
+ public Selectable css(String selector);
+
+ /**
+ * Select a list of results using a CSS selector expression together with an
+ * attribute name.
+ *
+ * @param selector css selector expression
+ * @param attrName attribute name of css selector (presumably the attribute whose
+ *                 value is extracted from each match; confirm against implementations)
+ * @return new Selectable after extract
+ */
+ public Selectable css(String selector, String attrName);
+
/**
* select smart content with ReadAbility algorithm
*
diff --git a/webmagic-scripts/pom.xml b/webmagic-scripts/pom.xml
new file mode 100644
index 00000000..99aac59b
--- /dev/null
+++ b/webmagic-scripts/pom.xml
@@ -0,0 +1,35 @@
+
+
+
+ webmagic-parent
+ us.codecraft
+ 0.4.1-SNAPSHOT
+
+ 4.0.0
+
+ us.codecraft
+ webmagic-scripts
+ 0.4.1-SNAPSHOT
+
+
+
+ org.jruby
+ jruby
+ 1.7.6
+
+
+ junit
+ junit
+ test
+
+
+ us.codecraft
+ webmagic-core
+ ${project.version}
+
+
+
+
+
\ No newline at end of file
diff --git a/webmagic-scripts/src/main/java/us/codecraft/webmagic/processor/ScriptProcessor.java b/webmagic-scripts/src/main/java/us/codecraft/webmagic/processor/ScriptProcessor.java
new file mode 100644
index 00000000..b821ae48
--- /dev/null
+++ b/webmagic-scripts/src/main/java/us/codecraft/webmagic/processor/ScriptProcessor.java
@@ -0,0 +1,61 @@
+package us.codecraft.webmagic.processor;
+
+import org.apache.commons.io.IOUtils;
+import us.codecraft.webmagic.Page;
+import us.codecraft.webmagic.Site;
+import us.codecraft.webmagic.Spider;
+
+import javax.script.ScriptContext;
+import javax.script.ScriptEngine;
+import javax.script.ScriptEngineManager;
+import javax.script.ScriptException;
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * {@link PageProcessor} that handles each page by evaluating a Ruby script through the
+ * {@code javax.script} API.
+ * <p>
+ * The helper definitions in {@code ruby/defines.rb} and the extraction script in
+ * {@code ruby/oschina.rb} are read from the classpath once, at construction time. For
+ * every page the combined source is evaluated with the page bound to the engine-scope
+ * attribute {@code "page"}.
+ * <p>
+ * NOTE(review): one {@link ScriptEngine} and its default context are shared by every call
+ * to {@link #process(Page)}; confirm that callers never invoke it from several threads
+ * at once.
+ *
+ * @author code4crafter@gmail.com
+ */
+public class ScriptProcessor implements PageProcessor {
+
+    /** Name used to look up the Ruby engine. */
+    private static final String ENGINE_NAME = "jruby";
+
+    /** Classpath location of the shared helper definitions. */
+    private static final String DEFINES_RESOURCE = "ruby/defines.rb";
+
+    /** Classpath location of the extraction script. */
+    private static final String SCRIPT_RESOURCE = "ruby/oschina.rb";
+
+    /** Charset used to decode script resources, so decoding does not depend on the platform default. */
+    private static final String SCRIPT_ENCODING = "UTF-8";
+
+    private final ScriptEngine rubyEngine;
+
+    /** Helper definitions followed by the extraction script, ready to be evaluated. */
+    private final String source;
+
+    /**
+     * Looks up the script engine and loads both script resources.
+     *
+     * @throws IllegalStateException if the engine is not available, or if a script
+     *                               resource is missing or cannot be read
+     */
+    ScriptProcessor() {
+        rubyEngine = new ScriptEngineManager().getEngineByName(ENGINE_NAME);
+        if (rubyEngine == null) {
+            throw new IllegalStateException("Script engine '" + ENGINE_NAME + "' is not available");
+        }
+        String defines = readResource(DEFINES_RESOURCE);
+        String script = readResource(SCRIPT_RESOURCE);
+        // The explicit newline keeps the last line of the defines file from running
+        // into the first line of the script when the file lacks a trailing newline.
+        source = defines + "\n" + script;
+    }
+
+    /**
+     * Reads a classpath resource completely into a string and closes the stream.
+     *
+     * @param name resource path relative to the classpath root
+     * @return the resource content decoded as UTF-8
+     * @throws IllegalStateException if the resource does not exist or cannot be read
+     */
+    private String readResource(String name) {
+        InputStream in = getClass().getClassLoader().getResourceAsStream(name);
+        if (in == null) {
+            throw new IllegalStateException("Script resource not found on classpath: " + name);
+        }
+        try {
+            return IOUtils.toString(in, SCRIPT_ENCODING);
+        } catch (IOException e) {
+            throw new IllegalStateException("Failed to read script resource: " + name, e);
+        } finally {
+            IOUtils.closeQuietly(in);
+        }
+    }
+
+    /**
+     * Binds {@code page} to the engine-scope attribute {@code "page"} and evaluates the
+     * combined script source.
+     *
+     * @param page page handed to the script
+     */
+    @Override
+    public void process(Page page) {
+        ScriptContext context = rubyEngine.getContext();
+        context.setAttribute("page", page, ScriptContext.ENGINE_SCOPE);
+        try {
+            rubyEngine.eval(source, context);
+        } catch (ScriptException e) {
+            // Deliberately best-effort, as before: a script failure on one page is
+            // reported but not propagated. No logger is in scope in this file.
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * @return the result of {@code Site.me()}
+     */
+    @Override
+    public Site getSite() {
+        return Site.me();
+    }
+
+    /**
+     * Creates a spider around a new {@code ScriptProcessor}, adds the hard-coded start
+     * URL and runs it.
+     *
+     * @param args ignored
+     */
+    public static void main(String[] args) {
+        Spider.create(new ScriptProcessor()).addUrl("http://my.oschina.net/flashsword/blog").run();
+    }
+}
diff --git a/webmagic-scripts/src/main/resources/ruby/defines.rb b/webmagic-scripts/src/main/resources/ruby/defines.rb
new file mode 100644
index 00000000..6d3cbd86
--- /dev/null
+++ b/webmagic-scripts/src/main/resources/ruby/defines.rb
@@ -0,0 +1,11 @@
+def xpath str
+ $page.getHtml().xpath(str).toString()
+end
+def css str
+ $page.getHtml().css(str).toString()
+end
+def urls str
+ links = $page.getHtml().links().regex(str).all();
+ $page.addTargetRequests(links);
+end
+
diff --git a/webmagic-scripts/src/main/resources/ruby/oschina.rb b/webmagic-scripts/src/main/resources/ruby/oschina.rb
new file mode 100644
index 00000000..225f8224
--- /dev/null
+++ b/webmagic-scripts/src/main/resources/ruby/oschina.rb
@@ -0,0 +1,5 @@
+# Extract the blog title and body using the css helper.
+title = css("div.BlogTitle h1")
+content = css("div.BlogContent")
+# Pass links that match the blog-post URL pattern to the urls helper.
+urls("http://my\\.oschina\\.net/flashsword/blog/\\d+")
+# Print what was extracted.
+puts(title)
+puts(content)
\ No newline at end of file
diff --git a/webmagic-scripts/src/test/java/us/codecraft/webmagic/jruby/TestJRubyCall.java b/webmagic-scripts/src/test/java/us/codecraft/webmagic/jruby/TestJRubyCall.java
new file mode 100644
index 00000000..c2965171
--- /dev/null
+++ b/webmagic-scripts/src/test/java/us/codecraft/webmagic/jruby/TestJRubyCall.java
@@ -0,0 +1,25 @@
+package us.codecraft.webmagic.jruby;
+
+import org.junit.Test;
+
+import javax.script.ScriptContext;
+import javax.script.ScriptEngine;
+import javax.script.ScriptEngineManager;
+import javax.script.ScriptException;
+
+/**
+ * Smoke test for evaluating Ruby code through the {@code javax.script} API.
+ *
+ * @author code4crafter@gmail.com
+ */
+public class TestJRubyCall {
+
+    /**
+     * Looks up the "jruby" engine, stores a string attribute in its engine scope and
+     * evaluates a one-line script. Nothing is asserted; the test fails only if an
+     * exception is thrown.
+     */
+    @Test
+    public void test() throws ScriptException {
+        ScriptEngine engine = new ScriptEngineManager().getEngineByName("jruby");
+        ScriptContext scope = engine.getContext();
+        scope.setAttribute("a", "sad", ScriptContext.ENGINE_SCOPE);
+
+        engine.eval("b=1; puts b", scope);
+    }
+}