From a339e4ab5cb542199b32cc37d84e03c88bc8eaf6 Mon Sep 17 00:00:00 2001
From: "yihua.huang"
Date: Mon, 12 Aug 2013 13:36:44 +0800
Subject: [PATCH] add jsonpathselector

---
 webmagic-extension/pom.xml                    |  5 ++
 .../webmagic/selector/JsonPathSelector.java   | 53 +++++++++++++++++++
 .../selector/JsonPathSelectorTest.java        | 49 +++++++++++++++++
 3 files changed, 107 insertions(+)
 create mode 100644 webmagic-extension/src/main/java/us/codecraft/webmagic/selector/JsonPathSelector.java
 create mode 100644 webmagic-extension/src/test/java/us/codecraft/webmagic/selector/JsonPathSelectorTest.java

diff --git a/webmagic-extension/pom.xml b/webmagic-extension/pom.xml
index 63034f23..7ef5aca7 100644
--- a/webmagic-extension/pom.xml
+++ b/webmagic-extension/pom.xml
@@ -31,6 +31,11 @@
             <groupId>junit</groupId>
             <artifactId>junit</artifactId>
         </dependency>
+        <dependency>
+            <groupId>com.jayway.jsonpath</groupId>
+            <artifactId>json-path</artifactId>
+            <version>0.8.1</version>
+        </dependency>
     </dependencies>
 
 </project>
\ No newline at end of file
diff --git a/webmagic-extension/src/main/java/us/codecraft/webmagic/selector/JsonPathSelector.java b/webmagic-extension/src/main/java/us/codecraft/webmagic/selector/JsonPathSelector.java
new file mode 100644
index 00000000..83314168
--- /dev/null
+++ b/webmagic-extension/src/main/java/us/codecraft/webmagic/selector/JsonPathSelector.java
@@ -0,0 +1,53 @@
+package us.codecraft.webmagic.selector;
+
+import com.jayway.jsonpath.JsonPath;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * @author code4crafter@gmail.com
+ * Date: 13-8-12
+ * Time: 12:54 PM
+ */
+public class JsonPathSelector implements Selector {
+
+    /** JsonPath expression text exactly as passed to the constructor. */
+    private final String jsonPathStr;
+
+    /** Compiled form of the expression; built once in the constructor and reused by every read. */
+    private final JsonPath jsonPath;
+
+    /**
+     * Creates a selector for the given JsonPath expression.
+     *
+     * @param jsonPathStr JsonPath expression, e.g. {@code $.store.book[*].author}
+     */
+    public JsonPathSelector(String jsonPathStr) {
+        this.jsonPathStr = jsonPathStr;
+        this.jsonPath = JsonPath.compile(jsonPathStr);
+    }
+
+    /**
+     * Evaluates the expression against {@code text} and returns a single value as a string.
+     * When the expression yields a list, the first element is used.
+     *
+     * @param text JSON document to read
+     * @return string form of the (first) match, or {@code null} when the result is {@code null},
+     *         an empty list, or a list whose first element is {@code null}
+     */
+    @Override
+    public String select(String text) {
+        Object object = jsonPath.read(text);
+        if (object instanceof List) {
+            List<?> matches = (List<?>) object;
+            // An empty list means nothing matched: report "no value" instead of the literal "[]".
+            return matches.isEmpty() ? null : stringify(matches.get(0));
+        }
+        return stringify(object);
+    }
+
+    /**
+     * Evaluates the expression against {@code text} and returns every match as a string.
+     *
+     * @param text JSON document to read
+     * @return a new mutable list with one entry per match (a {@code null} match stays {@code null});
+     *         empty when the result is {@code null}
+     */
+    @Override
+    public List<String> selectList(String text) {
+        List<String> list = new ArrayList<String>();
+        Object object = jsonPath.read(text);
+        if (object instanceof List) {
+            // Copy element by element: the list handed back by JsonPath may hold non-String
+            // values (numbers, nested objects), which must not leak out typed as strings.
+            for (Object item : (List<?>) object) {
+                list.add(stringify(item));
+            }
+        } else if (object != null) {
+            list.add(object.toString());
+        }
+        return list;
+    }
+
+    /** Null-safe {@code toString()}: a {@code null} value stays {@code null}. */
+    private static String stringify(Object value) {
+        return value == null ? null : value.toString();
+    }
+}
diff --git a/webmagic-extension/src/test/java/us/codecraft/webmagic/selector/JsonPathSelectorTest.java b/webmagic-extension/src/test/java/us/codecraft/webmagic/selector/JsonPathSelectorTest.java
new file mode 100644
index 00000000..1cd8dc14
--- /dev/null
+++ b/webmagic-extension/src/test/java/us/codecraft/webmagic/selector/JsonPathSelectorTest.java
@@ -0,0 +1,49 @@
+package us.codecraft.webmagic.selector;
+
+import junit.framework.Assert;
+import org.junit.Test;
+
+import java.util.List;
+
+/**
+ * @author code4crafter@gmail.com
+ * Date: 13-8-12
+ * Time: 1:12 PM
+ */
+public class JsonPathSelectorTest {
+
+    // Fixture: a store holding two books (Nigel Rees / reference, Evelyn Waugh / fiction)
+    // and one bicycle.
+    private String text = "{ \"store\": {\n" +
+            " \"book\": [ \n" +
+            " { \"category\": \"reference\",\n" +
+            " \"author\": \"Nigel Rees\",\n" +
+            " \"title\": \"Sayings of the Century\",\n" +
+            " \"price\": 8.95\n" +
+            " },\n" +
+            " { \"category\": \"fiction\",\n" +
+            " \"author\": \"Evelyn Waugh\",\n" +
+            " \"title\": \"Sword of Honour\",\n" +
+            " \"price\": 12.99,\n" +
+            " \"isbn\": \"0-553-21311-3\"\n" +
+            " }\n" +
+            " ],\n" +
+            " \"bicycle\": {\n" +
+            " \"color\": \"red\",\n" +
+            " \"price\": 19.95\n" +
+            " }\n" +
+            " }\n" +
+            "}";
+
+    /**
+     * Checks both selector entry points against the fixture: a path that yields a list of
+     * scalars, and a filter path that yields a list of objects.
+     */
+    @Test
+    public void test() {
+        // Scalar matches: select() gives the first author, selectList() gives both in order.
+        JsonPathSelector jsonPathSelector = new JsonPathSelector("$.store.book[*].author");
+        String select = jsonPathSelector.select(text);
+        List<?> list = jsonPathSelector.selectList(text);
+        Assert.assertEquals("Nigel Rees", select);
+        Assert.assertNotNull(list);
+        Assert.assertEquals(2, list.size());
+        Assert.assertEquals("Nigel Rees", list.get(0));
+        Assert.assertEquals("Evelyn Waugh", list.get(1));
+
+        // Object matches: only the "reference" book passes the filter.
+        jsonPathSelector = new JsonPathSelector("$.store.book[?(@.category == 'reference')]");
+        list = jsonPathSelector.selectList(text);
+        select = jsonPathSelector.select(text);
+        Assert.assertNotNull(list);
+        Assert.assertEquals(1, list.size());
+        Assert.assertNotNull(select);
+        // The exact string form of a matched object is library-defined, so only check content.
+        Assert.assertTrue(select.contains("Nigel Rees"));
+    }
+}