diff --git a/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/AngularJSProcessor.java b/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/AngularJSProcessor.java
index 18719bdc..ab560e45 100644
--- a/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/AngularJSProcessor.java
+++ b/webmagic-samples/src/main/java/us/codecraft/webmagic/samples/AngularJSProcessor.java
@@ -3,6 +3,7 @@ package us.codecraft.webmagic.samples;
 import org.apache.commons.collections.CollectionUtils;
 import us.codecraft.webmagic.Page;
 import us.codecraft.webmagic.Site;
+import us.codecraft.webmagic.Spider;
 import us.codecraft.webmagic.processor.PageProcessor;
 import us.codecraft.webmagic.selector.JsonPathSelector;
 
@@ -23,15 +24,15 @@ public class AngularJSProcessor implements PageProcessor {
     @Override
     public void process(Page page) {
         if (page.getUrl().regex(LIST_URL).match()) {
-            List<String> ids = new JsonPathSelector("$.data._id").selectList(page.getRawText());
+            List<String> ids = new JsonPathSelector("$.data[*]._id").selectList(page.getRawText());
             if (CollectionUtils.isNotEmpty(ids)) {
                 for (String id : ids) {
-                    page.addTargetRequest("http://angularjs\\.cn/api/article/" + id);
+                    page.addTargetRequest("http://angularjs.cn/api/article/" + id);
                 }
             }
         } else {
-            page.putField("title", new JsonPathSelector("$.title").select(page.getRawText()));
-            page.putField("content", new JsonPathSelector("$.content").select(page.getRawText()));
+            page.putField("title", new JsonPathSelector("$.data.title").select(page.getRawText()));
+            page.putField("content", new JsonPathSelector("$.data.content").select(page.getRawText()));
         }
 
     }
@@ -40,4 +41,8 @@ public class AngularJSProcessor implements PageProcessor {
     public Site getSite() {
         return site;
     }
+
+    public static void main(String[] args) {
+        Spider.create(new AngularJSProcessor()).addUrl("http://angularjs.cn/api/article/latest?p=1&s=20").run();
+    }
 }